{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 7177, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00013934369121438027, "grad_norm": 65.10064697265625, "learning_rate": 0.0, "loss": 0.73809814453125, "step": 1 }, { "epoch": 0.00027868738242876054, "grad_norm": 78.92225646972656, "learning_rate": 1.953125e-08, "loss": 0.74761962890625, "step": 2 }, { "epoch": 0.0004180310736431408, "grad_norm": 75.29267120361328, "learning_rate": 3.90625e-08, "loss": 0.74468994140625, "step": 3 }, { "epoch": 0.0005573747648575211, "grad_norm": 78.02611541748047, "learning_rate": 5.859375000000001e-08, "loss": 0.74615478515625, "step": 4 }, { "epoch": 0.0006967184560719013, "grad_norm": 82.30753326416016, "learning_rate": 7.8125e-08, "loss": 0.7509765625, "step": 5 }, { "epoch": 0.0008360621472862816, "grad_norm": 73.82789611816406, "learning_rate": 9.765625e-08, "loss": 0.74298095703125, "step": 6 }, { "epoch": 0.0009754058385006619, "grad_norm": 81.60810852050781, "learning_rate": 1.1718750000000002e-07, "loss": 0.748046875, "step": 7 }, { "epoch": 0.0011147495297150422, "grad_norm": 80.95870208740234, "learning_rate": 1.3671875000000001e-07, "loss": 0.74798583984375, "step": 8 }, { "epoch": 0.0012540932209294225, "grad_norm": 78.38546752929688, "learning_rate": 1.5625e-07, "loss": 0.7481689453125, "step": 9 }, { "epoch": 0.0013934369121438026, "grad_norm": 79.6450424194336, "learning_rate": 1.7578125e-07, "loss": 0.7491455078125, "step": 10 }, { "epoch": 0.001532780603358183, "grad_norm": 79.1056900024414, "learning_rate": 1.953125e-07, "loss": 0.748046875, "step": 11 }, { "epoch": 0.0016721242945725633, "grad_norm": 77.35982513427734, "learning_rate": 2.1484375e-07, "loss": 0.745361328125, "step": 12 }, { "epoch": 0.0018114679857869436, "grad_norm": 76.72103118896484, "learning_rate": 2.3437500000000003e-07, "loss": 0.743896484375, "step": 13 }, { "epoch": 0.0019508116770013237, "grad_norm": 80.26400756835938, "learning_rate": 2.5390625000000003e-07, "loss": 0.74810791015625, "step": 14 }, { "epoch": 0.0020901553682157042, "grad_norm": 75.09522247314453, "learning_rate": 2.7343750000000003e-07, "loss": 0.742919921875, "step": 15 }, { "epoch": 0.0022294990594300844, "grad_norm": 78.40135955810547, "learning_rate": 2.9296875000000003e-07, "loss": 0.7449951171875, "step": 16 }, { "epoch": 0.0023688427506444645, "grad_norm": 75.74083709716797, "learning_rate": 3.125e-07, "loss": 0.7452392578125, "step": 17 }, { "epoch": 0.002508186441858845, "grad_norm": 80.96931457519531, "learning_rate": 3.3203125e-07, "loss": 0.74658203125, "step": 18 }, { "epoch": 0.002647530133073225, "grad_norm": 80.98963928222656, "learning_rate": 3.515625e-07, "loss": 0.746337890625, "step": 19 }, { "epoch": 0.0027868738242876052, "grad_norm": 71.98030853271484, "learning_rate": 3.7109375e-07, "loss": 0.7388916015625, "step": 20 }, { "epoch": 0.0029262175155019858, "grad_norm": 75.8864974975586, "learning_rate": 3.90625e-07, "loss": 0.74072265625, "step": 21 }, { "epoch": 0.003065561206716366, "grad_norm": 78.73393249511719, "learning_rate": 4.1015625e-07, "loss": 0.740966796875, "step": 22 }, { "epoch": 0.003204904897930746, "grad_norm": 81.12713623046875, "learning_rate": 4.296875e-07, "loss": 0.74169921875, "step": 23 }, { "epoch": 0.0033442485891451265, "grad_norm": 74.96858978271484, "learning_rate": 4.4921875e-07, "loss": 0.73443603515625, "step": 24 }, { "epoch": 0.0034835922803595066, "grad_norm": 73.58860778808594, "learning_rate": 4.6875000000000006e-07, "loss": 0.733154296875, "step": 25 }, { "epoch": 0.003622935971573887, "grad_norm": 73.56008911132812, "learning_rate": 4.8828125e-07, "loss": 0.7315673828125, "step": 26 }, { "epoch": 0.0037622796627882673, "grad_norm": 75.45930480957031, "learning_rate": 5.078125000000001e-07, "loss": 0.7318115234375, "step": 27 }, { "epoch": 0.0039016233540026474, "grad_norm": 79.81358337402344, "learning_rate": 5.2734375e-07, "loss": 0.7340087890625, "step": 28 }, { "epoch": 0.0040409670452170275, "grad_norm": 77.53575897216797, "learning_rate": 5.468750000000001e-07, "loss": 0.7330322265625, "step": 29 }, { "epoch": 0.0041803107364314085, "grad_norm": 75.1562728881836, "learning_rate": 5.6640625e-07, "loss": 0.72247314453125, "step": 30 }, { "epoch": 0.004319654427645789, "grad_norm": 77.07166290283203, "learning_rate": 5.859375000000001e-07, "loss": 0.72210693359375, "step": 31 }, { "epoch": 0.004458998118860169, "grad_norm": 79.1263198852539, "learning_rate": 6.0546875e-07, "loss": 0.72344970703125, "step": 32 }, { "epoch": 0.004598341810074549, "grad_norm": 76.24040985107422, "learning_rate": 6.25e-07, "loss": 0.70562744140625, "step": 33 }, { "epoch": 0.004737685501288929, "grad_norm": 67.07266235351562, "learning_rate": 6.445312500000001e-07, "loss": 0.69818115234375, "step": 34 }, { "epoch": 0.004877029192503309, "grad_norm": 71.64059448242188, "learning_rate": 6.640625e-07, "loss": 0.698974609375, "step": 35 }, { "epoch": 0.00501637288371769, "grad_norm": 73.37696075439453, "learning_rate": 6.835937500000001e-07, "loss": 0.69586181640625, "step": 36 }, { "epoch": 0.00515571657493207, "grad_norm": 67.53209686279297, "learning_rate": 7.03125e-07, "loss": 0.6923828125, "step": 37 }, { "epoch": 0.00529506026614645, "grad_norm": 69.56661224365234, "learning_rate": 7.226562500000001e-07, "loss": 0.69158935546875, "step": 38 }, { "epoch": 0.00543440395736083, "grad_norm": 74.78086853027344, "learning_rate": 7.421875e-07, "loss": 0.6895751953125, "step": 39 }, { "epoch": 0.0055737476485752105, "grad_norm": 75.86820983886719, "learning_rate": 7.617187500000001e-07, "loss": 0.6898193359375, "step": 40 }, { "epoch": 0.005713091339789591, "grad_norm": 67.13249206542969, "learning_rate": 7.8125e-07, "loss": 0.6790771484375, "step": 41 }, { "epoch": 0.0058524350310039715, "grad_norm": 65.73731994628906, "learning_rate": 8.007812500000001e-07, "loss": 0.66595458984375, "step": 42 }, { "epoch": 0.005991778722218352, "grad_norm": 59.57167434692383, "learning_rate": 8.203125e-07, "loss": 0.66741943359375, "step": 43 }, { "epoch": 0.006131122413432732, "grad_norm": 66.3625259399414, "learning_rate": 8.398437500000001e-07, "loss": 0.66107177734375, "step": 44 }, { "epoch": 0.006270466104647112, "grad_norm": 56.25913619995117, "learning_rate": 8.59375e-07, "loss": 0.66192626953125, "step": 45 }, { "epoch": 0.006409809795861492, "grad_norm": 62.17911911010742, "learning_rate": 8.789062500000001e-07, "loss": 0.63165283203125, "step": 46 }, { "epoch": 0.006549153487075873, "grad_norm": 48.83024597167969, "learning_rate": 8.984375e-07, "loss": 0.6212158203125, "step": 47 }, { "epoch": 0.006688497178290253, "grad_norm": 54.91569900512695, "learning_rate": 9.179687500000001e-07, "loss": 0.6043701171875, "step": 48 }, { "epoch": 0.006827840869504633, "grad_norm": 62.44080352783203, "learning_rate": 9.375000000000001e-07, "loss": 0.59771728515625, "step": 49 }, { "epoch": 0.006967184560719013, "grad_norm": 52.200199127197266, "learning_rate": 9.570312500000002e-07, "loss": 0.6015625, "step": 50 }, { "epoch": 0.007106528251933393, "grad_norm": 56.06832504272461, "learning_rate": 9.765625e-07, "loss": 0.59722900390625, "step": 51 }, { "epoch": 0.007245871943147774, "grad_norm": 51.7149658203125, "learning_rate": 9.9609375e-07, "loss": 0.5953369140625, "step": 52 }, { "epoch": 0.0073852156343621545, "grad_norm": 56.40342712402344, "learning_rate": 1.0156250000000001e-06, "loss": 0.584228515625, "step": 53 }, { "epoch": 0.007524559325576535, "grad_norm": 49.80830001831055, "learning_rate": 1.0351562500000002e-06, "loss": 0.59405517578125, "step": 54 }, { "epoch": 0.007663903016790915, "grad_norm": 53.431983947753906, "learning_rate": 1.0546875e-06, "loss": 0.58441162109375, "step": 55 }, { "epoch": 0.007803246708005295, "grad_norm": 45.46272277832031, "learning_rate": 1.07421875e-06, "loss": 0.59112548828125, "step": 56 }, { "epoch": 0.007942590399219676, "grad_norm": 44.624515533447266, "learning_rate": 1.0937500000000001e-06, "loss": 0.57568359375, "step": 57 }, { "epoch": 0.008081934090434055, "grad_norm": 52.97477340698242, "learning_rate": 1.1132812500000002e-06, "loss": 0.54736328125, "step": 58 }, { "epoch": 0.008221277781648436, "grad_norm": 48.944580078125, "learning_rate": 1.1328125e-06, "loss": 0.54620361328125, "step": 59 }, { "epoch": 0.008360621472862817, "grad_norm": 41.90934753417969, "learning_rate": 1.15234375e-06, "loss": 0.548370361328125, "step": 60 }, { "epoch": 0.008499965164077196, "grad_norm": 40.535728454589844, "learning_rate": 1.1718750000000001e-06, "loss": 0.536346435546875, "step": 61 }, { "epoch": 0.008639308855291577, "grad_norm": 37.19288635253906, "learning_rate": 1.1914062500000002e-06, "loss": 0.52685546875, "step": 62 }, { "epoch": 0.008778652546505956, "grad_norm": 33.07185745239258, "learning_rate": 1.2109375e-06, "loss": 0.52252197265625, "step": 63 }, { "epoch": 0.008917996237720337, "grad_norm": 34.75764465332031, "learning_rate": 1.23046875e-06, "loss": 0.486419677734375, "step": 64 }, { "epoch": 0.009057339928934717, "grad_norm": 30.236209869384766, "learning_rate": 1.25e-06, "loss": 0.488433837890625, "step": 65 }, { "epoch": 0.009196683620149098, "grad_norm": 33.85698318481445, "learning_rate": 1.2695312500000002e-06, "loss": 0.450286865234375, "step": 66 }, { "epoch": 0.009336027311363479, "grad_norm": 36.47811508178711, "learning_rate": 1.2890625000000002e-06, "loss": 0.437469482421875, "step": 67 }, { "epoch": 0.009475371002577858, "grad_norm": 36.03645324707031, "learning_rate": 1.30859375e-06, "loss": 0.41632080078125, "step": 68 }, { "epoch": 0.009614714693792239, "grad_norm": 25.62830352783203, "learning_rate": 1.328125e-06, "loss": 0.46624755859375, "step": 69 }, { "epoch": 0.009754058385006618, "grad_norm": 32.04475021362305, "learning_rate": 1.3476562500000001e-06, "loss": 0.41705322265625, "step": 70 }, { "epoch": 0.009893402076220999, "grad_norm": 21.188400268554688, "learning_rate": 1.3671875000000002e-06, "loss": 0.4656982421875, "step": 71 }, { "epoch": 0.01003274576743538, "grad_norm": 29.110692977905273, "learning_rate": 1.38671875e-06, "loss": 0.415679931640625, "step": 72 }, { "epoch": 0.01017208945864976, "grad_norm": 16.76731300354004, "learning_rate": 1.40625e-06, "loss": 0.484954833984375, "step": 73 }, { "epoch": 0.01031143314986414, "grad_norm": 28.016374588012695, "learning_rate": 1.4257812500000001e-06, "loss": 0.41815185546875, "step": 74 }, { "epoch": 0.01045077684107852, "grad_norm": 30.479352951049805, "learning_rate": 1.4453125000000002e-06, "loss": 0.386962890625, "step": 75 }, { "epoch": 0.0105901205322929, "grad_norm": 15.488816261291504, "learning_rate": 1.46484375e-06, "loss": 0.4647216796875, "step": 76 }, { "epoch": 0.010729464223507281, "grad_norm": 21.805572509765625, "learning_rate": 1.484375e-06, "loss": 0.41461181640625, "step": 77 }, { "epoch": 0.01086880791472166, "grad_norm": 20.45802116394043, "learning_rate": 1.5039062500000001e-06, "loss": 0.413818359375, "step": 78 }, { "epoch": 0.011008151605936042, "grad_norm": 17.78790855407715, "learning_rate": 1.5234375000000002e-06, "loss": 0.4189453125, "step": 79 }, { "epoch": 0.011147495297150421, "grad_norm": 17.41199493408203, "learning_rate": 1.54296875e-06, "loss": 0.41033935546875, "step": 80 }, { "epoch": 0.011286838988364802, "grad_norm": 9.749347686767578, "learning_rate": 1.5625e-06, "loss": 0.45709228515625, "step": 81 }, { "epoch": 0.011426182679579183, "grad_norm": 10.469582557678223, "learning_rate": 1.5820312500000001e-06, "loss": 0.44000244140625, "step": 82 }, { "epoch": 0.011565526370793562, "grad_norm": 7.593387603759766, "learning_rate": 1.6015625000000002e-06, "loss": 0.4549560546875, "step": 83 }, { "epoch": 0.011704870062007943, "grad_norm": 13.522713661193848, "learning_rate": 1.6210937500000002e-06, "loss": 0.39459228515625, "step": 84 }, { "epoch": 0.011844213753222322, "grad_norm": 12.032242774963379, "learning_rate": 1.640625e-06, "loss": 0.39703369140625, "step": 85 }, { "epoch": 0.011983557444436703, "grad_norm": 12.38936710357666, "learning_rate": 1.6601562500000001e-06, "loss": 0.38873291015625, "step": 86 }, { "epoch": 0.012122901135651083, "grad_norm": 14.84524917602539, "learning_rate": 1.6796875000000002e-06, "loss": 0.356201171875, "step": 87 }, { "epoch": 0.012262244826865464, "grad_norm": 9.917829513549805, "learning_rate": 1.6992187500000002e-06, "loss": 0.38836669921875, "step": 88 }, { "epoch": 0.012401588518079844, "grad_norm": 10.890185356140137, "learning_rate": 1.71875e-06, "loss": 0.3763885498046875, "step": 89 }, { "epoch": 0.012540932209294224, "grad_norm": 11.864543914794922, "learning_rate": 1.7382812500000001e-06, "loss": 0.3515472412109375, "step": 90 }, { "epoch": 0.012680275900508605, "grad_norm": 9.631448745727539, "learning_rate": 1.7578125000000002e-06, "loss": 0.363677978515625, "step": 91 }, { "epoch": 0.012819619591722984, "grad_norm": 11.855984687805176, "learning_rate": 1.7773437500000002e-06, "loss": 0.33416748046875, "step": 92 }, { "epoch": 0.012958963282937365, "grad_norm": 3.08948016166687, "learning_rate": 1.796875e-06, "loss": 0.4496307373046875, "step": 93 }, { "epoch": 0.013098306974151746, "grad_norm": 9.693194389343262, "learning_rate": 1.81640625e-06, "loss": 0.3441009521484375, "step": 94 }, { "epoch": 0.013237650665366125, "grad_norm": 10.126220703125, "learning_rate": 1.8359375000000002e-06, "loss": 0.3387451171875, "step": 95 }, { "epoch": 0.013376994356580506, "grad_norm": 12.566305160522461, "learning_rate": 1.8554687500000002e-06, "loss": 0.3203125, "step": 96 }, { "epoch": 0.013516338047794885, "grad_norm": 8.41284465789795, "learning_rate": 1.8750000000000003e-06, "loss": 0.354705810546875, "step": 97 }, { "epoch": 0.013655681739009266, "grad_norm": 8.68389892578125, "learning_rate": 1.89453125e-06, "loss": 0.348846435546875, "step": 98 }, { "epoch": 0.013795025430223647, "grad_norm": 3.780381679534912, "learning_rate": 1.9140625000000004e-06, "loss": 0.4035491943359375, "step": 99 }, { "epoch": 0.013934369121438027, "grad_norm": 8.293663024902344, "learning_rate": 1.93359375e-06, "loss": 0.3441162109375, "step": 100 }, { "epoch": 0.014073712812652408, "grad_norm": 11.273847579956055, "learning_rate": 1.953125e-06, "loss": 0.3058319091796875, "step": 101 }, { "epoch": 0.014213056503866787, "grad_norm": 3.354212522506714, "learning_rate": 1.97265625e-06, "loss": 0.41326904296875, "step": 102 }, { "epoch": 0.014352400195081168, "grad_norm": 8.541099548339844, "learning_rate": 1.9921875e-06, "loss": 0.34088134765625, "step": 103 }, { "epoch": 0.014491743886295549, "grad_norm": 5.458468914031982, "learning_rate": 2.01171875e-06, "loss": 0.374786376953125, "step": 104 }, { "epoch": 0.014631087577509928, "grad_norm": 9.970693588256836, "learning_rate": 2.0312500000000002e-06, "loss": 0.3062286376953125, "step": 105 }, { "epoch": 0.014770431268724309, "grad_norm": 6.131522178649902, "learning_rate": 2.0507812500000003e-06, "loss": 0.3623199462890625, "step": 106 }, { "epoch": 0.014909774959938688, "grad_norm": 8.758720397949219, "learning_rate": 2.0703125000000003e-06, "loss": 0.3175811767578125, "step": 107 }, { "epoch": 0.01504911865115307, "grad_norm": 10.767895698547363, "learning_rate": 2.08984375e-06, "loss": 0.2716522216796875, "step": 108 }, { "epoch": 0.01518846234236745, "grad_norm": 6.005537509918213, "learning_rate": 2.109375e-06, "loss": 0.334228515625, "step": 109 }, { "epoch": 0.01532780603358183, "grad_norm": 6.020750045776367, "learning_rate": 2.12890625e-06, "loss": 0.3370513916015625, "step": 110 }, { "epoch": 0.01546714972479621, "grad_norm": 10.706913948059082, "learning_rate": 2.1484375e-06, "loss": 0.2930908203125, "step": 111 }, { "epoch": 0.01560649341601059, "grad_norm": 6.077110290527344, "learning_rate": 2.16796875e-06, "loss": 0.3428497314453125, "step": 112 }, { "epoch": 0.01574583710722497, "grad_norm": 6.072598934173584, "learning_rate": 2.1875000000000002e-06, "loss": 0.3807830810546875, "step": 113 }, { "epoch": 0.01588518079843935, "grad_norm": 8.411965370178223, "learning_rate": 2.2070312500000003e-06, "loss": 0.330047607421875, "step": 114 }, { "epoch": 0.016024524489653733, "grad_norm": 9.596375465393066, "learning_rate": 2.2265625000000003e-06, "loss": 0.2802734375, "step": 115 }, { "epoch": 0.01616386818086811, "grad_norm": 6.960529804229736, "learning_rate": 2.2460937500000004e-06, "loss": 0.392242431640625, "step": 116 }, { "epoch": 0.01630321187208249, "grad_norm": 9.579331398010254, "learning_rate": 2.265625e-06, "loss": 0.295135498046875, "step": 117 }, { "epoch": 0.016442555563296872, "grad_norm": 9.610276222229004, "learning_rate": 2.28515625e-06, "loss": 0.286407470703125, "step": 118 }, { "epoch": 0.016581899254511253, "grad_norm": 7.111317157745361, "learning_rate": 2.3046875e-06, "loss": 0.339630126953125, "step": 119 }, { "epoch": 0.016721242945725634, "grad_norm": 6.593630313873291, "learning_rate": 2.32421875e-06, "loss": 0.3627471923828125, "step": 120 }, { "epoch": 0.01686058663694001, "grad_norm": 5.655117511749268, "learning_rate": 2.3437500000000002e-06, "loss": 0.3681488037109375, "step": 121 }, { "epoch": 0.016999930328154392, "grad_norm": 12.018743515014648, "learning_rate": 2.3632812500000003e-06, "loss": 0.289947509765625, "step": 122 }, { "epoch": 0.017139274019368773, "grad_norm": 13.513590812683105, "learning_rate": 2.3828125000000003e-06, "loss": 0.2751617431640625, "step": 123 }, { "epoch": 0.017278617710583154, "grad_norm": 11.23283576965332, "learning_rate": 2.4023437500000004e-06, "loss": 0.3376922607421875, "step": 124 }, { "epoch": 0.017417961401797532, "grad_norm": 8.407795906066895, "learning_rate": 2.421875e-06, "loss": 0.2696533203125, "step": 125 }, { "epoch": 0.017557305093011913, "grad_norm": 6.905283451080322, "learning_rate": 2.44140625e-06, "loss": 0.2768096923828125, "step": 126 }, { "epoch": 0.017696648784226294, "grad_norm": 6.369194984436035, "learning_rate": 2.4609375e-06, "loss": 0.30767822265625, "step": 127 }, { "epoch": 0.017835992475440675, "grad_norm": 9.105620384216309, "learning_rate": 2.48046875e-06, "loss": 0.3165130615234375, "step": 128 }, { "epoch": 0.017975336166655056, "grad_norm": 8.54529857635498, "learning_rate": 2.5e-06, "loss": 0.306488037109375, "step": 129 }, { "epoch": 0.018114679857869433, "grad_norm": 8.964430809020996, "learning_rate": 2.5195312500000003e-06, "loss": 0.2397308349609375, "step": 130 }, { "epoch": 0.018254023549083814, "grad_norm": 7.420598983764648, "learning_rate": 2.5390625000000003e-06, "loss": 0.310791015625, "step": 131 }, { "epoch": 0.018393367240298195, "grad_norm": 5.447384357452393, "learning_rate": 2.5585937500000004e-06, "loss": 0.3407135009765625, "step": 132 }, { "epoch": 0.018532710931512576, "grad_norm": 8.593621253967285, "learning_rate": 2.5781250000000004e-06, "loss": 0.24603271484375, "step": 133 }, { "epoch": 0.018672054622726957, "grad_norm": 12.004803657531738, "learning_rate": 2.59765625e-06, "loss": 0.2552032470703125, "step": 134 }, { "epoch": 0.018811398313941335, "grad_norm": 9.695544242858887, "learning_rate": 2.6171875e-06, "loss": 0.2474365234375, "step": 135 }, { "epoch": 0.018950742005155716, "grad_norm": 9.458828926086426, "learning_rate": 2.63671875e-06, "loss": 0.35064697265625, "step": 136 }, { "epoch": 0.019090085696370097, "grad_norm": 7.40460205078125, "learning_rate": 2.65625e-06, "loss": 0.2787628173828125, "step": 137 }, { "epoch": 0.019229429387584478, "grad_norm": 8.775396347045898, "learning_rate": 2.6757812500000002e-06, "loss": 0.29524993896484375, "step": 138 }, { "epoch": 0.01936877307879886, "grad_norm": 8.088225364685059, "learning_rate": 2.6953125000000003e-06, "loss": 0.31607818603515625, "step": 139 }, { "epoch": 0.019508116770013236, "grad_norm": 7.7596001625061035, "learning_rate": 2.7148437500000003e-06, "loss": 0.24835968017578125, "step": 140 }, { "epoch": 0.019647460461227617, "grad_norm": 7.207478046417236, "learning_rate": 2.7343750000000004e-06, "loss": 0.33051300048828125, "step": 141 }, { "epoch": 0.019786804152441998, "grad_norm": 8.641013145446777, "learning_rate": 2.75390625e-06, "loss": 0.2651519775390625, "step": 142 }, { "epoch": 0.01992614784365638, "grad_norm": 7.278974533081055, "learning_rate": 2.7734375e-06, "loss": 0.32326507568359375, "step": 143 }, { "epoch": 0.02006549153487076, "grad_norm": 7.116968154907227, "learning_rate": 2.79296875e-06, "loss": 0.293487548828125, "step": 144 }, { "epoch": 0.020204835226085138, "grad_norm": 6.702129364013672, "learning_rate": 2.8125e-06, "loss": 0.2668609619140625, "step": 145 }, { "epoch": 0.02034417891729952, "grad_norm": 8.049309730529785, "learning_rate": 2.8320312500000002e-06, "loss": 0.2510986328125, "step": 146 }, { "epoch": 0.0204835226085139, "grad_norm": 6.790170192718506, "learning_rate": 2.8515625000000003e-06, "loss": 0.28633880615234375, "step": 147 }, { "epoch": 0.02062286629972828, "grad_norm": 11.673249244689941, "learning_rate": 2.8710937500000003e-06, "loss": 0.25943756103515625, "step": 148 }, { "epoch": 0.02076220999094266, "grad_norm": 5.931828022003174, "learning_rate": 2.8906250000000004e-06, "loss": 0.22537994384765625, "step": 149 }, { "epoch": 0.02090155368215704, "grad_norm": 6.256399631500244, "learning_rate": 2.9101562500000004e-06, "loss": 0.255706787109375, "step": 150 }, { "epoch": 0.02104089737337142, "grad_norm": 8.622085571289062, "learning_rate": 2.9296875e-06, "loss": 0.30789947509765625, "step": 151 }, { "epoch": 0.0211802410645858, "grad_norm": 5.119392395019531, "learning_rate": 2.94921875e-06, "loss": 0.29903411865234375, "step": 152 }, { "epoch": 0.021319584755800182, "grad_norm": 7.510827541351318, "learning_rate": 2.96875e-06, "loss": 0.20166015625, "step": 153 }, { "epoch": 0.021458928447014563, "grad_norm": 12.177742004394531, "learning_rate": 2.9882812500000002e-06, "loss": 0.26532745361328125, "step": 154 }, { "epoch": 0.02159827213822894, "grad_norm": 5.12837553024292, "learning_rate": 3.0078125000000003e-06, "loss": 0.25586700439453125, "step": 155 }, { "epoch": 0.02173761582944332, "grad_norm": 9.150864601135254, "learning_rate": 3.0273437500000003e-06, "loss": 0.251953125, "step": 156 }, { "epoch": 0.021876959520657702, "grad_norm": 9.116172790527344, "learning_rate": 3.0468750000000004e-06, "loss": 0.2472991943359375, "step": 157 }, { "epoch": 0.022016303211872083, "grad_norm": 5.955121994018555, "learning_rate": 3.0664062500000004e-06, "loss": 0.30332183837890625, "step": 158 }, { "epoch": 0.022155646903086464, "grad_norm": 4.61868143081665, "learning_rate": 3.0859375e-06, "loss": 0.22451019287109375, "step": 159 }, { "epoch": 0.022294990594300842, "grad_norm": 9.569432258605957, "learning_rate": 3.10546875e-06, "loss": 0.29358673095703125, "step": 160 }, { "epoch": 0.022434334285515223, "grad_norm": 11.118877410888672, "learning_rate": 3.125e-06, "loss": 0.29990386962890625, "step": 161 }, { "epoch": 0.022573677976729604, "grad_norm": 5.652851104736328, "learning_rate": 3.14453125e-06, "loss": 0.198089599609375, "step": 162 }, { "epoch": 0.022713021667943985, "grad_norm": 10.992175102233887, "learning_rate": 3.1640625000000003e-06, "loss": 0.25012969970703125, "step": 163 }, { "epoch": 0.022852365359158366, "grad_norm": 8.059642791748047, "learning_rate": 3.1835937500000003e-06, "loss": 0.19985198974609375, "step": 164 }, { "epoch": 0.022991709050372743, "grad_norm": 5.298708438873291, "learning_rate": 3.2031250000000004e-06, "loss": 0.233154296875, "step": 165 }, { "epoch": 0.023131052741587124, "grad_norm": 7.510090351104736, "learning_rate": 3.2226562500000004e-06, "loss": 0.26042938232421875, "step": 166 }, { "epoch": 0.023270396432801505, "grad_norm": 12.54176139831543, "learning_rate": 3.2421875000000005e-06, "loss": 0.23970794677734375, "step": 167 }, { "epoch": 0.023409740124015886, "grad_norm": 9.910327911376953, "learning_rate": 3.26171875e-06, "loss": 0.194091796875, "step": 168 }, { "epoch": 0.023549083815230267, "grad_norm": 10.536211967468262, "learning_rate": 3.28125e-06, "loss": 0.1996002197265625, "step": 169 }, { "epoch": 0.023688427506444645, "grad_norm": 4.7909722328186035, "learning_rate": 3.30078125e-06, "loss": 0.17577362060546875, "step": 170 }, { "epoch": 0.023827771197659026, "grad_norm": 7.563351631164551, "learning_rate": 3.3203125000000002e-06, "loss": 0.260284423828125, "step": 171 }, { "epoch": 0.023967114888873407, "grad_norm": 5.00044584274292, "learning_rate": 3.3398437500000003e-06, "loss": 0.19191741943359375, "step": 172 }, { "epoch": 0.024106458580087788, "grad_norm": 5.003511905670166, "learning_rate": 3.3593750000000003e-06, "loss": 0.20490264892578125, "step": 173 }, { "epoch": 0.024245802271302165, "grad_norm": 4.997885227203369, "learning_rate": 3.3789062500000004e-06, "loss": 0.2277984619140625, "step": 174 }, { "epoch": 0.024385145962516546, "grad_norm": 6.975695610046387, "learning_rate": 3.3984375000000004e-06, "loss": 0.277618408203125, "step": 175 }, { "epoch": 0.024524489653730927, "grad_norm": 7.626981258392334, "learning_rate": 3.41796875e-06, "loss": 0.23442840576171875, "step": 176 }, { "epoch": 0.024663833344945308, "grad_norm": 7.425158500671387, "learning_rate": 3.4375e-06, "loss": 0.19139862060546875, "step": 177 }, { "epoch": 0.02480317703615969, "grad_norm": 8.046871185302734, "learning_rate": 3.45703125e-06, "loss": 0.20842742919921875, "step": 178 }, { "epoch": 0.024942520727374067, "grad_norm": 5.006322860717773, "learning_rate": 3.4765625000000002e-06, "loss": 0.1876220703125, "step": 179 }, { "epoch": 0.025081864418588447, "grad_norm": 6.431712627410889, "learning_rate": 3.4960937500000003e-06, "loss": 0.1618499755859375, "step": 180 }, { "epoch": 0.02522120810980283, "grad_norm": 6.301774501800537, "learning_rate": 3.5156250000000003e-06, "loss": 0.22601318359375, "step": 181 }, { "epoch": 0.02536055180101721, "grad_norm": 7.458109378814697, "learning_rate": 3.5351562500000004e-06, "loss": 0.2382049560546875, "step": 182 }, { "epoch": 0.02549989549223159, "grad_norm": 5.9412407875061035, "learning_rate": 3.5546875000000004e-06, "loss": 0.3061676025390625, "step": 183 }, { "epoch": 0.025639239183445968, "grad_norm": 9.942524909973145, "learning_rate": 3.5742187500000005e-06, "loss": 0.2581939697265625, "step": 184 }, { "epoch": 0.02577858287466035, "grad_norm": 5.99011754989624, "learning_rate": 3.59375e-06, "loss": 0.21595001220703125, "step": 185 }, { "epoch": 0.02591792656587473, "grad_norm": 5.396662712097168, "learning_rate": 3.61328125e-06, "loss": 0.2552032470703125, "step": 186 }, { "epoch": 0.02605727025708911, "grad_norm": 4.9180192947387695, "learning_rate": 3.6328125e-06, "loss": 0.24720001220703125, "step": 187 }, { "epoch": 0.026196613948303492, "grad_norm": 10.721936225891113, "learning_rate": 3.6523437500000003e-06, "loss": 0.24346160888671875, "step": 188 }, { "epoch": 0.02633595763951787, "grad_norm": 3.0831849575042725, "learning_rate": 3.6718750000000003e-06, "loss": 0.19023895263671875, "step": 189 }, { "epoch": 0.02647530133073225, "grad_norm": 7.040544033050537, "learning_rate": 3.6914062500000004e-06, "loss": 0.24385833740234375, "step": 190 }, { "epoch": 0.02661464502194663, "grad_norm": 8.249387741088867, "learning_rate": 3.7109375000000004e-06, "loss": 0.2121734619140625, "step": 191 }, { "epoch": 0.026753988713161012, "grad_norm": 3.282736301422119, "learning_rate": 3.7304687500000005e-06, "loss": 0.27854156494140625, "step": 192 }, { "epoch": 0.026893332404375393, "grad_norm": 8.653287887573242, "learning_rate": 3.7500000000000005e-06, "loss": 0.31970977783203125, "step": 193 }, { "epoch": 0.02703267609558977, "grad_norm": 3.9707181453704834, "learning_rate": 3.76953125e-06, "loss": 0.19870758056640625, "step": 194 }, { "epoch": 0.02717201978680415, "grad_norm": 8.208913803100586, "learning_rate": 3.7890625e-06, "loss": 0.249298095703125, "step": 195 }, { "epoch": 0.027311363478018533, "grad_norm": 10.77297592163086, "learning_rate": 3.8085937500000002e-06, "loss": 0.188812255859375, "step": 196 }, { "epoch": 0.027450707169232914, "grad_norm": 7.4710917472839355, "learning_rate": 3.828125000000001e-06, "loss": 0.2685089111328125, "step": 197 }, { "epoch": 0.027590050860447295, "grad_norm": 11.933334350585938, "learning_rate": 3.84765625e-06, "loss": 0.2162017822265625, "step": 198 }, { "epoch": 0.027729394551661672, "grad_norm": 12.923249244689941, "learning_rate": 3.8671875e-06, "loss": 0.2278594970703125, "step": 199 }, { "epoch": 0.027868738242876053, "grad_norm": 10.857962608337402, "learning_rate": 3.88671875e-06, "loss": 0.2580413818359375, "step": 200 }, { "epoch": 0.028008081934090434, "grad_norm": 16.118755340576172, "learning_rate": 3.90625e-06, "loss": 0.212188720703125, "step": 201 }, { "epoch": 0.028147425625304815, "grad_norm": 12.282231330871582, "learning_rate": 3.92578125e-06, "loss": 0.1568145751953125, "step": 202 }, { "epoch": 0.028286769316519196, "grad_norm": 6.135168075561523, "learning_rate": 3.9453125e-06, "loss": 0.15282058715820312, "step": 203 }, { "epoch": 0.028426113007733574, "grad_norm": 11.680459022521973, "learning_rate": 3.96484375e-06, "loss": 0.18939208984375, "step": 204 }, { "epoch": 0.028565456698947955, "grad_norm": 12.598455429077148, "learning_rate": 3.984375e-06, "loss": 0.22891998291015625, "step": 205 }, { "epoch": 0.028704800390162336, "grad_norm": 4.283128261566162, "learning_rate": 4.00390625e-06, "loss": 0.17500686645507812, "step": 206 }, { "epoch": 0.028844144081376717, "grad_norm": 5.539274215698242, "learning_rate": 4.0234375e-06, "loss": 0.21436309814453125, "step": 207 }, { "epoch": 0.028983487772591097, "grad_norm": 4.878520965576172, "learning_rate": 4.0429687500000004e-06, "loss": 0.2011260986328125, "step": 208 }, { "epoch": 0.029122831463805475, "grad_norm": 3.4016273021698, "learning_rate": 4.0625000000000005e-06, "loss": 0.1453857421875, "step": 209 }, { "epoch": 0.029262175155019856, "grad_norm": 7.130232810974121, "learning_rate": 4.0820312500000005e-06, "loss": 0.2510223388671875, "step": 210 }, { "epoch": 0.029401518846234237, "grad_norm": 5.583443641662598, "learning_rate": 4.101562500000001e-06, "loss": 0.175079345703125, "step": 211 }, { "epoch": 0.029540862537448618, "grad_norm": 8.579259872436523, "learning_rate": 4.121093750000001e-06, "loss": 0.2077789306640625, "step": 212 }, { "epoch": 0.029680206228663, "grad_norm": 9.357271194458008, "learning_rate": 4.140625000000001e-06, "loss": 0.23496246337890625, "step": 213 }, { "epoch": 0.029819549919877376, "grad_norm": 8.082529067993164, "learning_rate": 4.160156250000001e-06, "loss": 0.2576446533203125, "step": 214 }, { "epoch": 0.029958893611091757, "grad_norm": 6.871522426605225, "learning_rate": 4.1796875e-06, "loss": 0.26678466796875, "step": 215 }, { "epoch": 0.03009823730230614, "grad_norm": 8.732352256774902, "learning_rate": 4.19921875e-06, "loss": 0.2734375, "step": 216 }, { "epoch": 0.03023758099352052, "grad_norm": 4.361593723297119, "learning_rate": 4.21875e-06, "loss": 0.19646835327148438, "step": 217 }, { "epoch": 0.0303769246847349, "grad_norm": 3.8722517490386963, "learning_rate": 4.23828125e-06, "loss": 0.21826171875, "step": 218 }, { "epoch": 0.030516268375949278, "grad_norm": 10.531266212463379, "learning_rate": 4.2578125e-06, "loss": 0.18744659423828125, "step": 219 }, { "epoch": 0.03065561206716366, "grad_norm": 5.928267955780029, "learning_rate": 4.27734375e-06, "loss": 0.20369720458984375, "step": 220 }, { "epoch": 0.03079495575837804, "grad_norm": 5.380374908447266, "learning_rate": 4.296875e-06, "loss": 0.23256683349609375, "step": 221 }, { "epoch": 0.03093429944959242, "grad_norm": 13.21196174621582, "learning_rate": 4.31640625e-06, "loss": 0.299468994140625, "step": 222 }, { "epoch": 0.031073643140806798, "grad_norm": 16.001474380493164, "learning_rate": 4.3359375e-06, "loss": 0.27252197265625, "step": 223 }, { "epoch": 0.03121298683202118, "grad_norm": 7.9855265617370605, "learning_rate": 4.35546875e-06, "loss": 0.2083587646484375, "step": 224 }, { "epoch": 0.031352330523235564, "grad_norm": 8.146829605102539, "learning_rate": 4.3750000000000005e-06, "loss": 0.218780517578125, "step": 225 }, { "epoch": 0.03149167421444994, "grad_norm": 11.410808563232422, "learning_rate": 4.3945312500000005e-06, "loss": 0.19887542724609375, "step": 226 }, { "epoch": 0.03163101790566432, "grad_norm": 5.428371429443359, "learning_rate": 4.4140625000000006e-06, "loss": 0.18627166748046875, "step": 227 }, { "epoch": 0.0317703615968787, "grad_norm": 6.605254650115967, "learning_rate": 4.433593750000001e-06, "loss": 0.2442626953125, "step": 228 }, { "epoch": 0.03190970528809308, "grad_norm": 12.940912246704102, "learning_rate": 4.453125000000001e-06, "loss": 0.26593017578125, "step": 229 }, { "epoch": 0.032049048979307465, "grad_norm": 8.060370445251465, "learning_rate": 4.472656250000001e-06, "loss": 0.285125732421875, "step": 230 }, { "epoch": 0.03218839267052184, "grad_norm": 9.543622970581055, "learning_rate": 4.492187500000001e-06, "loss": 0.19443893432617188, "step": 231 }, { "epoch": 0.03232773636173622, "grad_norm": 14.069732666015625, "learning_rate": 4.51171875e-06, "loss": 0.26737213134765625, "step": 232 }, { "epoch": 0.032467080052950605, "grad_norm": 5.229206085205078, "learning_rate": 4.53125e-06, "loss": 0.18822479248046875, "step": 233 }, { "epoch": 0.03260642374416498, "grad_norm": 11.531126976013184, "learning_rate": 4.55078125e-06, "loss": 0.28722381591796875, "step": 234 }, { "epoch": 0.032745767435379367, "grad_norm": 14.438918113708496, "learning_rate": 4.5703125e-06, "loss": 0.22002792358398438, "step": 235 }, { "epoch": 0.032885111126593744, "grad_norm": 15.263252258300781, "learning_rate": 4.58984375e-06, "loss": 0.1923370361328125, "step": 236 }, { "epoch": 0.03302445481780812, "grad_norm": 10.053017616271973, "learning_rate": 4.609375e-06, "loss": 0.208648681640625, "step": 237 }, { "epoch": 0.033163798509022506, "grad_norm": 11.863564491271973, "learning_rate": 4.62890625e-06, "loss": 0.18483734130859375, "step": 238 }, { "epoch": 0.033303142200236883, "grad_norm": 6.273617744445801, "learning_rate": 4.6484375e-06, "loss": 0.13856887817382812, "step": 239 }, { "epoch": 0.03344248589145127, "grad_norm": 6.753890037536621, "learning_rate": 4.66796875e-06, "loss": 0.2239227294921875, "step": 240 }, { "epoch": 0.033581829582665645, "grad_norm": 7.47318172454834, "learning_rate": 4.6875000000000004e-06, "loss": 0.173309326171875, "step": 241 }, { "epoch": 0.03372117327388002, "grad_norm": 4.911384582519531, "learning_rate": 4.7070312500000005e-06, "loss": 0.17118072509765625, "step": 242 }, { "epoch": 0.03386051696509441, "grad_norm": 9.138935089111328, "learning_rate": 4.7265625000000005e-06, "loss": 0.249755859375, "step": 243 }, { "epoch": 0.033999860656308785, "grad_norm": 10.548995018005371, "learning_rate": 4.746093750000001e-06, "loss": 0.2490081787109375, "step": 244 }, { "epoch": 0.03413920434752317, "grad_norm": 6.172849178314209, "learning_rate": 4.765625000000001e-06, "loss": 0.20672607421875, "step": 245 }, { "epoch": 0.03427854803873755, "grad_norm": 2.821122169494629, "learning_rate": 4.785156250000001e-06, "loss": 0.14510345458984375, "step": 246 }, { "epoch": 0.034417891729951924, "grad_norm": 8.07036018371582, "learning_rate": 4.804687500000001e-06, "loss": 0.19968414306640625, "step": 247 }, { "epoch": 0.03455723542116631, "grad_norm": 12.376590728759766, "learning_rate": 4.824218750000001e-06, "loss": 0.21384429931640625, "step": 248 }, { "epoch": 0.034696579112380686, "grad_norm": 8.739068984985352, "learning_rate": 4.84375e-06, "loss": 0.2590293884277344, "step": 249 }, { "epoch": 0.034835922803595064, "grad_norm": 5.375982284545898, "learning_rate": 4.86328125e-06, "loss": 0.17487335205078125, "step": 250 }, { "epoch": 0.03497526649480945, "grad_norm": 8.094696044921875, "learning_rate": 4.8828125e-06, "loss": 0.20062255859375, "step": 251 }, { "epoch": 0.035114610186023826, "grad_norm": 12.7658052444458, "learning_rate": 4.90234375e-06, "loss": 0.2811126708984375, "step": 252 }, { "epoch": 0.03525395387723821, "grad_norm": 7.396042346954346, "learning_rate": 4.921875e-06, "loss": 0.23323822021484375, "step": 253 }, { "epoch": 0.03539329756845259, "grad_norm": 5.590040683746338, "learning_rate": 4.94140625e-06, "loss": 0.183624267578125, "step": 254 }, { "epoch": 0.035532641259666965, "grad_norm": 11.025784492492676, "learning_rate": 4.9609375e-06, "loss": 0.21396636962890625, "step": 255 }, { "epoch": 0.03567198495088135, "grad_norm": 16.25074577331543, "learning_rate": 4.98046875e-06, "loss": 0.242218017578125, "step": 256 }, { "epoch": 0.03581132864209573, "grad_norm": 16.830867767333984, "learning_rate": 5e-06, "loss": 0.234161376953125, "step": 257 }, { "epoch": 0.03595067233331011, "grad_norm": 6.311812400817871, "learning_rate": 5.0195312500000005e-06, "loss": 0.18854522705078125, "step": 258 }, { "epoch": 0.03609001602452449, "grad_norm": 8.201610565185547, "learning_rate": 5.0390625000000005e-06, "loss": 0.2245025634765625, "step": 259 }, { "epoch": 0.03622935971573887, "grad_norm": 7.92599630355835, "learning_rate": 5.0585937500000006e-06, "loss": 0.19482421875, "step": 260 }, { "epoch": 0.03636870340695325, "grad_norm": 7.615984916687012, "learning_rate": 5.078125000000001e-06, "loss": 0.20172119140625, "step": 261 }, { "epoch": 0.03650804709816763, "grad_norm": 5.913804531097412, "learning_rate": 5.097656250000001e-06, "loss": 0.226837158203125, "step": 262 }, { "epoch": 0.03664739078938201, "grad_norm": 3.5635664463043213, "learning_rate": 5.117187500000001e-06, "loss": 0.24306488037109375, "step": 263 }, { "epoch": 0.03678673448059639, "grad_norm": 6.681707382202148, "learning_rate": 5.136718750000001e-06, "loss": 0.2574920654296875, "step": 264 }, { "epoch": 0.03692607817181077, "grad_norm": 11.13838005065918, "learning_rate": 5.156250000000001e-06, "loss": 0.2767791748046875, "step": 265 }, { "epoch": 0.03706542186302515, "grad_norm": 12.875737190246582, "learning_rate": 5.17578125e-06, "loss": 0.17960357666015625, "step": 266 }, { "epoch": 0.03720476555423953, "grad_norm": 6.452615261077881, "learning_rate": 5.1953125e-06, "loss": 0.18991851806640625, "step": 267 }, { "epoch": 0.037344109245453914, "grad_norm": 18.703691482543945, "learning_rate": 5.21484375e-06, "loss": 0.26599884033203125, "step": 268 }, { "epoch": 0.03748345293666829, "grad_norm": 18.880165100097656, "learning_rate": 5.234375e-06, "loss": 0.20916748046875, "step": 269 }, { "epoch": 0.03762279662788267, "grad_norm": 11.504010200500488, "learning_rate": 5.25390625e-06, "loss": 0.19481658935546875, "step": 270 }, { "epoch": 0.037762140319097054, "grad_norm": 5.648892402648926, "learning_rate": 5.2734375e-06, "loss": 0.19671630859375, "step": 271 }, { "epoch": 0.03790148401031143, "grad_norm": 8.575284957885742, "learning_rate": 5.29296875e-06, "loss": 0.22154617309570312, "step": 272 }, { "epoch": 0.038040827701525816, "grad_norm": 12.991547584533691, "learning_rate": 5.3125e-06, "loss": 0.21489715576171875, "step": 273 }, { "epoch": 0.03818017139274019, "grad_norm": 15.245475769042969, "learning_rate": 5.3320312500000004e-06, "loss": 0.16384124755859375, "step": 274 }, { "epoch": 0.03831951508395457, "grad_norm": 9.1818265914917, "learning_rate": 5.3515625000000005e-06, "loss": 0.200103759765625, "step": 275 }, { "epoch": 0.038458858775168955, "grad_norm": 6.469560623168945, "learning_rate": 5.3710937500000005e-06, "loss": 0.14857864379882812, "step": 276 }, { "epoch": 0.03859820246638333, "grad_norm": 6.5204339027404785, "learning_rate": 5.390625000000001e-06, "loss": 0.21407699584960938, "step": 277 }, { "epoch": 0.03873754615759772, "grad_norm": 7.564596652984619, "learning_rate": 5.410156250000001e-06, "loss": 0.22911834716796875, "step": 278 }, { "epoch": 0.038876889848812095, "grad_norm": 3.6699390411376953, "learning_rate": 5.429687500000001e-06, "loss": 0.20662307739257812, "step": 279 }, { "epoch": 0.03901623354002647, "grad_norm": 3.7049479484558105, "learning_rate": 5.449218750000001e-06, "loss": 0.17403411865234375, "step": 280 }, { "epoch": 0.03915557723124086, "grad_norm": 9.19936466217041, "learning_rate": 5.468750000000001e-06, "loss": 0.2264862060546875, "step": 281 }, { "epoch": 0.039294920922455234, "grad_norm": 6.43047571182251, "learning_rate": 5.488281250000001e-06, "loss": 0.16980743408203125, "step": 282 }, { "epoch": 0.03943426461366962, "grad_norm": 5.538243770599365, "learning_rate": 5.5078125e-06, "loss": 0.2296295166015625, "step": 283 }, { "epoch": 0.039573608304883996, "grad_norm": 3.6688742637634277, "learning_rate": 5.52734375e-06, "loss": 0.3197059631347656, "step": 284 }, { "epoch": 0.039712951996098374, "grad_norm": 9.55119800567627, "learning_rate": 5.546875e-06, "loss": 0.21274948120117188, "step": 285 }, { "epoch": 0.03985229568731276, "grad_norm": 10.165766716003418, "learning_rate": 5.56640625e-06, "loss": 0.19806289672851562, "step": 286 }, { "epoch": 0.039991639378527136, "grad_norm": 5.315456867218018, "learning_rate": 5.5859375e-06, "loss": 0.2054901123046875, "step": 287 }, { "epoch": 0.04013098306974152, "grad_norm": 12.081915855407715, "learning_rate": 5.60546875e-06, "loss": 0.16115951538085938, "step": 288 }, { "epoch": 0.0402703267609559, "grad_norm": 16.739784240722656, "learning_rate": 5.625e-06, "loss": 0.2079925537109375, "step": 289 }, { "epoch": 0.040409670452170275, "grad_norm": 7.735535144805908, "learning_rate": 5.64453125e-06, "loss": 0.1976776123046875, "step": 290 }, { "epoch": 0.04054901414338466, "grad_norm": 7.442333221435547, "learning_rate": 5.6640625000000005e-06, "loss": 0.21815872192382812, "step": 291 }, { "epoch": 0.04068835783459904, "grad_norm": 7.271765232086182, "learning_rate": 5.6835937500000005e-06, "loss": 0.24643707275390625, "step": 292 }, { "epoch": 0.04082770152581342, "grad_norm": 7.956097602844238, "learning_rate": 5.7031250000000006e-06, "loss": 0.2848052978515625, "step": 293 }, { "epoch": 0.0409670452170278, "grad_norm": 4.749429702758789, "learning_rate": 5.722656250000001e-06, "loss": 0.19135284423828125, "step": 294 }, { "epoch": 0.04110638890824218, "grad_norm": 3.340813636779785, "learning_rate": 5.742187500000001e-06, "loss": 0.18774795532226562, "step": 295 }, { "epoch": 0.04124573259945656, "grad_norm": 6.049768447875977, "learning_rate": 5.761718750000001e-06, "loss": 0.24670028686523438, "step": 296 }, { "epoch": 0.04138507629067094, "grad_norm": 7.445785045623779, "learning_rate": 5.781250000000001e-06, "loss": 0.17401885986328125, "step": 297 }, { "epoch": 0.04152441998188532, "grad_norm": 5.823215961456299, "learning_rate": 5.800781250000001e-06, "loss": 0.147674560546875, "step": 298 }, { "epoch": 0.0416637636730997, "grad_norm": 8.716926574707031, "learning_rate": 5.820312500000001e-06, "loss": 0.15755844116210938, "step": 299 }, { "epoch": 0.04180310736431408, "grad_norm": 8.415566444396973, "learning_rate": 5.83984375e-06, "loss": 0.22275161743164062, "step": 300 }, { "epoch": 0.04194245105552846, "grad_norm": 7.372264862060547, "learning_rate": 5.859375e-06, "loss": 0.24066162109375, "step": 301 }, { "epoch": 0.04208179474674284, "grad_norm": 12.92746353149414, "learning_rate": 5.87890625e-06, "loss": 0.22229766845703125, "step": 302 }, { "epoch": 0.042221138437957224, "grad_norm": 6.724241256713867, "learning_rate": 5.8984375e-06, "loss": 0.17209243774414062, "step": 303 }, { "epoch": 0.0423604821291716, "grad_norm": 4.899860858917236, "learning_rate": 5.91796875e-06, "loss": 0.2611427307128906, "step": 304 }, { "epoch": 0.04249982582038598, "grad_norm": 10.653367042541504, "learning_rate": 5.9375e-06, "loss": 0.18952178955078125, "step": 305 }, { "epoch": 0.042639169511600364, "grad_norm": 14.222881317138672, "learning_rate": 5.95703125e-06, "loss": 0.2800445556640625, "step": 306 }, { "epoch": 0.04277851320281474, "grad_norm": 8.191673278808594, "learning_rate": 5.9765625000000004e-06, "loss": 0.21331405639648438, "step": 307 }, { "epoch": 0.042917856894029126, "grad_norm": 8.729677200317383, "learning_rate": 5.9960937500000005e-06, "loss": 0.1812591552734375, "step": 308 }, { "epoch": 0.0430572005852435, "grad_norm": 12.6878023147583, "learning_rate": 6.0156250000000005e-06, "loss": 0.12833023071289062, "step": 309 }, { "epoch": 0.04319654427645788, "grad_norm": 12.055069923400879, "learning_rate": 6.035156250000001e-06, "loss": 0.2475128173828125, "step": 310 }, { "epoch": 0.043335887967672265, "grad_norm": 8.646258354187012, "learning_rate": 6.054687500000001e-06, "loss": 0.20026397705078125, "step": 311 }, { "epoch": 0.04347523165888664, "grad_norm": 8.444477081298828, "learning_rate": 6.074218750000001e-06, "loss": 0.2686271667480469, "step": 312 }, { "epoch": 0.04361457535010103, "grad_norm": 13.501977920532227, "learning_rate": 6.093750000000001e-06, "loss": 0.19277572631835938, "step": 313 }, { "epoch": 0.043753919041315405, "grad_norm": 10.623396873474121, "learning_rate": 6.113281250000001e-06, "loss": 0.2085723876953125, "step": 314 }, { "epoch": 0.04389326273252978, "grad_norm": 5.287269592285156, "learning_rate": 6.132812500000001e-06, "loss": 0.20190811157226562, "step": 315 }, { "epoch": 0.04403260642374417, "grad_norm": 4.71015739440918, "learning_rate": 6.152343750000001e-06, "loss": 0.18700408935546875, "step": 316 }, { "epoch": 0.044171950114958544, "grad_norm": 6.860650539398193, "learning_rate": 6.171875e-06, "loss": 0.22278594970703125, "step": 317 }, { "epoch": 0.04431129380617293, "grad_norm": 5.241203784942627, "learning_rate": 6.19140625e-06, "loss": 0.20586776733398438, "step": 318 }, { "epoch": 0.044450637497387306, "grad_norm": 3.629643201828003, "learning_rate": 6.2109375e-06, "loss": 0.1564178466796875, "step": 319 }, { "epoch": 0.044589981188601684, "grad_norm": 3.9022133350372314, "learning_rate": 6.23046875e-06, "loss": 0.16471099853515625, "step": 320 }, { "epoch": 0.04472932487981607, "grad_norm": 3.641348123550415, "learning_rate": 6.25e-06, "loss": 0.17827224731445312, "step": 321 }, { "epoch": 0.044868668571030446, "grad_norm": 5.800506591796875, "learning_rate": 6.26953125e-06, "loss": 0.18242645263671875, "step": 322 }, { "epoch": 0.04500801226224483, "grad_norm": 8.100198745727539, "learning_rate": 6.2890625e-06, "loss": 0.21911239624023438, "step": 323 }, { "epoch": 0.04514735595345921, "grad_norm": 4.407508850097656, "learning_rate": 6.3085937500000005e-06, "loss": 0.14146041870117188, "step": 324 }, { "epoch": 0.045286699644673585, "grad_norm": 4.387955665588379, "learning_rate": 6.3281250000000005e-06, "loss": 0.201141357421875, "step": 325 }, { "epoch": 0.04542604333588797, "grad_norm": 5.672333717346191, "learning_rate": 6.3476562500000006e-06, "loss": 0.21506118774414062, "step": 326 }, { "epoch": 0.04556538702710235, "grad_norm": 6.685797214508057, "learning_rate": 6.367187500000001e-06, "loss": 0.14650344848632812, "step": 327 }, { "epoch": 0.04570473071831673, "grad_norm": 5.835655212402344, "learning_rate": 6.386718750000001e-06, "loss": 0.20204544067382812, "step": 328 }, { "epoch": 0.04584407440953111, "grad_norm": 10.985638618469238, "learning_rate": 6.406250000000001e-06, "loss": 0.20668792724609375, "step": 329 }, { "epoch": 0.045983418100745486, "grad_norm": 3.5662968158721924, "learning_rate": 6.425781250000001e-06, "loss": 0.20897293090820312, "step": 330 }, { "epoch": 0.04612276179195987, "grad_norm": 8.491086959838867, "learning_rate": 6.445312500000001e-06, "loss": 0.198333740234375, "step": 331 }, { "epoch": 0.04626210548317425, "grad_norm": 9.383012771606445, "learning_rate": 6.464843750000001e-06, "loss": 0.19866943359375, "step": 332 }, { "epoch": 0.04640144917438863, "grad_norm": 3.667117118835449, "learning_rate": 6.484375000000001e-06, "loss": 0.17930221557617188, "step": 333 }, { "epoch": 0.04654079286560301, "grad_norm": 9.549654006958008, "learning_rate": 6.50390625e-06, "loss": 0.18783187866210938, "step": 334 }, { "epoch": 0.04668013655681739, "grad_norm": 9.378718376159668, "learning_rate": 6.5234375e-06, "loss": 0.21389007568359375, "step": 335 }, { "epoch": 0.04681948024803177, "grad_norm": 3.6718270778656006, "learning_rate": 6.54296875e-06, "loss": 0.16433334350585938, "step": 336 }, { "epoch": 0.04695882393924615, "grad_norm": 8.30208683013916, "learning_rate": 6.5625e-06, "loss": 0.15383148193359375, "step": 337 }, { "epoch": 0.047098167630460534, "grad_norm": 6.12687349319458, "learning_rate": 6.58203125e-06, "loss": 0.19197845458984375, "step": 338 }, { "epoch": 0.04723751132167491, "grad_norm": 4.246114730834961, "learning_rate": 6.6015625e-06, "loss": 0.15600967407226562, "step": 339 }, { "epoch": 0.04737685501288929, "grad_norm": 6.350311756134033, "learning_rate": 6.6210937500000004e-06, "loss": 0.153350830078125, "step": 340 }, { "epoch": 0.047516198704103674, "grad_norm": 12.99950122833252, "learning_rate": 6.6406250000000005e-06, "loss": 0.205596923828125, "step": 341 }, { "epoch": 0.04765554239531805, "grad_norm": 6.263738632202148, "learning_rate": 6.6601562500000005e-06, "loss": 0.21688079833984375, "step": 342 }, { "epoch": 0.047794886086532436, "grad_norm": 4.854628086090088, "learning_rate": 6.679687500000001e-06, "loss": 0.18871307373046875, "step": 343 }, { "epoch": 0.04793422977774681, "grad_norm": 14.280949592590332, "learning_rate": 6.699218750000001e-06, "loss": 0.19455718994140625, "step": 344 }, { "epoch": 0.04807357346896119, "grad_norm": 11.356968879699707, "learning_rate": 6.718750000000001e-06, "loss": 0.18689727783203125, "step": 345 }, { "epoch": 0.048212917160175575, "grad_norm": 3.978911876678467, "learning_rate": 6.738281250000001e-06, "loss": 0.15298843383789062, "step": 346 }, { "epoch": 0.04835226085138995, "grad_norm": 5.336716175079346, "learning_rate": 6.757812500000001e-06, "loss": 0.24925994873046875, "step": 347 }, { "epoch": 0.04849160454260433, "grad_norm": 7.439921855926514, "learning_rate": 6.777343750000001e-06, "loss": 0.18851470947265625, "step": 348 }, { "epoch": 0.048630948233818715, "grad_norm": 2.5345380306243896, "learning_rate": 6.796875000000001e-06, "loss": 0.22794342041015625, "step": 349 }, { "epoch": 0.04877029192503309, "grad_norm": 3.799032211303711, "learning_rate": 6.816406250000001e-06, "loss": 0.17238998413085938, "step": 350 }, { "epoch": 0.04890963561624748, "grad_norm": 4.733077049255371, "learning_rate": 6.8359375e-06, "loss": 0.18795013427734375, "step": 351 }, { "epoch": 0.049048979307461854, "grad_norm": 3.7592523097991943, "learning_rate": 6.85546875e-06, "loss": 0.152984619140625, "step": 352 }, { "epoch": 0.04918832299867623, "grad_norm": 3.8418686389923096, "learning_rate": 6.875e-06, "loss": 0.13797378540039062, "step": 353 }, { "epoch": 0.049327666689890616, "grad_norm": 5.929385185241699, "learning_rate": 6.89453125e-06, "loss": 0.2133026123046875, "step": 354 }, { "epoch": 0.049467010381104994, "grad_norm": 2.355891704559326, "learning_rate": 6.9140625e-06, "loss": 0.16080856323242188, "step": 355 }, { "epoch": 0.04960635407231938, "grad_norm": 5.134737014770508, "learning_rate": 6.93359375e-06, "loss": 0.21912765502929688, "step": 356 }, { "epoch": 0.049745697763533755, "grad_norm": 11.238104820251465, "learning_rate": 6.9531250000000004e-06, "loss": 0.24190902709960938, "step": 357 }, { "epoch": 0.04988504145474813, "grad_norm": 3.637293815612793, "learning_rate": 6.9726562500000005e-06, "loss": 0.17470932006835938, "step": 358 }, { "epoch": 0.05002438514596252, "grad_norm": 3.5990424156188965, "learning_rate": 6.9921875000000006e-06, "loss": 0.175628662109375, "step": 359 }, { "epoch": 0.050163728837176895, "grad_norm": 5.14459753036499, "learning_rate": 7.011718750000001e-06, "loss": 0.2607154846191406, "step": 360 }, { "epoch": 0.05030307252839128, "grad_norm": 9.052400588989258, "learning_rate": 7.031250000000001e-06, "loss": 0.1967010498046875, "step": 361 }, { "epoch": 0.05044241621960566, "grad_norm": 9.941020965576172, "learning_rate": 7.050781250000001e-06, "loss": 0.22502899169921875, "step": 362 }, { "epoch": 0.050581759910820034, "grad_norm": 3.8782129287719727, "learning_rate": 7.070312500000001e-06, "loss": 0.20290374755859375, "step": 363 }, { "epoch": 0.05072110360203442, "grad_norm": 10.779797554016113, "learning_rate": 7.089843750000001e-06, "loss": 0.18623733520507812, "step": 364 }, { "epoch": 0.050860447293248796, "grad_norm": 11.733384132385254, "learning_rate": 7.109375000000001e-06, "loss": 0.161285400390625, "step": 365 }, { "epoch": 0.05099979098446318, "grad_norm": 8.263612747192383, "learning_rate": 7.128906250000001e-06, "loss": 0.2001190185546875, "step": 366 }, { "epoch": 0.05113913467567756, "grad_norm": 6.811421871185303, "learning_rate": 7.148437500000001e-06, "loss": 0.18634414672851562, "step": 367 }, { "epoch": 0.051278478366891936, "grad_norm": 6.302134037017822, "learning_rate": 7.16796875e-06, "loss": 0.19775390625, "step": 368 }, { "epoch": 0.05141782205810632, "grad_norm": 6.280002593994141, "learning_rate": 7.1875e-06, "loss": 0.23797225952148438, "step": 369 }, { "epoch": 0.0515571657493207, "grad_norm": 7.855819225311279, "learning_rate": 7.20703125e-06, "loss": 0.24396514892578125, "step": 370 }, { "epoch": 0.05169650944053508, "grad_norm": 8.376664161682129, "learning_rate": 7.2265625e-06, "loss": 0.2071380615234375, "step": 371 }, { "epoch": 0.05183585313174946, "grad_norm": 1.8263567686080933, "learning_rate": 7.24609375e-06, "loss": 0.127532958984375, "step": 372 }, { "epoch": 0.05197519682296384, "grad_norm": 6.014217376708984, "learning_rate": 7.265625e-06, "loss": 0.21414947509765625, "step": 373 }, { "epoch": 0.05211454051417822, "grad_norm": 7.24362850189209, "learning_rate": 7.2851562500000005e-06, "loss": 0.21315383911132812, "step": 374 }, { "epoch": 0.0522538842053926, "grad_norm": 8.956491470336914, "learning_rate": 7.3046875000000005e-06, "loss": 0.22388076782226562, "step": 375 }, { "epoch": 0.052393227896606984, "grad_norm": 2.913290023803711, "learning_rate": 7.3242187500000006e-06, "loss": 0.21420669555664062, "step": 376 }, { "epoch": 0.05253257158782136, "grad_norm": 2.4282419681549072, "learning_rate": 7.343750000000001e-06, "loss": 0.1826629638671875, "step": 377 }, { "epoch": 0.05267191527903574, "grad_norm": 5.580237865447998, "learning_rate": 7.363281250000001e-06, "loss": 0.19495582580566406, "step": 378 }, { "epoch": 0.05281125897025012, "grad_norm": 9.234643936157227, "learning_rate": 7.382812500000001e-06, "loss": 0.2589988708496094, "step": 379 }, { "epoch": 0.0529506026614645, "grad_norm": 9.315163612365723, "learning_rate": 7.402343750000001e-06, "loss": 0.192169189453125, "step": 380 }, { "epoch": 0.053089946352678885, "grad_norm": 6.904824256896973, "learning_rate": 7.421875000000001e-06, "loss": 0.1942596435546875, "step": 381 }, { "epoch": 0.05322929004389326, "grad_norm": 5.819599151611328, "learning_rate": 7.441406250000001e-06, "loss": 0.2034149169921875, "step": 382 }, { "epoch": 0.05336863373510764, "grad_norm": 7.070249557495117, "learning_rate": 7.460937500000001e-06, "loss": 0.18907546997070312, "step": 383 }, { "epoch": 0.053507977426322025, "grad_norm": 4.002919673919678, "learning_rate": 7.480468750000001e-06, "loss": 0.17665863037109375, "step": 384 }, { "epoch": 0.0536473211175364, "grad_norm": 8.137691497802734, "learning_rate": 7.500000000000001e-06, "loss": 0.15872955322265625, "step": 385 }, { "epoch": 0.053786664808750786, "grad_norm": 6.550365447998047, "learning_rate": 7.51953125e-06, "loss": 0.2095489501953125, "step": 386 }, { "epoch": 0.053926008499965164, "grad_norm": 4.5294389724731445, "learning_rate": 7.5390625e-06, "loss": 0.18274688720703125, "step": 387 }, { "epoch": 0.05406535219117954, "grad_norm": 6.622796058654785, "learning_rate": 7.55859375e-06, "loss": 0.2691307067871094, "step": 388 }, { "epoch": 0.054204695882393926, "grad_norm": 11.813982009887695, "learning_rate": 7.578125e-06, "loss": 0.16804122924804688, "step": 389 }, { "epoch": 0.0543440395736083, "grad_norm": 9.020718574523926, "learning_rate": 7.5976562500000004e-06, "loss": 0.20658493041992188, "step": 390 }, { "epoch": 0.05448338326482269, "grad_norm": 5.736223220825195, "learning_rate": 7.6171875000000005e-06, "loss": 0.1911163330078125, "step": 391 }, { "epoch": 0.054622726956037065, "grad_norm": 8.085708618164062, "learning_rate": 7.63671875e-06, "loss": 0.20000839233398438, "step": 392 }, { "epoch": 0.05476207064725144, "grad_norm": 4.880776882171631, "learning_rate": 7.656250000000001e-06, "loss": 0.20056915283203125, "step": 393 }, { "epoch": 0.05490141433846583, "grad_norm": 8.802689552307129, "learning_rate": 7.67578125e-06, "loss": 0.2212371826171875, "step": 394 }, { "epoch": 0.055040758029680205, "grad_norm": 5.32822847366333, "learning_rate": 7.6953125e-06, "loss": 0.20833587646484375, "step": 395 }, { "epoch": 0.05518010172089459, "grad_norm": 5.311394214630127, "learning_rate": 7.71484375e-06, "loss": 0.16143417358398438, "step": 396 }, { "epoch": 0.05531944541210897, "grad_norm": 7.011579990386963, "learning_rate": 7.734375e-06, "loss": 0.20888519287109375, "step": 397 }, { "epoch": 0.055458789103323344, "grad_norm": 4.251484394073486, "learning_rate": 7.753906250000001e-06, "loss": 0.19412612915039062, "step": 398 }, { "epoch": 0.05559813279453773, "grad_norm": 6.236745357513428, "learning_rate": 7.7734375e-06, "loss": 0.17551803588867188, "step": 399 }, { "epoch": 0.055737476485752106, "grad_norm": 2.8424301147460938, "learning_rate": 7.792968750000001e-06, "loss": 0.13372039794921875, "step": 400 }, { "epoch": 0.05587682017696649, "grad_norm": 13.106990814208984, "learning_rate": 7.8125e-06, "loss": 0.26494598388671875, "step": 401 }, { "epoch": 0.05601616386818087, "grad_norm": 7.45327091217041, "learning_rate": 7.832031250000001e-06, "loss": 0.15591812133789062, "step": 402 }, { "epoch": 0.056155507559395246, "grad_norm": 3.310678005218506, "learning_rate": 7.8515625e-06, "loss": 0.21547317504882812, "step": 403 }, { "epoch": 0.05629485125060963, "grad_norm": 2.6378138065338135, "learning_rate": 7.871093750000001e-06, "loss": 0.15465545654296875, "step": 404 }, { "epoch": 0.05643419494182401, "grad_norm": 7.475671291351318, "learning_rate": 7.890625e-06, "loss": 0.20846939086914062, "step": 405 }, { "epoch": 0.05657353863303839, "grad_norm": 4.460272789001465, "learning_rate": 7.910156250000001e-06, "loss": 0.17292022705078125, "step": 406 }, { "epoch": 0.05671288232425277, "grad_norm": 4.678399085998535, "learning_rate": 7.9296875e-06, "loss": 0.20997238159179688, "step": 407 }, { "epoch": 0.05685222601546715, "grad_norm": 4.171482563018799, "learning_rate": 7.949218750000001e-06, "loss": 0.17902374267578125, "step": 408 }, { "epoch": 0.05699156970668153, "grad_norm": 2.5478250980377197, "learning_rate": 7.96875e-06, "loss": 0.15472793579101562, "step": 409 }, { "epoch": 0.05713091339789591, "grad_norm": 6.540135860443115, "learning_rate": 7.988281250000001e-06, "loss": 0.16460418701171875, "step": 410 }, { "epoch": 0.057270257089110294, "grad_norm": 9.569079399108887, "learning_rate": 8.0078125e-06, "loss": 0.2089691162109375, "step": 411 }, { "epoch": 0.05740960078032467, "grad_norm": 8.057820320129395, "learning_rate": 8.02734375e-06, "loss": 0.2007293701171875, "step": 412 }, { "epoch": 0.05754894447153905, "grad_norm": 3.2956995964050293, "learning_rate": 8.046875e-06, "loss": 0.18453216552734375, "step": 413 }, { "epoch": 0.05768828816275343, "grad_norm": 8.563572883605957, "learning_rate": 8.06640625e-06, "loss": 0.2124481201171875, "step": 414 }, { "epoch": 0.05782763185396781, "grad_norm": 10.516339302062988, "learning_rate": 8.085937500000001e-06, "loss": 0.197418212890625, "step": 415 }, { "epoch": 0.057966975545182195, "grad_norm": 4.367123603820801, "learning_rate": 8.10546875e-06, "loss": 0.1580047607421875, "step": 416 }, { "epoch": 0.05810631923639657, "grad_norm": 6.939394950866699, "learning_rate": 8.125000000000001e-06, "loss": 0.19604873657226562, "step": 417 }, { "epoch": 0.05824566292761095, "grad_norm": 4.036535739898682, "learning_rate": 8.14453125e-06, "loss": 0.23574066162109375, "step": 418 }, { "epoch": 0.058385006618825334, "grad_norm": 5.493677139282227, "learning_rate": 8.164062500000001e-06, "loss": 0.178924560546875, "step": 419 }, { "epoch": 0.05852435031003971, "grad_norm": 3.2795250415802, "learning_rate": 8.18359375e-06, "loss": 0.18146896362304688, "step": 420 }, { "epoch": 0.058663694001254096, "grad_norm": 2.1183974742889404, "learning_rate": 8.203125000000001e-06, "loss": 0.17180252075195312, "step": 421 }, { "epoch": 0.058803037692468474, "grad_norm": 4.210144996643066, "learning_rate": 8.22265625e-06, "loss": 0.15643692016601562, "step": 422 }, { "epoch": 0.05894238138368285, "grad_norm": 7.841027736663818, "learning_rate": 8.242187500000001e-06, "loss": 0.19511795043945312, "step": 423 }, { "epoch": 0.059081725074897236, "grad_norm": 8.519875526428223, "learning_rate": 8.26171875e-06, "loss": 0.21895408630371094, "step": 424 }, { "epoch": 0.05922106876611161, "grad_norm": 7.189184665679932, "learning_rate": 8.281250000000001e-06, "loss": 0.26677703857421875, "step": 425 }, { "epoch": 0.059360412457326, "grad_norm": 7.022824764251709, "learning_rate": 8.30078125e-06, "loss": 0.2862701416015625, "step": 426 }, { "epoch": 0.059499756148540375, "grad_norm": 4.680766582489014, "learning_rate": 8.320312500000001e-06, "loss": 0.1960296630859375, "step": 427 }, { "epoch": 0.05963909983975475, "grad_norm": 3.4621596336364746, "learning_rate": 8.33984375e-06, "loss": 0.20432281494140625, "step": 428 }, { "epoch": 0.05977844353096914, "grad_norm": 6.2614288330078125, "learning_rate": 8.359375e-06, "loss": 0.21722412109375, "step": 429 }, { "epoch": 0.059917787222183515, "grad_norm": 5.86396598815918, "learning_rate": 8.37890625e-06, "loss": 0.13425064086914062, "step": 430 }, { "epoch": 0.0600571309133979, "grad_norm": 5.701080322265625, "learning_rate": 8.3984375e-06, "loss": 0.17905426025390625, "step": 431 }, { "epoch": 0.06019647460461228, "grad_norm": 5.430967807769775, "learning_rate": 8.417968750000001e-06, "loss": 0.16657638549804688, "step": 432 }, { "epoch": 0.060335818295826654, "grad_norm": 2.490443229675293, "learning_rate": 8.4375e-06, "loss": 0.159698486328125, "step": 433 }, { "epoch": 0.06047516198704104, "grad_norm": 8.332354545593262, "learning_rate": 8.457031250000001e-06, "loss": 0.24555206298828125, "step": 434 }, { "epoch": 0.060614505678255416, "grad_norm": 10.76302719116211, "learning_rate": 8.4765625e-06, "loss": 0.168060302734375, "step": 435 }, { "epoch": 0.0607538493694698, "grad_norm": 5.50603723526001, "learning_rate": 8.496093750000001e-06, "loss": 0.1828460693359375, "step": 436 }, { "epoch": 0.06089319306068418, "grad_norm": 3.6275269985198975, "learning_rate": 8.515625e-06, "loss": 0.17604827880859375, "step": 437 }, { "epoch": 0.061032536751898556, "grad_norm": 4.240610599517822, "learning_rate": 8.535156250000001e-06, "loss": 0.15715408325195312, "step": 438 }, { "epoch": 0.06117188044311294, "grad_norm": 2.4557454586029053, "learning_rate": 8.5546875e-06, "loss": 0.15205764770507812, "step": 439 }, { "epoch": 0.06131122413432732, "grad_norm": 10.334798812866211, "learning_rate": 8.574218750000001e-06, "loss": 0.21110916137695312, "step": 440 }, { "epoch": 0.0614505678255417, "grad_norm": 11.098021507263184, "learning_rate": 8.59375e-06, "loss": 0.175628662109375, "step": 441 }, { "epoch": 0.06158991151675608, "grad_norm": 8.24803352355957, "learning_rate": 8.613281250000001e-06, "loss": 0.21450042724609375, "step": 442 }, { "epoch": 0.06172925520797046, "grad_norm": 12.86413288116455, "learning_rate": 8.6328125e-06, "loss": 0.21487808227539062, "step": 443 }, { "epoch": 0.06186859889918484, "grad_norm": 13.730978965759277, "learning_rate": 8.652343750000002e-06, "loss": 0.22037506103515625, "step": 444 }, { "epoch": 0.06200794259039922, "grad_norm": 13.76801586151123, "learning_rate": 8.671875e-06, "loss": 0.25601959228515625, "step": 445 }, { "epoch": 0.062147286281613597, "grad_norm": 2.6724305152893066, "learning_rate": 8.69140625e-06, "loss": 0.160980224609375, "step": 446 }, { "epoch": 0.06228662997282798, "grad_norm": 4.903120994567871, "learning_rate": 8.7109375e-06, "loss": 0.18418502807617188, "step": 447 }, { "epoch": 0.06242597366404236, "grad_norm": 9.746091842651367, "learning_rate": 8.73046875e-06, "loss": 0.18193435668945312, "step": 448 }, { "epoch": 0.06256531735525674, "grad_norm": 9.004277229309082, "learning_rate": 8.750000000000001e-06, "loss": 0.18455886840820312, "step": 449 }, { "epoch": 0.06270466104647113, "grad_norm": 8.048401832580566, "learning_rate": 8.76953125e-06, "loss": 0.2685737609863281, "step": 450 }, { "epoch": 0.0628440047376855, "grad_norm": 2.675717830657959, "learning_rate": 8.789062500000001e-06, "loss": 0.2340240478515625, "step": 451 }, { "epoch": 0.06298334842889988, "grad_norm": 6.432124137878418, "learning_rate": 8.80859375e-06, "loss": 0.22728729248046875, "step": 452 }, { "epoch": 0.06312269212011426, "grad_norm": 3.5375213623046875, "learning_rate": 8.828125000000001e-06, "loss": 0.17244338989257812, "step": 453 }, { "epoch": 0.06326203581132864, "grad_norm": 2.555880308151245, "learning_rate": 8.84765625e-06, "loss": 0.154327392578125, "step": 454 }, { "epoch": 0.06340137950254303, "grad_norm": 4.404837131500244, "learning_rate": 8.867187500000001e-06, "loss": 0.15210342407226562, "step": 455 }, { "epoch": 0.0635407231937574, "grad_norm": 6.336852550506592, "learning_rate": 8.88671875e-06, "loss": 0.15602874755859375, "step": 456 }, { "epoch": 0.06368006688497178, "grad_norm": 5.0978899002075195, "learning_rate": 8.906250000000001e-06, "loss": 0.1670989990234375, "step": 457 }, { "epoch": 0.06381941057618616, "grad_norm": 5.322371006011963, "learning_rate": 8.92578125e-06, "loss": 0.20893478393554688, "step": 458 }, { "epoch": 0.06395875426740054, "grad_norm": 7.107874870300293, "learning_rate": 8.945312500000001e-06, "loss": 0.16304397583007812, "step": 459 }, { "epoch": 0.06409809795861493, "grad_norm": 3.0915629863739014, "learning_rate": 8.96484375e-06, "loss": 0.20317459106445312, "step": 460 }, { "epoch": 0.06423744164982931, "grad_norm": 8.380990982055664, "learning_rate": 8.984375000000002e-06, "loss": 0.21990203857421875, "step": 461 }, { "epoch": 0.06437678534104369, "grad_norm": 10.563145637512207, "learning_rate": 9.00390625e-06, "loss": 0.1810302734375, "step": 462 }, { "epoch": 0.06451612903225806, "grad_norm": 7.550082683563232, "learning_rate": 9.0234375e-06, "loss": 0.15771102905273438, "step": 463 }, { "epoch": 0.06465547272347244, "grad_norm": 1.6156595945358276, "learning_rate": 9.042968750000001e-06, "loss": 0.13830947875976562, "step": 464 }, { "epoch": 0.06479481641468683, "grad_norm": 4.606024742126465, "learning_rate": 9.0625e-06, "loss": 0.14937973022460938, "step": 465 }, { "epoch": 0.06493416010590121, "grad_norm": 4.131673336029053, "learning_rate": 9.082031250000001e-06, "loss": 0.19379806518554688, "step": 466 }, { "epoch": 0.06507350379711559, "grad_norm": 5.7675700187683105, "learning_rate": 9.1015625e-06, "loss": 0.223236083984375, "step": 467 }, { "epoch": 0.06521284748832996, "grad_norm": 1.337154746055603, "learning_rate": 9.121093750000001e-06, "loss": 0.15113067626953125, "step": 468 }, { "epoch": 0.06535219117954434, "grad_norm": 2.0882773399353027, "learning_rate": 9.140625e-06, "loss": 0.15967178344726562, "step": 469 }, { "epoch": 0.06549153487075873, "grad_norm": 1.8022133111953735, "learning_rate": 9.160156250000001e-06, "loss": 0.14054107666015625, "step": 470 }, { "epoch": 0.06563087856197311, "grad_norm": 6.093435287475586, "learning_rate": 9.1796875e-06, "loss": 0.20096969604492188, "step": 471 }, { "epoch": 0.06577022225318749, "grad_norm": 2.456252336502075, "learning_rate": 9.199218750000001e-06, "loss": 0.2001190185546875, "step": 472 }, { "epoch": 0.06590956594440187, "grad_norm": 3.423647880554199, "learning_rate": 9.21875e-06, "loss": 0.21169281005859375, "step": 473 }, { "epoch": 0.06604890963561624, "grad_norm": 3.1430046558380127, "learning_rate": 9.238281250000001e-06, "loss": 0.1676025390625, "step": 474 }, { "epoch": 0.06618825332683063, "grad_norm": 5.441662311553955, "learning_rate": 9.2578125e-06, "loss": 0.2104034423828125, "step": 475 }, { "epoch": 0.06632759701804501, "grad_norm": 2.8514034748077393, "learning_rate": 9.277343750000001e-06, "loss": 0.14615249633789062, "step": 476 }, { "epoch": 0.06646694070925939, "grad_norm": 2.598968029022217, "learning_rate": 9.296875e-06, "loss": 0.176513671875, "step": 477 }, { "epoch": 0.06660628440047377, "grad_norm": 5.906002521514893, "learning_rate": 9.316406250000002e-06, "loss": 0.14185333251953125, "step": 478 }, { "epoch": 0.06674562809168814, "grad_norm": 2.5905001163482666, "learning_rate": 9.3359375e-06, "loss": 0.21894073486328125, "step": 479 }, { "epoch": 0.06688497178290254, "grad_norm": 6.7835588455200195, "learning_rate": 9.35546875e-06, "loss": 0.229522705078125, "step": 480 }, { "epoch": 0.06702431547411691, "grad_norm": 9.340956687927246, "learning_rate": 9.375000000000001e-06, "loss": 0.22647857666015625, "step": 481 }, { "epoch": 0.06716365916533129, "grad_norm": 7.054582595825195, "learning_rate": 9.39453125e-06, "loss": 0.18571090698242188, "step": 482 }, { "epoch": 0.06730300285654567, "grad_norm": 4.589204788208008, "learning_rate": 9.414062500000001e-06, "loss": 0.234375, "step": 483 }, { "epoch": 0.06744234654776005, "grad_norm": 4.205818176269531, "learning_rate": 9.43359375e-06, "loss": 0.18556976318359375, "step": 484 }, { "epoch": 0.06758169023897444, "grad_norm": 6.2703399658203125, "learning_rate": 9.453125000000001e-06, "loss": 0.20261764526367188, "step": 485 }, { "epoch": 0.06772103393018881, "grad_norm": 4.759555339813232, "learning_rate": 9.47265625e-06, "loss": 0.173187255859375, "step": 486 }, { "epoch": 0.06786037762140319, "grad_norm": 5.38169527053833, "learning_rate": 9.492187500000001e-06, "loss": 0.15093231201171875, "step": 487 }, { "epoch": 0.06799972131261757, "grad_norm": 3.3303937911987305, "learning_rate": 9.51171875e-06, "loss": 0.15619659423828125, "step": 488 }, { "epoch": 0.06813906500383195, "grad_norm": 5.266639709472656, "learning_rate": 9.531250000000001e-06, "loss": 0.15703201293945312, "step": 489 }, { "epoch": 0.06827840869504634, "grad_norm": 8.590157508850098, "learning_rate": 9.55078125e-06, "loss": 0.17977142333984375, "step": 490 }, { "epoch": 0.06841775238626072, "grad_norm": 4.0041608810424805, "learning_rate": 9.570312500000001e-06, "loss": 0.18603897094726562, "step": 491 }, { "epoch": 0.0685570960774751, "grad_norm": 5.099331855773926, "learning_rate": 9.58984375e-06, "loss": 0.17690277099609375, "step": 492 }, { "epoch": 0.06869643976868947, "grad_norm": 4.631252288818359, "learning_rate": 9.609375000000001e-06, "loss": 0.15470504760742188, "step": 493 }, { "epoch": 0.06883578345990385, "grad_norm": 3.3139827251434326, "learning_rate": 9.62890625e-06, "loss": 0.24722671508789062, "step": 494 }, { "epoch": 0.06897512715111823, "grad_norm": 10.17294979095459, "learning_rate": 9.648437500000002e-06, "loss": 0.17094039916992188, "step": 495 }, { "epoch": 0.06911447084233262, "grad_norm": 8.600760459899902, "learning_rate": 9.66796875e-06, "loss": 0.15645599365234375, "step": 496 }, { "epoch": 0.069253814533547, "grad_norm": 3.2307612895965576, "learning_rate": 9.6875e-06, "loss": 0.126129150390625, "step": 497 }, { "epoch": 0.06939315822476137, "grad_norm": 3.730618953704834, "learning_rate": 9.707031250000001e-06, "loss": 0.16046905517578125, "step": 498 }, { "epoch": 0.06953250191597575, "grad_norm": 7.172944068908691, "learning_rate": 9.7265625e-06, "loss": 0.18846893310546875, "step": 499 }, { "epoch": 0.06967184560719013, "grad_norm": 6.139108657836914, "learning_rate": 9.746093750000001e-06, "loss": 0.196014404296875, "step": 500 }, { "epoch": 0.06981118929840452, "grad_norm": 2.986083745956421, "learning_rate": 9.765625e-06, "loss": 0.16774368286132812, "step": 501 }, { "epoch": 0.0699505329896189, "grad_norm": 8.3906831741333, "learning_rate": 9.785156250000001e-06, "loss": 0.24935150146484375, "step": 502 }, { "epoch": 0.07008987668083327, "grad_norm": 10.382150650024414, "learning_rate": 9.8046875e-06, "loss": 0.20114517211914062, "step": 503 }, { "epoch": 0.07022922037204765, "grad_norm": 7.373335361480713, "learning_rate": 9.824218750000001e-06, "loss": 0.18650054931640625, "step": 504 }, { "epoch": 0.07036856406326203, "grad_norm": 4.047264099121094, "learning_rate": 9.84375e-06, "loss": 0.18671798706054688, "step": 505 }, { "epoch": 0.07050790775447642, "grad_norm": 5.2397894859313965, "learning_rate": 9.863281250000001e-06, "loss": 0.19576263427734375, "step": 506 }, { "epoch": 0.0706472514456908, "grad_norm": 4.469723701477051, "learning_rate": 9.8828125e-06, "loss": 0.1782684326171875, "step": 507 }, { "epoch": 0.07078659513690518, "grad_norm": 3.7904586791992188, "learning_rate": 9.902343750000001e-06, "loss": 0.17147064208984375, "step": 508 }, { "epoch": 0.07092593882811955, "grad_norm": 3.291221857070923, "learning_rate": 9.921875e-06, "loss": 0.15186691284179688, "step": 509 }, { "epoch": 0.07106528251933393, "grad_norm": 4.658570766448975, "learning_rate": 9.941406250000002e-06, "loss": 0.1779327392578125, "step": 510 }, { "epoch": 0.07120462621054832, "grad_norm": 7.447791576385498, "learning_rate": 9.9609375e-06, "loss": 0.234161376953125, "step": 511 }, { "epoch": 0.0713439699017627, "grad_norm": 5.6031622886657715, "learning_rate": 9.980468750000002e-06, "loss": 0.20190048217773438, "step": 512 }, { "epoch": 0.07148331359297708, "grad_norm": 6.096009254455566, "learning_rate": 1e-05, "loss": 0.19103240966796875, "step": 513 }, { "epoch": 0.07162265728419145, "grad_norm": 3.0966591835021973, "learning_rate": 9.999999444557077e-06, "loss": 0.14989471435546875, "step": 514 }, { "epoch": 0.07176200097540583, "grad_norm": 3.983635902404785, "learning_rate": 9.999997778228428e-06, "loss": 0.17371368408203125, "step": 515 }, { "epoch": 0.07190134466662022, "grad_norm": 3.728212594985962, "learning_rate": 9.999995001014424e-06, "loss": 0.19709014892578125, "step": 516 }, { "epoch": 0.0720406883578346, "grad_norm": 3.3078575134277344, "learning_rate": 9.999991112915685e-06, "loss": 0.19537734985351562, "step": 517 }, { "epoch": 0.07218003204904898, "grad_norm": 4.042120456695557, "learning_rate": 9.999986113933071e-06, "loss": 0.22626113891601562, "step": 518 }, { "epoch": 0.07231937574026336, "grad_norm": 4.309176445007324, "learning_rate": 9.999980004067694e-06, "loss": 0.21327972412109375, "step": 519 }, { "epoch": 0.07245871943147773, "grad_norm": 2.5453925132751465, "learning_rate": 9.99997278332091e-06, "loss": 0.17152023315429688, "step": 520 }, { "epoch": 0.07259806312269212, "grad_norm": 3.373051404953003, "learning_rate": 9.999964451694328e-06, "loss": 0.20768356323242188, "step": 521 }, { "epoch": 0.0727374068139065, "grad_norm": 1.6267839670181274, "learning_rate": 9.999955009189795e-06, "loss": 0.14057540893554688, "step": 522 }, { "epoch": 0.07287675050512088, "grad_norm": 2.297797203063965, "learning_rate": 9.999944455809408e-06, "loss": 0.21090316772460938, "step": 523 }, { "epoch": 0.07301609419633526, "grad_norm": 3.6441867351531982, "learning_rate": 9.999932791555516e-06, "loss": 0.1936798095703125, "step": 524 }, { "epoch": 0.07315543788754963, "grad_norm": 5.50390100479126, "learning_rate": 9.999920016430706e-06, "loss": 0.22728729248046875, "step": 525 }, { "epoch": 0.07329478157876403, "grad_norm": 4.811466693878174, "learning_rate": 9.99990613043782e-06, "loss": 0.2032318115234375, "step": 526 }, { "epoch": 0.0734341252699784, "grad_norm": 2.5745627880096436, "learning_rate": 9.999891133579941e-06, "loss": 0.1411285400390625, "step": 527 }, { "epoch": 0.07357346896119278, "grad_norm": 2.580009698867798, "learning_rate": 9.999875025860401e-06, "loss": 0.17952728271484375, "step": 528 }, { "epoch": 0.07371281265240716, "grad_norm": 2.978564739227295, "learning_rate": 9.99985780728278e-06, "loss": 0.2535438537597656, "step": 529 }, { "epoch": 0.07385215634362154, "grad_norm": 5.499250888824463, "learning_rate": 9.999839477850903e-06, "loss": 0.181884765625, "step": 530 }, { "epoch": 0.07399150003483593, "grad_norm": 1.548425555229187, "learning_rate": 9.999820037568844e-06, "loss": 0.16544342041015625, "step": 531 }, { "epoch": 0.0741308437260503, "grad_norm": 7.499763488769531, "learning_rate": 9.999799486440917e-06, "loss": 0.203857421875, "step": 532 }, { "epoch": 0.07427018741726468, "grad_norm": 5.2831010818481445, "learning_rate": 9.999777824471694e-06, "loss": 0.148529052734375, "step": 533 }, { "epoch": 0.07440953110847906, "grad_norm": 2.051800012588501, "learning_rate": 9.999755051665985e-06, "loss": 0.14984893798828125, "step": 534 }, { "epoch": 0.07454887479969344, "grad_norm": 3.309114456176758, "learning_rate": 9.99973116802885e-06, "loss": 0.17547607421875, "step": 535 }, { "epoch": 0.07468821849090783, "grad_norm": 4.319460391998291, "learning_rate": 9.999706173565594e-06, "loss": 0.14223480224609375, "step": 536 }, { "epoch": 0.0748275621821222, "grad_norm": 5.41981840133667, "learning_rate": 9.999680068281773e-06, "loss": 0.1790618896484375, "step": 537 }, { "epoch": 0.07496690587333658, "grad_norm": 5.5593132972717285, "learning_rate": 9.999652852183184e-06, "loss": 0.2380523681640625, "step": 538 }, { "epoch": 0.07510624956455096, "grad_norm": 9.490010261535645, "learning_rate": 9.999624525275875e-06, "loss": 0.21530914306640625, "step": 539 }, { "epoch": 0.07524559325576534, "grad_norm": 11.765621185302734, "learning_rate": 9.99959508756614e-06, "loss": 0.24469757080078125, "step": 540 }, { "epoch": 0.07538493694697973, "grad_norm": 4.416570663452148, "learning_rate": 9.99956453906052e-06, "loss": 0.15856170654296875, "step": 541 }, { "epoch": 0.07552428063819411, "grad_norm": 2.1854686737060547, "learning_rate": 9.999532879765801e-06, "loss": 0.18609619140625, "step": 542 }, { "epoch": 0.07566362432940849, "grad_norm": 3.0698037147521973, "learning_rate": 9.999500109689018e-06, "loss": 0.19289398193359375, "step": 543 }, { "epoch": 0.07580296802062286, "grad_norm": 5.008580684661865, "learning_rate": 9.999466228837452e-06, "loss": 0.15303802490234375, "step": 544 }, { "epoch": 0.07594231171183724, "grad_norm": 7.515979290008545, "learning_rate": 9.999431237218629e-06, "loss": 0.18787765502929688, "step": 545 }, { "epoch": 0.07608165540305163, "grad_norm": 11.646830558776855, "learning_rate": 9.999395134840323e-06, "loss": 0.2394561767578125, "step": 546 }, { "epoch": 0.07622099909426601, "grad_norm": 2.614711046218872, "learning_rate": 9.999357921710557e-06, "loss": 0.14289474487304688, "step": 547 }, { "epoch": 0.07636034278548039, "grad_norm": 7.5944671630859375, "learning_rate": 9.999319597837599e-06, "loss": 0.2001819610595703, "step": 548 }, { "epoch": 0.07649968647669476, "grad_norm": 6.567246913909912, "learning_rate": 9.99928016322996e-06, "loss": 0.1969146728515625, "step": 549 }, { "epoch": 0.07663903016790914, "grad_norm": 5.220876693725586, "learning_rate": 9.999239617896406e-06, "loss": 0.20954132080078125, "step": 550 }, { "epoch": 0.07677837385912353, "grad_norm": 2.0921339988708496, "learning_rate": 9.999197961845943e-06, "loss": 0.18413543701171875, "step": 551 }, { "epoch": 0.07691771755033791, "grad_norm": 1.7068170309066772, "learning_rate": 9.999155195087826e-06, "loss": 0.17445755004882812, "step": 552 }, { "epoch": 0.07705706124155229, "grad_norm": 4.979001045227051, "learning_rate": 9.999111317631559e-06, "loss": 0.18991851806640625, "step": 553 }, { "epoch": 0.07719640493276667, "grad_norm": 7.079829692840576, "learning_rate": 9.999066329486888e-06, "loss": 0.22924041748046875, "step": 554 }, { "epoch": 0.07733574862398104, "grad_norm": 1.1873546838760376, "learning_rate": 9.999020230663809e-06, "loss": 0.12067794799804688, "step": 555 }, { "epoch": 0.07747509231519543, "grad_norm": 3.5458695888519287, "learning_rate": 9.998973021172564e-06, "loss": 0.18756866455078125, "step": 556 }, { "epoch": 0.07761443600640981, "grad_norm": 5.245108127593994, "learning_rate": 9.998924701023645e-06, "loss": 0.18675994873046875, "step": 557 }, { "epoch": 0.07775377969762419, "grad_norm": 3.454397439956665, "learning_rate": 9.998875270227781e-06, "loss": 0.18753814697265625, "step": 558 }, { "epoch": 0.07789312338883857, "grad_norm": 4.763269901275635, "learning_rate": 9.99882472879596e-06, "loss": 0.143157958984375, "step": 559 }, { "epoch": 0.07803246708005294, "grad_norm": 4.470091819763184, "learning_rate": 9.998773076739409e-06, "loss": 0.20722198486328125, "step": 560 }, { "epoch": 0.07817181077126734, "grad_norm": 1.9104300737380981, "learning_rate": 9.998720314069606e-06, "loss": 0.1394500732421875, "step": 561 }, { "epoch": 0.07831115446248171, "grad_norm": 3.7981483936309814, "learning_rate": 9.99866644079827e-06, "loss": 0.218505859375, "step": 562 }, { "epoch": 0.07845049815369609, "grad_norm": 3.7664265632629395, "learning_rate": 9.998611456937373e-06, "loss": 0.21532821655273438, "step": 563 }, { "epoch": 0.07858984184491047, "grad_norm": 4.017005443572998, "learning_rate": 9.99855536249913e-06, "loss": 0.23566436767578125, "step": 564 }, { "epoch": 0.07872918553612485, "grad_norm": 2.132524251937866, "learning_rate": 9.998498157496004e-06, "loss": 0.17549514770507812, "step": 565 }, { "epoch": 0.07886852922733924, "grad_norm": 1.6924238204956055, "learning_rate": 9.998439841940706e-06, "loss": 0.15088272094726562, "step": 566 }, { "epoch": 0.07900787291855361, "grad_norm": 1.640000820159912, "learning_rate": 9.998380415846191e-06, "loss": 0.18236160278320312, "step": 567 }, { "epoch": 0.07914721660976799, "grad_norm": 8.163175582885742, "learning_rate": 9.998319879225662e-06, "loss": 0.2305450439453125, "step": 568 }, { "epoch": 0.07928656030098237, "grad_norm": 7.568604946136475, "learning_rate": 9.998258232092571e-06, "loss": 0.19678878784179688, "step": 569 }, { "epoch": 0.07942590399219675, "grad_norm": 3.1746184825897217, "learning_rate": 9.998195474460613e-06, "loss": 0.1623992919921875, "step": 570 }, { "epoch": 0.07956524768341114, "grad_norm": 5.533189296722412, "learning_rate": 9.998131606343729e-06, "loss": 0.1870574951171875, "step": 571 }, { "epoch": 0.07970459137462552, "grad_norm": 3.458726167678833, "learning_rate": 9.998066627756113e-06, "loss": 0.1801910400390625, "step": 572 }, { "epoch": 0.0798439350658399, "grad_norm": 5.429698467254639, "learning_rate": 9.9980005387122e-06, "loss": 0.19110488891601562, "step": 573 }, { "epoch": 0.07998327875705427, "grad_norm": 5.166138172149658, "learning_rate": 9.997933339226675e-06, "loss": 0.21079635620117188, "step": 574 }, { "epoch": 0.08012262244826865, "grad_norm": 3.687854051589966, "learning_rate": 9.997865029314464e-06, "loss": 0.2083740234375, "step": 575 }, { "epoch": 0.08026196613948304, "grad_norm": 5.215585231781006, "learning_rate": 9.997795608990749e-06, "loss": 0.2561454772949219, "step": 576 }, { "epoch": 0.08040130983069742, "grad_norm": 3.923853635787964, "learning_rate": 9.99772507827095e-06, "loss": 0.1728668212890625, "step": 577 }, { "epoch": 0.0805406535219118, "grad_norm": 2.2903122901916504, "learning_rate": 9.997653437170739e-06, "loss": 0.17037200927734375, "step": 578 }, { "epoch": 0.08067999721312617, "grad_norm": 4.252383708953857, "learning_rate": 9.997580685706032e-06, "loss": 0.1731719970703125, "step": 579 }, { "epoch": 0.08081934090434055, "grad_norm": 7.948487281799316, "learning_rate": 9.997506823892993e-06, "loss": 0.18291473388671875, "step": 580 }, { "epoch": 0.08095868459555494, "grad_norm": 3.969212055206299, "learning_rate": 9.997431851748034e-06, "loss": 0.18587875366210938, "step": 581 }, { "epoch": 0.08109802828676932, "grad_norm": 2.3562281131744385, "learning_rate": 9.99735576928781e-06, "loss": 0.15253067016601562, "step": 582 }, { "epoch": 0.0812373719779837, "grad_norm": 4.49169397354126, "learning_rate": 9.997278576529228e-06, "loss": 0.20842742919921875, "step": 583 }, { "epoch": 0.08137671566919807, "grad_norm": 2.065708875656128, "learning_rate": 9.997200273489434e-06, "loss": 0.14508438110351562, "step": 584 }, { "epoch": 0.08151605936041245, "grad_norm": 3.3022587299346924, "learning_rate": 9.997120860185827e-06, "loss": 0.1490936279296875, "step": 585 }, { "epoch": 0.08165540305162684, "grad_norm": 4.3835344314575195, "learning_rate": 9.997040336636052e-06, "loss": 0.16361618041992188, "step": 586 }, { "epoch": 0.08179474674284122, "grad_norm": 3.0222368240356445, "learning_rate": 9.996958702857997e-06, "loss": 0.1806201934814453, "step": 587 }, { "epoch": 0.0819340904340556, "grad_norm": 3.157742977142334, "learning_rate": 9.996875958869803e-06, "loss": 0.14906692504882812, "step": 588 }, { "epoch": 0.08207343412526998, "grad_norm": 3.924682140350342, "learning_rate": 9.996792104689849e-06, "loss": 0.15850830078125, "step": 589 }, { "epoch": 0.08221277781648435, "grad_norm": 4.597708702087402, "learning_rate": 9.99670714033677e-06, "loss": 0.165618896484375, "step": 590 }, { "epoch": 0.08235212150769874, "grad_norm": 2.366647958755493, "learning_rate": 9.996621065829442e-06, "loss": 0.15864181518554688, "step": 591 }, { "epoch": 0.08249146519891312, "grad_norm": 2.951780080795288, "learning_rate": 9.996533881186986e-06, "loss": 0.15915298461914062, "step": 592 }, { "epoch": 0.0826308088901275, "grad_norm": 3.3547708988189697, "learning_rate": 9.996445586428776e-06, "loss": 0.16376495361328125, "step": 593 }, { "epoch": 0.08277015258134188, "grad_norm": 3.1398110389709473, "learning_rate": 9.996356181574425e-06, "loss": 0.1568603515625, "step": 594 }, { "epoch": 0.08290949627255625, "grad_norm": 2.5357296466827393, "learning_rate": 9.9962656666438e-06, "loss": 0.17844009399414062, "step": 595 }, { "epoch": 0.08304883996377065, "grad_norm": 1.100768804550171, "learning_rate": 9.996174041657012e-06, "loss": 0.1395263671875, "step": 596 }, { "epoch": 0.08318818365498502, "grad_norm": 6.432078838348389, "learning_rate": 9.996081306634416e-06, "loss": 0.174072265625, "step": 597 }, { "epoch": 0.0833275273461994, "grad_norm": 4.193227767944336, "learning_rate": 9.995987461596617e-06, "loss": 0.14044570922851562, "step": 598 }, { "epoch": 0.08346687103741378, "grad_norm": 4.913259983062744, "learning_rate": 9.995892506564461e-06, "loss": 0.1835174560546875, "step": 599 }, { "epoch": 0.08360621472862816, "grad_norm": 3.1273033618927, "learning_rate": 9.995796441559052e-06, "loss": 0.16754531860351562, "step": 600 }, { "epoch": 0.08374555841984255, "grad_norm": 3.9639272689819336, "learning_rate": 9.995699266601728e-06, "loss": 0.16668319702148438, "step": 601 }, { "epoch": 0.08388490211105692, "grad_norm": 8.07323169708252, "learning_rate": 9.995600981714082e-06, "loss": 0.18786239624023438, "step": 602 }, { "epoch": 0.0840242458022713, "grad_norm": 9.337714195251465, "learning_rate": 9.995501586917949e-06, "loss": 0.20465087890625, "step": 603 }, { "epoch": 0.08416358949348568, "grad_norm": 4.9304046630859375, "learning_rate": 9.99540108223541e-06, "loss": 0.164276123046875, "step": 604 }, { "epoch": 0.08430293318470006, "grad_norm": 4.349210739135742, "learning_rate": 9.9952994676888e-06, "loss": 0.13516616821289062, "step": 605 }, { "epoch": 0.08444227687591445, "grad_norm": 5.290085792541504, "learning_rate": 9.995196743300693e-06, "loss": 0.21987152099609375, "step": 606 }, { "epoch": 0.08458162056712883, "grad_norm": 2.6547696590423584, "learning_rate": 9.995092909093911e-06, "loss": 0.17817306518554688, "step": 607 }, { "epoch": 0.0847209642583432, "grad_norm": 2.4031152725219727, "learning_rate": 9.994987965091525e-06, "loss": 0.15894317626953125, "step": 608 }, { "epoch": 0.08486030794955758, "grad_norm": 1.776401162147522, "learning_rate": 9.994881911316849e-06, "loss": 0.17289352416992188, "step": 609 }, { "epoch": 0.08499965164077196, "grad_norm": 1.651786208152771, "learning_rate": 9.99477474779345e-06, "loss": 0.16566848754882812, "step": 610 }, { "epoch": 0.08513899533198635, "grad_norm": 3.390427589416504, "learning_rate": 9.994666474545133e-06, "loss": 0.17387008666992188, "step": 611 }, { "epoch": 0.08527833902320073, "grad_norm": 6.343812942504883, "learning_rate": 9.994557091595956e-06, "loss": 0.19684600830078125, "step": 612 }, { "epoch": 0.0854176827144151, "grad_norm": 5.05409049987793, "learning_rate": 9.99444659897022e-06, "loss": 0.1547374725341797, "step": 613 }, { "epoch": 0.08555702640562948, "grad_norm": 3.912553310394287, "learning_rate": 9.994334996692476e-06, "loss": 0.19105911254882812, "step": 614 }, { "epoch": 0.08569637009684386, "grad_norm": 5.252843856811523, "learning_rate": 9.994222284787519e-06, "loss": 0.2208251953125, "step": 615 }, { "epoch": 0.08583571378805825, "grad_norm": 5.346504211425781, "learning_rate": 9.99410846328039e-06, "loss": 0.17065811157226562, "step": 616 }, { "epoch": 0.08597505747927263, "grad_norm": 10.575658798217773, "learning_rate": 9.993993532196376e-06, "loss": 0.18361282348632812, "step": 617 }, { "epoch": 0.086114401170487, "grad_norm": 3.8102147579193115, "learning_rate": 9.993877491561015e-06, "loss": 0.17013168334960938, "step": 618 }, { "epoch": 0.08625374486170138, "grad_norm": 3.472377061843872, "learning_rate": 9.99376034140009e-06, "loss": 0.16974639892578125, "step": 619 }, { "epoch": 0.08639308855291576, "grad_norm": 6.625734806060791, "learning_rate": 9.993642081739623e-06, "loss": 0.16272354125976562, "step": 620 }, { "epoch": 0.08653243224413015, "grad_norm": 4.911305904388428, "learning_rate": 9.993522712605895e-06, "loss": 0.17259597778320312, "step": 621 }, { "epoch": 0.08667177593534453, "grad_norm": 1.7151514291763306, "learning_rate": 9.993402234025422e-06, "loss": 0.17232131958007812, "step": 622 }, { "epoch": 0.08681111962655891, "grad_norm": 6.854325294494629, "learning_rate": 9.993280646024975e-06, "loss": 0.22336959838867188, "step": 623 }, { "epoch": 0.08695046331777329, "grad_norm": 6.254745006561279, "learning_rate": 9.993157948631566e-06, "loss": 0.20981979370117188, "step": 624 }, { "epoch": 0.08708980700898766, "grad_norm": 3.5822505950927734, "learning_rate": 9.993034141872459e-06, "loss": 0.22006607055664062, "step": 625 }, { "epoch": 0.08722915070020205, "grad_norm": 2.855529308319092, "learning_rate": 9.992909225775157e-06, "loss": 0.17668533325195312, "step": 626 }, { "epoch": 0.08736849439141643, "grad_norm": 5.444334030151367, "learning_rate": 9.992783200367414e-06, "loss": 0.21399688720703125, "step": 627 }, { "epoch": 0.08750783808263081, "grad_norm": 5.308550834655762, "learning_rate": 9.992656065677234e-06, "loss": 0.21529388427734375, "step": 628 }, { "epoch": 0.08764718177384519, "grad_norm": 2.2706310749053955, "learning_rate": 9.992527821732858e-06, "loss": 0.12792587280273438, "step": 629 }, { "epoch": 0.08778652546505956, "grad_norm": 3.8455920219421387, "learning_rate": 9.992398468562782e-06, "loss": 0.14516067504882812, "step": 630 }, { "epoch": 0.08792586915627396, "grad_norm": 9.332847595214844, "learning_rate": 9.992268006195744e-06, "loss": 0.20294570922851562, "step": 631 }, { "epoch": 0.08806521284748833, "grad_norm": 2.212047815322876, "learning_rate": 9.992136434660733e-06, "loss": 0.184173583984375, "step": 632 }, { "epoch": 0.08820455653870271, "grad_norm": 3.0197126865386963, "learning_rate": 9.992003753986976e-06, "loss": 0.17195510864257812, "step": 633 }, { "epoch": 0.08834390022991709, "grad_norm": 4.9717559814453125, "learning_rate": 9.991869964203955e-06, "loss": 0.16509628295898438, "step": 634 }, { "epoch": 0.08848324392113147, "grad_norm": 2.398054599761963, "learning_rate": 9.991735065341394e-06, "loss": 0.17297744750976562, "step": 635 }, { "epoch": 0.08862258761234586, "grad_norm": 1.9059274196624756, "learning_rate": 9.991599057429266e-06, "loss": 0.14867591857910156, "step": 636 }, { "epoch": 0.08876193130356023, "grad_norm": 3.059579610824585, "learning_rate": 9.991461940497786e-06, "loss": 0.23725128173828125, "step": 637 }, { "epoch": 0.08890127499477461, "grad_norm": 1.6452951431274414, "learning_rate": 9.991323714577421e-06, "loss": 0.16901397705078125, "step": 638 }, { "epoch": 0.08904061868598899, "grad_norm": 2.3424570560455322, "learning_rate": 9.99118437969888e-06, "loss": 0.17339324951171875, "step": 639 }, { "epoch": 0.08917996237720337, "grad_norm": 2.355879545211792, "learning_rate": 9.99104393589312e-06, "loss": 0.17688369750976562, "step": 640 }, { "epoch": 0.08931930606841776, "grad_norm": 5.29227876663208, "learning_rate": 9.990902383191346e-06, "loss": 0.21146774291992188, "step": 641 }, { "epoch": 0.08945864975963214, "grad_norm": 3.3369734287261963, "learning_rate": 9.990759721625005e-06, "loss": 0.15349578857421875, "step": 642 }, { "epoch": 0.08959799345084651, "grad_norm": 3.3968403339385986, "learning_rate": 9.990615951225797e-06, "loss": 0.15186309814453125, "step": 643 }, { "epoch": 0.08973733714206089, "grad_norm": 5.1413397789001465, "learning_rate": 9.99047107202566e-06, "loss": 0.21112060546875, "step": 644 }, { "epoch": 0.08987668083327527, "grad_norm": 4.930606365203857, "learning_rate": 9.990325084056787e-06, "loss": 0.177093505859375, "step": 645 }, { "epoch": 0.09001602452448966, "grad_norm": 2.1512656211853027, "learning_rate": 9.99017798735161e-06, "loss": 0.15178298950195312, "step": 646 }, { "epoch": 0.09015536821570404, "grad_norm": 3.3072588443756104, "learning_rate": 9.990029781942814e-06, "loss": 0.13821029663085938, "step": 647 }, { "epoch": 0.09029471190691842, "grad_norm": 6.650473117828369, "learning_rate": 9.989880467863323e-06, "loss": 0.195709228515625, "step": 648 }, { "epoch": 0.09043405559813279, "grad_norm": 3.438976287841797, "learning_rate": 9.989730045146313e-06, "loss": 0.18338775634765625, "step": 649 }, { "epoch": 0.09057339928934717, "grad_norm": 3.9721262454986572, "learning_rate": 9.989578513825205e-06, "loss": 0.25785064697265625, "step": 650 }, { "epoch": 0.09071274298056156, "grad_norm": 6.768970966339111, "learning_rate": 9.989425873933666e-06, "loss": 0.17303466796875, "step": 651 }, { "epoch": 0.09085208667177594, "grad_norm": 3.0254292488098145, "learning_rate": 9.989272125505606e-06, "loss": 0.16353225708007812, "step": 652 }, { "epoch": 0.09099143036299032, "grad_norm": 2.4457924365997314, "learning_rate": 9.98911726857519e-06, "loss": 0.15834808349609375, "step": 653 }, { "epoch": 0.0911307740542047, "grad_norm": 4.907850742340088, "learning_rate": 9.988961303176818e-06, "loss": 0.18992996215820312, "step": 654 }, { "epoch": 0.09127011774541907, "grad_norm": 7.051778316497803, "learning_rate": 9.988804229345146e-06, "loss": 0.1858654022216797, "step": 655 }, { "epoch": 0.09140946143663346, "grad_norm": 7.793381690979004, "learning_rate": 9.98864604711507e-06, "loss": 0.17379188537597656, "step": 656 }, { "epoch": 0.09154880512784784, "grad_norm": 2.4904375076293945, "learning_rate": 9.988486756521733e-06, "loss": 0.2183685302734375, "step": 657 }, { "epoch": 0.09168814881906222, "grad_norm": 2.9155523777008057, "learning_rate": 9.98832635760053e-06, "loss": 0.12981414794921875, "step": 658 }, { "epoch": 0.0918274925102766, "grad_norm": 3.4159719944000244, "learning_rate": 9.988164850387095e-06, "loss": 0.19886016845703125, "step": 659 }, { "epoch": 0.09196683620149097, "grad_norm": 3.2588229179382324, "learning_rate": 9.988002234917312e-06, "loss": 0.2501678466796875, "step": 660 }, { "epoch": 0.09210617989270536, "grad_norm": 3.704847812652588, "learning_rate": 9.987838511227311e-06, "loss": 0.16958999633789062, "step": 661 }, { "epoch": 0.09224552358391974, "grad_norm": 3.765031099319458, "learning_rate": 9.987673679353467e-06, "loss": 0.17704391479492188, "step": 662 }, { "epoch": 0.09238486727513412, "grad_norm": 2.5993337631225586, "learning_rate": 9.987507739332401e-06, "loss": 0.1848316192626953, "step": 663 }, { "epoch": 0.0925242109663485, "grad_norm": 1.6149803400039673, "learning_rate": 9.987340691200984e-06, "loss": 0.13819122314453125, "step": 664 }, { "epoch": 0.09266355465756287, "grad_norm": 2.8460187911987305, "learning_rate": 9.987172534996326e-06, "loss": 0.16202163696289062, "step": 665 }, { "epoch": 0.09280289834877727, "grad_norm": 1.403602957725525, "learning_rate": 9.98700327075579e-06, "loss": 0.12182235717773438, "step": 666 }, { "epoch": 0.09294224203999164, "grad_norm": 2.642812728881836, "learning_rate": 9.986832898516985e-06, "loss": 0.17470359802246094, "step": 667 }, { "epoch": 0.09308158573120602, "grad_norm": 3.1116037368774414, "learning_rate": 9.986661418317759e-06, "loss": 0.2016754150390625, "step": 668 }, { "epoch": 0.0932209294224204, "grad_norm": 2.5352230072021484, "learning_rate": 9.986488830196215e-06, "loss": 0.153717041015625, "step": 669 }, { "epoch": 0.09336027311363478, "grad_norm": 2.6041183471679688, "learning_rate": 9.986315134190694e-06, "loss": 0.12366104125976562, "step": 670 }, { "epoch": 0.09349961680484917, "grad_norm": 4.167490482330322, "learning_rate": 9.98614033033979e-06, "loss": 0.21541213989257812, "step": 671 }, { "epoch": 0.09363896049606354, "grad_norm": 2.031244993209839, "learning_rate": 9.985964418682342e-06, "loss": 0.19457244873046875, "step": 672 }, { "epoch": 0.09377830418727792, "grad_norm": 3.0501058101654053, "learning_rate": 9.985787399257431e-06, "loss": 0.16075515747070312, "step": 673 }, { "epoch": 0.0939176478784923, "grad_norm": 6.145337104797363, "learning_rate": 9.985609272104387e-06, "loss": 0.24637603759765625, "step": 674 }, { "epoch": 0.09405699156970668, "grad_norm": 2.146080732345581, "learning_rate": 9.985430037262787e-06, "loss": 0.23146438598632812, "step": 675 }, { "epoch": 0.09419633526092107, "grad_norm": 3.026611804962158, "learning_rate": 9.98524969477245e-06, "loss": 0.17021560668945312, "step": 676 }, { "epoch": 0.09433567895213545, "grad_norm": 3.580148220062256, "learning_rate": 9.985068244673449e-06, "loss": 0.13362503051757812, "step": 677 }, { "epoch": 0.09447502264334982, "grad_norm": 2.649965524673462, "learning_rate": 9.984885687006093e-06, "loss": 0.23088836669921875, "step": 678 }, { "epoch": 0.0946143663345642, "grad_norm": 3.231034517288208, "learning_rate": 9.984702021810944e-06, "loss": 0.2088775634765625, "step": 679 }, { "epoch": 0.09475371002577858, "grad_norm": 2.632511854171753, "learning_rate": 9.98451724912881e-06, "loss": 0.17505264282226562, "step": 680 }, { "epoch": 0.09489305371699297, "grad_norm": 2.258728265762329, "learning_rate": 9.984331369000739e-06, "loss": 0.15288543701171875, "step": 681 }, { "epoch": 0.09503239740820735, "grad_norm": 3.5411365032196045, "learning_rate": 9.984144381468035e-06, "loss": 0.1732940673828125, "step": 682 }, { "epoch": 0.09517174109942172, "grad_norm": 5.909050941467285, "learning_rate": 9.983956286572238e-06, "loss": 0.21643829345703125, "step": 683 }, { "epoch": 0.0953110847906361, "grad_norm": 1.9087692499160767, "learning_rate": 9.983767084355141e-06, "loss": 0.16051483154296875, "step": 684 }, { "epoch": 0.09545042848185048, "grad_norm": 2.7932000160217285, "learning_rate": 9.983576774858776e-06, "loss": 0.12625885009765625, "step": 685 }, { "epoch": 0.09558977217306487, "grad_norm": 2.3779520988464355, "learning_rate": 9.983385358125432e-06, "loss": 0.14071273803710938, "step": 686 }, { "epoch": 0.09572911586427925, "grad_norm": 4.696537494659424, "learning_rate": 9.983192834197633e-06, "loss": 0.1772308349609375, "step": 687 }, { "epoch": 0.09586845955549363, "grad_norm": 4.498350620269775, "learning_rate": 9.982999203118153e-06, "loss": 0.15452957153320312, "step": 688 }, { "epoch": 0.096007803246708, "grad_norm": 2.82552433013916, "learning_rate": 9.982804464930016e-06, "loss": 0.20767974853515625, "step": 689 }, { "epoch": 0.09614714693792238, "grad_norm": 4.4976983070373535, "learning_rate": 9.982608619676485e-06, "loss": 0.1902923583984375, "step": 690 }, { "epoch": 0.09628649062913676, "grad_norm": 5.344203472137451, "learning_rate": 9.982411667401076e-06, "loss": 0.1852264404296875, "step": 691 }, { "epoch": 0.09642583432035115, "grad_norm": 5.825819492340088, "learning_rate": 9.982213608147541e-06, "loss": 0.2186431884765625, "step": 692 }, { "epoch": 0.09656517801156553, "grad_norm": 2.449988603591919, "learning_rate": 9.982014441959891e-06, "loss": 0.17136383056640625, "step": 693 }, { "epoch": 0.0967045217027799, "grad_norm": 1.647334337234497, "learning_rate": 9.98181416888237e-06, "loss": 0.1639251708984375, "step": 694 }, { "epoch": 0.09684386539399428, "grad_norm": 3.5855586528778076, "learning_rate": 9.981612788959481e-06, "loss": 0.17279815673828125, "step": 695 }, { "epoch": 0.09698320908520866, "grad_norm": 4.438831329345703, "learning_rate": 9.981410302235962e-06, "loss": 0.18683242797851562, "step": 696 }, { "epoch": 0.09712255277642305, "grad_norm": 3.876133441925049, "learning_rate": 9.9812067087568e-06, "loss": 0.18144989013671875, "step": 697 }, { "epoch": 0.09726189646763743, "grad_norm": 1.7106804847717285, "learning_rate": 9.98100200856723e-06, "loss": 0.18314361572265625, "step": 698 }, { "epoch": 0.0974012401588518, "grad_norm": 2.033510208129883, "learning_rate": 9.980796201712734e-06, "loss": 0.148284912109375, "step": 699 }, { "epoch": 0.09754058385006618, "grad_norm": 2.0853517055511475, "learning_rate": 9.980589288239034e-06, "loss": 0.17348480224609375, "step": 700 }, { "epoch": 0.09767992754128056, "grad_norm": 1.9331254959106445, "learning_rate": 9.980381268192103e-06, "loss": 0.16909027099609375, "step": 701 }, { "epoch": 0.09781927123249495, "grad_norm": 2.724804162979126, "learning_rate": 9.980172141618159e-06, "loss": 0.19833755493164062, "step": 702 }, { "epoch": 0.09795861492370933, "grad_norm": 2.038895845413208, "learning_rate": 9.979961908563663e-06, "loss": 0.15005874633789062, "step": 703 }, { "epoch": 0.09809795861492371, "grad_norm": 3.745532989501953, "learning_rate": 9.979750569075325e-06, "loss": 0.16992568969726562, "step": 704 }, { "epoch": 0.09823730230613809, "grad_norm": 2.4722845554351807, "learning_rate": 9.979538123200102e-06, "loss": 0.19301605224609375, "step": 705 }, { "epoch": 0.09837664599735246, "grad_norm": 1.334954023361206, "learning_rate": 9.979324570985194e-06, "loss": 0.15372085571289062, "step": 706 }, { "epoch": 0.09851598968856685, "grad_norm": 1.870392918586731, "learning_rate": 9.979109912478044e-06, "loss": 0.1614837646484375, "step": 707 }, { "epoch": 0.09865533337978123, "grad_norm": 5.118520736694336, "learning_rate": 9.978894147726346e-06, "loss": 0.2424163818359375, "step": 708 }, { "epoch": 0.09879467707099561, "grad_norm": 3.587675094604492, "learning_rate": 9.97867727677804e-06, "loss": 0.14807510375976562, "step": 709 }, { "epoch": 0.09893402076220999, "grad_norm": 4.706286430358887, "learning_rate": 9.978459299681306e-06, "loss": 0.20059967041015625, "step": 710 }, { "epoch": 0.09907336445342436, "grad_norm": 1.2410787343978882, "learning_rate": 9.978240216484579e-06, "loss": 0.1499481201171875, "step": 711 }, { "epoch": 0.09921270814463876, "grad_norm": 2.0932867527008057, "learning_rate": 9.978020027236529e-06, "loss": 0.1565704345703125, "step": 712 }, { "epoch": 0.09935205183585313, "grad_norm": 1.8012882471084595, "learning_rate": 9.977798731986079e-06, "loss": 0.13369369506835938, "step": 713 }, { "epoch": 0.09949139552706751, "grad_norm": 2.828915596008301, "learning_rate": 9.977576330782397e-06, "loss": 0.21648406982421875, "step": 714 }, { "epoch": 0.09963073921828189, "grad_norm": 1.5652586221694946, "learning_rate": 9.977352823674893e-06, "loss": 0.15652084350585938, "step": 715 }, { "epoch": 0.09977008290949627, "grad_norm": 3.17553448677063, "learning_rate": 9.977128210713227e-06, "loss": 0.19598007202148438, "step": 716 }, { "epoch": 0.09990942660071066, "grad_norm": 2.2512624263763428, "learning_rate": 9.976902491947303e-06, "loss": 0.17401885986328125, "step": 717 }, { "epoch": 0.10004877029192503, "grad_norm": 1.7586499452590942, "learning_rate": 9.976675667427268e-06, "loss": 0.1521453857421875, "step": 718 }, { "epoch": 0.10018811398313941, "grad_norm": 4.399901866912842, "learning_rate": 9.976447737203521e-06, "loss": 0.24100875854492188, "step": 719 }, { "epoch": 0.10032745767435379, "grad_norm": 2.728177309036255, "learning_rate": 9.976218701326701e-06, "loss": 0.16223526000976562, "step": 720 }, { "epoch": 0.10046680136556817, "grad_norm": 2.957819938659668, "learning_rate": 9.975988559847693e-06, "loss": 0.18360137939453125, "step": 721 }, { "epoch": 0.10060614505678256, "grad_norm": 1.999668836593628, "learning_rate": 9.975757312817634e-06, "loss": 0.13450241088867188, "step": 722 }, { "epoch": 0.10074548874799694, "grad_norm": 1.7542517185211182, "learning_rate": 9.975524960287895e-06, "loss": 0.13387680053710938, "step": 723 }, { "epoch": 0.10088483243921131, "grad_norm": 2.0502169132232666, "learning_rate": 9.975291502310105e-06, "loss": 0.172607421875, "step": 724 }, { "epoch": 0.10102417613042569, "grad_norm": 1.31890869140625, "learning_rate": 9.975056938936129e-06, "loss": 0.12579345703125, "step": 725 }, { "epoch": 0.10116351982164007, "grad_norm": 5.275089263916016, "learning_rate": 9.974821270218086e-06, "loss": 0.2686347961425781, "step": 726 }, { "epoch": 0.10130286351285446, "grad_norm": 1.5459221601486206, "learning_rate": 9.974584496208334e-06, "loss": 0.14749908447265625, "step": 727 }, { "epoch": 0.10144220720406884, "grad_norm": 2.9200563430786133, "learning_rate": 9.974346616959476e-06, "loss": 0.20926666259765625, "step": 728 }, { "epoch": 0.10158155089528322, "grad_norm": 1.3744250535964966, "learning_rate": 9.974107632524368e-06, "loss": 0.17899322509765625, "step": 729 }, { "epoch": 0.10172089458649759, "grad_norm": 1.5580867528915405, "learning_rate": 9.973867542956104e-06, "loss": 0.15052413940429688, "step": 730 }, { "epoch": 0.10186023827771197, "grad_norm": 2.776296615600586, "learning_rate": 9.973626348308027e-06, "loss": 0.16496658325195312, "step": 731 }, { "epoch": 0.10199958196892636, "grad_norm": 3.3146040439605713, "learning_rate": 9.973384048633728e-06, "loss": 0.1760711669921875, "step": 732 }, { "epoch": 0.10213892566014074, "grad_norm": 2.589884042739868, "learning_rate": 9.973140643987034e-06, "loss": 0.13373184204101562, "step": 733 }, { "epoch": 0.10227826935135512, "grad_norm": 2.191955804824829, "learning_rate": 9.97289613442203e-06, "loss": 0.17717742919921875, "step": 734 }, { "epoch": 0.1024176130425695, "grad_norm": 2.5909783840179443, "learning_rate": 9.972650519993037e-06, "loss": 0.17024993896484375, "step": 735 }, { "epoch": 0.10255695673378387, "grad_norm": 2.168811321258545, "learning_rate": 9.972403800754626e-06, "loss": 0.18262481689453125, "step": 736 }, { "epoch": 0.10269630042499826, "grad_norm": 3.7039639949798584, "learning_rate": 9.972155976761613e-06, "loss": 0.16596603393554688, "step": 737 }, { "epoch": 0.10283564411621264, "grad_norm": 2.185715913772583, "learning_rate": 9.971907048069058e-06, "loss": 0.17164993286132812, "step": 738 }, { "epoch": 0.10297498780742702, "grad_norm": 2.5360937118530273, "learning_rate": 9.971657014732268e-06, "loss": 0.19658279418945312, "step": 739 }, { "epoch": 0.1031143314986414, "grad_norm": 4.115532875061035, "learning_rate": 9.971405876806792e-06, "loss": 0.15425872802734375, "step": 740 }, { "epoch": 0.10325367518985577, "grad_norm": 2.0063071250915527, "learning_rate": 9.971153634348431e-06, "loss": 0.1542949676513672, "step": 741 }, { "epoch": 0.10339301888107016, "grad_norm": 3.268922805786133, "learning_rate": 9.970900287413225e-06, "loss": 0.179412841796875, "step": 742 }, { "epoch": 0.10353236257228454, "grad_norm": 1.7466195821762085, "learning_rate": 9.970645836057464e-06, "loss": 0.15670394897460938, "step": 743 }, { "epoch": 0.10367170626349892, "grad_norm": 1.5192497968673706, "learning_rate": 9.970390280337681e-06, "loss": 0.19761276245117188, "step": 744 }, { "epoch": 0.1038110499547133, "grad_norm": 1.5703824758529663, "learning_rate": 9.970133620310652e-06, "loss": 0.19295883178710938, "step": 745 }, { "epoch": 0.10395039364592767, "grad_norm": 2.523097515106201, "learning_rate": 9.969875856033402e-06, "loss": 0.1660785675048828, "step": 746 }, { "epoch": 0.10408973733714207, "grad_norm": 3.9193332195281982, "learning_rate": 9.969616987563202e-06, "loss": 0.168731689453125, "step": 747 }, { "epoch": 0.10422908102835644, "grad_norm": 1.4699571132659912, "learning_rate": 9.969357014957564e-06, "loss": 0.12546920776367188, "step": 748 }, { "epoch": 0.10436842471957082, "grad_norm": 1.2805136442184448, "learning_rate": 9.969095938274251e-06, "loss": 0.15423202514648438, "step": 749 }, { "epoch": 0.1045077684107852, "grad_norm": 1.8336864709854126, "learning_rate": 9.968833757571268e-06, "loss": 0.14368438720703125, "step": 750 }, { "epoch": 0.10464711210199958, "grad_norm": 2.0758118629455566, "learning_rate": 9.968570472906862e-06, "loss": 0.16166305541992188, "step": 751 }, { "epoch": 0.10478645579321397, "grad_norm": 3.367671012878418, "learning_rate": 9.968306084339534e-06, "loss": 0.1863250732421875, "step": 752 }, { "epoch": 0.10492579948442834, "grad_norm": 4.215916156768799, "learning_rate": 9.96804059192802e-06, "loss": 0.206939697265625, "step": 753 }, { "epoch": 0.10506514317564272, "grad_norm": 2.9921622276306152, "learning_rate": 9.96777399573131e-06, "loss": 0.19839859008789062, "step": 754 }, { "epoch": 0.1052044868668571, "grad_norm": 1.8131831884384155, "learning_rate": 9.967506295808634e-06, "loss": 0.1505889892578125, "step": 755 }, { "epoch": 0.10534383055807148, "grad_norm": 5.264008045196533, "learning_rate": 9.96723749221947e-06, "loss": 0.18518447875976562, "step": 756 }, { "epoch": 0.10548317424928587, "grad_norm": 4.178497314453125, "learning_rate": 9.96696758502354e-06, "loss": 0.16393661499023438, "step": 757 }, { "epoch": 0.10562251794050025, "grad_norm": 2.0123414993286133, "learning_rate": 9.966696574280808e-06, "loss": 0.16783523559570312, "step": 758 }, { "epoch": 0.10576186163171462, "grad_norm": 3.216810703277588, "learning_rate": 9.966424460051489e-06, "loss": 0.19490432739257812, "step": 759 }, { "epoch": 0.105901205322929, "grad_norm": 1.8964177370071411, "learning_rate": 9.96615124239604e-06, "loss": 0.17840194702148438, "step": 760 }, { "epoch": 0.10604054901414338, "grad_norm": 2.086470365524292, "learning_rate": 9.965876921375165e-06, "loss": 0.14752578735351562, "step": 761 }, { "epoch": 0.10617989270535777, "grad_norm": 2.4057531356811523, "learning_rate": 9.965601497049812e-06, "loss": 0.18367767333984375, "step": 762 }, { "epoch": 0.10631923639657215, "grad_norm": 3.006272792816162, "learning_rate": 9.965324969481172e-06, "loss": 0.15416717529296875, "step": 763 }, { "epoch": 0.10645858008778653, "grad_norm": 3.5461912155151367, "learning_rate": 9.965047338730685e-06, "loss": 0.22340774536132812, "step": 764 }, { "epoch": 0.1065979237790009, "grad_norm": 3.978886127471924, "learning_rate": 9.964768604860033e-06, "loss": 0.14395904541015625, "step": 765 }, { "epoch": 0.10673726747021528, "grad_norm": 1.9975593090057373, "learning_rate": 9.964488767931144e-06, "loss": 0.17626190185546875, "step": 766 }, { "epoch": 0.10687661116142967, "grad_norm": 2.8370556831359863, "learning_rate": 9.964207828006191e-06, "loss": 0.18908309936523438, "step": 767 }, { "epoch": 0.10701595485264405, "grad_norm": 2.20206618309021, "learning_rate": 9.963925785147595e-06, "loss": 0.16591262817382812, "step": 768 }, { "epoch": 0.10715529854385843, "grad_norm": 3.0521035194396973, "learning_rate": 9.963642639418018e-06, "loss": 0.16859054565429688, "step": 769 }, { "epoch": 0.1072946422350728, "grad_norm": 3.9804975986480713, "learning_rate": 9.963358390880367e-06, "loss": 0.18433761596679688, "step": 770 }, { "epoch": 0.10743398592628718, "grad_norm": 4.205430507659912, "learning_rate": 9.963073039597798e-06, "loss": 0.20354461669921875, "step": 771 }, { "epoch": 0.10757332961750157, "grad_norm": 1.9932537078857422, "learning_rate": 9.962786585633708e-06, "loss": 0.14938735961914062, "step": 772 }, { "epoch": 0.10771267330871595, "grad_norm": 2.9030730724334717, "learning_rate": 9.962499029051742e-06, "loss": 0.14476394653320312, "step": 773 }, { "epoch": 0.10785201699993033, "grad_norm": 1.928141474723816, "learning_rate": 9.962210369915787e-06, "loss": 0.16669845581054688, "step": 774 }, { "epoch": 0.1079913606911447, "grad_norm": 2.4149439334869385, "learning_rate": 9.961920608289977e-06, "loss": 0.16546630859375, "step": 775 }, { "epoch": 0.10813070438235908, "grad_norm": 2.2206203937530518, "learning_rate": 9.96162974423869e-06, "loss": 0.1503448486328125, "step": 776 }, { "epoch": 0.10827004807357347, "grad_norm": 1.943772792816162, "learning_rate": 9.961337777826549e-06, "loss": 0.16563796997070312, "step": 777 }, { "epoch": 0.10840939176478785, "grad_norm": 1.9068461656570435, "learning_rate": 9.961044709118425e-06, "loss": 0.21203994750976562, "step": 778 }, { "epoch": 0.10854873545600223, "grad_norm": 1.957749366760254, "learning_rate": 9.960750538179428e-06, "loss": 0.15060806274414062, "step": 779 }, { "epoch": 0.1086880791472166, "grad_norm": 1.0969899892807007, "learning_rate": 9.960455265074918e-06, "loss": 0.14550399780273438, "step": 780 }, { "epoch": 0.10882742283843098, "grad_norm": 4.412909030914307, "learning_rate": 9.960158889870495e-06, "loss": 0.17485809326171875, "step": 781 }, { "epoch": 0.10896676652964538, "grad_norm": 3.504122734069824, "learning_rate": 9.959861412632011e-06, "loss": 0.18091201782226562, "step": 782 }, { "epoch": 0.10910611022085975, "grad_norm": 4.59026575088501, "learning_rate": 9.959562833425557e-06, "loss": 0.17608261108398438, "step": 783 }, { "epoch": 0.10924545391207413, "grad_norm": 3.700793504714966, "learning_rate": 9.95926315231747e-06, "loss": 0.2100372314453125, "step": 784 }, { "epoch": 0.10938479760328851, "grad_norm": 4.262257099151611, "learning_rate": 9.958962369374333e-06, "loss": 0.21320724487304688, "step": 785 }, { "epoch": 0.10952414129450289, "grad_norm": 1.8643531799316406, "learning_rate": 9.95866048466297e-06, "loss": 0.19610977172851562, "step": 786 }, { "epoch": 0.10966348498571728, "grad_norm": 1.337660312652588, "learning_rate": 9.958357498250457e-06, "loss": 0.13827133178710938, "step": 787 }, { "epoch": 0.10980282867693165, "grad_norm": 2.9200692176818848, "learning_rate": 9.95805341020411e-06, "loss": 0.215118408203125, "step": 788 }, { "epoch": 0.10994217236814603, "grad_norm": 2.93645977973938, "learning_rate": 9.957748220591487e-06, "loss": 0.15674972534179688, "step": 789 }, { "epoch": 0.11008151605936041, "grad_norm": 4.807394504547119, "learning_rate": 9.9574419294804e-06, "loss": 0.22133255004882812, "step": 790 }, { "epoch": 0.11022085975057479, "grad_norm": 2.6809306144714355, "learning_rate": 9.957134536938894e-06, "loss": 0.21162796020507812, "step": 791 }, { "epoch": 0.11036020344178918, "grad_norm": 3.0023553371429443, "learning_rate": 9.956826043035268e-06, "loss": 0.1616058349609375, "step": 792 }, { "epoch": 0.11049954713300356, "grad_norm": 4.79586124420166, "learning_rate": 9.956516447838063e-06, "loss": 0.202606201171875, "step": 793 }, { "epoch": 0.11063889082421793, "grad_norm": 4.426729679107666, "learning_rate": 9.95620575141606e-06, "loss": 0.15277481079101562, "step": 794 }, { "epoch": 0.11077823451543231, "grad_norm": 1.561102271080017, "learning_rate": 9.955893953838293e-06, "loss": 0.15618133544921875, "step": 795 }, { "epoch": 0.11091757820664669, "grad_norm": 2.1695950031280518, "learning_rate": 9.955581055174034e-06, "loss": 0.1728973388671875, "step": 796 }, { "epoch": 0.11105692189786108, "grad_norm": 3.299140214920044, "learning_rate": 9.9552670554928e-06, "loss": 0.18560791015625, "step": 797 }, { "epoch": 0.11119626558907546, "grad_norm": 3.164327621459961, "learning_rate": 9.954951954864361e-06, "loss": 0.17534637451171875, "step": 798 }, { "epoch": 0.11133560928028984, "grad_norm": 3.8048036098480225, "learning_rate": 9.954635753358718e-06, "loss": 0.18617820739746094, "step": 799 }, { "epoch": 0.11147495297150421, "grad_norm": 3.3329060077667236, "learning_rate": 9.954318451046128e-06, "loss": 0.15944290161132812, "step": 800 }, { "epoch": 0.11161429666271859, "grad_norm": 2.580436944961548, "learning_rate": 9.954000047997088e-06, "loss": 0.19260787963867188, "step": 801 }, { "epoch": 0.11175364035393298, "grad_norm": 1.7461061477661133, "learning_rate": 9.953680544282338e-06, "loss": 0.167083740234375, "step": 802 }, { "epoch": 0.11189298404514736, "grad_norm": 2.462045431137085, "learning_rate": 9.953359939972866e-06, "loss": 0.15219879150390625, "step": 803 }, { "epoch": 0.11203232773636174, "grad_norm": 2.4310009479522705, "learning_rate": 9.953038235139902e-06, "loss": 0.16941452026367188, "step": 804 }, { "epoch": 0.11217167142757611, "grad_norm": 1.8167341947555542, "learning_rate": 9.952715429854923e-06, "loss": 0.16044235229492188, "step": 805 }, { "epoch": 0.11231101511879049, "grad_norm": 1.351154088973999, "learning_rate": 9.952391524189646e-06, "loss": 0.15280914306640625, "step": 806 }, { "epoch": 0.11245035881000488, "grad_norm": 2.8666467666625977, "learning_rate": 9.952066518216039e-06, "loss": 0.1857452392578125, "step": 807 }, { "epoch": 0.11258970250121926, "grad_norm": 2.4725894927978516, "learning_rate": 9.951740412006308e-06, "loss": 0.14764022827148438, "step": 808 }, { "epoch": 0.11272904619243364, "grad_norm": 6.095164775848389, "learning_rate": 9.95141320563291e-06, "loss": 0.20228958129882812, "step": 809 }, { "epoch": 0.11286838988364802, "grad_norm": 1.526947021484375, "learning_rate": 9.951084899168537e-06, "loss": 0.14933013916015625, "step": 810 }, { "epoch": 0.11300773357486239, "grad_norm": 1.2984601259231567, "learning_rate": 9.950755492686138e-06, "loss": 0.12489700317382812, "step": 811 }, { "epoch": 0.11314707726607678, "grad_norm": 2.1421236991882324, "learning_rate": 9.950424986258893e-06, "loss": 0.17455291748046875, "step": 812 }, { "epoch": 0.11328642095729116, "grad_norm": 3.7019155025482178, "learning_rate": 9.950093379960238e-06, "loss": 0.16074752807617188, "step": 813 }, { "epoch": 0.11342576464850554, "grad_norm": 3.640148639678955, "learning_rate": 9.949760673863846e-06, "loss": 0.16954421997070312, "step": 814 }, { "epoch": 0.11356510833971992, "grad_norm": 1.7329028844833374, "learning_rate": 9.949426868043638e-06, "loss": 0.15032196044921875, "step": 815 }, { "epoch": 0.1137044520309343, "grad_norm": 5.807930946350098, "learning_rate": 9.949091962573775e-06, "loss": 0.14604949951171875, "step": 816 }, { "epoch": 0.11384379572214869, "grad_norm": 3.666903495788574, "learning_rate": 9.94875595752867e-06, "loss": 0.126678466796875, "step": 817 }, { "epoch": 0.11398313941336306, "grad_norm": 3.913349151611328, "learning_rate": 9.948418852982973e-06, "loss": 0.19491195678710938, "step": 818 }, { "epoch": 0.11412248310457744, "grad_norm": 1.8116697072982788, "learning_rate": 9.948080649011582e-06, "loss": 0.17575836181640625, "step": 819 }, { "epoch": 0.11426182679579182, "grad_norm": 2.166325569152832, "learning_rate": 9.947741345689635e-06, "loss": 0.16719818115234375, "step": 820 }, { "epoch": 0.1144011704870062, "grad_norm": 2.073324203491211, "learning_rate": 9.947400943092522e-06, "loss": 0.20654296875, "step": 821 }, { "epoch": 0.11454051417822059, "grad_norm": 3.2724668979644775, "learning_rate": 9.94705944129587e-06, "loss": 0.22057723999023438, "step": 822 }, { "epoch": 0.11467985786943496, "grad_norm": 3.115734577178955, "learning_rate": 9.946716840375552e-06, "loss": 0.22149276733398438, "step": 823 }, { "epoch": 0.11481920156064934, "grad_norm": 1.747188925743103, "learning_rate": 9.946373140407688e-06, "loss": 0.17734909057617188, "step": 824 }, { "epoch": 0.11495854525186372, "grad_norm": 2.250894784927368, "learning_rate": 9.946028341468642e-06, "loss": 0.16717529296875, "step": 825 }, { "epoch": 0.1150978889430781, "grad_norm": 1.3669145107269287, "learning_rate": 9.945682443635015e-06, "loss": 0.1427154541015625, "step": 826 }, { "epoch": 0.11523723263429249, "grad_norm": 0.7190285921096802, "learning_rate": 9.945335446983662e-06, "loss": 0.12916946411132812, "step": 827 }, { "epoch": 0.11537657632550687, "grad_norm": 1.5873820781707764, "learning_rate": 9.944987351591677e-06, "loss": 0.14923858642578125, "step": 828 }, { "epoch": 0.11551592001672124, "grad_norm": 3.3915772438049316, "learning_rate": 9.944638157536399e-06, "loss": 0.15767288208007812, "step": 829 }, { "epoch": 0.11565526370793562, "grad_norm": 3.4161107540130615, "learning_rate": 9.94428786489541e-06, "loss": 0.250152587890625, "step": 830 }, { "epoch": 0.11579460739915, "grad_norm": 2.938044786453247, "learning_rate": 9.943936473746539e-06, "loss": 0.17104339599609375, "step": 831 }, { "epoch": 0.11593395109036439, "grad_norm": 3.1153223514556885, "learning_rate": 9.943583984167853e-06, "loss": 0.17169570922851562, "step": 832 }, { "epoch": 0.11607329478157877, "grad_norm": 2.1770823001861572, "learning_rate": 9.94323039623767e-06, "loss": 0.1708526611328125, "step": 833 }, { "epoch": 0.11621263847279314, "grad_norm": 2.1095900535583496, "learning_rate": 9.942875710034549e-06, "loss": 0.141021728515625, "step": 834 }, { "epoch": 0.11635198216400752, "grad_norm": 2.2535240650177, "learning_rate": 9.942519925637293e-06, "loss": 0.16474533081054688, "step": 835 }, { "epoch": 0.1164913258552219, "grad_norm": 1.6899245977401733, "learning_rate": 9.942163043124951e-06, "loss": 0.13654327392578125, "step": 836 }, { "epoch": 0.11663066954643629, "grad_norm": 2.653510808944702, "learning_rate": 9.941805062576811e-06, "loss": 0.14737319946289062, "step": 837 }, { "epoch": 0.11677001323765067, "grad_norm": 2.3035242557525635, "learning_rate": 9.941445984072408e-06, "loss": 0.16320419311523438, "step": 838 }, { "epoch": 0.11690935692886505, "grad_norm": 1.7052720785140991, "learning_rate": 9.941085807691524e-06, "loss": 0.16278457641601562, "step": 839 }, { "epoch": 0.11704870062007942, "grad_norm": 1.8400887250900269, "learning_rate": 9.94072453351418e-06, "loss": 0.146728515625, "step": 840 }, { "epoch": 0.1171880443112938, "grad_norm": 2.359339952468872, "learning_rate": 9.940362161620644e-06, "loss": 0.15762710571289062, "step": 841 }, { "epoch": 0.11732738800250819, "grad_norm": 2.191215753555298, "learning_rate": 9.939998692091427e-06, "loss": 0.1417999267578125, "step": 842 }, { "epoch": 0.11746673169372257, "grad_norm": 1.7773349285125732, "learning_rate": 9.939634125007279e-06, "loss": 0.13447952270507812, "step": 843 }, { "epoch": 0.11760607538493695, "grad_norm": 1.7601768970489502, "learning_rate": 9.939268460449205e-06, "loss": 0.15033721923828125, "step": 844 }, { "epoch": 0.11774541907615133, "grad_norm": 1.3186695575714111, "learning_rate": 9.938901698498444e-06, "loss": 0.16153717041015625, "step": 845 }, { "epoch": 0.1178847627673657, "grad_norm": 4.2205047607421875, "learning_rate": 9.938533839236483e-06, "loss": 0.1875762939453125, "step": 846 }, { "epoch": 0.1180241064585801, "grad_norm": 1.6212105751037598, "learning_rate": 9.938164882745051e-06, "loss": 0.16119766235351562, "step": 847 }, { "epoch": 0.11816345014979447, "grad_norm": 1.847929835319519, "learning_rate": 9.937794829106122e-06, "loss": 0.22357559204101562, "step": 848 }, { "epoch": 0.11830279384100885, "grad_norm": 1.6450310945510864, "learning_rate": 9.937423678401913e-06, "loss": 0.163787841796875, "step": 849 }, { "epoch": 0.11844213753222323, "grad_norm": 1.3028457164764404, "learning_rate": 9.937051430714888e-06, "loss": 0.13217926025390625, "step": 850 }, { "epoch": 0.1185814812234376, "grad_norm": 1.324638843536377, "learning_rate": 9.936678086127749e-06, "loss": 0.14348220825195312, "step": 851 }, { "epoch": 0.118720824914652, "grad_norm": 1.7746973037719727, "learning_rate": 9.936303644723446e-06, "loss": 0.14417266845703125, "step": 852 }, { "epoch": 0.11886016860586637, "grad_norm": 1.4103437662124634, "learning_rate": 9.93592810658517e-06, "loss": 0.21768951416015625, "step": 853 }, { "epoch": 0.11899951229708075, "grad_norm": 2.3373286724090576, "learning_rate": 9.935551471796358e-06, "loss": 0.13978958129882812, "step": 854 }, { "epoch": 0.11913885598829513, "grad_norm": 1.8944979906082153, "learning_rate": 9.935173740440692e-06, "loss": 0.15196990966796875, "step": 855 }, { "epoch": 0.1192781996795095, "grad_norm": 5.95789909362793, "learning_rate": 9.93479491260209e-06, "loss": 0.18541336059570312, "step": 856 }, { "epoch": 0.1194175433707239, "grad_norm": 6.1581292152404785, "learning_rate": 9.934414988364722e-06, "loss": 0.20302200317382812, "step": 857 }, { "epoch": 0.11955688706193827, "grad_norm": 1.8307266235351562, "learning_rate": 9.934033967812998e-06, "loss": 0.16485595703125, "step": 858 }, { "epoch": 0.11969623075315265, "grad_norm": 2.8567728996276855, "learning_rate": 9.933651851031573e-06, "loss": 0.18229293823242188, "step": 859 }, { "epoch": 0.11983557444436703, "grad_norm": 1.993895411491394, "learning_rate": 9.933268638105345e-06, "loss": 0.15320205688476562, "step": 860 }, { "epoch": 0.11997491813558141, "grad_norm": 2.3237545490264893, "learning_rate": 9.932884329119452e-06, "loss": 0.17299652099609375, "step": 861 }, { "epoch": 0.1201142618267958, "grad_norm": 2.6829276084899902, "learning_rate": 9.932498924159281e-06, "loss": 0.16802978515625, "step": 862 }, { "epoch": 0.12025360551801018, "grad_norm": 2.4148032665252686, "learning_rate": 9.93211242331046e-06, "loss": 0.14741134643554688, "step": 863 }, { "epoch": 0.12039294920922455, "grad_norm": 4.336479187011719, "learning_rate": 9.931724826658861e-06, "loss": 0.26902008056640625, "step": 864 }, { "epoch": 0.12053229290043893, "grad_norm": 3.9758565425872803, "learning_rate": 9.931336134290598e-06, "loss": 0.19696044921875, "step": 865 }, { "epoch": 0.12067163659165331, "grad_norm": 2.7490453720092773, "learning_rate": 9.930946346292032e-06, "loss": 0.18829727172851562, "step": 866 }, { "epoch": 0.1208109802828677, "grad_norm": 3.482215642929077, "learning_rate": 9.930555462749762e-06, "loss": 0.18785476684570312, "step": 867 }, { "epoch": 0.12095032397408208, "grad_norm": 1.2521177530288696, "learning_rate": 9.930163483750636e-06, "loss": 0.12441253662109375, "step": 868 }, { "epoch": 0.12108966766529645, "grad_norm": 4.197849273681641, "learning_rate": 9.92977040938174e-06, "loss": 0.1893138885498047, "step": 869 }, { "epoch": 0.12122901135651083, "grad_norm": 4.086376190185547, "learning_rate": 9.929376239730408e-06, "loss": 0.1775665283203125, "step": 870 }, { "epoch": 0.12136835504772521, "grad_norm": 3.1414597034454346, "learning_rate": 9.928980974884215e-06, "loss": 0.18038177490234375, "step": 871 }, { "epoch": 0.1215076987389396, "grad_norm": 1.755487322807312, "learning_rate": 9.928584614930981e-06, "loss": 0.18470001220703125, "step": 872 }, { "epoch": 0.12164704243015398, "grad_norm": 2.8909878730773926, "learning_rate": 9.928187159958764e-06, "loss": 0.16634368896484375, "step": 873 }, { "epoch": 0.12178638612136836, "grad_norm": 5.779233455657959, "learning_rate": 9.927788610055875e-06, "loss": 0.22149658203125, "step": 874 }, { "epoch": 0.12192572981258273, "grad_norm": 2.604761838912964, "learning_rate": 9.92738896531086e-06, "loss": 0.16870880126953125, "step": 875 }, { "epoch": 0.12206507350379711, "grad_norm": 2.873410701751709, "learning_rate": 9.926988225812511e-06, "loss": 0.2080841064453125, "step": 876 }, { "epoch": 0.1222044171950115, "grad_norm": 1.6994441747665405, "learning_rate": 9.926586391649863e-06, "loss": 0.11190032958984375, "step": 877 }, { "epoch": 0.12234376088622588, "grad_norm": 1.4622482061386108, "learning_rate": 9.926183462912196e-06, "loss": 0.15394973754882812, "step": 878 }, { "epoch": 0.12248310457744026, "grad_norm": 1.0746222734451294, "learning_rate": 9.925779439689028e-06, "loss": 0.16520309448242188, "step": 879 }, { "epoch": 0.12262244826865464, "grad_norm": 2.10135817527771, "learning_rate": 9.925374322070126e-06, "loss": 0.167236328125, "step": 880 }, { "epoch": 0.12276179195986901, "grad_norm": 1.7378472089767456, "learning_rate": 9.9249681101455e-06, "loss": 0.1766510009765625, "step": 881 }, { "epoch": 0.1229011356510834, "grad_norm": 0.9567672610282898, "learning_rate": 9.924560804005397e-06, "loss": 0.13457489013671875, "step": 882 }, { "epoch": 0.12304047934229778, "grad_norm": 2.57576584815979, "learning_rate": 9.924152403740315e-06, "loss": 0.17705535888671875, "step": 883 }, { "epoch": 0.12317982303351216, "grad_norm": 2.565058469772339, "learning_rate": 9.923742909440987e-06, "loss": 0.198577880859375, "step": 884 }, { "epoch": 0.12331916672472654, "grad_norm": 2.1855037212371826, "learning_rate": 9.923332321198396e-06, "loss": 0.20456695556640625, "step": 885 }, { "epoch": 0.12345851041594091, "grad_norm": 3.6315994262695312, "learning_rate": 9.922920639103766e-06, "loss": 0.20357513427734375, "step": 886 }, { "epoch": 0.12359785410715529, "grad_norm": 2.6045329570770264, "learning_rate": 9.92250786324856e-06, "loss": 0.19877243041992188, "step": 887 }, { "epoch": 0.12373719779836968, "grad_norm": 1.9652456045150757, "learning_rate": 9.922093993724492e-06, "loss": 0.16933441162109375, "step": 888 }, { "epoch": 0.12387654148958406, "grad_norm": 2.723449230194092, "learning_rate": 9.92167903062351e-06, "loss": 0.1597137451171875, "step": 889 }, { "epoch": 0.12401588518079844, "grad_norm": 4.422956466674805, "learning_rate": 9.921262974037813e-06, "loss": 0.186492919921875, "step": 890 }, { "epoch": 0.12415522887201282, "grad_norm": 4.019234657287598, "learning_rate": 9.920845824059836e-06, "loss": 0.16651535034179688, "step": 891 }, { "epoch": 0.12429457256322719, "grad_norm": 3.394670009613037, "learning_rate": 9.920427580782263e-06, "loss": 0.18286895751953125, "step": 892 }, { "epoch": 0.12443391625444158, "grad_norm": 1.8467844724655151, "learning_rate": 9.920008244298016e-06, "loss": 0.16858673095703125, "step": 893 }, { "epoch": 0.12457325994565596, "grad_norm": 2.149270534515381, "learning_rate": 9.919587814700262e-06, "loss": 0.16605377197265625, "step": 894 }, { "epoch": 0.12471260363687034, "grad_norm": 3.564455270767212, "learning_rate": 9.919166292082414e-06, "loss": 0.18818283081054688, "step": 895 }, { "epoch": 0.12485194732808472, "grad_norm": 1.9385876655578613, "learning_rate": 9.91874367653812e-06, "loss": 0.18356704711914062, "step": 896 }, { "epoch": 0.1249912910192991, "grad_norm": 2.959458589553833, "learning_rate": 9.91831996816128e-06, "loss": 0.1881256103515625, "step": 897 }, { "epoch": 0.12513063471051347, "grad_norm": 2.5169992446899414, "learning_rate": 9.917895167046027e-06, "loss": 0.17923355102539062, "step": 898 }, { "epoch": 0.12526997840172785, "grad_norm": 3.322206735610962, "learning_rate": 9.917469273286749e-06, "loss": 0.17261123657226562, "step": 899 }, { "epoch": 0.12540932209294225, "grad_norm": 2.9809203147888184, "learning_rate": 9.917042286978064e-06, "loss": 0.15196990966796875, "step": 900 }, { "epoch": 0.12554866578415663, "grad_norm": 2.849165678024292, "learning_rate": 9.916614208214841e-06, "loss": 0.19049835205078125, "step": 901 }, { "epoch": 0.125688009475371, "grad_norm": 1.5579876899719238, "learning_rate": 9.91618503709219e-06, "loss": 0.14512252807617188, "step": 902 }, { "epoch": 0.1258273531665854, "grad_norm": 3.6058123111724854, "learning_rate": 9.915754773705461e-06, "loss": 0.22011566162109375, "step": 903 }, { "epoch": 0.12596669685779976, "grad_norm": 1.7062089443206787, "learning_rate": 9.915323418150252e-06, "loss": 0.15746307373046875, "step": 904 }, { "epoch": 0.12610604054901414, "grad_norm": 1.8259642124176025, "learning_rate": 9.914890970522397e-06, "loss": 0.15607833862304688, "step": 905 }, { "epoch": 0.12624538424022852, "grad_norm": 2.0323452949523926, "learning_rate": 9.914457430917977e-06, "loss": 0.19217681884765625, "step": 906 }, { "epoch": 0.1263847279314429, "grad_norm": 1.5921417474746704, "learning_rate": 9.914022799433315e-06, "loss": 0.187408447265625, "step": 907 }, { "epoch": 0.12652407162265727, "grad_norm": 2.4915311336517334, "learning_rate": 9.913587076164976e-06, "loss": 0.15674972534179688, "step": 908 }, { "epoch": 0.12666341531387165, "grad_norm": 2.7281572818756104, "learning_rate": 9.913150261209767e-06, "loss": 0.19499969482421875, "step": 909 }, { "epoch": 0.12680275900508606, "grad_norm": 2.431154727935791, "learning_rate": 9.91271235466474e-06, "loss": 0.16276931762695312, "step": 910 }, { "epoch": 0.12694210269630044, "grad_norm": 2.8653151988983154, "learning_rate": 9.912273356627188e-06, "loss": 0.16312408447265625, "step": 911 }, { "epoch": 0.1270814463875148, "grad_norm": 2.202653646469116, "learning_rate": 9.911833267194643e-06, "loss": 0.21437454223632812, "step": 912 }, { "epoch": 0.1272207900787292, "grad_norm": 1.266716718673706, "learning_rate": 9.911392086464886e-06, "loss": 0.1162872314453125, "step": 913 }, { "epoch": 0.12736013376994357, "grad_norm": 1.5671656131744385, "learning_rate": 9.910949814535936e-06, "loss": 0.18141937255859375, "step": 914 }, { "epoch": 0.12749947746115795, "grad_norm": 0.9672275185585022, "learning_rate": 9.910506451506056e-06, "loss": 0.1444244384765625, "step": 915 }, { "epoch": 0.12763882115237232, "grad_norm": 1.1133419275283813, "learning_rate": 9.910061997473753e-06, "loss": 0.1190643310546875, "step": 916 }, { "epoch": 0.1277781648435867, "grad_norm": 1.931981086730957, "learning_rate": 9.909616452537772e-06, "loss": 0.15074539184570312, "step": 917 }, { "epoch": 0.12791750853480108, "grad_norm": 2.2207701206207275, "learning_rate": 9.909169816797102e-06, "loss": 0.14374923706054688, "step": 918 }, { "epoch": 0.12805685222601546, "grad_norm": 3.7517294883728027, "learning_rate": 9.908722090350979e-06, "loss": 0.17767333984375, "step": 919 }, { "epoch": 0.12819619591722986, "grad_norm": 2.254528045654297, "learning_rate": 9.908273273298874e-06, "loss": 0.14483642578125, "step": 920 }, { "epoch": 0.12833553960844424, "grad_norm": 1.176747441291809, "learning_rate": 9.907823365740507e-06, "loss": 0.16205596923828125, "step": 921 }, { "epoch": 0.12847488329965862, "grad_norm": 3.489192008972168, "learning_rate": 9.907372367775834e-06, "loss": 0.16419219970703125, "step": 922 }, { "epoch": 0.128614226990873, "grad_norm": 2.98726224899292, "learning_rate": 9.906920279505058e-06, "loss": 0.14940261840820312, "step": 923 }, { "epoch": 0.12875357068208737, "grad_norm": 7.851959228515625, "learning_rate": 9.906467101028625e-06, "loss": 0.2635078430175781, "step": 924 }, { "epoch": 0.12889291437330175, "grad_norm": 3.210786819458008, "learning_rate": 9.906012832447219e-06, "loss": 0.21021080017089844, "step": 925 }, { "epoch": 0.12903225806451613, "grad_norm": 1.8284941911697388, "learning_rate": 9.905557473861764e-06, "loss": 0.15709686279296875, "step": 926 }, { "epoch": 0.1291716017557305, "grad_norm": 2.3154125213623047, "learning_rate": 9.905101025373438e-06, "loss": 0.18984222412109375, "step": 927 }, { "epoch": 0.12931094544694488, "grad_norm": 1.7081457376480103, "learning_rate": 9.904643487083648e-06, "loss": 0.1357574462890625, "step": 928 }, { "epoch": 0.12945028913815926, "grad_norm": 2.986159563064575, "learning_rate": 9.90418485909405e-06, "loss": 0.1478118896484375, "step": 929 }, { "epoch": 0.12958963282937366, "grad_norm": 2.4240713119506836, "learning_rate": 9.903725141506539e-06, "loss": 0.14725875854492188, "step": 930 }, { "epoch": 0.12972897652058804, "grad_norm": 2.7870030403137207, "learning_rate": 9.903264334423258e-06, "loss": 0.1718597412109375, "step": 931 }, { "epoch": 0.12986832021180242, "grad_norm": 1.6235865354537964, "learning_rate": 9.902802437946584e-06, "loss": 0.14244461059570312, "step": 932 }, { "epoch": 0.1300076639030168, "grad_norm": 2.495516061782837, "learning_rate": 9.902339452179142e-06, "loss": 0.18912887573242188, "step": 933 }, { "epoch": 0.13014700759423117, "grad_norm": 1.8636064529418945, "learning_rate": 9.901875377223796e-06, "loss": 0.2408447265625, "step": 934 }, { "epoch": 0.13028635128544555, "grad_norm": 2.4858481884002686, "learning_rate": 9.901410213183653e-06, "loss": 0.17578506469726562, "step": 935 }, { "epoch": 0.13042569497665993, "grad_norm": 3.2294719219207764, "learning_rate": 9.900943960162061e-06, "loss": 0.2643089294433594, "step": 936 }, { "epoch": 0.1305650386678743, "grad_norm": 2.0653560161590576, "learning_rate": 9.900476618262612e-06, "loss": 0.147186279296875, "step": 937 }, { "epoch": 0.13070438235908868, "grad_norm": 1.923415184020996, "learning_rate": 9.900008187589138e-06, "loss": 0.15357589721679688, "step": 938 }, { "epoch": 0.13084372605030306, "grad_norm": 1.287346601486206, "learning_rate": 9.899538668245713e-06, "loss": 0.13859176635742188, "step": 939 }, { "epoch": 0.13098306974151747, "grad_norm": 1.7620081901550293, "learning_rate": 9.899068060336656e-06, "loss": 0.18091201782226562, "step": 940 }, { "epoch": 0.13112241343273184, "grad_norm": 2.363635778427124, "learning_rate": 9.898596363966523e-06, "loss": 0.15262603759765625, "step": 941 }, { "epoch": 0.13126175712394622, "grad_norm": 2.0103416442871094, "learning_rate": 9.898123579240115e-06, "loss": 0.17406845092773438, "step": 942 }, { "epoch": 0.1314011008151606, "grad_norm": 2.386070728302002, "learning_rate": 9.897649706262474e-06, "loss": 0.13594436645507812, "step": 943 }, { "epoch": 0.13154044450637498, "grad_norm": 1.4648844003677368, "learning_rate": 9.897174745138883e-06, "loss": 0.165771484375, "step": 944 }, { "epoch": 0.13167978819758935, "grad_norm": 2.0991270542144775, "learning_rate": 9.896698695974866e-06, "loss": 0.13891220092773438, "step": 945 }, { "epoch": 0.13181913188880373, "grad_norm": 1.6323614120483398, "learning_rate": 9.896221558876195e-06, "loss": 0.16036224365234375, "step": 946 }, { "epoch": 0.1319584755800181, "grad_norm": 3.6522395610809326, "learning_rate": 9.895743333948875e-06, "loss": 0.16862869262695312, "step": 947 }, { "epoch": 0.1320978192712325, "grad_norm": 1.6368886232376099, "learning_rate": 9.895264021299158e-06, "loss": 0.11810302734375, "step": 948 }, { "epoch": 0.13223716296244686, "grad_norm": 1.6710095405578613, "learning_rate": 9.894783621033538e-06, "loss": 0.17529678344726562, "step": 949 }, { "epoch": 0.13237650665366127, "grad_norm": 5.092850685119629, "learning_rate": 9.894302133258747e-06, "loss": 0.24879837036132812, "step": 950 }, { "epoch": 0.13251585034487565, "grad_norm": 2.4118950366973877, "learning_rate": 9.893819558081759e-06, "loss": 0.1504669189453125, "step": 951 }, { "epoch": 0.13265519403609002, "grad_norm": 2.143752336502075, "learning_rate": 9.893335895609792e-06, "loss": 0.20286178588867188, "step": 952 }, { "epoch": 0.1327945377273044, "grad_norm": 3.015186071395874, "learning_rate": 9.892851145950308e-06, "loss": 0.21229171752929688, "step": 953 }, { "epoch": 0.13293388141851878, "grad_norm": 1.900866985321045, "learning_rate": 9.892365309211005e-06, "loss": 0.1549072265625, "step": 954 }, { "epoch": 0.13307322510973316, "grad_norm": 3.2735836505889893, "learning_rate": 9.891878385499825e-06, "loss": 0.16571044921875, "step": 955 }, { "epoch": 0.13321256880094753, "grad_norm": 2.3417251110076904, "learning_rate": 9.891390374924949e-06, "loss": 0.14632797241210938, "step": 956 }, { "epoch": 0.1333519124921619, "grad_norm": 2.4118542671203613, "learning_rate": 9.890901277594806e-06, "loss": 0.182098388671875, "step": 957 }, { "epoch": 0.1334912561833763, "grad_norm": 1.1350691318511963, "learning_rate": 9.89041109361806e-06, "loss": 0.12827301025390625, "step": 958 }, { "epoch": 0.13363059987459067, "grad_norm": 2.147951126098633, "learning_rate": 9.889919823103618e-06, "loss": 0.16230392456054688, "step": 959 }, { "epoch": 0.13376994356580507, "grad_norm": 2.3890650272369385, "learning_rate": 9.889427466160633e-06, "loss": 0.21557998657226562, "step": 960 }, { "epoch": 0.13390928725701945, "grad_norm": 1.0842691659927368, "learning_rate": 9.888934022898488e-06, "loss": 0.167816162109375, "step": 961 }, { "epoch": 0.13404863094823383, "grad_norm": 2.1056079864501953, "learning_rate": 9.888439493426824e-06, "loss": 0.1595306396484375, "step": 962 }, { "epoch": 0.1341879746394482, "grad_norm": 1.0388083457946777, "learning_rate": 9.887943877855505e-06, "loss": 0.14861297607421875, "step": 963 }, { "epoch": 0.13432731833066258, "grad_norm": 0.9649723768234253, "learning_rate": 9.887447176294653e-06, "loss": 0.16063308715820312, "step": 964 }, { "epoch": 0.13446666202187696, "grad_norm": 0.9436282515525818, "learning_rate": 9.88694938885462e-06, "loss": 0.13828659057617188, "step": 965 }, { "epoch": 0.13460600571309134, "grad_norm": 2.3832836151123047, "learning_rate": 9.886450515646005e-06, "loss": 0.18193817138671875, "step": 966 }, { "epoch": 0.13474534940430571, "grad_norm": 1.5625437498092651, "learning_rate": 9.885950556779644e-06, "loss": 0.16274642944335938, "step": 967 }, { "epoch": 0.1348846930955201, "grad_norm": 2.1358482837677, "learning_rate": 9.885449512366617e-06, "loss": 0.186431884765625, "step": 968 }, { "epoch": 0.13502403678673447, "grad_norm": 1.5519949197769165, "learning_rate": 9.884947382518247e-06, "loss": 0.168487548828125, "step": 969 }, { "epoch": 0.13516338047794887, "grad_norm": 2.942392587661743, "learning_rate": 9.88444416734609e-06, "loss": 0.15933609008789062, "step": 970 }, { "epoch": 0.13530272416916325, "grad_norm": 1.8592779636383057, "learning_rate": 9.883939866961956e-06, "loss": 0.17573165893554688, "step": 971 }, { "epoch": 0.13544206786037763, "grad_norm": 2.2015435695648193, "learning_rate": 9.883434481477885e-06, "loss": 0.18421554565429688, "step": 972 }, { "epoch": 0.135581411551592, "grad_norm": 2.8603713512420654, "learning_rate": 9.882928011006163e-06, "loss": 0.16084671020507812, "step": 973 }, { "epoch": 0.13572075524280638, "grad_norm": 3.5782759189605713, "learning_rate": 9.882420455659316e-06, "loss": 0.17504501342773438, "step": 974 }, { "epoch": 0.13586009893402076, "grad_norm": 1.7755762338638306, "learning_rate": 9.881911815550111e-06, "loss": 0.16390228271484375, "step": 975 }, { "epoch": 0.13599944262523514, "grad_norm": 2.681462049484253, "learning_rate": 9.881402090791556e-06, "loss": 0.16349029541015625, "step": 976 }, { "epoch": 0.13613878631644952, "grad_norm": 3.422821283340454, "learning_rate": 9.880891281496901e-06, "loss": 0.18805694580078125, "step": 977 }, { "epoch": 0.1362781300076639, "grad_norm": 1.4992303848266602, "learning_rate": 9.880379387779637e-06, "loss": 0.13851547241210938, "step": 978 }, { "epoch": 0.13641747369887827, "grad_norm": 1.3968745470046997, "learning_rate": 9.879866409753493e-06, "loss": 0.17865753173828125, "step": 979 }, { "epoch": 0.13655681739009268, "grad_norm": 2.09902024269104, "learning_rate": 9.879352347532442e-06, "loss": 0.16246604919433594, "step": 980 }, { "epoch": 0.13669616108130705, "grad_norm": 2.254192590713501, "learning_rate": 9.878837201230697e-06, "loss": 0.138397216796875, "step": 981 }, { "epoch": 0.13683550477252143, "grad_norm": 3.2142586708068848, "learning_rate": 9.878320970962712e-06, "loss": 0.202178955078125, "step": 982 }, { "epoch": 0.1369748484637358, "grad_norm": 0.928220272064209, "learning_rate": 9.877803656843182e-06, "loss": 0.14115142822265625, "step": 983 }, { "epoch": 0.1371141921549502, "grad_norm": 1.887052297592163, "learning_rate": 9.877285258987039e-06, "loss": 0.1996612548828125, "step": 984 }, { "epoch": 0.13725353584616456, "grad_norm": 2.5797572135925293, "learning_rate": 9.876765777509463e-06, "loss": 0.1506500244140625, "step": 985 }, { "epoch": 0.13739287953737894, "grad_norm": 1.365474820137024, "learning_rate": 9.87624521252587e-06, "loss": 0.16166305541992188, "step": 986 }, { "epoch": 0.13753222322859332, "grad_norm": 1.9163709878921509, "learning_rate": 9.875723564151918e-06, "loss": 0.20726394653320312, "step": 987 }, { "epoch": 0.1376715669198077, "grad_norm": 3.031442642211914, "learning_rate": 9.875200832503505e-06, "loss": 0.15173912048339844, "step": 988 }, { "epoch": 0.13781091061102207, "grad_norm": 1.747515320777893, "learning_rate": 9.874677017696769e-06, "loss": 0.15509414672851562, "step": 989 }, { "epoch": 0.13795025430223645, "grad_norm": 3.124591112136841, "learning_rate": 9.87415211984809e-06, "loss": 0.15444183349609375, "step": 990 }, { "epoch": 0.13808959799345086, "grad_norm": 4.344653129577637, "learning_rate": 9.873626139074088e-06, "loss": 0.200927734375, "step": 991 }, { "epoch": 0.13822894168466524, "grad_norm": 3.5668184757232666, "learning_rate": 9.873099075491626e-06, "loss": 0.15292739868164062, "step": 992 }, { "epoch": 0.1383682853758796, "grad_norm": 1.1636962890625, "learning_rate": 9.872570929217804e-06, "loss": 0.13983917236328125, "step": 993 }, { "epoch": 0.138507629067094, "grad_norm": 3.482391834259033, "learning_rate": 9.872041700369965e-06, "loss": 0.18695831298828125, "step": 994 }, { "epoch": 0.13864697275830837, "grad_norm": 3.007829189300537, "learning_rate": 9.871511389065689e-06, "loss": 0.18606948852539062, "step": 995 }, { "epoch": 0.13878631644952275, "grad_norm": 1.9797812700271606, "learning_rate": 9.870979995422803e-06, "loss": 0.17000579833984375, "step": 996 }, { "epoch": 0.13892566014073712, "grad_norm": 1.4128435850143433, "learning_rate": 9.870447519559366e-06, "loss": 0.13553237915039062, "step": 997 }, { "epoch": 0.1390650038319515, "grad_norm": 3.2581892013549805, "learning_rate": 9.869913961593685e-06, "loss": 0.13753509521484375, "step": 998 }, { "epoch": 0.13920434752316588, "grad_norm": 1.9535539150238037, "learning_rate": 9.869379321644306e-06, "loss": 0.18130874633789062, "step": 999 }, { "epoch": 0.13934369121438026, "grad_norm": 1.7645772695541382, "learning_rate": 9.868843599830009e-06, "loss": 0.2299957275390625, "step": 1000 }, { "epoch": 0.13948303490559466, "grad_norm": 3.1288201808929443, "learning_rate": 9.868306796269822e-06, "loss": 0.17208099365234375, "step": 1001 }, { "epoch": 0.13962237859680904, "grad_norm": 1.8149311542510986, "learning_rate": 9.86776891108301e-06, "loss": 0.1705780029296875, "step": 1002 }, { "epoch": 0.13976172228802342, "grad_norm": 1.5944546461105347, "learning_rate": 9.86722994438908e-06, "loss": 0.21743392944335938, "step": 1003 }, { "epoch": 0.1399010659792378, "grad_norm": 2.6374082565307617, "learning_rate": 9.866689896307778e-06, "loss": 0.14109420776367188, "step": 1004 }, { "epoch": 0.14004040967045217, "grad_norm": 2.9828546047210693, "learning_rate": 9.866148766959087e-06, "loss": 0.140289306640625, "step": 1005 }, { "epoch": 0.14017975336166655, "grad_norm": 2.8784592151641846, "learning_rate": 9.865606556463239e-06, "loss": 0.14374542236328125, "step": 1006 }, { "epoch": 0.14031909705288093, "grad_norm": 1.184831976890564, "learning_rate": 9.865063264940695e-06, "loss": 0.1334991455078125, "step": 1007 }, { "epoch": 0.1404584407440953, "grad_norm": 1.1696521043777466, "learning_rate": 9.864518892512167e-06, "loss": 0.14551162719726562, "step": 1008 }, { "epoch": 0.14059778443530968, "grad_norm": 1.9448105096817017, "learning_rate": 9.863973439298597e-06, "loss": 0.1931610107421875, "step": 1009 }, { "epoch": 0.14073712812652406, "grad_norm": 2.2878305912017822, "learning_rate": 9.863426905421179e-06, "loss": 0.15793609619140625, "step": 1010 }, { "epoch": 0.14087647181773846, "grad_norm": 1.2535814046859741, "learning_rate": 9.862879291001334e-06, "loss": 0.12295150756835938, "step": 1011 }, { "epoch": 0.14101581550895284, "grad_norm": 4.273500919342041, "learning_rate": 9.862330596160732e-06, "loss": 0.2405242919921875, "step": 1012 }, { "epoch": 0.14115515920016722, "grad_norm": 2.4581451416015625, "learning_rate": 9.861780821021282e-06, "loss": 0.15462493896484375, "step": 1013 }, { "epoch": 0.1412945028913816, "grad_norm": 0.7712424993515015, "learning_rate": 9.861229965705129e-06, "loss": 0.12784957885742188, "step": 1014 }, { "epoch": 0.14143384658259597, "grad_norm": 2.748119592666626, "learning_rate": 9.86067803033466e-06, "loss": 0.20077133178710938, "step": 1015 }, { "epoch": 0.14157319027381035, "grad_norm": 1.357459545135498, "learning_rate": 9.860125015032506e-06, "loss": 0.1405181884765625, "step": 1016 }, { "epoch": 0.14171253396502473, "grad_norm": 3.3267345428466797, "learning_rate": 9.859570919921533e-06, "loss": 0.17513656616210938, "step": 1017 }, { "epoch": 0.1418518776562391, "grad_norm": 2.5599870681762695, "learning_rate": 9.859015745124844e-06, "loss": 0.14781570434570312, "step": 1018 }, { "epoch": 0.14199122134745348, "grad_norm": 2.0751545429229736, "learning_rate": 9.858459490765792e-06, "loss": 0.1556835174560547, "step": 1019 }, { "epoch": 0.14213056503866786, "grad_norm": 1.8162630796432495, "learning_rate": 9.857902156967961e-06, "loss": 0.1649169921875, "step": 1020 }, { "epoch": 0.14226990872988227, "grad_norm": 1.6497441530227661, "learning_rate": 9.857343743855178e-06, "loss": 0.1501178741455078, "step": 1021 }, { "epoch": 0.14240925242109664, "grad_norm": 3.3377108573913574, "learning_rate": 9.856784251551512e-06, "loss": 0.16289520263671875, "step": 1022 }, { "epoch": 0.14254859611231102, "grad_norm": 2.088881731033325, "learning_rate": 9.856223680181267e-06, "loss": 0.15045166015625, "step": 1023 }, { "epoch": 0.1426879398035254, "grad_norm": 2.1757631301879883, "learning_rate": 9.85566202986899e-06, "loss": 0.23987579345703125, "step": 1024 }, { "epoch": 0.14282728349473978, "grad_norm": 2.4014220237731934, "learning_rate": 9.855099300739463e-06, "loss": 0.18248367309570312, "step": 1025 }, { "epoch": 0.14296662718595415, "grad_norm": 2.041106700897217, "learning_rate": 9.854535492917718e-06, "loss": 0.14673995971679688, "step": 1026 }, { "epoch": 0.14310597087716853, "grad_norm": 1.1712578535079956, "learning_rate": 9.853970606529018e-06, "loss": 0.12004852294921875, "step": 1027 }, { "epoch": 0.1432453145683829, "grad_norm": 1.5129525661468506, "learning_rate": 9.853404641698866e-06, "loss": 0.17199325561523438, "step": 1028 }, { "epoch": 0.1433846582595973, "grad_norm": 1.6469067335128784, "learning_rate": 9.85283759855301e-06, "loss": 0.16548728942871094, "step": 1029 }, { "epoch": 0.14352400195081166, "grad_norm": 1.1625276803970337, "learning_rate": 9.852269477217428e-06, "loss": 0.1496429443359375, "step": 1030 }, { "epoch": 0.14366334564202607, "grad_norm": 1.554938554763794, "learning_rate": 9.85170027781835e-06, "loss": 0.17441558837890625, "step": 1031 }, { "epoch": 0.14380268933324045, "grad_norm": 1.0496373176574707, "learning_rate": 9.851130000482236e-06, "loss": 0.13828086853027344, "step": 1032 }, { "epoch": 0.14394203302445482, "grad_norm": 1.6966513395309448, "learning_rate": 9.85055864533579e-06, "loss": 0.15291213989257812, "step": 1033 }, { "epoch": 0.1440813767156692, "grad_norm": 2.0782687664031982, "learning_rate": 9.849986212505952e-06, "loss": 0.18770217895507812, "step": 1034 }, { "epoch": 0.14422072040688358, "grad_norm": 1.706167459487915, "learning_rate": 9.849412702119905e-06, "loss": 0.15915679931640625, "step": 1035 }, { "epoch": 0.14436006409809796, "grad_norm": 1.3127847909927368, "learning_rate": 9.848838114305069e-06, "loss": 0.17179107666015625, "step": 1036 }, { "epoch": 0.14449940778931233, "grad_norm": 2.911959171295166, "learning_rate": 9.848262449189105e-06, "loss": 0.16613006591796875, "step": 1037 }, { "epoch": 0.1446387514805267, "grad_norm": 1.5116662979125977, "learning_rate": 9.847685706899913e-06, "loss": 0.1750659942626953, "step": 1038 }, { "epoch": 0.1447780951717411, "grad_norm": 2.414057970046997, "learning_rate": 9.84710788756563e-06, "loss": 0.163177490234375, "step": 1039 }, { "epoch": 0.14491743886295547, "grad_norm": 4.228857517242432, "learning_rate": 9.846528991314638e-06, "loss": 0.1477794647216797, "step": 1040 }, { "epoch": 0.14505678255416987, "grad_norm": 1.9944995641708374, "learning_rate": 9.845949018275551e-06, "loss": 0.16678619384765625, "step": 1041 }, { "epoch": 0.14519612624538425, "grad_norm": 1.7131547927856445, "learning_rate": 9.845367968577229e-06, "loss": 0.17217254638671875, "step": 1042 }, { "epoch": 0.14533546993659863, "grad_norm": 1.2502248287200928, "learning_rate": 9.844785842348764e-06, "loss": 0.15869903564453125, "step": 1043 }, { "epoch": 0.145474813627813, "grad_norm": 2.058324098587036, "learning_rate": 9.844202639719492e-06, "loss": 0.2410125732421875, "step": 1044 }, { "epoch": 0.14561415731902738, "grad_norm": 2.0622987747192383, "learning_rate": 9.84361836081899e-06, "loss": 0.16048431396484375, "step": 1045 }, { "epoch": 0.14575350101024176, "grad_norm": 2.362863779067993, "learning_rate": 9.84303300577707e-06, "loss": 0.17258453369140625, "step": 1046 }, { "epoch": 0.14589284470145614, "grad_norm": 0.8354880809783936, "learning_rate": 9.842446574723786e-06, "loss": 0.15041732788085938, "step": 1047 }, { "epoch": 0.14603218839267051, "grad_norm": 2.829923629760742, "learning_rate": 9.841859067789425e-06, "loss": 0.17613983154296875, "step": 1048 }, { "epoch": 0.1461715320838849, "grad_norm": 3.311335802078247, "learning_rate": 9.841270485104522e-06, "loss": 0.19432449340820312, "step": 1049 }, { "epoch": 0.14631087577509927, "grad_norm": 1.0730576515197754, "learning_rate": 9.840680826799845e-06, "loss": 0.1169586181640625, "step": 1050 }, { "epoch": 0.14645021946631367, "grad_norm": 1.2906898260116577, "learning_rate": 9.840090093006403e-06, "loss": 0.16135406494140625, "step": 1051 }, { "epoch": 0.14658956315752805, "grad_norm": 1.0264382362365723, "learning_rate": 9.839498283855444e-06, "loss": 0.16068267822265625, "step": 1052 }, { "epoch": 0.14672890684874243, "grad_norm": 1.4936474561691284, "learning_rate": 9.838905399478453e-06, "loss": 0.1387939453125, "step": 1053 }, { "epoch": 0.1468682505399568, "grad_norm": 0.8178783655166626, "learning_rate": 9.838311440007159e-06, "loss": 0.13112640380859375, "step": 1054 }, { "epoch": 0.14700759423117118, "grad_norm": 2.828167200088501, "learning_rate": 9.83771640557352e-06, "loss": 0.16175460815429688, "step": 1055 }, { "epoch": 0.14714693792238556, "grad_norm": 3.3706188201904297, "learning_rate": 9.837120296309744e-06, "loss": 0.27178955078125, "step": 1056 }, { "epoch": 0.14728628161359994, "grad_norm": 2.089242696762085, "learning_rate": 9.836523112348271e-06, "loss": 0.16786766052246094, "step": 1057 }, { "epoch": 0.14742562530481432, "grad_norm": 1.5946259498596191, "learning_rate": 9.835924853821783e-06, "loss": 0.15882492065429688, "step": 1058 }, { "epoch": 0.1475649689960287, "grad_norm": 2.1155409812927246, "learning_rate": 9.8353255208632e-06, "loss": 0.17049026489257812, "step": 1059 }, { "epoch": 0.14770431268724307, "grad_norm": 1.3455859422683716, "learning_rate": 9.834725113605676e-06, "loss": 0.15898513793945312, "step": 1060 }, { "epoch": 0.14784365637845748, "grad_norm": 1.716949224472046, "learning_rate": 9.83412363218261e-06, "loss": 0.127655029296875, "step": 1061 }, { "epoch": 0.14798300006967186, "grad_norm": 1.7510020732879639, "learning_rate": 9.833521076727638e-06, "loss": 0.15483474731445312, "step": 1062 }, { "epoch": 0.14812234376088623, "grad_norm": 1.522081732749939, "learning_rate": 9.832917447374637e-06, "loss": 0.15301513671875, "step": 1063 }, { "epoch": 0.1482616874521006, "grad_norm": 1.8355700969696045, "learning_rate": 9.832312744257715e-06, "loss": 0.14631271362304688, "step": 1064 }, { "epoch": 0.148401031143315, "grad_norm": 1.1556516885757446, "learning_rate": 9.831706967511223e-06, "loss": 0.17492294311523438, "step": 1065 }, { "epoch": 0.14854037483452937, "grad_norm": 1.5612952709197998, "learning_rate": 9.831100117269755e-06, "loss": 0.19419479370117188, "step": 1066 }, { "epoch": 0.14867971852574374, "grad_norm": 1.9080356359481812, "learning_rate": 9.830492193668135e-06, "loss": 0.13305282592773438, "step": 1067 }, { "epoch": 0.14881906221695812, "grad_norm": 1.3503144979476929, "learning_rate": 9.829883196841433e-06, "loss": 0.17604827880859375, "step": 1068 }, { "epoch": 0.1489584059081725, "grad_norm": 1.409915804862976, "learning_rate": 9.829273126924952e-06, "loss": 0.14429092407226562, "step": 1069 }, { "epoch": 0.14909774959938688, "grad_norm": 2.1023433208465576, "learning_rate": 9.828661984054238e-06, "loss": 0.15804672241210938, "step": 1070 }, { "epoch": 0.14923709329060128, "grad_norm": 1.3361499309539795, "learning_rate": 9.82804976836507e-06, "loss": 0.15448760986328125, "step": 1071 }, { "epoch": 0.14937643698181566, "grad_norm": 1.9718984365463257, "learning_rate": 9.827436479993468e-06, "loss": 0.13877105712890625, "step": 1072 }, { "epoch": 0.14951578067303004, "grad_norm": 1.2452679872512817, "learning_rate": 9.826822119075694e-06, "loss": 0.11529922485351562, "step": 1073 }, { "epoch": 0.1496551243642444, "grad_norm": 1.804949164390564, "learning_rate": 9.826206685748242e-06, "loss": 0.16595458984375, "step": 1074 }, { "epoch": 0.1497944680554588, "grad_norm": 1.46828293800354, "learning_rate": 9.825590180147852e-06, "loss": 0.12575912475585938, "step": 1075 }, { "epoch": 0.14993381174667317, "grad_norm": 2.116391181945801, "learning_rate": 9.82497260241149e-06, "loss": 0.134185791015625, "step": 1076 }, { "epoch": 0.15007315543788755, "grad_norm": 1.7492636442184448, "learning_rate": 9.824353952676375e-06, "loss": 0.17179489135742188, "step": 1077 }, { "epoch": 0.15021249912910192, "grad_norm": 1.0885988473892212, "learning_rate": 9.823734231079953e-06, "loss": 0.12475967407226562, "step": 1078 }, { "epoch": 0.1503518428203163, "grad_norm": 3.5750842094421387, "learning_rate": 9.823113437759912e-06, "loss": 0.19211959838867188, "step": 1079 }, { "epoch": 0.15049118651153068, "grad_norm": 2.5545153617858887, "learning_rate": 9.822491572854178e-06, "loss": 0.18545913696289062, "step": 1080 }, { "epoch": 0.15063053020274508, "grad_norm": 1.3409606218338013, "learning_rate": 9.821868636500917e-06, "loss": 0.17647933959960938, "step": 1081 }, { "epoch": 0.15076987389395946, "grad_norm": 1.6179906129837036, "learning_rate": 9.82124462883853e-06, "loss": 0.15326309204101562, "step": 1082 }, { "epoch": 0.15090921758517384, "grad_norm": 1.9578139781951904, "learning_rate": 9.820619550005656e-06, "loss": 0.191436767578125, "step": 1083 }, { "epoch": 0.15104856127638822, "grad_norm": 1.9928569793701172, "learning_rate": 9.819993400141176e-06, "loss": 0.146148681640625, "step": 1084 }, { "epoch": 0.1511879049676026, "grad_norm": 1.779413104057312, "learning_rate": 9.819366179384204e-06, "loss": 0.16701889038085938, "step": 1085 }, { "epoch": 0.15132724865881697, "grad_norm": 0.9792899489402771, "learning_rate": 9.818737887874097e-06, "loss": 0.1484699249267578, "step": 1086 }, { "epoch": 0.15146659235003135, "grad_norm": 0.9760263562202454, "learning_rate": 9.818108525750442e-06, "loss": 0.157958984375, "step": 1087 }, { "epoch": 0.15160593604124573, "grad_norm": 1.213348150253296, "learning_rate": 9.817478093153074e-06, "loss": 0.13091278076171875, "step": 1088 }, { "epoch": 0.1517452797324601, "grad_norm": 2.9184439182281494, "learning_rate": 9.816846590222058e-06, "loss": 0.1701812744140625, "step": 1089 }, { "epoch": 0.15188462342367448, "grad_norm": 0.9072906374931335, "learning_rate": 9.8162140170977e-06, "loss": 0.14400482177734375, "step": 1090 }, { "epoch": 0.15202396711488889, "grad_norm": 1.9547216892242432, "learning_rate": 9.815580373920543e-06, "loss": 0.14972305297851562, "step": 1091 }, { "epoch": 0.15216331080610326, "grad_norm": 2.3461248874664307, "learning_rate": 9.81494566083137e-06, "loss": 0.157379150390625, "step": 1092 }, { "epoch": 0.15230265449731764, "grad_norm": 1.1087168455123901, "learning_rate": 9.814309877971195e-06, "loss": 0.16144561767578125, "step": 1093 }, { "epoch": 0.15244199818853202, "grad_norm": 1.0651090145111084, "learning_rate": 9.81367302548128e-06, "loss": 0.16064453125, "step": 1094 }, { "epoch": 0.1525813418797464, "grad_norm": 1.3139628171920776, "learning_rate": 9.813035103503116e-06, "loss": 0.15865707397460938, "step": 1095 }, { "epoch": 0.15272068557096077, "grad_norm": 1.5574569702148438, "learning_rate": 9.812396112178437e-06, "loss": 0.1580963134765625, "step": 1096 }, { "epoch": 0.15286002926217515, "grad_norm": 2.0276737213134766, "learning_rate": 9.811756051649209e-06, "loss": 0.17824935913085938, "step": 1097 }, { "epoch": 0.15299937295338953, "grad_norm": 1.630545735359192, "learning_rate": 9.811114922057642e-06, "loss": 0.184326171875, "step": 1098 }, { "epoch": 0.1531387166446039, "grad_norm": 2.0554165840148926, "learning_rate": 9.810472723546178e-06, "loss": 0.17765045166015625, "step": 1099 }, { "epoch": 0.15327806033581828, "grad_norm": 1.7224451303482056, "learning_rate": 9.8098294562575e-06, "loss": 0.14744186401367188, "step": 1100 }, { "epoch": 0.1534174040270327, "grad_norm": 1.6079154014587402, "learning_rate": 9.809185120334528e-06, "loss": 0.176605224609375, "step": 1101 }, { "epoch": 0.15355674771824707, "grad_norm": 1.829040765762329, "learning_rate": 9.808539715920415e-06, "loss": 0.18359375, "step": 1102 }, { "epoch": 0.15369609140946144, "grad_norm": 1.4046467542648315, "learning_rate": 9.807893243158562e-06, "loss": 0.15606689453125, "step": 1103 }, { "epoch": 0.15383543510067582, "grad_norm": 1.2654824256896973, "learning_rate": 9.807245702192593e-06, "loss": 0.16444778442382812, "step": 1104 }, { "epoch": 0.1539747787918902, "grad_norm": 1.512998104095459, "learning_rate": 9.80659709316638e-06, "loss": 0.15055465698242188, "step": 1105 }, { "epoch": 0.15411412248310458, "grad_norm": 1.4276597499847412, "learning_rate": 9.805947416224034e-06, "loss": 0.15139389038085938, "step": 1106 }, { "epoch": 0.15425346617431895, "grad_norm": 1.4570566415786743, "learning_rate": 9.80529667150989e-06, "loss": 0.15483856201171875, "step": 1107 }, { "epoch": 0.15439280986553333, "grad_norm": 1.378745198249817, "learning_rate": 9.804644859168534e-06, "loss": 0.12006759643554688, "step": 1108 }, { "epoch": 0.1545321535567477, "grad_norm": 1.3098984956741333, "learning_rate": 9.80399197934478e-06, "loss": 0.15525436401367188, "step": 1109 }, { "epoch": 0.1546714972479621, "grad_norm": 1.9281615018844604, "learning_rate": 9.803338032183686e-06, "loss": 0.12535476684570312, "step": 1110 }, { "epoch": 0.1548108409391765, "grad_norm": 2.7881970405578613, "learning_rate": 9.802683017830544e-06, "loss": 0.15117645263671875, "step": 1111 }, { "epoch": 0.15495018463039087, "grad_norm": 2.163752555847168, "learning_rate": 9.802026936430883e-06, "loss": 0.16022872924804688, "step": 1112 }, { "epoch": 0.15508952832160525, "grad_norm": 1.5137676000595093, "learning_rate": 9.801369788130468e-06, "loss": 0.1527538299560547, "step": 1113 }, { "epoch": 0.15522887201281962, "grad_norm": 2.6003353595733643, "learning_rate": 9.800711573075303e-06, "loss": 0.16891098022460938, "step": 1114 }, { "epoch": 0.155368215704034, "grad_norm": 1.544028639793396, "learning_rate": 9.80005229141163e-06, "loss": 0.18408584594726562, "step": 1115 }, { "epoch": 0.15550755939524838, "grad_norm": 2.4172520637512207, "learning_rate": 9.799391943285923e-06, "loss": 0.15765380859375, "step": 1116 }, { "epoch": 0.15564690308646276, "grad_norm": 3.238797187805176, "learning_rate": 9.798730528844899e-06, "loss": 0.18062210083007812, "step": 1117 }, { "epoch": 0.15578624677767713, "grad_norm": 2.541339635848999, "learning_rate": 9.79806804823551e-06, "loss": 0.17756271362304688, "step": 1118 }, { "epoch": 0.1559255904688915, "grad_norm": 1.9617035388946533, "learning_rate": 9.79740450160494e-06, "loss": 0.17780685424804688, "step": 1119 }, { "epoch": 0.1560649341601059, "grad_norm": 1.2101854085922241, "learning_rate": 9.796739889100617e-06, "loss": 0.1334075927734375, "step": 1120 }, { "epoch": 0.1562042778513203, "grad_norm": 2.111110210418701, "learning_rate": 9.796074210870204e-06, "loss": 0.21496963500976562, "step": 1121 }, { "epoch": 0.15634362154253467, "grad_norm": 2.3409016132354736, "learning_rate": 9.795407467061596e-06, "loss": 0.18873977661132812, "step": 1122 }, { "epoch": 0.15648296523374905, "grad_norm": 2.353865146636963, "learning_rate": 9.794739657822929e-06, "loss": 0.16180801391601562, "step": 1123 }, { "epoch": 0.15662230892496343, "grad_norm": 1.0820711851119995, "learning_rate": 9.794070783302576e-06, "loss": 0.14345932006835938, "step": 1124 }, { "epoch": 0.1567616526161778, "grad_norm": 1.9734961986541748, "learning_rate": 9.793400843649146e-06, "loss": 0.14302825927734375, "step": 1125 }, { "epoch": 0.15690099630739218, "grad_norm": 1.6874796152114868, "learning_rate": 9.792729839011484e-06, "loss": 0.17561721801757812, "step": 1126 }, { "epoch": 0.15704033999860656, "grad_norm": 2.1209678649902344, "learning_rate": 9.792057769538672e-06, "loss": 0.19632530212402344, "step": 1127 }, { "epoch": 0.15717968368982094, "grad_norm": 1.683135747909546, "learning_rate": 9.791384635380028e-06, "loss": 0.14852142333984375, "step": 1128 }, { "epoch": 0.15731902738103531, "grad_norm": 2.900745153427124, "learning_rate": 9.790710436685105e-06, "loss": 0.18876266479492188, "step": 1129 }, { "epoch": 0.1574583710722497, "grad_norm": 1.8865302801132202, "learning_rate": 9.790035173603699e-06, "loss": 0.15003204345703125, "step": 1130 }, { "epoch": 0.1575977147634641, "grad_norm": 1.7177621126174927, "learning_rate": 9.789358846285835e-06, "loss": 0.1744537353515625, "step": 1131 }, { "epoch": 0.15773705845467847, "grad_norm": 2.632206439971924, "learning_rate": 9.788681454881778e-06, "loss": 0.19709014892578125, "step": 1132 }, { "epoch": 0.15787640214589285, "grad_norm": 3.247283697128296, "learning_rate": 9.78800299954203e-06, "loss": 0.1829833984375, "step": 1133 }, { "epoch": 0.15801574583710723, "grad_norm": 2.283170700073242, "learning_rate": 9.787323480417328e-06, "loss": 0.14276123046875, "step": 1134 }, { "epoch": 0.1581550895283216, "grad_norm": 1.298680305480957, "learning_rate": 9.786642897658645e-06, "loss": 0.14679336547851562, "step": 1135 }, { "epoch": 0.15829443321953598, "grad_norm": 2.2157130241394043, "learning_rate": 9.78596125141719e-06, "loss": 0.16594314575195312, "step": 1136 }, { "epoch": 0.15843377691075036, "grad_norm": 4.665394306182861, "learning_rate": 9.785278541844409e-06, "loss": 0.20782852172851562, "step": 1137 }, { "epoch": 0.15857312060196474, "grad_norm": 2.2343220710754395, "learning_rate": 9.784594769091989e-06, "loss": 0.14331817626953125, "step": 1138 }, { "epoch": 0.15871246429317912, "grad_norm": 1.2509849071502686, "learning_rate": 9.783909933311844e-06, "loss": 0.14791107177734375, "step": 1139 }, { "epoch": 0.1588518079843935, "grad_norm": 2.6605048179626465, "learning_rate": 9.78322403465613e-06, "loss": 0.2061920166015625, "step": 1140 }, { "epoch": 0.1589911516756079, "grad_norm": 1.9220480918884277, "learning_rate": 9.782537073277238e-06, "loss": 0.20465850830078125, "step": 1141 }, { "epoch": 0.15913049536682228, "grad_norm": 2.3617186546325684, "learning_rate": 9.781849049327796e-06, "loss": 0.15416526794433594, "step": 1142 }, { "epoch": 0.15926983905803666, "grad_norm": 1.6613247394561768, "learning_rate": 9.781159962960667e-06, "loss": 0.16695022583007812, "step": 1143 }, { "epoch": 0.15940918274925103, "grad_norm": 2.9190685749053955, "learning_rate": 9.78046981432895e-06, "loss": 0.18732070922851562, "step": 1144 }, { "epoch": 0.1595485264404654, "grad_norm": 3.1979873180389404, "learning_rate": 9.77977860358598e-06, "loss": 0.215179443359375, "step": 1145 }, { "epoch": 0.1596878701316798, "grad_norm": 2.3394806385040283, "learning_rate": 9.779086330885328e-06, "loss": 0.17305374145507812, "step": 1146 }, { "epoch": 0.15982721382289417, "grad_norm": 4.4543914794921875, "learning_rate": 9.778392996380803e-06, "loss": 0.1705322265625, "step": 1147 }, { "epoch": 0.15996655751410854, "grad_norm": 2.7325947284698486, "learning_rate": 9.777698600226446e-06, "loss": 0.15743255615234375, "step": 1148 }, { "epoch": 0.16010590120532292, "grad_norm": 1.6601214408874512, "learning_rate": 9.777003142576536e-06, "loss": 0.14453887939453125, "step": 1149 }, { "epoch": 0.1602452448965373, "grad_norm": 1.5075210332870483, "learning_rate": 9.77630662358559e-06, "loss": 0.19239425659179688, "step": 1150 }, { "epoch": 0.1603845885877517, "grad_norm": 1.471432089805603, "learning_rate": 9.775609043408356e-06, "loss": 0.1717071533203125, "step": 1151 }, { "epoch": 0.16052393227896608, "grad_norm": 1.6575008630752563, "learning_rate": 9.774910402199821e-06, "loss": 0.1465606689453125, "step": 1152 }, { "epoch": 0.16066327597018046, "grad_norm": 1.7197988033294678, "learning_rate": 9.774210700115209e-06, "loss": 0.17126846313476562, "step": 1153 }, { "epoch": 0.16080261966139484, "grad_norm": 2.0815441608428955, "learning_rate": 9.773509937309978e-06, "loss": 0.18928909301757812, "step": 1154 }, { "epoch": 0.1609419633526092, "grad_norm": 1.8402706384658813, "learning_rate": 9.772808113939819e-06, "loss": 0.20426177978515625, "step": 1155 }, { "epoch": 0.1610813070438236, "grad_norm": 1.4692069292068481, "learning_rate": 9.77210523016066e-06, "loss": 0.14171600341796875, "step": 1156 }, { "epoch": 0.16122065073503797, "grad_norm": 1.276181697845459, "learning_rate": 9.771401286128668e-06, "loss": 0.14428329467773438, "step": 1157 }, { "epoch": 0.16135999442625235, "grad_norm": 3.925196409225464, "learning_rate": 9.770696282000245e-06, "loss": 0.17850112915039062, "step": 1158 }, { "epoch": 0.16149933811746672, "grad_norm": 3.423738479614258, "learning_rate": 9.769990217932023e-06, "loss": 0.16177749633789062, "step": 1159 }, { "epoch": 0.1616386818086811, "grad_norm": 1.932698369026184, "learning_rate": 9.769283094080878e-06, "loss": 0.19432830810546875, "step": 1160 }, { "epoch": 0.1617780254998955, "grad_norm": 1.6066175699234009, "learning_rate": 9.768574910603912e-06, "loss": 0.18822097778320312, "step": 1161 }, { "epoch": 0.16191736919110988, "grad_norm": 1.6031935214996338, "learning_rate": 9.767865667658472e-06, "loss": 0.11801528930664062, "step": 1162 }, { "epoch": 0.16205671288232426, "grad_norm": 3.1268208026885986, "learning_rate": 9.76715536540213e-06, "loss": 0.1611175537109375, "step": 1163 }, { "epoch": 0.16219605657353864, "grad_norm": 1.4019395112991333, "learning_rate": 9.766444003992704e-06, "loss": 0.14422607421875, "step": 1164 }, { "epoch": 0.16233540026475302, "grad_norm": 3.075521469116211, "learning_rate": 9.765731583588237e-06, "loss": 0.18520355224609375, "step": 1165 }, { "epoch": 0.1624747439559674, "grad_norm": 3.807288885116577, "learning_rate": 9.765018104347017e-06, "loss": 0.20442962646484375, "step": 1166 }, { "epoch": 0.16261408764718177, "grad_norm": 2.794682741165161, "learning_rate": 9.764303566427561e-06, "loss": 0.17717742919921875, "step": 1167 }, { "epoch": 0.16275343133839615, "grad_norm": 0.7282239198684692, "learning_rate": 9.763587969988626e-06, "loss": 0.13478469848632812, "step": 1168 }, { "epoch": 0.16289277502961053, "grad_norm": 3.977158784866333, "learning_rate": 9.762871315189198e-06, "loss": 0.21076393127441406, "step": 1169 }, { "epoch": 0.1630321187208249, "grad_norm": 2.815103054046631, "learning_rate": 9.7621536021885e-06, "loss": 0.19240760803222656, "step": 1170 }, { "epoch": 0.1631714624120393, "grad_norm": 1.5859243869781494, "learning_rate": 9.761434831145995e-06, "loss": 0.13890838623046875, "step": 1171 }, { "epoch": 0.1633108061032537, "grad_norm": 1.998612642288208, "learning_rate": 9.760715002221375e-06, "loss": 0.14713287353515625, "step": 1172 }, { "epoch": 0.16345014979446806, "grad_norm": 2.0079848766326904, "learning_rate": 9.759994115574571e-06, "loss": 0.1665782928466797, "step": 1173 }, { "epoch": 0.16358949348568244, "grad_norm": 2.751070022583008, "learning_rate": 9.759272171365746e-06, "loss": 0.17601776123046875, "step": 1174 }, { "epoch": 0.16372883717689682, "grad_norm": 1.214308500289917, "learning_rate": 9.758549169755302e-06, "loss": 0.15247535705566406, "step": 1175 }, { "epoch": 0.1638681808681112, "grad_norm": 1.9956183433532715, "learning_rate": 9.757825110903872e-06, "loss": 0.13373565673828125, "step": 1176 }, { "epoch": 0.16400752455932557, "grad_norm": 2.276353597640991, "learning_rate": 9.757099994972323e-06, "loss": 0.16447830200195312, "step": 1177 }, { "epoch": 0.16414686825053995, "grad_norm": 1.9363571405410767, "learning_rate": 9.756373822121762e-06, "loss": 0.15817642211914062, "step": 1178 }, { "epoch": 0.16428621194175433, "grad_norm": 2.0977272987365723, "learning_rate": 9.75564659251353e-06, "loss": 0.2096405029296875, "step": 1179 }, { "epoch": 0.1644255556329687, "grad_norm": 1.6106435060501099, "learning_rate": 9.754918306309197e-06, "loss": 0.15176773071289062, "step": 1180 }, { "epoch": 0.16456489932418308, "grad_norm": 2.064776659011841, "learning_rate": 9.754188963670573e-06, "loss": 0.12082290649414062, "step": 1181 }, { "epoch": 0.1647042430153975, "grad_norm": 3.9722414016723633, "learning_rate": 9.753458564759701e-06, "loss": 0.20653152465820312, "step": 1182 }, { "epoch": 0.16484358670661187, "grad_norm": 2.0973334312438965, "learning_rate": 9.752727109738859e-06, "loss": 0.1825714111328125, "step": 1183 }, { "epoch": 0.16498293039782624, "grad_norm": 1.515430212020874, "learning_rate": 9.751994598770563e-06, "loss": 0.13472557067871094, "step": 1184 }, { "epoch": 0.16512227408904062, "grad_norm": 3.2159111499786377, "learning_rate": 9.751261032017553e-06, "loss": 0.18472671508789062, "step": 1185 }, { "epoch": 0.165261617780255, "grad_norm": 2.2937817573547363, "learning_rate": 9.750526409642818e-06, "loss": 0.140716552734375, "step": 1186 }, { "epoch": 0.16540096147146938, "grad_norm": 3.2003040313720703, "learning_rate": 9.749790731809568e-06, "loss": 0.15610122680664062, "step": 1187 }, { "epoch": 0.16554030516268375, "grad_norm": 2.466876268386841, "learning_rate": 9.74905399868126e-06, "loss": 0.1890411376953125, "step": 1188 }, { "epoch": 0.16567964885389813, "grad_norm": 0.9818329811096191, "learning_rate": 9.748316210421573e-06, "loss": 0.1553192138671875, "step": 1189 }, { "epoch": 0.1658189925451125, "grad_norm": 0.8665810823440552, "learning_rate": 9.747577367194432e-06, "loss": 0.13246536254882812, "step": 1190 }, { "epoch": 0.1659583362363269, "grad_norm": 1.8701764345169067, "learning_rate": 9.74683746916399e-06, "loss": 0.12420654296875, "step": 1191 }, { "epoch": 0.1660976799275413, "grad_norm": 2.1368744373321533, "learning_rate": 9.746096516494632e-06, "loss": 0.14170074462890625, "step": 1192 }, { "epoch": 0.16623702361875567, "grad_norm": 1.7916346788406372, "learning_rate": 9.745354509350983e-06, "loss": 0.15533065795898438, "step": 1193 }, { "epoch": 0.16637636730997005, "grad_norm": 1.336715817451477, "learning_rate": 9.744611447897902e-06, "loss": 0.1376800537109375, "step": 1194 }, { "epoch": 0.16651571100118442, "grad_norm": 1.538220763206482, "learning_rate": 9.743867332300478e-06, "loss": 0.135650634765625, "step": 1195 }, { "epoch": 0.1666550546923988, "grad_norm": 2.4073879718780518, "learning_rate": 9.743122162724038e-06, "loss": 0.16202926635742188, "step": 1196 }, { "epoch": 0.16679439838361318, "grad_norm": 3.1029317378997803, "learning_rate": 9.742375939334141e-06, "loss": 0.1716461181640625, "step": 1197 }, { "epoch": 0.16693374207482756, "grad_norm": 2.0038387775421143, "learning_rate": 9.74162866229658e-06, "loss": 0.15180015563964844, "step": 1198 }, { "epoch": 0.16707308576604193, "grad_norm": 1.2777700424194336, "learning_rate": 9.740880331777383e-06, "loss": 0.13356781005859375, "step": 1199 }, { "epoch": 0.1672124294572563, "grad_norm": 2.4775354862213135, "learning_rate": 9.740130947942812e-06, "loss": 0.17286300659179688, "step": 1200 }, { "epoch": 0.1673517731484707, "grad_norm": 1.310405969619751, "learning_rate": 9.739380510959365e-06, "loss": 0.15337371826171875, "step": 1201 }, { "epoch": 0.1674911168396851, "grad_norm": 0.9951294660568237, "learning_rate": 9.738629020993769e-06, "loss": 0.1262969970703125, "step": 1202 }, { "epoch": 0.16763046053089947, "grad_norm": 1.5657566785812378, "learning_rate": 9.737876478212989e-06, "loss": 0.17194366455078125, "step": 1203 }, { "epoch": 0.16776980422211385, "grad_norm": 0.9957324266433716, "learning_rate": 9.737122882784225e-06, "loss": 0.14297103881835938, "step": 1204 }, { "epoch": 0.16790914791332823, "grad_norm": 1.7303982973098755, "learning_rate": 9.736368234874904e-06, "loss": 0.15924072265625, "step": 1205 }, { "epoch": 0.1680484916045426, "grad_norm": 1.702060580253601, "learning_rate": 9.735612534652697e-06, "loss": 0.13753890991210938, "step": 1206 }, { "epoch": 0.16818783529575698, "grad_norm": 1.224490761756897, "learning_rate": 9.734855782285499e-06, "loss": 0.11861038208007812, "step": 1207 }, { "epoch": 0.16832717898697136, "grad_norm": 3.710127115249634, "learning_rate": 9.734097977941446e-06, "loss": 0.18414688110351562, "step": 1208 }, { "epoch": 0.16846652267818574, "grad_norm": 2.922307014465332, "learning_rate": 9.733339121788903e-06, "loss": 0.18619918823242188, "step": 1209 }, { "epoch": 0.16860586636940011, "grad_norm": 3.299457311630249, "learning_rate": 9.73257921399647e-06, "loss": 0.18320465087890625, "step": 1210 }, { "epoch": 0.1687452100606145, "grad_norm": 2.434950351715088, "learning_rate": 9.731818254732983e-06, "loss": 0.1601104736328125, "step": 1211 }, { "epoch": 0.1688845537518289, "grad_norm": 0.8590441942214966, "learning_rate": 9.73105624416751e-06, "loss": 0.13120269775390625, "step": 1212 }, { "epoch": 0.16902389744304328, "grad_norm": 1.4772672653198242, "learning_rate": 9.73029318246935e-06, "loss": 0.159454345703125, "step": 1213 }, { "epoch": 0.16916324113425765, "grad_norm": 1.4206522703170776, "learning_rate": 9.72952906980804e-06, "loss": 0.15266799926757812, "step": 1214 }, { "epoch": 0.16930258482547203, "grad_norm": 2.5458803176879883, "learning_rate": 9.72876390635335e-06, "loss": 0.18542098999023438, "step": 1215 }, { "epoch": 0.1694419285166864, "grad_norm": 2.053262948989868, "learning_rate": 9.727997692275275e-06, "loss": 0.147430419921875, "step": 1216 }, { "epoch": 0.16958127220790079, "grad_norm": 1.345496654510498, "learning_rate": 9.727230427744058e-06, "loss": 0.18023681640625, "step": 1217 }, { "epoch": 0.16972061589911516, "grad_norm": 1.8458967208862305, "learning_rate": 9.726462112930165e-06, "loss": 0.18469619750976562, "step": 1218 }, { "epoch": 0.16985995959032954, "grad_norm": 1.0481932163238525, "learning_rate": 9.725692748004295e-06, "loss": 0.1485137939453125, "step": 1219 }, { "epoch": 0.16999930328154392, "grad_norm": 2.241281747817993, "learning_rate": 9.724922333137385e-06, "loss": 0.16415786743164062, "step": 1220 }, { "epoch": 0.1701386469727583, "grad_norm": 2.18265438079834, "learning_rate": 9.724150868500607e-06, "loss": 0.15816879272460938, "step": 1221 }, { "epoch": 0.1702779906639727, "grad_norm": 3.430205821990967, "learning_rate": 9.72337835426536e-06, "loss": 0.17266464233398438, "step": 1222 }, { "epoch": 0.17041733435518708, "grad_norm": 1.9153364896774292, "learning_rate": 9.722604790603279e-06, "loss": 0.16474151611328125, "step": 1223 }, { "epoch": 0.17055667804640146, "grad_norm": 1.087502121925354, "learning_rate": 9.721830177686231e-06, "loss": 0.11590576171875, "step": 1224 }, { "epoch": 0.17069602173761583, "grad_norm": 0.8266100883483887, "learning_rate": 9.72105451568632e-06, "loss": 0.10791397094726562, "step": 1225 }, { "epoch": 0.1708353654288302, "grad_norm": 2.9448678493499756, "learning_rate": 9.720277804775879e-06, "loss": 0.14180755615234375, "step": 1226 }, { "epoch": 0.1709747091200446, "grad_norm": 2.1834280490875244, "learning_rate": 9.719500045127475e-06, "loss": 0.16494369506835938, "step": 1227 }, { "epoch": 0.17111405281125897, "grad_norm": 2.960143566131592, "learning_rate": 9.718721236913909e-06, "loss": 0.16629791259765625, "step": 1228 }, { "epoch": 0.17125339650247334, "grad_norm": 3.899885416030884, "learning_rate": 9.717941380308216e-06, "loss": 0.20075607299804688, "step": 1229 }, { "epoch": 0.17139274019368772, "grad_norm": 2.1538243293762207, "learning_rate": 9.717160475483659e-06, "loss": 0.17001724243164062, "step": 1230 }, { "epoch": 0.1715320838849021, "grad_norm": 2.1707303524017334, "learning_rate": 9.71637852261374e-06, "loss": 0.20822906494140625, "step": 1231 }, { "epoch": 0.1716714275761165, "grad_norm": 1.729801893234253, "learning_rate": 9.71559552187219e-06, "loss": 0.14535903930664062, "step": 1232 }, { "epoch": 0.17181077126733088, "grad_norm": 1.9941973686218262, "learning_rate": 9.714811473432973e-06, "loss": 0.14417457580566406, "step": 1233 }, { "epoch": 0.17195011495854526, "grad_norm": 1.9248722791671753, "learning_rate": 9.714026377470287e-06, "loss": 0.1340789794921875, "step": 1234 }, { "epoch": 0.17208945864975964, "grad_norm": 5.1353559494018555, "learning_rate": 9.713240234158565e-06, "loss": 0.189727783203125, "step": 1235 }, { "epoch": 0.172228802340974, "grad_norm": 1.1454031467437744, "learning_rate": 9.712453043672467e-06, "loss": 0.12961196899414062, "step": 1236 }, { "epoch": 0.1723681460321884, "grad_norm": 1.5528666973114014, "learning_rate": 9.71166480618689e-06, "loss": 0.14254188537597656, "step": 1237 }, { "epoch": 0.17250748972340277, "grad_norm": 1.7323684692382812, "learning_rate": 9.71087552187696e-06, "loss": 0.17959213256835938, "step": 1238 }, { "epoch": 0.17264683341461715, "grad_norm": 1.3027900457382202, "learning_rate": 9.710085190918044e-06, "loss": 0.13853073120117188, "step": 1239 }, { "epoch": 0.17278617710583152, "grad_norm": 2.121694326400757, "learning_rate": 9.70929381348573e-06, "loss": 0.17083740234375, "step": 1240 }, { "epoch": 0.1729255207970459, "grad_norm": 1.714451789855957, "learning_rate": 9.708501389755846e-06, "loss": 0.15357208251953125, "step": 1241 }, { "epoch": 0.1730648644882603, "grad_norm": 2.0837841033935547, "learning_rate": 9.70770791990445e-06, "loss": 0.15690994262695312, "step": 1242 }, { "epoch": 0.17320420817947468, "grad_norm": 1.7127833366394043, "learning_rate": 9.706913404107832e-06, "loss": 0.17430877685546875, "step": 1243 }, { "epoch": 0.17334355187068906, "grad_norm": 1.08486008644104, "learning_rate": 9.706117842542517e-06, "loss": 0.15871047973632812, "step": 1244 }, { "epoch": 0.17348289556190344, "grad_norm": 1.6974997520446777, "learning_rate": 9.70532123538526e-06, "loss": 0.1478729248046875, "step": 1245 }, { "epoch": 0.17362223925311782, "grad_norm": 2.1847708225250244, "learning_rate": 9.704523582813049e-06, "loss": 0.1685638427734375, "step": 1246 }, { "epoch": 0.1737615829443322, "grad_norm": 1.2166004180908203, "learning_rate": 9.703724885003102e-06, "loss": 0.16637420654296875, "step": 1247 }, { "epoch": 0.17390092663554657, "grad_norm": 1.0119553804397583, "learning_rate": 9.702925142132876e-06, "loss": 0.14287948608398438, "step": 1248 }, { "epoch": 0.17404027032676095, "grad_norm": 0.9648314118385315, "learning_rate": 9.70212435438005e-06, "loss": 0.13396072387695312, "step": 1249 }, { "epoch": 0.17417961401797533, "grad_norm": 0.9204569458961487, "learning_rate": 9.701322521922549e-06, "loss": 0.14498329162597656, "step": 1250 }, { "epoch": 0.1743189577091897, "grad_norm": 2.2281999588012695, "learning_rate": 9.700519644938513e-06, "loss": 0.22162628173828125, "step": 1251 }, { "epoch": 0.1744583014004041, "grad_norm": 1.5827677249908447, "learning_rate": 9.699715723606327e-06, "loss": 0.1816253662109375, "step": 1252 }, { "epoch": 0.1745976450916185, "grad_norm": 1.1503466367721558, "learning_rate": 9.698910758104603e-06, "loss": 0.14383697509765625, "step": 1253 }, { "epoch": 0.17473698878283286, "grad_norm": 2.7937090396881104, "learning_rate": 9.698104748612187e-06, "loss": 0.22759246826171875, "step": 1254 }, { "epoch": 0.17487633247404724, "grad_norm": 2.2656784057617188, "learning_rate": 9.697297695308157e-06, "loss": 0.20268630981445312, "step": 1255 }, { "epoch": 0.17501567616526162, "grad_norm": 1.017861008644104, "learning_rate": 9.696489598371817e-06, "loss": 0.11984634399414062, "step": 1256 }, { "epoch": 0.175155019856476, "grad_norm": 1.9544509649276733, "learning_rate": 9.695680457982713e-06, "loss": 0.16543197631835938, "step": 1257 }, { "epoch": 0.17529436354769037, "grad_norm": 1.8556008338928223, "learning_rate": 9.694870274320616e-06, "loss": 0.13031005859375, "step": 1258 }, { "epoch": 0.17543370723890475, "grad_norm": 1.5780470371246338, "learning_rate": 9.694059047565529e-06, "loss": 0.19327926635742188, "step": 1259 }, { "epoch": 0.17557305093011913, "grad_norm": 2.0753908157348633, "learning_rate": 9.69324677789769e-06, "loss": 0.12904739379882812, "step": 1260 }, { "epoch": 0.1757123946213335, "grad_norm": 1.324973225593567, "learning_rate": 9.692433465497562e-06, "loss": 0.09717559814453125, "step": 1261 }, { "epoch": 0.1758517383125479, "grad_norm": 1.17978036403656, "learning_rate": 9.69161911054585e-06, "loss": 0.17218780517578125, "step": 1262 }, { "epoch": 0.1759910820037623, "grad_norm": 1.3670073747634888, "learning_rate": 9.690803713223485e-06, "loss": 0.17427825927734375, "step": 1263 }, { "epoch": 0.17613042569497667, "grad_norm": 1.219565987586975, "learning_rate": 9.689987273711626e-06, "loss": 0.14675331115722656, "step": 1264 }, { "epoch": 0.17626976938619104, "grad_norm": 1.0944852828979492, "learning_rate": 9.68916979219167e-06, "loss": 0.15585708618164062, "step": 1265 }, { "epoch": 0.17640911307740542, "grad_norm": 0.7088665962219238, "learning_rate": 9.68835126884524e-06, "loss": 0.13094711303710938, "step": 1266 }, { "epoch": 0.1765484567686198, "grad_norm": 2.1421988010406494, "learning_rate": 9.687531703854196e-06, "loss": 0.18642425537109375, "step": 1267 }, { "epoch": 0.17668780045983418, "grad_norm": 0.9984857439994812, "learning_rate": 9.686711097400625e-06, "loss": 0.14206695556640625, "step": 1268 }, { "epoch": 0.17682714415104855, "grad_norm": 1.5703125, "learning_rate": 9.685889449666849e-06, "loss": 0.16051864624023438, "step": 1269 }, { "epoch": 0.17696648784226293, "grad_norm": 1.200579285621643, "learning_rate": 9.685066760835417e-06, "loss": 0.18584060668945312, "step": 1270 }, { "epoch": 0.1771058315334773, "grad_norm": 1.8695416450500488, "learning_rate": 9.684243031089113e-06, "loss": 0.1881561279296875, "step": 1271 }, { "epoch": 0.17724517522469171, "grad_norm": 1.759854793548584, "learning_rate": 9.68341826061095e-06, "loss": 0.16440582275390625, "step": 1272 }, { "epoch": 0.1773845189159061, "grad_norm": 2.4739773273468018, "learning_rate": 9.682592449584174e-06, "loss": 0.17210006713867188, "step": 1273 }, { "epoch": 0.17752386260712047, "grad_norm": 1.171352505683899, "learning_rate": 9.68176559819226e-06, "loss": 0.16967010498046875, "step": 1274 }, { "epoch": 0.17766320629833485, "grad_norm": 2.045830011367798, "learning_rate": 9.680937706618919e-06, "loss": 0.18562698364257812, "step": 1275 }, { "epoch": 0.17780254998954922, "grad_norm": 1.0994244813919067, "learning_rate": 9.680108775048087e-06, "loss": 0.134002685546875, "step": 1276 }, { "epoch": 0.1779418936807636, "grad_norm": 1.2078194618225098, "learning_rate": 9.679278803663932e-06, "loss": 0.14901351928710938, "step": 1277 }, { "epoch": 0.17808123737197798, "grad_norm": 3.05073618888855, "learning_rate": 9.678447792650858e-06, "loss": 0.1663970947265625, "step": 1278 }, { "epoch": 0.17822058106319236, "grad_norm": 3.2758538722991943, "learning_rate": 9.677615742193495e-06, "loss": 0.15040969848632812, "step": 1279 }, { "epoch": 0.17835992475440673, "grad_norm": 1.8737547397613525, "learning_rate": 9.676782652476705e-06, "loss": 0.13570785522460938, "step": 1280 }, { "epoch": 0.1784992684456211, "grad_norm": 1.7658125162124634, "learning_rate": 9.675948523685583e-06, "loss": 0.17388534545898438, "step": 1281 }, { "epoch": 0.17863861213683552, "grad_norm": 1.547401785850525, "learning_rate": 9.675113356005453e-06, "loss": 0.15674591064453125, "step": 1282 }, { "epoch": 0.1787779558280499, "grad_norm": 1.7106633186340332, "learning_rate": 9.674277149621869e-06, "loss": 0.1664581298828125, "step": 1283 }, { "epoch": 0.17891729951926427, "grad_norm": 1.8257477283477783, "learning_rate": 9.673439904720619e-06, "loss": 0.18884658813476562, "step": 1284 }, { "epoch": 0.17905664321047865, "grad_norm": 1.8508590459823608, "learning_rate": 9.672601621487718e-06, "loss": 0.15494918823242188, "step": 1285 }, { "epoch": 0.17919598690169303, "grad_norm": 1.7812292575836182, "learning_rate": 9.671762300109415e-06, "loss": 0.14819717407226562, "step": 1286 }, { "epoch": 0.1793353305929074, "grad_norm": 4.192465305328369, "learning_rate": 9.670921940772186e-06, "loss": 0.2348480224609375, "step": 1287 }, { "epoch": 0.17947467428412178, "grad_norm": 2.4257774353027344, "learning_rate": 9.670080543662742e-06, "loss": 0.20477676391601562, "step": 1288 }, { "epoch": 0.17961401797533616, "grad_norm": 1.7300976514816284, "learning_rate": 9.669238108968018e-06, "loss": 0.19544219970703125, "step": 1289 }, { "epoch": 0.17975336166655054, "grad_norm": 1.2839698791503906, "learning_rate": 9.668394636875188e-06, "loss": 0.178558349609375, "step": 1290 }, { "epoch": 0.17989270535776491, "grad_norm": 1.0777182579040527, "learning_rate": 9.667550127571653e-06, "loss": 0.1534576416015625, "step": 1291 }, { "epoch": 0.18003204904897932, "grad_norm": 3.6717684268951416, "learning_rate": 9.666704581245041e-06, "loss": 0.21076202392578125, "step": 1292 }, { "epoch": 0.1801713927401937, "grad_norm": 1.5892778635025024, "learning_rate": 9.665857998083212e-06, "loss": 0.16297531127929688, "step": 1293 }, { "epoch": 0.18031073643140808, "grad_norm": 2.223853349685669, "learning_rate": 9.66501037827426e-06, "loss": 0.1901702880859375, "step": 1294 }, { "epoch": 0.18045008012262245, "grad_norm": 1.611809253692627, "learning_rate": 9.664161722006506e-06, "loss": 0.135406494140625, "step": 1295 }, { "epoch": 0.18058942381383683, "grad_norm": 2.137739896774292, "learning_rate": 9.663312029468504e-06, "loss": 0.17818069458007812, "step": 1296 }, { "epoch": 0.1807287675050512, "grad_norm": 0.7015976309776306, "learning_rate": 9.662461300849031e-06, "loss": 0.12036895751953125, "step": 1297 }, { "epoch": 0.18086811119626559, "grad_norm": 2.2903521060943604, "learning_rate": 9.661609536337104e-06, "loss": 0.19094085693359375, "step": 1298 }, { "epoch": 0.18100745488747996, "grad_norm": 3.1924960613250732, "learning_rate": 9.660756736121964e-06, "loss": 0.19161605834960938, "step": 1299 }, { "epoch": 0.18114679857869434, "grad_norm": 1.9924358129501343, "learning_rate": 9.659902900393086e-06, "loss": 0.14381790161132812, "step": 1300 }, { "epoch": 0.18128614226990872, "grad_norm": 1.737860918045044, "learning_rate": 9.659048029340169e-06, "loss": 0.1674346923828125, "step": 1301 }, { "epoch": 0.18142548596112312, "grad_norm": 1.3991296291351318, "learning_rate": 9.658192123153149e-06, "loss": 0.134918212890625, "step": 1302 }, { "epoch": 0.1815648296523375, "grad_norm": 1.9684433937072754, "learning_rate": 9.657335182022187e-06, "loss": 0.17574310302734375, "step": 1303 }, { "epoch": 0.18170417334355188, "grad_norm": 2.2225775718688965, "learning_rate": 9.656477206137675e-06, "loss": 0.18767929077148438, "step": 1304 }, { "epoch": 0.18184351703476626, "grad_norm": 3.0993690490722656, "learning_rate": 9.655618195690239e-06, "loss": 0.22434234619140625, "step": 1305 }, { "epoch": 0.18198286072598063, "grad_norm": 1.9684576988220215, "learning_rate": 9.654758150870728e-06, "loss": 0.16643142700195312, "step": 1306 }, { "epoch": 0.182122204417195, "grad_norm": 1.6655834913253784, "learning_rate": 9.653897071870226e-06, "loss": 0.18372344970703125, "step": 1307 }, { "epoch": 0.1822615481084094, "grad_norm": 2.0431487560272217, "learning_rate": 9.653034958880045e-06, "loss": 0.17168045043945312, "step": 1308 }, { "epoch": 0.18240089179962377, "grad_norm": 1.314318299293518, "learning_rate": 9.652171812091728e-06, "loss": 0.1739349365234375, "step": 1309 }, { "epoch": 0.18254023549083814, "grad_norm": 1.6611413955688477, "learning_rate": 9.651307631697044e-06, "loss": 0.17461776733398438, "step": 1310 }, { "epoch": 0.18267957918205252, "grad_norm": 2.0319957733154297, "learning_rate": 9.650442417887995e-06, "loss": 0.14877700805664062, "step": 1311 }, { "epoch": 0.18281892287326693, "grad_norm": 1.4952280521392822, "learning_rate": 9.649576170856814e-06, "loss": 0.13717269897460938, "step": 1312 }, { "epoch": 0.1829582665644813, "grad_norm": 1.1813178062438965, "learning_rate": 9.64870889079596e-06, "loss": 0.15247726440429688, "step": 1313 }, { "epoch": 0.18309761025569568, "grad_norm": 0.87452232837677, "learning_rate": 9.64784057789812e-06, "loss": 0.1492919921875, "step": 1314 }, { "epoch": 0.18323695394691006, "grad_norm": 1.396746039390564, "learning_rate": 9.646971232356215e-06, "loss": 0.18624114990234375, "step": 1315 }, { "epoch": 0.18337629763812444, "grad_norm": 0.9305240511894226, "learning_rate": 9.646100854363396e-06, "loss": 0.13377761840820312, "step": 1316 }, { "epoch": 0.1835156413293388, "grad_norm": 5.897001266479492, "learning_rate": 9.64522944411304e-06, "loss": 0.19167327880859375, "step": 1317 }, { "epoch": 0.1836549850205532, "grad_norm": 3.048222064971924, "learning_rate": 9.644357001798752e-06, "loss": 0.15433502197265625, "step": 1318 }, { "epoch": 0.18379432871176757, "grad_norm": 2.122662305831909, "learning_rate": 9.643483527614372e-06, "loss": 0.12639236450195312, "step": 1319 }, { "epoch": 0.18393367240298195, "grad_norm": 2.8852431774139404, "learning_rate": 9.642609021753964e-06, "loss": 0.16657638549804688, "step": 1320 }, { "epoch": 0.18407301609419632, "grad_norm": 0.9957558512687683, "learning_rate": 9.641733484411823e-06, "loss": 0.1376781463623047, "step": 1321 }, { "epoch": 0.18421235978541073, "grad_norm": 2.5902421474456787, "learning_rate": 9.640856915782477e-06, "loss": 0.18045425415039062, "step": 1322 }, { "epoch": 0.1843517034766251, "grad_norm": 1.2549952268600464, "learning_rate": 9.639979316060675e-06, "loss": 0.17255783081054688, "step": 1323 }, { "epoch": 0.18449104716783948, "grad_norm": 2.5844783782958984, "learning_rate": 9.639100685441403e-06, "loss": 0.14607620239257812, "step": 1324 }, { "epoch": 0.18463039085905386, "grad_norm": 1.3560677766799927, "learning_rate": 9.638221024119869e-06, "loss": 0.1667633056640625, "step": 1325 }, { "epoch": 0.18476973455026824, "grad_norm": 2.3691773414611816, "learning_rate": 9.637340332291518e-06, "loss": 0.15279388427734375, "step": 1326 }, { "epoch": 0.18490907824148262, "grad_norm": 2.713993787765503, "learning_rate": 9.636458610152015e-06, "loss": 0.18236160278320312, "step": 1327 }, { "epoch": 0.185048421932697, "grad_norm": 1.8823298215866089, "learning_rate": 9.635575857897264e-06, "loss": 0.14734649658203125, "step": 1328 }, { "epoch": 0.18518776562391137, "grad_norm": 0.72788405418396, "learning_rate": 9.634692075723386e-06, "loss": 0.12579727172851562, "step": 1329 }, { "epoch": 0.18532710931512575, "grad_norm": 1.3659242391586304, "learning_rate": 9.633807263826745e-06, "loss": 0.167266845703125, "step": 1330 }, { "epoch": 0.18546645300634013, "grad_norm": 1.067946195602417, "learning_rate": 9.632921422403918e-06, "loss": 0.15363693237304688, "step": 1331 }, { "epoch": 0.18560579669755453, "grad_norm": 1.2575942277908325, "learning_rate": 9.632034551651723e-06, "loss": 0.19350433349609375, "step": 1332 }, { "epoch": 0.1857451403887689, "grad_norm": 2.645829916000366, "learning_rate": 9.631146651767202e-06, "loss": 0.1561126708984375, "step": 1333 }, { "epoch": 0.1858844840799833, "grad_norm": 2.715977907180786, "learning_rate": 9.630257722947625e-06, "loss": 0.15780258178710938, "step": 1334 }, { "epoch": 0.18602382777119766, "grad_norm": 1.24515700340271, "learning_rate": 9.629367765390494e-06, "loss": 0.14079666137695312, "step": 1335 }, { "epoch": 0.18616317146241204, "grad_norm": 1.0759406089782715, "learning_rate": 9.628476779293536e-06, "loss": 0.13768768310546875, "step": 1336 }, { "epoch": 0.18630251515362642, "grad_norm": 0.7526584267616272, "learning_rate": 9.627584764854706e-06, "loss": 0.11240768432617188, "step": 1337 }, { "epoch": 0.1864418588448408, "grad_norm": 1.5964198112487793, "learning_rate": 9.626691722272193e-06, "loss": 0.16495513916015625, "step": 1338 }, { "epoch": 0.18658120253605517, "grad_norm": 1.9611303806304932, "learning_rate": 9.625797651744406e-06, "loss": 0.1271820068359375, "step": 1339 }, { "epoch": 0.18672054622726955, "grad_norm": 2.0626463890075684, "learning_rate": 9.62490255346999e-06, "loss": 0.15114212036132812, "step": 1340 }, { "epoch": 0.18685988991848393, "grad_norm": 1.7488682270050049, "learning_rate": 9.624006427647817e-06, "loss": 0.16213226318359375, "step": 1341 }, { "epoch": 0.18699923360969833, "grad_norm": 1.1200346946716309, "learning_rate": 9.623109274476982e-06, "loss": 0.14696884155273438, "step": 1342 }, { "epoch": 0.1871385773009127, "grad_norm": 1.4526113271713257, "learning_rate": 9.622211094156812e-06, "loss": 0.15998458862304688, "step": 1343 }, { "epoch": 0.1872779209921271, "grad_norm": 1.6746861934661865, "learning_rate": 9.621311886886866e-06, "loss": 0.21938705444335938, "step": 1344 }, { "epoch": 0.18741726468334147, "grad_norm": 0.985698401927948, "learning_rate": 9.620411652866926e-06, "loss": 0.1636219024658203, "step": 1345 }, { "epoch": 0.18755660837455584, "grad_norm": 1.863769769668579, "learning_rate": 9.619510392297e-06, "loss": 0.18066024780273438, "step": 1346 }, { "epoch": 0.18769595206577022, "grad_norm": 1.101658582687378, "learning_rate": 9.618608105377331e-06, "loss": 0.1632862091064453, "step": 1347 }, { "epoch": 0.1878352957569846, "grad_norm": 1.8523517847061157, "learning_rate": 9.617704792308387e-06, "loss": 0.16779708862304688, "step": 1348 }, { "epoch": 0.18797463944819898, "grad_norm": 0.7971680760383606, "learning_rate": 9.61680045329086e-06, "loss": 0.1350250244140625, "step": 1349 }, { "epoch": 0.18811398313941335, "grad_norm": 1.903668999671936, "learning_rate": 9.615895088525677e-06, "loss": 0.1894855499267578, "step": 1350 }, { "epoch": 0.18825332683062773, "grad_norm": 1.3704121112823486, "learning_rate": 9.614988698213987e-06, "loss": 0.13686752319335938, "step": 1351 }, { "epoch": 0.18839267052184214, "grad_norm": 1.2762266397476196, "learning_rate": 9.614081282557172e-06, "loss": 0.1698627471923828, "step": 1352 }, { "epoch": 0.18853201421305651, "grad_norm": 1.4025849103927612, "learning_rate": 9.613172841756835e-06, "loss": 0.15699005126953125, "step": 1353 }, { "epoch": 0.1886713579042709, "grad_norm": 1.3882166147232056, "learning_rate": 9.612263376014815e-06, "loss": 0.18958282470703125, "step": 1354 }, { "epoch": 0.18881070159548527, "grad_norm": 2.3450534343719482, "learning_rate": 9.611352885533171e-06, "loss": 0.20032978057861328, "step": 1355 }, { "epoch": 0.18895004528669965, "grad_norm": 1.8177510499954224, "learning_rate": 9.610441370514196e-06, "loss": 0.14575958251953125, "step": 1356 }, { "epoch": 0.18908938897791402, "grad_norm": 2.1142523288726807, "learning_rate": 9.609528831160407e-06, "loss": 0.20636367797851562, "step": 1357 }, { "epoch": 0.1892287326691284, "grad_norm": 2.5841636657714844, "learning_rate": 9.608615267674548e-06, "loss": 0.19232940673828125, "step": 1358 }, { "epoch": 0.18936807636034278, "grad_norm": 1.3625000715255737, "learning_rate": 9.607700680259593e-06, "loss": 0.16713714599609375, "step": 1359 }, { "epoch": 0.18950742005155716, "grad_norm": 2.3310961723327637, "learning_rate": 9.606785069118742e-06, "loss": 0.18236351013183594, "step": 1360 }, { "epoch": 0.18964676374277153, "grad_norm": 1.7920291423797607, "learning_rate": 9.605868434455426e-06, "loss": 0.14261627197265625, "step": 1361 }, { "epoch": 0.18978610743398594, "grad_norm": 1.5136444568634033, "learning_rate": 9.604950776473294e-06, "loss": 0.15168380737304688, "step": 1362 }, { "epoch": 0.18992545112520032, "grad_norm": 2.5055174827575684, "learning_rate": 9.604032095376234e-06, "loss": 0.17898178100585938, "step": 1363 }, { "epoch": 0.1900647948164147, "grad_norm": 1.6266062259674072, "learning_rate": 9.603112391368354e-06, "loss": 0.16868209838867188, "step": 1364 }, { "epoch": 0.19020413850762907, "grad_norm": 2.435702323913574, "learning_rate": 9.602191664653992e-06, "loss": 0.181488037109375, "step": 1365 }, { "epoch": 0.19034348219884345, "grad_norm": 1.5093134641647339, "learning_rate": 9.601269915437713e-06, "loss": 0.1656932830810547, "step": 1366 }, { "epoch": 0.19048282589005783, "grad_norm": 0.8452178239822388, "learning_rate": 9.600347143924305e-06, "loss": 0.1348724365234375, "step": 1367 }, { "epoch": 0.1906221695812722, "grad_norm": 1.5074846744537354, "learning_rate": 9.599423350318791e-06, "loss": 0.15751266479492188, "step": 1368 }, { "epoch": 0.19076151327248658, "grad_norm": 1.8115692138671875, "learning_rate": 9.598498534826414e-06, "loss": 0.15621566772460938, "step": 1369 }, { "epoch": 0.19090085696370096, "grad_norm": 1.029038429260254, "learning_rate": 9.597572697652649e-06, "loss": 0.1351318359375, "step": 1370 }, { "epoch": 0.19104020065491534, "grad_norm": 0.7182593941688538, "learning_rate": 9.596645839003196e-06, "loss": 0.1112823486328125, "step": 1371 }, { "epoch": 0.19117954434612974, "grad_norm": 1.2587134838104248, "learning_rate": 9.595717959083978e-06, "loss": 0.18869781494140625, "step": 1372 }, { "epoch": 0.19131888803734412, "grad_norm": 2.140219211578369, "learning_rate": 9.594789058101154e-06, "loss": 0.20311355590820312, "step": 1373 }, { "epoch": 0.1914582317285585, "grad_norm": 1.2740784883499146, "learning_rate": 9.593859136261102e-06, "loss": 0.15185165405273438, "step": 1374 }, { "epoch": 0.19159757541977288, "grad_norm": 1.4965447187423706, "learning_rate": 9.592928193770427e-06, "loss": 0.16587448120117188, "step": 1375 }, { "epoch": 0.19173691911098725, "grad_norm": 1.4841305017471313, "learning_rate": 9.591996230835968e-06, "loss": 0.180450439453125, "step": 1376 }, { "epoch": 0.19187626280220163, "grad_norm": 0.9229658246040344, "learning_rate": 9.591063247664783e-06, "loss": 0.14310073852539062, "step": 1377 }, { "epoch": 0.192015606493416, "grad_norm": 1.708585500717163, "learning_rate": 9.59012924446416e-06, "loss": 0.14985275268554688, "step": 1378 }, { "epoch": 0.19215495018463039, "grad_norm": 1.8701757192611694, "learning_rate": 9.589194221441614e-06, "loss": 0.16284942626953125, "step": 1379 }, { "epoch": 0.19229429387584476, "grad_norm": 1.0897574424743652, "learning_rate": 9.588258178804884e-06, "loss": 0.14296340942382812, "step": 1380 }, { "epoch": 0.19243363756705914, "grad_norm": 1.3004525899887085, "learning_rate": 9.587321116761938e-06, "loss": 0.156829833984375, "step": 1381 }, { "epoch": 0.19257298125827352, "grad_norm": 1.4493776559829712, "learning_rate": 9.586383035520972e-06, "loss": 0.1500396728515625, "step": 1382 }, { "epoch": 0.19271232494948792, "grad_norm": 0.7257071137428284, "learning_rate": 9.585443935290403e-06, "loss": 0.12938308715820312, "step": 1383 }, { "epoch": 0.1928516686407023, "grad_norm": 1.8584873676300049, "learning_rate": 9.58450381627888e-06, "loss": 0.15000534057617188, "step": 1384 }, { "epoch": 0.19299101233191668, "grad_norm": 3.5175223350524902, "learning_rate": 9.583562678695275e-06, "loss": 0.163665771484375, "step": 1385 }, { "epoch": 0.19313035602313106, "grad_norm": 2.083559274673462, "learning_rate": 9.582620522748686e-06, "loss": 0.129364013671875, "step": 1386 }, { "epoch": 0.19326969971434543, "grad_norm": 2.157625913619995, "learning_rate": 9.58167734864844e-06, "loss": 0.16891098022460938, "step": 1387 }, { "epoch": 0.1934090434055598, "grad_norm": 1.698575735092163, "learning_rate": 9.58073315660409e-06, "loss": 0.13485336303710938, "step": 1388 }, { "epoch": 0.1935483870967742, "grad_norm": 2.124830961227417, "learning_rate": 9.579787946825411e-06, "loss": 0.19234848022460938, "step": 1389 }, { "epoch": 0.19368773078798857, "grad_norm": 1.8665308952331543, "learning_rate": 9.57884171952241e-06, "loss": 0.14928817749023438, "step": 1390 }, { "epoch": 0.19382707447920294, "grad_norm": 1.8156403303146362, "learning_rate": 9.577894474905314e-06, "loss": 0.16703033447265625, "step": 1391 }, { "epoch": 0.19396641817041732, "grad_norm": 1.6224894523620605, "learning_rate": 9.576946213184583e-06, "loss": 0.1527996063232422, "step": 1392 }, { "epoch": 0.19410576186163173, "grad_norm": 2.2277562618255615, "learning_rate": 9.575996934570896e-06, "loss": 0.1680755615234375, "step": 1393 }, { "epoch": 0.1942451055528461, "grad_norm": 1.4421993494033813, "learning_rate": 9.57504663927516e-06, "loss": 0.17527389526367188, "step": 1394 }, { "epoch": 0.19438444924406048, "grad_norm": 0.9211018681526184, "learning_rate": 9.574095327508513e-06, "loss": 0.1259918212890625, "step": 1395 }, { "epoch": 0.19452379293527486, "grad_norm": 2.1789710521698, "learning_rate": 9.573142999482313e-06, "loss": 0.161773681640625, "step": 1396 }, { "epoch": 0.19466313662648924, "grad_norm": 1.5497279167175293, "learning_rate": 9.572189655408144e-06, "loss": 0.145843505859375, "step": 1397 }, { "epoch": 0.1948024803177036, "grad_norm": 2.199441909790039, "learning_rate": 9.571235295497818e-06, "loss": 0.15239334106445312, "step": 1398 }, { "epoch": 0.194941824008918, "grad_norm": 2.06896710395813, "learning_rate": 9.570279919963373e-06, "loss": 0.16685104370117188, "step": 1399 }, { "epoch": 0.19508116770013237, "grad_norm": 1.3999725580215454, "learning_rate": 9.569323529017071e-06, "loss": 0.16106796264648438, "step": 1400 }, { "epoch": 0.19522051139134675, "grad_norm": 2.5728299617767334, "learning_rate": 9.568366122871399e-06, "loss": 0.19068145751953125, "step": 1401 }, { "epoch": 0.19535985508256112, "grad_norm": 1.8946919441223145, "learning_rate": 9.567407701739075e-06, "loss": 0.14856719970703125, "step": 1402 }, { "epoch": 0.19549919877377553, "grad_norm": 1.7790411710739136, "learning_rate": 9.566448265833034e-06, "loss": 0.13333511352539062, "step": 1403 }, { "epoch": 0.1956385424649899, "grad_norm": 1.9564213752746582, "learning_rate": 9.56548781536644e-06, "loss": 0.1989593505859375, "step": 1404 }, { "epoch": 0.19577788615620428, "grad_norm": 2.199446201324463, "learning_rate": 9.564526350552689e-06, "loss": 0.14671707153320312, "step": 1405 }, { "epoch": 0.19591722984741866, "grad_norm": 1.3066418170928955, "learning_rate": 9.56356387160539e-06, "loss": 0.12752914428710938, "step": 1406 }, { "epoch": 0.19605657353863304, "grad_norm": 2.1551339626312256, "learning_rate": 9.562600378738389e-06, "loss": 0.17063522338867188, "step": 1407 }, { "epoch": 0.19619591722984742, "grad_norm": 1.632474422454834, "learning_rate": 9.561635872165747e-06, "loss": 0.1434326171875, "step": 1408 }, { "epoch": 0.1963352609210618, "grad_norm": 1.9917694330215454, "learning_rate": 9.56067035210176e-06, "loss": 0.1688385009765625, "step": 1409 }, { "epoch": 0.19647460461227617, "grad_norm": 2.804328441619873, "learning_rate": 9.559703818760943e-06, "loss": 0.18893051147460938, "step": 1410 }, { "epoch": 0.19661394830349055, "grad_norm": 1.228693962097168, "learning_rate": 9.558736272358036e-06, "loss": 0.1613311767578125, "step": 1411 }, { "epoch": 0.19675329199470493, "grad_norm": 2.8923423290252686, "learning_rate": 9.557767713108009e-06, "loss": 0.21533584594726562, "step": 1412 }, { "epoch": 0.19689263568591933, "grad_norm": 1.1099499464035034, "learning_rate": 9.55679814122605e-06, "loss": 0.1349620819091797, "step": 1413 }, { "epoch": 0.1970319793771337, "grad_norm": 1.4815152883529663, "learning_rate": 9.555827556927578e-06, "loss": 0.13888168334960938, "step": 1414 }, { "epoch": 0.1971713230683481, "grad_norm": 0.7194700241088867, "learning_rate": 9.554855960428234e-06, "loss": 0.11412429809570312, "step": 1415 }, { "epoch": 0.19731066675956246, "grad_norm": 1.217875361442566, "learning_rate": 9.553883351943882e-06, "loss": 0.13573455810546875, "step": 1416 }, { "epoch": 0.19745001045077684, "grad_norm": 1.5735167264938354, "learning_rate": 9.55290973169062e-06, "loss": 0.22037506103515625, "step": 1417 }, { "epoch": 0.19758935414199122, "grad_norm": 1.53750479221344, "learning_rate": 9.55193509988476e-06, "loss": 0.14474105834960938, "step": 1418 }, { "epoch": 0.1977286978332056, "grad_norm": 1.4889659881591797, "learning_rate": 9.55095945674284e-06, "loss": 0.14230728149414062, "step": 1419 }, { "epoch": 0.19786804152441997, "grad_norm": 1.1592450141906738, "learning_rate": 9.549982802481632e-06, "loss": 0.1521148681640625, "step": 1420 }, { "epoch": 0.19800738521563435, "grad_norm": 1.2023944854736328, "learning_rate": 9.549005137318122e-06, "loss": 0.14610671997070312, "step": 1421 }, { "epoch": 0.19814672890684873, "grad_norm": 1.0254124402999878, "learning_rate": 9.548026461469527e-06, "loss": 0.16711807250976562, "step": 1422 }, { "epoch": 0.19828607259806313, "grad_norm": 1.280474305152893, "learning_rate": 9.547046775153285e-06, "loss": 0.14188385009765625, "step": 1423 }, { "epoch": 0.1984254162892775, "grad_norm": 1.9008828401565552, "learning_rate": 9.54606607858706e-06, "loss": 0.180450439453125, "step": 1424 }, { "epoch": 0.1985647599804919, "grad_norm": 2.2261545658111572, "learning_rate": 9.545084371988743e-06, "loss": 0.2035064697265625, "step": 1425 }, { "epoch": 0.19870410367170627, "grad_norm": 1.5094943046569824, "learning_rate": 9.54410165557644e-06, "loss": 0.16064453125, "step": 1426 }, { "epoch": 0.19884344736292064, "grad_norm": 0.7085208892822266, "learning_rate": 9.543117929568497e-06, "loss": 0.12782669067382812, "step": 1427 }, { "epoch": 0.19898279105413502, "grad_norm": 1.1839814186096191, "learning_rate": 9.542133194183469e-06, "loss": 0.14318466186523438, "step": 1428 }, { "epoch": 0.1991221347453494, "grad_norm": 1.425066351890564, "learning_rate": 9.541147449640145e-06, "loss": 0.15365219116210938, "step": 1429 }, { "epoch": 0.19926147843656378, "grad_norm": 2.2694244384765625, "learning_rate": 9.540160696157532e-06, "loss": 0.19448471069335938, "step": 1430 }, { "epoch": 0.19940082212777815, "grad_norm": 1.042564868927002, "learning_rate": 9.539172933954867e-06, "loss": 0.14391326904296875, "step": 1431 }, { "epoch": 0.19954016581899253, "grad_norm": 1.2574589252471924, "learning_rate": 9.538184163251608e-06, "loss": 0.17367172241210938, "step": 1432 }, { "epoch": 0.19967950951020694, "grad_norm": 1.4420413970947266, "learning_rate": 9.537194384267436e-06, "loss": 0.1718425750732422, "step": 1433 }, { "epoch": 0.19981885320142131, "grad_norm": 1.439146876335144, "learning_rate": 9.536203597222259e-06, "loss": 0.17467689514160156, "step": 1434 }, { "epoch": 0.1999581968926357, "grad_norm": 1.2624435424804688, "learning_rate": 9.535211802336204e-06, "loss": 0.1331024169921875, "step": 1435 }, { "epoch": 0.20009754058385007, "grad_norm": 1.739274263381958, "learning_rate": 9.534218999829627e-06, "loss": 0.15546417236328125, "step": 1436 }, { "epoch": 0.20023688427506445, "grad_norm": 2.0967254638671875, "learning_rate": 9.533225189923107e-06, "loss": 0.14361572265625, "step": 1437 }, { "epoch": 0.20037622796627882, "grad_norm": 1.4622498750686646, "learning_rate": 9.532230372837446e-06, "loss": 0.16044998168945312, "step": 1438 }, { "epoch": 0.2005155716574932, "grad_norm": 2.741564989089966, "learning_rate": 9.531234548793667e-06, "loss": 0.21049118041992188, "step": 1439 }, { "epoch": 0.20065491534870758, "grad_norm": 1.1257890462875366, "learning_rate": 9.530237718013023e-06, "loss": 0.14031982421875, "step": 1440 }, { "epoch": 0.20079425903992196, "grad_norm": 1.8188873529434204, "learning_rate": 9.529239880716983e-06, "loss": 0.14299774169921875, "step": 1441 }, { "epoch": 0.20093360273113633, "grad_norm": 1.216572880744934, "learning_rate": 9.528241037127247e-06, "loss": 0.15882110595703125, "step": 1442 }, { "epoch": 0.20107294642235074, "grad_norm": 1.11855149269104, "learning_rate": 9.527241187465735e-06, "loss": 0.12341690063476562, "step": 1443 }, { "epoch": 0.20121229011356512, "grad_norm": 2.663956880569458, "learning_rate": 9.526240331954589e-06, "loss": 0.18645477294921875, "step": 1444 }, { "epoch": 0.2013516338047795, "grad_norm": 1.203482985496521, "learning_rate": 9.525238470816176e-06, "loss": 0.16230010986328125, "step": 1445 }, { "epoch": 0.20149097749599387, "grad_norm": 1.2565609216690063, "learning_rate": 9.524235604273088e-06, "loss": 0.16258621215820312, "step": 1446 }, { "epoch": 0.20163032118720825, "grad_norm": 1.1767425537109375, "learning_rate": 9.523231732548139e-06, "loss": 0.11672210693359375, "step": 1447 }, { "epoch": 0.20176966487842263, "grad_norm": 0.9462999701499939, "learning_rate": 9.522226855864366e-06, "loss": 0.12397384643554688, "step": 1448 }, { "epoch": 0.201909008569637, "grad_norm": 0.8870232105255127, "learning_rate": 9.521220974445032e-06, "loss": 0.13082122802734375, "step": 1449 }, { "epoch": 0.20204835226085138, "grad_norm": 3.3766283988952637, "learning_rate": 9.520214088513616e-06, "loss": 0.16154098510742188, "step": 1450 }, { "epoch": 0.20218769595206576, "grad_norm": 2.936894178390503, "learning_rate": 9.519206198293828e-06, "loss": 0.16707611083984375, "step": 1451 }, { "epoch": 0.20232703964328014, "grad_norm": 1.1112295389175415, "learning_rate": 9.5181973040096e-06, "loss": 0.14800643920898438, "step": 1452 }, { "epoch": 0.20246638333449454, "grad_norm": 1.7311394214630127, "learning_rate": 9.517187405885082e-06, "loss": 0.19343948364257812, "step": 1453 }, { "epoch": 0.20260572702570892, "grad_norm": 1.8605358600616455, "learning_rate": 9.516176504144652e-06, "loss": 0.15985488891601562, "step": 1454 }, { "epoch": 0.2027450707169233, "grad_norm": 1.5981653928756714, "learning_rate": 9.515164599012908e-06, "loss": 0.12992477416992188, "step": 1455 }, { "epoch": 0.20288441440813768, "grad_norm": 1.8344321250915527, "learning_rate": 9.514151690714672e-06, "loss": 0.16206741333007812, "step": 1456 }, { "epoch": 0.20302375809935205, "grad_norm": 1.421933889389038, "learning_rate": 9.513137779474992e-06, "loss": 0.10458755493164062, "step": 1457 }, { "epoch": 0.20316310179056643, "grad_norm": 2.366546392440796, "learning_rate": 9.512122865519135e-06, "loss": 0.19656753540039062, "step": 1458 }, { "epoch": 0.2033024454817808, "grad_norm": 1.247010350227356, "learning_rate": 9.511106949072588e-06, "loss": 0.16338729858398438, "step": 1459 }, { "epoch": 0.20344178917299519, "grad_norm": 1.546154260635376, "learning_rate": 9.51009003036107e-06, "loss": 0.13174057006835938, "step": 1460 }, { "epoch": 0.20358113286420956, "grad_norm": 1.571069359779358, "learning_rate": 9.509072109610514e-06, "loss": 0.15674209594726562, "step": 1461 }, { "epoch": 0.20372047655542394, "grad_norm": 1.1857895851135254, "learning_rate": 9.508053187047077e-06, "loss": 0.12366104125976562, "step": 1462 }, { "epoch": 0.20385982024663835, "grad_norm": 0.9927472472190857, "learning_rate": 9.507033262897142e-06, "loss": 0.13300323486328125, "step": 1463 }, { "epoch": 0.20399916393785272, "grad_norm": 1.4209985733032227, "learning_rate": 9.506012337387315e-06, "loss": 0.12723541259765625, "step": 1464 }, { "epoch": 0.2041385076290671, "grad_norm": 2.7422854900360107, "learning_rate": 9.504990410744422e-06, "loss": 0.16747283935546875, "step": 1465 }, { "epoch": 0.20427785132028148, "grad_norm": 3.4991841316223145, "learning_rate": 9.503967483195509e-06, "loss": 0.2004852294921875, "step": 1466 }, { "epoch": 0.20441719501149586, "grad_norm": 1.9517345428466797, "learning_rate": 9.502943554967848e-06, "loss": 0.13694000244140625, "step": 1467 }, { "epoch": 0.20455653870271023, "grad_norm": 0.9736098647117615, "learning_rate": 9.501918626288935e-06, "loss": 0.14496231079101562, "step": 1468 }, { "epoch": 0.2046958823939246, "grad_norm": 1.5872583389282227, "learning_rate": 9.500892697386482e-06, "loss": 0.1612548828125, "step": 1469 }, { "epoch": 0.204835226085139, "grad_norm": 1.7228453159332275, "learning_rate": 9.499865768488429e-06, "loss": 0.16032028198242188, "step": 1470 }, { "epoch": 0.20497456977635337, "grad_norm": 2.538679599761963, "learning_rate": 9.498837839822936e-06, "loss": 0.16368865966796875, "step": 1471 }, { "epoch": 0.20511391346756774, "grad_norm": 3.712491989135742, "learning_rate": 9.497808911618385e-06, "loss": 0.18444061279296875, "step": 1472 }, { "epoch": 0.20525325715878215, "grad_norm": 2.086770534515381, "learning_rate": 9.496778984103381e-06, "loss": 0.17613983154296875, "step": 1473 }, { "epoch": 0.20539260084999653, "grad_norm": 3.363456964492798, "learning_rate": 9.49574805750675e-06, "loss": 0.1877117156982422, "step": 1474 }, { "epoch": 0.2055319445412109, "grad_norm": 1.589687466621399, "learning_rate": 9.49471613205754e-06, "loss": 0.16781234741210938, "step": 1475 }, { "epoch": 0.20567128823242528, "grad_norm": 1.276055097579956, "learning_rate": 9.493683207985022e-06, "loss": 0.16059112548828125, "step": 1476 }, { "epoch": 0.20581063192363966, "grad_norm": 1.6638062000274658, "learning_rate": 9.492649285518688e-06, "loss": 0.17812728881835938, "step": 1477 }, { "epoch": 0.20594997561485404, "grad_norm": 1.0911521911621094, "learning_rate": 9.49161436488825e-06, "loss": 0.1227264404296875, "step": 1478 }, { "epoch": 0.2060893193060684, "grad_norm": 2.415783405303955, "learning_rate": 9.490578446323646e-06, "loss": 0.15657424926757812, "step": 1479 }, { "epoch": 0.2062286629972828, "grad_norm": 1.3977854251861572, "learning_rate": 9.489541530055034e-06, "loss": 0.14809417724609375, "step": 1480 }, { "epoch": 0.20636800668849717, "grad_norm": 1.128359317779541, "learning_rate": 9.488503616312793e-06, "loss": 0.1724090576171875, "step": 1481 }, { "epoch": 0.20650735037971155, "grad_norm": 1.0115582942962646, "learning_rate": 9.48746470532752e-06, "loss": 0.17507171630859375, "step": 1482 }, { "epoch": 0.20664669407092595, "grad_norm": 1.46697998046875, "learning_rate": 9.48642479733004e-06, "loss": 0.15118789672851562, "step": 1483 }, { "epoch": 0.20678603776214033, "grad_norm": 2.1643049716949463, "learning_rate": 9.4853838925514e-06, "loss": 0.15715789794921875, "step": 1484 }, { "epoch": 0.2069253814533547, "grad_norm": 2.1834752559661865, "learning_rate": 9.484341991222858e-06, "loss": 0.16980552673339844, "step": 1485 }, { "epoch": 0.20706472514456908, "grad_norm": 0.8033286333084106, "learning_rate": 9.483299093575909e-06, "loss": 0.14742660522460938, "step": 1486 }, { "epoch": 0.20720406883578346, "grad_norm": 0.9840332865715027, "learning_rate": 9.482255199842254e-06, "loss": 0.1475067138671875, "step": 1487 }, { "epoch": 0.20734341252699784, "grad_norm": 1.653754711151123, "learning_rate": 9.481210310253826e-06, "loss": 0.16907501220703125, "step": 1488 }, { "epoch": 0.20748275621821222, "grad_norm": 1.9649416208267212, "learning_rate": 9.480164425042775e-06, "loss": 0.158905029296875, "step": 1489 }, { "epoch": 0.2076220999094266, "grad_norm": 0.6127334833145142, "learning_rate": 9.479117544441472e-06, "loss": 0.11310958862304688, "step": 1490 }, { "epoch": 0.20776144360064097, "grad_norm": 1.333641767501831, "learning_rate": 9.47806966868251e-06, "loss": 0.1410675048828125, "step": 1491 }, { "epoch": 0.20790078729185535, "grad_norm": 0.888230562210083, "learning_rate": 9.477020797998707e-06, "loss": 0.121185302734375, "step": 1492 }, { "epoch": 0.20804013098306975, "grad_norm": 0.7540216445922852, "learning_rate": 9.47597093262309e-06, "loss": 0.10601425170898438, "step": 1493 }, { "epoch": 0.20817947467428413, "grad_norm": 1.6888986825942993, "learning_rate": 9.474920072788925e-06, "loss": 0.19219589233398438, "step": 1494 }, { "epoch": 0.2083188183654985, "grad_norm": 0.5940014123916626, "learning_rate": 9.47386821872968e-06, "loss": 0.10087776184082031, "step": 1495 }, { "epoch": 0.2084581620567129, "grad_norm": 0.9879565834999084, "learning_rate": 9.47281537067906e-06, "loss": 0.12511062622070312, "step": 1496 }, { "epoch": 0.20859750574792726, "grad_norm": 1.1791635751724243, "learning_rate": 9.471761528870978e-06, "loss": 0.15701675415039062, "step": 1497 }, { "epoch": 0.20873684943914164, "grad_norm": 1.6767688989639282, "learning_rate": 9.470706693539578e-06, "loss": 0.14227294921875, "step": 1498 }, { "epoch": 0.20887619313035602, "grad_norm": 0.989753007888794, "learning_rate": 9.469650864919217e-06, "loss": 0.16323471069335938, "step": 1499 }, { "epoch": 0.2090155368215704, "grad_norm": 1.9783002138137817, "learning_rate": 9.46859404324448e-06, "loss": 0.16939544677734375, "step": 1500 }, { "epoch": 0.20915488051278477, "grad_norm": 0.8692682385444641, "learning_rate": 9.467536228750166e-06, "loss": 0.1392803192138672, "step": 1501 }, { "epoch": 0.20929422420399915, "grad_norm": 1.8220758438110352, "learning_rate": 9.466477421671296e-06, "loss": 0.16289520263671875, "step": 1502 }, { "epoch": 0.20943356789521356, "grad_norm": 1.5227330923080444, "learning_rate": 9.465417622243116e-06, "loss": 0.14363861083984375, "step": 1503 }, { "epoch": 0.20957291158642793, "grad_norm": 1.7792694568634033, "learning_rate": 9.464356830701086e-06, "loss": 0.13895416259765625, "step": 1504 }, { "epoch": 0.2097122552776423, "grad_norm": 1.6686255931854248, "learning_rate": 9.463295047280892e-06, "loss": 0.17481613159179688, "step": 1505 }, { "epoch": 0.2098515989688567, "grad_norm": 1.8620580434799194, "learning_rate": 9.462232272218437e-06, "loss": 0.1676177978515625, "step": 1506 }, { "epoch": 0.20999094266007107, "grad_norm": 1.426020860671997, "learning_rate": 9.461168505749847e-06, "loss": 0.18455123901367188, "step": 1507 }, { "epoch": 0.21013028635128544, "grad_norm": 1.8635131120681763, "learning_rate": 9.460103748111462e-06, "loss": 0.152801513671875, "step": 1508 }, { "epoch": 0.21026963004249982, "grad_norm": 1.3478668928146362, "learning_rate": 9.459037999539852e-06, "loss": 0.1529979705810547, "step": 1509 }, { "epoch": 0.2104089737337142, "grad_norm": 2.4727671146392822, "learning_rate": 9.4579712602718e-06, "loss": 0.215850830078125, "step": 1510 }, { "epoch": 0.21054831742492858, "grad_norm": 1.3590065240859985, "learning_rate": 9.456903530544312e-06, "loss": 0.1379871368408203, "step": 1511 }, { "epoch": 0.21068766111614295, "grad_norm": 1.647182583808899, "learning_rate": 9.455834810594611e-06, "loss": 0.18808364868164062, "step": 1512 }, { "epoch": 0.21082700480735736, "grad_norm": 1.2711261510849, "learning_rate": 9.454765100660144e-06, "loss": 0.13874435424804688, "step": 1513 }, { "epoch": 0.21096634849857174, "grad_norm": 0.8036137223243713, "learning_rate": 9.453694400978576e-06, "loss": 0.134674072265625, "step": 1514 }, { "epoch": 0.21110569218978611, "grad_norm": 0.9797644019126892, "learning_rate": 9.452622711787793e-06, "loss": 0.15586090087890625, "step": 1515 }, { "epoch": 0.2112450358810005, "grad_norm": 1.3424243927001953, "learning_rate": 9.451550033325896e-06, "loss": 0.19547271728515625, "step": 1516 }, { "epoch": 0.21138437957221487, "grad_norm": 1.9894026517868042, "learning_rate": 9.450476365831214e-06, "loss": 0.196746826171875, "step": 1517 }, { "epoch": 0.21152372326342925, "grad_norm": 0.8574327230453491, "learning_rate": 9.449401709542289e-06, "loss": 0.141021728515625, "step": 1518 }, { "epoch": 0.21166306695464362, "grad_norm": 0.9691628217697144, "learning_rate": 9.448326064697886e-06, "loss": 0.14404678344726562, "step": 1519 }, { "epoch": 0.211802410645858, "grad_norm": 1.1554709672927856, "learning_rate": 9.447249431536987e-06, "loss": 0.1199188232421875, "step": 1520 }, { "epoch": 0.21194175433707238, "grad_norm": 2.469568967819214, "learning_rate": 9.446171810298799e-06, "loss": 0.1727447509765625, "step": 1521 }, { "epoch": 0.21208109802828676, "grad_norm": 1.086167812347412, "learning_rate": 9.44509320122274e-06, "loss": 0.1365375518798828, "step": 1522 }, { "epoch": 0.21222044171950116, "grad_norm": 0.7664378881454468, "learning_rate": 9.444013604548457e-06, "loss": 0.12765884399414062, "step": 1523 }, { "epoch": 0.21235978541071554, "grad_norm": 1.1379112005233765, "learning_rate": 9.442933020515808e-06, "loss": 0.131591796875, "step": 1524 }, { "epoch": 0.21249912910192992, "grad_norm": 1.5670292377471924, "learning_rate": 9.441851449364878e-06, "loss": 0.1845703125, "step": 1525 }, { "epoch": 0.2126384727931443, "grad_norm": 1.1116381883621216, "learning_rate": 9.440768891335962e-06, "loss": 0.1388702392578125, "step": 1526 }, { "epoch": 0.21277781648435867, "grad_norm": 2.0963401794433594, "learning_rate": 9.439685346669585e-06, "loss": 0.17783164978027344, "step": 1527 }, { "epoch": 0.21291716017557305, "grad_norm": 1.5439070463180542, "learning_rate": 9.438600815606483e-06, "loss": 0.13484573364257812, "step": 1528 }, { "epoch": 0.21305650386678743, "grad_norm": 1.1734118461608887, "learning_rate": 9.437515298387617e-06, "loss": 0.12493896484375, "step": 1529 }, { "epoch": 0.2131958475580018, "grad_norm": 1.090402603149414, "learning_rate": 9.436428795254159e-06, "loss": 0.17816543579101562, "step": 1530 }, { "epoch": 0.21333519124921618, "grad_norm": 2.223680257797241, "learning_rate": 9.43534130644751e-06, "loss": 0.14941787719726562, "step": 1531 }, { "epoch": 0.21347453494043056, "grad_norm": 2.6831214427948, "learning_rate": 9.43425283220928e-06, "loss": 0.1843547821044922, "step": 1532 }, { "epoch": 0.21361387863164497, "grad_norm": 1.3865658044815063, "learning_rate": 9.43316337278131e-06, "loss": 0.1712493896484375, "step": 1533 }, { "epoch": 0.21375322232285934, "grad_norm": 0.8216031193733215, "learning_rate": 9.432072928405648e-06, "loss": 0.13756179809570312, "step": 1534 }, { "epoch": 0.21389256601407372, "grad_norm": 1.581756830215454, "learning_rate": 9.430981499324567e-06, "loss": 0.15055084228515625, "step": 1535 }, { "epoch": 0.2140319097052881, "grad_norm": 1.0200133323669434, "learning_rate": 9.429889085780559e-06, "loss": 0.129638671875, "step": 1536 }, { "epoch": 0.21417125339650248, "grad_norm": 0.7470645904541016, "learning_rate": 9.42879568801633e-06, "loss": 0.14037704467773438, "step": 1537 }, { "epoch": 0.21431059708771685, "grad_norm": 1.400606393814087, "learning_rate": 9.427701306274812e-06, "loss": 0.15671730041503906, "step": 1538 }, { "epoch": 0.21444994077893123, "grad_norm": 1.241302728652954, "learning_rate": 9.42660594079915e-06, "loss": 0.13207244873046875, "step": 1539 }, { "epoch": 0.2145892844701456, "grad_norm": 1.427795648574829, "learning_rate": 9.42550959183271e-06, "loss": 0.13640975952148438, "step": 1540 }, { "epoch": 0.21472862816135999, "grad_norm": 0.9325164556503296, "learning_rate": 9.424412259619073e-06, "loss": 0.140350341796875, "step": 1541 }, { "epoch": 0.21486797185257436, "grad_norm": 2.5141258239746094, "learning_rate": 9.423313944402043e-06, "loss": 0.16968917846679688, "step": 1542 }, { "epoch": 0.21500731554378877, "grad_norm": 1.4262763261795044, "learning_rate": 9.422214646425641e-06, "loss": 0.15427017211914062, "step": 1543 }, { "epoch": 0.21514665923500315, "grad_norm": 2.0077500343322754, "learning_rate": 9.421114365934105e-06, "loss": 0.17861557006835938, "step": 1544 }, { "epoch": 0.21528600292621752, "grad_norm": 0.8413079977035522, "learning_rate": 9.420013103171893e-06, "loss": 0.14766311645507812, "step": 1545 }, { "epoch": 0.2154253466174319, "grad_norm": 2.2113077640533447, "learning_rate": 9.418910858383681e-06, "loss": 0.18416595458984375, "step": 1546 }, { "epoch": 0.21556469030864628, "grad_norm": 1.3715993165969849, "learning_rate": 9.41780763181436e-06, "loss": 0.1271820068359375, "step": 1547 }, { "epoch": 0.21570403399986066, "grad_norm": 1.789000391960144, "learning_rate": 9.416703423709044e-06, "loss": 0.170379638671875, "step": 1548 }, { "epoch": 0.21584337769107503, "grad_norm": 1.0897059440612793, "learning_rate": 9.415598234313064e-06, "loss": 0.13710784912109375, "step": 1549 }, { "epoch": 0.2159827213822894, "grad_norm": 1.7797259092330933, "learning_rate": 9.414492063871964e-06, "loss": 0.14513397216796875, "step": 1550 }, { "epoch": 0.2161220650735038, "grad_norm": 1.8025065660476685, "learning_rate": 9.413384912631512e-06, "loss": 0.1757965087890625, "step": 1551 }, { "epoch": 0.21626140876471817, "grad_norm": 1.3403661251068115, "learning_rate": 9.412276780837692e-06, "loss": 0.18998336791992188, "step": 1552 }, { "epoch": 0.21640075245593257, "grad_norm": 1.125470519065857, "learning_rate": 9.411167668736707e-06, "loss": 0.14178085327148438, "step": 1553 }, { "epoch": 0.21654009614714695, "grad_norm": 1.0162168741226196, "learning_rate": 9.410057576574974e-06, "loss": 0.134735107421875, "step": 1554 }, { "epoch": 0.21667943983836133, "grad_norm": 2.298100709915161, "learning_rate": 9.408946504599131e-06, "loss": 0.18410873413085938, "step": 1555 }, { "epoch": 0.2168187835295757, "grad_norm": 1.5863182544708252, "learning_rate": 9.40783445305603e-06, "loss": 0.15898704528808594, "step": 1556 }, { "epoch": 0.21695812722079008, "grad_norm": 1.4438400268554688, "learning_rate": 9.406721422192748e-06, "loss": 0.18773269653320312, "step": 1557 }, { "epoch": 0.21709747091200446, "grad_norm": 1.1268638372421265, "learning_rate": 9.405607412256573e-06, "loss": 0.13882064819335938, "step": 1558 }, { "epoch": 0.21723681460321884, "grad_norm": 1.7780556678771973, "learning_rate": 9.404492423495012e-06, "loss": 0.16062545776367188, "step": 1559 }, { "epoch": 0.2173761582944332, "grad_norm": 1.2691165208816528, "learning_rate": 9.403376456155792e-06, "loss": 0.15454483032226562, "step": 1560 }, { "epoch": 0.2175155019856476, "grad_norm": 1.7905786037445068, "learning_rate": 9.402259510486855e-06, "loss": 0.18688011169433594, "step": 1561 }, { "epoch": 0.21765484567686197, "grad_norm": 1.5204474925994873, "learning_rate": 9.401141586736359e-06, "loss": 0.14591217041015625, "step": 1562 }, { "epoch": 0.21779418936807637, "grad_norm": 1.4081732034683228, "learning_rate": 9.400022685152683e-06, "loss": 0.23717880249023438, "step": 1563 }, { "epoch": 0.21793353305929075, "grad_norm": 2.3981096744537354, "learning_rate": 9.398902805984417e-06, "loss": 0.1868743896484375, "step": 1564 }, { "epoch": 0.21807287675050513, "grad_norm": 1.6129570007324219, "learning_rate": 9.397781949480381e-06, "loss": 0.15401840209960938, "step": 1565 }, { "epoch": 0.2182122204417195, "grad_norm": 1.130746841430664, "learning_rate": 9.396660115889596e-06, "loss": 0.1513214111328125, "step": 1566 }, { "epoch": 0.21835156413293388, "grad_norm": 2.3323514461517334, "learning_rate": 9.395537305461312e-06, "loss": 0.16534423828125, "step": 1567 }, { "epoch": 0.21849090782414826, "grad_norm": 1.0609492063522339, "learning_rate": 9.394413518444989e-06, "loss": 0.1315460205078125, "step": 1568 }, { "epoch": 0.21863025151536264, "grad_norm": 2.904642105102539, "learning_rate": 9.39328875509031e-06, "loss": 0.18410110473632812, "step": 1569 }, { "epoch": 0.21876959520657702, "grad_norm": 2.472235918045044, "learning_rate": 9.39216301564717e-06, "loss": 0.1798248291015625, "step": 1570 }, { "epoch": 0.2189089388977914, "grad_norm": 1.1429368257522583, "learning_rate": 9.391036300365681e-06, "loss": 0.12887191772460938, "step": 1571 }, { "epoch": 0.21904828258900577, "grad_norm": 1.41802978515625, "learning_rate": 9.389908609496177e-06, "loss": 0.17352676391601562, "step": 1572 }, { "epoch": 0.21918762628022015, "grad_norm": 1.2701356410980225, "learning_rate": 9.388779943289204e-06, "loss": 0.13026046752929688, "step": 1573 }, { "epoch": 0.21932696997143455, "grad_norm": 0.9579776525497437, "learning_rate": 9.387650301995523e-06, "loss": 0.16218185424804688, "step": 1574 }, { "epoch": 0.21946631366264893, "grad_norm": 1.5300655364990234, "learning_rate": 9.386519685866117e-06, "loss": 0.17827606201171875, "step": 1575 }, { "epoch": 0.2196056573538633, "grad_norm": 0.9804102182388306, "learning_rate": 9.385388095152184e-06, "loss": 0.15410614013671875, "step": 1576 }, { "epoch": 0.2197450010450777, "grad_norm": 4.533372402191162, "learning_rate": 9.384255530105136e-06, "loss": 0.20203018188476562, "step": 1577 }, { "epoch": 0.21988434473629206, "grad_norm": 1.1578421592712402, "learning_rate": 9.383121990976602e-06, "loss": 0.15365982055664062, "step": 1578 }, { "epoch": 0.22002368842750644, "grad_norm": 1.8213262557983398, "learning_rate": 9.381987478018431e-06, "loss": 0.14360809326171875, "step": 1579 }, { "epoch": 0.22016303211872082, "grad_norm": 1.289742112159729, "learning_rate": 9.380851991482685e-06, "loss": 0.181427001953125, "step": 1580 }, { "epoch": 0.2203023758099352, "grad_norm": 2.692084550857544, "learning_rate": 9.379715531621642e-06, "loss": 0.216888427734375, "step": 1581 }, { "epoch": 0.22044171950114957, "grad_norm": 2.869314193725586, "learning_rate": 9.3785780986878e-06, "loss": 0.19077682495117188, "step": 1582 }, { "epoch": 0.22058106319236395, "grad_norm": 3.08652925491333, "learning_rate": 9.377439692933869e-06, "loss": 0.1371288299560547, "step": 1583 }, { "epoch": 0.22072040688357836, "grad_norm": 1.946378231048584, "learning_rate": 9.376300314612775e-06, "loss": 0.11879348754882812, "step": 1584 }, { "epoch": 0.22085975057479273, "grad_norm": 2.9106225967407227, "learning_rate": 9.375159963977668e-06, "loss": 0.19269561767578125, "step": 1585 }, { "epoch": 0.2209990942660071, "grad_norm": 0.8290851712226868, "learning_rate": 9.374018641281898e-06, "loss": 0.15399551391601562, "step": 1586 }, { "epoch": 0.2211384379572215, "grad_norm": 1.6633888483047485, "learning_rate": 9.37287634677905e-06, "loss": 0.13669776916503906, "step": 1587 }, { "epoch": 0.22127778164843587, "grad_norm": 1.6593607664108276, "learning_rate": 9.371733080722911e-06, "loss": 0.13306808471679688, "step": 1588 }, { "epoch": 0.22141712533965024, "grad_norm": 2.1444249153137207, "learning_rate": 9.37058884336749e-06, "loss": 0.15723800659179688, "step": 1589 }, { "epoch": 0.22155646903086462, "grad_norm": 2.14919114112854, "learning_rate": 9.36944363496701e-06, "loss": 0.14079666137695312, "step": 1590 }, { "epoch": 0.221695812722079, "grad_norm": 1.8978471755981445, "learning_rate": 9.368297455775911e-06, "loss": 0.19614410400390625, "step": 1591 }, { "epoch": 0.22183515641329338, "grad_norm": 0.721153736114502, "learning_rate": 9.367150306048847e-06, "loss": 0.11305999755859375, "step": 1592 }, { "epoch": 0.22197450010450775, "grad_norm": 1.65359365940094, "learning_rate": 9.36600218604069e-06, "loss": 0.12309646606445312, "step": 1593 }, { "epoch": 0.22211384379572216, "grad_norm": 2.1218647956848145, "learning_rate": 9.364853096006523e-06, "loss": 0.1605987548828125, "step": 1594 }, { "epoch": 0.22225318748693654, "grad_norm": 1.6102101802825928, "learning_rate": 9.36370303620165e-06, "loss": 0.1790924072265625, "step": 1595 }, { "epoch": 0.22239253117815092, "grad_norm": 0.9717047810554504, "learning_rate": 9.362552006881588e-06, "loss": 0.13452529907226562, "step": 1596 }, { "epoch": 0.2225318748693653, "grad_norm": 1.1912654638290405, "learning_rate": 9.361400008302068e-06, "loss": 0.18191909790039062, "step": 1597 }, { "epoch": 0.22267121856057967, "grad_norm": 0.6211940050125122, "learning_rate": 9.36024704071904e-06, "loss": 0.12737655639648438, "step": 1598 }, { "epoch": 0.22281056225179405, "grad_norm": 1.7658560276031494, "learning_rate": 9.359093104388663e-06, "loss": 0.15633392333984375, "step": 1599 }, { "epoch": 0.22294990594300843, "grad_norm": 1.252242922782898, "learning_rate": 9.35793819956732e-06, "loss": 0.1575469970703125, "step": 1600 }, { "epoch": 0.2230892496342228, "grad_norm": 1.5491716861724854, "learning_rate": 9.356782326511602e-06, "loss": 0.2092132568359375, "step": 1601 }, { "epoch": 0.22322859332543718, "grad_norm": 1.485917091369629, "learning_rate": 9.355625485478319e-06, "loss": 0.21832656860351562, "step": 1602 }, { "epoch": 0.22336793701665156, "grad_norm": 1.6362918615341187, "learning_rate": 9.354467676724491e-06, "loss": 0.16681289672851562, "step": 1603 }, { "epoch": 0.22350728070786596, "grad_norm": 2.199364423751831, "learning_rate": 9.353308900507361e-06, "loss": 0.15096664428710938, "step": 1604 }, { "epoch": 0.22364662439908034, "grad_norm": 2.056676149368286, "learning_rate": 9.352149157084383e-06, "loss": 0.19741058349609375, "step": 1605 }, { "epoch": 0.22378596809029472, "grad_norm": 1.9452139139175415, "learning_rate": 9.350988446713221e-06, "loss": 0.18216705322265625, "step": 1606 }, { "epoch": 0.2239253117815091, "grad_norm": 0.7937310934066772, "learning_rate": 9.349826769651762e-06, "loss": 0.14137649536132812, "step": 1607 }, { "epoch": 0.22406465547272347, "grad_norm": 2.402937412261963, "learning_rate": 9.348664126158103e-06, "loss": 0.18432998657226562, "step": 1608 }, { "epoch": 0.22420399916393785, "grad_norm": 1.8349416255950928, "learning_rate": 9.347500516490555e-06, "loss": 0.11986351013183594, "step": 1609 }, { "epoch": 0.22434334285515223, "grad_norm": 2.503967523574829, "learning_rate": 9.346335940907648e-06, "loss": 0.17583847045898438, "step": 1610 }, { "epoch": 0.2244826865463666, "grad_norm": 1.0572208166122437, "learning_rate": 9.345170399668127e-06, "loss": 0.1498260498046875, "step": 1611 }, { "epoch": 0.22462203023758098, "grad_norm": 0.48912638425827026, "learning_rate": 9.344003893030942e-06, "loss": 0.102020263671875, "step": 1612 }, { "epoch": 0.22476137392879536, "grad_norm": 1.0497713088989258, "learning_rate": 9.342836421255268e-06, "loss": 0.1405792236328125, "step": 1613 }, { "epoch": 0.22490071762000977, "grad_norm": 3.522062301635742, "learning_rate": 9.341667984600489e-06, "loss": 0.20253753662109375, "step": 1614 }, { "epoch": 0.22504006131122414, "grad_norm": 2.155843734741211, "learning_rate": 9.340498583326208e-06, "loss": 0.13474273681640625, "step": 1615 }, { "epoch": 0.22517940500243852, "grad_norm": 0.5627976655960083, "learning_rate": 9.339328217692233e-06, "loss": 0.10765457153320312, "step": 1616 }, { "epoch": 0.2253187486936529, "grad_norm": 2.271055221557617, "learning_rate": 9.3381568879586e-06, "loss": 0.1673603057861328, "step": 1617 }, { "epoch": 0.22545809238486728, "grad_norm": 0.9300485849380493, "learning_rate": 9.336984594385547e-06, "loss": 0.15052413940429688, "step": 1618 }, { "epoch": 0.22559743607608165, "grad_norm": 0.7072260975837708, "learning_rate": 9.335811337233533e-06, "loss": 0.13318252563476562, "step": 1619 }, { "epoch": 0.22573677976729603, "grad_norm": 1.0748652219772339, "learning_rate": 9.334637116763227e-06, "loss": 0.13088226318359375, "step": 1620 }, { "epoch": 0.2258761234585104, "grad_norm": 1.255942940711975, "learning_rate": 9.333461933235517e-06, "loss": 0.13923263549804688, "step": 1621 }, { "epoch": 0.22601546714972479, "grad_norm": 1.3484184741973877, "learning_rate": 9.332285786911498e-06, "loss": 0.15419387817382812, "step": 1622 }, { "epoch": 0.22615481084093916, "grad_norm": 0.7185392379760742, "learning_rate": 9.331108678052485e-06, "loss": 0.13851165771484375, "step": 1623 }, { "epoch": 0.22629415453215357, "grad_norm": 0.947361946105957, "learning_rate": 9.329930606920005e-06, "loss": 0.16924285888671875, "step": 1624 }, { "epoch": 0.22643349822336795, "grad_norm": 1.4747145175933838, "learning_rate": 9.3287515737758e-06, "loss": 0.15000534057617188, "step": 1625 }, { "epoch": 0.22657284191458232, "grad_norm": 0.8305121660232544, "learning_rate": 9.32757157888182e-06, "loss": 0.10587692260742188, "step": 1626 }, { "epoch": 0.2267121856057967, "grad_norm": 1.147843360900879, "learning_rate": 9.326390622500236e-06, "loss": 0.1422252655029297, "step": 1627 }, { "epoch": 0.22685152929701108, "grad_norm": 1.2383556365966797, "learning_rate": 9.32520870489343e-06, "loss": 0.13252830505371094, "step": 1628 }, { "epoch": 0.22699087298822546, "grad_norm": 1.605017066001892, "learning_rate": 9.324025826323995e-06, "loss": 0.17161941528320312, "step": 1629 }, { "epoch": 0.22713021667943983, "grad_norm": 0.8153094053268433, "learning_rate": 9.322841987054741e-06, "loss": 0.11948204040527344, "step": 1630 }, { "epoch": 0.2272695603706542, "grad_norm": 0.9539587497711182, "learning_rate": 9.321657187348689e-06, "loss": 0.11747169494628906, "step": 1631 }, { "epoch": 0.2274089040618686, "grad_norm": 1.9259988069534302, "learning_rate": 9.320471427469076e-06, "loss": 0.1493682861328125, "step": 1632 }, { "epoch": 0.22754824775308297, "grad_norm": 1.8396735191345215, "learning_rate": 9.319284707679348e-06, "loss": 0.15920257568359375, "step": 1633 }, { "epoch": 0.22768759144429737, "grad_norm": 3.8115103244781494, "learning_rate": 9.31809702824317e-06, "loss": 0.2427520751953125, "step": 1634 }, { "epoch": 0.22782693513551175, "grad_norm": 1.9201759099960327, "learning_rate": 9.316908389424416e-06, "loss": 0.15543365478515625, "step": 1635 }, { "epoch": 0.22796627882672613, "grad_norm": 2.368729591369629, "learning_rate": 9.315718791487175e-06, "loss": 0.22520828247070312, "step": 1636 }, { "epoch": 0.2281056225179405, "grad_norm": 2.345236301422119, "learning_rate": 9.314528234695747e-06, "loss": 0.136871337890625, "step": 1637 }, { "epoch": 0.22824496620915488, "grad_norm": 2.0419600009918213, "learning_rate": 9.31333671931465e-06, "loss": 0.17386627197265625, "step": 1638 }, { "epoch": 0.22838430990036926, "grad_norm": 2.4528603553771973, "learning_rate": 9.312144245608608e-06, "loss": 0.21068572998046875, "step": 1639 }, { "epoch": 0.22852365359158364, "grad_norm": 3.481595039367676, "learning_rate": 9.31095081384256e-06, "loss": 0.19837188720703125, "step": 1640 }, { "epoch": 0.22866299728279801, "grad_norm": 1.3878257274627686, "learning_rate": 9.309756424281664e-06, "loss": 0.15159988403320312, "step": 1641 }, { "epoch": 0.2288023409740124, "grad_norm": 1.8247766494750977, "learning_rate": 9.308561077191284e-06, "loss": 0.17118072509765625, "step": 1642 }, { "epoch": 0.22894168466522677, "grad_norm": 0.9835690259933472, "learning_rate": 9.307364772837e-06, "loss": 0.12276840209960938, "step": 1643 }, { "epoch": 0.22908102835644117, "grad_norm": 1.021437168121338, "learning_rate": 9.306167511484601e-06, "loss": 0.14254379272460938, "step": 1644 }, { "epoch": 0.22922037204765555, "grad_norm": 1.5477968454360962, "learning_rate": 9.304969293400092e-06, "loss": 0.14130401611328125, "step": 1645 }, { "epoch": 0.22935971573886993, "grad_norm": 2.2919845581054688, "learning_rate": 9.303770118849692e-06, "loss": 0.19603347778320312, "step": 1646 }, { "epoch": 0.2294990594300843, "grad_norm": 2.762885570526123, "learning_rate": 9.302569988099825e-06, "loss": 0.15081024169921875, "step": 1647 }, { "epoch": 0.22963840312129868, "grad_norm": 3.83034348487854, "learning_rate": 9.301368901417138e-06, "loss": 0.1846599578857422, "step": 1648 }, { "epoch": 0.22977774681251306, "grad_norm": 2.245785713195801, "learning_rate": 9.300166859068482e-06, "loss": 0.15929412841796875, "step": 1649 }, { "epoch": 0.22991709050372744, "grad_norm": 2.173491954803467, "learning_rate": 9.298963861320927e-06, "loss": 0.1580982208251953, "step": 1650 }, { "epoch": 0.23005643419494182, "grad_norm": 1.1388568878173828, "learning_rate": 9.297759908441747e-06, "loss": 0.14181137084960938, "step": 1651 }, { "epoch": 0.2301957778861562, "grad_norm": 2.1568846702575684, "learning_rate": 9.296555000698435e-06, "loss": 0.14161300659179688, "step": 1652 }, { "epoch": 0.23033512157737057, "grad_norm": 2.300069808959961, "learning_rate": 9.295349138358693e-06, "loss": 0.14508056640625, "step": 1653 }, { "epoch": 0.23047446526858498, "grad_norm": 1.5801341533660889, "learning_rate": 9.294142321690438e-06, "loss": 0.14141845703125, "step": 1654 }, { "epoch": 0.23061380895979935, "grad_norm": 1.0350956916809082, "learning_rate": 9.292934550961796e-06, "loss": 0.13832473754882812, "step": 1655 }, { "epoch": 0.23075315265101373, "grad_norm": 2.826582431793213, "learning_rate": 9.291725826441107e-06, "loss": 0.21059799194335938, "step": 1656 }, { "epoch": 0.2308924963422281, "grad_norm": 2.7874088287353516, "learning_rate": 9.29051614839692e-06, "loss": 0.15056610107421875, "step": 1657 }, { "epoch": 0.2310318400334425, "grad_norm": 1.953673243522644, "learning_rate": 9.289305517098e-06, "loss": 0.15690231323242188, "step": 1658 }, { "epoch": 0.23117118372465686, "grad_norm": 2.4412336349487305, "learning_rate": 9.28809393281332e-06, "loss": 0.128753662109375, "step": 1659 }, { "epoch": 0.23131052741587124, "grad_norm": 2.446756362915039, "learning_rate": 9.286881395812066e-06, "loss": 0.2057342529296875, "step": 1660 }, { "epoch": 0.23144987110708562, "grad_norm": 1.8484313488006592, "learning_rate": 9.285667906363637e-06, "loss": 0.1639404296875, "step": 1661 }, { "epoch": 0.2315892147983, "grad_norm": 1.2772016525268555, "learning_rate": 9.284453464737644e-06, "loss": 0.136505126953125, "step": 1662 }, { "epoch": 0.23172855848951437, "grad_norm": 1.7928446531295776, "learning_rate": 9.283238071203907e-06, "loss": 0.15370559692382812, "step": 1663 }, { "epoch": 0.23186790218072878, "grad_norm": 2.8999040126800537, "learning_rate": 9.282021726032457e-06, "loss": 0.16141891479492188, "step": 1664 }, { "epoch": 0.23200724587194316, "grad_norm": 1.2566165924072266, "learning_rate": 9.280804429493542e-06, "loss": 0.16407012939453125, "step": 1665 }, { "epoch": 0.23214658956315753, "grad_norm": 1.8252907991409302, "learning_rate": 9.279586181857613e-06, "loss": 0.15100479125976562, "step": 1666 }, { "epoch": 0.2322859332543719, "grad_norm": 1.0798351764678955, "learning_rate": 9.278366983395341e-06, "loss": 0.12207412719726562, "step": 1667 }, { "epoch": 0.2324252769455863, "grad_norm": 1.478204607963562, "learning_rate": 9.277146834377601e-06, "loss": 0.18274307250976562, "step": 1668 }, { "epoch": 0.23256462063680067, "grad_norm": 1.3642483949661255, "learning_rate": 9.275925735075484e-06, "loss": 0.13557052612304688, "step": 1669 }, { "epoch": 0.23270396432801504, "grad_norm": 0.5034958124160767, "learning_rate": 9.274703685760287e-06, "loss": 0.10297393798828125, "step": 1670 }, { "epoch": 0.23284330801922942, "grad_norm": 1.430587887763977, "learning_rate": 9.273480686703526e-06, "loss": 0.15974044799804688, "step": 1671 }, { "epoch": 0.2329826517104438, "grad_norm": 0.7950683236122131, "learning_rate": 9.272256738176924e-06, "loss": 0.11165237426757812, "step": 1672 }, { "epoch": 0.23312199540165818, "grad_norm": 2.451028823852539, "learning_rate": 9.271031840452409e-06, "loss": 0.23754119873046875, "step": 1673 }, { "epoch": 0.23326133909287258, "grad_norm": 0.9556198120117188, "learning_rate": 9.26980599380213e-06, "loss": 0.1355438232421875, "step": 1674 }, { "epoch": 0.23340068278408696, "grad_norm": 1.711567759513855, "learning_rate": 9.268579198498438e-06, "loss": 0.17569732666015625, "step": 1675 }, { "epoch": 0.23354002647530134, "grad_norm": 1.2847709655761719, "learning_rate": 9.267351454813904e-06, "loss": 0.15084075927734375, "step": 1676 }, { "epoch": 0.23367937016651572, "grad_norm": 0.8722214698791504, "learning_rate": 9.266122763021302e-06, "loss": 0.14606857299804688, "step": 1677 }, { "epoch": 0.2338187138577301, "grad_norm": 1.460516095161438, "learning_rate": 9.264893123393618e-06, "loss": 0.17354965209960938, "step": 1678 }, { "epoch": 0.23395805754894447, "grad_norm": 1.4118690490722656, "learning_rate": 9.26366253620405e-06, "loss": 0.15959930419921875, "step": 1679 }, { "epoch": 0.23409740124015885, "grad_norm": 1.246877670288086, "learning_rate": 9.26243100172601e-06, "loss": 0.16276931762695312, "step": 1680 }, { "epoch": 0.23423674493137323, "grad_norm": 1.0961456298828125, "learning_rate": 9.261198520233113e-06, "loss": 0.17032623291015625, "step": 1681 }, { "epoch": 0.2343760886225876, "grad_norm": 1.4948481321334839, "learning_rate": 9.25996509199919e-06, "loss": 0.14977264404296875, "step": 1682 }, { "epoch": 0.23451543231380198, "grad_norm": 0.9389553666114807, "learning_rate": 9.258730717298281e-06, "loss": 0.13352584838867188, "step": 1683 }, { "epoch": 0.23465477600501639, "grad_norm": 2.0781946182250977, "learning_rate": 9.257495396404635e-06, "loss": 0.17543411254882812, "step": 1684 }, { "epoch": 0.23479411969623076, "grad_norm": 1.1453523635864258, "learning_rate": 9.256259129592711e-06, "loss": 0.1508026123046875, "step": 1685 }, { "epoch": 0.23493346338744514, "grad_norm": 1.1182149648666382, "learning_rate": 9.255021917137181e-06, "loss": 0.164886474609375, "step": 1686 }, { "epoch": 0.23507280707865952, "grad_norm": 1.3888078927993774, "learning_rate": 9.253783759312924e-06, "loss": 0.1752777099609375, "step": 1687 }, { "epoch": 0.2352121507698739, "grad_norm": 1.5040003061294556, "learning_rate": 9.252544656395033e-06, "loss": 0.13687515258789062, "step": 1688 }, { "epoch": 0.23535149446108827, "grad_norm": 2.5654335021972656, "learning_rate": 9.251304608658806e-06, "loss": 0.1636962890625, "step": 1689 }, { "epoch": 0.23549083815230265, "grad_norm": 1.2403737306594849, "learning_rate": 9.250063616379754e-06, "loss": 0.17624473571777344, "step": 1690 }, { "epoch": 0.23563018184351703, "grad_norm": 3.1631221771240234, "learning_rate": 9.248821679833596e-06, "loss": 0.18340301513671875, "step": 1691 }, { "epoch": 0.2357695255347314, "grad_norm": 1.6440293788909912, "learning_rate": 9.247578799296263e-06, "loss": 0.1537628173828125, "step": 1692 }, { "epoch": 0.23590886922594578, "grad_norm": 1.9155333042144775, "learning_rate": 9.246334975043896e-06, "loss": 0.16451263427734375, "step": 1693 }, { "epoch": 0.2360482129171602, "grad_norm": 1.9669474363327026, "learning_rate": 9.245090207352842e-06, "loss": 0.17105865478515625, "step": 1694 }, { "epoch": 0.23618755660837457, "grad_norm": 0.968415379524231, "learning_rate": 9.243844496499661e-06, "loss": 0.14943313598632812, "step": 1695 }, { "epoch": 0.23632690029958894, "grad_norm": 1.6809037923812866, "learning_rate": 9.242597842761123e-06, "loss": 0.15253829956054688, "step": 1696 }, { "epoch": 0.23646624399080332, "grad_norm": 3.003373146057129, "learning_rate": 9.241350246414203e-06, "loss": 0.18509292602539062, "step": 1697 }, { "epoch": 0.2366055876820177, "grad_norm": 1.868179440498352, "learning_rate": 9.24010170773609e-06, "loss": 0.1526336669921875, "step": 1698 }, { "epoch": 0.23674493137323208, "grad_norm": 1.3444797992706299, "learning_rate": 9.23885222700418e-06, "loss": 0.18117523193359375, "step": 1699 }, { "epoch": 0.23688427506444645, "grad_norm": 0.9236157536506653, "learning_rate": 9.237601804496081e-06, "loss": 0.12732315063476562, "step": 1700 }, { "epoch": 0.23702361875566083, "grad_norm": 1.986329197883606, "learning_rate": 9.236350440489608e-06, "loss": 0.17716598510742188, "step": 1701 }, { "epoch": 0.2371629624468752, "grad_norm": 1.4167677164077759, "learning_rate": 9.235098135262783e-06, "loss": 0.13961410522460938, "step": 1702 }, { "epoch": 0.23730230613808959, "grad_norm": 1.987930178642273, "learning_rate": 9.233844889093842e-06, "loss": 0.15768051147460938, "step": 1703 }, { "epoch": 0.237441649829304, "grad_norm": 1.589408040046692, "learning_rate": 9.232590702261227e-06, "loss": 0.16327285766601562, "step": 1704 }, { "epoch": 0.23758099352051837, "grad_norm": 1.2932069301605225, "learning_rate": 9.23133557504359e-06, "loss": 0.16878890991210938, "step": 1705 }, { "epoch": 0.23772033721173275, "grad_norm": 3.7591946125030518, "learning_rate": 9.23007950771979e-06, "loss": 0.185638427734375, "step": 1706 }, { "epoch": 0.23785968090294712, "grad_norm": 2.090791940689087, "learning_rate": 9.228822500568898e-06, "loss": 0.1819629669189453, "step": 1707 }, { "epoch": 0.2379990245941615, "grad_norm": 2.988218307495117, "learning_rate": 9.227564553870192e-06, "loss": 0.170745849609375, "step": 1708 }, { "epoch": 0.23813836828537588, "grad_norm": 3.8029892444610596, "learning_rate": 9.226305667903159e-06, "loss": 0.18721771240234375, "step": 1709 }, { "epoch": 0.23827771197659026, "grad_norm": 0.6543132662773132, "learning_rate": 9.225045842947496e-06, "loss": 0.1433258056640625, "step": 1710 }, { "epoch": 0.23841705566780463, "grad_norm": 1.018912672996521, "learning_rate": 9.223785079283106e-06, "loss": 0.12082290649414062, "step": 1711 }, { "epoch": 0.238556399359019, "grad_norm": 1.0966408252716064, "learning_rate": 9.2225233771901e-06, "loss": 0.14097976684570312, "step": 1712 }, { "epoch": 0.2386957430502334, "grad_norm": 1.0739976167678833, "learning_rate": 9.221260736948803e-06, "loss": 0.14780426025390625, "step": 1713 }, { "epoch": 0.2388350867414478, "grad_norm": 0.8169497847557068, "learning_rate": 9.219997158839743e-06, "loss": 0.15158462524414062, "step": 1714 }, { "epoch": 0.23897443043266217, "grad_norm": 1.154314637184143, "learning_rate": 9.21873264314366e-06, "loss": 0.16833114624023438, "step": 1715 }, { "epoch": 0.23911377412387655, "grad_norm": 0.9978910684585571, "learning_rate": 9.217467190141498e-06, "loss": 0.11098861694335938, "step": 1716 }, { "epoch": 0.23925311781509093, "grad_norm": 1.0972554683685303, "learning_rate": 9.216200800114412e-06, "loss": 0.14438247680664062, "step": 1717 }, { "epoch": 0.2393924615063053, "grad_norm": 2.085960626602173, "learning_rate": 9.214933473343765e-06, "loss": 0.16305923461914062, "step": 1718 }, { "epoch": 0.23953180519751968, "grad_norm": 1.8590987920761108, "learning_rate": 9.213665210111131e-06, "loss": 0.1596832275390625, "step": 1719 }, { "epoch": 0.23967114888873406, "grad_norm": 1.2052063941955566, "learning_rate": 9.212396010698286e-06, "loss": 0.16611480712890625, "step": 1720 }, { "epoch": 0.23981049257994844, "grad_norm": 0.7644645571708679, "learning_rate": 9.211125875387217e-06, "loss": 0.11638259887695312, "step": 1721 }, { "epoch": 0.23994983627116281, "grad_norm": 1.3282912969589233, "learning_rate": 9.209854804460121e-06, "loss": 0.1239013671875, "step": 1722 }, { "epoch": 0.2400891799623772, "grad_norm": 2.5073227882385254, "learning_rate": 9.208582798199402e-06, "loss": 0.18420791625976562, "step": 1723 }, { "epoch": 0.2402285236535916, "grad_norm": 1.3215985298156738, "learning_rate": 9.207309856887664e-06, "loss": 0.16038894653320312, "step": 1724 }, { "epoch": 0.24036786734480597, "grad_norm": 1.6280015707015991, "learning_rate": 9.206035980807734e-06, "loss": 0.1766223907470703, "step": 1725 }, { "epoch": 0.24050721103602035, "grad_norm": 1.413689136505127, "learning_rate": 9.204761170242635e-06, "loss": 0.14736557006835938, "step": 1726 }, { "epoch": 0.24064655472723473, "grad_norm": 1.0404095649719238, "learning_rate": 9.203485425475598e-06, "loss": 0.13550186157226562, "step": 1727 }, { "epoch": 0.2407858984184491, "grad_norm": 1.1619853973388672, "learning_rate": 9.202208746790069e-06, "loss": 0.12998199462890625, "step": 1728 }, { "epoch": 0.24092524210966348, "grad_norm": 0.8775332570075989, "learning_rate": 9.200931134469692e-06, "loss": 0.16312408447265625, "step": 1729 }, { "epoch": 0.24106458580087786, "grad_norm": 1.0977898836135864, "learning_rate": 9.199652588798327e-06, "loss": 0.14382171630859375, "step": 1730 }, { "epoch": 0.24120392949209224, "grad_norm": 1.0653131008148193, "learning_rate": 9.198373110060037e-06, "loss": 0.13836288452148438, "step": 1731 }, { "epoch": 0.24134327318330662, "grad_norm": 1.4281131029129028, "learning_rate": 9.197092698539092e-06, "loss": 0.16590118408203125, "step": 1732 }, { "epoch": 0.241482616874521, "grad_norm": 1.2650662660598755, "learning_rate": 9.19581135451997e-06, "loss": 0.17584609985351562, "step": 1733 }, { "epoch": 0.2416219605657354, "grad_norm": 0.9070515632629395, "learning_rate": 9.194529078287358e-06, "loss": 0.16089630126953125, "step": 1734 }, { "epoch": 0.24176130425694978, "grad_norm": 1.4649975299835205, "learning_rate": 9.193245870126147e-06, "loss": 0.13089752197265625, "step": 1735 }, { "epoch": 0.24190064794816415, "grad_norm": 1.9638553857803345, "learning_rate": 9.191961730321437e-06, "loss": 0.14453125, "step": 1736 }, { "epoch": 0.24203999163937853, "grad_norm": 1.4374028444290161, "learning_rate": 9.190676659158535e-06, "loss": 0.14138031005859375, "step": 1737 }, { "epoch": 0.2421793353305929, "grad_norm": 0.7881308794021606, "learning_rate": 9.189390656922955e-06, "loss": 0.12676239013671875, "step": 1738 }, { "epoch": 0.2423186790218073, "grad_norm": 1.7593008279800415, "learning_rate": 9.188103723900414e-06, "loss": 0.17846298217773438, "step": 1739 }, { "epoch": 0.24245802271302166, "grad_norm": 1.7321343421936035, "learning_rate": 9.186815860376843e-06, "loss": 0.1475067138671875, "step": 1740 }, { "epoch": 0.24259736640423604, "grad_norm": 2.26202130317688, "learning_rate": 9.185527066638375e-06, "loss": 0.17822647094726562, "step": 1741 }, { "epoch": 0.24273671009545042, "grad_norm": 0.8137302398681641, "learning_rate": 9.184237342971349e-06, "loss": 0.1317596435546875, "step": 1742 }, { "epoch": 0.2428760537866648, "grad_norm": 1.8643865585327148, "learning_rate": 9.182946689662314e-06, "loss": 0.16153717041015625, "step": 1743 }, { "epoch": 0.2430153974778792, "grad_norm": 2.2154793739318848, "learning_rate": 9.181655106998023e-06, "loss": 0.189910888671875, "step": 1744 }, { "epoch": 0.24315474116909358, "grad_norm": 0.5372365117073059, "learning_rate": 9.180362595265435e-06, "loss": 0.10654449462890625, "step": 1745 }, { "epoch": 0.24329408486030796, "grad_norm": 1.2426493167877197, "learning_rate": 9.179069154751718e-06, "loss": 0.16211318969726562, "step": 1746 }, { "epoch": 0.24343342855152234, "grad_norm": 1.1319936513900757, "learning_rate": 9.177774785744245e-06, "loss": 0.14365005493164062, "step": 1747 }, { "epoch": 0.2435727722427367, "grad_norm": 1.1067942380905151, "learning_rate": 9.176479488530594e-06, "loss": 0.14780426025390625, "step": 1748 }, { "epoch": 0.2437121159339511, "grad_norm": 0.9455718398094177, "learning_rate": 9.175183263398553e-06, "loss": 0.14597320556640625, "step": 1749 }, { "epoch": 0.24385145962516547, "grad_norm": 1.6624188423156738, "learning_rate": 9.17388611063611e-06, "loss": 0.14010238647460938, "step": 1750 }, { "epoch": 0.24399080331637985, "grad_norm": 1.2855511903762817, "learning_rate": 9.172588030531467e-06, "loss": 0.18526077270507812, "step": 1751 }, { "epoch": 0.24413014700759422, "grad_norm": 1.9873682260513306, "learning_rate": 9.171289023373022e-06, "loss": 0.191558837890625, "step": 1752 }, { "epoch": 0.2442694906988086, "grad_norm": 1.158539891242981, "learning_rate": 9.16998908944939e-06, "loss": 0.1677703857421875, "step": 1753 }, { "epoch": 0.244408834390023, "grad_norm": 1.2517791986465454, "learning_rate": 9.168688229049386e-06, "loss": 0.13427734375, "step": 1754 }, { "epoch": 0.24454817808123738, "grad_norm": 2.2212815284729004, "learning_rate": 9.167386442462029e-06, "loss": 0.15380477905273438, "step": 1755 }, { "epoch": 0.24468752177245176, "grad_norm": 2.415863275527954, "learning_rate": 9.166083729976547e-06, "loss": 0.15538406372070312, "step": 1756 }, { "epoch": 0.24482686546366614, "grad_norm": 1.2576904296875, "learning_rate": 9.164780091882374e-06, "loss": 0.139312744140625, "step": 1757 }, { "epoch": 0.24496620915488052, "grad_norm": 1.0594998598098755, "learning_rate": 9.163475528469148e-06, "loss": 0.13653564453125, "step": 1758 }, { "epoch": 0.2451055528460949, "grad_norm": 1.034072756767273, "learning_rate": 9.162170040026714e-06, "loss": 0.12519073486328125, "step": 1759 }, { "epoch": 0.24524489653730927, "grad_norm": 1.2380692958831787, "learning_rate": 9.16086362684512e-06, "loss": 0.17070770263671875, "step": 1760 }, { "epoch": 0.24538424022852365, "grad_norm": 1.4269115924835205, "learning_rate": 9.159556289214623e-06, "loss": 0.15708541870117188, "step": 1761 }, { "epoch": 0.24552358391973803, "grad_norm": 1.2111930847167969, "learning_rate": 9.158248027425683e-06, "loss": 0.15561485290527344, "step": 1762 }, { "epoch": 0.2456629276109524, "grad_norm": 1.6284897327423096, "learning_rate": 9.156938841768965e-06, "loss": 0.17132186889648438, "step": 1763 }, { "epoch": 0.2458022713021668, "grad_norm": 0.6556436419487, "learning_rate": 9.155628732535342e-06, "loss": 0.12093734741210938, "step": 1764 }, { "epoch": 0.24594161499338119, "grad_norm": 1.2135753631591797, "learning_rate": 9.15431770001589e-06, "loss": 0.1547393798828125, "step": 1765 }, { "epoch": 0.24608095868459556, "grad_norm": 1.3244479894638062, "learning_rate": 9.153005744501886e-06, "loss": 0.15578079223632812, "step": 1766 }, { "epoch": 0.24622030237580994, "grad_norm": 1.5614075660705566, "learning_rate": 9.151692866284824e-06, "loss": 0.1858673095703125, "step": 1767 }, { "epoch": 0.24635964606702432, "grad_norm": 0.6636113524436951, "learning_rate": 9.150379065656389e-06, "loss": 0.10478782653808594, "step": 1768 }, { "epoch": 0.2464989897582387, "grad_norm": 2.1258277893066406, "learning_rate": 9.149064342908482e-06, "loss": 0.16765594482421875, "step": 1769 }, { "epoch": 0.24663833344945307, "grad_norm": 1.03105890750885, "learning_rate": 9.147748698333203e-06, "loss": 0.16967391967773438, "step": 1770 }, { "epoch": 0.24677767714066745, "grad_norm": 1.1891647577285767, "learning_rate": 9.146432132222858e-06, "loss": 0.15361595153808594, "step": 1771 }, { "epoch": 0.24691702083188183, "grad_norm": 1.298911690711975, "learning_rate": 9.145114644869957e-06, "loss": 0.1578369140625, "step": 1772 }, { "epoch": 0.2470563645230962, "grad_norm": 2.155330181121826, "learning_rate": 9.143796236567218e-06, "loss": 0.18285751342773438, "step": 1773 }, { "epoch": 0.24719570821431058, "grad_norm": 1.2247841358184814, "learning_rate": 9.142476907607558e-06, "loss": 0.1440601348876953, "step": 1774 }, { "epoch": 0.247335051905525, "grad_norm": 1.2687574625015259, "learning_rate": 9.141156658284104e-06, "loss": 0.1268157958984375, "step": 1775 }, { "epoch": 0.24747439559673937, "grad_norm": 0.9781771302223206, "learning_rate": 9.139835488890186e-06, "loss": 0.14139938354492188, "step": 1776 }, { "epoch": 0.24761373928795374, "grad_norm": 2.0001718997955322, "learning_rate": 9.138513399719335e-06, "loss": 0.17380142211914062, "step": 1777 }, { "epoch": 0.24775308297916812, "grad_norm": 1.4760838747024536, "learning_rate": 9.13719039106529e-06, "loss": 0.17211151123046875, "step": 1778 }, { "epoch": 0.2478924266703825, "grad_norm": 1.8013932704925537, "learning_rate": 9.135866463221994e-06, "loss": 0.16361236572265625, "step": 1779 }, { "epoch": 0.24803177036159688, "grad_norm": 1.3159986734390259, "learning_rate": 9.134541616483594e-06, "loss": 0.18299102783203125, "step": 1780 }, { "epoch": 0.24817111405281125, "grad_norm": 2.0126256942749023, "learning_rate": 9.13321585114444e-06, "loss": 0.15156173706054688, "step": 1781 }, { "epoch": 0.24831045774402563, "grad_norm": 0.7438353896141052, "learning_rate": 9.131889167499086e-06, "loss": 0.1255626678466797, "step": 1782 }, { "epoch": 0.24844980143524, "grad_norm": 2.3857946395874023, "learning_rate": 9.130561565842293e-06, "loss": 0.17380523681640625, "step": 1783 }, { "epoch": 0.24858914512645439, "grad_norm": 1.219537377357483, "learning_rate": 9.129233046469021e-06, "loss": 0.14934158325195312, "step": 1784 }, { "epoch": 0.2487284888176688, "grad_norm": 0.9668885469436646, "learning_rate": 9.12790360967444e-06, "loss": 0.1609344482421875, "step": 1785 }, { "epoch": 0.24886783250888317, "grad_norm": 2.245924234390259, "learning_rate": 9.126573255753917e-06, "loss": 0.16852569580078125, "step": 1786 }, { "epoch": 0.24900717620009755, "grad_norm": 2.3727614879608154, "learning_rate": 9.125241985003028e-06, "loss": 0.20125198364257812, "step": 1787 }, { "epoch": 0.24914651989131192, "grad_norm": 1.209249496459961, "learning_rate": 9.123909797717551e-06, "loss": 0.18590545654296875, "step": 1788 }, { "epoch": 0.2492858635825263, "grad_norm": 1.109782338142395, "learning_rate": 9.122576694193467e-06, "loss": 0.14516067504882812, "step": 1789 }, { "epoch": 0.24942520727374068, "grad_norm": 0.9658738970756531, "learning_rate": 9.121242674726962e-06, "loss": 0.15715789794921875, "step": 1790 }, { "epoch": 0.24956455096495506, "grad_norm": 0.6866849064826965, "learning_rate": 9.119907739614424e-06, "loss": 0.13958358764648438, "step": 1791 }, { "epoch": 0.24970389465616943, "grad_norm": 1.3581346273422241, "learning_rate": 9.118571889152445e-06, "loss": 0.15988540649414062, "step": 1792 }, { "epoch": 0.2498432383473838, "grad_norm": 1.3127373456954956, "learning_rate": 9.117235123637822e-06, "loss": 0.15627288818359375, "step": 1793 }, { "epoch": 0.2499825820385982, "grad_norm": 0.44014984369277954, "learning_rate": 9.115897443367552e-06, "loss": 0.11906051635742188, "step": 1794 }, { "epoch": 0.25012192572981257, "grad_norm": 1.03160560131073, "learning_rate": 9.114558848638836e-06, "loss": 0.12272453308105469, "step": 1795 }, { "epoch": 0.25026126942102694, "grad_norm": 0.8961682915687561, "learning_rate": 9.113219339749084e-06, "loss": 0.14735794067382812, "step": 1796 }, { "epoch": 0.2504006131122413, "grad_norm": 1.3804656267166138, "learning_rate": 9.1118789169959e-06, "loss": 0.1729278564453125, "step": 1797 }, { "epoch": 0.2505399568034557, "grad_norm": 1.5184760093688965, "learning_rate": 9.110537580677094e-06, "loss": 0.17005538940429688, "step": 1798 }, { "epoch": 0.2506793004946701, "grad_norm": 2.095592975616455, "learning_rate": 9.109195331090685e-06, "loss": 0.15586280822753906, "step": 1799 }, { "epoch": 0.2508186441858845, "grad_norm": 1.70883047580719, "learning_rate": 9.10785216853489e-06, "loss": 0.14723587036132812, "step": 1800 }, { "epoch": 0.2509579878770989, "grad_norm": 0.7838926911354065, "learning_rate": 9.106508093308123e-06, "loss": 0.13521575927734375, "step": 1801 }, { "epoch": 0.25109733156831326, "grad_norm": 0.9949594140052795, "learning_rate": 9.105163105709011e-06, "loss": 0.1283416748046875, "step": 1802 }, { "epoch": 0.25123667525952764, "grad_norm": 1.790096640586853, "learning_rate": 9.103817206036383e-06, "loss": 0.2092437744140625, "step": 1803 }, { "epoch": 0.251376018950742, "grad_norm": 1.0062315464019775, "learning_rate": 9.10247039458926e-06, "loss": 0.11513900756835938, "step": 1804 }, { "epoch": 0.2515153626419564, "grad_norm": 0.8725445866584778, "learning_rate": 9.101122671666878e-06, "loss": 0.15038299560546875, "step": 1805 }, { "epoch": 0.2516547063331708, "grad_norm": 1.1728733777999878, "learning_rate": 9.09977403756867e-06, "loss": 0.14575958251953125, "step": 1806 }, { "epoch": 0.25179405002438515, "grad_norm": 0.7806732058525085, "learning_rate": 9.098424492594268e-06, "loss": 0.13210296630859375, "step": 1807 }, { "epoch": 0.25193339371559953, "grad_norm": 1.085610032081604, "learning_rate": 9.097074037043512e-06, "loss": 0.13621902465820312, "step": 1808 }, { "epoch": 0.2520727374068139, "grad_norm": 1.264579176902771, "learning_rate": 9.095722671216443e-06, "loss": 0.15918350219726562, "step": 1809 }, { "epoch": 0.2522120810980283, "grad_norm": 2.6879842281341553, "learning_rate": 9.094370395413306e-06, "loss": 0.22655487060546875, "step": 1810 }, { "epoch": 0.25235142478924266, "grad_norm": 1.7998727560043335, "learning_rate": 9.09301720993454e-06, "loss": 0.2163066864013672, "step": 1811 }, { "epoch": 0.25249076848045704, "grad_norm": 1.1910746097564697, "learning_rate": 9.091663115080797e-06, "loss": 0.1643829345703125, "step": 1812 }, { "epoch": 0.2526301121716714, "grad_norm": 1.8740023374557495, "learning_rate": 9.090308111152924e-06, "loss": 0.15087127685546875, "step": 1813 }, { "epoch": 0.2527694558628858, "grad_norm": 1.2282711267471313, "learning_rate": 9.08895219845197e-06, "loss": 0.17942428588867188, "step": 1814 }, { "epoch": 0.25290879955410017, "grad_norm": 1.0570656061172485, "learning_rate": 9.087595377279192e-06, "loss": 0.15363311767578125, "step": 1815 }, { "epoch": 0.25304814324531455, "grad_norm": 1.9408663511276245, "learning_rate": 9.086237647936043e-06, "loss": 0.20204544067382812, "step": 1816 }, { "epoch": 0.2531874869365289, "grad_norm": 1.4422001838684082, "learning_rate": 9.084879010724177e-06, "loss": 0.19049453735351562, "step": 1817 }, { "epoch": 0.2533268306277433, "grad_norm": 1.3111485242843628, "learning_rate": 9.083519465945456e-06, "loss": 0.15189743041992188, "step": 1818 }, { "epoch": 0.2534661743189577, "grad_norm": 1.3778395652770996, "learning_rate": 9.082159013901937e-06, "loss": 0.16090011596679688, "step": 1819 }, { "epoch": 0.2536055180101721, "grad_norm": 2.0259830951690674, "learning_rate": 9.080797654895883e-06, "loss": 0.20624542236328125, "step": 1820 }, { "epoch": 0.2537448617013865, "grad_norm": 1.28840172290802, "learning_rate": 9.079435389229755e-06, "loss": 0.1770172119140625, "step": 1821 }, { "epoch": 0.25388420539260087, "grad_norm": 2.377324104309082, "learning_rate": 9.07807221720622e-06, "loss": 0.16265869140625, "step": 1822 }, { "epoch": 0.25402354908381525, "grad_norm": 1.120911955833435, "learning_rate": 9.07670813912814e-06, "loss": 0.1253814697265625, "step": 1823 }, { "epoch": 0.2541628927750296, "grad_norm": 1.7284659147262573, "learning_rate": 9.075343155298589e-06, "loss": 0.16829681396484375, "step": 1824 }, { "epoch": 0.254302236466244, "grad_norm": 0.7720327973365784, "learning_rate": 9.073977266020826e-06, "loss": 0.1295318603515625, "step": 1825 }, { "epoch": 0.2544415801574584, "grad_norm": 1.0054312944412231, "learning_rate": 9.072610471598327e-06, "loss": 0.16159439086914062, "step": 1826 }, { "epoch": 0.25458092384867276, "grad_norm": 1.022829532623291, "learning_rate": 9.07124277233476e-06, "loss": 0.12910079956054688, "step": 1827 }, { "epoch": 0.25472026753988714, "grad_norm": 1.227944016456604, "learning_rate": 9.069874168533996e-06, "loss": 0.169952392578125, "step": 1828 }, { "epoch": 0.2548596112311015, "grad_norm": 2.130554676055908, "learning_rate": 9.068504660500111e-06, "loss": 0.14299774169921875, "step": 1829 }, { "epoch": 0.2549989549223159, "grad_norm": 1.5293289422988892, "learning_rate": 9.067134248537374e-06, "loss": 0.14218521118164062, "step": 1830 }, { "epoch": 0.25513829861353027, "grad_norm": 1.915529489517212, "learning_rate": 9.065762932950262e-06, "loss": 0.1789093017578125, "step": 1831 }, { "epoch": 0.25527764230474465, "grad_norm": 1.2976503372192383, "learning_rate": 9.06439071404345e-06, "loss": 0.15325927734375, "step": 1832 }, { "epoch": 0.255416985995959, "grad_norm": 1.2739512920379639, "learning_rate": 9.063017592121812e-06, "loss": 0.14474105834960938, "step": 1833 }, { "epoch": 0.2555563296871734, "grad_norm": 1.2673790454864502, "learning_rate": 9.061643567490425e-06, "loss": 0.15753555297851562, "step": 1834 }, { "epoch": 0.2556956733783878, "grad_norm": 1.3662229776382446, "learning_rate": 9.060268640454565e-06, "loss": 0.183868408203125, "step": 1835 }, { "epoch": 0.25583501706960216, "grad_norm": 1.175687313079834, "learning_rate": 9.058892811319713e-06, "loss": 0.142608642578125, "step": 1836 }, { "epoch": 0.25597436076081653, "grad_norm": 1.814066767692566, "learning_rate": 9.057516080391544e-06, "loss": 0.16158676147460938, "step": 1837 }, { "epoch": 0.2561137044520309, "grad_norm": 2.160673141479492, "learning_rate": 9.056138447975936e-06, "loss": 0.15959548950195312, "step": 1838 }, { "epoch": 0.2562530481432453, "grad_norm": 1.4217734336853027, "learning_rate": 9.05475991437897e-06, "loss": 0.12623214721679688, "step": 1839 }, { "epoch": 0.2563923918344597, "grad_norm": 1.4795422554016113, "learning_rate": 9.053380479906919e-06, "loss": 0.19382095336914062, "step": 1840 }, { "epoch": 0.2565317355256741, "grad_norm": 0.9891914129257202, "learning_rate": 9.052000144866269e-06, "loss": 0.14162063598632812, "step": 1841 }, { "epoch": 0.2566710792168885, "grad_norm": 0.8029426336288452, "learning_rate": 9.050618909563693e-06, "loss": 0.16879653930664062, "step": 1842 }, { "epoch": 0.25681042290810285, "grad_norm": 0.7654977440834045, "learning_rate": 9.049236774306073e-06, "loss": 0.11768150329589844, "step": 1843 }, { "epoch": 0.25694976659931723, "grad_norm": 1.46807861328125, "learning_rate": 9.04785373940049e-06, "loss": 0.14165878295898438, "step": 1844 }, { "epoch": 0.2570891102905316, "grad_norm": 2.0695199966430664, "learning_rate": 9.046469805154218e-06, "loss": 0.17811203002929688, "step": 1845 }, { "epoch": 0.257228453981746, "grad_norm": 1.6004759073257446, "learning_rate": 9.045084971874738e-06, "loss": 0.16057586669921875, "step": 1846 }, { "epoch": 0.25736779767296036, "grad_norm": 1.6976839303970337, "learning_rate": 9.043699239869727e-06, "loss": 0.14374923706054688, "step": 1847 }, { "epoch": 0.25750714136417474, "grad_norm": 1.9151347875595093, "learning_rate": 9.042312609447066e-06, "loss": 0.14936447143554688, "step": 1848 }, { "epoch": 0.2576464850553891, "grad_norm": 1.6294337511062622, "learning_rate": 9.040925080914832e-06, "loss": 0.16552734375, "step": 1849 }, { "epoch": 0.2577858287466035, "grad_norm": 1.113520860671997, "learning_rate": 9.039536654581297e-06, "loss": 0.15700912475585938, "step": 1850 }, { "epoch": 0.2579251724378179, "grad_norm": 1.3242746591567993, "learning_rate": 9.038147330754944e-06, "loss": 0.14857864379882812, "step": 1851 }, { "epoch": 0.25806451612903225, "grad_norm": 0.9623203277587891, "learning_rate": 9.036757109744447e-06, "loss": 0.14534950256347656, "step": 1852 }, { "epoch": 0.25820385982024663, "grad_norm": 0.9667872786521912, "learning_rate": 9.035365991858679e-06, "loss": 0.1341094970703125, "step": 1853 }, { "epoch": 0.258343203511461, "grad_norm": 0.9743611812591553, "learning_rate": 9.033973977406718e-06, "loss": 0.18206405639648438, "step": 1854 }, { "epoch": 0.2584825472026754, "grad_norm": 1.032759428024292, "learning_rate": 9.032581066697836e-06, "loss": 0.12737655639648438, "step": 1855 }, { "epoch": 0.25862189089388976, "grad_norm": 1.158491849899292, "learning_rate": 9.031187260041505e-06, "loss": 0.1581878662109375, "step": 1856 }, { "epoch": 0.25876123458510414, "grad_norm": 1.2281548976898193, "learning_rate": 9.0297925577474e-06, "loss": 0.13434600830078125, "step": 1857 }, { "epoch": 0.2589005782763185, "grad_norm": 0.9903150796890259, "learning_rate": 9.028396960125392e-06, "loss": 0.12957000732421875, "step": 1858 }, { "epoch": 0.2590399219675329, "grad_norm": 1.867350459098816, "learning_rate": 9.027000467485547e-06, "loss": 0.14660263061523438, "step": 1859 }, { "epoch": 0.2591792656587473, "grad_norm": 1.2516188621520996, "learning_rate": 9.025603080138136e-06, "loss": 0.1765289306640625, "step": 1860 }, { "epoch": 0.2593186093499617, "grad_norm": 1.880262851715088, "learning_rate": 9.024204798393627e-06, "loss": 0.13638687133789062, "step": 1861 }, { "epoch": 0.2594579530411761, "grad_norm": 1.8122559785842896, "learning_rate": 9.022805622562687e-06, "loss": 0.16184616088867188, "step": 1862 }, { "epoch": 0.25959729673239046, "grad_norm": 0.9578235745429993, "learning_rate": 9.02140555295618e-06, "loss": 0.13159561157226562, "step": 1863 }, { "epoch": 0.25973664042360484, "grad_norm": 1.4516983032226562, "learning_rate": 9.020004589885167e-06, "loss": 0.13557720184326172, "step": 1864 }, { "epoch": 0.2598759841148192, "grad_norm": 1.0978337526321411, "learning_rate": 9.018602733660915e-06, "loss": 0.14029693603515625, "step": 1865 }, { "epoch": 0.2600153278060336, "grad_norm": 0.8119058609008789, "learning_rate": 9.01719998459488e-06, "loss": 0.12902069091796875, "step": 1866 }, { "epoch": 0.26015467149724797, "grad_norm": 0.7746114730834961, "learning_rate": 9.015796342998724e-06, "loss": 0.11092758178710938, "step": 1867 }, { "epoch": 0.26029401518846235, "grad_norm": 0.8541579246520996, "learning_rate": 9.014391809184302e-06, "loss": 0.14640426635742188, "step": 1868 }, { "epoch": 0.2604333588796767, "grad_norm": 1.3316192626953125, "learning_rate": 9.01298638346367e-06, "loss": 0.1358509063720703, "step": 1869 }, { "epoch": 0.2605727025708911, "grad_norm": 0.9492895603179932, "learning_rate": 9.011580066149081e-06, "loss": 0.12852859497070312, "step": 1870 }, { "epoch": 0.2607120462621055, "grad_norm": 2.6512298583984375, "learning_rate": 9.010172857552989e-06, "loss": 0.19438552856445312, "step": 1871 }, { "epoch": 0.26085138995331986, "grad_norm": 1.372001051902771, "learning_rate": 9.008764757988042e-06, "loss": 0.13499832153320312, "step": 1872 }, { "epoch": 0.26099073364453423, "grad_norm": 1.1935824155807495, "learning_rate": 9.007355767767085e-06, "loss": 0.13625335693359375, "step": 1873 }, { "epoch": 0.2611300773357486, "grad_norm": 2.0230836868286133, "learning_rate": 9.005945887203167e-06, "loss": 0.19900131225585938, "step": 1874 }, { "epoch": 0.261269421026963, "grad_norm": 1.3207271099090576, "learning_rate": 9.004535116609532e-06, "loss": 0.15038299560546875, "step": 1875 }, { "epoch": 0.26140876471817737, "grad_norm": 1.5905667543411255, "learning_rate": 9.003123456299617e-06, "loss": 0.19210052490234375, "step": 1876 }, { "epoch": 0.26154810840939174, "grad_norm": 2.200141191482544, "learning_rate": 9.001710906587064e-06, "loss": 0.19388961791992188, "step": 1877 }, { "epoch": 0.2616874521006061, "grad_norm": 0.6754549741744995, "learning_rate": 9.000297467785708e-06, "loss": 0.13946151733398438, "step": 1878 }, { "epoch": 0.2618267957918205, "grad_norm": 2.530435085296631, "learning_rate": 8.998883140209582e-06, "loss": 0.18714523315429688, "step": 1879 }, { "epoch": 0.26196613948303493, "grad_norm": 0.8797516822814941, "learning_rate": 8.99746792417292e-06, "loss": 0.14676666259765625, "step": 1880 }, { "epoch": 0.2621054831742493, "grad_norm": 1.2995892763137817, "learning_rate": 8.996051819990148e-06, "loss": 0.17035675048828125, "step": 1881 }, { "epoch": 0.2622448268654637, "grad_norm": 0.8987159729003906, "learning_rate": 8.994634827975892e-06, "loss": 0.13263702392578125, "step": 1882 }, { "epoch": 0.26238417055667806, "grad_norm": 1.5743427276611328, "learning_rate": 8.993216948444978e-06, "loss": 0.14465713500976562, "step": 1883 }, { "epoch": 0.26252351424789244, "grad_norm": 1.0427594184875488, "learning_rate": 8.991798181712423e-06, "loss": 0.13906097412109375, "step": 1884 }, { "epoch": 0.2626628579391068, "grad_norm": 0.9445350170135498, "learning_rate": 8.99037852809345e-06, "loss": 0.1209716796875, "step": 1885 }, { "epoch": 0.2628022016303212, "grad_norm": 0.7156503200531006, "learning_rate": 8.988957987903467e-06, "loss": 0.12111854553222656, "step": 1886 }, { "epoch": 0.2629415453215356, "grad_norm": 1.7288246154785156, "learning_rate": 8.987536561458088e-06, "loss": 0.16724777221679688, "step": 1887 }, { "epoch": 0.26308088901274995, "grad_norm": 1.5260792970657349, "learning_rate": 8.986114249073122e-06, "loss": 0.14780426025390625, "step": 1888 }, { "epoch": 0.26322023270396433, "grad_norm": 1.1469236612319946, "learning_rate": 8.984691051064576e-06, "loss": 0.12332534790039062, "step": 1889 }, { "epoch": 0.2633595763951787, "grad_norm": 0.9052799940109253, "learning_rate": 8.98326696774865e-06, "loss": 0.14006614685058594, "step": 1890 }, { "epoch": 0.2634989200863931, "grad_norm": 1.2365751266479492, "learning_rate": 8.981841999441743e-06, "loss": 0.12572860717773438, "step": 1891 }, { "epoch": 0.26363826377760746, "grad_norm": 1.6645071506500244, "learning_rate": 8.980416146460452e-06, "loss": 0.11721611022949219, "step": 1892 }, { "epoch": 0.26377760746882184, "grad_norm": 1.4403071403503418, "learning_rate": 8.978989409121565e-06, "loss": 0.14542007446289062, "step": 1893 }, { "epoch": 0.2639169511600362, "grad_norm": 0.6254674792289734, "learning_rate": 8.977561787742074e-06, "loss": 0.11341094970703125, "step": 1894 }, { "epoch": 0.2640562948512506, "grad_norm": 1.0846015214920044, "learning_rate": 8.976133282639166e-06, "loss": 0.14455795288085938, "step": 1895 }, { "epoch": 0.264195638542465, "grad_norm": 1.6715346574783325, "learning_rate": 8.974703894130218e-06, "loss": 0.17270278930664062, "step": 1896 }, { "epoch": 0.26433498223367935, "grad_norm": 1.6425803899765015, "learning_rate": 8.973273622532806e-06, "loss": 0.21299362182617188, "step": 1897 }, { "epoch": 0.2644743259248937, "grad_norm": 1.743930459022522, "learning_rate": 8.97184246816471e-06, "loss": 0.20682525634765625, "step": 1898 }, { "epoch": 0.2646136696161081, "grad_norm": 1.9230685234069824, "learning_rate": 8.970410431343892e-06, "loss": 0.20474624633789062, "step": 1899 }, { "epoch": 0.26475301330732254, "grad_norm": 0.9900375008583069, "learning_rate": 8.968977512388524e-06, "loss": 0.1224212646484375, "step": 1900 }, { "epoch": 0.2648923569985369, "grad_norm": 3.2338409423828125, "learning_rate": 8.967543711616968e-06, "loss": 0.22240447998046875, "step": 1901 }, { "epoch": 0.2650317006897513, "grad_norm": 2.01806902885437, "learning_rate": 8.966109029347777e-06, "loss": 0.17416763305664062, "step": 1902 }, { "epoch": 0.26517104438096567, "grad_norm": 0.8957557082176208, "learning_rate": 8.96467346589971e-06, "loss": 0.14727783203125, "step": 1903 }, { "epoch": 0.26531038807218005, "grad_norm": 1.4433729648590088, "learning_rate": 8.963237021591714e-06, "loss": 0.13936614990234375, "step": 1904 }, { "epoch": 0.2654497317633944, "grad_norm": 0.6681258082389832, "learning_rate": 8.961799696742933e-06, "loss": 0.12332916259765625, "step": 1905 }, { "epoch": 0.2655890754546088, "grad_norm": 1.5040972232818604, "learning_rate": 8.960361491672708e-06, "loss": 0.1509857177734375, "step": 1906 }, { "epoch": 0.2657284191458232, "grad_norm": 1.8980211019515991, "learning_rate": 8.958922406700578e-06, "loss": 0.15273284912109375, "step": 1907 }, { "epoch": 0.26586776283703756, "grad_norm": 1.1249231100082397, "learning_rate": 8.957482442146271e-06, "loss": 0.15096282958984375, "step": 1908 }, { "epoch": 0.26600710652825194, "grad_norm": 0.9302837252616882, "learning_rate": 8.956041598329716e-06, "loss": 0.13792800903320312, "step": 1909 }, { "epoch": 0.2661464502194663, "grad_norm": 1.1979924440383911, "learning_rate": 8.954599875571039e-06, "loss": 0.18947982788085938, "step": 1910 }, { "epoch": 0.2662857939106807, "grad_norm": 2.610013961791992, "learning_rate": 8.953157274190552e-06, "loss": 0.17842864990234375, "step": 1911 }, { "epoch": 0.26642513760189507, "grad_norm": 1.5310736894607544, "learning_rate": 8.951713794508771e-06, "loss": 0.1244964599609375, "step": 1912 }, { "epoch": 0.26656448129310945, "grad_norm": 1.201343297958374, "learning_rate": 8.950269436846405e-06, "loss": 0.1245269775390625, "step": 1913 }, { "epoch": 0.2667038249843238, "grad_norm": 1.1785109043121338, "learning_rate": 8.948824201524355e-06, "loss": 0.13421630859375, "step": 1914 }, { "epoch": 0.2668431686755382, "grad_norm": 2.0595552921295166, "learning_rate": 8.947378088863722e-06, "loss": 0.19783401489257812, "step": 1915 }, { "epoch": 0.2669825123667526, "grad_norm": 1.5027109384536743, "learning_rate": 8.945931099185798e-06, "loss": 0.1821746826171875, "step": 1916 }, { "epoch": 0.26712185605796696, "grad_norm": 2.1790361404418945, "learning_rate": 8.94448323281207e-06, "loss": 0.18568801879882812, "step": 1917 }, { "epoch": 0.26726119974918133, "grad_norm": 1.3968887329101562, "learning_rate": 8.943034490064222e-06, "loss": 0.13850021362304688, "step": 1918 }, { "epoch": 0.2674005434403957, "grad_norm": 0.8437862992286682, "learning_rate": 8.941584871264131e-06, "loss": 0.12375640869140625, "step": 1919 }, { "epoch": 0.26753988713161014, "grad_norm": 2.0897064208984375, "learning_rate": 8.940134376733869e-06, "loss": 0.15763473510742188, "step": 1920 }, { "epoch": 0.2676792308228245, "grad_norm": 1.2266439199447632, "learning_rate": 8.938683006795704e-06, "loss": 0.1377716064453125, "step": 1921 }, { "epoch": 0.2678185745140389, "grad_norm": 1.1842787265777588, "learning_rate": 8.937230761772098e-06, "loss": 0.1344146728515625, "step": 1922 }, { "epoch": 0.2679579182052533, "grad_norm": 1.585093379020691, "learning_rate": 8.935777641985704e-06, "loss": 0.154754638671875, "step": 1923 }, { "epoch": 0.26809726189646765, "grad_norm": 2.317349672317505, "learning_rate": 8.934323647759373e-06, "loss": 0.17083740234375, "step": 1924 }, { "epoch": 0.26823660558768203, "grad_norm": 1.7560539245605469, "learning_rate": 8.932868779416148e-06, "loss": 0.13193130493164062, "step": 1925 }, { "epoch": 0.2683759492788964, "grad_norm": 1.228771686553955, "learning_rate": 8.931413037279271e-06, "loss": 0.14626693725585938, "step": 1926 }, { "epoch": 0.2685152929701108, "grad_norm": 0.9306968450546265, "learning_rate": 8.929956421672172e-06, "loss": 0.14520263671875, "step": 1927 }, { "epoch": 0.26865463666132516, "grad_norm": 1.570969820022583, "learning_rate": 8.92849893291848e-06, "loss": 0.14445114135742188, "step": 1928 }, { "epoch": 0.26879398035253954, "grad_norm": 1.1827507019042969, "learning_rate": 8.927040571342014e-06, "loss": 0.13544464111328125, "step": 1929 }, { "epoch": 0.2689333240437539, "grad_norm": 0.9795280694961548, "learning_rate": 8.92558133726679e-06, "loss": 0.15557479858398438, "step": 1930 }, { "epoch": 0.2690726677349683, "grad_norm": 1.7031067609786987, "learning_rate": 8.924121231017012e-06, "loss": 0.17791366577148438, "step": 1931 }, { "epoch": 0.2692120114261827, "grad_norm": 2.353550910949707, "learning_rate": 8.922660252917088e-06, "loss": 0.19852828979492188, "step": 1932 }, { "epoch": 0.26935135511739705, "grad_norm": 1.3135014772415161, "learning_rate": 8.92119840329161e-06, "loss": 0.1637725830078125, "step": 1933 }, { "epoch": 0.26949069880861143, "grad_norm": 0.78957599401474, "learning_rate": 8.919735682465372e-06, "loss": 0.11992073059082031, "step": 1934 }, { "epoch": 0.2696300424998258, "grad_norm": 1.055876612663269, "learning_rate": 8.918272090763352e-06, "loss": 0.15274810791015625, "step": 1935 }, { "epoch": 0.2697693861910402, "grad_norm": 1.4058958292007446, "learning_rate": 8.91680762851073e-06, "loss": 0.15137100219726562, "step": 1936 }, { "epoch": 0.26990872988225456, "grad_norm": 1.4161207675933838, "learning_rate": 8.915342296032874e-06, "loss": 0.17517471313476562, "step": 1937 }, { "epoch": 0.27004807357346894, "grad_norm": 1.232064127922058, "learning_rate": 8.913876093655351e-06, "loss": 0.16594314575195312, "step": 1938 }, { "epoch": 0.2701874172646833, "grad_norm": 1.6534829139709473, "learning_rate": 8.912409021703914e-06, "loss": 0.13206100463867188, "step": 1939 }, { "epoch": 0.27032676095589775, "grad_norm": 2.6778173446655273, "learning_rate": 8.910941080504514e-06, "loss": 0.16994476318359375, "step": 1940 }, { "epoch": 0.2704661046471121, "grad_norm": 2.539551019668579, "learning_rate": 8.909472270383293e-06, "loss": 0.20895767211914062, "step": 1941 }, { "epoch": 0.2706054483383265, "grad_norm": 0.9042821526527405, "learning_rate": 8.90800259166659e-06, "loss": 0.13976287841796875, "step": 1942 }, { "epoch": 0.2707447920295409, "grad_norm": 0.9823083877563477, "learning_rate": 8.906532044680933e-06, "loss": 0.1524505615234375, "step": 1943 }, { "epoch": 0.27088413572075526, "grad_norm": 1.2908759117126465, "learning_rate": 8.905060629753041e-06, "loss": 0.15883636474609375, "step": 1944 }, { "epoch": 0.27102347941196964, "grad_norm": 1.3293981552124023, "learning_rate": 8.903588347209833e-06, "loss": 0.14674758911132812, "step": 1945 }, { "epoch": 0.271162823103184, "grad_norm": 1.274626612663269, "learning_rate": 8.902115197378414e-06, "loss": 0.15736770629882812, "step": 1946 }, { "epoch": 0.2713021667943984, "grad_norm": 2.253410577774048, "learning_rate": 8.900641180586086e-06, "loss": 0.177978515625, "step": 1947 }, { "epoch": 0.27144151048561277, "grad_norm": 1.0870579481124878, "learning_rate": 8.89916629716034e-06, "loss": 0.15065383911132812, "step": 1948 }, { "epoch": 0.27158085417682715, "grad_norm": 1.50031578540802, "learning_rate": 8.897690547428861e-06, "loss": 0.15480422973632812, "step": 1949 }, { "epoch": 0.2717201978680415, "grad_norm": 1.347642421722412, "learning_rate": 8.89621393171953e-06, "loss": 0.16825485229492188, "step": 1950 }, { "epoch": 0.2718595415592559, "grad_norm": 0.7352496981620789, "learning_rate": 8.894736450360415e-06, "loss": 0.13011932373046875, "step": 1951 }, { "epoch": 0.2719988852504703, "grad_norm": 2.415762424468994, "learning_rate": 8.893258103679779e-06, "loss": 0.202117919921875, "step": 1952 }, { "epoch": 0.27213822894168466, "grad_norm": 1.6947132349014282, "learning_rate": 8.891778892006077e-06, "loss": 0.18958663940429688, "step": 1953 }, { "epoch": 0.27227757263289903, "grad_norm": 1.5333011150360107, "learning_rate": 8.890298815667956e-06, "loss": 0.15206527709960938, "step": 1954 }, { "epoch": 0.2724169163241134, "grad_norm": 1.9178074598312378, "learning_rate": 8.888817874994254e-06, "loss": 0.13391876220703125, "step": 1955 }, { "epoch": 0.2725562600153278, "grad_norm": 2.2849857807159424, "learning_rate": 8.887336070314005e-06, "loss": 0.13581466674804688, "step": 1956 }, { "epoch": 0.27269560370654217, "grad_norm": 3.1926093101501465, "learning_rate": 8.88585340195643e-06, "loss": 0.17003631591796875, "step": 1957 }, { "epoch": 0.27283494739775654, "grad_norm": 2.8843181133270264, "learning_rate": 8.884369870250945e-06, "loss": 0.18160247802734375, "step": 1958 }, { "epoch": 0.2729742910889709, "grad_norm": 0.897146999835968, "learning_rate": 8.882885475527156e-06, "loss": 0.1065521240234375, "step": 1959 }, { "epoch": 0.27311363478018535, "grad_norm": 2.3171494007110596, "learning_rate": 8.881400218114861e-06, "loss": 0.18482208251953125, "step": 1960 }, { "epoch": 0.27325297847139973, "grad_norm": 1.4249964952468872, "learning_rate": 8.879914098344053e-06, "loss": 0.11239433288574219, "step": 1961 }, { "epoch": 0.2733923221626141, "grad_norm": 2.5707995891571045, "learning_rate": 8.878427116544912e-06, "loss": 0.16301727294921875, "step": 1962 }, { "epoch": 0.2735316658538285, "grad_norm": 1.2705103158950806, "learning_rate": 8.876939273047813e-06, "loss": 0.17500686645507812, "step": 1963 }, { "epoch": 0.27367100954504286, "grad_norm": 1.5696871280670166, "learning_rate": 8.875450568183318e-06, "loss": 0.15296554565429688, "step": 1964 }, { "epoch": 0.27381035323625724, "grad_norm": 1.7164188623428345, "learning_rate": 8.873961002282185e-06, "loss": 0.15817642211914062, "step": 1965 }, { "epoch": 0.2739496969274716, "grad_norm": 1.3429577350616455, "learning_rate": 8.872470575675361e-06, "loss": 0.14306259155273438, "step": 1966 }, { "epoch": 0.274089040618686, "grad_norm": 0.7174415588378906, "learning_rate": 8.870979288693985e-06, "loss": 0.14094924926757812, "step": 1967 }, { "epoch": 0.2742283843099004, "grad_norm": 0.766541600227356, "learning_rate": 8.86948714166939e-06, "loss": 0.12637710571289062, "step": 1968 }, { "epoch": 0.27436772800111475, "grad_norm": 0.7440057396888733, "learning_rate": 8.86799413493309e-06, "loss": 0.14209747314453125, "step": 1969 }, { "epoch": 0.27450707169232913, "grad_norm": 1.7931458950042725, "learning_rate": 8.866500268816803e-06, "loss": 0.15242576599121094, "step": 1970 }, { "epoch": 0.2746464153835435, "grad_norm": 1.517648696899414, "learning_rate": 8.865005543652428e-06, "loss": 0.147857666015625, "step": 1971 }, { "epoch": 0.2747857590747579, "grad_norm": 1.1562118530273438, "learning_rate": 8.863509959772064e-06, "loss": 0.12479782104492188, "step": 1972 }, { "epoch": 0.27492510276597226, "grad_norm": 1.736750602722168, "learning_rate": 8.86201351750799e-06, "loss": 0.1737823486328125, "step": 1973 }, { "epoch": 0.27506444645718664, "grad_norm": 0.6929688453674316, "learning_rate": 8.860516217192683e-06, "loss": 0.1315784454345703, "step": 1974 }, { "epoch": 0.275203790148401, "grad_norm": 1.2959741353988647, "learning_rate": 8.85901805915881e-06, "loss": 0.129302978515625, "step": 1975 }, { "epoch": 0.2753431338396154, "grad_norm": 1.4064340591430664, "learning_rate": 8.85751904373923e-06, "loss": 0.15017318725585938, "step": 1976 }, { "epoch": 0.2754824775308298, "grad_norm": 1.2132234573364258, "learning_rate": 8.856019171266984e-06, "loss": 0.1254119873046875, "step": 1977 }, { "epoch": 0.27562182122204415, "grad_norm": 2.0618457794189453, "learning_rate": 8.854518442075313e-06, "loss": 0.18367767333984375, "step": 1978 }, { "epoch": 0.2757611649132585, "grad_norm": 1.182834267616272, "learning_rate": 8.853016856497646e-06, "loss": 0.1450958251953125, "step": 1979 }, { "epoch": 0.2759005086044729, "grad_norm": 1.1646252870559692, "learning_rate": 8.8515144148676e-06, "loss": 0.16884231567382812, "step": 1980 }, { "epoch": 0.27603985229568734, "grad_norm": 1.5148195028305054, "learning_rate": 8.85001111751898e-06, "loss": 0.20566940307617188, "step": 1981 }, { "epoch": 0.2761791959869017, "grad_norm": 1.3959388732910156, "learning_rate": 8.848506964785789e-06, "loss": 0.13091278076171875, "step": 1982 }, { "epoch": 0.2763185396781161, "grad_norm": 1.4520325660705566, "learning_rate": 8.847001957002211e-06, "loss": 0.1683197021484375, "step": 1983 }, { "epoch": 0.27645788336933047, "grad_norm": 1.9341391324996948, "learning_rate": 8.845496094502628e-06, "loss": 0.16629791259765625, "step": 1984 }, { "epoch": 0.27659722706054485, "grad_norm": 0.8084222078323364, "learning_rate": 8.843989377621606e-06, "loss": 0.12465286254882812, "step": 1985 }, { "epoch": 0.2767365707517592, "grad_norm": 1.0415805578231812, "learning_rate": 8.842481806693906e-06, "loss": 0.1312713623046875, "step": 1986 }, { "epoch": 0.2768759144429736, "grad_norm": 1.2889357805252075, "learning_rate": 8.840973382054472e-06, "loss": 0.14656448364257812, "step": 1987 }, { "epoch": 0.277015258134188, "grad_norm": 1.3403291702270508, "learning_rate": 8.839464104038445e-06, "loss": 0.12266159057617188, "step": 1988 }, { "epoch": 0.27715460182540236, "grad_norm": 1.3814237117767334, "learning_rate": 8.83795397298115e-06, "loss": 0.13733673095703125, "step": 1989 }, { "epoch": 0.27729394551661674, "grad_norm": 0.7449581623077393, "learning_rate": 8.836442989218104e-06, "loss": 0.10938644409179688, "step": 1990 }, { "epoch": 0.2774332892078311, "grad_norm": 1.4160525798797607, "learning_rate": 8.834931153085014e-06, "loss": 0.14438247680664062, "step": 1991 }, { "epoch": 0.2775726328990455, "grad_norm": 1.2674061059951782, "learning_rate": 8.833418464917774e-06, "loss": 0.12296676635742188, "step": 1992 }, { "epoch": 0.27771197659025987, "grad_norm": 2.429042339324951, "learning_rate": 8.831904925052468e-06, "loss": 0.20604705810546875, "step": 1993 }, { "epoch": 0.27785132028147425, "grad_norm": 1.3876138925552368, "learning_rate": 8.830390533825373e-06, "loss": 0.13485336303710938, "step": 1994 }, { "epoch": 0.2779906639726886, "grad_norm": 1.6551591157913208, "learning_rate": 8.828875291572951e-06, "loss": 0.1861724853515625, "step": 1995 }, { "epoch": 0.278130007663903, "grad_norm": 1.246660828590393, "learning_rate": 8.827359198631854e-06, "loss": 0.12392044067382812, "step": 1996 }, { "epoch": 0.2782693513551174, "grad_norm": 1.2605409622192383, "learning_rate": 8.825842255338923e-06, "loss": 0.14554977416992188, "step": 1997 }, { "epoch": 0.27840869504633176, "grad_norm": 1.138596773147583, "learning_rate": 8.824324462031189e-06, "loss": 0.14782333374023438, "step": 1998 }, { "epoch": 0.27854803873754613, "grad_norm": 1.5981464385986328, "learning_rate": 8.822805819045869e-06, "loss": 0.24048233032226562, "step": 1999 }, { "epoch": 0.2786873824287605, "grad_norm": 1.0799601078033447, "learning_rate": 8.821286326720372e-06, "loss": 0.15272903442382812, "step": 2000 }, { "epoch": 0.27882672611997494, "grad_norm": 1.6165251731872559, "learning_rate": 8.819765985392297e-06, "loss": 0.1532421112060547, "step": 2001 }, { "epoch": 0.2789660698111893, "grad_norm": 1.4336216449737549, "learning_rate": 8.818244795399425e-06, "loss": 0.14867019653320312, "step": 2002 }, { "epoch": 0.2791054135024037, "grad_norm": 1.4931448698043823, "learning_rate": 8.81672275707973e-06, "loss": 0.1927490234375, "step": 2003 }, { "epoch": 0.2792447571936181, "grad_norm": 1.3384966850280762, "learning_rate": 8.815199870771378e-06, "loss": 0.16817092895507812, "step": 2004 }, { "epoch": 0.27938410088483245, "grad_norm": 1.4518048763275146, "learning_rate": 8.813676136812717e-06, "loss": 0.12701988220214844, "step": 2005 }, { "epoch": 0.27952344457604683, "grad_norm": 1.3951278924942017, "learning_rate": 8.812151555542286e-06, "loss": 0.153167724609375, "step": 2006 }, { "epoch": 0.2796627882672612, "grad_norm": 1.556423544883728, "learning_rate": 8.81062612729881e-06, "loss": 0.17887496948242188, "step": 2007 }, { "epoch": 0.2798021319584756, "grad_norm": 0.7510414719581604, "learning_rate": 8.80909985242121e-06, "loss": 0.12012100219726562, "step": 2008 }, { "epoch": 0.27994147564968996, "grad_norm": 0.7492923736572266, "learning_rate": 8.807572731248583e-06, "loss": 0.12190628051757812, "step": 2009 }, { "epoch": 0.28008081934090434, "grad_norm": 1.3688637018203735, "learning_rate": 8.806044764120226e-06, "loss": 0.139190673828125, "step": 2010 }, { "epoch": 0.2802201630321187, "grad_norm": 2.7569336891174316, "learning_rate": 8.804515951375615e-06, "loss": 0.2190532684326172, "step": 2011 }, { "epoch": 0.2803595067233331, "grad_norm": 1.8043622970581055, "learning_rate": 8.802986293354418e-06, "loss": 0.15643692016601562, "step": 2012 }, { "epoch": 0.2804988504145475, "grad_norm": 0.7575451135635376, "learning_rate": 8.80145579039649e-06, "loss": 0.14331817626953125, "step": 2013 }, { "epoch": 0.28063819410576185, "grad_norm": 0.6912246346473694, "learning_rate": 8.799924442841873e-06, "loss": 0.11194229125976562, "step": 2014 }, { "epoch": 0.28077753779697623, "grad_norm": 2.1755943298339844, "learning_rate": 8.798392251030801e-06, "loss": 0.13924026489257812, "step": 2015 }, { "epoch": 0.2809168814881906, "grad_norm": 0.951601505279541, "learning_rate": 8.796859215303688e-06, "loss": 0.12964248657226562, "step": 2016 }, { "epoch": 0.281056225179405, "grad_norm": 1.5115270614624023, "learning_rate": 8.795325336001143e-06, "loss": 0.14841842651367188, "step": 2017 }, { "epoch": 0.28119556887061936, "grad_norm": 0.9802440404891968, "learning_rate": 8.793790613463956e-06, "loss": 0.15819931030273438, "step": 2018 }, { "epoch": 0.28133491256183374, "grad_norm": 2.089965343475342, "learning_rate": 8.792255048033106e-06, "loss": 0.20097732543945312, "step": 2019 }, { "epoch": 0.2814742562530481, "grad_norm": 1.3254644870758057, "learning_rate": 8.790718640049767e-06, "loss": 0.14842987060546875, "step": 2020 }, { "epoch": 0.28161359994426255, "grad_norm": 1.242186188697815, "learning_rate": 8.789181389855288e-06, "loss": 0.1740264892578125, "step": 2021 }, { "epoch": 0.2817529436354769, "grad_norm": 1.9145582914352417, "learning_rate": 8.787643297791214e-06, "loss": 0.22455215454101562, "step": 2022 }, { "epoch": 0.2818922873266913, "grad_norm": 1.2397867441177368, "learning_rate": 8.78610436419927e-06, "loss": 0.1421966552734375, "step": 2023 }, { "epoch": 0.2820316310179057, "grad_norm": 1.2098760604858398, "learning_rate": 8.784564589421373e-06, "loss": 0.15933990478515625, "step": 2024 }, { "epoch": 0.28217097470912006, "grad_norm": 1.2523647546768188, "learning_rate": 8.783023973799632e-06, "loss": 0.16402816772460938, "step": 2025 }, { "epoch": 0.28231031840033444, "grad_norm": 0.8705010414123535, "learning_rate": 8.78148251767633e-06, "loss": 0.12067413330078125, "step": 2026 }, { "epoch": 0.2824496620915488, "grad_norm": 1.1629234552383423, "learning_rate": 8.779940221393946e-06, "loss": 0.1439361572265625, "step": 2027 }, { "epoch": 0.2825890057827632, "grad_norm": 1.3962377309799194, "learning_rate": 8.778397085295141e-06, "loss": 0.17679214477539062, "step": 2028 }, { "epoch": 0.28272834947397757, "grad_norm": 0.811346173286438, "learning_rate": 8.776853109722765e-06, "loss": 0.13006591796875, "step": 2029 }, { "epoch": 0.28286769316519195, "grad_norm": 0.6384456753730774, "learning_rate": 8.775308295019857e-06, "loss": 0.12086105346679688, "step": 2030 }, { "epoch": 0.2830070368564063, "grad_norm": 2.255262851715088, "learning_rate": 8.773762641529637e-06, "loss": 0.17302703857421875, "step": 2031 }, { "epoch": 0.2831463805476207, "grad_norm": 1.7116307020187378, "learning_rate": 8.772216149595515e-06, "loss": 0.1502513885498047, "step": 2032 }, { "epoch": 0.2832857242388351, "grad_norm": 0.7878170609474182, "learning_rate": 8.770668819561085e-06, "loss": 0.12633514404296875, "step": 2033 }, { "epoch": 0.28342506793004946, "grad_norm": 1.3494460582733154, "learning_rate": 8.769120651770128e-06, "loss": 0.15326309204101562, "step": 2034 }, { "epoch": 0.28356441162126383, "grad_norm": 1.435888648033142, "learning_rate": 8.767571646566615e-06, "loss": 0.16889190673828125, "step": 2035 }, { "epoch": 0.2837037553124782, "grad_norm": 1.1407872438430786, "learning_rate": 8.766021804294697e-06, "loss": 0.1314697265625, "step": 2036 }, { "epoch": 0.2838430990036926, "grad_norm": 1.0321953296661377, "learning_rate": 8.764471125298712e-06, "loss": 0.131683349609375, "step": 2037 }, { "epoch": 0.28398244269490697, "grad_norm": 1.0323023796081543, "learning_rate": 8.76291960992319e-06, "loss": 0.14725875854492188, "step": 2038 }, { "epoch": 0.28412178638612134, "grad_norm": 1.386784553527832, "learning_rate": 8.761367258512838e-06, "loss": 0.16059494018554688, "step": 2039 }, { "epoch": 0.2842611300773357, "grad_norm": 1.8751977682113647, "learning_rate": 8.759814071412554e-06, "loss": 0.19646835327148438, "step": 2040 }, { "epoch": 0.28440047376855015, "grad_norm": 0.878367006778717, "learning_rate": 8.758260048967421e-06, "loss": 0.14413070678710938, "step": 2041 }, { "epoch": 0.28453981745976453, "grad_norm": 0.6232907772064209, "learning_rate": 8.75670519152271e-06, "loss": 0.1246337890625, "step": 2042 }, { "epoch": 0.2846791611509789, "grad_norm": 1.4564487934112549, "learning_rate": 8.755149499423871e-06, "loss": 0.1454010009765625, "step": 2043 }, { "epoch": 0.2848185048421933, "grad_norm": 0.9601089358329773, "learning_rate": 8.753592973016545e-06, "loss": 0.1358489990234375, "step": 2044 }, { "epoch": 0.28495784853340766, "grad_norm": 3.0026984214782715, "learning_rate": 8.752035612646557e-06, "loss": 0.23123550415039062, "step": 2045 }, { "epoch": 0.28509719222462204, "grad_norm": 1.2332139015197754, "learning_rate": 8.750477418659914e-06, "loss": 0.15880966186523438, "step": 2046 }, { "epoch": 0.2852365359158364, "grad_norm": 1.0344618558883667, "learning_rate": 8.748918391402816e-06, "loss": 0.13988113403320312, "step": 2047 }, { "epoch": 0.2853758796070508, "grad_norm": 1.057403802871704, "learning_rate": 8.74735853122164e-06, "loss": 0.11330413818359375, "step": 2048 }, { "epoch": 0.2855152232982652, "grad_norm": 2.139326810836792, "learning_rate": 8.745797838462951e-06, "loss": 0.1610870361328125, "step": 2049 }, { "epoch": 0.28565456698947955, "grad_norm": 1.3219456672668457, "learning_rate": 8.7442363134735e-06, "loss": 0.16177749633789062, "step": 2050 }, { "epoch": 0.28579391068069393, "grad_norm": 0.9095986485481262, "learning_rate": 8.742673956600225e-06, "loss": 0.15032577514648438, "step": 2051 }, { "epoch": 0.2859332543719083, "grad_norm": 0.817528247833252, "learning_rate": 8.741110768190242e-06, "loss": 0.13749313354492188, "step": 2052 }, { "epoch": 0.2860725980631227, "grad_norm": 1.7566834688186646, "learning_rate": 8.739546748590857e-06, "loss": 0.12007713317871094, "step": 2053 }, { "epoch": 0.28621194175433706, "grad_norm": 2.5031628608703613, "learning_rate": 8.73798189814956e-06, "loss": 0.14908218383789062, "step": 2054 }, { "epoch": 0.28635128544555144, "grad_norm": 2.194432258605957, "learning_rate": 8.736416217214026e-06, "loss": 0.15694427490234375, "step": 2055 }, { "epoch": 0.2864906291367658, "grad_norm": 0.7243310213088989, "learning_rate": 8.734849706132112e-06, "loss": 0.13374710083007812, "step": 2056 }, { "epoch": 0.2866299728279802, "grad_norm": 1.0348377227783203, "learning_rate": 8.733282365251858e-06, "loss": 0.14398956298828125, "step": 2057 }, { "epoch": 0.2867693165191946, "grad_norm": 0.7072692513465881, "learning_rate": 8.731714194921498e-06, "loss": 0.12382888793945312, "step": 2058 }, { "epoch": 0.28690866021040895, "grad_norm": 1.4379903078079224, "learning_rate": 8.73014519548944e-06, "loss": 0.16581344604492188, "step": 2059 }, { "epoch": 0.2870480039016233, "grad_norm": 2.182924270629883, "learning_rate": 8.72857536730428e-06, "loss": 0.18080520629882812, "step": 2060 }, { "epoch": 0.28718734759283776, "grad_norm": 1.1302517652511597, "learning_rate": 8.7270047107148e-06, "loss": 0.13202285766601562, "step": 2061 }, { "epoch": 0.28732669128405214, "grad_norm": 1.218212366104126, "learning_rate": 8.72543322606996e-06, "loss": 0.1590576171875, "step": 2062 }, { "epoch": 0.2874660349752665, "grad_norm": 1.6639779806137085, "learning_rate": 8.72386091371891e-06, "loss": 0.17257308959960938, "step": 2063 }, { "epoch": 0.2876053786664809, "grad_norm": 0.8972955346107483, "learning_rate": 8.722287774010983e-06, "loss": 0.11588859558105469, "step": 2064 }, { "epoch": 0.28774472235769527, "grad_norm": 2.0348668098449707, "learning_rate": 8.720713807295692e-06, "loss": 0.18458938598632812, "step": 2065 }, { "epoch": 0.28788406604890965, "grad_norm": 1.603200078010559, "learning_rate": 8.71913901392274e-06, "loss": 0.15599822998046875, "step": 2066 }, { "epoch": 0.288023409740124, "grad_norm": 1.5347363948822021, "learning_rate": 8.71756339424201e-06, "loss": 0.15905380249023438, "step": 2067 }, { "epoch": 0.2881627534313384, "grad_norm": 0.9932821393013, "learning_rate": 8.715986948603566e-06, "loss": 0.1440582275390625, "step": 2068 }, { "epoch": 0.2883020971225528, "grad_norm": 1.6689856052398682, "learning_rate": 8.71440967735766e-06, "loss": 0.16645431518554688, "step": 2069 }, { "epoch": 0.28844144081376716, "grad_norm": 0.6709661483764648, "learning_rate": 8.712831580854724e-06, "loss": 0.14455795288085938, "step": 2070 }, { "epoch": 0.28858078450498154, "grad_norm": 1.0910956859588623, "learning_rate": 8.711252659445378e-06, "loss": 0.1665496826171875, "step": 2071 }, { "epoch": 0.2887201281961959, "grad_norm": 1.0057562589645386, "learning_rate": 8.709672913480418e-06, "loss": 0.149169921875, "step": 2072 }, { "epoch": 0.2888594718874103, "grad_norm": 1.2010998725891113, "learning_rate": 8.70809234331083e-06, "loss": 0.16425704956054688, "step": 2073 }, { "epoch": 0.28899881557862467, "grad_norm": 1.3855317831039429, "learning_rate": 8.706510949287782e-06, "loss": 0.18324661254882812, "step": 2074 }, { "epoch": 0.28913815926983905, "grad_norm": 1.642284870147705, "learning_rate": 8.70492873176262e-06, "loss": 0.15412139892578125, "step": 2075 }, { "epoch": 0.2892775029610534, "grad_norm": 1.3704752922058105, "learning_rate": 8.703345691086882e-06, "loss": 0.1203765869140625, "step": 2076 }, { "epoch": 0.2894168466522678, "grad_norm": 1.2240041494369507, "learning_rate": 8.701761827612278e-06, "loss": 0.14272308349609375, "step": 2077 }, { "epoch": 0.2895561903434822, "grad_norm": 1.3705881834030151, "learning_rate": 8.700177141690708e-06, "loss": 0.14475631713867188, "step": 2078 }, { "epoch": 0.28969553403469656, "grad_norm": 1.7804477214813232, "learning_rate": 8.698591633674256e-06, "loss": 0.1719207763671875, "step": 2079 }, { "epoch": 0.28983487772591093, "grad_norm": 1.0298644304275513, "learning_rate": 8.697005303915183e-06, "loss": 0.1386699676513672, "step": 2080 }, { "epoch": 0.28997422141712537, "grad_norm": 1.8070815801620483, "learning_rate": 8.695418152765933e-06, "loss": 0.15656280517578125, "step": 2081 }, { "epoch": 0.29011356510833974, "grad_norm": 1.1512603759765625, "learning_rate": 8.693830180579139e-06, "loss": 0.13642501831054688, "step": 2082 }, { "epoch": 0.2902529087995541, "grad_norm": 1.5050238370895386, "learning_rate": 8.69224138770761e-06, "loss": 0.14417648315429688, "step": 2083 }, { "epoch": 0.2903922524907685, "grad_norm": 2.459225654602051, "learning_rate": 8.69065177450434e-06, "loss": 0.195465087890625, "step": 2084 }, { "epoch": 0.2905315961819829, "grad_norm": 0.9293490052223206, "learning_rate": 8.689061341322505e-06, "loss": 0.12220382690429688, "step": 2085 }, { "epoch": 0.29067093987319725, "grad_norm": 0.8454229831695557, "learning_rate": 8.687470088515464e-06, "loss": 0.13436126708984375, "step": 2086 }, { "epoch": 0.29081028356441163, "grad_norm": 1.3060988187789917, "learning_rate": 8.685878016436753e-06, "loss": 0.15255355834960938, "step": 2087 }, { "epoch": 0.290949627255626, "grad_norm": 1.3037959337234497, "learning_rate": 8.684285125440099e-06, "loss": 0.139556884765625, "step": 2088 }, { "epoch": 0.2910889709468404, "grad_norm": 0.9609202742576599, "learning_rate": 8.682691415879402e-06, "loss": 0.10697364807128906, "step": 2089 }, { "epoch": 0.29122831463805476, "grad_norm": 1.5621013641357422, "learning_rate": 8.681096888108751e-06, "loss": 0.14659500122070312, "step": 2090 }, { "epoch": 0.29136765832926914, "grad_norm": 1.4699960947036743, "learning_rate": 8.679501542482412e-06, "loss": 0.15601348876953125, "step": 2091 }, { "epoch": 0.2915070020204835, "grad_norm": 1.4947410821914673, "learning_rate": 8.677905379354834e-06, "loss": 0.15609359741210938, "step": 2092 }, { "epoch": 0.2916463457116979, "grad_norm": 2.013388156890869, "learning_rate": 8.67630839908065e-06, "loss": 0.20479583740234375, "step": 2093 }, { "epoch": 0.2917856894029123, "grad_norm": 1.3516491651535034, "learning_rate": 8.674710602014672e-06, "loss": 0.1784210205078125, "step": 2094 }, { "epoch": 0.29192503309412665, "grad_norm": 1.304867148399353, "learning_rate": 8.673111988511892e-06, "loss": 0.15210342407226562, "step": 2095 }, { "epoch": 0.29206437678534103, "grad_norm": 0.6865823864936829, "learning_rate": 8.671512558927483e-06, "loss": 0.1225738525390625, "step": 2096 }, { "epoch": 0.2922037204765554, "grad_norm": 1.1062915325164795, "learning_rate": 8.669912313616811e-06, "loss": 0.18015289306640625, "step": 2097 }, { "epoch": 0.2923430641677698, "grad_norm": 0.779522180557251, "learning_rate": 8.668311252935407e-06, "loss": 0.13668060302734375, "step": 2098 }, { "epoch": 0.29248240785898416, "grad_norm": 0.9962401986122131, "learning_rate": 8.66670937723899e-06, "loss": 0.13002777099609375, "step": 2099 }, { "epoch": 0.29262175155019854, "grad_norm": 0.9500732421875, "learning_rate": 8.665106686883461e-06, "loss": 0.15476036071777344, "step": 2100 }, { "epoch": 0.29276109524141297, "grad_norm": 0.9572453498840332, "learning_rate": 8.663503182224906e-06, "loss": 0.13586807250976562, "step": 2101 }, { "epoch": 0.29290043893262735, "grad_norm": 0.9533047676086426, "learning_rate": 8.66189886361958e-06, "loss": 0.14704132080078125, "step": 2102 }, { "epoch": 0.2930397826238417, "grad_norm": 1.9752980470657349, "learning_rate": 8.660293731423929e-06, "loss": 0.21368789672851562, "step": 2103 }, { "epoch": 0.2931791263150561, "grad_norm": 1.090815544128418, "learning_rate": 8.658687785994579e-06, "loss": 0.14531707763671875, "step": 2104 }, { "epoch": 0.2933184700062705, "grad_norm": 1.4065937995910645, "learning_rate": 8.657081027688332e-06, "loss": 0.1565837860107422, "step": 2105 }, { "epoch": 0.29345781369748486, "grad_norm": 0.7100845575332642, "learning_rate": 8.655473456862172e-06, "loss": 0.13181304931640625, "step": 2106 }, { "epoch": 0.29359715738869924, "grad_norm": 1.6074050664901733, "learning_rate": 8.653865073873265e-06, "loss": 0.1390228271484375, "step": 2107 }, { "epoch": 0.2937365010799136, "grad_norm": 1.217483401298523, "learning_rate": 8.652255879078959e-06, "loss": 0.13196563720703125, "step": 2108 }, { "epoch": 0.293875844771128, "grad_norm": 0.8795140981674194, "learning_rate": 8.650645872836779e-06, "loss": 0.14402389526367188, "step": 2109 }, { "epoch": 0.29401518846234237, "grad_norm": 1.4976993799209595, "learning_rate": 8.649035055504431e-06, "loss": 0.14324569702148438, "step": 2110 }, { "epoch": 0.29415453215355675, "grad_norm": 0.6215610504150391, "learning_rate": 8.647423427439804e-06, "loss": 0.1176300048828125, "step": 2111 }, { "epoch": 0.2942938758447711, "grad_norm": 1.854934573173523, "learning_rate": 8.645810989000962e-06, "loss": 0.12884521484375, "step": 2112 }, { "epoch": 0.2944332195359855, "grad_norm": 0.8351986408233643, "learning_rate": 8.644197740546153e-06, "loss": 0.14381027221679688, "step": 2113 }, { "epoch": 0.2945725632271999, "grad_norm": 0.6444517374038696, "learning_rate": 8.642583682433808e-06, "loss": 0.12695693969726562, "step": 2114 }, { "epoch": 0.29471190691841426, "grad_norm": 1.4927115440368652, "learning_rate": 8.640968815022529e-06, "loss": 0.15725326538085938, "step": 2115 }, { "epoch": 0.29485125060962863, "grad_norm": 0.9975505471229553, "learning_rate": 8.6393531386711e-06, "loss": 0.146759033203125, "step": 2116 }, { "epoch": 0.294990594300843, "grad_norm": 1.2045727968215942, "learning_rate": 8.637736653738496e-06, "loss": 0.1616382598876953, "step": 2117 }, { "epoch": 0.2951299379920574, "grad_norm": 1.056904911994934, "learning_rate": 8.636119360583857e-06, "loss": 0.14418411254882812, "step": 2118 }, { "epoch": 0.29526928168327177, "grad_norm": 1.2415812015533447, "learning_rate": 8.63450125956651e-06, "loss": 0.13112640380859375, "step": 2119 }, { "epoch": 0.29540862537448614, "grad_norm": 1.2528339624404907, "learning_rate": 8.63288235104596e-06, "loss": 0.14966201782226562, "step": 2120 }, { "epoch": 0.2955479690657006, "grad_norm": 1.0252286195755005, "learning_rate": 8.631262635381892e-06, "loss": 0.1229095458984375, "step": 2121 }, { "epoch": 0.29568731275691496, "grad_norm": 0.7514464259147644, "learning_rate": 8.629642112934169e-06, "loss": 0.11303901672363281, "step": 2122 }, { "epoch": 0.29582665644812933, "grad_norm": 0.4952964782714844, "learning_rate": 8.628020784062837e-06, "loss": 0.10290145874023438, "step": 2123 }, { "epoch": 0.2959660001393437, "grad_norm": 1.8377020359039307, "learning_rate": 8.626398649128113e-06, "loss": 0.14014244079589844, "step": 2124 }, { "epoch": 0.2961053438305581, "grad_norm": 0.6825270652770996, "learning_rate": 8.624775708490403e-06, "loss": 0.11124038696289062, "step": 2125 }, { "epoch": 0.29624468752177247, "grad_norm": 0.9328694343566895, "learning_rate": 8.623151962510284e-06, "loss": 0.15589141845703125, "step": 2126 }, { "epoch": 0.29638403121298684, "grad_norm": 1.0543941259384155, "learning_rate": 8.621527411548517e-06, "loss": 0.16100311279296875, "step": 2127 }, { "epoch": 0.2965233749042012, "grad_norm": 1.6806840896606445, "learning_rate": 8.619902055966043e-06, "loss": 0.19321060180664062, "step": 2128 }, { "epoch": 0.2966627185954156, "grad_norm": 1.8113577365875244, "learning_rate": 8.618275896123973e-06, "loss": 0.17676544189453125, "step": 2129 }, { "epoch": 0.29680206228663, "grad_norm": 1.1342060565948486, "learning_rate": 8.616648932383607e-06, "loss": 0.15016555786132812, "step": 2130 }, { "epoch": 0.29694140597784435, "grad_norm": 1.1642282009124756, "learning_rate": 8.615021165106415e-06, "loss": 0.15631484985351562, "step": 2131 }, { "epoch": 0.29708074966905873, "grad_norm": 1.4183785915374756, "learning_rate": 8.613392594654056e-06, "loss": 0.14557266235351562, "step": 2132 }, { "epoch": 0.2972200933602731, "grad_norm": 0.944288969039917, "learning_rate": 8.611763221388356e-06, "loss": 0.12269973754882812, "step": 2133 }, { "epoch": 0.2973594370514875, "grad_norm": 0.7821986079216003, "learning_rate": 8.610133045671325e-06, "loss": 0.13248825073242188, "step": 2134 }, { "epoch": 0.29749878074270186, "grad_norm": 1.3220326900482178, "learning_rate": 8.608502067865155e-06, "loss": 0.14342880249023438, "step": 2135 }, { "epoch": 0.29763812443391624, "grad_norm": 0.8644072413444519, "learning_rate": 8.606870288332206e-06, "loss": 0.12346267700195312, "step": 2136 }, { "epoch": 0.2977774681251306, "grad_norm": 1.2871546745300293, "learning_rate": 8.605237707435028e-06, "loss": 0.15112686157226562, "step": 2137 }, { "epoch": 0.297916811816345, "grad_norm": 1.0998166799545288, "learning_rate": 8.603604325536338e-06, "loss": 0.14333724975585938, "step": 2138 }, { "epoch": 0.2980561555075594, "grad_norm": 1.7349597215652466, "learning_rate": 8.60197014299904e-06, "loss": 0.14615631103515625, "step": 2139 }, { "epoch": 0.29819549919877375, "grad_norm": 1.672113299369812, "learning_rate": 8.600335160186208e-06, "loss": 0.18575096130371094, "step": 2140 }, { "epoch": 0.2983348428899882, "grad_norm": 0.8102411031723022, "learning_rate": 8.598699377461104e-06, "loss": 0.14660263061523438, "step": 2141 }, { "epoch": 0.29847418658120256, "grad_norm": 0.6592290997505188, "learning_rate": 8.597062795187157e-06, "loss": 0.1175689697265625, "step": 2142 }, { "epoch": 0.29861353027241694, "grad_norm": 2.609468698501587, "learning_rate": 8.595425413727979e-06, "loss": 0.167938232421875, "step": 2143 }, { "epoch": 0.2987528739636313, "grad_norm": 1.302126407623291, "learning_rate": 8.593787233447357e-06, "loss": 0.15998077392578125, "step": 2144 }, { "epoch": 0.2988922176548457, "grad_norm": 0.8826500177383423, "learning_rate": 8.592148254709262e-06, "loss": 0.118438720703125, "step": 2145 }, { "epoch": 0.29903156134606007, "grad_norm": 0.7957780361175537, "learning_rate": 8.590508477877834e-06, "loss": 0.15314483642578125, "step": 2146 }, { "epoch": 0.29917090503727445, "grad_norm": 1.412819266319275, "learning_rate": 8.588867903317395e-06, "loss": 0.19578170776367188, "step": 2147 }, { "epoch": 0.2993102487284888, "grad_norm": 1.2517746686935425, "learning_rate": 8.587226531392443e-06, "loss": 0.17403793334960938, "step": 2148 }, { "epoch": 0.2994495924197032, "grad_norm": 1.8601776361465454, "learning_rate": 8.585584362467652e-06, "loss": 0.17452239990234375, "step": 2149 }, { "epoch": 0.2995889361109176, "grad_norm": 0.8833549618721008, "learning_rate": 8.583941396907877e-06, "loss": 0.12638092041015625, "step": 2150 }, { "epoch": 0.29972827980213196, "grad_norm": 0.72746342420578, "learning_rate": 8.582297635078149e-06, "loss": 0.14226150512695312, "step": 2151 }, { "epoch": 0.29986762349334634, "grad_norm": 0.8248234987258911, "learning_rate": 8.58065307734367e-06, "loss": 0.128692626953125, "step": 2152 }, { "epoch": 0.3000069671845607, "grad_norm": 1.4301648139953613, "learning_rate": 8.579007724069823e-06, "loss": 0.17513656616210938, "step": 2153 }, { "epoch": 0.3001463108757751, "grad_norm": 0.8533750772476196, "learning_rate": 8.577361575622171e-06, "loss": 0.11527824401855469, "step": 2154 }, { "epoch": 0.30028565456698947, "grad_norm": 1.6602154970169067, "learning_rate": 8.575714632366451e-06, "loss": 0.1564178466796875, "step": 2155 }, { "epoch": 0.30042499825820385, "grad_norm": 1.590995192527771, "learning_rate": 8.574066894668573e-06, "loss": 0.15779876708984375, "step": 2156 }, { "epoch": 0.3005643419494182, "grad_norm": 1.34828519821167, "learning_rate": 8.57241836289463e-06, "loss": 0.19274520874023438, "step": 2157 }, { "epoch": 0.3007036856406326, "grad_norm": 2.3091354370117188, "learning_rate": 8.570769037410885e-06, "loss": 0.16422653198242188, "step": 2158 }, { "epoch": 0.300843029331847, "grad_norm": 3.0269076824188232, "learning_rate": 8.56911891858378e-06, "loss": 0.19752883911132812, "step": 2159 }, { "epoch": 0.30098237302306136, "grad_norm": 1.4792349338531494, "learning_rate": 8.56746800677994e-06, "loss": 0.16070938110351562, "step": 2160 }, { "epoch": 0.3011217167142758, "grad_norm": 0.9454519748687744, "learning_rate": 8.565816302366151e-06, "loss": 0.11327934265136719, "step": 2161 }, { "epoch": 0.30126106040549017, "grad_norm": 0.6694503426551819, "learning_rate": 8.564163805709393e-06, "loss": 0.1301708221435547, "step": 2162 }, { "epoch": 0.30140040409670454, "grad_norm": 0.6678603291511536, "learning_rate": 8.562510517176807e-06, "loss": 0.12679290771484375, "step": 2163 }, { "epoch": 0.3015397477879189, "grad_norm": 1.9580117464065552, "learning_rate": 8.560856437135716e-06, "loss": 0.1876373291015625, "step": 2164 }, { "epoch": 0.3016790914791333, "grad_norm": 1.8684855699539185, "learning_rate": 8.559201565953623e-06, "loss": 0.16494369506835938, "step": 2165 }, { "epoch": 0.3018184351703477, "grad_norm": 1.5165294408798218, "learning_rate": 8.557545903998197e-06, "loss": 0.16260147094726562, "step": 2166 }, { "epoch": 0.30195777886156205, "grad_norm": 1.6357052326202393, "learning_rate": 8.555889451637294e-06, "loss": 0.18580055236816406, "step": 2167 }, { "epoch": 0.30209712255277643, "grad_norm": 1.1543787717819214, "learning_rate": 8.554232209238935e-06, "loss": 0.1625537872314453, "step": 2168 }, { "epoch": 0.3022364662439908, "grad_norm": 1.461698293685913, "learning_rate": 8.552574177171326e-06, "loss": 0.15442657470703125, "step": 2169 }, { "epoch": 0.3023758099352052, "grad_norm": 1.1735279560089111, "learning_rate": 8.55091535580284e-06, "loss": 0.13464736938476562, "step": 2170 }, { "epoch": 0.30251515362641956, "grad_norm": 1.5877610445022583, "learning_rate": 8.54925574550203e-06, "loss": 0.18545913696289062, "step": 2171 }, { "epoch": 0.30265449731763394, "grad_norm": 1.4453110694885254, "learning_rate": 8.547595346637624e-06, "loss": 0.15001678466796875, "step": 2172 }, { "epoch": 0.3027938410088483, "grad_norm": 0.8978962898254395, "learning_rate": 8.545934159578527e-06, "loss": 0.15891265869140625, "step": 2173 }, { "epoch": 0.3029331847000627, "grad_norm": 1.3097065687179565, "learning_rate": 8.544272184693814e-06, "loss": 0.16099929809570312, "step": 2174 }, { "epoch": 0.3030725283912771, "grad_norm": 1.5524883270263672, "learning_rate": 8.542609422352738e-06, "loss": 0.1641559600830078, "step": 2175 }, { "epoch": 0.30321187208249145, "grad_norm": 2.74043869972229, "learning_rate": 8.540945872924728e-06, "loss": 0.18642044067382812, "step": 2176 }, { "epoch": 0.30335121577370583, "grad_norm": 0.9375717639923096, "learning_rate": 8.539281536779388e-06, "loss": 0.15598678588867188, "step": 2177 }, { "epoch": 0.3034905594649202, "grad_norm": 0.8195748925209045, "learning_rate": 8.537616414286491e-06, "loss": 0.13097763061523438, "step": 2178 }, { "epoch": 0.3036299031561346, "grad_norm": 0.6986966729164124, "learning_rate": 8.535950505815993e-06, "loss": 0.1237335205078125, "step": 2179 }, { "epoch": 0.30376924684734896, "grad_norm": 1.2240540981292725, "learning_rate": 8.53428381173802e-06, "loss": 0.16619491577148438, "step": 2180 }, { "epoch": 0.30390859053856334, "grad_norm": 1.3608719110488892, "learning_rate": 8.532616332422872e-06, "loss": 0.15695571899414062, "step": 2181 }, { "epoch": 0.30404793422977777, "grad_norm": 1.4255471229553223, "learning_rate": 8.530948068241028e-06, "loss": 0.12851715087890625, "step": 2182 }, { "epoch": 0.30418727792099215, "grad_norm": 1.0078811645507812, "learning_rate": 8.529279019563133e-06, "loss": 0.13776779174804688, "step": 2183 }, { "epoch": 0.3043266216122065, "grad_norm": 0.9065312743186951, "learning_rate": 8.527609186760017e-06, "loss": 0.15069961547851562, "step": 2184 }, { "epoch": 0.3044659653034209, "grad_norm": 0.948142945766449, "learning_rate": 8.525938570202676e-06, "loss": 0.12871932983398438, "step": 2185 }, { "epoch": 0.3046053089946353, "grad_norm": 1.2718125581741333, "learning_rate": 8.524267170262283e-06, "loss": 0.17584228515625, "step": 2186 }, { "epoch": 0.30474465268584966, "grad_norm": 0.9754272103309631, "learning_rate": 8.522594987310184e-06, "loss": 0.11561965942382812, "step": 2187 }, { "epoch": 0.30488399637706404, "grad_norm": 1.1114293336868286, "learning_rate": 8.520922021717903e-06, "loss": 0.13805389404296875, "step": 2188 }, { "epoch": 0.3050233400682784, "grad_norm": 1.225780725479126, "learning_rate": 8.519248273857132e-06, "loss": 0.1580944061279297, "step": 2189 }, { "epoch": 0.3051626837594928, "grad_norm": 0.985074520111084, "learning_rate": 8.51757374409974e-06, "loss": 0.14410781860351562, "step": 2190 }, { "epoch": 0.30530202745070717, "grad_norm": 1.1834344863891602, "learning_rate": 8.51589843281777e-06, "loss": 0.19025421142578125, "step": 2191 }, { "epoch": 0.30544137114192155, "grad_norm": 0.9956873655319214, "learning_rate": 8.514222340383438e-06, "loss": 0.1374053955078125, "step": 2192 }, { "epoch": 0.3055807148331359, "grad_norm": 1.53929603099823, "learning_rate": 8.512545467169133e-06, "loss": 0.14641571044921875, "step": 2193 }, { "epoch": 0.3057200585243503, "grad_norm": 0.9849341511726379, "learning_rate": 8.510867813547417e-06, "loss": 0.13893890380859375, "step": 2194 }, { "epoch": 0.3058594022155647, "grad_norm": 1.2723275423049927, "learning_rate": 8.509189379891029e-06, "loss": 0.14117431640625, "step": 2195 }, { "epoch": 0.30599874590677906, "grad_norm": 0.8544754981994629, "learning_rate": 8.507510166572875e-06, "loss": 0.14946365356445312, "step": 2196 }, { "epoch": 0.30613808959799343, "grad_norm": 1.4698057174682617, "learning_rate": 8.50583017396604e-06, "loss": 0.16811752319335938, "step": 2197 }, { "epoch": 0.3062774332892078, "grad_norm": 2.0125722885131836, "learning_rate": 8.504149402443782e-06, "loss": 0.13117408752441406, "step": 2198 }, { "epoch": 0.3064167769804222, "grad_norm": 1.7483443021774292, "learning_rate": 8.502467852379526e-06, "loss": 0.149658203125, "step": 2199 }, { "epoch": 0.30655612067163657, "grad_norm": 0.6634429097175598, "learning_rate": 8.500785524146875e-06, "loss": 0.11905479431152344, "step": 2200 }, { "epoch": 0.30669546436285094, "grad_norm": 0.8609215021133423, "learning_rate": 8.499102418119607e-06, "loss": 0.137725830078125, "step": 2201 }, { "epoch": 0.3068348080540654, "grad_norm": 0.9741875529289246, "learning_rate": 8.497418534671666e-06, "loss": 0.14833450317382812, "step": 2202 }, { "epoch": 0.30697415174527976, "grad_norm": 0.8945652842521667, "learning_rate": 8.495733874177176e-06, "loss": 0.14543914794921875, "step": 2203 }, { "epoch": 0.30711349543649413, "grad_norm": 1.7654666900634766, "learning_rate": 8.494048437010427e-06, "loss": 0.15951919555664062, "step": 2204 }, { "epoch": 0.3072528391277085, "grad_norm": 1.0599429607391357, "learning_rate": 8.492362223545884e-06, "loss": 0.1370086669921875, "step": 2205 }, { "epoch": 0.3073921828189229, "grad_norm": 1.1319197416305542, "learning_rate": 8.49067523415819e-06, "loss": 0.1353626251220703, "step": 2206 }, { "epoch": 0.30753152651013727, "grad_norm": 1.4506105184555054, "learning_rate": 8.48898746922215e-06, "loss": 0.1659698486328125, "step": 2207 }, { "epoch": 0.30767087020135164, "grad_norm": 1.188976526260376, "learning_rate": 8.487298929112751e-06, "loss": 0.16358375549316406, "step": 2208 }, { "epoch": 0.307810213892566, "grad_norm": 1.702538013458252, "learning_rate": 8.485609614205146e-06, "loss": 0.17244338989257812, "step": 2209 }, { "epoch": 0.3079495575837804, "grad_norm": 0.9889676570892334, "learning_rate": 8.483919524874661e-06, "loss": 0.12813186645507812, "step": 2210 }, { "epoch": 0.3080889012749948, "grad_norm": 1.0518327951431274, "learning_rate": 8.482228661496797e-06, "loss": 0.14551544189453125, "step": 2211 }, { "epoch": 0.30822824496620915, "grad_norm": 1.6467134952545166, "learning_rate": 8.480537024447227e-06, "loss": 0.15399932861328125, "step": 2212 }, { "epoch": 0.30836758865742353, "grad_norm": 1.5025627613067627, "learning_rate": 8.478844614101792e-06, "loss": 0.15196609497070312, "step": 2213 }, { "epoch": 0.3085069323486379, "grad_norm": 1.4763497114181519, "learning_rate": 8.477151430836505e-06, "loss": 0.11957550048828125, "step": 2214 }, { "epoch": 0.3086462760398523, "grad_norm": 1.3506925106048584, "learning_rate": 8.475457475027555e-06, "loss": 0.13370895385742188, "step": 2215 }, { "epoch": 0.30878561973106666, "grad_norm": 1.4090458154678345, "learning_rate": 8.473762747051302e-06, "loss": 0.1648101806640625, "step": 2216 }, { "epoch": 0.30892496342228104, "grad_norm": 0.6066166758537292, "learning_rate": 8.472067247284272e-06, "loss": 0.12872695922851562, "step": 2217 }, { "epoch": 0.3090643071134954, "grad_norm": 0.7530635595321655, "learning_rate": 8.470370976103171e-06, "loss": 0.13596343994140625, "step": 2218 }, { "epoch": 0.3092036508047098, "grad_norm": 1.6839205026626587, "learning_rate": 8.468673933884867e-06, "loss": 0.14289093017578125, "step": 2219 }, { "epoch": 0.3093429944959242, "grad_norm": 1.5845832824707031, "learning_rate": 8.466976121006407e-06, "loss": 0.14113616943359375, "step": 2220 }, { "epoch": 0.30948233818713855, "grad_norm": 2.011927843093872, "learning_rate": 8.465277537845004e-06, "loss": 0.16870880126953125, "step": 2221 }, { "epoch": 0.309621681878353, "grad_norm": 0.7224236130714417, "learning_rate": 8.463578184778047e-06, "loss": 0.12750625610351562, "step": 2222 }, { "epoch": 0.30976102556956736, "grad_norm": 0.8678048253059387, "learning_rate": 8.461878062183092e-06, "loss": 0.13564682006835938, "step": 2223 }, { "epoch": 0.30990036926078174, "grad_norm": 1.1755316257476807, "learning_rate": 8.460177170437865e-06, "loss": 0.142822265625, "step": 2224 }, { "epoch": 0.3100397129519961, "grad_norm": 1.216827154159546, "learning_rate": 8.458475509920272e-06, "loss": 0.15314292907714844, "step": 2225 }, { "epoch": 0.3101790566432105, "grad_norm": 2.4473729133605957, "learning_rate": 8.456773081008376e-06, "loss": 0.1561431884765625, "step": 2226 }, { "epoch": 0.31031840033442487, "grad_norm": 1.5744197368621826, "learning_rate": 8.455069884080422e-06, "loss": 0.14902496337890625, "step": 2227 }, { "epoch": 0.31045774402563925, "grad_norm": 1.067522644996643, "learning_rate": 8.45336591951482e-06, "loss": 0.16146564483642578, "step": 2228 }, { "epoch": 0.3105970877168536, "grad_norm": 0.5621907711029053, "learning_rate": 8.451661187690154e-06, "loss": 0.10259246826171875, "step": 2229 }, { "epoch": 0.310736431408068, "grad_norm": 0.9544671177864075, "learning_rate": 8.449955688985174e-06, "loss": 0.13338088989257812, "step": 2230 }, { "epoch": 0.3108757750992824, "grad_norm": 1.5062668323516846, "learning_rate": 8.448249423778802e-06, "loss": 0.15204620361328125, "step": 2231 }, { "epoch": 0.31101511879049676, "grad_norm": 1.8977922201156616, "learning_rate": 8.446542392450134e-06, "loss": 0.2109527587890625, "step": 2232 }, { "epoch": 0.31115446248171114, "grad_norm": 1.3838717937469482, "learning_rate": 8.444834595378434e-06, "loss": 0.1605072021484375, "step": 2233 }, { "epoch": 0.3112938061729255, "grad_norm": 1.4648336172103882, "learning_rate": 8.443126032943132e-06, "loss": 0.14675521850585938, "step": 2234 }, { "epoch": 0.3114331498641399, "grad_norm": 1.5274611711502075, "learning_rate": 8.441416705523834e-06, "loss": 0.160797119140625, "step": 2235 }, { "epoch": 0.31157249355535427, "grad_norm": 1.3349424600601196, "learning_rate": 8.439706613500312e-06, "loss": 0.15291595458984375, "step": 2236 }, { "epoch": 0.31171183724656865, "grad_norm": 1.8560740947723389, "learning_rate": 8.43799575725251e-06, "loss": 0.1769866943359375, "step": 2237 }, { "epoch": 0.311851180937783, "grad_norm": 0.8129417896270752, "learning_rate": 8.436284137160544e-06, "loss": 0.1563873291015625, "step": 2238 }, { "epoch": 0.3119905246289974, "grad_norm": 0.8824335336685181, "learning_rate": 8.434571753604693e-06, "loss": 0.1536407470703125, "step": 2239 }, { "epoch": 0.3121298683202118, "grad_norm": 0.7841432690620422, "learning_rate": 8.432858606965411e-06, "loss": 0.14453506469726562, "step": 2240 }, { "epoch": 0.31226921201142616, "grad_norm": 1.5005285739898682, "learning_rate": 8.43114469762332e-06, "loss": 0.12734603881835938, "step": 2241 }, { "epoch": 0.3124085557026406, "grad_norm": 1.8091480731964111, "learning_rate": 8.429430025959212e-06, "loss": 0.1560211181640625, "step": 2242 }, { "epoch": 0.31254789939385497, "grad_norm": 1.3236559629440308, "learning_rate": 8.427714592354046e-06, "loss": 0.1899261474609375, "step": 2243 }, { "epoch": 0.31268724308506934, "grad_norm": 0.9226680994033813, "learning_rate": 8.425998397188955e-06, "loss": 0.11303329467773438, "step": 2244 }, { "epoch": 0.3128265867762837, "grad_norm": 1.318761944770813, "learning_rate": 8.424281440845236e-06, "loss": 0.155731201171875, "step": 2245 }, { "epoch": 0.3129659304674981, "grad_norm": 1.2185938358306885, "learning_rate": 8.42256372370436e-06, "loss": 0.155792236328125, "step": 2246 }, { "epoch": 0.3131052741587125, "grad_norm": 0.8165801167488098, "learning_rate": 8.420845246147961e-06, "loss": 0.14134597778320312, "step": 2247 }, { "epoch": 0.31324461784992685, "grad_norm": 1.9234211444854736, "learning_rate": 8.41912600855785e-06, "loss": 0.204681396484375, "step": 2248 }, { "epoch": 0.31338396154114123, "grad_norm": 1.1351218223571777, "learning_rate": 8.417406011316e-06, "loss": 0.1349639892578125, "step": 2249 }, { "epoch": 0.3135233052323556, "grad_norm": 1.2045706510543823, "learning_rate": 8.415685254804552e-06, "loss": 0.14568328857421875, "step": 2250 }, { "epoch": 0.31366264892357, "grad_norm": 0.7921881675720215, "learning_rate": 8.413963739405824e-06, "loss": 0.12015533447265625, "step": 2251 }, { "epoch": 0.31380199261478436, "grad_norm": 1.0956270694732666, "learning_rate": 8.412241465502294e-06, "loss": 0.15435791015625, "step": 2252 }, { "epoch": 0.31394133630599874, "grad_norm": 2.526259660720825, "learning_rate": 8.410518433476613e-06, "loss": 0.19245529174804688, "step": 2253 }, { "epoch": 0.3140806799972131, "grad_norm": 1.1522228717803955, "learning_rate": 8.408794643711601e-06, "loss": 0.13607025146484375, "step": 2254 }, { "epoch": 0.3142200236884275, "grad_norm": 1.0234678983688354, "learning_rate": 8.407070096590243e-06, "loss": 0.115234375, "step": 2255 }, { "epoch": 0.3143593673796419, "grad_norm": 0.7581057548522949, "learning_rate": 8.405344792495694e-06, "loss": 0.146484375, "step": 2256 }, { "epoch": 0.31449871107085625, "grad_norm": 0.973900318145752, "learning_rate": 8.403618731811277e-06, "loss": 0.11559295654296875, "step": 2257 }, { "epoch": 0.31463805476207063, "grad_norm": 1.7028979063034058, "learning_rate": 8.401891914920483e-06, "loss": 0.16988372802734375, "step": 2258 }, { "epoch": 0.314777398453285, "grad_norm": 0.8065655827522278, "learning_rate": 8.400164342206973e-06, "loss": 0.16866302490234375, "step": 2259 }, { "epoch": 0.3149167421444994, "grad_norm": 0.715257465839386, "learning_rate": 8.398436014054575e-06, "loss": 0.13079452514648438, "step": 2260 }, { "epoch": 0.31505608583571376, "grad_norm": 1.2304829359054565, "learning_rate": 8.39670693084728e-06, "loss": 0.1590900421142578, "step": 2261 }, { "epoch": 0.3151954295269282, "grad_norm": 0.6127623915672302, "learning_rate": 8.394977092969253e-06, "loss": 0.12294387817382812, "step": 2262 }, { "epoch": 0.31533477321814257, "grad_norm": 1.4310128688812256, "learning_rate": 8.393246500804825e-06, "loss": 0.150634765625, "step": 2263 }, { "epoch": 0.31547411690935695, "grad_norm": 1.239534616470337, "learning_rate": 8.391515154738495e-06, "loss": 0.20727920532226562, "step": 2264 }, { "epoch": 0.3156134606005713, "grad_norm": 0.9299101829528809, "learning_rate": 8.389783055154925e-06, "loss": 0.14588165283203125, "step": 2265 }, { "epoch": 0.3157528042917857, "grad_norm": 0.8668771982192993, "learning_rate": 8.388050202438952e-06, "loss": 0.13680267333984375, "step": 2266 }, { "epoch": 0.3158921479830001, "grad_norm": 0.5192999839782715, "learning_rate": 8.386316596975574e-06, "loss": 0.11832809448242188, "step": 2267 }, { "epoch": 0.31603149167421446, "grad_norm": 0.827117383480072, "learning_rate": 8.38458223914996e-06, "loss": 0.14615631103515625, "step": 2268 }, { "epoch": 0.31617083536542884, "grad_norm": 1.9042099714279175, "learning_rate": 8.38284712934744e-06, "loss": 0.17302703857421875, "step": 2269 }, { "epoch": 0.3163101790566432, "grad_norm": 1.021083950996399, "learning_rate": 8.381111267953523e-06, "loss": 0.15397262573242188, "step": 2270 }, { "epoch": 0.3164495227478576, "grad_norm": 1.16817307472229, "learning_rate": 8.379374655353874e-06, "loss": 0.1534881591796875, "step": 2271 }, { "epoch": 0.31658886643907197, "grad_norm": 1.8899719715118408, "learning_rate": 8.377637291934329e-06, "loss": 0.166961669921875, "step": 2272 }, { "epoch": 0.31672821013028635, "grad_norm": 0.8430889248847961, "learning_rate": 8.37589917808089e-06, "loss": 0.12874984741210938, "step": 2273 }, { "epoch": 0.3168675538215007, "grad_norm": 1.5008909702301025, "learning_rate": 8.374160314179727e-06, "loss": 0.17305755615234375, "step": 2274 }, { "epoch": 0.3170068975127151, "grad_norm": 0.573156476020813, "learning_rate": 8.372420700617176e-06, "loss": 0.12026405334472656, "step": 2275 }, { "epoch": 0.3171462412039295, "grad_norm": 1.647874355316162, "learning_rate": 8.370680337779737e-06, "loss": 0.15438461303710938, "step": 2276 }, { "epoch": 0.31728558489514386, "grad_norm": 1.0129269361495972, "learning_rate": 8.368939226054083e-06, "loss": 0.1385040283203125, "step": 2277 }, { "epoch": 0.31742492858635823, "grad_norm": 1.7546526193618774, "learning_rate": 8.367197365827047e-06, "loss": 0.17810440063476562, "step": 2278 }, { "epoch": 0.3175642722775726, "grad_norm": 1.5071592330932617, "learning_rate": 8.36545475748563e-06, "loss": 0.24188995361328125, "step": 2279 }, { "epoch": 0.317703615968787, "grad_norm": 1.8797084093093872, "learning_rate": 8.363711401417e-06, "loss": 0.16361618041992188, "step": 2280 }, { "epoch": 0.31784295966000137, "grad_norm": 1.3889731168746948, "learning_rate": 8.361967298008494e-06, "loss": 0.1466217041015625, "step": 2281 }, { "epoch": 0.3179823033512158, "grad_norm": 1.5360586643218994, "learning_rate": 8.360222447647606e-06, "loss": 0.13765335083007812, "step": 2282 }, { "epoch": 0.3181216470424302, "grad_norm": 1.3588436841964722, "learning_rate": 8.358476850722007e-06, "loss": 0.13208770751953125, "step": 2283 }, { "epoch": 0.31826099073364456, "grad_norm": 1.1727352142333984, "learning_rate": 8.356730507619526e-06, "loss": 0.15109634399414062, "step": 2284 }, { "epoch": 0.31840033442485893, "grad_norm": 1.3799660205841064, "learning_rate": 8.354983418728165e-06, "loss": 0.11561965942382812, "step": 2285 }, { "epoch": 0.3185396781160733, "grad_norm": 1.5993032455444336, "learning_rate": 8.353235584436082e-06, "loss": 0.17805099487304688, "step": 2286 }, { "epoch": 0.3186790218072877, "grad_norm": 0.9326078295707703, "learning_rate": 8.351487005131606e-06, "loss": 0.12066078186035156, "step": 2287 }, { "epoch": 0.31881836549850207, "grad_norm": 0.7449610233306885, "learning_rate": 8.349737681203234e-06, "loss": 0.1181488037109375, "step": 2288 }, { "epoch": 0.31895770918971644, "grad_norm": 0.9833586812019348, "learning_rate": 8.347987613039626e-06, "loss": 0.1746368408203125, "step": 2289 }, { "epoch": 0.3190970528809308, "grad_norm": 0.9621439576148987, "learning_rate": 8.346236801029605e-06, "loss": 0.1466217041015625, "step": 2290 }, { "epoch": 0.3192363965721452, "grad_norm": 1.1909466981887817, "learning_rate": 8.344485245562165e-06, "loss": 0.1526336669921875, "step": 2291 }, { "epoch": 0.3193757402633596, "grad_norm": 0.7218484282493591, "learning_rate": 8.342732947026457e-06, "loss": 0.128509521484375, "step": 2292 }, { "epoch": 0.31951508395457395, "grad_norm": 0.7063557505607605, "learning_rate": 8.340979905811805e-06, "loss": 0.13304901123046875, "step": 2293 }, { "epoch": 0.31965442764578833, "grad_norm": 1.4512885808944702, "learning_rate": 8.339226122307696e-06, "loss": 0.148040771484375, "step": 2294 }, { "epoch": 0.3197937713370027, "grad_norm": 0.9069974422454834, "learning_rate": 8.337471596903774e-06, "loss": 0.13027191162109375, "step": 2295 }, { "epoch": 0.3199331150282171, "grad_norm": 0.6183248162269592, "learning_rate": 8.335716329989863e-06, "loss": 0.11423110961914062, "step": 2296 }, { "epoch": 0.32007245871943146, "grad_norm": 1.160406470298767, "learning_rate": 8.333960321955937e-06, "loss": 0.15211868286132812, "step": 2297 }, { "epoch": 0.32021180241064584, "grad_norm": 1.1064780950546265, "learning_rate": 8.332203573192143e-06, "loss": 0.12260055541992188, "step": 2298 }, { "epoch": 0.3203511461018602, "grad_norm": 0.5035974979400635, "learning_rate": 8.330446084088791e-06, "loss": 0.09700393676757812, "step": 2299 }, { "epoch": 0.3204904897930746, "grad_norm": 1.0550423860549927, "learning_rate": 8.328687855036355e-06, "loss": 0.15851211547851562, "step": 2300 }, { "epoch": 0.320629833484289, "grad_norm": 1.1421103477478027, "learning_rate": 8.326928886425471e-06, "loss": 0.13587570190429688, "step": 2301 }, { "epoch": 0.3207691771755034, "grad_norm": 1.7704219818115234, "learning_rate": 8.325169178646946e-06, "loss": 0.15695953369140625, "step": 2302 }, { "epoch": 0.3209085208667178, "grad_norm": 0.8582000732421875, "learning_rate": 8.323408732091743e-06, "loss": 0.1452007293701172, "step": 2303 }, { "epoch": 0.32104786455793216, "grad_norm": 1.2110434770584106, "learning_rate": 8.321647547150995e-06, "loss": 0.14903640747070312, "step": 2304 }, { "epoch": 0.32118720824914654, "grad_norm": 2.222667694091797, "learning_rate": 8.319885624215996e-06, "loss": 0.17916488647460938, "step": 2305 }, { "epoch": 0.3213265519403609, "grad_norm": 1.1649816036224365, "learning_rate": 8.318122963678206e-06, "loss": 0.14191818237304688, "step": 2306 }, { "epoch": 0.3214658956315753, "grad_norm": 0.62156742811203, "learning_rate": 8.316359565929248e-06, "loss": 0.10878753662109375, "step": 2307 }, { "epoch": 0.32160523932278967, "grad_norm": 0.6884058117866516, "learning_rate": 8.314595431360906e-06, "loss": 0.11959457397460938, "step": 2308 }, { "epoch": 0.32174458301400405, "grad_norm": 0.4372009336948395, "learning_rate": 8.312830560365136e-06, "loss": 0.10549354553222656, "step": 2309 }, { "epoch": 0.3218839267052184, "grad_norm": 0.8885916471481323, "learning_rate": 8.311064953334046e-06, "loss": 0.15610885620117188, "step": 2310 }, { "epoch": 0.3220232703964328, "grad_norm": 1.5016167163848877, "learning_rate": 8.309298610659917e-06, "loss": 0.17281341552734375, "step": 2311 }, { "epoch": 0.3221626140876472, "grad_norm": 1.6358129978179932, "learning_rate": 8.307531532735188e-06, "loss": 0.15688323974609375, "step": 2312 }, { "epoch": 0.32230195777886156, "grad_norm": 1.306868314743042, "learning_rate": 8.305763719952467e-06, "loss": 0.14530467987060547, "step": 2313 }, { "epoch": 0.32244130147007594, "grad_norm": 0.6332684755325317, "learning_rate": 8.303995172704519e-06, "loss": 0.11865234375, "step": 2314 }, { "epoch": 0.3225806451612903, "grad_norm": 1.5788956880569458, "learning_rate": 8.302225891384275e-06, "loss": 0.16080474853515625, "step": 2315 }, { "epoch": 0.3227199888525047, "grad_norm": 0.825365424156189, "learning_rate": 8.300455876384827e-06, "loss": 0.1243743896484375, "step": 2316 }, { "epoch": 0.32285933254371907, "grad_norm": 1.6673120260238647, "learning_rate": 8.298685128099437e-06, "loss": 0.16025543212890625, "step": 2317 }, { "epoch": 0.32299867623493345, "grad_norm": 1.067936897277832, "learning_rate": 8.29691364692152e-06, "loss": 0.14990234375, "step": 2318 }, { "epoch": 0.3231380199261478, "grad_norm": 1.0255123376846313, "learning_rate": 8.29514143324466e-06, "loss": 0.15419387817382812, "step": 2319 }, { "epoch": 0.3232773636173622, "grad_norm": 1.2559489011764526, "learning_rate": 8.293368487462604e-06, "loss": 0.1819915771484375, "step": 2320 }, { "epoch": 0.3234167073085766, "grad_norm": 0.9563826322555542, "learning_rate": 8.29159480996926e-06, "loss": 0.13699722290039062, "step": 2321 }, { "epoch": 0.323556050999791, "grad_norm": 0.8072509765625, "learning_rate": 8.289820401158695e-06, "loss": 0.1339740753173828, "step": 2322 }, { "epoch": 0.3236953946910054, "grad_norm": 1.0457985401153564, "learning_rate": 8.288045261425146e-06, "loss": 0.13756179809570312, "step": 2323 }, { "epoch": 0.32383473838221977, "grad_norm": 0.7965518236160278, "learning_rate": 8.286269391163006e-06, "loss": 0.14998245239257812, "step": 2324 }, { "epoch": 0.32397408207343414, "grad_norm": 1.0762780904769897, "learning_rate": 8.284492790766835e-06, "loss": 0.18486785888671875, "step": 2325 }, { "epoch": 0.3241134257646485, "grad_norm": 1.0166468620300293, "learning_rate": 8.282715460631354e-06, "loss": 0.16505050659179688, "step": 2326 }, { "epoch": 0.3242527694558629, "grad_norm": 1.456793189048767, "learning_rate": 8.280937401151441e-06, "loss": 0.2013397216796875, "step": 2327 }, { "epoch": 0.3243921131470773, "grad_norm": 1.8521629571914673, "learning_rate": 8.279158612722145e-06, "loss": 0.2140045166015625, "step": 2328 }, { "epoch": 0.32453145683829165, "grad_norm": 0.969364583492279, "learning_rate": 8.277379095738668e-06, "loss": 0.14718246459960938, "step": 2329 }, { "epoch": 0.32467080052950603, "grad_norm": 1.098504900932312, "learning_rate": 8.27559885059638e-06, "loss": 0.1229705810546875, "step": 2330 }, { "epoch": 0.3248101442207204, "grad_norm": 2.3588993549346924, "learning_rate": 8.273817877690809e-06, "loss": 0.18040847778320312, "step": 2331 }, { "epoch": 0.3249494879119348, "grad_norm": 0.7690607905387878, "learning_rate": 8.272036177417649e-06, "loss": 0.11311721801757812, "step": 2332 }, { "epoch": 0.32508883160314916, "grad_norm": 0.7651403546333313, "learning_rate": 8.270253750172754e-06, "loss": 0.147674560546875, "step": 2333 }, { "epoch": 0.32522817529436354, "grad_norm": 0.8082922697067261, "learning_rate": 8.268470596352134e-06, "loss": 0.1186981201171875, "step": 2334 }, { "epoch": 0.3253675189855779, "grad_norm": 1.8202613592147827, "learning_rate": 8.26668671635197e-06, "loss": 0.15142440795898438, "step": 2335 }, { "epoch": 0.3255068626767923, "grad_norm": 2.241617441177368, "learning_rate": 8.264902110568598e-06, "loss": 0.23215484619140625, "step": 2336 }, { "epoch": 0.3256462063680067, "grad_norm": 1.1791027784347534, "learning_rate": 8.263116779398514e-06, "loss": 0.1196441650390625, "step": 2337 }, { "epoch": 0.32578555005922105, "grad_norm": 1.443052053451538, "learning_rate": 8.261330723238381e-06, "loss": 0.16243743896484375, "step": 2338 }, { "epoch": 0.32592489375043543, "grad_norm": 1.220360279083252, "learning_rate": 8.25954394248502e-06, "loss": 0.13653182983398438, "step": 2339 }, { "epoch": 0.3260642374416498, "grad_norm": 1.2884501218795776, "learning_rate": 8.25775643753541e-06, "loss": 0.158538818359375, "step": 2340 }, { "epoch": 0.3262035811328642, "grad_norm": 1.587760090827942, "learning_rate": 8.255968208786694e-06, "loss": 0.17620468139648438, "step": 2341 }, { "epoch": 0.3263429248240786, "grad_norm": 0.8176992535591125, "learning_rate": 8.25417925663618e-06, "loss": 0.12317085266113281, "step": 2342 }, { "epoch": 0.326482268515293, "grad_norm": 0.655275285243988, "learning_rate": 8.252389581481328e-06, "loss": 0.11256790161132812, "step": 2343 }, { "epoch": 0.3266216122065074, "grad_norm": 0.7663647532463074, "learning_rate": 8.250599183719763e-06, "loss": 0.14472198486328125, "step": 2344 }, { "epoch": 0.32676095589772175, "grad_norm": 0.9775019884109497, "learning_rate": 8.248808063749273e-06, "loss": 0.16933822631835938, "step": 2345 }, { "epoch": 0.3269002995889361, "grad_norm": 1.0569175481796265, "learning_rate": 8.247016221967802e-06, "loss": 0.13934326171875, "step": 2346 }, { "epoch": 0.3270396432801505, "grad_norm": 1.2695293426513672, "learning_rate": 8.245223658773459e-06, "loss": 0.15404891967773438, "step": 2347 }, { "epoch": 0.3271789869713649, "grad_norm": 1.5054872035980225, "learning_rate": 8.243430374564507e-06, "loss": 0.1761932373046875, "step": 2348 }, { "epoch": 0.32731833066257926, "grad_norm": 0.9268682599067688, "learning_rate": 8.241636369739376e-06, "loss": 0.13251495361328125, "step": 2349 }, { "epoch": 0.32745767435379364, "grad_norm": 1.484777808189392, "learning_rate": 8.23984164469665e-06, "loss": 0.16522979736328125, "step": 2350 }, { "epoch": 0.327597018045008, "grad_norm": 1.6845346689224243, "learning_rate": 8.23804619983508e-06, "loss": 0.14900588989257812, "step": 2351 }, { "epoch": 0.3277363617362224, "grad_norm": 1.406761646270752, "learning_rate": 8.236250035553569e-06, "loss": 0.16073226928710938, "step": 2352 }, { "epoch": 0.32787570542743677, "grad_norm": 0.6526510715484619, "learning_rate": 8.234453152251183e-06, "loss": 0.1292724609375, "step": 2353 }, { "epoch": 0.32801504911865115, "grad_norm": 0.7362359762191772, "learning_rate": 8.23265555032715e-06, "loss": 0.133209228515625, "step": 2354 }, { "epoch": 0.3281543928098655, "grad_norm": 1.408353328704834, "learning_rate": 8.23085723018086e-06, "loss": 0.16153335571289062, "step": 2355 }, { "epoch": 0.3282937365010799, "grad_norm": 0.712138831615448, "learning_rate": 8.229058192211851e-06, "loss": 0.12438583374023438, "step": 2356 }, { "epoch": 0.3284330801922943, "grad_norm": 1.0482133626937866, "learning_rate": 8.227258436819836e-06, "loss": 0.163299560546875, "step": 2357 }, { "epoch": 0.32857242388350866, "grad_norm": 1.0846991539001465, "learning_rate": 8.225457964404675e-06, "loss": 0.1382598876953125, "step": 2358 }, { "epoch": 0.32871176757472303, "grad_norm": 0.8203637599945068, "learning_rate": 8.223656775366393e-06, "loss": 0.14276504516601562, "step": 2359 }, { "epoch": 0.3288511112659374, "grad_norm": 0.8390489220619202, "learning_rate": 8.221854870105172e-06, "loss": 0.1324787139892578, "step": 2360 }, { "epoch": 0.3289904549571518, "grad_norm": 1.091652512550354, "learning_rate": 8.220052249021356e-06, "loss": 0.13545989990234375, "step": 2361 }, { "epoch": 0.32912979864836617, "grad_norm": 0.6052064299583435, "learning_rate": 8.218248912515443e-06, "loss": 0.13413238525390625, "step": 2362 }, { "epoch": 0.3292691423395806, "grad_norm": 1.6907285451889038, "learning_rate": 8.216444860988098e-06, "loss": 0.17206192016601562, "step": 2363 }, { "epoch": 0.329408486030795, "grad_norm": 1.6761395931243896, "learning_rate": 8.214640094840136e-06, "loss": 0.143585205078125, "step": 2364 }, { "epoch": 0.32954782972200936, "grad_norm": 1.063666820526123, "learning_rate": 8.212834614472538e-06, "loss": 0.15329742431640625, "step": 2365 }, { "epoch": 0.32968717341322373, "grad_norm": 1.0777530670166016, "learning_rate": 8.211028420286437e-06, "loss": 0.1605682373046875, "step": 2366 }, { "epoch": 0.3298265171044381, "grad_norm": 0.7185034155845642, "learning_rate": 8.209221512683132e-06, "loss": 0.1293010711669922, "step": 2367 }, { "epoch": 0.3299658607956525, "grad_norm": 1.6903914213180542, "learning_rate": 8.207413892064073e-06, "loss": 0.18081283569335938, "step": 2368 }, { "epoch": 0.33010520448686687, "grad_norm": 1.2059686183929443, "learning_rate": 8.205605558830873e-06, "loss": 0.17154312133789062, "step": 2369 }, { "epoch": 0.33024454817808124, "grad_norm": 1.3993382453918457, "learning_rate": 8.203796513385307e-06, "loss": 0.16921615600585938, "step": 2370 }, { "epoch": 0.3303838918692956, "grad_norm": 1.0181760787963867, "learning_rate": 8.201986756129297e-06, "loss": 0.11228561401367188, "step": 2371 }, { "epoch": 0.33052323556051, "grad_norm": 1.051184058189392, "learning_rate": 8.200176287464931e-06, "loss": 0.14947509765625, "step": 2372 }, { "epoch": 0.3306625792517244, "grad_norm": 1.454946517944336, "learning_rate": 8.198365107794457e-06, "loss": 0.14263534545898438, "step": 2373 }, { "epoch": 0.33080192294293875, "grad_norm": 0.9748407602310181, "learning_rate": 8.196553217520275e-06, "loss": 0.13924789428710938, "step": 2374 }, { "epoch": 0.33094126663415313, "grad_norm": 0.692716658115387, "learning_rate": 8.194740617044948e-06, "loss": 0.15445327758789062, "step": 2375 }, { "epoch": 0.3310806103253675, "grad_norm": 0.6610167622566223, "learning_rate": 8.192927306771193e-06, "loss": 0.11178970336914062, "step": 2376 }, { "epoch": 0.3312199540165819, "grad_norm": 0.9629775881767273, "learning_rate": 8.191113287101884e-06, "loss": 0.1342182159423828, "step": 2377 }, { "epoch": 0.33135929770779626, "grad_norm": 1.264047622680664, "learning_rate": 8.18929855844006e-06, "loss": 0.15960693359375, "step": 2378 }, { "epoch": 0.33149864139901064, "grad_norm": 0.9470515847206116, "learning_rate": 8.187483121188908e-06, "loss": 0.1540374755859375, "step": 2379 }, { "epoch": 0.331637985090225, "grad_norm": 0.8012553453445435, "learning_rate": 8.185666975751778e-06, "loss": 0.13393020629882812, "step": 2380 }, { "epoch": 0.3317773287814394, "grad_norm": 0.738779604434967, "learning_rate": 8.183850122532174e-06, "loss": 0.11819839477539062, "step": 2381 }, { "epoch": 0.3319166724726538, "grad_norm": 1.0925869941711426, "learning_rate": 8.182032561933764e-06, "loss": 0.16412353515625, "step": 2382 }, { "epoch": 0.3320560161638682, "grad_norm": 1.2202295064926147, "learning_rate": 8.180214294360365e-06, "loss": 0.15208816528320312, "step": 2383 }, { "epoch": 0.3321953598550826, "grad_norm": 0.5849182605743408, "learning_rate": 8.178395320215953e-06, "loss": 0.12258148193359375, "step": 2384 }, { "epoch": 0.33233470354629696, "grad_norm": 1.190800666809082, "learning_rate": 8.176575639904668e-06, "loss": 0.18292999267578125, "step": 2385 }, { "epoch": 0.33247404723751134, "grad_norm": 1.371573805809021, "learning_rate": 8.174755253830797e-06, "loss": 0.17380523681640625, "step": 2386 }, { "epoch": 0.3326133909287257, "grad_norm": 1.2702306509017944, "learning_rate": 8.17293416239879e-06, "loss": 0.16404342651367188, "step": 2387 }, { "epoch": 0.3327527346199401, "grad_norm": 1.3707337379455566, "learning_rate": 8.171112366013252e-06, "loss": 0.15187835693359375, "step": 2388 }, { "epoch": 0.33289207831115447, "grad_norm": 1.5374751091003418, "learning_rate": 8.169289865078942e-06, "loss": 0.1903839111328125, "step": 2389 }, { "epoch": 0.33303142200236885, "grad_norm": 1.1638914346694946, "learning_rate": 8.167466660000781e-06, "loss": 0.19017791748046875, "step": 2390 }, { "epoch": 0.3331707656935832, "grad_norm": 0.8756290078163147, "learning_rate": 8.165642751183844e-06, "loss": 0.13594436645507812, "step": 2391 }, { "epoch": 0.3333101093847976, "grad_norm": 1.4224845170974731, "learning_rate": 8.163818139033359e-06, "loss": 0.14188003540039062, "step": 2392 }, { "epoch": 0.333449453076012, "grad_norm": 0.7183130979537964, "learning_rate": 8.161992823954715e-06, "loss": 0.12336349487304688, "step": 2393 }, { "epoch": 0.33358879676722636, "grad_norm": 1.0853122472763062, "learning_rate": 8.160166806353455e-06, "loss": 0.1551055908203125, "step": 2394 }, { "epoch": 0.33372814045844074, "grad_norm": 0.6977395415306091, "learning_rate": 8.15834008663528e-06, "loss": 0.14202117919921875, "step": 2395 }, { "epoch": 0.3338674841496551, "grad_norm": 1.1141680479049683, "learning_rate": 8.156512665206043e-06, "loss": 0.14116287231445312, "step": 2396 }, { "epoch": 0.3340068278408695, "grad_norm": 0.8748314380645752, "learning_rate": 8.154684542471754e-06, "loss": 0.12868881225585938, "step": 2397 }, { "epoch": 0.33414617153208387, "grad_norm": 0.6864430904388428, "learning_rate": 8.152855718838583e-06, "loss": 0.1073150634765625, "step": 2398 }, { "epoch": 0.33428551522329825, "grad_norm": 1.3980910778045654, "learning_rate": 8.151026194712854e-06, "loss": 0.16925048828125, "step": 2399 }, { "epoch": 0.3344248589145126, "grad_norm": 0.6837722063064575, "learning_rate": 8.149195970501043e-06, "loss": 0.12876510620117188, "step": 2400 }, { "epoch": 0.334564202605727, "grad_norm": 1.202326774597168, "learning_rate": 8.147365046609786e-06, "loss": 0.1356353759765625, "step": 2401 }, { "epoch": 0.3347035462969414, "grad_norm": 1.2095988988876343, "learning_rate": 8.145533423445869e-06, "loss": 0.14266204833984375, "step": 2402 }, { "epoch": 0.3348428899881558, "grad_norm": 1.1264952421188354, "learning_rate": 8.14370110141624e-06, "loss": 0.13169479370117188, "step": 2403 }, { "epoch": 0.3349822336793702, "grad_norm": 0.7149360775947571, "learning_rate": 8.141868080927998e-06, "loss": 0.12506866455078125, "step": 2404 }, { "epoch": 0.33512157737058457, "grad_norm": 1.8425235748291016, "learning_rate": 8.140034362388398e-06, "loss": 0.19521713256835938, "step": 2405 }, { "epoch": 0.33526092106179894, "grad_norm": 1.6337229013442993, "learning_rate": 8.13819994620485e-06, "loss": 0.15153121948242188, "step": 2406 }, { "epoch": 0.3354002647530133, "grad_norm": 0.9905885457992554, "learning_rate": 8.136364832784923e-06, "loss": 0.15050506591796875, "step": 2407 }, { "epoch": 0.3355396084442277, "grad_norm": 1.8411566019058228, "learning_rate": 8.134529022536332e-06, "loss": 0.1776275634765625, "step": 2408 }, { "epoch": 0.3356789521354421, "grad_norm": 0.895760715007782, "learning_rate": 8.132692515866959e-06, "loss": 0.12273406982421875, "step": 2409 }, { "epoch": 0.33581829582665645, "grad_norm": 1.48466157913208, "learning_rate": 8.130855313184824e-06, "loss": 0.16167640686035156, "step": 2410 }, { "epoch": 0.33595763951787083, "grad_norm": 1.1750668287277222, "learning_rate": 8.129017414898121e-06, "loss": 0.1310443878173828, "step": 2411 }, { "epoch": 0.3360969832090852, "grad_norm": 1.8523937463760376, "learning_rate": 8.127178821415183e-06, "loss": 0.16745567321777344, "step": 2412 }, { "epoch": 0.3362363269002996, "grad_norm": 1.0166921615600586, "learning_rate": 8.125339533144507e-06, "loss": 0.1647796630859375, "step": 2413 }, { "epoch": 0.33637567059151396, "grad_norm": 0.7496773600578308, "learning_rate": 8.123499550494737e-06, "loss": 0.11578178405761719, "step": 2414 }, { "epoch": 0.33651501428272834, "grad_norm": 1.2373310327529907, "learning_rate": 8.12165887387468e-06, "loss": 0.1323699951171875, "step": 2415 }, { "epoch": 0.3366543579739427, "grad_norm": 1.581307291984558, "learning_rate": 8.11981750369329e-06, "loss": 0.12073516845703125, "step": 2416 }, { "epoch": 0.3367937016651571, "grad_norm": 0.766645073890686, "learning_rate": 8.117975440359677e-06, "loss": 0.15102386474609375, "step": 2417 }, { "epoch": 0.3369330453563715, "grad_norm": 0.9604763984680176, "learning_rate": 8.116132684283104e-06, "loss": 0.12723541259765625, "step": 2418 }, { "epoch": 0.33707238904758585, "grad_norm": 0.6172603368759155, "learning_rate": 8.114289235872993e-06, "loss": 0.12454986572265625, "step": 2419 }, { "epoch": 0.33721173273880023, "grad_norm": 0.8922236561775208, "learning_rate": 8.112445095538915e-06, "loss": 0.15710830688476562, "step": 2420 }, { "epoch": 0.3373510764300146, "grad_norm": 0.8248695731163025, "learning_rate": 8.110600263690592e-06, "loss": 0.13225173950195312, "step": 2421 }, { "epoch": 0.337490420121229, "grad_norm": 0.6835898160934448, "learning_rate": 8.10875474073791e-06, "loss": 0.1501617431640625, "step": 2422 }, { "epoch": 0.3376297638124434, "grad_norm": 0.9455791711807251, "learning_rate": 8.106908527090895e-06, "loss": 0.16903305053710938, "step": 2423 }, { "epoch": 0.3377691075036578, "grad_norm": 1.0647457838058472, "learning_rate": 8.10506162315974e-06, "loss": 0.14018821716308594, "step": 2424 }, { "epoch": 0.3379084511948722, "grad_norm": 1.070406436920166, "learning_rate": 8.103214029354783e-06, "loss": 0.18186569213867188, "step": 2425 }, { "epoch": 0.33804779488608655, "grad_norm": 1.1474045515060425, "learning_rate": 8.101365746086514e-06, "loss": 0.137542724609375, "step": 2426 }, { "epoch": 0.3381871385773009, "grad_norm": 0.9496793150901794, "learning_rate": 8.099516773765581e-06, "loss": 0.14120101928710938, "step": 2427 }, { "epoch": 0.3383264822685153, "grad_norm": 0.5875399708747864, "learning_rate": 8.097667112802784e-06, "loss": 0.13525772094726562, "step": 2428 }, { "epoch": 0.3384658259597297, "grad_norm": 0.7845638990402222, "learning_rate": 8.095816763609077e-06, "loss": 0.14110755920410156, "step": 2429 }, { "epoch": 0.33860516965094406, "grad_norm": 1.067177414894104, "learning_rate": 8.093965726595565e-06, "loss": 0.14104843139648438, "step": 2430 }, { "epoch": 0.33874451334215844, "grad_norm": 1.0154780149459839, "learning_rate": 8.092114002173503e-06, "loss": 0.14028167724609375, "step": 2431 }, { "epoch": 0.3388838570333728, "grad_norm": 1.3940149545669556, "learning_rate": 8.090261590754304e-06, "loss": 0.19330978393554688, "step": 2432 }, { "epoch": 0.3390232007245872, "grad_norm": 1.176094889640808, "learning_rate": 8.088408492749534e-06, "loss": 0.14923477172851562, "step": 2433 }, { "epoch": 0.33916254441580157, "grad_norm": 0.8079965710639954, "learning_rate": 8.086554708570901e-06, "loss": 0.13885116577148438, "step": 2434 }, { "epoch": 0.33930188810701595, "grad_norm": 1.6171480417251587, "learning_rate": 8.084700238630283e-06, "loss": 0.18149185180664062, "step": 2435 }, { "epoch": 0.3394412317982303, "grad_norm": 1.2884283065795898, "learning_rate": 8.082845083339698e-06, "loss": 0.1769866943359375, "step": 2436 }, { "epoch": 0.3395805754894447, "grad_norm": 1.587579607963562, "learning_rate": 8.080989243111315e-06, "loss": 0.14836502075195312, "step": 2437 }, { "epoch": 0.3397199191806591, "grad_norm": 1.5537458658218384, "learning_rate": 8.079132718357465e-06, "loss": 0.1477489471435547, "step": 2438 }, { "epoch": 0.33985926287187346, "grad_norm": 0.7828855514526367, "learning_rate": 8.07727550949062e-06, "loss": 0.13546371459960938, "step": 2439 }, { "epoch": 0.33999860656308784, "grad_norm": 0.48857006430625916, "learning_rate": 8.075417616923413e-06, "loss": 0.11385726928710938, "step": 2440 }, { "epoch": 0.3401379502543022, "grad_norm": 1.5428416728973389, "learning_rate": 8.073559041068626e-06, "loss": 0.14111328125, "step": 2441 }, { "epoch": 0.3402772939455166, "grad_norm": 0.7423590421676636, "learning_rate": 8.071699782339188e-06, "loss": 0.12211227416992188, "step": 2442 }, { "epoch": 0.340416637636731, "grad_norm": 0.9968569874763489, "learning_rate": 8.06983984114819e-06, "loss": 0.17191314697265625, "step": 2443 }, { "epoch": 0.3405559813279454, "grad_norm": 1.2585710287094116, "learning_rate": 8.067979217908864e-06, "loss": 0.21035003662109375, "step": 2444 }, { "epoch": 0.3406953250191598, "grad_norm": 1.0320686101913452, "learning_rate": 8.066117913034597e-06, "loss": 0.16469955444335938, "step": 2445 }, { "epoch": 0.34083466871037416, "grad_norm": 1.5427608489990234, "learning_rate": 8.06425592693893e-06, "loss": 0.15796279907226562, "step": 2446 }, { "epoch": 0.34097401240158853, "grad_norm": 1.7600001096725464, "learning_rate": 8.062393260035557e-06, "loss": 0.17525482177734375, "step": 2447 }, { "epoch": 0.3411133560928029, "grad_norm": 1.2637033462524414, "learning_rate": 8.060529912738316e-06, "loss": 0.16109085083007812, "step": 2448 }, { "epoch": 0.3412526997840173, "grad_norm": 2.0757229328155518, "learning_rate": 8.058665885461201e-06, "loss": 0.17794036865234375, "step": 2449 }, { "epoch": 0.34139204347523167, "grad_norm": 2.2102034091949463, "learning_rate": 8.056801178618357e-06, "loss": 0.13804244995117188, "step": 2450 }, { "epoch": 0.34153138716644604, "grad_norm": 1.9554160833358765, "learning_rate": 8.05493579262408e-06, "loss": 0.14735794067382812, "step": 2451 }, { "epoch": 0.3416707308576604, "grad_norm": 1.1615597009658813, "learning_rate": 8.053069727892813e-06, "loss": 0.18633270263671875, "step": 2452 }, { "epoch": 0.3418100745488748, "grad_norm": 1.301830530166626, "learning_rate": 8.051202984839157e-06, "loss": 0.15468978881835938, "step": 2453 }, { "epoch": 0.3419494182400892, "grad_norm": 1.2106093168258667, "learning_rate": 8.049335563877858e-06, "loss": 0.15038681030273438, "step": 2454 }, { "epoch": 0.34208876193130355, "grad_norm": 1.1681777238845825, "learning_rate": 8.047467465423813e-06, "loss": 0.12520599365234375, "step": 2455 }, { "epoch": 0.34222810562251793, "grad_norm": 1.1481637954711914, "learning_rate": 8.045598689892072e-06, "loss": 0.15373611450195312, "step": 2456 }, { "epoch": 0.3423674493137323, "grad_norm": 1.4728773832321167, "learning_rate": 8.043729237697835e-06, "loss": 0.169677734375, "step": 2457 }, { "epoch": 0.3425067930049467, "grad_norm": 0.8153510093688965, "learning_rate": 8.041859109256452e-06, "loss": 0.10138702392578125, "step": 2458 }, { "epoch": 0.34264613669616106, "grad_norm": 2.075955867767334, "learning_rate": 8.03998830498342e-06, "loss": 0.16379547119140625, "step": 2459 }, { "epoch": 0.34278548038737544, "grad_norm": 1.8112269639968872, "learning_rate": 8.038116825294393e-06, "loss": 0.17362594604492188, "step": 2460 }, { "epoch": 0.3429248240785898, "grad_norm": 0.8423580527305603, "learning_rate": 8.036244670605166e-06, "loss": 0.14985275268554688, "step": 2461 }, { "epoch": 0.3430641677698042, "grad_norm": 1.0398215055465698, "learning_rate": 8.034371841331693e-06, "loss": 0.18904876708984375, "step": 2462 }, { "epoch": 0.34320351146101863, "grad_norm": 1.6058878898620605, "learning_rate": 8.032498337890073e-06, "loss": 0.17404556274414062, "step": 2463 }, { "epoch": 0.343342855152233, "grad_norm": 1.3314967155456543, "learning_rate": 8.030624160696554e-06, "loss": 0.18132972717285156, "step": 2464 }, { "epoch": 0.3434821988434474, "grad_norm": 1.1916972398757935, "learning_rate": 8.02874931016754e-06, "loss": 0.16312408447265625, "step": 2465 }, { "epoch": 0.34362154253466176, "grad_norm": 0.9447700381278992, "learning_rate": 8.026873786719574e-06, "loss": 0.12607192993164062, "step": 2466 }, { "epoch": 0.34376088622587614, "grad_norm": 0.8060306906700134, "learning_rate": 8.024997590769359e-06, "loss": 0.1253509521484375, "step": 2467 }, { "epoch": 0.3439002299170905, "grad_norm": 1.842868447303772, "learning_rate": 8.02312072273374e-06, "loss": 0.19844627380371094, "step": 2468 }, { "epoch": 0.3440395736083049, "grad_norm": 0.9542020559310913, "learning_rate": 8.021243183029715e-06, "loss": 0.14926528930664062, "step": 2469 }, { "epoch": 0.34417891729951927, "grad_norm": 0.949608564376831, "learning_rate": 8.019364972074432e-06, "loss": 0.134521484375, "step": 2470 }, { "epoch": 0.34431826099073365, "grad_norm": 0.7847649455070496, "learning_rate": 8.017486090285185e-06, "loss": 0.12200164794921875, "step": 2471 }, { "epoch": 0.344457604681948, "grad_norm": 0.7990655303001404, "learning_rate": 8.01560653807942e-06, "loss": 0.133514404296875, "step": 2472 }, { "epoch": 0.3445969483731624, "grad_norm": 1.1381338834762573, "learning_rate": 8.013726315874729e-06, "loss": 0.1427001953125, "step": 2473 }, { "epoch": 0.3447362920643768, "grad_norm": 0.7339212894439697, "learning_rate": 8.011845424088856e-06, "loss": 0.1461334228515625, "step": 2474 }, { "epoch": 0.34487563575559116, "grad_norm": 0.9343142509460449, "learning_rate": 8.009963863139689e-06, "loss": 0.13328170776367188, "step": 2475 }, { "epoch": 0.34501497944680554, "grad_norm": 1.1634142398834229, "learning_rate": 8.008081633445272e-06, "loss": 0.18358230590820312, "step": 2476 }, { "epoch": 0.3451543231380199, "grad_norm": 0.8541991710662842, "learning_rate": 8.00619873542379e-06, "loss": 0.12530136108398438, "step": 2477 }, { "epoch": 0.3452936668292343, "grad_norm": 1.4555014371871948, "learning_rate": 8.004315169493586e-06, "loss": 0.14165496826171875, "step": 2478 }, { "epoch": 0.34543301052044867, "grad_norm": 0.9261483550071716, "learning_rate": 8.002430936073137e-06, "loss": 0.12994766235351562, "step": 2479 }, { "epoch": 0.34557235421166305, "grad_norm": 1.1831666231155396, "learning_rate": 8.000546035581083e-06, "loss": 0.14965438842773438, "step": 2480 }, { "epoch": 0.3457116979028774, "grad_norm": 0.87895268201828, "learning_rate": 7.998660468436202e-06, "loss": 0.13827133178710938, "step": 2481 }, { "epoch": 0.3458510415940918, "grad_norm": 1.3005115985870361, "learning_rate": 7.996774235057425e-06, "loss": 0.17022323608398438, "step": 2482 }, { "epoch": 0.34599038528530623, "grad_norm": 0.6588543653488159, "learning_rate": 7.994887335863832e-06, "loss": 0.13051605224609375, "step": 2483 }, { "epoch": 0.3461297289765206, "grad_norm": 0.9090948700904846, "learning_rate": 7.992999771274646e-06, "loss": 0.14896392822265625, "step": 2484 }, { "epoch": 0.346269072667735, "grad_norm": 0.6999915242195129, "learning_rate": 7.991111541709244e-06, "loss": 0.11438369750976562, "step": 2485 }, { "epoch": 0.34640841635894937, "grad_norm": 1.4897218942642212, "learning_rate": 7.989222647587146e-06, "loss": 0.1522369384765625, "step": 2486 }, { "epoch": 0.34654776005016374, "grad_norm": 1.537187099456787, "learning_rate": 7.987333089328018e-06, "loss": 0.14814376831054688, "step": 2487 }, { "epoch": 0.3466871037413781, "grad_norm": 0.7937643527984619, "learning_rate": 7.985442867351682e-06, "loss": 0.12026214599609375, "step": 2488 }, { "epoch": 0.3468264474325925, "grad_norm": 1.6112213134765625, "learning_rate": 7.983551982078097e-06, "loss": 0.1356048583984375, "step": 2489 }, { "epoch": 0.3469657911238069, "grad_norm": 1.2670427560806274, "learning_rate": 7.98166043392738e-06, "loss": 0.141998291015625, "step": 2490 }, { "epoch": 0.34710513481502125, "grad_norm": 1.0872445106506348, "learning_rate": 7.979768223319786e-06, "loss": 0.13904571533203125, "step": 2491 }, { "epoch": 0.34724447850623563, "grad_norm": 0.6787402033805847, "learning_rate": 7.977875350675721e-06, "loss": 0.12525177001953125, "step": 2492 }, { "epoch": 0.34738382219745, "grad_norm": 1.0832455158233643, "learning_rate": 7.975981816415741e-06, "loss": 0.13833236694335938, "step": 2493 }, { "epoch": 0.3475231658886644, "grad_norm": 1.5269689559936523, "learning_rate": 7.974087620960543e-06, "loss": 0.1902923583984375, "step": 2494 }, { "epoch": 0.34766250957987876, "grad_norm": 1.695232629776001, "learning_rate": 7.972192764730975e-06, "loss": 0.19935226440429688, "step": 2495 }, { "epoch": 0.34780185327109314, "grad_norm": 1.4999381303787231, "learning_rate": 7.970297248148033e-06, "loss": 0.17983245849609375, "step": 2496 }, { "epoch": 0.3479411969623075, "grad_norm": 0.5172876119613647, "learning_rate": 7.968401071632854e-06, "loss": 0.11737251281738281, "step": 2497 }, { "epoch": 0.3480805406535219, "grad_norm": 0.8156893849372864, "learning_rate": 7.966504235606726e-06, "loss": 0.15480804443359375, "step": 2498 }, { "epoch": 0.3482198843447363, "grad_norm": 1.1627826690673828, "learning_rate": 7.964606740491085e-06, "loss": 0.13748931884765625, "step": 2499 }, { "epoch": 0.34835922803595065, "grad_norm": 1.0681992769241333, "learning_rate": 7.962708586707508e-06, "loss": 0.11860084533691406, "step": 2500 }, { "epoch": 0.34849857172716503, "grad_norm": 1.5019487142562866, "learning_rate": 7.960809774677722e-06, "loss": 0.15621566772460938, "step": 2501 }, { "epoch": 0.3486379154183794, "grad_norm": 0.9734637141227722, "learning_rate": 7.958910304823603e-06, "loss": 0.15233993530273438, "step": 2502 }, { "epoch": 0.34877725910959384, "grad_norm": 0.9276806712150574, "learning_rate": 7.957010177567167e-06, "loss": 0.15043258666992188, "step": 2503 }, { "epoch": 0.3489166028008082, "grad_norm": 1.408628225326538, "learning_rate": 7.955109393330577e-06, "loss": 0.1389293670654297, "step": 2504 }, { "epoch": 0.3490559464920226, "grad_norm": 1.1040029525756836, "learning_rate": 7.953207952536147e-06, "loss": 0.1429767608642578, "step": 2505 }, { "epoch": 0.349195290183237, "grad_norm": 0.7650058269500732, "learning_rate": 7.951305855606333e-06, "loss": 0.11266708374023438, "step": 2506 }, { "epoch": 0.34933463387445135, "grad_norm": 0.9379114508628845, "learning_rate": 7.949403102963738e-06, "loss": 0.16561126708984375, "step": 2507 }, { "epoch": 0.3494739775656657, "grad_norm": 0.8176896572113037, "learning_rate": 7.947499695031108e-06, "loss": 0.131011962890625, "step": 2508 }, { "epoch": 0.3496133212568801, "grad_norm": 0.8900582790374756, "learning_rate": 7.94559563223134e-06, "loss": 0.14899826049804688, "step": 2509 }, { "epoch": 0.3497526649480945, "grad_norm": 1.1890493631362915, "learning_rate": 7.943690914987472e-06, "loss": 0.14087677001953125, "step": 2510 }, { "epoch": 0.34989200863930886, "grad_norm": 1.1760011911392212, "learning_rate": 7.941785543722686e-06, "loss": 0.1730194091796875, "step": 2511 }, { "epoch": 0.35003135233052324, "grad_norm": 0.9345393180847168, "learning_rate": 7.939879518860316e-06, "loss": 0.15974807739257812, "step": 2512 }, { "epoch": 0.3501706960217376, "grad_norm": 1.246927261352539, "learning_rate": 7.937972840823836e-06, "loss": 0.163665771484375, "step": 2513 }, { "epoch": 0.350310039712952, "grad_norm": 2.515176296234131, "learning_rate": 7.936065510036863e-06, "loss": 0.23915481567382812, "step": 2514 }, { "epoch": 0.35044938340416637, "grad_norm": 1.436145544052124, "learning_rate": 7.934157526923167e-06, "loss": 0.13896560668945312, "step": 2515 }, { "epoch": 0.35058872709538075, "grad_norm": 0.7870035767555237, "learning_rate": 7.932248891906657e-06, "loss": 0.1222076416015625, "step": 2516 }, { "epoch": 0.3507280707865951, "grad_norm": 1.1822718381881714, "learning_rate": 7.930339605411387e-06, "loss": 0.14488601684570312, "step": 2517 }, { "epoch": 0.3508674144778095, "grad_norm": 1.7033530473709106, "learning_rate": 7.92842966786156e-06, "loss": 0.16075515747070312, "step": 2518 }, { "epoch": 0.3510067581690239, "grad_norm": 1.5284605026245117, "learning_rate": 7.926519079681514e-06, "loss": 0.13837814331054688, "step": 2519 }, { "epoch": 0.35114610186023826, "grad_norm": 1.2878916263580322, "learning_rate": 7.924607841295744e-06, "loss": 0.21851348876953125, "step": 2520 }, { "epoch": 0.35128544555145264, "grad_norm": 1.7771787643432617, "learning_rate": 7.92269595312888e-06, "loss": 0.129547119140625, "step": 2521 }, { "epoch": 0.351424789242667, "grad_norm": 0.7542772889137268, "learning_rate": 7.920783415605703e-06, "loss": 0.12049102783203125, "step": 2522 }, { "epoch": 0.35156413293388145, "grad_norm": 1.0352545976638794, "learning_rate": 7.918870229151134e-06, "loss": 0.1483612060546875, "step": 2523 }, { "epoch": 0.3517034766250958, "grad_norm": 0.8436196446418762, "learning_rate": 7.916956394190238e-06, "loss": 0.12352371215820312, "step": 2524 }, { "epoch": 0.3518428203163102, "grad_norm": 0.851044237613678, "learning_rate": 7.915041911148229e-06, "loss": 0.13653945922851562, "step": 2525 }, { "epoch": 0.3519821640075246, "grad_norm": 0.8700271248817444, "learning_rate": 7.913126780450455e-06, "loss": 0.14306640625, "step": 2526 }, { "epoch": 0.35212150769873896, "grad_norm": 1.4575735330581665, "learning_rate": 7.911211002522422e-06, "loss": 0.1498870849609375, "step": 2527 }, { "epoch": 0.35226085138995333, "grad_norm": 0.9555741548538208, "learning_rate": 7.909294577789765e-06, "loss": 0.12477874755859375, "step": 2528 }, { "epoch": 0.3524001950811677, "grad_norm": 1.0386958122253418, "learning_rate": 7.907377506678274e-06, "loss": 0.11603355407714844, "step": 2529 }, { "epoch": 0.3525395387723821, "grad_norm": 1.951560378074646, "learning_rate": 7.905459789613878e-06, "loss": 0.15572357177734375, "step": 2530 }, { "epoch": 0.35267888246359647, "grad_norm": 1.6051630973815918, "learning_rate": 7.90354142702265e-06, "loss": 0.13341903686523438, "step": 2531 }, { "epoch": 0.35281822615481084, "grad_norm": 1.3991694450378418, "learning_rate": 7.901622419330805e-06, "loss": 0.13783645629882812, "step": 2532 }, { "epoch": 0.3529575698460252, "grad_norm": 0.8409508466720581, "learning_rate": 7.899702766964705e-06, "loss": 0.12548065185546875, "step": 2533 }, { "epoch": 0.3530969135372396, "grad_norm": 0.7166169285774231, "learning_rate": 7.89778247035085e-06, "loss": 0.14216232299804688, "step": 2534 }, { "epoch": 0.353236257228454, "grad_norm": 0.8201520442962646, "learning_rate": 7.895861529915889e-06, "loss": 0.14556884765625, "step": 2535 }, { "epoch": 0.35337560091966835, "grad_norm": 1.5431987047195435, "learning_rate": 7.893939946086609e-06, "loss": 0.1409759521484375, "step": 2536 }, { "epoch": 0.35351494461088273, "grad_norm": 0.7789369821548462, "learning_rate": 7.892017719289941e-06, "loss": 0.1243743896484375, "step": 2537 }, { "epoch": 0.3536542883020971, "grad_norm": 1.2960618734359741, "learning_rate": 7.890094849952964e-06, "loss": 0.13540267944335938, "step": 2538 }, { "epoch": 0.3537936319933115, "grad_norm": 0.9716634154319763, "learning_rate": 7.888171338502893e-06, "loss": 0.1543121337890625, "step": 2539 }, { "epoch": 0.35393297568452586, "grad_norm": 0.8277673721313477, "learning_rate": 7.886247185367088e-06, "loss": 0.1533966064453125, "step": 2540 }, { "epoch": 0.35407231937574024, "grad_norm": 0.5147867202758789, "learning_rate": 7.884322390973053e-06, "loss": 0.115020751953125, "step": 2541 }, { "epoch": 0.3542116630669546, "grad_norm": 1.1664332151412964, "learning_rate": 7.882396955748432e-06, "loss": 0.13336181640625, "step": 2542 }, { "epoch": 0.35435100675816905, "grad_norm": 1.3818854093551636, "learning_rate": 7.880470880121015e-06, "loss": 0.14447402954101562, "step": 2543 }, { "epoch": 0.35449035044938343, "grad_norm": 1.2987362146377563, "learning_rate": 7.878544164518731e-06, "loss": 0.14615249633789062, "step": 2544 }, { "epoch": 0.3546296941405978, "grad_norm": 1.2808603048324585, "learning_rate": 7.87661680936965e-06, "loss": 0.12822341918945312, "step": 2545 }, { "epoch": 0.3547690378318122, "grad_norm": 1.2895805835723877, "learning_rate": 7.87468881510199e-06, "loss": 0.1456127166748047, "step": 2546 }, { "epoch": 0.35490838152302656, "grad_norm": 1.2626451253890991, "learning_rate": 7.872760182144104e-06, "loss": 0.161529541015625, "step": 2547 }, { "epoch": 0.35504772521424094, "grad_norm": 0.800849437713623, "learning_rate": 7.870830910924491e-06, "loss": 0.12097358703613281, "step": 2548 }, { "epoch": 0.3551870689054553, "grad_norm": 1.4459084272384644, "learning_rate": 7.868901001871797e-06, "loss": 0.14381790161132812, "step": 2549 }, { "epoch": 0.3553264125966697, "grad_norm": 1.3432327508926392, "learning_rate": 7.866970455414793e-06, "loss": 0.161285400390625, "step": 2550 }, { "epoch": 0.35546575628788407, "grad_norm": 0.5748004913330078, "learning_rate": 7.86503927198241e-06, "loss": 0.1120758056640625, "step": 2551 }, { "epoch": 0.35560509997909845, "grad_norm": 1.0473004579544067, "learning_rate": 7.863107452003711e-06, "loss": 0.1427001953125, "step": 2552 }, { "epoch": 0.3557444436703128, "grad_norm": 2.6631827354431152, "learning_rate": 7.861174995907901e-06, "loss": 0.16809654235839844, "step": 2553 }, { "epoch": 0.3558837873615272, "grad_norm": 1.6383798122406006, "learning_rate": 7.85924190412433e-06, "loss": 0.14540863037109375, "step": 2554 }, { "epoch": 0.3560231310527416, "grad_norm": 1.6980446577072144, "learning_rate": 7.857308177082484e-06, "loss": 0.215850830078125, "step": 2555 }, { "epoch": 0.35616247474395596, "grad_norm": 0.9996764659881592, "learning_rate": 7.855373815211995e-06, "loss": 0.12131118774414062, "step": 2556 }, { "epoch": 0.35630181843517034, "grad_norm": 0.8062540292739868, "learning_rate": 7.853438818942633e-06, "loss": 0.1159515380859375, "step": 2557 }, { "epoch": 0.3564411621263847, "grad_norm": 1.327341914176941, "learning_rate": 7.851503188704312e-06, "loss": 0.1716766357421875, "step": 2558 }, { "epoch": 0.3565805058175991, "grad_norm": 1.6173752546310425, "learning_rate": 7.849566924927082e-06, "loss": 0.17563629150390625, "step": 2559 }, { "epoch": 0.35671984950881347, "grad_norm": 1.305605411529541, "learning_rate": 7.84763002804114e-06, "loss": 0.13815689086914062, "step": 2560 }, { "epoch": 0.35685919320002785, "grad_norm": 0.9513415098190308, "learning_rate": 7.845692498476816e-06, "loss": 0.14934539794921875, "step": 2561 }, { "epoch": 0.3569985368912422, "grad_norm": 1.1428169012069702, "learning_rate": 7.843754336664589e-06, "loss": 0.14699172973632812, "step": 2562 }, { "epoch": 0.3571378805824566, "grad_norm": 1.1045982837677002, "learning_rate": 7.84181554303507e-06, "loss": 0.15060806274414062, "step": 2563 }, { "epoch": 0.35727722427367103, "grad_norm": 1.0665416717529297, "learning_rate": 7.839876118019019e-06, "loss": 0.16773605346679688, "step": 2564 }, { "epoch": 0.3574165679648854, "grad_norm": 0.9436895847320557, "learning_rate": 7.837936062047329e-06, "loss": 0.12027359008789062, "step": 2565 }, { "epoch": 0.3575559116560998, "grad_norm": 0.7773550748825073, "learning_rate": 7.835995375551038e-06, "loss": 0.1464080810546875, "step": 2566 }, { "epoch": 0.35769525534731417, "grad_norm": 1.3702259063720703, "learning_rate": 7.83405405896132e-06, "loss": 0.1586761474609375, "step": 2567 }, { "epoch": 0.35783459903852854, "grad_norm": 0.7441948652267456, "learning_rate": 7.832112112709496e-06, "loss": 0.12739944458007812, "step": 2568 }, { "epoch": 0.3579739427297429, "grad_norm": 0.8130411505699158, "learning_rate": 7.830169537227015e-06, "loss": 0.11910629272460938, "step": 2569 }, { "epoch": 0.3581132864209573, "grad_norm": 2.8386802673339844, "learning_rate": 7.828226332945479e-06, "loss": 0.17046737670898438, "step": 2570 }, { "epoch": 0.3582526301121717, "grad_norm": 2.519611120223999, "learning_rate": 7.82628250029662e-06, "loss": 0.18066787719726562, "step": 2571 }, { "epoch": 0.35839197380338605, "grad_norm": 1.7651910781860352, "learning_rate": 7.824338039712316e-06, "loss": 0.22602081298828125, "step": 2572 }, { "epoch": 0.35853131749460043, "grad_norm": 1.3593755960464478, "learning_rate": 7.82239295162458e-06, "loss": 0.15318679809570312, "step": 2573 }, { "epoch": 0.3586706611858148, "grad_norm": 1.0973742008209229, "learning_rate": 7.820447236465565e-06, "loss": 0.1778564453125, "step": 2574 }, { "epoch": 0.3588100048770292, "grad_norm": 1.1597388982772827, "learning_rate": 7.818500894667566e-06, "loss": 0.17652511596679688, "step": 2575 }, { "epoch": 0.35894934856824356, "grad_norm": 0.9259942173957825, "learning_rate": 7.816553926663018e-06, "loss": 0.15196990966796875, "step": 2576 }, { "epoch": 0.35908869225945794, "grad_norm": 1.9911818504333496, "learning_rate": 7.81460633288449e-06, "loss": 0.1298542022705078, "step": 2577 }, { "epoch": 0.3592280359506723, "grad_norm": 1.7343043088912964, "learning_rate": 7.812658113764691e-06, "loss": 0.16883468627929688, "step": 2578 }, { "epoch": 0.3593673796418867, "grad_norm": 0.8248856067657471, "learning_rate": 7.810709269736476e-06, "loss": 0.14926528930664062, "step": 2579 }, { "epoch": 0.3595067233331011, "grad_norm": 1.1463040113449097, "learning_rate": 7.808759801232829e-06, "loss": 0.14994430541992188, "step": 2580 }, { "epoch": 0.35964606702431545, "grad_norm": 0.9534621238708496, "learning_rate": 7.80680970868688e-06, "loss": 0.12589645385742188, "step": 2581 }, { "epoch": 0.35978541071552983, "grad_norm": 1.530026912689209, "learning_rate": 7.804858992531893e-06, "loss": 0.16899871826171875, "step": 2582 }, { "epoch": 0.3599247544067442, "grad_norm": 0.7494660019874573, "learning_rate": 7.802907653201275e-06, "loss": 0.12709808349609375, "step": 2583 }, { "epoch": 0.36006409809795864, "grad_norm": 0.9962627291679382, "learning_rate": 7.800955691128568e-06, "loss": 0.14340972900390625, "step": 2584 }, { "epoch": 0.360203441789173, "grad_norm": 1.8997557163238525, "learning_rate": 7.799003106747453e-06, "loss": 0.22067642211914062, "step": 2585 }, { "epoch": 0.3603427854803874, "grad_norm": 1.0348881483078003, "learning_rate": 7.79704990049175e-06, "loss": 0.15165138244628906, "step": 2586 }, { "epoch": 0.3604821291716018, "grad_norm": 1.201763391494751, "learning_rate": 7.795096072795418e-06, "loss": 0.12281036376953125, "step": 2587 }, { "epoch": 0.36062147286281615, "grad_norm": 0.7263826131820679, "learning_rate": 7.793141624092551e-06, "loss": 0.13712692260742188, "step": 2588 }, { "epoch": 0.36076081655403053, "grad_norm": 1.4455732107162476, "learning_rate": 7.791186554817383e-06, "loss": 0.14926910400390625, "step": 2589 }, { "epoch": 0.3609001602452449, "grad_norm": 0.5753989219665527, "learning_rate": 7.789230865404287e-06, "loss": 0.12104415893554688, "step": 2590 }, { "epoch": 0.3610395039364593, "grad_norm": 0.9901994466781616, "learning_rate": 7.787274556287771e-06, "loss": 0.11455535888671875, "step": 2591 }, { "epoch": 0.36117884762767366, "grad_norm": 1.314936637878418, "learning_rate": 7.785317627902484e-06, "loss": 0.14628028869628906, "step": 2592 }, { "epoch": 0.36131819131888804, "grad_norm": 0.6840811967849731, "learning_rate": 7.783360080683212e-06, "loss": 0.11468696594238281, "step": 2593 }, { "epoch": 0.3614575350101024, "grad_norm": 0.8047733306884766, "learning_rate": 7.781401915064873e-06, "loss": 0.14716339111328125, "step": 2594 }, { "epoch": 0.3615968787013168, "grad_norm": 1.194474458694458, "learning_rate": 7.779443131482529e-06, "loss": 0.1411285400390625, "step": 2595 }, { "epoch": 0.36173622239253117, "grad_norm": 1.0708822011947632, "learning_rate": 7.777483730371375e-06, "loss": 0.15302658081054688, "step": 2596 }, { "epoch": 0.36187556608374555, "grad_norm": 1.3901960849761963, "learning_rate": 7.77552371216675e-06, "loss": 0.13307571411132812, "step": 2597 }, { "epoch": 0.3620149097749599, "grad_norm": 1.287971019744873, "learning_rate": 7.773563077304123e-06, "loss": 0.13595199584960938, "step": 2598 }, { "epoch": 0.3621542534661743, "grad_norm": 0.9944455623626709, "learning_rate": 7.7716018262191e-06, "loss": 0.10866928100585938, "step": 2599 }, { "epoch": 0.3622935971573887, "grad_norm": 1.1862807273864746, "learning_rate": 7.769639959347428e-06, "loss": 0.14710617065429688, "step": 2600 }, { "epoch": 0.36243294084860306, "grad_norm": 1.0049177408218384, "learning_rate": 7.767677477124988e-06, "loss": 0.138275146484375, "step": 2601 }, { "epoch": 0.36257228453981744, "grad_norm": 1.558915138244629, "learning_rate": 7.765714379987804e-06, "loss": 0.1985015869140625, "step": 2602 }, { "epoch": 0.3627116282310318, "grad_norm": 1.6652767658233643, "learning_rate": 7.763750668372023e-06, "loss": 0.1757354736328125, "step": 2603 }, { "epoch": 0.36285097192224625, "grad_norm": 0.8969981670379639, "learning_rate": 7.761786342713941e-06, "loss": 0.14683914184570312, "step": 2604 }, { "epoch": 0.3629903156134606, "grad_norm": 1.3558493852615356, "learning_rate": 7.75982140344999e-06, "loss": 0.19463348388671875, "step": 2605 }, { "epoch": 0.363129659304675, "grad_norm": 1.3640531301498413, "learning_rate": 7.757855851016727e-06, "loss": 0.13039016723632812, "step": 2606 }, { "epoch": 0.3632690029958894, "grad_norm": 1.0281306505203247, "learning_rate": 7.755889685850858e-06, "loss": 0.1344738006591797, "step": 2607 }, { "epoch": 0.36340834668710376, "grad_norm": 0.8756628036499023, "learning_rate": 7.75392290838922e-06, "loss": 0.1686553955078125, "step": 2608 }, { "epoch": 0.36354769037831813, "grad_norm": 0.9136698842048645, "learning_rate": 7.751955519068783e-06, "loss": 0.14379119873046875, "step": 2609 }, { "epoch": 0.3636870340695325, "grad_norm": 1.566785454750061, "learning_rate": 7.74998751832666e-06, "loss": 0.14292144775390625, "step": 2610 }, { "epoch": 0.3638263777607469, "grad_norm": 1.905659556388855, "learning_rate": 7.748018906600092e-06, "loss": 0.15169143676757812, "step": 2611 }, { "epoch": 0.36396572145196127, "grad_norm": 1.8138350248336792, "learning_rate": 7.746049684326462e-06, "loss": 0.14379501342773438, "step": 2612 }, { "epoch": 0.36410506514317564, "grad_norm": 1.8628487586975098, "learning_rate": 7.744079851943286e-06, "loss": 0.14072036743164062, "step": 2613 }, { "epoch": 0.36424440883439, "grad_norm": 1.3750934600830078, "learning_rate": 7.742109409888213e-06, "loss": 0.14875030517578125, "step": 2614 }, { "epoch": 0.3643837525256044, "grad_norm": 0.8031954765319824, "learning_rate": 7.740138358599035e-06, "loss": 0.12333297729492188, "step": 2615 }, { "epoch": 0.3645230962168188, "grad_norm": 1.6193561553955078, "learning_rate": 7.73816669851367e-06, "loss": 0.13471221923828125, "step": 2616 }, { "epoch": 0.36466243990803315, "grad_norm": 1.7540370225906372, "learning_rate": 7.73619443007018e-06, "loss": 0.15456390380859375, "step": 2617 }, { "epoch": 0.36480178359924753, "grad_norm": 2.1277711391448975, "learning_rate": 7.734221553706756e-06, "loss": 0.13791275024414062, "step": 2618 }, { "epoch": 0.3649411272904619, "grad_norm": 1.4291150569915771, "learning_rate": 7.732248069861726e-06, "loss": 0.12319374084472656, "step": 2619 }, { "epoch": 0.3650804709816763, "grad_norm": 2.239410877227783, "learning_rate": 7.730273978973552e-06, "loss": 0.14887619018554688, "step": 2620 }, { "epoch": 0.36521981467289066, "grad_norm": 1.0547255277633667, "learning_rate": 7.728299281480833e-06, "loss": 0.14270782470703125, "step": 2621 }, { "epoch": 0.36535915836410504, "grad_norm": 1.1460704803466797, "learning_rate": 7.726323977822304e-06, "loss": 0.16065597534179688, "step": 2622 }, { "epoch": 0.3654985020553194, "grad_norm": 0.7663756012916565, "learning_rate": 7.72434806843683e-06, "loss": 0.11708831787109375, "step": 2623 }, { "epoch": 0.36563784574653385, "grad_norm": 1.429153323173523, "learning_rate": 7.72237155376341e-06, "loss": 0.12875747680664062, "step": 2624 }, { "epoch": 0.36577718943774823, "grad_norm": 1.932536005973816, "learning_rate": 7.720394434241185e-06, "loss": 0.15395736694335938, "step": 2625 }, { "epoch": 0.3659165331289626, "grad_norm": 1.8893085718154907, "learning_rate": 7.718416710309425e-06, "loss": 0.14897918701171875, "step": 2626 }, { "epoch": 0.366055876820177, "grad_norm": 2.2884159088134766, "learning_rate": 7.716438382407534e-06, "loss": 0.1617145538330078, "step": 2627 }, { "epoch": 0.36619522051139136, "grad_norm": 1.6473551988601685, "learning_rate": 7.714459450975052e-06, "loss": 0.18746566772460938, "step": 2628 }, { "epoch": 0.36633456420260574, "grad_norm": 0.6622990965843201, "learning_rate": 7.712479916451651e-06, "loss": 0.12720298767089844, "step": 2629 }, { "epoch": 0.3664739078938201, "grad_norm": 1.1045360565185547, "learning_rate": 7.710499779277141e-06, "loss": 0.14597320556640625, "step": 2630 }, { "epoch": 0.3666132515850345, "grad_norm": 0.8581730723381042, "learning_rate": 7.708519039891462e-06, "loss": 0.13982391357421875, "step": 2631 }, { "epoch": 0.36675259527624887, "grad_norm": 2.262437105178833, "learning_rate": 7.70653769873469e-06, "loss": 0.14619064331054688, "step": 2632 }, { "epoch": 0.36689193896746325, "grad_norm": 1.485264539718628, "learning_rate": 7.70455575624703e-06, "loss": 0.16541671752929688, "step": 2633 }, { "epoch": 0.3670312826586776, "grad_norm": 1.2842668294906616, "learning_rate": 7.702573212868827e-06, "loss": 0.17784500122070312, "step": 2634 }, { "epoch": 0.367170626349892, "grad_norm": 1.020900845527649, "learning_rate": 7.70059006904056e-06, "loss": 0.10727310180664062, "step": 2635 }, { "epoch": 0.3673099700411064, "grad_norm": 3.323085069656372, "learning_rate": 7.698606325202832e-06, "loss": 0.17823028564453125, "step": 2636 }, { "epoch": 0.36744931373232076, "grad_norm": 1.4110329151153564, "learning_rate": 7.69662198179639e-06, "loss": 0.14395523071289062, "step": 2637 }, { "epoch": 0.36758865742353514, "grad_norm": 1.5290015935897827, "learning_rate": 7.694637039262109e-06, "loss": 0.16727828979492188, "step": 2638 }, { "epoch": 0.3677280011147495, "grad_norm": 0.7553715109825134, "learning_rate": 7.692651498040996e-06, "loss": 0.13619232177734375, "step": 2639 }, { "epoch": 0.3678673448059639, "grad_norm": 0.8666397929191589, "learning_rate": 7.690665358574197e-06, "loss": 0.15076446533203125, "step": 2640 }, { "epoch": 0.36800668849717827, "grad_norm": 0.9356755614280701, "learning_rate": 7.688678621302981e-06, "loss": 0.11557769775390625, "step": 2641 }, { "epoch": 0.36814603218839265, "grad_norm": 1.5772061347961426, "learning_rate": 7.686691286668761e-06, "loss": 0.14171981811523438, "step": 2642 }, { "epoch": 0.368285375879607, "grad_norm": 1.646828532218933, "learning_rate": 7.684703355113074e-06, "loss": 0.167938232421875, "step": 2643 }, { "epoch": 0.36842471957082146, "grad_norm": 1.0026912689208984, "learning_rate": 7.682714827077595e-06, "loss": 0.11522674560546875, "step": 2644 }, { "epoch": 0.36856406326203583, "grad_norm": 1.01878821849823, "learning_rate": 7.68072570300413e-06, "loss": 0.13121795654296875, "step": 2645 }, { "epoch": 0.3687034069532502, "grad_norm": 1.1130322217941284, "learning_rate": 7.678735983334615e-06, "loss": 0.13839340209960938, "step": 2646 }, { "epoch": 0.3688427506444646, "grad_norm": 0.7625519037246704, "learning_rate": 7.676745668511121e-06, "loss": 0.13376617431640625, "step": 2647 }, { "epoch": 0.36898209433567897, "grad_norm": 0.8217455744743347, "learning_rate": 7.67475475897585e-06, "loss": 0.13930130004882812, "step": 2648 }, { "epoch": 0.36912143802689334, "grad_norm": 1.352894902229309, "learning_rate": 7.672763255171138e-06, "loss": 0.15485382080078125, "step": 2649 }, { "epoch": 0.3692607817181077, "grad_norm": 1.0875895023345947, "learning_rate": 7.67077115753945e-06, "loss": 0.14059829711914062, "step": 2650 }, { "epoch": 0.3694001254093221, "grad_norm": 1.0865176916122437, "learning_rate": 7.668778466523386e-06, "loss": 0.13584518432617188, "step": 2651 }, { "epoch": 0.3695394691005365, "grad_norm": 0.9074121713638306, "learning_rate": 7.666785182565676e-06, "loss": 0.1416950225830078, "step": 2652 }, { "epoch": 0.36967881279175085, "grad_norm": 1.5908758640289307, "learning_rate": 7.664791306109183e-06, "loss": 0.1473236083984375, "step": 2653 }, { "epoch": 0.36981815648296523, "grad_norm": 1.1723685264587402, "learning_rate": 7.6627968375969e-06, "loss": 0.1498870849609375, "step": 2654 }, { "epoch": 0.3699575001741796, "grad_norm": 1.399504542350769, "learning_rate": 7.660801777471951e-06, "loss": 0.19367599487304688, "step": 2655 }, { "epoch": 0.370096843865394, "grad_norm": 0.622369647026062, "learning_rate": 7.658806126177596e-06, "loss": 0.136138916015625, "step": 2656 }, { "epoch": 0.37023618755660836, "grad_norm": 1.4662102460861206, "learning_rate": 7.65680988415722e-06, "loss": 0.16036224365234375, "step": 2657 }, { "epoch": 0.37037553124782274, "grad_norm": 1.115412712097168, "learning_rate": 7.654813051854345e-06, "loss": 0.11462211608886719, "step": 2658 }, { "epoch": 0.3705148749390371, "grad_norm": 2.009852170944214, "learning_rate": 7.652815629712616e-06, "loss": 0.17099761962890625, "step": 2659 }, { "epoch": 0.3706542186302515, "grad_norm": 2.301582098007202, "learning_rate": 7.650817618175824e-06, "loss": 0.18610382080078125, "step": 2660 }, { "epoch": 0.3707935623214659, "grad_norm": 1.0197690725326538, "learning_rate": 7.648819017687875e-06, "loss": 0.1330413818359375, "step": 2661 }, { "epoch": 0.37093290601268025, "grad_norm": 1.031774640083313, "learning_rate": 7.646819828692813e-06, "loss": 0.1260051727294922, "step": 2662 }, { "epoch": 0.37107224970389463, "grad_norm": 0.7189230918884277, "learning_rate": 7.644820051634813e-06, "loss": 0.13766098022460938, "step": 2663 }, { "epoch": 0.37121159339510906, "grad_norm": 0.9771978855133057, "learning_rate": 7.64281968695818e-06, "loss": 0.16384124755859375, "step": 2664 }, { "epoch": 0.37135093708632344, "grad_norm": 0.6240331530570984, "learning_rate": 7.640818735107351e-06, "loss": 0.12182998657226562, "step": 2665 }, { "epoch": 0.3714902807775378, "grad_norm": 1.330284595489502, "learning_rate": 7.638817196526887e-06, "loss": 0.18571090698242188, "step": 2666 }, { "epoch": 0.3716296244687522, "grad_norm": 1.0216989517211914, "learning_rate": 7.636815071661488e-06, "loss": 0.14583206176757812, "step": 2667 }, { "epoch": 0.3717689681599666, "grad_norm": 0.7881343960762024, "learning_rate": 7.634812360955982e-06, "loss": 0.13444137573242188, "step": 2668 }, { "epoch": 0.37190831185118095, "grad_norm": 1.0221481323242188, "learning_rate": 7.63280906485532e-06, "loss": 0.16524505615234375, "step": 2669 }, { "epoch": 0.37204765554239533, "grad_norm": 0.7573403716087341, "learning_rate": 7.630805183804593e-06, "loss": 0.13604354858398438, "step": 2670 }, { "epoch": 0.3721869992336097, "grad_norm": 0.8553392291069031, "learning_rate": 7.628800718249017e-06, "loss": 0.12990570068359375, "step": 2671 }, { "epoch": 0.3723263429248241, "grad_norm": 0.7913074493408203, "learning_rate": 7.626795668633938e-06, "loss": 0.11278152465820312, "step": 2672 }, { "epoch": 0.37246568661603846, "grad_norm": 0.8515723943710327, "learning_rate": 7.624790035404831e-06, "loss": 0.14269638061523438, "step": 2673 }, { "epoch": 0.37260503030725284, "grad_norm": 0.5950754880905151, "learning_rate": 7.622783819007305e-06, "loss": 0.12929153442382812, "step": 2674 }, { "epoch": 0.3727443739984672, "grad_norm": 0.7767215967178345, "learning_rate": 7.620777019887091e-06, "loss": 0.13074493408203125, "step": 2675 }, { "epoch": 0.3728837176896816, "grad_norm": 0.7927660942077637, "learning_rate": 7.6187696384900585e-06, "loss": 0.1322174072265625, "step": 2676 }, { "epoch": 0.37302306138089597, "grad_norm": 1.4707139730453491, "learning_rate": 7.616761675262199e-06, "loss": 0.1868896484375, "step": 2677 }, { "epoch": 0.37316240507211035, "grad_norm": 0.9726321697235107, "learning_rate": 7.614753130649638e-06, "loss": 0.16135406494140625, "step": 2678 }, { "epoch": 0.3733017487633247, "grad_norm": 1.1039621829986572, "learning_rate": 7.612744005098625e-06, "loss": 0.12979507446289062, "step": 2679 }, { "epoch": 0.3734410924545391, "grad_norm": 0.6138408780097961, "learning_rate": 7.6107342990555466e-06, "loss": 0.11019325256347656, "step": 2680 }, { "epoch": 0.3735804361457535, "grad_norm": 1.8794763088226318, "learning_rate": 7.60872401296691e-06, "loss": 0.13084030151367188, "step": 2681 }, { "epoch": 0.37371977983696786, "grad_norm": 0.9192080497741699, "learning_rate": 7.606713147279356e-06, "loss": 0.174530029296875, "step": 2682 }, { "epoch": 0.37385912352818224, "grad_norm": 1.5239585638046265, "learning_rate": 7.604701702439652e-06, "loss": 0.18428421020507812, "step": 2683 }, { "epoch": 0.37399846721939667, "grad_norm": 0.7423714399337769, "learning_rate": 7.602689678894697e-06, "loss": 0.14210891723632812, "step": 2684 }, { "epoch": 0.37413781091061105, "grad_norm": 0.7190447449684143, "learning_rate": 7.6006770770915165e-06, "loss": 0.12097549438476562, "step": 2685 }, { "epoch": 0.3742771546018254, "grad_norm": 0.8137429356575012, "learning_rate": 7.598663897477263e-06, "loss": 0.1368865966796875, "step": 2686 }, { "epoch": 0.3744164982930398, "grad_norm": 1.136041283607483, "learning_rate": 7.59665014049922e-06, "loss": 0.1433563232421875, "step": 2687 }, { "epoch": 0.3745558419842542, "grad_norm": 0.9049399495124817, "learning_rate": 7.594635806604797e-06, "loss": 0.14751815795898438, "step": 2688 }, { "epoch": 0.37469518567546856, "grad_norm": 0.8391434550285339, "learning_rate": 7.592620896241536e-06, "loss": 0.12223052978515625, "step": 2689 }, { "epoch": 0.37483452936668293, "grad_norm": 0.8132484555244446, "learning_rate": 7.590605409857103e-06, "loss": 0.15159988403320312, "step": 2690 }, { "epoch": 0.3749738730578973, "grad_norm": 0.6775627136230469, "learning_rate": 7.58858934789929e-06, "loss": 0.1370086669921875, "step": 2691 }, { "epoch": 0.3751132167491117, "grad_norm": 1.1702364683151245, "learning_rate": 7.586572710816025e-06, "loss": 0.17711257934570312, "step": 2692 }, { "epoch": 0.37525256044032607, "grad_norm": 1.7172077894210815, "learning_rate": 7.584555499055355e-06, "loss": 0.16147232055664062, "step": 2693 }, { "epoch": 0.37539190413154044, "grad_norm": 1.2928659915924072, "learning_rate": 7.58253771306546e-06, "loss": 0.14461135864257812, "step": 2694 }, { "epoch": 0.3755312478227548, "grad_norm": 1.0937639474868774, "learning_rate": 7.5805193532946445e-06, "loss": 0.13054275512695312, "step": 2695 }, { "epoch": 0.3756705915139692, "grad_norm": 1.161756992340088, "learning_rate": 7.578500420191344e-06, "loss": 0.17987442016601562, "step": 2696 }, { "epoch": 0.3758099352051836, "grad_norm": 0.6002914309501648, "learning_rate": 7.576480914204118e-06, "loss": 0.12420654296875, "step": 2697 }, { "epoch": 0.37594927889639795, "grad_norm": 1.301962971687317, "learning_rate": 7.574460835781654e-06, "loss": 0.15591049194335938, "step": 2698 }, { "epoch": 0.37608862258761233, "grad_norm": 1.2915793657302856, "learning_rate": 7.572440185372769e-06, "loss": 0.13260650634765625, "step": 2699 }, { "epoch": 0.3762279662788267, "grad_norm": 0.7483757734298706, "learning_rate": 7.570418963426405e-06, "loss": 0.13671112060546875, "step": 2700 }, { "epoch": 0.3763673099700411, "grad_norm": 1.2376264333724976, "learning_rate": 7.568397170391631e-06, "loss": 0.22204971313476562, "step": 2701 }, { "epoch": 0.37650665366125546, "grad_norm": 1.0388776063919067, "learning_rate": 7.566374806717642e-06, "loss": 0.1367645263671875, "step": 2702 }, { "epoch": 0.37664599735246984, "grad_norm": 1.2823420763015747, "learning_rate": 7.564351872853763e-06, "loss": 0.16793441772460938, "step": 2703 }, { "epoch": 0.3767853410436843, "grad_norm": 0.9043310880661011, "learning_rate": 7.562328369249443e-06, "loss": 0.1386871337890625, "step": 2704 }, { "epoch": 0.37692468473489865, "grad_norm": 0.7658891677856445, "learning_rate": 7.560304296354259e-06, "loss": 0.14430618286132812, "step": 2705 }, { "epoch": 0.37706402842611303, "grad_norm": 1.0420260429382324, "learning_rate": 7.5582796546179125e-06, "loss": 0.14603805541992188, "step": 2706 }, { "epoch": 0.3772033721173274, "grad_norm": 0.6833192706108093, "learning_rate": 7.556254444490232e-06, "loss": 0.14244651794433594, "step": 2707 }, { "epoch": 0.3773427158085418, "grad_norm": 1.224250078201294, "learning_rate": 7.554228666421176e-06, "loss": 0.15319061279296875, "step": 2708 }, { "epoch": 0.37748205949975616, "grad_norm": 0.581798791885376, "learning_rate": 7.552202320860823e-06, "loss": 0.1240692138671875, "step": 2709 }, { "epoch": 0.37762140319097054, "grad_norm": 2.4623019695281982, "learning_rate": 7.550175408259383e-06, "loss": 0.16458892822265625, "step": 2710 }, { "epoch": 0.3777607468821849, "grad_norm": 1.0136306285858154, "learning_rate": 7.548147929067189e-06, "loss": 0.13912582397460938, "step": 2711 }, { "epoch": 0.3779000905733993, "grad_norm": 1.2575981616973877, "learning_rate": 7.546119883734699e-06, "loss": 0.16315460205078125, "step": 2712 }, { "epoch": 0.37803943426461367, "grad_norm": 1.486251711845398, "learning_rate": 7.544091272712501e-06, "loss": 0.19793701171875, "step": 2713 }, { "epoch": 0.37817877795582805, "grad_norm": 1.2812442779541016, "learning_rate": 7.542062096451306e-06, "loss": 0.15639877319335938, "step": 2714 }, { "epoch": 0.3783181216470424, "grad_norm": 0.869779109954834, "learning_rate": 7.540032355401948e-06, "loss": 0.13344573974609375, "step": 2715 }, { "epoch": 0.3784574653382568, "grad_norm": 2.09721302986145, "learning_rate": 7.53800205001539e-06, "loss": 0.15362548828125, "step": 2716 }, { "epoch": 0.3785968090294712, "grad_norm": 1.0279935598373413, "learning_rate": 7.53597118074272e-06, "loss": 0.14102554321289062, "step": 2717 }, { "epoch": 0.37873615272068556, "grad_norm": 1.6606932878494263, "learning_rate": 7.5339397480351525e-06, "loss": 0.14455032348632812, "step": 2718 }, { "epoch": 0.37887549641189994, "grad_norm": 0.9696327447891235, "learning_rate": 7.531907752344023e-06, "loss": 0.17405319213867188, "step": 2719 }, { "epoch": 0.3790148401031143, "grad_norm": 1.712658405303955, "learning_rate": 7.529875194120795e-06, "loss": 0.18762969970703125, "step": 2720 }, { "epoch": 0.3791541837943287, "grad_norm": 1.6845141649246216, "learning_rate": 7.527842073817056e-06, "loss": 0.14075469970703125, "step": 2721 }, { "epoch": 0.37929352748554307, "grad_norm": 0.884860098361969, "learning_rate": 7.525808391884521e-06, "loss": 0.12349319458007812, "step": 2722 }, { "epoch": 0.37943287117675745, "grad_norm": 0.8962256908416748, "learning_rate": 7.523774148775027e-06, "loss": 0.127471923828125, "step": 2723 }, { "epoch": 0.3795722148679719, "grad_norm": 1.0623600482940674, "learning_rate": 7.521739344940535e-06, "loss": 0.11751174926757812, "step": 2724 }, { "epoch": 0.37971155855918626, "grad_norm": 0.6696377992630005, "learning_rate": 7.519703980833133e-06, "loss": 0.12967300415039062, "step": 2725 }, { "epoch": 0.37985090225040063, "grad_norm": 1.0304819345474243, "learning_rate": 7.517668056905033e-06, "loss": 0.15585899353027344, "step": 2726 }, { "epoch": 0.379990245941615, "grad_norm": 1.11080002784729, "learning_rate": 7.515631573608568e-06, "loss": 0.1592254638671875, "step": 2727 }, { "epoch": 0.3801295896328294, "grad_norm": 0.8499696850776672, "learning_rate": 7.513594531396202e-06, "loss": 0.13433837890625, "step": 2728 }, { "epoch": 0.38026893332404377, "grad_norm": 2.529658317565918, "learning_rate": 7.511556930720517e-06, "loss": 0.1603679656982422, "step": 2729 }, { "epoch": 0.38040827701525814, "grad_norm": 0.7229617834091187, "learning_rate": 7.5095187720342224e-06, "loss": 0.12060165405273438, "step": 2730 }, { "epoch": 0.3805476207064725, "grad_norm": 1.003859281539917, "learning_rate": 7.50748005579015e-06, "loss": 0.155487060546875, "step": 2731 }, { "epoch": 0.3806869643976869, "grad_norm": 0.7851641178131104, "learning_rate": 7.505440782441256e-06, "loss": 0.14432144165039062, "step": 2732 }, { "epoch": 0.3808263080889013, "grad_norm": 0.9496159553527832, "learning_rate": 7.503400952440618e-06, "loss": 0.1661224365234375, "step": 2733 }, { "epoch": 0.38096565178011566, "grad_norm": 1.2398790121078491, "learning_rate": 7.501360566241444e-06, "loss": 0.15286636352539062, "step": 2734 }, { "epoch": 0.38110499547133003, "grad_norm": 0.9277260303497314, "learning_rate": 7.499319624297059e-06, "loss": 0.14378738403320312, "step": 2735 }, { "epoch": 0.3812443391625444, "grad_norm": 0.7796008586883545, "learning_rate": 7.497278127060914e-06, "loss": 0.12188339233398438, "step": 2736 }, { "epoch": 0.3813836828537588, "grad_norm": 1.5983091592788696, "learning_rate": 7.4952360749865825e-06, "loss": 0.17311668395996094, "step": 2737 }, { "epoch": 0.38152302654497317, "grad_norm": 2.6270341873168945, "learning_rate": 7.493193468527764e-06, "loss": 0.16482925415039062, "step": 2738 }, { "epoch": 0.38166237023618754, "grad_norm": 1.1613177061080933, "learning_rate": 7.491150308138275e-06, "loss": 0.16627120971679688, "step": 2739 }, { "epoch": 0.3818017139274019, "grad_norm": 1.4090747833251953, "learning_rate": 7.489106594272063e-06, "loss": 0.17964553833007812, "step": 2740 }, { "epoch": 0.3819410576186163, "grad_norm": 0.9229554533958435, "learning_rate": 7.487062327383192e-06, "loss": 0.1206817626953125, "step": 2741 }, { "epoch": 0.3820804013098307, "grad_norm": 1.0637017488479614, "learning_rate": 7.485017507925853e-06, "loss": 0.16040420532226562, "step": 2742 }, { "epoch": 0.38221974500104505, "grad_norm": 0.8770521283149719, "learning_rate": 7.482972136354359e-06, "loss": 0.14191818237304688, "step": 2743 }, { "epoch": 0.3823590886922595, "grad_norm": 0.9848634004592896, "learning_rate": 7.480926213123142e-06, "loss": 0.1640167236328125, "step": 2744 }, { "epoch": 0.38249843238347386, "grad_norm": 0.479636549949646, "learning_rate": 7.4788797386867596e-06, "loss": 0.11829376220703125, "step": 2745 }, { "epoch": 0.38263777607468824, "grad_norm": 1.0757488012313843, "learning_rate": 7.476832713499896e-06, "loss": 0.15427398681640625, "step": 2746 }, { "epoch": 0.3827771197659026, "grad_norm": 0.7209101915359497, "learning_rate": 7.474785138017349e-06, "loss": 0.13729476928710938, "step": 2747 }, { "epoch": 0.382916463457117, "grad_norm": 1.505640983581543, "learning_rate": 7.472737012694045e-06, "loss": 0.1876983642578125, "step": 2748 }, { "epoch": 0.3830558071483314, "grad_norm": 0.8664607405662537, "learning_rate": 7.470688337985029e-06, "loss": 0.1350860595703125, "step": 2749 }, { "epoch": 0.38319515083954575, "grad_norm": 0.6297271847724915, "learning_rate": 7.468639114345473e-06, "loss": 0.14698028564453125, "step": 2750 }, { "epoch": 0.38333449453076013, "grad_norm": 1.016725778579712, "learning_rate": 7.466589342230664e-06, "loss": 0.17340087890625, "step": 2751 }, { "epoch": 0.3834738382219745, "grad_norm": 0.6317765116691589, "learning_rate": 7.464539022096018e-06, "loss": 0.1145477294921875, "step": 2752 }, { "epoch": 0.3836131819131889, "grad_norm": 3.0442018508911133, "learning_rate": 7.462488154397067e-06, "loss": 0.1826171875, "step": 2753 }, { "epoch": 0.38375252560440326, "grad_norm": 1.4908701181411743, "learning_rate": 7.460436739589467e-06, "loss": 0.19316864013671875, "step": 2754 }, { "epoch": 0.38389186929561764, "grad_norm": 0.6998999118804932, "learning_rate": 7.458384778128997e-06, "loss": 0.12348556518554688, "step": 2755 }, { "epoch": 0.384031212986832, "grad_norm": 1.031733512878418, "learning_rate": 7.4563322704715556e-06, "loss": 0.18272018432617188, "step": 2756 }, { "epoch": 0.3841705566780464, "grad_norm": 0.7329317331314087, "learning_rate": 7.45427921707316e-06, "loss": 0.13040924072265625, "step": 2757 }, { "epoch": 0.38430990036926077, "grad_norm": 1.2393395900726318, "learning_rate": 7.452225618389959e-06, "loss": 0.18480682373046875, "step": 2758 }, { "epoch": 0.38444924406047515, "grad_norm": 0.6268706321716309, "learning_rate": 7.450171474878207e-06, "loss": 0.13272857666015625, "step": 2759 }, { "epoch": 0.3845885877516895, "grad_norm": 0.7700570225715637, "learning_rate": 7.4481167869942934e-06, "loss": 0.16393280029296875, "step": 2760 }, { "epoch": 0.3847279314429039, "grad_norm": 1.8658636808395386, "learning_rate": 7.446061555194721e-06, "loss": 0.1238250732421875, "step": 2761 }, { "epoch": 0.3848672751341183, "grad_norm": 1.273795247077942, "learning_rate": 7.4440057799361155e-06, "loss": 0.11950111389160156, "step": 2762 }, { "epoch": 0.38500661882533266, "grad_norm": 1.2550972700119019, "learning_rate": 7.441949461675223e-06, "loss": 0.131317138671875, "step": 2763 }, { "epoch": 0.38514596251654704, "grad_norm": 1.523703694343567, "learning_rate": 7.439892600868911e-06, "loss": 0.18433189392089844, "step": 2764 }, { "epoch": 0.38528530620776147, "grad_norm": 0.9775182008743286, "learning_rate": 7.437835197974167e-06, "loss": 0.13836669921875, "step": 2765 }, { "epoch": 0.38542464989897585, "grad_norm": 1.661749243736267, "learning_rate": 7.435777253448099e-06, "loss": 0.1746673583984375, "step": 2766 }, { "epoch": 0.3855639935901902, "grad_norm": 1.1527855396270752, "learning_rate": 7.433718767747934e-06, "loss": 0.13874435424804688, "step": 2767 }, { "epoch": 0.3857033372814046, "grad_norm": 0.844602644443512, "learning_rate": 7.431659741331022e-06, "loss": 0.11573410034179688, "step": 2768 }, { "epoch": 0.385842680972619, "grad_norm": 1.1855924129486084, "learning_rate": 7.429600174654832e-06, "loss": 0.15438461303710938, "step": 2769 }, { "epoch": 0.38598202466383336, "grad_norm": 1.118921160697937, "learning_rate": 7.427540068176951e-06, "loss": 0.12624359130859375, "step": 2770 }, { "epoch": 0.38612136835504773, "grad_norm": 1.2382222414016724, "learning_rate": 7.4254794223550885e-06, "loss": 0.15534210205078125, "step": 2771 }, { "epoch": 0.3862607120462621, "grad_norm": 1.1189390420913696, "learning_rate": 7.423418237647073e-06, "loss": 0.168731689453125, "step": 2772 }, { "epoch": 0.3864000557374765, "grad_norm": 0.7287687659263611, "learning_rate": 7.421356514510853e-06, "loss": 0.12298583984375, "step": 2773 }, { "epoch": 0.38653939942869087, "grad_norm": 0.8996638655662537, "learning_rate": 7.419294253404497e-06, "loss": 0.12872695922851562, "step": 2774 }, { "epoch": 0.38667874311990524, "grad_norm": 1.0522063970565796, "learning_rate": 7.417231454786189e-06, "loss": 0.14324951171875, "step": 2775 }, { "epoch": 0.3868180868111196, "grad_norm": 0.843388557434082, "learning_rate": 7.41516811911424e-06, "loss": 0.15643310546875, "step": 2776 }, { "epoch": 0.386957430502334, "grad_norm": 1.2585543394088745, "learning_rate": 7.4131042468470725e-06, "loss": 0.163482666015625, "step": 2777 }, { "epoch": 0.3870967741935484, "grad_norm": 0.7542191743850708, "learning_rate": 7.411039838443234e-06, "loss": 0.1354217529296875, "step": 2778 }, { "epoch": 0.38723611788476275, "grad_norm": 0.9935397505760193, "learning_rate": 7.4089748943613895e-06, "loss": 0.12912368774414062, "step": 2779 }, { "epoch": 0.38737546157597713, "grad_norm": 1.1198320388793945, "learning_rate": 7.406909415060321e-06, "loss": 0.17364501953125, "step": 2780 }, { "epoch": 0.3875148052671915, "grad_norm": 0.9378870725631714, "learning_rate": 7.404843400998931e-06, "loss": 0.1601104736328125, "step": 2781 }, { "epoch": 0.3876541489584059, "grad_norm": 1.2636281251907349, "learning_rate": 7.4027768526362395e-06, "loss": 0.15167999267578125, "step": 2782 }, { "epoch": 0.38779349264962026, "grad_norm": 1.03976309299469, "learning_rate": 7.4007097704313894e-06, "loss": 0.15511703491210938, "step": 2783 }, { "epoch": 0.38793283634083464, "grad_norm": 2.2084059715270996, "learning_rate": 7.398642154843637e-06, "loss": 0.17998123168945312, "step": 2784 }, { "epoch": 0.3880721800320491, "grad_norm": 1.6220686435699463, "learning_rate": 7.39657400633236e-06, "loss": 0.11775588989257812, "step": 2785 }, { "epoch": 0.38821152372326345, "grad_norm": 1.096657633781433, "learning_rate": 7.394505325357053e-06, "loss": 0.116455078125, "step": 2786 }, { "epoch": 0.38835086741447783, "grad_norm": 0.760381281375885, "learning_rate": 7.392436112377331e-06, "loss": 0.12982559204101562, "step": 2787 }, { "epoch": 0.3884902111056922, "grad_norm": 1.1611078977584839, "learning_rate": 7.390366367852923e-06, "loss": 0.14955902099609375, "step": 2788 }, { "epoch": 0.3886295547969066, "grad_norm": 0.7152012586593628, "learning_rate": 7.388296092243683e-06, "loss": 0.12039756774902344, "step": 2789 }, { "epoch": 0.38876889848812096, "grad_norm": 0.6865350008010864, "learning_rate": 7.386225286009576e-06, "loss": 0.11343002319335938, "step": 2790 }, { "epoch": 0.38890824217933534, "grad_norm": 0.9658218622207642, "learning_rate": 7.384153949610689e-06, "loss": 0.12818145751953125, "step": 2791 }, { "epoch": 0.3890475858705497, "grad_norm": 0.8508884906768799, "learning_rate": 7.382082083507226e-06, "loss": 0.12309646606445312, "step": 2792 }, { "epoch": 0.3891869295617641, "grad_norm": 1.2129508256912231, "learning_rate": 7.380009688159507e-06, "loss": 0.14828872680664062, "step": 2793 }, { "epoch": 0.38932627325297847, "grad_norm": 1.0121066570281982, "learning_rate": 7.377936764027973e-06, "loss": 0.16159439086914062, "step": 2794 }, { "epoch": 0.38946561694419285, "grad_norm": 0.9924467206001282, "learning_rate": 7.375863311573179e-06, "loss": 0.13557052612304688, "step": 2795 }, { "epoch": 0.3896049606354072, "grad_norm": 1.3456629514694214, "learning_rate": 7.373789331255799e-06, "loss": 0.17431640625, "step": 2796 }, { "epoch": 0.3897443043266216, "grad_norm": 1.8099684715270996, "learning_rate": 7.371714823536624e-06, "loss": 0.18040847778320312, "step": 2797 }, { "epoch": 0.389883648017836, "grad_norm": 1.1690479516983032, "learning_rate": 7.369639788876561e-06, "loss": 0.15601730346679688, "step": 2798 }, { "epoch": 0.39002299170905036, "grad_norm": 1.2558866739273071, "learning_rate": 7.367564227736639e-06, "loss": 0.1613006591796875, "step": 2799 }, { "epoch": 0.39016233540026474, "grad_norm": 1.23955500125885, "learning_rate": 7.365488140577997e-06, "loss": 0.1365184783935547, "step": 2800 }, { "epoch": 0.3903016790914791, "grad_norm": 1.915033221244812, "learning_rate": 7.3634115278618955e-06, "loss": 0.22084426879882812, "step": 2801 }, { "epoch": 0.3904410227826935, "grad_norm": 1.1778533458709717, "learning_rate": 7.36133439004971e-06, "loss": 0.1496429443359375, "step": 2802 }, { "epoch": 0.39058036647390787, "grad_norm": 1.2191717624664307, "learning_rate": 7.3592567276029336e-06, "loss": 0.15880966186523438, "step": 2803 }, { "epoch": 0.39071971016512225, "grad_norm": 0.5683438777923584, "learning_rate": 7.357178540983174e-06, "loss": 0.111083984375, "step": 2804 }, { "epoch": 0.3908590538563367, "grad_norm": 0.8696767687797546, "learning_rate": 7.355099830652159e-06, "loss": 0.1517791748046875, "step": 2805 }, { "epoch": 0.39099839754755106, "grad_norm": 0.832183301448822, "learning_rate": 7.353020597071729e-06, "loss": 0.13463401794433594, "step": 2806 }, { "epoch": 0.39113774123876544, "grad_norm": 1.1772509813308716, "learning_rate": 7.350940840703842e-06, "loss": 0.16260910034179688, "step": 2807 }, { "epoch": 0.3912770849299798, "grad_norm": 0.8074333071708679, "learning_rate": 7.348860562010574e-06, "loss": 0.14615821838378906, "step": 2808 }, { "epoch": 0.3914164286211942, "grad_norm": 2.086914300918579, "learning_rate": 7.346779761454113e-06, "loss": 0.21292495727539062, "step": 2809 }, { "epoch": 0.39155577231240857, "grad_norm": 1.2872639894485474, "learning_rate": 7.3446984394967705e-06, "loss": 0.20571136474609375, "step": 2810 }, { "epoch": 0.39169511600362295, "grad_norm": 1.535856008529663, "learning_rate": 7.342616596600961e-06, "loss": 0.15822410583496094, "step": 2811 }, { "epoch": 0.3918344596948373, "grad_norm": 1.5324100255966187, "learning_rate": 7.3405342332292286e-06, "loss": 0.13473129272460938, "step": 2812 }, { "epoch": 0.3919738033860517, "grad_norm": 1.4126255512237549, "learning_rate": 7.338451349844225e-06, "loss": 0.15972900390625, "step": 2813 }, { "epoch": 0.3921131470772661, "grad_norm": 1.8348464965820312, "learning_rate": 7.336367946908718e-06, "loss": 0.16949081420898438, "step": 2814 }, { "epoch": 0.39225249076848046, "grad_norm": 0.8096253871917725, "learning_rate": 7.334284024885595e-06, "loss": 0.14481735229492188, "step": 2815 }, { "epoch": 0.39239183445969483, "grad_norm": 2.0226380825042725, "learning_rate": 7.332199584237854e-06, "loss": 0.15293121337890625, "step": 2816 }, { "epoch": 0.3925311781509092, "grad_norm": 1.3351781368255615, "learning_rate": 7.330114625428609e-06, "loss": 0.15024185180664062, "step": 2817 }, { "epoch": 0.3926705218421236, "grad_norm": 0.8095303177833557, "learning_rate": 7.328029148921093e-06, "loss": 0.11898422241210938, "step": 2818 }, { "epoch": 0.39280986553333797, "grad_norm": 0.6157229542732239, "learning_rate": 7.32594315517865e-06, "loss": 0.12856292724609375, "step": 2819 }, { "epoch": 0.39294920922455234, "grad_norm": 1.273870825767517, "learning_rate": 7.32385664466474e-06, "loss": 0.16009140014648438, "step": 2820 }, { "epoch": 0.3930885529157667, "grad_norm": 0.773266613483429, "learning_rate": 7.321769617842937e-06, "loss": 0.12579727172851562, "step": 2821 }, { "epoch": 0.3932278966069811, "grad_norm": 1.3160874843597412, "learning_rate": 7.319682075176932e-06, "loss": 0.18662261962890625, "step": 2822 }, { "epoch": 0.3933672402981955, "grad_norm": 0.824358344078064, "learning_rate": 7.317594017130529e-06, "loss": 0.16192626953125, "step": 2823 }, { "epoch": 0.39350658398940985, "grad_norm": 0.7695131301879883, "learning_rate": 7.3155054441676485e-06, "loss": 0.1319255828857422, "step": 2824 }, { "epoch": 0.3936459276806243, "grad_norm": 1.225743055343628, "learning_rate": 7.313416356752321e-06, "loss": 0.13926315307617188, "step": 2825 }, { "epoch": 0.39378527137183866, "grad_norm": 1.1745402812957764, "learning_rate": 7.311326755348697e-06, "loss": 0.20128250122070312, "step": 2826 }, { "epoch": 0.39392461506305304, "grad_norm": 1.3758103847503662, "learning_rate": 7.309236640421033e-06, "loss": 0.18948936462402344, "step": 2827 }, { "epoch": 0.3940639587542674, "grad_norm": 1.1522164344787598, "learning_rate": 7.30714601243371e-06, "loss": 0.1769256591796875, "step": 2828 }, { "epoch": 0.3942033024454818, "grad_norm": 1.9919404983520508, "learning_rate": 7.305054871851217e-06, "loss": 0.18000030517578125, "step": 2829 }, { "epoch": 0.3943426461366962, "grad_norm": 1.1907899379730225, "learning_rate": 7.302963219138156e-06, "loss": 0.14812660217285156, "step": 2830 }, { "epoch": 0.39448198982791055, "grad_norm": 2.138505458831787, "learning_rate": 7.3008710547592465e-06, "loss": 0.188323974609375, "step": 2831 }, { "epoch": 0.39462133351912493, "grad_norm": 0.7010085582733154, "learning_rate": 7.298778379179317e-06, "loss": 0.14800643920898438, "step": 2832 }, { "epoch": 0.3947606772103393, "grad_norm": 1.109709620475769, "learning_rate": 7.296685192863313e-06, "loss": 0.141815185546875, "step": 2833 }, { "epoch": 0.3949000209015537, "grad_norm": 1.587319254875183, "learning_rate": 7.2945914962762954e-06, "loss": 0.15088653564453125, "step": 2834 }, { "epoch": 0.39503936459276806, "grad_norm": 1.0647618770599365, "learning_rate": 7.292497289883432e-06, "loss": 0.17282867431640625, "step": 2835 }, { "epoch": 0.39517870828398244, "grad_norm": 1.7181671857833862, "learning_rate": 7.29040257415001e-06, "loss": 0.13947296142578125, "step": 2836 }, { "epoch": 0.3953180519751968, "grad_norm": 1.3011772632598877, "learning_rate": 7.288307349541427e-06, "loss": 0.1307392120361328, "step": 2837 }, { "epoch": 0.3954573956664112, "grad_norm": 0.7898372411727905, "learning_rate": 7.286211616523193e-06, "loss": 0.12079238891601562, "step": 2838 }, { "epoch": 0.39559673935762557, "grad_norm": 1.8885349035263062, "learning_rate": 7.284115375560934e-06, "loss": 0.17353439331054688, "step": 2839 }, { "epoch": 0.39573608304883995, "grad_norm": 0.7717078328132629, "learning_rate": 7.282018627120386e-06, "loss": 0.12247467041015625, "step": 2840 }, { "epoch": 0.3958754267400543, "grad_norm": 0.8468783497810364, "learning_rate": 7.279921371667397e-06, "loss": 0.11977005004882812, "step": 2841 }, { "epoch": 0.3960147704312687, "grad_norm": 1.0317710638046265, "learning_rate": 7.2778236096679325e-06, "loss": 0.13605880737304688, "step": 2842 }, { "epoch": 0.3961541141224831, "grad_norm": 1.6422861814498901, "learning_rate": 7.275725341588064e-06, "loss": 0.15536880493164062, "step": 2843 }, { "epoch": 0.39629345781369746, "grad_norm": 0.7106417417526245, "learning_rate": 7.27362656789398e-06, "loss": 0.11214828491210938, "step": 2844 }, { "epoch": 0.3964328015049119, "grad_norm": 1.0349743366241455, "learning_rate": 7.2715272890519815e-06, "loss": 0.13871002197265625, "step": 2845 }, { "epoch": 0.39657214519612627, "grad_norm": 1.534546136856079, "learning_rate": 7.2694275055284795e-06, "loss": 0.17367172241210938, "step": 2846 }, { "epoch": 0.39671148888734065, "grad_norm": 1.340836763381958, "learning_rate": 7.267327217789998e-06, "loss": 0.16299819946289062, "step": 2847 }, { "epoch": 0.396850832578555, "grad_norm": 0.9463247656822205, "learning_rate": 7.26522642630317e-06, "loss": 0.15458297729492188, "step": 2848 }, { "epoch": 0.3969901762697694, "grad_norm": 0.7466742396354675, "learning_rate": 7.263125131534749e-06, "loss": 0.1338634490966797, "step": 2849 }, { "epoch": 0.3971295199609838, "grad_norm": 0.7494770288467407, "learning_rate": 7.26102333395159e-06, "loss": 0.13100814819335938, "step": 2850 }, { "epoch": 0.39726886365219816, "grad_norm": 1.3256380558013916, "learning_rate": 7.2589210340206675e-06, "loss": 0.13070297241210938, "step": 2851 }, { "epoch": 0.39740820734341253, "grad_norm": 1.086499571800232, "learning_rate": 7.256818232209062e-06, "loss": 0.14869308471679688, "step": 2852 }, { "epoch": 0.3975475510346269, "grad_norm": 0.837826132774353, "learning_rate": 7.25471492898397e-06, "loss": 0.1509857177734375, "step": 2853 }, { "epoch": 0.3976868947258413, "grad_norm": 1.1130895614624023, "learning_rate": 7.2526111248126976e-06, "loss": 0.18560409545898438, "step": 2854 }, { "epoch": 0.39782623841705567, "grad_norm": 0.8911879062652588, "learning_rate": 7.250506820162661e-06, "loss": 0.15163040161132812, "step": 2855 }, { "epoch": 0.39796558210827004, "grad_norm": 0.7433512806892395, "learning_rate": 7.248402015501388e-06, "loss": 0.14432907104492188, "step": 2856 }, { "epoch": 0.3981049257994844, "grad_norm": 0.8771493434906006, "learning_rate": 7.246296711296519e-06, "loss": 0.1302490234375, "step": 2857 }, { "epoch": 0.3982442694906988, "grad_norm": 1.2122342586517334, "learning_rate": 7.244190908015805e-06, "loss": 0.13119125366210938, "step": 2858 }, { "epoch": 0.3983836131819132, "grad_norm": 1.6239982843399048, "learning_rate": 7.2420846061271065e-06, "loss": 0.17010498046875, "step": 2859 }, { "epoch": 0.39852295687312755, "grad_norm": 1.096716046333313, "learning_rate": 7.239977806098398e-06, "loss": 0.11954498291015625, "step": 2860 }, { "epoch": 0.39866230056434193, "grad_norm": 0.8122132420539856, "learning_rate": 7.237870508397757e-06, "loss": 0.1449432373046875, "step": 2861 }, { "epoch": 0.3988016442555563, "grad_norm": 1.0104838609695435, "learning_rate": 7.235762713493384e-06, "loss": 0.13268280029296875, "step": 2862 }, { "epoch": 0.3989409879467707, "grad_norm": 0.980343222618103, "learning_rate": 7.2336544218535776e-06, "loss": 0.15072250366210938, "step": 2863 }, { "epoch": 0.39908033163798506, "grad_norm": 1.0156036615371704, "learning_rate": 7.231545633946755e-06, "loss": 0.14604949951171875, "step": 2864 }, { "epoch": 0.3992196753291995, "grad_norm": 1.3162637948989868, "learning_rate": 7.229436350241439e-06, "loss": 0.16104507446289062, "step": 2865 }, { "epoch": 0.3993590190204139, "grad_norm": 0.8684198260307312, "learning_rate": 7.2273265712062646e-06, "loss": 0.11748886108398438, "step": 2866 }, { "epoch": 0.39949836271162825, "grad_norm": 0.7915223836898804, "learning_rate": 7.225216297309977e-06, "loss": 0.13582992553710938, "step": 2867 }, { "epoch": 0.39963770640284263, "grad_norm": 1.049399495124817, "learning_rate": 7.22310552902143e-06, "loss": 0.13648605346679688, "step": 2868 }, { "epoch": 0.399777050094057, "grad_norm": 0.6616933941841125, "learning_rate": 7.220994266809591e-06, "loss": 0.12240409851074219, "step": 2869 }, { "epoch": 0.3999163937852714, "grad_norm": 0.8340529203414917, "learning_rate": 7.21888251114353e-06, "loss": 0.13732528686523438, "step": 2870 }, { "epoch": 0.40005573747648576, "grad_norm": 1.088769555091858, "learning_rate": 7.2167702624924345e-06, "loss": 0.15545654296875, "step": 2871 }, { "epoch": 0.40019508116770014, "grad_norm": 1.6324158906936646, "learning_rate": 7.2146575213255945e-06, "loss": 0.17935943603515625, "step": 2872 }, { "epoch": 0.4003344248589145, "grad_norm": 0.6942306756973267, "learning_rate": 7.212544288112415e-06, "loss": 0.1252899169921875, "step": 2873 }, { "epoch": 0.4004737685501289, "grad_norm": 0.6130203604698181, "learning_rate": 7.21043056332241e-06, "loss": 0.1072235107421875, "step": 2874 }, { "epoch": 0.40061311224134327, "grad_norm": 2.218186378479004, "learning_rate": 7.208316347425197e-06, "loss": 0.18365097045898438, "step": 2875 }, { "epoch": 0.40075245593255765, "grad_norm": 1.3829782009124756, "learning_rate": 7.206201640890509e-06, "loss": 0.1699371337890625, "step": 2876 }, { "epoch": 0.400891799623772, "grad_norm": 1.2144603729248047, "learning_rate": 7.204086444188184e-06, "loss": 0.14653778076171875, "step": 2877 }, { "epoch": 0.4010311433149864, "grad_norm": 1.025233507156372, "learning_rate": 7.201970757788172e-06, "loss": 0.12840652465820312, "step": 2878 }, { "epoch": 0.4011704870062008, "grad_norm": 1.3493320941925049, "learning_rate": 7.199854582160529e-06, "loss": 0.14854049682617188, "step": 2879 }, { "epoch": 0.40130983069741516, "grad_norm": 1.1800957918167114, "learning_rate": 7.197737917775422e-06, "loss": 0.1517791748046875, "step": 2880 }, { "epoch": 0.40144917438862954, "grad_norm": 1.3587335348129272, "learning_rate": 7.1956207651031254e-06, "loss": 0.17061233520507812, "step": 2881 }, { "epoch": 0.4015885180798439, "grad_norm": 0.8954737186431885, "learning_rate": 7.193503124614021e-06, "loss": 0.17142105102539062, "step": 2882 }, { "epoch": 0.4017278617710583, "grad_norm": 1.035774827003479, "learning_rate": 7.191384996778601e-06, "loss": 0.14861679077148438, "step": 2883 }, { "epoch": 0.40186720546227267, "grad_norm": 1.1347475051879883, "learning_rate": 7.189266382067464e-06, "loss": 0.15088272094726562, "step": 2884 }, { "epoch": 0.4020065491534871, "grad_norm": 0.7848290205001831, "learning_rate": 7.1871472809513185e-06, "loss": 0.12401962280273438, "step": 2885 }, { "epoch": 0.4021458928447015, "grad_norm": 1.5565663576126099, "learning_rate": 7.185027693900982e-06, "loss": 0.15962600708007812, "step": 2886 }, { "epoch": 0.40228523653591586, "grad_norm": 1.0283820629119873, "learning_rate": 7.182907621387376e-06, "loss": 0.16282272338867188, "step": 2887 }, { "epoch": 0.40242458022713024, "grad_norm": 0.47646573185920715, "learning_rate": 7.180787063881534e-06, "loss": 0.109100341796875, "step": 2888 }, { "epoch": 0.4025639239183446, "grad_norm": 1.015351414680481, "learning_rate": 7.178666021854593e-06, "loss": 0.14061355590820312, "step": 2889 }, { "epoch": 0.402703267609559, "grad_norm": 0.6768199801445007, "learning_rate": 7.176544495777804e-06, "loss": 0.1273365020751953, "step": 2890 }, { "epoch": 0.40284261130077337, "grad_norm": 1.5387827157974243, "learning_rate": 7.174422486122517e-06, "loss": 0.1434326171875, "step": 2891 }, { "epoch": 0.40298195499198775, "grad_norm": 0.9457686543464661, "learning_rate": 7.1722999933602e-06, "loss": 0.12959861755371094, "step": 2892 }, { "epoch": 0.4031212986832021, "grad_norm": 1.6586017608642578, "learning_rate": 7.170177017962415e-06, "loss": 0.16646194458007812, "step": 2893 }, { "epoch": 0.4032606423744165, "grad_norm": 0.9724928140640259, "learning_rate": 7.168053560400845e-06, "loss": 0.13738250732421875, "step": 2894 }, { "epoch": 0.4033999860656309, "grad_norm": 1.359948992729187, "learning_rate": 7.16592962114727e-06, "loss": 0.1975555419921875, "step": 2895 }, { "epoch": 0.40353932975684526, "grad_norm": 0.9932147860527039, "learning_rate": 7.163805200673584e-06, "loss": 0.15630722045898438, "step": 2896 }, { "epoch": 0.40367867344805963, "grad_norm": 1.3172552585601807, "learning_rate": 7.161680299451782e-06, "loss": 0.18577194213867188, "step": 2897 }, { "epoch": 0.403818017139274, "grad_norm": 1.4221051931381226, "learning_rate": 7.159554917953968e-06, "loss": 0.12181472778320312, "step": 2898 }, { "epoch": 0.4039573608304884, "grad_norm": 1.1094651222229004, "learning_rate": 7.157429056652357e-06, "loss": 0.17737579345703125, "step": 2899 }, { "epoch": 0.40409670452170277, "grad_norm": 1.1352607011795044, "learning_rate": 7.155302716019263e-06, "loss": 0.10690689086914062, "step": 2900 }, { "epoch": 0.40423604821291714, "grad_norm": 1.8597396612167358, "learning_rate": 7.153175896527112e-06, "loss": 0.14812469482421875, "step": 2901 }, { "epoch": 0.4043753919041315, "grad_norm": 1.1509099006652832, "learning_rate": 7.151048598648436e-06, "loss": 0.11033821105957031, "step": 2902 }, { "epoch": 0.4045147355953459, "grad_norm": 0.6724464297294617, "learning_rate": 7.148920822855869e-06, "loss": 0.13380813598632812, "step": 2903 }, { "epoch": 0.4046540792865603, "grad_norm": 0.8540085554122925, "learning_rate": 7.146792569622157e-06, "loss": 0.1610870361328125, "step": 2904 }, { "epoch": 0.4047934229777747, "grad_norm": 0.8161090612411499, "learning_rate": 7.144663839420147e-06, "loss": 0.16601181030273438, "step": 2905 }, { "epoch": 0.4049327666689891, "grad_norm": 0.8630205392837524, "learning_rate": 7.142534632722797e-06, "loss": 0.13640975952148438, "step": 2906 }, { "epoch": 0.40507211036020346, "grad_norm": 1.1293656826019287, "learning_rate": 7.140404950003164e-06, "loss": 0.15340805053710938, "step": 2907 }, { "epoch": 0.40521145405141784, "grad_norm": 1.3989681005477905, "learning_rate": 7.138274791734421e-06, "loss": 0.16937255859375, "step": 2908 }, { "epoch": 0.4053507977426322, "grad_norm": 1.1691193580627441, "learning_rate": 7.136144158389834e-06, "loss": 0.13869094848632812, "step": 2909 }, { "epoch": 0.4054901414338466, "grad_norm": 1.7292951345443726, "learning_rate": 7.134013050442785e-06, "loss": 0.13205337524414062, "step": 2910 }, { "epoch": 0.405629485125061, "grad_norm": 1.432829737663269, "learning_rate": 7.1318814683667555e-06, "loss": 0.19389724731445312, "step": 2911 }, { "epoch": 0.40576882881627535, "grad_norm": 0.6993548274040222, "learning_rate": 7.129749412635337e-06, "loss": 0.1210174560546875, "step": 2912 }, { "epoch": 0.40590817250748973, "grad_norm": 0.7410462498664856, "learning_rate": 7.1276168837222215e-06, "loss": 0.12318801879882812, "step": 2913 }, { "epoch": 0.4060475161987041, "grad_norm": 1.1024768352508545, "learning_rate": 7.125483882101208e-06, "loss": 0.14825820922851562, "step": 2914 }, { "epoch": 0.4061868598899185, "grad_norm": 1.0105444192886353, "learning_rate": 7.123350408246203e-06, "loss": 0.139434814453125, "step": 2915 }, { "epoch": 0.40632620358113286, "grad_norm": 1.1317286491394043, "learning_rate": 7.121216462631213e-06, "loss": 0.14653396606445312, "step": 2916 }, { "epoch": 0.40646554727234724, "grad_norm": 0.45743149518966675, "learning_rate": 7.1190820457303535e-06, "loss": 0.09906005859375, "step": 2917 }, { "epoch": 0.4066048909635616, "grad_norm": 0.8541709184646606, "learning_rate": 7.116947158017842e-06, "loss": 0.1723480224609375, "step": 2918 }, { "epoch": 0.406744234654776, "grad_norm": 1.169371485710144, "learning_rate": 7.114811799968005e-06, "loss": 0.15962600708007812, "step": 2919 }, { "epoch": 0.40688357834599037, "grad_norm": 0.945003867149353, "learning_rate": 7.1126759720552665e-06, "loss": 0.14162445068359375, "step": 2920 }, { "epoch": 0.40702292203720475, "grad_norm": 0.9800986051559448, "learning_rate": 7.11053967475416e-06, "loss": 0.12112998962402344, "step": 2921 }, { "epoch": 0.4071622657284191, "grad_norm": 0.9325768947601318, "learning_rate": 7.108402908539323e-06, "loss": 0.14742279052734375, "step": 2922 }, { "epoch": 0.4073016094196335, "grad_norm": 0.8918681740760803, "learning_rate": 7.106265673885494e-06, "loss": 0.11899948120117188, "step": 2923 }, { "epoch": 0.4074409531108479, "grad_norm": 0.9986909627914429, "learning_rate": 7.104127971267521e-06, "loss": 0.1261444091796875, "step": 2924 }, { "epoch": 0.4075802968020623, "grad_norm": 1.105690360069275, "learning_rate": 7.10198980116035e-06, "loss": 0.13072586059570312, "step": 2925 }, { "epoch": 0.4077196404932767, "grad_norm": 1.1097536087036133, "learning_rate": 7.099851164039035e-06, "loss": 0.14923477172851562, "step": 2926 }, { "epoch": 0.40785898418449107, "grad_norm": 1.194473385810852, "learning_rate": 7.0977120603787296e-06, "loss": 0.13127708435058594, "step": 2927 }, { "epoch": 0.40799832787570545, "grad_norm": 1.9144763946533203, "learning_rate": 7.095572490654698e-06, "loss": 0.2017974853515625, "step": 2928 }, { "epoch": 0.4081376715669198, "grad_norm": 1.1481163501739502, "learning_rate": 7.0934324553423015e-06, "loss": 0.14397811889648438, "step": 2929 }, { "epoch": 0.4082770152581342, "grad_norm": 1.8678792715072632, "learning_rate": 7.091291954917007e-06, "loss": 0.16750717163085938, "step": 2930 }, { "epoch": 0.4084163589493486, "grad_norm": 1.4189671277999878, "learning_rate": 7.089150989854385e-06, "loss": 0.18194961547851562, "step": 2931 }, { "epoch": 0.40855570264056296, "grad_norm": 0.8037697672843933, "learning_rate": 7.0870095606301095e-06, "loss": 0.13674545288085938, "step": 2932 }, { "epoch": 0.40869504633177733, "grad_norm": 1.8768061399459839, "learning_rate": 7.084867667719957e-06, "loss": 0.1796722412109375, "step": 2933 }, { "epoch": 0.4088343900229917, "grad_norm": 1.3136883974075317, "learning_rate": 7.082725311599808e-06, "loss": 0.17306900024414062, "step": 2934 }, { "epoch": 0.4089737337142061, "grad_norm": 1.1845306158065796, "learning_rate": 7.080582492745642e-06, "loss": 0.12459945678710938, "step": 2935 }, { "epoch": 0.40911307740542047, "grad_norm": 1.5068968534469604, "learning_rate": 7.0784392116335475e-06, "loss": 0.1644878387451172, "step": 2936 }, { "epoch": 0.40925242109663484, "grad_norm": 1.13645601272583, "learning_rate": 7.076295468739711e-06, "loss": 0.1788787841796875, "step": 2937 }, { "epoch": 0.4093917647878492, "grad_norm": 1.6394860744476318, "learning_rate": 7.074151264540425e-06, "loss": 0.20809555053710938, "step": 2938 }, { "epoch": 0.4095311084790636, "grad_norm": 0.9304131269454956, "learning_rate": 7.0720065995120815e-06, "loss": 0.15156936645507812, "step": 2939 }, { "epoch": 0.409670452170278, "grad_norm": 1.3979344367980957, "learning_rate": 7.069861474131176e-06, "loss": 0.12285995483398438, "step": 2940 }, { "epoch": 0.40980979586149235, "grad_norm": 0.7453914284706116, "learning_rate": 7.067715888874307e-06, "loss": 0.138427734375, "step": 2941 }, { "epoch": 0.40994913955270673, "grad_norm": 0.9824515581130981, "learning_rate": 7.065569844218175e-06, "loss": 0.14014244079589844, "step": 2942 }, { "epoch": 0.4100884832439211, "grad_norm": 0.6987271904945374, "learning_rate": 7.0634233406395806e-06, "loss": 0.12274551391601562, "step": 2943 }, { "epoch": 0.4102278269351355, "grad_norm": 1.0058200359344482, "learning_rate": 7.061276378615428e-06, "loss": 0.15494918823242188, "step": 2944 }, { "epoch": 0.4103671706263499, "grad_norm": 0.9406147599220276, "learning_rate": 7.059128958622725e-06, "loss": 0.16463470458984375, "step": 2945 }, { "epoch": 0.4105065143175643, "grad_norm": 1.1782177686691284, "learning_rate": 7.056981081138578e-06, "loss": 0.17244338989257812, "step": 2946 }, { "epoch": 0.4106458580087787, "grad_norm": 1.0858007669448853, "learning_rate": 7.054832746640196e-06, "loss": 0.15594863891601562, "step": 2947 }, { "epoch": 0.41078520169999305, "grad_norm": 1.4570814371109009, "learning_rate": 7.05268395560489e-06, "loss": 0.17195892333984375, "step": 2948 }, { "epoch": 0.41092454539120743, "grad_norm": 0.7921966910362244, "learning_rate": 7.050534708510073e-06, "loss": 0.14913177490234375, "step": 2949 }, { "epoch": 0.4110638890824218, "grad_norm": 0.7909671068191528, "learning_rate": 7.048385005833258e-06, "loss": 0.116424560546875, "step": 2950 }, { "epoch": 0.4112032327736362, "grad_norm": 1.1514332294464111, "learning_rate": 7.04623484805206e-06, "loss": 0.14178085327148438, "step": 2951 }, { "epoch": 0.41134257646485056, "grad_norm": 0.7645600438117981, "learning_rate": 7.044084235644196e-06, "loss": 0.13346099853515625, "step": 2952 }, { "epoch": 0.41148192015606494, "grad_norm": 0.8380874991416931, "learning_rate": 7.041933169087482e-06, "loss": 0.1354522705078125, "step": 2953 }, { "epoch": 0.4116212638472793, "grad_norm": 1.3997883796691895, "learning_rate": 7.039781648859836e-06, "loss": 0.17472076416015625, "step": 2954 }, { "epoch": 0.4117606075384937, "grad_norm": 1.0067917108535767, "learning_rate": 7.037629675439276e-06, "loss": 0.12067604064941406, "step": 2955 }, { "epoch": 0.4118999512297081, "grad_norm": 0.9297123551368713, "learning_rate": 7.035477249303923e-06, "loss": 0.16356277465820312, "step": 2956 }, { "epoch": 0.41203929492092245, "grad_norm": 0.7896031141281128, "learning_rate": 7.033324370931993e-06, "loss": 0.12726974487304688, "step": 2957 }, { "epoch": 0.4121786386121368, "grad_norm": 0.6642199754714966, "learning_rate": 7.031171040801813e-06, "loss": 0.13860702514648438, "step": 2958 }, { "epoch": 0.4123179823033512, "grad_norm": 1.178234338760376, "learning_rate": 7.029017259391797e-06, "loss": 0.133819580078125, "step": 2959 }, { "epoch": 0.4124573259945656, "grad_norm": 1.124151349067688, "learning_rate": 7.026863027180472e-06, "loss": 0.16887664794921875, "step": 2960 }, { "epoch": 0.41259666968577996, "grad_norm": 1.8208963871002197, "learning_rate": 7.024708344646455e-06, "loss": 0.16088104248046875, "step": 2961 }, { "epoch": 0.41273601337699434, "grad_norm": 1.0805786848068237, "learning_rate": 7.022553212268469e-06, "loss": 0.15357208251953125, "step": 2962 }, { "epoch": 0.4128753570682087, "grad_norm": 0.8227380514144897, "learning_rate": 7.020397630525336e-06, "loss": 0.14687347412109375, "step": 2963 }, { "epoch": 0.4130147007594231, "grad_norm": 0.694649875164032, "learning_rate": 7.018241599895974e-06, "loss": 0.1435985565185547, "step": 2964 }, { "epoch": 0.41315404445063747, "grad_norm": 0.6407105922698975, "learning_rate": 7.016085120859406e-06, "loss": 0.14596939086914062, "step": 2965 }, { "epoch": 0.4132933881418519, "grad_norm": 0.98707515001297, "learning_rate": 7.013928193894753e-06, "loss": 0.13858795166015625, "step": 2966 }, { "epoch": 0.4134327318330663, "grad_norm": 0.7747942805290222, "learning_rate": 7.011770819481234e-06, "loss": 0.15267181396484375, "step": 2967 }, { "epoch": 0.41357207552428066, "grad_norm": 1.0949169397354126, "learning_rate": 7.0096129980981674e-06, "loss": 0.14068603515625, "step": 2968 }, { "epoch": 0.41371141921549504, "grad_norm": 0.907581090927124, "learning_rate": 7.0074547302249755e-06, "loss": 0.151458740234375, "step": 2969 }, { "epoch": 0.4138507629067094, "grad_norm": 0.6231398582458496, "learning_rate": 7.005296016341171e-06, "loss": 0.10750198364257812, "step": 2970 }, { "epoch": 0.4139901065979238, "grad_norm": 1.0448153018951416, "learning_rate": 7.003136856926374e-06, "loss": 0.13151931762695312, "step": 2971 }, { "epoch": 0.41412945028913817, "grad_norm": 1.7671732902526855, "learning_rate": 7.0009772524603e-06, "loss": 0.14021682739257812, "step": 2972 }, { "epoch": 0.41426879398035255, "grad_norm": 1.3484430313110352, "learning_rate": 6.998817203422763e-06, "loss": 0.15404510498046875, "step": 2973 }, { "epoch": 0.4144081376715669, "grad_norm": 1.2786076068878174, "learning_rate": 6.996656710293679e-06, "loss": 0.15919876098632812, "step": 2974 }, { "epoch": 0.4145474813627813, "grad_norm": 1.0279244184494019, "learning_rate": 6.994495773553056e-06, "loss": 0.14958572387695312, "step": 2975 }, { "epoch": 0.4146868250539957, "grad_norm": 0.9880893230438232, "learning_rate": 6.992334393681008e-06, "loss": 0.13668441772460938, "step": 2976 }, { "epoch": 0.41482616874521006, "grad_norm": 0.6134176850318909, "learning_rate": 6.990172571157744e-06, "loss": 0.10850143432617188, "step": 2977 }, { "epoch": 0.41496551243642443, "grad_norm": 0.8688188791275024, "learning_rate": 6.988010306463571e-06, "loss": 0.1668701171875, "step": 2978 }, { "epoch": 0.4151048561276388, "grad_norm": 1.2720142602920532, "learning_rate": 6.985847600078894e-06, "loss": 0.17913055419921875, "step": 2979 }, { "epoch": 0.4152441998188532, "grad_norm": 1.3740111589431763, "learning_rate": 6.98368445248422e-06, "loss": 0.15148162841796875, "step": 2980 }, { "epoch": 0.41538354351006757, "grad_norm": 0.8856058120727539, "learning_rate": 6.981520864160147e-06, "loss": 0.12775421142578125, "step": 2981 }, { "epoch": 0.41552288720128194, "grad_norm": 1.204453945159912, "learning_rate": 6.979356835587377e-06, "loss": 0.141754150390625, "step": 2982 }, { "epoch": 0.4156622308924963, "grad_norm": 1.147149682044983, "learning_rate": 6.977192367246709e-06, "loss": 0.1434173583984375, "step": 2983 }, { "epoch": 0.4158015745837107, "grad_norm": 0.6717551350593567, "learning_rate": 6.9750274596190344e-06, "loss": 0.13326644897460938, "step": 2984 }, { "epoch": 0.4159409182749251, "grad_norm": 1.1370980739593506, "learning_rate": 6.972862113185353e-06, "loss": 0.16159439086914062, "step": 2985 }, { "epoch": 0.4160802619661395, "grad_norm": 1.3025368452072144, "learning_rate": 6.970696328426749e-06, "loss": 0.15066146850585938, "step": 2986 }, { "epoch": 0.4162196056573539, "grad_norm": 1.228977084159851, "learning_rate": 6.968530105824413e-06, "loss": 0.14551544189453125, "step": 2987 }, { "epoch": 0.41635894934856826, "grad_norm": 0.7209714651107788, "learning_rate": 6.966363445859629e-06, "loss": 0.10952568054199219, "step": 2988 }, { "epoch": 0.41649829303978264, "grad_norm": 0.8690730333328247, "learning_rate": 6.96419634901378e-06, "loss": 0.11856460571289062, "step": 2989 }, { "epoch": 0.416637636730997, "grad_norm": 0.7010977864265442, "learning_rate": 6.962028815768347e-06, "loss": 0.1442108154296875, "step": 2990 }, { "epoch": 0.4167769804222114, "grad_norm": 0.8587853908538818, "learning_rate": 6.959860846604903e-06, "loss": 0.14003753662109375, "step": 2991 }, { "epoch": 0.4169163241134258, "grad_norm": 0.8702548742294312, "learning_rate": 6.957692442005126e-06, "loss": 0.1550312042236328, "step": 2992 }, { "epoch": 0.41705566780464015, "grad_norm": 1.4755101203918457, "learning_rate": 6.95552360245078e-06, "loss": 0.156097412109375, "step": 2993 }, { "epoch": 0.41719501149585453, "grad_norm": 0.8902637362480164, "learning_rate": 6.953354328423737e-06, "loss": 0.12131118774414062, "step": 2994 }, { "epoch": 0.4173343551870689, "grad_norm": 0.829564094543457, "learning_rate": 6.951184620405958e-06, "loss": 0.15793228149414062, "step": 2995 }, { "epoch": 0.4174736988782833, "grad_norm": 1.4542872905731201, "learning_rate": 6.949014478879502e-06, "loss": 0.17529296875, "step": 2996 }, { "epoch": 0.41761304256949766, "grad_norm": 1.5100581645965576, "learning_rate": 6.946843904326527e-06, "loss": 0.16534423828125, "step": 2997 }, { "epoch": 0.41775238626071204, "grad_norm": 1.1736959218978882, "learning_rate": 6.944672897229282e-06, "loss": 0.14499473571777344, "step": 2998 }, { "epoch": 0.4178917299519264, "grad_norm": 0.8905315399169922, "learning_rate": 6.942501458070117e-06, "loss": 0.14357376098632812, "step": 2999 }, { "epoch": 0.4180310736431408, "grad_norm": 0.6535256505012512, "learning_rate": 6.940329587331477e-06, "loss": 0.11371231079101562, "step": 3000 }, { "epoch": 0.41817041733435517, "grad_norm": 1.4501138925552368, "learning_rate": 6.938157285495901e-06, "loss": 0.12603378295898438, "step": 3001 }, { "epoch": 0.41830976102556955, "grad_norm": 0.6167706251144409, "learning_rate": 6.935984553046025e-06, "loss": 0.10597991943359375, "step": 3002 }, { "epoch": 0.4184491047167839, "grad_norm": 0.6119076013565063, "learning_rate": 6.93381139046458e-06, "loss": 0.12076187133789062, "step": 3003 }, { "epoch": 0.4185884484079983, "grad_norm": 0.5482880473136902, "learning_rate": 6.931637798234394e-06, "loss": 0.10852432250976562, "step": 3004 }, { "epoch": 0.4187277920992127, "grad_norm": 0.7660735845565796, "learning_rate": 6.929463776838389e-06, "loss": 0.14979934692382812, "step": 3005 }, { "epoch": 0.4188671357904271, "grad_norm": 0.9671830534934998, "learning_rate": 6.927289326759585e-06, "loss": 0.13266754150390625, "step": 3006 }, { "epoch": 0.4190064794816415, "grad_norm": 0.8835368156433105, "learning_rate": 6.925114448481089e-06, "loss": 0.15137863159179688, "step": 3007 }, { "epoch": 0.41914582317285587, "grad_norm": 2.0251471996307373, "learning_rate": 6.922939142486118e-06, "loss": 0.170166015625, "step": 3008 }, { "epoch": 0.41928516686407025, "grad_norm": 0.7953140735626221, "learning_rate": 6.9207634092579686e-06, "loss": 0.12932968139648438, "step": 3009 }, { "epoch": 0.4194245105552846, "grad_norm": 0.9714140295982361, "learning_rate": 6.9185872492800434e-06, "loss": 0.14867019653320312, "step": 3010 }, { "epoch": 0.419563854246499, "grad_norm": 0.9224780797958374, "learning_rate": 6.916410663035832e-06, "loss": 0.14846420288085938, "step": 3011 }, { "epoch": 0.4197031979377134, "grad_norm": 0.8664102554321289, "learning_rate": 6.9142336510089235e-06, "loss": 0.15195083618164062, "step": 3012 }, { "epoch": 0.41984254162892776, "grad_norm": 0.8452885746955872, "learning_rate": 6.912056213683001e-06, "loss": 0.1400299072265625, "step": 3013 }, { "epoch": 0.41998188532014213, "grad_norm": 0.8141997456550598, "learning_rate": 6.909878351541841e-06, "loss": 0.12184906005859375, "step": 3014 }, { "epoch": 0.4201212290113565, "grad_norm": 0.7228703498840332, "learning_rate": 6.907700065069315e-06, "loss": 0.11428451538085938, "step": 3015 }, { "epoch": 0.4202605727025709, "grad_norm": 1.1157867908477783, "learning_rate": 6.905521354749387e-06, "loss": 0.1389598846435547, "step": 3016 }, { "epoch": 0.42039991639378527, "grad_norm": 1.3023467063903809, "learning_rate": 6.90334222106612e-06, "loss": 0.11229324340820312, "step": 3017 }, { "epoch": 0.42053926008499964, "grad_norm": 0.8240524530410767, "learning_rate": 6.901162664503662e-06, "loss": 0.134002685546875, "step": 3018 }, { "epoch": 0.420678603776214, "grad_norm": 0.9615700840950012, "learning_rate": 6.898982685546267e-06, "loss": 0.1390533447265625, "step": 3019 }, { "epoch": 0.4208179474674284, "grad_norm": 1.3066587448120117, "learning_rate": 6.896802284678273e-06, "loss": 0.1681671142578125, "step": 3020 }, { "epoch": 0.4209572911586428, "grad_norm": 1.1180152893066406, "learning_rate": 6.894621462384116e-06, "loss": 0.1408233642578125, "step": 3021 }, { "epoch": 0.42109663484985715, "grad_norm": 0.9615068435668945, "learning_rate": 6.8924402191483245e-06, "loss": 0.13535308837890625, "step": 3022 }, { "epoch": 0.42123597854107153, "grad_norm": 0.8527628183364868, "learning_rate": 6.890258555455521e-06, "loss": 0.153839111328125, "step": 3023 }, { "epoch": 0.4213753222322859, "grad_norm": 1.2621568441390991, "learning_rate": 6.888076471790423e-06, "loss": 0.18482589721679688, "step": 3024 }, { "epoch": 0.4215146659235003, "grad_norm": 0.9896140694618225, "learning_rate": 6.8858939686378376e-06, "loss": 0.1766357421875, "step": 3025 }, { "epoch": 0.4216540096147147, "grad_norm": 0.9956377744674683, "learning_rate": 6.8837110464826685e-06, "loss": 0.1499156951904297, "step": 3026 }, { "epoch": 0.4217933533059291, "grad_norm": 1.0182384252548218, "learning_rate": 6.881527705809912e-06, "loss": 0.16110992431640625, "step": 3027 }, { "epoch": 0.4219326969971435, "grad_norm": 1.3530652523040771, "learning_rate": 6.879343947104653e-06, "loss": 0.16432952880859375, "step": 3028 }, { "epoch": 0.42207204068835785, "grad_norm": 1.2457096576690674, "learning_rate": 6.8771597708520766e-06, "loss": 0.15085601806640625, "step": 3029 }, { "epoch": 0.42221138437957223, "grad_norm": 1.3088117837905884, "learning_rate": 6.874975177537455e-06, "loss": 0.15793609619140625, "step": 3030 }, { "epoch": 0.4223507280707866, "grad_norm": 1.8729602098464966, "learning_rate": 6.872790167646155e-06, "loss": 0.17541122436523438, "step": 3031 }, { "epoch": 0.422490071762001, "grad_norm": 1.0969092845916748, "learning_rate": 6.870604741663638e-06, "loss": 0.16513442993164062, "step": 3032 }, { "epoch": 0.42262941545321536, "grad_norm": 0.7656052112579346, "learning_rate": 6.868418900075452e-06, "loss": 0.130859375, "step": 3033 }, { "epoch": 0.42276875914442974, "grad_norm": 0.6529524922370911, "learning_rate": 6.866232643367243e-06, "loss": 0.12723159790039062, "step": 3034 }, { "epoch": 0.4229081028356441, "grad_norm": 0.49959689378738403, "learning_rate": 6.864045972024749e-06, "loss": 0.1171875, "step": 3035 }, { "epoch": 0.4230474465268585, "grad_norm": 1.0178245306015015, "learning_rate": 6.861858886533796e-06, "loss": 0.14005279541015625, "step": 3036 }, { "epoch": 0.4231867902180729, "grad_norm": 2.0071699619293213, "learning_rate": 6.859671387380307e-06, "loss": 0.18712615966796875, "step": 3037 }, { "epoch": 0.42332613390928725, "grad_norm": 1.1518471240997314, "learning_rate": 6.85748347505029e-06, "loss": 0.1162872314453125, "step": 3038 }, { "epoch": 0.4234654776005016, "grad_norm": 1.0704354047775269, "learning_rate": 6.855295150029853e-06, "loss": 0.15564346313476562, "step": 3039 }, { "epoch": 0.423604821291716, "grad_norm": 0.7643802762031555, "learning_rate": 6.853106412805192e-06, "loss": 0.12711715698242188, "step": 3040 }, { "epoch": 0.4237441649829304, "grad_norm": 0.9388693571090698, "learning_rate": 6.850917263862591e-06, "loss": 0.16225051879882812, "step": 3041 }, { "epoch": 0.42388350867414476, "grad_norm": 0.6116604804992676, "learning_rate": 6.848727703688432e-06, "loss": 0.13646697998046875, "step": 3042 }, { "epoch": 0.42402285236535914, "grad_norm": 0.8457749485969543, "learning_rate": 6.846537732769185e-06, "loss": 0.14871978759765625, "step": 3043 }, { "epoch": 0.4241621960565735, "grad_norm": 1.6107566356658936, "learning_rate": 6.8443473515914105e-06, "loss": 0.16263580322265625, "step": 3044 }, { "epoch": 0.4243015397477879, "grad_norm": 0.7213291525840759, "learning_rate": 6.842156560641762e-06, "loss": 0.131195068359375, "step": 3045 }, { "epoch": 0.4244408834390023, "grad_norm": 0.9309505820274353, "learning_rate": 6.839965360406983e-06, "loss": 0.12283515930175781, "step": 3046 }, { "epoch": 0.4245802271302167, "grad_norm": 0.7644999027252197, "learning_rate": 6.837773751373908e-06, "loss": 0.11954498291015625, "step": 3047 }, { "epoch": 0.4247195708214311, "grad_norm": 0.9760636687278748, "learning_rate": 6.835581734029462e-06, "loss": 0.15321731567382812, "step": 3048 }, { "epoch": 0.42485891451264546, "grad_norm": 0.9460352659225464, "learning_rate": 6.833389308860662e-06, "loss": 0.1263275146484375, "step": 3049 }, { "epoch": 0.42499825820385984, "grad_norm": 0.7505794167518616, "learning_rate": 6.831196476354615e-06, "loss": 0.12876129150390625, "step": 3050 }, { "epoch": 0.4251376018950742, "grad_norm": 1.3616396188735962, "learning_rate": 6.829003236998517e-06, "loss": 0.1692047119140625, "step": 3051 }, { "epoch": 0.4252769455862886, "grad_norm": 0.7010299563407898, "learning_rate": 6.8268095912796574e-06, "loss": 0.10466384887695312, "step": 3052 }, { "epoch": 0.42541628927750297, "grad_norm": 0.9492394328117371, "learning_rate": 6.824615539685413e-06, "loss": 0.1236114501953125, "step": 3053 }, { "epoch": 0.42555563296871735, "grad_norm": 1.5091900825500488, "learning_rate": 6.822421082703253e-06, "loss": 0.17435073852539062, "step": 3054 }, { "epoch": 0.4256949766599317, "grad_norm": 1.1866440773010254, "learning_rate": 6.820226220820733e-06, "loss": 0.14583206176757812, "step": 3055 }, { "epoch": 0.4258343203511461, "grad_norm": 1.2941770553588867, "learning_rate": 6.818030954525505e-06, "loss": 0.14101791381835938, "step": 3056 }, { "epoch": 0.4259736640423605, "grad_norm": 1.1761044263839722, "learning_rate": 6.815835284305304e-06, "loss": 0.17049407958984375, "step": 3057 }, { "epoch": 0.42611300773357486, "grad_norm": 1.3695580959320068, "learning_rate": 6.8136392106479624e-06, "loss": 0.14075851440429688, "step": 3058 }, { "epoch": 0.42625235142478923, "grad_norm": 0.6771361827850342, "learning_rate": 6.81144273404139e-06, "loss": 0.13267898559570312, "step": 3059 }, { "epoch": 0.4263916951160036, "grad_norm": 0.8146265745162964, "learning_rate": 6.8092458549736e-06, "loss": 0.13885498046875, "step": 3060 }, { "epoch": 0.426531038807218, "grad_norm": 0.8292896747589111, "learning_rate": 6.807048573932687e-06, "loss": 0.12963485717773438, "step": 3061 }, { "epoch": 0.42667038249843237, "grad_norm": 2.592806339263916, "learning_rate": 6.8048508914068355e-06, "loss": 0.1768665313720703, "step": 3062 }, { "epoch": 0.42680972618964674, "grad_norm": 1.0982953310012817, "learning_rate": 6.802652807884322e-06, "loss": 0.15025711059570312, "step": 3063 }, { "epoch": 0.4269490698808611, "grad_norm": 0.991356611251831, "learning_rate": 6.80045432385351e-06, "loss": 0.12577056884765625, "step": 3064 }, { "epoch": 0.4270884135720755, "grad_norm": 0.8067721724510193, "learning_rate": 6.798255439802852e-06, "loss": 0.14319229125976562, "step": 3065 }, { "epoch": 0.42722775726328993, "grad_norm": 1.0551011562347412, "learning_rate": 6.796056156220892e-06, "loss": 0.13753509521484375, "step": 3066 }, { "epoch": 0.4273671009545043, "grad_norm": 1.191865086555481, "learning_rate": 6.793856473596256e-06, "loss": 0.1559581756591797, "step": 3067 }, { "epoch": 0.4275064446457187, "grad_norm": 0.7527815699577332, "learning_rate": 6.791656392417666e-06, "loss": 0.13918304443359375, "step": 3068 }, { "epoch": 0.42764578833693306, "grad_norm": 0.5933191180229187, "learning_rate": 6.789455913173933e-06, "loss": 0.1022796630859375, "step": 3069 }, { "epoch": 0.42778513202814744, "grad_norm": 1.1774543523788452, "learning_rate": 6.787255036353947e-06, "loss": 0.13215065002441406, "step": 3070 }, { "epoch": 0.4279244757193618, "grad_norm": 1.5434783697128296, "learning_rate": 6.785053762446696e-06, "loss": 0.1455078125, "step": 3071 }, { "epoch": 0.4280638194105762, "grad_norm": 0.7872225642204285, "learning_rate": 6.782852091941254e-06, "loss": 0.11952590942382812, "step": 3072 }, { "epoch": 0.4282031631017906, "grad_norm": 1.0173605680465698, "learning_rate": 6.780650025326778e-06, "loss": 0.18270111083984375, "step": 3073 }, { "epoch": 0.42834250679300495, "grad_norm": 1.3623530864715576, "learning_rate": 6.778447563092523e-06, "loss": 0.138824462890625, "step": 3074 }, { "epoch": 0.42848185048421933, "grad_norm": 1.2193210124969482, "learning_rate": 6.776244705727818e-06, "loss": 0.1662750244140625, "step": 3075 }, { "epoch": 0.4286211941754337, "grad_norm": 1.6879441738128662, "learning_rate": 6.774041453722093e-06, "loss": 0.20053482055664062, "step": 3076 }, { "epoch": 0.4287605378666481, "grad_norm": 0.9445731043815613, "learning_rate": 6.771837807564861e-06, "loss": 0.14735984802246094, "step": 3077 }, { "epoch": 0.42889988155786246, "grad_norm": 0.7157371044158936, "learning_rate": 6.769633767745718e-06, "loss": 0.13902854919433594, "step": 3078 }, { "epoch": 0.42903922524907684, "grad_norm": 1.162217378616333, "learning_rate": 6.767429334754354e-06, "loss": 0.15695571899414062, "step": 3079 }, { "epoch": 0.4291785689402912, "grad_norm": 1.837258219718933, "learning_rate": 6.7652245090805426e-06, "loss": 0.185028076171875, "step": 3080 }, { "epoch": 0.4293179126315056, "grad_norm": 0.9939569234848022, "learning_rate": 6.763019291214146e-06, "loss": 0.1357421875, "step": 3081 }, { "epoch": 0.42945725632271997, "grad_norm": 0.8753823637962341, "learning_rate": 6.760813681645114e-06, "loss": 0.1450042724609375, "step": 3082 }, { "epoch": 0.42959660001393435, "grad_norm": 1.2247778177261353, "learning_rate": 6.758607680863481e-06, "loss": 0.16672134399414062, "step": 3083 }, { "epoch": 0.4297359437051487, "grad_norm": 1.8135799169540405, "learning_rate": 6.756401289359371e-06, "loss": 0.15258026123046875, "step": 3084 }, { "epoch": 0.4298752873963631, "grad_norm": 1.1596832275390625, "learning_rate": 6.754194507622995e-06, "loss": 0.137451171875, "step": 3085 }, { "epoch": 0.43001463108757754, "grad_norm": 0.9514084458351135, "learning_rate": 6.7519873361446475e-06, "loss": 0.1483306884765625, "step": 3086 }, { "epoch": 0.4301539747787919, "grad_norm": 1.4532369375228882, "learning_rate": 6.7497797754147134e-06, "loss": 0.15105819702148438, "step": 3087 }, { "epoch": 0.4302933184700063, "grad_norm": 1.0749326944351196, "learning_rate": 6.74757182592366e-06, "loss": 0.14184951782226562, "step": 3088 }, { "epoch": 0.43043266216122067, "grad_norm": 1.3598055839538574, "learning_rate": 6.7453634881620445e-06, "loss": 0.16957855224609375, "step": 3089 }, { "epoch": 0.43057200585243505, "grad_norm": 0.7091758251190186, "learning_rate": 6.743154762620511e-06, "loss": 0.12413787841796875, "step": 3090 }, { "epoch": 0.4307113495436494, "grad_norm": 1.288082480430603, "learning_rate": 6.740945649789784e-06, "loss": 0.15184783935546875, "step": 3091 }, { "epoch": 0.4308506932348638, "grad_norm": 1.0556950569152832, "learning_rate": 6.738736150160681e-06, "loss": 0.1583404541015625, "step": 3092 }, { "epoch": 0.4309900369260782, "grad_norm": 0.9912159442901611, "learning_rate": 6.736526264224101e-06, "loss": 0.1357421875, "step": 3093 }, { "epoch": 0.43112938061729256, "grad_norm": 0.459882915019989, "learning_rate": 6.734315992471032e-06, "loss": 0.11319351196289062, "step": 3094 }, { "epoch": 0.43126872430850693, "grad_norm": 1.5790700912475586, "learning_rate": 6.7321053353925446e-06, "loss": 0.15346527099609375, "step": 3095 }, { "epoch": 0.4314080679997213, "grad_norm": 2.1015853881835938, "learning_rate": 6.729894293479795e-06, "loss": 0.15592575073242188, "step": 3096 }, { "epoch": 0.4315474116909357, "grad_norm": 1.4666138887405396, "learning_rate": 6.727682867224028e-06, "loss": 0.137176513671875, "step": 3097 }, { "epoch": 0.43168675538215007, "grad_norm": 1.202412486076355, "learning_rate": 6.725471057116573e-06, "loss": 0.16091156005859375, "step": 3098 }, { "epoch": 0.43182609907336444, "grad_norm": 2.1460769176483154, "learning_rate": 6.723258863648841e-06, "loss": 0.17094802856445312, "step": 3099 }, { "epoch": 0.4319654427645788, "grad_norm": 1.1892454624176025, "learning_rate": 6.72104628731233e-06, "loss": 0.15761184692382812, "step": 3100 }, { "epoch": 0.4321047864557932, "grad_norm": 1.333451509475708, "learning_rate": 6.718833328598629e-06, "loss": 0.15272903442382812, "step": 3101 }, { "epoch": 0.4322441301470076, "grad_norm": 0.5808300375938416, "learning_rate": 6.716619987999404e-06, "loss": 0.12447738647460938, "step": 3102 }, { "epoch": 0.43238347383822195, "grad_norm": 1.5997529029846191, "learning_rate": 6.714406266006408e-06, "loss": 0.13554763793945312, "step": 3103 }, { "epoch": 0.43252281752943633, "grad_norm": 2.6560065746307373, "learning_rate": 6.712192163111481e-06, "loss": 0.20738601684570312, "step": 3104 }, { "epoch": 0.4326621612206507, "grad_norm": 1.2595351934432983, "learning_rate": 6.709977679806543e-06, "loss": 0.14533042907714844, "step": 3105 }, { "epoch": 0.43280150491186514, "grad_norm": 1.0313074588775635, "learning_rate": 6.707762816583608e-06, "loss": 0.146728515625, "step": 3106 }, { "epoch": 0.4329408486030795, "grad_norm": 1.3402239084243774, "learning_rate": 6.705547573934759e-06, "loss": 0.15302276611328125, "step": 3107 }, { "epoch": 0.4330801922942939, "grad_norm": 1.7266846895217896, "learning_rate": 6.703331952352181e-06, "loss": 0.14663314819335938, "step": 3108 }, { "epoch": 0.4332195359855083, "grad_norm": 1.135130524635315, "learning_rate": 6.70111595232813e-06, "loss": 0.16445541381835938, "step": 3109 }, { "epoch": 0.43335887967672265, "grad_norm": 0.7545472979545593, "learning_rate": 6.6988995743549516e-06, "loss": 0.14209556579589844, "step": 3110 }, { "epoch": 0.43349822336793703, "grad_norm": 0.8868799805641174, "learning_rate": 6.696682818925074e-06, "loss": 0.1467132568359375, "step": 3111 }, { "epoch": 0.4336375670591514, "grad_norm": 1.0091725587844849, "learning_rate": 6.694465686531011e-06, "loss": 0.13587379455566406, "step": 3112 }, { "epoch": 0.4337769107503658, "grad_norm": 2.348688840866089, "learning_rate": 6.692248177665357e-06, "loss": 0.18851470947265625, "step": 3113 }, { "epoch": 0.43391625444158016, "grad_norm": 0.7983863353729248, "learning_rate": 6.690030292820792e-06, "loss": 0.1126556396484375, "step": 3114 }, { "epoch": 0.43405559813279454, "grad_norm": 1.9730207920074463, "learning_rate": 6.687812032490081e-06, "loss": 0.1555023193359375, "step": 3115 }, { "epoch": 0.4341949418240089, "grad_norm": 1.298728346824646, "learning_rate": 6.685593397166069e-06, "loss": 0.14110946655273438, "step": 3116 }, { "epoch": 0.4343342855152233, "grad_norm": 0.5834141969680786, "learning_rate": 6.683374387341688e-06, "loss": 0.11485671997070312, "step": 3117 }, { "epoch": 0.4344736292064377, "grad_norm": 0.7637326121330261, "learning_rate": 6.681155003509949e-06, "loss": 0.13080215454101562, "step": 3118 }, { "epoch": 0.43461297289765205, "grad_norm": 1.3119761943817139, "learning_rate": 6.67893524616395e-06, "loss": 0.17206954956054688, "step": 3119 }, { "epoch": 0.4347523165888664, "grad_norm": 0.9846585988998413, "learning_rate": 6.67671511579687e-06, "loss": 0.1559295654296875, "step": 3120 }, { "epoch": 0.4348916602800808, "grad_norm": 1.014349341392517, "learning_rate": 6.67449461290197e-06, "loss": 0.15436553955078125, "step": 3121 }, { "epoch": 0.4350310039712952, "grad_norm": 0.687850296497345, "learning_rate": 6.6722737379726e-06, "loss": 0.1145172119140625, "step": 3122 }, { "epoch": 0.43517034766250956, "grad_norm": 1.1296601295471191, "learning_rate": 6.670052491502182e-06, "loss": 0.16267013549804688, "step": 3123 }, { "epoch": 0.43530969135372394, "grad_norm": 1.304732084274292, "learning_rate": 6.667830873984228e-06, "loss": 0.17660140991210938, "step": 3124 }, { "epoch": 0.4354490350449383, "grad_norm": 0.8576861023902893, "learning_rate": 6.66560888591233e-06, "loss": 0.14303207397460938, "step": 3125 }, { "epoch": 0.43558837873615275, "grad_norm": 0.7895107865333557, "learning_rate": 6.663386527780166e-06, "loss": 0.16039657592773438, "step": 3126 }, { "epoch": 0.4357277224273671, "grad_norm": 0.867745041847229, "learning_rate": 6.66116380008149e-06, "loss": 0.13723182678222656, "step": 3127 }, { "epoch": 0.4358670661185815, "grad_norm": 2.0470170974731445, "learning_rate": 6.6589407033101435e-06, "loss": 0.195068359375, "step": 3128 }, { "epoch": 0.4360064098097959, "grad_norm": 0.7950080037117004, "learning_rate": 6.656717237960047e-06, "loss": 0.11178207397460938, "step": 3129 }, { "epoch": 0.43614575350101026, "grad_norm": 0.669097363948822, "learning_rate": 6.654493404525204e-06, "loss": 0.12389755249023438, "step": 3130 }, { "epoch": 0.43628509719222464, "grad_norm": 1.1348333358764648, "learning_rate": 6.652269203499699e-06, "loss": 0.12996673583984375, "step": 3131 }, { "epoch": 0.436424440883439, "grad_norm": 1.065250277519226, "learning_rate": 6.650044635377698e-06, "loss": 0.14608001708984375, "step": 3132 }, { "epoch": 0.4365637845746534, "grad_norm": 0.9733928442001343, "learning_rate": 6.64781970065345e-06, "loss": 0.1360607147216797, "step": 3133 }, { "epoch": 0.43670312826586777, "grad_norm": 1.2346855401992798, "learning_rate": 6.645594399821286e-06, "loss": 0.11819648742675781, "step": 3134 }, { "epoch": 0.43684247195708215, "grad_norm": 0.5411527752876282, "learning_rate": 6.6433687333756165e-06, "loss": 0.11474609375, "step": 3135 }, { "epoch": 0.4369818156482965, "grad_norm": 0.7533265948295593, "learning_rate": 6.641142701810932e-06, "loss": 0.11460685729980469, "step": 3136 }, { "epoch": 0.4371211593395109, "grad_norm": 0.7992932796478271, "learning_rate": 6.638916305621807e-06, "loss": 0.14330291748046875, "step": 3137 }, { "epoch": 0.4372605030307253, "grad_norm": 1.3328633308410645, "learning_rate": 6.636689545302898e-06, "loss": 0.15864944458007812, "step": 3138 }, { "epoch": 0.43739984672193966, "grad_norm": 1.1004315614700317, "learning_rate": 6.634462421348935e-06, "loss": 0.155548095703125, "step": 3139 }, { "epoch": 0.43753919041315403, "grad_norm": 1.6167268753051758, "learning_rate": 6.63223493425474e-06, "loss": 0.18482208251953125, "step": 3140 }, { "epoch": 0.4376785341043684, "grad_norm": 0.7523947358131409, "learning_rate": 6.630007084515205e-06, "loss": 0.12921142578125, "step": 3141 }, { "epoch": 0.4378178777955828, "grad_norm": 0.7284949421882629, "learning_rate": 6.627778872625311e-06, "loss": 0.11743545532226562, "step": 3142 }, { "epoch": 0.43795722148679717, "grad_norm": 1.0311983823776245, "learning_rate": 6.625550299080115e-06, "loss": 0.1516876220703125, "step": 3143 }, { "epoch": 0.43809656517801154, "grad_norm": 0.3832229971885681, "learning_rate": 6.6233213643747525e-06, "loss": 0.08745002746582031, "step": 3144 }, { "epoch": 0.4382359088692259, "grad_norm": 0.6782422065734863, "learning_rate": 6.621092069004445e-06, "loss": 0.14290237426757812, "step": 3145 }, { "epoch": 0.4383752525604403, "grad_norm": 0.964012861251831, "learning_rate": 6.618862413464491e-06, "loss": 0.12493896484375, "step": 3146 }, { "epoch": 0.43851459625165473, "grad_norm": 0.8302952647209167, "learning_rate": 6.616632398250266e-06, "loss": 0.14229583740234375, "step": 3147 }, { "epoch": 0.4386539399428691, "grad_norm": 1.0448535680770874, "learning_rate": 6.614402023857231e-06, "loss": 0.11395645141601562, "step": 3148 }, { "epoch": 0.4387932836340835, "grad_norm": 0.8942304849624634, "learning_rate": 6.612171290780925e-06, "loss": 0.11265945434570312, "step": 3149 }, { "epoch": 0.43893262732529786, "grad_norm": 1.012300729751587, "learning_rate": 6.6099401995169635e-06, "loss": 0.14983749389648438, "step": 3150 }, { "epoch": 0.43907197101651224, "grad_norm": 1.0171465873718262, "learning_rate": 6.607708750561046e-06, "loss": 0.14905929565429688, "step": 3151 }, { "epoch": 0.4392113147077266, "grad_norm": 0.9034327864646912, "learning_rate": 6.605476944408948e-06, "loss": 0.13612747192382812, "step": 3152 }, { "epoch": 0.439350658398941, "grad_norm": 1.5602127313613892, "learning_rate": 6.603244781556527e-06, "loss": 0.17706298828125, "step": 3153 }, { "epoch": 0.4394900020901554, "grad_norm": 1.22225821018219, "learning_rate": 6.601012262499718e-06, "loss": 0.15695953369140625, "step": 3154 }, { "epoch": 0.43962934578136975, "grad_norm": 1.0423567295074463, "learning_rate": 6.598779387734535e-06, "loss": 0.17657470703125, "step": 3155 }, { "epoch": 0.43976868947258413, "grad_norm": 1.272886037826538, "learning_rate": 6.596546157757075e-06, "loss": 0.147674560546875, "step": 3156 }, { "epoch": 0.4399080331637985, "grad_norm": 1.2521514892578125, "learning_rate": 6.594312573063506e-06, "loss": 0.14486312866210938, "step": 3157 }, { "epoch": 0.4400473768550129, "grad_norm": 0.9085395336151123, "learning_rate": 6.592078634150084e-06, "loss": 0.15037155151367188, "step": 3158 }, { "epoch": 0.44018672054622726, "grad_norm": 0.9200889468193054, "learning_rate": 6.589844341513137e-06, "loss": 0.12357330322265625, "step": 3159 }, { "epoch": 0.44032606423744164, "grad_norm": 1.022845983505249, "learning_rate": 6.587609695649073e-06, "loss": 0.13176727294921875, "step": 3160 }, { "epoch": 0.440465407928656, "grad_norm": 1.014675259590149, "learning_rate": 6.585374697054382e-06, "loss": 0.1721649169921875, "step": 3161 }, { "epoch": 0.4406047516198704, "grad_norm": 1.2112452983856201, "learning_rate": 6.583139346225627e-06, "loss": 0.13280105590820312, "step": 3162 }, { "epoch": 0.44074409531108477, "grad_norm": 0.9798945188522339, "learning_rate": 6.580903643659453e-06, "loss": 0.128692626953125, "step": 3163 }, { "epoch": 0.44088343900229915, "grad_norm": 1.8982075452804565, "learning_rate": 6.578667589852583e-06, "loss": 0.14063453674316406, "step": 3164 }, { "epoch": 0.4410227826935135, "grad_norm": 1.422831416130066, "learning_rate": 6.576431185301815e-06, "loss": 0.157135009765625, "step": 3165 }, { "epoch": 0.4411621263847279, "grad_norm": 0.9912333488464355, "learning_rate": 6.574194430504027e-06, "loss": 0.13060379028320312, "step": 3166 }, { "epoch": 0.44130147007594234, "grad_norm": 1.638776183128357, "learning_rate": 6.571957325956178e-06, "loss": 0.19222259521484375, "step": 3167 }, { "epoch": 0.4414408137671567, "grad_norm": 0.8477353453636169, "learning_rate": 6.569719872155299e-06, "loss": 0.1532440185546875, "step": 3168 }, { "epoch": 0.4415801574583711, "grad_norm": 1.055118203163147, "learning_rate": 6.567482069598503e-06, "loss": 0.14061737060546875, "step": 3169 }, { "epoch": 0.44171950114958547, "grad_norm": 1.9950981140136719, "learning_rate": 6.565243918782975e-06, "loss": 0.16368484497070312, "step": 3170 }, { "epoch": 0.44185884484079985, "grad_norm": 0.6552330255508423, "learning_rate": 6.563005420205984e-06, "loss": 0.11624526977539062, "step": 3171 }, { "epoch": 0.4419981885320142, "grad_norm": 1.4124119281768799, "learning_rate": 6.560766574364874e-06, "loss": 0.13782882690429688, "step": 3172 }, { "epoch": 0.4421375322232286, "grad_norm": 1.3159890174865723, "learning_rate": 6.558527381757063e-06, "loss": 0.18837356567382812, "step": 3173 }, { "epoch": 0.442276875914443, "grad_norm": 1.4179391860961914, "learning_rate": 6.55628784288005e-06, "loss": 0.1568012237548828, "step": 3174 }, { "epoch": 0.44241621960565736, "grad_norm": 1.0396450757980347, "learning_rate": 6.5540479582314085e-06, "loss": 0.14907073974609375, "step": 3175 }, { "epoch": 0.44255556329687173, "grad_norm": 0.6967904567718506, "learning_rate": 6.55180772830879e-06, "loss": 0.115814208984375, "step": 3176 }, { "epoch": 0.4426949069880861, "grad_norm": 0.8631801605224609, "learning_rate": 6.5495671536099235e-06, "loss": 0.141204833984375, "step": 3177 }, { "epoch": 0.4428342506793005, "grad_norm": 1.1705387830734253, "learning_rate": 6.5473262346326125e-06, "loss": 0.15045547485351562, "step": 3178 }, { "epoch": 0.44297359437051487, "grad_norm": 1.2832427024841309, "learning_rate": 6.545084971874738e-06, "loss": 0.16048431396484375, "step": 3179 }, { "epoch": 0.44311293806172924, "grad_norm": 1.3053680658340454, "learning_rate": 6.542843365834257e-06, "loss": 0.17369842529296875, "step": 3180 }, { "epoch": 0.4432522817529436, "grad_norm": 0.8912508487701416, "learning_rate": 6.540601417009205e-06, "loss": 0.11771011352539062, "step": 3181 }, { "epoch": 0.443391625444158, "grad_norm": 0.9153555631637573, "learning_rate": 6.538359125897691e-06, "loss": 0.15226364135742188, "step": 3182 }, { "epoch": 0.4435309691353724, "grad_norm": 0.8726659417152405, "learning_rate": 6.536116492997899e-06, "loss": 0.14572906494140625, "step": 3183 }, { "epoch": 0.44367031282658675, "grad_norm": 1.3237972259521484, "learning_rate": 6.5338735188080916e-06, "loss": 0.170806884765625, "step": 3184 }, { "epoch": 0.44380965651780113, "grad_norm": 0.9292314648628235, "learning_rate": 6.53163020382661e-06, "loss": 0.1656494140625, "step": 3185 }, { "epoch": 0.4439490002090155, "grad_norm": 0.821452796459198, "learning_rate": 6.529386548551864e-06, "loss": 0.13496017456054688, "step": 3186 }, { "epoch": 0.44408834390022994, "grad_norm": 0.8365475535392761, "learning_rate": 6.5271425534823415e-06, "loss": 0.115325927734375, "step": 3187 }, { "epoch": 0.4442276875914443, "grad_norm": 1.1346570253372192, "learning_rate": 6.524898219116612e-06, "loss": 0.12099075317382812, "step": 3188 }, { "epoch": 0.4443670312826587, "grad_norm": 1.7150070667266846, "learning_rate": 6.522653545953309e-06, "loss": 0.1477508544921875, "step": 3189 }, { "epoch": 0.4445063749738731, "grad_norm": 1.0923326015472412, "learning_rate": 6.520408534491154e-06, "loss": 0.11269569396972656, "step": 3190 }, { "epoch": 0.44464571866508745, "grad_norm": 1.3322739601135254, "learning_rate": 6.518163185228932e-06, "loss": 0.14599227905273438, "step": 3191 }, { "epoch": 0.44478506235630183, "grad_norm": 1.797436237335205, "learning_rate": 6.515917498665511e-06, "loss": 0.20792198181152344, "step": 3192 }, { "epoch": 0.4449244060475162, "grad_norm": 0.7494935393333435, "learning_rate": 6.51367147529983e-06, "loss": 0.11561775207519531, "step": 3193 }, { "epoch": 0.4450637497387306, "grad_norm": 1.0684932470321655, "learning_rate": 6.511425115630906e-06, "loss": 0.132354736328125, "step": 3194 }, { "epoch": 0.44520309342994496, "grad_norm": 0.9758116602897644, "learning_rate": 6.509178420157828e-06, "loss": 0.12443733215332031, "step": 3195 }, { "epoch": 0.44534243712115934, "grad_norm": 0.8896856904029846, "learning_rate": 6.506931389379759e-06, "loss": 0.1466522216796875, "step": 3196 }, { "epoch": 0.4454817808123737, "grad_norm": 1.1964318752288818, "learning_rate": 6.50468402379594e-06, "loss": 0.15860748291015625, "step": 3197 }, { "epoch": 0.4456211245035881, "grad_norm": 1.19602370262146, "learning_rate": 6.502436323905683e-06, "loss": 0.17209625244140625, "step": 3198 }, { "epoch": 0.4457604681948025, "grad_norm": 0.8094944953918457, "learning_rate": 6.500188290208377e-06, "loss": 0.12842941284179688, "step": 3199 }, { "epoch": 0.44589981188601685, "grad_norm": 0.7594572901725769, "learning_rate": 6.49793992320348e-06, "loss": 0.1527385711669922, "step": 3200 }, { "epoch": 0.44603915557723123, "grad_norm": 0.9905003905296326, "learning_rate": 6.495691223390534e-06, "loss": 0.16256332397460938, "step": 3201 }, { "epoch": 0.4461784992684456, "grad_norm": 0.9670624136924744, "learning_rate": 6.4934421912691445e-06, "loss": 0.13169097900390625, "step": 3202 }, { "epoch": 0.44631784295966, "grad_norm": 1.3306949138641357, "learning_rate": 6.4911928273389946e-06, "loss": 0.17804336547851562, "step": 3203 }, { "epoch": 0.44645718665087436, "grad_norm": 0.7348541021347046, "learning_rate": 6.488943132099845e-06, "loss": 0.11239242553710938, "step": 3204 }, { "epoch": 0.44659653034208874, "grad_norm": 0.9178628921508789, "learning_rate": 6.486693106051523e-06, "loss": 0.1730804443359375, "step": 3205 }, { "epoch": 0.4467358740333031, "grad_norm": 0.531450092792511, "learning_rate": 6.484442749693935e-06, "loss": 0.13098907470703125, "step": 3206 }, { "epoch": 0.44687521772451755, "grad_norm": 1.300447702407837, "learning_rate": 6.482192063527058e-06, "loss": 0.153656005859375, "step": 3207 }, { "epoch": 0.4470145614157319, "grad_norm": 0.8280913233757019, "learning_rate": 6.479941048050944e-06, "loss": 0.13096237182617188, "step": 3208 }, { "epoch": 0.4471539051069463, "grad_norm": 1.2107354402542114, "learning_rate": 6.477689703765717e-06, "loss": 0.15505218505859375, "step": 3209 }, { "epoch": 0.4472932487981607, "grad_norm": 0.5874936580657959, "learning_rate": 6.475438031171574e-06, "loss": 0.11655807495117188, "step": 3210 }, { "epoch": 0.44743259248937506, "grad_norm": 0.9695199131965637, "learning_rate": 6.4731860307687845e-06, "loss": 0.14448928833007812, "step": 3211 }, { "epoch": 0.44757193618058944, "grad_norm": 1.1304839849472046, "learning_rate": 6.470933703057693e-06, "loss": 0.14861106872558594, "step": 3212 }, { "epoch": 0.4477112798718038, "grad_norm": 0.9515057802200317, "learning_rate": 6.468681048538715e-06, "loss": 0.136260986328125, "step": 3213 }, { "epoch": 0.4478506235630182, "grad_norm": 2.307559013366699, "learning_rate": 6.4664280677123385e-06, "loss": 0.21489334106445312, "step": 3214 }, { "epoch": 0.44798996725423257, "grad_norm": 0.6398398280143738, "learning_rate": 6.464174761079124e-06, "loss": 0.1285247802734375, "step": 3215 }, { "epoch": 0.44812931094544695, "grad_norm": 1.6603504419326782, "learning_rate": 6.461921129139704e-06, "loss": 0.179656982421875, "step": 3216 }, { "epoch": 0.4482686546366613, "grad_norm": 1.1062822341918945, "learning_rate": 6.459667172394788e-06, "loss": 0.15100479125976562, "step": 3217 }, { "epoch": 0.4484079983278757, "grad_norm": 1.7795580625534058, "learning_rate": 6.4574128913451495e-06, "loss": 0.20954132080078125, "step": 3218 }, { "epoch": 0.4485473420190901, "grad_norm": 2.00362229347229, "learning_rate": 6.455158286491641e-06, "loss": 0.21691131591796875, "step": 3219 }, { "epoch": 0.44868668571030446, "grad_norm": 0.749996542930603, "learning_rate": 6.452903358335182e-06, "loss": 0.15225982666015625, "step": 3220 }, { "epoch": 0.44882602940151883, "grad_norm": 1.3631922006607056, "learning_rate": 6.450648107376767e-06, "loss": 0.22371292114257812, "step": 3221 }, { "epoch": 0.4489653730927332, "grad_norm": 0.6683086156845093, "learning_rate": 6.4483925341174625e-06, "loss": 0.12921905517578125, "step": 3222 }, { "epoch": 0.4491047167839476, "grad_norm": 1.1453883647918701, "learning_rate": 6.4461366390584025e-06, "loss": 0.166961669921875, "step": 3223 }, { "epoch": 0.44924406047516197, "grad_norm": 1.1705139875411987, "learning_rate": 6.443880422700799e-06, "loss": 0.15875244140625, "step": 3224 }, { "epoch": 0.44938340416637634, "grad_norm": 0.7938489317893982, "learning_rate": 6.441623885545929e-06, "loss": 0.10894775390625, "step": 3225 }, { "epoch": 0.4495227478575907, "grad_norm": 1.1368814706802368, "learning_rate": 6.439367028095145e-06, "loss": 0.14845657348632812, "step": 3226 }, { "epoch": 0.44966209154880515, "grad_norm": 0.8717121481895447, "learning_rate": 6.437109850849868e-06, "loss": 0.14548492431640625, "step": 3227 }, { "epoch": 0.44980143524001953, "grad_norm": 1.2582138776779175, "learning_rate": 6.434852354311592e-06, "loss": 0.15570068359375, "step": 3228 }, { "epoch": 0.4499407789312339, "grad_norm": 1.2927857637405396, "learning_rate": 6.432594538981881e-06, "loss": 0.16838836669921875, "step": 3229 }, { "epoch": 0.4500801226224483, "grad_norm": 1.1716554164886475, "learning_rate": 6.430336405362371e-06, "loss": 0.19152069091796875, "step": 3230 }, { "epoch": 0.45021946631366266, "grad_norm": 0.8613409399986267, "learning_rate": 6.428077953954766e-06, "loss": 0.13757705688476562, "step": 3231 }, { "epoch": 0.45035881000487704, "grad_norm": 1.0273512601852417, "learning_rate": 6.425819185260842e-06, "loss": 0.14099884033203125, "step": 3232 }, { "epoch": 0.4504981536960914, "grad_norm": 1.6126601696014404, "learning_rate": 6.42356009978245e-06, "loss": 0.19265174865722656, "step": 3233 }, { "epoch": 0.4506374973873058, "grad_norm": 1.1386725902557373, "learning_rate": 6.421300698021502e-06, "loss": 0.14153289794921875, "step": 3234 }, { "epoch": 0.4507768410785202, "grad_norm": 0.7934890985488892, "learning_rate": 6.419040980479989e-06, "loss": 0.12587738037109375, "step": 3235 }, { "epoch": 0.45091618476973455, "grad_norm": 1.1510006189346313, "learning_rate": 6.416780947659967e-06, "loss": 0.15522003173828125, "step": 3236 }, { "epoch": 0.45105552846094893, "grad_norm": 2.184030771255493, "learning_rate": 6.4145206000635626e-06, "loss": 0.20864486694335938, "step": 3237 }, { "epoch": 0.4511948721521633, "grad_norm": 0.7999956607818604, "learning_rate": 6.412259938192978e-06, "loss": 0.10290145874023438, "step": 3238 }, { "epoch": 0.4513342158433777, "grad_norm": 1.0096956491470337, "learning_rate": 6.4099989625504756e-06, "loss": 0.16553115844726562, "step": 3239 }, { "epoch": 0.45147355953459206, "grad_norm": 0.8760218620300293, "learning_rate": 6.4077376736383954e-06, "loss": 0.12285041809082031, "step": 3240 }, { "epoch": 0.45161290322580644, "grad_norm": 1.1076440811157227, "learning_rate": 6.405476071959142e-06, "loss": 0.18323707580566406, "step": 3241 }, { "epoch": 0.4517522469170208, "grad_norm": 1.0215811729431152, "learning_rate": 6.403214158015194e-06, "loss": 0.12591552734375, "step": 3242 }, { "epoch": 0.4518915906082352, "grad_norm": 1.3623703718185425, "learning_rate": 6.400951932309097e-06, "loss": 0.1663837432861328, "step": 3243 }, { "epoch": 0.45203093429944957, "grad_norm": 0.794085681438446, "learning_rate": 6.3986893953434625e-06, "loss": 0.13108444213867188, "step": 3244 }, { "epoch": 0.45217027799066395, "grad_norm": 1.0290195941925049, "learning_rate": 6.396426547620979e-06, "loss": 0.15102005004882812, "step": 3245 }, { "epoch": 0.4523096216818783, "grad_norm": 0.9692726135253906, "learning_rate": 6.394163389644397e-06, "loss": 0.1450328826904297, "step": 3246 }, { "epoch": 0.45244896537309276, "grad_norm": 1.1954671144485474, "learning_rate": 6.391899921916538e-06, "loss": 0.15040206909179688, "step": 3247 }, { "epoch": 0.45258830906430714, "grad_norm": 1.3398284912109375, "learning_rate": 6.389636144940294e-06, "loss": 0.139617919921875, "step": 3248 }, { "epoch": 0.4527276527555215, "grad_norm": 1.251666784286499, "learning_rate": 6.387372059218626e-06, "loss": 0.13370132446289062, "step": 3249 }, { "epoch": 0.4528669964467359, "grad_norm": 1.6014766693115234, "learning_rate": 6.38510766525456e-06, "loss": 0.16985702514648438, "step": 3250 }, { "epoch": 0.45300634013795027, "grad_norm": 1.1372417211532593, "learning_rate": 6.382842963551193e-06, "loss": 0.13141632080078125, "step": 3251 }, { "epoch": 0.45314568382916465, "grad_norm": 1.1226404905319214, "learning_rate": 6.380577954611691e-06, "loss": 0.15196609497070312, "step": 3252 }, { "epoch": 0.453285027520379, "grad_norm": 1.1696490049362183, "learning_rate": 6.378312638939286e-06, "loss": 0.14347457885742188, "step": 3253 }, { "epoch": 0.4534243712115934, "grad_norm": 1.7079553604125977, "learning_rate": 6.3760470170372815e-06, "loss": 0.2106475830078125, "step": 3254 }, { "epoch": 0.4535637149028078, "grad_norm": 0.6561135053634644, "learning_rate": 6.373781089409043e-06, "loss": 0.13100433349609375, "step": 3255 }, { "epoch": 0.45370305859402216, "grad_norm": 1.192775011062622, "learning_rate": 6.371514856558013e-06, "loss": 0.14196014404296875, "step": 3256 }, { "epoch": 0.45384240228523653, "grad_norm": 1.1668416261672974, "learning_rate": 6.369248318987692e-06, "loss": 0.19228744506835938, "step": 3257 }, { "epoch": 0.4539817459764509, "grad_norm": 1.1119542121887207, "learning_rate": 6.3669814772016555e-06, "loss": 0.15648651123046875, "step": 3258 }, { "epoch": 0.4541210896676653, "grad_norm": 0.6294342279434204, "learning_rate": 6.3647143317035445e-06, "loss": 0.12760543823242188, "step": 3259 }, { "epoch": 0.45426043335887967, "grad_norm": 0.8971338272094727, "learning_rate": 6.362446882997064e-06, "loss": 0.14989089965820312, "step": 3260 }, { "epoch": 0.45439977705009404, "grad_norm": 0.7881752252578735, "learning_rate": 6.360179131585993e-06, "loss": 0.12171363830566406, "step": 3261 }, { "epoch": 0.4545391207413084, "grad_norm": 1.3258508443832397, "learning_rate": 6.357911077974173e-06, "loss": 0.1588153839111328, "step": 3262 }, { "epoch": 0.4546784644325228, "grad_norm": 1.0301434993743896, "learning_rate": 6.355642722665512e-06, "loss": 0.15592575073242188, "step": 3263 }, { "epoch": 0.4548178081237372, "grad_norm": 1.2179821729660034, "learning_rate": 6.353374066163988e-06, "loss": 0.16127777099609375, "step": 3264 }, { "epoch": 0.45495715181495155, "grad_norm": 1.7447218894958496, "learning_rate": 6.351105108973644e-06, "loss": 0.20836257934570312, "step": 3265 }, { "epoch": 0.45509649550616593, "grad_norm": 0.7014114260673523, "learning_rate": 6.34883585159859e-06, "loss": 0.1375598907470703, "step": 3266 }, { "epoch": 0.45523583919738037, "grad_norm": 0.7583632469177246, "learning_rate": 6.346566294543008e-06, "loss": 0.136962890625, "step": 3267 }, { "epoch": 0.45537518288859474, "grad_norm": 2.1663241386413574, "learning_rate": 6.344296438311134e-06, "loss": 0.22341537475585938, "step": 3268 }, { "epoch": 0.4555145265798091, "grad_norm": 0.9966523051261902, "learning_rate": 6.342026283407286e-06, "loss": 0.16503143310546875, "step": 3269 }, { "epoch": 0.4556538702710235, "grad_norm": 0.6998243927955627, "learning_rate": 6.339755830335834e-06, "loss": 0.12955665588378906, "step": 3270 }, { "epoch": 0.4557932139622379, "grad_norm": 1.0123012065887451, "learning_rate": 6.337485079601224e-06, "loss": 0.107757568359375, "step": 3271 }, { "epoch": 0.45593255765345225, "grad_norm": 1.1155449151992798, "learning_rate": 6.335214031707966e-06, "loss": 0.1614227294921875, "step": 3272 }, { "epoch": 0.45607190134466663, "grad_norm": 0.7318257689476013, "learning_rate": 6.332942687160632e-06, "loss": 0.15500640869140625, "step": 3273 }, { "epoch": 0.456211245035881, "grad_norm": 1.4675402641296387, "learning_rate": 6.3306710464638645e-06, "loss": 0.17342567443847656, "step": 3274 }, { "epoch": 0.4563505887270954, "grad_norm": 1.0776435136795044, "learning_rate": 6.328399110122371e-06, "loss": 0.1603527069091797, "step": 3275 }, { "epoch": 0.45648993241830976, "grad_norm": 1.4404709339141846, "learning_rate": 6.3261268786409225e-06, "loss": 0.14487457275390625, "step": 3276 }, { "epoch": 0.45662927610952414, "grad_norm": 0.7081888318061829, "learning_rate": 6.323854352524359e-06, "loss": 0.1154022216796875, "step": 3277 }, { "epoch": 0.4567686198007385, "grad_norm": 1.3531458377838135, "learning_rate": 6.321581532277581e-06, "loss": 0.17563247680664062, "step": 3278 }, { "epoch": 0.4569079634919529, "grad_norm": 2.0495810508728027, "learning_rate": 6.319308418405559e-06, "loss": 0.15198898315429688, "step": 3279 }, { "epoch": 0.4570473071831673, "grad_norm": 0.9015189409255981, "learning_rate": 6.317035011413327e-06, "loss": 0.15401458740234375, "step": 3280 }, { "epoch": 0.45718665087438165, "grad_norm": 0.7902728319168091, "learning_rate": 6.314761311805983e-06, "loss": 0.1309356689453125, "step": 3281 }, { "epoch": 0.45732599456559603, "grad_norm": 1.2287530899047852, "learning_rate": 6.312487320088693e-06, "loss": 0.14272689819335938, "step": 3282 }, { "epoch": 0.4574653382568104, "grad_norm": 1.0868027210235596, "learning_rate": 6.3102130367666855e-06, "loss": 0.148712158203125, "step": 3283 }, { "epoch": 0.4576046819480248, "grad_norm": 1.1185683012008667, "learning_rate": 6.307938462345253e-06, "loss": 0.1956787109375, "step": 3284 }, { "epoch": 0.45774402563923916, "grad_norm": 1.38821542263031, "learning_rate": 6.305663597329756e-06, "loss": 0.17051315307617188, "step": 3285 }, { "epoch": 0.45788336933045354, "grad_norm": 0.9868544340133667, "learning_rate": 6.303388442225616e-06, "loss": 0.15242385864257812, "step": 3286 }, { "epoch": 0.45802271302166797, "grad_norm": 0.8947083950042725, "learning_rate": 6.30111299753832e-06, "loss": 0.12879180908203125, "step": 3287 }, { "epoch": 0.45816205671288235, "grad_norm": 0.6772927641868591, "learning_rate": 6.298837263773423e-06, "loss": 0.11244583129882812, "step": 3288 }, { "epoch": 0.4583014004040967, "grad_norm": 0.652625322341919, "learning_rate": 6.2965612414365365e-06, "loss": 0.11291885375976562, "step": 3289 }, { "epoch": 0.4584407440953111, "grad_norm": 0.565509021282196, "learning_rate": 6.294284931033344e-06, "loss": 0.09607315063476562, "step": 3290 }, { "epoch": 0.4585800877865255, "grad_norm": 1.8608267307281494, "learning_rate": 6.292008333069589e-06, "loss": 0.15381240844726562, "step": 3291 }, { "epoch": 0.45871943147773986, "grad_norm": 1.2874103784561157, "learning_rate": 6.289731448051079e-06, "loss": 0.13161087036132812, "step": 3292 }, { "epoch": 0.45885877516895424, "grad_norm": 1.1680163145065308, "learning_rate": 6.287454276483687e-06, "loss": 0.14437103271484375, "step": 3293 }, { "epoch": 0.4589981188601686, "grad_norm": 0.9085342288017273, "learning_rate": 6.2851768188733485e-06, "loss": 0.16603469848632812, "step": 3294 }, { "epoch": 0.459137462551383, "grad_norm": 1.1350592374801636, "learning_rate": 6.282899075726061e-06, "loss": 0.14469146728515625, "step": 3295 }, { "epoch": 0.45927680624259737, "grad_norm": 0.8496928811073303, "learning_rate": 6.280621047547888e-06, "loss": 0.12773513793945312, "step": 3296 }, { "epoch": 0.45941614993381175, "grad_norm": 0.9322134256362915, "learning_rate": 6.278342734844955e-06, "loss": 0.1477813720703125, "step": 3297 }, { "epoch": 0.4595554936250261, "grad_norm": 0.9105129837989807, "learning_rate": 6.276064138123453e-06, "loss": 0.1485137939453125, "step": 3298 }, { "epoch": 0.4596948373162405, "grad_norm": 1.0818418264389038, "learning_rate": 6.27378525788963e-06, "loss": 0.1408233642578125, "step": 3299 }, { "epoch": 0.4598341810074549, "grad_norm": 2.329425811767578, "learning_rate": 6.271506094649804e-06, "loss": 0.18142318725585938, "step": 3300 }, { "epoch": 0.45997352469866926, "grad_norm": 0.9967410564422607, "learning_rate": 6.269226648910356e-06, "loss": 0.12738609313964844, "step": 3301 }, { "epoch": 0.46011286838988363, "grad_norm": 1.5259907245635986, "learning_rate": 6.266946921177721e-06, "loss": 0.16353225708007812, "step": 3302 }, { "epoch": 0.460252212081098, "grad_norm": 0.912823498249054, "learning_rate": 6.264666911958404e-06, "loss": 0.15826034545898438, "step": 3303 }, { "epoch": 0.4603915557723124, "grad_norm": 0.949093222618103, "learning_rate": 6.262386621758975e-06, "loss": 0.12562942504882812, "step": 3304 }, { "epoch": 0.46053089946352677, "grad_norm": 1.2982385158538818, "learning_rate": 6.2601060510860565e-06, "loss": 0.14110565185546875, "step": 3305 }, { "epoch": 0.46067024315474114, "grad_norm": 1.0223582983016968, "learning_rate": 6.2578252004463436e-06, "loss": 0.16009140014648438, "step": 3306 }, { "epoch": 0.4608095868459556, "grad_norm": 0.5941421985626221, "learning_rate": 6.255544070346588e-06, "loss": 0.12756729125976562, "step": 3307 }, { "epoch": 0.46094893053716995, "grad_norm": 0.7899266481399536, "learning_rate": 6.2532626612936035e-06, "loss": 0.13232421875, "step": 3308 }, { "epoch": 0.46108827422838433, "grad_norm": 0.8938568234443665, "learning_rate": 6.250980973794268e-06, "loss": 0.15422439575195312, "step": 3309 }, { "epoch": 0.4612276179195987, "grad_norm": 1.7666096687316895, "learning_rate": 6.248699008355522e-06, "loss": 0.17307281494140625, "step": 3310 }, { "epoch": 0.4613669616108131, "grad_norm": 0.9135603308677673, "learning_rate": 6.2464167654843645e-06, "loss": 0.1266613006591797, "step": 3311 }, { "epoch": 0.46150630530202746, "grad_norm": 1.1743892431259155, "learning_rate": 6.2441342456878565e-06, "loss": 0.1847076416015625, "step": 3312 }, { "epoch": 0.46164564899324184, "grad_norm": 0.8627392053604126, "learning_rate": 6.2418514494731245e-06, "loss": 0.13248062133789062, "step": 3313 }, { "epoch": 0.4617849926844562, "grad_norm": 1.8593087196350098, "learning_rate": 6.239568377347352e-06, "loss": 0.16892051696777344, "step": 3314 }, { "epoch": 0.4619243363756706, "grad_norm": 0.781502902507782, "learning_rate": 6.237285029817786e-06, "loss": 0.128997802734375, "step": 3315 }, { "epoch": 0.462063680066885, "grad_norm": 0.864271879196167, "learning_rate": 6.235001407391732e-06, "loss": 0.1571197509765625, "step": 3316 }, { "epoch": 0.46220302375809935, "grad_norm": 0.8275068998336792, "learning_rate": 6.232717510576563e-06, "loss": 0.13242149353027344, "step": 3317 }, { "epoch": 0.46234236744931373, "grad_norm": 0.9190830588340759, "learning_rate": 6.230433339879706e-06, "loss": 0.16747665405273438, "step": 3318 }, { "epoch": 0.4624817111405281, "grad_norm": 1.2431176900863647, "learning_rate": 6.228148895808652e-06, "loss": 0.12462997436523438, "step": 3319 }, { "epoch": 0.4626210548317425, "grad_norm": 1.1916338205337524, "learning_rate": 6.225864178870954e-06, "loss": 0.1495494842529297, "step": 3320 }, { "epoch": 0.46276039852295686, "grad_norm": 0.576150119304657, "learning_rate": 6.22357918957422e-06, "loss": 0.10115432739257812, "step": 3321 }, { "epoch": 0.46289974221417124, "grad_norm": 0.758671760559082, "learning_rate": 6.221293928426128e-06, "loss": 0.1312408447265625, "step": 3322 }, { "epoch": 0.4630390859053856, "grad_norm": 1.1334917545318604, "learning_rate": 6.219008395934405e-06, "loss": 0.18564224243164062, "step": 3323 }, { "epoch": 0.4631784295966, "grad_norm": 1.5703730583190918, "learning_rate": 6.216722592606847e-06, "loss": 0.16065216064453125, "step": 3324 }, { "epoch": 0.46331777328781437, "grad_norm": 0.6190488934516907, "learning_rate": 6.214436518951308e-06, "loss": 0.09521293640136719, "step": 3325 }, { "epoch": 0.46345711697902875, "grad_norm": 2.3284385204315186, "learning_rate": 6.212150175475701e-06, "loss": 0.1883697509765625, "step": 3326 }, { "epoch": 0.4635964606702432, "grad_norm": 1.1960344314575195, "learning_rate": 6.209863562687998e-06, "loss": 0.14085006713867188, "step": 3327 }, { "epoch": 0.46373580436145756, "grad_norm": 0.8994547128677368, "learning_rate": 6.207576681096233e-06, "loss": 0.13171768188476562, "step": 3328 }, { "epoch": 0.46387514805267194, "grad_norm": 0.8673781156539917, "learning_rate": 6.2052895312085e-06, "loss": 0.13739776611328125, "step": 3329 }, { "epoch": 0.4640144917438863, "grad_norm": 0.9278318285942078, "learning_rate": 6.203002113532949e-06, "loss": 0.12847518920898438, "step": 3330 }, { "epoch": 0.4641538354351007, "grad_norm": 1.2425020933151245, "learning_rate": 6.200714428577794e-06, "loss": 0.13425827026367188, "step": 3331 }, { "epoch": 0.46429317912631507, "grad_norm": 1.366126298904419, "learning_rate": 6.198426476851305e-06, "loss": 0.1786823272705078, "step": 3332 }, { "epoch": 0.46443252281752945, "grad_norm": 0.6186270713806152, "learning_rate": 6.196138258861815e-06, "loss": 0.11416244506835938, "step": 3333 }, { "epoch": 0.4645718665087438, "grad_norm": 0.6859275698661804, "learning_rate": 6.193849775117709e-06, "loss": 0.11480712890625, "step": 3334 }, { "epoch": 0.4647112101999582, "grad_norm": 0.8217864632606506, "learning_rate": 6.191561026127444e-06, "loss": 0.12744140625, "step": 3335 }, { "epoch": 0.4648505538911726, "grad_norm": 0.8755031228065491, "learning_rate": 6.18927201239952e-06, "loss": 0.15411949157714844, "step": 3336 }, { "epoch": 0.46498989758238696, "grad_norm": 0.6672231554985046, "learning_rate": 6.186982734442505e-06, "loss": 0.13491439819335938, "step": 3337 }, { "epoch": 0.46512924127360133, "grad_norm": 1.150607705116272, "learning_rate": 6.184693192765028e-06, "loss": 0.15636062622070312, "step": 3338 }, { "epoch": 0.4652685849648157, "grad_norm": 0.8624992370605469, "learning_rate": 6.1824033878757685e-06, "loss": 0.12969970703125, "step": 3339 }, { "epoch": 0.4654079286560301, "grad_norm": 0.9408179521560669, "learning_rate": 6.180113320283473e-06, "loss": 0.15952682495117188, "step": 3340 }, { "epoch": 0.46554727234724447, "grad_norm": 1.3545525074005127, "learning_rate": 6.177822990496939e-06, "loss": 0.16499710083007812, "step": 3341 }, { "epoch": 0.46568661603845884, "grad_norm": 0.6280935406684875, "learning_rate": 6.175532399025027e-06, "loss": 0.14129638671875, "step": 3342 }, { "epoch": 0.4658259597296732, "grad_norm": 1.722334384918213, "learning_rate": 6.173241546376654e-06, "loss": 0.1662445068359375, "step": 3343 }, { "epoch": 0.4659653034208876, "grad_norm": 1.3048419952392578, "learning_rate": 6.170950433060795e-06, "loss": 0.18113327026367188, "step": 3344 }, { "epoch": 0.466104647112102, "grad_norm": 1.3762205839157104, "learning_rate": 6.168659059586483e-06, "loss": 0.120269775390625, "step": 3345 }, { "epoch": 0.46624399080331635, "grad_norm": 1.264370083808899, "learning_rate": 6.166367426462808e-06, "loss": 0.15022659301757812, "step": 3346 }, { "epoch": 0.46638333449453073, "grad_norm": 0.5386861562728882, "learning_rate": 6.16407553419892e-06, "loss": 0.10499954223632812, "step": 3347 }, { "epoch": 0.46652267818574517, "grad_norm": 1.3782645463943481, "learning_rate": 6.161783383304024e-06, "loss": 0.15895462036132812, "step": 3348 }, { "epoch": 0.46666202187695954, "grad_norm": 0.9608484506607056, "learning_rate": 6.159490974287386e-06, "loss": 0.14233779907226562, "step": 3349 }, { "epoch": 0.4668013655681739, "grad_norm": 0.7903417944908142, "learning_rate": 6.157198307658323e-06, "loss": 0.14992523193359375, "step": 3350 }, { "epoch": 0.4669407092593883, "grad_norm": 1.4951424598693848, "learning_rate": 6.154905383926218e-06, "loss": 0.16612625122070312, "step": 3351 }, { "epoch": 0.4670800529506027, "grad_norm": 1.3027969598770142, "learning_rate": 6.152612203600502e-06, "loss": 0.17424392700195312, "step": 3352 }, { "epoch": 0.46721939664181705, "grad_norm": 0.7341320514678955, "learning_rate": 6.150318767190668e-06, "loss": 0.12471389770507812, "step": 3353 }, { "epoch": 0.46735874033303143, "grad_norm": 1.1174734830856323, "learning_rate": 6.148025075206268e-06, "loss": 0.14556884765625, "step": 3354 }, { "epoch": 0.4674980840242458, "grad_norm": 0.7687016129493713, "learning_rate": 6.145731128156904e-06, "loss": 0.12185287475585938, "step": 3355 }, { "epoch": 0.4676374277154602, "grad_norm": 1.027671217918396, "learning_rate": 6.143436926552242e-06, "loss": 0.1307830810546875, "step": 3356 }, { "epoch": 0.46777677140667456, "grad_norm": 1.1423929929733276, "learning_rate": 6.141142470902001e-06, "loss": 0.16344451904296875, "step": 3357 }, { "epoch": 0.46791611509788894, "grad_norm": 0.7431107759475708, "learning_rate": 6.138847761715955e-06, "loss": 0.13931655883789062, "step": 3358 }, { "epoch": 0.4680554587891033, "grad_norm": 0.9178742170333862, "learning_rate": 6.1365527995039366e-06, "loss": 0.15263938903808594, "step": 3359 }, { "epoch": 0.4681948024803177, "grad_norm": 0.5758052468299866, "learning_rate": 6.134257584775833e-06, "loss": 0.10467147827148438, "step": 3360 }, { "epoch": 0.4683341461715321, "grad_norm": 1.3781651258468628, "learning_rate": 6.131962118041591e-06, "loss": 0.15504837036132812, "step": 3361 }, { "epoch": 0.46847348986274645, "grad_norm": 0.8728213906288147, "learning_rate": 6.129666399811209e-06, "loss": 0.11838150024414062, "step": 3362 }, { "epoch": 0.46861283355396083, "grad_norm": 1.1988152265548706, "learning_rate": 6.127370430594745e-06, "loss": 0.14705276489257812, "step": 3363 }, { "epoch": 0.4687521772451752, "grad_norm": 1.1974793672561646, "learning_rate": 6.125074210902307e-06, "loss": 0.13838577270507812, "step": 3364 }, { "epoch": 0.4688915209363896, "grad_norm": 2.3206703662872314, "learning_rate": 6.122777741244067e-06, "loss": 0.18764305114746094, "step": 3365 }, { "epoch": 0.46903086462760396, "grad_norm": 1.7933058738708496, "learning_rate": 6.120481022130245e-06, "loss": 0.17231369018554688, "step": 3366 }, { "epoch": 0.46917020831881834, "grad_norm": 0.8844132423400879, "learning_rate": 6.118184054071124e-06, "loss": 0.11388778686523438, "step": 3367 }, { "epoch": 0.46930955201003277, "grad_norm": 0.5816996097564697, "learning_rate": 6.115886837577031e-06, "loss": 0.10413742065429688, "step": 3368 }, { "epoch": 0.46944889570124715, "grad_norm": 1.165748119354248, "learning_rate": 6.113589373158361e-06, "loss": 0.15077590942382812, "step": 3369 }, { "epoch": 0.4695882393924615, "grad_norm": 2.303232192993164, "learning_rate": 6.111291661325556e-06, "loss": 0.17251968383789062, "step": 3370 }, { "epoch": 0.4697275830836759, "grad_norm": 1.2599143981933594, "learning_rate": 6.108993702589114e-06, "loss": 0.13814163208007812, "step": 3371 }, { "epoch": 0.4698669267748903, "grad_norm": 0.6670021414756775, "learning_rate": 6.106695497459591e-06, "loss": 0.12320327758789062, "step": 3372 }, { "epoch": 0.47000627046610466, "grad_norm": 1.060646891593933, "learning_rate": 6.104397046447593e-06, "loss": 0.13551712036132812, "step": 3373 }, { "epoch": 0.47014561415731904, "grad_norm": 0.8276228904724121, "learning_rate": 6.102098350063786e-06, "loss": 0.13782119750976562, "step": 3374 }, { "epoch": 0.4702849578485334, "grad_norm": 0.7578611969947815, "learning_rate": 6.099799408818889e-06, "loss": 0.13306427001953125, "step": 3375 }, { "epoch": 0.4704243015397478, "grad_norm": 0.8664342164993286, "learning_rate": 6.097500223223669e-06, "loss": 0.1433563232421875, "step": 3376 }, { "epoch": 0.47056364523096217, "grad_norm": 0.6529463529586792, "learning_rate": 6.095200793788958e-06, "loss": 0.1282806396484375, "step": 3377 }, { "epoch": 0.47070298892217655, "grad_norm": 0.8277193903923035, "learning_rate": 6.092901121025634e-06, "loss": 0.11746597290039062, "step": 3378 }, { "epoch": 0.4708423326133909, "grad_norm": 1.1421712636947632, "learning_rate": 6.090601205444632e-06, "loss": 0.15664291381835938, "step": 3379 }, { "epoch": 0.4709816763046053, "grad_norm": 1.403314232826233, "learning_rate": 6.088301047556942e-06, "loss": 0.18731307983398438, "step": 3380 }, { "epoch": 0.4711210199958197, "grad_norm": 1.106423258781433, "learning_rate": 6.086000647873604e-06, "loss": 0.1535491943359375, "step": 3381 }, { "epoch": 0.47126036368703406, "grad_norm": 0.6319853067398071, "learning_rate": 6.083700006905715e-06, "loss": 0.12961959838867188, "step": 3382 }, { "epoch": 0.47139970737824843, "grad_norm": 0.7462827563285828, "learning_rate": 6.081399125164429e-06, "loss": 0.11949920654296875, "step": 3383 }, { "epoch": 0.4715390510694628, "grad_norm": 0.9029961824417114, "learning_rate": 6.079098003160943e-06, "loss": 0.15319442749023438, "step": 3384 }, { "epoch": 0.4716783947606772, "grad_norm": 0.5343156456947327, "learning_rate": 6.076796641406518e-06, "loss": 0.12064743041992188, "step": 3385 }, { "epoch": 0.47181773845189157, "grad_norm": 1.766312599182129, "learning_rate": 6.074495040412465e-06, "loss": 0.17968082427978516, "step": 3386 }, { "epoch": 0.47195708214310594, "grad_norm": 1.0163748264312744, "learning_rate": 6.072193200690142e-06, "loss": 0.15020751953125, "step": 3387 }, { "epoch": 0.4720964258343204, "grad_norm": 0.6171576976776123, "learning_rate": 6.069891122750971e-06, "loss": 0.12412643432617188, "step": 3388 }, { "epoch": 0.47223576952553475, "grad_norm": 1.1781768798828125, "learning_rate": 6.067588807106416e-06, "loss": 0.15238571166992188, "step": 3389 }, { "epoch": 0.47237511321674913, "grad_norm": 0.8684206008911133, "learning_rate": 6.0652862542680034e-06, "loss": 0.15916061401367188, "step": 3390 }, { "epoch": 0.4725144569079635, "grad_norm": 1.163199543952942, "learning_rate": 6.062983464747305e-06, "loss": 0.18143081665039062, "step": 3391 }, { "epoch": 0.4726538005991779, "grad_norm": 0.7975446581840515, "learning_rate": 6.06068043905595e-06, "loss": 0.11161422729492188, "step": 3392 }, { "epoch": 0.47279314429039226, "grad_norm": 0.9965944290161133, "learning_rate": 6.0583771777056166e-06, "loss": 0.14591598510742188, "step": 3393 }, { "epoch": 0.47293248798160664, "grad_norm": 0.8767224550247192, "learning_rate": 6.056073681208038e-06, "loss": 0.14023971557617188, "step": 3394 }, { "epoch": 0.473071831672821, "grad_norm": 0.9994769096374512, "learning_rate": 6.053769950074997e-06, "loss": 0.13540267944335938, "step": 3395 }, { "epoch": 0.4732111753640354, "grad_norm": 0.7787732481956482, "learning_rate": 6.051465984818332e-06, "loss": 0.12374114990234375, "step": 3396 }, { "epoch": 0.4733505190552498, "grad_norm": 0.7865322828292847, "learning_rate": 6.049161785949931e-06, "loss": 0.13711166381835938, "step": 3397 }, { "epoch": 0.47348986274646415, "grad_norm": 0.776082456111908, "learning_rate": 6.046857353981732e-06, "loss": 0.14389801025390625, "step": 3398 }, { "epoch": 0.47362920643767853, "grad_norm": 1.2143646478652954, "learning_rate": 6.044552689425731e-06, "loss": 0.15063858032226562, "step": 3399 }, { "epoch": 0.4737685501288929, "grad_norm": 1.565767765045166, "learning_rate": 6.042247792793968e-06, "loss": 0.22271728515625, "step": 3400 }, { "epoch": 0.4739078938201073, "grad_norm": 0.9097768068313599, "learning_rate": 6.0399426645985424e-06, "loss": 0.1212310791015625, "step": 3401 }, { "epoch": 0.47404723751132166, "grad_norm": 1.3312000036239624, "learning_rate": 6.037637305351599e-06, "loss": 0.15643310546875, "step": 3402 }, { "epoch": 0.47418658120253604, "grad_norm": 2.628983736038208, "learning_rate": 6.035331715565333e-06, "loss": 0.2063121795654297, "step": 3403 }, { "epoch": 0.4743259248937504, "grad_norm": 0.9598926901817322, "learning_rate": 6.033025895752002e-06, "loss": 0.14552688598632812, "step": 3404 }, { "epoch": 0.4744652685849648, "grad_norm": 0.6489072442054749, "learning_rate": 6.030719846423897e-06, "loss": 0.12591171264648438, "step": 3405 }, { "epoch": 0.47460461227617917, "grad_norm": 0.8160454630851746, "learning_rate": 6.028413568093375e-06, "loss": 0.12870407104492188, "step": 3406 }, { "epoch": 0.47474395596739355, "grad_norm": 1.8629494905471802, "learning_rate": 6.026107061272838e-06, "loss": 0.17826461791992188, "step": 3407 }, { "epoch": 0.474883299658608, "grad_norm": 0.7158263325691223, "learning_rate": 6.023800326474738e-06, "loss": 0.1275959014892578, "step": 3408 }, { "epoch": 0.47502264334982236, "grad_norm": 0.8030351400375366, "learning_rate": 6.0214933642115794e-06, "loss": 0.1371002197265625, "step": 3409 }, { "epoch": 0.47516198704103674, "grad_norm": 0.4932692348957062, "learning_rate": 6.019186174995916e-06, "loss": 0.11648178100585938, "step": 3410 }, { "epoch": 0.4753013307322511, "grad_norm": 0.8618855476379395, "learning_rate": 6.016878759340352e-06, "loss": 0.16124343872070312, "step": 3411 }, { "epoch": 0.4754406744234655, "grad_norm": 0.8537644147872925, "learning_rate": 6.014571117757545e-06, "loss": 0.135040283203125, "step": 3412 }, { "epoch": 0.47558001811467987, "grad_norm": 0.9189814925193787, "learning_rate": 6.012263250760199e-06, "loss": 0.14858245849609375, "step": 3413 }, { "epoch": 0.47571936180589425, "grad_norm": 1.6854392290115356, "learning_rate": 6.009955158861066e-06, "loss": 0.1765899658203125, "step": 3414 }, { "epoch": 0.4758587054971086, "grad_norm": 0.7888510227203369, "learning_rate": 6.007646842572959e-06, "loss": 0.14144515991210938, "step": 3415 }, { "epoch": 0.475998049188323, "grad_norm": 0.8629212975502014, "learning_rate": 6.005338302408724e-06, "loss": 0.14170074462890625, "step": 3416 }, { "epoch": 0.4761373928795374, "grad_norm": 0.7058839797973633, "learning_rate": 6.0030295388812736e-06, "loss": 0.15611648559570312, "step": 3417 }, { "epoch": 0.47627673657075176, "grad_norm": 0.9793515801429749, "learning_rate": 6.000720552503557e-06, "loss": 0.12653732299804688, "step": 3418 }, { "epoch": 0.47641608026196614, "grad_norm": 0.990776002407074, "learning_rate": 5.998411343788582e-06, "loss": 0.1633148193359375, "step": 3419 }, { "epoch": 0.4765554239531805, "grad_norm": 0.6015273332595825, "learning_rate": 5.996101913249402e-06, "loss": 0.1024017333984375, "step": 3420 }, { "epoch": 0.4766947676443949, "grad_norm": 0.5879206657409668, "learning_rate": 5.993792261399115e-06, "loss": 0.09792327880859375, "step": 3421 }, { "epoch": 0.47683411133560927, "grad_norm": 1.4933048486709595, "learning_rate": 5.991482388750878e-06, "loss": 0.14215087890625, "step": 3422 }, { "epoch": 0.47697345502682365, "grad_norm": 1.1216577291488647, "learning_rate": 5.989172295817889e-06, "loss": 0.14872169494628906, "step": 3423 }, { "epoch": 0.477112798718038, "grad_norm": 1.2204562425613403, "learning_rate": 5.9868619831134e-06, "loss": 0.18094635009765625, "step": 3424 }, { "epoch": 0.4772521424092524, "grad_norm": 1.3224213123321533, "learning_rate": 5.984551451150709e-06, "loss": 0.1286487579345703, "step": 3425 }, { "epoch": 0.4773914861004668, "grad_norm": 0.610492467880249, "learning_rate": 5.9822407004431625e-06, "loss": 0.11992454528808594, "step": 3426 }, { "epoch": 0.47753082979168116, "grad_norm": 1.2239934206008911, "learning_rate": 5.979929731504158e-06, "loss": 0.160552978515625, "step": 3427 }, { "epoch": 0.4776701734828956, "grad_norm": 1.6699293851852417, "learning_rate": 5.977618544847139e-06, "loss": 0.22795486450195312, "step": 3428 }, { "epoch": 0.47780951717410997, "grad_norm": 1.037121295928955, "learning_rate": 5.975307140985599e-06, "loss": 0.13529205322265625, "step": 3429 }, { "epoch": 0.47794886086532434, "grad_norm": 0.8060754537582397, "learning_rate": 5.972995520433078e-06, "loss": 0.15157699584960938, "step": 3430 }, { "epoch": 0.4780882045565387, "grad_norm": 0.9123064875602722, "learning_rate": 5.970683683703168e-06, "loss": 0.14660263061523438, "step": 3431 }, { "epoch": 0.4782275482477531, "grad_norm": 0.4275446832180023, "learning_rate": 5.968371631309502e-06, "loss": 0.10441207885742188, "step": 3432 }, { "epoch": 0.4783668919389675, "grad_norm": 1.0081253051757812, "learning_rate": 5.966059363765771e-06, "loss": 0.17502212524414062, "step": 3433 }, { "epoch": 0.47850623563018185, "grad_norm": 0.7659911513328552, "learning_rate": 5.9637468815857016e-06, "loss": 0.13620376586914062, "step": 3434 }, { "epoch": 0.47864557932139623, "grad_norm": 1.0184837579727173, "learning_rate": 5.961434185283079e-06, "loss": 0.11273002624511719, "step": 3435 }, { "epoch": 0.4787849230126106, "grad_norm": 1.4996929168701172, "learning_rate": 5.959121275371732e-06, "loss": 0.14322662353515625, "step": 3436 }, { "epoch": 0.478924266703825, "grad_norm": 1.429459571838379, "learning_rate": 5.956808152365532e-06, "loss": 0.167633056640625, "step": 3437 }, { "epoch": 0.47906361039503936, "grad_norm": 1.078452229499817, "learning_rate": 5.954494816778408e-06, "loss": 0.16040420532226562, "step": 3438 }, { "epoch": 0.47920295408625374, "grad_norm": 1.528756856918335, "learning_rate": 5.952181269124324e-06, "loss": 0.16242027282714844, "step": 3439 }, { "epoch": 0.4793422977774681, "grad_norm": 1.6432656049728394, "learning_rate": 5.949867509917303e-06, "loss": 0.17006301879882812, "step": 3440 }, { "epoch": 0.4794816414686825, "grad_norm": 1.3685152530670166, "learning_rate": 5.9475535396714055e-06, "loss": 0.13356399536132812, "step": 3441 }, { "epoch": 0.4796209851598969, "grad_norm": 0.5676911473274231, "learning_rate": 5.945239358900746e-06, "loss": 0.11869049072265625, "step": 3442 }, { "epoch": 0.47976032885111125, "grad_norm": 0.9194523096084595, "learning_rate": 5.94292496811948e-06, "loss": 0.1524505615234375, "step": 3443 }, { "epoch": 0.47989967254232563, "grad_norm": 0.8642321825027466, "learning_rate": 5.940610367841815e-06, "loss": 0.13727188110351562, "step": 3444 }, { "epoch": 0.48003901623354, "grad_norm": 1.080061435699463, "learning_rate": 5.938295558581999e-06, "loss": 0.14896011352539062, "step": 3445 }, { "epoch": 0.4801783599247544, "grad_norm": 1.1080776453018188, "learning_rate": 5.935980540854332e-06, "loss": 0.1768035888671875, "step": 3446 }, { "epoch": 0.48031770361596876, "grad_norm": 0.6759480237960815, "learning_rate": 5.933665315173158e-06, "loss": 0.11757278442382812, "step": 3447 }, { "epoch": 0.4804570473071832, "grad_norm": 1.297598123550415, "learning_rate": 5.931349882052866e-06, "loss": 0.12774276733398438, "step": 3448 }, { "epoch": 0.48059639099839757, "grad_norm": 1.229778528213501, "learning_rate": 5.929034242007895e-06, "loss": 0.15758895874023438, "step": 3449 }, { "epoch": 0.48073573468961195, "grad_norm": 1.4302849769592285, "learning_rate": 5.926718395552723e-06, "loss": 0.151092529296875, "step": 3450 }, { "epoch": 0.4808750783808263, "grad_norm": 1.1238927841186523, "learning_rate": 5.924402343201883e-06, "loss": 0.13888931274414062, "step": 3451 }, { "epoch": 0.4810144220720407, "grad_norm": 1.0430420637130737, "learning_rate": 5.922086085469947e-06, "loss": 0.14832687377929688, "step": 3452 }, { "epoch": 0.4811537657632551, "grad_norm": 0.5666401982307434, "learning_rate": 5.919769622871533e-06, "loss": 0.12613296508789062, "step": 3453 }, { "epoch": 0.48129310945446946, "grad_norm": 0.943243682384491, "learning_rate": 5.917452955921309e-06, "loss": 0.13372039794921875, "step": 3454 }, { "epoch": 0.48143245314568384, "grad_norm": 1.3219780921936035, "learning_rate": 5.915136085133983e-06, "loss": 0.203033447265625, "step": 3455 }, { "epoch": 0.4815717968368982, "grad_norm": 1.1531553268432617, "learning_rate": 5.9128190110243115e-06, "loss": 0.16293716430664062, "step": 3456 }, { "epoch": 0.4817111405281126, "grad_norm": 0.6362953782081604, "learning_rate": 5.910501734107097e-06, "loss": 0.12117767333984375, "step": 3457 }, { "epoch": 0.48185048421932697, "grad_norm": 1.0795484781265259, "learning_rate": 5.908184254897183e-06, "loss": 0.1158905029296875, "step": 3458 }, { "epoch": 0.48198982791054135, "grad_norm": 1.4421778917312622, "learning_rate": 5.905866573909462e-06, "loss": 0.15533828735351562, "step": 3459 }, { "epoch": 0.4821291716017557, "grad_norm": 1.422773838043213, "learning_rate": 5.9035486916588705e-06, "loss": 0.142822265625, "step": 3460 }, { "epoch": 0.4822685152929701, "grad_norm": 1.1631426811218262, "learning_rate": 5.901230608660386e-06, "loss": 0.16161346435546875, "step": 3461 }, { "epoch": 0.4824078589841845, "grad_norm": 1.0024940967559814, "learning_rate": 5.898912325429038e-06, "loss": 0.12972640991210938, "step": 3462 }, { "epoch": 0.48254720267539886, "grad_norm": 0.8303297162055969, "learning_rate": 5.896593842479893e-06, "loss": 0.14449691772460938, "step": 3463 }, { "epoch": 0.48268654636661323, "grad_norm": 0.8458378911018372, "learning_rate": 5.8942751603280645e-06, "loss": 0.13980484008789062, "step": 3464 }, { "epoch": 0.4828258900578276, "grad_norm": 0.7288020849227905, "learning_rate": 5.891956279488715e-06, "loss": 0.11701393127441406, "step": 3465 }, { "epoch": 0.482965233749042, "grad_norm": 0.7407890558242798, "learning_rate": 5.889637200477041e-06, "loss": 0.1404876708984375, "step": 3466 }, { "epoch": 0.48310457744025637, "grad_norm": 1.3550529479980469, "learning_rate": 5.887317923808294e-06, "loss": 0.15829849243164062, "step": 3467 }, { "epoch": 0.4832439211314708, "grad_norm": 0.8350240588188171, "learning_rate": 5.88499844999776e-06, "loss": 0.1308002471923828, "step": 3468 }, { "epoch": 0.4833832648226852, "grad_norm": 0.8779287934303284, "learning_rate": 5.882678779560776e-06, "loss": 0.1440582275390625, "step": 3469 }, { "epoch": 0.48352260851389955, "grad_norm": 1.2696044445037842, "learning_rate": 5.880358913012722e-06, "loss": 0.14705657958984375, "step": 3470 }, { "epoch": 0.48366195220511393, "grad_norm": 1.0532021522521973, "learning_rate": 5.878038850869012e-06, "loss": 0.13150787353515625, "step": 3471 }, { "epoch": 0.4838012958963283, "grad_norm": 1.0605307817459106, "learning_rate": 5.875718593645118e-06, "loss": 0.13648223876953125, "step": 3472 }, { "epoch": 0.4839406395875427, "grad_norm": 1.0079752206802368, "learning_rate": 5.873398141856545e-06, "loss": 0.12808990478515625, "step": 3473 }, { "epoch": 0.48407998327875706, "grad_norm": 1.2430907487869263, "learning_rate": 5.871077496018844e-06, "loss": 0.1565093994140625, "step": 3474 }, { "epoch": 0.48421932696997144, "grad_norm": 0.6670424342155457, "learning_rate": 5.868756656647611e-06, "loss": 0.1363372802734375, "step": 3475 }, { "epoch": 0.4843586706611858, "grad_norm": 0.9999164342880249, "learning_rate": 5.866435624258483e-06, "loss": 0.16246604919433594, "step": 3476 }, { "epoch": 0.4844980143524002, "grad_norm": 1.0457736253738403, "learning_rate": 5.86411439936714e-06, "loss": 0.14080047607421875, "step": 3477 }, { "epoch": 0.4846373580436146, "grad_norm": 0.678146481513977, "learning_rate": 5.861792982489306e-06, "loss": 0.1156768798828125, "step": 3478 }, { "epoch": 0.48477670173482895, "grad_norm": 1.142720341682434, "learning_rate": 5.8594713741407465e-06, "loss": 0.17894744873046875, "step": 3479 }, { "epoch": 0.48491604542604333, "grad_norm": 1.0797683000564575, "learning_rate": 5.857149574837269e-06, "loss": 0.15456008911132812, "step": 3480 }, { "epoch": 0.4850553891172577, "grad_norm": 0.8107438683509827, "learning_rate": 5.854827585094725e-06, "loss": 0.144134521484375, "step": 3481 }, { "epoch": 0.4851947328084721, "grad_norm": 0.71903395652771, "learning_rate": 5.852505405429007e-06, "loss": 0.12847137451171875, "step": 3482 }, { "epoch": 0.48533407649968646, "grad_norm": 1.290354609489441, "learning_rate": 5.850183036356054e-06, "loss": 0.15744400024414062, "step": 3483 }, { "epoch": 0.48547342019090084, "grad_norm": 1.0171198844909668, "learning_rate": 5.847860478391838e-06, "loss": 0.16242218017578125, "step": 3484 }, { "epoch": 0.4856127638821152, "grad_norm": 0.7440661191940308, "learning_rate": 5.845537732052381e-06, "loss": 0.10387039184570312, "step": 3485 }, { "epoch": 0.4857521075733296, "grad_norm": 2.5050244331359863, "learning_rate": 5.8432147978537444e-06, "loss": 0.22522735595703125, "step": 3486 }, { "epoch": 0.48589145126454397, "grad_norm": 0.7893975973129272, "learning_rate": 5.840891676312029e-06, "loss": 0.1317291259765625, "step": 3487 }, { "epoch": 0.4860307949557584, "grad_norm": 0.8176038265228271, "learning_rate": 5.838568367943383e-06, "loss": 0.14431381225585938, "step": 3488 }, { "epoch": 0.4861701386469728, "grad_norm": 0.5684943199157715, "learning_rate": 5.836244873263989e-06, "loss": 0.121673583984375, "step": 3489 }, { "epoch": 0.48630948233818716, "grad_norm": 0.7604247331619263, "learning_rate": 5.8339211927900776e-06, "loss": 0.11657333374023438, "step": 3490 }, { "epoch": 0.48644882602940154, "grad_norm": 1.3010108470916748, "learning_rate": 5.831597327037914e-06, "loss": 0.12993240356445312, "step": 3491 }, { "epoch": 0.4865881697206159, "grad_norm": 1.3662097454071045, "learning_rate": 5.829273276523811e-06, "loss": 0.17570877075195312, "step": 3492 }, { "epoch": 0.4867275134118303, "grad_norm": 0.8584035038948059, "learning_rate": 5.82694904176412e-06, "loss": 0.13362884521484375, "step": 3493 }, { "epoch": 0.48686685710304467, "grad_norm": 0.8249996900558472, "learning_rate": 5.82462462327523e-06, "loss": 0.13393402099609375, "step": 3494 }, { "epoch": 0.48700620079425905, "grad_norm": 1.3576210737228394, "learning_rate": 5.822300021573574e-06, "loss": 0.16751861572265625, "step": 3495 }, { "epoch": 0.4871455444854734, "grad_norm": 1.1102159023284912, "learning_rate": 5.819975237175629e-06, "loss": 0.16481399536132812, "step": 3496 }, { "epoch": 0.4872848881766878, "grad_norm": 0.796377420425415, "learning_rate": 5.817650270597906e-06, "loss": 0.13370132446289062, "step": 3497 }, { "epoch": 0.4874242318679022, "grad_norm": 1.0047073364257812, "learning_rate": 5.815325122356959e-06, "loss": 0.15078163146972656, "step": 3498 }, { "epoch": 0.48756357555911656, "grad_norm": 1.2458925247192383, "learning_rate": 5.8129997929693845e-06, "loss": 0.19742202758789062, "step": 3499 }, { "epoch": 0.48770291925033094, "grad_norm": 0.9102441668510437, "learning_rate": 5.810674282951817e-06, "loss": 0.1352386474609375, "step": 3500 }, { "epoch": 0.4878422629415453, "grad_norm": 0.9966127872467041, "learning_rate": 5.808348592820932e-06, "loss": 0.14517784118652344, "step": 3501 }, { "epoch": 0.4879816066327597, "grad_norm": 0.8764151930809021, "learning_rate": 5.806022723093445e-06, "loss": 0.13742446899414062, "step": 3502 }, { "epoch": 0.48812095032397407, "grad_norm": 0.9552504420280457, "learning_rate": 5.80369667428611e-06, "loss": 0.128082275390625, "step": 3503 }, { "epoch": 0.48826029401518845, "grad_norm": 1.1559290885925293, "learning_rate": 5.801370446915724e-06, "loss": 0.160614013671875, "step": 3504 }, { "epoch": 0.4883996377064028, "grad_norm": 0.5873827338218689, "learning_rate": 5.799044041499119e-06, "loss": 0.12644577026367188, "step": 3505 }, { "epoch": 0.4885389813976172, "grad_norm": 1.080858588218689, "learning_rate": 5.7967174585531705e-06, "loss": 0.14876556396484375, "step": 3506 }, { "epoch": 0.4886783250888316, "grad_norm": 0.9751526713371277, "learning_rate": 5.794390698594793e-06, "loss": 0.15117835998535156, "step": 3507 }, { "epoch": 0.488817668780046, "grad_norm": 0.9677221179008484, "learning_rate": 5.792063762140938e-06, "loss": 0.15045547485351562, "step": 3508 }, { "epoch": 0.4889570124712604, "grad_norm": 1.190293312072754, "learning_rate": 5.789736649708598e-06, "loss": 0.15853500366210938, "step": 3509 }, { "epoch": 0.48909635616247477, "grad_norm": 1.0849452018737793, "learning_rate": 5.787409361814805e-06, "loss": 0.15418243408203125, "step": 3510 }, { "epoch": 0.48923569985368914, "grad_norm": 0.6734659075737, "learning_rate": 5.785081898976627e-06, "loss": 0.1194610595703125, "step": 3511 }, { "epoch": 0.4893750435449035, "grad_norm": 0.7893627285957336, "learning_rate": 5.782754261711177e-06, "loss": 0.15323257446289062, "step": 3512 }, { "epoch": 0.4895143872361179, "grad_norm": 0.8723308444023132, "learning_rate": 5.7804264505356e-06, "loss": 0.15033340454101562, "step": 3513 }, { "epoch": 0.4896537309273323, "grad_norm": 1.4477735757827759, "learning_rate": 5.778098465967082e-06, "loss": 0.15753936767578125, "step": 3514 }, { "epoch": 0.48979307461854665, "grad_norm": 0.9142647981643677, "learning_rate": 5.7757703085228515e-06, "loss": 0.1442394256591797, "step": 3515 }, { "epoch": 0.48993241830976103, "grad_norm": 0.8115500807762146, "learning_rate": 5.773441978720167e-06, "loss": 0.14615631103515625, "step": 3516 }, { "epoch": 0.4900717620009754, "grad_norm": 1.1214823722839355, "learning_rate": 5.771113477076335e-06, "loss": 0.1535053253173828, "step": 3517 }, { "epoch": 0.4902111056921898, "grad_norm": 1.550154685974121, "learning_rate": 5.7687848041086905e-06, "loss": 0.1966705322265625, "step": 3518 }, { "epoch": 0.49035044938340416, "grad_norm": 1.0209647417068481, "learning_rate": 5.766455960334616e-06, "loss": 0.1292724609375, "step": 3519 }, { "epoch": 0.49048979307461854, "grad_norm": 0.7322020530700684, "learning_rate": 5.764126946271526e-06, "loss": 0.10455322265625, "step": 3520 }, { "epoch": 0.4906291367658329, "grad_norm": 0.6679695248603821, "learning_rate": 5.761797762436872e-06, "loss": 0.10901641845703125, "step": 3521 }, { "epoch": 0.4907684804570473, "grad_norm": 0.7313768863677979, "learning_rate": 5.759468409348149e-06, "loss": 0.11506843566894531, "step": 3522 }, { "epoch": 0.4909078241482617, "grad_norm": 0.7164704203605652, "learning_rate": 5.757138887522884e-06, "loss": 0.11224365234375, "step": 3523 }, { "epoch": 0.49104716783947605, "grad_norm": 1.084845781326294, "learning_rate": 5.754809197478644e-06, "loss": 0.1496906280517578, "step": 3524 }, { "epoch": 0.49118651153069043, "grad_norm": 1.6172791719436646, "learning_rate": 5.752479339733033e-06, "loss": 0.18149566650390625, "step": 3525 }, { "epoch": 0.4913258552219048, "grad_norm": 1.1293226480484009, "learning_rate": 5.750149314803691e-06, "loss": 0.1860504150390625, "step": 3526 }, { "epoch": 0.4914651989131192, "grad_norm": 1.0251826047897339, "learning_rate": 5.747819123208299e-06, "loss": 0.15935707092285156, "step": 3527 }, { "epoch": 0.4916045426043336, "grad_norm": 0.9970347881317139, "learning_rate": 5.7454887654645706e-06, "loss": 0.15193557739257812, "step": 3528 }, { "epoch": 0.491743886295548, "grad_norm": 1.3164129257202148, "learning_rate": 5.7431582420902576e-06, "loss": 0.16370582580566406, "step": 3529 }, { "epoch": 0.49188322998676237, "grad_norm": 0.8602709770202637, "learning_rate": 5.740827553603149e-06, "loss": 0.1411571502685547, "step": 3530 }, { "epoch": 0.49202257367797675, "grad_norm": 0.6497316956520081, "learning_rate": 5.738496700521073e-06, "loss": 0.11924362182617188, "step": 3531 }, { "epoch": 0.4921619173691911, "grad_norm": 1.265792965888977, "learning_rate": 5.736165683361889e-06, "loss": 0.1459503173828125, "step": 3532 }, { "epoch": 0.4923012610604055, "grad_norm": 0.790892481803894, "learning_rate": 5.7338345026434995e-06, "loss": 0.13026046752929688, "step": 3533 }, { "epoch": 0.4924406047516199, "grad_norm": 0.8262191414833069, "learning_rate": 5.731503158883835e-06, "loss": 0.14179229736328125, "step": 3534 }, { "epoch": 0.49257994844283426, "grad_norm": 1.1290627717971802, "learning_rate": 5.729171652600869e-06, "loss": 0.1488189697265625, "step": 3535 }, { "epoch": 0.49271929213404864, "grad_norm": 0.7670531868934631, "learning_rate": 5.726839984312611e-06, "loss": 0.13058090209960938, "step": 3536 }, { "epoch": 0.492858635825263, "grad_norm": 0.7672622799873352, "learning_rate": 5.724508154537101e-06, "loss": 0.11452102661132812, "step": 3537 }, { "epoch": 0.4929979795164774, "grad_norm": 0.813879668712616, "learning_rate": 5.72217616379242e-06, "loss": 0.15104103088378906, "step": 3538 }, { "epoch": 0.49313732320769177, "grad_norm": 0.5396586656570435, "learning_rate": 5.719844012596683e-06, "loss": 0.10569381713867188, "step": 3539 }, { "epoch": 0.49327666689890615, "grad_norm": 0.855061948299408, "learning_rate": 5.7175117014680415e-06, "loss": 0.13691329956054688, "step": 3540 }, { "epoch": 0.4934160105901205, "grad_norm": 2.159332752227783, "learning_rate": 5.71517923092468e-06, "loss": 0.1869354248046875, "step": 3541 }, { "epoch": 0.4935553542813349, "grad_norm": 1.9775135517120361, "learning_rate": 5.712846601484822e-06, "loss": 0.1998138427734375, "step": 3542 }, { "epoch": 0.4936946979725493, "grad_norm": 0.7522145509719849, "learning_rate": 5.710513813666722e-06, "loss": 0.130706787109375, "step": 3543 }, { "epoch": 0.49383404166376366, "grad_norm": 1.0224158763885498, "learning_rate": 5.708180867988676e-06, "loss": 0.14947509765625, "step": 3544 }, { "epoch": 0.49397338535497803, "grad_norm": 0.6559562683105469, "learning_rate": 5.705847764969008e-06, "loss": 0.12554550170898438, "step": 3545 }, { "epoch": 0.4941127290461924, "grad_norm": 1.4759045839309692, "learning_rate": 5.703514505126081e-06, "loss": 0.17914199829101562, "step": 3546 }, { "epoch": 0.4942520727374068, "grad_norm": 1.6103150844573975, "learning_rate": 5.701181088978295e-06, "loss": 0.1407470703125, "step": 3547 }, { "epoch": 0.49439141642862117, "grad_norm": 1.0723553895950317, "learning_rate": 5.698847517044076e-06, "loss": 0.157745361328125, "step": 3548 }, { "epoch": 0.4945307601198356, "grad_norm": 1.2800883054733276, "learning_rate": 5.696513789841897e-06, "loss": 0.15999603271484375, "step": 3549 }, { "epoch": 0.49467010381105, "grad_norm": 0.8486586809158325, "learning_rate": 5.6941799078902525e-06, "loss": 0.13788986206054688, "step": 3550 }, { "epoch": 0.49480944750226435, "grad_norm": 1.2356327772140503, "learning_rate": 5.691845871707682e-06, "loss": 0.15718460083007812, "step": 3551 }, { "epoch": 0.49494879119347873, "grad_norm": 0.8076690435409546, "learning_rate": 5.689511681812755e-06, "loss": 0.13457870483398438, "step": 3552 }, { "epoch": 0.4950881348846931, "grad_norm": 0.8115559816360474, "learning_rate": 5.687177338724073e-06, "loss": 0.13360977172851562, "step": 3553 }, { "epoch": 0.4952274785759075, "grad_norm": 1.5692691802978516, "learning_rate": 5.684842842960276e-06, "loss": 0.21114730834960938, "step": 3554 }, { "epoch": 0.49536682226712186, "grad_norm": 1.1497654914855957, "learning_rate": 5.682508195040032e-06, "loss": 0.2050933837890625, "step": 3555 }, { "epoch": 0.49550616595833624, "grad_norm": 1.2764790058135986, "learning_rate": 5.68017339548205e-06, "loss": 0.19164276123046875, "step": 3556 }, { "epoch": 0.4956455096495506, "grad_norm": 0.8835187554359436, "learning_rate": 5.6778384448050694e-06, "loss": 0.15208053588867188, "step": 3557 }, { "epoch": 0.495784853340765, "grad_norm": 1.2897984981536865, "learning_rate": 5.675503343527861e-06, "loss": 0.1566009521484375, "step": 3558 }, { "epoch": 0.4959241970319794, "grad_norm": 0.9627783894538879, "learning_rate": 5.673168092169231e-06, "loss": 0.1285572052001953, "step": 3559 }, { "epoch": 0.49606354072319375, "grad_norm": 0.712152361869812, "learning_rate": 5.670832691248021e-06, "loss": 0.1317901611328125, "step": 3560 }, { "epoch": 0.49620288441440813, "grad_norm": 1.0009702444076538, "learning_rate": 5.668497141283101e-06, "loss": 0.11441612243652344, "step": 3561 }, { "epoch": 0.4963422281056225, "grad_norm": 1.4079740047454834, "learning_rate": 5.66616144279338e-06, "loss": 0.138824462890625, "step": 3562 }, { "epoch": 0.4964815717968369, "grad_norm": 0.9989672303199768, "learning_rate": 5.663825596297794e-06, "loss": 0.14476776123046875, "step": 3563 }, { "epoch": 0.49662091548805126, "grad_norm": 1.3411760330200195, "learning_rate": 5.661489602315314e-06, "loss": 0.15620994567871094, "step": 3564 }, { "epoch": 0.49676025917926564, "grad_norm": 0.5648705959320068, "learning_rate": 5.6591534613649505e-06, "loss": 0.10906791687011719, "step": 3565 }, { "epoch": 0.49689960287048, "grad_norm": 0.7879400849342346, "learning_rate": 5.656817173965733e-06, "loss": 0.12831497192382812, "step": 3566 }, { "epoch": 0.4970389465616944, "grad_norm": 0.537635087966919, "learning_rate": 5.6544807406367365e-06, "loss": 0.11034011840820312, "step": 3567 }, { "epoch": 0.49717829025290877, "grad_norm": 0.9260613322257996, "learning_rate": 5.6521441618970605e-06, "loss": 0.13896942138671875, "step": 3568 }, { "epoch": 0.4973176339441232, "grad_norm": 1.502480387687683, "learning_rate": 5.649807438265842e-06, "loss": 0.156585693359375, "step": 3569 }, { "epoch": 0.4974569776353376, "grad_norm": 1.006874918937683, "learning_rate": 5.647470570262246e-06, "loss": 0.15076065063476562, "step": 3570 }, { "epoch": 0.49759632132655196, "grad_norm": 0.8575005531311035, "learning_rate": 5.64513355840547e-06, "loss": 0.12815475463867188, "step": 3571 }, { "epoch": 0.49773566501776634, "grad_norm": 0.8033887147903442, "learning_rate": 5.642796403214747e-06, "loss": 0.1199493408203125, "step": 3572 }, { "epoch": 0.4978750087089807, "grad_norm": 1.0932343006134033, "learning_rate": 5.640459105209337e-06, "loss": 0.11975479125976562, "step": 3573 }, { "epoch": 0.4980143524001951, "grad_norm": 1.1436423063278198, "learning_rate": 5.638121664908537e-06, "loss": 0.16252517700195312, "step": 3574 }, { "epoch": 0.49815369609140947, "grad_norm": 0.9769996404647827, "learning_rate": 5.635784082831671e-06, "loss": 0.14830780029296875, "step": 3575 }, { "epoch": 0.49829303978262385, "grad_norm": 0.8887362480163574, "learning_rate": 5.633446359498098e-06, "loss": 0.139617919921875, "step": 3576 }, { "epoch": 0.4984323834738382, "grad_norm": 1.5638188123703003, "learning_rate": 5.6311084954272055e-06, "loss": 0.168060302734375, "step": 3577 }, { "epoch": 0.4985717271650526, "grad_norm": 0.6877388954162598, "learning_rate": 5.628770491138414e-06, "loss": 0.1306915283203125, "step": 3578 }, { "epoch": 0.498711070856267, "grad_norm": 1.4659889936447144, "learning_rate": 5.626432347151173e-06, "loss": 0.17586898803710938, "step": 3579 }, { "epoch": 0.49885041454748136, "grad_norm": 1.0535731315612793, "learning_rate": 5.624094063984967e-06, "loss": 0.15091705322265625, "step": 3580 }, { "epoch": 0.49898975823869574, "grad_norm": 0.7168593406677246, "learning_rate": 5.621755642159309e-06, "loss": 0.12866973876953125, "step": 3581 }, { "epoch": 0.4991291019299101, "grad_norm": 1.01133131980896, "learning_rate": 5.61941708219374e-06, "loss": 0.14570999145507812, "step": 3582 }, { "epoch": 0.4992684456211245, "grad_norm": 1.2295880317687988, "learning_rate": 5.617078384607839e-06, "loss": 0.1342334747314453, "step": 3583 }, { "epoch": 0.49940778931233887, "grad_norm": 1.4313377141952515, "learning_rate": 5.614739549921208e-06, "loss": 0.16900253295898438, "step": 3584 }, { "epoch": 0.49954713300355325, "grad_norm": 0.9984386563301086, "learning_rate": 5.612400578653484e-06, "loss": 0.13623809814453125, "step": 3585 }, { "epoch": 0.4996864766947676, "grad_norm": 1.1533704996109009, "learning_rate": 5.610061471324335e-06, "loss": 0.14865684509277344, "step": 3586 }, { "epoch": 0.499825820385982, "grad_norm": 1.217478632926941, "learning_rate": 5.607722228453452e-06, "loss": 0.13571548461914062, "step": 3587 }, { "epoch": 0.4999651640771964, "grad_norm": 0.9787909388542175, "learning_rate": 5.605382850560565e-06, "loss": 0.12710952758789062, "step": 3588 }, { "epoch": 0.5001045077684108, "grad_norm": 1.051702857017517, "learning_rate": 5.6030433381654305e-06, "loss": 0.1425952911376953, "step": 3589 }, { "epoch": 0.5002438514596251, "grad_norm": 1.0082441568374634, "learning_rate": 5.600703691787833e-06, "loss": 0.15529251098632812, "step": 3590 }, { "epoch": 0.5003831951508395, "grad_norm": 0.9124755263328552, "learning_rate": 5.598363911947591e-06, "loss": 0.178070068359375, "step": 3591 }, { "epoch": 0.5005225388420539, "grad_norm": 1.1087145805358887, "learning_rate": 5.596023999164547e-06, "loss": 0.14186477661132812, "step": 3592 }, { "epoch": 0.5006618825332683, "grad_norm": 1.0549476146697998, "learning_rate": 5.593683953958579e-06, "loss": 0.146697998046875, "step": 3593 }, { "epoch": 0.5008012262244826, "grad_norm": 0.9506168961524963, "learning_rate": 5.591343776849591e-06, "loss": 0.140716552734375, "step": 3594 }, { "epoch": 0.500940569915697, "grad_norm": 0.8244226574897766, "learning_rate": 5.5890034683575145e-06, "loss": 0.13147354125976562, "step": 3595 }, { "epoch": 0.5010799136069114, "grad_norm": 1.4231425523757935, "learning_rate": 5.586663029002314e-06, "loss": 0.14267730712890625, "step": 3596 }, { "epoch": 0.5012192572981258, "grad_norm": 0.7384944558143616, "learning_rate": 5.584322459303984e-06, "loss": 0.13356399536132812, "step": 3597 }, { "epoch": 0.5013586009893402, "grad_norm": 0.5633699893951416, "learning_rate": 5.581981759782543e-06, "loss": 0.10937118530273438, "step": 3598 }, { "epoch": 0.5014979446805546, "grad_norm": 0.9434652328491211, "learning_rate": 5.579640930958043e-06, "loss": 0.13720321655273438, "step": 3599 }, { "epoch": 0.501637288371769, "grad_norm": 0.7612566351890564, "learning_rate": 5.57729997335056e-06, "loss": 0.12929916381835938, "step": 3600 }, { "epoch": 0.5017766320629834, "grad_norm": 1.9743311405181885, "learning_rate": 5.5749588874802055e-06, "loss": 0.21015548706054688, "step": 3601 }, { "epoch": 0.5019159757541978, "grad_norm": 0.7490715384483337, "learning_rate": 5.572617673867111e-06, "loss": 0.13141632080078125, "step": 3602 }, { "epoch": 0.5020553194454122, "grad_norm": 0.615598201751709, "learning_rate": 5.570276333031441e-06, "loss": 0.129119873046875, "step": 3603 }, { "epoch": 0.5021946631366265, "grad_norm": 0.8430519104003906, "learning_rate": 5.567934865493392e-06, "loss": 0.1256256103515625, "step": 3604 }, { "epoch": 0.5023340068278409, "grad_norm": 1.1326894760131836, "learning_rate": 5.5655932717731805e-06, "loss": 0.17319107055664062, "step": 3605 }, { "epoch": 0.5024733505190553, "grad_norm": 1.0910086631774902, "learning_rate": 5.563251552391058e-06, "loss": 0.15834426879882812, "step": 3606 }, { "epoch": 0.5026126942102697, "grad_norm": 1.1403534412384033, "learning_rate": 5.560909707867299e-06, "loss": 0.15210342407226562, "step": 3607 }, { "epoch": 0.502752037901484, "grad_norm": 1.028394103050232, "learning_rate": 5.558567738722208e-06, "loss": 0.12844085693359375, "step": 3608 }, { "epoch": 0.5028913815926984, "grad_norm": 0.5890423059463501, "learning_rate": 5.556225645476119e-06, "loss": 0.11837959289550781, "step": 3609 }, { "epoch": 0.5030307252839128, "grad_norm": 1.4076199531555176, "learning_rate": 5.55388342864939e-06, "loss": 0.17284393310546875, "step": 3610 }, { "epoch": 0.5031700689751272, "grad_norm": 1.0886530876159668, "learning_rate": 5.5515410887624085e-06, "loss": 0.1379566192626953, "step": 3611 }, { "epoch": 0.5033094126663415, "grad_norm": 1.1394071578979492, "learning_rate": 5.549198626335589e-06, "loss": 0.15554428100585938, "step": 3612 }, { "epoch": 0.5034487563575559, "grad_norm": 1.0365543365478516, "learning_rate": 5.546856041889374e-06, "loss": 0.16747665405273438, "step": 3613 }, { "epoch": 0.5035881000487703, "grad_norm": 0.9153153300285339, "learning_rate": 5.544513335944228e-06, "loss": 0.15177154541015625, "step": 3614 }, { "epoch": 0.5037274437399847, "grad_norm": 1.3075472116470337, "learning_rate": 5.542170509020655e-06, "loss": 0.17566299438476562, "step": 3615 }, { "epoch": 0.5038667874311991, "grad_norm": 0.9079391360282898, "learning_rate": 5.539827561639169e-06, "loss": 0.13933944702148438, "step": 3616 }, { "epoch": 0.5040061311224134, "grad_norm": 0.929037868976593, "learning_rate": 5.537484494320324e-06, "loss": 0.15374755859375, "step": 3617 }, { "epoch": 0.5041454748136278, "grad_norm": 0.7169744968414307, "learning_rate": 5.535141307584697e-06, "loss": 0.1159515380859375, "step": 3618 }, { "epoch": 0.5042848185048422, "grad_norm": 0.7869469523429871, "learning_rate": 5.532798001952888e-06, "loss": 0.13724899291992188, "step": 3619 }, { "epoch": 0.5044241621960566, "grad_norm": 0.8029569387435913, "learning_rate": 5.530454577945529e-06, "loss": 0.12941360473632812, "step": 3620 }, { "epoch": 0.504563505887271, "grad_norm": 0.9840570092201233, "learning_rate": 5.52811103608327e-06, "loss": 0.13827896118164062, "step": 3621 }, { "epoch": 0.5047028495784853, "grad_norm": 0.6943359971046448, "learning_rate": 5.525767376886797e-06, "loss": 0.13401412963867188, "step": 3622 }, { "epoch": 0.5048421932696997, "grad_norm": 1.4088579416275024, "learning_rate": 5.523423600876816e-06, "loss": 0.18828201293945312, "step": 3623 }, { "epoch": 0.5049815369609141, "grad_norm": 0.8821516633033752, "learning_rate": 5.521079708574062e-06, "loss": 0.134490966796875, "step": 3624 }, { "epoch": 0.5051208806521285, "grad_norm": 0.8249541521072388, "learning_rate": 5.5187357004992926e-06, "loss": 0.12630462646484375, "step": 3625 }, { "epoch": 0.5052602243433428, "grad_norm": 0.6972466111183167, "learning_rate": 5.516391577173293e-06, "loss": 0.12597084045410156, "step": 3626 }, { "epoch": 0.5053995680345572, "grad_norm": 0.9279592633247375, "learning_rate": 5.514047339116874e-06, "loss": 0.147613525390625, "step": 3627 }, { "epoch": 0.5055389117257716, "grad_norm": 0.668394923210144, "learning_rate": 5.511702986850873e-06, "loss": 0.11568450927734375, "step": 3628 }, { "epoch": 0.505678255416986, "grad_norm": 0.8909756541252136, "learning_rate": 5.509358520896151e-06, "loss": 0.16685104370117188, "step": 3629 }, { "epoch": 0.5058175991082003, "grad_norm": 0.8338183164596558, "learning_rate": 5.507013941773593e-06, "loss": 0.12796401977539062, "step": 3630 }, { "epoch": 0.5059569427994147, "grad_norm": 0.8584946990013123, "learning_rate": 5.504669250004116e-06, "loss": 0.14202499389648438, "step": 3631 }, { "epoch": 0.5060962864906291, "grad_norm": 0.856921374797821, "learning_rate": 5.502324446108649e-06, "loss": 0.1470489501953125, "step": 3632 }, { "epoch": 0.5062356301818435, "grad_norm": 0.8764118552207947, "learning_rate": 5.49997953060816e-06, "loss": 0.14656448364257812, "step": 3633 }, { "epoch": 0.5063749738730579, "grad_norm": 0.769443154335022, "learning_rate": 5.497634504023634e-06, "loss": 0.11781692504882812, "step": 3634 }, { "epoch": 0.5065143175642722, "grad_norm": 1.4472126960754395, "learning_rate": 5.495289366876083e-06, "loss": 0.16598892211914062, "step": 3635 }, { "epoch": 0.5066536612554866, "grad_norm": 1.2872071266174316, "learning_rate": 5.492944119686544e-06, "loss": 0.17081260681152344, "step": 3636 }, { "epoch": 0.506793004946701, "grad_norm": 0.8860676884651184, "learning_rate": 5.4905987629760724e-06, "loss": 0.15467453002929688, "step": 3637 }, { "epoch": 0.5069323486379154, "grad_norm": 0.7334084510803223, "learning_rate": 5.488253297265757e-06, "loss": 0.13166046142578125, "step": 3638 }, { "epoch": 0.5070716923291299, "grad_norm": 0.7359012365341187, "learning_rate": 5.485907723076708e-06, "loss": 0.12911033630371094, "step": 3639 }, { "epoch": 0.5072110360203442, "grad_norm": 0.8605457544326782, "learning_rate": 5.483562040930055e-06, "loss": 0.14676666259765625, "step": 3640 }, { "epoch": 0.5073503797115586, "grad_norm": 0.8824799060821533, "learning_rate": 5.481216251346956e-06, "loss": 0.14156341552734375, "step": 3641 }, { "epoch": 0.507489723402773, "grad_norm": 0.9042781591415405, "learning_rate": 5.478870354848593e-06, "loss": 0.1782684326171875, "step": 3642 }, { "epoch": 0.5076290670939874, "grad_norm": 1.1059684753417969, "learning_rate": 5.47652435195617e-06, "loss": 0.14886093139648438, "step": 3643 }, { "epoch": 0.5077684107852017, "grad_norm": 0.63032466173172, "learning_rate": 5.4741782431909144e-06, "loss": 0.1302947998046875, "step": 3644 }, { "epoch": 0.5079077544764161, "grad_norm": 0.8079556226730347, "learning_rate": 5.471832029074079e-06, "loss": 0.16121292114257812, "step": 3645 }, { "epoch": 0.5080470981676305, "grad_norm": 0.9322712421417236, "learning_rate": 5.469485710126938e-06, "loss": 0.13071060180664062, "step": 3646 }, { "epoch": 0.5081864418588449, "grad_norm": 1.2035346031188965, "learning_rate": 5.467139286870794e-06, "loss": 0.15777015686035156, "step": 3647 }, { "epoch": 0.5083257855500593, "grad_norm": 1.1155802011489868, "learning_rate": 5.464792759826962e-06, "loss": 0.17711639404296875, "step": 3648 }, { "epoch": 0.5084651292412736, "grad_norm": 1.2497832775115967, "learning_rate": 5.462446129516793e-06, "loss": 0.19211578369140625, "step": 3649 }, { "epoch": 0.508604472932488, "grad_norm": 0.5331181883811951, "learning_rate": 5.460099396461649e-06, "loss": 0.11337852478027344, "step": 3650 }, { "epoch": 0.5087438166237024, "grad_norm": 1.3100637197494507, "learning_rate": 5.457752561182924e-06, "loss": 0.1693572998046875, "step": 3651 }, { "epoch": 0.5088831603149168, "grad_norm": 0.5351494550704956, "learning_rate": 5.455405624202032e-06, "loss": 0.11498641967773438, "step": 3652 }, { "epoch": 0.5090225040061311, "grad_norm": 0.6688527464866638, "learning_rate": 5.453058586040406e-06, "loss": 0.1432971954345703, "step": 3653 }, { "epoch": 0.5091618476973455, "grad_norm": 0.7586675882339478, "learning_rate": 5.450711447219507e-06, "loss": 0.15220260620117188, "step": 3654 }, { "epoch": 0.5093011913885599, "grad_norm": 0.5767110586166382, "learning_rate": 5.448364208260813e-06, "loss": 0.1217498779296875, "step": 3655 }, { "epoch": 0.5094405350797743, "grad_norm": 1.0496106147766113, "learning_rate": 5.446016869685829e-06, "loss": 0.11527252197265625, "step": 3656 }, { "epoch": 0.5095798787709886, "grad_norm": 0.6279868483543396, "learning_rate": 5.44366943201608e-06, "loss": 0.11255264282226562, "step": 3657 }, { "epoch": 0.509719222462203, "grad_norm": 0.8479551076889038, "learning_rate": 5.441321895773112e-06, "loss": 0.1241607666015625, "step": 3658 }, { "epoch": 0.5098585661534174, "grad_norm": 0.7933252453804016, "learning_rate": 5.438974261478494e-06, "loss": 0.1355438232421875, "step": 3659 }, { "epoch": 0.5099979098446318, "grad_norm": 0.6852331161499023, "learning_rate": 5.436626529653817e-06, "loss": 0.12664413452148438, "step": 3660 }, { "epoch": 0.5101372535358462, "grad_norm": 0.8808180093765259, "learning_rate": 5.434278700820693e-06, "loss": 0.13268661499023438, "step": 3661 }, { "epoch": 0.5102765972270605, "grad_norm": 0.7685002684593201, "learning_rate": 5.431930775500756e-06, "loss": 0.14002227783203125, "step": 3662 }, { "epoch": 0.5104159409182749, "grad_norm": 1.1171258687973022, "learning_rate": 5.429582754215664e-06, "loss": 0.1414337158203125, "step": 3663 }, { "epoch": 0.5105552846094893, "grad_norm": 1.4996142387390137, "learning_rate": 5.4272346374870885e-06, "loss": 0.18157958984375, "step": 3664 }, { "epoch": 0.5106946283007037, "grad_norm": 0.6737104058265686, "learning_rate": 5.424886425836734e-06, "loss": 0.12574005126953125, "step": 3665 }, { "epoch": 0.510833971991918, "grad_norm": 0.7659430503845215, "learning_rate": 5.4225381197863135e-06, "loss": 0.131591796875, "step": 3666 }, { "epoch": 0.5109733156831324, "grad_norm": 0.7558187246322632, "learning_rate": 5.420189719857571e-06, "loss": 0.1157989501953125, "step": 3667 }, { "epoch": 0.5111126593743468, "grad_norm": 0.5998610854148865, "learning_rate": 5.417841226572263e-06, "loss": 0.13279342651367188, "step": 3668 }, { "epoch": 0.5112520030655612, "grad_norm": 0.6694426536560059, "learning_rate": 5.415492640452177e-06, "loss": 0.12620162963867188, "step": 3669 }, { "epoch": 0.5113913467567756, "grad_norm": 0.4628196954727173, "learning_rate": 5.4131439620191115e-06, "loss": 0.09998703002929688, "step": 3670 }, { "epoch": 0.5115306904479899, "grad_norm": 2.1390769481658936, "learning_rate": 5.4107951917948896e-06, "loss": 0.19524002075195312, "step": 3671 }, { "epoch": 0.5116700341392043, "grad_norm": 1.4371119737625122, "learning_rate": 5.408446330301355e-06, "loss": 0.13181304931640625, "step": 3672 }, { "epoch": 0.5118093778304187, "grad_norm": 0.880574643611908, "learning_rate": 5.40609737806037e-06, "loss": 0.1281757354736328, "step": 3673 }, { "epoch": 0.5119487215216331, "grad_norm": 0.9424279928207397, "learning_rate": 5.403748335593819e-06, "loss": 0.13185501098632812, "step": 3674 }, { "epoch": 0.5120880652128474, "grad_norm": 0.8017308712005615, "learning_rate": 5.4013992034236065e-06, "loss": 0.12012481689453125, "step": 3675 }, { "epoch": 0.5122274089040618, "grad_norm": 1.0033756494522095, "learning_rate": 5.3990499820716545e-06, "loss": 0.14064788818359375, "step": 3676 }, { "epoch": 0.5123667525952762, "grad_norm": 1.506286382675171, "learning_rate": 5.396700672059907e-06, "loss": 0.16895294189453125, "step": 3677 }, { "epoch": 0.5125060962864906, "grad_norm": 0.7518654465675354, "learning_rate": 5.394351273910327e-06, "loss": 0.11831092834472656, "step": 3678 }, { "epoch": 0.5126454399777051, "grad_norm": 1.5215169191360474, "learning_rate": 5.392001788144897e-06, "loss": 0.15973663330078125, "step": 3679 }, { "epoch": 0.5127847836689194, "grad_norm": 1.106783390045166, "learning_rate": 5.389652215285618e-06, "loss": 0.15035247802734375, "step": 3680 }, { "epoch": 0.5129241273601338, "grad_norm": 0.9064530730247498, "learning_rate": 5.387302555854516e-06, "loss": 0.14829635620117188, "step": 3681 }, { "epoch": 0.5130634710513482, "grad_norm": 1.0382111072540283, "learning_rate": 5.384952810373625e-06, "loss": 0.14704132080078125, "step": 3682 }, { "epoch": 0.5132028147425626, "grad_norm": 1.3660469055175781, "learning_rate": 5.382602979365009e-06, "loss": 0.128875732421875, "step": 3683 }, { "epoch": 0.513342158433777, "grad_norm": 0.8329203128814697, "learning_rate": 5.380253063350747e-06, "loss": 0.13296890258789062, "step": 3684 }, { "epoch": 0.5134815021249913, "grad_norm": 0.9236925840377808, "learning_rate": 5.377903062852935e-06, "loss": 0.13201141357421875, "step": 3685 }, { "epoch": 0.5136208458162057, "grad_norm": 0.8218601942062378, "learning_rate": 5.375552978393691e-06, "loss": 0.13334274291992188, "step": 3686 }, { "epoch": 0.5137601895074201, "grad_norm": 1.4083181619644165, "learning_rate": 5.373202810495149e-06, "loss": 0.16120529174804688, "step": 3687 }, { "epoch": 0.5138995331986345, "grad_norm": 0.7972576022148132, "learning_rate": 5.370852559679461e-06, "loss": 0.11623382568359375, "step": 3688 }, { "epoch": 0.5140388768898488, "grad_norm": 1.039365530014038, "learning_rate": 5.368502226468803e-06, "loss": 0.14063072204589844, "step": 3689 }, { "epoch": 0.5141782205810632, "grad_norm": 0.8826172947883606, "learning_rate": 5.366151811385363e-06, "loss": 0.15559768676757812, "step": 3690 }, { "epoch": 0.5143175642722776, "grad_norm": 1.6709051132202148, "learning_rate": 5.363801314951349e-06, "loss": 0.1680755615234375, "step": 3691 }, { "epoch": 0.514456907963492, "grad_norm": 1.0906500816345215, "learning_rate": 5.361450737688989e-06, "loss": 0.1314105987548828, "step": 3692 }, { "epoch": 0.5145962516547063, "grad_norm": 1.0873408317565918, "learning_rate": 5.359100080120527e-06, "loss": 0.12524032592773438, "step": 3693 }, { "epoch": 0.5147355953459207, "grad_norm": 1.3974123001098633, "learning_rate": 5.356749342768226e-06, "loss": 0.18027114868164062, "step": 3694 }, { "epoch": 0.5148749390371351, "grad_norm": 0.7932977080345154, "learning_rate": 5.354398526154365e-06, "loss": 0.13788223266601562, "step": 3695 }, { "epoch": 0.5150142827283495, "grad_norm": 0.7325065732002258, "learning_rate": 5.352047630801242e-06, "loss": 0.143768310546875, "step": 3696 }, { "epoch": 0.5151536264195639, "grad_norm": 0.9929336309432983, "learning_rate": 5.349696657231176e-06, "loss": 0.16066741943359375, "step": 3697 }, { "epoch": 0.5152929701107782, "grad_norm": 1.418704867362976, "learning_rate": 5.347345605966493e-06, "loss": 0.16857147216796875, "step": 3698 }, { "epoch": 0.5154323138019926, "grad_norm": 0.8051213622093201, "learning_rate": 5.344994477529548e-06, "loss": 0.12328338623046875, "step": 3699 }, { "epoch": 0.515571657493207, "grad_norm": 0.9083417654037476, "learning_rate": 5.342643272442706e-06, "loss": 0.156158447265625, "step": 3700 }, { "epoch": 0.5157110011844214, "grad_norm": 1.1643000841140747, "learning_rate": 5.340291991228352e-06, "loss": 0.16452789306640625, "step": 3701 }, { "epoch": 0.5158503448756357, "grad_norm": 0.4478699862957001, "learning_rate": 5.337940634408888e-06, "loss": 0.10557746887207031, "step": 3702 }, { "epoch": 0.5159896885668501, "grad_norm": 0.7081668376922607, "learning_rate": 5.335589202506727e-06, "loss": 0.12537765502929688, "step": 3703 }, { "epoch": 0.5161290322580645, "grad_norm": 1.5310777425765991, "learning_rate": 5.333237696044309e-06, "loss": 0.15320396423339844, "step": 3704 }, { "epoch": 0.5162683759492789, "grad_norm": 0.9094979763031006, "learning_rate": 5.330886115544081e-06, "loss": 0.14162445068359375, "step": 3705 }, { "epoch": 0.5164077196404933, "grad_norm": 0.9428578019142151, "learning_rate": 5.328534461528515e-06, "loss": 0.13100814819335938, "step": 3706 }, { "epoch": 0.5165470633317076, "grad_norm": 1.973273515701294, "learning_rate": 5.326182734520091e-06, "loss": 0.16904067993164062, "step": 3707 }, { "epoch": 0.516686407022922, "grad_norm": 1.4096018075942993, "learning_rate": 5.32383093504131e-06, "loss": 0.173583984375, "step": 3708 }, { "epoch": 0.5168257507141364, "grad_norm": 1.4151874780654907, "learning_rate": 5.32147906361469e-06, "loss": 0.1735076904296875, "step": 3709 }, { "epoch": 0.5169650944053508, "grad_norm": 1.6068744659423828, "learning_rate": 5.31912712076276e-06, "loss": 0.16536331176757812, "step": 3710 }, { "epoch": 0.5171044380965651, "grad_norm": 0.6581392288208008, "learning_rate": 5.316775107008069e-06, "loss": 0.138153076171875, "step": 3711 }, { "epoch": 0.5172437817877795, "grad_norm": 1.0196346044540405, "learning_rate": 5.314423022873181e-06, "loss": 0.13935089111328125, "step": 3712 }, { "epoch": 0.5173831254789939, "grad_norm": 0.6005691885948181, "learning_rate": 5.312070868880678e-06, "loss": 0.13793373107910156, "step": 3713 }, { "epoch": 0.5175224691702083, "grad_norm": 0.8952246308326721, "learning_rate": 5.3097186455531506e-06, "loss": 0.151947021484375, "step": 3714 }, { "epoch": 0.5176618128614227, "grad_norm": 0.7535478472709656, "learning_rate": 5.307366353413214e-06, "loss": 0.14217376708984375, "step": 3715 }, { "epoch": 0.517801156552637, "grad_norm": 0.9157778024673462, "learning_rate": 5.305013992983487e-06, "loss": 0.15268707275390625, "step": 3716 }, { "epoch": 0.5179405002438514, "grad_norm": 0.8390627503395081, "learning_rate": 5.302661564786617e-06, "loss": 0.15098190307617188, "step": 3717 }, { "epoch": 0.5180798439350658, "grad_norm": 0.7563731074333191, "learning_rate": 5.300309069345257e-06, "loss": 0.12379264831542969, "step": 3718 }, { "epoch": 0.5182191876262803, "grad_norm": 0.6670566201210022, "learning_rate": 5.297956507182077e-06, "loss": 0.12305068969726562, "step": 3719 }, { "epoch": 0.5183585313174947, "grad_norm": 1.4797863960266113, "learning_rate": 5.295603878819764e-06, "loss": 0.19902801513671875, "step": 3720 }, { "epoch": 0.518497875008709, "grad_norm": 1.1234732866287231, "learning_rate": 5.2932511847810175e-06, "loss": 0.15826034545898438, "step": 3721 }, { "epoch": 0.5186372186999234, "grad_norm": 0.7157477140426636, "learning_rate": 5.290898425588553e-06, "loss": 0.13012313842773438, "step": 3722 }, { "epoch": 0.5187765623911378, "grad_norm": 1.3218255043029785, "learning_rate": 5.2885456017651e-06, "loss": 0.15556716918945312, "step": 3723 }, { "epoch": 0.5189159060823522, "grad_norm": 1.6964762210845947, "learning_rate": 5.286192713833402e-06, "loss": 0.1717376708984375, "step": 3724 }, { "epoch": 0.5190552497735665, "grad_norm": 0.7770852446556091, "learning_rate": 5.283839762316217e-06, "loss": 0.11230087280273438, "step": 3725 }, { "epoch": 0.5191945934647809, "grad_norm": 0.6968972086906433, "learning_rate": 5.281486747736316e-06, "loss": 0.1325836181640625, "step": 3726 }, { "epoch": 0.5193339371559953, "grad_norm": 1.2054246664047241, "learning_rate": 5.279133670616488e-06, "loss": 0.15765380859375, "step": 3727 }, { "epoch": 0.5194732808472097, "grad_norm": 0.860978364944458, "learning_rate": 5.276780531479528e-06, "loss": 0.1441192626953125, "step": 3728 }, { "epoch": 0.519612624538424, "grad_norm": 0.9849502444267273, "learning_rate": 5.274427330848257e-06, "loss": 0.15958404541015625, "step": 3729 }, { "epoch": 0.5197519682296384, "grad_norm": 0.9039480686187744, "learning_rate": 5.2720740692454944e-06, "loss": 0.12308120727539062, "step": 3730 }, { "epoch": 0.5198913119208528, "grad_norm": 1.836044192314148, "learning_rate": 5.269720747194088e-06, "loss": 0.15935897827148438, "step": 3731 }, { "epoch": 0.5200306556120672, "grad_norm": 1.4421442747116089, "learning_rate": 5.267367365216887e-06, "loss": 0.18015098571777344, "step": 3732 }, { "epoch": 0.5201699993032816, "grad_norm": 0.8833238482475281, "learning_rate": 5.265013923836763e-06, "loss": 0.15045547485351562, "step": 3733 }, { "epoch": 0.5203093429944959, "grad_norm": 0.9546316862106323, "learning_rate": 5.262660423576595e-06, "loss": 0.1375732421875, "step": 3734 }, { "epoch": 0.5204486866857103, "grad_norm": 0.6120074987411499, "learning_rate": 5.260306864959278e-06, "loss": 0.11191368103027344, "step": 3735 }, { "epoch": 0.5205880303769247, "grad_norm": 1.426118016242981, "learning_rate": 5.25795324850772e-06, "loss": 0.14136505126953125, "step": 3736 }, { "epoch": 0.5207273740681391, "grad_norm": 1.9977773427963257, "learning_rate": 5.255599574744836e-06, "loss": 0.1734619140625, "step": 3737 }, { "epoch": 0.5208667177593534, "grad_norm": 1.0836820602416992, "learning_rate": 5.253245844193564e-06, "loss": 0.17363739013671875, "step": 3738 }, { "epoch": 0.5210060614505678, "grad_norm": 0.49943801760673523, "learning_rate": 5.250892057376848e-06, "loss": 0.11026573181152344, "step": 3739 }, { "epoch": 0.5211454051417822, "grad_norm": 0.8100417256355286, "learning_rate": 5.248538214817642e-06, "loss": 0.15602874755859375, "step": 3740 }, { "epoch": 0.5212847488329966, "grad_norm": 0.7048986554145813, "learning_rate": 5.246184317038922e-06, "loss": 0.129852294921875, "step": 3741 }, { "epoch": 0.521424092524211, "grad_norm": 1.0637013912200928, "learning_rate": 5.243830364563665e-06, "loss": 0.13994979858398438, "step": 3742 }, { "epoch": 0.5215634362154253, "grad_norm": 0.755468487739563, "learning_rate": 5.241476357914869e-06, "loss": 0.1268463134765625, "step": 3743 }, { "epoch": 0.5217027799066397, "grad_norm": 0.8862323760986328, "learning_rate": 5.239122297615539e-06, "loss": 0.14093017578125, "step": 3744 }, { "epoch": 0.5218421235978541, "grad_norm": 1.1194350719451904, "learning_rate": 5.236768184188693e-06, "loss": 0.19402694702148438, "step": 3745 }, { "epoch": 0.5219814672890685, "grad_norm": 0.9019198417663574, "learning_rate": 5.234414018157361e-06, "loss": 0.1503925323486328, "step": 3746 }, { "epoch": 0.5221208109802828, "grad_norm": 1.199649691581726, "learning_rate": 5.232059800044589e-06, "loss": 0.143707275390625, "step": 3747 }, { "epoch": 0.5222601546714972, "grad_norm": 1.591555118560791, "learning_rate": 5.229705530373424e-06, "loss": 0.18222808837890625, "step": 3748 }, { "epoch": 0.5223994983627116, "grad_norm": 0.7354734539985657, "learning_rate": 5.2273512096669364e-06, "loss": 0.132843017578125, "step": 3749 }, { "epoch": 0.522538842053926, "grad_norm": 0.9760013818740845, "learning_rate": 5.2249968384482e-06, "loss": 0.13787078857421875, "step": 3750 }, { "epoch": 0.5226781857451404, "grad_norm": 0.8383069634437561, "learning_rate": 5.222642417240305e-06, "loss": 0.14586639404296875, "step": 3751 }, { "epoch": 0.5228175294363547, "grad_norm": 1.0536226034164429, "learning_rate": 5.220287946566347e-06, "loss": 0.13897323608398438, "step": 3752 }, { "epoch": 0.5229568731275691, "grad_norm": 0.609294056892395, "learning_rate": 5.2179334269494345e-06, "loss": 0.12305450439453125, "step": 3753 }, { "epoch": 0.5230962168187835, "grad_norm": 1.170890212059021, "learning_rate": 5.215578858912691e-06, "loss": 0.15899276733398438, "step": 3754 }, { "epoch": 0.5232355605099979, "grad_norm": 0.8648903369903564, "learning_rate": 5.213224242979247e-06, "loss": 0.129547119140625, "step": 3755 }, { "epoch": 0.5233749042012122, "grad_norm": 0.6932501792907715, "learning_rate": 5.2108695796722446e-06, "loss": 0.13761520385742188, "step": 3756 }, { "epoch": 0.5235142478924266, "grad_norm": 0.8097542524337769, "learning_rate": 5.208514869514835e-06, "loss": 0.12762069702148438, "step": 3757 }, { "epoch": 0.523653591583641, "grad_norm": 1.1583175659179688, "learning_rate": 5.206160113030182e-06, "loss": 0.1635284423828125, "step": 3758 }, { "epoch": 0.5237929352748554, "grad_norm": 1.3374943733215332, "learning_rate": 5.203805310741459e-06, "loss": 0.15084075927734375, "step": 3759 }, { "epoch": 0.5239322789660699, "grad_norm": 0.8072215914726257, "learning_rate": 5.201450463171849e-06, "loss": 0.15196990966796875, "step": 3760 }, { "epoch": 0.5240716226572842, "grad_norm": 0.7030718922615051, "learning_rate": 5.199095570844546e-06, "loss": 0.1353435516357422, "step": 3761 }, { "epoch": 0.5242109663484986, "grad_norm": 1.253183364868164, "learning_rate": 5.19674063428275e-06, "loss": 0.13718223571777344, "step": 3762 }, { "epoch": 0.524350310039713, "grad_norm": 1.1326559782028198, "learning_rate": 5.1943856540096795e-06, "loss": 0.12408447265625, "step": 3763 }, { "epoch": 0.5244896537309274, "grad_norm": 1.474371075630188, "learning_rate": 5.192030630548552e-06, "loss": 0.18545913696289062, "step": 3764 }, { "epoch": 0.5246289974221418, "grad_norm": 1.2875992059707642, "learning_rate": 5.1896755644226046e-06, "loss": 0.1295948028564453, "step": 3765 }, { "epoch": 0.5247683411133561, "grad_norm": 0.7784647345542908, "learning_rate": 5.1873204561550764e-06, "loss": 0.1134033203125, "step": 3766 }, { "epoch": 0.5249076848045705, "grad_norm": 0.8545252084732056, "learning_rate": 5.18496530626922e-06, "loss": 0.13196182250976562, "step": 3767 }, { "epoch": 0.5250470284957849, "grad_norm": 1.0727386474609375, "learning_rate": 5.182610115288296e-06, "loss": 0.1451740264892578, "step": 3768 }, { "epoch": 0.5251863721869993, "grad_norm": 1.0722583532333374, "learning_rate": 5.180254883735571e-06, "loss": 0.14597702026367188, "step": 3769 }, { "epoch": 0.5253257158782136, "grad_norm": 1.0252381563186646, "learning_rate": 5.1778996121343274e-06, "loss": 0.13562393188476562, "step": 3770 }, { "epoch": 0.525465059569428, "grad_norm": 1.1643387079238892, "learning_rate": 5.175544301007852e-06, "loss": 0.1648693084716797, "step": 3771 }, { "epoch": 0.5256044032606424, "grad_norm": 1.2733708620071411, "learning_rate": 5.173188950879441e-06, "loss": 0.16015052795410156, "step": 3772 }, { "epoch": 0.5257437469518568, "grad_norm": 0.9032249450683594, "learning_rate": 5.170833562272398e-06, "loss": 0.11191940307617188, "step": 3773 }, { "epoch": 0.5258830906430711, "grad_norm": 0.8475756049156189, "learning_rate": 5.168478135710038e-06, "loss": 0.14725875854492188, "step": 3774 }, { "epoch": 0.5260224343342855, "grad_norm": 1.276658535003662, "learning_rate": 5.166122671715683e-06, "loss": 0.16620635986328125, "step": 3775 }, { "epoch": 0.5261617780254999, "grad_norm": 0.9399904608726501, "learning_rate": 5.163767170812663e-06, "loss": 0.15198516845703125, "step": 3776 }, { "epoch": 0.5263011217167143, "grad_norm": 0.8803045153617859, "learning_rate": 5.1614116335243155e-06, "loss": 0.15303993225097656, "step": 3777 }, { "epoch": 0.5264404654079287, "grad_norm": 2.006561040878296, "learning_rate": 5.1590560603739885e-06, "loss": 0.14701461791992188, "step": 3778 }, { "epoch": 0.526579809099143, "grad_norm": 1.3157230615615845, "learning_rate": 5.156700451885037e-06, "loss": 0.16955184936523438, "step": 3779 }, { "epoch": 0.5267191527903574, "grad_norm": 1.2805538177490234, "learning_rate": 5.154344808580821e-06, "loss": 0.1454620361328125, "step": 3780 }, { "epoch": 0.5268584964815718, "grad_norm": 1.7994965314865112, "learning_rate": 5.151989130984715e-06, "loss": 0.20902252197265625, "step": 3781 }, { "epoch": 0.5269978401727862, "grad_norm": 1.4318573474884033, "learning_rate": 5.149633419620092e-06, "loss": 0.15609359741210938, "step": 3782 }, { "epoch": 0.5271371838640005, "grad_norm": 0.838212251663208, "learning_rate": 5.147277675010339e-06, "loss": 0.12974929809570312, "step": 3783 }, { "epoch": 0.5272765275552149, "grad_norm": 0.7593927383422852, "learning_rate": 5.144921897678851e-06, "loss": 0.12955856323242188, "step": 3784 }, { "epoch": 0.5274158712464293, "grad_norm": 1.4856196641921997, "learning_rate": 5.142566088149024e-06, "loss": 0.16680526733398438, "step": 3785 }, { "epoch": 0.5275552149376437, "grad_norm": 0.7933855056762695, "learning_rate": 5.1402102469442686e-06, "loss": 0.13450241088867188, "step": 3786 }, { "epoch": 0.5276945586288581, "grad_norm": 0.7642908692359924, "learning_rate": 5.137854374587996e-06, "loss": 0.14161300659179688, "step": 3787 }, { "epoch": 0.5278339023200724, "grad_norm": 0.8710026144981384, "learning_rate": 5.135498471603629e-06, "loss": 0.12462615966796875, "step": 3788 }, { "epoch": 0.5279732460112868, "grad_norm": 0.9877367615699768, "learning_rate": 5.133142538514596e-06, "loss": 0.15367889404296875, "step": 3789 }, { "epoch": 0.5281125897025012, "grad_norm": 0.8378744125366211, "learning_rate": 5.130786575844329e-06, "loss": 0.16411209106445312, "step": 3790 }, { "epoch": 0.5282519333937156, "grad_norm": 0.765055239200592, "learning_rate": 5.128430584116273e-06, "loss": 0.12665557861328125, "step": 3791 }, { "epoch": 0.52839127708493, "grad_norm": 0.8511494994163513, "learning_rate": 5.126074563853872e-06, "loss": 0.1264972686767578, "step": 3792 }, { "epoch": 0.5285306207761443, "grad_norm": 0.8119720220565796, "learning_rate": 5.123718515580581e-06, "loss": 0.1360931396484375, "step": 3793 }, { "epoch": 0.5286699644673587, "grad_norm": 1.3177708387374878, "learning_rate": 5.1213624398198606e-06, "loss": 0.13290786743164062, "step": 3794 }, { "epoch": 0.5288093081585731, "grad_norm": 1.0448927879333496, "learning_rate": 5.119006337095178e-06, "loss": 0.13761520385742188, "step": 3795 }, { "epoch": 0.5289486518497875, "grad_norm": 1.776512622833252, "learning_rate": 5.1166502079300015e-06, "loss": 0.18801116943359375, "step": 3796 }, { "epoch": 0.5290879955410018, "grad_norm": 1.569900631904602, "learning_rate": 5.114294052847814e-06, "loss": 0.20007705688476562, "step": 3797 }, { "epoch": 0.5292273392322162, "grad_norm": 1.144530177116394, "learning_rate": 5.111937872372097e-06, "loss": 0.17785263061523438, "step": 3798 }, { "epoch": 0.5293666829234306, "grad_norm": 1.0198549032211304, "learning_rate": 5.109581667026341e-06, "loss": 0.17840576171875, "step": 3799 }, { "epoch": 0.5295060266146451, "grad_norm": 1.1433722972869873, "learning_rate": 5.107225437334039e-06, "loss": 0.17821502685546875, "step": 3800 }, { "epoch": 0.5296453703058595, "grad_norm": 0.7696691751480103, "learning_rate": 5.1048691838186935e-06, "loss": 0.13842391967773438, "step": 3801 }, { "epoch": 0.5297847139970738, "grad_norm": 1.0889415740966797, "learning_rate": 5.102512907003812e-06, "loss": 0.16436386108398438, "step": 3802 }, { "epoch": 0.5299240576882882, "grad_norm": 1.071521520614624, "learning_rate": 5.100156607412899e-06, "loss": 0.14652633666992188, "step": 3803 }, { "epoch": 0.5300634013795026, "grad_norm": 1.1082340478897095, "learning_rate": 5.097800285569476e-06, "loss": 0.14934539794921875, "step": 3804 }, { "epoch": 0.530202745070717, "grad_norm": 0.7252720594406128, "learning_rate": 5.095443941997062e-06, "loss": 0.127777099609375, "step": 3805 }, { "epoch": 0.5303420887619313, "grad_norm": 0.8061463236808777, "learning_rate": 5.093087577219183e-06, "loss": 0.13916778564453125, "step": 3806 }, { "epoch": 0.5304814324531457, "grad_norm": 0.5740865468978882, "learning_rate": 5.090731191759371e-06, "loss": 0.10785293579101562, "step": 3807 }, { "epoch": 0.5306207761443601, "grad_norm": 0.8819135427474976, "learning_rate": 5.088374786141159e-06, "loss": 0.1423797607421875, "step": 3808 }, { "epoch": 0.5307601198355745, "grad_norm": 0.9180012941360474, "learning_rate": 5.086018360888087e-06, "loss": 0.13295745849609375, "step": 3809 }, { "epoch": 0.5308994635267889, "grad_norm": 1.3533202409744263, "learning_rate": 5.083661916523699e-06, "loss": 0.14672088623046875, "step": 3810 }, { "epoch": 0.5310388072180032, "grad_norm": 0.9526568055152893, "learning_rate": 5.081305453571543e-06, "loss": 0.13879776000976562, "step": 3811 }, { "epoch": 0.5311781509092176, "grad_norm": 1.1157530546188354, "learning_rate": 5.07894897255517e-06, "loss": 0.16408920288085938, "step": 3812 }, { "epoch": 0.531317494600432, "grad_norm": 0.9380144476890564, "learning_rate": 5.076592473998141e-06, "loss": 0.12396621704101562, "step": 3813 }, { "epoch": 0.5314568382916464, "grad_norm": 0.9692339301109314, "learning_rate": 5.07423595842401e-06, "loss": 0.13011550903320312, "step": 3814 }, { "epoch": 0.5315961819828607, "grad_norm": 1.0729014873504639, "learning_rate": 5.071879426356345e-06, "loss": 0.12324142456054688, "step": 3815 }, { "epoch": 0.5317355256740751, "grad_norm": 1.4201488494873047, "learning_rate": 5.069522878318712e-06, "loss": 0.16802597045898438, "step": 3816 }, { "epoch": 0.5318748693652895, "grad_norm": 1.4426367282867432, "learning_rate": 5.067166314834684e-06, "loss": 0.16963577270507812, "step": 3817 }, { "epoch": 0.5320142130565039, "grad_norm": 2.435380458831787, "learning_rate": 5.064809736427835e-06, "loss": 0.2238616943359375, "step": 3818 }, { "epoch": 0.5321535567477182, "grad_norm": 1.2595298290252686, "learning_rate": 5.062453143621739e-06, "loss": 0.16277694702148438, "step": 3819 }, { "epoch": 0.5322929004389326, "grad_norm": 0.9997852444648743, "learning_rate": 5.060096536939982e-06, "loss": 0.14288711547851562, "step": 3820 }, { "epoch": 0.532432244130147, "grad_norm": 0.9652493000030518, "learning_rate": 5.057739916906147e-06, "loss": 0.1454925537109375, "step": 3821 }, { "epoch": 0.5325715878213614, "grad_norm": 0.9220625758171082, "learning_rate": 5.05538328404382e-06, "loss": 0.1513957977294922, "step": 3822 }, { "epoch": 0.5327109315125758, "grad_norm": 1.008368730545044, "learning_rate": 5.053026638876591e-06, "loss": 0.1435260772705078, "step": 3823 }, { "epoch": 0.5328502752037901, "grad_norm": 1.9630241394042969, "learning_rate": 5.050669981928056e-06, "loss": 0.18561935424804688, "step": 3824 }, { "epoch": 0.5329896188950045, "grad_norm": 0.954806923866272, "learning_rate": 5.048313313721806e-06, "loss": 0.13716888427734375, "step": 3825 }, { "epoch": 0.5331289625862189, "grad_norm": 1.7213057279586792, "learning_rate": 5.04595663478144e-06, "loss": 0.192474365234375, "step": 3826 }, { "epoch": 0.5332683062774333, "grad_norm": 0.9765536785125732, "learning_rate": 5.0435999456305605e-06, "loss": 0.12493896484375, "step": 3827 }, { "epoch": 0.5334076499686476, "grad_norm": 1.2018260955810547, "learning_rate": 5.0412432467927674e-06, "loss": 0.14166641235351562, "step": 3828 }, { "epoch": 0.533546993659862, "grad_norm": 0.944084644317627, "learning_rate": 5.038886538791668e-06, "loss": 0.11798858642578125, "step": 3829 }, { "epoch": 0.5336863373510764, "grad_norm": 1.0994590520858765, "learning_rate": 5.036529822150865e-06, "loss": 0.14846038818359375, "step": 3830 }, { "epoch": 0.5338256810422908, "grad_norm": 0.7680566310882568, "learning_rate": 5.034173097393973e-06, "loss": 0.13389205932617188, "step": 3831 }, { "epoch": 0.5339650247335052, "grad_norm": 0.8877233266830444, "learning_rate": 5.031816365044595e-06, "loss": 0.13758468627929688, "step": 3832 }, { "epoch": 0.5341043684247195, "grad_norm": 1.0531055927276611, "learning_rate": 5.02945962562635e-06, "loss": 0.14400482177734375, "step": 3833 }, { "epoch": 0.5342437121159339, "grad_norm": 0.7671190500259399, "learning_rate": 5.027102879662847e-06, "loss": 0.128021240234375, "step": 3834 }, { "epoch": 0.5343830558071483, "grad_norm": 1.301560878753662, "learning_rate": 5.024746127677703e-06, "loss": 0.16688919067382812, "step": 3835 }, { "epoch": 0.5345223994983627, "grad_norm": 1.0803585052490234, "learning_rate": 5.022389370194536e-06, "loss": 0.18171310424804688, "step": 3836 }, { "epoch": 0.534661743189577, "grad_norm": 0.62887042760849, "learning_rate": 5.020032607736961e-06, "loss": 0.1206817626953125, "step": 3837 }, { "epoch": 0.5348010868807914, "grad_norm": 0.6906318664550781, "learning_rate": 5.017675840828597e-06, "loss": 0.12739181518554688, "step": 3838 }, { "epoch": 0.5349404305720058, "grad_norm": 0.9622116684913635, "learning_rate": 5.015319069993066e-06, "loss": 0.1735858917236328, "step": 3839 }, { "epoch": 0.5350797742632203, "grad_norm": 0.6090491414070129, "learning_rate": 5.012962295753988e-06, "loss": 0.11901092529296875, "step": 3840 }, { "epoch": 0.5352191179544347, "grad_norm": 0.7548745274543762, "learning_rate": 5.010605518634982e-06, "loss": 0.12652587890625, "step": 3841 }, { "epoch": 0.535358461645649, "grad_norm": 1.7646640539169312, "learning_rate": 5.008248739159674e-06, "loss": 0.1767730712890625, "step": 3842 }, { "epoch": 0.5354978053368634, "grad_norm": 1.3860397338867188, "learning_rate": 5.005891957851683e-06, "loss": 0.15092086791992188, "step": 3843 }, { "epoch": 0.5356371490280778, "grad_norm": 0.7873938083648682, "learning_rate": 5.003535175234633e-06, "loss": 0.13043594360351562, "step": 3844 }, { "epoch": 0.5357764927192922, "grad_norm": 0.9561295509338379, "learning_rate": 5.001178391832149e-06, "loss": 0.12611770629882812, "step": 3845 }, { "epoch": 0.5359158364105066, "grad_norm": 0.9389334917068481, "learning_rate": 4.998821608167853e-06, "loss": 0.14403533935546875, "step": 3846 }, { "epoch": 0.5360551801017209, "grad_norm": 0.7807409763336182, "learning_rate": 4.996464824765369e-06, "loss": 0.12755966186523438, "step": 3847 }, { "epoch": 0.5361945237929353, "grad_norm": 2.1470861434936523, "learning_rate": 4.994108042148318e-06, "loss": 0.2041168212890625, "step": 3848 }, { "epoch": 0.5363338674841497, "grad_norm": 0.8469387888908386, "learning_rate": 4.991751260840328e-06, "loss": 0.12665367126464844, "step": 3849 }, { "epoch": 0.5364732111753641, "grad_norm": 1.0394299030303955, "learning_rate": 4.9893944813650185e-06, "loss": 0.1429157257080078, "step": 3850 }, { "epoch": 0.5366125548665784, "grad_norm": 0.7621784806251526, "learning_rate": 4.987037704246015e-06, "loss": 0.12522125244140625, "step": 3851 }, { "epoch": 0.5367518985577928, "grad_norm": 0.664576530456543, "learning_rate": 4.984680930006936e-06, "loss": 0.12645339965820312, "step": 3852 }, { "epoch": 0.5368912422490072, "grad_norm": 0.7725340127944946, "learning_rate": 4.982324159171404e-06, "loss": 0.12850189208984375, "step": 3853 }, { "epoch": 0.5370305859402216, "grad_norm": 1.034020185470581, "learning_rate": 4.979967392263041e-06, "loss": 0.13611984252929688, "step": 3854 }, { "epoch": 0.537169929631436, "grad_norm": 1.1973145008087158, "learning_rate": 4.977610629805465e-06, "loss": 0.1294078826904297, "step": 3855 }, { "epoch": 0.5373092733226503, "grad_norm": 1.0495045185089111, "learning_rate": 4.975253872322297e-06, "loss": 0.17520904541015625, "step": 3856 }, { "epoch": 0.5374486170138647, "grad_norm": 1.4054802656173706, "learning_rate": 4.972897120337155e-06, "loss": 0.15181350708007812, "step": 3857 }, { "epoch": 0.5375879607050791, "grad_norm": 1.3506708145141602, "learning_rate": 4.970540374373653e-06, "loss": 0.156463623046875, "step": 3858 }, { "epoch": 0.5377273043962935, "grad_norm": 0.9244630932807922, "learning_rate": 4.9681836349554064e-06, "loss": 0.16399002075195312, "step": 3859 }, { "epoch": 0.5378666480875078, "grad_norm": 0.5752825736999512, "learning_rate": 4.965826902606029e-06, "loss": 0.11684989929199219, "step": 3860 }, { "epoch": 0.5380059917787222, "grad_norm": 1.054882287979126, "learning_rate": 4.963470177849135e-06, "loss": 0.136932373046875, "step": 3861 }, { "epoch": 0.5381453354699366, "grad_norm": 0.7131116986274719, "learning_rate": 4.961113461208335e-06, "loss": 0.13904953002929688, "step": 3862 }, { "epoch": 0.538284679161151, "grad_norm": 1.4195210933685303, "learning_rate": 4.958756753207234e-06, "loss": 0.17609024047851562, "step": 3863 }, { "epoch": 0.5384240228523653, "grad_norm": 0.8193963170051575, "learning_rate": 4.956400054369441e-06, "loss": 0.1285686492919922, "step": 3864 }, { "epoch": 0.5385633665435797, "grad_norm": 0.5715042352676392, "learning_rate": 4.954043365218561e-06, "loss": 0.1065826416015625, "step": 3865 }, { "epoch": 0.5387027102347941, "grad_norm": 1.1548206806182861, "learning_rate": 4.951686686278195e-06, "loss": 0.1710662841796875, "step": 3866 }, { "epoch": 0.5388420539260085, "grad_norm": 0.7404905557632446, "learning_rate": 4.949330018071947e-06, "loss": 0.1295318603515625, "step": 3867 }, { "epoch": 0.5389813976172229, "grad_norm": 0.7098348140716553, "learning_rate": 4.946973361123411e-06, "loss": 0.12738800048828125, "step": 3868 }, { "epoch": 0.5391207413084372, "grad_norm": 0.8871694207191467, "learning_rate": 4.9446167159561814e-06, "loss": 0.133514404296875, "step": 3869 }, { "epoch": 0.5392600849996516, "grad_norm": 1.0181751251220703, "learning_rate": 4.942260083093854e-06, "loss": 0.15303421020507812, "step": 3870 }, { "epoch": 0.539399428690866, "grad_norm": 0.9509626626968384, "learning_rate": 4.939903463060018e-06, "loss": 0.14564895629882812, "step": 3871 }, { "epoch": 0.5395387723820804, "grad_norm": 0.8193216919898987, "learning_rate": 4.937546856378263e-06, "loss": 0.1435699462890625, "step": 3872 }, { "epoch": 0.5396781160732947, "grad_norm": 0.8227224349975586, "learning_rate": 4.935190263572168e-06, "loss": 0.12314987182617188, "step": 3873 }, { "epoch": 0.5398174597645091, "grad_norm": 1.6057976484298706, "learning_rate": 4.932833685165318e-06, "loss": 0.1648406982421875, "step": 3874 }, { "epoch": 0.5399568034557235, "grad_norm": 1.156334638595581, "learning_rate": 4.930477121681289e-06, "loss": 0.11676025390625, "step": 3875 }, { "epoch": 0.5400961471469379, "grad_norm": 0.7952852845191956, "learning_rate": 4.9281205736436555e-06, "loss": 0.10449981689453125, "step": 3876 }, { "epoch": 0.5402354908381523, "grad_norm": 1.1055737733840942, "learning_rate": 4.925764041575991e-06, "loss": 0.1560211181640625, "step": 3877 }, { "epoch": 0.5403748345293666, "grad_norm": 1.0140467882156372, "learning_rate": 4.9234075260018615e-06, "loss": 0.15610504150390625, "step": 3878 }, { "epoch": 0.540514178220581, "grad_norm": 1.1679505109786987, "learning_rate": 4.921051027444831e-06, "loss": 0.16469955444335938, "step": 3879 }, { "epoch": 0.5406535219117955, "grad_norm": 1.056836724281311, "learning_rate": 4.918694546428458e-06, "loss": 0.14987564086914062, "step": 3880 }, { "epoch": 0.5407928656030099, "grad_norm": 0.7445563673973083, "learning_rate": 4.916338083476303e-06, "loss": 0.14194107055664062, "step": 3881 }, { "epoch": 0.5409322092942243, "grad_norm": 0.5401502251625061, "learning_rate": 4.913981639111914e-06, "loss": 0.12454986572265625, "step": 3882 }, { "epoch": 0.5410715529854386, "grad_norm": 1.0494458675384521, "learning_rate": 4.9116252138588435e-06, "loss": 0.16433334350585938, "step": 3883 }, { "epoch": 0.541210896676653, "grad_norm": 1.3869798183441162, "learning_rate": 4.90926880824063e-06, "loss": 0.18838882446289062, "step": 3884 }, { "epoch": 0.5413502403678674, "grad_norm": 1.386735439300537, "learning_rate": 4.906912422780818e-06, "loss": 0.14667892456054688, "step": 3885 }, { "epoch": 0.5414895840590818, "grad_norm": 1.1847469806671143, "learning_rate": 4.904556058002939e-06, "loss": 0.16108322143554688, "step": 3886 }, { "epoch": 0.5416289277502961, "grad_norm": 0.9174978137016296, "learning_rate": 4.902199714430525e-06, "loss": 0.13880157470703125, "step": 3887 }, { "epoch": 0.5417682714415105, "grad_norm": 0.8898897171020508, "learning_rate": 4.899843392587104e-06, "loss": 0.116851806640625, "step": 3888 }, { "epoch": 0.5419076151327249, "grad_norm": 0.8043422102928162, "learning_rate": 4.8974870929961915e-06, "loss": 0.130157470703125, "step": 3889 }, { "epoch": 0.5420469588239393, "grad_norm": 1.3968952894210815, "learning_rate": 4.895130816181307e-06, "loss": 0.14809799194335938, "step": 3890 }, { "epoch": 0.5421863025151537, "grad_norm": 1.1853700876235962, "learning_rate": 4.8927745626659625e-06, "loss": 0.13590621948242188, "step": 3891 }, { "epoch": 0.542325646206368, "grad_norm": 1.3420380353927612, "learning_rate": 4.89041833297366e-06, "loss": 0.13130760192871094, "step": 3892 }, { "epoch": 0.5424649898975824, "grad_norm": 1.1532257795333862, "learning_rate": 4.888062127627904e-06, "loss": 0.16723251342773438, "step": 3893 }, { "epoch": 0.5426043335887968, "grad_norm": 0.8294055461883545, "learning_rate": 4.885705947152187e-06, "loss": 0.14363861083984375, "step": 3894 }, { "epoch": 0.5427436772800112, "grad_norm": 0.9090837240219116, "learning_rate": 4.883349792069999e-06, "loss": 0.13422012329101562, "step": 3895 }, { "epoch": 0.5428830209712255, "grad_norm": 0.8323323726654053, "learning_rate": 4.880993662904824e-06, "loss": 0.11176300048828125, "step": 3896 }, { "epoch": 0.5430223646624399, "grad_norm": 1.2832705974578857, "learning_rate": 4.87863756018014e-06, "loss": 0.15311050415039062, "step": 3897 }, { "epoch": 0.5431617083536543, "grad_norm": 0.8827212452888489, "learning_rate": 4.87628148441942e-06, "loss": 0.15839004516601562, "step": 3898 }, { "epoch": 0.5433010520448687, "grad_norm": 0.65012127161026, "learning_rate": 4.8739254361461305e-06, "loss": 0.11299896240234375, "step": 3899 }, { "epoch": 0.543440395736083, "grad_norm": 1.0021966695785522, "learning_rate": 4.871569415883729e-06, "loss": 0.16377639770507812, "step": 3900 }, { "epoch": 0.5435797394272974, "grad_norm": 0.8229274153709412, "learning_rate": 4.869213424155671e-06, "loss": 0.137969970703125, "step": 3901 }, { "epoch": 0.5437190831185118, "grad_norm": 0.9683976173400879, "learning_rate": 4.8668574614854055e-06, "loss": 0.1260528564453125, "step": 3902 }, { "epoch": 0.5438584268097262, "grad_norm": 0.7506749033927917, "learning_rate": 4.864501528396371e-06, "loss": 0.12198257446289062, "step": 3903 }, { "epoch": 0.5439977705009406, "grad_norm": 0.7047180533409119, "learning_rate": 4.862145625412006e-06, "loss": 0.11383247375488281, "step": 3904 }, { "epoch": 0.5441371141921549, "grad_norm": 0.7463590502738953, "learning_rate": 4.859789753055734e-06, "loss": 0.1287212371826172, "step": 3905 }, { "epoch": 0.5442764578833693, "grad_norm": 0.8571996092796326, "learning_rate": 4.857433911850977e-06, "loss": 0.15002822875976562, "step": 3906 }, { "epoch": 0.5444158015745837, "grad_norm": 1.049627661705017, "learning_rate": 4.8550781023211516e-06, "loss": 0.19196701049804688, "step": 3907 }, { "epoch": 0.5445551452657981, "grad_norm": 1.599989891052246, "learning_rate": 4.852722324989661e-06, "loss": 0.17116546630859375, "step": 3908 }, { "epoch": 0.5446944889570124, "grad_norm": 1.305342674255371, "learning_rate": 4.85036658037991e-06, "loss": 0.14561843872070312, "step": 3909 }, { "epoch": 0.5448338326482268, "grad_norm": 1.1790739297866821, "learning_rate": 4.848010869015288e-06, "loss": 0.1433563232421875, "step": 3910 }, { "epoch": 0.5449731763394412, "grad_norm": 0.7885940670967102, "learning_rate": 4.84565519141918e-06, "loss": 0.1398468017578125, "step": 3911 }, { "epoch": 0.5451125200306556, "grad_norm": 1.2571942806243896, "learning_rate": 4.843299548114964e-06, "loss": 0.14098167419433594, "step": 3912 }, { "epoch": 0.54525186372187, "grad_norm": 1.280439019203186, "learning_rate": 4.840943939626012e-06, "loss": 0.14236068725585938, "step": 3913 }, { "epoch": 0.5453912074130843, "grad_norm": 1.1723324060440063, "learning_rate": 4.838588366475685e-06, "loss": 0.1542816162109375, "step": 3914 }, { "epoch": 0.5455305511042987, "grad_norm": 1.2760422229766846, "learning_rate": 4.83623282918734e-06, "loss": 0.12680816650390625, "step": 3915 }, { "epoch": 0.5456698947955131, "grad_norm": 0.9740176796913147, "learning_rate": 4.833877328284319e-06, "loss": 0.14654922485351562, "step": 3916 }, { "epoch": 0.5458092384867275, "grad_norm": 1.268986463546753, "learning_rate": 4.831521864289964e-06, "loss": 0.16127967834472656, "step": 3917 }, { "epoch": 0.5459485821779418, "grad_norm": 0.9642014503479004, "learning_rate": 4.829166437727603e-06, "loss": 0.17026519775390625, "step": 3918 }, { "epoch": 0.5460879258691562, "grad_norm": 1.1753296852111816, "learning_rate": 4.82681104912056e-06, "loss": 0.18780899047851562, "step": 3919 }, { "epoch": 0.5462272695603707, "grad_norm": 1.2907636165618896, "learning_rate": 4.82445569899215e-06, "loss": 0.15700912475585938, "step": 3920 }, { "epoch": 0.5463666132515851, "grad_norm": 1.107631802558899, "learning_rate": 4.822100387865673e-06, "loss": 0.13228988647460938, "step": 3921 }, { "epoch": 0.5465059569427995, "grad_norm": 0.9224560260772705, "learning_rate": 4.8197451162644305e-06, "loss": 0.15074539184570312, "step": 3922 }, { "epoch": 0.5466453006340138, "grad_norm": 0.6103959083557129, "learning_rate": 4.817389884711706e-06, "loss": 0.118316650390625, "step": 3923 }, { "epoch": 0.5467846443252282, "grad_norm": 1.2742574214935303, "learning_rate": 4.815034693730781e-06, "loss": 0.16597366333007812, "step": 3924 }, { "epoch": 0.5469239880164426, "grad_norm": 0.8881912231445312, "learning_rate": 4.812679543844924e-06, "loss": 0.14214515686035156, "step": 3925 }, { "epoch": 0.547063331707657, "grad_norm": 0.8049730062484741, "learning_rate": 4.810324435577397e-06, "loss": 0.12603759765625, "step": 3926 }, { "epoch": 0.5472026753988714, "grad_norm": 0.6937485933303833, "learning_rate": 4.807969369451449e-06, "loss": 0.123077392578125, "step": 3927 }, { "epoch": 0.5473420190900857, "grad_norm": 1.2686131000518799, "learning_rate": 4.805614345990322e-06, "loss": 0.178955078125, "step": 3928 }, { "epoch": 0.5474813627813001, "grad_norm": 0.8417442440986633, "learning_rate": 4.803259365717251e-06, "loss": 0.1414031982421875, "step": 3929 }, { "epoch": 0.5476207064725145, "grad_norm": 0.5841554999351501, "learning_rate": 4.800904429155458e-06, "loss": 0.11751365661621094, "step": 3930 }, { "epoch": 0.5477600501637289, "grad_norm": 0.8487544059753418, "learning_rate": 4.7985495368281534e-06, "loss": 0.12497329711914062, "step": 3931 }, { "epoch": 0.5478993938549432, "grad_norm": 1.602893352508545, "learning_rate": 4.796194689258542e-06, "loss": 0.16066741943359375, "step": 3932 }, { "epoch": 0.5480387375461576, "grad_norm": 0.9090427756309509, "learning_rate": 4.793839886969819e-06, "loss": 0.1303234100341797, "step": 3933 }, { "epoch": 0.548178081237372, "grad_norm": 1.0524725914001465, "learning_rate": 4.791485130485167e-06, "loss": 0.15717315673828125, "step": 3934 }, { "epoch": 0.5483174249285864, "grad_norm": 1.634890079498291, "learning_rate": 4.789130420327756e-06, "loss": 0.13513565063476562, "step": 3935 }, { "epoch": 0.5484567686198007, "grad_norm": 0.8235584497451782, "learning_rate": 4.786775757020755e-06, "loss": 0.13353729248046875, "step": 3936 }, { "epoch": 0.5485961123110151, "grad_norm": 1.1068284511566162, "learning_rate": 4.784421141087311e-06, "loss": 0.1541728973388672, "step": 3937 }, { "epoch": 0.5487354560022295, "grad_norm": 0.7406612038612366, "learning_rate": 4.782066573050567e-06, "loss": 0.12313079833984375, "step": 3938 }, { "epoch": 0.5488747996934439, "grad_norm": 0.8809435963630676, "learning_rate": 4.779712053433655e-06, "loss": 0.11097526550292969, "step": 3939 }, { "epoch": 0.5490141433846583, "grad_norm": 1.0564113855361938, "learning_rate": 4.777357582759696e-06, "loss": 0.117889404296875, "step": 3940 }, { "epoch": 0.5491534870758726, "grad_norm": 0.8514466285705566, "learning_rate": 4.7750031615518e-06, "loss": 0.12326812744140625, "step": 3941 }, { "epoch": 0.549292830767087, "grad_norm": 1.1114422082901, "learning_rate": 4.772648790333065e-06, "loss": 0.14322662353515625, "step": 3942 }, { "epoch": 0.5494321744583014, "grad_norm": 0.7276751399040222, "learning_rate": 4.7702944696265766e-06, "loss": 0.11646270751953125, "step": 3943 }, { "epoch": 0.5495715181495158, "grad_norm": 1.1086159944534302, "learning_rate": 4.767940199955413e-06, "loss": 0.17565155029296875, "step": 3944 }, { "epoch": 0.5497108618407301, "grad_norm": 1.130951166152954, "learning_rate": 4.765585981842639e-06, "loss": 0.12612533569335938, "step": 3945 }, { "epoch": 0.5498502055319445, "grad_norm": 0.7434191703796387, "learning_rate": 4.76323181581131e-06, "loss": 0.10725021362304688, "step": 3946 }, { "epoch": 0.5499895492231589, "grad_norm": 0.8666731715202332, "learning_rate": 4.760877702384464e-06, "loss": 0.11544609069824219, "step": 3947 }, { "epoch": 0.5501288929143733, "grad_norm": 1.0545527935028076, "learning_rate": 4.758523642085133e-06, "loss": 0.12601470947265625, "step": 3948 }, { "epoch": 0.5502682366055877, "grad_norm": 1.073586344718933, "learning_rate": 4.756169635436336e-06, "loss": 0.1267681121826172, "step": 3949 }, { "epoch": 0.550407580296802, "grad_norm": 0.9374731183052063, "learning_rate": 4.75381568296108e-06, "loss": 0.1198577880859375, "step": 3950 }, { "epoch": 0.5505469239880164, "grad_norm": 1.4374079704284668, "learning_rate": 4.751461785182358e-06, "loss": 0.14962387084960938, "step": 3951 }, { "epoch": 0.5506862676792308, "grad_norm": 1.0438356399536133, "learning_rate": 4.7491079426231556e-06, "loss": 0.1458139419555664, "step": 3952 }, { "epoch": 0.5508256113704452, "grad_norm": 1.104506492614746, "learning_rate": 4.746754155806437e-06, "loss": 0.1539783477783203, "step": 3953 }, { "epoch": 0.5509649550616595, "grad_norm": 0.9692369103431702, "learning_rate": 4.744400425255165e-06, "loss": 0.17095565795898438, "step": 3954 }, { "epoch": 0.5511042987528739, "grad_norm": 0.6312143802642822, "learning_rate": 4.7420467514922815e-06, "loss": 0.125885009765625, "step": 3955 }, { "epoch": 0.5512436424440883, "grad_norm": 1.4687442779541016, "learning_rate": 4.739693135040722e-06, "loss": 0.13395118713378906, "step": 3956 }, { "epoch": 0.5513829861353027, "grad_norm": 0.8839119076728821, "learning_rate": 4.737339576423406e-06, "loss": 0.10318374633789062, "step": 3957 }, { "epoch": 0.551522329826517, "grad_norm": 1.178290605545044, "learning_rate": 4.734986076163238e-06, "loss": 0.17907333374023438, "step": 3958 }, { "epoch": 0.5516616735177314, "grad_norm": 0.7935131192207336, "learning_rate": 4.732632634783114e-06, "loss": 0.12650680541992188, "step": 3959 }, { "epoch": 0.5518010172089458, "grad_norm": 1.0986322164535522, "learning_rate": 4.730279252805914e-06, "loss": 0.13335800170898438, "step": 3960 }, { "epoch": 0.5519403609001603, "grad_norm": 1.2216976881027222, "learning_rate": 4.727925930754506e-06, "loss": 0.15389633178710938, "step": 3961 }, { "epoch": 0.5520797045913747, "grad_norm": 0.8619162440299988, "learning_rate": 4.725572669151747e-06, "loss": 0.13850021362304688, "step": 3962 }, { "epoch": 0.552219048282589, "grad_norm": 1.1366358995437622, "learning_rate": 4.723219468520474e-06, "loss": 0.1570281982421875, "step": 3963 }, { "epoch": 0.5523583919738034, "grad_norm": 1.1857149600982666, "learning_rate": 4.720866329383514e-06, "loss": 0.13238906860351562, "step": 3964 }, { "epoch": 0.5524977356650178, "grad_norm": 1.5489473342895508, "learning_rate": 4.718513252263685e-06, "loss": 0.12318801879882812, "step": 3965 }, { "epoch": 0.5526370793562322, "grad_norm": 1.231961965560913, "learning_rate": 4.716160237683785e-06, "loss": 0.14188766479492188, "step": 3966 }, { "epoch": 0.5527764230474466, "grad_norm": 2.038788318634033, "learning_rate": 4.7138072861666e-06, "loss": 0.14432907104492188, "step": 3967 }, { "epoch": 0.5529157667386609, "grad_norm": 1.1939070224761963, "learning_rate": 4.711454398234902e-06, "loss": 0.14908218383789062, "step": 3968 }, { "epoch": 0.5530551104298753, "grad_norm": 1.725354552268982, "learning_rate": 4.7091015744114475e-06, "loss": 0.14955902099609375, "step": 3969 }, { "epoch": 0.5531944541210897, "grad_norm": 1.277007818222046, "learning_rate": 4.706748815218984e-06, "loss": 0.12657546997070312, "step": 3970 }, { "epoch": 0.5533337978123041, "grad_norm": 1.5908749103546143, "learning_rate": 4.704396121180237e-06, "loss": 0.157012939453125, "step": 3971 }, { "epoch": 0.5534731415035185, "grad_norm": 0.9930599331855774, "learning_rate": 4.702043492817924e-06, "loss": 0.13705825805664062, "step": 3972 }, { "epoch": 0.5536124851947328, "grad_norm": 1.2935470342636108, "learning_rate": 4.6996909306547455e-06, "loss": 0.11966705322265625, "step": 3973 }, { "epoch": 0.5537518288859472, "grad_norm": 1.1375900506973267, "learning_rate": 4.697338435213385e-06, "loss": 0.126434326171875, "step": 3974 }, { "epoch": 0.5538911725771616, "grad_norm": 1.0764086246490479, "learning_rate": 4.694986007016514e-06, "loss": 0.13727569580078125, "step": 3975 }, { "epoch": 0.554030516268376, "grad_norm": 1.6329265832901, "learning_rate": 4.692633646586788e-06, "loss": 0.17434310913085938, "step": 3976 }, { "epoch": 0.5541698599595903, "grad_norm": 1.4196199178695679, "learning_rate": 4.690281354446849e-06, "loss": 0.17479705810546875, "step": 3977 }, { "epoch": 0.5543092036508047, "grad_norm": 2.031362295150757, "learning_rate": 4.6879291311193244e-06, "loss": 0.17766761779785156, "step": 3978 }, { "epoch": 0.5544485473420191, "grad_norm": 1.0087264776229858, "learning_rate": 4.68557697712682e-06, "loss": 0.1564788818359375, "step": 3979 }, { "epoch": 0.5545878910332335, "grad_norm": 1.2055573463439941, "learning_rate": 4.683224892991932e-06, "loss": 0.11657333374023438, "step": 3980 }, { "epoch": 0.5547272347244478, "grad_norm": 1.125880479812622, "learning_rate": 4.680872879237242e-06, "loss": 0.1461467742919922, "step": 3981 }, { "epoch": 0.5548665784156622, "grad_norm": 0.8477230072021484, "learning_rate": 4.678520936385313e-06, "loss": 0.14562416076660156, "step": 3982 }, { "epoch": 0.5550059221068766, "grad_norm": 1.098196029663086, "learning_rate": 4.676169064958692e-06, "loss": 0.13138961791992188, "step": 3983 }, { "epoch": 0.555145265798091, "grad_norm": 1.3143649101257324, "learning_rate": 4.6738172654799105e-06, "loss": 0.11611175537109375, "step": 3984 }, { "epoch": 0.5552846094893054, "grad_norm": 1.378760576248169, "learning_rate": 4.671465538471487e-06, "loss": 0.1387481689453125, "step": 3985 }, { "epoch": 0.5554239531805197, "grad_norm": 1.3092607259750366, "learning_rate": 4.66911388445592e-06, "loss": 0.1114959716796875, "step": 3986 }, { "epoch": 0.5555632968717341, "grad_norm": 0.6447846293449402, "learning_rate": 4.666762303955692e-06, "loss": 0.11269569396972656, "step": 3987 }, { "epoch": 0.5557026405629485, "grad_norm": 1.1025135517120361, "learning_rate": 4.664410797493275e-06, "loss": 0.13271713256835938, "step": 3988 }, { "epoch": 0.5558419842541629, "grad_norm": 1.1852728128433228, "learning_rate": 4.662059365591115e-06, "loss": 0.19116973876953125, "step": 3989 }, { "epoch": 0.5559813279453772, "grad_norm": 1.5372681617736816, "learning_rate": 4.6597080087716494e-06, "loss": 0.16943359375, "step": 3990 }, { "epoch": 0.5561206716365916, "grad_norm": 1.2327852249145508, "learning_rate": 4.657356727557295e-06, "loss": 0.13628196716308594, "step": 3991 }, { "epoch": 0.556260015327806, "grad_norm": 1.8497010469436646, "learning_rate": 4.655005522470453e-06, "loss": 0.15972137451171875, "step": 3992 }, { "epoch": 0.5563993590190204, "grad_norm": 0.6013913154602051, "learning_rate": 4.652654394033508e-06, "loss": 0.12457275390625, "step": 3993 }, { "epoch": 0.5565387027102348, "grad_norm": 1.0767993927001953, "learning_rate": 4.650303342768827e-06, "loss": 0.12789154052734375, "step": 3994 }, { "epoch": 0.5566780464014491, "grad_norm": 1.109703540802002, "learning_rate": 4.6479523691987585e-06, "loss": 0.14640045166015625, "step": 3995 }, { "epoch": 0.5568173900926635, "grad_norm": 1.3137227296829224, "learning_rate": 4.645601473845636e-06, "loss": 0.17669677734375, "step": 3996 }, { "epoch": 0.5569567337838779, "grad_norm": 1.2868038415908813, "learning_rate": 4.6432506572317754e-06, "loss": 0.15802383422851562, "step": 3997 }, { "epoch": 0.5570960774750923, "grad_norm": 1.7827178239822388, "learning_rate": 4.6408999198794744e-06, "loss": 0.1591339111328125, "step": 3998 }, { "epoch": 0.5572354211663066, "grad_norm": 1.0878024101257324, "learning_rate": 4.6385492623110135e-06, "loss": 0.15735244750976562, "step": 3999 }, { "epoch": 0.557374764857521, "grad_norm": 1.0037331581115723, "learning_rate": 4.636198685048653e-06, "loss": 0.11565780639648438, "step": 4000 }, { "epoch": 0.5575141085487355, "grad_norm": 0.9351118803024292, "learning_rate": 4.633848188614639e-06, "loss": 0.15525436401367188, "step": 4001 }, { "epoch": 0.5576534522399499, "grad_norm": 0.949372410774231, "learning_rate": 4.631497773531199e-06, "loss": 0.12460899353027344, "step": 4002 }, { "epoch": 0.5577927959311643, "grad_norm": 0.9112458229064941, "learning_rate": 4.629147440320539e-06, "loss": 0.15507888793945312, "step": 4003 }, { "epoch": 0.5579321396223786, "grad_norm": 0.9182927012443542, "learning_rate": 4.626797189504855e-06, "loss": 0.12731552124023438, "step": 4004 }, { "epoch": 0.558071483313593, "grad_norm": 0.7648161053657532, "learning_rate": 4.624447021606311e-06, "loss": 0.11725997924804688, "step": 4005 }, { "epoch": 0.5582108270048074, "grad_norm": 1.0734431743621826, "learning_rate": 4.6220969371470665e-06, "loss": 0.16373825073242188, "step": 4006 }, { "epoch": 0.5583501706960218, "grad_norm": 1.0294486284255981, "learning_rate": 4.619746936649254e-06, "loss": 0.1253662109375, "step": 4007 }, { "epoch": 0.5584895143872362, "grad_norm": 1.0613573789596558, "learning_rate": 4.617397020634991e-06, "loss": 0.16124725341796875, "step": 4008 }, { "epoch": 0.5586288580784505, "grad_norm": 0.9943825602531433, "learning_rate": 4.615047189626376e-06, "loss": 0.12435150146484375, "step": 4009 }, { "epoch": 0.5587682017696649, "grad_norm": 0.7212405800819397, "learning_rate": 4.612697444145487e-06, "loss": 0.11157989501953125, "step": 4010 }, { "epoch": 0.5589075454608793, "grad_norm": 0.6468190550804138, "learning_rate": 4.610347784714383e-06, "loss": 0.1201171875, "step": 4011 }, { "epoch": 0.5590468891520937, "grad_norm": 0.7449504733085632, "learning_rate": 4.6079982118551045e-06, "loss": 0.13177490234375, "step": 4012 }, { "epoch": 0.559186232843308, "grad_norm": 1.1067317724227905, "learning_rate": 4.605648726089674e-06, "loss": 0.16931915283203125, "step": 4013 }, { "epoch": 0.5593255765345224, "grad_norm": 0.8478820323944092, "learning_rate": 4.603299327940094e-06, "loss": 0.11439132690429688, "step": 4014 }, { "epoch": 0.5594649202257368, "grad_norm": 1.2747348546981812, "learning_rate": 4.600950017928348e-06, "loss": 0.14803314208984375, "step": 4015 }, { "epoch": 0.5596042639169512, "grad_norm": 0.9187073111534119, "learning_rate": 4.598600796576395e-06, "loss": 0.1439666748046875, "step": 4016 }, { "epoch": 0.5597436076081655, "grad_norm": 1.4492756128311157, "learning_rate": 4.596251664406182e-06, "loss": 0.14569473266601562, "step": 4017 }, { "epoch": 0.5598829512993799, "grad_norm": 1.5935804843902588, "learning_rate": 4.593902621939632e-06, "loss": 0.16164398193359375, "step": 4018 }, { "epoch": 0.5600222949905943, "grad_norm": 1.247867226600647, "learning_rate": 4.591553669698646e-06, "loss": 0.15875625610351562, "step": 4019 }, { "epoch": 0.5601616386818087, "grad_norm": 0.7571991086006165, "learning_rate": 4.589204808205113e-06, "loss": 0.14400863647460938, "step": 4020 }, { "epoch": 0.5603009823730231, "grad_norm": 1.2591001987457275, "learning_rate": 4.58685603798089e-06, "loss": 0.19791793823242188, "step": 4021 }, { "epoch": 0.5604403260642374, "grad_norm": 0.6899906396865845, "learning_rate": 4.5845073595478245e-06, "loss": 0.12906646728515625, "step": 4022 }, { "epoch": 0.5605796697554518, "grad_norm": 0.7136978507041931, "learning_rate": 4.5821587734277374e-06, "loss": 0.13867950439453125, "step": 4023 }, { "epoch": 0.5607190134466662, "grad_norm": 0.7146223187446594, "learning_rate": 4.57981028014243e-06, "loss": 0.1357269287109375, "step": 4024 }, { "epoch": 0.5608583571378806, "grad_norm": 0.8893799185752869, "learning_rate": 4.577461880213688e-06, "loss": 0.17134666442871094, "step": 4025 }, { "epoch": 0.560997700829095, "grad_norm": 1.7196837663650513, "learning_rate": 4.575113574163269e-06, "loss": 0.2017192840576172, "step": 4026 }, { "epoch": 0.5611370445203093, "grad_norm": 1.048940896987915, "learning_rate": 4.572765362512912e-06, "loss": 0.13741302490234375, "step": 4027 }, { "epoch": 0.5612763882115237, "grad_norm": 0.8245667815208435, "learning_rate": 4.570417245784337e-06, "loss": 0.119415283203125, "step": 4028 }, { "epoch": 0.5614157319027381, "grad_norm": 1.2991669178009033, "learning_rate": 4.568069224499244e-06, "loss": 0.14630889892578125, "step": 4029 }, { "epoch": 0.5615550755939525, "grad_norm": 0.5695136785507202, "learning_rate": 4.565721299179308e-06, "loss": 0.110260009765625, "step": 4030 }, { "epoch": 0.5616944192851668, "grad_norm": 0.7996627688407898, "learning_rate": 4.563373470346186e-06, "loss": 0.12156295776367188, "step": 4031 }, { "epoch": 0.5618337629763812, "grad_norm": 1.1700129508972168, "learning_rate": 4.561025738521508e-06, "loss": 0.16987228393554688, "step": 4032 }, { "epoch": 0.5619731066675956, "grad_norm": 1.1024715900421143, "learning_rate": 4.55867810422689e-06, "loss": 0.164581298828125, "step": 4033 }, { "epoch": 0.56211245035881, "grad_norm": 0.8137705326080322, "learning_rate": 4.5563305679839214e-06, "loss": 0.14459991455078125, "step": 4034 }, { "epoch": 0.5622517940500243, "grad_norm": 1.5339045524597168, "learning_rate": 4.553983130314171e-06, "loss": 0.18616867065429688, "step": 4035 }, { "epoch": 0.5623911377412387, "grad_norm": 1.2646995782852173, "learning_rate": 4.551635791739188e-06, "loss": 0.13565444946289062, "step": 4036 }, { "epoch": 0.5625304814324531, "grad_norm": 0.6035820841789246, "learning_rate": 4.549288552780494e-06, "loss": 0.11869430541992188, "step": 4037 }, { "epoch": 0.5626698251236675, "grad_norm": 1.4550095796585083, "learning_rate": 4.546941413959595e-06, "loss": 0.18357086181640625, "step": 4038 }, { "epoch": 0.5628091688148819, "grad_norm": 1.4254848957061768, "learning_rate": 4.544594375797969e-06, "loss": 0.16726303100585938, "step": 4039 }, { "epoch": 0.5629485125060962, "grad_norm": 0.7893524169921875, "learning_rate": 4.542247438817076e-06, "loss": 0.1388397216796875, "step": 4040 }, { "epoch": 0.5630878561973107, "grad_norm": 0.7589138150215149, "learning_rate": 4.539900603538352e-06, "loss": 0.1407928466796875, "step": 4041 }, { "epoch": 0.5632271998885251, "grad_norm": 0.7519834637641907, "learning_rate": 4.53755387048321e-06, "loss": 0.11455917358398438, "step": 4042 }, { "epoch": 0.5633665435797395, "grad_norm": 0.6764050126075745, "learning_rate": 4.53520724017304e-06, "loss": 0.13204193115234375, "step": 4043 }, { "epoch": 0.5635058872709539, "grad_norm": 0.742545485496521, "learning_rate": 4.532860713129208e-06, "loss": 0.13220977783203125, "step": 4044 }, { "epoch": 0.5636452309621682, "grad_norm": 0.7083654999732971, "learning_rate": 4.530514289873062e-06, "loss": 0.11238861083984375, "step": 4045 }, { "epoch": 0.5637845746533826, "grad_norm": 0.6834072470664978, "learning_rate": 4.528167970925922e-06, "loss": 0.1308746337890625, "step": 4046 }, { "epoch": 0.563923918344597, "grad_norm": 1.2371610403060913, "learning_rate": 4.525821756809088e-06, "loss": 0.138397216796875, "step": 4047 }, { "epoch": 0.5640632620358114, "grad_norm": 0.9311294555664062, "learning_rate": 4.523475648043832e-06, "loss": 0.15428924560546875, "step": 4048 }, { "epoch": 0.5642026057270257, "grad_norm": 0.8910958170890808, "learning_rate": 4.5211296451514085e-06, "loss": 0.11745071411132812, "step": 4049 }, { "epoch": 0.5643419494182401, "grad_norm": 1.4897871017456055, "learning_rate": 4.518783748653045e-06, "loss": 0.13817214965820312, "step": 4050 }, { "epoch": 0.5644812931094545, "grad_norm": 0.9112259745597839, "learning_rate": 4.516437959069946e-06, "loss": 0.14387130737304688, "step": 4051 }, { "epoch": 0.5646206368006689, "grad_norm": 1.029287576675415, "learning_rate": 4.514092276923295e-06, "loss": 0.14883041381835938, "step": 4052 }, { "epoch": 0.5647599804918833, "grad_norm": 1.5648412704467773, "learning_rate": 4.5117467027342435e-06, "loss": 0.17808914184570312, "step": 4053 }, { "epoch": 0.5648993241830976, "grad_norm": 1.222579836845398, "learning_rate": 4.509401237023928e-06, "loss": 0.16574478149414062, "step": 4054 }, { "epoch": 0.565038667874312, "grad_norm": 1.0213934183120728, "learning_rate": 4.507055880313458e-06, "loss": 0.13936233520507812, "step": 4055 }, { "epoch": 0.5651780115655264, "grad_norm": 1.0095833539962769, "learning_rate": 4.504710633123917e-06, "loss": 0.1399383544921875, "step": 4056 }, { "epoch": 0.5653173552567408, "grad_norm": 1.6721662282943726, "learning_rate": 4.502365495976367e-06, "loss": 0.16768264770507812, "step": 4057 }, { "epoch": 0.5654566989479551, "grad_norm": 0.5905022621154785, "learning_rate": 4.5000204693918405e-06, "loss": 0.13193511962890625, "step": 4058 }, { "epoch": 0.5655960426391695, "grad_norm": 0.721025824546814, "learning_rate": 4.497675553891352e-06, "loss": 0.13490676879882812, "step": 4059 }, { "epoch": 0.5657353863303839, "grad_norm": 1.4617915153503418, "learning_rate": 4.495330749995887e-06, "loss": 0.1446990966796875, "step": 4060 }, { "epoch": 0.5658747300215983, "grad_norm": 0.8398436307907104, "learning_rate": 4.492986058226407e-06, "loss": 0.1534900665283203, "step": 4061 }, { "epoch": 0.5660140737128126, "grad_norm": 1.519730567932129, "learning_rate": 4.490641479103851e-06, "loss": 0.16935348510742188, "step": 4062 }, { "epoch": 0.566153417404027, "grad_norm": 1.2179007530212402, "learning_rate": 4.4882970131491286e-06, "loss": 0.1490020751953125, "step": 4063 }, { "epoch": 0.5662927610952414, "grad_norm": 1.327043056488037, "learning_rate": 4.485952660883126e-06, "loss": 0.17530059814453125, "step": 4064 }, { "epoch": 0.5664321047864558, "grad_norm": 0.9761595726013184, "learning_rate": 4.483608422826708e-06, "loss": 0.16605758666992188, "step": 4065 }, { "epoch": 0.5665714484776702, "grad_norm": 1.1286039352416992, "learning_rate": 4.481264299500709e-06, "loss": 0.17216873168945312, "step": 4066 }, { "epoch": 0.5667107921688845, "grad_norm": 0.7788815498352051, "learning_rate": 4.478920291425939e-06, "loss": 0.14713096618652344, "step": 4067 }, { "epoch": 0.5668501358600989, "grad_norm": 0.8405678272247314, "learning_rate": 4.476576399123187e-06, "loss": 0.14974594116210938, "step": 4068 }, { "epoch": 0.5669894795513133, "grad_norm": 0.7110956311225891, "learning_rate": 4.474232623113204e-06, "loss": 0.12663650512695312, "step": 4069 }, { "epoch": 0.5671288232425277, "grad_norm": 1.4305840730667114, "learning_rate": 4.471888963916732e-06, "loss": 0.1490306854248047, "step": 4070 }, { "epoch": 0.567268166933742, "grad_norm": 1.1247713565826416, "learning_rate": 4.4695454220544735e-06, "loss": 0.16196823120117188, "step": 4071 }, { "epoch": 0.5674075106249564, "grad_norm": 0.846656084060669, "learning_rate": 4.467201998047112e-06, "loss": 0.1313343048095703, "step": 4072 }, { "epoch": 0.5675468543161708, "grad_norm": 0.8498222231864929, "learning_rate": 4.464858692415304e-06, "loss": 0.14514541625976562, "step": 4073 }, { "epoch": 0.5676861980073852, "grad_norm": 1.0211125612258911, "learning_rate": 4.462515505679677e-06, "loss": 0.15425491333007812, "step": 4074 }, { "epoch": 0.5678255416985996, "grad_norm": 0.9075194001197815, "learning_rate": 4.460172438360832e-06, "loss": 0.14910125732421875, "step": 4075 }, { "epoch": 0.5679648853898139, "grad_norm": 1.4477742910385132, "learning_rate": 4.457829490979347e-06, "loss": 0.16790008544921875, "step": 4076 }, { "epoch": 0.5681042290810283, "grad_norm": 1.3043131828308105, "learning_rate": 4.455486664055772e-06, "loss": 0.14925384521484375, "step": 4077 }, { "epoch": 0.5682435727722427, "grad_norm": 0.9947264194488525, "learning_rate": 4.4531439581106295e-06, "loss": 0.14968490600585938, "step": 4078 }, { "epoch": 0.5683829164634571, "grad_norm": 0.7921098470687866, "learning_rate": 4.450801373664413e-06, "loss": 0.12560272216796875, "step": 4079 }, { "epoch": 0.5685222601546714, "grad_norm": 0.8619521856307983, "learning_rate": 4.448458911237593e-06, "loss": 0.11639404296875, "step": 4080 }, { "epoch": 0.5686616038458859, "grad_norm": 1.5367306470870972, "learning_rate": 4.446116571350611e-06, "loss": 0.18549728393554688, "step": 4081 }, { "epoch": 0.5688009475371003, "grad_norm": 0.9007667899131775, "learning_rate": 4.443774354523883e-06, "loss": 0.13265228271484375, "step": 4082 }, { "epoch": 0.5689402912283147, "grad_norm": 0.7688214182853699, "learning_rate": 4.441432261277794e-06, "loss": 0.12289047241210938, "step": 4083 }, { "epoch": 0.5690796349195291, "grad_norm": 2.4635024070739746, "learning_rate": 4.4390902921327025e-06, "loss": 0.18986892700195312, "step": 4084 }, { "epoch": 0.5692189786107434, "grad_norm": 1.0191502571105957, "learning_rate": 4.436748447608944e-06, "loss": 0.14777374267578125, "step": 4085 }, { "epoch": 0.5693583223019578, "grad_norm": 0.6400270462036133, "learning_rate": 4.43440672822682e-06, "loss": 0.12872695922851562, "step": 4086 }, { "epoch": 0.5694976659931722, "grad_norm": 0.8646252155303955, "learning_rate": 4.432065134506608e-06, "loss": 0.1144866943359375, "step": 4087 }, { "epoch": 0.5696370096843866, "grad_norm": 1.165106177330017, "learning_rate": 4.429723666968559e-06, "loss": 0.14807510375976562, "step": 4088 }, { "epoch": 0.569776353375601, "grad_norm": 0.906266450881958, "learning_rate": 4.427382326132892e-06, "loss": 0.14659500122070312, "step": 4089 }, { "epoch": 0.5699156970668153, "grad_norm": 0.9241173267364502, "learning_rate": 4.425041112519797e-06, "loss": 0.110870361328125, "step": 4090 }, { "epoch": 0.5700550407580297, "grad_norm": 1.7969293594360352, "learning_rate": 4.42270002664944e-06, "loss": 0.172119140625, "step": 4091 }, { "epoch": 0.5701943844492441, "grad_norm": 1.1334714889526367, "learning_rate": 4.4203590690419575e-06, "loss": 0.11957931518554688, "step": 4092 }, { "epoch": 0.5703337281404585, "grad_norm": 0.7404032945632935, "learning_rate": 4.418018240217457e-06, "loss": 0.13824081420898438, "step": 4093 }, { "epoch": 0.5704730718316728, "grad_norm": 0.9970378279685974, "learning_rate": 4.415677540696017e-06, "loss": 0.13977432250976562, "step": 4094 }, { "epoch": 0.5706124155228872, "grad_norm": 0.9784004092216492, "learning_rate": 4.413336970997687e-06, "loss": 0.11000442504882812, "step": 4095 }, { "epoch": 0.5707517592141016, "grad_norm": 0.9881983399391174, "learning_rate": 4.410996531642487e-06, "loss": 0.11815834045410156, "step": 4096 }, { "epoch": 0.570891102905316, "grad_norm": 0.9720642566680908, "learning_rate": 4.408656223150412e-06, "loss": 0.1499481201171875, "step": 4097 }, { "epoch": 0.5710304465965303, "grad_norm": 1.9283249378204346, "learning_rate": 4.406316046041423e-06, "loss": 0.1895751953125, "step": 4098 }, { "epoch": 0.5711697902877447, "grad_norm": 0.557201087474823, "learning_rate": 4.4039760008354556e-06, "loss": 0.11700439453125, "step": 4099 }, { "epoch": 0.5713091339789591, "grad_norm": 0.5317284464836121, "learning_rate": 4.401636088052411e-06, "loss": 0.11445999145507812, "step": 4100 }, { "epoch": 0.5714484776701735, "grad_norm": 0.8969095349311829, "learning_rate": 4.399296308212168e-06, "loss": 0.14569473266601562, "step": 4101 }, { "epoch": 0.5715878213613879, "grad_norm": 0.6038305163383484, "learning_rate": 4.396956661834571e-06, "loss": 0.11780929565429688, "step": 4102 }, { "epoch": 0.5717271650526022, "grad_norm": 0.9750729203224182, "learning_rate": 4.394617149439435e-06, "loss": 0.14656829833984375, "step": 4103 }, { "epoch": 0.5718665087438166, "grad_norm": 0.8405888080596924, "learning_rate": 4.392277771546549e-06, "loss": 0.14336776733398438, "step": 4104 }, { "epoch": 0.572005852435031, "grad_norm": 1.1374917030334473, "learning_rate": 4.389938528675668e-06, "loss": 0.16279983520507812, "step": 4105 }, { "epoch": 0.5721451961262454, "grad_norm": 0.9407082796096802, "learning_rate": 4.387599421346517e-06, "loss": 0.13543701171875, "step": 4106 }, { "epoch": 0.5722845398174597, "grad_norm": 0.8943197131156921, "learning_rate": 4.385260450078793e-06, "loss": 0.126190185546875, "step": 4107 }, { "epoch": 0.5724238835086741, "grad_norm": 1.0943360328674316, "learning_rate": 4.382921615392162e-06, "loss": 0.1374187469482422, "step": 4108 }, { "epoch": 0.5725632271998885, "grad_norm": 1.0901074409484863, "learning_rate": 4.38058291780626e-06, "loss": 0.17792129516601562, "step": 4109 }, { "epoch": 0.5727025708911029, "grad_norm": 0.48541390895843506, "learning_rate": 4.378244357840694e-06, "loss": 0.12136268615722656, "step": 4110 }, { "epoch": 0.5728419145823173, "grad_norm": 0.8943926692008972, "learning_rate": 4.375905936015035e-06, "loss": 0.14462661743164062, "step": 4111 }, { "epoch": 0.5729812582735316, "grad_norm": 0.7792488932609558, "learning_rate": 4.373567652848828e-06, "loss": 0.10839462280273438, "step": 4112 }, { "epoch": 0.573120601964746, "grad_norm": 0.9295403361320496, "learning_rate": 4.371229508861588e-06, "loss": 0.118927001953125, "step": 4113 }, { "epoch": 0.5732599456559604, "grad_norm": 1.3461997509002686, "learning_rate": 4.368891504572796e-06, "loss": 0.18159103393554688, "step": 4114 }, { "epoch": 0.5733992893471748, "grad_norm": 0.7298077344894409, "learning_rate": 4.3665536405019045e-06, "loss": 0.13152694702148438, "step": 4115 }, { "epoch": 0.5735386330383891, "grad_norm": 1.6900203227996826, "learning_rate": 4.36421591716833e-06, "loss": 0.17783737182617188, "step": 4116 }, { "epoch": 0.5736779767296035, "grad_norm": 0.930135190486908, "learning_rate": 4.361878335091464e-06, "loss": 0.12890052795410156, "step": 4117 }, { "epoch": 0.5738173204208179, "grad_norm": 0.7192721962928772, "learning_rate": 4.3595408947906644e-06, "loss": 0.13634109497070312, "step": 4118 }, { "epoch": 0.5739566641120323, "grad_norm": 0.5867680311203003, "learning_rate": 4.357203596785254e-06, "loss": 0.1273040771484375, "step": 4119 }, { "epoch": 0.5740960078032467, "grad_norm": 0.910954475402832, "learning_rate": 4.3548664415945326e-06, "loss": 0.15739822387695312, "step": 4120 }, { "epoch": 0.5742353514944611, "grad_norm": 1.0806242227554321, "learning_rate": 4.3525294297377566e-06, "loss": 0.15332412719726562, "step": 4121 }, { "epoch": 0.5743746951856755, "grad_norm": 0.7177514433860779, "learning_rate": 4.35019256173416e-06, "loss": 0.13611221313476562, "step": 4122 }, { "epoch": 0.5745140388768899, "grad_norm": 1.0241451263427734, "learning_rate": 4.34785583810294e-06, "loss": 0.13414382934570312, "step": 4123 }, { "epoch": 0.5746533825681043, "grad_norm": 1.0473214387893677, "learning_rate": 4.345519259363264e-06, "loss": 0.12318611145019531, "step": 4124 }, { "epoch": 0.5747927262593187, "grad_norm": 0.7035033702850342, "learning_rate": 4.343182826034268e-06, "loss": 0.12031936645507812, "step": 4125 }, { "epoch": 0.574932069950533, "grad_norm": 0.730377197265625, "learning_rate": 4.340846538635053e-06, "loss": 0.12151718139648438, "step": 4126 }, { "epoch": 0.5750714136417474, "grad_norm": 1.3487348556518555, "learning_rate": 4.338510397684687e-06, "loss": 0.12132835388183594, "step": 4127 }, { "epoch": 0.5752107573329618, "grad_norm": 0.8737024664878845, "learning_rate": 4.336174403702208e-06, "loss": 0.12828445434570312, "step": 4128 }, { "epoch": 0.5753501010241762, "grad_norm": 0.638648509979248, "learning_rate": 4.333838557206623e-06, "loss": 0.1261749267578125, "step": 4129 }, { "epoch": 0.5754894447153905, "grad_norm": 1.530282974243164, "learning_rate": 4.3315028587169e-06, "loss": 0.1650829315185547, "step": 4130 }, { "epoch": 0.5756287884066049, "grad_norm": 1.3946350812911987, "learning_rate": 4.329167308751982e-06, "loss": 0.14672470092773438, "step": 4131 }, { "epoch": 0.5757681320978193, "grad_norm": 0.7824389338493347, "learning_rate": 4.3268319078307695e-06, "loss": 0.12528610229492188, "step": 4132 }, { "epoch": 0.5759074757890337, "grad_norm": 1.5492228269577026, "learning_rate": 4.324496656472141e-06, "loss": 0.14180755615234375, "step": 4133 }, { "epoch": 0.576046819480248, "grad_norm": 0.7222307920455933, "learning_rate": 4.322161555194932e-06, "loss": 0.12378311157226562, "step": 4134 }, { "epoch": 0.5761861631714624, "grad_norm": 0.7538719177246094, "learning_rate": 4.31982660451795e-06, "loss": 0.12671279907226562, "step": 4135 }, { "epoch": 0.5763255068626768, "grad_norm": 0.795744776725769, "learning_rate": 4.3174918049599705e-06, "loss": 0.15684127807617188, "step": 4136 }, { "epoch": 0.5764648505538912, "grad_norm": 0.6863347887992859, "learning_rate": 4.315157157039727e-06, "loss": 0.1161041259765625, "step": 4137 }, { "epoch": 0.5766041942451056, "grad_norm": 0.758938193321228, "learning_rate": 4.312822661275929e-06, "loss": 0.12050247192382812, "step": 4138 }, { "epoch": 0.5767435379363199, "grad_norm": 1.2237071990966797, "learning_rate": 4.310488318187247e-06, "loss": 0.14800643920898438, "step": 4139 }, { "epoch": 0.5768828816275343, "grad_norm": 1.1007851362228394, "learning_rate": 4.308154128292318e-06, "loss": 0.12049102783203125, "step": 4140 }, { "epoch": 0.5770222253187487, "grad_norm": 0.8371685743331909, "learning_rate": 4.305820092109748e-06, "loss": 0.12228775024414062, "step": 4141 }, { "epoch": 0.5771615690099631, "grad_norm": 1.701651692390442, "learning_rate": 4.303486210158106e-06, "loss": 0.184844970703125, "step": 4142 }, { "epoch": 0.5773009127011774, "grad_norm": 1.705075740814209, "learning_rate": 4.301152482955926e-06, "loss": 0.2071075439453125, "step": 4143 }, { "epoch": 0.5774402563923918, "grad_norm": 0.8053405284881592, "learning_rate": 4.298818911021707e-06, "loss": 0.1320648193359375, "step": 4144 }, { "epoch": 0.5775796000836062, "grad_norm": 0.9134140610694885, "learning_rate": 4.296485494873919e-06, "loss": 0.12474441528320312, "step": 4145 }, { "epoch": 0.5777189437748206, "grad_norm": 0.9987466931343079, "learning_rate": 4.294152235030993e-06, "loss": 0.12166595458984375, "step": 4146 }, { "epoch": 0.577858287466035, "grad_norm": 1.105791449546814, "learning_rate": 4.291819132011327e-06, "loss": 0.13636016845703125, "step": 4147 }, { "epoch": 0.5779976311572493, "grad_norm": 1.2008668184280396, "learning_rate": 4.2894861863332785e-06, "loss": 0.15487289428710938, "step": 4148 }, { "epoch": 0.5781369748484637, "grad_norm": 1.3316456079483032, "learning_rate": 4.28715339851518e-06, "loss": 0.16872787475585938, "step": 4149 }, { "epoch": 0.5782763185396781, "grad_norm": 1.0081560611724854, "learning_rate": 4.284820769075322e-06, "loss": 0.16968917846679688, "step": 4150 }, { "epoch": 0.5784156622308925, "grad_norm": 0.8439881801605225, "learning_rate": 4.282488298531959e-06, "loss": 0.10144996643066406, "step": 4151 }, { "epoch": 0.5785550059221068, "grad_norm": 0.9665664434432983, "learning_rate": 4.28015598740332e-06, "loss": 0.13529586791992188, "step": 4152 }, { "epoch": 0.5786943496133212, "grad_norm": 0.6703671216964722, "learning_rate": 4.277823836207581e-06, "loss": 0.13092803955078125, "step": 4153 }, { "epoch": 0.5788336933045356, "grad_norm": 0.8144615888595581, "learning_rate": 4.275491845462901e-06, "loss": 0.1403789520263672, "step": 4154 }, { "epoch": 0.57897303699575, "grad_norm": 1.5267523527145386, "learning_rate": 4.27316001568739e-06, "loss": 0.1805419921875, "step": 4155 }, { "epoch": 0.5791123806869644, "grad_norm": 1.6767903566360474, "learning_rate": 4.270828347399131e-06, "loss": 0.1671600341796875, "step": 4156 }, { "epoch": 0.5792517243781787, "grad_norm": 0.7936223745346069, "learning_rate": 4.268496841116166e-06, "loss": 0.16109657287597656, "step": 4157 }, { "epoch": 0.5793910680693931, "grad_norm": 0.9347303509712219, "learning_rate": 4.266165497356503e-06, "loss": 0.12133407592773438, "step": 4158 }, { "epoch": 0.5795304117606075, "grad_norm": 0.9324110746383667, "learning_rate": 4.2638343166381115e-06, "loss": 0.13190460205078125, "step": 4159 }, { "epoch": 0.5796697554518219, "grad_norm": 0.5780167579650879, "learning_rate": 4.261503299478928e-06, "loss": 0.11867618560791016, "step": 4160 }, { "epoch": 0.5798090991430362, "grad_norm": 0.598127543926239, "learning_rate": 4.259172446396851e-06, "loss": 0.1158905029296875, "step": 4161 }, { "epoch": 0.5799484428342507, "grad_norm": 0.9147084951400757, "learning_rate": 4.256841757909744e-06, "loss": 0.13914108276367188, "step": 4162 }, { "epoch": 0.5800877865254651, "grad_norm": 1.191933035850525, "learning_rate": 4.254511234535432e-06, "loss": 0.1387786865234375, "step": 4163 }, { "epoch": 0.5802271302166795, "grad_norm": 0.7395572662353516, "learning_rate": 4.2521808767917024e-06, "loss": 0.11785125732421875, "step": 4164 }, { "epoch": 0.5803664739078939, "grad_norm": 0.7477087378501892, "learning_rate": 4.2498506851963095e-06, "loss": 0.11897659301757812, "step": 4165 }, { "epoch": 0.5805058175991082, "grad_norm": 0.9614532589912415, "learning_rate": 4.247520660266969e-06, "loss": 0.13590621948242188, "step": 4166 }, { "epoch": 0.5806451612903226, "grad_norm": 1.1784168481826782, "learning_rate": 4.245190802521356e-06, "loss": 0.15229415893554688, "step": 4167 }, { "epoch": 0.580784504981537, "grad_norm": 1.2201582193374634, "learning_rate": 4.2428611124771184e-06, "loss": 0.13369178771972656, "step": 4168 }, { "epoch": 0.5809238486727514, "grad_norm": 0.768278956413269, "learning_rate": 4.240531590651853e-06, "loss": 0.13115310668945312, "step": 4169 }, { "epoch": 0.5810631923639658, "grad_norm": 1.0284947156906128, "learning_rate": 4.238202237563129e-06, "loss": 0.1436767578125, "step": 4170 }, { "epoch": 0.5812025360551801, "grad_norm": 1.0758086442947388, "learning_rate": 4.235873053728475e-06, "loss": 0.14204025268554688, "step": 4171 }, { "epoch": 0.5813418797463945, "grad_norm": 1.4767824411392212, "learning_rate": 4.233544039665385e-06, "loss": 0.17543792724609375, "step": 4172 }, { "epoch": 0.5814812234376089, "grad_norm": 1.0158673524856567, "learning_rate": 4.231215195891311e-06, "loss": 0.15683364868164062, "step": 4173 }, { "epoch": 0.5816205671288233, "grad_norm": 1.0090537071228027, "learning_rate": 4.228886522923668e-06, "loss": 0.15863418579101562, "step": 4174 }, { "epoch": 0.5817599108200376, "grad_norm": 1.5257338285446167, "learning_rate": 4.2265580212798355e-06, "loss": 0.13728713989257812, "step": 4175 }, { "epoch": 0.581899254511252, "grad_norm": 1.1098582744598389, "learning_rate": 4.224229691477151e-06, "loss": 0.16955947875976562, "step": 4176 }, { "epoch": 0.5820385982024664, "grad_norm": 0.7220280766487122, "learning_rate": 4.221901534032918e-06, "loss": 0.12511444091796875, "step": 4177 }, { "epoch": 0.5821779418936808, "grad_norm": 1.02704918384552, "learning_rate": 4.219573549464403e-06, "loss": 0.13901901245117188, "step": 4178 }, { "epoch": 0.5823172855848952, "grad_norm": 0.6395032405853271, "learning_rate": 4.217245738288825e-06, "loss": 0.12244415283203125, "step": 4179 }, { "epoch": 0.5824566292761095, "grad_norm": 0.9848383069038391, "learning_rate": 4.2149181010233734e-06, "loss": 0.15224838256835938, "step": 4180 }, { "epoch": 0.5825959729673239, "grad_norm": 0.67945396900177, "learning_rate": 4.212590638185196e-06, "loss": 0.11939620971679688, "step": 4181 }, { "epoch": 0.5827353166585383, "grad_norm": 0.904911458492279, "learning_rate": 4.2102633502914035e-06, "loss": 0.118499755859375, "step": 4182 }, { "epoch": 0.5828746603497527, "grad_norm": 0.8329911828041077, "learning_rate": 4.2079362378590625e-06, "loss": 0.14118194580078125, "step": 4183 }, { "epoch": 0.583014004040967, "grad_norm": 1.5155770778656006, "learning_rate": 4.2056093014052085e-06, "loss": 0.1276092529296875, "step": 4184 }, { "epoch": 0.5831533477321814, "grad_norm": 0.6479266881942749, "learning_rate": 4.20328254144683e-06, "loss": 0.114166259765625, "step": 4185 }, { "epoch": 0.5832926914233958, "grad_norm": 1.030987024307251, "learning_rate": 4.2009559585008826e-06, "loss": 0.14986801147460938, "step": 4186 }, { "epoch": 0.5834320351146102, "grad_norm": 0.6837633848190308, "learning_rate": 4.198629553084277e-06, "loss": 0.1300506591796875, "step": 4187 }, { "epoch": 0.5835713788058245, "grad_norm": 1.3828905820846558, "learning_rate": 4.1963033257138904e-06, "loss": 0.14439010620117188, "step": 4188 }, { "epoch": 0.5837107224970389, "grad_norm": 1.0313167572021484, "learning_rate": 4.193977276906557e-06, "loss": 0.14699172973632812, "step": 4189 }, { "epoch": 0.5838500661882533, "grad_norm": 0.8069913983345032, "learning_rate": 4.191651407179069e-06, "loss": 0.1143646240234375, "step": 4190 }, { "epoch": 0.5839894098794677, "grad_norm": 1.6112079620361328, "learning_rate": 4.189325717048185e-06, "loss": 0.18578338623046875, "step": 4191 }, { "epoch": 0.5841287535706821, "grad_norm": 1.37142813205719, "learning_rate": 4.187000207030616e-06, "loss": 0.16316604614257812, "step": 4192 }, { "epoch": 0.5842680972618964, "grad_norm": 1.2046691179275513, "learning_rate": 4.184674877643042e-06, "loss": 0.16938400268554688, "step": 4193 }, { "epoch": 0.5844074409531108, "grad_norm": 1.458557367324829, "learning_rate": 4.182349729402097e-06, "loss": 0.17562484741210938, "step": 4194 }, { "epoch": 0.5845467846443252, "grad_norm": 1.4634445905685425, "learning_rate": 4.180024762824374e-06, "loss": 0.19609642028808594, "step": 4195 }, { "epoch": 0.5846861283355396, "grad_norm": 1.1034735441207886, "learning_rate": 4.177699978426426e-06, "loss": 0.12041091918945312, "step": 4196 }, { "epoch": 0.584825472026754, "grad_norm": 1.514438271522522, "learning_rate": 4.175375376724772e-06, "loss": 0.1354217529296875, "step": 4197 }, { "epoch": 0.5849648157179683, "grad_norm": 0.6193238496780396, "learning_rate": 4.173050958235882e-06, "loss": 0.1163482666015625, "step": 4198 }, { "epoch": 0.5851041594091827, "grad_norm": 0.8388391137123108, "learning_rate": 4.170726723476189e-06, "loss": 0.1390533447265625, "step": 4199 }, { "epoch": 0.5852435031003971, "grad_norm": 0.6306102871894836, "learning_rate": 4.168402672962086e-06, "loss": 0.11565399169921875, "step": 4200 }, { "epoch": 0.5853828467916115, "grad_norm": 0.6953423023223877, "learning_rate": 4.166078807209924e-06, "loss": 0.11688995361328125, "step": 4201 }, { "epoch": 0.5855221904828259, "grad_norm": 0.9613156914710999, "learning_rate": 4.163755126736011e-06, "loss": 0.15675735473632812, "step": 4202 }, { "epoch": 0.5856615341740403, "grad_norm": 0.6681920289993286, "learning_rate": 4.1614316320566174e-06, "loss": 0.12511062622070312, "step": 4203 }, { "epoch": 0.5858008778652547, "grad_norm": 1.5275355577468872, "learning_rate": 4.159108323687971e-06, "loss": 0.15501785278320312, "step": 4204 }, { "epoch": 0.5859402215564691, "grad_norm": 1.080194115638733, "learning_rate": 4.156785202146257e-06, "loss": 0.1545867919921875, "step": 4205 }, { "epoch": 0.5860795652476835, "grad_norm": 0.7571771740913391, "learning_rate": 4.154462267947621e-06, "loss": 0.1298503875732422, "step": 4206 }, { "epoch": 0.5862189089388978, "grad_norm": 0.8379063606262207, "learning_rate": 4.152139521608164e-06, "loss": 0.13578033447265625, "step": 4207 }, { "epoch": 0.5863582526301122, "grad_norm": 0.8996016383171082, "learning_rate": 4.149816963643947e-06, "loss": 0.13303375244140625, "step": 4208 }, { "epoch": 0.5864975963213266, "grad_norm": 0.7330151200294495, "learning_rate": 4.147494594570992e-06, "loss": 0.13504791259765625, "step": 4209 }, { "epoch": 0.586636940012541, "grad_norm": 1.1122944355010986, "learning_rate": 4.1451724149052764e-06, "loss": 0.1749114990234375, "step": 4210 }, { "epoch": 0.5867762837037553, "grad_norm": 0.6791863441467285, "learning_rate": 4.1428504251627335e-06, "loss": 0.13149261474609375, "step": 4211 }, { "epoch": 0.5869156273949697, "grad_norm": 0.9872690439224243, "learning_rate": 4.140528625859254e-06, "loss": 0.12934112548828125, "step": 4212 }, { "epoch": 0.5870549710861841, "grad_norm": 0.997520387172699, "learning_rate": 4.138207017510696e-06, "loss": 0.130279541015625, "step": 4213 }, { "epoch": 0.5871943147773985, "grad_norm": 0.9691494703292847, "learning_rate": 4.1358856006328614e-06, "loss": 0.15835189819335938, "step": 4214 }, { "epoch": 0.5873336584686129, "grad_norm": 1.0018951892852783, "learning_rate": 4.1335643757415195e-06, "loss": 0.15737152099609375, "step": 4215 }, { "epoch": 0.5874730021598272, "grad_norm": 0.6207454204559326, "learning_rate": 4.131243343352391e-06, "loss": 0.101806640625, "step": 4216 }, { "epoch": 0.5876123458510416, "grad_norm": 0.7651593685150146, "learning_rate": 4.128922503981158e-06, "loss": 0.133087158203125, "step": 4217 }, { "epoch": 0.587751689542256, "grad_norm": 1.1514803171157837, "learning_rate": 4.126601858143457e-06, "loss": 0.1543254852294922, "step": 4218 }, { "epoch": 0.5878910332334704, "grad_norm": 1.0126733779907227, "learning_rate": 4.124281406354883e-06, "loss": 0.12453842163085938, "step": 4219 }, { "epoch": 0.5880303769246847, "grad_norm": 1.2329574823379517, "learning_rate": 4.121961149130989e-06, "loss": 0.1747722625732422, "step": 4220 }, { "epoch": 0.5881697206158991, "grad_norm": 0.8449411988258362, "learning_rate": 4.119641086987282e-06, "loss": 0.1547393798828125, "step": 4221 }, { "epoch": 0.5883090643071135, "grad_norm": 1.340112328529358, "learning_rate": 4.1173212204392245e-06, "loss": 0.13037109375, "step": 4222 }, { "epoch": 0.5884484079983279, "grad_norm": 0.5902171730995178, "learning_rate": 4.115001550002241e-06, "loss": 0.10587692260742188, "step": 4223 }, { "epoch": 0.5885877516895422, "grad_norm": 0.737190306186676, "learning_rate": 4.1126820761917075e-06, "loss": 0.12705612182617188, "step": 4224 }, { "epoch": 0.5887270953807566, "grad_norm": 0.9169921278953552, "learning_rate": 4.11036279952296e-06, "loss": 0.15333938598632812, "step": 4225 }, { "epoch": 0.588866439071971, "grad_norm": 0.8418458104133606, "learning_rate": 4.108043720511287e-06, "loss": 0.13220977783203125, "step": 4226 }, { "epoch": 0.5890057827631854, "grad_norm": 0.7002966403961182, "learning_rate": 4.105724839671936e-06, "loss": 0.11960029602050781, "step": 4227 }, { "epoch": 0.5891451264543998, "grad_norm": 1.0054372549057007, "learning_rate": 4.103406157520108e-06, "loss": 0.1147613525390625, "step": 4228 }, { "epoch": 0.5892844701456141, "grad_norm": 0.8328045606613159, "learning_rate": 4.101087674570963e-06, "loss": 0.1375713348388672, "step": 4229 }, { "epoch": 0.5894238138368285, "grad_norm": 1.855636715888977, "learning_rate": 4.0987693913396145e-06, "loss": 0.230743408203125, "step": 4230 }, { "epoch": 0.5895631575280429, "grad_norm": 1.3231686353683472, "learning_rate": 4.096451308341132e-06, "loss": 0.13970565795898438, "step": 4231 }, { "epoch": 0.5897025012192573, "grad_norm": 0.7872354388237, "learning_rate": 4.094133426090539e-06, "loss": 0.12962722778320312, "step": 4232 }, { "epoch": 0.5898418449104716, "grad_norm": 0.7903165221214294, "learning_rate": 4.091815745102818e-06, "loss": 0.14436721801757812, "step": 4233 }, { "epoch": 0.589981188601686, "grad_norm": 1.0696690082550049, "learning_rate": 4.089498265892905e-06, "loss": 0.14687347412109375, "step": 4234 }, { "epoch": 0.5901205322929004, "grad_norm": 0.7120679020881653, "learning_rate": 4.0871809889756884e-06, "loss": 0.10828018188476562, "step": 4235 }, { "epoch": 0.5902598759841148, "grad_norm": 1.1370153427124023, "learning_rate": 4.084863914866018e-06, "loss": 0.17176055908203125, "step": 4236 }, { "epoch": 0.5903992196753292, "grad_norm": 1.096245288848877, "learning_rate": 4.082547044078693e-06, "loss": 0.1468505859375, "step": 4237 }, { "epoch": 0.5905385633665435, "grad_norm": 0.7577084898948669, "learning_rate": 4.0802303771284685e-06, "loss": 0.13472366333007812, "step": 4238 }, { "epoch": 0.5906779070577579, "grad_norm": 0.9347554445266724, "learning_rate": 4.0779139145300536e-06, "loss": 0.13863754272460938, "step": 4239 }, { "epoch": 0.5908172507489723, "grad_norm": 1.1202425956726074, "learning_rate": 4.075597656798117e-06, "loss": 0.1519489288330078, "step": 4240 }, { "epoch": 0.5909565944401867, "grad_norm": 0.6261022090911865, "learning_rate": 4.073281604447277e-06, "loss": 0.11025238037109375, "step": 4241 }, { "epoch": 0.5910959381314012, "grad_norm": 1.045609951019287, "learning_rate": 4.0709657579921075e-06, "loss": 0.15333938598632812, "step": 4242 }, { "epoch": 0.5912352818226155, "grad_norm": 1.3013837337493896, "learning_rate": 4.068650117947135e-06, "loss": 0.154937744140625, "step": 4243 }, { "epoch": 0.5913746255138299, "grad_norm": 1.1376254558563232, "learning_rate": 4.0663346848268435e-06, "loss": 0.1652069091796875, "step": 4244 }, { "epoch": 0.5915139692050443, "grad_norm": 1.6356959342956543, "learning_rate": 4.064019459145669e-06, "loss": 0.17356109619140625, "step": 4245 }, { "epoch": 0.5916533128962587, "grad_norm": 1.0655404329299927, "learning_rate": 4.061704441418002e-06, "loss": 0.15761756896972656, "step": 4246 }, { "epoch": 0.591792656587473, "grad_norm": 0.98643559217453, "learning_rate": 4.059389632158189e-06, "loss": 0.1442699432373047, "step": 4247 }, { "epoch": 0.5919320002786874, "grad_norm": 1.1094367504119873, "learning_rate": 4.057075031880521e-06, "loss": 0.1400299072265625, "step": 4248 }, { "epoch": 0.5920713439699018, "grad_norm": 0.7824642658233643, "learning_rate": 4.054760641099256e-06, "loss": 0.12962722778320312, "step": 4249 }, { "epoch": 0.5922106876611162, "grad_norm": 0.7888097167015076, "learning_rate": 4.052446460328595e-06, "loss": 0.13884735107421875, "step": 4250 }, { "epoch": 0.5923500313523306, "grad_norm": 0.9418435096740723, "learning_rate": 4.050132490082698e-06, "loss": 0.15467453002929688, "step": 4251 }, { "epoch": 0.5924893750435449, "grad_norm": 0.6822934150695801, "learning_rate": 4.0478187308756775e-06, "loss": 0.11388015747070312, "step": 4252 }, { "epoch": 0.5926287187347593, "grad_norm": 1.699842929840088, "learning_rate": 4.045505183221594e-06, "loss": 0.16628646850585938, "step": 4253 }, { "epoch": 0.5927680624259737, "grad_norm": 0.633351743221283, "learning_rate": 4.043191847634469e-06, "loss": 0.11379432678222656, "step": 4254 }, { "epoch": 0.5929074061171881, "grad_norm": 1.3258469104766846, "learning_rate": 4.040878724628269e-06, "loss": 0.16691970825195312, "step": 4255 }, { "epoch": 0.5930467498084024, "grad_norm": 0.7145062685012817, "learning_rate": 4.038565814716921e-06, "loss": 0.12417221069335938, "step": 4256 }, { "epoch": 0.5931860934996168, "grad_norm": 1.8593188524246216, "learning_rate": 4.036253118414299e-06, "loss": 0.19661331176757812, "step": 4257 }, { "epoch": 0.5933254371908312, "grad_norm": 1.1704761981964111, "learning_rate": 4.033940636234233e-06, "loss": 0.12981605529785156, "step": 4258 }, { "epoch": 0.5934647808820456, "grad_norm": 0.9137312173843384, "learning_rate": 4.0316283686905e-06, "loss": 0.13451385498046875, "step": 4259 }, { "epoch": 0.59360412457326, "grad_norm": 1.339461326599121, "learning_rate": 4.029316316296834e-06, "loss": 0.15930557250976562, "step": 4260 }, { "epoch": 0.5937434682644743, "grad_norm": 0.8706071376800537, "learning_rate": 4.027004479566923e-06, "loss": 0.12712478637695312, "step": 4261 }, { "epoch": 0.5938828119556887, "grad_norm": 0.688646674156189, "learning_rate": 4.024692859014403e-06, "loss": 0.14380836486816406, "step": 4262 }, { "epoch": 0.5940221556469031, "grad_norm": 0.5590473413467407, "learning_rate": 4.022381455152863e-06, "loss": 0.11657333374023438, "step": 4263 }, { "epoch": 0.5941614993381175, "grad_norm": 1.269221544265747, "learning_rate": 4.020070268495844e-06, "loss": 0.1774139404296875, "step": 4264 }, { "epoch": 0.5943008430293318, "grad_norm": 1.0773329734802246, "learning_rate": 4.017759299556838e-06, "loss": 0.15113449096679688, "step": 4265 }, { "epoch": 0.5944401867205462, "grad_norm": 1.0574898719787598, "learning_rate": 4.015448548849293e-06, "loss": 0.14162635803222656, "step": 4266 }, { "epoch": 0.5945795304117606, "grad_norm": 0.8509233593940735, "learning_rate": 4.0131380168866e-06, "loss": 0.14156723022460938, "step": 4267 }, { "epoch": 0.594718874102975, "grad_norm": 0.9594283699989319, "learning_rate": 4.010827704182113e-06, "loss": 0.12031936645507812, "step": 4268 }, { "epoch": 0.5948582177941893, "grad_norm": 0.6743450164794922, "learning_rate": 4.0085176112491245e-06, "loss": 0.13947677612304688, "step": 4269 }, { "epoch": 0.5949975614854037, "grad_norm": 0.8773754835128784, "learning_rate": 4.006207738600887e-06, "loss": 0.12764739990234375, "step": 4270 }, { "epoch": 0.5951369051766181, "grad_norm": 0.8453159928321838, "learning_rate": 4.0038980867506e-06, "loss": 0.11922454833984375, "step": 4271 }, { "epoch": 0.5952762488678325, "grad_norm": 0.8429125547409058, "learning_rate": 4.001588656211418e-06, "loss": 0.14759445190429688, "step": 4272 }, { "epoch": 0.5954155925590469, "grad_norm": 0.7534430027008057, "learning_rate": 3.999279447496444e-06, "loss": 0.13514328002929688, "step": 4273 }, { "epoch": 0.5955549362502612, "grad_norm": 0.5877218842506409, "learning_rate": 3.996970461118729e-06, "loss": 0.12906646728515625, "step": 4274 }, { "epoch": 0.5956942799414756, "grad_norm": 0.7751796245574951, "learning_rate": 3.994661697591278e-06, "loss": 0.14881134033203125, "step": 4275 }, { "epoch": 0.59583362363269, "grad_norm": 0.8120980262756348, "learning_rate": 3.992353157427044e-06, "loss": 0.15302276611328125, "step": 4276 }, { "epoch": 0.5959729673239044, "grad_norm": 0.953586220741272, "learning_rate": 3.990044841138934e-06, "loss": 0.1503143310546875, "step": 4277 }, { "epoch": 0.5961123110151187, "grad_norm": 1.6378556489944458, "learning_rate": 3.987736749239804e-06, "loss": 0.1757946014404297, "step": 4278 }, { "epoch": 0.5962516547063331, "grad_norm": 0.9481470584869385, "learning_rate": 3.985428882242458e-06, "loss": 0.16292190551757812, "step": 4279 }, { "epoch": 0.5963909983975475, "grad_norm": 1.106764554977417, "learning_rate": 3.983121240659649e-06, "loss": 0.17011260986328125, "step": 4280 }, { "epoch": 0.5965303420887619, "grad_norm": 0.7004576325416565, "learning_rate": 3.980813825004086e-06, "loss": 0.14243698120117188, "step": 4281 }, { "epoch": 0.5966696857799764, "grad_norm": 1.0646264553070068, "learning_rate": 3.978506635788423e-06, "loss": 0.15501785278320312, "step": 4282 }, { "epoch": 0.5968090294711907, "grad_norm": 1.080505132675171, "learning_rate": 3.976199673525263e-06, "loss": 0.17081451416015625, "step": 4283 }, { "epoch": 0.5969483731624051, "grad_norm": 1.2703182697296143, "learning_rate": 3.973892938727164e-06, "loss": 0.13591766357421875, "step": 4284 }, { "epoch": 0.5970877168536195, "grad_norm": 0.961674153804779, "learning_rate": 3.971586431906627e-06, "loss": 0.13677215576171875, "step": 4285 }, { "epoch": 0.5972270605448339, "grad_norm": 1.0875989198684692, "learning_rate": 3.969280153576105e-06, "loss": 0.1372203826904297, "step": 4286 }, { "epoch": 0.5973664042360483, "grad_norm": 1.8890036344528198, "learning_rate": 3.966974104248001e-06, "loss": 0.164337158203125, "step": 4287 }, { "epoch": 0.5975057479272626, "grad_norm": 1.4471052885055542, "learning_rate": 3.964668284434666e-06, "loss": 0.1496105194091797, "step": 4288 }, { "epoch": 0.597645091618477, "grad_norm": 0.7531256079673767, "learning_rate": 3.962362694648404e-06, "loss": 0.12439727783203125, "step": 4289 }, { "epoch": 0.5977844353096914, "grad_norm": 0.8150600790977478, "learning_rate": 3.960057335401459e-06, "loss": 0.12288665771484375, "step": 4290 }, { "epoch": 0.5979237790009058, "grad_norm": 1.1042957305908203, "learning_rate": 3.9577522072060336e-06, "loss": 0.13885498046875, "step": 4291 }, { "epoch": 0.5980631226921201, "grad_norm": 1.3860975503921509, "learning_rate": 3.95544731057427e-06, "loss": 0.1722259521484375, "step": 4292 }, { "epoch": 0.5982024663833345, "grad_norm": 0.9113876819610596, "learning_rate": 3.953142646018269e-06, "loss": 0.11820220947265625, "step": 4293 }, { "epoch": 0.5983418100745489, "grad_norm": 0.9056123495101929, "learning_rate": 3.95083821405007e-06, "loss": 0.13654708862304688, "step": 4294 }, { "epoch": 0.5984811537657633, "grad_norm": 0.849687397480011, "learning_rate": 3.948534015181671e-06, "loss": 0.14603424072265625, "step": 4295 }, { "epoch": 0.5986204974569777, "grad_norm": 0.9042789340019226, "learning_rate": 3.946230049925004e-06, "loss": 0.13887786865234375, "step": 4296 }, { "epoch": 0.598759841148192, "grad_norm": 0.8084331750869751, "learning_rate": 3.9439263187919635e-06, "loss": 0.14299774169921875, "step": 4297 }, { "epoch": 0.5988991848394064, "grad_norm": 0.9376019835472107, "learning_rate": 3.941622822294385e-06, "loss": 0.14184951782226562, "step": 4298 }, { "epoch": 0.5990385285306208, "grad_norm": 1.0393174886703491, "learning_rate": 3.939319560944051e-06, "loss": 0.14032363891601562, "step": 4299 }, { "epoch": 0.5991778722218352, "grad_norm": 0.41697314381599426, "learning_rate": 3.937016535252696e-06, "loss": 0.09281539916992188, "step": 4300 }, { "epoch": 0.5993172159130495, "grad_norm": 1.0145339965820312, "learning_rate": 3.934713745731998e-06, "loss": 0.133331298828125, "step": 4301 }, { "epoch": 0.5994565596042639, "grad_norm": 1.0816147327423096, "learning_rate": 3.932411192893586e-06, "loss": 0.14011383056640625, "step": 4302 }, { "epoch": 0.5995959032954783, "grad_norm": 0.5141347646713257, "learning_rate": 3.93010887724903e-06, "loss": 0.1077423095703125, "step": 4303 }, { "epoch": 0.5997352469866927, "grad_norm": 0.8740277290344238, "learning_rate": 3.927806799309859e-06, "loss": 0.1343994140625, "step": 4304 }, { "epoch": 0.599874590677907, "grad_norm": 0.7679859399795532, "learning_rate": 3.925504959587538e-06, "loss": 0.12497329711914062, "step": 4305 }, { "epoch": 0.6000139343691214, "grad_norm": 1.2161357402801514, "learning_rate": 3.9232033585934835e-06, "loss": 0.14423370361328125, "step": 4306 }, { "epoch": 0.6001532780603358, "grad_norm": 1.0415946245193481, "learning_rate": 3.920901996839059e-06, "loss": 0.1306743621826172, "step": 4307 }, { "epoch": 0.6002926217515502, "grad_norm": 0.9370400309562683, "learning_rate": 3.918600874835573e-06, "loss": 0.15240478515625, "step": 4308 }, { "epoch": 0.6004319654427646, "grad_norm": 0.818911075592041, "learning_rate": 3.916299993094285e-06, "loss": 0.148193359375, "step": 4309 }, { "epoch": 0.6005713091339789, "grad_norm": 0.6636017560958862, "learning_rate": 3.913999352126399e-06, "loss": 0.12154006958007812, "step": 4310 }, { "epoch": 0.6007106528251933, "grad_norm": 0.9155848026275635, "learning_rate": 3.9116989524430615e-06, "loss": 0.16238784790039062, "step": 4311 }, { "epoch": 0.6008499965164077, "grad_norm": 0.836836576461792, "learning_rate": 3.90939879455537e-06, "loss": 0.14165115356445312, "step": 4312 }, { "epoch": 0.6009893402076221, "grad_norm": 0.766355037689209, "learning_rate": 3.907098878974367e-06, "loss": 0.13569259643554688, "step": 4313 }, { "epoch": 0.6011286838988364, "grad_norm": 0.9395158290863037, "learning_rate": 3.9047992062110435e-06, "loss": 0.158172607421875, "step": 4314 }, { "epoch": 0.6012680275900508, "grad_norm": 0.8683967590332031, "learning_rate": 3.902499776776331e-06, "loss": 0.13346099853515625, "step": 4315 }, { "epoch": 0.6014073712812652, "grad_norm": 1.4065546989440918, "learning_rate": 3.900200591181114e-06, "loss": 0.14842987060546875, "step": 4316 }, { "epoch": 0.6015467149724796, "grad_norm": 0.5691830515861511, "learning_rate": 3.897901649936215e-06, "loss": 0.1097412109375, "step": 4317 }, { "epoch": 0.601686058663694, "grad_norm": 0.7753686904907227, "learning_rate": 3.895602953552408e-06, "loss": 0.11057281494140625, "step": 4318 }, { "epoch": 0.6018254023549083, "grad_norm": 1.1911755800247192, "learning_rate": 3.8933045025404105e-06, "loss": 0.15063095092773438, "step": 4319 }, { "epoch": 0.6019647460461227, "grad_norm": 0.7409306168556213, "learning_rate": 3.891006297410887e-06, "loss": 0.11888504028320312, "step": 4320 }, { "epoch": 0.6021040897373371, "grad_norm": 0.9339765310287476, "learning_rate": 3.888708338674447e-06, "loss": 0.1263885498046875, "step": 4321 }, { "epoch": 0.6022434334285516, "grad_norm": 0.8349093794822693, "learning_rate": 3.8864106268416416e-06, "loss": 0.13895797729492188, "step": 4322 }, { "epoch": 0.602382777119766, "grad_norm": 0.6127476096153259, "learning_rate": 3.884113162422971e-06, "loss": 0.11458969116210938, "step": 4323 }, { "epoch": 0.6025221208109803, "grad_norm": 1.0588371753692627, "learning_rate": 3.881815945928879e-06, "loss": 0.13887405395507812, "step": 4324 }, { "epoch": 0.6026614645021947, "grad_norm": 1.455953598022461, "learning_rate": 3.879518977869755e-06, "loss": 0.17549896240234375, "step": 4325 }, { "epoch": 0.6028008081934091, "grad_norm": 0.7932500243186951, "learning_rate": 3.8772222587559345e-06, "loss": 0.12959671020507812, "step": 4326 }, { "epoch": 0.6029401518846235, "grad_norm": 1.2522964477539062, "learning_rate": 3.874925789097695e-06, "loss": 0.1376800537109375, "step": 4327 }, { "epoch": 0.6030794955758378, "grad_norm": 0.6594012975692749, "learning_rate": 3.872629569405257e-06, "loss": 0.12711715698242188, "step": 4328 }, { "epoch": 0.6032188392670522, "grad_norm": 0.9145895838737488, "learning_rate": 3.870333600188792e-06, "loss": 0.1428680419921875, "step": 4329 }, { "epoch": 0.6033581829582666, "grad_norm": 1.2480865716934204, "learning_rate": 3.86803788195841e-06, "loss": 0.14817047119140625, "step": 4330 }, { "epoch": 0.603497526649481, "grad_norm": 1.2862669229507446, "learning_rate": 3.865742415224169e-06, "loss": 0.165740966796875, "step": 4331 }, { "epoch": 0.6036368703406954, "grad_norm": 1.174070119857788, "learning_rate": 3.863447200496065e-06, "loss": 0.17219161987304688, "step": 4332 }, { "epoch": 0.6037762140319097, "grad_norm": 0.9593729376792908, "learning_rate": 3.8611522382840476e-06, "loss": 0.14835739135742188, "step": 4333 }, { "epoch": 0.6039155577231241, "grad_norm": 0.9280356764793396, "learning_rate": 3.858857529098001e-06, "loss": 0.15337371826171875, "step": 4334 }, { "epoch": 0.6040549014143385, "grad_norm": 0.966920018196106, "learning_rate": 3.8565630734477575e-06, "loss": 0.147674560546875, "step": 4335 }, { "epoch": 0.6041942451055529, "grad_norm": 1.055437445640564, "learning_rate": 3.854268871843096e-06, "loss": 0.15958786010742188, "step": 4336 }, { "epoch": 0.6043335887967672, "grad_norm": 0.8002368211746216, "learning_rate": 3.851974924793734e-06, "loss": 0.13190841674804688, "step": 4337 }, { "epoch": 0.6044729324879816, "grad_norm": 1.9529081583023071, "learning_rate": 3.8496812328093335e-06, "loss": 0.15593719482421875, "step": 4338 }, { "epoch": 0.604612276179196, "grad_norm": 1.0924067497253418, "learning_rate": 3.8473877963995e-06, "loss": 0.1442108154296875, "step": 4339 }, { "epoch": 0.6047516198704104, "grad_norm": 0.8488298654556274, "learning_rate": 3.845094616073783e-06, "loss": 0.12597274780273438, "step": 4340 }, { "epoch": 0.6048909635616248, "grad_norm": 0.4739251434803009, "learning_rate": 3.8428016923416775e-06, "loss": 0.09540748596191406, "step": 4341 }, { "epoch": 0.6050303072528391, "grad_norm": 0.6082900166511536, "learning_rate": 3.840509025712616e-06, "loss": 0.12023162841796875, "step": 4342 }, { "epoch": 0.6051696509440535, "grad_norm": 1.4232312440872192, "learning_rate": 3.838216616695977e-06, "loss": 0.16391372680664062, "step": 4343 }, { "epoch": 0.6053089946352679, "grad_norm": 0.6183677911758423, "learning_rate": 3.835924465801081e-06, "loss": 0.11939620971679688, "step": 4344 }, { "epoch": 0.6054483383264823, "grad_norm": 1.4580395221710205, "learning_rate": 3.833632573537193e-06, "loss": 0.16492462158203125, "step": 4345 }, { "epoch": 0.6055876820176966, "grad_norm": 0.6900798678398132, "learning_rate": 3.831340940413519e-06, "loss": 0.13131332397460938, "step": 4346 }, { "epoch": 0.605727025708911, "grad_norm": 0.8776410222053528, "learning_rate": 3.8290495669392085e-06, "loss": 0.12572288513183594, "step": 4347 }, { "epoch": 0.6058663694001254, "grad_norm": 1.1538463830947876, "learning_rate": 3.826758453623348e-06, "loss": 0.18378448486328125, "step": 4348 }, { "epoch": 0.6060057130913398, "grad_norm": 1.2057794332504272, "learning_rate": 3.8244676009749745e-06, "loss": 0.16713333129882812, "step": 4349 }, { "epoch": 0.6061450567825541, "grad_norm": 0.6724324226379395, "learning_rate": 3.8221770095030625e-06, "loss": 0.1224212646484375, "step": 4350 }, { "epoch": 0.6062844004737685, "grad_norm": 0.7613294124603271, "learning_rate": 3.819886679716528e-06, "loss": 0.14971923828125, "step": 4351 }, { "epoch": 0.6064237441649829, "grad_norm": 0.5770276188850403, "learning_rate": 3.8175966121242314e-06, "loss": 0.11504745483398438, "step": 4352 }, { "epoch": 0.6065630878561973, "grad_norm": 1.5943820476531982, "learning_rate": 3.815306807234974e-06, "loss": 0.18596267700195312, "step": 4353 }, { "epoch": 0.6067024315474117, "grad_norm": 1.5236979722976685, "learning_rate": 3.8130172655574963e-06, "loss": 0.142669677734375, "step": 4354 }, { "epoch": 0.606841775238626, "grad_norm": 1.0571601390838623, "learning_rate": 3.810727987600482e-06, "loss": 0.13667678833007812, "step": 4355 }, { "epoch": 0.6069811189298404, "grad_norm": 0.7937259674072266, "learning_rate": 3.808438973872558e-06, "loss": 0.12718582153320312, "step": 4356 }, { "epoch": 0.6071204626210548, "grad_norm": 0.8384339213371277, "learning_rate": 3.80615022488229e-06, "loss": 0.1253662109375, "step": 4357 }, { "epoch": 0.6072598063122692, "grad_norm": 0.864533007144928, "learning_rate": 3.8038617411381876e-06, "loss": 0.14405441284179688, "step": 4358 }, { "epoch": 0.6073991500034835, "grad_norm": 1.277390718460083, "learning_rate": 3.8015735231486974e-06, "loss": 0.15524673461914062, "step": 4359 }, { "epoch": 0.6075384936946979, "grad_norm": 1.040945291519165, "learning_rate": 3.799285571422208e-06, "loss": 0.1271820068359375, "step": 4360 }, { "epoch": 0.6076778373859123, "grad_norm": 0.8667135834693909, "learning_rate": 3.7969978864670527e-06, "loss": 0.13346481323242188, "step": 4361 }, { "epoch": 0.6078171810771267, "grad_norm": 0.8088968992233276, "learning_rate": 3.794710468791502e-06, "loss": 0.13547897338867188, "step": 4362 }, { "epoch": 0.6079565247683412, "grad_norm": 0.6092634797096252, "learning_rate": 3.7924233189037697e-06, "loss": 0.11195755004882812, "step": 4363 }, { "epoch": 0.6080958684595555, "grad_norm": 0.9604632258415222, "learning_rate": 3.7901364373120036e-06, "loss": 0.14391326904296875, "step": 4364 }, { "epoch": 0.6082352121507699, "grad_norm": 1.3588851690292358, "learning_rate": 3.787849824524301e-06, "loss": 0.1428241729736328, "step": 4365 }, { "epoch": 0.6083745558419843, "grad_norm": 1.2602136135101318, "learning_rate": 3.7855634810486936e-06, "loss": 0.12152099609375, "step": 4366 }, { "epoch": 0.6085138995331987, "grad_norm": 1.2525430917739868, "learning_rate": 3.7832774073931535e-06, "loss": 0.15290069580078125, "step": 4367 }, { "epoch": 0.608653243224413, "grad_norm": 0.7384583950042725, "learning_rate": 3.780991604065598e-06, "loss": 0.131622314453125, "step": 4368 }, { "epoch": 0.6087925869156274, "grad_norm": 1.1458629369735718, "learning_rate": 3.778706071573875e-06, "loss": 0.14580535888671875, "step": 4369 }, { "epoch": 0.6089319306068418, "grad_norm": 1.6777276992797852, "learning_rate": 3.776420810425781e-06, "loss": 0.18318939208984375, "step": 4370 }, { "epoch": 0.6090712742980562, "grad_norm": 0.7351699471473694, "learning_rate": 3.774135821129047e-06, "loss": 0.13262176513671875, "step": 4371 }, { "epoch": 0.6092106179892706, "grad_norm": 0.509121298789978, "learning_rate": 3.771851104191348e-06, "loss": 0.11008644104003906, "step": 4372 }, { "epoch": 0.6093499616804849, "grad_norm": 0.876210629940033, "learning_rate": 3.7695666601202944e-06, "loss": 0.1438446044921875, "step": 4373 }, { "epoch": 0.6094893053716993, "grad_norm": 0.7970649003982544, "learning_rate": 3.7672824894234388e-06, "loss": 0.12012100219726562, "step": 4374 }, { "epoch": 0.6096286490629137, "grad_norm": 0.7326534986495972, "learning_rate": 3.7649985926082695e-06, "loss": 0.13513946533203125, "step": 4375 }, { "epoch": 0.6097679927541281, "grad_norm": 0.8493383526802063, "learning_rate": 3.762714970182216e-06, "loss": 0.15704727172851562, "step": 4376 }, { "epoch": 0.6099073364453425, "grad_norm": 0.9604399800300598, "learning_rate": 3.76043162265265e-06, "loss": 0.130035400390625, "step": 4377 }, { "epoch": 0.6100466801365568, "grad_norm": 0.9650564193725586, "learning_rate": 3.758148550526877e-06, "loss": 0.15404891967773438, "step": 4378 }, { "epoch": 0.6101860238277712, "grad_norm": 0.6061825156211853, "learning_rate": 3.7558657543121456e-06, "loss": 0.11675262451171875, "step": 4379 }, { "epoch": 0.6103253675189856, "grad_norm": 0.8565155863761902, "learning_rate": 3.7535832345156376e-06, "loss": 0.12702369689941406, "step": 4380 }, { "epoch": 0.6104647112102, "grad_norm": 1.9305996894836426, "learning_rate": 3.7513009916444797e-06, "loss": 0.20943069458007812, "step": 4381 }, { "epoch": 0.6106040549014143, "grad_norm": 1.2117584943771362, "learning_rate": 3.7490190262057322e-06, "loss": 0.1328887939453125, "step": 4382 }, { "epoch": 0.6107433985926287, "grad_norm": 0.6533942222595215, "learning_rate": 3.7467373387063973e-06, "loss": 0.11376571655273438, "step": 4383 }, { "epoch": 0.6108827422838431, "grad_norm": 1.1217964887619019, "learning_rate": 3.7444559296534144e-06, "loss": 0.15708541870117188, "step": 4384 }, { "epoch": 0.6110220859750575, "grad_norm": 1.33692467212677, "learning_rate": 3.7421747995536585e-06, "loss": 0.15997695922851562, "step": 4385 }, { "epoch": 0.6111614296662718, "grad_norm": 0.8705374598503113, "learning_rate": 3.739893948913945e-06, "loss": 0.14802169799804688, "step": 4386 }, { "epoch": 0.6113007733574862, "grad_norm": 1.0750161409378052, "learning_rate": 3.7376133782410275e-06, "loss": 0.16360855102539062, "step": 4387 }, { "epoch": 0.6114401170487006, "grad_norm": 0.8757566213607788, "learning_rate": 3.7353330880415963e-06, "loss": 0.1366252899169922, "step": 4388 }, { "epoch": 0.611579460739915, "grad_norm": 0.9386341571807861, "learning_rate": 3.7330530788222807e-06, "loss": 0.14347076416015625, "step": 4389 }, { "epoch": 0.6117188044311294, "grad_norm": 0.9185933470726013, "learning_rate": 3.730773351089647e-06, "loss": 0.10711288452148438, "step": 4390 }, { "epoch": 0.6118581481223437, "grad_norm": 0.864750862121582, "learning_rate": 3.7284939053501966e-06, "loss": 0.12572860717773438, "step": 4391 }, { "epoch": 0.6119974918135581, "grad_norm": 0.9080959558486938, "learning_rate": 3.7262147421103713e-06, "loss": 0.13830184936523438, "step": 4392 }, { "epoch": 0.6121368355047725, "grad_norm": 1.029009461402893, "learning_rate": 3.723935861876549e-06, "loss": 0.15884017944335938, "step": 4393 }, { "epoch": 0.6122761791959869, "grad_norm": 0.7590280771255493, "learning_rate": 3.7216572651550453e-06, "loss": 0.13348388671875, "step": 4394 }, { "epoch": 0.6124155228872012, "grad_norm": 0.5221071839332581, "learning_rate": 3.7193789524521146e-06, "loss": 0.11178398132324219, "step": 4395 }, { "epoch": 0.6125548665784156, "grad_norm": 0.586344838142395, "learning_rate": 3.717100924273941e-06, "loss": 0.11386871337890625, "step": 4396 }, { "epoch": 0.61269421026963, "grad_norm": 0.9861702919006348, "learning_rate": 3.714823181126653e-06, "loss": 0.14986801147460938, "step": 4397 }, { "epoch": 0.6128335539608444, "grad_norm": 0.7117627859115601, "learning_rate": 3.7125457235163144e-06, "loss": 0.12642860412597656, "step": 4398 }, { "epoch": 0.6129728976520588, "grad_norm": 2.063145160675049, "learning_rate": 3.710268551948921e-06, "loss": 0.16497421264648438, "step": 4399 }, { "epoch": 0.6131122413432731, "grad_norm": 1.4657052755355835, "learning_rate": 3.7079916669304127e-06, "loss": 0.13878631591796875, "step": 4400 }, { "epoch": 0.6132515850344875, "grad_norm": 0.6714819669723511, "learning_rate": 3.7057150689666577e-06, "loss": 0.12619781494140625, "step": 4401 }, { "epoch": 0.6133909287257019, "grad_norm": 0.5938574075698853, "learning_rate": 3.7034387585634656e-06, "loss": 0.1074981689453125, "step": 4402 }, { "epoch": 0.6135302724169164, "grad_norm": 0.6843019723892212, "learning_rate": 3.701162736226579e-06, "loss": 0.13350677490234375, "step": 4403 }, { "epoch": 0.6136696161081308, "grad_norm": 1.0017414093017578, "learning_rate": 3.6988870024616807e-06, "loss": 0.1337432861328125, "step": 4404 }, { "epoch": 0.6138089597993451, "grad_norm": 1.3407970666885376, "learning_rate": 3.6966115577743865e-06, "loss": 0.16374588012695312, "step": 4405 }, { "epoch": 0.6139483034905595, "grad_norm": 0.6523319482803345, "learning_rate": 3.6943364026702466e-06, "loss": 0.12370872497558594, "step": 4406 }, { "epoch": 0.6140876471817739, "grad_norm": 0.735145092010498, "learning_rate": 3.6920615376547487e-06, "loss": 0.12720870971679688, "step": 4407 }, { "epoch": 0.6142269908729883, "grad_norm": 2.4833531379699707, "learning_rate": 3.6897869632333157e-06, "loss": 0.2106475830078125, "step": 4408 }, { "epoch": 0.6143663345642026, "grad_norm": 0.6387578248977661, "learning_rate": 3.687512679911307e-06, "loss": 0.13099288940429688, "step": 4409 }, { "epoch": 0.614505678255417, "grad_norm": 0.8421850800514221, "learning_rate": 3.685238688194016e-06, "loss": 0.14472198486328125, "step": 4410 }, { "epoch": 0.6146450219466314, "grad_norm": 0.7321077585220337, "learning_rate": 3.682964988586675e-06, "loss": 0.12685394287109375, "step": 4411 }, { "epoch": 0.6147843656378458, "grad_norm": 0.9939286708831787, "learning_rate": 3.6806915815944422e-06, "loss": 0.16713333129882812, "step": 4412 }, { "epoch": 0.6149237093290602, "grad_norm": 0.5222665071487427, "learning_rate": 3.6784184677224204e-06, "loss": 0.10124969482421875, "step": 4413 }, { "epoch": 0.6150630530202745, "grad_norm": 0.6067512631416321, "learning_rate": 3.676145647475643e-06, "loss": 0.11732101440429688, "step": 4414 }, { "epoch": 0.6152023967114889, "grad_norm": 0.9868491888046265, "learning_rate": 3.673873121359077e-06, "loss": 0.14800262451171875, "step": 4415 }, { "epoch": 0.6153417404027033, "grad_norm": 0.7305862307548523, "learning_rate": 3.6716008898776306e-06, "loss": 0.14220809936523438, "step": 4416 }, { "epoch": 0.6154810840939177, "grad_norm": 0.8577948808670044, "learning_rate": 3.669328953536137e-06, "loss": 0.13115692138671875, "step": 4417 }, { "epoch": 0.615620427785132, "grad_norm": 0.9747541546821594, "learning_rate": 3.6670573128393704e-06, "loss": 0.16052627563476562, "step": 4418 }, { "epoch": 0.6157597714763464, "grad_norm": 0.7250021696090698, "learning_rate": 3.664785968292036e-06, "loss": 0.14686965942382812, "step": 4419 }, { "epoch": 0.6158991151675608, "grad_norm": 0.8680084943771362, "learning_rate": 3.662514920398777e-06, "loss": 0.141326904296875, "step": 4420 }, { "epoch": 0.6160384588587752, "grad_norm": 0.7353823781013489, "learning_rate": 3.6602441696641684e-06, "loss": 0.14020538330078125, "step": 4421 }, { "epoch": 0.6161778025499896, "grad_norm": 1.5816904306411743, "learning_rate": 3.6579737165927176e-06, "loss": 0.17899703979492188, "step": 4422 }, { "epoch": 0.6163171462412039, "grad_norm": 1.3714717626571655, "learning_rate": 3.655703561688867e-06, "loss": 0.13694381713867188, "step": 4423 }, { "epoch": 0.6164564899324183, "grad_norm": 1.298721432685852, "learning_rate": 3.653433705456994e-06, "loss": 0.15353012084960938, "step": 4424 }, { "epoch": 0.6165958336236327, "grad_norm": 1.1166753768920898, "learning_rate": 3.651164148401409e-06, "loss": 0.14995193481445312, "step": 4425 }, { "epoch": 0.6167351773148471, "grad_norm": 0.7132190465927124, "learning_rate": 3.648894891026358e-06, "loss": 0.12299346923828125, "step": 4426 }, { "epoch": 0.6168745210060614, "grad_norm": 1.0054783821105957, "learning_rate": 3.646625933836015e-06, "loss": 0.15833663940429688, "step": 4427 }, { "epoch": 0.6170138646972758, "grad_norm": 0.7317361831665039, "learning_rate": 3.64435727733449e-06, "loss": 0.12589645385742188, "step": 4428 }, { "epoch": 0.6171532083884902, "grad_norm": 1.604451298713684, "learning_rate": 3.6420889220258295e-06, "loss": 0.18845748901367188, "step": 4429 }, { "epoch": 0.6172925520797046, "grad_norm": 1.2032650709152222, "learning_rate": 3.639820868414008e-06, "loss": 0.15194320678710938, "step": 4430 }, { "epoch": 0.617431895770919, "grad_norm": 1.0906566381454468, "learning_rate": 3.6375531170029356e-06, "loss": 0.1611328125, "step": 4431 }, { "epoch": 0.6175712394621333, "grad_norm": 0.8389455080032349, "learning_rate": 3.6352856682964576e-06, "loss": 0.159393310546875, "step": 4432 }, { "epoch": 0.6177105831533477, "grad_norm": 0.884926974773407, "learning_rate": 3.633018522798346e-06, "loss": 0.13097381591796875, "step": 4433 }, { "epoch": 0.6178499268445621, "grad_norm": 0.7861533761024475, "learning_rate": 3.6307516810123095e-06, "loss": 0.14963912963867188, "step": 4434 }, { "epoch": 0.6179892705357765, "grad_norm": 0.4511423408985138, "learning_rate": 3.6284851434419886e-06, "loss": 0.1118316650390625, "step": 4435 }, { "epoch": 0.6181286142269908, "grad_norm": 0.8801863193511963, "learning_rate": 3.6262189105909574e-06, "loss": 0.124786376953125, "step": 4436 }, { "epoch": 0.6182679579182052, "grad_norm": 1.2523077726364136, "learning_rate": 3.6239529829627214e-06, "loss": 0.184478759765625, "step": 4437 }, { "epoch": 0.6184073016094196, "grad_norm": 1.1413036584854126, "learning_rate": 3.6216873610607155e-06, "loss": 0.1513671875, "step": 4438 }, { "epoch": 0.618546645300634, "grad_norm": 0.9574642181396484, "learning_rate": 3.61942204538831e-06, "loss": 0.14070892333984375, "step": 4439 }, { "epoch": 0.6186859889918483, "grad_norm": 0.9309896230697632, "learning_rate": 3.6171570364488075e-06, "loss": 0.14843368530273438, "step": 4440 }, { "epoch": 0.6188253326830627, "grad_norm": 0.6119570732116699, "learning_rate": 3.6148923347454413e-06, "loss": 0.11941146850585938, "step": 4441 }, { "epoch": 0.6189646763742771, "grad_norm": 1.0068871974945068, "learning_rate": 3.6126279407813765e-06, "loss": 0.1470794677734375, "step": 4442 }, { "epoch": 0.6191040200654916, "grad_norm": 1.1691641807556152, "learning_rate": 3.6103638550597074e-06, "loss": 0.17292404174804688, "step": 4443 }, { "epoch": 0.619243363756706, "grad_norm": 1.032471776008606, "learning_rate": 3.6081000780834635e-06, "loss": 0.1491851806640625, "step": 4444 }, { "epoch": 0.6193827074479203, "grad_norm": 1.0388251543045044, "learning_rate": 3.6058366103556055e-06, "loss": 0.13993072509765625, "step": 4445 }, { "epoch": 0.6195220511391347, "grad_norm": 1.294129729270935, "learning_rate": 3.6035734523790235e-06, "loss": 0.15911102294921875, "step": 4446 }, { "epoch": 0.6196613948303491, "grad_norm": 0.7123086452484131, "learning_rate": 3.6013106046565383e-06, "loss": 0.12334060668945312, "step": 4447 }, { "epoch": 0.6198007385215635, "grad_norm": 1.2619513273239136, "learning_rate": 3.5990480676909055e-06, "loss": 0.15525436401367188, "step": 4448 }, { "epoch": 0.6199400822127779, "grad_norm": 0.9638437628746033, "learning_rate": 3.5967858419848077e-06, "loss": 0.14393997192382812, "step": 4449 }, { "epoch": 0.6200794259039922, "grad_norm": 1.0971986055374146, "learning_rate": 3.5945239280408596e-06, "loss": 0.1245880126953125, "step": 4450 }, { "epoch": 0.6202187695952066, "grad_norm": 1.1688506603240967, "learning_rate": 3.592262326361606e-06, "loss": 0.1486663818359375, "step": 4451 }, { "epoch": 0.620358113286421, "grad_norm": 1.0649343729019165, "learning_rate": 3.5900010374495252e-06, "loss": 0.150848388671875, "step": 4452 }, { "epoch": 0.6204974569776354, "grad_norm": 0.9335168600082397, "learning_rate": 3.587740061807024e-06, "loss": 0.14511871337890625, "step": 4453 }, { "epoch": 0.6206368006688497, "grad_norm": 0.8759202361106873, "learning_rate": 3.585479399936438e-06, "loss": 0.12998580932617188, "step": 4454 }, { "epoch": 0.6207761443600641, "grad_norm": 0.5148478150367737, "learning_rate": 3.583219052340034e-06, "loss": 0.11292839050292969, "step": 4455 }, { "epoch": 0.6209154880512785, "grad_norm": 0.8380141854286194, "learning_rate": 3.5809590195200115e-06, "loss": 0.14859390258789062, "step": 4456 }, { "epoch": 0.6210548317424929, "grad_norm": 1.2622121572494507, "learning_rate": 3.578699301978499e-06, "loss": 0.1303558349609375, "step": 4457 }, { "epoch": 0.6211941754337073, "grad_norm": 0.84162837266922, "learning_rate": 3.576439900217552e-06, "loss": 0.14078903198242188, "step": 4458 }, { "epoch": 0.6213335191249216, "grad_norm": 1.6541109085083008, "learning_rate": 3.5741808147391587e-06, "loss": 0.16749191284179688, "step": 4459 }, { "epoch": 0.621472862816136, "grad_norm": 1.6464362144470215, "learning_rate": 3.571922046045235e-06, "loss": 0.15613555908203125, "step": 4460 }, { "epoch": 0.6216122065073504, "grad_norm": 0.7785850167274475, "learning_rate": 3.5696635946376305e-06, "loss": 0.14500808715820312, "step": 4461 }, { "epoch": 0.6217515501985648, "grad_norm": 0.6553338170051575, "learning_rate": 3.5674054610181203e-06, "loss": 0.13356399536132812, "step": 4462 }, { "epoch": 0.6218908938897791, "grad_norm": 0.6297213435173035, "learning_rate": 3.5651476456884103e-06, "loss": 0.12017059326171875, "step": 4463 }, { "epoch": 0.6220302375809935, "grad_norm": 1.372265338897705, "learning_rate": 3.562890149150134e-06, "loss": 0.16509628295898438, "step": 4464 }, { "epoch": 0.6221695812722079, "grad_norm": 0.7511188387870789, "learning_rate": 3.560632971904857e-06, "loss": 0.11285400390625, "step": 4465 }, { "epoch": 0.6223089249634223, "grad_norm": 1.0587257146835327, "learning_rate": 3.558376114454073e-06, "loss": 0.15550994873046875, "step": 4466 }, { "epoch": 0.6224482686546366, "grad_norm": 1.2747793197631836, "learning_rate": 3.556119577299202e-06, "loss": 0.16318511962890625, "step": 4467 }, { "epoch": 0.622587612345851, "grad_norm": 0.7022801637649536, "learning_rate": 3.553863360941598e-06, "loss": 0.13010787963867188, "step": 4468 }, { "epoch": 0.6227269560370654, "grad_norm": 1.5522387027740479, "learning_rate": 3.55160746588254e-06, "loss": 0.12872314453125, "step": 4469 }, { "epoch": 0.6228662997282798, "grad_norm": 0.8660989999771118, "learning_rate": 3.5493518926232352e-06, "loss": 0.14510726928710938, "step": 4470 }, { "epoch": 0.6230056434194942, "grad_norm": 1.3090814352035522, "learning_rate": 3.547096641664819e-06, "loss": 0.1547698974609375, "step": 4471 }, { "epoch": 0.6231449871107085, "grad_norm": 0.8537712097167969, "learning_rate": 3.5448417135083603e-06, "loss": 0.14498519897460938, "step": 4472 }, { "epoch": 0.6232843308019229, "grad_norm": 1.2277204990386963, "learning_rate": 3.5425871086548513e-06, "loss": 0.12562942504882812, "step": 4473 }, { "epoch": 0.6234236744931373, "grad_norm": 0.7322688102722168, "learning_rate": 3.540332827605214e-06, "loss": 0.11219406127929688, "step": 4474 }, { "epoch": 0.6235630181843517, "grad_norm": 1.4375760555267334, "learning_rate": 3.538078870860297e-06, "loss": 0.15115737915039062, "step": 4475 }, { "epoch": 0.623702361875566, "grad_norm": 0.7899020910263062, "learning_rate": 3.5358252389208777e-06, "loss": 0.13019561767578125, "step": 4476 }, { "epoch": 0.6238417055667804, "grad_norm": 1.0310415029525757, "learning_rate": 3.533571932287663e-06, "loss": 0.16444778442382812, "step": 4477 }, { "epoch": 0.6239810492579948, "grad_norm": 0.5517436265945435, "learning_rate": 3.5313189514612867e-06, "loss": 0.11594009399414062, "step": 4478 }, { "epoch": 0.6241203929492092, "grad_norm": 0.9889384508132935, "learning_rate": 3.5290662969423097e-06, "loss": 0.16524887084960938, "step": 4479 }, { "epoch": 0.6242597366404236, "grad_norm": 0.8312255144119263, "learning_rate": 3.5268139692312163e-06, "loss": 0.1352558135986328, "step": 4480 }, { "epoch": 0.6243990803316379, "grad_norm": 0.8706336617469788, "learning_rate": 3.5245619688284277e-06, "loss": 0.13988494873046875, "step": 4481 }, { "epoch": 0.6245384240228523, "grad_norm": 1.002730369567871, "learning_rate": 3.522310296234285e-06, "loss": 0.14371490478515625, "step": 4482 }, { "epoch": 0.6246777677140668, "grad_norm": 0.722119927406311, "learning_rate": 3.520058951949056e-06, "loss": 0.1427764892578125, "step": 4483 }, { "epoch": 0.6248171114052812, "grad_norm": 1.239729642868042, "learning_rate": 3.517807936472942e-06, "loss": 0.1607208251953125, "step": 4484 }, { "epoch": 0.6249564550964956, "grad_norm": 1.1374669075012207, "learning_rate": 3.515557250306067e-06, "loss": 0.16657447814941406, "step": 4485 }, { "epoch": 0.6250957987877099, "grad_norm": 0.7993919253349304, "learning_rate": 3.5133068939484793e-06, "loss": 0.1419219970703125, "step": 4486 }, { "epoch": 0.6252351424789243, "grad_norm": 1.7291226387023926, "learning_rate": 3.511056867900157e-06, "loss": 0.16359710693359375, "step": 4487 }, { "epoch": 0.6253744861701387, "grad_norm": 0.8171960115432739, "learning_rate": 3.508807172661006e-06, "loss": 0.14126205444335938, "step": 4488 }, { "epoch": 0.6255138298613531, "grad_norm": 1.107182502746582, "learning_rate": 3.506557808730857e-06, "loss": 0.1501007080078125, "step": 4489 }, { "epoch": 0.6256531735525674, "grad_norm": 1.2799549102783203, "learning_rate": 3.504308776609468e-06, "loss": 0.13087844848632812, "step": 4490 }, { "epoch": 0.6257925172437818, "grad_norm": 1.1569850444793701, "learning_rate": 3.502060076796521e-06, "loss": 0.13588714599609375, "step": 4491 }, { "epoch": 0.6259318609349962, "grad_norm": 0.9507628679275513, "learning_rate": 3.4998117097916247e-06, "loss": 0.1334228515625, "step": 4492 }, { "epoch": 0.6260712046262106, "grad_norm": 0.8929740786552429, "learning_rate": 3.4975636760943177e-06, "loss": 0.11861419677734375, "step": 4493 }, { "epoch": 0.626210548317425, "grad_norm": 1.2054256200790405, "learning_rate": 3.49531597620406e-06, "loss": 0.14144134521484375, "step": 4494 }, { "epoch": 0.6263498920086393, "grad_norm": 1.1682075262069702, "learning_rate": 3.4930686106202428e-06, "loss": 0.16631317138671875, "step": 4495 }, { "epoch": 0.6264892356998537, "grad_norm": 0.8724787831306458, "learning_rate": 3.4908215798421737e-06, "loss": 0.14493560791015625, "step": 4496 }, { "epoch": 0.6266285793910681, "grad_norm": 0.7969359755516052, "learning_rate": 3.488574884369095e-06, "loss": 0.13418197631835938, "step": 4497 }, { "epoch": 0.6267679230822825, "grad_norm": 1.0186209678649902, "learning_rate": 3.486328524700171e-06, "loss": 0.12331771850585938, "step": 4498 }, { "epoch": 0.6269072667734968, "grad_norm": 0.9299783706665039, "learning_rate": 3.4840825013344897e-06, "loss": 0.16323471069335938, "step": 4499 }, { "epoch": 0.6270466104647112, "grad_norm": 0.691344141960144, "learning_rate": 3.48183681477107e-06, "loss": 0.1164398193359375, "step": 4500 }, { "epoch": 0.6271859541559256, "grad_norm": 0.9248480200767517, "learning_rate": 3.4795914655088486e-06, "loss": 0.12334060668945312, "step": 4501 }, { "epoch": 0.62732529784714, "grad_norm": 0.8161419034004211, "learning_rate": 3.4773464540466917e-06, "loss": 0.12984085083007812, "step": 4502 }, { "epoch": 0.6274646415383544, "grad_norm": 0.9345267415046692, "learning_rate": 3.47510178088339e-06, "loss": 0.11580848693847656, "step": 4503 }, { "epoch": 0.6276039852295687, "grad_norm": 1.0204416513442993, "learning_rate": 3.4728574465176585e-06, "loss": 0.13219451904296875, "step": 4504 }, { "epoch": 0.6277433289207831, "grad_norm": 1.0726141929626465, "learning_rate": 3.4706134514481372e-06, "loss": 0.15735435485839844, "step": 4505 }, { "epoch": 0.6278826726119975, "grad_norm": 0.8238826990127563, "learning_rate": 3.468369796173392e-06, "loss": 0.13997650146484375, "step": 4506 }, { "epoch": 0.6280220163032119, "grad_norm": 1.5039682388305664, "learning_rate": 3.4661264811919093e-06, "loss": 0.15221405029296875, "step": 4507 }, { "epoch": 0.6281613599944262, "grad_norm": 0.5675734281539917, "learning_rate": 3.4638835070021027e-06, "loss": 0.11181259155273438, "step": 4508 }, { "epoch": 0.6283007036856406, "grad_norm": 0.5511537194252014, "learning_rate": 3.4616408741023113e-06, "loss": 0.11395645141601562, "step": 4509 }, { "epoch": 0.628440047376855, "grad_norm": 0.504237949848175, "learning_rate": 3.459398582990795e-06, "loss": 0.10448837280273438, "step": 4510 }, { "epoch": 0.6285793910680694, "grad_norm": 1.442887783050537, "learning_rate": 3.4571566341657446e-06, "loss": 0.15399169921875, "step": 4511 }, { "epoch": 0.6287187347592837, "grad_norm": 0.5038486123085022, "learning_rate": 3.4549150281252635e-06, "loss": 0.10903167724609375, "step": 4512 }, { "epoch": 0.6288580784504981, "grad_norm": 0.9427143335342407, "learning_rate": 3.452673765367389e-06, "loss": 0.12737274169921875, "step": 4513 }, { "epoch": 0.6289974221417125, "grad_norm": 0.9821219444274902, "learning_rate": 3.450432846390078e-06, "loss": 0.15641403198242188, "step": 4514 }, { "epoch": 0.6291367658329269, "grad_norm": 1.2785789966583252, "learning_rate": 3.4481922716912097e-06, "loss": 0.1623992919921875, "step": 4515 }, { "epoch": 0.6292761095241413, "grad_norm": 1.5852779150009155, "learning_rate": 3.445952041768593e-06, "loss": 0.18513107299804688, "step": 4516 }, { "epoch": 0.6294154532153556, "grad_norm": 0.6374714374542236, "learning_rate": 3.443712157119952e-06, "loss": 0.10548782348632812, "step": 4517 }, { "epoch": 0.62955479690657, "grad_norm": 1.1767878532409668, "learning_rate": 3.4414726182429388e-06, "loss": 0.15835189819335938, "step": 4518 }, { "epoch": 0.6296941405977844, "grad_norm": 1.1752766370773315, "learning_rate": 3.4392334256351265e-06, "loss": 0.1809539794921875, "step": 4519 }, { "epoch": 0.6298334842889988, "grad_norm": 1.5237839221954346, "learning_rate": 3.436994579794016e-06, "loss": 0.17945480346679688, "step": 4520 }, { "epoch": 0.6299728279802131, "grad_norm": 0.7095295786857605, "learning_rate": 3.4347560812170267e-06, "loss": 0.12201118469238281, "step": 4521 }, { "epoch": 0.6301121716714275, "grad_norm": 0.9229053258895874, "learning_rate": 3.4325179304014997e-06, "loss": 0.11752700805664062, "step": 4522 }, { "epoch": 0.6302515153626419, "grad_norm": 1.4918763637542725, "learning_rate": 3.4302801278447028e-06, "loss": 0.16571044921875, "step": 4523 }, { "epoch": 0.6303908590538564, "grad_norm": 1.3977197408676147, "learning_rate": 3.428042674043822e-06, "loss": 0.16275405883789062, "step": 4524 }, { "epoch": 0.6305302027450708, "grad_norm": 0.9982629418373108, "learning_rate": 3.425805569495973e-06, "loss": 0.14513397216796875, "step": 4525 }, { "epoch": 0.6306695464362851, "grad_norm": 0.8605537414550781, "learning_rate": 3.4235688146981854e-06, "loss": 0.16501808166503906, "step": 4526 }, { "epoch": 0.6308088901274995, "grad_norm": 1.5015003681182861, "learning_rate": 3.42133241014742e-06, "loss": 0.16722869873046875, "step": 4527 }, { "epoch": 0.6309482338187139, "grad_norm": 1.1031198501586914, "learning_rate": 3.4190963563405482e-06, "loss": 0.1627655029296875, "step": 4528 }, { "epoch": 0.6310875775099283, "grad_norm": 0.97159343957901, "learning_rate": 3.416860653774374e-06, "loss": 0.14786148071289062, "step": 4529 }, { "epoch": 0.6312269212011427, "grad_norm": 1.243023157119751, "learning_rate": 3.4146253029456195e-06, "loss": 0.16392898559570312, "step": 4530 }, { "epoch": 0.631366264892357, "grad_norm": 0.933708906173706, "learning_rate": 3.4123903043509267e-06, "loss": 0.1548004150390625, "step": 4531 }, { "epoch": 0.6315056085835714, "grad_norm": 1.1650034189224243, "learning_rate": 3.4101556584868646e-06, "loss": 0.13003921508789062, "step": 4532 }, { "epoch": 0.6316449522747858, "grad_norm": 1.1286526918411255, "learning_rate": 3.407921365849917e-06, "loss": 0.14676284790039062, "step": 4533 }, { "epoch": 0.6317842959660002, "grad_norm": 1.2221449613571167, "learning_rate": 3.4056874269364946e-06, "loss": 0.146759033203125, "step": 4534 }, { "epoch": 0.6319236396572145, "grad_norm": 1.1027084589004517, "learning_rate": 3.4034538422429263e-06, "loss": 0.16117477416992188, "step": 4535 }, { "epoch": 0.6320629833484289, "grad_norm": 1.64602792263031, "learning_rate": 3.401220612265465e-06, "loss": 0.1797637939453125, "step": 4536 }, { "epoch": 0.6322023270396433, "grad_norm": 0.8856783509254456, "learning_rate": 3.3989877375002846e-06, "loss": 0.1354522705078125, "step": 4537 }, { "epoch": 0.6323416707308577, "grad_norm": 1.1267306804656982, "learning_rate": 3.3967552184434753e-06, "loss": 0.13698196411132812, "step": 4538 }, { "epoch": 0.632481014422072, "grad_norm": 0.6091804504394531, "learning_rate": 3.3945230555910534e-06, "loss": 0.11912345886230469, "step": 4539 }, { "epoch": 0.6326203581132864, "grad_norm": 1.512067437171936, "learning_rate": 3.3922912494389554e-06, "loss": 0.1855010986328125, "step": 4540 }, { "epoch": 0.6327597018045008, "grad_norm": 0.5330554842948914, "learning_rate": 3.3900598004830377e-06, "loss": 0.11488723754882812, "step": 4541 }, { "epoch": 0.6328990454957152, "grad_norm": 0.8498944640159607, "learning_rate": 3.387828709219075e-06, "loss": 0.145904541015625, "step": 4542 }, { "epoch": 0.6330383891869296, "grad_norm": 1.0485256910324097, "learning_rate": 3.3855979761427705e-06, "loss": 0.15771484375, "step": 4543 }, { "epoch": 0.6331777328781439, "grad_norm": 0.7531256079673767, "learning_rate": 3.3833676017497353e-06, "loss": 0.1299591064453125, "step": 4544 }, { "epoch": 0.6333170765693583, "grad_norm": 0.7073264122009277, "learning_rate": 3.381137586535511e-06, "loss": 0.105804443359375, "step": 4545 }, { "epoch": 0.6334564202605727, "grad_norm": 1.2686039209365845, "learning_rate": 3.3789079309955556e-06, "loss": 0.16094970703125, "step": 4546 }, { "epoch": 0.6335957639517871, "grad_norm": 1.2708852291107178, "learning_rate": 3.3766786356252466e-06, "loss": 0.15554046630859375, "step": 4547 }, { "epoch": 0.6337351076430014, "grad_norm": 0.8253521919250488, "learning_rate": 3.374449700919887e-06, "loss": 0.15071868896484375, "step": 4548 }, { "epoch": 0.6338744513342158, "grad_norm": 0.8398085236549377, "learning_rate": 3.37222112737469e-06, "loss": 0.13257789611816406, "step": 4549 }, { "epoch": 0.6340137950254302, "grad_norm": 0.7176737189292908, "learning_rate": 3.3699929154847957e-06, "loss": 0.13654327392578125, "step": 4550 }, { "epoch": 0.6341531387166446, "grad_norm": 1.431321620941162, "learning_rate": 3.367765065745261e-06, "loss": 0.15473365783691406, "step": 4551 }, { "epoch": 0.634292482407859, "grad_norm": 1.15238356590271, "learning_rate": 3.365537578651065e-06, "loss": 0.1278705596923828, "step": 4552 }, { "epoch": 0.6344318260990733, "grad_norm": 0.8665179014205933, "learning_rate": 3.3633104546971052e-06, "loss": 0.12655258178710938, "step": 4553 }, { "epoch": 0.6345711697902877, "grad_norm": 1.026777982711792, "learning_rate": 3.3610836943781945e-06, "loss": 0.14099502563476562, "step": 4554 }, { "epoch": 0.6347105134815021, "grad_norm": 0.7460346221923828, "learning_rate": 3.358857298189069e-06, "loss": 0.12849807739257812, "step": 4555 }, { "epoch": 0.6348498571727165, "grad_norm": 1.4278239011764526, "learning_rate": 3.356631266624385e-06, "loss": 0.16114425659179688, "step": 4556 }, { "epoch": 0.6349892008639308, "grad_norm": 0.7831341028213501, "learning_rate": 3.3544056001787146e-06, "loss": 0.14413070678710938, "step": 4557 }, { "epoch": 0.6351285445551452, "grad_norm": 0.9741170406341553, "learning_rate": 3.3521802993465513e-06, "loss": 0.15505027770996094, "step": 4558 }, { "epoch": 0.6352678882463596, "grad_norm": 1.740246057510376, "learning_rate": 3.3499553646223037e-06, "loss": 0.15215301513671875, "step": 4559 }, { "epoch": 0.635407231937574, "grad_norm": 0.7198710441589355, "learning_rate": 3.3477307965003026e-06, "loss": 0.1359405517578125, "step": 4560 }, { "epoch": 0.6355465756287884, "grad_norm": 1.8246674537658691, "learning_rate": 3.345506595474798e-06, "loss": 0.1858062744140625, "step": 4561 }, { "epoch": 0.6356859193200027, "grad_norm": 0.9804759621620178, "learning_rate": 3.3432827620399543e-06, "loss": 0.16823196411132812, "step": 4562 }, { "epoch": 0.6358252630112171, "grad_norm": 0.8857194185256958, "learning_rate": 3.3410592966898565e-06, "loss": 0.12035751342773438, "step": 4563 }, { "epoch": 0.6359646067024316, "grad_norm": 1.037232756614685, "learning_rate": 3.3388361999185105e-06, "loss": 0.13057899475097656, "step": 4564 }, { "epoch": 0.636103950393646, "grad_norm": 1.7312430143356323, "learning_rate": 3.3366134722198352e-06, "loss": 0.1762847900390625, "step": 4565 }, { "epoch": 0.6362432940848604, "grad_norm": 1.2541145086288452, "learning_rate": 3.3343911140876704e-06, "loss": 0.15214920043945312, "step": 4566 }, { "epoch": 0.6363826377760747, "grad_norm": 0.8300063610076904, "learning_rate": 3.332169126015773e-06, "loss": 0.1041107177734375, "step": 4567 }, { "epoch": 0.6365219814672891, "grad_norm": 1.1832339763641357, "learning_rate": 3.3299475084978195e-06, "loss": 0.1761322021484375, "step": 4568 }, { "epoch": 0.6366613251585035, "grad_norm": 1.1623331308364868, "learning_rate": 3.3277262620274025e-06, "loss": 0.16775131225585938, "step": 4569 }, { "epoch": 0.6368006688497179, "grad_norm": 1.331512451171875, "learning_rate": 3.3255053870980304e-06, "loss": 0.15863800048828125, "step": 4570 }, { "epoch": 0.6369400125409322, "grad_norm": 0.6122661828994751, "learning_rate": 3.3232848842031306e-06, "loss": 0.12974166870117188, "step": 4571 }, { "epoch": 0.6370793562321466, "grad_norm": 1.3100553750991821, "learning_rate": 3.3210647538360514e-06, "loss": 0.1641998291015625, "step": 4572 }, { "epoch": 0.637218699923361, "grad_norm": 1.687827467918396, "learning_rate": 3.3188449964900527e-06, "loss": 0.1933441162109375, "step": 4573 }, { "epoch": 0.6373580436145754, "grad_norm": 0.4846026301383972, "learning_rate": 3.316625612658315e-06, "loss": 0.09895133972167969, "step": 4574 }, { "epoch": 0.6374973873057898, "grad_norm": 1.3682787418365479, "learning_rate": 3.314406602833933e-06, "loss": 0.13448143005371094, "step": 4575 }, { "epoch": 0.6376367309970041, "grad_norm": 1.3281182050704956, "learning_rate": 3.3121879675099205e-06, "loss": 0.16329193115234375, "step": 4576 }, { "epoch": 0.6377760746882185, "grad_norm": 0.9316226840019226, "learning_rate": 3.3099697071792093e-06, "loss": 0.14733505249023438, "step": 4577 }, { "epoch": 0.6379154183794329, "grad_norm": 0.8270205855369568, "learning_rate": 3.3077518223346448e-06, "loss": 0.15045928955078125, "step": 4578 }, { "epoch": 0.6380547620706473, "grad_norm": 0.6221325993537903, "learning_rate": 3.30553431346899e-06, "loss": 0.12081146240234375, "step": 4579 }, { "epoch": 0.6381941057618616, "grad_norm": 0.9267491102218628, "learning_rate": 3.3033171810749274e-06, "loss": 0.15047073364257812, "step": 4580 }, { "epoch": 0.638333449453076, "grad_norm": 0.6086012125015259, "learning_rate": 3.3011004256450497e-06, "loss": 0.12383270263671875, "step": 4581 }, { "epoch": 0.6384727931442904, "grad_norm": 0.7802524566650391, "learning_rate": 3.2988840476718713e-06, "loss": 0.13745498657226562, "step": 4582 }, { "epoch": 0.6386121368355048, "grad_norm": 0.6126614212989807, "learning_rate": 3.2966680476478196e-06, "loss": 0.11533737182617188, "step": 4583 }, { "epoch": 0.6387514805267192, "grad_norm": 0.9158896803855896, "learning_rate": 3.294452426065241e-06, "loss": 0.14664459228515625, "step": 4584 }, { "epoch": 0.6388908242179335, "grad_norm": 0.5489588379859924, "learning_rate": 3.2922371834163958e-06, "loss": 0.12344741821289062, "step": 4585 }, { "epoch": 0.6390301679091479, "grad_norm": 0.9315679669380188, "learning_rate": 3.2900223201934584e-06, "loss": 0.16442489624023438, "step": 4586 }, { "epoch": 0.6391695116003623, "grad_norm": 0.8355491757392883, "learning_rate": 3.287807836888521e-06, "loss": 0.12778854370117188, "step": 4587 }, { "epoch": 0.6393088552915767, "grad_norm": 0.6426994204521179, "learning_rate": 3.2855937339935933e-06, "loss": 0.12457656860351562, "step": 4588 }, { "epoch": 0.639448198982791, "grad_norm": 0.7668339014053345, "learning_rate": 3.2833800120005977e-06, "loss": 0.11144256591796875, "step": 4589 }, { "epoch": 0.6395875426740054, "grad_norm": 1.0372391939163208, "learning_rate": 3.2811666714013724e-06, "loss": 0.14289093017578125, "step": 4590 }, { "epoch": 0.6397268863652198, "grad_norm": 0.7519617080688477, "learning_rate": 3.2789537126876714e-06, "loss": 0.10736465454101562, "step": 4591 }, { "epoch": 0.6398662300564342, "grad_norm": 0.9551413655281067, "learning_rate": 3.2767411363511613e-06, "loss": 0.1465129852294922, "step": 4592 }, { "epoch": 0.6400055737476485, "grad_norm": 0.8191533088684082, "learning_rate": 3.2745289428834294e-06, "loss": 0.14583206176757812, "step": 4593 }, { "epoch": 0.6401449174388629, "grad_norm": 0.7753344774246216, "learning_rate": 3.272317132775972e-06, "loss": 0.13748550415039062, "step": 4594 }, { "epoch": 0.6402842611300773, "grad_norm": 1.0029107332229614, "learning_rate": 3.270105706520207e-06, "loss": 0.14665603637695312, "step": 4595 }, { "epoch": 0.6404236048212917, "grad_norm": 1.0265626907348633, "learning_rate": 3.267894664607457e-06, "loss": 0.15385055541992188, "step": 4596 }, { "epoch": 0.6405629485125061, "grad_norm": 1.3909274339675903, "learning_rate": 3.265684007528969e-06, "loss": 0.1336669921875, "step": 4597 }, { "epoch": 0.6407022922037204, "grad_norm": 1.1662665605545044, "learning_rate": 3.2634737357758994e-06, "loss": 0.14881134033203125, "step": 4598 }, { "epoch": 0.6408416358949348, "grad_norm": 0.7728201150894165, "learning_rate": 3.261263849839319e-06, "loss": 0.14238357543945312, "step": 4599 }, { "epoch": 0.6409809795861492, "grad_norm": 1.162006139755249, "learning_rate": 3.2590543502102163e-06, "loss": 0.15439605712890625, "step": 4600 }, { "epoch": 0.6411203232773636, "grad_norm": 0.9082977771759033, "learning_rate": 3.256845237379491e-06, "loss": 0.12053298950195312, "step": 4601 }, { "epoch": 0.641259666968578, "grad_norm": 1.0122859477996826, "learning_rate": 3.254636511837957e-06, "loss": 0.13235759735107422, "step": 4602 }, { "epoch": 0.6413990106597923, "grad_norm": 1.0493478775024414, "learning_rate": 3.252428174076341e-06, "loss": 0.14117431640625, "step": 4603 }, { "epoch": 0.6415383543510068, "grad_norm": 1.1109153032302856, "learning_rate": 3.2502202245852887e-06, "loss": 0.15164947509765625, "step": 4604 }, { "epoch": 0.6416776980422212, "grad_norm": 1.1352729797363281, "learning_rate": 3.2480126638553533e-06, "loss": 0.1465740203857422, "step": 4605 }, { "epoch": 0.6418170417334356, "grad_norm": 0.934138834476471, "learning_rate": 3.245805492377007e-06, "loss": 0.1391315460205078, "step": 4606 }, { "epoch": 0.64195638542465, "grad_norm": 0.9632015824317932, "learning_rate": 3.243598710640631e-06, "loss": 0.15840911865234375, "step": 4607 }, { "epoch": 0.6420957291158643, "grad_norm": 1.144814133644104, "learning_rate": 3.2413923191365203e-06, "loss": 0.1578521728515625, "step": 4608 }, { "epoch": 0.6422350728070787, "grad_norm": 0.8832117915153503, "learning_rate": 3.2391863183548877e-06, "loss": 0.15891265869140625, "step": 4609 }, { "epoch": 0.6423744164982931, "grad_norm": 0.9093109965324402, "learning_rate": 3.236980708785854e-06, "loss": 0.13644790649414062, "step": 4610 }, { "epoch": 0.6425137601895075, "grad_norm": 0.6867451071739197, "learning_rate": 3.2347754909194595e-06, "loss": 0.12650299072265625, "step": 4611 }, { "epoch": 0.6426531038807218, "grad_norm": 1.473522663116455, "learning_rate": 3.232570665245648e-06, "loss": 0.16493606567382812, "step": 4612 }, { "epoch": 0.6427924475719362, "grad_norm": 1.4137589931488037, "learning_rate": 3.2303662322542835e-06, "loss": 0.19896697998046875, "step": 4613 }, { "epoch": 0.6429317912631506, "grad_norm": 1.2368963956832886, "learning_rate": 3.2281621924351407e-06, "loss": 0.14597320556640625, "step": 4614 }, { "epoch": 0.643071134954365, "grad_norm": 1.0129854679107666, "learning_rate": 3.2259585462779063e-06, "loss": 0.14334487915039062, "step": 4615 }, { "epoch": 0.6432104786455793, "grad_norm": 1.137221336364746, "learning_rate": 3.2237552942721832e-06, "loss": 0.17392730712890625, "step": 4616 }, { "epoch": 0.6433498223367937, "grad_norm": 1.2797777652740479, "learning_rate": 3.2215524369074802e-06, "loss": 0.1775054931640625, "step": 4617 }, { "epoch": 0.6434891660280081, "grad_norm": 0.7690567374229431, "learning_rate": 3.219349974673223e-06, "loss": 0.1419830322265625, "step": 4618 }, { "epoch": 0.6436285097192225, "grad_norm": 1.367922067642212, "learning_rate": 3.2171479080587475e-06, "loss": 0.15446853637695312, "step": 4619 }, { "epoch": 0.6437678534104369, "grad_norm": 0.822965681552887, "learning_rate": 3.2149462375533046e-06, "loss": 0.155609130859375, "step": 4620 }, { "epoch": 0.6439071971016512, "grad_norm": 0.8512961864471436, "learning_rate": 3.212744963646054e-06, "loss": 0.14736557006835938, "step": 4621 }, { "epoch": 0.6440465407928656, "grad_norm": 0.8216956257820129, "learning_rate": 3.2105440868260706e-06, "loss": 0.15574264526367188, "step": 4622 }, { "epoch": 0.64418588448408, "grad_norm": 0.5997189283370972, "learning_rate": 3.2083436075823353e-06, "loss": 0.1437225341796875, "step": 4623 }, { "epoch": 0.6443252281752944, "grad_norm": 0.4622360169887543, "learning_rate": 3.2061435264037457e-06, "loss": 0.09427642822265625, "step": 4624 }, { "epoch": 0.6444645718665087, "grad_norm": 0.42468705773353577, "learning_rate": 3.2039438437791105e-06, "loss": 0.11519241333007812, "step": 4625 }, { "epoch": 0.6446039155577231, "grad_norm": 0.7639225721359253, "learning_rate": 3.2017445601971474e-06, "loss": 0.12523841857910156, "step": 4626 }, { "epoch": 0.6447432592489375, "grad_norm": 0.5932284593582153, "learning_rate": 3.199545676146492e-06, "loss": 0.10225868225097656, "step": 4627 }, { "epoch": 0.6448826029401519, "grad_norm": 0.6698479056358337, "learning_rate": 3.197347192115679e-06, "loss": 0.11899185180664062, "step": 4628 }, { "epoch": 0.6450219466313662, "grad_norm": 0.785440981388092, "learning_rate": 3.1951491085931657e-06, "loss": 0.13126373291015625, "step": 4629 }, { "epoch": 0.6451612903225806, "grad_norm": 0.6525790095329285, "learning_rate": 3.1929514260673145e-06, "loss": 0.13273239135742188, "step": 4630 }, { "epoch": 0.645300634013795, "grad_norm": 0.8504865169525146, "learning_rate": 3.1907541450264003e-06, "loss": 0.14520645141601562, "step": 4631 }, { "epoch": 0.6454399777050094, "grad_norm": 1.338874340057373, "learning_rate": 3.188557265958612e-06, "loss": 0.1587982177734375, "step": 4632 }, { "epoch": 0.6455793213962238, "grad_norm": 0.7460195422172546, "learning_rate": 3.186360789352041e-06, "loss": 0.10771369934082031, "step": 4633 }, { "epoch": 0.6457186650874381, "grad_norm": 1.747907280921936, "learning_rate": 3.184164715694697e-06, "loss": 0.16727066040039062, "step": 4634 }, { "epoch": 0.6458580087786525, "grad_norm": 0.6287732720375061, "learning_rate": 3.1819690454744956e-06, "loss": 0.12096786499023438, "step": 4635 }, { "epoch": 0.6459973524698669, "grad_norm": 1.665893316268921, "learning_rate": 3.1797737791792672e-06, "loss": 0.1791534423828125, "step": 4636 }, { "epoch": 0.6461366961610813, "grad_norm": 0.7929869890213013, "learning_rate": 3.1775789172967486e-06, "loss": 0.13660049438476562, "step": 4637 }, { "epoch": 0.6462760398522956, "grad_norm": 0.9108754396438599, "learning_rate": 3.1753844603145894e-06, "loss": 0.15311050415039062, "step": 4638 }, { "epoch": 0.64641538354351, "grad_norm": 1.2378425598144531, "learning_rate": 3.1731904087203442e-06, "loss": 0.1915740966796875, "step": 4639 }, { "epoch": 0.6465547272347244, "grad_norm": 0.678006112575531, "learning_rate": 3.1709967630014844e-06, "loss": 0.12313270568847656, "step": 4640 }, { "epoch": 0.6466940709259388, "grad_norm": 0.9738245010375977, "learning_rate": 3.168803523645387e-06, "loss": 0.12084197998046875, "step": 4641 }, { "epoch": 0.6468334146171532, "grad_norm": 1.2507836818695068, "learning_rate": 3.166610691139338e-06, "loss": 0.1542205810546875, "step": 4642 }, { "epoch": 0.6469727583083675, "grad_norm": 0.8230048418045044, "learning_rate": 3.1644182659705403e-06, "loss": 0.1297760009765625, "step": 4643 }, { "epoch": 0.647112101999582, "grad_norm": 1.4663169384002686, "learning_rate": 3.1622262486260936e-06, "loss": 0.1716604232788086, "step": 4644 }, { "epoch": 0.6472514456907964, "grad_norm": 0.7723669409751892, "learning_rate": 3.160034639593018e-06, "loss": 0.13499069213867188, "step": 4645 }, { "epoch": 0.6473907893820108, "grad_norm": 0.85855633020401, "learning_rate": 3.1578434393582392e-06, "loss": 0.14255332946777344, "step": 4646 }, { "epoch": 0.6475301330732252, "grad_norm": 0.9433040022850037, "learning_rate": 3.155652648408589e-06, "loss": 0.1682891845703125, "step": 4647 }, { "epoch": 0.6476694767644395, "grad_norm": 0.8654564619064331, "learning_rate": 3.1534622672308165e-06, "loss": 0.14417266845703125, "step": 4648 }, { "epoch": 0.6478088204556539, "grad_norm": 0.6666261553764343, "learning_rate": 3.1512722963115693e-06, "loss": 0.1218719482421875, "step": 4649 }, { "epoch": 0.6479481641468683, "grad_norm": 1.2117854356765747, "learning_rate": 3.1490827361374105e-06, "loss": 0.148651123046875, "step": 4650 }, { "epoch": 0.6480875078380827, "grad_norm": 1.0508878231048584, "learning_rate": 3.1468935871948096e-06, "loss": 0.17780303955078125, "step": 4651 }, { "epoch": 0.648226851529297, "grad_norm": 1.1358122825622559, "learning_rate": 3.1447048499701478e-06, "loss": 0.165985107421875, "step": 4652 }, { "epoch": 0.6483661952205114, "grad_norm": 1.091977596282959, "learning_rate": 3.1425165249497118e-06, "loss": 0.1648273468017578, "step": 4653 }, { "epoch": 0.6485055389117258, "grad_norm": 1.5721251964569092, "learning_rate": 3.1403286126196963e-06, "loss": 0.16616439819335938, "step": 4654 }, { "epoch": 0.6486448826029402, "grad_norm": 1.528577208518982, "learning_rate": 3.138141113466205e-06, "loss": 0.17347335815429688, "step": 4655 }, { "epoch": 0.6487842262941546, "grad_norm": 1.1287380456924438, "learning_rate": 3.135954027975252e-06, "loss": 0.1309051513671875, "step": 4656 }, { "epoch": 0.6489235699853689, "grad_norm": 1.015032410621643, "learning_rate": 3.1337673566327575e-06, "loss": 0.12497138977050781, "step": 4657 }, { "epoch": 0.6490629136765833, "grad_norm": 0.8496800661087036, "learning_rate": 3.1315810999245483e-06, "loss": 0.1334075927734375, "step": 4658 }, { "epoch": 0.6492022573677977, "grad_norm": 2.2267351150512695, "learning_rate": 3.1293952583363653e-06, "loss": 0.16094589233398438, "step": 4659 }, { "epoch": 0.6493416010590121, "grad_norm": 0.9488645792007446, "learning_rate": 3.127209832353846e-06, "loss": 0.12742996215820312, "step": 4660 }, { "epoch": 0.6494809447502264, "grad_norm": 1.2158149480819702, "learning_rate": 3.1250248224625463e-06, "loss": 0.1610107421875, "step": 4661 }, { "epoch": 0.6496202884414408, "grad_norm": 0.9001191854476929, "learning_rate": 3.1228402291479243e-06, "loss": 0.15799713134765625, "step": 4662 }, { "epoch": 0.6497596321326552, "grad_norm": 0.8772286772727966, "learning_rate": 3.1206560528953467e-06, "loss": 0.1353912353515625, "step": 4663 }, { "epoch": 0.6498989758238696, "grad_norm": 1.0089787244796753, "learning_rate": 3.1184722941900902e-06, "loss": 0.14268875122070312, "step": 4664 }, { "epoch": 0.650038319515084, "grad_norm": 0.7324913740158081, "learning_rate": 3.1162889535173323e-06, "loss": 0.11931610107421875, "step": 4665 }, { "epoch": 0.6501776632062983, "grad_norm": 0.7357174158096313, "learning_rate": 3.1141060313621637e-06, "loss": 0.12710189819335938, "step": 4666 }, { "epoch": 0.6503170068975127, "grad_norm": 1.5005797147750854, "learning_rate": 3.111923528209577e-06, "loss": 0.16225814819335938, "step": 4667 }, { "epoch": 0.6504563505887271, "grad_norm": 0.8888037800788879, "learning_rate": 3.1097414445444796e-06, "loss": 0.15997695922851562, "step": 4668 }, { "epoch": 0.6505956942799415, "grad_norm": 0.8245553374290466, "learning_rate": 3.1075597808516776e-06, "loss": 0.14233779907226562, "step": 4669 }, { "epoch": 0.6507350379711558, "grad_norm": 1.3072093725204468, "learning_rate": 3.1053785376158865e-06, "loss": 0.18928909301757812, "step": 4670 }, { "epoch": 0.6508743816623702, "grad_norm": 1.4528242349624634, "learning_rate": 3.1031977153217286e-06, "loss": 0.16806793212890625, "step": 4671 }, { "epoch": 0.6510137253535846, "grad_norm": 1.0551190376281738, "learning_rate": 3.1010173144537348e-06, "loss": 0.164642333984375, "step": 4672 }, { "epoch": 0.651153069044799, "grad_norm": 1.119523286819458, "learning_rate": 3.0988373354963387e-06, "loss": 0.16031265258789062, "step": 4673 }, { "epoch": 0.6512924127360133, "grad_norm": 0.9315175414085388, "learning_rate": 3.0966577789338812e-06, "loss": 0.1417865753173828, "step": 4674 }, { "epoch": 0.6514317564272277, "grad_norm": 0.8970337510108948, "learning_rate": 3.0944786452506147e-06, "loss": 0.14223861694335938, "step": 4675 }, { "epoch": 0.6515711001184421, "grad_norm": 1.2515188455581665, "learning_rate": 3.092299934930686e-06, "loss": 0.14822769165039062, "step": 4676 }, { "epoch": 0.6517104438096565, "grad_norm": 0.8437542915344238, "learning_rate": 3.0901216484581597e-06, "loss": 0.14023971557617188, "step": 4677 }, { "epoch": 0.6518497875008709, "grad_norm": 1.298586368560791, "learning_rate": 3.087943786316999e-06, "loss": 0.1951141357421875, "step": 4678 }, { "epoch": 0.6519891311920852, "grad_norm": 0.6100492477416992, "learning_rate": 3.085766348991076e-06, "loss": 0.11142349243164062, "step": 4679 }, { "epoch": 0.6521284748832996, "grad_norm": 0.9177610278129578, "learning_rate": 3.0835893369641694e-06, "loss": 0.11529922485351562, "step": 4680 }, { "epoch": 0.652267818574514, "grad_norm": 1.1272575855255127, "learning_rate": 3.0814127507199587e-06, "loss": 0.16241455078125, "step": 4681 }, { "epoch": 0.6524071622657284, "grad_norm": 0.7945782542228699, "learning_rate": 3.0792365907420323e-06, "loss": 0.14188766479492188, "step": 4682 }, { "epoch": 0.6525465059569427, "grad_norm": 1.0652225017547607, "learning_rate": 3.0770608575138825e-06, "loss": 0.14563369750976562, "step": 4683 }, { "epoch": 0.6526858496481572, "grad_norm": 0.8004659414291382, "learning_rate": 3.0748855515189104e-06, "loss": 0.11510086059570312, "step": 4684 }, { "epoch": 0.6528251933393716, "grad_norm": 0.6956062912940979, "learning_rate": 3.0727106732404183e-06, "loss": 0.12682342529296875, "step": 4685 }, { "epoch": 0.652964537030586, "grad_norm": 0.9263647794723511, "learning_rate": 3.0705362231616133e-06, "loss": 0.1298828125, "step": 4686 }, { "epoch": 0.6531038807218004, "grad_norm": 0.8419755101203918, "learning_rate": 3.0683622017656074e-06, "loss": 0.1443939208984375, "step": 4687 }, { "epoch": 0.6532432244130147, "grad_norm": 0.6663353443145752, "learning_rate": 3.066188609535421e-06, "loss": 0.12413406372070312, "step": 4688 }, { "epoch": 0.6533825681042291, "grad_norm": 0.7251214385032654, "learning_rate": 3.064015446953977e-06, "loss": 0.12059783935546875, "step": 4689 }, { "epoch": 0.6535219117954435, "grad_norm": 0.881909191608429, "learning_rate": 3.0618427145041017e-06, "loss": 0.12939453125, "step": 4690 }, { "epoch": 0.6536612554866579, "grad_norm": 1.2397674322128296, "learning_rate": 3.059670412668525e-06, "loss": 0.16633224487304688, "step": 4691 }, { "epoch": 0.6538005991778723, "grad_norm": 0.5104419589042664, "learning_rate": 3.0574985419298843e-06, "loss": 0.1124114990234375, "step": 4692 }, { "epoch": 0.6539399428690866, "grad_norm": 0.9745343923568726, "learning_rate": 3.055327102770719e-06, "loss": 0.15343666076660156, "step": 4693 }, { "epoch": 0.654079286560301, "grad_norm": 1.2525862455368042, "learning_rate": 3.053156095673474e-06, "loss": 0.15561676025390625, "step": 4694 }, { "epoch": 0.6542186302515154, "grad_norm": 0.9697344303131104, "learning_rate": 3.0509855211204976e-06, "loss": 0.1557464599609375, "step": 4695 }, { "epoch": 0.6543579739427298, "grad_norm": 0.9806828498840332, "learning_rate": 3.048815379594043e-06, "loss": 0.10562515258789062, "step": 4696 }, { "epoch": 0.6544973176339441, "grad_norm": 0.7321063280105591, "learning_rate": 3.046645671576264e-06, "loss": 0.13936233520507812, "step": 4697 }, { "epoch": 0.6546366613251585, "grad_norm": 0.7563351988792419, "learning_rate": 3.044476397549221e-06, "loss": 0.13006973266601562, "step": 4698 }, { "epoch": 0.6547760050163729, "grad_norm": 0.7788310647010803, "learning_rate": 3.0423075579948756e-06, "loss": 0.13813400268554688, "step": 4699 }, { "epoch": 0.6549153487075873, "grad_norm": 0.6173183917999268, "learning_rate": 3.0401391533950976e-06, "loss": 0.12200546264648438, "step": 4700 }, { "epoch": 0.6550546923988017, "grad_norm": 1.0986170768737793, "learning_rate": 3.037971184231655e-06, "loss": 0.17124176025390625, "step": 4701 }, { "epoch": 0.655194036090016, "grad_norm": 0.6567041277885437, "learning_rate": 3.035803650986222e-06, "loss": 0.11057662963867188, "step": 4702 }, { "epoch": 0.6553333797812304, "grad_norm": 1.3299741744995117, "learning_rate": 3.0336365541403723e-06, "loss": 0.15996551513671875, "step": 4703 }, { "epoch": 0.6554727234724448, "grad_norm": 1.0964702367782593, "learning_rate": 3.0314698941755886e-06, "loss": 0.13954925537109375, "step": 4704 }, { "epoch": 0.6556120671636592, "grad_norm": 0.46213364601135254, "learning_rate": 3.0293036715732527e-06, "loss": 0.09739875793457031, "step": 4705 }, { "epoch": 0.6557514108548735, "grad_norm": 0.7909055352210999, "learning_rate": 3.0271378868146494e-06, "loss": 0.13411331176757812, "step": 4706 }, { "epoch": 0.6558907545460879, "grad_norm": 1.1323027610778809, "learning_rate": 3.024972540380966e-06, "loss": 0.1454753875732422, "step": 4707 }, { "epoch": 0.6560300982373023, "grad_norm": 0.7478463053703308, "learning_rate": 3.0228076327532925e-06, "loss": 0.1429271697998047, "step": 4708 }, { "epoch": 0.6561694419285167, "grad_norm": 1.2725380659103394, "learning_rate": 3.0206431644126234e-06, "loss": 0.16881561279296875, "step": 4709 }, { "epoch": 0.656308785619731, "grad_norm": 1.1451507806777954, "learning_rate": 3.0184791358398537e-06, "loss": 0.13781356811523438, "step": 4710 }, { "epoch": 0.6564481293109454, "grad_norm": 0.5021770596504211, "learning_rate": 3.016315547515783e-06, "loss": 0.1083831787109375, "step": 4711 }, { "epoch": 0.6565874730021598, "grad_norm": 1.5814114809036255, "learning_rate": 3.0141523999211065e-06, "loss": 0.16356277465820312, "step": 4712 }, { "epoch": 0.6567268166933742, "grad_norm": 0.919849157333374, "learning_rate": 3.0119896935364305e-06, "loss": 0.13890838623046875, "step": 4713 }, { "epoch": 0.6568661603845886, "grad_norm": 1.1025187969207764, "learning_rate": 3.009827428842258e-06, "loss": 0.14367294311523438, "step": 4714 }, { "epoch": 0.6570055040758029, "grad_norm": 0.6433926820755005, "learning_rate": 3.0076656063189926e-06, "loss": 0.1263103485107422, "step": 4715 }, { "epoch": 0.6571448477670173, "grad_norm": 0.932594895362854, "learning_rate": 3.0055042264469447e-06, "loss": 0.1404590606689453, "step": 4716 }, { "epoch": 0.6572841914582317, "grad_norm": 0.9228476285934448, "learning_rate": 3.003343289706324e-06, "loss": 0.156768798828125, "step": 4717 }, { "epoch": 0.6574235351494461, "grad_norm": 0.7171561121940613, "learning_rate": 3.001182796577239e-06, "loss": 0.14138031005859375, "step": 4718 }, { "epoch": 0.6575628788406604, "grad_norm": 0.8809361457824707, "learning_rate": 2.999022747539701e-06, "loss": 0.13382530212402344, "step": 4719 }, { "epoch": 0.6577022225318748, "grad_norm": 0.5580158233642578, "learning_rate": 2.9968631430736274e-06, "loss": 0.1134796142578125, "step": 4720 }, { "epoch": 0.6578415662230892, "grad_norm": 0.6450066566467285, "learning_rate": 2.99470398365883e-06, "loss": 0.11502456665039062, "step": 4721 }, { "epoch": 0.6579809099143036, "grad_norm": 0.8891087174415588, "learning_rate": 2.9925452697750275e-06, "loss": 0.134033203125, "step": 4722 }, { "epoch": 0.658120253605518, "grad_norm": 1.667590618133545, "learning_rate": 2.990387001901834e-06, "loss": 0.18166351318359375, "step": 4723 }, { "epoch": 0.6582595972967323, "grad_norm": 0.9811928272247314, "learning_rate": 2.988229180518767e-06, "loss": 0.1135101318359375, "step": 4724 }, { "epoch": 0.6583989409879468, "grad_norm": 1.3307362794876099, "learning_rate": 2.9860718061052478e-06, "loss": 0.18591690063476562, "step": 4725 }, { "epoch": 0.6585382846791612, "grad_norm": 1.1020376682281494, "learning_rate": 2.9839148791405937e-06, "loss": 0.1350250244140625, "step": 4726 }, { "epoch": 0.6586776283703756, "grad_norm": 0.6961632370948792, "learning_rate": 2.981758400104028e-06, "loss": 0.12077713012695312, "step": 4727 }, { "epoch": 0.65881697206159, "grad_norm": 0.9706714749336243, "learning_rate": 2.979602369474667e-06, "loss": 0.14073562622070312, "step": 4728 }, { "epoch": 0.6589563157528043, "grad_norm": 1.2482408285140991, "learning_rate": 2.977446787731532e-06, "loss": 0.18415451049804688, "step": 4729 }, { "epoch": 0.6590956594440187, "grad_norm": 0.6935722231864929, "learning_rate": 2.975291655353546e-06, "loss": 0.12193679809570312, "step": 4730 }, { "epoch": 0.6592350031352331, "grad_norm": 1.4921457767486572, "learning_rate": 2.9731369728195288e-06, "loss": 0.19184494018554688, "step": 4731 }, { "epoch": 0.6593743468264475, "grad_norm": 1.7641819715499878, "learning_rate": 2.9709827406082028e-06, "loss": 0.18555068969726562, "step": 4732 }, { "epoch": 0.6595136905176618, "grad_norm": 1.2236196994781494, "learning_rate": 2.9688289591981887e-06, "loss": 0.14319610595703125, "step": 4733 }, { "epoch": 0.6596530342088762, "grad_norm": 0.7262697815895081, "learning_rate": 2.9666756290680078e-06, "loss": 0.138397216796875, "step": 4734 }, { "epoch": 0.6597923779000906, "grad_norm": 1.1238998174667358, "learning_rate": 2.964522750696079e-06, "loss": 0.17373275756835938, "step": 4735 }, { "epoch": 0.659931721591305, "grad_norm": 0.8316878080368042, "learning_rate": 2.962370324560725e-06, "loss": 0.1243143081665039, "step": 4736 }, { "epoch": 0.6600710652825194, "grad_norm": 0.7072175741195679, "learning_rate": 2.9602183511401656e-06, "loss": 0.12478256225585938, "step": 4737 }, { "epoch": 0.6602104089737337, "grad_norm": 1.2214298248291016, "learning_rate": 2.9580668309125203e-06, "loss": 0.17766189575195312, "step": 4738 }, { "epoch": 0.6603497526649481, "grad_norm": 0.8806374073028564, "learning_rate": 2.9559157643558046e-06, "loss": 0.12864303588867188, "step": 4739 }, { "epoch": 0.6604890963561625, "grad_norm": 0.9671952724456787, "learning_rate": 2.9537651519479403e-06, "loss": 0.14180374145507812, "step": 4740 }, { "epoch": 0.6606284400473769, "grad_norm": 0.8033651113510132, "learning_rate": 2.951614994166743e-06, "loss": 0.12311172485351562, "step": 4741 }, { "epoch": 0.6607677837385912, "grad_norm": 0.7036438584327698, "learning_rate": 2.9494652914899267e-06, "loss": 0.12744522094726562, "step": 4742 }, { "epoch": 0.6609071274298056, "grad_norm": 0.7412497997283936, "learning_rate": 2.947316044395112e-06, "loss": 0.14032363891601562, "step": 4743 }, { "epoch": 0.66104647112102, "grad_norm": 1.0271631479263306, "learning_rate": 2.945167253359806e-06, "loss": 0.13642120361328125, "step": 4744 }, { "epoch": 0.6611858148122344, "grad_norm": 1.084274411201477, "learning_rate": 2.943018918861424e-06, "loss": 0.1522979736328125, "step": 4745 }, { "epoch": 0.6613251585034488, "grad_norm": 0.7014951109886169, "learning_rate": 2.940871041377277e-06, "loss": 0.11790084838867188, "step": 4746 }, { "epoch": 0.6614645021946631, "grad_norm": 0.760520875453949, "learning_rate": 2.938723621384572e-06, "loss": 0.11188316345214844, "step": 4747 }, { "epoch": 0.6616038458858775, "grad_norm": 0.7107992172241211, "learning_rate": 2.936576659360421e-06, "loss": 0.13560104370117188, "step": 4748 }, { "epoch": 0.6617431895770919, "grad_norm": 1.245070457458496, "learning_rate": 2.9344301557818267e-06, "loss": 0.15518951416015625, "step": 4749 }, { "epoch": 0.6618825332683063, "grad_norm": 1.4133862257003784, "learning_rate": 2.9322841111256937e-06, "loss": 0.14091110229492188, "step": 4750 }, { "epoch": 0.6620218769595206, "grad_norm": 1.3182324171066284, "learning_rate": 2.930138525868824e-06, "loss": 0.14498519897460938, "step": 4751 }, { "epoch": 0.662161220650735, "grad_norm": 1.0796313285827637, "learning_rate": 2.927993400487919e-06, "loss": 0.13535690307617188, "step": 4752 }, { "epoch": 0.6623005643419494, "grad_norm": 0.8003983497619629, "learning_rate": 2.9258487354595754e-06, "loss": 0.14121627807617188, "step": 4753 }, { "epoch": 0.6624399080331638, "grad_norm": 1.3410711288452148, "learning_rate": 2.9237045312602908e-06, "loss": 0.14446258544921875, "step": 4754 }, { "epoch": 0.6625792517243781, "grad_norm": 0.7571313381195068, "learning_rate": 2.921560788366454e-06, "loss": 0.12185287475585938, "step": 4755 }, { "epoch": 0.6627185954155925, "grad_norm": 0.7638202905654907, "learning_rate": 2.9194175072543594e-06, "loss": 0.1329193115234375, "step": 4756 }, { "epoch": 0.6628579391068069, "grad_norm": 1.446527123451233, "learning_rate": 2.9172746884001944e-06, "loss": 0.16414642333984375, "step": 4757 }, { "epoch": 0.6629972827980213, "grad_norm": 1.5096814632415771, "learning_rate": 2.9151323322800433e-06, "loss": 0.17719268798828125, "step": 4758 }, { "epoch": 0.6631366264892357, "grad_norm": 1.2983182668685913, "learning_rate": 2.9129904393698917e-06, "loss": 0.14975929260253906, "step": 4759 }, { "epoch": 0.66327597018045, "grad_norm": 0.8768365383148193, "learning_rate": 2.910849010145617e-06, "loss": 0.14471435546875, "step": 4760 }, { "epoch": 0.6634153138716644, "grad_norm": 0.6523977518081665, "learning_rate": 2.908708045082994e-06, "loss": 0.12810516357421875, "step": 4761 }, { "epoch": 0.6635546575628788, "grad_norm": 1.2454843521118164, "learning_rate": 2.906567544657699e-06, "loss": 0.17136383056640625, "step": 4762 }, { "epoch": 0.6636940012540932, "grad_norm": 1.0625759363174438, "learning_rate": 2.9044275093453034e-06, "loss": 0.15484237670898438, "step": 4763 }, { "epoch": 0.6638333449453075, "grad_norm": 1.8046036958694458, "learning_rate": 2.902287939621272e-06, "loss": 0.1571502685546875, "step": 4764 }, { "epoch": 0.663972688636522, "grad_norm": 0.6922909617424011, "learning_rate": 2.9001488359609676e-06, "loss": 0.12500381469726562, "step": 4765 }, { "epoch": 0.6641120323277364, "grad_norm": 1.2537944316864014, "learning_rate": 2.898010198839651e-06, "loss": 0.1609630584716797, "step": 4766 }, { "epoch": 0.6642513760189508, "grad_norm": 0.9654868841171265, "learning_rate": 2.895872028732481e-06, "loss": 0.14804458618164062, "step": 4767 }, { "epoch": 0.6643907197101652, "grad_norm": 1.211026906967163, "learning_rate": 2.893734326114506e-06, "loss": 0.13835525512695312, "step": 4768 }, { "epoch": 0.6645300634013795, "grad_norm": 0.6225255727767944, "learning_rate": 2.8915970914606793e-06, "loss": 0.12233734130859375, "step": 4769 }, { "epoch": 0.6646694070925939, "grad_norm": 0.44368186593055725, "learning_rate": 2.8894603252458407e-06, "loss": 0.10792922973632812, "step": 4770 }, { "epoch": 0.6648087507838083, "grad_norm": 0.6966109275817871, "learning_rate": 2.8873240279447355e-06, "loss": 0.12547874450683594, "step": 4771 }, { "epoch": 0.6649480944750227, "grad_norm": 1.0858778953552246, "learning_rate": 2.8851882000319966e-06, "loss": 0.16184234619140625, "step": 4772 }, { "epoch": 0.665087438166237, "grad_norm": 1.6476274728775024, "learning_rate": 2.883052841982157e-06, "loss": 0.14179229736328125, "step": 4773 }, { "epoch": 0.6652267818574514, "grad_norm": 0.5537122488021851, "learning_rate": 2.8809179542696474e-06, "loss": 0.10686492919921875, "step": 4774 }, { "epoch": 0.6653661255486658, "grad_norm": 0.5765326023101807, "learning_rate": 2.878783537368789e-06, "loss": 0.09531021118164062, "step": 4775 }, { "epoch": 0.6655054692398802, "grad_norm": 1.3566216230392456, "learning_rate": 2.8766495917537985e-06, "loss": 0.13991928100585938, "step": 4776 }, { "epoch": 0.6656448129310946, "grad_norm": 0.6505550146102905, "learning_rate": 2.874516117898792e-06, "loss": 0.1285247802734375, "step": 4777 }, { "epoch": 0.6657841566223089, "grad_norm": 0.7471497654914856, "learning_rate": 2.8723831162777806e-06, "loss": 0.12810516357421875, "step": 4778 }, { "epoch": 0.6659235003135233, "grad_norm": 0.834114134311676, "learning_rate": 2.8702505873646636e-06, "loss": 0.13141632080078125, "step": 4779 }, { "epoch": 0.6660628440047377, "grad_norm": 1.0664094686508179, "learning_rate": 2.8681185316332453e-06, "loss": 0.14857864379882812, "step": 4780 }, { "epoch": 0.6662021876959521, "grad_norm": 0.623386025428772, "learning_rate": 2.865986949557218e-06, "loss": 0.10449600219726562, "step": 4781 }, { "epoch": 0.6663415313871665, "grad_norm": 1.1433465480804443, "learning_rate": 2.8638558416101683e-06, "loss": 0.1435089111328125, "step": 4782 }, { "epoch": 0.6664808750783808, "grad_norm": 0.8999923467636108, "learning_rate": 2.8617252082655813e-06, "loss": 0.13927459716796875, "step": 4783 }, { "epoch": 0.6666202187695952, "grad_norm": 0.7090534567832947, "learning_rate": 2.8595950499968352e-06, "loss": 0.137115478515625, "step": 4784 }, { "epoch": 0.6667595624608096, "grad_norm": 1.047314167022705, "learning_rate": 2.8574653672772068e-06, "loss": 0.12110519409179688, "step": 4785 }, { "epoch": 0.666898906152024, "grad_norm": 1.124475359916687, "learning_rate": 2.8553361605798545e-06, "loss": 0.14569854736328125, "step": 4786 }, { "epoch": 0.6670382498432383, "grad_norm": 0.7980071902275085, "learning_rate": 2.8532074303778446e-06, "loss": 0.12326812744140625, "step": 4787 }, { "epoch": 0.6671775935344527, "grad_norm": 1.3690193891525269, "learning_rate": 2.8510791771441327e-06, "loss": 0.17091751098632812, "step": 4788 }, { "epoch": 0.6673169372256671, "grad_norm": 0.8977006673812866, "learning_rate": 2.8489514013515656e-06, "loss": 0.10653877258300781, "step": 4789 }, { "epoch": 0.6674562809168815, "grad_norm": 1.023292899131775, "learning_rate": 2.8468241034728878e-06, "loss": 0.1319732666015625, "step": 4790 }, { "epoch": 0.6675956246080959, "grad_norm": 1.0337921380996704, "learning_rate": 2.8446972839807384e-06, "loss": 0.1414337158203125, "step": 4791 }, { "epoch": 0.6677349682993102, "grad_norm": 1.7068372964859009, "learning_rate": 2.8425709433476455e-06, "loss": 0.16914939880371094, "step": 4792 }, { "epoch": 0.6678743119905246, "grad_norm": 0.8043528199195862, "learning_rate": 2.8404450820460326e-06, "loss": 0.13151931762695312, "step": 4793 }, { "epoch": 0.668013655681739, "grad_norm": 0.7854050397872925, "learning_rate": 2.8383197005482187e-06, "loss": 0.12791824340820312, "step": 4794 }, { "epoch": 0.6681529993729534, "grad_norm": 0.7977933883666992, "learning_rate": 2.8361947993264185e-06, "loss": 0.12950897216796875, "step": 4795 }, { "epoch": 0.6682923430641677, "grad_norm": 1.3772070407867432, "learning_rate": 2.834070378852732e-06, "loss": 0.176849365234375, "step": 4796 }, { "epoch": 0.6684316867553821, "grad_norm": 1.4490901231765747, "learning_rate": 2.8319464395991567e-06, "loss": 0.167388916015625, "step": 4797 }, { "epoch": 0.6685710304465965, "grad_norm": 0.7665924429893494, "learning_rate": 2.829822982037585e-06, "loss": 0.12734222412109375, "step": 4798 }, { "epoch": 0.6687103741378109, "grad_norm": 1.0697698593139648, "learning_rate": 2.8277000066398032e-06, "loss": 0.1605072021484375, "step": 4799 }, { "epoch": 0.6688497178290252, "grad_norm": 1.0818713903427124, "learning_rate": 2.8255775138774827e-06, "loss": 0.13767242431640625, "step": 4800 }, { "epoch": 0.6689890615202396, "grad_norm": 1.6008069515228271, "learning_rate": 2.823455504222198e-06, "loss": 0.1851043701171875, "step": 4801 }, { "epoch": 0.669128405211454, "grad_norm": 1.2234381437301636, "learning_rate": 2.821333978145407e-06, "loss": 0.1504077911376953, "step": 4802 }, { "epoch": 0.6692677489026684, "grad_norm": 0.6941165328025818, "learning_rate": 2.8192129361184685e-06, "loss": 0.12724685668945312, "step": 4803 }, { "epoch": 0.6694070925938828, "grad_norm": 1.0869184732437134, "learning_rate": 2.817092378612625e-06, "loss": 0.13352203369140625, "step": 4804 }, { "epoch": 0.6695464362850972, "grad_norm": 1.533772587776184, "learning_rate": 2.814972306099018e-06, "loss": 0.13127899169921875, "step": 4805 }, { "epoch": 0.6696857799763116, "grad_norm": 1.0055516958236694, "learning_rate": 2.8128527190486823e-06, "loss": 0.16178131103515625, "step": 4806 }, { "epoch": 0.669825123667526, "grad_norm": 0.7031250596046448, "learning_rate": 2.8107336179325383e-06, "loss": 0.1218414306640625, "step": 4807 }, { "epoch": 0.6699644673587404, "grad_norm": 0.8965019583702087, "learning_rate": 2.808615003221401e-06, "loss": 0.14923858642578125, "step": 4808 }, { "epoch": 0.6701038110499548, "grad_norm": 1.0498952865600586, "learning_rate": 2.80649687538598e-06, "loss": 0.14690399169921875, "step": 4809 }, { "epoch": 0.6702431547411691, "grad_norm": 0.7830439209938049, "learning_rate": 2.8043792348968767e-06, "loss": 0.13771820068359375, "step": 4810 }, { "epoch": 0.6703824984323835, "grad_norm": 1.051305890083313, "learning_rate": 2.8022620822245782e-06, "loss": 0.12154388427734375, "step": 4811 }, { "epoch": 0.6705218421235979, "grad_norm": 0.8091933727264404, "learning_rate": 2.8001454178394715e-06, "loss": 0.15571975708007812, "step": 4812 }, { "epoch": 0.6706611858148123, "grad_norm": 1.4422303438186646, "learning_rate": 2.7980292422118282e-06, "loss": 0.15295791625976562, "step": 4813 }, { "epoch": 0.6708005295060266, "grad_norm": 1.286285161972046, "learning_rate": 2.795913555811817e-06, "loss": 0.20518875122070312, "step": 4814 }, { "epoch": 0.670939873197241, "grad_norm": 1.0542103052139282, "learning_rate": 2.793798359109492e-06, "loss": 0.16791534423828125, "step": 4815 }, { "epoch": 0.6710792168884554, "grad_norm": 0.8934426307678223, "learning_rate": 2.7916836525748024e-06, "loss": 0.13177108764648438, "step": 4816 }, { "epoch": 0.6712185605796698, "grad_norm": 1.1675974130630493, "learning_rate": 2.7895694366775934e-06, "loss": 0.15253829956054688, "step": 4817 }, { "epoch": 0.6713579042708842, "grad_norm": 0.9733559489250183, "learning_rate": 2.7874557118875863e-06, "loss": 0.14877700805664062, "step": 4818 }, { "epoch": 0.6714972479620985, "grad_norm": 1.0684139728546143, "learning_rate": 2.7853424786744068e-06, "loss": 0.16264724731445312, "step": 4819 }, { "epoch": 0.6716365916533129, "grad_norm": 0.5517742037773132, "learning_rate": 2.7832297375075685e-06, "loss": 0.11602592468261719, "step": 4820 }, { "epoch": 0.6717759353445273, "grad_norm": 0.6448459625244141, "learning_rate": 2.7811174888564713e-06, "loss": 0.12151527404785156, "step": 4821 }, { "epoch": 0.6719152790357417, "grad_norm": 1.64206862449646, "learning_rate": 2.779005733190412e-06, "loss": 0.18152618408203125, "step": 4822 }, { "epoch": 0.672054622726956, "grad_norm": 0.8501552939414978, "learning_rate": 2.7768944709785705e-06, "loss": 0.13589859008789062, "step": 4823 }, { "epoch": 0.6721939664181704, "grad_norm": 0.8846162557601929, "learning_rate": 2.774783702690025e-06, "loss": 0.14996337890625, "step": 4824 }, { "epoch": 0.6723333101093848, "grad_norm": 0.778411865234375, "learning_rate": 2.7726734287937367e-06, "loss": 0.12595748901367188, "step": 4825 }, { "epoch": 0.6724726538005992, "grad_norm": 1.03666090965271, "learning_rate": 2.770563649758562e-06, "loss": 0.17095947265625, "step": 4826 }, { "epoch": 0.6726119974918136, "grad_norm": 0.7581774592399597, "learning_rate": 2.768454366053247e-06, "loss": 0.13257217407226562, "step": 4827 }, { "epoch": 0.6727513411830279, "grad_norm": 0.8903812170028687, "learning_rate": 2.7663455781464245e-06, "loss": 0.12048721313476562, "step": 4828 }, { "epoch": 0.6728906848742423, "grad_norm": 0.69153892993927, "learning_rate": 2.764237286506618e-06, "loss": 0.11702346801757812, "step": 4829 }, { "epoch": 0.6730300285654567, "grad_norm": 0.876467764377594, "learning_rate": 2.7621294916022423e-06, "loss": 0.11858367919921875, "step": 4830 }, { "epoch": 0.6731693722566711, "grad_norm": 1.0408910512924194, "learning_rate": 2.760022193901605e-06, "loss": 0.132110595703125, "step": 4831 }, { "epoch": 0.6733087159478854, "grad_norm": 1.0639604330062866, "learning_rate": 2.7579153938728943e-06, "loss": 0.152008056640625, "step": 4832 }, { "epoch": 0.6734480596390998, "grad_norm": 1.4514654874801636, "learning_rate": 2.7558090919841972e-06, "loss": 0.17231369018554688, "step": 4833 }, { "epoch": 0.6735874033303142, "grad_norm": 0.7527730464935303, "learning_rate": 2.753703288703482e-06, "loss": 0.14171600341796875, "step": 4834 }, { "epoch": 0.6737267470215286, "grad_norm": 1.3554097414016724, "learning_rate": 2.7515979844986148e-06, "loss": 0.17087554931640625, "step": 4835 }, { "epoch": 0.673866090712743, "grad_norm": 1.3785847425460815, "learning_rate": 2.749493179837341e-06, "loss": 0.15102767944335938, "step": 4836 }, { "epoch": 0.6740054344039573, "grad_norm": 0.9863121509552002, "learning_rate": 2.747388875187303e-06, "loss": 0.1566905975341797, "step": 4837 }, { "epoch": 0.6741447780951717, "grad_norm": 1.0320574045181274, "learning_rate": 2.7452850710160305e-06, "loss": 0.11302947998046875, "step": 4838 }, { "epoch": 0.6742841217863861, "grad_norm": 0.8306481838226318, "learning_rate": 2.74318176779094e-06, "loss": 0.132598876953125, "step": 4839 }, { "epoch": 0.6744234654776005, "grad_norm": 0.7365309000015259, "learning_rate": 2.741078965979334e-06, "loss": 0.12466049194335938, "step": 4840 }, { "epoch": 0.6745628091688148, "grad_norm": 0.9148313403129578, "learning_rate": 2.7389766660484103e-06, "loss": 0.15875244140625, "step": 4841 }, { "epoch": 0.6747021528600292, "grad_norm": 1.5832868814468384, "learning_rate": 2.736874868465253e-06, "loss": 0.176177978515625, "step": 4842 }, { "epoch": 0.6748414965512436, "grad_norm": 0.9966923594474792, "learning_rate": 2.7347735736968318e-06, "loss": 0.13292694091796875, "step": 4843 }, { "epoch": 0.674980840242458, "grad_norm": 0.8575792908668518, "learning_rate": 2.7326727822100047e-06, "loss": 0.13085556030273438, "step": 4844 }, { "epoch": 0.6751201839336725, "grad_norm": 0.8557451367378235, "learning_rate": 2.7305724944715218e-06, "loss": 0.13488006591796875, "step": 4845 }, { "epoch": 0.6752595276248868, "grad_norm": 1.0697640180587769, "learning_rate": 2.72847271094802e-06, "loss": 0.13350486755371094, "step": 4846 }, { "epoch": 0.6753988713161012, "grad_norm": 1.1953274011611938, "learning_rate": 2.7263734321060198e-06, "loss": 0.13908004760742188, "step": 4847 }, { "epoch": 0.6755382150073156, "grad_norm": 0.9003778100013733, "learning_rate": 2.7242746584119364e-06, "loss": 0.139373779296875, "step": 4848 }, { "epoch": 0.67567755869853, "grad_norm": 0.9281959533691406, "learning_rate": 2.722176390332071e-06, "loss": 0.138671875, "step": 4849 }, { "epoch": 0.6758169023897443, "grad_norm": 1.0452919006347656, "learning_rate": 2.720078628332605e-06, "loss": 0.15701675415039062, "step": 4850 }, { "epoch": 0.6759562460809587, "grad_norm": 1.1004810333251953, "learning_rate": 2.7179813728796156e-06, "loss": 0.12750244140625, "step": 4851 }, { "epoch": 0.6760955897721731, "grad_norm": 0.7477563619613647, "learning_rate": 2.7158846244390657e-06, "loss": 0.14491653442382812, "step": 4852 }, { "epoch": 0.6762349334633875, "grad_norm": 0.7068290710449219, "learning_rate": 2.7137883834768076e-06, "loss": 0.12874984741210938, "step": 4853 }, { "epoch": 0.6763742771546019, "grad_norm": 0.7345033884048462, "learning_rate": 2.7116926504585756e-06, "loss": 0.11971664428710938, "step": 4854 }, { "epoch": 0.6765136208458162, "grad_norm": 0.7586578130722046, "learning_rate": 2.7095974258499914e-06, "loss": 0.13998031616210938, "step": 4855 }, { "epoch": 0.6766529645370306, "grad_norm": 0.8541437387466431, "learning_rate": 2.7075027101165706e-06, "loss": 0.11832809448242188, "step": 4856 }, { "epoch": 0.676792308228245, "grad_norm": 0.6037557721138, "learning_rate": 2.7054085037237066e-06, "loss": 0.10429954528808594, "step": 4857 }, { "epoch": 0.6769316519194594, "grad_norm": 0.8718892335891724, "learning_rate": 2.7033148071366866e-06, "loss": 0.17212295532226562, "step": 4858 }, { "epoch": 0.6770709956106737, "grad_norm": 1.3855749368667603, "learning_rate": 2.701221620820685e-06, "loss": 0.15555953979492188, "step": 4859 }, { "epoch": 0.6772103393018881, "grad_norm": 1.1494282484054565, "learning_rate": 2.6991289452407564e-06, "loss": 0.14296340942382812, "step": 4860 }, { "epoch": 0.6773496829931025, "grad_norm": 1.0079227685928345, "learning_rate": 2.697036780861845e-06, "loss": 0.1479034423828125, "step": 4861 }, { "epoch": 0.6774890266843169, "grad_norm": 0.9260138273239136, "learning_rate": 2.694945128148784e-06, "loss": 0.13546371459960938, "step": 4862 }, { "epoch": 0.6776283703755313, "grad_norm": 0.9135720729827881, "learning_rate": 2.692853987566291e-06, "loss": 0.1395721435546875, "step": 4863 }, { "epoch": 0.6777677140667456, "grad_norm": 0.9416045546531677, "learning_rate": 2.690763359578969e-06, "loss": 0.15875816345214844, "step": 4864 }, { "epoch": 0.67790705775796, "grad_norm": 1.1640193462371826, "learning_rate": 2.6886732446513066e-06, "loss": 0.19738388061523438, "step": 4865 }, { "epoch": 0.6780464014491744, "grad_norm": 0.8301196098327637, "learning_rate": 2.68658364324768e-06, "loss": 0.12771224975585938, "step": 4866 }, { "epoch": 0.6781857451403888, "grad_norm": 0.8004889488220215, "learning_rate": 2.684494555832353e-06, "loss": 0.12302017211914062, "step": 4867 }, { "epoch": 0.6783250888316031, "grad_norm": 1.536137580871582, "learning_rate": 2.6824059828694715e-06, "loss": 0.16259002685546875, "step": 4868 }, { "epoch": 0.6784644325228175, "grad_norm": 0.7274228930473328, "learning_rate": 2.680317924823068e-06, "loss": 0.12868499755859375, "step": 4869 }, { "epoch": 0.6786037762140319, "grad_norm": 1.254258155822754, "learning_rate": 2.6782303821570644e-06, "loss": 0.1492919921875, "step": 4870 }, { "epoch": 0.6787431199052463, "grad_norm": 0.8979316353797913, "learning_rate": 2.676143355335263e-06, "loss": 0.1490154266357422, "step": 4871 }, { "epoch": 0.6788824635964607, "grad_norm": 0.8711488842964172, "learning_rate": 2.6740568448213523e-06, "loss": 0.14182662963867188, "step": 4872 }, { "epoch": 0.679021807287675, "grad_norm": 1.3001313209533691, "learning_rate": 2.6719708510789077e-06, "loss": 0.14596176147460938, "step": 4873 }, { "epoch": 0.6791611509788894, "grad_norm": 1.0839247703552246, "learning_rate": 2.669885374571392e-06, "loss": 0.17902374267578125, "step": 4874 }, { "epoch": 0.6793004946701038, "grad_norm": 1.2412389516830444, "learning_rate": 2.667800415762149e-06, "loss": 0.17812728881835938, "step": 4875 }, { "epoch": 0.6794398383613182, "grad_norm": 0.7102017998695374, "learning_rate": 2.665715975114407e-06, "loss": 0.135711669921875, "step": 4876 }, { "epoch": 0.6795791820525325, "grad_norm": 0.7231717109680176, "learning_rate": 2.6636320530912817e-06, "loss": 0.12981796264648438, "step": 4877 }, { "epoch": 0.6797185257437469, "grad_norm": 1.1273397207260132, "learning_rate": 2.6615486501557765e-06, "loss": 0.150115966796875, "step": 4878 }, { "epoch": 0.6798578694349613, "grad_norm": 1.7414577007293701, "learning_rate": 2.659465766770772e-06, "loss": 0.18136978149414062, "step": 4879 }, { "epoch": 0.6799972131261757, "grad_norm": 0.9030622839927673, "learning_rate": 2.6573834033990404e-06, "loss": 0.12305831909179688, "step": 4880 }, { "epoch": 0.68013655681739, "grad_norm": 1.2710869312286377, "learning_rate": 2.655301560503234e-06, "loss": 0.14019012451171875, "step": 4881 }, { "epoch": 0.6802759005086044, "grad_norm": 0.7992080450057983, "learning_rate": 2.6532202385458875e-06, "loss": 0.13162994384765625, "step": 4882 }, { "epoch": 0.6804152441998188, "grad_norm": 1.5759022235870361, "learning_rate": 2.6511394379894274e-06, "loss": 0.14111709594726562, "step": 4883 }, { "epoch": 0.6805545878910332, "grad_norm": 1.468508005142212, "learning_rate": 2.649059159296158e-06, "loss": 0.1533355712890625, "step": 4884 }, { "epoch": 0.6806939315822477, "grad_norm": 1.7601717710494995, "learning_rate": 2.6469794029282726e-06, "loss": 0.15222549438476562, "step": 4885 }, { "epoch": 0.680833275273462, "grad_norm": 0.8627062439918518, "learning_rate": 2.6449001693478438e-06, "loss": 0.11742782592773438, "step": 4886 }, { "epoch": 0.6809726189646764, "grad_norm": 1.6279237270355225, "learning_rate": 2.642821459016827e-06, "loss": 0.180206298828125, "step": 4887 }, { "epoch": 0.6811119626558908, "grad_norm": 0.7447090148925781, "learning_rate": 2.6407432723970694e-06, "loss": 0.13092994689941406, "step": 4888 }, { "epoch": 0.6812513063471052, "grad_norm": 0.807962954044342, "learning_rate": 2.6386656099502917e-06, "loss": 0.13762664794921875, "step": 4889 }, { "epoch": 0.6813906500383196, "grad_norm": 0.5996717810630798, "learning_rate": 2.6365884721381045e-06, "loss": 0.10415267944335938, "step": 4890 }, { "epoch": 0.6815299937295339, "grad_norm": 0.6426937580108643, "learning_rate": 2.6345118594220044e-06, "loss": 0.13682174682617188, "step": 4891 }, { "epoch": 0.6816693374207483, "grad_norm": 0.5560246706008911, "learning_rate": 2.632435772263363e-06, "loss": 0.11186027526855469, "step": 4892 }, { "epoch": 0.6818086811119627, "grad_norm": 1.122844934463501, "learning_rate": 2.6303602111234394e-06, "loss": 0.15610122680664062, "step": 4893 }, { "epoch": 0.6819480248031771, "grad_norm": 1.3866939544677734, "learning_rate": 2.6282851764633765e-06, "loss": 0.142578125, "step": 4894 }, { "epoch": 0.6820873684943914, "grad_norm": 0.785469651222229, "learning_rate": 2.626210668744203e-06, "loss": 0.14011764526367188, "step": 4895 }, { "epoch": 0.6822267121856058, "grad_norm": 0.817403256893158, "learning_rate": 2.624136688426824e-06, "loss": 0.12155342102050781, "step": 4896 }, { "epoch": 0.6823660558768202, "grad_norm": 1.042198896408081, "learning_rate": 2.6220632359720287e-06, "loss": 0.151031494140625, "step": 4897 }, { "epoch": 0.6825053995680346, "grad_norm": 1.1358251571655273, "learning_rate": 2.6199903118404934e-06, "loss": 0.13682937622070312, "step": 4898 }, { "epoch": 0.682644743259249, "grad_norm": 0.5989206433296204, "learning_rate": 2.617917916492776e-06, "loss": 0.11905670166015625, "step": 4899 }, { "epoch": 0.6827840869504633, "grad_norm": 1.6418336629867554, "learning_rate": 2.615846050389312e-06, "loss": 0.16501426696777344, "step": 4900 }, { "epoch": 0.6829234306416777, "grad_norm": 0.659949541091919, "learning_rate": 2.6137747139904262e-06, "loss": 0.12363052368164062, "step": 4901 }, { "epoch": 0.6830627743328921, "grad_norm": 1.0322370529174805, "learning_rate": 2.611703907756319e-06, "loss": 0.1642303466796875, "step": 4902 }, { "epoch": 0.6832021180241065, "grad_norm": 1.169925332069397, "learning_rate": 2.6096336321470796e-06, "loss": 0.13419723510742188, "step": 4903 }, { "epoch": 0.6833414617153208, "grad_norm": 0.9620140790939331, "learning_rate": 2.6075638876226715e-06, "loss": 0.12456130981445312, "step": 4904 }, { "epoch": 0.6834808054065352, "grad_norm": 0.7708479762077332, "learning_rate": 2.605494674642948e-06, "loss": 0.11800003051757812, "step": 4905 }, { "epoch": 0.6836201490977496, "grad_norm": 1.2840309143066406, "learning_rate": 2.603425993667642e-06, "loss": 0.1695556640625, "step": 4906 }, { "epoch": 0.683759492788964, "grad_norm": 0.8435967564582825, "learning_rate": 2.6013578451563653e-06, "loss": 0.13588333129882812, "step": 4907 }, { "epoch": 0.6838988364801784, "grad_norm": 0.797077476978302, "learning_rate": 2.599290229568612e-06, "loss": 0.1113739013671875, "step": 4908 }, { "epoch": 0.6840381801713927, "grad_norm": 0.8754479885101318, "learning_rate": 2.59722314736376e-06, "loss": 0.13492584228515625, "step": 4909 }, { "epoch": 0.6841775238626071, "grad_norm": 1.1522444486618042, "learning_rate": 2.5951565990010706e-06, "loss": 0.15407562255859375, "step": 4910 }, { "epoch": 0.6843168675538215, "grad_norm": 0.8777978420257568, "learning_rate": 2.5930905849396792e-06, "loss": 0.11028671264648438, "step": 4911 }, { "epoch": 0.6844562112450359, "grad_norm": 0.7785833477973938, "learning_rate": 2.5910251056386113e-06, "loss": 0.1285858154296875, "step": 4912 }, { "epoch": 0.6845955549362502, "grad_norm": 1.1803466081619263, "learning_rate": 2.5889601615567657e-06, "loss": 0.2041473388671875, "step": 4913 }, { "epoch": 0.6847348986274646, "grad_norm": 0.5495147705078125, "learning_rate": 2.5868957531529283e-06, "loss": 0.1167755126953125, "step": 4914 }, { "epoch": 0.684874242318679, "grad_norm": 1.5276196002960205, "learning_rate": 2.584831880885761e-06, "loss": 0.18605422973632812, "step": 4915 }, { "epoch": 0.6850135860098934, "grad_norm": 1.2034319639205933, "learning_rate": 2.582768545213811e-06, "loss": 0.17383956909179688, "step": 4916 }, { "epoch": 0.6851529297011077, "grad_norm": 0.8506329655647278, "learning_rate": 2.5807057465955065e-06, "loss": 0.13376998901367188, "step": 4917 }, { "epoch": 0.6852922733923221, "grad_norm": 1.0499674081802368, "learning_rate": 2.5786434854891482e-06, "loss": 0.13679885864257812, "step": 4918 }, { "epoch": 0.6854316170835365, "grad_norm": 0.7103410363197327, "learning_rate": 2.576581762352928e-06, "loss": 0.115936279296875, "step": 4919 }, { "epoch": 0.6855709607747509, "grad_norm": 1.356954574584961, "learning_rate": 2.574520577644913e-06, "loss": 0.12882232666015625, "step": 4920 }, { "epoch": 0.6857103044659653, "grad_norm": 1.1496392488479614, "learning_rate": 2.5724599318230504e-06, "loss": 0.136260986328125, "step": 4921 }, { "epoch": 0.6858496481571796, "grad_norm": 0.5500063896179199, "learning_rate": 2.570399825345169e-06, "loss": 0.11022186279296875, "step": 4922 }, { "epoch": 0.685988991848394, "grad_norm": 0.7450648546218872, "learning_rate": 2.5683402586689788e-06, "loss": 0.13725662231445312, "step": 4923 }, { "epoch": 0.6861283355396084, "grad_norm": 1.076427936553955, "learning_rate": 2.566281232252068e-06, "loss": 0.13554763793945312, "step": 4924 }, { "epoch": 0.6862676792308228, "grad_norm": 1.2069718837738037, "learning_rate": 2.564222746551903e-06, "loss": 0.1483306884765625, "step": 4925 }, { "epoch": 0.6864070229220373, "grad_norm": 1.1796519756317139, "learning_rate": 2.562164802025834e-06, "loss": 0.15549087524414062, "step": 4926 }, { "epoch": 0.6865463666132516, "grad_norm": 1.4412415027618408, "learning_rate": 2.5601073991310903e-06, "loss": 0.15927505493164062, "step": 4927 }, { "epoch": 0.686685710304466, "grad_norm": 1.0669372081756592, "learning_rate": 2.5580505383247796e-06, "loss": 0.14390182495117188, "step": 4928 }, { "epoch": 0.6868250539956804, "grad_norm": 0.9983933568000793, "learning_rate": 2.5559942200638866e-06, "loss": 0.14926528930664062, "step": 4929 }, { "epoch": 0.6869643976868948, "grad_norm": 0.9265382885932922, "learning_rate": 2.5539384448052797e-06, "loss": 0.12126922607421875, "step": 4930 }, { "epoch": 0.6871037413781091, "grad_norm": 0.6886403560638428, "learning_rate": 2.5518832130057082e-06, "loss": 0.13386154174804688, "step": 4931 }, { "epoch": 0.6872430850693235, "grad_norm": 1.2452744245529175, "learning_rate": 2.5498285251217938e-06, "loss": 0.14776992797851562, "step": 4932 }, { "epoch": 0.6873824287605379, "grad_norm": 1.3198052644729614, "learning_rate": 2.5477743816100443e-06, "loss": 0.17497634887695312, "step": 4933 }, { "epoch": 0.6875217724517523, "grad_norm": 1.11469566822052, "learning_rate": 2.5457207829268394e-06, "loss": 0.13323211669921875, "step": 4934 }, { "epoch": 0.6876611161429667, "grad_norm": 0.9507842659950256, "learning_rate": 2.5436677295284474e-06, "loss": 0.1624603271484375, "step": 4935 }, { "epoch": 0.687800459834181, "grad_norm": 0.853307843208313, "learning_rate": 2.5416152218710044e-06, "loss": 0.13195037841796875, "step": 4936 }, { "epoch": 0.6879398035253954, "grad_norm": 1.0793448686599731, "learning_rate": 2.539563260410533e-06, "loss": 0.15134429931640625, "step": 4937 }, { "epoch": 0.6880791472166098, "grad_norm": 1.0662060976028442, "learning_rate": 2.5375118456029345e-06, "loss": 0.16236114501953125, "step": 4938 }, { "epoch": 0.6882184909078242, "grad_norm": 1.9524327516555786, "learning_rate": 2.5354609779039844e-06, "loss": 0.1748504638671875, "step": 4939 }, { "epoch": 0.6883578345990385, "grad_norm": 1.060794472694397, "learning_rate": 2.533410657769337e-06, "loss": 0.15528488159179688, "step": 4940 }, { "epoch": 0.6884971782902529, "grad_norm": 1.5030192136764526, "learning_rate": 2.531360885654528e-06, "loss": 0.15590286254882812, "step": 4941 }, { "epoch": 0.6886365219814673, "grad_norm": 0.8003371357917786, "learning_rate": 2.529311662014972e-06, "loss": 0.14564895629882812, "step": 4942 }, { "epoch": 0.6887758656726817, "grad_norm": 1.3064724206924438, "learning_rate": 2.5272629873059564e-06, "loss": 0.13517189025878906, "step": 4943 }, { "epoch": 0.688915209363896, "grad_norm": 0.9544034600257874, "learning_rate": 2.5252148619826535e-06, "loss": 0.10291862487792969, "step": 4944 }, { "epoch": 0.6890545530551104, "grad_norm": 0.9987162947654724, "learning_rate": 2.5231672865001056e-06, "loss": 0.14873504638671875, "step": 4945 }, { "epoch": 0.6891938967463248, "grad_norm": 0.5679595470428467, "learning_rate": 2.5211202613132413e-06, "loss": 0.10900497436523438, "step": 4946 }, { "epoch": 0.6893332404375392, "grad_norm": 0.949765145778656, "learning_rate": 2.5190737868768592e-06, "loss": 0.16104888916015625, "step": 4947 }, { "epoch": 0.6894725841287536, "grad_norm": 1.1591581106185913, "learning_rate": 2.5170278636456413e-06, "loss": 0.15194320678710938, "step": 4948 }, { "epoch": 0.6896119278199679, "grad_norm": 0.7437825202941895, "learning_rate": 2.5149824920741493e-06, "loss": 0.12543487548828125, "step": 4949 }, { "epoch": 0.6897512715111823, "grad_norm": 0.8225600719451904, "learning_rate": 2.51293767261681e-06, "loss": 0.13479232788085938, "step": 4950 }, { "epoch": 0.6898906152023967, "grad_norm": 0.9022487998008728, "learning_rate": 2.5108934057279376e-06, "loss": 0.1535797119140625, "step": 4951 }, { "epoch": 0.6900299588936111, "grad_norm": 0.6255491375923157, "learning_rate": 2.5088496918617243e-06, "loss": 0.11224937438964844, "step": 4952 }, { "epoch": 0.6901693025848255, "grad_norm": 1.0793343782424927, "learning_rate": 2.5068065314722378e-06, "loss": 0.1174163818359375, "step": 4953 }, { "epoch": 0.6903086462760398, "grad_norm": 0.6398220658302307, "learning_rate": 2.504763925013419e-06, "loss": 0.11885452270507812, "step": 4954 }, { "epoch": 0.6904479899672542, "grad_norm": 0.7904298305511475, "learning_rate": 2.5027218729390867e-06, "loss": 0.14501380920410156, "step": 4955 }, { "epoch": 0.6905873336584686, "grad_norm": 0.9366775751113892, "learning_rate": 2.500680375702943e-06, "loss": 0.147674560546875, "step": 4956 }, { "epoch": 0.690726677349683, "grad_norm": 0.7256610989570618, "learning_rate": 2.498639433758557e-06, "loss": 0.12739181518554688, "step": 4957 }, { "epoch": 0.6908660210408973, "grad_norm": 0.6311099529266357, "learning_rate": 2.4965990475593814e-06, "loss": 0.10406875610351562, "step": 4958 }, { "epoch": 0.6910053647321117, "grad_norm": 0.9438827037811279, "learning_rate": 2.494559217558746e-06, "loss": 0.15032196044921875, "step": 4959 }, { "epoch": 0.6911447084233261, "grad_norm": 0.567787766456604, "learning_rate": 2.492519944209853e-06, "loss": 0.11235809326171875, "step": 4960 }, { "epoch": 0.6912840521145405, "grad_norm": 1.424391269683838, "learning_rate": 2.4904812279657792e-06, "loss": 0.14121246337890625, "step": 4961 }, { "epoch": 0.6914233958057548, "grad_norm": 1.623247504234314, "learning_rate": 2.488443069279483e-06, "loss": 0.16969680786132812, "step": 4962 }, { "epoch": 0.6915627394969692, "grad_norm": 1.0763906240463257, "learning_rate": 2.4864054686037993e-06, "loss": 0.1378936767578125, "step": 4963 }, { "epoch": 0.6917020831881836, "grad_norm": 0.7937214374542236, "learning_rate": 2.484368426391432e-06, "loss": 0.10640335083007812, "step": 4964 }, { "epoch": 0.691841426879398, "grad_norm": 1.1331290006637573, "learning_rate": 2.482331943094969e-06, "loss": 0.1536846160888672, "step": 4965 }, { "epoch": 0.6919807705706125, "grad_norm": 0.991424560546875, "learning_rate": 2.480296019166868e-06, "loss": 0.15111541748046875, "step": 4966 }, { "epoch": 0.6921201142618268, "grad_norm": 0.4870913624763489, "learning_rate": 2.478260655059467e-06, "loss": 0.1071319580078125, "step": 4967 }, { "epoch": 0.6922594579530412, "grad_norm": 0.9498189687728882, "learning_rate": 2.4762258512249745e-06, "loss": 0.16797637939453125, "step": 4968 }, { "epoch": 0.6923988016442556, "grad_norm": 0.5488287210464478, "learning_rate": 2.4741916081154786e-06, "loss": 0.1007843017578125, "step": 4969 }, { "epoch": 0.69253814533547, "grad_norm": 1.0795893669128418, "learning_rate": 2.472157926182945e-06, "loss": 0.13143157958984375, "step": 4970 }, { "epoch": 0.6926774890266844, "grad_norm": 0.7929447889328003, "learning_rate": 2.470124805879208e-06, "loss": 0.12477493286132812, "step": 4971 }, { "epoch": 0.6928168327178987, "grad_norm": 0.8789539337158203, "learning_rate": 2.468092247655979e-06, "loss": 0.147308349609375, "step": 4972 }, { "epoch": 0.6929561764091131, "grad_norm": 1.114202618598938, "learning_rate": 2.466060251964848e-06, "loss": 0.15413856506347656, "step": 4973 }, { "epoch": 0.6930955201003275, "grad_norm": 1.0999780893325806, "learning_rate": 2.464028819257281e-06, "loss": 0.1390533447265625, "step": 4974 }, { "epoch": 0.6932348637915419, "grad_norm": 0.9974029660224915, "learning_rate": 2.4619979499846127e-06, "loss": 0.15604019165039062, "step": 4975 }, { "epoch": 0.6933742074827562, "grad_norm": 0.7390916347503662, "learning_rate": 2.459967644598054e-06, "loss": 0.12201309204101562, "step": 4976 }, { "epoch": 0.6935135511739706, "grad_norm": 0.7209645509719849, "learning_rate": 2.457937903548695e-06, "loss": 0.14775848388671875, "step": 4977 }, { "epoch": 0.693652894865185, "grad_norm": 0.8222251534461975, "learning_rate": 2.4559087272875e-06, "loss": 0.14178848266601562, "step": 4978 }, { "epoch": 0.6937922385563994, "grad_norm": 0.8255000710487366, "learning_rate": 2.4538801162653002e-06, "loss": 0.10725784301757812, "step": 4979 }, { "epoch": 0.6939315822476138, "grad_norm": 1.103493332862854, "learning_rate": 2.451852070932811e-06, "loss": 0.14939117431640625, "step": 4980 }, { "epoch": 0.6940709259388281, "grad_norm": 0.7036241292953491, "learning_rate": 2.4498245917406195e-06, "loss": 0.123565673828125, "step": 4981 }, { "epoch": 0.6942102696300425, "grad_norm": 0.8655698895454407, "learning_rate": 2.4477976791391784e-06, "loss": 0.12232017517089844, "step": 4982 }, { "epoch": 0.6943496133212569, "grad_norm": 0.9969395399093628, "learning_rate": 2.445771333578825e-06, "loss": 0.15218734741210938, "step": 4983 }, { "epoch": 0.6944889570124713, "grad_norm": 0.9298047423362732, "learning_rate": 2.443745555509768e-06, "loss": 0.14685821533203125, "step": 4984 }, { "epoch": 0.6946283007036856, "grad_norm": 0.7355666756629944, "learning_rate": 2.4417203453820892e-06, "loss": 0.13199234008789062, "step": 4985 }, { "epoch": 0.6947676443949, "grad_norm": 0.8111489415168762, "learning_rate": 2.4396957036457443e-06, "loss": 0.13660430908203125, "step": 4986 }, { "epoch": 0.6949069880861144, "grad_norm": 0.8632299900054932, "learning_rate": 2.437671630750558e-06, "loss": 0.13474273681640625, "step": 4987 }, { "epoch": 0.6950463317773288, "grad_norm": 0.8964822888374329, "learning_rate": 2.4356481271462396e-06, "loss": 0.13622665405273438, "step": 4988 }, { "epoch": 0.6951856754685432, "grad_norm": 0.7690643668174744, "learning_rate": 2.4336251932823594e-06, "loss": 0.11679458618164062, "step": 4989 }, { "epoch": 0.6953250191597575, "grad_norm": 0.9778768420219421, "learning_rate": 2.4316028296083705e-06, "loss": 0.15638351440429688, "step": 4990 }, { "epoch": 0.6954643628509719, "grad_norm": 0.8133928775787354, "learning_rate": 2.4295810365735974e-06, "loss": 0.12699508666992188, "step": 4991 }, { "epoch": 0.6956037065421863, "grad_norm": 0.6893616318702698, "learning_rate": 2.427559814627234e-06, "loss": 0.128387451171875, "step": 4992 }, { "epoch": 0.6957430502334007, "grad_norm": 1.073096513748169, "learning_rate": 2.425539164218348e-06, "loss": 0.14759063720703125, "step": 4993 }, { "epoch": 0.695882393924615, "grad_norm": 0.8200150728225708, "learning_rate": 2.4235190857958834e-06, "loss": 0.1436176300048828, "step": 4994 }, { "epoch": 0.6960217376158294, "grad_norm": 1.2475833892822266, "learning_rate": 2.4214995798086584e-06, "loss": 0.14203643798828125, "step": 4995 }, { "epoch": 0.6961610813070438, "grad_norm": 2.949618101119995, "learning_rate": 2.4194806467053584e-06, "loss": 0.22119522094726562, "step": 4996 }, { "epoch": 0.6963004249982582, "grad_norm": 1.1486213207244873, "learning_rate": 2.417462286934543e-06, "loss": 0.14740371704101562, "step": 4997 }, { "epoch": 0.6964397686894725, "grad_norm": 0.5052714943885803, "learning_rate": 2.4154445009446457e-06, "loss": 0.11095046997070312, "step": 4998 }, { "epoch": 0.6965791123806869, "grad_norm": 1.2506330013275146, "learning_rate": 2.413427289183977e-06, "loss": 0.15855026245117188, "step": 4999 }, { "epoch": 0.6967184560719013, "grad_norm": 1.1108930110931396, "learning_rate": 2.41141065210071e-06, "loss": 0.1729564666748047, "step": 5000 }, { "epoch": 0.6968577997631157, "grad_norm": 1.3099013566970825, "learning_rate": 2.4093945901428977e-06, "loss": 0.169769287109375, "step": 5001 }, { "epoch": 0.6969971434543301, "grad_norm": 0.6934844255447388, "learning_rate": 2.4073791037584648e-06, "loss": 0.11222267150878906, "step": 5002 }, { "epoch": 0.6971364871455444, "grad_norm": 0.6647637486457825, "learning_rate": 2.4053641933952043e-06, "loss": 0.12105560302734375, "step": 5003 }, { "epoch": 0.6972758308367588, "grad_norm": 1.032873511314392, "learning_rate": 2.403349859500782e-06, "loss": 0.15526580810546875, "step": 5004 }, { "epoch": 0.6974151745279732, "grad_norm": 0.7353174090385437, "learning_rate": 2.4013361025227384e-06, "loss": 0.12479019165039062, "step": 5005 }, { "epoch": 0.6975545182191877, "grad_norm": 1.2012989521026611, "learning_rate": 2.3993229229084856e-06, "loss": 0.140869140625, "step": 5006 }, { "epoch": 0.6976938619104021, "grad_norm": 0.6585250496864319, "learning_rate": 2.3973103211053052e-06, "loss": 0.12143611907958984, "step": 5007 }, { "epoch": 0.6978332056016164, "grad_norm": 0.889302670955658, "learning_rate": 2.3952982975603494e-06, "loss": 0.14420700073242188, "step": 5008 }, { "epoch": 0.6979725492928308, "grad_norm": 0.7426949143409729, "learning_rate": 2.393286852720645e-06, "loss": 0.14504623413085938, "step": 5009 }, { "epoch": 0.6981118929840452, "grad_norm": 0.7920369505882263, "learning_rate": 2.391275987033092e-06, "loss": 0.1347522735595703, "step": 5010 }, { "epoch": 0.6982512366752596, "grad_norm": 0.8955190181732178, "learning_rate": 2.3892657009444543e-06, "loss": 0.1186676025390625, "step": 5011 }, { "epoch": 0.698390580366474, "grad_norm": 0.629004180431366, "learning_rate": 2.387255994901376e-06, "loss": 0.1265544891357422, "step": 5012 }, { "epoch": 0.6985299240576883, "grad_norm": 1.1166006326675415, "learning_rate": 2.3852468693503635e-06, "loss": 0.13535690307617188, "step": 5013 }, { "epoch": 0.6986692677489027, "grad_norm": 0.7074090242385864, "learning_rate": 2.3832383247378025e-06, "loss": 0.13366317749023438, "step": 5014 }, { "epoch": 0.6988086114401171, "grad_norm": 0.7461395263671875, "learning_rate": 2.3812303615099423e-06, "loss": 0.13272476196289062, "step": 5015 }, { "epoch": 0.6989479551313315, "grad_norm": 1.061367154121399, "learning_rate": 2.3792229801129086e-06, "loss": 0.14260482788085938, "step": 5016 }, { "epoch": 0.6990872988225458, "grad_norm": 1.1806974411010742, "learning_rate": 2.3772161809926973e-06, "loss": 0.1359405517578125, "step": 5017 }, { "epoch": 0.6992266425137602, "grad_norm": 1.219419002532959, "learning_rate": 2.375209964595171e-06, "loss": 0.12677955627441406, "step": 5018 }, { "epoch": 0.6993659862049746, "grad_norm": 1.1683320999145508, "learning_rate": 2.373204331366064e-06, "loss": 0.15022659301757812, "step": 5019 }, { "epoch": 0.699505329896189, "grad_norm": 1.2759826183319092, "learning_rate": 2.3711992817509854e-06, "loss": 0.15433502197265625, "step": 5020 }, { "epoch": 0.6996446735874033, "grad_norm": 0.7240054607391357, "learning_rate": 2.3691948161954083e-06, "loss": 0.11920547485351562, "step": 5021 }, { "epoch": 0.6997840172786177, "grad_norm": 0.7206324338912964, "learning_rate": 2.3671909351446802e-06, "loss": 0.10867118835449219, "step": 5022 }, { "epoch": 0.6999233609698321, "grad_norm": 1.6782106161117554, "learning_rate": 2.365187639044021e-06, "loss": 0.143890380859375, "step": 5023 }, { "epoch": 0.7000627046610465, "grad_norm": 1.07645845413208, "learning_rate": 2.363184928338514e-06, "loss": 0.12828445434570312, "step": 5024 }, { "epoch": 0.7002020483522609, "grad_norm": 1.2328886985778809, "learning_rate": 2.3611828034731144e-06, "loss": 0.15903472900390625, "step": 5025 }, { "epoch": 0.7003413920434752, "grad_norm": 1.7795379161834717, "learning_rate": 2.359181264892651e-06, "loss": 0.14681625366210938, "step": 5026 }, { "epoch": 0.7004807357346896, "grad_norm": 1.2958163022994995, "learning_rate": 2.3571803130418215e-06, "loss": 0.1727752685546875, "step": 5027 }, { "epoch": 0.700620079425904, "grad_norm": 1.7724329233169556, "learning_rate": 2.3551799483651894e-06, "loss": 0.19740676879882812, "step": 5028 }, { "epoch": 0.7007594231171184, "grad_norm": 1.5029758214950562, "learning_rate": 2.3531801713071887e-06, "loss": 0.16904449462890625, "step": 5029 }, { "epoch": 0.7008987668083327, "grad_norm": 1.0820518732070923, "learning_rate": 2.351180982312127e-06, "loss": 0.12956619262695312, "step": 5030 }, { "epoch": 0.7010381104995471, "grad_norm": 0.9820839762687683, "learning_rate": 2.349182381824178e-06, "loss": 0.15063095092773438, "step": 5031 }, { "epoch": 0.7011774541907615, "grad_norm": 0.7649637460708618, "learning_rate": 2.3471843702873835e-06, "loss": 0.1167449951171875, "step": 5032 }, { "epoch": 0.7013167978819759, "grad_norm": 0.7732885479927063, "learning_rate": 2.345186948145659e-06, "loss": 0.13623046875, "step": 5033 }, { "epoch": 0.7014561415731903, "grad_norm": 0.8955416679382324, "learning_rate": 2.343190115842782e-06, "loss": 0.15100479125976562, "step": 5034 }, { "epoch": 0.7015954852644046, "grad_norm": 1.514868140220642, "learning_rate": 2.341193873822407e-06, "loss": 0.14361190795898438, "step": 5035 }, { "epoch": 0.701734828955619, "grad_norm": 0.6224585175514221, "learning_rate": 2.33919822252805e-06, "loss": 0.12139892578125, "step": 5036 }, { "epoch": 0.7018741726468334, "grad_norm": 1.5144842863082886, "learning_rate": 2.337203162403101e-06, "loss": 0.1591033935546875, "step": 5037 }, { "epoch": 0.7020135163380478, "grad_norm": 0.9729025959968567, "learning_rate": 2.335208693890819e-06, "loss": 0.15290451049804688, "step": 5038 }, { "epoch": 0.7021528600292621, "grad_norm": 0.829496443271637, "learning_rate": 2.3332148174343257e-06, "loss": 0.15143203735351562, "step": 5039 }, { "epoch": 0.7022922037204765, "grad_norm": 0.826005220413208, "learning_rate": 2.331221533476615e-06, "loss": 0.12253952026367188, "step": 5040 }, { "epoch": 0.7024315474116909, "grad_norm": 0.7258819937705994, "learning_rate": 2.3292288424605503e-06, "loss": 0.12129402160644531, "step": 5041 }, { "epoch": 0.7025708911029053, "grad_norm": 0.7145364284515381, "learning_rate": 2.327236744828864e-06, "loss": 0.13269805908203125, "step": 5042 }, { "epoch": 0.7027102347941196, "grad_norm": 0.8335204124450684, "learning_rate": 2.325245241024151e-06, "loss": 0.13945388793945312, "step": 5043 }, { "epoch": 0.702849578485334, "grad_norm": 0.7638847827911377, "learning_rate": 2.323254331488881e-06, "loss": 0.12656784057617188, "step": 5044 }, { "epoch": 0.7029889221765484, "grad_norm": 1.0719671249389648, "learning_rate": 2.3212640166653868e-06, "loss": 0.14485931396484375, "step": 5045 }, { "epoch": 0.7031282658677629, "grad_norm": 0.6465599536895752, "learning_rate": 2.319274296995872e-06, "loss": 0.11672592163085938, "step": 5046 }, { "epoch": 0.7032676095589773, "grad_norm": 0.929014265537262, "learning_rate": 2.3172851729224056e-06, "loss": 0.13402175903320312, "step": 5047 }, { "epoch": 0.7034069532501916, "grad_norm": 1.0862168073654175, "learning_rate": 2.315296644886926e-06, "loss": 0.13292312622070312, "step": 5048 }, { "epoch": 0.703546296941406, "grad_norm": 1.0365689992904663, "learning_rate": 2.313308713331242e-06, "loss": 0.18148422241210938, "step": 5049 }, { "epoch": 0.7036856406326204, "grad_norm": 1.6554293632507324, "learning_rate": 2.3113213786970205e-06, "loss": 0.15491867065429688, "step": 5050 }, { "epoch": 0.7038249843238348, "grad_norm": 1.552561640739441, "learning_rate": 2.3093346414258054e-06, "loss": 0.1630401611328125, "step": 5051 }, { "epoch": 0.7039643280150492, "grad_norm": 0.9150753021240234, "learning_rate": 2.3073485019590043e-06, "loss": 0.16943359375, "step": 5052 }, { "epoch": 0.7041036717062635, "grad_norm": 1.2394425868988037, "learning_rate": 2.305362960737893e-06, "loss": 0.14870452880859375, "step": 5053 }, { "epoch": 0.7042430153974779, "grad_norm": 1.0801293849945068, "learning_rate": 2.3033780182036127e-06, "loss": 0.16152000427246094, "step": 5054 }, { "epoch": 0.7043823590886923, "grad_norm": 0.8917136192321777, "learning_rate": 2.301393674797169e-06, "loss": 0.143646240234375, "step": 5055 }, { "epoch": 0.7045217027799067, "grad_norm": 0.7656229138374329, "learning_rate": 2.2994099309594437e-06, "loss": 0.12692832946777344, "step": 5056 }, { "epoch": 0.704661046471121, "grad_norm": 0.5324113965034485, "learning_rate": 2.297426787131174e-06, "loss": 0.10677719116210938, "step": 5057 }, { "epoch": 0.7048003901623354, "grad_norm": 1.174627661705017, "learning_rate": 2.2954442437529705e-06, "loss": 0.15166854858398438, "step": 5058 }, { "epoch": 0.7049397338535498, "grad_norm": 0.7181844115257263, "learning_rate": 2.293462301265313e-06, "loss": 0.12158584594726562, "step": 5059 }, { "epoch": 0.7050790775447642, "grad_norm": 0.6692888736724854, "learning_rate": 2.2914809601085405e-06, "loss": 0.10884284973144531, "step": 5060 }, { "epoch": 0.7052184212359786, "grad_norm": 1.4974915981292725, "learning_rate": 2.28950022072286e-06, "loss": 0.18556976318359375, "step": 5061 }, { "epoch": 0.7053577649271929, "grad_norm": 1.1622130870819092, "learning_rate": 2.2875200835483486e-06, "loss": 0.148193359375, "step": 5062 }, { "epoch": 0.7054971086184073, "grad_norm": 0.646767795085907, "learning_rate": 2.2855405490249498e-06, "loss": 0.11907958984375, "step": 5063 }, { "epoch": 0.7056364523096217, "grad_norm": 0.9376974701881409, "learning_rate": 2.283561617592467e-06, "loss": 0.14864730834960938, "step": 5064 }, { "epoch": 0.7057757960008361, "grad_norm": 1.0464658737182617, "learning_rate": 2.2815832896905772e-06, "loss": 0.12701797485351562, "step": 5065 }, { "epoch": 0.7059151396920504, "grad_norm": 0.6669820547103882, "learning_rate": 2.279605565758816e-06, "loss": 0.12269210815429688, "step": 5066 }, { "epoch": 0.7060544833832648, "grad_norm": 0.7525417804718018, "learning_rate": 2.277628446236592e-06, "loss": 0.12837982177734375, "step": 5067 }, { "epoch": 0.7061938270744792, "grad_norm": 0.8152780532836914, "learning_rate": 2.275651931563173e-06, "loss": 0.13158416748046875, "step": 5068 }, { "epoch": 0.7063331707656936, "grad_norm": 0.9633156657218933, "learning_rate": 2.273676022177697e-06, "loss": 0.14911651611328125, "step": 5069 }, { "epoch": 0.706472514456908, "grad_norm": 1.170649766921997, "learning_rate": 2.2717007185191673e-06, "loss": 0.11333465576171875, "step": 5070 }, { "epoch": 0.7066118581481223, "grad_norm": 0.9571818113327026, "learning_rate": 2.2697260210264506e-06, "loss": 0.15245437622070312, "step": 5071 }, { "epoch": 0.7067512018393367, "grad_norm": 1.66960608959198, "learning_rate": 2.267751930138276e-06, "loss": 0.16286277770996094, "step": 5072 }, { "epoch": 0.7068905455305511, "grad_norm": 1.2676827907562256, "learning_rate": 2.265778446293245e-06, "loss": 0.12379264831542969, "step": 5073 }, { "epoch": 0.7070298892217655, "grad_norm": 0.6992618441581726, "learning_rate": 2.263805569929821e-06, "loss": 0.11433029174804688, "step": 5074 }, { "epoch": 0.7071692329129798, "grad_norm": 1.034354329109192, "learning_rate": 2.2618333014863296e-06, "loss": 0.17077255249023438, "step": 5075 }, { "epoch": 0.7073085766041942, "grad_norm": 1.1445107460021973, "learning_rate": 2.259861641400967e-06, "loss": 0.14073562622070312, "step": 5076 }, { "epoch": 0.7074479202954086, "grad_norm": 0.8611301183700562, "learning_rate": 2.2578905901117876e-06, "loss": 0.13245010375976562, "step": 5077 }, { "epoch": 0.707587263986623, "grad_norm": 0.8892211318016052, "learning_rate": 2.255920148056717e-06, "loss": 0.12570571899414062, "step": 5078 }, { "epoch": 0.7077266076778373, "grad_norm": 0.8914601802825928, "learning_rate": 2.2539503156735392e-06, "loss": 0.1374359130859375, "step": 5079 }, { "epoch": 0.7078659513690517, "grad_norm": 0.9703517556190491, "learning_rate": 2.2519810933999085e-06, "loss": 0.14162445068359375, "step": 5080 }, { "epoch": 0.7080052950602661, "grad_norm": 0.9764469265937805, "learning_rate": 2.2500124816733437e-06, "loss": 0.12022781372070312, "step": 5081 }, { "epoch": 0.7081446387514805, "grad_norm": 1.2778899669647217, "learning_rate": 2.248044480931219e-06, "loss": 0.16082382202148438, "step": 5082 }, { "epoch": 0.7082839824426949, "grad_norm": 1.5336617231369019, "learning_rate": 2.2460770916107823e-06, "loss": 0.143310546875, "step": 5083 }, { "epoch": 0.7084233261339092, "grad_norm": 0.7409664988517761, "learning_rate": 2.2441103141491424e-06, "loss": 0.1270732879638672, "step": 5084 }, { "epoch": 0.7085626698251236, "grad_norm": 0.8894287347793579, "learning_rate": 2.2421441489832745e-06, "loss": 0.135528564453125, "step": 5085 }, { "epoch": 0.7087020135163381, "grad_norm": 0.9967676997184753, "learning_rate": 2.240178596550014e-06, "loss": 0.126129150390625, "step": 5086 }, { "epoch": 0.7088413572075525, "grad_norm": 1.5462807416915894, "learning_rate": 2.23821365728606e-06, "loss": 0.15549087524414062, "step": 5087 }, { "epoch": 0.7089807008987669, "grad_norm": 0.9676381945610046, "learning_rate": 2.23624933162798e-06, "loss": 0.13063430786132812, "step": 5088 }, { "epoch": 0.7091200445899812, "grad_norm": 0.9547827243804932, "learning_rate": 2.2342856200121993e-06, "loss": 0.13759994506835938, "step": 5089 }, { "epoch": 0.7092593882811956, "grad_norm": 1.294608473777771, "learning_rate": 2.2323225228750113e-06, "loss": 0.14057540893554688, "step": 5090 }, { "epoch": 0.70939873197241, "grad_norm": 0.965938150882721, "learning_rate": 2.230360040652574e-06, "loss": 0.13869857788085938, "step": 5091 }, { "epoch": 0.7095380756636244, "grad_norm": 1.087731122970581, "learning_rate": 2.228398173780903e-06, "loss": 0.13443756103515625, "step": 5092 }, { "epoch": 0.7096774193548387, "grad_norm": 0.9887388944625854, "learning_rate": 2.2264369226958794e-06, "loss": 0.12886428833007812, "step": 5093 }, { "epoch": 0.7098167630460531, "grad_norm": 0.7659105658531189, "learning_rate": 2.2244762878332506e-06, "loss": 0.12522125244140625, "step": 5094 }, { "epoch": 0.7099561067372675, "grad_norm": 0.9281448721885681, "learning_rate": 2.222516269628626e-06, "loss": 0.12471389770507812, "step": 5095 }, { "epoch": 0.7100954504284819, "grad_norm": 0.9165273308753967, "learning_rate": 2.220556868517473e-06, "loss": 0.12738418579101562, "step": 5096 }, { "epoch": 0.7102347941196963, "grad_norm": 1.0393041372299194, "learning_rate": 2.2185980849351295e-06, "loss": 0.14902114868164062, "step": 5097 }, { "epoch": 0.7103741378109106, "grad_norm": 1.11234712600708, "learning_rate": 2.2166399193167905e-06, "loss": 0.1716156005859375, "step": 5098 }, { "epoch": 0.710513481502125, "grad_norm": 0.6638284921646118, "learning_rate": 2.214682372097517e-06, "loss": 0.12179183959960938, "step": 5099 }, { "epoch": 0.7106528251933394, "grad_norm": 1.3015127182006836, "learning_rate": 2.212725443712229e-06, "loss": 0.14209747314453125, "step": 5100 }, { "epoch": 0.7107921688845538, "grad_norm": 0.7674906253814697, "learning_rate": 2.2107691345957133e-06, "loss": 0.13022232055664062, "step": 5101 }, { "epoch": 0.7109315125757681, "grad_norm": 0.9260849356651306, "learning_rate": 2.208813445182618e-06, "loss": 0.13746070861816406, "step": 5102 }, { "epoch": 0.7110708562669825, "grad_norm": 0.9740056991577148, "learning_rate": 2.2068583759074513e-06, "loss": 0.146148681640625, "step": 5103 }, { "epoch": 0.7112101999581969, "grad_norm": 1.1007494926452637, "learning_rate": 2.2049039272045837e-06, "loss": 0.15142440795898438, "step": 5104 }, { "epoch": 0.7113495436494113, "grad_norm": 0.9127984642982483, "learning_rate": 2.2029500995082497e-06, "loss": 0.1241455078125, "step": 5105 }, { "epoch": 0.7114888873406257, "grad_norm": 0.5869483947753906, "learning_rate": 2.2009968932525478e-06, "loss": 0.11341476440429688, "step": 5106 }, { "epoch": 0.71162823103184, "grad_norm": 0.8445344567298889, "learning_rate": 2.199044308871434e-06, "loss": 0.14035797119140625, "step": 5107 }, { "epoch": 0.7117675747230544, "grad_norm": 0.9235764145851135, "learning_rate": 2.197092346798726e-06, "loss": 0.14729690551757812, "step": 5108 }, { "epoch": 0.7119069184142688, "grad_norm": 1.1765062808990479, "learning_rate": 2.1951410074681074e-06, "loss": 0.15398788452148438, "step": 5109 }, { "epoch": 0.7120462621054832, "grad_norm": 0.654844343662262, "learning_rate": 2.193190291313122e-06, "loss": 0.10797691345214844, "step": 5110 }, { "epoch": 0.7121856057966975, "grad_norm": 0.8516945838928223, "learning_rate": 2.1912401987671724e-06, "loss": 0.12206459045410156, "step": 5111 }, { "epoch": 0.7123249494879119, "grad_norm": 1.0543544292449951, "learning_rate": 2.1892907302635246e-06, "loss": 0.15682601928710938, "step": 5112 }, { "epoch": 0.7124642931791263, "grad_norm": 1.620140552520752, "learning_rate": 2.1873418862353095e-06, "loss": 0.16012191772460938, "step": 5113 }, { "epoch": 0.7126036368703407, "grad_norm": 1.0299519300460815, "learning_rate": 2.185393667115513e-06, "loss": 0.12347793579101562, "step": 5114 }, { "epoch": 0.712742980561555, "grad_norm": 1.6756980419158936, "learning_rate": 2.1834460733369835e-06, "loss": 0.17735671997070312, "step": 5115 }, { "epoch": 0.7128823242527694, "grad_norm": 0.9496586322784424, "learning_rate": 2.181499105332433e-06, "loss": 0.11848068237304688, "step": 5116 }, { "epoch": 0.7130216679439838, "grad_norm": 0.8203914165496826, "learning_rate": 2.179552763534436e-06, "loss": 0.12268829345703125, "step": 5117 }, { "epoch": 0.7131610116351982, "grad_norm": 1.0039713382720947, "learning_rate": 2.177607048375423e-06, "loss": 0.1338939666748047, "step": 5118 }, { "epoch": 0.7133003553264126, "grad_norm": 0.8745076060295105, "learning_rate": 2.1756619602876857e-06, "loss": 0.15206527709960938, "step": 5119 }, { "epoch": 0.7134396990176269, "grad_norm": 1.2424548864364624, "learning_rate": 2.1737174997033818e-06, "loss": 0.1285247802734375, "step": 5120 }, { "epoch": 0.7135790427088413, "grad_norm": 0.623471200466156, "learning_rate": 2.1717736670545226e-06, "loss": 0.1240081787109375, "step": 5121 }, { "epoch": 0.7137183864000557, "grad_norm": 1.474934697151184, "learning_rate": 2.169830462772985e-06, "loss": 0.14995765686035156, "step": 5122 }, { "epoch": 0.7138577300912701, "grad_norm": 1.232581377029419, "learning_rate": 2.1678878872905063e-06, "loss": 0.18488693237304688, "step": 5123 }, { "epoch": 0.7139970737824844, "grad_norm": 0.6375126242637634, "learning_rate": 2.1659459410386814e-06, "loss": 0.123992919921875, "step": 5124 }, { "epoch": 0.7141364174736988, "grad_norm": 1.592206358909607, "learning_rate": 2.1640046244489637e-06, "loss": 0.16527557373046875, "step": 5125 }, { "epoch": 0.7142757611649132, "grad_norm": 0.7833719253540039, "learning_rate": 2.1620639379526715e-06, "loss": 0.12495040893554688, "step": 5126 }, { "epoch": 0.7144151048561277, "grad_norm": 0.5200539827346802, "learning_rate": 2.1601238819809827e-06, "loss": 0.10237503051757812, "step": 5127 }, { "epoch": 0.7145544485473421, "grad_norm": 0.7197864651679993, "learning_rate": 2.158184456964932e-06, "loss": 0.1285400390625, "step": 5128 }, { "epoch": 0.7146937922385564, "grad_norm": 0.8945698738098145, "learning_rate": 2.156245663335414e-06, "loss": 0.12633895874023438, "step": 5129 }, { "epoch": 0.7148331359297708, "grad_norm": 0.7794166803359985, "learning_rate": 2.154307501523185e-06, "loss": 0.127685546875, "step": 5130 }, { "epoch": 0.7149724796209852, "grad_norm": 1.155676245689392, "learning_rate": 2.1523699719588633e-06, "loss": 0.17523956298828125, "step": 5131 }, { "epoch": 0.7151118233121996, "grad_norm": 1.0571069717407227, "learning_rate": 2.1504330750729185e-06, "loss": 0.12944984436035156, "step": 5132 }, { "epoch": 0.715251167003414, "grad_norm": 1.4185274839401245, "learning_rate": 2.1484968112956884e-06, "loss": 0.18743896484375, "step": 5133 }, { "epoch": 0.7153905106946283, "grad_norm": 1.314103603363037, "learning_rate": 2.146561181057368e-06, "loss": 0.15967941284179688, "step": 5134 }, { "epoch": 0.7155298543858427, "grad_norm": 0.8179298043251038, "learning_rate": 2.1446261847880073e-06, "loss": 0.12480926513671875, "step": 5135 }, { "epoch": 0.7156691980770571, "grad_norm": 0.8192213177680969, "learning_rate": 2.1426918229175175e-06, "loss": 0.12614822387695312, "step": 5136 }, { "epoch": 0.7158085417682715, "grad_norm": 1.2510634660720825, "learning_rate": 2.140758095875671e-06, "loss": 0.1756591796875, "step": 5137 }, { "epoch": 0.7159478854594858, "grad_norm": 1.831084966659546, "learning_rate": 2.1388250040921007e-06, "loss": 0.17676925659179688, "step": 5138 }, { "epoch": 0.7160872291507002, "grad_norm": 0.8122415542602539, "learning_rate": 2.136892547996292e-06, "loss": 0.12652587890625, "step": 5139 }, { "epoch": 0.7162265728419146, "grad_norm": 0.928229033946991, "learning_rate": 2.1349607280175918e-06, "loss": 0.12842178344726562, "step": 5140 }, { "epoch": 0.716365916533129, "grad_norm": 0.956800639629364, "learning_rate": 2.133029544585207e-06, "loss": 0.14337921142578125, "step": 5141 }, { "epoch": 0.7165052602243434, "grad_norm": 1.3632633686065674, "learning_rate": 2.1310989981282067e-06, "loss": 0.1352519989013672, "step": 5142 }, { "epoch": 0.7166446039155577, "grad_norm": 0.9659520983695984, "learning_rate": 2.1291690890755078e-06, "loss": 0.13903427124023438, "step": 5143 }, { "epoch": 0.7167839476067721, "grad_norm": 1.042087197303772, "learning_rate": 2.127239817855897e-06, "loss": 0.1384563446044922, "step": 5144 }, { "epoch": 0.7169232912979865, "grad_norm": 0.929501473903656, "learning_rate": 2.1253111848980113e-06, "loss": 0.14173507690429688, "step": 5145 }, { "epoch": 0.7170626349892009, "grad_norm": 1.1538232564926147, "learning_rate": 2.1233831906303514e-06, "loss": 0.15615081787109375, "step": 5146 }, { "epoch": 0.7172019786804152, "grad_norm": 0.9917104840278625, "learning_rate": 2.121455835481271e-06, "loss": 0.12278366088867188, "step": 5147 }, { "epoch": 0.7173413223716296, "grad_norm": 0.8676825761795044, "learning_rate": 2.119529119878985e-06, "loss": 0.13061141967773438, "step": 5148 }, { "epoch": 0.717480666062844, "grad_norm": 1.1331133842468262, "learning_rate": 2.1176030442515704e-06, "loss": 0.14499664306640625, "step": 5149 }, { "epoch": 0.7176200097540584, "grad_norm": 1.870604157447815, "learning_rate": 2.115677609026949e-06, "loss": 0.164794921875, "step": 5150 }, { "epoch": 0.7177593534452728, "grad_norm": 0.8527688384056091, "learning_rate": 2.1137528146329133e-06, "loss": 0.14107513427734375, "step": 5151 }, { "epoch": 0.7178986971364871, "grad_norm": 0.6909233927726746, "learning_rate": 2.1118286614971075e-06, "loss": 0.12343215942382812, "step": 5152 }, { "epoch": 0.7180380408277015, "grad_norm": 2.7152352333068848, "learning_rate": 2.1099051500470368e-06, "loss": 0.20374298095703125, "step": 5153 }, { "epoch": 0.7181773845189159, "grad_norm": 1.3789302110671997, "learning_rate": 2.1079822807100585e-06, "loss": 0.1726531982421875, "step": 5154 }, { "epoch": 0.7183167282101303, "grad_norm": 0.6107057332992554, "learning_rate": 2.1060600539133928e-06, "loss": 0.13625717163085938, "step": 5155 }, { "epoch": 0.7184560719013446, "grad_norm": 1.0184725522994995, "learning_rate": 2.104138470084114e-06, "loss": 0.160614013671875, "step": 5156 }, { "epoch": 0.718595415592559, "grad_norm": 1.1781585216522217, "learning_rate": 2.1022175296491516e-06, "loss": 0.13550949096679688, "step": 5157 }, { "epoch": 0.7187347592837734, "grad_norm": 1.3483027219772339, "learning_rate": 2.100297233035296e-06, "loss": 0.1732177734375, "step": 5158 }, { "epoch": 0.7188741029749878, "grad_norm": 0.648726224899292, "learning_rate": 2.098377580669196e-06, "loss": 0.11738967895507812, "step": 5159 }, { "epoch": 0.7190134466662021, "grad_norm": 1.010301113128662, "learning_rate": 2.096458572977352e-06, "loss": 0.1526012420654297, "step": 5160 }, { "epoch": 0.7191527903574165, "grad_norm": 0.8148935437202454, "learning_rate": 2.0945402103861233e-06, "loss": 0.12741661071777344, "step": 5161 }, { "epoch": 0.7192921340486309, "grad_norm": 1.0034079551696777, "learning_rate": 2.0926224933217267e-06, "loss": 0.15369033813476562, "step": 5162 }, { "epoch": 0.7194314777398453, "grad_norm": 1.1863049268722534, "learning_rate": 2.0907054222102367e-06, "loss": 0.16123199462890625, "step": 5163 }, { "epoch": 0.7195708214310597, "grad_norm": 0.9903561472892761, "learning_rate": 2.0887889974775805e-06, "loss": 0.164093017578125, "step": 5164 }, { "epoch": 0.719710165122274, "grad_norm": 0.839038610458374, "learning_rate": 2.0868732195495463e-06, "loss": 0.13262176513671875, "step": 5165 }, { "epoch": 0.7198495088134884, "grad_norm": 0.7597564458847046, "learning_rate": 2.0849580888517733e-06, "loss": 0.1197967529296875, "step": 5166 }, { "epoch": 0.7199888525047029, "grad_norm": 0.7380322217941284, "learning_rate": 2.083043605809763e-06, "loss": 0.12299728393554688, "step": 5167 }, { "epoch": 0.7201281961959173, "grad_norm": 1.1470836400985718, "learning_rate": 2.081129770848867e-06, "loss": 0.14737319946289062, "step": 5168 }, { "epoch": 0.7202675398871317, "grad_norm": 1.0325212478637695, "learning_rate": 2.0792165843942963e-06, "loss": 0.1443634033203125, "step": 5169 }, { "epoch": 0.720406883578346, "grad_norm": 1.235276699066162, "learning_rate": 2.0773040468711205e-06, "loss": 0.1786041259765625, "step": 5170 }, { "epoch": 0.7205462272695604, "grad_norm": 1.1188302040100098, "learning_rate": 2.0753921587042586e-06, "loss": 0.14038848876953125, "step": 5171 }, { "epoch": 0.7206855709607748, "grad_norm": 0.7268931269645691, "learning_rate": 2.0734809203184873e-06, "loss": 0.10632896423339844, "step": 5172 }, { "epoch": 0.7208249146519892, "grad_norm": 1.8710567951202393, "learning_rate": 2.071570332138442e-06, "loss": 0.2005157470703125, "step": 5173 }, { "epoch": 0.7209642583432035, "grad_norm": 0.6558059453964233, "learning_rate": 2.0696603945886133e-06, "loss": 0.12698745727539062, "step": 5174 }, { "epoch": 0.7211036020344179, "grad_norm": 0.9313496351242065, "learning_rate": 2.067751108093343e-06, "loss": 0.13767242431640625, "step": 5175 }, { "epoch": 0.7212429457256323, "grad_norm": 0.9104449152946472, "learning_rate": 2.0658424730768335e-06, "loss": 0.13404464721679688, "step": 5176 }, { "epoch": 0.7213822894168467, "grad_norm": 1.1962087154388428, "learning_rate": 2.063934489963137e-06, "loss": 0.16205215454101562, "step": 5177 }, { "epoch": 0.7215216331080611, "grad_norm": 1.168258547782898, "learning_rate": 2.0620271591761666e-06, "loss": 0.16244125366210938, "step": 5178 }, { "epoch": 0.7216609767992754, "grad_norm": 0.9825623631477356, "learning_rate": 2.0601204811396847e-06, "loss": 0.15692520141601562, "step": 5179 }, { "epoch": 0.7218003204904898, "grad_norm": 1.0021919012069702, "learning_rate": 2.058214456277314e-06, "loss": 0.127838134765625, "step": 5180 }, { "epoch": 0.7219396641817042, "grad_norm": 1.239786982536316, "learning_rate": 2.0563090850125318e-06, "loss": 0.12601470947265625, "step": 5181 }, { "epoch": 0.7220790078729186, "grad_norm": 0.8108434081077576, "learning_rate": 2.054404367768662e-06, "loss": 0.14042282104492188, "step": 5182 }, { "epoch": 0.7222183515641329, "grad_norm": 0.8032947778701782, "learning_rate": 2.0525003049688923e-06, "loss": 0.133941650390625, "step": 5183 }, { "epoch": 0.7223576952553473, "grad_norm": 1.237004041671753, "learning_rate": 2.0505968970362627e-06, "loss": 0.12690353393554688, "step": 5184 }, { "epoch": 0.7224970389465617, "grad_norm": 0.873264491558075, "learning_rate": 2.048694144393668e-06, "loss": 0.12143325805664062, "step": 5185 }, { "epoch": 0.7226363826377761, "grad_norm": 1.1818883419036865, "learning_rate": 2.0467920474638552e-06, "loss": 0.16190338134765625, "step": 5186 }, { "epoch": 0.7227757263289905, "grad_norm": 0.8301977515220642, "learning_rate": 2.0448906066694247e-06, "loss": 0.12075042724609375, "step": 5187 }, { "epoch": 0.7229150700202048, "grad_norm": 0.7394042015075684, "learning_rate": 2.042989822432837e-06, "loss": 0.11195755004882812, "step": 5188 }, { "epoch": 0.7230544137114192, "grad_norm": 1.0243160724639893, "learning_rate": 2.041089695176399e-06, "loss": 0.14239120483398438, "step": 5189 }, { "epoch": 0.7231937574026336, "grad_norm": 1.0258573293685913, "learning_rate": 2.0391902253222777e-06, "loss": 0.14971160888671875, "step": 5190 }, { "epoch": 0.723333101093848, "grad_norm": 1.035935640335083, "learning_rate": 2.037291413292494e-06, "loss": 0.12982559204101562, "step": 5191 }, { "epoch": 0.7234724447850623, "grad_norm": 1.8071776628494263, "learning_rate": 2.035393259508919e-06, "loss": 0.1343536376953125, "step": 5192 }, { "epoch": 0.7236117884762767, "grad_norm": 2.2382166385650635, "learning_rate": 2.0334957643932757e-06, "loss": 0.2069683074951172, "step": 5193 }, { "epoch": 0.7237511321674911, "grad_norm": 1.5275393724441528, "learning_rate": 2.0315989283671474e-06, "loss": 0.163330078125, "step": 5194 }, { "epoch": 0.7238904758587055, "grad_norm": 0.9568719267845154, "learning_rate": 2.0297027518519696e-06, "loss": 0.12676620483398438, "step": 5195 }, { "epoch": 0.7240298195499199, "grad_norm": 0.7419878840446472, "learning_rate": 2.0278072352690253e-06, "loss": 0.121551513671875, "step": 5196 }, { "epoch": 0.7241691632411342, "grad_norm": 0.9754608869552612, "learning_rate": 2.0259123790394587e-06, "loss": 0.12673568725585938, "step": 5197 }, { "epoch": 0.7243085069323486, "grad_norm": 0.8981016874313354, "learning_rate": 2.0240181835842605e-06, "loss": 0.12445831298828125, "step": 5198 }, { "epoch": 0.724447850623563, "grad_norm": 1.2273555994033813, "learning_rate": 2.0221246493242802e-06, "loss": 0.15620040893554688, "step": 5199 }, { "epoch": 0.7245871943147774, "grad_norm": 0.5415080189704895, "learning_rate": 2.0202317766802155e-06, "loss": 0.1104583740234375, "step": 5200 }, { "epoch": 0.7247265380059917, "grad_norm": 1.036645770072937, "learning_rate": 2.0183395660726208e-06, "loss": 0.15697097778320312, "step": 5201 }, { "epoch": 0.7248658816972061, "grad_norm": 1.007156491279602, "learning_rate": 2.0164480179219038e-06, "loss": 0.15443038940429688, "step": 5202 }, { "epoch": 0.7250052253884205, "grad_norm": 0.7384284138679504, "learning_rate": 2.014557132648321e-06, "loss": 0.13836669921875, "step": 5203 }, { "epoch": 0.7251445690796349, "grad_norm": 0.865945041179657, "learning_rate": 2.0126669106719833e-06, "loss": 0.157745361328125, "step": 5204 }, { "epoch": 0.7252839127708492, "grad_norm": 1.5047388076782227, "learning_rate": 2.010777352412856e-06, "loss": 0.14229202270507812, "step": 5205 }, { "epoch": 0.7254232564620636, "grad_norm": 0.948375940322876, "learning_rate": 2.0088884582907574e-06, "loss": 0.1466999053955078, "step": 5206 }, { "epoch": 0.7255626001532781, "grad_norm": 1.1129387617111206, "learning_rate": 2.0070002287253554e-06, "loss": 0.15682601928710938, "step": 5207 }, { "epoch": 0.7257019438444925, "grad_norm": 1.0658890008926392, "learning_rate": 2.0051126641361697e-06, "loss": 0.15709686279296875, "step": 5208 }, { "epoch": 0.7258412875357069, "grad_norm": 0.8928623795509338, "learning_rate": 2.0032257649425753e-06, "loss": 0.13193893432617188, "step": 5209 }, { "epoch": 0.7259806312269212, "grad_norm": 0.8264750242233276, "learning_rate": 2.0013395315637997e-06, "loss": 0.11717987060546875, "step": 5210 }, { "epoch": 0.7261199749181356, "grad_norm": 0.7450883388519287, "learning_rate": 1.9994539644189183e-06, "loss": 0.12659454345703125, "step": 5211 }, { "epoch": 0.72625931860935, "grad_norm": 1.1312284469604492, "learning_rate": 1.9975690639268623e-06, "loss": 0.17509078979492188, "step": 5212 }, { "epoch": 0.7263986623005644, "grad_norm": 1.054996371269226, "learning_rate": 1.9956848305064156e-06, "loss": 0.10895919799804688, "step": 5213 }, { "epoch": 0.7265380059917788, "grad_norm": 0.6520665884017944, "learning_rate": 1.99380126457621e-06, "loss": 0.12070465087890625, "step": 5214 }, { "epoch": 0.7266773496829931, "grad_norm": 1.6962063312530518, "learning_rate": 1.9919183665547285e-06, "loss": 0.17657089233398438, "step": 5215 }, { "epoch": 0.7268166933742075, "grad_norm": 0.8260923624038696, "learning_rate": 1.9900361368603104e-06, "loss": 0.1373004913330078, "step": 5216 }, { "epoch": 0.7269560370654219, "grad_norm": 0.7146342396736145, "learning_rate": 1.988154575911146e-06, "loss": 0.14255142211914062, "step": 5217 }, { "epoch": 0.7270953807566363, "grad_norm": 0.7686663269996643, "learning_rate": 1.9862736841252734e-06, "loss": 0.1166839599609375, "step": 5218 }, { "epoch": 0.7272347244478506, "grad_norm": 0.9618436098098755, "learning_rate": 1.984393461920581e-06, "loss": 0.11835479736328125, "step": 5219 }, { "epoch": 0.727374068139065, "grad_norm": 1.5869358777999878, "learning_rate": 1.9825139097148166e-06, "loss": 0.16080093383789062, "step": 5220 }, { "epoch": 0.7275134118302794, "grad_norm": 1.7269805669784546, "learning_rate": 1.980635027925569e-06, "loss": 0.18519020080566406, "step": 5221 }, { "epoch": 0.7276527555214938, "grad_norm": 1.548466682434082, "learning_rate": 1.9787568169702848e-06, "loss": 0.16127395629882812, "step": 5222 }, { "epoch": 0.7277920992127082, "grad_norm": 0.7830275893211365, "learning_rate": 1.9768792772662616e-06, "loss": 0.13185882568359375, "step": 5223 }, { "epoch": 0.7279314429039225, "grad_norm": 0.8752524256706238, "learning_rate": 1.975002409230644e-06, "loss": 0.12018203735351562, "step": 5224 }, { "epoch": 0.7280707865951369, "grad_norm": 0.8467622399330139, "learning_rate": 1.9731262132804275e-06, "loss": 0.1480693817138672, "step": 5225 }, { "epoch": 0.7282101302863513, "grad_norm": 0.9444297552108765, "learning_rate": 1.9712506898324613e-06, "loss": 0.14515113830566406, "step": 5226 }, { "epoch": 0.7283494739775657, "grad_norm": 0.5598741769790649, "learning_rate": 1.969375839303447e-06, "loss": 0.12445449829101562, "step": 5227 }, { "epoch": 0.72848881766878, "grad_norm": 0.9238976836204529, "learning_rate": 1.967501662109928e-06, "loss": 0.1690216064453125, "step": 5228 }, { "epoch": 0.7286281613599944, "grad_norm": 0.8727299571037292, "learning_rate": 1.965628158668309e-06, "loss": 0.12081527709960938, "step": 5229 }, { "epoch": 0.7287675050512088, "grad_norm": 0.5794031023979187, "learning_rate": 1.9637553293948353e-06, "loss": 0.11856842041015625, "step": 5230 }, { "epoch": 0.7289068487424232, "grad_norm": 0.6299552917480469, "learning_rate": 1.9618831747056106e-06, "loss": 0.12487411499023438, "step": 5231 }, { "epoch": 0.7290461924336376, "grad_norm": 0.6327224969863892, "learning_rate": 1.960011695016581e-06, "loss": 0.11590957641601562, "step": 5232 }, { "epoch": 0.7291855361248519, "grad_norm": 0.8583663105964661, "learning_rate": 1.958140890743549e-06, "loss": 0.15029525756835938, "step": 5233 }, { "epoch": 0.7293248798160663, "grad_norm": 0.9594740867614746, "learning_rate": 1.956270762302166e-06, "loss": 0.1362762451171875, "step": 5234 }, { "epoch": 0.7294642235072807, "grad_norm": 0.832170307636261, "learning_rate": 1.9544013101079295e-06, "loss": 0.14886474609375, "step": 5235 }, { "epoch": 0.7296035671984951, "grad_norm": 1.0741409063339233, "learning_rate": 1.9525325345761887e-06, "loss": 0.13684463500976562, "step": 5236 }, { "epoch": 0.7297429108897094, "grad_norm": 0.8376543521881104, "learning_rate": 1.950664436122144e-06, "loss": 0.14548873901367188, "step": 5237 }, { "epoch": 0.7298822545809238, "grad_norm": 1.3751169443130493, "learning_rate": 1.948797015160845e-06, "loss": 0.18764495849609375, "step": 5238 }, { "epoch": 0.7300215982721382, "grad_norm": 0.8103137016296387, "learning_rate": 1.94693027210719e-06, "loss": 0.11551475524902344, "step": 5239 }, { "epoch": 0.7301609419633526, "grad_norm": 0.8293259143829346, "learning_rate": 1.945064207375923e-06, "loss": 0.12533187866210938, "step": 5240 }, { "epoch": 0.730300285654567, "grad_norm": 0.8019658923149109, "learning_rate": 1.9431988213816444e-06, "loss": 0.12862014770507812, "step": 5241 }, { "epoch": 0.7304396293457813, "grad_norm": 1.5499968528747559, "learning_rate": 1.9413341145388013e-06, "loss": 0.1843719482421875, "step": 5242 }, { "epoch": 0.7305789730369957, "grad_norm": 1.045803427696228, "learning_rate": 1.9394700872616856e-06, "loss": 0.14097213745117188, "step": 5243 }, { "epoch": 0.7307183167282101, "grad_norm": 1.3114511966705322, "learning_rate": 1.9376067399644456e-06, "loss": 0.16114425659179688, "step": 5244 }, { "epoch": 0.7308576604194245, "grad_norm": 0.8620997667312622, "learning_rate": 1.93574407306107e-06, "loss": 0.1276092529296875, "step": 5245 }, { "epoch": 0.7309970041106388, "grad_norm": 1.0025633573532104, "learning_rate": 1.9338820869654056e-06, "loss": 0.154876708984375, "step": 5246 }, { "epoch": 0.7311363478018533, "grad_norm": 0.7756446599960327, "learning_rate": 1.9320207820911387e-06, "loss": 0.138671875, "step": 5247 }, { "epoch": 0.7312756914930677, "grad_norm": 1.261311650276184, "learning_rate": 1.930160158851811e-06, "loss": 0.16405868530273438, "step": 5248 }, { "epoch": 0.7314150351842821, "grad_norm": 0.6289763450622559, "learning_rate": 1.9283002176608116e-06, "loss": 0.11036300659179688, "step": 5249 }, { "epoch": 0.7315543788754965, "grad_norm": 1.4006949663162231, "learning_rate": 1.9264409589313767e-06, "loss": 0.18465042114257812, "step": 5250 }, { "epoch": 0.7316937225667108, "grad_norm": 0.9077017903327942, "learning_rate": 1.9245823830765874e-06, "loss": 0.1632843017578125, "step": 5251 }, { "epoch": 0.7318330662579252, "grad_norm": 1.2874137163162231, "learning_rate": 1.92272449050938e-06, "loss": 0.17409515380859375, "step": 5252 }, { "epoch": 0.7319724099491396, "grad_norm": 0.9901373982429504, "learning_rate": 1.920867281642538e-06, "loss": 0.145538330078125, "step": 5253 }, { "epoch": 0.732111753640354, "grad_norm": 0.9759407043457031, "learning_rate": 1.919010756888685e-06, "loss": 0.15047264099121094, "step": 5254 }, { "epoch": 0.7322510973315683, "grad_norm": 1.011871337890625, "learning_rate": 1.917154916660304e-06, "loss": 0.1610107421875, "step": 5255 }, { "epoch": 0.7323904410227827, "grad_norm": 0.695650041103363, "learning_rate": 1.9152997613697184e-06, "loss": 0.13630294799804688, "step": 5256 }, { "epoch": 0.7325297847139971, "grad_norm": 0.8356901407241821, "learning_rate": 1.913445291429099e-06, "loss": 0.11772537231445312, "step": 5257 }, { "epoch": 0.7326691284052115, "grad_norm": 0.7157645225524902, "learning_rate": 1.9115915072504683e-06, "loss": 0.1327362060546875, "step": 5258 }, { "epoch": 0.7328084720964259, "grad_norm": 0.9442697167396545, "learning_rate": 1.909738409245697e-06, "loss": 0.16115951538085938, "step": 5259 }, { "epoch": 0.7329478157876402, "grad_norm": 0.8868359327316284, "learning_rate": 1.9078859978264995e-06, "loss": 0.13124847412109375, "step": 5260 }, { "epoch": 0.7330871594788546, "grad_norm": 0.6784740090370178, "learning_rate": 1.9060342734044374e-06, "loss": 0.12067031860351562, "step": 5261 }, { "epoch": 0.733226503170069, "grad_norm": 0.856418251991272, "learning_rate": 1.904183236390923e-06, "loss": 0.123199462890625, "step": 5262 }, { "epoch": 0.7333658468612834, "grad_norm": 0.9951573014259338, "learning_rate": 1.9023328871972163e-06, "loss": 0.14030075073242188, "step": 5263 }, { "epoch": 0.7335051905524977, "grad_norm": 0.9377641677856445, "learning_rate": 1.9004832262344197e-06, "loss": 0.15185165405273438, "step": 5264 }, { "epoch": 0.7336445342437121, "grad_norm": 1.1908196210861206, "learning_rate": 1.8986342539134873e-06, "loss": 0.142425537109375, "step": 5265 }, { "epoch": 0.7337838779349265, "grad_norm": 0.9516086578369141, "learning_rate": 1.8967859706452196e-06, "loss": 0.14445877075195312, "step": 5266 }, { "epoch": 0.7339232216261409, "grad_norm": 0.8133694529533386, "learning_rate": 1.894938376840262e-06, "loss": 0.12245559692382812, "step": 5267 }, { "epoch": 0.7340625653173553, "grad_norm": 0.8452646732330322, "learning_rate": 1.8930914729091055e-06, "loss": 0.124114990234375, "step": 5268 }, { "epoch": 0.7342019090085696, "grad_norm": 1.5041115283966064, "learning_rate": 1.8912452592620916e-06, "loss": 0.15829849243164062, "step": 5269 }, { "epoch": 0.734341252699784, "grad_norm": 0.9588634967803955, "learning_rate": 1.8893997363094086e-06, "loss": 0.15432357788085938, "step": 5270 }, { "epoch": 0.7344805963909984, "grad_norm": 0.5672001838684082, "learning_rate": 1.8875549044610886e-06, "loss": 0.11941146850585938, "step": 5271 }, { "epoch": 0.7346199400822128, "grad_norm": 1.5699117183685303, "learning_rate": 1.8857107641270084e-06, "loss": 0.20017623901367188, "step": 5272 }, { "epoch": 0.7347592837734271, "grad_norm": 0.549655556678772, "learning_rate": 1.8838673157168956e-06, "loss": 0.10700607299804688, "step": 5273 }, { "epoch": 0.7348986274646415, "grad_norm": 0.9429145455360413, "learning_rate": 1.8820245596403253e-06, "loss": 0.14517593383789062, "step": 5274 }, { "epoch": 0.7350379711558559, "grad_norm": 0.6991184949874878, "learning_rate": 1.8801824963067105e-06, "loss": 0.11054229736328125, "step": 5275 }, { "epoch": 0.7351773148470703, "grad_norm": 0.8595814108848572, "learning_rate": 1.8783411261253208e-06, "loss": 0.1500396728515625, "step": 5276 }, { "epoch": 0.7353166585382847, "grad_norm": 1.0395923852920532, "learning_rate": 1.8765004495052623e-06, "loss": 0.14405059814453125, "step": 5277 }, { "epoch": 0.735456002229499, "grad_norm": 0.6010565757751465, "learning_rate": 1.8746604668554952e-06, "loss": 0.12826156616210938, "step": 5278 }, { "epoch": 0.7355953459207134, "grad_norm": 0.8527465462684631, "learning_rate": 1.8728211785848176e-06, "loss": 0.13837432861328125, "step": 5279 }, { "epoch": 0.7357346896119278, "grad_norm": 0.6427788734436035, "learning_rate": 1.8709825851018798e-06, "loss": 0.12668228149414062, "step": 5280 }, { "epoch": 0.7358740333031422, "grad_norm": 1.0837922096252441, "learning_rate": 1.869144686815178e-06, "loss": 0.15275192260742188, "step": 5281 }, { "epoch": 0.7360133769943565, "grad_norm": 0.9297360777854919, "learning_rate": 1.8673074841330447e-06, "loss": 0.14490890502929688, "step": 5282 }, { "epoch": 0.7361527206855709, "grad_norm": 1.0196062326431274, "learning_rate": 1.8654709774636676e-06, "loss": 0.15525245666503906, "step": 5283 }, { "epoch": 0.7362920643767853, "grad_norm": 0.7885321378707886, "learning_rate": 1.8636351672150771e-06, "loss": 0.12708282470703125, "step": 5284 }, { "epoch": 0.7364314080679997, "grad_norm": 0.6702994108200073, "learning_rate": 1.8618000537951496e-06, "loss": 0.11419677734375, "step": 5285 }, { "epoch": 0.736570751759214, "grad_norm": 0.7844999432563782, "learning_rate": 1.8599656376116026e-06, "loss": 0.13771438598632812, "step": 5286 }, { "epoch": 0.7367100954504285, "grad_norm": 1.2285152673721313, "learning_rate": 1.8581319190720038e-06, "loss": 0.18524932861328125, "step": 5287 }, { "epoch": 0.7368494391416429, "grad_norm": 1.1779415607452393, "learning_rate": 1.8562988985837632e-06, "loss": 0.14594650268554688, "step": 5288 }, { "epoch": 0.7369887828328573, "grad_norm": 0.4828054904937744, "learning_rate": 1.854466576554133e-06, "loss": 0.10785293579101562, "step": 5289 }, { "epoch": 0.7371281265240717, "grad_norm": 0.6551745533943176, "learning_rate": 1.8526349533902161e-06, "loss": 0.11428451538085938, "step": 5290 }, { "epoch": 0.737267470215286, "grad_norm": 0.6977458000183105, "learning_rate": 1.8508040294989588e-06, "loss": 0.1368408203125, "step": 5291 }, { "epoch": 0.7374068139065004, "grad_norm": 1.7676441669464111, "learning_rate": 1.8489738052871486e-06, "loss": 0.14563369750976562, "step": 5292 }, { "epoch": 0.7375461575977148, "grad_norm": 1.2389272451400757, "learning_rate": 1.8471442811614177e-06, "loss": 0.18166732788085938, "step": 5293 }, { "epoch": 0.7376855012889292, "grad_norm": 0.7141821384429932, "learning_rate": 1.8453154575282472e-06, "loss": 0.11712265014648438, "step": 5294 }, { "epoch": 0.7378248449801436, "grad_norm": 0.7879305481910706, "learning_rate": 1.8434873347939608e-06, "loss": 0.1275463104248047, "step": 5295 }, { "epoch": 0.7379641886713579, "grad_norm": 1.3890132904052734, "learning_rate": 1.8416599133647223e-06, "loss": 0.16607666015625, "step": 5296 }, { "epoch": 0.7381035323625723, "grad_norm": 0.883466899394989, "learning_rate": 1.839833193646547e-06, "loss": 0.14415359497070312, "step": 5297 }, { "epoch": 0.7382428760537867, "grad_norm": 0.9562667012214661, "learning_rate": 1.8380071760452862e-06, "loss": 0.1331634521484375, "step": 5298 }, { "epoch": 0.7383822197450011, "grad_norm": 0.8000630140304565, "learning_rate": 1.8361818609666433e-06, "loss": 0.12825393676757812, "step": 5299 }, { "epoch": 0.7385215634362154, "grad_norm": 0.6725588440895081, "learning_rate": 1.8343572488161576e-06, "loss": 0.121795654296875, "step": 5300 }, { "epoch": 0.7386609071274298, "grad_norm": 1.2240616083145142, "learning_rate": 1.832533339999219e-06, "loss": 0.17170333862304688, "step": 5301 }, { "epoch": 0.7388002508186442, "grad_norm": 0.9316009283065796, "learning_rate": 1.8307101349210588e-06, "loss": 0.13671493530273438, "step": 5302 }, { "epoch": 0.7389395945098586, "grad_norm": 1.1487598419189453, "learning_rate": 1.8288876339867511e-06, "loss": 0.13518524169921875, "step": 5303 }, { "epoch": 0.739078938201073, "grad_norm": 1.4029678106307983, "learning_rate": 1.8270658376012112e-06, "loss": 0.1699542999267578, "step": 5304 }, { "epoch": 0.7392182818922873, "grad_norm": 0.9995510578155518, "learning_rate": 1.8252447461692029e-06, "loss": 0.162994384765625, "step": 5305 }, { "epoch": 0.7393576255835017, "grad_norm": 1.224746584892273, "learning_rate": 1.8234243600953334e-06, "loss": 0.18260574340820312, "step": 5306 }, { "epoch": 0.7394969692747161, "grad_norm": 0.9265666604042053, "learning_rate": 1.8216046797840465e-06, "loss": 0.14260482788085938, "step": 5307 }, { "epoch": 0.7396363129659305, "grad_norm": 0.9134953618049622, "learning_rate": 1.8197857056396372e-06, "loss": 0.1465015411376953, "step": 5308 }, { "epoch": 0.7397756566571448, "grad_norm": 1.7641310691833496, "learning_rate": 1.8179674380662372e-06, "loss": 0.16443252563476562, "step": 5309 }, { "epoch": 0.7399150003483592, "grad_norm": 1.2995574474334717, "learning_rate": 1.8161498774678271e-06, "loss": 0.1778087615966797, "step": 5310 }, { "epoch": 0.7400543440395736, "grad_norm": 1.008107304573059, "learning_rate": 1.8143330242482244e-06, "loss": 0.1531219482421875, "step": 5311 }, { "epoch": 0.740193687730788, "grad_norm": 0.9213442802429199, "learning_rate": 1.8125168788110932e-06, "loss": 0.12681198120117188, "step": 5312 }, { "epoch": 0.7403330314220024, "grad_norm": 1.2661389112472534, "learning_rate": 1.8107014415599416e-06, "loss": 0.12420272827148438, "step": 5313 }, { "epoch": 0.7404723751132167, "grad_norm": 0.8284503817558289, "learning_rate": 1.808886712898117e-06, "loss": 0.127197265625, "step": 5314 }, { "epoch": 0.7406117188044311, "grad_norm": 1.5280014276504517, "learning_rate": 1.8070726932288086e-06, "loss": 0.18828964233398438, "step": 5315 }, { "epoch": 0.7407510624956455, "grad_norm": 0.5932251214981079, "learning_rate": 1.8052593829550525e-06, "loss": 0.09460067749023438, "step": 5316 }, { "epoch": 0.7408904061868599, "grad_norm": 1.407775640487671, "learning_rate": 1.8034467824797252e-06, "loss": 0.14876937866210938, "step": 5317 }, { "epoch": 0.7410297498780742, "grad_norm": 0.6992147564888, "learning_rate": 1.8016348922055448e-06, "loss": 0.1170196533203125, "step": 5318 }, { "epoch": 0.7411690935692886, "grad_norm": 0.6539985537528992, "learning_rate": 1.7998237125350698e-06, "loss": 0.12019729614257812, "step": 5319 }, { "epoch": 0.741308437260503, "grad_norm": 0.8531630635261536, "learning_rate": 1.7980132438707059e-06, "loss": 0.13800811767578125, "step": 5320 }, { "epoch": 0.7414477809517174, "grad_norm": 1.4119517803192139, "learning_rate": 1.7962034866146954e-06, "loss": 0.17998123168945312, "step": 5321 }, { "epoch": 0.7415871246429317, "grad_norm": 1.0631515979766846, "learning_rate": 1.794394441169126e-06, "loss": 0.1292743682861328, "step": 5322 }, { "epoch": 0.7417264683341461, "grad_norm": 1.6247386932373047, "learning_rate": 1.7925861079359268e-06, "loss": 0.16417694091796875, "step": 5323 }, { "epoch": 0.7418658120253605, "grad_norm": 2.085768938064575, "learning_rate": 1.790778487316871e-06, "loss": 0.15046310424804688, "step": 5324 }, { "epoch": 0.7420051557165749, "grad_norm": 0.730361819267273, "learning_rate": 1.7889715797135643e-06, "loss": 0.1446685791015625, "step": 5325 }, { "epoch": 0.7421444994077893, "grad_norm": 1.0197995901107788, "learning_rate": 1.7871653855274634e-06, "loss": 0.14249038696289062, "step": 5326 }, { "epoch": 0.7422838430990036, "grad_norm": 0.7627466320991516, "learning_rate": 1.7853599051598658e-06, "loss": 0.13576507568359375, "step": 5327 }, { "epoch": 0.7424231867902181, "grad_norm": 0.8814473152160645, "learning_rate": 1.7835551390119033e-06, "loss": 0.15695953369140625, "step": 5328 }, { "epoch": 0.7425625304814325, "grad_norm": 0.8465365767478943, "learning_rate": 1.7817510874845585e-06, "loss": 0.13834381103515625, "step": 5329 }, { "epoch": 0.7427018741726469, "grad_norm": 1.0917398929595947, "learning_rate": 1.779947750978646e-06, "loss": 0.13998031616210938, "step": 5330 }, { "epoch": 0.7428412178638613, "grad_norm": 1.2218509912490845, "learning_rate": 1.7781451298948305e-06, "loss": 0.16098403930664062, "step": 5331 }, { "epoch": 0.7429805615550756, "grad_norm": 0.8690909147262573, "learning_rate": 1.7763432246336087e-06, "loss": 0.14956283569335938, "step": 5332 }, { "epoch": 0.74311990524629, "grad_norm": 1.029604434967041, "learning_rate": 1.7745420355953253e-06, "loss": 0.15198898315429688, "step": 5333 }, { "epoch": 0.7432592489375044, "grad_norm": 0.602551281452179, "learning_rate": 1.7727415631801648e-06, "loss": 0.11761856079101562, "step": 5334 }, { "epoch": 0.7433985926287188, "grad_norm": 1.0585246086120605, "learning_rate": 1.7709418077881495e-06, "loss": 0.15228652954101562, "step": 5335 }, { "epoch": 0.7435379363199331, "grad_norm": 1.3891189098358154, "learning_rate": 1.7691427698191422e-06, "loss": 0.1531219482421875, "step": 5336 }, { "epoch": 0.7436772800111475, "grad_norm": 1.174615740776062, "learning_rate": 1.7673444496728493e-06, "loss": 0.14983749389648438, "step": 5337 }, { "epoch": 0.7438166237023619, "grad_norm": 1.5505298376083374, "learning_rate": 1.7655468477488191e-06, "loss": 0.16538619995117188, "step": 5338 }, { "epoch": 0.7439559673935763, "grad_norm": 1.543349266052246, "learning_rate": 1.763749964446435e-06, "loss": 0.17090225219726562, "step": 5339 }, { "epoch": 0.7440953110847907, "grad_norm": 0.6700465679168701, "learning_rate": 1.7619538001649228e-06, "loss": 0.12577438354492188, "step": 5340 }, { "epoch": 0.744234654776005, "grad_norm": 1.1921846866607666, "learning_rate": 1.7601583553033502e-06, "loss": 0.16181373596191406, "step": 5341 }, { "epoch": 0.7443739984672194, "grad_norm": 1.0527002811431885, "learning_rate": 1.7583636302606254e-06, "loss": 0.14205551147460938, "step": 5342 }, { "epoch": 0.7445133421584338, "grad_norm": 1.0568010807037354, "learning_rate": 1.756569625435493e-06, "loss": 0.15442276000976562, "step": 5343 }, { "epoch": 0.7446526858496482, "grad_norm": 1.39496648311615, "learning_rate": 1.7547763412265412e-06, "loss": 0.17455673217773438, "step": 5344 }, { "epoch": 0.7447920295408625, "grad_norm": 1.6356627941131592, "learning_rate": 1.7529837780321979e-06, "loss": 0.17496871948242188, "step": 5345 }, { "epoch": 0.7449313732320769, "grad_norm": 0.5292528867721558, "learning_rate": 1.751191936250729e-06, "loss": 0.1171722412109375, "step": 5346 }, { "epoch": 0.7450707169232913, "grad_norm": 0.9838164448738098, "learning_rate": 1.7494008162802378e-06, "loss": 0.15896987915039062, "step": 5347 }, { "epoch": 0.7452100606145057, "grad_norm": 1.9766814708709717, "learning_rate": 1.7476104185186737e-06, "loss": 0.16810989379882812, "step": 5348 }, { "epoch": 0.74534940430572, "grad_norm": 0.9508851170539856, "learning_rate": 1.7458207433638225e-06, "loss": 0.13376808166503906, "step": 5349 }, { "epoch": 0.7454887479969344, "grad_norm": 1.2441322803497314, "learning_rate": 1.7440317912133076e-06, "loss": 0.1484375, "step": 5350 }, { "epoch": 0.7456280916881488, "grad_norm": 1.195059895515442, "learning_rate": 1.7422435624645928e-06, "loss": 0.1497955322265625, "step": 5351 }, { "epoch": 0.7457674353793632, "grad_norm": 1.1178419589996338, "learning_rate": 1.7404560575149821e-06, "loss": 0.128814697265625, "step": 5352 }, { "epoch": 0.7459067790705776, "grad_norm": 0.7815138101577759, "learning_rate": 1.7386692767616204e-06, "loss": 0.141876220703125, "step": 5353 }, { "epoch": 0.7460461227617919, "grad_norm": 1.027488350868225, "learning_rate": 1.7368832206014863e-06, "loss": 0.14472579956054688, "step": 5354 }, { "epoch": 0.7461854664530063, "grad_norm": 0.6334220170974731, "learning_rate": 1.735097889431404e-06, "loss": 0.10861778259277344, "step": 5355 }, { "epoch": 0.7463248101442207, "grad_norm": 0.854465663433075, "learning_rate": 1.733313283648032e-06, "loss": 0.12640762329101562, "step": 5356 }, { "epoch": 0.7464641538354351, "grad_norm": 0.7378467917442322, "learning_rate": 1.7315294036478664e-06, "loss": 0.13668060302734375, "step": 5357 }, { "epoch": 0.7466034975266495, "grad_norm": 1.082593321800232, "learning_rate": 1.7297462498272476e-06, "loss": 0.1519927978515625, "step": 5358 }, { "epoch": 0.7467428412178638, "grad_norm": 1.0651792287826538, "learning_rate": 1.727963822582352e-06, "loss": 0.14958572387695312, "step": 5359 }, { "epoch": 0.7468821849090782, "grad_norm": 1.0582122802734375, "learning_rate": 1.7261821223091918e-06, "loss": 0.13100814819335938, "step": 5360 }, { "epoch": 0.7470215286002926, "grad_norm": 1.2815401554107666, "learning_rate": 1.7244011494036228e-06, "loss": 0.17174911499023438, "step": 5361 }, { "epoch": 0.747160872291507, "grad_norm": 1.020308256149292, "learning_rate": 1.722620904261334e-06, "loss": 0.15192413330078125, "step": 5362 }, { "epoch": 0.7473002159827213, "grad_norm": 1.2832958698272705, "learning_rate": 1.720841387277858e-06, "loss": 0.17731857299804688, "step": 5363 }, { "epoch": 0.7474395596739357, "grad_norm": 0.7202019095420837, "learning_rate": 1.7190625988485593e-06, "loss": 0.12737274169921875, "step": 5364 }, { "epoch": 0.7475789033651501, "grad_norm": 1.0243825912475586, "learning_rate": 1.7172845393686465e-06, "loss": 0.16481781005859375, "step": 5365 }, { "epoch": 0.7477182470563645, "grad_norm": 0.6771966814994812, "learning_rate": 1.7155072092331648e-06, "loss": 0.11103057861328125, "step": 5366 }, { "epoch": 0.7478575907475788, "grad_norm": 1.0123804807662964, "learning_rate": 1.7137306088369948e-06, "loss": 0.15483474731445312, "step": 5367 }, { "epoch": 0.7479969344387933, "grad_norm": 1.6664841175079346, "learning_rate": 1.7119547385748552e-06, "loss": 0.217529296875, "step": 5368 }, { "epoch": 0.7481362781300077, "grad_norm": 1.1629995107650757, "learning_rate": 1.7101795988413056e-06, "loss": 0.16800880432128906, "step": 5369 }, { "epoch": 0.7482756218212221, "grad_norm": 0.7260962128639221, "learning_rate": 1.708405190030743e-06, "loss": 0.13204193115234375, "step": 5370 }, { "epoch": 0.7484149655124365, "grad_norm": 0.9295766949653625, "learning_rate": 1.7066315125373984e-06, "loss": 0.1517658233642578, "step": 5371 }, { "epoch": 0.7485543092036508, "grad_norm": 0.9325398802757263, "learning_rate": 1.7048585667553414e-06, "loss": 0.13158035278320312, "step": 5372 }, { "epoch": 0.7486936528948652, "grad_norm": 1.140912413597107, "learning_rate": 1.7030863530784814e-06, "loss": 0.1513214111328125, "step": 5373 }, { "epoch": 0.7488329965860796, "grad_norm": 0.7379797101020813, "learning_rate": 1.7013148719005652e-06, "loss": 0.12959671020507812, "step": 5374 }, { "epoch": 0.748972340277294, "grad_norm": 1.0732492208480835, "learning_rate": 1.6995441236151732e-06, "loss": 0.135223388671875, "step": 5375 }, { "epoch": 0.7491116839685084, "grad_norm": 0.8265062570571899, "learning_rate": 1.6977741086157273e-06, "loss": 0.14032745361328125, "step": 5376 }, { "epoch": 0.7492510276597227, "grad_norm": 0.6909268498420715, "learning_rate": 1.6960048272954821e-06, "loss": 0.12119674682617188, "step": 5377 }, { "epoch": 0.7493903713509371, "grad_norm": 0.707757830619812, "learning_rate": 1.6942362800475343e-06, "loss": 0.13013076782226562, "step": 5378 }, { "epoch": 0.7495297150421515, "grad_norm": 0.7281516194343567, "learning_rate": 1.6924684672648117e-06, "loss": 0.117950439453125, "step": 5379 }, { "epoch": 0.7496690587333659, "grad_norm": 1.0639857053756714, "learning_rate": 1.6907013893400838e-06, "loss": 0.1476459503173828, "step": 5380 }, { "epoch": 0.7498084024245802, "grad_norm": 0.6366891264915466, "learning_rate": 1.6889350466659554e-06, "loss": 0.1219940185546875, "step": 5381 }, { "epoch": 0.7499477461157946, "grad_norm": 0.7990272641181946, "learning_rate": 1.687169439634867e-06, "loss": 0.12345504760742188, "step": 5382 }, { "epoch": 0.750087089807009, "grad_norm": 1.3650912046432495, "learning_rate": 1.6854045686390947e-06, "loss": 0.196441650390625, "step": 5383 }, { "epoch": 0.7502264334982234, "grad_norm": 0.8403595089912415, "learning_rate": 1.6836404340707535e-06, "loss": 0.12954330444335938, "step": 5384 }, { "epoch": 0.7503657771894378, "grad_norm": 0.9580997228622437, "learning_rate": 1.6818770363217957e-06, "loss": 0.11964035034179688, "step": 5385 }, { "epoch": 0.7505051208806521, "grad_norm": 1.3144714832305908, "learning_rate": 1.6801143757840043e-06, "loss": 0.13855743408203125, "step": 5386 }, { "epoch": 0.7506444645718665, "grad_norm": 0.5980182886123657, "learning_rate": 1.678352452849007e-06, "loss": 0.10610580444335938, "step": 5387 }, { "epoch": 0.7507838082630809, "grad_norm": 0.9490573406219482, "learning_rate": 1.6765912679082592e-06, "loss": 0.13824462890625, "step": 5388 }, { "epoch": 0.7509231519542953, "grad_norm": 1.2381455898284912, "learning_rate": 1.6748308213530555e-06, "loss": 0.14493942260742188, "step": 5389 }, { "epoch": 0.7510624956455096, "grad_norm": 0.8799588084220886, "learning_rate": 1.6730711135745287e-06, "loss": 0.13210678100585938, "step": 5390 }, { "epoch": 0.751201839336724, "grad_norm": 0.98264080286026, "learning_rate": 1.6713121449636471e-06, "loss": 0.12784957885742188, "step": 5391 }, { "epoch": 0.7513411830279384, "grad_norm": 1.3515353202819824, "learning_rate": 1.6695539159112112e-06, "loss": 0.1746368408203125, "step": 5392 }, { "epoch": 0.7514805267191528, "grad_norm": 1.158768892288208, "learning_rate": 1.6677964268078584e-06, "loss": 0.13082504272460938, "step": 5393 }, { "epoch": 0.7516198704103672, "grad_norm": 0.6540553569793701, "learning_rate": 1.666039678044064e-06, "loss": 0.11543655395507812, "step": 5394 }, { "epoch": 0.7517592141015815, "grad_norm": 1.1255905628204346, "learning_rate": 1.6642836700101396e-06, "loss": 0.15705490112304688, "step": 5395 }, { "epoch": 0.7518985577927959, "grad_norm": 0.7503942251205444, "learning_rate": 1.6625284030962257e-06, "loss": 0.12478065490722656, "step": 5396 }, { "epoch": 0.7520379014840103, "grad_norm": 1.1435260772705078, "learning_rate": 1.6607738776923072e-06, "loss": 0.14915847778320312, "step": 5397 }, { "epoch": 0.7521772451752247, "grad_norm": 1.3554505109786987, "learning_rate": 1.659020094188195e-06, "loss": 0.16341018676757812, "step": 5398 }, { "epoch": 0.752316588866439, "grad_norm": 0.6825370788574219, "learning_rate": 1.657267052973544e-06, "loss": 0.11175537109375, "step": 5399 }, { "epoch": 0.7524559325576534, "grad_norm": 0.9489462375640869, "learning_rate": 1.6555147544378364e-06, "loss": 0.12943267822265625, "step": 5400 }, { "epoch": 0.7525952762488678, "grad_norm": 0.9111690521240234, "learning_rate": 1.653763198970394e-06, "loss": 0.12921714782714844, "step": 5401 }, { "epoch": 0.7527346199400822, "grad_norm": 1.0975675582885742, "learning_rate": 1.652012386960375e-06, "loss": 0.15265274047851562, "step": 5402 }, { "epoch": 0.7528739636312965, "grad_norm": 0.7650169134140015, "learning_rate": 1.6502623187967675e-06, "loss": 0.11459732055664062, "step": 5403 }, { "epoch": 0.7530133073225109, "grad_norm": 0.779686450958252, "learning_rate": 1.6485129948683954e-06, "loss": 0.12681961059570312, "step": 5404 }, { "epoch": 0.7531526510137253, "grad_norm": 0.9621797800064087, "learning_rate": 1.64676441556392e-06, "loss": 0.13734817504882812, "step": 5405 }, { "epoch": 0.7532919947049397, "grad_norm": 0.8225488662719727, "learning_rate": 1.6450165812718377e-06, "loss": 0.1310749053955078, "step": 5406 }, { "epoch": 0.7534313383961541, "grad_norm": 0.7760523557662964, "learning_rate": 1.643269492380473e-06, "loss": 0.13247299194335938, "step": 5407 }, { "epoch": 0.7535706820873685, "grad_norm": 0.6851054430007935, "learning_rate": 1.6415231492779942e-06, "loss": 0.118621826171875, "step": 5408 }, { "epoch": 0.7537100257785829, "grad_norm": 0.77647465467453, "learning_rate": 1.6397775523523946e-06, "loss": 0.13808822631835938, "step": 5409 }, { "epoch": 0.7538493694697973, "grad_norm": 0.9619609117507935, "learning_rate": 1.6380327019915088e-06, "loss": 0.15932655334472656, "step": 5410 }, { "epoch": 0.7539887131610117, "grad_norm": 0.884662389755249, "learning_rate": 1.6362885985830001e-06, "loss": 0.14537811279296875, "step": 5411 }, { "epoch": 0.7541280568522261, "grad_norm": 0.5255986452102661, "learning_rate": 1.6345452425143705e-06, "loss": 0.1092987060546875, "step": 5412 }, { "epoch": 0.7542674005434404, "grad_norm": 1.1306719779968262, "learning_rate": 1.6328026341729547e-06, "loss": 0.14992141723632812, "step": 5413 }, { "epoch": 0.7544067442346548, "grad_norm": 1.1362215280532837, "learning_rate": 1.6310607739459188e-06, "loss": 0.1435699462890625, "step": 5414 }, { "epoch": 0.7545460879258692, "grad_norm": 1.382681965827942, "learning_rate": 1.6293196622202635e-06, "loss": 0.18379592895507812, "step": 5415 }, { "epoch": 0.7546854316170836, "grad_norm": 0.8419610857963562, "learning_rate": 1.6275792993828249e-06, "loss": 0.144775390625, "step": 5416 }, { "epoch": 0.754824775308298, "grad_norm": 0.9698936939239502, "learning_rate": 1.6258396858202746e-06, "loss": 0.15292739868164062, "step": 5417 }, { "epoch": 0.7549641189995123, "grad_norm": 0.7519508600234985, "learning_rate": 1.6241008219191107e-06, "loss": 0.12266921997070312, "step": 5418 }, { "epoch": 0.7551034626907267, "grad_norm": 0.8582730889320374, "learning_rate": 1.622362708065673e-06, "loss": 0.13349151611328125, "step": 5419 }, { "epoch": 0.7552428063819411, "grad_norm": 1.4235323667526245, "learning_rate": 1.6206253446461278e-06, "loss": 0.1570281982421875, "step": 5420 }, { "epoch": 0.7553821500731555, "grad_norm": 0.6906813979148865, "learning_rate": 1.618888732046478e-06, "loss": 0.1213979721069336, "step": 5421 }, { "epoch": 0.7555214937643698, "grad_norm": 1.398767352104187, "learning_rate": 1.6171528706525596e-06, "loss": 0.17208099365234375, "step": 5422 }, { "epoch": 0.7556608374555842, "grad_norm": 0.6955111026763916, "learning_rate": 1.6154177608500415e-06, "loss": 0.13138961791992188, "step": 5423 }, { "epoch": 0.7558001811467986, "grad_norm": 0.9417698383331299, "learning_rate": 1.6136834030244292e-06, "loss": 0.16875076293945312, "step": 5424 }, { "epoch": 0.755939524838013, "grad_norm": 1.4163187742233276, "learning_rate": 1.61194979756105e-06, "loss": 0.15443038940429688, "step": 5425 }, { "epoch": 0.7560788685292273, "grad_norm": 1.1372272968292236, "learning_rate": 1.6102169448450756e-06, "loss": 0.15358352661132812, "step": 5426 }, { "epoch": 0.7562182122204417, "grad_norm": 0.7378349900245667, "learning_rate": 1.6084848452615076e-06, "loss": 0.11663818359375, "step": 5427 }, { "epoch": 0.7563575559116561, "grad_norm": 0.830532968044281, "learning_rate": 1.6067534991951754e-06, "loss": 0.12522506713867188, "step": 5428 }, { "epoch": 0.7564968996028705, "grad_norm": 1.0714229345321655, "learning_rate": 1.6050229070307488e-06, "loss": 0.1534881591796875, "step": 5429 }, { "epoch": 0.7566362432940849, "grad_norm": 1.14556086063385, "learning_rate": 1.6032930691527214e-06, "loss": 0.1446380615234375, "step": 5430 }, { "epoch": 0.7567755869852992, "grad_norm": 1.4217283725738525, "learning_rate": 1.6015639859454278e-06, "loss": 0.14498519897460938, "step": 5431 }, { "epoch": 0.7569149306765136, "grad_norm": 1.0668702125549316, "learning_rate": 1.5998356577930274e-06, "loss": 0.15872955322265625, "step": 5432 }, { "epoch": 0.757054274367728, "grad_norm": 1.7072901725769043, "learning_rate": 1.5981080850795171e-06, "loss": 0.16789627075195312, "step": 5433 }, { "epoch": 0.7571936180589424, "grad_norm": 0.7022960782051086, "learning_rate": 1.5963812681887248e-06, "loss": 0.12961959838867188, "step": 5434 }, { "epoch": 0.7573329617501567, "grad_norm": 0.6806585788726807, "learning_rate": 1.5946552075043092e-06, "loss": 0.12675094604492188, "step": 5435 }, { "epoch": 0.7574723054413711, "grad_norm": 1.6702406406402588, "learning_rate": 1.592929903409759e-06, "loss": 0.15476226806640625, "step": 5436 }, { "epoch": 0.7576116491325855, "grad_norm": 1.6023293733596802, "learning_rate": 1.5912053562884e-06, "loss": 0.18201065063476562, "step": 5437 }, { "epoch": 0.7577509928237999, "grad_norm": 1.0602517127990723, "learning_rate": 1.589481566523388e-06, "loss": 0.14069175720214844, "step": 5438 }, { "epoch": 0.7578903365150143, "grad_norm": 0.791073739528656, "learning_rate": 1.587758534497707e-06, "loss": 0.13356971740722656, "step": 5439 }, { "epoch": 0.7580296802062286, "grad_norm": 1.015844702720642, "learning_rate": 1.5860362605941788e-06, "loss": 0.10285186767578125, "step": 5440 }, { "epoch": 0.758169023897443, "grad_norm": 1.0957016944885254, "learning_rate": 1.5843147451954493e-06, "loss": 0.15148162841796875, "step": 5441 }, { "epoch": 0.7583083675886574, "grad_norm": 1.4355244636535645, "learning_rate": 1.5825939886840036e-06, "loss": 0.1943035125732422, "step": 5442 }, { "epoch": 0.7584477112798718, "grad_norm": 0.9310797452926636, "learning_rate": 1.5808739914421512e-06, "loss": 0.16303634643554688, "step": 5443 }, { "epoch": 0.7585870549710861, "grad_norm": 0.9581086039543152, "learning_rate": 1.5791547538520386e-06, "loss": 0.13241004943847656, "step": 5444 }, { "epoch": 0.7587263986623005, "grad_norm": 0.7371009588241577, "learning_rate": 1.5774362762956414e-06, "loss": 0.14016342163085938, "step": 5445 }, { "epoch": 0.7588657423535149, "grad_norm": 1.1393882036209106, "learning_rate": 1.5757185591547653e-06, "loss": 0.13463973999023438, "step": 5446 }, { "epoch": 0.7590050860447293, "grad_norm": 0.8822434544563293, "learning_rate": 1.574001602811046e-06, "loss": 0.11289215087890625, "step": 5447 }, { "epoch": 0.7591444297359438, "grad_norm": 0.73545241355896, "learning_rate": 1.5722854076459538e-06, "loss": 0.11185455322265625, "step": 5448 }, { "epoch": 0.7592837734271581, "grad_norm": 1.1699780225753784, "learning_rate": 1.57056997404079e-06, "loss": 0.15804290771484375, "step": 5449 }, { "epoch": 0.7594231171183725, "grad_norm": 1.1900758743286133, "learning_rate": 1.5688553023766823e-06, "loss": 0.17783355712890625, "step": 5450 }, { "epoch": 0.7595624608095869, "grad_norm": 1.0006657838821411, "learning_rate": 1.5671413930345902e-06, "loss": 0.145233154296875, "step": 5451 }, { "epoch": 0.7597018045008013, "grad_norm": 1.085832953453064, "learning_rate": 1.5654282463953074e-06, "loss": 0.1576557159423828, "step": 5452 }, { "epoch": 0.7598411481920156, "grad_norm": 1.3366812467575073, "learning_rate": 1.5637158628394572e-06, "loss": 0.1613941192626953, "step": 5453 }, { "epoch": 0.75998049188323, "grad_norm": 1.6850991249084473, "learning_rate": 1.5620042427474892e-06, "loss": 0.16277694702148438, "step": 5454 }, { "epoch": 0.7601198355744444, "grad_norm": 1.0243849754333496, "learning_rate": 1.5602933864996872e-06, "loss": 0.16393661499023438, "step": 5455 }, { "epoch": 0.7602591792656588, "grad_norm": 0.6834743022918701, "learning_rate": 1.5585832944761686e-06, "loss": 0.12429428100585938, "step": 5456 }, { "epoch": 0.7603985229568732, "grad_norm": 0.9805715680122375, "learning_rate": 1.5568739670568693e-06, "loss": 0.15984725952148438, "step": 5457 }, { "epoch": 0.7605378666480875, "grad_norm": 0.46164730191230774, "learning_rate": 1.555165404621567e-06, "loss": 0.1029510498046875, "step": 5458 }, { "epoch": 0.7606772103393019, "grad_norm": 0.8300297856330872, "learning_rate": 1.5534576075498664e-06, "loss": 0.1345081329345703, "step": 5459 }, { "epoch": 0.7608165540305163, "grad_norm": 0.6996000409126282, "learning_rate": 1.5517505762211982e-06, "loss": 0.11960220336914062, "step": 5460 }, { "epoch": 0.7609558977217307, "grad_norm": 0.6801254749298096, "learning_rate": 1.5500443110148283e-06, "loss": 0.13057327270507812, "step": 5461 }, { "epoch": 0.761095241412945, "grad_norm": 1.129528284072876, "learning_rate": 1.5483388123098474e-06, "loss": 0.16381072998046875, "step": 5462 }, { "epoch": 0.7612345851041594, "grad_norm": 0.8665534257888794, "learning_rate": 1.546634080485181e-06, "loss": 0.13869857788085938, "step": 5463 }, { "epoch": 0.7613739287953738, "grad_norm": 1.093444585800171, "learning_rate": 1.5449301159195785e-06, "loss": 0.15926742553710938, "step": 5464 }, { "epoch": 0.7615132724865882, "grad_norm": 0.9164952635765076, "learning_rate": 1.5432269189916237e-06, "loss": 0.11923599243164062, "step": 5465 }, { "epoch": 0.7616526161778026, "grad_norm": 0.8278144598007202, "learning_rate": 1.54152449007973e-06, "loss": 0.1300506591796875, "step": 5466 }, { "epoch": 0.7617919598690169, "grad_norm": 0.6610721349716187, "learning_rate": 1.539822829562136e-06, "loss": 0.12216758728027344, "step": 5467 }, { "epoch": 0.7619313035602313, "grad_norm": 0.9473404288291931, "learning_rate": 1.5381219378169103e-06, "loss": 0.1268482208251953, "step": 5468 }, { "epoch": 0.7620706472514457, "grad_norm": 1.5537971258163452, "learning_rate": 1.5364218152219545e-06, "loss": 0.19925880432128906, "step": 5469 }, { "epoch": 0.7622099909426601, "grad_norm": 0.6491141319274902, "learning_rate": 1.5347224621549978e-06, "loss": 0.12490081787109375, "step": 5470 }, { "epoch": 0.7623493346338744, "grad_norm": 1.3948813676834106, "learning_rate": 1.5330238789935963e-06, "loss": 0.15345382690429688, "step": 5471 }, { "epoch": 0.7624886783250888, "grad_norm": 0.8373867273330688, "learning_rate": 1.5313260661151352e-06, "loss": 0.12703704833984375, "step": 5472 }, { "epoch": 0.7626280220163032, "grad_norm": 0.7078614234924316, "learning_rate": 1.5296290238968303e-06, "loss": 0.12986373901367188, "step": 5473 }, { "epoch": 0.7627673657075176, "grad_norm": 0.9124235510826111, "learning_rate": 1.5279327527157289e-06, "loss": 0.13906478881835938, "step": 5474 }, { "epoch": 0.762906709398732, "grad_norm": 1.220609426498413, "learning_rate": 1.526237252948699e-06, "loss": 0.14171981811523438, "step": 5475 }, { "epoch": 0.7630460530899463, "grad_norm": 1.6237688064575195, "learning_rate": 1.5245425249724443e-06, "loss": 0.17183303833007812, "step": 5476 }, { "epoch": 0.7631853967811607, "grad_norm": 0.8942971229553223, "learning_rate": 1.5228485691634964e-06, "loss": 0.1294269561767578, "step": 5477 }, { "epoch": 0.7633247404723751, "grad_norm": 1.0552432537078857, "learning_rate": 1.5211553858982115e-06, "loss": 0.14789581298828125, "step": 5478 }, { "epoch": 0.7634640841635895, "grad_norm": 0.74024498462677, "learning_rate": 1.5194629755527746e-06, "loss": 0.1389923095703125, "step": 5479 }, { "epoch": 0.7636034278548038, "grad_norm": 0.7177631855010986, "learning_rate": 1.517771338503203e-06, "loss": 0.1181182861328125, "step": 5480 }, { "epoch": 0.7637427715460182, "grad_norm": 0.9001690745353699, "learning_rate": 1.5160804751253405e-06, "loss": 0.13263702392578125, "step": 5481 }, { "epoch": 0.7638821152372326, "grad_norm": 0.8118863105773926, "learning_rate": 1.5143903857948572e-06, "loss": 0.13600921630859375, "step": 5482 }, { "epoch": 0.764021458928447, "grad_norm": 0.8580214381217957, "learning_rate": 1.5127010708872513e-06, "loss": 0.143585205078125, "step": 5483 }, { "epoch": 0.7641608026196614, "grad_norm": 0.7345266938209534, "learning_rate": 1.5110125307778506e-06, "loss": 0.13299179077148438, "step": 5484 }, { "epoch": 0.7643001463108757, "grad_norm": 0.7781555652618408, "learning_rate": 1.5093247658418125e-06, "loss": 0.1387042999267578, "step": 5485 }, { "epoch": 0.7644394900020901, "grad_norm": 0.9293884038925171, "learning_rate": 1.5076377764541162e-06, "loss": 0.14781951904296875, "step": 5486 }, { "epoch": 0.7645788336933045, "grad_norm": 1.017266869544983, "learning_rate": 1.5059515629895754e-06, "loss": 0.1559429168701172, "step": 5487 }, { "epoch": 0.764718177384519, "grad_norm": 0.8183402419090271, "learning_rate": 1.5042661258228268e-06, "loss": 0.13329339027404785, "step": 5488 }, { "epoch": 0.7648575210757333, "grad_norm": 1.0675241947174072, "learning_rate": 1.502581465328335e-06, "loss": 0.14049720764160156, "step": 5489 }, { "epoch": 0.7649968647669477, "grad_norm": 1.4446911811828613, "learning_rate": 1.5008975818803939e-06, "loss": 0.13704299926757812, "step": 5490 }, { "epoch": 0.7651362084581621, "grad_norm": 0.5869758129119873, "learning_rate": 1.4992144758531257e-06, "loss": 0.10643768310546875, "step": 5491 }, { "epoch": 0.7652755521493765, "grad_norm": 0.9683411717414856, "learning_rate": 1.4975321476204767e-06, "loss": 0.1495361328125, "step": 5492 }, { "epoch": 0.7654148958405909, "grad_norm": 0.7811377048492432, "learning_rate": 1.4958505975562205e-06, "loss": 0.13773345947265625, "step": 5493 }, { "epoch": 0.7655542395318052, "grad_norm": 0.8119271397590637, "learning_rate": 1.49416982603396e-06, "loss": 0.12248992919921875, "step": 5494 }, { "epoch": 0.7656935832230196, "grad_norm": 1.250072717666626, "learning_rate": 1.4924898334271265e-06, "loss": 0.128509521484375, "step": 5495 }, { "epoch": 0.765832926914234, "grad_norm": 0.9456384778022766, "learning_rate": 1.4908106201089722e-06, "loss": 0.15250778198242188, "step": 5496 }, { "epoch": 0.7659722706054484, "grad_norm": 1.3616865873336792, "learning_rate": 1.4891321864525826e-06, "loss": 0.1427288055419922, "step": 5497 }, { "epoch": 0.7661116142966627, "grad_norm": 0.8144639730453491, "learning_rate": 1.4874545328308681e-06, "loss": 0.13147735595703125, "step": 5498 }, { "epoch": 0.7662509579878771, "grad_norm": 0.7429901361465454, "learning_rate": 1.4857776596165635e-06, "loss": 0.12490463256835938, "step": 5499 }, { "epoch": 0.7663903016790915, "grad_norm": 0.6315878629684448, "learning_rate": 1.4841015671822306e-06, "loss": 0.10296630859375, "step": 5500 }, { "epoch": 0.7665296453703059, "grad_norm": 1.1695696115493774, "learning_rate": 1.4824262559002595e-06, "loss": 0.15813446044921875, "step": 5501 }, { "epoch": 0.7666689890615203, "grad_norm": 1.3135753870010376, "learning_rate": 1.480751726142869e-06, "loss": 0.1898651123046875, "step": 5502 }, { "epoch": 0.7668083327527346, "grad_norm": 0.48398357629776, "learning_rate": 1.4790779782820991e-06, "loss": 0.09849357604980469, "step": 5503 }, { "epoch": 0.766947676443949, "grad_norm": 1.2316724061965942, "learning_rate": 1.4774050126898164e-06, "loss": 0.16327285766601562, "step": 5504 }, { "epoch": 0.7670870201351634, "grad_norm": 0.8373910188674927, "learning_rate": 1.4757328297377177e-06, "loss": 0.12280654907226562, "step": 5505 }, { "epoch": 0.7672263638263778, "grad_norm": 1.1838322877883911, "learning_rate": 1.474061429797326e-06, "loss": 0.13732147216796875, "step": 5506 }, { "epoch": 0.7673657075175921, "grad_norm": 0.7799462080001831, "learning_rate": 1.4723908132399838e-06, "loss": 0.13280105590820312, "step": 5507 }, { "epoch": 0.7675050512088065, "grad_norm": 0.9959344863891602, "learning_rate": 1.4707209804368683e-06, "loss": 0.11254119873046875, "step": 5508 }, { "epoch": 0.7676443949000209, "grad_norm": 0.6451816558837891, "learning_rate": 1.4690519317589742e-06, "loss": 0.11561203002929688, "step": 5509 }, { "epoch": 0.7677837385912353, "grad_norm": 0.869405210018158, "learning_rate": 1.4673836675771298e-06, "loss": 0.1421356201171875, "step": 5510 }, { "epoch": 0.7679230822824497, "grad_norm": 0.6490334868431091, "learning_rate": 1.4657161882619814e-06, "loss": 0.11764144897460938, "step": 5511 }, { "epoch": 0.768062425973664, "grad_norm": 0.8988577723503113, "learning_rate": 1.4640494941840072e-06, "loss": 0.14923858642578125, "step": 5512 }, { "epoch": 0.7682017696648784, "grad_norm": 1.3531365394592285, "learning_rate": 1.4623835857135099e-06, "loss": 0.1477813720703125, "step": 5513 }, { "epoch": 0.7683411133560928, "grad_norm": 1.7295721769332886, "learning_rate": 1.460718463220615e-06, "loss": 0.18188858032226562, "step": 5514 }, { "epoch": 0.7684804570473072, "grad_norm": 1.2129852771759033, "learning_rate": 1.4590541270752723e-06, "loss": 0.15301513671875, "step": 5515 }, { "epoch": 0.7686198007385215, "grad_norm": 0.78243488073349, "learning_rate": 1.457390577647262e-06, "loss": 0.14423370361328125, "step": 5516 }, { "epoch": 0.7687591444297359, "grad_norm": 0.7047964334487915, "learning_rate": 1.455727815306187e-06, "loss": 0.12993240356445312, "step": 5517 }, { "epoch": 0.7688984881209503, "grad_norm": 0.9094959497451782, "learning_rate": 1.454065840421473e-06, "loss": 0.14458084106445312, "step": 5518 }, { "epoch": 0.7690378318121647, "grad_norm": 1.2284274101257324, "learning_rate": 1.4524046533623758e-06, "loss": 0.16557693481445312, "step": 5519 }, { "epoch": 0.769177175503379, "grad_norm": 1.0292564630508423, "learning_rate": 1.450744254497972e-06, "loss": 0.15155410766601562, "step": 5520 }, { "epoch": 0.7693165191945934, "grad_norm": 0.9527946710586548, "learning_rate": 1.4490846441971624e-06, "loss": 0.142303466796875, "step": 5521 }, { "epoch": 0.7694558628858078, "grad_norm": 0.8348268866539001, "learning_rate": 1.4474258228286758e-06, "loss": 0.1470489501953125, "step": 5522 }, { "epoch": 0.7695952065770222, "grad_norm": 0.6516096591949463, "learning_rate": 1.4457677907610646e-06, "loss": 0.11794471740722656, "step": 5523 }, { "epoch": 0.7697345502682366, "grad_norm": 0.7689725756645203, "learning_rate": 1.4441105483627088e-06, "loss": 0.12737274169921875, "step": 5524 }, { "epoch": 0.7698738939594509, "grad_norm": 0.5631232261657715, "learning_rate": 1.442454096001804e-06, "loss": 0.11978530883789062, "step": 5525 }, { "epoch": 0.7700132376506653, "grad_norm": 1.9442036151885986, "learning_rate": 1.4407984340463794e-06, "loss": 0.2019824981689453, "step": 5526 }, { "epoch": 0.7701525813418797, "grad_norm": 1.2051371335983276, "learning_rate": 1.4391435628642853e-06, "loss": 0.15362167358398438, "step": 5527 }, { "epoch": 0.7702919250330941, "grad_norm": 1.0974640846252441, "learning_rate": 1.437489482823195e-06, "loss": 0.16204833984375, "step": 5528 }, { "epoch": 0.7704312687243086, "grad_norm": 1.1690514087677002, "learning_rate": 1.4358361942906097e-06, "loss": 0.14013290405273438, "step": 5529 }, { "epoch": 0.7705706124155229, "grad_norm": 1.3237452507019043, "learning_rate": 1.4341836976338485e-06, "loss": 0.16051101684570312, "step": 5530 }, { "epoch": 0.7707099561067373, "grad_norm": 1.0317885875701904, "learning_rate": 1.4325319932200631e-06, "loss": 0.12051010131835938, "step": 5531 }, { "epoch": 0.7708492997979517, "grad_norm": 1.0572196245193481, "learning_rate": 1.43088108141622e-06, "loss": 0.14560317993164062, "step": 5532 }, { "epoch": 0.7709886434891661, "grad_norm": 0.8194844722747803, "learning_rate": 1.4292309625891166e-06, "loss": 0.14234542846679688, "step": 5533 }, { "epoch": 0.7711279871803804, "grad_norm": 0.8233112692832947, "learning_rate": 1.4275816371053725e-06, "loss": 0.14629364013671875, "step": 5534 }, { "epoch": 0.7712673308715948, "grad_norm": 0.6776682734489441, "learning_rate": 1.425933105331429e-06, "loss": 0.11760330200195312, "step": 5535 }, { "epoch": 0.7714066745628092, "grad_norm": 2.170128345489502, "learning_rate": 1.424285367633551e-06, "loss": 0.15884780883789062, "step": 5536 }, { "epoch": 0.7715460182540236, "grad_norm": 1.1651158332824707, "learning_rate": 1.422638424377829e-06, "loss": 0.16214752197265625, "step": 5537 }, { "epoch": 0.771685361945238, "grad_norm": 1.0274637937545776, "learning_rate": 1.420992275930178e-06, "loss": 0.1421661376953125, "step": 5538 }, { "epoch": 0.7718247056364523, "grad_norm": 1.1126787662506104, "learning_rate": 1.4193469226563322e-06, "loss": 0.1458272933959961, "step": 5539 }, { "epoch": 0.7719640493276667, "grad_norm": 0.6349291801452637, "learning_rate": 1.4177023649218536e-06, "loss": 0.11780166625976562, "step": 5540 }, { "epoch": 0.7721033930188811, "grad_norm": 0.5387141108512878, "learning_rate": 1.4160586030921224e-06, "loss": 0.11107826232910156, "step": 5541 }, { "epoch": 0.7722427367100955, "grad_norm": 0.8511763215065002, "learning_rate": 1.4144156375323486e-06, "loss": 0.12754058837890625, "step": 5542 }, { "epoch": 0.7723820804013098, "grad_norm": 0.8688876628875732, "learning_rate": 1.4127734686075589e-06, "loss": 0.13999176025390625, "step": 5543 }, { "epoch": 0.7725214240925242, "grad_norm": 0.9753671884536743, "learning_rate": 1.411132096682606e-06, "loss": 0.131134033203125, "step": 5544 }, { "epoch": 0.7726607677837386, "grad_norm": 1.0024209022521973, "learning_rate": 1.4094915221221677e-06, "loss": 0.13153076171875, "step": 5545 }, { "epoch": 0.772800111474953, "grad_norm": 1.7166625261306763, "learning_rate": 1.4078517452907403e-06, "loss": 0.16414642333984375, "step": 5546 }, { "epoch": 0.7729394551661674, "grad_norm": 0.9161235690116882, "learning_rate": 1.4062127665526438e-06, "loss": 0.1297607421875, "step": 5547 }, { "epoch": 0.7730787988573817, "grad_norm": 0.7860314249992371, "learning_rate": 1.4045745862720227e-06, "loss": 0.12732696533203125, "step": 5548 }, { "epoch": 0.7732181425485961, "grad_norm": 1.6111400127410889, "learning_rate": 1.4029372048128454e-06, "loss": 0.19945144653320312, "step": 5549 }, { "epoch": 0.7733574862398105, "grad_norm": 0.939275860786438, "learning_rate": 1.401300622538897e-06, "loss": 0.12966537475585938, "step": 5550 }, { "epoch": 0.7734968299310249, "grad_norm": 0.9993011951446533, "learning_rate": 1.3996648398137924e-06, "loss": 0.1593017578125, "step": 5551 }, { "epoch": 0.7736361736222392, "grad_norm": 0.712755560874939, "learning_rate": 1.398029857000962e-06, "loss": 0.1211700439453125, "step": 5552 }, { "epoch": 0.7737755173134536, "grad_norm": 1.069273829460144, "learning_rate": 1.3963956744636642e-06, "loss": 0.16070175170898438, "step": 5553 }, { "epoch": 0.773914861004668, "grad_norm": 0.7044382691383362, "learning_rate": 1.394762292564974e-06, "loss": 0.11480331420898438, "step": 5554 }, { "epoch": 0.7740542046958824, "grad_norm": 1.0227853059768677, "learning_rate": 1.393129711667794e-06, "loss": 0.13634872436523438, "step": 5555 }, { "epoch": 0.7741935483870968, "grad_norm": 0.8426199555397034, "learning_rate": 1.3914979321348488e-06, "loss": 0.13033294677734375, "step": 5556 }, { "epoch": 0.7743328920783111, "grad_norm": 0.7944507598876953, "learning_rate": 1.3898669543286763e-06, "loss": 0.12474632263183594, "step": 5557 }, { "epoch": 0.7744722357695255, "grad_norm": 0.7607647776603699, "learning_rate": 1.3882367786116458e-06, "loss": 0.12060928344726562, "step": 5558 }, { "epoch": 0.7746115794607399, "grad_norm": 0.9085588455200195, "learning_rate": 1.3866074053459465e-06, "loss": 0.13416671752929688, "step": 5559 }, { "epoch": 0.7747509231519543, "grad_norm": 1.0918396711349487, "learning_rate": 1.3849788348935856e-06, "loss": 0.13129425048828125, "step": 5560 }, { "epoch": 0.7748902668431686, "grad_norm": 0.7830016613006592, "learning_rate": 1.3833510676163963e-06, "loss": 0.12625885009765625, "step": 5561 }, { "epoch": 0.775029610534383, "grad_norm": 0.9012940526008606, "learning_rate": 1.3817241038760287e-06, "loss": 0.12970352172851562, "step": 5562 }, { "epoch": 0.7751689542255974, "grad_norm": 1.0658035278320312, "learning_rate": 1.3800979440339602e-06, "loss": 0.13155555725097656, "step": 5563 }, { "epoch": 0.7753082979168118, "grad_norm": 0.8664523959159851, "learning_rate": 1.3784725884514833e-06, "loss": 0.14529037475585938, "step": 5564 }, { "epoch": 0.7754476416080262, "grad_norm": 1.3420112133026123, "learning_rate": 1.3768480374897163e-06, "loss": 0.179962158203125, "step": 5565 }, { "epoch": 0.7755869852992405, "grad_norm": 1.4036198854446411, "learning_rate": 1.3752242915095993e-06, "loss": 0.1736907958984375, "step": 5566 }, { "epoch": 0.7757263289904549, "grad_norm": 0.9469723701477051, "learning_rate": 1.3736013508718892e-06, "loss": 0.13090896606445312, "step": 5567 }, { "epoch": 0.7758656726816693, "grad_norm": 0.8279178738594055, "learning_rate": 1.371979215937166e-06, "loss": 0.11737442016601562, "step": 5568 }, { "epoch": 0.7760050163728838, "grad_norm": 1.3289952278137207, "learning_rate": 1.3703578870658312e-06, "loss": 0.1645526885986328, "step": 5569 }, { "epoch": 0.7761443600640981, "grad_norm": 0.9267190098762512, "learning_rate": 1.3687373646181095e-06, "loss": 0.14470291137695312, "step": 5570 }, { "epoch": 0.7762837037553125, "grad_norm": 0.9346184730529785, "learning_rate": 1.3671176489540406e-06, "loss": 0.13722610473632812, "step": 5571 }, { "epoch": 0.7764230474465269, "grad_norm": 1.0320396423339844, "learning_rate": 1.3654987404334917e-06, "loss": 0.14646530151367188, "step": 5572 }, { "epoch": 0.7765623911377413, "grad_norm": 0.6648268103599548, "learning_rate": 1.363880639416144e-06, "loss": 0.09629440307617188, "step": 5573 }, { "epoch": 0.7767017348289557, "grad_norm": 0.9039913415908813, "learning_rate": 1.3622633462615058e-06, "loss": 0.13562393188476562, "step": 5574 }, { "epoch": 0.77684107852017, "grad_norm": 1.4060742855072021, "learning_rate": 1.3606468613288997e-06, "loss": 0.14122390747070312, "step": 5575 }, { "epoch": 0.7769804222113844, "grad_norm": 0.8977643847465515, "learning_rate": 1.359031184977473e-06, "loss": 0.12440299987792969, "step": 5576 }, { "epoch": 0.7771197659025988, "grad_norm": 1.0766831636428833, "learning_rate": 1.3574163175661936e-06, "loss": 0.1491546630859375, "step": 5577 }, { "epoch": 0.7772591095938132, "grad_norm": 1.021713137626648, "learning_rate": 1.3558022594538473e-06, "loss": 0.1465778350830078, "step": 5578 }, { "epoch": 0.7773984532850275, "grad_norm": 1.0969668626785278, "learning_rate": 1.3541890109990386e-06, "loss": 0.17201995849609375, "step": 5579 }, { "epoch": 0.7775377969762419, "grad_norm": 1.0868464708328247, "learning_rate": 1.3525765725601964e-06, "loss": 0.13716506958007812, "step": 5580 }, { "epoch": 0.7776771406674563, "grad_norm": 1.2432324886322021, "learning_rate": 1.3509649444955697e-06, "loss": 0.165191650390625, "step": 5581 }, { "epoch": 0.7778164843586707, "grad_norm": 1.0967553853988647, "learning_rate": 1.3493541271632227e-06, "loss": 0.17171096801757812, "step": 5582 }, { "epoch": 0.7779558280498851, "grad_norm": 0.8603468537330627, "learning_rate": 1.3477441209210418e-06, "loss": 0.12735366821289062, "step": 5583 }, { "epoch": 0.7780951717410994, "grad_norm": 0.9331943988800049, "learning_rate": 1.3461349261267347e-06, "loss": 0.13730239868164062, "step": 5584 }, { "epoch": 0.7782345154323138, "grad_norm": 0.6111783385276794, "learning_rate": 1.3445265431378297e-06, "loss": 0.11836624145507812, "step": 5585 }, { "epoch": 0.7783738591235282, "grad_norm": 0.8336743712425232, "learning_rate": 1.3429189723116693e-06, "loss": 0.14456939697265625, "step": 5586 }, { "epoch": 0.7785132028147426, "grad_norm": 1.017134428024292, "learning_rate": 1.3413122140054219e-06, "loss": 0.12480926513671875, "step": 5587 }, { "epoch": 0.7786525465059569, "grad_norm": 0.864697277545929, "learning_rate": 1.3397062685760715e-06, "loss": 0.13776206970214844, "step": 5588 }, { "epoch": 0.7787918901971713, "grad_norm": 1.0818603038787842, "learning_rate": 1.3381011363804208e-06, "loss": 0.14539337158203125, "step": 5589 }, { "epoch": 0.7789312338883857, "grad_norm": 0.9116232395172119, "learning_rate": 1.3364968177750953e-06, "loss": 0.131561279296875, "step": 5590 }, { "epoch": 0.7790705775796001, "grad_norm": 1.0718753337860107, "learning_rate": 1.3348933131165387e-06, "loss": 0.13109970092773438, "step": 5591 }, { "epoch": 0.7792099212708145, "grad_norm": 0.8127289414405823, "learning_rate": 1.333290622761011e-06, "loss": 0.12009811401367188, "step": 5592 }, { "epoch": 0.7793492649620288, "grad_norm": 1.0405969619750977, "learning_rate": 1.3316887470645956e-06, "loss": 0.14670562744140625, "step": 5593 }, { "epoch": 0.7794886086532432, "grad_norm": 0.5692636370658875, "learning_rate": 1.3300876863831903e-06, "loss": 0.11525726318359375, "step": 5594 }, { "epoch": 0.7796279523444576, "grad_norm": 1.095960259437561, "learning_rate": 1.3284874410725174e-06, "loss": 0.1590423583984375, "step": 5595 }, { "epoch": 0.779767296035672, "grad_norm": 1.4307526350021362, "learning_rate": 1.3268880114881112e-06, "loss": 0.1443033218383789, "step": 5596 }, { "epoch": 0.7799066397268863, "grad_norm": 0.8318306803703308, "learning_rate": 1.3252893979853304e-06, "loss": 0.11407852172851562, "step": 5597 }, { "epoch": 0.7800459834181007, "grad_norm": 0.7173914909362793, "learning_rate": 1.3236916009193517e-06, "loss": 0.13141632080078125, "step": 5598 }, { "epoch": 0.7801853271093151, "grad_norm": 1.365767240524292, "learning_rate": 1.3220946206451678e-06, "loss": 0.1469707489013672, "step": 5599 }, { "epoch": 0.7803246708005295, "grad_norm": 0.9346945285797119, "learning_rate": 1.3204984575175893e-06, "loss": 0.14035797119140625, "step": 5600 }, { "epoch": 0.7804640144917439, "grad_norm": 1.0478492975234985, "learning_rate": 1.31890311189125e-06, "loss": 0.14313888549804688, "step": 5601 }, { "epoch": 0.7806033581829582, "grad_norm": 0.849553108215332, "learning_rate": 1.317308584120599e-06, "loss": 0.1387481689453125, "step": 5602 }, { "epoch": 0.7807427018741726, "grad_norm": 0.8269895911216736, "learning_rate": 1.3157148745599035e-06, "loss": 0.154327392578125, "step": 5603 }, { "epoch": 0.780882045565387, "grad_norm": 1.2524052858352661, "learning_rate": 1.314121983563248e-06, "loss": 0.15076828002929688, "step": 5604 }, { "epoch": 0.7810213892566014, "grad_norm": 1.3610330820083618, "learning_rate": 1.3125299114845375e-06, "loss": 0.1662464141845703, "step": 5605 }, { "epoch": 0.7811607329478157, "grad_norm": 0.9930189847946167, "learning_rate": 1.3109386586774958e-06, "loss": 0.13396835327148438, "step": 5606 }, { "epoch": 0.7813000766390301, "grad_norm": 0.6691921949386597, "learning_rate": 1.3093482254956602e-06, "loss": 0.13303756713867188, "step": 5607 }, { "epoch": 0.7814394203302445, "grad_norm": 0.5876034498214722, "learning_rate": 1.3077586122923896e-06, "loss": 0.10369873046875, "step": 5608 }, { "epoch": 0.781578764021459, "grad_norm": 1.4605976343154907, "learning_rate": 1.3061698194208616e-06, "loss": 0.17637252807617188, "step": 5609 }, { "epoch": 0.7817181077126734, "grad_norm": 1.05280601978302, "learning_rate": 1.3045818472340683e-06, "loss": 0.1461811065673828, "step": 5610 }, { "epoch": 0.7818574514038877, "grad_norm": 0.5880358219146729, "learning_rate": 1.3029946960848188e-06, "loss": 0.10454559326171875, "step": 5611 }, { "epoch": 0.7819967950951021, "grad_norm": 1.2379177808761597, "learning_rate": 1.3014083663257443e-06, "loss": 0.16832542419433594, "step": 5612 }, { "epoch": 0.7821361387863165, "grad_norm": 0.990449070930481, "learning_rate": 1.299822858309292e-06, "loss": 0.13647842407226562, "step": 5613 }, { "epoch": 0.7822754824775309, "grad_norm": 0.7745367884635925, "learning_rate": 1.2982381723877235e-06, "loss": 0.12334060668945312, "step": 5614 }, { "epoch": 0.7824148261687452, "grad_norm": 0.8669166564941406, "learning_rate": 1.2966543089131196e-06, "loss": 0.11426925659179688, "step": 5615 }, { "epoch": 0.7825541698599596, "grad_norm": 0.7494964599609375, "learning_rate": 1.295071268237379e-06, "loss": 0.13544082641601562, "step": 5616 }, { "epoch": 0.782693513551174, "grad_norm": 0.8340129852294922, "learning_rate": 1.2934890507122195e-06, "loss": 0.10141181945800781, "step": 5617 }, { "epoch": 0.7828328572423884, "grad_norm": 1.2163523435592651, "learning_rate": 1.2919076566891703e-06, "loss": 0.16280746459960938, "step": 5618 }, { "epoch": 0.7829722009336028, "grad_norm": 1.4663530588150024, "learning_rate": 1.2903270865195837e-06, "loss": 0.18273162841796875, "step": 5619 }, { "epoch": 0.7831115446248171, "grad_norm": 1.5357403755187988, "learning_rate": 1.2887473405546254e-06, "loss": 0.12270355224609375, "step": 5620 }, { "epoch": 0.7832508883160315, "grad_norm": 1.0278390645980835, "learning_rate": 1.2871684191452772e-06, "loss": 0.16469192504882812, "step": 5621 }, { "epoch": 0.7833902320072459, "grad_norm": 0.7439588308334351, "learning_rate": 1.2855903226423412e-06, "loss": 0.13933563232421875, "step": 5622 }, { "epoch": 0.7835295756984603, "grad_norm": 0.7915599942207336, "learning_rate": 1.2840130513964338e-06, "loss": 0.13260459899902344, "step": 5623 }, { "epoch": 0.7836689193896746, "grad_norm": 1.4910922050476074, "learning_rate": 1.2824366057579917e-06, "loss": 0.12489128112792969, "step": 5624 }, { "epoch": 0.783808263080889, "grad_norm": 1.166624665260315, "learning_rate": 1.2808609860772598e-06, "loss": 0.14929580688476562, "step": 5625 }, { "epoch": 0.7839476067721034, "grad_norm": 2.419104814529419, "learning_rate": 1.2792861927043071e-06, "loss": 0.20404052734375, "step": 5626 }, { "epoch": 0.7840869504633178, "grad_norm": 0.9753483533859253, "learning_rate": 1.277712225989019e-06, "loss": 0.13194656372070312, "step": 5627 }, { "epoch": 0.7842262941545322, "grad_norm": 0.9736486077308655, "learning_rate": 1.2761390862810907e-06, "loss": 0.1562347412109375, "step": 5628 }, { "epoch": 0.7843656378457465, "grad_norm": 1.1149522066116333, "learning_rate": 1.274566773930041e-06, "loss": 0.127777099609375, "step": 5629 }, { "epoch": 0.7845049815369609, "grad_norm": 1.1631451845169067, "learning_rate": 1.272995289285202e-06, "loss": 0.13053321838378906, "step": 5630 }, { "epoch": 0.7846443252281753, "grad_norm": 0.567216157913208, "learning_rate": 1.2714246326957213e-06, "loss": 0.10960006713867188, "step": 5631 }, { "epoch": 0.7847836689193897, "grad_norm": 0.7977142333984375, "learning_rate": 1.2698548045105608e-06, "loss": 0.13495635986328125, "step": 5632 }, { "epoch": 0.784923012610604, "grad_norm": 0.8315332531929016, "learning_rate": 1.2682858050785018e-06, "loss": 0.12753677368164062, "step": 5633 }, { "epoch": 0.7850623563018184, "grad_norm": 0.6983924508094788, "learning_rate": 1.266717634748142e-06, "loss": 0.11172866821289062, "step": 5634 }, { "epoch": 0.7852016999930328, "grad_norm": 0.8526766300201416, "learning_rate": 1.2651502938678917e-06, "loss": 0.13486480712890625, "step": 5635 }, { "epoch": 0.7853410436842472, "grad_norm": 1.067368984222412, "learning_rate": 1.2635837827859766e-06, "loss": 0.1796875, "step": 5636 }, { "epoch": 0.7854803873754616, "grad_norm": 0.7332726120948792, "learning_rate": 1.2620181018504406e-06, "loss": 0.132080078125, "step": 5637 }, { "epoch": 0.7856197310666759, "grad_norm": 1.0193356275558472, "learning_rate": 1.2604532514091444e-06, "loss": 0.13373565673828125, "step": 5638 }, { "epoch": 0.7857590747578903, "grad_norm": 0.7593685388565063, "learning_rate": 1.258889231809759e-06, "loss": 0.12924957275390625, "step": 5639 }, { "epoch": 0.7858984184491047, "grad_norm": 0.8610525727272034, "learning_rate": 1.2573260433997768e-06, "loss": 0.14013671875, "step": 5640 }, { "epoch": 0.7860377621403191, "grad_norm": 1.6285665035247803, "learning_rate": 1.2557636865265e-06, "loss": 0.163330078125, "step": 5641 }, { "epoch": 0.7861771058315334, "grad_norm": 1.0566940307617188, "learning_rate": 1.254202161537051e-06, "loss": 0.14939498901367188, "step": 5642 }, { "epoch": 0.7863164495227478, "grad_norm": 0.7486653327941895, "learning_rate": 1.2526414687783616e-06, "loss": 0.11588668823242188, "step": 5643 }, { "epoch": 0.7864557932139622, "grad_norm": 1.4569339752197266, "learning_rate": 1.2510816085971849e-06, "loss": 0.184783935546875, "step": 5644 }, { "epoch": 0.7865951369051766, "grad_norm": 0.6408018469810486, "learning_rate": 1.2495225813400864e-06, "loss": 0.11446380615234375, "step": 5645 }, { "epoch": 0.786734480596391, "grad_norm": 0.6654394865036011, "learning_rate": 1.247964387353446e-06, "loss": 0.1230926513671875, "step": 5646 }, { "epoch": 0.7868738242876053, "grad_norm": 1.485235571861267, "learning_rate": 1.2464070269834566e-06, "loss": 0.14396286010742188, "step": 5647 }, { "epoch": 0.7870131679788197, "grad_norm": 0.9532725214958191, "learning_rate": 1.2448505005761297e-06, "loss": 0.1296234130859375, "step": 5648 }, { "epoch": 0.7871525116700342, "grad_norm": 0.8684122562408447, "learning_rate": 1.2432948084772917e-06, "loss": 0.1318206787109375, "step": 5649 }, { "epoch": 0.7872918553612486, "grad_norm": 0.7513676881790161, "learning_rate": 1.2417399510325785e-06, "loss": 0.12772178649902344, "step": 5650 }, { "epoch": 0.787431199052463, "grad_norm": 0.8532817363739014, "learning_rate": 1.2401859285874474e-06, "loss": 0.145751953125, "step": 5651 }, { "epoch": 0.7875705427436773, "grad_norm": 0.9102729558944702, "learning_rate": 1.2386327414871635e-06, "loss": 0.132171630859375, "step": 5652 }, { "epoch": 0.7877098864348917, "grad_norm": 0.7016096115112305, "learning_rate": 1.237080390076812e-06, "loss": 0.13248443603515625, "step": 5653 }, { "epoch": 0.7878492301261061, "grad_norm": 0.7882422208786011, "learning_rate": 1.2355288747012878e-06, "loss": 0.12890625, "step": 5654 }, { "epoch": 0.7879885738173205, "grad_norm": 0.5053160786628723, "learning_rate": 1.2339781957053031e-06, "loss": 0.09659194946289062, "step": 5655 }, { "epoch": 0.7881279175085348, "grad_norm": 0.5413309931755066, "learning_rate": 1.232428353433387e-06, "loss": 0.10982894897460938, "step": 5656 }, { "epoch": 0.7882672611997492, "grad_norm": 0.5734738707542419, "learning_rate": 1.2308793482298724e-06, "loss": 0.11482620239257812, "step": 5657 }, { "epoch": 0.7884066048909636, "grad_norm": 0.9198747277259827, "learning_rate": 1.2293311804389162e-06, "loss": 0.13120269775390625, "step": 5658 }, { "epoch": 0.788545948582178, "grad_norm": 1.0096378326416016, "learning_rate": 1.227783850404487e-06, "loss": 0.151153564453125, "step": 5659 }, { "epoch": 0.7886852922733923, "grad_norm": 1.095862865447998, "learning_rate": 1.2262373584703642e-06, "loss": 0.13341522216796875, "step": 5660 }, { "epoch": 0.7888246359646067, "grad_norm": 1.0471248626708984, "learning_rate": 1.2246917049801449e-06, "loss": 0.13937091827392578, "step": 5661 }, { "epoch": 0.7889639796558211, "grad_norm": 0.652387797832489, "learning_rate": 1.2231468902772354e-06, "loss": 0.106536865234375, "step": 5662 }, { "epoch": 0.7891033233470355, "grad_norm": 1.3653564453125, "learning_rate": 1.221602914704862e-06, "loss": 0.15500259399414062, "step": 5663 }, { "epoch": 0.7892426670382499, "grad_norm": 1.3699272871017456, "learning_rate": 1.2200597786060565e-06, "loss": 0.16636276245117188, "step": 5664 }, { "epoch": 0.7893820107294642, "grad_norm": 1.3133615255355835, "learning_rate": 1.2185174823236711e-06, "loss": 0.15813827514648438, "step": 5665 }, { "epoch": 0.7895213544206786, "grad_norm": 1.331064224243164, "learning_rate": 1.2169760262003693e-06, "loss": 0.15825653076171875, "step": 5666 }, { "epoch": 0.789660698111893, "grad_norm": 0.9387915134429932, "learning_rate": 1.2154354105786276e-06, "loss": 0.10683059692382812, "step": 5667 }, { "epoch": 0.7898000418031074, "grad_norm": 1.1706700325012207, "learning_rate": 1.2138956358007325e-06, "loss": 0.15855789184570312, "step": 5668 }, { "epoch": 0.7899393854943217, "grad_norm": 0.6743504405021667, "learning_rate": 1.212356702208789e-06, "loss": 0.10332298278808594, "step": 5669 }, { "epoch": 0.7900787291855361, "grad_norm": 0.7292343378067017, "learning_rate": 1.210818610144714e-06, "loss": 0.13084793090820312, "step": 5670 }, { "epoch": 0.7902180728767505, "grad_norm": 0.6235049962997437, "learning_rate": 1.209281359950234e-06, "loss": 0.12082481384277344, "step": 5671 }, { "epoch": 0.7903574165679649, "grad_norm": 1.519820213317871, "learning_rate": 1.2077449519668943e-06, "loss": 0.15832901000976562, "step": 5672 }, { "epoch": 0.7904967602591793, "grad_norm": 0.8433401584625244, "learning_rate": 1.2062093865360458e-06, "loss": 0.1349163055419922, "step": 5673 }, { "epoch": 0.7906361039503936, "grad_norm": 1.7359532117843628, "learning_rate": 1.2046746639988593e-06, "loss": 0.15056991577148438, "step": 5674 }, { "epoch": 0.790775447641608, "grad_norm": 1.0321400165557861, "learning_rate": 1.2031407846963122e-06, "loss": 0.175567626953125, "step": 5675 }, { "epoch": 0.7909147913328224, "grad_norm": 0.7364153861999512, "learning_rate": 1.201607748969199e-06, "loss": 0.12404251098632812, "step": 5676 }, { "epoch": 0.7910541350240368, "grad_norm": 0.6439130306243896, "learning_rate": 1.2000755571581263e-06, "loss": 0.09421730041503906, "step": 5677 }, { "epoch": 0.7911934787152511, "grad_norm": 1.1916379928588867, "learning_rate": 1.1985442096035116e-06, "loss": 0.13106536865234375, "step": 5678 }, { "epoch": 0.7913328224064655, "grad_norm": 0.8414901494979858, "learning_rate": 1.1970137066455834e-06, "loss": 0.12700653076171875, "step": 5679 }, { "epoch": 0.7914721660976799, "grad_norm": 0.9932646155357361, "learning_rate": 1.1954840486243857e-06, "loss": 0.1514129638671875, "step": 5680 }, { "epoch": 0.7916115097888943, "grad_norm": 1.2197030782699585, "learning_rate": 1.193955235879775e-06, "loss": 0.144500732421875, "step": 5681 }, { "epoch": 0.7917508534801087, "grad_norm": 0.8300508260726929, "learning_rate": 1.1924272687514182e-06, "loss": 0.12320518493652344, "step": 5682 }, { "epoch": 0.791890197171323, "grad_norm": 0.9090919494628906, "learning_rate": 1.1909001475787917e-06, "loss": 0.1276092529296875, "step": 5683 }, { "epoch": 0.7920295408625374, "grad_norm": 0.9279232025146484, "learning_rate": 1.1893738727011894e-06, "loss": 0.14216995239257812, "step": 5684 }, { "epoch": 0.7921688845537518, "grad_norm": 1.576629877090454, "learning_rate": 1.187848444457716e-06, "loss": 0.16954421997070312, "step": 5685 }, { "epoch": 0.7923082282449662, "grad_norm": 1.529453992843628, "learning_rate": 1.1863238631872843e-06, "loss": 0.15278244018554688, "step": 5686 }, { "epoch": 0.7924475719361805, "grad_norm": 0.7590490579605103, "learning_rate": 1.184800129228622e-06, "loss": 0.137542724609375, "step": 5687 }, { "epoch": 0.7925869156273949, "grad_norm": 0.9996505975723267, "learning_rate": 1.1832772429202716e-06, "loss": 0.138824462890625, "step": 5688 }, { "epoch": 0.7927262593186094, "grad_norm": 0.9697558879852295, "learning_rate": 1.1817552046005777e-06, "loss": 0.14062118530273438, "step": 5689 }, { "epoch": 0.7928656030098238, "grad_norm": 0.8574894666671753, "learning_rate": 1.1802340146077045e-06, "loss": 0.12334442138671875, "step": 5690 }, { "epoch": 0.7930049467010382, "grad_norm": 1.370846152305603, "learning_rate": 1.1787136732796289e-06, "loss": 0.14959335327148438, "step": 5691 }, { "epoch": 0.7931442903922525, "grad_norm": 0.7718380093574524, "learning_rate": 1.177194180954132e-06, "loss": 0.12968826293945312, "step": 5692 }, { "epoch": 0.7932836340834669, "grad_norm": 0.9334777593612671, "learning_rate": 1.1756755379688133e-06, "loss": 0.1471710205078125, "step": 5693 }, { "epoch": 0.7934229777746813, "grad_norm": 1.0451607704162598, "learning_rate": 1.174157744661078e-06, "loss": 0.12833023071289062, "step": 5694 }, { "epoch": 0.7935623214658957, "grad_norm": 1.1776820421218872, "learning_rate": 1.1726408013681473e-06, "loss": 0.15435791015625, "step": 5695 }, { "epoch": 0.79370166515711, "grad_norm": 1.1188594102859497, "learning_rate": 1.1711247084270494e-06, "loss": 0.16837310791015625, "step": 5696 }, { "epoch": 0.7938410088483244, "grad_norm": 0.6097725033760071, "learning_rate": 1.1696094661746267e-06, "loss": 0.12237358093261719, "step": 5697 }, { "epoch": 0.7939803525395388, "grad_norm": 1.3667113780975342, "learning_rate": 1.1680950749475328e-06, "loss": 0.1261730194091797, "step": 5698 }, { "epoch": 0.7941196962307532, "grad_norm": 0.678302526473999, "learning_rate": 1.1665815350822291e-06, "loss": 0.10673713684082031, "step": 5699 }, { "epoch": 0.7942590399219676, "grad_norm": 0.6466969847679138, "learning_rate": 1.1650688469149884e-06, "loss": 0.11728668212890625, "step": 5700 }, { "epoch": 0.7943983836131819, "grad_norm": 0.8922133445739746, "learning_rate": 1.1635570107818973e-06, "loss": 0.129852294921875, "step": 5701 }, { "epoch": 0.7945377273043963, "grad_norm": 0.9259788393974304, "learning_rate": 1.1620460270188516e-06, "loss": 0.14867401123046875, "step": 5702 }, { "epoch": 0.7946770709956107, "grad_norm": 0.9997316002845764, "learning_rate": 1.1605358959615559e-06, "loss": 0.14383697509765625, "step": 5703 }, { "epoch": 0.7948164146868251, "grad_norm": 0.8549936413764954, "learning_rate": 1.159026617945529e-06, "loss": 0.14042282104492188, "step": 5704 }, { "epoch": 0.7949557583780394, "grad_norm": 0.7923263907432556, "learning_rate": 1.1575181933060952e-06, "loss": 0.11526870727539062, "step": 5705 }, { "epoch": 0.7950951020692538, "grad_norm": 0.8020225167274475, "learning_rate": 1.156010622378395e-06, "loss": 0.12382698059082031, "step": 5706 }, { "epoch": 0.7952344457604682, "grad_norm": 0.8361298441886902, "learning_rate": 1.1545039054973733e-06, "loss": 0.11371612548828125, "step": 5707 }, { "epoch": 0.7953737894516826, "grad_norm": 0.9194107055664062, "learning_rate": 1.1529980429977899e-06, "loss": 0.1354656219482422, "step": 5708 }, { "epoch": 0.795513133142897, "grad_norm": 1.500041127204895, "learning_rate": 1.151493035214214e-06, "loss": 0.13948822021484375, "step": 5709 }, { "epoch": 0.7956524768341113, "grad_norm": 0.85030597448349, "learning_rate": 1.1499888824810223e-06, "loss": 0.14459228515625, "step": 5710 }, { "epoch": 0.7957918205253257, "grad_norm": 0.8507792949676514, "learning_rate": 1.148485585132403e-06, "loss": 0.13821029663085938, "step": 5711 }, { "epoch": 0.7959311642165401, "grad_norm": 1.2976043224334717, "learning_rate": 1.1469831435023542e-06, "loss": 0.16362380981445312, "step": 5712 }, { "epoch": 0.7960705079077545, "grad_norm": 1.346495509147644, "learning_rate": 1.1454815579246874e-06, "loss": 0.15510940551757812, "step": 5713 }, { "epoch": 0.7962098515989688, "grad_norm": 1.41831374168396, "learning_rate": 1.143980828733018e-06, "loss": 0.168182373046875, "step": 5714 }, { "epoch": 0.7963491952901832, "grad_norm": 0.7963563799858093, "learning_rate": 1.1424809562607725e-06, "loss": 0.12410354614257812, "step": 5715 }, { "epoch": 0.7964885389813976, "grad_norm": 0.8997980356216431, "learning_rate": 1.1409819408411898e-06, "loss": 0.1307525634765625, "step": 5716 }, { "epoch": 0.796627882672612, "grad_norm": 1.2639418840408325, "learning_rate": 1.1394837828073184e-06, "loss": 0.161102294921875, "step": 5717 }, { "epoch": 0.7967672263638264, "grad_norm": 1.4448994398117065, "learning_rate": 1.1379864824920116e-06, "loss": 0.16984176635742188, "step": 5718 }, { "epoch": 0.7969065700550407, "grad_norm": 1.3526053428649902, "learning_rate": 1.1364900402279394e-06, "loss": 0.14155197143554688, "step": 5719 }, { "epoch": 0.7970459137462551, "grad_norm": 1.0550169944763184, "learning_rate": 1.134994456347574e-06, "loss": 0.12784957885742188, "step": 5720 }, { "epoch": 0.7971852574374695, "grad_norm": 0.7224124670028687, "learning_rate": 1.1334997311832003e-06, "loss": 0.12941932678222656, "step": 5721 }, { "epoch": 0.7973246011286839, "grad_norm": 0.5352163910865784, "learning_rate": 1.132005865066912e-06, "loss": 0.11644744873046875, "step": 5722 }, { "epoch": 0.7974639448198982, "grad_norm": 1.3711378574371338, "learning_rate": 1.1305128583306125e-06, "loss": 0.20084381103515625, "step": 5723 }, { "epoch": 0.7976032885111126, "grad_norm": 1.051470160484314, "learning_rate": 1.1290207113060158e-06, "loss": 0.1376190185546875, "step": 5724 }, { "epoch": 0.797742632202327, "grad_norm": 1.1338857412338257, "learning_rate": 1.127529424324641e-06, "loss": 0.15316009521484375, "step": 5725 }, { "epoch": 0.7978819758935414, "grad_norm": 0.9598795771598816, "learning_rate": 1.1260389977178166e-06, "loss": 0.13873291015625, "step": 5726 }, { "epoch": 0.7980213195847558, "grad_norm": 0.6026377081871033, "learning_rate": 1.1245494318166844e-06, "loss": 0.11713027954101562, "step": 5727 }, { "epoch": 0.7981606632759701, "grad_norm": 1.0339837074279785, "learning_rate": 1.1230607269521886e-06, "loss": 0.13122940063476562, "step": 5728 }, { "epoch": 0.7983000069671845, "grad_norm": 0.7467756867408752, "learning_rate": 1.1215728834550877e-06, "loss": 0.1207122802734375, "step": 5729 }, { "epoch": 0.798439350658399, "grad_norm": 0.7860749363899231, "learning_rate": 1.1200859016559473e-06, "loss": 0.133056640625, "step": 5730 }, { "epoch": 0.7985786943496134, "grad_norm": 0.9058138728141785, "learning_rate": 1.1185997818851402e-06, "loss": 0.12446212768554688, "step": 5731 }, { "epoch": 0.7987180380408277, "grad_norm": 0.8511393666267395, "learning_rate": 1.1171145244728454e-06, "loss": 0.13850784301757812, "step": 5732 }, { "epoch": 0.7988573817320421, "grad_norm": 1.2549022436141968, "learning_rate": 1.1156301297490563e-06, "loss": 0.17456817626953125, "step": 5733 }, { "epoch": 0.7989967254232565, "grad_norm": 0.8376312255859375, "learning_rate": 1.1141465980435713e-06, "loss": 0.1341552734375, "step": 5734 }, { "epoch": 0.7991360691144709, "grad_norm": 0.8983606696128845, "learning_rate": 1.112663929685997e-06, "loss": 0.14153289794921875, "step": 5735 }, { "epoch": 0.7992754128056853, "grad_norm": 0.9855079054832458, "learning_rate": 1.111182125005747e-06, "loss": 0.1309661865234375, "step": 5736 }, { "epoch": 0.7994147564968996, "grad_norm": 1.211167573928833, "learning_rate": 1.1097011843320454e-06, "loss": 0.14334487915039062, "step": 5737 }, { "epoch": 0.799554100188114, "grad_norm": 1.0967328548431396, "learning_rate": 1.1082211079939248e-06, "loss": 0.13287734985351562, "step": 5738 }, { "epoch": 0.7996934438793284, "grad_norm": 1.0298724174499512, "learning_rate": 1.106741896320222e-06, "loss": 0.1324310302734375, "step": 5739 }, { "epoch": 0.7998327875705428, "grad_norm": 1.366921305656433, "learning_rate": 1.1052635496395864e-06, "loss": 0.12864303588867188, "step": 5740 }, { "epoch": 0.7999721312617571, "grad_norm": 1.7161411046981812, "learning_rate": 1.1037860682804708e-06, "loss": 0.16478729248046875, "step": 5741 }, { "epoch": 0.8001114749529715, "grad_norm": 0.9607152938842773, "learning_rate": 1.1023094525711397e-06, "loss": 0.16007232666015625, "step": 5742 }, { "epoch": 0.8002508186441859, "grad_norm": 1.7392269372940063, "learning_rate": 1.1008337028396616e-06, "loss": 0.14760208129882812, "step": 5743 }, { "epoch": 0.8003901623354003, "grad_norm": 0.45656442642211914, "learning_rate": 1.099358819413915e-06, "loss": 0.10462570190429688, "step": 5744 }, { "epoch": 0.8005295060266147, "grad_norm": 0.8079397678375244, "learning_rate": 1.0978848026215865e-06, "loss": 0.13583755493164062, "step": 5745 }, { "epoch": 0.800668849717829, "grad_norm": 1.691278100013733, "learning_rate": 1.0964116527901686e-06, "loss": 0.15212249755859375, "step": 5746 }, { "epoch": 0.8008081934090434, "grad_norm": 1.0093668699264526, "learning_rate": 1.094939370246959e-06, "loss": 0.1322460174560547, "step": 5747 }, { "epoch": 0.8009475371002578, "grad_norm": 1.2028353214263916, "learning_rate": 1.093467955319068e-06, "loss": 0.14022445678710938, "step": 5748 }, { "epoch": 0.8010868807914722, "grad_norm": 0.6279057860374451, "learning_rate": 1.0919974083334106e-06, "loss": 0.11325454711914062, "step": 5749 }, { "epoch": 0.8012262244826865, "grad_norm": 1.2524112462997437, "learning_rate": 1.0905277296167066e-06, "loss": 0.17235565185546875, "step": 5750 }, { "epoch": 0.8013655681739009, "grad_norm": 0.6510720252990723, "learning_rate": 1.089058919495488e-06, "loss": 0.131622314453125, "step": 5751 }, { "epoch": 0.8015049118651153, "grad_norm": 1.1061173677444458, "learning_rate": 1.0875909782960887e-06, "loss": 0.17827987670898438, "step": 5752 }, { "epoch": 0.8016442555563297, "grad_norm": 1.0781141519546509, "learning_rate": 1.0861239063446511e-06, "loss": 0.15246963500976562, "step": 5753 }, { "epoch": 0.801783599247544, "grad_norm": 0.8683924674987793, "learning_rate": 1.0846577039671263e-06, "loss": 0.13100433349609375, "step": 5754 }, { "epoch": 0.8019229429387584, "grad_norm": 0.9475213289260864, "learning_rate": 1.0831923714892706e-06, "loss": 0.12647628784179688, "step": 5755 }, { "epoch": 0.8020622866299728, "grad_norm": 0.835676908493042, "learning_rate": 1.0817279092366507e-06, "loss": 0.1258087158203125, "step": 5756 }, { "epoch": 0.8022016303211872, "grad_norm": 0.8896772265434265, "learning_rate": 1.0802643175346312e-06, "loss": 0.1444225311279297, "step": 5757 }, { "epoch": 0.8023409740124016, "grad_norm": 0.758098840713501, "learning_rate": 1.0788015967083904e-06, "loss": 0.1254119873046875, "step": 5758 }, { "epoch": 0.8024803177036159, "grad_norm": 1.3312851190567017, "learning_rate": 1.0773397470829145e-06, "loss": 0.15970993041992188, "step": 5759 }, { "epoch": 0.8026196613948303, "grad_norm": 1.0708388090133667, "learning_rate": 1.0758787689829891e-06, "loss": 0.1355133056640625, "step": 5760 }, { "epoch": 0.8027590050860447, "grad_norm": 0.9595205783843994, "learning_rate": 1.074418662733212e-06, "loss": 0.14302444458007812, "step": 5761 }, { "epoch": 0.8028983487772591, "grad_norm": 0.8499404191970825, "learning_rate": 1.0729594286579876e-06, "loss": 0.12792015075683594, "step": 5762 }, { "epoch": 0.8030376924684735, "grad_norm": 0.5988301038742065, "learning_rate": 1.0715010670815212e-06, "loss": 0.10939788818359375, "step": 5763 }, { "epoch": 0.8031770361596878, "grad_norm": 1.043005347251892, "learning_rate": 1.0700435783278278e-06, "loss": 0.1516571044921875, "step": 5764 }, { "epoch": 0.8033163798509022, "grad_norm": 0.9129500985145569, "learning_rate": 1.068586962720729e-06, "loss": 0.13451004028320312, "step": 5765 }, { "epoch": 0.8034557235421166, "grad_norm": 0.617099940776825, "learning_rate": 1.0671312205838525e-06, "loss": 0.12022018432617188, "step": 5766 }, { "epoch": 0.803595067233331, "grad_norm": 0.8455580472946167, "learning_rate": 1.06567635224063e-06, "loss": 0.1268138885498047, "step": 5767 }, { "epoch": 0.8037344109245453, "grad_norm": 1.0326701402664185, "learning_rate": 1.0642223580142985e-06, "loss": 0.13300132751464844, "step": 5768 }, { "epoch": 0.8038737546157597, "grad_norm": 1.0101535320281982, "learning_rate": 1.0627692382279038e-06, "loss": 0.14043045043945312, "step": 5769 }, { "epoch": 0.8040130983069742, "grad_norm": 0.9798620939254761, "learning_rate": 1.0613169932042972e-06, "loss": 0.15102386474609375, "step": 5770 }, { "epoch": 0.8041524419981886, "grad_norm": 0.8517095446586609, "learning_rate": 1.0598656232661313e-06, "loss": 0.13644790649414062, "step": 5771 }, { "epoch": 0.804291785689403, "grad_norm": 0.9261455535888672, "learning_rate": 1.0584151287358708e-06, "loss": 0.12587928771972656, "step": 5772 }, { "epoch": 0.8044311293806173, "grad_norm": 1.9289206266403198, "learning_rate": 1.0569655099357795e-06, "loss": 0.172027587890625, "step": 5773 }, { "epoch": 0.8045704730718317, "grad_norm": 1.0226765871047974, "learning_rate": 1.0555167671879319e-06, "loss": 0.1309223175048828, "step": 5774 }, { "epoch": 0.8047098167630461, "grad_norm": 0.7108274102210999, "learning_rate": 1.0540689008142035e-06, "loss": 0.114501953125, "step": 5775 }, { "epoch": 0.8048491604542605, "grad_norm": 1.1003457307815552, "learning_rate": 1.052621911136278e-06, "loss": 0.1556243896484375, "step": 5776 }, { "epoch": 0.8049885041454748, "grad_norm": 1.2046399116516113, "learning_rate": 1.0511757984756455e-06, "loss": 0.142303466796875, "step": 5777 }, { "epoch": 0.8051278478366892, "grad_norm": 1.0006390810012817, "learning_rate": 1.049730563153597e-06, "loss": 0.14023971557617188, "step": 5778 }, { "epoch": 0.8052671915279036, "grad_norm": 1.2645213603973389, "learning_rate": 1.0482862054912296e-06, "loss": 0.131683349609375, "step": 5779 }, { "epoch": 0.805406535219118, "grad_norm": 0.6295934915542603, "learning_rate": 1.0468427258094481e-06, "loss": 0.12069320678710938, "step": 5780 }, { "epoch": 0.8055458789103324, "grad_norm": 1.1530611515045166, "learning_rate": 1.045400124428963e-06, "loss": 0.1473541259765625, "step": 5781 }, { "epoch": 0.8056852226015467, "grad_norm": 0.6448743343353271, "learning_rate": 1.043958401670283e-06, "loss": 0.1222686767578125, "step": 5782 }, { "epoch": 0.8058245662927611, "grad_norm": 0.9701953530311584, "learning_rate": 1.04251755785373e-06, "loss": 0.13945388793945312, "step": 5783 }, { "epoch": 0.8059639099839755, "grad_norm": 0.7679936289787292, "learning_rate": 1.0410775932994232e-06, "loss": 0.10942840576171875, "step": 5784 }, { "epoch": 0.8061032536751899, "grad_norm": 0.991496205329895, "learning_rate": 1.039638508327293e-06, "loss": 0.15758895874023438, "step": 5785 }, { "epoch": 0.8062425973664042, "grad_norm": 0.7745764851570129, "learning_rate": 1.0382003032570682e-06, "loss": 0.120361328125, "step": 5786 }, { "epoch": 0.8063819410576186, "grad_norm": 0.5563730001449585, "learning_rate": 1.0367629784082867e-06, "loss": 0.12003517150878906, "step": 5787 }, { "epoch": 0.806521284748833, "grad_norm": 0.9310613870620728, "learning_rate": 1.0353265341002916e-06, "loss": 0.14539337158203125, "step": 5788 }, { "epoch": 0.8066606284400474, "grad_norm": 0.6357003450393677, "learning_rate": 1.0338909706522232e-06, "loss": 0.12523269653320312, "step": 5789 }, { "epoch": 0.8067999721312618, "grad_norm": 0.8479598760604858, "learning_rate": 1.032456288383033e-06, "loss": 0.13990402221679688, "step": 5790 }, { "epoch": 0.8069393158224761, "grad_norm": 1.0585767030715942, "learning_rate": 1.0310224876114766e-06, "loss": 0.130126953125, "step": 5791 }, { "epoch": 0.8070786595136905, "grad_norm": 0.9988833665847778, "learning_rate": 1.0295895686561087e-06, "loss": 0.14764022827148438, "step": 5792 }, { "epoch": 0.8072180032049049, "grad_norm": 0.9215015769004822, "learning_rate": 1.0281575318352937e-06, "loss": 0.113494873046875, "step": 5793 }, { "epoch": 0.8073573468961193, "grad_norm": 0.8786799311637878, "learning_rate": 1.0267263774671953e-06, "loss": 0.1280975341796875, "step": 5794 }, { "epoch": 0.8074966905873336, "grad_norm": 0.8569660186767578, "learning_rate": 1.0252961058697858e-06, "loss": 0.14487838745117188, "step": 5795 }, { "epoch": 0.807636034278548, "grad_norm": 0.8658820390701294, "learning_rate": 1.0238667173608364e-06, "loss": 0.14959716796875, "step": 5796 }, { "epoch": 0.8077753779697624, "grad_norm": 0.7801284193992615, "learning_rate": 1.0224382122579256e-06, "loss": 0.12342453002929688, "step": 5797 }, { "epoch": 0.8079147216609768, "grad_norm": 0.6549676656723022, "learning_rate": 1.0210105908784362e-06, "loss": 0.11297225952148438, "step": 5798 }, { "epoch": 0.8080540653521912, "grad_norm": 1.3028494119644165, "learning_rate": 1.0195838535395514e-06, "loss": 0.15856552124023438, "step": 5799 }, { "epoch": 0.8081934090434055, "grad_norm": 1.0471807718276978, "learning_rate": 1.0181580005582586e-06, "loss": 0.12524032592773438, "step": 5800 }, { "epoch": 0.8083327527346199, "grad_norm": 0.8944958448410034, "learning_rate": 1.0167330322513508e-06, "loss": 0.13623046875, "step": 5801 }, { "epoch": 0.8084720964258343, "grad_norm": 0.5883796811103821, "learning_rate": 1.0153089489354256e-06, "loss": 0.11682510375976562, "step": 5802 }, { "epoch": 0.8086114401170487, "grad_norm": 1.159557580947876, "learning_rate": 1.0138857509268784e-06, "loss": 0.16371917724609375, "step": 5803 }, { "epoch": 0.808750783808263, "grad_norm": 0.6104142665863037, "learning_rate": 1.012463438541914e-06, "loss": 0.11463546752929688, "step": 5804 }, { "epoch": 0.8088901274994774, "grad_norm": 0.9481605291366577, "learning_rate": 1.0110420120965354e-06, "loss": 0.1597118377685547, "step": 5805 }, { "epoch": 0.8090294711906918, "grad_norm": 0.6157817840576172, "learning_rate": 1.0096214719065534e-06, "loss": 0.11682891845703125, "step": 5806 }, { "epoch": 0.8091688148819062, "grad_norm": 1.4812604188919067, "learning_rate": 1.008201818287577e-06, "loss": 0.19430923461914062, "step": 5807 }, { "epoch": 0.8093081585731206, "grad_norm": 0.9900199174880981, "learning_rate": 1.0067830515550224e-06, "loss": 0.1392822265625, "step": 5808 }, { "epoch": 0.8094475022643349, "grad_norm": 1.3799768686294556, "learning_rate": 1.0053651720241087e-06, "loss": 0.225250244140625, "step": 5809 }, { "epoch": 0.8095868459555494, "grad_norm": 1.2374132871627808, "learning_rate": 1.0039481800098545e-06, "loss": 0.15073394775390625, "step": 5810 }, { "epoch": 0.8097261896467638, "grad_norm": 1.009484887123108, "learning_rate": 1.0025320758270819e-06, "loss": 0.16010665893554688, "step": 5811 }, { "epoch": 0.8098655333379782, "grad_norm": 0.9637340307235718, "learning_rate": 1.001116859790418e-06, "loss": 0.12892723083496094, "step": 5812 }, { "epoch": 0.8100048770291925, "grad_norm": 0.9319520592689514, "learning_rate": 9.997025322142934e-07, "loss": 0.13885116577148438, "step": 5813 }, { "epoch": 0.8101442207204069, "grad_norm": 0.779498279094696, "learning_rate": 9.98289093412938e-07, "loss": 0.120880126953125, "step": 5814 }, { "epoch": 0.8102835644116213, "grad_norm": 0.5255457162857056, "learning_rate": 9.96876543700384e-07, "loss": 0.11105728149414062, "step": 5815 }, { "epoch": 0.8104229081028357, "grad_norm": 0.9200354218482971, "learning_rate": 9.95464883390469e-07, "loss": 0.1393280029296875, "step": 5816 }, { "epoch": 0.8105622517940501, "grad_norm": 0.6061319708824158, "learning_rate": 9.940541127968335e-07, "loss": 0.11781597137451172, "step": 5817 }, { "epoch": 0.8107015954852644, "grad_norm": 0.7988432049751282, "learning_rate": 9.92644232232915e-07, "loss": 0.13084793090820312, "step": 5818 }, { "epoch": 0.8108409391764788, "grad_norm": 0.8498813509941101, "learning_rate": 9.912352420119587e-07, "loss": 0.1212158203125, "step": 5819 }, { "epoch": 0.8109802828676932, "grad_norm": 0.9932354092597961, "learning_rate": 9.89827142447013e-07, "loss": 0.13682937622070312, "step": 5820 }, { "epoch": 0.8111196265589076, "grad_norm": 0.8356021046638489, "learning_rate": 9.884199338509193e-07, "loss": 0.096710205078125, "step": 5821 }, { "epoch": 0.811258970250122, "grad_norm": 0.4472658038139343, "learning_rate": 9.87013616536331e-07, "loss": 0.10498046875, "step": 5822 }, { "epoch": 0.8113983139413363, "grad_norm": 0.7769871354103088, "learning_rate": 9.856081908156984e-07, "loss": 0.14296340942382812, "step": 5823 }, { "epoch": 0.8115376576325507, "grad_norm": 1.1488093137741089, "learning_rate": 9.842036570012776e-07, "loss": 0.14195823669433594, "step": 5824 }, { "epoch": 0.8116770013237651, "grad_norm": 2.097473621368408, "learning_rate": 9.828000154051216e-07, "loss": 0.15526962280273438, "step": 5825 }, { "epoch": 0.8118163450149795, "grad_norm": 1.4507564306259155, "learning_rate": 9.813972663390864e-07, "loss": 0.14122390747070312, "step": 5826 }, { "epoch": 0.8119556887061938, "grad_norm": 1.2349835634231567, "learning_rate": 9.79995410114834e-07, "loss": 0.14176559448242188, "step": 5827 }, { "epoch": 0.8120950323974082, "grad_norm": 0.6024320721626282, "learning_rate": 9.785944470438218e-07, "loss": 0.11687469482421875, "step": 5828 }, { "epoch": 0.8122343760886226, "grad_norm": 1.2642896175384521, "learning_rate": 9.771943774373138e-07, "loss": 0.17633056640625, "step": 5829 }, { "epoch": 0.812373719779837, "grad_norm": 0.9296405911445618, "learning_rate": 9.757952016063738e-07, "loss": 0.13042831420898438, "step": 5830 }, { "epoch": 0.8125130634710513, "grad_norm": 1.0765795707702637, "learning_rate": 9.743969198618659e-07, "loss": 0.14335250854492188, "step": 5831 }, { "epoch": 0.8126524071622657, "grad_norm": 0.9946756362915039, "learning_rate": 9.729995325144548e-07, "loss": 0.12707901000976562, "step": 5832 }, { "epoch": 0.8127917508534801, "grad_norm": 0.9302501082420349, "learning_rate": 9.716030398746096e-07, "loss": 0.15196609497070312, "step": 5833 }, { "epoch": 0.8129310945446945, "grad_norm": 0.6134467124938965, "learning_rate": 9.702074422526004e-07, "loss": 0.10619354248046875, "step": 5834 }, { "epoch": 0.8130704382359089, "grad_norm": 1.865241289138794, "learning_rate": 9.688127399584956e-07, "loss": 0.1818389892578125, "step": 5835 }, { "epoch": 0.8132097819271232, "grad_norm": 0.425711065530777, "learning_rate": 9.674189333021655e-07, "loss": 0.10408401489257812, "step": 5836 }, { "epoch": 0.8133491256183376, "grad_norm": 0.9573469758033752, "learning_rate": 9.660260225932834e-07, "loss": 0.1253662109375, "step": 5837 }, { "epoch": 0.813488469309552, "grad_norm": 0.782508134841919, "learning_rate": 9.646340081413225e-07, "loss": 0.13619232177734375, "step": 5838 }, { "epoch": 0.8136278130007664, "grad_norm": 0.7125786542892456, "learning_rate": 9.632428902555546e-07, "loss": 0.11971282958984375, "step": 5839 }, { "epoch": 0.8137671566919807, "grad_norm": 0.994347870349884, "learning_rate": 9.618526692450564e-07, "loss": 0.14503860473632812, "step": 5840 }, { "epoch": 0.8139065003831951, "grad_norm": 0.78817218542099, "learning_rate": 9.604633454187035e-07, "loss": 0.13631057739257812, "step": 5841 }, { "epoch": 0.8140458440744095, "grad_norm": 1.6241117715835571, "learning_rate": 9.59074919085171e-07, "loss": 0.12897491455078125, "step": 5842 }, { "epoch": 0.8141851877656239, "grad_norm": 1.0993694067001343, "learning_rate": 9.57687390552935e-07, "loss": 0.15552139282226562, "step": 5843 }, { "epoch": 0.8143245314568383, "grad_norm": 1.2052693367004395, "learning_rate": 9.563007601302727e-07, "loss": 0.15327835083007812, "step": 5844 }, { "epoch": 0.8144638751480526, "grad_norm": 1.317427158355713, "learning_rate": 9.549150281252633e-07, "loss": 0.1688232421875, "step": 5845 }, { "epoch": 0.814603218839267, "grad_norm": 0.8741366267204285, "learning_rate": 9.535301948457842e-07, "loss": 0.16356658935546875, "step": 5846 }, { "epoch": 0.8147425625304814, "grad_norm": 0.9845162630081177, "learning_rate": 9.521462605995119e-07, "loss": 0.1460723876953125, "step": 5847 }, { "epoch": 0.8148819062216958, "grad_norm": 0.9384101629257202, "learning_rate": 9.507632256939264e-07, "loss": 0.15788841247558594, "step": 5848 }, { "epoch": 0.8150212499129101, "grad_norm": 1.5574678182601929, "learning_rate": 9.493810904363077e-07, "loss": 0.17238235473632812, "step": 5849 }, { "epoch": 0.8151605936041246, "grad_norm": 1.220157265663147, "learning_rate": 9.479998551337322e-07, "loss": 0.177001953125, "step": 5850 }, { "epoch": 0.815299937295339, "grad_norm": 1.1188068389892578, "learning_rate": 9.466195200930817e-07, "loss": 0.15681838989257812, "step": 5851 }, { "epoch": 0.8154392809865534, "grad_norm": 1.0563403367996216, "learning_rate": 9.452400856210337e-07, "loss": 0.14182281494140625, "step": 5852 }, { "epoch": 0.8155786246777678, "grad_norm": 0.7868084907531738, "learning_rate": 9.438615520240651e-07, "loss": 0.12219619750976562, "step": 5853 }, { "epoch": 0.8157179683689821, "grad_norm": 0.7321150302886963, "learning_rate": 9.424839196084568e-07, "loss": 0.11334991455078125, "step": 5854 }, { "epoch": 0.8158573120601965, "grad_norm": 1.2463284730911255, "learning_rate": 9.411071886802869e-07, "loss": 0.15796279907226562, "step": 5855 }, { "epoch": 0.8159966557514109, "grad_norm": 0.773446798324585, "learning_rate": 9.397313595454349e-07, "loss": 0.13750076293945312, "step": 5856 }, { "epoch": 0.8161359994426253, "grad_norm": 1.403953194618225, "learning_rate": 9.383564325095767e-07, "loss": 0.14695358276367188, "step": 5857 }, { "epoch": 0.8162753431338396, "grad_norm": 0.8893654346466064, "learning_rate": 9.369824078781897e-07, "loss": 0.12933349609375, "step": 5858 }, { "epoch": 0.816414686825054, "grad_norm": 0.8346343636512756, "learning_rate": 9.356092859565524e-07, "loss": 0.13368988037109375, "step": 5859 }, { "epoch": 0.8165540305162684, "grad_norm": 0.9285354614257812, "learning_rate": 9.342370670497391e-07, "loss": 0.14334487915039062, "step": 5860 }, { "epoch": 0.8166933742074828, "grad_norm": 0.8748355507850647, "learning_rate": 9.328657514626266e-07, "loss": 0.13447952270507812, "step": 5861 }, { "epoch": 0.8168327178986972, "grad_norm": 0.7257243990898132, "learning_rate": 9.314953394998905e-07, "loss": 0.13489532470703125, "step": 5862 }, { "epoch": 0.8169720615899115, "grad_norm": 1.1366887092590332, "learning_rate": 9.30125831466005e-07, "loss": 0.14842605590820312, "step": 5863 }, { "epoch": 0.8171114052811259, "grad_norm": 0.8958733081817627, "learning_rate": 9.287572276652417e-07, "loss": 0.14120101928710938, "step": 5864 }, { "epoch": 0.8172507489723403, "grad_norm": 0.776624858379364, "learning_rate": 9.273895284016743e-07, "loss": 0.12125778198242188, "step": 5865 }, { "epoch": 0.8173900926635547, "grad_norm": 1.1017731428146362, "learning_rate": 9.260227339791755e-07, "loss": 0.13991928100585938, "step": 5866 }, { "epoch": 0.817529436354769, "grad_norm": 1.051472783088684, "learning_rate": 9.246568447014148e-07, "loss": 0.1414203643798828, "step": 5867 }, { "epoch": 0.8176687800459834, "grad_norm": 1.0147696733474731, "learning_rate": 9.232918608718599e-07, "loss": 0.10883712768554688, "step": 5868 }, { "epoch": 0.8178081237371978, "grad_norm": 0.7857296466827393, "learning_rate": 9.219277827937811e-07, "loss": 0.13877296447753906, "step": 5869 }, { "epoch": 0.8179474674284122, "grad_norm": 0.8115462064743042, "learning_rate": 9.205646107702465e-07, "loss": 0.12900543212890625, "step": 5870 }, { "epoch": 0.8180868111196266, "grad_norm": 1.1450098752975464, "learning_rate": 9.192023451041187e-07, "loss": 0.15107345581054688, "step": 5871 }, { "epoch": 0.8182261548108409, "grad_norm": 1.2384023666381836, "learning_rate": 9.178409860980648e-07, "loss": 0.14434814453125, "step": 5872 }, { "epoch": 0.8183654985020553, "grad_norm": 0.5496143102645874, "learning_rate": 9.164805340545457e-07, "loss": 0.12068367004394531, "step": 5873 }, { "epoch": 0.8185048421932697, "grad_norm": 1.0419228076934814, "learning_rate": 9.151209892758245e-07, "loss": 0.1469879150390625, "step": 5874 }, { "epoch": 0.8186441858844841, "grad_norm": 1.5503768920898438, "learning_rate": 9.137623520639588e-07, "loss": 0.1509246826171875, "step": 5875 }, { "epoch": 0.8187835295756984, "grad_norm": 0.7680712938308716, "learning_rate": 9.124046227208083e-07, "loss": 0.13950157165527344, "step": 5876 }, { "epoch": 0.8189228732669128, "grad_norm": 1.1733332872390747, "learning_rate": 9.110478015480301e-07, "loss": 0.17925643920898438, "step": 5877 }, { "epoch": 0.8190622169581272, "grad_norm": 1.0517995357513428, "learning_rate": 9.096918888470785e-07, "loss": 0.12073516845703125, "step": 5878 }, { "epoch": 0.8192015606493416, "grad_norm": 0.9674650430679321, "learning_rate": 9.083368849192042e-07, "loss": 0.13454246520996094, "step": 5879 }, { "epoch": 0.819340904340556, "grad_norm": 1.070406198501587, "learning_rate": 9.069827900654604e-07, "loss": 0.14281082153320312, "step": 5880 }, { "epoch": 0.8194802480317703, "grad_norm": 1.0493223667144775, "learning_rate": 9.056296045866964e-07, "loss": 0.1367340087890625, "step": 5881 }, { "epoch": 0.8196195917229847, "grad_norm": 0.7030336260795593, "learning_rate": 9.042773287835566e-07, "loss": 0.12959671020507812, "step": 5882 }, { "epoch": 0.8197589354141991, "grad_norm": 0.681235671043396, "learning_rate": 9.02925962956489e-07, "loss": 0.1164093017578125, "step": 5883 }, { "epoch": 0.8198982791054135, "grad_norm": 1.3698149919509888, "learning_rate": 9.015755074057336e-07, "loss": 0.14869117736816406, "step": 5884 }, { "epoch": 0.8200376227966278, "grad_norm": 1.1395237445831299, "learning_rate": 9.002259624313325e-07, "loss": 0.1310272216796875, "step": 5885 }, { "epoch": 0.8201769664878422, "grad_norm": 0.6909003853797913, "learning_rate": 8.98877328333122e-07, "loss": 0.120452880859375, "step": 5886 }, { "epoch": 0.8203163101790566, "grad_norm": 1.1517244577407837, "learning_rate": 8.975296054107396e-07, "loss": 0.1757678985595703, "step": 5887 }, { "epoch": 0.820455653870271, "grad_norm": 0.7241272926330566, "learning_rate": 8.961827939636198e-07, "loss": 0.12353897094726562, "step": 5888 }, { "epoch": 0.8205949975614854, "grad_norm": 0.6552055478096008, "learning_rate": 8.948368942909891e-07, "loss": 0.11036300659179688, "step": 5889 }, { "epoch": 0.8207343412526998, "grad_norm": 0.9146871566772461, "learning_rate": 8.934919066918779e-07, "loss": 0.13106536865234375, "step": 5890 }, { "epoch": 0.8208736849439142, "grad_norm": 1.0032670497894287, "learning_rate": 8.921478314651133e-07, "loss": 0.14190673828125, "step": 5891 }, { "epoch": 0.8210130286351286, "grad_norm": 1.0957303047180176, "learning_rate": 8.908046689093153e-07, "loss": 0.16905593872070312, "step": 5892 }, { "epoch": 0.821152372326343, "grad_norm": 1.4236983060836792, "learning_rate": 8.894624193229051e-07, "loss": 0.19122695922851562, "step": 5893 }, { "epoch": 0.8212917160175573, "grad_norm": 0.6628167629241943, "learning_rate": 8.88121083004102e-07, "loss": 0.11459732055664062, "step": 5894 }, { "epoch": 0.8214310597087717, "grad_norm": 1.0084890127182007, "learning_rate": 8.867806602509177e-07, "loss": 0.15534591674804688, "step": 5895 }, { "epoch": 0.8215704033999861, "grad_norm": 1.1000752449035645, "learning_rate": 8.854411513611638e-07, "loss": 0.1558971405029297, "step": 5896 }, { "epoch": 0.8217097470912005, "grad_norm": 1.5197839736938477, "learning_rate": 8.841025566324485e-07, "loss": 0.15018463134765625, "step": 5897 }, { "epoch": 0.8218490907824149, "grad_norm": 0.9907402992248535, "learning_rate": 8.827648763621793e-07, "loss": 0.11828994750976562, "step": 5898 }, { "epoch": 0.8219884344736292, "grad_norm": 0.7562654614448547, "learning_rate": 8.814281108475565e-07, "loss": 0.14200210571289062, "step": 5899 }, { "epoch": 0.8221277781648436, "grad_norm": 1.0593035221099854, "learning_rate": 8.800922603855772e-07, "loss": 0.13160324096679688, "step": 5900 }, { "epoch": 0.822267121856058, "grad_norm": 0.8941949009895325, "learning_rate": 8.787573252730386e-07, "loss": 0.1379241943359375, "step": 5901 }, { "epoch": 0.8224064655472724, "grad_norm": 1.8368637561798096, "learning_rate": 8.774233058065346e-07, "loss": 0.1531524658203125, "step": 5902 }, { "epoch": 0.8225458092384867, "grad_norm": 0.7826642990112305, "learning_rate": 8.760902022824502e-07, "loss": 0.10498046875, "step": 5903 }, { "epoch": 0.8226851529297011, "grad_norm": 1.06023108959198, "learning_rate": 8.747580149969737e-07, "loss": 0.14502716064453125, "step": 5904 }, { "epoch": 0.8228244966209155, "grad_norm": 0.9908533096313477, "learning_rate": 8.734267442460842e-07, "loss": 0.11557388305664062, "step": 5905 }, { "epoch": 0.8229638403121299, "grad_norm": 1.7487186193466187, "learning_rate": 8.720963903255619e-07, "loss": 0.1927661895751953, "step": 5906 }, { "epoch": 0.8231031840033443, "grad_norm": 1.104716420173645, "learning_rate": 8.707669535309793e-07, "loss": 0.1515350341796875, "step": 5907 }, { "epoch": 0.8232425276945586, "grad_norm": 0.7709018588066101, "learning_rate": 8.694384341577072e-07, "loss": 0.1354198455810547, "step": 5908 }, { "epoch": 0.823381871385773, "grad_norm": 0.6077791452407837, "learning_rate": 8.681108325009141e-07, "loss": 0.1139984130859375, "step": 5909 }, { "epoch": 0.8235212150769874, "grad_norm": 0.6902068853378296, "learning_rate": 8.667841488555617e-07, "loss": 0.14301490783691406, "step": 5910 }, { "epoch": 0.8236605587682018, "grad_norm": 1.2590935230255127, "learning_rate": 8.654583835164066e-07, "loss": 0.17266082763671875, "step": 5911 }, { "epoch": 0.8237999024594161, "grad_norm": 0.6656687259674072, "learning_rate": 8.641335367780057e-07, "loss": 0.11143302917480469, "step": 5912 }, { "epoch": 0.8239392461506305, "grad_norm": 0.9089729189872742, "learning_rate": 8.62809608934711e-07, "loss": 0.14138031005859375, "step": 5913 }, { "epoch": 0.8240785898418449, "grad_norm": 0.6647921204566956, "learning_rate": 8.614866002806665e-07, "loss": 0.12669754028320312, "step": 5914 }, { "epoch": 0.8242179335330593, "grad_norm": 1.2626248598098755, "learning_rate": 8.601645111098162e-07, "loss": 0.16924285888671875, "step": 5915 }, { "epoch": 0.8243572772242737, "grad_norm": 0.9606404900550842, "learning_rate": 8.588433417158965e-07, "loss": 0.15052032470703125, "step": 5916 }, { "epoch": 0.824496620915488, "grad_norm": 0.9242450594902039, "learning_rate": 8.575230923924432e-07, "loss": 0.14670562744140625, "step": 5917 }, { "epoch": 0.8246359646067024, "grad_norm": 2.063063383102417, "learning_rate": 8.562037634327836e-07, "loss": 0.21518707275390625, "step": 5918 }, { "epoch": 0.8247753082979168, "grad_norm": 1.6947933435440063, "learning_rate": 8.548853551300429e-07, "loss": 0.19846343994140625, "step": 5919 }, { "epoch": 0.8249146519891312, "grad_norm": 0.9138956665992737, "learning_rate": 8.535678677771441e-07, "loss": 0.14633560180664062, "step": 5920 }, { "epoch": 0.8250539956803455, "grad_norm": 0.7843437194824219, "learning_rate": 8.522513016667982e-07, "loss": 0.11955642700195312, "step": 5921 }, { "epoch": 0.8251933393715599, "grad_norm": 0.979396641254425, "learning_rate": 8.509356570915184e-07, "loss": 0.13404083251953125, "step": 5922 }, { "epoch": 0.8253326830627743, "grad_norm": 0.7233904600143433, "learning_rate": 8.496209343436101e-07, "loss": 0.12532424926757812, "step": 5923 }, { "epoch": 0.8254720267539887, "grad_norm": 0.8825996518135071, "learning_rate": 8.483071337151777e-07, "loss": 0.12529754638671875, "step": 5924 }, { "epoch": 0.825611370445203, "grad_norm": 1.1681082248687744, "learning_rate": 8.469942554981148e-07, "loss": 0.12386703491210938, "step": 5925 }, { "epoch": 0.8257507141364174, "grad_norm": 0.7008220553398132, "learning_rate": 8.456822999841125e-07, "loss": 0.10698318481445312, "step": 5926 }, { "epoch": 0.8258900578276318, "grad_norm": 1.1677249670028687, "learning_rate": 8.443712674646598e-07, "loss": 0.15781784057617188, "step": 5927 }, { "epoch": 0.8260294015188462, "grad_norm": 0.7651766538619995, "learning_rate": 8.430611582310355e-07, "loss": 0.12681007385253906, "step": 5928 }, { "epoch": 0.8261687452100606, "grad_norm": 1.280015230178833, "learning_rate": 8.417519725743173e-07, "loss": 0.17959976196289062, "step": 5929 }, { "epoch": 0.8263080889012749, "grad_norm": 0.9454085230827332, "learning_rate": 8.40443710785378e-07, "loss": 0.14623641967773438, "step": 5930 }, { "epoch": 0.8264474325924894, "grad_norm": 1.1423195600509644, "learning_rate": 8.391363731548813e-07, "loss": 0.14049911499023438, "step": 5931 }, { "epoch": 0.8265867762837038, "grad_norm": 0.7102352380752563, "learning_rate": 8.378299599732875e-07, "loss": 0.12343215942382812, "step": 5932 }, { "epoch": 0.8267261199749182, "grad_norm": 1.6667368412017822, "learning_rate": 8.365244715308524e-07, "loss": 0.162841796875, "step": 5933 }, { "epoch": 0.8268654636661326, "grad_norm": 0.5996479988098145, "learning_rate": 8.352199081176271e-07, "loss": 0.11512374877929688, "step": 5934 }, { "epoch": 0.8270048073573469, "grad_norm": 1.4241999387741089, "learning_rate": 8.339162700234537e-07, "loss": 0.1734161376953125, "step": 5935 }, { "epoch": 0.8271441510485613, "grad_norm": 0.8657843470573425, "learning_rate": 8.326135575379729e-07, "loss": 0.1257171630859375, "step": 5936 }, { "epoch": 0.8272834947397757, "grad_norm": 0.7631528973579407, "learning_rate": 8.313117709506158e-07, "loss": 0.115631103515625, "step": 5937 }, { "epoch": 0.8274228384309901, "grad_norm": 0.7237550616264343, "learning_rate": 8.30010910550611e-07, "loss": 0.11057090759277344, "step": 5938 }, { "epoch": 0.8275621821222044, "grad_norm": 0.7588958740234375, "learning_rate": 8.287109766269786e-07, "loss": 0.11907577514648438, "step": 5939 }, { "epoch": 0.8277015258134188, "grad_norm": 0.7859882712364197, "learning_rate": 8.274119694685345e-07, "loss": 0.12722396850585938, "step": 5940 }, { "epoch": 0.8278408695046332, "grad_norm": 1.0406782627105713, "learning_rate": 8.26113889363891e-07, "loss": 0.14950942993164062, "step": 5941 }, { "epoch": 0.8279802131958476, "grad_norm": 0.6973357200622559, "learning_rate": 8.248167366014493e-07, "loss": 0.1197195053100586, "step": 5942 }, { "epoch": 0.828119556887062, "grad_norm": 0.8082815408706665, "learning_rate": 8.235205114694067e-07, "loss": 0.12437820434570312, "step": 5943 }, { "epoch": 0.8282589005782763, "grad_norm": 1.2849204540252686, "learning_rate": 8.222252142557557e-07, "loss": 0.15592575073242188, "step": 5944 }, { "epoch": 0.8283982442694907, "grad_norm": 0.7190844416618347, "learning_rate": 8.209308452482829e-07, "loss": 0.10748672485351562, "step": 5945 }, { "epoch": 0.8285375879607051, "grad_norm": 0.9891827702522278, "learning_rate": 8.196374047345668e-07, "loss": 0.15096664428710938, "step": 5946 }, { "epoch": 0.8286769316519195, "grad_norm": 1.070447564125061, "learning_rate": 8.183448930019783e-07, "loss": 0.14211273193359375, "step": 5947 }, { "epoch": 0.8288162753431338, "grad_norm": 1.0726207494735718, "learning_rate": 8.170533103376865e-07, "loss": 0.13647842407226562, "step": 5948 }, { "epoch": 0.8289556190343482, "grad_norm": 0.7459667921066284, "learning_rate": 8.157626570286515e-07, "loss": 0.12050056457519531, "step": 5949 }, { "epoch": 0.8290949627255626, "grad_norm": 1.1302449703216553, "learning_rate": 8.144729333616259e-07, "loss": 0.14089202880859375, "step": 5950 }, { "epoch": 0.829234306416777, "grad_norm": 1.1072006225585938, "learning_rate": 8.131841396231566e-07, "loss": 0.1511077880859375, "step": 5951 }, { "epoch": 0.8293736501079914, "grad_norm": 1.2141404151916504, "learning_rate": 8.118962760995874e-07, "loss": 0.15143966674804688, "step": 5952 }, { "epoch": 0.8295129937992057, "grad_norm": 1.0319875478744507, "learning_rate": 8.106093430770473e-07, "loss": 0.1707000732421875, "step": 5953 }, { "epoch": 0.8296523374904201, "grad_norm": 0.6758532524108887, "learning_rate": 8.093233408414658e-07, "loss": 0.1138153076171875, "step": 5954 }, { "epoch": 0.8297916811816345, "grad_norm": 0.6771032214164734, "learning_rate": 8.080382696785627e-07, "loss": 0.12323760986328125, "step": 5955 }, { "epoch": 0.8299310248728489, "grad_norm": 0.7959683537483215, "learning_rate": 8.067541298738535e-07, "loss": 0.1196136474609375, "step": 5956 }, { "epoch": 0.8300703685640632, "grad_norm": 0.8085365295410156, "learning_rate": 8.054709217126433e-07, "loss": 0.14214706420898438, "step": 5957 }, { "epoch": 0.8302097122552776, "grad_norm": 0.7529299855232239, "learning_rate": 8.041886454800307e-07, "loss": 0.11478805541992188, "step": 5958 }, { "epoch": 0.830349055946492, "grad_norm": 1.231642723083496, "learning_rate": 8.029073014609096e-07, "loss": 0.15803909301757812, "step": 5959 }, { "epoch": 0.8304883996377064, "grad_norm": 1.1171036958694458, "learning_rate": 8.016268899399643e-07, "loss": 0.15151596069335938, "step": 5960 }, { "epoch": 0.8306277433289208, "grad_norm": 0.7034115791320801, "learning_rate": 8.00347411201673e-07, "loss": 0.1215972900390625, "step": 5961 }, { "epoch": 0.8307670870201351, "grad_norm": 0.57674241065979, "learning_rate": 7.990688655303086e-07, "loss": 0.10496902465820312, "step": 5962 }, { "epoch": 0.8309064307113495, "grad_norm": 1.6305506229400635, "learning_rate": 7.977912532099336e-07, "loss": 0.14863967895507812, "step": 5963 }, { "epoch": 0.8310457744025639, "grad_norm": 1.2495675086975098, "learning_rate": 7.965145745244029e-07, "loss": 0.1362133026123047, "step": 5964 }, { "epoch": 0.8311851180937783, "grad_norm": 1.0634357929229736, "learning_rate": 7.95238829757366e-07, "loss": 0.1447601318359375, "step": 5965 }, { "epoch": 0.8313244617849926, "grad_norm": 1.081475019454956, "learning_rate": 7.939640191922665e-07, "loss": 0.12692642211914062, "step": 5966 }, { "epoch": 0.831463805476207, "grad_norm": 1.2546184062957764, "learning_rate": 7.926901431123362e-07, "loss": 0.1913604736328125, "step": 5967 }, { "epoch": 0.8316031491674214, "grad_norm": 0.9496360421180725, "learning_rate": 7.914172018006006e-07, "loss": 0.14535903930664062, "step": 5968 }, { "epoch": 0.8317424928586358, "grad_norm": 1.969122052192688, "learning_rate": 7.901451955398792e-07, "loss": 0.16171836853027344, "step": 5969 }, { "epoch": 0.8318818365498502, "grad_norm": 0.7563175559043884, "learning_rate": 7.88874124612784e-07, "loss": 0.1119232177734375, "step": 5970 }, { "epoch": 0.8320211802410646, "grad_norm": 1.399856448173523, "learning_rate": 7.876039893017151e-07, "loss": 0.15906143188476562, "step": 5971 }, { "epoch": 0.832160523932279, "grad_norm": 1.5648778676986694, "learning_rate": 7.863347898888696e-07, "loss": 0.21144866943359375, "step": 5972 }, { "epoch": 0.8322998676234934, "grad_norm": 1.0991418361663818, "learning_rate": 7.850665266562352e-07, "loss": 0.13178634643554688, "step": 5973 }, { "epoch": 0.8324392113147078, "grad_norm": 0.868964433670044, "learning_rate": 7.837991998855899e-07, "loss": 0.1147003173828125, "step": 5974 }, { "epoch": 0.8325785550059221, "grad_norm": 1.3287882804870605, "learning_rate": 7.825328098585039e-07, "loss": 0.1949005126953125, "step": 5975 }, { "epoch": 0.8327178986971365, "grad_norm": 1.0503968000411987, "learning_rate": 7.812673568563406e-07, "loss": 0.162689208984375, "step": 5976 }, { "epoch": 0.8328572423883509, "grad_norm": 1.0565046072006226, "learning_rate": 7.800028411602572e-07, "loss": 0.16542816162109375, "step": 5977 }, { "epoch": 0.8329965860795653, "grad_norm": 0.9785115718841553, "learning_rate": 7.78739263051198e-07, "loss": 0.13006210327148438, "step": 5978 }, { "epoch": 0.8331359297707797, "grad_norm": 0.946023166179657, "learning_rate": 7.774766228099001e-07, "loss": 0.1497039794921875, "step": 5979 }, { "epoch": 0.833275273461994, "grad_norm": 0.9518866539001465, "learning_rate": 7.762149207168951e-07, "loss": 0.13177108764648438, "step": 5980 }, { "epoch": 0.8334146171532084, "grad_norm": 0.9010510444641113, "learning_rate": 7.749541570525054e-07, "loss": 0.12923431396484375, "step": 5981 }, { "epoch": 0.8335539608444228, "grad_norm": 0.854048490524292, "learning_rate": 7.736943320968409e-07, "loss": 0.11099052429199219, "step": 5982 }, { "epoch": 0.8336933045356372, "grad_norm": 0.7404348254203796, "learning_rate": 7.724354461298089e-07, "loss": 0.1256103515625, "step": 5983 }, { "epoch": 0.8338326482268515, "grad_norm": 0.544975221157074, "learning_rate": 7.711774994311027e-07, "loss": 0.0936737060546875, "step": 5984 }, { "epoch": 0.8339719919180659, "grad_norm": 1.168068289756775, "learning_rate": 7.699204922802123e-07, "loss": 0.14717674255371094, "step": 5985 }, { "epoch": 0.8341113356092803, "grad_norm": 1.0424597263336182, "learning_rate": 7.686644249564124e-07, "loss": 0.14893150329589844, "step": 5986 }, { "epoch": 0.8342506793004947, "grad_norm": 0.877447247505188, "learning_rate": 7.674092977387737e-07, "loss": 0.14298248291015625, "step": 5987 }, { "epoch": 0.8343900229917091, "grad_norm": 1.0030385255813599, "learning_rate": 7.661551109061593e-07, "loss": 0.16020584106445312, "step": 5988 }, { "epoch": 0.8345293666829234, "grad_norm": 1.0690463781356812, "learning_rate": 7.649018647372186e-07, "loss": 0.11872291564941406, "step": 5989 }, { "epoch": 0.8346687103741378, "grad_norm": 0.9236339926719666, "learning_rate": 7.636495595103938e-07, "loss": 0.1240081787109375, "step": 5990 }, { "epoch": 0.8348080540653522, "grad_norm": 1.1884634494781494, "learning_rate": 7.6239819550392e-07, "loss": 0.13468170166015625, "step": 5991 }, { "epoch": 0.8349473977565666, "grad_norm": 1.0125502347946167, "learning_rate": 7.611477729958205e-07, "loss": 0.10547256469726562, "step": 5992 }, { "epoch": 0.835086741447781, "grad_norm": 0.7291249632835388, "learning_rate": 7.598982922639109e-07, "loss": 0.12145614624023438, "step": 5993 }, { "epoch": 0.8352260851389953, "grad_norm": 0.8839526176452637, "learning_rate": 7.586497535857984e-07, "loss": 0.12054634094238281, "step": 5994 }, { "epoch": 0.8353654288302097, "grad_norm": 1.1666364669799805, "learning_rate": 7.574021572388795e-07, "loss": 0.15016555786132812, "step": 5995 }, { "epoch": 0.8355047725214241, "grad_norm": 0.8661769032478333, "learning_rate": 7.561555035003398e-07, "loss": 0.10878753662109375, "step": 5996 }, { "epoch": 0.8356441162126385, "grad_norm": 1.1319957971572876, "learning_rate": 7.549097926471583e-07, "loss": 0.13711166381835938, "step": 5997 }, { "epoch": 0.8357834599038528, "grad_norm": 0.9697688817977905, "learning_rate": 7.536650249561056e-07, "loss": 0.11936187744140625, "step": 5998 }, { "epoch": 0.8359228035950672, "grad_norm": 1.321640968322754, "learning_rate": 7.524212007037385e-07, "loss": 0.18201446533203125, "step": 5999 }, { "epoch": 0.8360621472862816, "grad_norm": 1.2475956678390503, "learning_rate": 7.511783201664053e-07, "loss": 0.15948486328125, "step": 6000 }, { "epoch": 0.836201490977496, "grad_norm": 0.9259753823280334, "learning_rate": 7.499363836202472e-07, "loss": 0.12502288818359375, "step": 6001 }, { "epoch": 0.8363408346687103, "grad_norm": 1.0444685220718384, "learning_rate": 7.486953913411954e-07, "loss": 0.13756179809570312, "step": 6002 }, { "epoch": 0.8364801783599247, "grad_norm": 0.6839392185211182, "learning_rate": 7.474553436049675e-07, "loss": 0.12713241577148438, "step": 6003 }, { "epoch": 0.8366195220511391, "grad_norm": 0.9847165942192078, "learning_rate": 7.462162406870766e-07, "loss": 0.13898086547851562, "step": 6004 }, { "epoch": 0.8367588657423535, "grad_norm": 1.9402960538864136, "learning_rate": 7.4497808286282e-07, "loss": 0.1812896728515625, "step": 6005 }, { "epoch": 0.8368982094335679, "grad_norm": 0.7139809727668762, "learning_rate": 7.437408704072907e-07, "loss": 0.13768386840820312, "step": 6006 }, { "epoch": 0.8370375531247822, "grad_norm": 0.792580783367157, "learning_rate": 7.425046035953665e-07, "loss": 0.13660049438476562, "step": 6007 }, { "epoch": 0.8371768968159966, "grad_norm": 1.2314929962158203, "learning_rate": 7.412692827017193e-07, "loss": 0.16114044189453125, "step": 6008 }, { "epoch": 0.837316240507211, "grad_norm": 1.2975423336029053, "learning_rate": 7.400349080008107e-07, "loss": 0.14194107055664062, "step": 6009 }, { "epoch": 0.8374555841984254, "grad_norm": 0.7229291796684265, "learning_rate": 7.38801479766888e-07, "loss": 0.11037063598632812, "step": 6010 }, { "epoch": 0.8375949278896399, "grad_norm": 1.372485637664795, "learning_rate": 7.375689982739915e-07, "loss": 0.1273345947265625, "step": 6011 }, { "epoch": 0.8377342715808542, "grad_norm": 0.6713290810585022, "learning_rate": 7.363374637959498e-07, "loss": 0.12418365478515625, "step": 6012 }, { "epoch": 0.8378736152720686, "grad_norm": 1.5893406867980957, "learning_rate": 7.35106876606384e-07, "loss": 0.1591949462890625, "step": 6013 }, { "epoch": 0.838012958963283, "grad_norm": 0.642493724822998, "learning_rate": 7.338772369787001e-07, "loss": 0.12237167358398438, "step": 6014 }, { "epoch": 0.8381523026544974, "grad_norm": 0.8678367137908936, "learning_rate": 7.326485451860976e-07, "loss": 0.13620376586914062, "step": 6015 }, { "epoch": 0.8382916463457117, "grad_norm": 0.7692468166351318, "learning_rate": 7.314208015015623e-07, "loss": 0.12525177001953125, "step": 6016 }, { "epoch": 0.8384309900369261, "grad_norm": 1.0851739645004272, "learning_rate": 7.301940061978724e-07, "loss": 0.18384552001953125, "step": 6017 }, { "epoch": 0.8385703337281405, "grad_norm": 0.816895067691803, "learning_rate": 7.289681595475922e-07, "loss": 0.11932754516601562, "step": 6018 }, { "epoch": 0.8387096774193549, "grad_norm": 1.2323668003082275, "learning_rate": 7.277432618230773e-07, "loss": 0.15761947631835938, "step": 6019 }, { "epoch": 0.8388490211105692, "grad_norm": 0.6663787364959717, "learning_rate": 7.265193132964749e-07, "loss": 0.1241302490234375, "step": 6020 }, { "epoch": 0.8389883648017836, "grad_norm": 0.8550790548324585, "learning_rate": 7.252963142397134e-07, "loss": 0.12895965576171875, "step": 6021 }, { "epoch": 0.839127708492998, "grad_norm": 0.6952922940254211, "learning_rate": 7.24074264924518e-07, "loss": 0.12722396850585938, "step": 6022 }, { "epoch": 0.8392670521842124, "grad_norm": 1.4178569316864014, "learning_rate": 7.228531656223997e-07, "loss": 0.1844482421875, "step": 6023 }, { "epoch": 0.8394063958754268, "grad_norm": 0.988893449306488, "learning_rate": 7.216330166046603e-07, "loss": 0.15563583374023438, "step": 6024 }, { "epoch": 0.8395457395666411, "grad_norm": 0.599959135055542, "learning_rate": 7.204138181423881e-07, "loss": 0.1187286376953125, "step": 6025 }, { "epoch": 0.8396850832578555, "grad_norm": 0.9342814683914185, "learning_rate": 7.191955705064591e-07, "loss": 0.14328765869140625, "step": 6026 }, { "epoch": 0.8398244269490699, "grad_norm": 0.628864049911499, "learning_rate": 7.179782739675434e-07, "loss": 0.11067962646484375, "step": 6027 }, { "epoch": 0.8399637706402843, "grad_norm": 0.7844841480255127, "learning_rate": 7.167619287960942e-07, "loss": 0.12705612182617188, "step": 6028 }, { "epoch": 0.8401031143314986, "grad_norm": 1.0604910850524902, "learning_rate": 7.155465352623559e-07, "loss": 0.12911224365234375, "step": 6029 }, { "epoch": 0.840242458022713, "grad_norm": 1.5380157232284546, "learning_rate": 7.143320936363629e-07, "loss": 0.14971542358398438, "step": 6030 }, { "epoch": 0.8403818017139274, "grad_norm": 0.9364911913871765, "learning_rate": 7.131186041879357e-07, "loss": 0.13898086547851562, "step": 6031 }, { "epoch": 0.8405211454051418, "grad_norm": 1.3589931726455688, "learning_rate": 7.119060671866817e-07, "loss": 0.17193984985351562, "step": 6032 }, { "epoch": 0.8406604890963562, "grad_norm": 0.7742661237716675, "learning_rate": 7.106944829020013e-07, "loss": 0.13588714599609375, "step": 6033 }, { "epoch": 0.8407998327875705, "grad_norm": 1.3330118656158447, "learning_rate": 7.094838516030811e-07, "loss": 0.15929794311523438, "step": 6034 }, { "epoch": 0.8409391764787849, "grad_norm": 1.1613167524337769, "learning_rate": 7.082741735588938e-07, "loss": 0.157867431640625, "step": 6035 }, { "epoch": 0.8410785201699993, "grad_norm": 0.5911583304405212, "learning_rate": 7.070654490382045e-07, "loss": 0.10869216918945312, "step": 6036 }, { "epoch": 0.8412178638612137, "grad_norm": 0.8165602684020996, "learning_rate": 7.058576783095622e-07, "loss": 0.1414794921875, "step": 6037 }, { "epoch": 0.841357207552428, "grad_norm": 0.9031465649604797, "learning_rate": 7.046508616413078e-07, "loss": 0.13508987426757812, "step": 6038 }, { "epoch": 0.8414965512436424, "grad_norm": 0.9869087338447571, "learning_rate": 7.034449993015663e-07, "loss": 0.13458251953125, "step": 6039 }, { "epoch": 0.8416358949348568, "grad_norm": 0.869652509689331, "learning_rate": 7.022400915582539e-07, "loss": 0.15054702758789062, "step": 6040 }, { "epoch": 0.8417752386260712, "grad_norm": 1.0293281078338623, "learning_rate": 7.010361386790748e-07, "loss": 0.17873764038085938, "step": 6041 }, { "epoch": 0.8419145823172856, "grad_norm": 0.7802844643592834, "learning_rate": 6.998331409315184e-07, "loss": 0.11981964111328125, "step": 6042 }, { "epoch": 0.8420539260084999, "grad_norm": 1.4865037202835083, "learning_rate": 6.986310985828626e-07, "loss": 0.1560821533203125, "step": 6043 }, { "epoch": 0.8421932696997143, "grad_norm": 1.233520746231079, "learning_rate": 6.974300119001754e-07, "loss": 0.14274978637695312, "step": 6044 }, { "epoch": 0.8423326133909287, "grad_norm": 1.345657229423523, "learning_rate": 6.962298811503104e-07, "loss": 0.15871238708496094, "step": 6045 }, { "epoch": 0.8424719570821431, "grad_norm": 0.704169750213623, "learning_rate": 6.950307065999085e-07, "loss": 0.10773468017578125, "step": 6046 }, { "epoch": 0.8426113007733574, "grad_norm": 0.7652950882911682, "learning_rate": 6.938324885154007e-07, "loss": 0.12020492553710938, "step": 6047 }, { "epoch": 0.8427506444645718, "grad_norm": 0.773941695690155, "learning_rate": 6.92635227163001e-07, "loss": 0.127899169921875, "step": 6048 }, { "epoch": 0.8428899881557862, "grad_norm": 0.9293785691261292, "learning_rate": 6.914389228087165e-07, "loss": 0.15892791748046875, "step": 6049 }, { "epoch": 0.8430293318470006, "grad_norm": 0.7567495107650757, "learning_rate": 6.902435757183357e-07, "loss": 0.1147308349609375, "step": 6050 }, { "epoch": 0.8431686755382151, "grad_norm": 0.958183228969574, "learning_rate": 6.890491861574389e-07, "loss": 0.13921737670898438, "step": 6051 }, { "epoch": 0.8433080192294294, "grad_norm": 1.269045114517212, "learning_rate": 6.87855754391395e-07, "loss": 0.18156814575195312, "step": 6052 }, { "epoch": 0.8434473629206438, "grad_norm": 1.3073744773864746, "learning_rate": 6.866632806853518e-07, "loss": 0.16145706176757812, "step": 6053 }, { "epoch": 0.8435867066118582, "grad_norm": 0.6220908164978027, "learning_rate": 6.854717653042531e-07, "loss": 0.10680770874023438, "step": 6054 }, { "epoch": 0.8437260503030726, "grad_norm": 1.0481821298599243, "learning_rate": 6.842812085128253e-07, "loss": 0.15129470825195312, "step": 6055 }, { "epoch": 0.843865393994287, "grad_norm": 0.8865636587142944, "learning_rate": 6.830916105755847e-07, "loss": 0.13709640502929688, "step": 6056 }, { "epoch": 0.8440047376855013, "grad_norm": 0.6410291790962219, "learning_rate": 6.819029717568315e-07, "loss": 0.13295364379882812, "step": 6057 }, { "epoch": 0.8441440813767157, "grad_norm": 0.8234827518463135, "learning_rate": 6.807152923206528e-07, "loss": 0.1269664764404297, "step": 6058 }, { "epoch": 0.8442834250679301, "grad_norm": 1.1685456037521362, "learning_rate": 6.795285725309269e-07, "loss": 0.15622711181640625, "step": 6059 }, { "epoch": 0.8444227687591445, "grad_norm": 1.4396378993988037, "learning_rate": 6.783428126513125e-07, "loss": 0.18072509765625, "step": 6060 }, { "epoch": 0.8445621124503588, "grad_norm": 0.7919757962226868, "learning_rate": 6.771580129452604e-07, "loss": 0.14099884033203125, "step": 6061 }, { "epoch": 0.8447014561415732, "grad_norm": 1.2067352533340454, "learning_rate": 6.759741736760062e-07, "loss": 0.18555450439453125, "step": 6062 }, { "epoch": 0.8448407998327876, "grad_norm": 0.6817162036895752, "learning_rate": 6.747912951065722e-07, "loss": 0.132415771484375, "step": 6063 }, { "epoch": 0.844980143524002, "grad_norm": 0.7862582206726074, "learning_rate": 6.736093774997643e-07, "loss": 0.13191986083984375, "step": 6064 }, { "epoch": 0.8451194872152163, "grad_norm": 1.202465295791626, "learning_rate": 6.724284211181803e-07, "loss": 0.15384864807128906, "step": 6065 }, { "epoch": 0.8452588309064307, "grad_norm": 0.9705876111984253, "learning_rate": 6.712484262242014e-07, "loss": 0.12925338745117188, "step": 6066 }, { "epoch": 0.8453981745976451, "grad_norm": 0.7769604921340942, "learning_rate": 6.700693930799945e-07, "loss": 0.13867568969726562, "step": 6067 }, { "epoch": 0.8455375182888595, "grad_norm": 0.697146475315094, "learning_rate": 6.688913219475158e-07, "loss": 0.11767196655273438, "step": 6068 }, { "epoch": 0.8456768619800739, "grad_norm": 1.358731746673584, "learning_rate": 6.677142130885028e-07, "loss": 0.14745330810546875, "step": 6069 }, { "epoch": 0.8458162056712882, "grad_norm": 0.6937955021858215, "learning_rate": 6.665380667644849e-07, "loss": 0.126129150390625, "step": 6070 }, { "epoch": 0.8459555493625026, "grad_norm": 0.6375516057014465, "learning_rate": 6.653628832367731e-07, "loss": 0.10800933837890625, "step": 6071 }, { "epoch": 0.846094893053717, "grad_norm": 0.5945130586624146, "learning_rate": 6.641886627664673e-07, "loss": 0.10013008117675781, "step": 6072 }, { "epoch": 0.8462342367449314, "grad_norm": 0.6221403479576111, "learning_rate": 6.630154056144533e-07, "loss": 0.12479019165039062, "step": 6073 }, { "epoch": 0.8463735804361457, "grad_norm": 1.1050546169281006, "learning_rate": 6.618431120414015e-07, "loss": 0.1488189697265625, "step": 6074 }, { "epoch": 0.8465129241273601, "grad_norm": 0.7299669981002808, "learning_rate": 6.606717823077669e-07, "loss": 0.12567520141601562, "step": 6075 }, { "epoch": 0.8466522678185745, "grad_norm": 0.8759570121765137, "learning_rate": 6.59501416673794e-07, "loss": 0.13690185546875, "step": 6076 }, { "epoch": 0.8467916115097889, "grad_norm": 1.1279770135879517, "learning_rate": 6.583320153995121e-07, "loss": 0.14604949951171875, "step": 6077 }, { "epoch": 0.8469309552010033, "grad_norm": 1.5710006952285767, "learning_rate": 6.571635787447339e-07, "loss": 0.16154098510742188, "step": 6078 }, { "epoch": 0.8470702988922176, "grad_norm": 1.535714864730835, "learning_rate": 6.559961069690596e-07, "loss": 0.16615676879882812, "step": 6079 }, { "epoch": 0.847209642583432, "grad_norm": 1.1449588537216187, "learning_rate": 6.548296003318744e-07, "loss": 0.17012786865234375, "step": 6080 }, { "epoch": 0.8473489862746464, "grad_norm": 1.2504147291183472, "learning_rate": 6.536640590923515e-07, "loss": 0.1351032257080078, "step": 6081 }, { "epoch": 0.8474883299658608, "grad_norm": 0.8729118704795837, "learning_rate": 6.52499483509445e-07, "loss": 0.15577316284179688, "step": 6082 }, { "epoch": 0.8476276736570751, "grad_norm": 0.9963039755821228, "learning_rate": 6.51335873841899e-07, "loss": 0.14091110229492188, "step": 6083 }, { "epoch": 0.8477670173482895, "grad_norm": 1.079681158065796, "learning_rate": 6.501732303482394e-07, "loss": 0.17520523071289062, "step": 6084 }, { "epoch": 0.8479063610395039, "grad_norm": 0.6669557690620422, "learning_rate": 6.490115532867808e-07, "loss": 0.1254100799560547, "step": 6085 }, { "epoch": 0.8480457047307183, "grad_norm": 1.4926197528839111, "learning_rate": 6.478508429156189e-07, "loss": 0.1506500244140625, "step": 6086 }, { "epoch": 0.8481850484219327, "grad_norm": 0.6389238238334656, "learning_rate": 6.466910994926384e-07, "loss": 0.10680007934570312, "step": 6087 }, { "epoch": 0.848324392113147, "grad_norm": 0.8620396852493286, "learning_rate": 6.455323232755095e-07, "loss": 0.12154388427734375, "step": 6088 }, { "epoch": 0.8484637358043614, "grad_norm": 0.7677833437919617, "learning_rate": 6.44374514521684e-07, "loss": 0.13269805908203125, "step": 6089 }, { "epoch": 0.8486030794955758, "grad_norm": 0.9077752828598022, "learning_rate": 6.432176734883994e-07, "loss": 0.124359130859375, "step": 6090 }, { "epoch": 0.8487424231867903, "grad_norm": 0.9738633036613464, "learning_rate": 6.420618004326818e-07, "loss": 0.1382904052734375, "step": 6091 }, { "epoch": 0.8488817668780047, "grad_norm": 0.6883298754692078, "learning_rate": 6.409068956113379e-07, "loss": 0.12954330444335938, "step": 6092 }, { "epoch": 0.849021110569219, "grad_norm": 0.9940577745437622, "learning_rate": 6.397529592809615e-07, "loss": 0.16913223266601562, "step": 6093 }, { "epoch": 0.8491604542604334, "grad_norm": 1.3141958713531494, "learning_rate": 6.38599991697933e-07, "loss": 0.174163818359375, "step": 6094 }, { "epoch": 0.8492997979516478, "grad_norm": 0.6591677665710449, "learning_rate": 6.374479931184141e-07, "loss": 0.1155242919921875, "step": 6095 }, { "epoch": 0.8494391416428622, "grad_norm": 1.1264379024505615, "learning_rate": 6.362969637983507e-07, "loss": 0.15240478515625, "step": 6096 }, { "epoch": 0.8495784853340765, "grad_norm": 0.8474053144454956, "learning_rate": 6.351469039934771e-07, "loss": 0.13055038452148438, "step": 6097 }, { "epoch": 0.8497178290252909, "grad_norm": 1.6259185075759888, "learning_rate": 6.339978139593117e-07, "loss": 0.18700027465820312, "step": 6098 }, { "epoch": 0.8498571727165053, "grad_norm": 0.8917346000671387, "learning_rate": 6.328496939511541e-07, "loss": 0.1418304443359375, "step": 6099 }, { "epoch": 0.8499965164077197, "grad_norm": 1.5300893783569336, "learning_rate": 6.317025442240893e-07, "loss": 0.15310287475585938, "step": 6100 }, { "epoch": 0.850135860098934, "grad_norm": 0.7904303669929504, "learning_rate": 6.305563650329899e-07, "loss": 0.13911819458007812, "step": 6101 }, { "epoch": 0.8502752037901484, "grad_norm": 0.7616844177246094, "learning_rate": 6.294111566325106e-07, "loss": 0.12383270263671875, "step": 6102 }, { "epoch": 0.8504145474813628, "grad_norm": 1.2689578533172607, "learning_rate": 6.282669192770896e-07, "loss": 0.14719772338867188, "step": 6103 }, { "epoch": 0.8505538911725772, "grad_norm": 0.9333845376968384, "learning_rate": 6.271236532209502e-07, "loss": 0.14639663696289062, "step": 6104 }, { "epoch": 0.8506932348637916, "grad_norm": 1.2251105308532715, "learning_rate": 6.259813587181024e-07, "loss": 0.17687606811523438, "step": 6105 }, { "epoch": 0.8508325785550059, "grad_norm": 0.8052720427513123, "learning_rate": 6.248400360223355e-07, "loss": 0.13355636596679688, "step": 6106 }, { "epoch": 0.8509719222462203, "grad_norm": 0.8032337427139282, "learning_rate": 6.236996853872251e-07, "loss": 0.12787818908691406, "step": 6107 }, { "epoch": 0.8511112659374347, "grad_norm": 1.1172372102737427, "learning_rate": 6.225603070661318e-07, "loss": 0.14457130432128906, "step": 6108 }, { "epoch": 0.8512506096286491, "grad_norm": 0.8810243010520935, "learning_rate": 6.214219013122008e-07, "loss": 0.14389419555664062, "step": 6109 }, { "epoch": 0.8513899533198634, "grad_norm": 0.9451615810394287, "learning_rate": 6.202844683783587e-07, "loss": 0.13744354248046875, "step": 6110 }, { "epoch": 0.8515292970110778, "grad_norm": 1.0889379978179932, "learning_rate": 6.191480085173163e-07, "loss": 0.14161300659179688, "step": 6111 }, { "epoch": 0.8516686407022922, "grad_norm": 0.666245698928833, "learning_rate": 6.180125219815697e-07, "loss": 0.10291671752929688, "step": 6112 }, { "epoch": 0.8518079843935066, "grad_norm": 0.8695173263549805, "learning_rate": 6.168780090233994e-07, "loss": 0.13106536865234375, "step": 6113 }, { "epoch": 0.851947328084721, "grad_norm": 1.3343498706817627, "learning_rate": 6.157444698948656e-07, "loss": 0.16160202026367188, "step": 6114 }, { "epoch": 0.8520866717759353, "grad_norm": 0.9465512037277222, "learning_rate": 6.146119048478177e-07, "loss": 0.16096878051757812, "step": 6115 }, { "epoch": 0.8522260154671497, "grad_norm": 1.3550949096679688, "learning_rate": 6.134803141338835e-07, "loss": 0.16582298278808594, "step": 6116 }, { "epoch": 0.8523653591583641, "grad_norm": 1.0093752145767212, "learning_rate": 6.123496980044785e-07, "loss": 0.13665199279785156, "step": 6117 }, { "epoch": 0.8525047028495785, "grad_norm": 1.547860026359558, "learning_rate": 6.112200567107978e-07, "loss": 0.16323471069335938, "step": 6118 }, { "epoch": 0.8526440465407928, "grad_norm": 0.7023031115531921, "learning_rate": 6.10091390503823e-07, "loss": 0.10567474365234375, "step": 6119 }, { "epoch": 0.8527833902320072, "grad_norm": 0.8516184091567993, "learning_rate": 6.089636996343202e-07, "loss": 0.12883567810058594, "step": 6120 }, { "epoch": 0.8529227339232216, "grad_norm": 1.0864713191986084, "learning_rate": 6.07836984352832e-07, "loss": 0.15952682495117188, "step": 6121 }, { "epoch": 0.853062077614436, "grad_norm": 0.705501139163971, "learning_rate": 6.067112449096907e-07, "loss": 0.13495635986328125, "step": 6122 }, { "epoch": 0.8532014213056504, "grad_norm": 0.9218714237213135, "learning_rate": 6.055864815550106e-07, "loss": 0.13328933715820312, "step": 6123 }, { "epoch": 0.8533407649968647, "grad_norm": 0.8512553572654724, "learning_rate": 6.044626945386894e-07, "loss": 0.12467575073242188, "step": 6124 }, { "epoch": 0.8534801086880791, "grad_norm": 0.54787278175354, "learning_rate": 6.033398841104043e-07, "loss": 0.09770774841308594, "step": 6125 }, { "epoch": 0.8536194523792935, "grad_norm": 0.7571362257003784, "learning_rate": 6.022180505196207e-07, "loss": 0.12020111083984375, "step": 6126 }, { "epoch": 0.8537587960705079, "grad_norm": 0.8866241574287415, "learning_rate": 6.01097194015583e-07, "loss": 0.15460586547851562, "step": 6127 }, { "epoch": 0.8538981397617222, "grad_norm": 1.2654393911361694, "learning_rate": 5.999773148473193e-07, "loss": 0.15268707275390625, "step": 6128 }, { "epoch": 0.8540374834529366, "grad_norm": 1.0551403760910034, "learning_rate": 5.988584132636421e-07, "loss": 0.13065338134765625, "step": 6129 }, { "epoch": 0.854176827144151, "grad_norm": 1.3916678428649902, "learning_rate": 5.977404895131467e-07, "loss": 0.20458984375, "step": 6130 }, { "epoch": 0.8543161708353654, "grad_norm": 0.7698041796684265, "learning_rate": 5.966235438442086e-07, "loss": 0.12530899047851562, "step": 6131 }, { "epoch": 0.8544555145265799, "grad_norm": 1.7976980209350586, "learning_rate": 5.955075765049878e-07, "loss": 0.17608642578125, "step": 6132 }, { "epoch": 0.8545948582177942, "grad_norm": 0.9741660952568054, "learning_rate": 5.943925877434276e-07, "loss": 0.15313720703125, "step": 6133 }, { "epoch": 0.8547342019090086, "grad_norm": 1.0687826871871948, "learning_rate": 5.932785778072531e-07, "loss": 0.13498687744140625, "step": 6134 }, { "epoch": 0.854873545600223, "grad_norm": 1.2054970264434814, "learning_rate": 5.921655469439708e-07, "loss": 0.16043853759765625, "step": 6135 }, { "epoch": 0.8550128892914374, "grad_norm": 1.0393165349960327, "learning_rate": 5.910534954008718e-07, "loss": 0.1323089599609375, "step": 6136 }, { "epoch": 0.8551522329826517, "grad_norm": 1.075895071029663, "learning_rate": 5.899424234250278e-07, "loss": 0.16291427612304688, "step": 6137 }, { "epoch": 0.8552915766738661, "grad_norm": 0.5952576994895935, "learning_rate": 5.888323312632948e-07, "loss": 0.114532470703125, "step": 6138 }, { "epoch": 0.8554309203650805, "grad_norm": 1.4351446628570557, "learning_rate": 5.877232191623078e-07, "loss": 0.12126541137695312, "step": 6139 }, { "epoch": 0.8555702640562949, "grad_norm": 1.009639024734497, "learning_rate": 5.866150873684878e-07, "loss": 0.1429290771484375, "step": 6140 }, { "epoch": 0.8557096077475093, "grad_norm": 1.1988449096679688, "learning_rate": 5.855079361280374e-07, "loss": 0.1717853546142578, "step": 6141 }, { "epoch": 0.8558489514387236, "grad_norm": 1.0869046449661255, "learning_rate": 5.844017656869389e-07, "loss": 0.15466690063476562, "step": 6142 }, { "epoch": 0.855988295129938, "grad_norm": 0.513739287853241, "learning_rate": 5.83296576290957e-07, "loss": 0.09922409057617188, "step": 6143 }, { "epoch": 0.8561276388211524, "grad_norm": 1.3889402151107788, "learning_rate": 5.821923681856406e-07, "loss": 0.14873123168945312, "step": 6144 }, { "epoch": 0.8562669825123668, "grad_norm": 0.7569050788879395, "learning_rate": 5.810891416163211e-07, "loss": 0.11975288391113281, "step": 6145 }, { "epoch": 0.8564063262035811, "grad_norm": 1.6082199811935425, "learning_rate": 5.799868968281075e-07, "loss": 0.18557357788085938, "step": 6146 }, { "epoch": 0.8565456698947955, "grad_norm": 0.8519449234008789, "learning_rate": 5.788856340658966e-07, "loss": 0.13328933715820312, "step": 6147 }, { "epoch": 0.8566850135860099, "grad_norm": 0.7339284420013428, "learning_rate": 5.777853535743605e-07, "loss": 0.11263656616210938, "step": 6148 }, { "epoch": 0.8568243572772243, "grad_norm": 0.6815208792686462, "learning_rate": 5.766860555979586e-07, "loss": 0.12246322631835938, "step": 6149 }, { "epoch": 0.8569637009684387, "grad_norm": 1.2839007377624512, "learning_rate": 5.755877403809284e-07, "loss": 0.17713546752929688, "step": 6150 }, { "epoch": 0.857103044659653, "grad_norm": 0.5333544611930847, "learning_rate": 5.744904081672914e-07, "loss": 0.11107254028320312, "step": 6151 }, { "epoch": 0.8572423883508674, "grad_norm": 0.4947330951690674, "learning_rate": 5.733940592008519e-07, "loss": 0.109893798828125, "step": 6152 }, { "epoch": 0.8573817320420818, "grad_norm": 0.785266101360321, "learning_rate": 5.72298693725189e-07, "loss": 0.1374359130859375, "step": 6153 }, { "epoch": 0.8575210757332962, "grad_norm": 0.6283155083656311, "learning_rate": 5.712043119836702e-07, "loss": 0.11974334716796875, "step": 6154 }, { "epoch": 0.8576604194245105, "grad_norm": 1.3250316381454468, "learning_rate": 5.701109142194422e-07, "loss": 0.19933319091796875, "step": 6155 }, { "epoch": 0.8577997631157249, "grad_norm": 0.6994273066520691, "learning_rate": 5.69018500675434e-07, "loss": 0.12276649475097656, "step": 6156 }, { "epoch": 0.8579391068069393, "grad_norm": 0.7201435565948486, "learning_rate": 5.679270715943535e-07, "loss": 0.11171913146972656, "step": 6157 }, { "epoch": 0.8580784504981537, "grad_norm": 0.8597027659416199, "learning_rate": 5.668366272186915e-07, "loss": 0.14899444580078125, "step": 6158 }, { "epoch": 0.858217794189368, "grad_norm": 0.5359700322151184, "learning_rate": 5.657471677907205e-07, "loss": 0.10512161254882812, "step": 6159 }, { "epoch": 0.8583571378805824, "grad_norm": 0.7463438510894775, "learning_rate": 5.646586935524922e-07, "loss": 0.11370468139648438, "step": 6160 }, { "epoch": 0.8584964815717968, "grad_norm": 2.0736076831817627, "learning_rate": 5.635712047458419e-07, "loss": 0.14842605590820312, "step": 6161 }, { "epoch": 0.8586358252630112, "grad_norm": 0.8937824964523315, "learning_rate": 5.624847016123847e-07, "loss": 0.11203384399414062, "step": 6162 }, { "epoch": 0.8587751689542256, "grad_norm": 0.9986937046051025, "learning_rate": 5.613991843935179e-07, "loss": 0.14319610595703125, "step": 6163 }, { "epoch": 0.8589145126454399, "grad_norm": 0.7951384782791138, "learning_rate": 5.60314653330416e-07, "loss": 0.12097549438476562, "step": 6164 }, { "epoch": 0.8590538563366543, "grad_norm": 1.2905144691467285, "learning_rate": 5.592311086640379e-07, "loss": 0.1607513427734375, "step": 6165 }, { "epoch": 0.8591932000278687, "grad_norm": 1.1077157258987427, "learning_rate": 5.581485506351242e-07, "loss": 0.13593673706054688, "step": 6166 }, { "epoch": 0.8593325437190831, "grad_norm": 1.3972861766815186, "learning_rate": 5.570669794841921e-07, "loss": 0.16168212890625, "step": 6167 }, { "epoch": 0.8594718874102975, "grad_norm": 0.737461268901825, "learning_rate": 5.559863954515448e-07, "loss": 0.10747528076171875, "step": 6168 }, { "epoch": 0.8596112311015118, "grad_norm": 1.0988454818725586, "learning_rate": 5.549067987772605e-07, "loss": 0.15769577026367188, "step": 6169 }, { "epoch": 0.8597505747927262, "grad_norm": 0.8262186050415039, "learning_rate": 5.538281897012032e-07, "loss": 0.145904541015625, "step": 6170 }, { "epoch": 0.8598899184839406, "grad_norm": 1.0862936973571777, "learning_rate": 5.527505684630136e-07, "loss": 0.1430644989013672, "step": 6171 }, { "epoch": 0.8600292621751551, "grad_norm": 0.9360519051551819, "learning_rate": 5.51673935302115e-07, "loss": 0.1322793960571289, "step": 6172 }, { "epoch": 0.8601686058663695, "grad_norm": 0.9482347369194031, "learning_rate": 5.505982904577123e-07, "loss": 0.13369178771972656, "step": 6173 }, { "epoch": 0.8603079495575838, "grad_norm": 0.7552902698516846, "learning_rate": 5.495236341687876e-07, "loss": 0.14189720153808594, "step": 6174 }, { "epoch": 0.8604472932487982, "grad_norm": 1.339653730392456, "learning_rate": 5.484499666741044e-07, "loss": 0.18966293334960938, "step": 6175 }, { "epoch": 0.8605866369400126, "grad_norm": 0.9194268584251404, "learning_rate": 5.47377288212208e-07, "loss": 0.15470123291015625, "step": 6176 }, { "epoch": 0.860725980631227, "grad_norm": 0.9189404249191284, "learning_rate": 5.463055990214245e-07, "loss": 0.12235641479492188, "step": 6177 }, { "epoch": 0.8608653243224413, "grad_norm": 0.7193818688392639, "learning_rate": 5.452348993398566e-07, "loss": 0.13592529296875, "step": 6178 }, { "epoch": 0.8610046680136557, "grad_norm": 0.4857068657875061, "learning_rate": 5.441651894053895e-07, "loss": 0.10972976684570312, "step": 6179 }, { "epoch": 0.8611440117048701, "grad_norm": 0.7073398232460022, "learning_rate": 5.430964694556884e-07, "loss": 0.10602760314941406, "step": 6180 }, { "epoch": 0.8612833553960845, "grad_norm": 0.8839578032493591, "learning_rate": 5.420287397282004e-07, "loss": 0.1287078857421875, "step": 6181 }, { "epoch": 0.8614226990872988, "grad_norm": 1.2852565050125122, "learning_rate": 5.409620004601479e-07, "loss": 0.14390182495117188, "step": 6182 }, { "epoch": 0.8615620427785132, "grad_norm": 0.6593136787414551, "learning_rate": 5.398962518885375e-07, "loss": 0.12029266357421875, "step": 6183 }, { "epoch": 0.8617013864697276, "grad_norm": 0.6496924161911011, "learning_rate": 5.388314942501549e-07, "loss": 0.1152496337890625, "step": 6184 }, { "epoch": 0.861840730160942, "grad_norm": 0.9410533905029297, "learning_rate": 5.377677277815646e-07, "loss": 0.14682388305664062, "step": 6185 }, { "epoch": 0.8619800738521564, "grad_norm": 1.5685749053955078, "learning_rate": 5.367049527191093e-07, "loss": 0.17683792114257812, "step": 6186 }, { "epoch": 0.8621194175433707, "grad_norm": 0.6108804941177368, "learning_rate": 5.356431692989144e-07, "loss": 0.11300277709960938, "step": 6187 }, { "epoch": 0.8622587612345851, "grad_norm": 2.076730251312256, "learning_rate": 5.345823777568859e-07, "loss": 0.18435096740722656, "step": 6188 }, { "epoch": 0.8623981049257995, "grad_norm": 1.6919443607330322, "learning_rate": 5.335225783287051e-07, "loss": 0.20957183837890625, "step": 6189 }, { "epoch": 0.8625374486170139, "grad_norm": 1.3111951351165771, "learning_rate": 5.324637712498359e-07, "loss": 0.14012908935546875, "step": 6190 }, { "epoch": 0.8626767923082282, "grad_norm": 1.1786831617355347, "learning_rate": 5.314059567555213e-07, "loss": 0.15561676025390625, "step": 6191 }, { "epoch": 0.8628161359994426, "grad_norm": 0.9946407675743103, "learning_rate": 5.303491350807832e-07, "loss": 0.14615249633789062, "step": 6192 }, { "epoch": 0.862955479690657, "grad_norm": 1.1943870782852173, "learning_rate": 5.292933064604228e-07, "loss": 0.1644439697265625, "step": 6193 }, { "epoch": 0.8630948233818714, "grad_norm": 0.757683277130127, "learning_rate": 5.282384711290228e-07, "loss": 0.10795021057128906, "step": 6194 }, { "epoch": 0.8632341670730858, "grad_norm": 0.6696810126304626, "learning_rate": 5.271846293209426e-07, "loss": 0.110626220703125, "step": 6195 }, { "epoch": 0.8633735107643001, "grad_norm": 1.6597073078155518, "learning_rate": 5.261317812703204e-07, "loss": 0.16941452026367188, "step": 6196 }, { "epoch": 0.8635128544555145, "grad_norm": 1.3027368783950806, "learning_rate": 5.250799272110768e-07, "loss": 0.16880035400390625, "step": 6197 }, { "epoch": 0.8636521981467289, "grad_norm": 1.2277262210845947, "learning_rate": 5.240290673769099e-07, "loss": 0.15972137451171875, "step": 6198 }, { "epoch": 0.8637915418379433, "grad_norm": 0.8077073693275452, "learning_rate": 5.229792020012947e-07, "loss": 0.13796615600585938, "step": 6199 }, { "epoch": 0.8639308855291576, "grad_norm": 0.7967644333839417, "learning_rate": 5.2193033131749e-07, "loss": 0.13212966918945312, "step": 6200 }, { "epoch": 0.864070229220372, "grad_norm": 1.0421228408813477, "learning_rate": 5.20882455558529e-07, "loss": 0.15439987182617188, "step": 6201 }, { "epoch": 0.8642095729115864, "grad_norm": 1.2157548666000366, "learning_rate": 5.19835574957227e-07, "loss": 0.14783859252929688, "step": 6202 }, { "epoch": 0.8643489166028008, "grad_norm": 0.7546905875205994, "learning_rate": 5.187896897461752e-07, "loss": 0.120330810546875, "step": 6203 }, { "epoch": 0.8644882602940152, "grad_norm": 0.7647699117660522, "learning_rate": 5.177448001577468e-07, "loss": 0.13030624389648438, "step": 6204 }, { "epoch": 0.8646276039852295, "grad_norm": 1.0398368835449219, "learning_rate": 5.167009064240936e-07, "loss": 0.16025161743164062, "step": 6205 }, { "epoch": 0.8647669476764439, "grad_norm": 0.6171146035194397, "learning_rate": 5.156580087771429e-07, "loss": 0.11647415161132812, "step": 6206 }, { "epoch": 0.8649062913676583, "grad_norm": 0.8303402662277222, "learning_rate": 5.146161074486022e-07, "loss": 0.12125015258789062, "step": 6207 }, { "epoch": 0.8650456350588727, "grad_norm": 0.7558333277702332, "learning_rate": 5.135752026699597e-07, "loss": 0.1294403076171875, "step": 6208 }, { "epoch": 0.865184978750087, "grad_norm": 0.8033178448677063, "learning_rate": 5.125352946724816e-07, "loss": 0.12058639526367188, "step": 6209 }, { "epoch": 0.8653243224413014, "grad_norm": 0.8688879013061523, "learning_rate": 5.114963836872105e-07, "loss": 0.12853622436523438, "step": 6210 }, { "epoch": 0.8654636661325158, "grad_norm": 0.7184528112411499, "learning_rate": 5.104584699449671e-07, "loss": 0.12435150146484375, "step": 6211 }, { "epoch": 0.8656030098237303, "grad_norm": 0.9137703776359558, "learning_rate": 5.094215536763541e-07, "loss": 0.14084243774414062, "step": 6212 }, { "epoch": 0.8657423535149447, "grad_norm": 0.9195219874382019, "learning_rate": 5.083856351117511e-07, "loss": 0.13727188110351562, "step": 6213 }, { "epoch": 0.865881697206159, "grad_norm": 0.8019293546676636, "learning_rate": 5.073507144813139e-07, "loss": 0.13393783569335938, "step": 6214 }, { "epoch": 0.8660210408973734, "grad_norm": 0.7718213200569153, "learning_rate": 5.063167920149797e-07, "loss": 0.1194915771484375, "step": 6215 }, { "epoch": 0.8661603845885878, "grad_norm": 0.5936844348907471, "learning_rate": 5.052838679424609e-07, "loss": 0.12074089050292969, "step": 6216 }, { "epoch": 0.8662997282798022, "grad_norm": 0.7542784810066223, "learning_rate": 5.042519424932512e-07, "loss": 0.13266372680664062, "step": 6217 }, { "epoch": 0.8664390719710166, "grad_norm": 0.8139941096305847, "learning_rate": 5.0322101589662e-07, "loss": 0.13747406005859375, "step": 6218 }, { "epoch": 0.8665784156622309, "grad_norm": 1.2830923795700073, "learning_rate": 5.02191088381615e-07, "loss": 0.16916465759277344, "step": 6219 }, { "epoch": 0.8667177593534453, "grad_norm": 0.6019620299339294, "learning_rate": 5.01162160177065e-07, "loss": 0.111236572265625, "step": 6220 }, { "epoch": 0.8668571030446597, "grad_norm": 1.503550410270691, "learning_rate": 5.001342315115726e-07, "loss": 0.1558990478515625, "step": 6221 }, { "epoch": 0.8669964467358741, "grad_norm": 2.721994161605835, "learning_rate": 4.991073026135196e-07, "loss": 0.18255233764648438, "step": 6222 }, { "epoch": 0.8671357904270884, "grad_norm": 1.051223635673523, "learning_rate": 4.980813737110662e-07, "loss": 0.15727615356445312, "step": 6223 }, { "epoch": 0.8672751341183028, "grad_norm": 1.019364833831787, "learning_rate": 4.970564450321525e-07, "loss": 0.11453628540039062, "step": 6224 }, { "epoch": 0.8674144778095172, "grad_norm": 1.247437834739685, "learning_rate": 4.960325168044916e-07, "loss": 0.1513214111328125, "step": 6225 }, { "epoch": 0.8675538215007316, "grad_norm": 1.2208365201950073, "learning_rate": 4.950095892555789e-07, "loss": 0.18608856201171875, "step": 6226 }, { "epoch": 0.867693165191946, "grad_norm": 0.8995647430419922, "learning_rate": 4.93987662612685e-07, "loss": 0.11233901977539062, "step": 6227 }, { "epoch": 0.8678325088831603, "grad_norm": 0.6017724871635437, "learning_rate": 4.929667371028579e-07, "loss": 0.11981582641601562, "step": 6228 }, { "epoch": 0.8679718525743747, "grad_norm": 0.7366700172424316, "learning_rate": 4.919468129529237e-07, "loss": 0.12043952941894531, "step": 6229 }, { "epoch": 0.8681111962655891, "grad_norm": 0.8256680965423584, "learning_rate": 4.909278903894887e-07, "loss": 0.12667083740234375, "step": 6230 }, { "epoch": 0.8682505399568035, "grad_norm": 1.213277816772461, "learning_rate": 4.89909969638932e-07, "loss": 0.1672515869140625, "step": 6231 }, { "epoch": 0.8683898836480178, "grad_norm": 0.8542367815971375, "learning_rate": 4.888930509274125e-07, "loss": 0.13603591918945312, "step": 6232 }, { "epoch": 0.8685292273392322, "grad_norm": 0.6857790350914001, "learning_rate": 4.878771344808664e-07, "loss": 0.12123489379882812, "step": 6233 }, { "epoch": 0.8686685710304466, "grad_norm": 0.7363210320472717, "learning_rate": 4.868622205250089e-07, "loss": 0.1240997314453125, "step": 6234 }, { "epoch": 0.868807914721661, "grad_norm": 0.9610161185264587, "learning_rate": 4.858483092853278e-07, "loss": 0.15578460693359375, "step": 6235 }, { "epoch": 0.8689472584128753, "grad_norm": 0.7020800113677979, "learning_rate": 4.848354009870931e-07, "loss": 0.12002754211425781, "step": 6236 }, { "epoch": 0.8690866021040897, "grad_norm": 1.0000025033950806, "learning_rate": 4.838234958553501e-07, "loss": 0.16106033325195312, "step": 6237 }, { "epoch": 0.8692259457953041, "grad_norm": 1.7550231218338013, "learning_rate": 4.828125941149197e-07, "loss": 0.18088150024414062, "step": 6238 }, { "epoch": 0.8693652894865185, "grad_norm": 0.9081085324287415, "learning_rate": 4.818026959904016e-07, "loss": 0.13616561889648438, "step": 6239 }, { "epoch": 0.8695046331777329, "grad_norm": 0.9186362624168396, "learning_rate": 4.80793801706172e-07, "loss": 0.13834381103515625, "step": 6240 }, { "epoch": 0.8696439768689472, "grad_norm": 1.243837594985962, "learning_rate": 4.797859114863851e-07, "loss": 0.14678573608398438, "step": 6241 }, { "epoch": 0.8697833205601616, "grad_norm": 1.3375647068023682, "learning_rate": 4.787790255549707e-07, "loss": 0.17474746704101562, "step": 6242 }, { "epoch": 0.869922664251376, "grad_norm": 0.9335896372795105, "learning_rate": 4.777731441356342e-07, "loss": 0.1384258270263672, "step": 6243 }, { "epoch": 0.8700620079425904, "grad_norm": 0.9050582647323608, "learning_rate": 4.7676826745186144e-07, "loss": 0.15064239501953125, "step": 6244 }, { "epoch": 0.8702013516338047, "grad_norm": 1.0972274541854858, "learning_rate": 4.757643957269131e-07, "loss": 0.15326499938964844, "step": 6245 }, { "epoch": 0.8703406953250191, "grad_norm": 0.7754322290420532, "learning_rate": 4.7476152918382535e-07, "loss": 0.11478424072265625, "step": 6246 }, { "epoch": 0.8704800390162335, "grad_norm": 0.9476706385612488, "learning_rate": 4.737596680454137e-07, "loss": 0.13825225830078125, "step": 6247 }, { "epoch": 0.8706193827074479, "grad_norm": 1.0076287984848022, "learning_rate": 4.727588125342669e-07, "loss": 0.11694717407226562, "step": 6248 }, { "epoch": 0.8707587263986623, "grad_norm": 0.6464809775352478, "learning_rate": 4.7175896287275424e-07, "loss": 0.12348556518554688, "step": 6249 }, { "epoch": 0.8708980700898766, "grad_norm": 0.5968886613845825, "learning_rate": 4.7076011928301803e-07, "loss": 0.11187362670898438, "step": 6250 }, { "epoch": 0.871037413781091, "grad_norm": 0.6971783638000488, "learning_rate": 4.6976228198697847e-07, "loss": 0.13632774353027344, "step": 6251 }, { "epoch": 0.8711767574723055, "grad_norm": 0.8986730575561523, "learning_rate": 4.687654512063344e-07, "loss": 0.13752174377441406, "step": 6252 }, { "epoch": 0.8713161011635199, "grad_norm": 0.8171427845954895, "learning_rate": 4.6776962716255593e-07, "loss": 0.114715576171875, "step": 6253 }, { "epoch": 0.8714554448547343, "grad_norm": 1.0071744918823242, "learning_rate": 4.667748100768937e-07, "loss": 0.13288497924804688, "step": 6254 }, { "epoch": 0.8715947885459486, "grad_norm": 0.6825315356254578, "learning_rate": 4.657810001703733e-07, "loss": 0.10571670532226562, "step": 6255 }, { "epoch": 0.871734132237163, "grad_norm": 1.5057204961776733, "learning_rate": 4.647881976637975e-07, "loss": 0.19313430786132812, "step": 6256 }, { "epoch": 0.8718734759283774, "grad_norm": 0.8224225640296936, "learning_rate": 4.637964027777425e-07, "loss": 0.1505298614501953, "step": 6257 }, { "epoch": 0.8720128196195918, "grad_norm": 1.45622980594635, "learning_rate": 4.62805615732565e-07, "loss": 0.15761947631835938, "step": 6258 }, { "epoch": 0.8721521633108061, "grad_norm": 0.9432463049888611, "learning_rate": 4.6181583674839323e-07, "loss": 0.14432525634765625, "step": 6259 }, { "epoch": 0.8722915070020205, "grad_norm": 0.8203635811805725, "learning_rate": 4.6082706604513307e-07, "loss": 0.13728713989257812, "step": 6260 }, { "epoch": 0.8724308506932349, "grad_norm": 0.728397786617279, "learning_rate": 4.598393038424681e-07, "loss": 0.13947296142578125, "step": 6261 }, { "epoch": 0.8725701943844493, "grad_norm": 0.7078805565834045, "learning_rate": 4.5885255035985675e-07, "loss": 0.12435150146484375, "step": 6262 }, { "epoch": 0.8727095380756636, "grad_norm": 1.0168646574020386, "learning_rate": 4.578668058165325e-07, "loss": 0.15512847900390625, "step": 6263 }, { "epoch": 0.872848881766878, "grad_norm": 0.8565886616706848, "learning_rate": 4.5688207043150467e-07, "loss": 0.134002685546875, "step": 6264 }, { "epoch": 0.8729882254580924, "grad_norm": 1.6554596424102783, "learning_rate": 4.5589834442355986e-07, "loss": 0.17487716674804688, "step": 6265 }, { "epoch": 0.8731275691493068, "grad_norm": 0.8266158699989319, "learning_rate": 4.549156280112599e-07, "loss": 0.13436508178710938, "step": 6266 }, { "epoch": 0.8732669128405212, "grad_norm": 0.4924282133579254, "learning_rate": 4.5393392141294066e-07, "loss": 0.10131454467773438, "step": 6267 }, { "epoch": 0.8734062565317355, "grad_norm": 0.7842754125595093, "learning_rate": 4.5295322484671667e-07, "loss": 0.12447929382324219, "step": 6268 }, { "epoch": 0.8735456002229499, "grad_norm": 0.700209379196167, "learning_rate": 4.519735385304741e-07, "loss": 0.11431312561035156, "step": 6269 }, { "epoch": 0.8736849439141643, "grad_norm": 1.023067593574524, "learning_rate": 4.509948626818789e-07, "loss": 0.13351058959960938, "step": 6270 }, { "epoch": 0.8738242876053787, "grad_norm": 0.8315699696540833, "learning_rate": 4.500171975183687e-07, "loss": 0.13286209106445312, "step": 6271 }, { "epoch": 0.873963631296593, "grad_norm": 0.8790693879127502, "learning_rate": 4.4904054325715927e-07, "loss": 0.13592529296875, "step": 6272 }, { "epoch": 0.8741029749878074, "grad_norm": 0.6554561853408813, "learning_rate": 4.4806490011524205e-07, "loss": 0.12269973754882812, "step": 6273 }, { "epoch": 0.8742423186790218, "grad_norm": 1.5707076787948608, "learning_rate": 4.4709026830938194e-07, "loss": 0.156097412109375, "step": 6274 }, { "epoch": 0.8743816623702362, "grad_norm": 1.264766812324524, "learning_rate": 4.46116648056118e-07, "loss": 0.16752243041992188, "step": 6275 }, { "epoch": 0.8745210060614506, "grad_norm": 1.1319010257720947, "learning_rate": 4.451440395717682e-07, "loss": 0.14583206176757812, "step": 6276 }, { "epoch": 0.8746603497526649, "grad_norm": 1.0444409847259521, "learning_rate": 4.441724430724248e-07, "loss": 0.1268157958984375, "step": 6277 }, { "epoch": 0.8747996934438793, "grad_norm": 1.5659807920455933, "learning_rate": 4.432018587739517e-07, "loss": 0.13120651245117188, "step": 6278 }, { "epoch": 0.8749390371350937, "grad_norm": 0.9177461266517639, "learning_rate": 4.422322868919937e-07, "loss": 0.15310287475585938, "step": 6279 }, { "epoch": 0.8750783808263081, "grad_norm": 0.7385509014129639, "learning_rate": 4.4126372764196457e-07, "loss": 0.12641143798828125, "step": 6280 }, { "epoch": 0.8752177245175224, "grad_norm": 1.0648788213729858, "learning_rate": 4.402961812390588e-07, "loss": 0.15116500854492188, "step": 6281 }, { "epoch": 0.8753570682087368, "grad_norm": 1.3792409896850586, "learning_rate": 4.3932964789824064e-07, "loss": 0.15688705444335938, "step": 6282 }, { "epoch": 0.8754964118999512, "grad_norm": 0.9664058089256287, "learning_rate": 4.3836412783425265e-07, "loss": 0.13642120361328125, "step": 6283 }, { "epoch": 0.8756357555911656, "grad_norm": 0.6119021773338318, "learning_rate": 4.3739962126161273e-07, "loss": 0.10825347900390625, "step": 6284 }, { "epoch": 0.87577509928238, "grad_norm": 1.0864906311035156, "learning_rate": 4.3643612839461057e-07, "loss": 0.14036941528320312, "step": 6285 }, { "epoch": 0.8759144429735943, "grad_norm": 1.7036646604537964, "learning_rate": 4.354736494473122e-07, "loss": 0.17787551879882812, "step": 6286 }, { "epoch": 0.8760537866648087, "grad_norm": 1.0430546998977661, "learning_rate": 4.345121846335593e-07, "loss": 0.11140823364257812, "step": 6287 }, { "epoch": 0.8761931303560231, "grad_norm": 1.278036117553711, "learning_rate": 4.335517341669676e-07, "loss": 0.1687774658203125, "step": 6288 }, { "epoch": 0.8763324740472375, "grad_norm": 1.1411406993865967, "learning_rate": 4.3259229826092655e-07, "loss": 0.15608978271484375, "step": 6289 }, { "epoch": 0.8764718177384518, "grad_norm": 0.6769304871559143, "learning_rate": 4.316338771286005e-07, "loss": 0.11642074584960938, "step": 6290 }, { "epoch": 0.8766111614296662, "grad_norm": 1.756168007850647, "learning_rate": 4.3067647098293033e-07, "loss": 0.20661544799804688, "step": 6291 }, { "epoch": 0.8767505051208806, "grad_norm": 1.0999151468276978, "learning_rate": 4.29720080036628e-07, "loss": 0.1257476806640625, "step": 6292 }, { "epoch": 0.8768898488120951, "grad_norm": 1.163297176361084, "learning_rate": 4.2876470450218254e-07, "loss": 0.15672683715820312, "step": 6293 }, { "epoch": 0.8770291925033095, "grad_norm": 0.8701852560043335, "learning_rate": 4.278103445918569e-07, "loss": 0.13937759399414062, "step": 6294 }, { "epoch": 0.8771685361945238, "grad_norm": 0.5412652492523193, "learning_rate": 4.268570005176892e-07, "loss": 0.09718990325927734, "step": 6295 }, { "epoch": 0.8773078798857382, "grad_norm": 1.110366702079773, "learning_rate": 4.259046724914878e-07, "loss": 0.14211273193359375, "step": 6296 }, { "epoch": 0.8774472235769526, "grad_norm": 1.3308839797973633, "learning_rate": 4.2495336072484015e-07, "loss": 0.1524219512939453, "step": 6297 }, { "epoch": 0.877586567268167, "grad_norm": 0.6904951930046082, "learning_rate": 4.240030654291061e-07, "loss": 0.1243743896484375, "step": 6298 }, { "epoch": 0.8777259109593814, "grad_norm": 0.7862755060195923, "learning_rate": 4.2305378681541833e-07, "loss": 0.1352386474609375, "step": 6299 }, { "epoch": 0.8778652546505957, "grad_norm": 0.7007458806037903, "learning_rate": 4.221055250946865e-07, "loss": 0.1262359619140625, "step": 6300 }, { "epoch": 0.8780045983418101, "grad_norm": 1.288152813911438, "learning_rate": 4.21158280477591e-07, "loss": 0.16263198852539062, "step": 6301 }, { "epoch": 0.8781439420330245, "grad_norm": 1.0659409761428833, "learning_rate": 4.202120531745896e-07, "loss": 0.128631591796875, "step": 6302 }, { "epoch": 0.8782832857242389, "grad_norm": 1.1857719421386719, "learning_rate": 4.192668433959113e-07, "loss": 0.13720703125, "step": 6303 }, { "epoch": 0.8784226294154532, "grad_norm": 1.0101444721221924, "learning_rate": 4.183226513515598e-07, "loss": 0.137054443359375, "step": 6304 }, { "epoch": 0.8785619731066676, "grad_norm": 0.7427616715431213, "learning_rate": 4.173794772513151e-07, "loss": 0.12107467651367188, "step": 6305 }, { "epoch": 0.878701316797882, "grad_norm": 0.7390968799591064, "learning_rate": 4.1643732130472737e-07, "loss": 0.10985183715820312, "step": 6306 }, { "epoch": 0.8788406604890964, "grad_norm": 1.270798921585083, "learning_rate": 4.1549618372112135e-07, "loss": 0.170989990234375, "step": 6307 }, { "epoch": 0.8789800041803107, "grad_norm": 1.8284354209899902, "learning_rate": 4.1455606470959755e-07, "loss": 0.1764373779296875, "step": 6308 }, { "epoch": 0.8791193478715251, "grad_norm": 0.7029841542243958, "learning_rate": 4.1361696447902944e-07, "loss": 0.1290740966796875, "step": 6309 }, { "epoch": 0.8792586915627395, "grad_norm": 1.5849857330322266, "learning_rate": 4.1267888323806294e-07, "loss": 0.1728382110595703, "step": 6310 }, { "epoch": 0.8793980352539539, "grad_norm": 1.1925008296966553, "learning_rate": 4.117418211951174e-07, "loss": 0.16824722290039062, "step": 6311 }, { "epoch": 0.8795373789451683, "grad_norm": 0.8491295576095581, "learning_rate": 4.1080577855838746e-07, "loss": 0.10822296142578125, "step": 6312 }, { "epoch": 0.8796767226363826, "grad_norm": 1.0955641269683838, "learning_rate": 4.098707555358411e-07, "loss": 0.14179420471191406, "step": 6313 }, { "epoch": 0.879816066327597, "grad_norm": 1.7104504108428955, "learning_rate": 4.0893675233521777e-07, "loss": 0.18942642211914062, "step": 6314 }, { "epoch": 0.8799554100188114, "grad_norm": 1.2807819843292236, "learning_rate": 4.080037691640321e-07, "loss": 0.15122222900390625, "step": 6315 }, { "epoch": 0.8800947537100258, "grad_norm": 0.9215747117996216, "learning_rate": 4.070718062295731e-07, "loss": 0.13495826721191406, "step": 6316 }, { "epoch": 0.8802340974012401, "grad_norm": 0.5509042739868164, "learning_rate": 4.0614086373890026e-07, "loss": 0.11523818969726562, "step": 6317 }, { "epoch": 0.8803734410924545, "grad_norm": 1.0626622438430786, "learning_rate": 4.05210941898847e-07, "loss": 0.12836074829101562, "step": 6318 }, { "epoch": 0.8805127847836689, "grad_norm": 0.866628885269165, "learning_rate": 4.042820409160214e-07, "loss": 0.15418624877929688, "step": 6319 }, { "epoch": 0.8806521284748833, "grad_norm": 0.6031200289726257, "learning_rate": 4.033541609968056e-07, "loss": 0.12562942504882812, "step": 6320 }, { "epoch": 0.8807914721660977, "grad_norm": 1.4848666191101074, "learning_rate": 4.0242730234735184e-07, "loss": 0.15743637084960938, "step": 6321 }, { "epoch": 0.880930815857312, "grad_norm": 0.8442098498344421, "learning_rate": 4.01501465173586e-07, "loss": 0.11925888061523438, "step": 6322 }, { "epoch": 0.8810701595485264, "grad_norm": 0.813210666179657, "learning_rate": 4.005766496812097e-07, "loss": 0.11940765380859375, "step": 6323 }, { "epoch": 0.8812095032397408, "grad_norm": 0.8170698285102844, "learning_rate": 3.9965285607569573e-07, "loss": 0.13160324096679688, "step": 6324 }, { "epoch": 0.8813488469309552, "grad_norm": 1.1222431659698486, "learning_rate": 3.987300845622882e-07, "loss": 0.14603424072265625, "step": 6325 }, { "epoch": 0.8814881906221695, "grad_norm": 1.178407907485962, "learning_rate": 3.978083353460083e-07, "loss": 0.14542007446289062, "step": 6326 }, { "epoch": 0.8816275343133839, "grad_norm": 0.8330102562904358, "learning_rate": 3.96887608631647e-07, "loss": 0.130615234375, "step": 6327 }, { "epoch": 0.8817668780045983, "grad_norm": 0.6426865458488464, "learning_rate": 3.959679046237663e-07, "loss": 0.10672760009765625, "step": 6328 }, { "epoch": 0.8819062216958127, "grad_norm": 0.7708615660667419, "learning_rate": 3.950492235267062e-07, "loss": 0.1142120361328125, "step": 6329 }, { "epoch": 0.882045565387027, "grad_norm": 1.04684579372406, "learning_rate": 3.9413156554457655e-07, "loss": 0.14111328125, "step": 6330 }, { "epoch": 0.8821849090782414, "grad_norm": 1.1159775257110596, "learning_rate": 3.9321493088125774e-07, "loss": 0.14447402954101562, "step": 6331 }, { "epoch": 0.8823242527694558, "grad_norm": 0.5422207713127136, "learning_rate": 3.9229931974040844e-07, "loss": 0.105377197265625, "step": 6332 }, { "epoch": 0.8824635964606703, "grad_norm": 1.30521821975708, "learning_rate": 3.9138473232545326e-07, "loss": 0.13833236694335938, "step": 6333 }, { "epoch": 0.8826029401518847, "grad_norm": 0.7153957486152649, "learning_rate": 3.9047116883959513e-07, "loss": 0.12646865844726562, "step": 6334 }, { "epoch": 0.882742283843099, "grad_norm": 0.7387098073959351, "learning_rate": 3.895586294858045e-07, "loss": 0.1211700439453125, "step": 6335 }, { "epoch": 0.8828816275343134, "grad_norm": 0.8840189576148987, "learning_rate": 3.886471144668291e-07, "loss": 0.1404590606689453, "step": 6336 }, { "epoch": 0.8830209712255278, "grad_norm": 1.0440901517868042, "learning_rate": 3.8773662398518596e-07, "loss": 0.1494293212890625, "step": 6337 }, { "epoch": 0.8831603149167422, "grad_norm": 0.8914977312088013, "learning_rate": 3.8682715824316594e-07, "loss": 0.1382598876953125, "step": 6338 }, { "epoch": 0.8832996586079566, "grad_norm": 0.9414033889770508, "learning_rate": 3.8591871744282973e-07, "loss": 0.14937210083007812, "step": 6339 }, { "epoch": 0.8834390022991709, "grad_norm": 1.0611993074417114, "learning_rate": 3.85011301786013e-07, "loss": 0.13659095764160156, "step": 6340 }, { "epoch": 0.8835783459903853, "grad_norm": 0.7867819666862488, "learning_rate": 3.841049114743239e-07, "loss": 0.13283538818359375, "step": 6341 }, { "epoch": 0.8837176896815997, "grad_norm": 1.0638779401779175, "learning_rate": 3.8319954670914094e-07, "loss": 0.15254592895507812, "step": 6342 }, { "epoch": 0.8838570333728141, "grad_norm": 1.4454492330551147, "learning_rate": 3.8229520769161474e-07, "loss": 0.1337127685546875, "step": 6343 }, { "epoch": 0.8839963770640284, "grad_norm": 0.7161829471588135, "learning_rate": 3.813918946226691e-07, "loss": 0.11417770385742188, "step": 6344 }, { "epoch": 0.8841357207552428, "grad_norm": 1.1843547821044922, "learning_rate": 3.804896077030007e-07, "loss": 0.13141250610351562, "step": 6345 }, { "epoch": 0.8842750644464572, "grad_norm": 0.8927050828933716, "learning_rate": 3.7958834713307524e-07, "loss": 0.115570068359375, "step": 6346 }, { "epoch": 0.8844144081376716, "grad_norm": 0.7617089748382568, "learning_rate": 3.786881131131348e-07, "loss": 0.11539840698242188, "step": 6347 }, { "epoch": 0.884553751828886, "grad_norm": 0.677152693271637, "learning_rate": 3.7778890584318773e-07, "loss": 0.11678504943847656, "step": 6348 }, { "epoch": 0.8846930955201003, "grad_norm": 0.9712453484535217, "learning_rate": 3.7689072552301973e-07, "loss": 0.13857269287109375, "step": 6349 }, { "epoch": 0.8848324392113147, "grad_norm": 0.8373802304267883, "learning_rate": 3.759935723521846e-07, "loss": 0.1405200958251953, "step": 6350 }, { "epoch": 0.8849717829025291, "grad_norm": 0.7409818768501282, "learning_rate": 3.7509744653001e-07, "loss": 0.12264633178710938, "step": 6351 }, { "epoch": 0.8851111265937435, "grad_norm": 1.1058429479599, "learning_rate": 3.742023482555951e-07, "loss": 0.1540985107421875, "step": 6352 }, { "epoch": 0.8852504702849578, "grad_norm": 1.0172516107559204, "learning_rate": 3.7330827772780967e-07, "loss": 0.13202667236328125, "step": 6353 }, { "epoch": 0.8853898139761722, "grad_norm": 0.8756654858589172, "learning_rate": 3.7241523514529476e-07, "loss": 0.13400840759277344, "step": 6354 }, { "epoch": 0.8855291576673866, "grad_norm": 0.7756472826004028, "learning_rate": 3.715232207064651e-07, "loss": 0.13943099975585938, "step": 6355 }, { "epoch": 0.885668501358601, "grad_norm": 0.7642573118209839, "learning_rate": 3.7063223460950705e-07, "loss": 0.12434768676757812, "step": 6356 }, { "epoch": 0.8858078450498154, "grad_norm": 0.5776758193969727, "learning_rate": 3.697422770523751e-07, "loss": 0.11979103088378906, "step": 6357 }, { "epoch": 0.8859471887410297, "grad_norm": 1.1586743593215942, "learning_rate": 3.688533482327994e-07, "loss": 0.16635894775390625, "step": 6358 }, { "epoch": 0.8860865324322441, "grad_norm": 1.0814249515533447, "learning_rate": 3.6796544834827865e-07, "loss": 0.14199447631835938, "step": 6359 }, { "epoch": 0.8862258761234585, "grad_norm": 0.8597733974456787, "learning_rate": 3.670785775960839e-07, "loss": 0.10455703735351562, "step": 6360 }, { "epoch": 0.8863652198146729, "grad_norm": 0.7925397753715515, "learning_rate": 3.66192736173257e-07, "loss": 0.12814712524414062, "step": 6361 }, { "epoch": 0.8865045635058872, "grad_norm": 0.6691511273384094, "learning_rate": 3.653079242766139e-07, "loss": 0.10195541381835938, "step": 6362 }, { "epoch": 0.8866439071971016, "grad_norm": 1.2513092756271362, "learning_rate": 3.6442414210273834e-07, "loss": 0.19588470458984375, "step": 6363 }, { "epoch": 0.886783250888316, "grad_norm": 1.3068019151687622, "learning_rate": 3.6354138984798506e-07, "loss": 0.15377044677734375, "step": 6364 }, { "epoch": 0.8869225945795304, "grad_norm": 1.123582363128662, "learning_rate": 3.6265966770848314e-07, "loss": 0.1549835205078125, "step": 6365 }, { "epoch": 0.8870619382707448, "grad_norm": 1.2652925252914429, "learning_rate": 3.6177897588013154e-07, "loss": 0.14194869995117188, "step": 6366 }, { "epoch": 0.8872012819619591, "grad_norm": 1.2360938787460327, "learning_rate": 3.608993145585987e-07, "loss": 0.1512298583984375, "step": 6367 }, { "epoch": 0.8873406256531735, "grad_norm": 0.6848655343055725, "learning_rate": 3.600206839393261e-07, "loss": 0.1310749053955078, "step": 6368 }, { "epoch": 0.8874799693443879, "grad_norm": 0.865624725818634, "learning_rate": 3.591430842175242e-07, "loss": 0.1250762939453125, "step": 6369 }, { "epoch": 0.8876193130356023, "grad_norm": 0.5932074785232544, "learning_rate": 3.5826651558817703e-07, "loss": 0.11088371276855469, "step": 6370 }, { "epoch": 0.8877586567268166, "grad_norm": 0.7494011521339417, "learning_rate": 3.5739097824603665e-07, "loss": 0.13104629516601562, "step": 6371 }, { "epoch": 0.887898000418031, "grad_norm": 0.7198171019554138, "learning_rate": 3.5651647238562904e-07, "loss": 0.11009597778320312, "step": 6372 }, { "epoch": 0.8880373441092455, "grad_norm": 0.9339787364006042, "learning_rate": 3.5564299820124883e-07, "loss": 0.1312713623046875, "step": 6373 }, { "epoch": 0.8881766878004599, "grad_norm": 1.2872503995895386, "learning_rate": 3.547705558869624e-07, "loss": 0.13768386840820312, "step": 6374 }, { "epoch": 0.8883160314916743, "grad_norm": 0.7445709109306335, "learning_rate": 3.5389914563660475e-07, "loss": 0.12870025634765625, "step": 6375 }, { "epoch": 0.8884553751828886, "grad_norm": 0.7501681447029114, "learning_rate": 3.530287676437849e-07, "loss": 0.11522674560546875, "step": 6376 }, { "epoch": 0.888594718874103, "grad_norm": 0.5994613170623779, "learning_rate": 3.5215942210188204e-07, "loss": 0.101348876953125, "step": 6377 }, { "epoch": 0.8887340625653174, "grad_norm": 0.6752424836158752, "learning_rate": 3.512911092040422e-07, "loss": 0.11921501159667969, "step": 6378 }, { "epoch": 0.8888734062565318, "grad_norm": 0.8014930486679077, "learning_rate": 3.5042382914318716e-07, "loss": 0.12609100341796875, "step": 6379 }, { "epoch": 0.8890127499477462, "grad_norm": 0.8919384479522705, "learning_rate": 3.495575821120045e-07, "loss": 0.12529373168945312, "step": 6380 }, { "epoch": 0.8891520936389605, "grad_norm": 0.8666979074478149, "learning_rate": 3.4869236830295695e-07, "loss": 0.13372802734375, "step": 6381 }, { "epoch": 0.8892914373301749, "grad_norm": 0.643762469291687, "learning_rate": 3.478281879082729e-07, "loss": 0.11476516723632812, "step": 6382 }, { "epoch": 0.8894307810213893, "grad_norm": 1.1005325317382812, "learning_rate": 3.469650411199543e-07, "loss": 0.1429290771484375, "step": 6383 }, { "epoch": 0.8895701247126037, "grad_norm": 0.7387282252311707, "learning_rate": 3.4610292812977454e-07, "loss": 0.12990188598632812, "step": 6384 }, { "epoch": 0.889709468403818, "grad_norm": 0.6439253091812134, "learning_rate": 3.452418491292731e-07, "loss": 0.11086273193359375, "step": 6385 }, { "epoch": 0.8898488120950324, "grad_norm": 1.2339307069778442, "learning_rate": 3.4438180430976243e-07, "loss": 0.17316436767578125, "step": 6386 }, { "epoch": 0.8899881557862468, "grad_norm": 1.0962165594100952, "learning_rate": 3.4352279386232535e-07, "loss": 0.13324356079101562, "step": 6387 }, { "epoch": 0.8901274994774612, "grad_norm": 0.9994251132011414, "learning_rate": 3.426648179778147e-07, "loss": 0.14479446411132812, "step": 6388 }, { "epoch": 0.8902668431686755, "grad_norm": 0.973721981048584, "learning_rate": 3.4180787684685246e-07, "loss": 0.1382579803466797, "step": 6389 }, { "epoch": 0.8904061868598899, "grad_norm": 1.025280475616455, "learning_rate": 3.409519706598324e-07, "loss": 0.14416122436523438, "step": 6390 }, { "epoch": 0.8905455305511043, "grad_norm": 0.737148642539978, "learning_rate": 3.400970996069164e-07, "loss": 0.12347984313964844, "step": 6391 }, { "epoch": 0.8906848742423187, "grad_norm": 0.8352575898170471, "learning_rate": 3.392432638780363e-07, "loss": 0.11083602905273438, "step": 6392 }, { "epoch": 0.8908242179335331, "grad_norm": 0.9115039110183716, "learning_rate": 3.383904636628965e-07, "loss": 0.132843017578125, "step": 6393 }, { "epoch": 0.8909635616247474, "grad_norm": 1.1347790956497192, "learning_rate": 3.3753869915096936e-07, "loss": 0.1519756317138672, "step": 6394 }, { "epoch": 0.8911029053159618, "grad_norm": 0.9026370048522949, "learning_rate": 3.3668797053149907e-07, "loss": 0.14037132263183594, "step": 6395 }, { "epoch": 0.8912422490071762, "grad_norm": 0.9763910174369812, "learning_rate": 3.3583827799349486e-07, "loss": 0.15394020080566406, "step": 6396 }, { "epoch": 0.8913815926983906, "grad_norm": 1.476947546005249, "learning_rate": 3.3498962172574033e-07, "loss": 0.196533203125, "step": 6397 }, { "epoch": 0.891520936389605, "grad_norm": 1.0874916315078735, "learning_rate": 3.3414200191678903e-07, "loss": 0.13508224487304688, "step": 6398 }, { "epoch": 0.8916602800808193, "grad_norm": 1.4866714477539062, "learning_rate": 3.332954187549603e-07, "loss": 0.13458633422851562, "step": 6399 }, { "epoch": 0.8917996237720337, "grad_norm": 0.745399534702301, "learning_rate": 3.3244987242834816e-07, "loss": 0.12890625, "step": 6400 }, { "epoch": 0.8919389674632481, "grad_norm": 0.6949218511581421, "learning_rate": 3.3160536312481174e-07, "loss": 0.11716461181640625, "step": 6401 }, { "epoch": 0.8920783111544625, "grad_norm": 0.8834328055381775, "learning_rate": 3.3076189103198265e-07, "loss": 0.14413070678710938, "step": 6402 }, { "epoch": 0.8922176548456768, "grad_norm": 1.1921401023864746, "learning_rate": 3.299194563372604e-07, "loss": 0.14171218872070312, "step": 6403 }, { "epoch": 0.8923569985368912, "grad_norm": 1.2250804901123047, "learning_rate": 3.290780592278148e-07, "loss": 0.1441650390625, "step": 6404 }, { "epoch": 0.8924963422281056, "grad_norm": 0.846614420413971, "learning_rate": 3.2823769989058674e-07, "loss": 0.13624954223632812, "step": 6405 }, { "epoch": 0.89263568591932, "grad_norm": 0.8159672617912292, "learning_rate": 3.2739837851228306e-07, "loss": 0.12630081176757812, "step": 6406 }, { "epoch": 0.8927750296105343, "grad_norm": 0.47053736448287964, "learning_rate": 3.265600952793818e-07, "loss": 0.10780715942382812, "step": 6407 }, { "epoch": 0.8929143733017487, "grad_norm": 0.6370142698287964, "learning_rate": 3.2572285037813123e-07, "loss": 0.13044357299804688, "step": 6408 }, { "epoch": 0.8930537169929631, "grad_norm": 1.7453773021697998, "learning_rate": 3.248866439945486e-07, "loss": 0.16289138793945312, "step": 6409 }, { "epoch": 0.8931930606841775, "grad_norm": 0.7429875135421753, "learning_rate": 3.2405147631441757e-07, "loss": 0.11253738403320312, "step": 6410 }, { "epoch": 0.8933324043753919, "grad_norm": 1.0179582834243774, "learning_rate": 3.232173475232964e-07, "loss": 0.154998779296875, "step": 6411 }, { "epoch": 0.8934717480666062, "grad_norm": 0.8511015176773071, "learning_rate": 3.2238425780650617e-07, "loss": 0.1292724609375, "step": 6412 }, { "epoch": 0.8936110917578207, "grad_norm": 0.7148891687393188, "learning_rate": 3.215522073491434e-07, "loss": 0.1292572021484375, "step": 6413 }, { "epoch": 0.8937504354490351, "grad_norm": 0.8825834393501282, "learning_rate": 3.2072119633606845e-07, "loss": 0.12906646728515625, "step": 6414 }, { "epoch": 0.8938897791402495, "grad_norm": 0.9935001134872437, "learning_rate": 3.198912249519143e-07, "loss": 0.132415771484375, "step": 6415 }, { "epoch": 0.8940291228314639, "grad_norm": 0.692466676235199, "learning_rate": 3.190622933810816e-07, "loss": 0.10605239868164062, "step": 6416 }, { "epoch": 0.8941684665226782, "grad_norm": 0.8115369081497192, "learning_rate": 3.182344018077399e-07, "loss": 0.12616348266601562, "step": 6417 }, { "epoch": 0.8943078102138926, "grad_norm": 0.736531674861908, "learning_rate": 3.1740755041582694e-07, "loss": 0.11959457397460938, "step": 6418 }, { "epoch": 0.894447153905107, "grad_norm": 1.1396381855010986, "learning_rate": 3.1658173938905023e-07, "loss": 0.14722824096679688, "step": 6419 }, { "epoch": 0.8945864975963214, "grad_norm": 1.3128210306167603, "learning_rate": 3.1575696891088804e-07, "loss": 0.1520538330078125, "step": 6420 }, { "epoch": 0.8947258412875357, "grad_norm": 0.943548321723938, "learning_rate": 3.149332391645843e-07, "loss": 0.13763046264648438, "step": 6421 }, { "epoch": 0.8948651849787501, "grad_norm": 1.5353400707244873, "learning_rate": 3.1411055033315207e-07, "loss": 0.18742752075195312, "step": 6422 }, { "epoch": 0.8950045286699645, "grad_norm": 1.6658142805099487, "learning_rate": 3.132889025993746e-07, "loss": 0.13954925537109375, "step": 6423 }, { "epoch": 0.8951438723611789, "grad_norm": 1.5425978899002075, "learning_rate": 3.1246829614580476e-07, "loss": 0.15648651123046875, "step": 6424 }, { "epoch": 0.8952832160523932, "grad_norm": 0.6337814927101135, "learning_rate": 3.1164873115476056e-07, "loss": 0.12264633178710938, "step": 6425 }, { "epoch": 0.8954225597436076, "grad_norm": 1.0574524402618408, "learning_rate": 3.1083020780833137e-07, "loss": 0.11956405639648438, "step": 6426 }, { "epoch": 0.895561903434822, "grad_norm": 1.1116857528686523, "learning_rate": 3.1001272628837565e-07, "loss": 0.15694427490234375, "step": 6427 }, { "epoch": 0.8957012471260364, "grad_norm": 1.6477526426315308, "learning_rate": 3.0919628677651636e-07, "loss": 0.14375686645507812, "step": 6428 }, { "epoch": 0.8958405908172508, "grad_norm": 0.7328246235847473, "learning_rate": 3.083808894541496e-07, "loss": 0.12508392333984375, "step": 6429 }, { "epoch": 0.8959799345084651, "grad_norm": 1.0321353673934937, "learning_rate": 3.075665345024387e-07, "loss": 0.14738082885742188, "step": 6430 }, { "epoch": 0.8961192781996795, "grad_norm": 0.8297752737998962, "learning_rate": 3.0675322210231227e-07, "loss": 0.13199520111083984, "step": 6431 }, { "epoch": 0.8962586218908939, "grad_norm": 1.2677170038223267, "learning_rate": 3.0594095243447254e-07, "loss": 0.16547393798828125, "step": 6432 }, { "epoch": 0.8963979655821083, "grad_norm": 0.8016654849052429, "learning_rate": 3.0512972567938505e-07, "loss": 0.12602996826171875, "step": 6433 }, { "epoch": 0.8965373092733226, "grad_norm": 1.3982535600662231, "learning_rate": 3.043195420172879e-07, "loss": 0.1571197509765625, "step": 6434 }, { "epoch": 0.896676652964537, "grad_norm": 0.9065146446228027, "learning_rate": 3.035104016281831e-07, "loss": 0.14135169982910156, "step": 6435 }, { "epoch": 0.8968159966557514, "grad_norm": 1.2410582304000854, "learning_rate": 3.027023046918448e-07, "loss": 0.15567398071289062, "step": 6436 }, { "epoch": 0.8969553403469658, "grad_norm": 1.7445734739303589, "learning_rate": 3.018952513878137e-07, "loss": 0.16815567016601562, "step": 6437 }, { "epoch": 0.8970946840381802, "grad_norm": 0.8180146217346191, "learning_rate": 3.010892418953981e-07, "loss": 0.12720108032226562, "step": 6438 }, { "epoch": 0.8972340277293945, "grad_norm": 0.9033174514770508, "learning_rate": 3.0028427639367475e-07, "loss": 0.1361083984375, "step": 6439 }, { "epoch": 0.8973733714206089, "grad_norm": 0.9375796318054199, "learning_rate": 2.994803550614883e-07, "loss": 0.1500701904296875, "step": 6440 }, { "epoch": 0.8975127151118233, "grad_norm": 1.146252989768982, "learning_rate": 2.9867747807745315e-07, "loss": 0.1641387939453125, "step": 6441 }, { "epoch": 0.8976520588030377, "grad_norm": 1.1394538879394531, "learning_rate": 2.978756456199494e-07, "loss": 0.16331863403320312, "step": 6442 }, { "epoch": 0.897791402494252, "grad_norm": 0.9634565114974976, "learning_rate": 2.970748578671251e-07, "loss": 0.12532615661621094, "step": 6443 }, { "epoch": 0.8979307461854664, "grad_norm": 0.5328091979026794, "learning_rate": 2.9627511499689787e-07, "loss": 0.11126518249511719, "step": 6444 }, { "epoch": 0.8980700898766808, "grad_norm": 0.9279407858848572, "learning_rate": 2.9547641718695285e-07, "loss": 0.14014434814453125, "step": 6445 }, { "epoch": 0.8982094335678952, "grad_norm": 1.1214797496795654, "learning_rate": 2.946787646147414e-07, "loss": 0.13944435119628906, "step": 6446 }, { "epoch": 0.8983487772591096, "grad_norm": 0.9080222249031067, "learning_rate": 2.9388215745748347e-07, "loss": 0.12671279907226562, "step": 6447 }, { "epoch": 0.8984881209503239, "grad_norm": 0.7620078921318054, "learning_rate": 2.9308659589216913e-07, "loss": 0.13784027099609375, "step": 6448 }, { "epoch": 0.8986274646415383, "grad_norm": 0.9515164494514465, "learning_rate": 2.92292080095552e-07, "loss": 0.145477294921875, "step": 6449 }, { "epoch": 0.8987668083327527, "grad_norm": 0.8509071469306946, "learning_rate": 2.9149861024415526e-07, "loss": 0.13811492919921875, "step": 6450 }, { "epoch": 0.8989061520239671, "grad_norm": 1.1317092180252075, "learning_rate": 2.9070618651427073e-07, "loss": 0.11236000061035156, "step": 6451 }, { "epoch": 0.8990454957151814, "grad_norm": 0.8889166116714478, "learning_rate": 2.89914809081957e-07, "loss": 0.1250438690185547, "step": 6452 }, { "epoch": 0.8991848394063959, "grad_norm": 0.6375834345817566, "learning_rate": 2.8912447812303956e-07, "loss": 0.12063217163085938, "step": 6453 }, { "epoch": 0.8993241830976103, "grad_norm": 0.9940078854560852, "learning_rate": 2.8833519381311127e-07, "loss": 0.16199493408203125, "step": 6454 }, { "epoch": 0.8994635267888247, "grad_norm": 1.4704053401947021, "learning_rate": 2.8754695632753406e-07, "loss": 0.16537094116210938, "step": 6455 }, { "epoch": 0.8996028704800391, "grad_norm": 1.1214027404785156, "learning_rate": 2.867597658414367e-07, "loss": 0.134613037109375, "step": 6456 }, { "epoch": 0.8997422141712534, "grad_norm": 0.9157158732414246, "learning_rate": 2.859736225297133e-07, "loss": 0.13266563415527344, "step": 6457 }, { "epoch": 0.8998815578624678, "grad_norm": 0.8918075561523438, "learning_rate": 2.8518852656702845e-07, "loss": 0.13083648681640625, "step": 6458 }, { "epoch": 0.9000209015536822, "grad_norm": 1.2383358478546143, "learning_rate": 2.844044781278127e-07, "loss": 0.20648956298828125, "step": 6459 }, { "epoch": 0.9001602452448966, "grad_norm": 1.3310279846191406, "learning_rate": 2.836214773862617e-07, "loss": 0.15198516845703125, "step": 6460 }, { "epoch": 0.900299588936111, "grad_norm": 1.0515862703323364, "learning_rate": 2.828395245163418e-07, "loss": 0.15430831909179688, "step": 6461 }, { "epoch": 0.9004389326273253, "grad_norm": 1.1883660554885864, "learning_rate": 2.820586196917857e-07, "loss": 0.16840362548828125, "step": 6462 }, { "epoch": 0.9005782763185397, "grad_norm": 0.7964641451835632, "learning_rate": 2.812787630860919e-07, "loss": 0.12597274780273438, "step": 6463 }, { "epoch": 0.9007176200097541, "grad_norm": 0.8612417578697205, "learning_rate": 2.8049995487252625e-07, "loss": 0.13336563110351562, "step": 6464 }, { "epoch": 0.9008569637009685, "grad_norm": 0.9896900057792664, "learning_rate": 2.7972219522412194e-07, "loss": 0.115509033203125, "step": 6465 }, { "epoch": 0.9009963073921828, "grad_norm": 1.0306941270828247, "learning_rate": 2.789454843136813e-07, "loss": 0.13808059692382812, "step": 6466 }, { "epoch": 0.9011356510833972, "grad_norm": 0.8156508207321167, "learning_rate": 2.7816982231376964e-07, "loss": 0.10543632507324219, "step": 6467 }, { "epoch": 0.9012749947746116, "grad_norm": 1.4637269973754883, "learning_rate": 2.773952093967225e-07, "loss": 0.1760845184326172, "step": 6468 }, { "epoch": 0.901414338465826, "grad_norm": 0.7849430441856384, "learning_rate": 2.7662164573464156e-07, "loss": 0.12931442260742188, "step": 6469 }, { "epoch": 0.9015536821570403, "grad_norm": 0.7155048847198486, "learning_rate": 2.758491314993944e-07, "loss": 0.129364013671875, "step": 6470 }, { "epoch": 0.9016930258482547, "grad_norm": 1.0205307006835938, "learning_rate": 2.750776668626148e-07, "loss": 0.14157867431640625, "step": 6471 }, { "epoch": 0.9018323695394691, "grad_norm": 0.7781556248664856, "learning_rate": 2.743072519957063e-07, "loss": 0.13262367248535156, "step": 6472 }, { "epoch": 0.9019717132306835, "grad_norm": 0.4769732654094696, "learning_rate": 2.73537887069838e-07, "loss": 0.12325668334960938, "step": 6473 }, { "epoch": 0.9021110569218979, "grad_norm": 0.7502051591873169, "learning_rate": 2.7276957225594367e-07, "loss": 0.097991943359375, "step": 6474 }, { "epoch": 0.9022504006131122, "grad_norm": 1.2295197248458862, "learning_rate": 2.7200230772472526e-07, "loss": 0.15588760375976562, "step": 6475 }, { "epoch": 0.9023897443043266, "grad_norm": 0.8284196257591248, "learning_rate": 2.712360936466524e-07, "loss": 0.14557838439941406, "step": 6476 }, { "epoch": 0.902529087995541, "grad_norm": 1.0531214475631714, "learning_rate": 2.704709301919606e-07, "loss": 0.14719772338867188, "step": 6477 }, { "epoch": 0.9026684316867554, "grad_norm": 1.2318766117095947, "learning_rate": 2.6970681753065e-07, "loss": 0.15440750122070312, "step": 6478 }, { "epoch": 0.9028077753779697, "grad_norm": 0.6849604249000549, "learning_rate": 2.6894375583249144e-07, "loss": 0.13084793090820312, "step": 6479 }, { "epoch": 0.9029471190691841, "grad_norm": 0.8559218049049377, "learning_rate": 2.681817452670171e-07, "loss": 0.12096405029296875, "step": 6480 }, { "epoch": 0.9030864627603985, "grad_norm": 0.6090338230133057, "learning_rate": 2.6742078600353106e-07, "loss": 0.11524581909179688, "step": 6481 }, { "epoch": 0.9032258064516129, "grad_norm": 1.2901660203933716, "learning_rate": 2.6666087821109855e-07, "loss": 0.17365264892578125, "step": 6482 }, { "epoch": 0.9033651501428273, "grad_norm": 1.0889958143234253, "learning_rate": 2.6590202205855506e-07, "loss": 0.14528274536132812, "step": 6483 }, { "epoch": 0.9035044938340416, "grad_norm": 1.20474112033844, "learning_rate": 2.6514421771450194e-07, "loss": 0.15421104431152344, "step": 6484 }, { "epoch": 0.903643837525256, "grad_norm": 0.8114590644836426, "learning_rate": 2.6438746534730497e-07, "loss": 0.13245010375976562, "step": 6485 }, { "epoch": 0.9037831812164704, "grad_norm": 2.0300116539001465, "learning_rate": 2.6363176512509637e-07, "loss": 0.17392349243164062, "step": 6486 }, { "epoch": 0.9039225249076848, "grad_norm": 0.9153027534484863, "learning_rate": 2.628771172157768e-07, "loss": 0.16033172607421875, "step": 6487 }, { "epoch": 0.9040618685988991, "grad_norm": 0.6903954744338989, "learning_rate": 2.621235217870116e-07, "loss": 0.13496780395507812, "step": 6488 }, { "epoch": 0.9042012122901135, "grad_norm": 1.0186679363250732, "learning_rate": 2.6137097900623185e-07, "loss": 0.142364501953125, "step": 6489 }, { "epoch": 0.9043405559813279, "grad_norm": 1.4266563653945923, "learning_rate": 2.6061948904063663e-07, "loss": 0.1578216552734375, "step": 6490 }, { "epoch": 0.9044798996725423, "grad_norm": 0.8396922945976257, "learning_rate": 2.598690520571889e-07, "loss": 0.15192794799804688, "step": 6491 }, { "epoch": 0.9046192433637567, "grad_norm": 0.6939305663108826, "learning_rate": 2.591196682226182e-07, "loss": 0.11944580078125, "step": 6492 }, { "epoch": 0.904758587054971, "grad_norm": 0.858189046382904, "learning_rate": 2.5837133770342135e-07, "loss": 0.1235198974609375, "step": 6493 }, { "epoch": 0.9048979307461855, "grad_norm": 1.0128364562988281, "learning_rate": 2.5762406066585976e-07, "loss": 0.1289215087890625, "step": 6494 }, { "epoch": 0.9050372744373999, "grad_norm": 0.5707311034202576, "learning_rate": 2.568778372759628e-07, "loss": 0.1091766357421875, "step": 6495 }, { "epoch": 0.9051766181286143, "grad_norm": 1.3536884784698486, "learning_rate": 2.5613266769952183e-07, "loss": 0.13478469848632812, "step": 6496 }, { "epoch": 0.9053159618198287, "grad_norm": 0.7302553057670593, "learning_rate": 2.5538855210209823e-07, "loss": 0.11911773681640625, "step": 6497 }, { "epoch": 0.905455305511043, "grad_norm": 1.629258394241333, "learning_rate": 2.54645490649017e-07, "loss": 0.17270660400390625, "step": 6498 }, { "epoch": 0.9055946492022574, "grad_norm": 0.9859768152236938, "learning_rate": 2.5390348350536887e-07, "loss": 0.1419219970703125, "step": 6499 }, { "epoch": 0.9057339928934718, "grad_norm": 0.8535857796669006, "learning_rate": 2.531625308360125e-07, "loss": 0.14417648315429688, "step": 6500 }, { "epoch": 0.9058733365846862, "grad_norm": 1.4475769996643066, "learning_rate": 2.52422632805569e-07, "loss": 0.20040512084960938, "step": 6501 }, { "epoch": 0.9060126802759005, "grad_norm": 0.5824934840202332, "learning_rate": 2.5168378957842797e-07, "loss": 0.11568832397460938, "step": 6502 }, { "epoch": 0.9061520239671149, "grad_norm": 0.949874222278595, "learning_rate": 2.5094600131874205e-07, "loss": 0.12674713134765625, "step": 6503 }, { "epoch": 0.9062913676583293, "grad_norm": 0.7369186282157898, "learning_rate": 2.5020926819043223e-07, "loss": 0.12495994567871094, "step": 6504 }, { "epoch": 0.9064307113495437, "grad_norm": 0.7793540954589844, "learning_rate": 2.4947359035718434e-07, "loss": 0.12617111206054688, "step": 6505 }, { "epoch": 0.906570055040758, "grad_norm": 0.9068283438682556, "learning_rate": 2.487389679824481e-07, "loss": 0.14021682739257812, "step": 6506 }, { "epoch": 0.9067093987319724, "grad_norm": 0.9147942662239075, "learning_rate": 2.4800540122943915e-07, "loss": 0.13529205322265625, "step": 6507 }, { "epoch": 0.9068487424231868, "grad_norm": 0.8309602737426758, "learning_rate": 2.4727289026114043e-07, "loss": 0.12817764282226562, "step": 6508 }, { "epoch": 0.9069880861144012, "grad_norm": 1.2347549200057983, "learning_rate": 2.4654143524029896e-07, "loss": 0.13679122924804688, "step": 6509 }, { "epoch": 0.9071274298056156, "grad_norm": 1.1966886520385742, "learning_rate": 2.4581103632942747e-07, "loss": 0.14189910888671875, "step": 6510 }, { "epoch": 0.9072667734968299, "grad_norm": 0.5734440684318542, "learning_rate": 2.4508169369080404e-07, "loss": 0.11553001403808594, "step": 6511 }, { "epoch": 0.9074061171880443, "grad_norm": 0.6068629026412964, "learning_rate": 2.443534074864706e-07, "loss": 0.11307907104492188, "step": 6512 }, { "epoch": 0.9075454608792587, "grad_norm": 0.6948513388633728, "learning_rate": 2.436261778782378e-07, "loss": 0.1207733154296875, "step": 6513 }, { "epoch": 0.9076848045704731, "grad_norm": 0.9507372975349426, "learning_rate": 2.4290000502767755e-07, "loss": 0.12937545776367188, "step": 6514 }, { "epoch": 0.9078241482616874, "grad_norm": 0.9373472332954407, "learning_rate": 2.421748890961301e-07, "loss": 0.161376953125, "step": 6515 }, { "epoch": 0.9079634919529018, "grad_norm": 1.7132728099822998, "learning_rate": 2.4145083024469996e-07, "loss": 0.16016006469726562, "step": 6516 }, { "epoch": 0.9081028356441162, "grad_norm": 0.5986470580101013, "learning_rate": 2.407278286342557e-07, "loss": 0.1127166748046875, "step": 6517 }, { "epoch": 0.9082421793353306, "grad_norm": 1.449976921081543, "learning_rate": 2.40005884425431e-07, "loss": 0.1849365234375, "step": 6518 }, { "epoch": 0.908381523026545, "grad_norm": 0.8203670382499695, "learning_rate": 2.39284997778626e-07, "loss": 0.11901473999023438, "step": 6519 }, { "epoch": 0.9085208667177593, "grad_norm": 0.6158709526062012, "learning_rate": 2.3856516885400693e-07, "loss": 0.11922836303710938, "step": 6520 }, { "epoch": 0.9086602104089737, "grad_norm": 0.8558225035667419, "learning_rate": 2.3784639781150143e-07, "loss": 0.13739013671875, "step": 6521 }, { "epoch": 0.9087995541001881, "grad_norm": 1.1901113986968994, "learning_rate": 2.3712868481080397e-07, "loss": 0.14720916748046875, "step": 6522 }, { "epoch": 0.9089388977914025, "grad_norm": 1.0597769021987915, "learning_rate": 2.364120300113748e-07, "loss": 0.15577125549316406, "step": 6523 }, { "epoch": 0.9090782414826168, "grad_norm": 1.0227329730987549, "learning_rate": 2.356964335724382e-07, "loss": 0.16560745239257812, "step": 6524 }, { "epoch": 0.9092175851738312, "grad_norm": 0.9838873147964478, "learning_rate": 2.3498189565298312e-07, "loss": 0.12933731079101562, "step": 6525 }, { "epoch": 0.9093569288650456, "grad_norm": 0.9794092178344727, "learning_rate": 2.3426841641176311e-07, "loss": 0.13602066040039062, "step": 6526 }, { "epoch": 0.90949627255626, "grad_norm": 0.5023376941680908, "learning_rate": 2.3355599600729916e-07, "loss": 0.10274314880371094, "step": 6527 }, { "epoch": 0.9096356162474744, "grad_norm": 0.8121068477630615, "learning_rate": 2.328446345978713e-07, "loss": 0.13253402709960938, "step": 6528 }, { "epoch": 0.9097749599386887, "grad_norm": 0.7448227405548096, "learning_rate": 2.3213433234152982e-07, "loss": 0.1290435791015625, "step": 6529 }, { "epoch": 0.9099143036299031, "grad_norm": 0.8222090005874634, "learning_rate": 2.3142508939608844e-07, "loss": 0.13869476318359375, "step": 6530 }, { "epoch": 0.9100536473211175, "grad_norm": 1.052786111831665, "learning_rate": 2.3071690591912277e-07, "loss": 0.16409683227539062, "step": 6531 }, { "epoch": 0.9101929910123319, "grad_norm": 0.8803541660308838, "learning_rate": 2.3000978206797697e-07, "loss": 0.13410568237304688, "step": 6532 }, { "epoch": 0.9103323347035462, "grad_norm": 1.2193964719772339, "learning_rate": 2.2930371799975593e-07, "loss": 0.13359451293945312, "step": 6533 }, { "epoch": 0.9104716783947607, "grad_norm": 0.9407850503921509, "learning_rate": 2.2859871387133248e-07, "loss": 0.14714622497558594, "step": 6534 }, { "epoch": 0.9106110220859751, "grad_norm": 0.9849115610122681, "learning_rate": 2.2789476983934133e-07, "loss": 0.15844345092773438, "step": 6535 }, { "epoch": 0.9107503657771895, "grad_norm": 0.8952014446258545, "learning_rate": 2.271918860601835e-07, "loss": 0.13954925537109375, "step": 6536 }, { "epoch": 0.9108897094684039, "grad_norm": 0.9240652322769165, "learning_rate": 2.2649006269002406e-07, "loss": 0.14794921875, "step": 6537 }, { "epoch": 0.9110290531596182, "grad_norm": 0.9283528327941895, "learning_rate": 2.257892998847916e-07, "loss": 0.13601303100585938, "step": 6538 }, { "epoch": 0.9111683968508326, "grad_norm": 1.055322527885437, "learning_rate": 2.250895978001788e-07, "loss": 0.15401458740234375, "step": 6539 }, { "epoch": 0.911307740542047, "grad_norm": 1.2040328979492188, "learning_rate": 2.2439095659164467e-07, "loss": 0.13701629638671875, "step": 6540 }, { "epoch": 0.9114470842332614, "grad_norm": 0.8750362992286682, "learning_rate": 2.236933764144117e-07, "loss": 0.1348114013671875, "step": 6541 }, { "epoch": 0.9115864279244758, "grad_norm": 1.2470664978027344, "learning_rate": 2.2299685742346423e-07, "loss": 0.15156936645507812, "step": 6542 }, { "epoch": 0.9117257716156901, "grad_norm": 0.7357209920883179, "learning_rate": 2.223013997735557e-07, "loss": 0.13317298889160156, "step": 6543 }, { "epoch": 0.9118651153069045, "grad_norm": 0.89300936460495, "learning_rate": 2.2160700361919807e-07, "loss": 0.1182708740234375, "step": 6544 }, { "epoch": 0.9120044589981189, "grad_norm": 0.8757017850875854, "learning_rate": 2.2091366911467238e-07, "loss": 0.12916946411132812, "step": 6545 }, { "epoch": 0.9121438026893333, "grad_norm": 0.8523747324943542, "learning_rate": 2.2022139641402095e-07, "loss": 0.15895462036132812, "step": 6546 }, { "epoch": 0.9122831463805476, "grad_norm": 0.8192906379699707, "learning_rate": 2.1953018567105078e-07, "loss": 0.11799049377441406, "step": 6547 }, { "epoch": 0.912422490071762, "grad_norm": 0.8925725817680359, "learning_rate": 2.1884003703933343e-07, "loss": 0.1417865753173828, "step": 6548 }, { "epoch": 0.9125618337629764, "grad_norm": 1.0717549324035645, "learning_rate": 2.181509506722046e-07, "loss": 0.14600372314453125, "step": 6549 }, { "epoch": 0.9127011774541908, "grad_norm": 0.6577878594398499, "learning_rate": 2.1746292672276238e-07, "loss": 0.1219329833984375, "step": 6550 }, { "epoch": 0.9128405211454051, "grad_norm": 1.1728417873382568, "learning_rate": 2.1677596534387114e-07, "loss": 0.15746688842773438, "step": 6551 }, { "epoch": 0.9129798648366195, "grad_norm": 0.8301608562469482, "learning_rate": 2.1609006668815768e-07, "loss": 0.1323699951171875, "step": 6552 }, { "epoch": 0.9131192085278339, "grad_norm": 1.039873480796814, "learning_rate": 2.1540523090801292e-07, "loss": 0.13652801513671875, "step": 6553 }, { "epoch": 0.9132585522190483, "grad_norm": 0.9083632230758667, "learning_rate": 2.1472145815559064e-07, "loss": 0.11076736450195312, "step": 6554 }, { "epoch": 0.9133978959102627, "grad_norm": 1.0988553762435913, "learning_rate": 2.1403874858281104e-07, "loss": 0.16121864318847656, "step": 6555 }, { "epoch": 0.913537239601477, "grad_norm": 0.7739725112915039, "learning_rate": 2.133571023413572e-07, "loss": 0.12006759643554688, "step": 6556 }, { "epoch": 0.9136765832926914, "grad_norm": 1.2324002981185913, "learning_rate": 2.1267651958267298e-07, "loss": 0.12945938110351562, "step": 6557 }, { "epoch": 0.9138159269839058, "grad_norm": 0.6411879062652588, "learning_rate": 2.1199700045797077e-07, "loss": 0.10730361938476562, "step": 6558 }, { "epoch": 0.9139552706751202, "grad_norm": 0.8052206635475159, "learning_rate": 2.113185451182226e-07, "loss": 0.13474655151367188, "step": 6559 }, { "epoch": 0.9140946143663345, "grad_norm": 1.004707932472229, "learning_rate": 2.106411537141656e-07, "loss": 0.156768798828125, "step": 6560 }, { "epoch": 0.9142339580575489, "grad_norm": 1.2002893686294556, "learning_rate": 2.0996482639630167e-07, "loss": 0.1575775146484375, "step": 6561 }, { "epoch": 0.9143733017487633, "grad_norm": 0.5361700057983398, "learning_rate": 2.0928956331489558e-07, "loss": 0.11155319213867188, "step": 6562 }, { "epoch": 0.9145126454399777, "grad_norm": 1.4726182222366333, "learning_rate": 2.08615364619974e-07, "loss": 0.15096664428710938, "step": 6563 }, { "epoch": 0.9146519891311921, "grad_norm": 1.046618103981018, "learning_rate": 2.079422304613299e-07, "loss": 0.14525604248046875, "step": 6564 }, { "epoch": 0.9147913328224064, "grad_norm": 1.1467939615249634, "learning_rate": 2.0727016098851694e-07, "loss": 0.12485504150390625, "step": 6565 }, { "epoch": 0.9149306765136208, "grad_norm": 0.7532833814620972, "learning_rate": 2.0659915635085515e-07, "loss": 0.10608291625976562, "step": 6566 }, { "epoch": 0.9150700202048352, "grad_norm": 1.1376514434814453, "learning_rate": 2.0592921669742528e-07, "loss": 0.14010238647460938, "step": 6567 }, { "epoch": 0.9152093638960496, "grad_norm": 0.700549304485321, "learning_rate": 2.0526034217707213e-07, "loss": 0.11589622497558594, "step": 6568 }, { "epoch": 0.9153487075872639, "grad_norm": 0.5657132267951965, "learning_rate": 2.0459253293840632e-07, "loss": 0.11557388305664062, "step": 6569 }, { "epoch": 0.9154880512784783, "grad_norm": 0.6872866749763489, "learning_rate": 2.0392578912979853e-07, "loss": 0.1311492919921875, "step": 6570 }, { "epoch": 0.9156273949696927, "grad_norm": 1.4674155712127686, "learning_rate": 2.032601108993837e-07, "loss": 0.14430999755859375, "step": 6571 }, { "epoch": 0.9157667386609071, "grad_norm": 0.8755407929420471, "learning_rate": 2.0259549839506064e-07, "loss": 0.1450042724609375, "step": 6572 }, { "epoch": 0.9159060823521215, "grad_norm": 1.860660433769226, "learning_rate": 2.0193195176449188e-07, "loss": 0.18417739868164062, "step": 6573 }, { "epoch": 0.9160454260433359, "grad_norm": 0.796375572681427, "learning_rate": 2.0126947115510165e-07, "loss": 0.1288738250732422, "step": 6574 }, { "epoch": 0.9161847697345503, "grad_norm": 1.3854252099990845, "learning_rate": 2.006080567140778e-07, "loss": 0.21694183349609375, "step": 6575 }, { "epoch": 0.9163241134257647, "grad_norm": 1.6768882274627686, "learning_rate": 1.999477085883711e-07, "loss": 0.15932464599609375, "step": 6576 }, { "epoch": 0.9164634571169791, "grad_norm": 1.2536559104919434, "learning_rate": 1.9928842692469752e-07, "loss": 0.18143463134765625, "step": 6577 }, { "epoch": 0.9166028008081935, "grad_norm": 0.6814470291137695, "learning_rate": 1.9863021186953268e-07, "loss": 0.1209564208984375, "step": 6578 }, { "epoch": 0.9167421444994078, "grad_norm": 0.9155371189117432, "learning_rate": 1.9797306356911793e-07, "loss": 0.13880538940429688, "step": 6579 }, { "epoch": 0.9168814881906222, "grad_norm": 0.8827211856842041, "learning_rate": 1.973169821694565e-07, "loss": 0.13458251953125, "step": 6580 }, { "epoch": 0.9170208318818366, "grad_norm": 1.0903407335281372, "learning_rate": 1.9666196781631453e-07, "loss": 0.171173095703125, "step": 6581 }, { "epoch": 0.917160175573051, "grad_norm": 1.4843746423721313, "learning_rate": 1.9600802065522063e-07, "loss": 0.1702423095703125, "step": 6582 }, { "epoch": 0.9172995192642653, "grad_norm": 0.6331637501716614, "learning_rate": 1.95355140831468e-07, "loss": 0.11252784729003906, "step": 6583 }, { "epoch": 0.9174388629554797, "grad_norm": 1.1632741689682007, "learning_rate": 1.947033284901112e-07, "loss": 0.14984130859375, "step": 6584 }, { "epoch": 0.9175782066466941, "grad_norm": 0.5238611102104187, "learning_rate": 1.9405258377596825e-07, "loss": 0.11571502685546875, "step": 6585 }, { "epoch": 0.9177175503379085, "grad_norm": 0.6265460252761841, "learning_rate": 1.9340290683361907e-07, "loss": 0.11886978149414062, "step": 6586 }, { "epoch": 0.9178568940291228, "grad_norm": 0.97805255651474, "learning_rate": 1.9275429780740763e-07, "loss": 0.13995742797851562, "step": 6587 }, { "epoch": 0.9179962377203372, "grad_norm": 1.3353923559188843, "learning_rate": 1.921067568414403e-07, "loss": 0.14563369750976562, "step": 6588 }, { "epoch": 0.9181355814115516, "grad_norm": 1.3129991292953491, "learning_rate": 1.9146028407958483e-07, "loss": 0.18279647827148438, "step": 6589 }, { "epoch": 0.918274925102766, "grad_norm": 1.1112173795700073, "learning_rate": 1.9081487966547407e-07, "loss": 0.15575790405273438, "step": 6590 }, { "epoch": 0.9184142687939804, "grad_norm": 0.8470931053161621, "learning_rate": 1.9017054374250111e-07, "loss": 0.13832473754882812, "step": 6591 }, { "epoch": 0.9185536124851947, "grad_norm": 0.8620619177818298, "learning_rate": 1.8952727645382307e-07, "loss": 0.10909271240234375, "step": 6592 }, { "epoch": 0.9186929561764091, "grad_norm": 0.890386164188385, "learning_rate": 1.88885077942359e-07, "loss": 0.140838623046875, "step": 6593 }, { "epoch": 0.9188322998676235, "grad_norm": 0.8443624377250671, "learning_rate": 1.8824394835079086e-07, "loss": 0.13710403442382812, "step": 6594 }, { "epoch": 0.9189716435588379, "grad_norm": 1.5423671007156372, "learning_rate": 1.8760388782156468e-07, "loss": 0.17493629455566406, "step": 6595 }, { "epoch": 0.9191109872500522, "grad_norm": 0.8740038871765137, "learning_rate": 1.8696489649688454e-07, "loss": 0.13518524169921875, "step": 6596 }, { "epoch": 0.9192503309412666, "grad_norm": 0.8164645433425903, "learning_rate": 1.8632697451872074e-07, "loss": 0.1501312255859375, "step": 6597 }, { "epoch": 0.919389674632481, "grad_norm": 0.791538417339325, "learning_rate": 1.8569012202880599e-07, "loss": 0.13176727294921875, "step": 6598 }, { "epoch": 0.9195290183236954, "grad_norm": 1.1684798002243042, "learning_rate": 1.850543391686327e-07, "loss": 0.15604209899902344, "step": 6599 }, { "epoch": 0.9196683620149098, "grad_norm": 0.6561930179595947, "learning_rate": 1.8441962607945786e-07, "loss": 0.10524749755859375, "step": 6600 }, { "epoch": 0.9198077057061241, "grad_norm": 0.7641294002532959, "learning_rate": 1.83785982902302e-07, "loss": 0.13195037841796875, "step": 6601 }, { "epoch": 0.9199470493973385, "grad_norm": 0.7385129332542419, "learning_rate": 1.8315340977794415e-07, "loss": 0.11893463134765625, "step": 6602 }, { "epoch": 0.9200863930885529, "grad_norm": 0.7470661401748657, "learning_rate": 1.825219068469275e-07, "loss": 0.1348114013671875, "step": 6603 }, { "epoch": 0.9202257367797673, "grad_norm": 0.5599554777145386, "learning_rate": 1.818914742495581e-07, "loss": 0.11389541625976562, "step": 6604 }, { "epoch": 0.9203650804709816, "grad_norm": 1.176348328590393, "learning_rate": 1.8126211212590505e-07, "loss": 0.13121795654296875, "step": 6605 }, { "epoch": 0.920504424162196, "grad_norm": 0.5225864052772522, "learning_rate": 1.8063382061579648e-07, "loss": 0.10326766967773438, "step": 6606 }, { "epoch": 0.9206437678534104, "grad_norm": 0.9896993041038513, "learning_rate": 1.8000659985882463e-07, "loss": 0.15136337280273438, "step": 6607 }, { "epoch": 0.9207831115446248, "grad_norm": 0.8691785931587219, "learning_rate": 1.7938044999434412e-07, "loss": 0.12397384643554688, "step": 6608 }, { "epoch": 0.9209224552358392, "grad_norm": 1.346169114112854, "learning_rate": 1.7875537116147146e-07, "loss": 0.14393997192382812, "step": 6609 }, { "epoch": 0.9210617989270535, "grad_norm": 1.1147592067718506, "learning_rate": 1.781313634990839e-07, "loss": 0.15940093994140625, "step": 6610 }, { "epoch": 0.9212011426182679, "grad_norm": 0.5544715523719788, "learning_rate": 1.7750842714582272e-07, "loss": 0.110595703125, "step": 6611 }, { "epoch": 0.9213404863094823, "grad_norm": 1.5536348819732666, "learning_rate": 1.7688656224008893e-07, "loss": 0.16046142578125, "step": 6612 }, { "epoch": 0.9214798300006967, "grad_norm": 0.8492612242698669, "learning_rate": 1.762657689200481e-07, "loss": 0.14246559143066406, "step": 6613 }, { "epoch": 0.9216191736919112, "grad_norm": 1.079262375831604, "learning_rate": 1.7564604732362545e-07, "loss": 0.12126541137695312, "step": 6614 }, { "epoch": 0.9217585173831255, "grad_norm": 0.6260824203491211, "learning_rate": 1.7502739758850863e-07, "loss": 0.09997940063476562, "step": 6615 }, { "epoch": 0.9218978610743399, "grad_norm": 1.0568567514419556, "learning_rate": 1.7440981985214933e-07, "loss": 0.1546001434326172, "step": 6616 }, { "epoch": 0.9220372047655543, "grad_norm": 0.6607904434204102, "learning_rate": 1.7379331425175728e-07, "loss": 0.13984298706054688, "step": 6617 }, { "epoch": 0.9221765484567687, "grad_norm": 1.3679184913635254, "learning_rate": 1.7317788092430676e-07, "loss": 0.1608600616455078, "step": 6618 }, { "epoch": 0.922315892147983, "grad_norm": 1.1356852054595947, "learning_rate": 1.725635200065323e-07, "loss": 0.168792724609375, "step": 6619 }, { "epoch": 0.9224552358391974, "grad_norm": 0.6436255574226379, "learning_rate": 1.7195023163493253e-07, "loss": 0.12021636962890625, "step": 6620 }, { "epoch": 0.9225945795304118, "grad_norm": 1.470846176147461, "learning_rate": 1.7133801594576393e-07, "loss": 0.17948150634765625, "step": 6621 }, { "epoch": 0.9227339232216262, "grad_norm": 1.1193335056304932, "learning_rate": 1.7072687307504887e-07, "loss": 0.16895675659179688, "step": 6622 }, { "epoch": 0.9228732669128406, "grad_norm": 0.879754900932312, "learning_rate": 1.701168031585676e-07, "loss": 0.15439224243164062, "step": 6623 }, { "epoch": 0.9230126106040549, "grad_norm": 1.1560059785842896, "learning_rate": 1.695078063318656e-07, "loss": 0.13006591796875, "step": 6624 }, { "epoch": 0.9231519542952693, "grad_norm": 1.4946088790893555, "learning_rate": 1.6889988273024627e-07, "loss": 0.15351486206054688, "step": 6625 }, { "epoch": 0.9232912979864837, "grad_norm": 0.8677798509597778, "learning_rate": 1.682930324887766e-07, "loss": 0.11826324462890625, "step": 6626 }, { "epoch": 0.9234306416776981, "grad_norm": 1.151567816734314, "learning_rate": 1.6768725574228706e-07, "loss": 0.12273025512695312, "step": 6627 }, { "epoch": 0.9235699853689124, "grad_norm": 0.8862648606300354, "learning_rate": 1.6708255262536443e-07, "loss": 0.13451004028320312, "step": 6628 }, { "epoch": 0.9237093290601268, "grad_norm": 1.1327145099639893, "learning_rate": 1.6647892327236125e-07, "loss": 0.150604248046875, "step": 6629 }, { "epoch": 0.9238486727513412, "grad_norm": 0.5726187825202942, "learning_rate": 1.658763678173908e-07, "loss": 0.09126663208007812, "step": 6630 }, { "epoch": 0.9239880164425556, "grad_norm": 0.9476815462112427, "learning_rate": 1.6527488639432543e-07, "loss": 0.1320953369140625, "step": 6631 }, { "epoch": 0.92412736013377, "grad_norm": 1.0951085090637207, "learning_rate": 1.6467447913680268e-07, "loss": 0.15480804443359375, "step": 6632 }, { "epoch": 0.9242667038249843, "grad_norm": 0.9522867798805237, "learning_rate": 1.6407514617821752e-07, "loss": 0.15746688842773438, "step": 6633 }, { "epoch": 0.9244060475161987, "grad_norm": 1.122534155845642, "learning_rate": 1.6347688765172953e-07, "loss": 0.1434173583984375, "step": 6634 }, { "epoch": 0.9245453912074131, "grad_norm": 1.2562297582626343, "learning_rate": 1.6287970369025686e-07, "loss": 0.15137481689453125, "step": 6635 }, { "epoch": 0.9246847348986275, "grad_norm": 0.9643144607543945, "learning_rate": 1.6228359442648112e-07, "loss": 0.1641082763671875, "step": 6636 }, { "epoch": 0.9248240785898418, "grad_norm": 0.7292590141296387, "learning_rate": 1.616885599928436e-07, "loss": 0.1327075958251953, "step": 6637 }, { "epoch": 0.9249634222810562, "grad_norm": 0.773746132850647, "learning_rate": 1.6109460052154802e-07, "loss": 0.12570953369140625, "step": 6638 }, { "epoch": 0.9251027659722706, "grad_norm": 1.0092353820800781, "learning_rate": 1.6050171614455712e-07, "loss": 0.131591796875, "step": 6639 }, { "epoch": 0.925242109663485, "grad_norm": 1.1139922142028809, "learning_rate": 1.5990990699359777e-07, "loss": 0.15388870239257812, "step": 6640 }, { "epoch": 0.9253814533546993, "grad_norm": 0.9509718418121338, "learning_rate": 1.593191732001559e-07, "loss": 0.11782455444335938, "step": 6641 }, { "epoch": 0.9255207970459137, "grad_norm": 0.9043558239936829, "learning_rate": 1.5872951489547926e-07, "loss": 0.13149261474609375, "step": 6642 }, { "epoch": 0.9256601407371281, "grad_norm": 0.8805952668190002, "learning_rate": 1.5814093221057647e-07, "loss": 0.12621498107910156, "step": 6643 }, { "epoch": 0.9257994844283425, "grad_norm": 0.612366259098053, "learning_rate": 1.575534252762162e-07, "loss": 0.1252574920654297, "step": 6644 }, { "epoch": 0.9259388281195569, "grad_norm": 0.807767391204834, "learning_rate": 1.5696699422293072e-07, "loss": 0.112701416015625, "step": 6645 }, { "epoch": 0.9260781718107712, "grad_norm": 0.6971763968467712, "learning_rate": 1.5638163918101024e-07, "loss": 0.12947463989257812, "step": 6646 }, { "epoch": 0.9262175155019856, "grad_norm": 1.0835075378417969, "learning_rate": 1.5579736028050797e-07, "loss": 0.14958763122558594, "step": 6647 }, { "epoch": 0.9263568591932, "grad_norm": 1.2333695888519287, "learning_rate": 1.5521415765123783e-07, "loss": 0.14946365356445312, "step": 6648 }, { "epoch": 0.9264962028844144, "grad_norm": 0.9557672142982483, "learning_rate": 1.546320314227734e-07, "loss": 0.1406230926513672, "step": 6649 }, { "epoch": 0.9266355465756287, "grad_norm": 0.6193857789039612, "learning_rate": 1.5405098172444954e-07, "loss": 0.10931777954101562, "step": 6650 }, { "epoch": 0.9267748902668431, "grad_norm": 1.353527545928955, "learning_rate": 1.5347100868536246e-07, "loss": 0.15822219848632812, "step": 6651 }, { "epoch": 0.9269142339580575, "grad_norm": 1.2885463237762451, "learning_rate": 1.5289211243436964e-07, "loss": 0.14220046997070312, "step": 6652 }, { "epoch": 0.9270535776492719, "grad_norm": 0.9133144021034241, "learning_rate": 1.5231429310008817e-07, "loss": 0.13817596435546875, "step": 6653 }, { "epoch": 0.9271929213404864, "grad_norm": 0.6628894209861755, "learning_rate": 1.5173755081089536e-07, "loss": 0.12107467651367188, "step": 6654 }, { "epoch": 0.9273322650317007, "grad_norm": 0.6787355542182922, "learning_rate": 1.511618856949315e-07, "loss": 0.12983322143554688, "step": 6655 }, { "epoch": 0.9274716087229151, "grad_norm": 1.5999633073806763, "learning_rate": 1.5058729788009597e-07, "loss": 0.167938232421875, "step": 6656 }, { "epoch": 0.9276109524141295, "grad_norm": 1.1817418336868286, "learning_rate": 1.5001378749404883e-07, "loss": 0.1314544677734375, "step": 6657 }, { "epoch": 0.9277502961053439, "grad_norm": 1.3389617204666138, "learning_rate": 1.4944135466421095e-07, "loss": 0.15651702880859375, "step": 6658 }, { "epoch": 0.9278896397965583, "grad_norm": 0.893328070640564, "learning_rate": 1.4886999951776448e-07, "loss": 0.12711334228515625, "step": 6659 }, { "epoch": 0.9280289834877726, "grad_norm": 1.0502480268478394, "learning_rate": 1.4829972218165013e-07, "loss": 0.1269378662109375, "step": 6660 }, { "epoch": 0.928168327178987, "grad_norm": 0.8080911040306091, "learning_rate": 1.477305227825715e-07, "loss": 0.13785171508789062, "step": 6661 }, { "epoch": 0.9283076708702014, "grad_norm": 0.9644597768783569, "learning_rate": 1.471624014469919e-07, "loss": 0.16214370727539062, "step": 6662 }, { "epoch": 0.9284470145614158, "grad_norm": 1.033399224281311, "learning_rate": 1.4659535830113368e-07, "loss": 0.14722442626953125, "step": 6663 }, { "epoch": 0.9285863582526301, "grad_norm": 1.2997490167617798, "learning_rate": 1.4602939347098278e-07, "loss": 0.1554584503173828, "step": 6664 }, { "epoch": 0.9287257019438445, "grad_norm": 1.0834404230117798, "learning_rate": 1.454645070822819e-07, "loss": 0.13983154296875, "step": 6665 }, { "epoch": 0.9288650456350589, "grad_norm": 1.0185261964797974, "learning_rate": 1.449006992605373e-07, "loss": 0.14564895629882812, "step": 6666 }, { "epoch": 0.9290043893262733, "grad_norm": 0.9090662598609924, "learning_rate": 1.443379701310127e-07, "loss": 0.13475418090820312, "step": 6667 }, { "epoch": 0.9291437330174876, "grad_norm": 0.9250311851501465, "learning_rate": 1.4377631981873474e-07, "loss": 0.11949539184570312, "step": 6668 }, { "epoch": 0.929283076708702, "grad_norm": 0.9846729636192322, "learning_rate": 1.432157484484892e-07, "loss": 0.13285446166992188, "step": 6669 }, { "epoch": 0.9294224203999164, "grad_norm": 0.9993211627006531, "learning_rate": 1.4265625614482247e-07, "loss": 0.12335968017578125, "step": 6670 }, { "epoch": 0.9295617640911308, "grad_norm": 1.0303175449371338, "learning_rate": 1.4209784303203965e-07, "loss": 0.1443958282470703, "step": 6671 }, { "epoch": 0.9297011077823452, "grad_norm": 0.9749584197998047, "learning_rate": 1.415405092342087e-07, "loss": 0.15067672729492188, "step": 6672 }, { "epoch": 0.9298404514735595, "grad_norm": 0.9614453911781311, "learning_rate": 1.4098425487515665e-07, "loss": 0.1410369873046875, "step": 6673 }, { "epoch": 0.9299797951647739, "grad_norm": 2.0703303813934326, "learning_rate": 1.4042908007846912e-07, "loss": 0.1809844970703125, "step": 6674 }, { "epoch": 0.9301191388559883, "grad_norm": 1.1808905601501465, "learning_rate": 1.3987498496749463e-07, "loss": 0.12544631958007812, "step": 6675 }, { "epoch": 0.9302584825472027, "grad_norm": 0.5556203126907349, "learning_rate": 1.3932196966533972e-07, "loss": 0.1136016845703125, "step": 6676 }, { "epoch": 0.930397826238417, "grad_norm": 1.8815494775772095, "learning_rate": 1.3877003429487224e-07, "loss": 0.16423797607421875, "step": 6677 }, { "epoch": 0.9305371699296314, "grad_norm": 0.876811683177948, "learning_rate": 1.3821917897871905e-07, "loss": 0.13247108459472656, "step": 6678 }, { "epoch": 0.9306765136208458, "grad_norm": 0.7592368125915527, "learning_rate": 1.3766940383926785e-07, "loss": 0.126434326171875, "step": 6679 }, { "epoch": 0.9308158573120602, "grad_norm": 0.9263736009597778, "learning_rate": 1.3712070899866704e-07, "loss": 0.128814697265625, "step": 6680 }, { "epoch": 0.9309552010032746, "grad_norm": 0.8054322600364685, "learning_rate": 1.3657309457882294e-07, "loss": 0.11153030395507812, "step": 6681 }, { "epoch": 0.9310945446944889, "grad_norm": 0.9735404849052429, "learning_rate": 1.3602656070140275e-07, "loss": 0.121368408203125, "step": 6682 }, { "epoch": 0.9312338883857033, "grad_norm": 0.7062562108039856, "learning_rate": 1.3548110748783426e-07, "loss": 0.12470245361328125, "step": 6683 }, { "epoch": 0.9313732320769177, "grad_norm": 0.7757453918457031, "learning_rate": 1.349367350593056e-07, "loss": 0.11480522155761719, "step": 6684 }, { "epoch": 0.9315125757681321, "grad_norm": 1.2806590795516968, "learning_rate": 1.3439344353676276e-07, "loss": 0.16286087036132812, "step": 6685 }, { "epoch": 0.9316519194593464, "grad_norm": 0.7578094601631165, "learning_rate": 1.3385123304091306e-07, "loss": 0.111541748046875, "step": 6686 }, { "epoch": 0.9317912631505608, "grad_norm": 1.0818634033203125, "learning_rate": 1.3331010369222298e-07, "loss": 0.1395282745361328, "step": 6687 }, { "epoch": 0.9319306068417752, "grad_norm": 1.4911974668502808, "learning_rate": 1.3277005561092016e-07, "loss": 0.14084815979003906, "step": 6688 }, { "epoch": 0.9320699505329896, "grad_norm": 0.8678600788116455, "learning_rate": 1.3223108891698976e-07, "loss": 0.13941574096679688, "step": 6689 }, { "epoch": 0.932209294224204, "grad_norm": 1.2364771366119385, "learning_rate": 1.316932037301788e-07, "loss": 0.1825714111328125, "step": 6690 }, { "epoch": 0.9323486379154183, "grad_norm": 1.062173843383789, "learning_rate": 1.3115640016999222e-07, "loss": 0.16083908081054688, "step": 6691 }, { "epoch": 0.9324879816066327, "grad_norm": 0.8656576871871948, "learning_rate": 1.3062067835569625e-07, "loss": 0.13176727294921875, "step": 6692 }, { "epoch": 0.9326273252978471, "grad_norm": 0.9108487963676453, "learning_rate": 1.3008603840631516e-07, "loss": 0.15520095825195312, "step": 6693 }, { "epoch": 0.9327666689890615, "grad_norm": 0.9869466423988342, "learning_rate": 1.2955248044063452e-07, "loss": 0.1442127227783203, "step": 6694 }, { "epoch": 0.932906012680276, "grad_norm": 1.3883475065231323, "learning_rate": 1.2902000457719886e-07, "loss": 0.115692138671875, "step": 6695 }, { "epoch": 0.9330453563714903, "grad_norm": 0.7724692821502686, "learning_rate": 1.2848861093431143e-07, "loss": 0.1201629638671875, "step": 6696 }, { "epoch": 0.9331847000627047, "grad_norm": 0.8517941236495972, "learning_rate": 1.2795829963003604e-07, "loss": 0.1421356201171875, "step": 6697 }, { "epoch": 0.9333240437539191, "grad_norm": 0.9516575336456299, "learning_rate": 1.274290707821968e-07, "loss": 0.12734413146972656, "step": 6698 }, { "epoch": 0.9334633874451335, "grad_norm": 2.097489833831787, "learning_rate": 1.269009245083741e-07, "loss": 0.1345539093017578, "step": 6699 }, { "epoch": 0.9336027311363478, "grad_norm": 1.0022062063217163, "learning_rate": 1.2637386092591187e-07, "loss": 0.13256454467773438, "step": 6700 }, { "epoch": 0.9337420748275622, "grad_norm": 0.936276376247406, "learning_rate": 1.258478801519114e-07, "loss": 0.13842201232910156, "step": 6701 }, { "epoch": 0.9338814185187766, "grad_norm": 0.8481155633926392, "learning_rate": 1.2532298230323258e-07, "loss": 0.131744384765625, "step": 6702 }, { "epoch": 0.934020762209991, "grad_norm": 1.0569902658462524, "learning_rate": 1.2479916749649657e-07, "loss": 0.14638900756835938, "step": 6703 }, { "epoch": 0.9341601059012054, "grad_norm": 0.8980891108512878, "learning_rate": 1.2427643584808246e-07, "loss": 0.1413726806640625, "step": 6704 }, { "epoch": 0.9342994495924197, "grad_norm": 1.4672127962112427, "learning_rate": 1.2375478747413017e-07, "loss": 0.14375686645507812, "step": 6705 }, { "epoch": 0.9344387932836341, "grad_norm": 0.9508512616157532, "learning_rate": 1.2323422249053696e-07, "loss": 0.13355255126953125, "step": 6706 }, { "epoch": 0.9345781369748485, "grad_norm": 0.8119966387748718, "learning_rate": 1.2271474101296144e-07, "loss": 0.13300514221191406, "step": 6707 }, { "epoch": 0.9347174806660629, "grad_norm": 1.4335240125656128, "learning_rate": 1.2219634315681962e-07, "loss": 0.19708633422851562, "step": 6708 }, { "epoch": 0.9348568243572772, "grad_norm": 0.7716966271400452, "learning_rate": 1.2167902903728879e-07, "loss": 0.13198471069335938, "step": 6709 }, { "epoch": 0.9349961680484916, "grad_norm": 0.648211658000946, "learning_rate": 1.211627987693037e-07, "loss": 0.10662841796875, "step": 6710 }, { "epoch": 0.935135511739706, "grad_norm": 0.85460364818573, "learning_rate": 1.206476524675587e-07, "loss": 0.14324188232421875, "step": 6711 }, { "epoch": 0.9352748554309204, "grad_norm": 0.8630365133285522, "learning_rate": 1.2013359024650785e-07, "loss": 0.14654922485351562, "step": 6712 }, { "epoch": 0.9354141991221347, "grad_norm": 0.6946357488632202, "learning_rate": 1.196206122203647e-07, "loss": 0.12278175354003906, "step": 6713 }, { "epoch": 0.9355535428133491, "grad_norm": 0.8212522864341736, "learning_rate": 1.1910871850309979e-07, "loss": 0.12630844116210938, "step": 6714 }, { "epoch": 0.9356928865045635, "grad_norm": 1.2906279563903809, "learning_rate": 1.1859790920844494e-07, "loss": 0.1677398681640625, "step": 6715 }, { "epoch": 0.9358322301957779, "grad_norm": 1.156108021736145, "learning_rate": 1.1808818444989046e-07, "loss": 0.17295074462890625, "step": 6716 }, { "epoch": 0.9359715738869923, "grad_norm": 1.2766389846801758, "learning_rate": 1.1757954434068574e-07, "loss": 0.14551162719726562, "step": 6717 }, { "epoch": 0.9361109175782066, "grad_norm": 0.5503196120262146, "learning_rate": 1.1707198899383875e-07, "loss": 0.11698532104492188, "step": 6718 }, { "epoch": 0.936250261269421, "grad_norm": 0.6464820504188538, "learning_rate": 1.1656551852211595e-07, "loss": 0.10438919067382812, "step": 6719 }, { "epoch": 0.9363896049606354, "grad_norm": 1.5095250606536865, "learning_rate": 1.1606013303804508e-07, "loss": 0.15985870361328125, "step": 6720 }, { "epoch": 0.9365289486518498, "grad_norm": 0.885051965713501, "learning_rate": 1.1555583265390968e-07, "loss": 0.14624404907226562, "step": 6721 }, { "epoch": 0.9366682923430641, "grad_norm": 0.9603905081748962, "learning_rate": 1.1505261748175512e-07, "loss": 0.13601303100585938, "step": 6722 }, { "epoch": 0.9368076360342785, "grad_norm": 0.8361486196517944, "learning_rate": 1.1455048763338361e-07, "loss": 0.1294403076171875, "step": 6723 }, { "epoch": 0.9369469797254929, "grad_norm": 0.557680070400238, "learning_rate": 1.1404944322035705e-07, "loss": 0.09410667419433594, "step": 6724 }, { "epoch": 0.9370863234167073, "grad_norm": 1.0547295808792114, "learning_rate": 1.1354948435399582e-07, "loss": 0.15636444091796875, "step": 6725 }, { "epoch": 0.9372256671079217, "grad_norm": 0.5370838642120361, "learning_rate": 1.130506111453794e-07, "loss": 0.10295295715332031, "step": 6726 }, { "epoch": 0.937365010799136, "grad_norm": 0.7837815284729004, "learning_rate": 1.1255282370534748e-07, "loss": 0.10614776611328125, "step": 6727 }, { "epoch": 0.9375043544903504, "grad_norm": 0.8014469146728516, "learning_rate": 1.1205612214449434e-07, "loss": 0.13594436645507812, "step": 6728 }, { "epoch": 0.9376436981815648, "grad_norm": 1.1450289487838745, "learning_rate": 1.1156050657317785e-07, "loss": 0.142333984375, "step": 6729 }, { "epoch": 0.9377830418727792, "grad_norm": 0.46724334359169006, "learning_rate": 1.1106597710151157e-07, "loss": 0.10577011108398438, "step": 6730 }, { "epoch": 0.9379223855639935, "grad_norm": 1.022631049156189, "learning_rate": 1.1057253383936928e-07, "loss": 0.1377105712890625, "step": 6731 }, { "epoch": 0.9380617292552079, "grad_norm": 1.0709447860717773, "learning_rate": 1.1008017689638162e-07, "loss": 0.15762710571289062, "step": 6732 }, { "epoch": 0.9382010729464223, "grad_norm": 1.0447725057601929, "learning_rate": 1.0958890638194108e-07, "loss": 0.14998626708984375, "step": 6733 }, { "epoch": 0.9383404166376367, "grad_norm": 1.4898234605789185, "learning_rate": 1.0909872240519481e-07, "loss": 0.1565093994140625, "step": 6734 }, { "epoch": 0.9384797603288512, "grad_norm": 0.8863793015480042, "learning_rate": 1.0860962507505124e-07, "loss": 0.1318988800048828, "step": 6735 }, { "epoch": 0.9386191040200655, "grad_norm": 0.8893446922302246, "learning_rate": 1.0812161450017678e-07, "loss": 0.15027236938476562, "step": 6736 }, { "epoch": 0.9387584477112799, "grad_norm": 1.0309257507324219, "learning_rate": 1.0763469078899635e-07, "loss": 0.14334869384765625, "step": 6737 }, { "epoch": 0.9388977914024943, "grad_norm": 0.8464950919151306, "learning_rate": 1.0714885404969288e-07, "loss": 0.14078140258789062, "step": 6738 }, { "epoch": 0.9390371350937087, "grad_norm": 0.7795383930206299, "learning_rate": 1.0666410439020836e-07, "loss": 0.14644622802734375, "step": 6739 }, { "epoch": 0.939176478784923, "grad_norm": 1.1316676139831543, "learning_rate": 1.0618044191824273e-07, "loss": 0.17631149291992188, "step": 6740 }, { "epoch": 0.9393158224761374, "grad_norm": 0.7752349972724915, "learning_rate": 1.056978667412556e-07, "loss": 0.11779594421386719, "step": 6741 }, { "epoch": 0.9394551661673518, "grad_norm": 1.501503348350525, "learning_rate": 1.0521637896646286e-07, "loss": 0.2054290771484375, "step": 6742 }, { "epoch": 0.9395945098585662, "grad_norm": 0.7906860113143921, "learning_rate": 1.0473597870084174e-07, "loss": 0.1395587921142578, "step": 6743 }, { "epoch": 0.9397338535497806, "grad_norm": 0.45000746846199036, "learning_rate": 1.0425666605112516e-07, "loss": 0.09416007995605469, "step": 6744 }, { "epoch": 0.9398731972409949, "grad_norm": 0.9199137091636658, "learning_rate": 1.0377844112380575e-07, "loss": 0.13101959228515625, "step": 6745 }, { "epoch": 0.9400125409322093, "grad_norm": 1.5181446075439453, "learning_rate": 1.0330130402513406e-07, "loss": 0.15915679931640625, "step": 6746 }, { "epoch": 0.9401518846234237, "grad_norm": 1.124677300453186, "learning_rate": 1.028252548611186e-07, "loss": 0.16728591918945312, "step": 6747 }, { "epoch": 0.9402912283146381, "grad_norm": 0.7882256507873535, "learning_rate": 1.0235029373752758e-07, "loss": 0.13251113891601562, "step": 6748 }, { "epoch": 0.9404305720058524, "grad_norm": 0.5733861327171326, "learning_rate": 1.0187642075988602e-07, "loss": 0.10527229309082031, "step": 6749 }, { "epoch": 0.9405699156970668, "grad_norm": 0.8987041115760803, "learning_rate": 1.0140363603347747e-07, "loss": 0.12918853759765625, "step": 6750 }, { "epoch": 0.9407092593882812, "grad_norm": 0.6902685165405273, "learning_rate": 1.0093193966334403e-07, "loss": 0.12131500244140625, "step": 6751 }, { "epoch": 0.9408486030794956, "grad_norm": 0.6208061575889587, "learning_rate": 1.0046133175428685e-07, "loss": 0.11630630493164062, "step": 6752 }, { "epoch": 0.94098794677071, "grad_norm": 0.5003477334976196, "learning_rate": 9.999181241086231e-08, "loss": 0.10138320922851562, "step": 6753 }, { "epoch": 0.9411272904619243, "grad_norm": 1.4981352090835571, "learning_rate": 9.952338173738862e-08, "loss": 0.16614913940429688, "step": 6754 }, { "epoch": 0.9412666341531387, "grad_norm": 1.1480920314788818, "learning_rate": 9.905603983793921e-08, "loss": 0.16131210327148438, "step": 6755 }, { "epoch": 0.9414059778443531, "grad_norm": 0.5931784510612488, "learning_rate": 9.858978681634823e-08, "loss": 0.11588287353515625, "step": 6756 }, { "epoch": 0.9415453215355675, "grad_norm": 0.7833506464958191, "learning_rate": 9.81246227762045e-08, "loss": 0.13461685180664062, "step": 6757 }, { "epoch": 0.9416846652267818, "grad_norm": 0.6971127986907959, "learning_rate": 9.76605478208581e-08, "loss": 0.13526535034179688, "step": 6758 }, { "epoch": 0.9418240089179962, "grad_norm": 0.7298930883407593, "learning_rate": 9.719756205341658e-08, "loss": 0.14898681640625, "step": 6759 }, { "epoch": 0.9419633526092106, "grad_norm": 1.0342206954956055, "learning_rate": 9.673566557674263e-08, "loss": 0.14482498168945312, "step": 6760 }, { "epoch": 0.942102696300425, "grad_norm": 0.7161206603050232, "learning_rate": 9.627485849346085e-08, "loss": 0.124176025390625, "step": 6761 }, { "epoch": 0.9422420399916394, "grad_norm": 1.0392385721206665, "learning_rate": 9.581514090595212e-08, "loss": 0.13698577880859375, "step": 6762 }, { "epoch": 0.9423813836828537, "grad_norm": 0.6396362781524658, "learning_rate": 9.535651291635362e-08, "loss": 0.12108230590820312, "step": 6763 }, { "epoch": 0.9425207273740681, "grad_norm": 0.8247681856155396, "learning_rate": 9.489897462656383e-08, "loss": 0.12933731079101562, "step": 6764 }, { "epoch": 0.9426600710652825, "grad_norm": 1.2842351198196411, "learning_rate": 9.44425261382359e-08, "loss": 0.14319992065429688, "step": 6765 }, { "epoch": 0.9427994147564969, "grad_norm": 0.714444100856781, "learning_rate": 9.39871675527837e-08, "loss": 0.12828826904296875, "step": 6766 }, { "epoch": 0.9429387584477112, "grad_norm": 0.7455735802650452, "learning_rate": 9.353289897137574e-08, "loss": 0.1136016845703125, "step": 6767 }, { "epoch": 0.9430781021389256, "grad_norm": 1.0420446395874023, "learning_rate": 9.30797204949413e-08, "loss": 0.13140869140625, "step": 6768 }, { "epoch": 0.94321744583014, "grad_norm": 0.9725601673126221, "learning_rate": 9.262763222416649e-08, "loss": 0.13780975341796875, "step": 6769 }, { "epoch": 0.9433567895213544, "grad_norm": 0.5129982233047485, "learning_rate": 9.217663425949486e-08, "loss": 0.11342620849609375, "step": 6770 }, { "epoch": 0.9434961332125688, "grad_norm": 0.8622804880142212, "learning_rate": 9.172672670112681e-08, "loss": 0.12407112121582031, "step": 6771 }, { "epoch": 0.9436354769037831, "grad_norm": 0.9365994334220886, "learning_rate": 9.127790964902239e-08, "loss": 0.13721275329589844, "step": 6772 }, { "epoch": 0.9437748205949975, "grad_norm": 0.9928549528121948, "learning_rate": 9.083018320289849e-08, "loss": 0.1083984375, "step": 6773 }, { "epoch": 0.9439141642862119, "grad_norm": 0.9571124911308289, "learning_rate": 9.038354746222999e-08, "loss": 0.16482925415039062, "step": 6774 }, { "epoch": 0.9440535079774264, "grad_norm": 0.632163405418396, "learning_rate": 8.993800252624863e-08, "loss": 0.11646842956542969, "step": 6775 }, { "epoch": 0.9441928516686408, "grad_norm": 1.1108036041259766, "learning_rate": 8.94935484939441e-08, "loss": 0.13082504272460938, "step": 6776 }, { "epoch": 0.9443321953598551, "grad_norm": 1.159233570098877, "learning_rate": 8.905018546406519e-08, "loss": 0.14490509033203125, "step": 6777 }, { "epoch": 0.9444715390510695, "grad_norm": 0.9977253079414368, "learning_rate": 8.860791353511532e-08, "loss": 0.14223670959472656, "step": 6778 }, { "epoch": 0.9446108827422839, "grad_norm": 0.6592791080474854, "learning_rate": 8.816673280535815e-08, "loss": 0.12766647338867188, "step": 6779 }, { "epoch": 0.9447502264334983, "grad_norm": 0.7724053263664246, "learning_rate": 8.772664337281412e-08, "loss": 0.12930679321289062, "step": 6780 }, { "epoch": 0.9448895701247126, "grad_norm": 1.3975965976715088, "learning_rate": 8.728764533526112e-08, "loss": 0.16833877563476562, "step": 6781 }, { "epoch": 0.945028913815927, "grad_norm": 1.1029226779937744, "learning_rate": 8.684973879023395e-08, "loss": 0.1527557373046875, "step": 6782 }, { "epoch": 0.9451682575071414, "grad_norm": 0.5048050880432129, "learning_rate": 8.641292383502531e-08, "loss": 0.10388374328613281, "step": 6783 }, { "epoch": 0.9453076011983558, "grad_norm": 0.785764217376709, "learning_rate": 8.597720056668646e-08, "loss": 0.12369537353515625, "step": 6784 }, { "epoch": 0.9454469448895702, "grad_norm": 0.7883588075637817, "learning_rate": 8.55425690820244e-08, "loss": 0.13357162475585938, "step": 6785 }, { "epoch": 0.9455862885807845, "grad_norm": 1.2949150800704956, "learning_rate": 8.510902947760469e-08, "loss": 0.1346588134765625, "step": 6786 }, { "epoch": 0.9457256322719989, "grad_norm": 0.5806105732917786, "learning_rate": 8.467658184974914e-08, "loss": 0.11293792724609375, "step": 6787 }, { "epoch": 0.9458649759632133, "grad_norm": 0.7285953164100647, "learning_rate": 8.424522629453924e-08, "loss": 0.130889892578125, "step": 6788 }, { "epoch": 0.9460043196544277, "grad_norm": 1.5685703754425049, "learning_rate": 8.381496290781055e-08, "loss": 0.17584228515625, "step": 6789 }, { "epoch": 0.946143663345642, "grad_norm": 1.1889079809188843, "learning_rate": 8.338579178515882e-08, "loss": 0.1475811004638672, "step": 6790 }, { "epoch": 0.9462830070368564, "grad_norm": 0.696012020111084, "learning_rate": 8.295771302193723e-08, "loss": 0.11107444763183594, "step": 6791 }, { "epoch": 0.9464223507280708, "grad_norm": 0.814603328704834, "learning_rate": 8.253072671325246e-08, "loss": 0.14890670776367188, "step": 6792 }, { "epoch": 0.9465616944192852, "grad_norm": 1.0409505367279053, "learning_rate": 8.210483295397309e-08, "loss": 0.12230300903320312, "step": 6793 }, { "epoch": 0.9467010381104995, "grad_norm": 1.7208036184310913, "learning_rate": 8.168003183872175e-08, "loss": 0.15691375732421875, "step": 6794 }, { "epoch": 0.9468403818017139, "grad_norm": 0.5465826988220215, "learning_rate": 8.125632346188073e-08, "loss": 0.1022186279296875, "step": 6795 }, { "epoch": 0.9469797254929283, "grad_norm": 0.695022463798523, "learning_rate": 8.083370791758804e-08, "loss": 0.11900138854980469, "step": 6796 }, { "epoch": 0.9471190691841427, "grad_norm": 1.021024465560913, "learning_rate": 8.04121852997386e-08, "loss": 0.13708114624023438, "step": 6797 }, { "epoch": 0.9472584128753571, "grad_norm": 0.7750123143196106, "learning_rate": 7.999175570198526e-08, "loss": 0.12244033813476562, "step": 6798 }, { "epoch": 0.9473977565665714, "grad_norm": 0.9426019191741943, "learning_rate": 7.957241921773828e-08, "loss": 0.16485214233398438, "step": 6799 }, { "epoch": 0.9475371002577858, "grad_norm": 0.9746572971343994, "learning_rate": 7.915417594016428e-08, "loss": 0.13026046752929688, "step": 6800 }, { "epoch": 0.9476764439490002, "grad_norm": 1.540727972984314, "learning_rate": 7.873702596218836e-08, "loss": 0.18944549560546875, "step": 6801 }, { "epoch": 0.9478157876402146, "grad_norm": 0.9584885835647583, "learning_rate": 7.83209693764908e-08, "loss": 0.12675857543945312, "step": 6802 }, { "epoch": 0.947955131331429, "grad_norm": 1.4686319828033447, "learning_rate": 7.790600627550937e-08, "loss": 0.18029403686523438, "step": 6803 }, { "epoch": 0.9480944750226433, "grad_norm": 0.960382878780365, "learning_rate": 7.749213675143974e-08, "loss": 0.14368057250976562, "step": 6804 }, { "epoch": 0.9482338187138577, "grad_norm": 1.0909087657928467, "learning_rate": 7.707936089623558e-08, "loss": 0.1516857147216797, "step": 6805 }, { "epoch": 0.9483731624050721, "grad_norm": 0.933485209941864, "learning_rate": 7.666767880160464e-08, "loss": 0.14282608032226562, "step": 6806 }, { "epoch": 0.9485125060962865, "grad_norm": 0.8700838685035706, "learning_rate": 7.625709055901375e-08, "loss": 0.12911605834960938, "step": 6807 }, { "epoch": 0.9486518497875008, "grad_norm": 0.6475270390510559, "learning_rate": 7.584759625968663e-08, "loss": 0.11301040649414062, "step": 6808 }, { "epoch": 0.9487911934787152, "grad_norm": 1.0679301023483276, "learning_rate": 7.543919599460325e-08, "loss": 0.123870849609375, "step": 6809 }, { "epoch": 0.9489305371699296, "grad_norm": 0.6234498620033264, "learning_rate": 7.503188985450105e-08, "loss": 0.12014007568359375, "step": 6810 }, { "epoch": 0.949069880861144, "grad_norm": 0.7502124905586243, "learning_rate": 7.462567792987374e-08, "loss": 0.11939430236816406, "step": 6811 }, { "epoch": 0.9492092245523583, "grad_norm": 0.7535973787307739, "learning_rate": 7.422056031097302e-08, "loss": 0.13106918334960938, "step": 6812 }, { "epoch": 0.9493485682435727, "grad_norm": 0.8169223070144653, "learning_rate": 7.381653708780578e-08, "loss": 0.135467529296875, "step": 6813 }, { "epoch": 0.9494879119347871, "grad_norm": 0.64400714635849, "learning_rate": 7.341360835013745e-08, "loss": 0.0976104736328125, "step": 6814 }, { "epoch": 0.9496272556260016, "grad_norm": 0.9516388773918152, "learning_rate": 7.301177418748973e-08, "loss": 0.15519332885742188, "step": 6815 }, { "epoch": 0.949766599317216, "grad_norm": 0.6588060855865479, "learning_rate": 7.261103468914066e-08, "loss": 0.12270355224609375, "step": 6816 }, { "epoch": 0.9499059430084303, "grad_norm": 0.9968149662017822, "learning_rate": 7.221138994412569e-08, "loss": 0.1524658203125, "step": 6817 }, { "epoch": 0.9500452866996447, "grad_norm": 0.763359010219574, "learning_rate": 7.181284004123601e-08, "loss": 0.13587188720703125, "step": 6818 }, { "epoch": 0.9501846303908591, "grad_norm": 0.6799383163452148, "learning_rate": 7.14153850690208e-08, "loss": 0.12132644653320312, "step": 6819 }, { "epoch": 0.9503239740820735, "grad_norm": 0.64691561460495, "learning_rate": 7.101902511578606e-08, "loss": 0.11112594604492188, "step": 6820 }, { "epoch": 0.9504633177732879, "grad_norm": 0.6366909742355347, "learning_rate": 7.062376026959305e-08, "loss": 0.12336540222167969, "step": 6821 }, { "epoch": 0.9506026614645022, "grad_norm": 0.9036715626716614, "learning_rate": 7.022959061826151e-08, "loss": 0.13852310180664062, "step": 6822 }, { "epoch": 0.9507420051557166, "grad_norm": 0.8699920773506165, "learning_rate": 6.983651624936527e-08, "loss": 0.14088821411132812, "step": 6823 }, { "epoch": 0.950881348846931, "grad_norm": 1.3941761255264282, "learning_rate": 6.944453725023836e-08, "loss": 0.16854095458984375, "step": 6824 }, { "epoch": 0.9510206925381454, "grad_norm": 0.8894477486610413, "learning_rate": 6.905365370796891e-08, "loss": 0.11638259887695312, "step": 6825 }, { "epoch": 0.9511600362293597, "grad_norm": 1.3129597902297974, "learning_rate": 6.866386570940132e-08, "loss": 0.16024398803710938, "step": 6826 }, { "epoch": 0.9512993799205741, "grad_norm": 0.9041678309440613, "learning_rate": 6.827517334113965e-08, "loss": 0.1407489776611328, "step": 6827 }, { "epoch": 0.9514387236117885, "grad_norm": 0.548263430595398, "learning_rate": 6.788757668954038e-08, "loss": 0.10195159912109375, "step": 6828 }, { "epoch": 0.9515780673030029, "grad_norm": 0.6365494728088379, "learning_rate": 6.750107584071964e-08, "loss": 0.13358306884765625, "step": 6829 }, { "epoch": 0.9517174109942172, "grad_norm": 1.1774998903274536, "learning_rate": 6.711567088054927e-08, "loss": 0.15161895751953125, "step": 6830 }, { "epoch": 0.9518567546854316, "grad_norm": 1.0300697088241577, "learning_rate": 6.67313618946569e-08, "loss": 0.1798248291015625, "step": 6831 }, { "epoch": 0.951996098376646, "grad_norm": 0.7573988437652588, "learning_rate": 6.634814896842757e-08, "loss": 0.11273956298828125, "step": 6832 }, { "epoch": 0.9521354420678604, "grad_norm": 1.1987192630767822, "learning_rate": 6.59660321870026e-08, "loss": 0.15832901000976562, "step": 6833 }, { "epoch": 0.9522747857590748, "grad_norm": 1.2826534509658813, "learning_rate": 6.558501163527964e-08, "loss": 0.1439361572265625, "step": 6834 }, { "epoch": 0.9524141294502891, "grad_norm": 1.1101926565170288, "learning_rate": 6.520508739791153e-08, "loss": 0.16514205932617188, "step": 6835 }, { "epoch": 0.9525534731415035, "grad_norm": 0.9673881530761719, "learning_rate": 6.482625955931022e-08, "loss": 0.1471405029296875, "step": 6836 }, { "epoch": 0.9526928168327179, "grad_norm": 1.3531056642532349, "learning_rate": 6.444852820364222e-08, "loss": 0.1760082244873047, "step": 6837 }, { "epoch": 0.9528321605239323, "grad_norm": 0.8217846155166626, "learning_rate": 6.407189341483044e-08, "loss": 0.1425151824951172, "step": 6838 }, { "epoch": 0.9529715042151466, "grad_norm": 0.8326387405395508, "learning_rate": 6.369635527655515e-08, "loss": 0.12946319580078125, "step": 6839 }, { "epoch": 0.953110847906361, "grad_norm": 0.6376768946647644, "learning_rate": 6.332191387225128e-08, "loss": 0.11076736450195312, "step": 6840 }, { "epoch": 0.9532501915975754, "grad_norm": 0.9294549226760864, "learning_rate": 6.294856928511284e-08, "loss": 0.120758056640625, "step": 6841 }, { "epoch": 0.9533895352887898, "grad_norm": 0.881020188331604, "learning_rate": 6.257632159808679e-08, "loss": 0.136199951171875, "step": 6842 }, { "epoch": 0.9535288789800042, "grad_norm": 0.7673536539077759, "learning_rate": 6.220517089387867e-08, "loss": 0.12773895263671875, "step": 6843 }, { "epoch": 0.9536682226712185, "grad_norm": 0.9648326635360718, "learning_rate": 6.183511725495028e-08, "loss": 0.15594863891601562, "step": 6844 }, { "epoch": 0.9538075663624329, "grad_norm": 1.4243069887161255, "learning_rate": 6.146616076351864e-08, "loss": 0.1981658935546875, "step": 6845 }, { "epoch": 0.9539469100536473, "grad_norm": 1.7060586214065552, "learning_rate": 6.109830150155705e-08, "loss": 0.15116119384765625, "step": 6846 }, { "epoch": 0.9540862537448617, "grad_norm": 0.9188789129257202, "learning_rate": 6.07315395507957e-08, "loss": 0.16348648071289062, "step": 6847 }, { "epoch": 0.954225597436076, "grad_norm": 1.3248275518417358, "learning_rate": 6.036587499272161e-08, "loss": 0.15621566772460938, "step": 6848 }, { "epoch": 0.9543649411272904, "grad_norm": 0.7430052161216736, "learning_rate": 6.000130790857595e-08, "loss": 0.12693023681640625, "step": 6849 }, { "epoch": 0.9545042848185048, "grad_norm": 0.7458785176277161, "learning_rate": 5.963783837935722e-08, "loss": 0.15305328369140625, "step": 6850 }, { "epoch": 0.9546436285097192, "grad_norm": 0.8454581499099731, "learning_rate": 5.927546648582083e-08, "loss": 0.12275314331054688, "step": 6851 }, { "epoch": 0.9547829722009336, "grad_norm": 0.724779486656189, "learning_rate": 5.8914192308476835e-08, "loss": 0.11688995361328125, "step": 6852 }, { "epoch": 0.9549223158921479, "grad_norm": 0.7892240881919861, "learning_rate": 5.855401592759269e-08, "loss": 0.126617431640625, "step": 6853 }, { "epoch": 0.9550616595833623, "grad_norm": 1.1677043437957764, "learning_rate": 5.8194937423191043e-08, "loss": 0.15577316284179688, "step": 6854 }, { "epoch": 0.9552010032745768, "grad_norm": 0.8708532452583313, "learning_rate": 5.783695687505087e-08, "loss": 0.14321136474609375, "step": 6855 }, { "epoch": 0.9553403469657912, "grad_norm": 0.6195106506347656, "learning_rate": 5.7480074362707415e-08, "loss": 0.1161956787109375, "step": 6856 }, { "epoch": 0.9554796906570056, "grad_norm": 0.5925561189651489, "learning_rate": 5.712428996545172e-08, "loss": 0.11754798889160156, "step": 6857 }, { "epoch": 0.9556190343482199, "grad_norm": 1.5925447940826416, "learning_rate": 5.6769603762331096e-08, "loss": 0.19965362548828125, "step": 6858 }, { "epoch": 0.9557583780394343, "grad_norm": 1.2159532308578491, "learning_rate": 5.641601583214862e-08, "loss": 0.17249679565429688, "step": 6859 }, { "epoch": 0.9558977217306487, "grad_norm": 0.9063015580177307, "learning_rate": 5.606352625346368e-08, "loss": 0.13074684143066406, "step": 6860 }, { "epoch": 0.9560370654218631, "grad_norm": 1.0644633769989014, "learning_rate": 5.571213510459084e-08, "loss": 0.157257080078125, "step": 6861 }, { "epoch": 0.9561764091130774, "grad_norm": 0.7240573763847351, "learning_rate": 5.53618424636021e-08, "loss": 0.12899398803710938, "step": 6862 }, { "epoch": 0.9563157528042918, "grad_norm": 0.9722546339035034, "learning_rate": 5.501264840832299e-08, "loss": 0.1358184814453125, "step": 6863 }, { "epoch": 0.9564550964955062, "grad_norm": 1.3779125213623047, "learning_rate": 5.466455301633811e-08, "loss": 0.16860580444335938, "step": 6864 }, { "epoch": 0.9565944401867206, "grad_norm": 1.0382814407348633, "learning_rate": 5.431755636498559e-08, "loss": 0.14947509765625, "step": 6865 }, { "epoch": 0.956733783877935, "grad_norm": 0.988021194934845, "learning_rate": 5.3971658531360436e-08, "loss": 0.135009765625, "step": 6866 }, { "epoch": 0.9568731275691493, "grad_norm": 1.5429530143737793, "learning_rate": 5.362685959231284e-08, "loss": 0.159576416015625, "step": 6867 }, { "epoch": 0.9570124712603637, "grad_norm": 0.5926460027694702, "learning_rate": 5.3283159624448745e-08, "loss": 0.10894012451171875, "step": 6868 }, { "epoch": 0.9571518149515781, "grad_norm": 0.6685112714767456, "learning_rate": 5.294055870413206e-08, "loss": 0.12589263916015625, "step": 6869 }, { "epoch": 0.9572911586427925, "grad_norm": 1.055148720741272, "learning_rate": 5.2599056907479685e-08, "loss": 0.1565418243408203, "step": 6870 }, { "epoch": 0.9574305023340068, "grad_norm": 0.857701301574707, "learning_rate": 5.2258654310365366e-08, "loss": 0.13045501708984375, "step": 6871 }, { "epoch": 0.9575698460252212, "grad_norm": 0.8351601958274841, "learning_rate": 5.1919350988419716e-08, "loss": 0.11249542236328125, "step": 6872 }, { "epoch": 0.9577091897164356, "grad_norm": 1.4202163219451904, "learning_rate": 5.1581147017027434e-08, "loss": 0.1779937744140625, "step": 6873 }, { "epoch": 0.95784853340765, "grad_norm": 0.6879609227180481, "learning_rate": 5.124404247133008e-08, "loss": 0.1216278076171875, "step": 6874 }, { "epoch": 0.9579878770988643, "grad_norm": 0.9119488000869751, "learning_rate": 5.090803742622441e-08, "loss": 0.142608642578125, "step": 6875 }, { "epoch": 0.9581272207900787, "grad_norm": 0.8257356286048889, "learning_rate": 5.057313195636293e-08, "loss": 0.11702919006347656, "step": 6876 }, { "epoch": 0.9582665644812931, "grad_norm": 1.3734782934188843, "learning_rate": 5.0239326136154454e-08, "loss": 0.16178512573242188, "step": 6877 }, { "epoch": 0.9584059081725075, "grad_norm": 1.318259596824646, "learning_rate": 4.990662003976243e-08, "loss": 0.16788101196289062, "step": 6878 }, { "epoch": 0.9585452518637219, "grad_norm": 1.4688836336135864, "learning_rate": 4.957501374110718e-08, "loss": 0.15967178344726562, "step": 6879 }, { "epoch": 0.9586845955549362, "grad_norm": 0.9380475282669067, "learning_rate": 4.924450731386365e-08, "loss": 0.14363479614257812, "step": 6880 }, { "epoch": 0.9588239392461506, "grad_norm": 0.9374989867210388, "learning_rate": 4.8915100831463116e-08, "loss": 0.15781021118164062, "step": 6881 }, { "epoch": 0.958963282937365, "grad_norm": 1.1192659139633179, "learning_rate": 4.858679436709201e-08, "loss": 0.12847518920898438, "step": 6882 }, { "epoch": 0.9591026266285794, "grad_norm": 0.9751996397972107, "learning_rate": 4.825958799369201e-08, "loss": 0.13714027404785156, "step": 6883 }, { "epoch": 0.9592419703197937, "grad_norm": 1.4922620058059692, "learning_rate": 4.7933481783961624e-08, "loss": 0.1507568359375, "step": 6884 }, { "epoch": 0.9593813140110081, "grad_norm": 0.6809203624725342, "learning_rate": 4.760847581035399e-08, "loss": 0.12703895568847656, "step": 6885 }, { "epoch": 0.9595206577022225, "grad_norm": 0.7911767959594727, "learning_rate": 4.728457014507859e-08, "loss": 0.12051010131835938, "step": 6886 }, { "epoch": 0.9596600013934369, "grad_norm": 0.7348757982254028, "learning_rate": 4.69617648600984e-08, "loss": 0.13274383544921875, "step": 6887 }, { "epoch": 0.9597993450846513, "grad_norm": 0.6947963237762451, "learning_rate": 4.664006002713495e-08, "loss": 0.11585044860839844, "step": 6888 }, { "epoch": 0.9599386887758656, "grad_norm": 1.0271621942520142, "learning_rate": 4.631945571766272e-08, "loss": 0.159423828125, "step": 6889 }, { "epoch": 0.96007803246708, "grad_norm": 0.7813093662261963, "learning_rate": 4.5999952002912516e-08, "loss": 0.13174819946289062, "step": 6890 }, { "epoch": 0.9602173761582944, "grad_norm": 0.728668749332428, "learning_rate": 4.5681548953872555e-08, "loss": 0.10205459594726562, "step": 6891 }, { "epoch": 0.9603567198495088, "grad_norm": 0.9345290660858154, "learning_rate": 4.536424664128236e-08, "loss": 0.14584732055664062, "step": 6892 }, { "epoch": 0.9604960635407231, "grad_norm": 0.8951891660690308, "learning_rate": 4.504804513564054e-08, "loss": 0.12422561645507812, "step": 6893 }, { "epoch": 0.9606354072319375, "grad_norm": 0.6970809698104858, "learning_rate": 4.473294450719923e-08, "loss": 0.13025474548339844, "step": 6894 }, { "epoch": 0.9607747509231519, "grad_norm": 0.9530086517333984, "learning_rate": 4.441894482596743e-08, "loss": 0.12913894653320312, "step": 6895 }, { "epoch": 0.9609140946143664, "grad_norm": 0.7974449992179871, "learning_rate": 4.410604616170822e-08, "loss": 0.1391143798828125, "step": 6896 }, { "epoch": 0.9610534383055808, "grad_norm": 0.7205310463905334, "learning_rate": 4.379424858394043e-08, "loss": 0.12914657592773438, "step": 6897 }, { "epoch": 0.9611927819967951, "grad_norm": 1.1699860095977783, "learning_rate": 4.348355216193867e-08, "loss": 0.13920974731445312, "step": 6898 }, { "epoch": 0.9613321256880095, "grad_norm": 1.2350201606750488, "learning_rate": 4.3173956964732145e-08, "loss": 0.16476821899414062, "step": 6899 }, { "epoch": 0.9614714693792239, "grad_norm": 0.66543048620224, "learning_rate": 4.286546306110639e-08, "loss": 0.11237335205078125, "step": 6900 }, { "epoch": 0.9616108130704383, "grad_norm": 1.0743530988693237, "learning_rate": 4.2558070519601594e-08, "loss": 0.15677261352539062, "step": 6901 }, { "epoch": 0.9617501567616527, "grad_norm": 1.0110701322555542, "learning_rate": 4.2251779408513104e-08, "loss": 0.15291976928710938, "step": 6902 }, { "epoch": 0.961889500452867, "grad_norm": 1.1458137035369873, "learning_rate": 4.19465897958915e-08, "loss": 0.16391372680664062, "step": 6903 }, { "epoch": 0.9620288441440814, "grad_norm": 1.057858943939209, "learning_rate": 4.164250174954365e-08, "loss": 0.12965774536132812, "step": 6904 }, { "epoch": 0.9621681878352958, "grad_norm": 0.8400500416755676, "learning_rate": 4.133951533703107e-08, "loss": 0.1175079345703125, "step": 6905 }, { "epoch": 0.9623075315265102, "grad_norm": 0.6416149735450745, "learning_rate": 4.1037630625669345e-08, "loss": 0.1140594482421875, "step": 6906 }, { "epoch": 0.9624468752177245, "grad_norm": 0.9895318746566772, "learning_rate": 4.07368476825315e-08, "loss": 0.14485549926757812, "step": 6907 }, { "epoch": 0.9625862189089389, "grad_norm": 1.105789065361023, "learning_rate": 4.043716657444407e-08, "loss": 0.13529586791992188, "step": 6908 }, { "epoch": 0.9627255626001533, "grad_norm": 0.8912789821624756, "learning_rate": 4.0138587367989365e-08, "loss": 0.14586639404296875, "step": 6909 }, { "epoch": 0.9628649062913677, "grad_norm": 0.5950611233711243, "learning_rate": 3.984111012950487e-08, "loss": 0.11302375793457031, "step": 6910 }, { "epoch": 0.963004249982582, "grad_norm": 0.901561439037323, "learning_rate": 3.9544734925083264e-08, "loss": 0.15252304077148438, "step": 6911 }, { "epoch": 0.9631435936737964, "grad_norm": 1.1991513967514038, "learning_rate": 3.924946182057299e-08, "loss": 0.14302825927734375, "step": 6912 }, { "epoch": 0.9632829373650108, "grad_norm": 0.9602375626564026, "learning_rate": 3.8955290881576566e-08, "loss": 0.14666748046875, "step": 6913 }, { "epoch": 0.9634222810562252, "grad_norm": 0.9169764518737793, "learning_rate": 3.866222217345117e-08, "loss": 0.1400279998779297, "step": 6914 }, { "epoch": 0.9635616247474396, "grad_norm": 1.3116347789764404, "learning_rate": 3.837025576131137e-08, "loss": 0.18352890014648438, "step": 6915 }, { "epoch": 0.9637009684386539, "grad_norm": 1.2801852226257324, "learning_rate": 3.807939171002473e-08, "loss": 0.14324188232421875, "step": 6916 }, { "epoch": 0.9638403121298683, "grad_norm": 1.1426366567611694, "learning_rate": 3.778963008421455e-08, "loss": 0.14227676391601562, "step": 6917 }, { "epoch": 0.9639796558210827, "grad_norm": 0.9020674228668213, "learning_rate": 3.750097094825933e-08, "loss": 0.1314067840576172, "step": 6918 }, { "epoch": 0.9641189995122971, "grad_norm": 0.9107460975646973, "learning_rate": 3.721341436629222e-08, "loss": 0.1285247802734375, "step": 6919 }, { "epoch": 0.9642583432035114, "grad_norm": 0.8124200105667114, "learning_rate": 3.6926960402202674e-08, "loss": 0.13621902465820312, "step": 6920 }, { "epoch": 0.9643976868947258, "grad_norm": 2.1184351444244385, "learning_rate": 3.66416091196331e-08, "loss": 0.15837860107421875, "step": 6921 }, { "epoch": 0.9645370305859402, "grad_norm": 1.6221553087234497, "learning_rate": 3.63573605819828e-08, "loss": 0.17129135131835938, "step": 6922 }, { "epoch": 0.9646763742771546, "grad_norm": 1.3726736307144165, "learning_rate": 3.6074214852405695e-08, "loss": 0.16307830810546875, "step": 6923 }, { "epoch": 0.964815717968369, "grad_norm": 1.3338451385498047, "learning_rate": 3.5792171993809244e-08, "loss": 0.15397262573242188, "step": 6924 }, { "epoch": 0.9649550616595833, "grad_norm": 0.561217725276947, "learning_rate": 3.55112320688572e-08, "loss": 0.10950469970703125, "step": 6925 }, { "epoch": 0.9650944053507977, "grad_norm": 1.020622730255127, "learning_rate": 3.523139513996798e-08, "loss": 0.15869522094726562, "step": 6926 }, { "epoch": 0.9652337490420121, "grad_norm": 0.8496119976043701, "learning_rate": 3.495266126931574e-08, "loss": 0.13122940063476562, "step": 6927 }, { "epoch": 0.9653730927332265, "grad_norm": 0.6614634990692139, "learning_rate": 3.467503051882815e-08, "loss": 0.12261199951171875, "step": 6928 }, { "epoch": 0.9655124364244408, "grad_norm": 1.0889257192611694, "learning_rate": 3.4398502950188096e-08, "loss": 0.13164901733398438, "step": 6929 }, { "epoch": 0.9656517801156552, "grad_norm": 1.4855985641479492, "learning_rate": 3.4123078624834214e-08, "loss": 0.19348907470703125, "step": 6930 }, { "epoch": 0.9657911238068696, "grad_norm": 0.885087251663208, "learning_rate": 3.384875760395978e-08, "loss": 0.12649917602539062, "step": 6931 }, { "epoch": 0.965930467498084, "grad_norm": 1.093185544013977, "learning_rate": 3.3575539948511595e-08, "loss": 0.15796279907226562, "step": 6932 }, { "epoch": 0.9660698111892984, "grad_norm": 1.2416574954986572, "learning_rate": 3.330342571919332e-08, "loss": 0.14744949340820312, "step": 6933 }, { "epoch": 0.9662091548805127, "grad_norm": 0.9009302258491516, "learning_rate": 3.30324149764627e-08, "loss": 0.1526165008544922, "step": 6934 }, { "epoch": 0.9663484985717271, "grad_norm": 1.1029706001281738, "learning_rate": 3.2762507780531026e-08, "loss": 0.13303756713867188, "step": 6935 }, { "epoch": 0.9664878422629416, "grad_norm": 1.110844373703003, "learning_rate": 3.249370419136644e-08, "loss": 0.15291213989257812, "step": 6936 }, { "epoch": 0.966627185954156, "grad_norm": 1.211211085319519, "learning_rate": 3.2226004268690605e-08, "loss": 0.16091537475585938, "step": 6937 }, { "epoch": 0.9667665296453704, "grad_norm": 1.2876372337341309, "learning_rate": 3.195940807198039e-08, "loss": 0.133575439453125, "step": 6938 }, { "epoch": 0.9669058733365847, "grad_norm": 1.17920982837677, "learning_rate": 3.169391566046731e-08, "loss": 0.1683502197265625, "step": 6939 }, { "epoch": 0.9670452170277991, "grad_norm": 0.5250743627548218, "learning_rate": 3.142952709313807e-08, "loss": 0.10626792907714844, "step": 6940 }, { "epoch": 0.9671845607190135, "grad_norm": 0.8733405470848083, "learning_rate": 3.116624242873345e-08, "loss": 0.13445281982421875, "step": 6941 }, { "epoch": 0.9673239044102279, "grad_norm": 0.8886290192604065, "learning_rate": 3.090406172574889e-08, "loss": 0.1461334228515625, "step": 6942 }, { "epoch": 0.9674632481014422, "grad_norm": 0.8909391164779663, "learning_rate": 3.064298504243612e-08, "loss": 0.14604759216308594, "step": 6943 }, { "epoch": 0.9676025917926566, "grad_norm": 0.6364779472351074, "learning_rate": 3.0383012436799306e-08, "loss": 0.1087799072265625, "step": 6944 }, { "epoch": 0.967741935483871, "grad_norm": 0.8365761041641235, "learning_rate": 3.0124143966599464e-08, "loss": 0.13291549682617188, "step": 6945 }, { "epoch": 0.9678812791750854, "grad_norm": 0.8281123042106628, "learning_rate": 2.9866379689350024e-08, "loss": 0.12989044189453125, "step": 6946 }, { "epoch": 0.9680206228662998, "grad_norm": 1.4029208421707153, "learning_rate": 2.9609719662320735e-08, "loss": 0.17555618286132812, "step": 6947 }, { "epoch": 0.9681599665575141, "grad_norm": 0.849494993686676, "learning_rate": 2.9354163942535983e-08, "loss": 0.12261009216308594, "step": 6948 }, { "epoch": 0.9682993102487285, "grad_norm": 1.287341594696045, "learning_rate": 2.90997125867748e-08, "loss": 0.152191162109375, "step": 6949 }, { "epoch": 0.9684386539399429, "grad_norm": 1.2142980098724365, "learning_rate": 2.8846365651569175e-08, "loss": 0.1523284912109375, "step": 6950 }, { "epoch": 0.9685779976311573, "grad_norm": 1.0085545778274536, "learning_rate": 2.8594123193207978e-08, "loss": 0.15925216674804688, "step": 6951 }, { "epoch": 0.9687173413223716, "grad_norm": 0.7875611782073975, "learning_rate": 2.83429852677336e-08, "loss": 0.10461044311523438, "step": 6952 }, { "epoch": 0.968856685013586, "grad_norm": 0.6400355100631714, "learning_rate": 2.809295193094308e-08, "loss": 0.11635971069335938, "step": 6953 }, { "epoch": 0.9689960287048004, "grad_norm": 1.2683429718017578, "learning_rate": 2.7844023238388084e-08, "loss": 0.15700912475585938, "step": 6954 }, { "epoch": 0.9691353723960148, "grad_norm": 1.386183738708496, "learning_rate": 2.759619924537438e-08, "loss": 0.1572723388671875, "step": 6955 }, { "epoch": 0.9692747160872291, "grad_norm": 0.5751063823699951, "learning_rate": 2.7349480006964023e-08, "loss": 0.12188720703125, "step": 6956 }, { "epoch": 0.9694140597784435, "grad_norm": 0.9093526005744934, "learning_rate": 2.7103865577970955e-08, "loss": 0.13982391357421875, "step": 6957 }, { "epoch": 0.9695534034696579, "grad_norm": 1.3549582958221436, "learning_rate": 2.6859356012965964e-08, "loss": 0.13977432250976562, "step": 6958 }, { "epoch": 0.9696927471608723, "grad_norm": 0.834677517414093, "learning_rate": 2.661595136627393e-08, "loss": 0.13952255249023438, "step": 6959 }, { "epoch": 0.9698320908520867, "grad_norm": 0.9246061444282532, "learning_rate": 2.63736516919727e-08, "loss": 0.13855361938476562, "step": 6960 }, { "epoch": 0.969971434543301, "grad_norm": 0.8534976840019226, "learning_rate": 2.6132457043896442e-08, "loss": 0.13199615478515625, "step": 6961 }, { "epoch": 0.9701107782345154, "grad_norm": 0.9037952423095703, "learning_rate": 2.589236747563284e-08, "loss": 0.13669967651367188, "step": 6962 }, { "epoch": 0.9702501219257298, "grad_norm": 0.5818400382995605, "learning_rate": 2.5653383040524228e-08, "loss": 0.1131744384765625, "step": 6963 }, { "epoch": 0.9703894656169442, "grad_norm": 0.9846943020820618, "learning_rate": 2.5415503791667573e-08, "loss": 0.13858795166015625, "step": 6964 }, { "epoch": 0.9705288093081585, "grad_norm": 0.8922971487045288, "learning_rate": 2.5178729781915046e-08, "loss": 0.13908004760742188, "step": 6965 }, { "epoch": 0.9706681529993729, "grad_norm": 0.847814679145813, "learning_rate": 2.4943061063870678e-08, "loss": 0.15024566650390625, "step": 6966 }, { "epoch": 0.9708074966905873, "grad_norm": 0.7409277558326721, "learning_rate": 2.4708497689896472e-08, "loss": 0.13130950927734375, "step": 6967 }, { "epoch": 0.9709468403818017, "grad_norm": 0.6289575099945068, "learning_rate": 2.4475039712105742e-08, "loss": 0.12395095825195312, "step": 6968 }, { "epoch": 0.9710861840730161, "grad_norm": 0.8508925437927246, "learning_rate": 2.4242687182368106e-08, "loss": 0.12595367431640625, "step": 6969 }, { "epoch": 0.9712255277642304, "grad_norm": 0.766044020652771, "learning_rate": 2.401144015230672e-08, "loss": 0.139007568359375, "step": 6970 }, { "epoch": 0.9713648714554448, "grad_norm": 1.1317036151885986, "learning_rate": 2.3781298673299924e-08, "loss": 0.14875411987304688, "step": 6971 }, { "epoch": 0.9715042151466592, "grad_norm": 0.8174901008605957, "learning_rate": 2.3552262796479042e-08, "loss": 0.12158584594726562, "step": 6972 }, { "epoch": 0.9716435588378736, "grad_norm": 0.7654085159301758, "learning_rate": 2.33243325727317e-08, "loss": 0.11316680908203125, "step": 6973 }, { "epoch": 0.9717829025290879, "grad_norm": 0.5661042332649231, "learning_rate": 2.3097508052697948e-08, "loss": 0.12149810791015625, "step": 6974 }, { "epoch": 0.9719222462203023, "grad_norm": 0.7990139126777649, "learning_rate": 2.2871789286773582e-08, "loss": 0.12573623657226562, "step": 6975 }, { "epoch": 0.9720615899115168, "grad_norm": 0.9744881391525269, "learning_rate": 2.264717632510738e-08, "loss": 0.14887237548828125, "step": 6976 }, { "epoch": 0.9722009336027312, "grad_norm": 1.232680082321167, "learning_rate": 2.2423669217604415e-08, "loss": 0.16168594360351562, "step": 6977 }, { "epoch": 0.9723402772939456, "grad_norm": 0.8642265200614929, "learning_rate": 2.220126801392164e-08, "loss": 0.1532135009765625, "step": 6978 }, { "epoch": 0.9724796209851599, "grad_norm": 1.2505898475646973, "learning_rate": 2.1979972763471747e-08, "loss": 0.14471054077148438, "step": 6979 }, { "epoch": 0.9726189646763743, "grad_norm": 1.377753496170044, "learning_rate": 2.1759783515422074e-08, "loss": 0.19237518310546875, "step": 6980 }, { "epoch": 0.9727583083675887, "grad_norm": 1.7106244564056396, "learning_rate": 2.1540700318693487e-08, "loss": 0.1694049835205078, "step": 6981 }, { "epoch": 0.9728976520588031, "grad_norm": 0.833490252494812, "learning_rate": 2.132272322196094e-08, "loss": 0.13513565063476562, "step": 6982 }, { "epoch": 0.9730369957500175, "grad_norm": 1.0827386379241943, "learning_rate": 2.110585227365458e-08, "loss": 0.15459060668945312, "step": 6983 }, { "epoch": 0.9731763394412318, "grad_norm": 0.8380700349807739, "learning_rate": 2.0890087521957536e-08, "loss": 0.13107681274414062, "step": 6984 }, { "epoch": 0.9733156831324462, "grad_norm": 0.8476294875144958, "learning_rate": 2.0675429014807568e-08, "loss": 0.12556076049804688, "step": 6985 }, { "epoch": 0.9734550268236606, "grad_norm": 0.9381924867630005, "learning_rate": 2.0461876799898196e-08, "loss": 0.1327228546142578, "step": 6986 }, { "epoch": 0.973594370514875, "grad_norm": 1.2616028785705566, "learning_rate": 2.024943092467424e-08, "loss": 0.1332683563232422, "step": 6987 }, { "epoch": 0.9737337142060893, "grad_norm": 0.7669104337692261, "learning_rate": 2.0038091436337392e-08, "loss": 0.14155960083007812, "step": 6988 }, { "epoch": 0.9738730578973037, "grad_norm": 0.8777995705604553, "learning_rate": 1.9827858381842312e-08, "loss": 0.14395904541015625, "step": 6989 }, { "epoch": 0.9740124015885181, "grad_norm": 0.905543327331543, "learning_rate": 1.961873180789775e-08, "loss": 0.1299591064453125, "step": 6990 }, { "epoch": 0.9741517452797325, "grad_norm": 0.8114557862281799, "learning_rate": 1.9410711760967092e-08, "loss": 0.12264251708984375, "step": 6991 }, { "epoch": 0.9742910889709469, "grad_norm": 0.7149608731269836, "learning_rate": 1.920379828726726e-08, "loss": 0.11399078369140625, "step": 6992 }, { "epoch": 0.9744304326621612, "grad_norm": 0.9732621312141418, "learning_rate": 1.8997991432769812e-08, "loss": 0.1409759521484375, "step": 6993 }, { "epoch": 0.9745697763533756, "grad_norm": 0.6190322637557983, "learning_rate": 1.8793291243200396e-08, "loss": 0.13472747802734375, "step": 6994 }, { "epoch": 0.97470912004459, "grad_norm": 0.8010445237159729, "learning_rate": 1.8589697764039295e-08, "loss": 0.13576889038085938, "step": 6995 }, { "epoch": 0.9748484637358044, "grad_norm": 1.1131389141082764, "learning_rate": 1.8387211040519216e-08, "loss": 0.15032577514648438, "step": 6996 }, { "epoch": 0.9749878074270187, "grad_norm": 0.9225115776062012, "learning_rate": 1.818583111762917e-08, "loss": 0.11827850341796875, "step": 6997 }, { "epoch": 0.9751271511182331, "grad_norm": 1.2778240442276, "learning_rate": 1.7985558040110594e-08, "loss": 0.14405059814453125, "step": 6998 }, { "epoch": 0.9752664948094475, "grad_norm": 1.0447083711624146, "learning_rate": 1.778639185245956e-08, "loss": 0.15871429443359375, "step": 6999 }, { "epoch": 0.9754058385006619, "grad_norm": 0.5695931315422058, "learning_rate": 1.758833259892623e-08, "loss": 0.11576080322265625, "step": 7000 }, { "epoch": 0.9755451821918762, "grad_norm": 1.0033905506134033, "learning_rate": 1.7391380323515395e-08, "loss": 0.13314056396484375, "step": 7001 }, { "epoch": 0.9756845258830906, "grad_norm": 0.9602681994438171, "learning_rate": 1.7195535069984838e-08, "loss": 0.14380645751953125, "step": 7002 }, { "epoch": 0.975823869574305, "grad_norm": 1.0189961194992065, "learning_rate": 1.700079688184697e-08, "loss": 0.16797637939453125, "step": 7003 }, { "epoch": 0.9759632132655194, "grad_norm": 1.0236191749572754, "learning_rate": 1.6807165802368297e-08, "loss": 0.148956298828125, "step": 7004 }, { "epoch": 0.9761025569567338, "grad_norm": 0.9068326950073242, "learning_rate": 1.661464187456885e-08, "loss": 0.12398910522460938, "step": 7005 }, { "epoch": 0.9762419006479481, "grad_norm": 1.0486280918121338, "learning_rate": 1.6423225141223854e-08, "loss": 0.15571975708007812, "step": 7006 }, { "epoch": 0.9763812443391625, "grad_norm": 0.719314455986023, "learning_rate": 1.623291564486096e-08, "loss": 0.11402320861816406, "step": 7007 }, { "epoch": 0.9765205880303769, "grad_norm": 1.4125248193740845, "learning_rate": 1.604371342776301e-08, "loss": 0.1515350341796875, "step": 7008 }, { "epoch": 0.9766599317215913, "grad_norm": 0.7604340314865112, "learning_rate": 1.585561853196582e-08, "loss": 0.12107086181640625, "step": 7009 }, { "epoch": 0.9767992754128056, "grad_norm": 0.8480497002601624, "learning_rate": 1.5668630999260968e-08, "loss": 0.14250946044921875, "step": 7010 }, { "epoch": 0.97693861910402, "grad_norm": 0.9477865695953369, "learning_rate": 1.5482750871191333e-08, "loss": 0.11493301391601562, "step": 7011 }, { "epoch": 0.9770779627952344, "grad_norm": 1.6325130462646484, "learning_rate": 1.529797818905665e-08, "loss": 0.17984962463378906, "step": 7012 }, { "epoch": 0.9772173064864488, "grad_norm": 0.8666277527809143, "learning_rate": 1.5114312993908532e-08, "loss": 0.11784172058105469, "step": 7013 }, { "epoch": 0.9773566501776632, "grad_norm": 0.9160754680633545, "learning_rate": 1.4931755326552667e-08, "loss": 0.15122604370117188, "step": 7014 }, { "epoch": 0.9774959938688775, "grad_norm": 0.8898971676826477, "learning_rate": 1.4750305227549943e-08, "loss": 0.13724136352539062, "step": 7015 }, { "epoch": 0.977635337560092, "grad_norm": 1.370010495185852, "learning_rate": 1.4569962737214228e-08, "loss": 0.16962242126464844, "step": 7016 }, { "epoch": 0.9777746812513064, "grad_norm": 1.2389808893203735, "learning_rate": 1.4390727895613465e-08, "loss": 0.132415771484375, "step": 7017 }, { "epoch": 0.9779140249425208, "grad_norm": 1.3316761255264282, "learning_rate": 1.4212600742569694e-08, "loss": 0.16596603393554688, "step": 7018 }, { "epoch": 0.9780533686337352, "grad_norm": 0.6713621020317078, "learning_rate": 1.4035581317658476e-08, "loss": 0.11277008056640625, "step": 7019 }, { "epoch": 0.9781927123249495, "grad_norm": 1.1257437467575073, "learning_rate": 1.3859669660209463e-08, "loss": 0.17914581298828125, "step": 7020 }, { "epoch": 0.9783320560161639, "grad_norm": 0.8100171089172363, "learning_rate": 1.368486580930639e-08, "loss": 0.12890625, "step": 7021 }, { "epoch": 0.9784713997073783, "grad_norm": 0.9181806445121765, "learning_rate": 1.3511169803786527e-08, "loss": 0.13415145874023438, "step": 7022 }, { "epoch": 0.9786107433985927, "grad_norm": 0.9216269254684448, "learning_rate": 1.333858168224178e-08, "loss": 0.1439971923828125, "step": 7023 }, { "epoch": 0.978750087089807, "grad_norm": 0.7295988202095032, "learning_rate": 1.3167101483016476e-08, "loss": 0.12270736694335938, "step": 7024 }, { "epoch": 0.9788894307810214, "grad_norm": 0.8473370671272278, "learning_rate": 1.2996729244209583e-08, "loss": 0.12182998657226562, "step": 7025 }, { "epoch": 0.9790287744722358, "grad_norm": 0.7150574922561646, "learning_rate": 1.282746500367471e-08, "loss": 0.11739349365234375, "step": 7026 }, { "epoch": 0.9791681181634502, "grad_norm": 1.4415630102157593, "learning_rate": 1.2659308799017889e-08, "loss": 0.192291259765625, "step": 7027 }, { "epoch": 0.9793074618546646, "grad_norm": 0.7481850385665894, "learning_rate": 1.2492260667599232e-08, "loss": 0.10908889770507812, "step": 7028 }, { "epoch": 0.9794468055458789, "grad_norm": 0.7075655460357666, "learning_rate": 1.2326320646534051e-08, "loss": 0.12086868286132812, "step": 7029 }, { "epoch": 0.9795861492370933, "grad_norm": 1.296706199645996, "learning_rate": 1.2161488772690077e-08, "loss": 0.141693115234375, "step": 7030 }, { "epoch": 0.9797254929283077, "grad_norm": 1.0970350503921509, "learning_rate": 1.1997765082688573e-08, "loss": 0.14104461669921875, "step": 7031 }, { "epoch": 0.9798648366195221, "grad_norm": 0.9031831622123718, "learning_rate": 1.1835149612905438e-08, "loss": 0.12797164916992188, "step": 7032 }, { "epoch": 0.9800041803107364, "grad_norm": 0.7380760908126831, "learning_rate": 1.1673642399470663e-08, "loss": 0.1272125244140625, "step": 7033 }, { "epoch": 0.9801435240019508, "grad_norm": 0.5833706855773926, "learning_rate": 1.1513243478267211e-08, "loss": 0.11452865600585938, "step": 7034 }, { "epoch": 0.9802828676931652, "grad_norm": 1.7822479009628296, "learning_rate": 1.135395288493213e-08, "loss": 0.1724872589111328, "step": 7035 }, { "epoch": 0.9804222113843796, "grad_norm": 0.6282265782356262, "learning_rate": 1.1195770654855443e-08, "loss": 0.12423515319824219, "step": 7036 }, { "epoch": 0.980561555075594, "grad_norm": 0.7153410911560059, "learning_rate": 1.1038696823182372e-08, "loss": 0.12616729736328125, "step": 7037 }, { "epoch": 0.9807008987668083, "grad_norm": 0.9266884922981262, "learning_rate": 1.088273142481111e-08, "loss": 0.13899993896484375, "step": 7038 }, { "epoch": 0.9808402424580227, "grad_norm": 1.099208116531372, "learning_rate": 1.0727874494393386e-08, "loss": 0.13124847412109375, "step": 7039 }, { "epoch": 0.9809795861492371, "grad_norm": 1.3574100732803345, "learning_rate": 1.0574126066335011e-08, "loss": 0.1659088134765625, "step": 7040 }, { "epoch": 0.9811189298404515, "grad_norm": 0.5481081604957581, "learning_rate": 1.0421486174795326e-08, "loss": 0.11852645874023438, "step": 7041 }, { "epoch": 0.9812582735316658, "grad_norm": 0.7082172632217407, "learning_rate": 1.0269954853687202e-08, "loss": 0.1157379150390625, "step": 7042 }, { "epoch": 0.9813976172228802, "grad_norm": 0.5915371179580688, "learning_rate": 1.01195321366776e-08, "loss": 0.10744476318359375, "step": 7043 }, { "epoch": 0.9815369609140946, "grad_norm": 1.4711241722106934, "learning_rate": 9.970218057187009e-09, "loss": 0.1334095001220703, "step": 7044 }, { "epoch": 0.981676304605309, "grad_norm": 0.9161356091499329, "learning_rate": 9.82201264839e-09, "loss": 0.14017486572265625, "step": 7045 }, { "epoch": 0.9818156482965233, "grad_norm": 0.8232769966125488, "learning_rate": 9.67491594321357e-09, "loss": 0.11521339416503906, "step": 7046 }, { "epoch": 0.9819549919877377, "grad_norm": 0.7893767356872559, "learning_rate": 9.528927974339908e-09, "loss": 0.12377357482910156, "step": 7047 }, { "epoch": 0.9820943356789521, "grad_norm": 1.0244101285934448, "learning_rate": 9.38404877420418e-09, "loss": 0.1512603759765625, "step": 7048 }, { "epoch": 0.9822336793701665, "grad_norm": 0.6359296441078186, "learning_rate": 9.240278374995637e-09, "loss": 0.10213851928710938, "step": 7049 }, { "epoch": 0.9823730230613809, "grad_norm": 0.935226559638977, "learning_rate": 9.097616808655396e-09, "loss": 0.157501220703125, "step": 7050 }, { "epoch": 0.9825123667525952, "grad_norm": 0.8232277631759644, "learning_rate": 8.95606410688088e-09, "loss": 0.13592529296875, "step": 7051 }, { "epoch": 0.9826517104438096, "grad_norm": 0.6824791431427002, "learning_rate": 8.815620301121375e-09, "loss": 0.12961196899414062, "step": 7052 }, { "epoch": 0.982791054135024, "grad_norm": 1.0708776712417603, "learning_rate": 8.676285422580255e-09, "loss": 0.1296062469482422, "step": 7053 }, { "epoch": 0.9829303978262384, "grad_norm": 0.9162082076072693, "learning_rate": 8.538059502214979e-09, "loss": 0.12313079833984375, "step": 7054 }, { "epoch": 0.9830697415174527, "grad_norm": 0.9056510925292969, "learning_rate": 8.400942570735427e-09, "loss": 0.13356399536132812, "step": 7055 }, { "epoch": 0.9832090852086672, "grad_norm": 0.8589694499969482, "learning_rate": 8.264934658606672e-09, "loss": 0.11826515197753906, "step": 7056 }, { "epoch": 0.9833484288998816, "grad_norm": 0.7537327408790588, "learning_rate": 8.13003579604621e-09, "loss": 0.12690353393554688, "step": 7057 }, { "epoch": 0.983487772591096, "grad_norm": 0.745009183883667, "learning_rate": 7.996246013025067e-09, "loss": 0.13077163696289062, "step": 7058 }, { "epoch": 0.9836271162823104, "grad_norm": 1.3144315481185913, "learning_rate": 7.863565339268908e-09, "loss": 0.14017486572265625, "step": 7059 }, { "epoch": 0.9837664599735247, "grad_norm": 0.9089825749397278, "learning_rate": 7.731993804256378e-09, "loss": 0.1365509033203125, "step": 7060 }, { "epoch": 0.9839058036647391, "grad_norm": 0.6227002739906311, "learning_rate": 7.60153143721909e-09, "loss": 0.10655593872070312, "step": 7061 }, { "epoch": 0.9840451473559535, "grad_norm": 0.9379951357841492, "learning_rate": 7.472178267143304e-09, "loss": 0.1476593017578125, "step": 7062 }, { "epoch": 0.9841844910471679, "grad_norm": 1.214293122291565, "learning_rate": 7.343934322767699e-09, "loss": 0.16666793823242188, "step": 7063 }, { "epoch": 0.9843238347383823, "grad_norm": 0.831813633441925, "learning_rate": 7.216799632586147e-09, "loss": 0.13726043701171875, "step": 7064 }, { "epoch": 0.9844631784295966, "grad_norm": 0.6321477890014648, "learning_rate": 7.0907742248443875e-09, "loss": 0.12317657470703125, "step": 7065 }, { "epoch": 0.984602522120811, "grad_norm": 0.7403538823127747, "learning_rate": 6.965858127542247e-09, "loss": 0.1352977752685547, "step": 7066 }, { "epoch": 0.9847418658120254, "grad_norm": 0.9079886078834534, "learning_rate": 6.842051368433633e-09, "loss": 0.13921356201171875, "step": 7067 }, { "epoch": 0.9848812095032398, "grad_norm": 1.5327057838439941, "learning_rate": 6.719353975025989e-09, "loss": 0.13111495971679688, "step": 7068 }, { "epoch": 0.9850205531944541, "grad_norm": 0.8488468527793884, "learning_rate": 6.5977659745786185e-09, "loss": 0.1343536376953125, "step": 7069 }, { "epoch": 0.9851598968856685, "grad_norm": 1.3240388631820679, "learning_rate": 6.477287394107134e-09, "loss": 0.16831588745117188, "step": 7070 }, { "epoch": 0.9852992405768829, "grad_norm": 1.2417588233947754, "learning_rate": 6.357918260377349e-09, "loss": 0.18682479858398438, "step": 7071 }, { "epoch": 0.9854385842680973, "grad_norm": 0.6235087513923645, "learning_rate": 6.239658599911935e-09, "loss": 0.110443115234375, "step": 7072 }, { "epoch": 0.9855779279593117, "grad_norm": 0.7411391139030457, "learning_rate": 6.122508438984875e-09, "loss": 0.116058349609375, "step": 7073 }, { "epoch": 0.985717271650526, "grad_norm": 1.5365135669708252, "learning_rate": 6.0064678036242385e-09, "loss": 0.17166900634765625, "step": 7074 }, { "epoch": 0.9858566153417404, "grad_norm": 0.7892328500747681, "learning_rate": 5.891536719611624e-09, "loss": 0.1187896728515625, "step": 7075 }, { "epoch": 0.9859959590329548, "grad_norm": 1.637088656425476, "learning_rate": 5.77771521248216e-09, "loss": 0.1761627197265625, "step": 7076 }, { "epoch": 0.9861353027241692, "grad_norm": 1.338351845741272, "learning_rate": 5.665003307524508e-09, "loss": 0.15126419067382812, "step": 7077 }, { "epoch": 0.9862746464153835, "grad_norm": 0.7813568711280823, "learning_rate": 5.5534010297803034e-09, "loss": 0.10732078552246094, "step": 7078 }, { "epoch": 0.9864139901065979, "grad_norm": 1.2804409265518188, "learning_rate": 5.4429084040452665e-09, "loss": 0.158233642578125, "step": 7079 }, { "epoch": 0.9865533337978123, "grad_norm": 0.6356475353240967, "learning_rate": 5.333525454868094e-09, "loss": 0.11881828308105469, "step": 7080 }, { "epoch": 0.9866926774890267, "grad_norm": 0.8437211513519287, "learning_rate": 5.225252206551568e-09, "loss": 0.12664031982421875, "step": 7081 }, { "epoch": 0.986832021180241, "grad_norm": 1.3465102910995483, "learning_rate": 5.118088683151445e-09, "loss": 0.15815353393554688, "step": 7082 }, { "epoch": 0.9869713648714554, "grad_norm": 0.8408297300338745, "learning_rate": 5.01203490847646e-09, "loss": 0.14340972900390625, "step": 7083 }, { "epoch": 0.9871107085626698, "grad_norm": 0.8821683526039124, "learning_rate": 4.907090906090539e-09, "loss": 0.1493072509765625, "step": 7084 }, { "epoch": 0.9872500522538842, "grad_norm": 0.9138985872268677, "learning_rate": 4.803256699308923e-09, "loss": 0.13921356201171875, "step": 7085 }, { "epoch": 0.9873893959450986, "grad_norm": 1.026130199432373, "learning_rate": 4.700532311200934e-09, "loss": 0.150238037109375, "step": 7086 }, { "epoch": 0.9875287396363129, "grad_norm": 1.2213013172149658, "learning_rate": 4.598917764590538e-09, "loss": 0.15485763549804688, "step": 7087 }, { "epoch": 0.9876680833275273, "grad_norm": 0.9614902138710022, "learning_rate": 4.498413082053566e-09, "loss": 0.13384246826171875, "step": 7088 }, { "epoch": 0.9878074270187417, "grad_norm": 1.1097369194030762, "learning_rate": 4.399018285919376e-09, "loss": 0.1504688262939453, "step": 7089 }, { "epoch": 0.9879467707099561, "grad_norm": 0.9738209843635559, "learning_rate": 4.300733398272528e-09, "loss": 0.1308746337890625, "step": 7090 }, { "epoch": 0.9880861144011704, "grad_norm": 1.0717672109603882, "learning_rate": 4.203558440948885e-09, "loss": 0.1541614532470703, "step": 7091 }, { "epoch": 0.9882254580923848, "grad_norm": 0.7673239707946777, "learning_rate": 4.1074934355384015e-09, "loss": 0.12671661376953125, "step": 7092 }, { "epoch": 0.9883648017835992, "grad_norm": 0.9230731129646301, "learning_rate": 4.0125384033845586e-09, "loss": 0.11856842041015625, "step": 7093 }, { "epoch": 0.9885041454748136, "grad_norm": 1.0456135272979736, "learning_rate": 3.91869336558437e-09, "loss": 0.1684112548828125, "step": 7094 }, { "epoch": 0.988643489166028, "grad_norm": 1.1006405353546143, "learning_rate": 3.8259583429883785e-09, "loss": 0.14425277709960938, "step": 7095 }, { "epoch": 0.9887828328572423, "grad_norm": 0.7569211721420288, "learning_rate": 3.734333356199548e-09, "loss": 0.11986541748046875, "step": 7096 }, { "epoch": 0.9889221765484568, "grad_norm": 0.8263541460037231, "learning_rate": 3.643818425575485e-09, "loss": 0.14043617248535156, "step": 7097 }, { "epoch": 0.9890615202396712, "grad_norm": 0.8507075905799866, "learning_rate": 3.5544135712262116e-09, "loss": 0.13233184814453125, "step": 7098 }, { "epoch": 0.9892008639308856, "grad_norm": 1.0879287719726562, "learning_rate": 3.4661188130147295e-09, "loss": 0.14198684692382812, "step": 7099 }, { "epoch": 0.9893402076221, "grad_norm": 1.1113309860229492, "learning_rate": 3.378934170559789e-09, "loss": 0.15576171875, "step": 7100 }, { "epoch": 0.9894795513133143, "grad_norm": 1.0162715911865234, "learning_rate": 3.292859663230341e-09, "loss": 0.13998031616210938, "step": 7101 }, { "epoch": 0.9896188950045287, "grad_norm": 0.7450385093688965, "learning_rate": 3.207895310150533e-09, "loss": 0.13325881958007812, "step": 7102 }, { "epoch": 0.9897582386957431, "grad_norm": 1.4929523468017578, "learning_rate": 3.1240411301980413e-09, "loss": 0.1852264404296875, "step": 7103 }, { "epoch": 0.9898975823869575, "grad_norm": 0.8237869739532471, "learning_rate": 3.0412971420029636e-09, "loss": 0.12700653076171875, "step": 7104 }, { "epoch": 0.9900369260781718, "grad_norm": 0.8737243413925171, "learning_rate": 2.959663363949483e-09, "loss": 0.13315963745117188, "step": 7105 }, { "epoch": 0.9901762697693862, "grad_norm": 0.7753551006317139, "learning_rate": 2.8791398141736484e-09, "loss": 0.14306259155273438, "step": 7106 }, { "epoch": 0.9903156134606006, "grad_norm": 1.2200895547866821, "learning_rate": 2.799726510567258e-09, "loss": 0.18133163452148438, "step": 7107 }, { "epoch": 0.990454957151815, "grad_norm": 0.7911178469657898, "learning_rate": 2.721423470773421e-09, "loss": 0.11389541625976562, "step": 7108 }, { "epoch": 0.9905943008430294, "grad_norm": 0.9530290365219116, "learning_rate": 2.644230712189888e-09, "loss": 0.1372833251953125, "step": 7109 }, { "epoch": 0.9907336445342437, "grad_norm": 0.794988214969635, "learning_rate": 2.5681482519662736e-09, "loss": 0.12788772583007812, "step": 7110 }, { "epoch": 0.9908729882254581, "grad_norm": 0.6980516314506531, "learning_rate": 2.493176107006834e-09, "loss": 0.12554359436035156, "step": 7111 }, { "epoch": 0.9910123319166725, "grad_norm": 0.8745511770248413, "learning_rate": 2.4193142939687996e-09, "loss": 0.13482666015625, "step": 7112 }, { "epoch": 0.9911516756078869, "grad_norm": 1.092488408088684, "learning_rate": 2.3465628292623776e-09, "loss": 0.1413726806640625, "step": 7113 }, { "epoch": 0.9912910192991012, "grad_norm": 1.0209332704544067, "learning_rate": 2.2749217290513048e-09, "loss": 0.16254425048828125, "step": 7114 }, { "epoch": 0.9914303629903156, "grad_norm": 0.8850387334823608, "learning_rate": 2.2043910092522935e-09, "loss": 0.14593887329101562, "step": 7115 }, { "epoch": 0.99156970668153, "grad_norm": 1.252876877784729, "learning_rate": 2.134970685536697e-09, "loss": 0.18941879272460938, "step": 7116 }, { "epoch": 0.9917090503727444, "grad_norm": 0.9681565165519714, "learning_rate": 2.066660773326623e-09, "loss": 0.12291717529296875, "step": 7117 }, { "epoch": 0.9918483940639587, "grad_norm": 0.8377609252929688, "learning_rate": 1.999461287800486e-09, "loss": 0.14577484130859375, "step": 7118 }, { "epoch": 0.9919877377551731, "grad_norm": 1.0339592695236206, "learning_rate": 1.9333722438874548e-09, "loss": 0.13835525512695312, "step": 7119 }, { "epoch": 0.9921270814463875, "grad_norm": 0.7999751567840576, "learning_rate": 1.868393656271339e-09, "loss": 0.11729049682617188, "step": 7120 }, { "epoch": 0.9922664251376019, "grad_norm": 1.4002420902252197, "learning_rate": 1.8045255393889238e-09, "loss": 0.1614532470703125, "step": 7121 }, { "epoch": 0.9924057688288163, "grad_norm": 0.8105090260505676, "learning_rate": 1.7417679074299698e-09, "loss": 0.14126968383789062, "step": 7122 }, { "epoch": 0.9925451125200306, "grad_norm": 0.7580491900444031, "learning_rate": 1.680120774338323e-09, "loss": 0.11705398559570312, "step": 7123 }, { "epoch": 0.992684456211245, "grad_norm": 0.7834908366203308, "learning_rate": 1.6195841538096947e-09, "loss": 0.13100051879882812, "step": 7124 }, { "epoch": 0.9928237999024594, "grad_norm": 0.8395032286643982, "learning_rate": 1.5601580592949916e-09, "loss": 0.13236618041992188, "step": 7125 }, { "epoch": 0.9929631435936738, "grad_norm": 0.7559058666229248, "learning_rate": 1.5018425039969864e-09, "loss": 0.12850189208984375, "step": 7126 }, { "epoch": 0.9931024872848881, "grad_norm": 0.8839855194091797, "learning_rate": 1.4446375008714264e-09, "loss": 0.1411590576171875, "step": 7127 }, { "epoch": 0.9932418309761025, "grad_norm": 0.9011359810829163, "learning_rate": 1.3885430626287e-09, "loss": 0.1311969757080078, "step": 7128 }, { "epoch": 0.9933811746673169, "grad_norm": 0.7938371300697327, "learning_rate": 1.3335592017316156e-09, "loss": 0.139373779296875, "step": 7129 }, { "epoch": 0.9935205183585313, "grad_norm": 0.6185827851295471, "learning_rate": 1.2796859303959575e-09, "loss": 0.11087608337402344, "step": 7130 }, { "epoch": 0.9936598620497457, "grad_norm": 0.5377659201622009, "learning_rate": 1.2269232605915948e-09, "loss": 0.09711074829101562, "step": 7131 }, { "epoch": 0.99379920574096, "grad_norm": 1.2349613904953003, "learning_rate": 1.1752712040408176e-09, "loss": 0.15224838256835938, "step": 7132 }, { "epoch": 0.9939385494321744, "grad_norm": 0.7735406756401062, "learning_rate": 1.124729772219446e-09, "loss": 0.1351165771484375, "step": 7133 }, { "epoch": 0.9940778931233888, "grad_norm": 0.9416895508766174, "learning_rate": 1.075298976356831e-09, "loss": 0.14171981811523438, "step": 7134 }, { "epoch": 0.9942172368146032, "grad_norm": 1.1494790315628052, "learning_rate": 1.026978827435854e-09, "loss": 0.160125732421875, "step": 7135 }, { "epoch": 0.9943565805058175, "grad_norm": 0.890387237071991, "learning_rate": 9.797693361912607e-10, "loss": 0.13290977478027344, "step": 7136 }, { "epoch": 0.994495924197032, "grad_norm": 0.7589311599731445, "learning_rate": 9.33670513112439e-10, "loss": 0.14990615844726562, "step": 7137 }, { "epoch": 0.9946352678882464, "grad_norm": 1.2802090644836426, "learning_rate": 8.886823684417512e-10, "loss": 0.14727020263671875, "step": 7138 }, { "epoch": 0.9947746115794608, "grad_norm": 1.5277799367904663, "learning_rate": 8.448049121739798e-10, "loss": 0.13438796997070312, "step": 7139 }, { "epoch": 0.9949139552706752, "grad_norm": 1.492636799812317, "learning_rate": 8.020381540579936e-10, "loss": 0.17812728881835938, "step": 7140 }, { "epoch": 0.9950532989618895, "grad_norm": 0.9297862648963928, "learning_rate": 7.603821035950809e-10, "loss": 0.15232086181640625, "step": 7141 }, { "epoch": 0.9951926426531039, "grad_norm": 1.1156057119369507, "learning_rate": 7.198367700411712e-10, "loss": 0.13721656799316406, "step": 7142 }, { "epoch": 0.9953319863443183, "grad_norm": 1.4561494588851929, "learning_rate": 6.80402162403504e-10, "loss": 0.13843154907226562, "step": 7143 }, { "epoch": 0.9954713300355327, "grad_norm": 1.25209641456604, "learning_rate": 6.420782894445144e-10, "loss": 0.15613555908203125, "step": 7144 }, { "epoch": 0.995610673726747, "grad_norm": 0.7390438914299011, "learning_rate": 6.048651596785027e-10, "loss": 0.11433219909667969, "step": 7145 }, { "epoch": 0.9957500174179614, "grad_norm": 0.7593472599983215, "learning_rate": 5.687627813727448e-10, "loss": 0.13330841064453125, "step": 7146 }, { "epoch": 0.9958893611091758, "grad_norm": 0.8544654846191406, "learning_rate": 5.337711625497122e-10, "loss": 0.13139724731445312, "step": 7147 }, { "epoch": 0.9960287048003902, "grad_norm": 0.8806941509246826, "learning_rate": 4.998903109826314e-10, "loss": 0.12530899047851562, "step": 7148 }, { "epoch": 0.9961680484916046, "grad_norm": 1.6819846630096436, "learning_rate": 4.671202341993697e-10, "loss": 0.16350173950195312, "step": 7149 }, { "epoch": 0.9963073921828189, "grad_norm": 1.1212114095687866, "learning_rate": 4.354609394802145e-10, "loss": 0.188140869140625, "step": 7150 }, { "epoch": 0.9964467358740333, "grad_norm": 0.7340941429138184, "learning_rate": 4.0491243386009403e-10, "loss": 0.14007949829101562, "step": 7151 }, { "epoch": 0.9965860795652477, "grad_norm": 0.9333603978157043, "learning_rate": 3.7547472412580167e-10, "loss": 0.12584686279296875, "step": 7152 }, { "epoch": 0.9967254232564621, "grad_norm": 0.8485683798789978, "learning_rate": 3.471478168176612e-10, "loss": 0.12685394287109375, "step": 7153 }, { "epoch": 0.9968647669476765, "grad_norm": 0.6744230389595032, "learning_rate": 3.19931718229527e-10, "loss": 0.1275196075439453, "step": 7154 }, { "epoch": 0.9970041106388908, "grad_norm": 0.8410812616348267, "learning_rate": 2.9382643440767354e-10, "loss": 0.128936767578125, "step": 7155 }, { "epoch": 0.9971434543301052, "grad_norm": 1.0100016593933105, "learning_rate": 2.6883197115190606e-10, "loss": 0.15123367309570312, "step": 7156 }, { "epoch": 0.9972827980213196, "grad_norm": 0.7430000901222229, "learning_rate": 2.4494833401667027e-10, "loss": 0.12113571166992188, "step": 7157 }, { "epoch": 0.997422141712534, "grad_norm": 0.9948369264602661, "learning_rate": 2.2217552830716693e-10, "loss": 0.12982940673828125, "step": 7158 }, { "epoch": 0.9975614854037483, "grad_norm": 1.0033162832260132, "learning_rate": 2.0051355908323743e-10, "loss": 0.14682388305664062, "step": 7159 }, { "epoch": 0.9977008290949627, "grad_norm": 1.079065203666687, "learning_rate": 1.7996243115769863e-10, "loss": 0.13365554809570312, "step": 7160 }, { "epoch": 0.9978401727861771, "grad_norm": 0.6164223551750183, "learning_rate": 1.605221490968978e-10, "loss": 0.12080764770507812, "step": 7161 }, { "epoch": 0.9979795164773915, "grad_norm": 1.3443260192871094, "learning_rate": 1.421927172201576e-10, "loss": 0.17339324951171875, "step": 7162 }, { "epoch": 0.9981188601686058, "grad_norm": 1.2024661302566528, "learning_rate": 1.24974139599221e-10, "loss": 0.14928054809570312, "step": 7163 }, { "epoch": 0.9982582038598202, "grad_norm": 2.271519899368286, "learning_rate": 1.0886642005991654e-10, "loss": 0.15433502197265625, "step": 7164 }, { "epoch": 0.9983975475510346, "grad_norm": 0.5916943550109863, "learning_rate": 9.386956218104815e-11, "loss": 0.10861587524414062, "step": 7165 }, { "epoch": 0.998536891242249, "grad_norm": 1.068396806716919, "learning_rate": 7.998356929439511e-11, "loss": 0.13859176635742188, "step": 7166 }, { "epoch": 0.9986762349334634, "grad_norm": 0.861706554889679, "learning_rate": 6.72084444852672e-11, "loss": 0.12215423583984375, "step": 7167 }, { "epoch": 0.9988155786246777, "grad_norm": 0.4580114483833313, "learning_rate": 5.554419059250471e-11, "loss": 0.1006011962890625, "step": 7168 }, { "epoch": 0.9989549223158921, "grad_norm": 1.272417664527893, "learning_rate": 4.499081020681306e-11, "loss": 0.1990680694580078, "step": 7169 }, { "epoch": 0.9990942660071065, "grad_norm": 1.9550714492797852, "learning_rate": 3.554830567298328e-11, "loss": 0.18977928161621094, "step": 7170 }, { "epoch": 0.9992336096983209, "grad_norm": 0.819919764995575, "learning_rate": 2.7216679089892008e-11, "loss": 0.12973403930664062, "step": 7171 }, { "epoch": 0.9993729533895352, "grad_norm": 0.6745305061340332, "learning_rate": 1.9995932307170783e-11, "loss": 0.11665725708007812, "step": 7172 }, { "epoch": 0.9995122970807496, "grad_norm": 1.0938024520874023, "learning_rate": 1.3886066930202113e-11, "loss": 0.15660476684570312, "step": 7173 }, { "epoch": 0.999651640771964, "grad_norm": 1.0001366138458252, "learning_rate": 8.88708431623364e-12, "loss": 0.12456703186035156, "step": 7174 }, { "epoch": 0.9997909844631784, "grad_norm": 1.098741054534912, "learning_rate": 4.998985576043503e-12, "loss": 0.14303016662597656, "step": 7175 }, { "epoch": 0.9999303281543928, "grad_norm": 1.2043172121047974, "learning_rate": 2.2217715728301003e-12, "loss": 0.1626129150390625, "step": 7176 }, { "epoch": 1.0, "grad_norm": 1.5785499811172485, "learning_rate": 5.554429238774361e-13, "loss": 0.18517303466796875, "step": 7177 }, { "epoch": 1.0, "step": 7177, "total_flos": 5.055987703972102e+19, "train_loss": 0.0823975771071769, "train_runtime": 41428.5184, "train_samples_per_second": 44.345, "train_steps_per_second": 0.173 } ], "logging_steps": 1.0, "max_steps": 7177, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.055987703972102e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }