{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9991577765300392, "eval_steps": 500, "global_step": 3561, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008422234699606962, "grad_norm": 30.991933822631836, "learning_rate": 2.8011204481792718e-08, "loss": 4.9658, "step": 1 }, { "epoch": 0.0016844469399213925, "grad_norm": 32.37934112548828, "learning_rate": 5.6022408963585437e-08, "loss": 5.3825, "step": 2 }, { "epoch": 0.0025266704098820887, "grad_norm": 31.8385009765625, "learning_rate": 8.403361344537815e-08, "loss": 4.9937, "step": 3 }, { "epoch": 0.003368893879842785, "grad_norm": 31.4580078125, "learning_rate": 1.1204481792717087e-07, "loss": 4.9687, "step": 4 }, { "epoch": 0.004211117349803481, "grad_norm": 31.649675369262695, "learning_rate": 1.400560224089636e-07, "loss": 5.0928, "step": 5 }, { "epoch": 0.0050533408197641775, "grad_norm": 30.02669334411621, "learning_rate": 1.680672268907563e-07, "loss": 4.5973, "step": 6 }, { "epoch": 0.005895564289724873, "grad_norm": 33.28166198730469, "learning_rate": 1.9607843137254904e-07, "loss": 5.721, "step": 7 }, { "epoch": 0.00673778775968557, "grad_norm": 34.23237609863281, "learning_rate": 2.2408963585434175e-07, "loss": 5.8201, "step": 8 }, { "epoch": 0.007580011229646266, "grad_norm": 32.18265151977539, "learning_rate": 2.5210084033613445e-07, "loss": 5.0691, "step": 9 }, { "epoch": 0.008422234699606962, "grad_norm": 30.266523361206055, "learning_rate": 2.801120448179272e-07, "loss": 4.7771, "step": 10 }, { "epoch": 0.009264458169567658, "grad_norm": 29.57845115661621, "learning_rate": 3.081232492997199e-07, "loss": 4.7677, "step": 11 }, { "epoch": 0.010106681639528355, "grad_norm": 30.926292419433594, "learning_rate": 3.361344537815126e-07, "loss": 5.0789, "step": 12 }, { "epoch": 0.010948905109489052, "grad_norm": 30.451416015625, "learning_rate": 3.641456582633054e-07, "loss": 4.8971, "step": 13 }, { "epoch": 0.011791128579449747, "grad_norm": 32.91046905517578, "learning_rate": 3.921568627450981e-07, "loss": 5.1747, "step": 14 }, { "epoch": 0.012633352049410443, "grad_norm": 27.805429458618164, "learning_rate": 4.201680672268908e-07, "loss": 4.2511, "step": 15 }, { "epoch": 0.01347557551937114, "grad_norm": 30.205392837524414, "learning_rate": 4.481792717086835e-07, "loss": 4.687, "step": 16 }, { "epoch": 0.014317798989331837, "grad_norm": 29.482707977294922, "learning_rate": 4.7619047619047623e-07, "loss": 4.749, "step": 17 }, { "epoch": 0.015160022459292532, "grad_norm": 30.675325393676758, "learning_rate": 5.042016806722689e-07, "loss": 4.8769, "step": 18 }, { "epoch": 0.016002245929253228, "grad_norm": 29.693439483642578, "learning_rate": 5.322128851540616e-07, "loss": 4.7735, "step": 19 }, { "epoch": 0.016844469399213923, "grad_norm": 28.60666275024414, "learning_rate": 5.602240896358544e-07, "loss": 4.4744, "step": 20 }, { "epoch": 0.01768669286917462, "grad_norm": 29.011796951293945, "learning_rate": 5.882352941176471e-07, "loss": 4.7373, "step": 21 }, { "epoch": 0.018528916339135316, "grad_norm": 29.433164596557617, "learning_rate": 6.162464985994398e-07, "loss": 4.8206, "step": 22 }, { "epoch": 0.019371139809096015, "grad_norm": 31.936656951904297, "learning_rate": 6.442577030812325e-07, "loss": 5.1623, "step": 23 }, { "epoch": 0.02021336327905671, "grad_norm": 29.96930694580078, "learning_rate": 6.722689075630252e-07, "loss": 4.8289, "step": 24 }, { "epoch": 0.021055586749017405, "grad_norm": 29.24088478088379, "learning_rate": 7.002801120448179e-07, "loss": 4.6245, "step": 25 }, { "epoch": 0.021897810218978103, "grad_norm": 26.089712142944336, "learning_rate": 7.282913165266108e-07, "loss": 4.2817, "step": 26 }, { "epoch": 0.022740033688938798, "grad_norm": 26.193500518798828, "learning_rate": 7.563025210084034e-07, "loss": 4.2326, "step": 27 }, { "epoch": 0.023582257158899493, "grad_norm": 26.907108306884766, "learning_rate": 7.843137254901962e-07, "loss": 4.2285, "step": 28 }, { "epoch": 0.02442448062886019, "grad_norm": 26.67288589477539, "learning_rate": 8.123249299719889e-07, "loss": 4.3059, "step": 29 }, { "epoch": 0.025266704098820886, "grad_norm": 26.997589111328125, "learning_rate": 8.403361344537816e-07, "loss": 4.2297, "step": 30 }, { "epoch": 0.026108927568781585, "grad_norm": 24.931304931640625, "learning_rate": 8.683473389355742e-07, "loss": 3.8221, "step": 31 }, { "epoch": 0.02695115103874228, "grad_norm": 26.835569381713867, "learning_rate": 8.96358543417367e-07, "loss": 4.1937, "step": 32 }, { "epoch": 0.027793374508702975, "grad_norm": 25.200382232666016, "learning_rate": 9.243697478991598e-07, "loss": 3.8944, "step": 33 }, { "epoch": 0.028635597978663673, "grad_norm": 24.978811264038086, "learning_rate": 9.523809523809525e-07, "loss": 3.8302, "step": 34 }, { "epoch": 0.029477821448624368, "grad_norm": 17.974613189697266, "learning_rate": 9.80392156862745e-07, "loss": 3.1628, "step": 35 }, { "epoch": 0.030320044918585063, "grad_norm": 14.774263381958008, "learning_rate": 1.0084033613445378e-06, "loss": 2.8232, "step": 36 }, { "epoch": 0.03116226838854576, "grad_norm": 14.507879257202148, "learning_rate": 1.0364145658263308e-06, "loss": 2.7378, "step": 37 }, { "epoch": 0.032004491858506456, "grad_norm": 14.030415534973145, "learning_rate": 1.0644257703081233e-06, "loss": 2.7958, "step": 38 }, { "epoch": 0.032846715328467155, "grad_norm": 13.379121780395508, "learning_rate": 1.092436974789916e-06, "loss": 2.702, "step": 39 }, { "epoch": 0.033688938798427846, "grad_norm": 14.61095905303955, "learning_rate": 1.1204481792717088e-06, "loss": 3.0312, "step": 40 }, { "epoch": 0.034531162268388545, "grad_norm": 12.001578330993652, "learning_rate": 1.1484593837535015e-06, "loss": 2.5911, "step": 41 }, { "epoch": 0.03537338573834924, "grad_norm": 10.606494903564453, "learning_rate": 1.1764705882352942e-06, "loss": 2.5982, "step": 42 }, { "epoch": 0.03621560920830994, "grad_norm": 9.748920440673828, "learning_rate": 1.204481792717087e-06, "loss": 2.5818, "step": 43 }, { "epoch": 0.03705783267827063, "grad_norm": 8.774176597595215, "learning_rate": 1.2324929971988797e-06, "loss": 2.4245, "step": 44 }, { "epoch": 0.03790005614823133, "grad_norm": 8.072068214416504, "learning_rate": 1.2605042016806724e-06, "loss": 2.7158, "step": 45 }, { "epoch": 0.03874227961819203, "grad_norm": 6.882115364074707, "learning_rate": 1.288515406162465e-06, "loss": 2.5097, "step": 46 }, { "epoch": 0.03958450308815272, "grad_norm": 7.557218074798584, "learning_rate": 1.316526610644258e-06, "loss": 2.3914, "step": 47 }, { "epoch": 0.04042672655811342, "grad_norm": 6.377716064453125, "learning_rate": 1.3445378151260504e-06, "loss": 2.0522, "step": 48 }, { "epoch": 0.04126895002807412, "grad_norm": 8.32819938659668, "learning_rate": 1.3725490196078434e-06, "loss": 2.4231, "step": 49 }, { "epoch": 0.04211117349803481, "grad_norm": 4.25623083114624, "learning_rate": 1.4005602240896359e-06, "loss": 1.9766, "step": 50 }, { "epoch": 0.04295339696799551, "grad_norm": 4.567144870758057, "learning_rate": 1.4285714285714286e-06, "loss": 2.3313, "step": 51 }, { "epoch": 0.043795620437956206, "grad_norm": 3.063164710998535, "learning_rate": 1.4565826330532216e-06, "loss": 1.9683, "step": 52 }, { "epoch": 0.0446378439079169, "grad_norm": 2.819375991821289, "learning_rate": 1.484593837535014e-06, "loss": 2.0484, "step": 53 }, { "epoch": 0.045480067377877596, "grad_norm": 2.9028871059417725, "learning_rate": 1.5126050420168068e-06, "loss": 2.1671, "step": 54 }, { "epoch": 0.046322290847838295, "grad_norm": 2.9542436599731445, "learning_rate": 1.5406162464985996e-06, "loss": 2.0937, "step": 55 }, { "epoch": 0.047164514317798986, "grad_norm": 2.782745599746704, "learning_rate": 1.5686274509803923e-06, "loss": 2.1805, "step": 56 }, { "epoch": 0.048006737787759685, "grad_norm": 3.511082410812378, "learning_rate": 1.5966386554621848e-06, "loss": 2.4903, "step": 57 }, { "epoch": 0.04884896125772038, "grad_norm": 2.4524970054626465, "learning_rate": 1.6246498599439778e-06, "loss": 1.9566, "step": 58 }, { "epoch": 0.04969118472768108, "grad_norm": 2.2930288314819336, "learning_rate": 1.6526610644257705e-06, "loss": 1.804, "step": 59 }, { "epoch": 0.05053340819764177, "grad_norm": 2.505305290222168, "learning_rate": 1.6806722689075632e-06, "loss": 2.0736, "step": 60 }, { "epoch": 0.05137563166760247, "grad_norm": 2.336763381958008, "learning_rate": 1.708683473389356e-06, "loss": 1.8074, "step": 61 }, { "epoch": 0.05221785513756317, "grad_norm": 1.6546568870544434, "learning_rate": 1.7366946778711485e-06, "loss": 1.5967, "step": 62 }, { "epoch": 0.05306007860752386, "grad_norm": 2.142577886581421, "learning_rate": 1.7647058823529414e-06, "loss": 1.8236, "step": 63 }, { "epoch": 0.05390230207748456, "grad_norm": 1.8201619386672974, "learning_rate": 1.792717086834734e-06, "loss": 1.6296, "step": 64 }, { "epoch": 0.05474452554744526, "grad_norm": 1.9650517702102661, "learning_rate": 1.8207282913165267e-06, "loss": 1.7321, "step": 65 }, { "epoch": 0.05558674901740595, "grad_norm": 1.8311152458190918, "learning_rate": 1.8487394957983196e-06, "loss": 1.8154, "step": 66 }, { "epoch": 0.05642897248736665, "grad_norm": 1.7876816987991333, "learning_rate": 1.8767507002801122e-06, "loss": 1.7079, "step": 67 }, { "epoch": 0.057271195957327346, "grad_norm": 1.5612218379974365, "learning_rate": 1.904761904761905e-06, "loss": 1.5344, "step": 68 }, { "epoch": 0.05811341942728804, "grad_norm": 1.5710315704345703, "learning_rate": 1.932773109243698e-06, "loss": 1.6248, "step": 69 }, { "epoch": 0.058955642897248736, "grad_norm": 1.3844945430755615, "learning_rate": 1.96078431372549e-06, "loss": 1.5531, "step": 70 }, { "epoch": 0.059797866367209435, "grad_norm": 1.466734528541565, "learning_rate": 1.988795518207283e-06, "loss": 1.6241, "step": 71 }, { "epoch": 0.060640089837170126, "grad_norm": 1.458426594734192, "learning_rate": 2.0168067226890756e-06, "loss": 1.6721, "step": 72 }, { "epoch": 0.061482313307130824, "grad_norm": 1.5119848251342773, "learning_rate": 2.0448179271708684e-06, "loss": 1.7649, "step": 73 }, { "epoch": 0.06232453677709152, "grad_norm": 1.4704101085662842, "learning_rate": 2.0728291316526615e-06, "loss": 1.5647, "step": 74 }, { "epoch": 0.06316676024705221, "grad_norm": 1.1354776620864868, "learning_rate": 2.100840336134454e-06, "loss": 1.2569, "step": 75 }, { "epoch": 0.06400898371701291, "grad_norm": 1.1222261190414429, "learning_rate": 2.1288515406162466e-06, "loss": 1.3374, "step": 76 }, { "epoch": 0.06485120718697361, "grad_norm": 1.1856663227081299, "learning_rate": 2.1568627450980393e-06, "loss": 1.4166, "step": 77 }, { "epoch": 0.06569343065693431, "grad_norm": 1.4114402532577515, "learning_rate": 2.184873949579832e-06, "loss": 1.5388, "step": 78 }, { "epoch": 0.06653565412689501, "grad_norm": 1.2940617799758911, "learning_rate": 2.2128851540616248e-06, "loss": 1.4312, "step": 79 }, { "epoch": 0.06737787759685569, "grad_norm": 1.2822506427764893, "learning_rate": 2.2408963585434175e-06, "loss": 1.5211, "step": 80 }, { "epoch": 0.06822010106681639, "grad_norm": 1.2207342386245728, "learning_rate": 2.2689075630252102e-06, "loss": 1.3315, "step": 81 }, { "epoch": 0.06906232453677709, "grad_norm": 0.8764878511428833, "learning_rate": 2.296918767507003e-06, "loss": 1.2594, "step": 82 }, { "epoch": 0.06990454800673779, "grad_norm": 0.9004432559013367, "learning_rate": 2.3249299719887957e-06, "loss": 1.2445, "step": 83 }, { "epoch": 0.07074677147669849, "grad_norm": 0.829721212387085, "learning_rate": 2.3529411764705885e-06, "loss": 1.1928, "step": 84 }, { "epoch": 0.07158899494665918, "grad_norm": 0.8306326270103455, "learning_rate": 2.380952380952381e-06, "loss": 1.1941, "step": 85 }, { "epoch": 0.07243121841661988, "grad_norm": 0.9379575848579407, "learning_rate": 2.408963585434174e-06, "loss": 1.3011, "step": 86 }, { "epoch": 0.07327344188658057, "grad_norm": 0.786998450756073, "learning_rate": 2.4369747899159667e-06, "loss": 1.1674, "step": 87 }, { "epoch": 0.07411566535654127, "grad_norm": 0.9020125865936279, "learning_rate": 2.4649859943977594e-06, "loss": 1.2704, "step": 88 }, { "epoch": 0.07495788882650196, "grad_norm": 0.914875864982605, "learning_rate": 2.492997198879552e-06, "loss": 1.2173, "step": 89 }, { "epoch": 0.07580011229646266, "grad_norm": 0.9144728779792786, "learning_rate": 2.521008403361345e-06, "loss": 1.2235, "step": 90 }, { "epoch": 0.07664233576642336, "grad_norm": 0.9120346903800964, "learning_rate": 2.549019607843137e-06, "loss": 1.1396, "step": 91 }, { "epoch": 0.07748455923638406, "grad_norm": 0.9745689630508423, "learning_rate": 2.57703081232493e-06, "loss": 1.1117, "step": 92 }, { "epoch": 0.07832678270634474, "grad_norm": 0.7130041122436523, "learning_rate": 2.605042016806723e-06, "loss": 1.109, "step": 93 }, { "epoch": 0.07916900617630544, "grad_norm": 1.0465285778045654, "learning_rate": 2.633053221288516e-06, "loss": 1.1417, "step": 94 }, { "epoch": 0.08001122964626614, "grad_norm": 0.7097454071044922, "learning_rate": 2.6610644257703085e-06, "loss": 1.0869, "step": 95 }, { "epoch": 0.08085345311622684, "grad_norm": 0.6800182461738586, "learning_rate": 2.689075630252101e-06, "loss": 1.0642, "step": 96 }, { "epoch": 0.08169567658618754, "grad_norm": 0.832053542137146, "learning_rate": 2.7170868347338936e-06, "loss": 1.0834, "step": 97 }, { "epoch": 0.08253790005614824, "grad_norm": 0.6903117895126343, "learning_rate": 2.7450980392156867e-06, "loss": 1.0095, "step": 98 }, { "epoch": 0.08338012352610892, "grad_norm": 0.5809651613235474, "learning_rate": 2.7731092436974795e-06, "loss": 0.9472, "step": 99 }, { "epoch": 0.08422234699606962, "grad_norm": 0.6343488097190857, "learning_rate": 2.8011204481792718e-06, "loss": 1.0418, "step": 100 }, { "epoch": 0.08506457046603032, "grad_norm": 0.6845325827598572, "learning_rate": 2.8291316526610645e-06, "loss": 1.0412, "step": 101 }, { "epoch": 0.08590679393599102, "grad_norm": 0.5882618427276611, "learning_rate": 2.8571428571428573e-06, "loss": 1.0389, "step": 102 }, { "epoch": 0.08674901740595171, "grad_norm": 0.563722550868988, "learning_rate": 2.88515406162465e-06, "loss": 0.9514, "step": 103 }, { "epoch": 0.08759124087591241, "grad_norm": 0.5568670034408569, "learning_rate": 2.913165266106443e-06, "loss": 1.0123, "step": 104 }, { "epoch": 0.08843346434587311, "grad_norm": 0.5534277558326721, "learning_rate": 2.9411764705882355e-06, "loss": 0.9887, "step": 105 }, { "epoch": 0.0892756878158338, "grad_norm": 0.4879157543182373, "learning_rate": 2.969187675070028e-06, "loss": 0.9041, "step": 106 }, { "epoch": 0.0901179112857945, "grad_norm": 0.4778350591659546, "learning_rate": 2.997198879551821e-06, "loss": 0.8829, "step": 107 }, { "epoch": 0.09096013475575519, "grad_norm": 0.5213919281959534, "learning_rate": 3.0252100840336137e-06, "loss": 0.9614, "step": 108 }, { "epoch": 0.09180235822571589, "grad_norm": 0.4584609568119049, "learning_rate": 3.053221288515407e-06, "loss": 0.9301, "step": 109 }, { "epoch": 0.09264458169567659, "grad_norm": 0.4374285638332367, "learning_rate": 3.081232492997199e-06, "loss": 0.8886, "step": 110 }, { "epoch": 0.09348680516563729, "grad_norm": 0.46922776103019714, "learning_rate": 3.109243697478992e-06, "loss": 0.9331, "step": 111 }, { "epoch": 0.09432902863559797, "grad_norm": 0.45767855644226074, "learning_rate": 3.1372549019607846e-06, "loss": 0.8576, "step": 112 }, { "epoch": 0.09517125210555867, "grad_norm": 0.43065571784973145, "learning_rate": 3.1652661064425773e-06, "loss": 0.8751, "step": 113 }, { "epoch": 0.09601347557551937, "grad_norm": 0.4365083873271942, "learning_rate": 3.1932773109243696e-06, "loss": 0.9112, "step": 114 }, { "epoch": 0.09685569904548007, "grad_norm": 0.4650225341320038, "learning_rate": 3.221288515406163e-06, "loss": 0.884, "step": 115 }, { "epoch": 0.09769792251544077, "grad_norm": 0.4421593248844147, "learning_rate": 3.2492997198879555e-06, "loss": 0.876, "step": 116 }, { "epoch": 0.09854014598540146, "grad_norm": 0.40221500396728516, "learning_rate": 3.2773109243697483e-06, "loss": 0.8763, "step": 117 }, { "epoch": 0.09938236945536216, "grad_norm": 0.4209982752799988, "learning_rate": 3.305322128851541e-06, "loss": 0.8955, "step": 118 }, { "epoch": 0.10022459292532285, "grad_norm": 0.4189291000366211, "learning_rate": 3.3333333333333333e-06, "loss": 0.8573, "step": 119 }, { "epoch": 0.10106681639528355, "grad_norm": 0.37386554479599, "learning_rate": 3.3613445378151265e-06, "loss": 0.8372, "step": 120 }, { "epoch": 0.10190903986524424, "grad_norm": 0.4138484299182892, "learning_rate": 3.3893557422969192e-06, "loss": 0.9128, "step": 121 }, { "epoch": 0.10275126333520494, "grad_norm": 0.428212970495224, "learning_rate": 3.417366946778712e-06, "loss": 0.8784, "step": 122 }, { "epoch": 0.10359348680516564, "grad_norm": 0.4094778001308441, "learning_rate": 3.4453781512605043e-06, "loss": 0.8508, "step": 123 }, { "epoch": 0.10443571027512634, "grad_norm": 0.34166431427001953, "learning_rate": 3.473389355742297e-06, "loss": 0.7721, "step": 124 }, { "epoch": 0.10527793374508702, "grad_norm": 0.3657137453556061, "learning_rate": 3.5014005602240897e-06, "loss": 0.811, "step": 125 }, { "epoch": 0.10612015721504772, "grad_norm": 0.37153807282447815, "learning_rate": 3.529411764705883e-06, "loss": 0.8942, "step": 126 }, { "epoch": 0.10696238068500842, "grad_norm": 0.3560837209224701, "learning_rate": 3.5574229691876756e-06, "loss": 0.8319, "step": 127 }, { "epoch": 0.10780460415496912, "grad_norm": 0.40500515699386597, "learning_rate": 3.585434173669468e-06, "loss": 0.8182, "step": 128 }, { "epoch": 0.10864682762492982, "grad_norm": 0.34681516885757446, "learning_rate": 3.6134453781512607e-06, "loss": 0.8226, "step": 129 }, { "epoch": 0.10948905109489052, "grad_norm": 0.37008172273635864, "learning_rate": 3.6414565826330534e-06, "loss": 0.8596, "step": 130 }, { "epoch": 0.1103312745648512, "grad_norm": 0.3901808559894562, "learning_rate": 3.669467787114846e-06, "loss": 0.8438, "step": 131 }, { "epoch": 0.1111734980348119, "grad_norm": 0.3330650329589844, "learning_rate": 3.6974789915966393e-06, "loss": 0.7757, "step": 132 }, { "epoch": 0.1120157215047726, "grad_norm": 0.38784700632095337, "learning_rate": 3.7254901960784316e-06, "loss": 0.8329, "step": 133 }, { "epoch": 0.1128579449747333, "grad_norm": 0.36473771929740906, "learning_rate": 3.7535014005602243e-06, "loss": 0.8146, "step": 134 }, { "epoch": 0.113700168444694, "grad_norm": 0.34097734093666077, "learning_rate": 3.781512605042017e-06, "loss": 0.7802, "step": 135 }, { "epoch": 0.11454239191465469, "grad_norm": 0.377946138381958, "learning_rate": 3.80952380952381e-06, "loss": 0.8341, "step": 136 }, { "epoch": 0.11538461538461539, "grad_norm": 0.3468363881111145, "learning_rate": 3.8375350140056026e-06, "loss": 0.7498, "step": 137 }, { "epoch": 0.11622683885457608, "grad_norm": 0.3832269608974457, "learning_rate": 3.865546218487396e-06, "loss": 0.801, "step": 138 }, { "epoch": 0.11706906232453677, "grad_norm": 0.3194640278816223, "learning_rate": 3.893557422969188e-06, "loss": 0.725, "step": 139 }, { "epoch": 0.11791128579449747, "grad_norm": 0.33322054147720337, "learning_rate": 3.92156862745098e-06, "loss": 0.7849, "step": 140 }, { "epoch": 0.11875350926445817, "grad_norm": 0.3820238411426544, "learning_rate": 3.9495798319327735e-06, "loss": 0.7816, "step": 141 }, { "epoch": 0.11959573273441887, "grad_norm": 0.3466508388519287, "learning_rate": 3.977591036414566e-06, "loss": 0.7514, "step": 142 }, { "epoch": 0.12043795620437957, "grad_norm": 0.3406356871128082, "learning_rate": 4.005602240896359e-06, "loss": 0.7632, "step": 143 }, { "epoch": 0.12128017967434025, "grad_norm": 0.34604403376579285, "learning_rate": 4.033613445378151e-06, "loss": 0.8274, "step": 144 }, { "epoch": 0.12212240314430095, "grad_norm": 0.3634200990200043, "learning_rate": 4.0616246498599444e-06, "loss": 0.7827, "step": 145 }, { "epoch": 0.12296462661426165, "grad_norm": 0.3886551558971405, "learning_rate": 4.089635854341737e-06, "loss": 0.806, "step": 146 }, { "epoch": 0.12380685008422235, "grad_norm": 0.3560991883277893, "learning_rate": 4.11764705882353e-06, "loss": 0.7756, "step": 147 }, { "epoch": 0.12464907355418305, "grad_norm": 0.3407435417175293, "learning_rate": 4.145658263305323e-06, "loss": 0.7561, "step": 148 }, { "epoch": 0.12549129702414374, "grad_norm": 0.3990819454193115, "learning_rate": 4.173669467787115e-06, "loss": 0.7653, "step": 149 }, { "epoch": 0.12633352049410443, "grad_norm": 0.33961188793182373, "learning_rate": 4.201680672268908e-06, "loss": 0.8003, "step": 150 }, { "epoch": 0.12717574396406514, "grad_norm": 0.3487538695335388, "learning_rate": 4.229691876750701e-06, "loss": 0.7049, "step": 151 }, { "epoch": 0.12801796743402583, "grad_norm": 0.382635235786438, "learning_rate": 4.257703081232493e-06, "loss": 0.7689, "step": 152 }, { "epoch": 0.12886019090398654, "grad_norm": 0.3133614957332611, "learning_rate": 4.2857142857142855e-06, "loss": 0.777, "step": 153 }, { "epoch": 0.12970241437394722, "grad_norm": 0.34764987230300903, "learning_rate": 4.313725490196079e-06, "loss": 0.7779, "step": 154 }, { "epoch": 0.1305446378439079, "grad_norm": 0.3675849139690399, "learning_rate": 4.341736694677872e-06, "loss": 0.8251, "step": 155 }, { "epoch": 0.13138686131386862, "grad_norm": 0.3127296268939972, "learning_rate": 4.369747899159664e-06, "loss": 0.7684, "step": 156 }, { "epoch": 0.1322290847838293, "grad_norm": 0.3142247200012207, "learning_rate": 4.397759103641457e-06, "loss": 0.8005, "step": 157 }, { "epoch": 0.13307130825379002, "grad_norm": 0.29640063643455505, "learning_rate": 4.4257703081232496e-06, "loss": 0.7472, "step": 158 }, { "epoch": 0.1339135317237507, "grad_norm": 0.3259788155555725, "learning_rate": 4.453781512605043e-06, "loss": 0.7872, "step": 159 }, { "epoch": 0.13475575519371139, "grad_norm": 0.35886889696121216, "learning_rate": 4.481792717086835e-06, "loss": 0.7429, "step": 160 }, { "epoch": 0.1355979786636721, "grad_norm": 0.3772079646587372, "learning_rate": 4.509803921568628e-06, "loss": 0.8095, "step": 161 }, { "epoch": 0.13644020213363278, "grad_norm": 0.2996552586555481, "learning_rate": 4.5378151260504205e-06, "loss": 0.7187, "step": 162 }, { "epoch": 0.1372824256035935, "grad_norm": 0.33632251620292664, "learning_rate": 4.565826330532213e-06, "loss": 0.7387, "step": 163 }, { "epoch": 0.13812464907355418, "grad_norm": 0.44071030616760254, "learning_rate": 4.593837535014006e-06, "loss": 0.8149, "step": 164 }, { "epoch": 0.1389668725435149, "grad_norm": 0.3242075741291046, "learning_rate": 4.621848739495799e-06, "loss": 0.7156, "step": 165 }, { "epoch": 0.13980909601347558, "grad_norm": 2.6580650806427, "learning_rate": 4.6498599439775914e-06, "loss": 0.855, "step": 166 }, { "epoch": 0.14065131948343626, "grad_norm": 0.3378385007381439, "learning_rate": 4.677871148459384e-06, "loss": 0.7395, "step": 167 }, { "epoch": 0.14149354295339697, "grad_norm": 0.39721402525901794, "learning_rate": 4.705882352941177e-06, "loss": 0.7831, "step": 168 }, { "epoch": 0.14233576642335766, "grad_norm": 0.30812257528305054, "learning_rate": 4.733893557422969e-06, "loss": 0.7193, "step": 169 }, { "epoch": 0.14317798989331837, "grad_norm": 0.6287363171577454, "learning_rate": 4.761904761904762e-06, "loss": 0.7635, "step": 170 }, { "epoch": 0.14402021336327905, "grad_norm": 0.3249841332435608, "learning_rate": 4.7899159663865555e-06, "loss": 0.7126, "step": 171 }, { "epoch": 0.14486243683323977, "grad_norm": 0.32982781529426575, "learning_rate": 4.817927170868348e-06, "loss": 0.7516, "step": 172 }, { "epoch": 0.14570466030320045, "grad_norm": 0.32603293657302856, "learning_rate": 4.84593837535014e-06, "loss": 0.7272, "step": 173 }, { "epoch": 0.14654688377316114, "grad_norm": 0.3163414001464844, "learning_rate": 4.873949579831933e-06, "loss": 0.6917, "step": 174 }, { "epoch": 0.14738910724312185, "grad_norm": 0.37258076667785645, "learning_rate": 4.901960784313726e-06, "loss": 0.7564, "step": 175 }, { "epoch": 0.14823133071308253, "grad_norm": 0.38822147250175476, "learning_rate": 4.929971988795519e-06, "loss": 0.7393, "step": 176 }, { "epoch": 0.14907355418304324, "grad_norm": 0.33828988671302795, "learning_rate": 4.957983193277311e-06, "loss": 0.7281, "step": 177 }, { "epoch": 0.14991577765300393, "grad_norm": 0.4172876179218292, "learning_rate": 4.985994397759104e-06, "loss": 0.8106, "step": 178 }, { "epoch": 0.1507580011229646, "grad_norm": 0.32077351212501526, "learning_rate": 5.0140056022408966e-06, "loss": 0.7167, "step": 179 }, { "epoch": 0.15160022459292533, "grad_norm": 0.29903048276901245, "learning_rate": 5.04201680672269e-06, "loss": 0.7247, "step": 180 }, { "epoch": 0.152442448062886, "grad_norm": 0.2842431962490082, "learning_rate": 5.070028011204482e-06, "loss": 0.6761, "step": 181 }, { "epoch": 0.15328467153284672, "grad_norm": 0.35760748386383057, "learning_rate": 5.098039215686274e-06, "loss": 0.7326, "step": 182 }, { "epoch": 0.1541268950028074, "grad_norm": 0.3822750747203827, "learning_rate": 5.1260504201680675e-06, "loss": 0.7326, "step": 183 }, { "epoch": 0.15496911847276812, "grad_norm": 0.3131263852119446, "learning_rate": 5.15406162464986e-06, "loss": 0.7373, "step": 184 }, { "epoch": 0.1558113419427288, "grad_norm": 0.3290548324584961, "learning_rate": 5.182072829131654e-06, "loss": 0.7618, "step": 185 }, { "epoch": 0.1566535654126895, "grad_norm": 0.3675447404384613, "learning_rate": 5.210084033613446e-06, "loss": 0.7062, "step": 186 }, { "epoch": 0.1574957888826502, "grad_norm": 0.34263330698013306, "learning_rate": 5.2380952380952384e-06, "loss": 0.656, "step": 187 }, { "epoch": 0.15833801235261089, "grad_norm": 0.35228684544563293, "learning_rate": 5.266106442577032e-06, "loss": 0.7067, "step": 188 }, { "epoch": 0.1591802358225716, "grad_norm": 0.35112953186035156, "learning_rate": 5.294117647058824e-06, "loss": 0.7699, "step": 189 }, { "epoch": 0.16002245929253228, "grad_norm": 0.33077454566955566, "learning_rate": 5.322128851540617e-06, "loss": 0.7097, "step": 190 }, { "epoch": 0.160864682762493, "grad_norm": 0.31899309158325195, "learning_rate": 5.350140056022409e-06, "loss": 0.669, "step": 191 }, { "epoch": 0.16170690623245368, "grad_norm": 0.31999748945236206, "learning_rate": 5.378151260504202e-06, "loss": 0.689, "step": 192 }, { "epoch": 0.16254912970241436, "grad_norm": 0.3235485553741455, "learning_rate": 5.406162464985995e-06, "loss": 0.6961, "step": 193 }, { "epoch": 0.16339135317237508, "grad_norm": 0.33618244528770447, "learning_rate": 5.434173669467787e-06, "loss": 0.7006, "step": 194 }, { "epoch": 0.16423357664233576, "grad_norm": 0.32017093896865845, "learning_rate": 5.4621848739495795e-06, "loss": 0.6995, "step": 195 }, { "epoch": 0.16507580011229647, "grad_norm": 0.3416753113269806, "learning_rate": 5.4901960784313735e-06, "loss": 0.668, "step": 196 }, { "epoch": 0.16591802358225716, "grad_norm": 0.3047206699848175, "learning_rate": 5.518207282913166e-06, "loss": 0.7305, "step": 197 }, { "epoch": 0.16676024705221784, "grad_norm": 0.29472023248672485, "learning_rate": 5.546218487394959e-06, "loss": 0.7317, "step": 198 }, { "epoch": 0.16760247052217855, "grad_norm": 0.3650378882884979, "learning_rate": 5.574229691876751e-06, "loss": 0.6923, "step": 199 }, { "epoch": 0.16844469399213924, "grad_norm": 0.35154587030410767, "learning_rate": 5.6022408963585436e-06, "loss": 0.7488, "step": 200 }, { "epoch": 0.16928691746209995, "grad_norm": 0.3071550726890564, "learning_rate": 5.630252100840337e-06, "loss": 0.7114, "step": 201 }, { "epoch": 0.17012914093206064, "grad_norm": 0.29619914293289185, "learning_rate": 5.658263305322129e-06, "loss": 0.6501, "step": 202 }, { "epoch": 0.17097136440202135, "grad_norm": 0.2946092486381531, "learning_rate": 5.686274509803922e-06, "loss": 0.6826, "step": 203 }, { "epoch": 0.17181358787198203, "grad_norm": 0.3281421661376953, "learning_rate": 5.7142857142857145e-06, "loss": 0.7354, "step": 204 }, { "epoch": 0.17265581134194272, "grad_norm": 0.33160722255706787, "learning_rate": 5.742296918767507e-06, "loss": 0.6742, "step": 205 }, { "epoch": 0.17349803481190343, "grad_norm": 0.28359898924827576, "learning_rate": 5.7703081232493e-06, "loss": 0.6535, "step": 206 }, { "epoch": 0.1743402582818641, "grad_norm": 0.2952044606208801, "learning_rate": 5.798319327731093e-06, "loss": 0.6825, "step": 207 }, { "epoch": 0.17518248175182483, "grad_norm": 0.3402995467185974, "learning_rate": 5.826330532212886e-06, "loss": 0.7442, "step": 208 }, { "epoch": 0.1760247052217855, "grad_norm": 0.333694726228714, "learning_rate": 5.854341736694679e-06, "loss": 0.6806, "step": 209 }, { "epoch": 0.17686692869174622, "grad_norm": 0.32840678095817566, "learning_rate": 5.882352941176471e-06, "loss": 0.7168, "step": 210 }, { "epoch": 0.1777091521617069, "grad_norm": 0.3290562629699707, "learning_rate": 5.910364145658264e-06, "loss": 0.712, "step": 211 }, { "epoch": 0.1785513756316676, "grad_norm": 0.3183731138706207, "learning_rate": 5.938375350140056e-06, "loss": 0.6732, "step": 212 }, { "epoch": 0.1793935991016283, "grad_norm": 0.32780721783638, "learning_rate": 5.9663865546218495e-06, "loss": 0.7169, "step": 213 }, { "epoch": 0.180235822571589, "grad_norm": 0.34455549716949463, "learning_rate": 5.994397759103642e-06, "loss": 0.7279, "step": 214 }, { "epoch": 0.1810780460415497, "grad_norm": 0.28951582312583923, "learning_rate": 6.022408963585434e-06, "loss": 0.697, "step": 215 }, { "epoch": 0.18192026951151039, "grad_norm": 0.2932921051979065, "learning_rate": 6.050420168067227e-06, "loss": 0.7046, "step": 216 }, { "epoch": 0.1827624929814711, "grad_norm": 0.2841632664203644, "learning_rate": 6.07843137254902e-06, "loss": 0.6874, "step": 217 }, { "epoch": 0.18360471645143178, "grad_norm": 0.2906087636947632, "learning_rate": 6.106442577030814e-06, "loss": 0.6475, "step": 218 }, { "epoch": 0.18444693992139247, "grad_norm": 0.2902671992778778, "learning_rate": 6.134453781512606e-06, "loss": 0.6991, "step": 219 }, { "epoch": 0.18528916339135318, "grad_norm": 0.37987902760505676, "learning_rate": 6.162464985994398e-06, "loss": 0.6928, "step": 220 }, { "epoch": 0.18613138686131386, "grad_norm": 0.28720220923423767, "learning_rate": 6.1904761904761914e-06, "loss": 0.6719, "step": 221 }, { "epoch": 0.18697361033127458, "grad_norm": 0.29819604754447937, "learning_rate": 6.218487394957984e-06, "loss": 0.6961, "step": 222 }, { "epoch": 0.18781583380123526, "grad_norm": 0.3018217980861664, "learning_rate": 6.246498599439776e-06, "loss": 0.668, "step": 223 }, { "epoch": 0.18865805727119594, "grad_norm": 0.28797048330307007, "learning_rate": 6.274509803921569e-06, "loss": 0.6486, "step": 224 }, { "epoch": 0.18950028074115666, "grad_norm": 0.28039395809173584, "learning_rate": 6.3025210084033615e-06, "loss": 0.6423, "step": 225 }, { "epoch": 0.19034250421111734, "grad_norm": 0.3053869605064392, "learning_rate": 6.330532212885155e-06, "loss": 0.6911, "step": 226 }, { "epoch": 0.19118472768107805, "grad_norm": 0.2803293466567993, "learning_rate": 6.358543417366947e-06, "loss": 0.6896, "step": 227 }, { "epoch": 0.19202695115103874, "grad_norm": 0.29825830459594727, "learning_rate": 6.386554621848739e-06, "loss": 0.6626, "step": 228 }, { "epoch": 0.19286917462099945, "grad_norm": 0.3159964978694916, "learning_rate": 6.414565826330533e-06, "loss": 0.665, "step": 229 }, { "epoch": 0.19371139809096014, "grad_norm": 0.32100504636764526, "learning_rate": 6.442577030812326e-06, "loss": 0.6981, "step": 230 }, { "epoch": 0.19455362156092082, "grad_norm": 0.2983344495296478, "learning_rate": 6.470588235294119e-06, "loss": 0.6849, "step": 231 }, { "epoch": 0.19539584503088153, "grad_norm": 0.28560224175453186, "learning_rate": 6.498599439775911e-06, "loss": 0.6301, "step": 232 }, { "epoch": 0.19623806850084222, "grad_norm": 0.3205236792564392, "learning_rate": 6.526610644257703e-06, "loss": 0.7283, "step": 233 }, { "epoch": 0.19708029197080293, "grad_norm": 0.3338777720928192, "learning_rate": 6.5546218487394966e-06, "loss": 0.6876, "step": 234 }, { "epoch": 0.1979225154407636, "grad_norm": 0.28424522280693054, "learning_rate": 6.582633053221289e-06, "loss": 0.6538, "step": 235 }, { "epoch": 0.19876473891072433, "grad_norm": 0.30589622259140015, "learning_rate": 6.610644257703082e-06, "loss": 0.6252, "step": 236 }, { "epoch": 0.199606962380685, "grad_norm": 0.3003689646720886, "learning_rate": 6.638655462184874e-06, "loss": 0.678, "step": 237 }, { "epoch": 0.2004491858506457, "grad_norm": 0.3050166964530945, "learning_rate": 6.666666666666667e-06, "loss": 0.6532, "step": 238 }, { "epoch": 0.2012914093206064, "grad_norm": 0.30692800879478455, "learning_rate": 6.69467787114846e-06, "loss": 0.6315, "step": 239 }, { "epoch": 0.2021336327905671, "grad_norm": 0.29647132754325867, "learning_rate": 6.722689075630253e-06, "loss": 0.6597, "step": 240 }, { "epoch": 0.2029758562605278, "grad_norm": 0.3090716004371643, "learning_rate": 6.750700280112046e-06, "loss": 0.6937, "step": 241 }, { "epoch": 0.2038180797304885, "grad_norm": 0.3128722906112671, "learning_rate": 6.7787114845938384e-06, "loss": 0.6568, "step": 242 }, { "epoch": 0.20466030320044917, "grad_norm": 0.30862483382225037, "learning_rate": 6.806722689075631e-06, "loss": 0.7253, "step": 243 }, { "epoch": 0.20550252667040989, "grad_norm": 0.29958224296569824, "learning_rate": 6.834733893557424e-06, "loss": 0.6604, "step": 244 }, { "epoch": 0.20634475014037057, "grad_norm": 0.28663939237594604, "learning_rate": 6.862745098039216e-06, "loss": 0.6443, "step": 245 }, { "epoch": 0.20718697361033128, "grad_norm": 0.30499759316444397, "learning_rate": 6.8907563025210085e-06, "loss": 0.6744, "step": 246 }, { "epoch": 0.20802919708029197, "grad_norm": 0.3361145257949829, "learning_rate": 6.918767507002802e-06, "loss": 0.7105, "step": 247 }, { "epoch": 0.20887142055025268, "grad_norm": 0.3030431866645813, "learning_rate": 6.946778711484594e-06, "loss": 0.6494, "step": 248 }, { "epoch": 0.20971364402021336, "grad_norm": 0.3239549696445465, "learning_rate": 6.974789915966387e-06, "loss": 0.6468, "step": 249 }, { "epoch": 0.21055586749017405, "grad_norm": 0.2851716876029968, "learning_rate": 7.0028011204481795e-06, "loss": 0.6026, "step": 250 }, { "epoch": 0.21139809096013476, "grad_norm": 0.3075682818889618, "learning_rate": 7.030812324929972e-06, "loss": 0.6558, "step": 251 }, { "epoch": 0.21224031443009544, "grad_norm": 0.27826419472694397, "learning_rate": 7.058823529411766e-06, "loss": 0.6507, "step": 252 }, { "epoch": 0.21308253790005616, "grad_norm": 0.34275171160697937, "learning_rate": 7.086834733893558e-06, "loss": 0.6562, "step": 253 }, { "epoch": 0.21392476137001684, "grad_norm": 0.3005102276802063, "learning_rate": 7.114845938375351e-06, "loss": 0.6778, "step": 254 }, { "epoch": 0.21476698483997755, "grad_norm": 0.30616575479507446, "learning_rate": 7.1428571428571436e-06, "loss": 0.6631, "step": 255 }, { "epoch": 0.21560920830993824, "grad_norm": 0.31981250643730164, "learning_rate": 7.170868347338936e-06, "loss": 0.6747, "step": 256 }, { "epoch": 0.21645143177989892, "grad_norm": 0.3178783357143402, "learning_rate": 7.198879551820729e-06, "loss": 0.6462, "step": 257 }, { "epoch": 0.21729365524985964, "grad_norm": 0.3487645983695984, "learning_rate": 7.226890756302521e-06, "loss": 0.6358, "step": 258 }, { "epoch": 0.21813587871982032, "grad_norm": 0.28799307346343994, "learning_rate": 7.2549019607843145e-06, "loss": 0.6585, "step": 259 }, { "epoch": 0.21897810218978103, "grad_norm": 0.305675745010376, "learning_rate": 7.282913165266107e-06, "loss": 0.627, "step": 260 }, { "epoch": 0.21982032565974172, "grad_norm": 0.28535935282707214, "learning_rate": 7.310924369747899e-06, "loss": 0.6205, "step": 261 }, { "epoch": 0.2206625491297024, "grad_norm": 0.2872690260410309, "learning_rate": 7.338935574229692e-06, "loss": 0.623, "step": 262 }, { "epoch": 0.2215047725996631, "grad_norm": 0.3426709771156311, "learning_rate": 7.3669467787114854e-06, "loss": 0.7098, "step": 263 }, { "epoch": 0.2223469960696238, "grad_norm": 0.28326380252838135, "learning_rate": 7.394957983193279e-06, "loss": 0.6505, "step": 264 }, { "epoch": 0.2231892195395845, "grad_norm": 0.3487233519554138, "learning_rate": 7.422969187675071e-06, "loss": 0.6621, "step": 265 }, { "epoch": 0.2240314430095452, "grad_norm": 0.3204447329044342, "learning_rate": 7.450980392156863e-06, "loss": 0.6638, "step": 266 }, { "epoch": 0.2248736664795059, "grad_norm": 0.30730658769607544, "learning_rate": 7.478991596638656e-06, "loss": 0.6556, "step": 267 }, { "epoch": 0.2257158899494666, "grad_norm": 0.2656487226486206, "learning_rate": 7.507002801120449e-06, "loss": 0.6293, "step": 268 }, { "epoch": 0.22655811341942728, "grad_norm": 0.3004642724990845, "learning_rate": 7.535014005602241e-06, "loss": 0.6074, "step": 269 }, { "epoch": 0.227400336889388, "grad_norm": 0.32797226309776306, "learning_rate": 7.563025210084034e-06, "loss": 0.6396, "step": 270 }, { "epoch": 0.22824256035934867, "grad_norm": 0.3530418276786804, "learning_rate": 7.5910364145658265e-06, "loss": 0.6682, "step": 271 }, { "epoch": 0.22908478382930939, "grad_norm": 0.3089774250984192, "learning_rate": 7.61904761904762e-06, "loss": 0.6466, "step": 272 }, { "epoch": 0.22992700729927007, "grad_norm": 0.3472631275653839, "learning_rate": 7.647058823529411e-06, "loss": 0.667, "step": 273 }, { "epoch": 0.23076923076923078, "grad_norm": 0.26114019751548767, "learning_rate": 7.675070028011205e-06, "loss": 0.5798, "step": 274 }, { "epoch": 0.23161145423919147, "grad_norm": 0.27844899892807007, "learning_rate": 7.703081232492997e-06, "loss": 0.6387, "step": 275 }, { "epoch": 0.23245367770915215, "grad_norm": 0.29107460379600525, "learning_rate": 7.731092436974791e-06, "loss": 0.6532, "step": 276 }, { "epoch": 0.23329590117911286, "grad_norm": 0.3076872229576111, "learning_rate": 7.759103641456584e-06, "loss": 0.6582, "step": 277 }, { "epoch": 0.23413812464907355, "grad_norm": 0.31410232186317444, "learning_rate": 7.787114845938376e-06, "loss": 0.694, "step": 278 }, { "epoch": 0.23498034811903426, "grad_norm": 0.3545025587081909, "learning_rate": 7.815126050420168e-06, "loss": 0.6463, "step": 279 }, { "epoch": 0.23582257158899494, "grad_norm": 0.2788660526275635, "learning_rate": 7.84313725490196e-06, "loss": 0.6046, "step": 280 }, { "epoch": 0.23666479505895563, "grad_norm": 0.29069772362709045, "learning_rate": 7.871148459383755e-06, "loss": 0.6693, "step": 281 }, { "epoch": 0.23750701852891634, "grad_norm": 0.2946566045284271, "learning_rate": 7.899159663865547e-06, "loss": 0.6252, "step": 282 }, { "epoch": 0.23834924199887703, "grad_norm": 0.3297198712825775, "learning_rate": 7.92717086834734e-06, "loss": 0.6559, "step": 283 }, { "epoch": 0.23919146546883774, "grad_norm": 0.28595635294914246, "learning_rate": 7.955182072829132e-06, "loss": 0.6008, "step": 284 }, { "epoch": 0.24003368893879842, "grad_norm": 0.3898308277130127, "learning_rate": 7.983193277310926e-06, "loss": 0.6853, "step": 285 }, { "epoch": 0.24087591240875914, "grad_norm": 0.2797921299934387, "learning_rate": 8.011204481792718e-06, "loss": 0.5958, "step": 286 }, { "epoch": 0.24171813587871982, "grad_norm": 0.3019198477268219, "learning_rate": 8.03921568627451e-06, "loss": 0.6469, "step": 287 }, { "epoch": 0.2425603593486805, "grad_norm": 0.2953309714794159, "learning_rate": 8.067226890756303e-06, "loss": 0.6556, "step": 288 }, { "epoch": 0.24340258281864122, "grad_norm": 0.26449036598205566, "learning_rate": 8.095238095238097e-06, "loss": 0.588, "step": 289 }, { "epoch": 0.2442448062886019, "grad_norm": 0.2577894628047943, "learning_rate": 8.123249299719889e-06, "loss": 0.6045, "step": 290 }, { "epoch": 0.2450870297585626, "grad_norm": 0.34463486075401306, "learning_rate": 8.151260504201681e-06, "loss": 0.6633, "step": 291 }, { "epoch": 0.2459292532285233, "grad_norm": 0.26166850328445435, "learning_rate": 8.179271708683473e-06, "loss": 0.5809, "step": 292 }, { "epoch": 0.246771476698484, "grad_norm": 0.2851448059082031, "learning_rate": 8.207282913165266e-06, "loss": 0.6393, "step": 293 }, { "epoch": 0.2476137001684447, "grad_norm": 0.2909039556980133, "learning_rate": 8.23529411764706e-06, "loss": 0.6378, "step": 294 }, { "epoch": 0.24845592363840538, "grad_norm": 0.2851593792438507, "learning_rate": 8.263305322128852e-06, "loss": 0.5826, "step": 295 }, { "epoch": 0.2492981471083661, "grad_norm": 0.29334354400634766, "learning_rate": 8.291316526610646e-06, "loss": 0.6326, "step": 296 }, { "epoch": 0.2501403705783268, "grad_norm": 0.26468589901924133, "learning_rate": 8.319327731092438e-06, "loss": 0.6229, "step": 297 }, { "epoch": 0.2509825940482875, "grad_norm": 0.2999300956726074, "learning_rate": 8.34733893557423e-06, "loss": 0.6216, "step": 298 }, { "epoch": 0.2518248175182482, "grad_norm": 0.2942752242088318, "learning_rate": 8.375350140056023e-06, "loss": 0.6367, "step": 299 }, { "epoch": 0.25266704098820886, "grad_norm": 0.25672420859336853, "learning_rate": 8.403361344537815e-06, "loss": 0.6081, "step": 300 }, { "epoch": 0.25350926445816957, "grad_norm": 0.2761206328868866, "learning_rate": 8.43137254901961e-06, "loss": 0.65, "step": 301 }, { "epoch": 0.2543514879281303, "grad_norm": 0.28315573930740356, "learning_rate": 8.459383753501402e-06, "loss": 0.5824, "step": 302 }, { "epoch": 0.25519371139809094, "grad_norm": 0.2764551043510437, "learning_rate": 8.487394957983194e-06, "loss": 0.6371, "step": 303 }, { "epoch": 0.25603593486805165, "grad_norm": 0.307086318731308, "learning_rate": 8.515406162464986e-06, "loss": 0.6486, "step": 304 }, { "epoch": 0.25687815833801236, "grad_norm": 0.3122142255306244, "learning_rate": 8.543417366946779e-06, "loss": 0.6383, "step": 305 }, { "epoch": 0.2577203818079731, "grad_norm": 0.3065318763256073, "learning_rate": 8.571428571428571e-06, "loss": 0.5834, "step": 306 }, { "epoch": 0.25856260527793373, "grad_norm": 0.2969425320625305, "learning_rate": 8.599439775910365e-06, "loss": 0.6297, "step": 307 }, { "epoch": 0.25940482874789444, "grad_norm": 0.3011825680732727, "learning_rate": 8.627450980392157e-06, "loss": 0.6202, "step": 308 }, { "epoch": 0.26024705221785516, "grad_norm": 0.30776724219322205, "learning_rate": 8.655462184873951e-06, "loss": 0.605, "step": 309 }, { "epoch": 0.2610892756878158, "grad_norm": 0.2775612473487854, "learning_rate": 8.683473389355744e-06, "loss": 0.6001, "step": 310 }, { "epoch": 0.2619314991577765, "grad_norm": 0.27496930956840515, "learning_rate": 8.711484593837536e-06, "loss": 0.6257, "step": 311 }, { "epoch": 0.26277372262773724, "grad_norm": 0.316589891910553, "learning_rate": 8.739495798319328e-06, "loss": 0.6415, "step": 312 }, { "epoch": 0.2636159460976979, "grad_norm": 0.27638915181159973, "learning_rate": 8.76750700280112e-06, "loss": 0.6429, "step": 313 }, { "epoch": 0.2644581695676586, "grad_norm": 0.3319483697414398, "learning_rate": 8.795518207282914e-06, "loss": 0.6623, "step": 314 }, { "epoch": 0.2653003930376193, "grad_norm": 0.303477942943573, "learning_rate": 8.823529411764707e-06, "loss": 0.5915, "step": 315 }, { "epoch": 0.26614261650758003, "grad_norm": 0.3123064339160919, "learning_rate": 8.851540616246499e-06, "loss": 0.6565, "step": 316 }, { "epoch": 0.2669848399775407, "grad_norm": 0.3353332579135895, "learning_rate": 8.879551820728291e-06, "loss": 0.5866, "step": 317 }, { "epoch": 0.2678270634475014, "grad_norm": 0.29215964674949646, "learning_rate": 8.907563025210085e-06, "loss": 0.6297, "step": 318 }, { "epoch": 0.2686692869174621, "grad_norm": 0.3320382237434387, "learning_rate": 8.935574229691878e-06, "loss": 0.6397, "step": 319 }, { "epoch": 0.26951151038742277, "grad_norm": 0.3021487295627594, "learning_rate": 8.96358543417367e-06, "loss": 0.5951, "step": 320 }, { "epoch": 0.2703537338573835, "grad_norm": 0.28049853444099426, "learning_rate": 8.991596638655462e-06, "loss": 0.6242, "step": 321 }, { "epoch": 0.2711959573273442, "grad_norm": 0.31154200434684753, "learning_rate": 9.019607843137256e-06, "loss": 0.6279, "step": 322 }, { "epoch": 0.2720381807973049, "grad_norm": 0.2950107753276825, "learning_rate": 9.047619047619049e-06, "loss": 0.5887, "step": 323 }, { "epoch": 0.27288040426726556, "grad_norm": 0.33065423369407654, "learning_rate": 9.075630252100841e-06, "loss": 0.5985, "step": 324 }, { "epoch": 0.2737226277372263, "grad_norm": 0.28331801295280457, "learning_rate": 9.103641456582633e-06, "loss": 0.6091, "step": 325 }, { "epoch": 0.274564851207187, "grad_norm": 0.32754063606262207, "learning_rate": 9.131652661064426e-06, "loss": 0.6114, "step": 326 }, { "epoch": 0.27540707467714765, "grad_norm": 0.30769217014312744, "learning_rate": 9.15966386554622e-06, "loss": 0.6083, "step": 327 }, { "epoch": 0.27624929814710836, "grad_norm": 0.3023301661014557, "learning_rate": 9.187675070028012e-06, "loss": 0.5971, "step": 328 }, { "epoch": 0.27709152161706907, "grad_norm": 0.3037400543689728, "learning_rate": 9.215686274509804e-06, "loss": 0.5969, "step": 329 }, { "epoch": 0.2779337450870298, "grad_norm": 0.3065560758113861, "learning_rate": 9.243697478991598e-06, "loss": 0.6306, "step": 330 }, { "epoch": 0.27877596855699044, "grad_norm": 0.30304092168807983, "learning_rate": 9.27170868347339e-06, "loss": 0.6412, "step": 331 }, { "epoch": 0.27961819202695115, "grad_norm": 0.30642905831336975, "learning_rate": 9.299719887955183e-06, "loss": 0.5854, "step": 332 }, { "epoch": 0.28046041549691186, "grad_norm": 0.28413695096969604, "learning_rate": 9.327731092436975e-06, "loss": 0.6633, "step": 333 }, { "epoch": 0.2813026389668725, "grad_norm": 0.29158738255500793, "learning_rate": 9.355742296918767e-06, "loss": 0.6349, "step": 334 }, { "epoch": 0.28214486243683323, "grad_norm": 0.33267754316329956, "learning_rate": 9.383753501400561e-06, "loss": 0.6211, "step": 335 }, { "epoch": 0.28298708590679394, "grad_norm": 0.2988111078739166, "learning_rate": 9.411764705882354e-06, "loss": 0.6625, "step": 336 }, { "epoch": 0.28382930937675466, "grad_norm": 0.3006874620914459, "learning_rate": 9.439775910364146e-06, "loss": 0.6337, "step": 337 }, { "epoch": 0.2846715328467153, "grad_norm": 0.2909769117832184, "learning_rate": 9.467787114845938e-06, "loss": 0.5775, "step": 338 }, { "epoch": 0.285513756316676, "grad_norm": 0.3071255087852478, "learning_rate": 9.49579831932773e-06, "loss": 0.5903, "step": 339 }, { "epoch": 0.28635597978663674, "grad_norm": 0.28771814703941345, "learning_rate": 9.523809523809525e-06, "loss": 0.634, "step": 340 }, { "epoch": 0.2871982032565974, "grad_norm": 0.28755223751068115, "learning_rate": 9.551820728291317e-06, "loss": 0.6057, "step": 341 }, { "epoch": 0.2880404267265581, "grad_norm": 0.30197960138320923, "learning_rate": 9.579831932773111e-06, "loss": 0.5849, "step": 342 }, { "epoch": 0.2888826501965188, "grad_norm": 0.30697691440582275, "learning_rate": 9.607843137254903e-06, "loss": 0.6486, "step": 343 }, { "epoch": 0.28972487366647953, "grad_norm": 0.28160950541496277, "learning_rate": 9.635854341736696e-06, "loss": 0.6255, "step": 344 }, { "epoch": 0.2905670971364402, "grad_norm": 0.28579211235046387, "learning_rate": 9.663865546218488e-06, "loss": 0.6208, "step": 345 }, { "epoch": 0.2914093206064009, "grad_norm": 0.2600557506084442, "learning_rate": 9.69187675070028e-06, "loss": 0.5557, "step": 346 }, { "epoch": 0.2922515440763616, "grad_norm": 0.2575308382511139, "learning_rate": 9.719887955182074e-06, "loss": 0.5699, "step": 347 }, { "epoch": 0.29309376754632227, "grad_norm": 0.29674142599105835, "learning_rate": 9.747899159663867e-06, "loss": 0.5756, "step": 348 }, { "epoch": 0.293935991016283, "grad_norm": 0.2970217168331146, "learning_rate": 9.775910364145659e-06, "loss": 0.6191, "step": 349 }, { "epoch": 0.2947782144862437, "grad_norm": 0.2925812900066376, "learning_rate": 9.803921568627451e-06, "loss": 0.6146, "step": 350 }, { "epoch": 0.2956204379562044, "grad_norm": 0.28842100501060486, "learning_rate": 9.831932773109244e-06, "loss": 0.5624, "step": 351 }, { "epoch": 0.29646266142616506, "grad_norm": 0.3006283938884735, "learning_rate": 9.859943977591038e-06, "loss": 0.5643, "step": 352 }, { "epoch": 0.2973048848961258, "grad_norm": 0.3097642958164215, "learning_rate": 9.88795518207283e-06, "loss": 0.6295, "step": 353 }, { "epoch": 0.2981471083660865, "grad_norm": 0.299500972032547, "learning_rate": 9.915966386554622e-06, "loss": 0.6237, "step": 354 }, { "epoch": 0.29898933183604715, "grad_norm": 0.2741241753101349, "learning_rate": 9.943977591036416e-06, "loss": 0.5928, "step": 355 }, { "epoch": 0.29983155530600786, "grad_norm": 0.25367021560668945, "learning_rate": 9.971988795518209e-06, "loss": 0.597, "step": 356 }, { "epoch": 0.30067377877596857, "grad_norm": 0.2842653691768646, "learning_rate": 1e-05, "loss": 0.625, "step": 357 }, { "epoch": 0.3015160022459292, "grad_norm": 0.2788257896900177, "learning_rate": 9.99999759644146e-06, "loss": 0.6045, "step": 358 }, { "epoch": 0.30235822571588994, "grad_norm": 0.3037748336791992, "learning_rate": 9.999990385768144e-06, "loss": 0.6378, "step": 359 }, { "epoch": 0.30320044918585065, "grad_norm": 0.3290916681289673, "learning_rate": 9.999978367986988e-06, "loss": 0.5534, "step": 360 }, { "epoch": 0.30404267265581136, "grad_norm": 0.2966806888580322, "learning_rate": 9.999961543109546e-06, "loss": 0.5906, "step": 361 }, { "epoch": 0.304884896125772, "grad_norm": 0.311151921749115, "learning_rate": 9.999939911151992e-06, "loss": 0.6126, "step": 362 }, { "epoch": 0.30572711959573273, "grad_norm": 0.33373013138771057, "learning_rate": 9.999913472135126e-06, "loss": 0.5974, "step": 363 }, { "epoch": 0.30656934306569344, "grad_norm": 0.31550362706184387, "learning_rate": 9.999882226084366e-06, "loss": 0.6073, "step": 364 }, { "epoch": 0.3074115665356541, "grad_norm": 0.2890888750553131, "learning_rate": 9.999846173029752e-06, "loss": 0.6099, "step": 365 }, { "epoch": 0.3082537900056148, "grad_norm": 0.3105107545852661, "learning_rate": 9.999805313005946e-06, "loss": 0.6186, "step": 366 }, { "epoch": 0.3090960134755755, "grad_norm": 0.34104666113853455, "learning_rate": 9.999759646052234e-06, "loss": 0.5892, "step": 367 }, { "epoch": 0.30993823694553624, "grad_norm": 0.33179807662963867, "learning_rate": 9.99970917221252e-06, "loss": 0.5982, "step": 368 }, { "epoch": 0.3107804604154969, "grad_norm": 0.28424715995788574, "learning_rate": 9.99965389153533e-06, "loss": 0.5812, "step": 369 }, { "epoch": 0.3116226838854576, "grad_norm": 0.31297922134399414, "learning_rate": 9.999593804073812e-06, "loss": 0.5979, "step": 370 }, { "epoch": 0.3124649073554183, "grad_norm": 0.28201475739479065, "learning_rate": 9.999528909885738e-06, "loss": 0.5567, "step": 371 }, { "epoch": 0.313307130825379, "grad_norm": 0.29714828729629517, "learning_rate": 9.999459209033495e-06, "loss": 0.6194, "step": 372 }, { "epoch": 0.3141493542953397, "grad_norm": 0.3645322918891907, "learning_rate": 9.999384701584098e-06, "loss": 0.5845, "step": 373 }, { "epoch": 0.3149915777653004, "grad_norm": 0.3254341781139374, "learning_rate": 9.99930538760918e-06, "loss": 0.6078, "step": 374 }, { "epoch": 0.3158338012352611, "grad_norm": 0.2904176115989685, "learning_rate": 9.999221267184993e-06, "loss": 0.5305, "step": 375 }, { "epoch": 0.31667602470522177, "grad_norm": 0.32241684198379517, "learning_rate": 9.999132340392416e-06, "loss": 0.6123, "step": 376 }, { "epoch": 0.3175182481751825, "grad_norm": 0.28825363516807556, "learning_rate": 9.999038607316942e-06, "loss": 0.5862, "step": 377 }, { "epoch": 0.3183604716451432, "grad_norm": 0.2919767498970032, "learning_rate": 9.998940068048688e-06, "loss": 0.6009, "step": 378 }, { "epoch": 0.31920269511510385, "grad_norm": 0.36973923444747925, "learning_rate": 9.998836722682397e-06, "loss": 0.6047, "step": 379 }, { "epoch": 0.32004491858506456, "grad_norm": 0.29019874334335327, "learning_rate": 9.998728571317422e-06, "loss": 0.5985, "step": 380 }, { "epoch": 0.3208871420550253, "grad_norm": 0.2987769544124603, "learning_rate": 9.998615614057743e-06, "loss": 0.6316, "step": 381 }, { "epoch": 0.321729365524986, "grad_norm": 0.32720085978507996, "learning_rate": 9.998497851011963e-06, "loss": 0.6206, "step": 382 }, { "epoch": 0.32257158899494665, "grad_norm": 0.286574125289917, "learning_rate": 9.998375282293298e-06, "loss": 0.6157, "step": 383 }, { "epoch": 0.32341381246490736, "grad_norm": 0.3270733952522278, "learning_rate": 9.998247908019594e-06, "loss": 0.6434, "step": 384 }, { "epoch": 0.32425603593486807, "grad_norm": 0.3130631446838379, "learning_rate": 9.998115728313305e-06, "loss": 0.5606, "step": 385 }, { "epoch": 0.3250982594048287, "grad_norm": 0.317548543214798, "learning_rate": 9.997978743301516e-06, "loss": 0.5993, "step": 386 }, { "epoch": 0.32594048287478944, "grad_norm": 0.28737637400627136, "learning_rate": 9.997836953115927e-06, "loss": 0.6071, "step": 387 }, { "epoch": 0.32678270634475015, "grad_norm": 0.3050701916217804, "learning_rate": 9.997690357892857e-06, "loss": 0.6357, "step": 388 }, { "epoch": 0.32762492981471086, "grad_norm": 0.30053994059562683, "learning_rate": 9.997538957773248e-06, "loss": 0.5891, "step": 389 }, { "epoch": 0.3284671532846715, "grad_norm": 0.2825310528278351, "learning_rate": 9.997382752902658e-06, "loss": 0.5387, "step": 390 }, { "epoch": 0.32930937675463223, "grad_norm": 0.2746358811855316, "learning_rate": 9.997221743431267e-06, "loss": 0.5873, "step": 391 }, { "epoch": 0.33015160022459294, "grad_norm": 0.3565182685852051, "learning_rate": 9.997055929513873e-06, "loss": 0.6481, "step": 392 }, { "epoch": 0.3309938236945536, "grad_norm": 0.27936241030693054, "learning_rate": 9.996885311309892e-06, "loss": 0.6152, "step": 393 }, { "epoch": 0.3318360471645143, "grad_norm": 0.2764663100242615, "learning_rate": 9.996709888983362e-06, "loss": 0.5941, "step": 394 }, { "epoch": 0.332678270634475, "grad_norm": 0.33086487650871277, "learning_rate": 9.99652966270294e-06, "loss": 0.5978, "step": 395 }, { "epoch": 0.3335204941044357, "grad_norm": 0.26096946001052856, "learning_rate": 9.996344632641895e-06, "loss": 0.5896, "step": 396 }, { "epoch": 0.3343627175743964, "grad_norm": 0.30619925260543823, "learning_rate": 9.996154798978122e-06, "loss": 0.5867, "step": 397 }, { "epoch": 0.3352049410443571, "grad_norm": 0.29689934849739075, "learning_rate": 9.995960161894132e-06, "loss": 0.6336, "step": 398 }, { "epoch": 0.3360471645143178, "grad_norm": 0.287649542093277, "learning_rate": 9.995760721577053e-06, "loss": 0.6225, "step": 399 }, { "epoch": 0.3368893879842785, "grad_norm": 0.28658822178840637, "learning_rate": 9.99555647821863e-06, "loss": 0.5883, "step": 400 }, { "epoch": 0.3377316114542392, "grad_norm": 0.26411089301109314, "learning_rate": 9.99534743201523e-06, "loss": 0.5239, "step": 401 }, { "epoch": 0.3385738349241999, "grad_norm": 0.31125351786613464, "learning_rate": 9.995133583167833e-06, "loss": 0.6236, "step": 402 }, { "epoch": 0.33941605839416056, "grad_norm": 0.2889030873775482, "learning_rate": 9.99491493188204e-06, "loss": 0.5917, "step": 403 }, { "epoch": 0.34025828186412127, "grad_norm": 0.30180904269218445, "learning_rate": 9.994691478368067e-06, "loss": 0.5497, "step": 404 }, { "epoch": 0.341100505334082, "grad_norm": 0.2879675030708313, "learning_rate": 9.994463222840748e-06, "loss": 0.6031, "step": 405 }, { "epoch": 0.3419427288040427, "grad_norm": 0.31978437304496765, "learning_rate": 9.994230165519529e-06, "loss": 0.6108, "step": 406 }, { "epoch": 0.34278495227400335, "grad_norm": 0.3402162194252014, "learning_rate": 9.993992306628481e-06, "loss": 0.6047, "step": 407 }, { "epoch": 0.34362717574396406, "grad_norm": 0.25986260175704956, "learning_rate": 9.993749646396286e-06, "loss": 0.5777, "step": 408 }, { "epoch": 0.3444693992139248, "grad_norm": 0.371998131275177, "learning_rate": 9.993502185056244e-06, "loss": 0.6173, "step": 409 }, { "epoch": 0.34531162268388543, "grad_norm": 0.3067457675933838, "learning_rate": 9.993249922846269e-06, "loss": 0.5817, "step": 410 }, { "epoch": 0.34615384615384615, "grad_norm": 0.25754910707473755, "learning_rate": 9.992992860008893e-06, "loss": 0.6036, "step": 411 }, { "epoch": 0.34699606962380686, "grad_norm": 0.30477985739707947, "learning_rate": 9.99273099679126e-06, "loss": 0.6022, "step": 412 }, { "epoch": 0.34783829309376757, "grad_norm": 0.29544609785079956, "learning_rate": 9.992464333445134e-06, "loss": 0.5921, "step": 413 }, { "epoch": 0.3486805165637282, "grad_norm": 0.39228934049606323, "learning_rate": 9.99219287022689e-06, "loss": 0.5738, "step": 414 }, { "epoch": 0.34952274003368894, "grad_norm": 0.2826155126094818, "learning_rate": 9.99191660739752e-06, "loss": 0.5319, "step": 415 }, { "epoch": 0.35036496350364965, "grad_norm": 0.30915045738220215, "learning_rate": 9.991635545222628e-06, "loss": 0.5877, "step": 416 }, { "epoch": 0.3512071869736103, "grad_norm": 0.2759340703487396, "learning_rate": 9.991349683972435e-06, "loss": 0.5629, "step": 417 }, { "epoch": 0.352049410443571, "grad_norm": 0.283549040555954, "learning_rate": 9.991059023921773e-06, "loss": 0.5942, "step": 418 }, { "epoch": 0.35289163391353173, "grad_norm": 0.3547176122665405, "learning_rate": 9.990763565350092e-06, "loss": 0.6081, "step": 419 }, { "epoch": 0.35373385738349244, "grad_norm": 0.30846771597862244, "learning_rate": 9.990463308541452e-06, "loss": 0.6113, "step": 420 }, { "epoch": 0.3545760808534531, "grad_norm": 0.30908066034317017, "learning_rate": 9.990158253784525e-06, "loss": 0.6047, "step": 421 }, { "epoch": 0.3554183043234138, "grad_norm": 0.29910022020339966, "learning_rate": 9.989848401372602e-06, "loss": 0.5539, "step": 422 }, { "epoch": 0.3562605277933745, "grad_norm": 0.2902754247188568, "learning_rate": 9.989533751603578e-06, "loss": 0.618, "step": 423 }, { "epoch": 0.3571027512633352, "grad_norm": 0.3126077651977539, "learning_rate": 9.989214304779965e-06, "loss": 0.6312, "step": 424 }, { "epoch": 0.3579449747332959, "grad_norm": 0.31801289319992065, "learning_rate": 9.988890061208889e-06, "loss": 0.5597, "step": 425 }, { "epoch": 0.3587871982032566, "grad_norm": 0.2828069031238556, "learning_rate": 9.988561021202083e-06, "loss": 0.5827, "step": 426 }, { "epoch": 0.3596294216732173, "grad_norm": 0.2666841745376587, "learning_rate": 9.988227185075897e-06, "loss": 0.5743, "step": 427 }, { "epoch": 0.360471645143178, "grad_norm": 0.28845253586769104, "learning_rate": 9.987888553151285e-06, "loss": 0.6097, "step": 428 }, { "epoch": 0.3613138686131387, "grad_norm": 0.3055371344089508, "learning_rate": 9.987545125753818e-06, "loss": 0.596, "step": 429 }, { "epoch": 0.3621560920830994, "grad_norm": 0.2908022999763489, "learning_rate": 9.987196903213677e-06, "loss": 0.5726, "step": 430 }, { "epoch": 0.36299831555306006, "grad_norm": 0.29283666610717773, "learning_rate": 9.986843885865649e-06, "loss": 0.5835, "step": 431 }, { "epoch": 0.36384053902302077, "grad_norm": 0.27590832114219666, "learning_rate": 9.986486074049131e-06, "loss": 0.5702, "step": 432 }, { "epoch": 0.3646827624929815, "grad_norm": 0.3125220239162445, "learning_rate": 9.986123468108134e-06, "loss": 0.5764, "step": 433 }, { "epoch": 0.3655249859629422, "grad_norm": 0.28861671686172485, "learning_rate": 9.985756068391276e-06, "loss": 0.6008, "step": 434 }, { "epoch": 0.36636720943290285, "grad_norm": 0.29077187180519104, "learning_rate": 9.985383875251783e-06, "loss": 0.5836, "step": 435 }, { "epoch": 0.36720943290286356, "grad_norm": 0.29431429505348206, "learning_rate": 9.985006889047492e-06, "loss": 0.5941, "step": 436 }, { "epoch": 0.3680516563728243, "grad_norm": 0.3162149488925934, "learning_rate": 9.984625110140844e-06, "loss": 0.6213, "step": 437 }, { "epoch": 0.36889387984278493, "grad_norm": 0.28870201110839844, "learning_rate": 9.98423853889889e-06, "loss": 0.5788, "step": 438 }, { "epoch": 0.36973610331274565, "grad_norm": 0.2584831714630127, "learning_rate": 9.983847175693291e-06, "loss": 0.5345, "step": 439 }, { "epoch": 0.37057832678270636, "grad_norm": 0.3190908133983612, "learning_rate": 9.983451020900312e-06, "loss": 0.6024, "step": 440 }, { "epoch": 0.371420550252667, "grad_norm": 0.2836264371871948, "learning_rate": 9.983050074900824e-06, "loss": 0.5983, "step": 441 }, { "epoch": 0.3722627737226277, "grad_norm": 0.3124723434448242, "learning_rate": 9.982644338080308e-06, "loss": 0.5992, "step": 442 }, { "epoch": 0.37310499719258844, "grad_norm": 0.2985681891441345, "learning_rate": 9.982233810828846e-06, "loss": 0.5835, "step": 443 }, { "epoch": 0.37394722066254915, "grad_norm": 0.28582248091697693, "learning_rate": 9.98181849354113e-06, "loss": 0.5898, "step": 444 }, { "epoch": 0.3747894441325098, "grad_norm": 0.27790576219558716, "learning_rate": 9.98139838661646e-06, "loss": 0.5908, "step": 445 }, { "epoch": 0.3756316676024705, "grad_norm": 0.286074161529541, "learning_rate": 9.980973490458728e-06, "loss": 0.5688, "step": 446 }, { "epoch": 0.37647389107243123, "grad_norm": 0.28162631392478943, "learning_rate": 9.980543805476447e-06, "loss": 0.5613, "step": 447 }, { "epoch": 0.3773161145423919, "grad_norm": 0.2655501365661621, "learning_rate": 9.980109332082722e-06, "loss": 0.5862, "step": 448 }, { "epoch": 0.3781583380123526, "grad_norm": 0.31029173731803894, "learning_rate": 9.979670070695265e-06, "loss": 0.6056, "step": 449 }, { "epoch": 0.3790005614823133, "grad_norm": 0.3101709187030792, "learning_rate": 9.979226021736396e-06, "loss": 0.6086, "step": 450 }, { "epoch": 0.379842784952274, "grad_norm": 0.2903156578540802, "learning_rate": 9.978777185633032e-06, "loss": 0.574, "step": 451 }, { "epoch": 0.3806850084222347, "grad_norm": 0.30957308411598206, "learning_rate": 9.978323562816693e-06, "loss": 0.5541, "step": 452 }, { "epoch": 0.3815272318921954, "grad_norm": 0.2907249927520752, "learning_rate": 9.977865153723508e-06, "loss": 0.5814, "step": 453 }, { "epoch": 0.3823694553621561, "grad_norm": 0.3012019991874695, "learning_rate": 9.977401958794194e-06, "loss": 0.5688, "step": 454 }, { "epoch": 0.38321167883211676, "grad_norm": 0.3068523406982422, "learning_rate": 9.976933978474085e-06, "loss": 0.5678, "step": 455 }, { "epoch": 0.3840539023020775, "grad_norm": 0.29860228300094604, "learning_rate": 9.976461213213104e-06, "loss": 0.5564, "step": 456 }, { "epoch": 0.3848961257720382, "grad_norm": 0.2640736401081085, "learning_rate": 9.97598366346578e-06, "loss": 0.5354, "step": 457 }, { "epoch": 0.3857383492419989, "grad_norm": 0.27538251876831055, "learning_rate": 9.975501329691241e-06, "loss": 0.5922, "step": 458 }, { "epoch": 0.38658057271195956, "grad_norm": 0.28571370244026184, "learning_rate": 9.975014212353212e-06, "loss": 0.5756, "step": 459 }, { "epoch": 0.38742279618192027, "grad_norm": 0.2961525022983551, "learning_rate": 9.974522311920021e-06, "loss": 0.6211, "step": 460 }, { "epoch": 0.388265019651881, "grad_norm": 0.2742634415626526, "learning_rate": 9.974025628864592e-06, "loss": 0.5566, "step": 461 }, { "epoch": 0.38910724312184164, "grad_norm": 0.26125699281692505, "learning_rate": 9.973524163664447e-06, "loss": 0.5258, "step": 462 }, { "epoch": 0.38994946659180235, "grad_norm": 0.2871203124523163, "learning_rate": 9.973017916801708e-06, "loss": 0.5742, "step": 463 }, { "epoch": 0.39079169006176306, "grad_norm": 0.28794196248054504, "learning_rate": 9.972506888763092e-06, "loss": 0.5588, "step": 464 }, { "epoch": 0.3916339135317238, "grad_norm": 0.2721008360385895, "learning_rate": 9.971991080039912e-06, "loss": 0.5805, "step": 465 }, { "epoch": 0.39247613700168443, "grad_norm": 0.2713991701602936, "learning_rate": 9.971470491128077e-06, "loss": 0.5558, "step": 466 }, { "epoch": 0.39331836047164515, "grad_norm": 0.2985425293445587, "learning_rate": 9.9709451225281e-06, "loss": 0.5777, "step": 467 }, { "epoch": 0.39416058394160586, "grad_norm": 0.29002735018730164, "learning_rate": 9.970414974745077e-06, "loss": 0.5765, "step": 468 }, { "epoch": 0.3950028074115665, "grad_norm": 0.2989473044872284, "learning_rate": 9.969880048288704e-06, "loss": 0.5804, "step": 469 }, { "epoch": 0.3958450308815272, "grad_norm": 0.26290056109428406, "learning_rate": 9.969340343673277e-06, "loss": 0.5541, "step": 470 }, { "epoch": 0.39668725435148794, "grad_norm": 0.2519911229610443, "learning_rate": 9.968795861417676e-06, "loss": 0.587, "step": 471 }, { "epoch": 0.39752947782144865, "grad_norm": 0.27111974358558655, "learning_rate": 9.96824660204538e-06, "loss": 0.5754, "step": 472 }, { "epoch": 0.3983717012914093, "grad_norm": 0.28742605447769165, "learning_rate": 9.96769256608446e-06, "loss": 0.5931, "step": 473 }, { "epoch": 0.39921392476137, "grad_norm": 0.2936950623989105, "learning_rate": 9.967133754067581e-06, "loss": 0.5605, "step": 474 }, { "epoch": 0.40005614823133073, "grad_norm": 0.2708987593650818, "learning_rate": 9.966570166531997e-06, "loss": 0.5794, "step": 475 }, { "epoch": 0.4008983717012914, "grad_norm": 0.2808172404766083, "learning_rate": 9.966001804019552e-06, "loss": 0.6087, "step": 476 }, { "epoch": 0.4017405951712521, "grad_norm": 0.2726587653160095, "learning_rate": 9.965428667076687e-06, "loss": 0.5751, "step": 477 }, { "epoch": 0.4025828186412128, "grad_norm": 0.2734958529472351, "learning_rate": 9.964850756254426e-06, "loss": 0.5318, "step": 478 }, { "epoch": 0.40342504211117347, "grad_norm": 0.2823241055011749, "learning_rate": 9.964268072108385e-06, "loss": 0.5633, "step": 479 }, { "epoch": 0.4042672655811342, "grad_norm": 0.3200957775115967, "learning_rate": 9.963680615198774e-06, "loss": 0.5961, "step": 480 }, { "epoch": 0.4051094890510949, "grad_norm": 0.2972870171070099, "learning_rate": 9.963088386090386e-06, "loss": 0.5289, "step": 481 }, { "epoch": 0.4059517125210556, "grad_norm": 0.3127574026584625, "learning_rate": 9.962491385352601e-06, "loss": 0.583, "step": 482 }, { "epoch": 0.40679393599101626, "grad_norm": 0.2816845774650574, "learning_rate": 9.961889613559396e-06, "loss": 0.5833, "step": 483 }, { "epoch": 0.407636159460977, "grad_norm": 0.3111397922039032, "learning_rate": 9.961283071289323e-06, "loss": 0.5956, "step": 484 }, { "epoch": 0.4084783829309377, "grad_norm": 0.3070847988128662, "learning_rate": 9.960671759125529e-06, "loss": 0.6039, "step": 485 }, { "epoch": 0.40932060640089835, "grad_norm": 0.2825812101364136, "learning_rate": 9.960055677655743e-06, "loss": 0.5533, "step": 486 }, { "epoch": 0.41016282987085906, "grad_norm": 0.28924474120140076, "learning_rate": 9.959434827472278e-06, "loss": 0.5724, "step": 487 }, { "epoch": 0.41100505334081977, "grad_norm": 0.2941766679286957, "learning_rate": 9.958809209172038e-06, "loss": 0.5721, "step": 488 }, { "epoch": 0.4118472768107805, "grad_norm": 0.2622920870780945, "learning_rate": 9.958178823356503e-06, "loss": 0.5427, "step": 489 }, { "epoch": 0.41268950028074114, "grad_norm": 0.28629186749458313, "learning_rate": 9.957543670631743e-06, "loss": 0.564, "step": 490 }, { "epoch": 0.41353172375070185, "grad_norm": 0.3371151387691498, "learning_rate": 9.956903751608409e-06, "loss": 0.5943, "step": 491 }, { "epoch": 0.41437394722066256, "grad_norm": 0.2797635793685913, "learning_rate": 9.956259066901733e-06, "loss": 0.5456, "step": 492 }, { "epoch": 0.4152161706906232, "grad_norm": 0.3028247654438019, "learning_rate": 9.95560961713153e-06, "loss": 0.5679, "step": 493 }, { "epoch": 0.41605839416058393, "grad_norm": 0.28627875447273254, "learning_rate": 9.954955402922195e-06, "loss": 0.5466, "step": 494 }, { "epoch": 0.41690061763054465, "grad_norm": 0.2852529287338257, "learning_rate": 9.954296424902709e-06, "loss": 0.5717, "step": 495 }, { "epoch": 0.41774284110050536, "grad_norm": 0.31919950246810913, "learning_rate": 9.953632683706624e-06, "loss": 0.62, "step": 496 }, { "epoch": 0.418585064570466, "grad_norm": 0.30590227246284485, "learning_rate": 9.95296417997208e-06, "loss": 0.5919, "step": 497 }, { "epoch": 0.4194272880404267, "grad_norm": 0.2817164957523346, "learning_rate": 9.95229091434179e-06, "loss": 0.5706, "step": 498 }, { "epoch": 0.42026951151038744, "grad_norm": 0.30957427620887756, "learning_rate": 9.95161288746305e-06, "loss": 0.5959, "step": 499 }, { "epoch": 0.4211117349803481, "grad_norm": 0.3075709342956543, "learning_rate": 9.950930099987728e-06, "loss": 0.5757, "step": 500 }, { "epoch": 0.4219539584503088, "grad_norm": 0.2979077994823456, "learning_rate": 9.950242552572272e-06, "loss": 0.5801, "step": 501 }, { "epoch": 0.4227961819202695, "grad_norm": 0.31041932106018066, "learning_rate": 9.949550245877708e-06, "loss": 0.5532, "step": 502 }, { "epoch": 0.42363840539023023, "grad_norm": 0.2895644009113312, "learning_rate": 9.948853180569635e-06, "loss": 0.5834, "step": 503 }, { "epoch": 0.4244806288601909, "grad_norm": 0.3410837948322296, "learning_rate": 9.948151357318228e-06, "loss": 0.56, "step": 504 }, { "epoch": 0.4253228523301516, "grad_norm": 0.3022382855415344, "learning_rate": 9.947444776798235e-06, "loss": 0.5753, "step": 505 }, { "epoch": 0.4261650758001123, "grad_norm": 0.3283178210258484, "learning_rate": 9.946733439688982e-06, "loss": 0.5518, "step": 506 }, { "epoch": 0.42700729927007297, "grad_norm": 0.32419440150260925, "learning_rate": 9.946017346674362e-06, "loss": 0.603, "step": 507 }, { "epoch": 0.4278495227400337, "grad_norm": 0.32701459527015686, "learning_rate": 9.945296498442845e-06, "loss": 0.5579, "step": 508 }, { "epoch": 0.4286917462099944, "grad_norm": 0.32415464520454407, "learning_rate": 9.944570895687471e-06, "loss": 0.5572, "step": 509 }, { "epoch": 0.4295339696799551, "grad_norm": 0.2944953739643097, "learning_rate": 9.943840539105853e-06, "loss": 0.5433, "step": 510 }, { "epoch": 0.43037619314991576, "grad_norm": 0.31668347120285034, "learning_rate": 9.943105429400171e-06, "loss": 0.5463, "step": 511 }, { "epoch": 0.4312184166198765, "grad_norm": 0.3471798002719879, "learning_rate": 9.942365567277178e-06, "loss": 0.5423, "step": 512 }, { "epoch": 0.4320606400898372, "grad_norm": 0.32781630754470825, "learning_rate": 9.941620953448195e-06, "loss": 0.5829, "step": 513 }, { "epoch": 0.43290286355979785, "grad_norm": 0.3048093616962433, "learning_rate": 9.940871588629108e-06, "loss": 0.5816, "step": 514 }, { "epoch": 0.43374508702975856, "grad_norm": 0.3503579795360565, "learning_rate": 9.940117473540377e-06, "loss": 0.5932, "step": 515 }, { "epoch": 0.43458731049971927, "grad_norm": 0.330410361289978, "learning_rate": 9.939358608907026e-06, "loss": 0.5599, "step": 516 }, { "epoch": 0.43542953396968, "grad_norm": 0.31735819578170776, "learning_rate": 9.938594995458644e-06, "loss": 0.5799, "step": 517 }, { "epoch": 0.43627175743964064, "grad_norm": 0.2707124352455139, "learning_rate": 9.937826633929388e-06, "loss": 0.5655, "step": 518 }, { "epoch": 0.43711398090960135, "grad_norm": 0.2993689477443695, "learning_rate": 9.937053525057977e-06, "loss": 0.5771, "step": 519 }, { "epoch": 0.43795620437956206, "grad_norm": 0.3467310667037964, "learning_rate": 9.936275669587697e-06, "loss": 0.5372, "step": 520 }, { "epoch": 0.4387984278495227, "grad_norm": 0.3113314211368561, "learning_rate": 9.935493068266396e-06, "loss": 0.5883, "step": 521 }, { "epoch": 0.43964065131948343, "grad_norm": 0.29431214928627014, "learning_rate": 9.934705721846487e-06, "loss": 0.5862, "step": 522 }, { "epoch": 0.44048287478944415, "grad_norm": 0.29330211877822876, "learning_rate": 9.933913631084942e-06, "loss": 0.5496, "step": 523 }, { "epoch": 0.4413250982594048, "grad_norm": 0.2852388918399811, "learning_rate": 9.933116796743294e-06, "loss": 0.5384, "step": 524 }, { "epoch": 0.4421673217293655, "grad_norm": 0.27741649746894836, "learning_rate": 9.932315219587641e-06, "loss": 0.5591, "step": 525 }, { "epoch": 0.4430095451993262, "grad_norm": 0.2641161382198334, "learning_rate": 9.931508900388635e-06, "loss": 0.5336, "step": 526 }, { "epoch": 0.44385176866928694, "grad_norm": 0.27964410185813904, "learning_rate": 9.930697839921496e-06, "loss": 0.5501, "step": 527 }, { "epoch": 0.4446939921392476, "grad_norm": 0.2807518243789673, "learning_rate": 9.92988203896599e-06, "loss": 0.5471, "step": 528 }, { "epoch": 0.4455362156092083, "grad_norm": 0.2480703592300415, "learning_rate": 9.929061498306448e-06, "loss": 0.5644, "step": 529 }, { "epoch": 0.446378439079169, "grad_norm": 0.29594850540161133, "learning_rate": 9.92823621873176e-06, "loss": 0.5313, "step": 530 }, { "epoch": 0.4472206625491297, "grad_norm": 0.26430532336235046, "learning_rate": 9.927406201035368e-06, "loss": 0.5547, "step": 531 }, { "epoch": 0.4480628860190904, "grad_norm": 0.2848137319087982, "learning_rate": 9.926571446015271e-06, "loss": 0.5517, "step": 532 }, { "epoch": 0.4489051094890511, "grad_norm": 0.3416444957256317, "learning_rate": 9.92573195447402e-06, "loss": 0.5574, "step": 533 }, { "epoch": 0.4497473329590118, "grad_norm": 0.26709824800491333, "learning_rate": 9.924887727218724e-06, "loss": 0.5507, "step": 534 }, { "epoch": 0.45058955642897247, "grad_norm": 0.27491578459739685, "learning_rate": 9.924038765061042e-06, "loss": 0.5599, "step": 535 }, { "epoch": 0.4514317798989332, "grad_norm": 0.27268335223197937, "learning_rate": 9.923185068817184e-06, "loss": 0.5574, "step": 536 }, { "epoch": 0.4522740033688939, "grad_norm": 0.28244131803512573, "learning_rate": 9.922326639307918e-06, "loss": 0.5689, "step": 537 }, { "epoch": 0.45311622683885455, "grad_norm": 0.2768328785896301, "learning_rate": 9.921463477358555e-06, "loss": 0.6184, "step": 538 }, { "epoch": 0.45395845030881526, "grad_norm": 0.2974526286125183, "learning_rate": 9.920595583798959e-06, "loss": 0.5982, "step": 539 }, { "epoch": 0.454800673778776, "grad_norm": 0.28029245138168335, "learning_rate": 9.919722959463545e-06, "loss": 0.5715, "step": 540 }, { "epoch": 0.4556428972487367, "grad_norm": 0.2576904594898224, "learning_rate": 9.918845605191274e-06, "loss": 0.5584, "step": 541 }, { "epoch": 0.45648512071869735, "grad_norm": 0.2636285424232483, "learning_rate": 9.917963521825653e-06, "loss": 0.5595, "step": 542 }, { "epoch": 0.45732734418865806, "grad_norm": 0.26268860697746277, "learning_rate": 9.917076710214739e-06, "loss": 0.5431, "step": 543 }, { "epoch": 0.45816956765861877, "grad_norm": 0.31010252237319946, "learning_rate": 9.916185171211135e-06, "loss": 0.585, "step": 544 }, { "epoch": 0.4590117911285794, "grad_norm": 0.23888449370861053, "learning_rate": 9.915288905671986e-06, "loss": 0.512, "step": 545 }, { "epoch": 0.45985401459854014, "grad_norm": 0.25300729274749756, "learning_rate": 9.914387914458983e-06, "loss": 0.5558, "step": 546 }, { "epoch": 0.46069623806850085, "grad_norm": 0.28950658440589905, "learning_rate": 9.913482198438357e-06, "loss": 0.5892, "step": 547 }, { "epoch": 0.46153846153846156, "grad_norm": 0.2782799005508423, "learning_rate": 9.912571758480892e-06, "loss": 0.6034, "step": 548 }, { "epoch": 0.4623806850084222, "grad_norm": 0.26232120394706726, "learning_rate": 9.911656595461899e-06, "loss": 0.5632, "step": 549 }, { "epoch": 0.46322290847838293, "grad_norm": 0.2710348963737488, "learning_rate": 9.910736710261238e-06, "loss": 0.5507, "step": 550 }, { "epoch": 0.46406513194834365, "grad_norm": 0.2660346031188965, "learning_rate": 9.909812103763312e-06, "loss": 0.5317, "step": 551 }, { "epoch": 0.4649073554183043, "grad_norm": 0.25447556376457214, "learning_rate": 9.908882776857057e-06, "loss": 0.5662, "step": 552 }, { "epoch": 0.465749578888265, "grad_norm": 0.3041627109050751, "learning_rate": 9.90794873043595e-06, "loss": 0.5536, "step": 553 }, { "epoch": 0.4665918023582257, "grad_norm": 0.2859501242637634, "learning_rate": 9.907009965398005e-06, "loss": 0.5459, "step": 554 }, { "epoch": 0.46743402582818644, "grad_norm": 0.28481972217559814, "learning_rate": 9.906066482645774e-06, "loss": 0.5622, "step": 555 }, { "epoch": 0.4682762492981471, "grad_norm": 0.2844685912132263, "learning_rate": 9.905118283086341e-06, "loss": 0.5589, "step": 556 }, { "epoch": 0.4691184727681078, "grad_norm": 0.2621094286441803, "learning_rate": 9.904165367631329e-06, "loss": 0.5253, "step": 557 }, { "epoch": 0.4699606962380685, "grad_norm": 0.26447856426239014, "learning_rate": 9.903207737196892e-06, "loss": 0.543, "step": 558 }, { "epoch": 0.4708029197080292, "grad_norm": 0.26808005571365356, "learning_rate": 9.902245392703719e-06, "loss": 0.5494, "step": 559 }, { "epoch": 0.4716451431779899, "grad_norm": 0.26249802112579346, "learning_rate": 9.901278335077031e-06, "loss": 0.5462, "step": 560 }, { "epoch": 0.4724873666479506, "grad_norm": 0.2437005192041397, "learning_rate": 9.900306565246579e-06, "loss": 0.5368, "step": 561 }, { "epoch": 0.47332959011791126, "grad_norm": 0.29105547070503235, "learning_rate": 9.899330084146646e-06, "loss": 0.5709, "step": 562 }, { "epoch": 0.47417181358787197, "grad_norm": 0.26888585090637207, "learning_rate": 9.898348892716042e-06, "loss": 0.5517, "step": 563 }, { "epoch": 0.4750140370578327, "grad_norm": 0.28115442395210266, "learning_rate": 9.89736299189811e-06, "loss": 0.5578, "step": 564 }, { "epoch": 0.4758562605277934, "grad_norm": 0.2989422678947449, "learning_rate": 9.896372382640718e-06, "loss": 0.5364, "step": 565 }, { "epoch": 0.47669848399775405, "grad_norm": 0.27374622225761414, "learning_rate": 9.895377065896259e-06, "loss": 0.5545, "step": 566 }, { "epoch": 0.47754070746771476, "grad_norm": 0.28134864568710327, "learning_rate": 9.894377042621654e-06, "loss": 0.5546, "step": 567 }, { "epoch": 0.4783829309376755, "grad_norm": 0.31898465752601624, "learning_rate": 9.89337231377835e-06, "loss": 0.5734, "step": 568 }, { "epoch": 0.47922515440763613, "grad_norm": 0.27615052461624146, "learning_rate": 9.892362880332316e-06, "loss": 0.5212, "step": 569 }, { "epoch": 0.48006737787759685, "grad_norm": 0.31586167216300964, "learning_rate": 9.891348743254046e-06, "loss": 0.5283, "step": 570 }, { "epoch": 0.48090960134755756, "grad_norm": 0.2580047845840454, "learning_rate": 9.890329903518554e-06, "loss": 0.5583, "step": 571 }, { "epoch": 0.48175182481751827, "grad_norm": 0.28087735176086426, "learning_rate": 9.889306362105377e-06, "loss": 0.5539, "step": 572 }, { "epoch": 0.4825940482874789, "grad_norm": 0.29097360372543335, "learning_rate": 9.888278119998573e-06, "loss": 0.566, "step": 573 }, { "epoch": 0.48343627175743964, "grad_norm": 0.26751434803009033, "learning_rate": 9.887245178186715e-06, "loss": 0.5874, "step": 574 }, { "epoch": 0.48427849522740035, "grad_norm": 0.30106499791145325, "learning_rate": 9.886207537662899e-06, "loss": 0.5621, "step": 575 }, { "epoch": 0.485120718697361, "grad_norm": 0.3399796187877655, "learning_rate": 9.885165199424738e-06, "loss": 0.5253, "step": 576 }, { "epoch": 0.4859629421673217, "grad_norm": 0.28826215863227844, "learning_rate": 9.884118164474359e-06, "loss": 0.595, "step": 577 }, { "epoch": 0.48680516563728243, "grad_norm": 0.26182663440704346, "learning_rate": 9.883066433818404e-06, "loss": 0.5573, "step": 578 }, { "epoch": 0.48764738910724315, "grad_norm": 0.2832515239715576, "learning_rate": 9.882010008468038e-06, "loss": 0.5151, "step": 579 }, { "epoch": 0.4884896125772038, "grad_norm": 0.33280453085899353, "learning_rate": 9.880948889438923e-06, "loss": 0.5866, "step": 580 }, { "epoch": 0.4893318360471645, "grad_norm": 0.3361736238002777, "learning_rate": 9.879883077751255e-06, "loss": 0.569, "step": 581 }, { "epoch": 0.4901740595171252, "grad_norm": 0.2852288782596588, "learning_rate": 9.878812574429722e-06, "loss": 0.5773, "step": 582 }, { "epoch": 0.4910162829870859, "grad_norm": 0.31822672486305237, "learning_rate": 9.877737380503534e-06, "loss": 0.5478, "step": 583 }, { "epoch": 0.4918585064570466, "grad_norm": 0.3404358923435211, "learning_rate": 9.876657497006408e-06, "loss": 0.5601, "step": 584 }, { "epoch": 0.4927007299270073, "grad_norm": 0.3061826527118683, "learning_rate": 9.875572924976568e-06, "loss": 0.5282, "step": 585 }, { "epoch": 0.493542953396968, "grad_norm": 0.3043714761734009, "learning_rate": 9.874483665456746e-06, "loss": 0.5613, "step": 586 }, { "epoch": 0.4943851768669287, "grad_norm": 0.32635340094566345, "learning_rate": 9.873389719494186e-06, "loss": 0.5576, "step": 587 }, { "epoch": 0.4952274003368894, "grad_norm": 0.3158304691314697, "learning_rate": 9.87229108814063e-06, "loss": 0.5488, "step": 588 }, { "epoch": 0.4960696238068501, "grad_norm": 0.3049567937850952, "learning_rate": 9.871187772452327e-06, "loss": 0.5862, "step": 589 }, { "epoch": 0.49691184727681076, "grad_norm": 0.30487871170043945, "learning_rate": 9.870079773490033e-06, "loss": 0.5669, "step": 590 }, { "epoch": 0.49775407074677147, "grad_norm": 0.3044317960739136, "learning_rate": 9.868967092319003e-06, "loss": 0.5449, "step": 591 }, { "epoch": 0.4985962942167322, "grad_norm": 0.31184467673301697, "learning_rate": 9.867849730008994e-06, "loss": 0.5554, "step": 592 }, { "epoch": 0.4994385176866929, "grad_norm": 0.30911222100257874, "learning_rate": 9.866727687634266e-06, "loss": 0.5706, "step": 593 }, { "epoch": 0.5002807411566536, "grad_norm": 0.3000037968158722, "learning_rate": 9.865600966273576e-06, "loss": 0.4982, "step": 594 }, { "epoch": 0.5011229646266142, "grad_norm": 0.3167884051799774, "learning_rate": 9.86446956701018e-06, "loss": 0.5716, "step": 595 }, { "epoch": 0.501965188096575, "grad_norm": 0.31100404262542725, "learning_rate": 9.86333349093183e-06, "loss": 0.5655, "step": 596 }, { "epoch": 0.5028074115665356, "grad_norm": 0.2798563838005066, "learning_rate": 9.86219273913078e-06, "loss": 0.5357, "step": 597 }, { "epoch": 0.5036496350364964, "grad_norm": 0.29137733578681946, "learning_rate": 9.861047312703772e-06, "loss": 0.5282, "step": 598 }, { "epoch": 0.5044918585064571, "grad_norm": 0.3712019622325897, "learning_rate": 9.859897212752049e-06, "loss": 0.5948, "step": 599 }, { "epoch": 0.5053340819764177, "grad_norm": 0.2836729884147644, "learning_rate": 9.858742440381343e-06, "loss": 0.5605, "step": 600 }, { "epoch": 0.5061763054463785, "grad_norm": 0.2974667251110077, "learning_rate": 9.857582996701878e-06, "loss": 0.5754, "step": 601 }, { "epoch": 0.5070185289163391, "grad_norm": 0.30603906512260437, "learning_rate": 9.856418882828368e-06, "loss": 0.5803, "step": 602 }, { "epoch": 0.5078607523862998, "grad_norm": 0.26042771339416504, "learning_rate": 9.855250099880026e-06, "loss": 0.5505, "step": 603 }, { "epoch": 0.5087029758562606, "grad_norm": 0.2775833308696747, "learning_rate": 9.854076648980543e-06, "loss": 0.5752, "step": 604 }, { "epoch": 0.5095451993262212, "grad_norm": 0.29585912823677063, "learning_rate": 9.852898531258102e-06, "loss": 0.5707, "step": 605 }, { "epoch": 0.5103874227961819, "grad_norm": 0.2672126293182373, "learning_rate": 9.851715747845372e-06, "loss": 0.5612, "step": 606 }, { "epoch": 0.5112296462661426, "grad_norm": 0.2686072885990143, "learning_rate": 9.850528299879513e-06, "loss": 0.5422, "step": 607 }, { "epoch": 0.5120718697361033, "grad_norm": 0.2555972635746002, "learning_rate": 9.84933618850216e-06, "loss": 0.5448, "step": 608 }, { "epoch": 0.512914093206064, "grad_norm": 0.28891631960868835, "learning_rate": 9.848139414859441e-06, "loss": 0.565, "step": 609 }, { "epoch": 0.5137563166760247, "grad_norm": 0.2660050690174103, "learning_rate": 9.84693798010196e-06, "loss": 0.5461, "step": 610 }, { "epoch": 0.5145985401459854, "grad_norm": 0.27829819917678833, "learning_rate": 9.845731885384806e-06, "loss": 0.5391, "step": 611 }, { "epoch": 0.5154407636159462, "grad_norm": 0.2501044273376465, "learning_rate": 9.844521131867546e-06, "loss": 0.531, "step": 612 }, { "epoch": 0.5162829870859068, "grad_norm": 0.2777661681175232, "learning_rate": 9.843305720714227e-06, "loss": 0.5451, "step": 613 }, { "epoch": 0.5171252105558675, "grad_norm": 0.29411131143569946, "learning_rate": 9.842085653093372e-06, "loss": 0.5753, "step": 614 }, { "epoch": 0.5179674340258282, "grad_norm": 0.290451318025589, "learning_rate": 9.840860930177984e-06, "loss": 0.5631, "step": 615 }, { "epoch": 0.5188096574957889, "grad_norm": 0.27914848923683167, "learning_rate": 9.83963155314554e-06, "loss": 0.5219, "step": 616 }, { "epoch": 0.5196518809657495, "grad_norm": 0.3240012526512146, "learning_rate": 9.838397523177993e-06, "loss": 0.5831, "step": 617 }, { "epoch": 0.5204941044357103, "grad_norm": 0.27916327118873596, "learning_rate": 9.837158841461767e-06, "loss": 0.5527, "step": 618 }, { "epoch": 0.521336327905671, "grad_norm": 0.3210696876049042, "learning_rate": 9.835915509187759e-06, "loss": 0.5759, "step": 619 }, { "epoch": 0.5221785513756316, "grad_norm": 0.28243666887283325, "learning_rate": 9.834667527551341e-06, "loss": 0.5404, "step": 620 }, { "epoch": 0.5230207748455924, "grad_norm": 0.3341071903705597, "learning_rate": 9.833414897752346e-06, "loss": 0.5337, "step": 621 }, { "epoch": 0.523862998315553, "grad_norm": 0.2805865406990051, "learning_rate": 9.832157620995088e-06, "loss": 0.5554, "step": 622 }, { "epoch": 0.5247052217855137, "grad_norm": 0.300152987241745, "learning_rate": 9.830895698488341e-06, "loss": 0.5466, "step": 623 }, { "epoch": 0.5255474452554745, "grad_norm": 0.3191191554069519, "learning_rate": 9.829629131445342e-06, "loss": 0.565, "step": 624 }, { "epoch": 0.5263896687254351, "grad_norm": 0.2937040328979492, "learning_rate": 9.828357921083803e-06, "loss": 0.51, "step": 625 }, { "epoch": 0.5272318921953958, "grad_norm": 0.30944088101387024, "learning_rate": 9.827082068625893e-06, "loss": 0.5384, "step": 626 }, { "epoch": 0.5280741156653566, "grad_norm": 0.30448734760284424, "learning_rate": 9.825801575298248e-06, "loss": 0.5954, "step": 627 }, { "epoch": 0.5289163391353172, "grad_norm": 0.2866262197494507, "learning_rate": 9.824516442331963e-06, "loss": 0.5928, "step": 628 }, { "epoch": 0.529758562605278, "grad_norm": 0.35177361965179443, "learning_rate": 9.823226670962598e-06, "loss": 0.5435, "step": 629 }, { "epoch": 0.5306007860752386, "grad_norm": 0.2727670967578888, "learning_rate": 9.821932262430164e-06, "loss": 0.5674, "step": 630 }, { "epoch": 0.5314430095451993, "grad_norm": 0.2828494906425476, "learning_rate": 9.82063321797914e-06, "loss": 0.5482, "step": 631 }, { "epoch": 0.5322852330151601, "grad_norm": 0.3056531846523285, "learning_rate": 9.819329538858458e-06, "loss": 0.55, "step": 632 }, { "epoch": 0.5331274564851207, "grad_norm": 0.25969940423965454, "learning_rate": 9.818021226321502e-06, "loss": 0.5186, "step": 633 }, { "epoch": 0.5339696799550814, "grad_norm": 0.3123166859149933, "learning_rate": 9.816708281626116e-06, "loss": 0.5269, "step": 634 }, { "epoch": 0.5348119034250421, "grad_norm": 0.3389025628566742, "learning_rate": 9.815390706034598e-06, "loss": 0.576, "step": 635 }, { "epoch": 0.5356541268950028, "grad_norm": 0.27614694833755493, "learning_rate": 9.814068500813692e-06, "loss": 0.5733, "step": 636 }, { "epoch": 0.5364963503649635, "grad_norm": 0.3287207782268524, "learning_rate": 9.812741667234599e-06, "loss": 0.5282, "step": 637 }, { "epoch": 0.5373385738349242, "grad_norm": 0.30682307481765747, "learning_rate": 9.811410206572972e-06, "loss": 0.5053, "step": 638 }, { "epoch": 0.5381807973048849, "grad_norm": 0.27649804949760437, "learning_rate": 9.8100741201089e-06, "loss": 0.5526, "step": 639 }, { "epoch": 0.5390230207748455, "grad_norm": 0.3138522207736969, "learning_rate": 9.808733409126934e-06, "loss": 0.5615, "step": 640 }, { "epoch": 0.5398652442448063, "grad_norm": 0.2902156710624695, "learning_rate": 9.807388074916064e-06, "loss": 0.5539, "step": 641 }, { "epoch": 0.540707467714767, "grad_norm": 0.27175477147102356, "learning_rate": 9.806038118769724e-06, "loss": 0.5335, "step": 642 }, { "epoch": 0.5415496911847277, "grad_norm": 0.2899860143661499, "learning_rate": 9.804683541985796e-06, "loss": 0.5851, "step": 643 }, { "epoch": 0.5423919146546884, "grad_norm": 0.2765223979949951, "learning_rate": 9.803324345866599e-06, "loss": 0.5426, "step": 644 }, { "epoch": 0.543234138124649, "grad_norm": 0.2824662923812866, "learning_rate": 9.801960531718898e-06, "loss": 0.5582, "step": 645 }, { "epoch": 0.5440763615946098, "grad_norm": 0.287604957818985, "learning_rate": 9.800592100853894e-06, "loss": 0.5421, "step": 646 }, { "epoch": 0.5449185850645705, "grad_norm": 0.24745766818523407, "learning_rate": 9.79921905458723e-06, "loss": 0.5372, "step": 647 }, { "epoch": 0.5457608085345311, "grad_norm": 0.29974305629730225, "learning_rate": 9.797841394238987e-06, "loss": 0.5541, "step": 648 }, { "epoch": 0.5466030320044919, "grad_norm": 0.2882145345211029, "learning_rate": 9.796459121133675e-06, "loss": 0.5513, "step": 649 }, { "epoch": 0.5474452554744526, "grad_norm": 0.2727745473384857, "learning_rate": 9.795072236600247e-06, "loss": 0.5289, "step": 650 }, { "epoch": 0.5482874789444132, "grad_norm": 0.3126402497291565, "learning_rate": 9.793680741972084e-06, "loss": 0.5557, "step": 651 }, { "epoch": 0.549129702414374, "grad_norm": 0.25911572575569153, "learning_rate": 9.792284638587005e-06, "loss": 0.5361, "step": 652 }, { "epoch": 0.5499719258843346, "grad_norm": 0.2662903070449829, "learning_rate": 9.790883927787254e-06, "loss": 0.5221, "step": 653 }, { "epoch": 0.5508141493542953, "grad_norm": 0.2707592248916626, "learning_rate": 9.789478610919508e-06, "loss": 0.5002, "step": 654 }, { "epoch": 0.5516563728242561, "grad_norm": 0.27932313084602356, "learning_rate": 9.78806868933487e-06, "loss": 0.5423, "step": 655 }, { "epoch": 0.5524985962942167, "grad_norm": 0.28952470421791077, "learning_rate": 9.786654164388873e-06, "loss": 0.546, "step": 656 }, { "epoch": 0.5533408197641775, "grad_norm": 0.28274601697921753, "learning_rate": 9.785235037441473e-06, "loss": 0.5413, "step": 657 }, { "epoch": 0.5541830432341381, "grad_norm": 0.28720128536224365, "learning_rate": 9.783811309857057e-06, "loss": 0.5529, "step": 658 }, { "epoch": 0.5550252667040988, "grad_norm": 0.31252020597457886, "learning_rate": 9.782382983004424e-06, "loss": 0.5376, "step": 659 }, { "epoch": 0.5558674901740596, "grad_norm": 0.29948094487190247, "learning_rate": 9.780950058256802e-06, "loss": 0.5565, "step": 660 }, { "epoch": 0.5567097136440202, "grad_norm": 0.29022789001464844, "learning_rate": 9.779512536991839e-06, "loss": 0.591, "step": 661 }, { "epoch": 0.5575519371139809, "grad_norm": 0.30034705996513367, "learning_rate": 9.778070420591603e-06, "loss": 0.5582, "step": 662 }, { "epoch": 0.5583941605839416, "grad_norm": 0.2657175660133362, "learning_rate": 9.77662371044258e-06, "loss": 0.5035, "step": 663 }, { "epoch": 0.5592363840539023, "grad_norm": 0.31835946440696716, "learning_rate": 9.775172407935664e-06, "loss": 0.5725, "step": 664 }, { "epoch": 0.560078607523863, "grad_norm": 0.3749072551727295, "learning_rate": 9.773716514466179e-06, "loss": 0.6075, "step": 665 }, { "epoch": 0.5609208309938237, "grad_norm": 0.32358434796333313, "learning_rate": 9.77225603143385e-06, "loss": 0.5172, "step": 666 }, { "epoch": 0.5617630544637844, "grad_norm": 0.30132830142974854, "learning_rate": 9.770790960242821e-06, "loss": 0.5402, "step": 667 }, { "epoch": 0.562605277933745, "grad_norm": 0.28154462575912476, "learning_rate": 9.769321302301648e-06, "loss": 0.5271, "step": 668 }, { "epoch": 0.5634475014037058, "grad_norm": 0.304368793964386, "learning_rate": 9.767847059023292e-06, "loss": 0.5463, "step": 669 }, { "epoch": 0.5642897248736665, "grad_norm": 0.33246830105781555, "learning_rate": 9.766368231825126e-06, "loss": 0.5679, "step": 670 }, { "epoch": 0.5651319483436271, "grad_norm": 0.2923736274242401, "learning_rate": 9.764884822128928e-06, "loss": 0.5101, "step": 671 }, { "epoch": 0.5659741718135879, "grad_norm": 0.2547902464866638, "learning_rate": 9.763396831360884e-06, "loss": 0.5071, "step": 672 }, { "epoch": 0.5668163952835485, "grad_norm": 0.2953874170780182, "learning_rate": 9.761904260951583e-06, "loss": 0.529, "step": 673 }, { "epoch": 0.5676586187535093, "grad_norm": 0.3329527974128723, "learning_rate": 9.760407112336016e-06, "loss": 0.5137, "step": 674 }, { "epoch": 0.56850084222347, "grad_norm": 0.36898308992385864, "learning_rate": 9.75890538695358e-06, "loss": 0.5546, "step": 675 }, { "epoch": 0.5693430656934306, "grad_norm": 0.27325403690338135, "learning_rate": 9.757399086248062e-06, "loss": 0.5256, "step": 676 }, { "epoch": 0.5701852891633914, "grad_norm": 0.3069285452365875, "learning_rate": 9.755888211667663e-06, "loss": 0.5044, "step": 677 }, { "epoch": 0.571027512633352, "grad_norm": 0.3612178862094879, "learning_rate": 9.75437276466497e-06, "loss": 0.5796, "step": 678 }, { "epoch": 0.5718697361033127, "grad_norm": 0.27157002687454224, "learning_rate": 9.752852746696968e-06, "loss": 0.5354, "step": 679 }, { "epoch": 0.5727119595732735, "grad_norm": 0.31574705243110657, "learning_rate": 9.751328159225037e-06, "loss": 0.5315, "step": 680 }, { "epoch": 0.5735541830432341, "grad_norm": 0.3401617109775543, "learning_rate": 9.749799003714954e-06, "loss": 0.566, "step": 681 }, { "epoch": 0.5743964065131948, "grad_norm": 0.30500537157058716, "learning_rate": 9.748265281636885e-06, "loss": 0.5536, "step": 682 }, { "epoch": 0.5752386299831556, "grad_norm": 0.3128681182861328, "learning_rate": 9.746726994465383e-06, "loss": 0.5239, "step": 683 }, { "epoch": 0.5760808534531162, "grad_norm": 0.305155873298645, "learning_rate": 9.745184143679398e-06, "loss": 0.541, "step": 684 }, { "epoch": 0.5769230769230769, "grad_norm": 0.28788790106773376, "learning_rate": 9.743636730762259e-06, "loss": 0.5504, "step": 685 }, { "epoch": 0.5777653003930376, "grad_norm": 0.328341543674469, "learning_rate": 9.742084757201684e-06, "loss": 0.5404, "step": 686 }, { "epoch": 0.5786075238629983, "grad_norm": 0.2818812131881714, "learning_rate": 9.74052822448978e-06, "loss": 0.552, "step": 687 }, { "epoch": 0.5794497473329591, "grad_norm": 0.2618277966976166, "learning_rate": 9.738967134123035e-06, "loss": 0.5251, "step": 688 }, { "epoch": 0.5802919708029197, "grad_norm": 0.29583173990249634, "learning_rate": 9.737401487602314e-06, "loss": 0.5474, "step": 689 }, { "epoch": 0.5811341942728804, "grad_norm": 0.31271207332611084, "learning_rate": 9.735831286432869e-06, "loss": 0.5506, "step": 690 }, { "epoch": 0.5819764177428411, "grad_norm": 0.2589702010154724, "learning_rate": 9.734256532124326e-06, "loss": 0.5165, "step": 691 }, { "epoch": 0.5828186412128018, "grad_norm": 0.29388707876205444, "learning_rate": 9.732677226190692e-06, "loss": 0.4927, "step": 692 }, { "epoch": 0.5836608646827625, "grad_norm": 0.2685982584953308, "learning_rate": 9.731093370150349e-06, "loss": 0.5456, "step": 693 }, { "epoch": 0.5845030881527232, "grad_norm": 0.27857697010040283, "learning_rate": 9.729504965526053e-06, "loss": 0.5412, "step": 694 }, { "epoch": 0.5853453116226839, "grad_norm": 0.34779441356658936, "learning_rate": 9.727912013844933e-06, "loss": 0.5662, "step": 695 }, { "epoch": 0.5861875350926445, "grad_norm": 0.26133742928504944, "learning_rate": 9.72631451663849e-06, "loss": 0.5334, "step": 696 }, { "epoch": 0.5870297585626053, "grad_norm": 0.28489023447036743, "learning_rate": 9.724712475442597e-06, "loss": 0.5335, "step": 697 }, { "epoch": 0.587871982032566, "grad_norm": 0.28201156854629517, "learning_rate": 9.72310589179749e-06, "loss": 0.5737, "step": 698 }, { "epoch": 0.5887142055025266, "grad_norm": 0.2620236277580261, "learning_rate": 9.721494767247779e-06, "loss": 0.5262, "step": 699 }, { "epoch": 0.5895564289724874, "grad_norm": 0.24238553643226624, "learning_rate": 9.719879103342438e-06, "loss": 0.5089, "step": 700 }, { "epoch": 0.590398652442448, "grad_norm": 0.2871243357658386, "learning_rate": 9.718258901634802e-06, "loss": 0.5283, "step": 701 }, { "epoch": 0.5912408759124088, "grad_norm": 0.27586671710014343, "learning_rate": 9.71663416368257e-06, "loss": 0.5316, "step": 702 }, { "epoch": 0.5920830993823695, "grad_norm": 0.24755996465682983, "learning_rate": 9.715004891047805e-06, "loss": 0.5048, "step": 703 }, { "epoch": 0.5929253228523301, "grad_norm": 0.2543037235736847, "learning_rate": 9.71337108529693e-06, "loss": 0.5027, "step": 704 }, { "epoch": 0.5937675463222909, "grad_norm": 0.2692468464374542, "learning_rate": 9.71173274800072e-06, "loss": 0.5424, "step": 705 }, { "epoch": 0.5946097697922516, "grad_norm": 0.28084051609039307, "learning_rate": 9.71008988073431e-06, "loss": 0.5384, "step": 706 }, { "epoch": 0.5954519932622122, "grad_norm": 0.2592713236808777, "learning_rate": 9.708442485077197e-06, "loss": 0.5454, "step": 707 }, { "epoch": 0.596294216732173, "grad_norm": 0.26486143469810486, "learning_rate": 9.70679056261322e-06, "loss": 0.5208, "step": 708 }, { "epoch": 0.5971364402021336, "grad_norm": 0.2715683579444885, "learning_rate": 9.70513411493058e-06, "loss": 0.5187, "step": 709 }, { "epoch": 0.5979786636720943, "grad_norm": 0.31581470370292664, "learning_rate": 9.70347314362182e-06, "loss": 0.5574, "step": 710 }, { "epoch": 0.5988208871420551, "grad_norm": 0.26435837149620056, "learning_rate": 9.70180765028384e-06, "loss": 0.5627, "step": 711 }, { "epoch": 0.5996631106120157, "grad_norm": 0.2888749837875366, "learning_rate": 9.700137636517884e-06, "loss": 0.5553, "step": 712 }, { "epoch": 0.6005053340819764, "grad_norm": 0.26229944825172424, "learning_rate": 9.698463103929542e-06, "loss": 0.4944, "step": 713 }, { "epoch": 0.6013475575519371, "grad_norm": 0.2774117588996887, "learning_rate": 9.696784054128749e-06, "loss": 0.5361, "step": 714 }, { "epoch": 0.6021897810218978, "grad_norm": 0.2632806599140167, "learning_rate": 9.695100488729784e-06, "loss": 0.5615, "step": 715 }, { "epoch": 0.6030320044918585, "grad_norm": 0.2651554346084595, "learning_rate": 9.693412409351264e-06, "loss": 0.5624, "step": 716 }, { "epoch": 0.6038742279618192, "grad_norm": 0.29611605405807495, "learning_rate": 9.691719817616148e-06, "loss": 0.542, "step": 717 }, { "epoch": 0.6047164514317799, "grad_norm": 0.27711188793182373, "learning_rate": 9.690022715151734e-06, "loss": 0.5561, "step": 718 }, { "epoch": 0.6055586749017406, "grad_norm": 0.25447237491607666, "learning_rate": 9.688321103589659e-06, "loss": 0.5249, "step": 719 }, { "epoch": 0.6064008983717013, "grad_norm": 0.29457733035087585, "learning_rate": 9.686614984565888e-06, "loss": 0.4951, "step": 720 }, { "epoch": 0.607243121841662, "grad_norm": 0.28455182909965515, "learning_rate": 9.684904359720724e-06, "loss": 0.5446, "step": 721 }, { "epoch": 0.6080853453116227, "grad_norm": 0.2887328565120697, "learning_rate": 9.683189230698804e-06, "loss": 0.5743, "step": 722 }, { "epoch": 0.6089275687815834, "grad_norm": 0.2567411959171295, "learning_rate": 9.681469599149093e-06, "loss": 0.5335, "step": 723 }, { "epoch": 0.609769792251544, "grad_norm": 0.250745564699173, "learning_rate": 9.679745466724884e-06, "loss": 0.5098, "step": 724 }, { "epoch": 0.6106120157215048, "grad_norm": 0.25986871123313904, "learning_rate": 9.678016835083798e-06, "loss": 0.5485, "step": 725 }, { "epoch": 0.6114542391914655, "grad_norm": 0.2616027295589447, "learning_rate": 9.676283705887783e-06, "loss": 0.4951, "step": 726 }, { "epoch": 0.6122964626614261, "grad_norm": 0.26990678906440735, "learning_rate": 9.674546080803109e-06, "loss": 0.5246, "step": 727 }, { "epoch": 0.6131386861313869, "grad_norm": 0.27456650137901306, "learning_rate": 9.67280396150037e-06, "loss": 0.5571, "step": 728 }, { "epoch": 0.6139809096013475, "grad_norm": 0.2868647277355194, "learning_rate": 9.671057349654481e-06, "loss": 0.5372, "step": 729 }, { "epoch": 0.6148231330713082, "grad_norm": 0.2722848951816559, "learning_rate": 9.669306246944674e-06, "loss": 0.5284, "step": 730 }, { "epoch": 0.615665356541269, "grad_norm": 0.2949526309967041, "learning_rate": 9.6675506550545e-06, "loss": 0.5591, "step": 731 }, { "epoch": 0.6165075800112296, "grad_norm": 0.2903030216693878, "learning_rate": 9.66579057567183e-06, "loss": 0.546, "step": 732 }, { "epoch": 0.6173498034811904, "grad_norm": 0.35168275237083435, "learning_rate": 9.66402601048884e-06, "loss": 0.5243, "step": 733 }, { "epoch": 0.618192026951151, "grad_norm": 0.29197341203689575, "learning_rate": 9.662256961202028e-06, "loss": 0.5273, "step": 734 }, { "epoch": 0.6190342504211117, "grad_norm": 0.32244163751602173, "learning_rate": 9.660483429512198e-06, "loss": 0.5357, "step": 735 }, { "epoch": 0.6198764738910725, "grad_norm": 0.28133445978164673, "learning_rate": 9.658705417124466e-06, "loss": 0.5185, "step": 736 }, { "epoch": 0.6207186973610331, "grad_norm": 0.31020379066467285, "learning_rate": 9.656922925748254e-06, "loss": 0.5352, "step": 737 }, { "epoch": 0.6215609208309938, "grad_norm": 0.2960975468158722, "learning_rate": 9.65513595709729e-06, "loss": 0.5272, "step": 738 }, { "epoch": 0.6224031443009546, "grad_norm": 0.29433655738830566, "learning_rate": 9.653344512889608e-06, "loss": 0.5584, "step": 739 }, { "epoch": 0.6232453677709152, "grad_norm": 0.25807225704193115, "learning_rate": 9.651548594847546e-06, "loss": 0.5412, "step": 740 }, { "epoch": 0.6240875912408759, "grad_norm": 0.29804277420043945, "learning_rate": 9.649748204697741e-06, "loss": 0.532, "step": 741 }, { "epoch": 0.6249298147108366, "grad_norm": 0.26192671060562134, "learning_rate": 9.647943344171129e-06, "loss": 0.5216, "step": 742 }, { "epoch": 0.6257720381807973, "grad_norm": 0.27286529541015625, "learning_rate": 9.646134015002946e-06, "loss": 0.5366, "step": 743 }, { "epoch": 0.626614261650758, "grad_norm": 0.28849753737449646, "learning_rate": 9.644320218932723e-06, "loss": 0.55, "step": 744 }, { "epoch": 0.6274564851207187, "grad_norm": 0.24898023903369904, "learning_rate": 9.642501957704287e-06, "loss": 0.5027, "step": 745 }, { "epoch": 0.6282987085906794, "grad_norm": 0.2840454578399658, "learning_rate": 9.640679233065755e-06, "loss": 0.5325, "step": 746 }, { "epoch": 0.62914093206064, "grad_norm": 0.2941761016845703, "learning_rate": 9.63885204676954e-06, "loss": 0.5075, "step": 747 }, { "epoch": 0.6299831555306008, "grad_norm": 0.2872334122657776, "learning_rate": 9.637020400572339e-06, "loss": 0.5628, "step": 748 }, { "epoch": 0.6308253790005615, "grad_norm": 0.3049048185348511, "learning_rate": 9.63518429623514e-06, "loss": 0.5708, "step": 749 }, { "epoch": 0.6316676024705222, "grad_norm": 0.27694258093833923, "learning_rate": 9.63334373552322e-06, "loss": 0.5426, "step": 750 }, { "epoch": 0.6325098259404829, "grad_norm": 0.27333056926727295, "learning_rate": 9.631498720206132e-06, "loss": 0.5491, "step": 751 }, { "epoch": 0.6333520494104435, "grad_norm": 0.2796083390712738, "learning_rate": 9.62964925205772e-06, "loss": 0.5144, "step": 752 }, { "epoch": 0.6341942728804043, "grad_norm": 0.32730987668037415, "learning_rate": 9.627795332856107e-06, "loss": 0.5391, "step": 753 }, { "epoch": 0.635036496350365, "grad_norm": 0.2712855935096741, "learning_rate": 9.625936964383691e-06, "loss": 0.5193, "step": 754 }, { "epoch": 0.6358787198203256, "grad_norm": 0.3123883605003357, "learning_rate": 9.624074148427154e-06, "loss": 0.5368, "step": 755 }, { "epoch": 0.6367209432902864, "grad_norm": 0.3086954653263092, "learning_rate": 9.622206886777448e-06, "loss": 0.516, "step": 756 }, { "epoch": 0.637563166760247, "grad_norm": 0.2965905964374542, "learning_rate": 9.620335181229805e-06, "loss": 0.5309, "step": 757 }, { "epoch": 0.6384053902302077, "grad_norm": 0.28992176055908203, "learning_rate": 9.618459033583725e-06, "loss": 0.5452, "step": 758 }, { "epoch": 0.6392476137001685, "grad_norm": 0.2970007359981537, "learning_rate": 9.616578445642982e-06, "loss": 0.5149, "step": 759 }, { "epoch": 0.6400898371701291, "grad_norm": 0.3202485740184784, "learning_rate": 9.614693419215613e-06, "loss": 0.5464, "step": 760 }, { "epoch": 0.6409320606400898, "grad_norm": 0.2665902376174927, "learning_rate": 9.612803956113932e-06, "loss": 0.4741, "step": 761 }, { "epoch": 0.6417742841100506, "grad_norm": 0.26840388774871826, "learning_rate": 9.61091005815451e-06, "loss": 0.5365, "step": 762 }, { "epoch": 0.6426165075800112, "grad_norm": 0.3106727600097656, "learning_rate": 9.609011727158184e-06, "loss": 0.5471, "step": 763 }, { "epoch": 0.643458731049972, "grad_norm": 0.2658637762069702, "learning_rate": 9.607108964950056e-06, "loss": 0.4799, "step": 764 }, { "epoch": 0.6443009545199326, "grad_norm": 0.30031299591064453, "learning_rate": 9.605201773359485e-06, "loss": 0.5272, "step": 765 }, { "epoch": 0.6451431779898933, "grad_norm": 0.2806590497493744, "learning_rate": 9.603290154220091e-06, "loss": 0.5285, "step": 766 }, { "epoch": 0.6459854014598541, "grad_norm": 0.28272736072540283, "learning_rate": 9.601374109369746e-06, "loss": 0.5315, "step": 767 }, { "epoch": 0.6468276249298147, "grad_norm": 0.2869163155555725, "learning_rate": 9.599453640650585e-06, "loss": 0.5362, "step": 768 }, { "epoch": 0.6476698483997754, "grad_norm": 0.2983999252319336, "learning_rate": 9.59752874990899e-06, "loss": 0.5333, "step": 769 }, { "epoch": 0.6485120718697361, "grad_norm": 0.2709924876689911, "learning_rate": 9.595599438995593e-06, "loss": 0.5507, "step": 770 }, { "epoch": 0.6493542953396968, "grad_norm": 0.26626572012901306, "learning_rate": 9.59366570976528e-06, "loss": 0.5443, "step": 771 }, { "epoch": 0.6501965188096575, "grad_norm": 0.2706218659877777, "learning_rate": 9.591727564077189e-06, "loss": 0.5248, "step": 772 }, { "epoch": 0.6510387422796182, "grad_norm": 0.28881949186325073, "learning_rate": 9.589785003794692e-06, "loss": 0.5392, "step": 773 }, { "epoch": 0.6518809657495789, "grad_norm": 0.2802217900753021, "learning_rate": 9.587838030785413e-06, "loss": 0.5243, "step": 774 }, { "epoch": 0.6527231892195395, "grad_norm": 0.2461254596710205, "learning_rate": 9.585886646921221e-06, "loss": 0.511, "step": 775 }, { "epoch": 0.6535654126895003, "grad_norm": 0.2964099049568176, "learning_rate": 9.583930854078219e-06, "loss": 0.5717, "step": 776 }, { "epoch": 0.654407636159461, "grad_norm": 0.2655602693557739, "learning_rate": 9.581970654136752e-06, "loss": 0.5224, "step": 777 }, { "epoch": 0.6552498596294217, "grad_norm": 0.27261045575141907, "learning_rate": 9.580006048981403e-06, "loss": 0.5112, "step": 778 }, { "epoch": 0.6560920830993824, "grad_norm": 0.25574275851249695, "learning_rate": 9.578037040500992e-06, "loss": 0.533, "step": 779 }, { "epoch": 0.656934306569343, "grad_norm": 0.27810147404670715, "learning_rate": 9.576063630588563e-06, "loss": 0.4978, "step": 780 }, { "epoch": 0.6577765300393038, "grad_norm": 0.2598312795162201, "learning_rate": 9.574085821141406e-06, "loss": 0.5317, "step": 781 }, { "epoch": 0.6586187535092645, "grad_norm": 0.29050466418266296, "learning_rate": 9.572103614061029e-06, "loss": 0.5076, "step": 782 }, { "epoch": 0.6594609769792251, "grad_norm": 0.2905493974685669, "learning_rate": 9.570117011253173e-06, "loss": 0.5403, "step": 783 }, { "epoch": 0.6603032004491859, "grad_norm": 0.28868380188941956, "learning_rate": 9.568126014627805e-06, "loss": 0.551, "step": 784 }, { "epoch": 0.6611454239191465, "grad_norm": 0.27608051896095276, "learning_rate": 9.566130626099118e-06, "loss": 0.5311, "step": 785 }, { "epoch": 0.6619876473891072, "grad_norm": 0.2693360447883606, "learning_rate": 9.56413084758552e-06, "loss": 0.5244, "step": 786 }, { "epoch": 0.662829870859068, "grad_norm": 0.27516603469848633, "learning_rate": 9.562126681009649e-06, "loss": 0.5046, "step": 787 }, { "epoch": 0.6636720943290286, "grad_norm": 0.28835856914520264, "learning_rate": 9.560118128298355e-06, "loss": 0.5687, "step": 788 }, { "epoch": 0.6645143177989893, "grad_norm": 0.27149343490600586, "learning_rate": 9.55810519138271e-06, "loss": 0.5486, "step": 789 }, { "epoch": 0.66535654126895, "grad_norm": 0.29005834460258484, "learning_rate": 9.556087872197997e-06, "loss": 0.4995, "step": 790 }, { "epoch": 0.6661987647389107, "grad_norm": 0.3182181715965271, "learning_rate": 9.554066172683715e-06, "loss": 0.5682, "step": 791 }, { "epoch": 0.6670409882088714, "grad_norm": 0.24996811151504517, "learning_rate": 9.552040094783575e-06, "loss": 0.5553, "step": 792 }, { "epoch": 0.6678832116788321, "grad_norm": 0.2604743540287018, "learning_rate": 9.550009640445492e-06, "loss": 0.5106, "step": 793 }, { "epoch": 0.6687254351487928, "grad_norm": 0.31558993458747864, "learning_rate": 9.547974811621594e-06, "loss": 0.536, "step": 794 }, { "epoch": 0.6695676586187536, "grad_norm": 0.26184195280075073, "learning_rate": 9.545935610268213e-06, "loss": 0.5223, "step": 795 }, { "epoch": 0.6704098820887142, "grad_norm": 0.28619760274887085, "learning_rate": 9.543892038345885e-06, "loss": 0.5257, "step": 796 }, { "epoch": 0.6712521055586749, "grad_norm": 0.307253897190094, "learning_rate": 9.541844097819347e-06, "loss": 0.5354, "step": 797 }, { "epoch": 0.6720943290286356, "grad_norm": 0.29323315620422363, "learning_rate": 9.53979179065754e-06, "loss": 0.5486, "step": 798 }, { "epoch": 0.6729365524985963, "grad_norm": 0.2677949070930481, "learning_rate": 9.537735118833595e-06, "loss": 0.5302, "step": 799 }, { "epoch": 0.673778775968557, "grad_norm": 0.2691217064857483, "learning_rate": 9.53567408432485e-06, "loss": 0.5026, "step": 800 }, { "epoch": 0.6746209994385177, "grad_norm": 0.30234163999557495, "learning_rate": 9.533608689112827e-06, "loss": 0.5396, "step": 801 }, { "epoch": 0.6754632229084784, "grad_norm": 0.28876832127571106, "learning_rate": 9.531538935183252e-06, "loss": 0.5665, "step": 802 }, { "epoch": 0.676305446378439, "grad_norm": 0.2805941700935364, "learning_rate": 9.529464824526027e-06, "loss": 0.5471, "step": 803 }, { "epoch": 0.6771476698483998, "grad_norm": 0.33430808782577515, "learning_rate": 9.527386359135254e-06, "loss": 0.5515, "step": 804 }, { "epoch": 0.6779898933183605, "grad_norm": 0.2605018615722656, "learning_rate": 9.525303541009218e-06, "loss": 0.5179, "step": 805 }, { "epoch": 0.6788321167883211, "grad_norm": 0.2856140434741974, "learning_rate": 9.523216372150393e-06, "loss": 0.5727, "step": 806 }, { "epoch": 0.6796743402582819, "grad_norm": 0.2726636230945587, "learning_rate": 9.521124854565425e-06, "loss": 0.5624, "step": 807 }, { "epoch": 0.6805165637282425, "grad_norm": 0.30046346783638, "learning_rate": 9.519028990265153e-06, "loss": 0.5643, "step": 808 }, { "epoch": 0.6813587871982033, "grad_norm": 0.2833278179168701, "learning_rate": 9.516928781264588e-06, "loss": 0.5315, "step": 809 }, { "epoch": 0.682201010668164, "grad_norm": 0.2701883316040039, "learning_rate": 9.514824229582922e-06, "loss": 0.5365, "step": 810 }, { "epoch": 0.6830432341381246, "grad_norm": 0.26827722787857056, "learning_rate": 9.512715337243517e-06, "loss": 0.5336, "step": 811 }, { "epoch": 0.6838854576080854, "grad_norm": 0.2981036901473999, "learning_rate": 9.510602106273914e-06, "loss": 0.5224, "step": 812 }, { "epoch": 0.684727681078046, "grad_norm": 0.2652583122253418, "learning_rate": 9.508484538705823e-06, "loss": 0.5368, "step": 813 }, { "epoch": 0.6855699045480067, "grad_norm": 0.27172061800956726, "learning_rate": 9.506362636575122e-06, "loss": 0.552, "step": 814 }, { "epoch": 0.6864121280179675, "grad_norm": 0.3000926971435547, "learning_rate": 9.504236401921856e-06, "loss": 0.5537, "step": 815 }, { "epoch": 0.6872543514879281, "grad_norm": 0.2769169807434082, "learning_rate": 9.50210583679024e-06, "loss": 0.5587, "step": 816 }, { "epoch": 0.6880965749578888, "grad_norm": 0.26903122663497925, "learning_rate": 9.499970943228646e-06, "loss": 0.5169, "step": 817 }, { "epoch": 0.6889387984278496, "grad_norm": 0.2675398588180542, "learning_rate": 9.497831723289615e-06, "loss": 0.5356, "step": 818 }, { "epoch": 0.6897810218978102, "grad_norm": 0.2599860727787018, "learning_rate": 9.495688179029838e-06, "loss": 0.505, "step": 819 }, { "epoch": 0.6906232453677709, "grad_norm": 0.3098326325416565, "learning_rate": 9.493540312510173e-06, "loss": 0.5521, "step": 820 }, { "epoch": 0.6914654688377316, "grad_norm": 0.25566792488098145, "learning_rate": 9.491388125795623e-06, "loss": 0.5582, "step": 821 }, { "epoch": 0.6923076923076923, "grad_norm": 0.2803921699523926, "learning_rate": 9.48923162095536e-06, "loss": 0.5491, "step": 822 }, { "epoch": 0.6931499157776531, "grad_norm": 0.2700415551662445, "learning_rate": 9.487070800062689e-06, "loss": 0.545, "step": 823 }, { "epoch": 0.6939921392476137, "grad_norm": 0.3185098469257355, "learning_rate": 9.48490566519508e-06, "loss": 0.5686, "step": 824 }, { "epoch": 0.6948343627175744, "grad_norm": 0.26263660192489624, "learning_rate": 9.482736218434144e-06, "loss": 0.5204, "step": 825 }, { "epoch": 0.6956765861875351, "grad_norm": 0.2548392713069916, "learning_rate": 9.480562461865634e-06, "loss": 0.4815, "step": 826 }, { "epoch": 0.6965188096574958, "grad_norm": 0.2675054669380188, "learning_rate": 9.478384397579452e-06, "loss": 0.5279, "step": 827 }, { "epoch": 0.6973610331274565, "grad_norm": 0.2857420742511749, "learning_rate": 9.476202027669644e-06, "loss": 0.5336, "step": 828 }, { "epoch": 0.6982032565974172, "grad_norm": 0.2935926020145416, "learning_rate": 9.474015354234385e-06, "loss": 0.5422, "step": 829 }, { "epoch": 0.6990454800673779, "grad_norm": 0.2799871861934662, "learning_rate": 9.471824379375998e-06, "loss": 0.5436, "step": 830 }, { "epoch": 0.6998877035373385, "grad_norm": 0.2527009844779968, "learning_rate": 9.469629105200937e-06, "loss": 0.4977, "step": 831 }, { "epoch": 0.7007299270072993, "grad_norm": 0.3244398236274719, "learning_rate": 9.46742953381979e-06, "loss": 0.5114, "step": 832 }, { "epoch": 0.70157215047726, "grad_norm": 0.2824684679508209, "learning_rate": 9.465225667347275e-06, "loss": 0.5278, "step": 833 }, { "epoch": 0.7024143739472206, "grad_norm": 0.262867271900177, "learning_rate": 9.463017507902245e-06, "loss": 0.5304, "step": 834 }, { "epoch": 0.7032565974171814, "grad_norm": 0.2689286172389984, "learning_rate": 9.460805057607671e-06, "loss": 0.5613, "step": 835 }, { "epoch": 0.704098820887142, "grad_norm": 0.26899948716163635, "learning_rate": 9.458588318590659e-06, "loss": 0.4859, "step": 836 }, { "epoch": 0.7049410443571027, "grad_norm": 0.26895368099212646, "learning_rate": 9.45636729298243e-06, "loss": 0.5077, "step": 837 }, { "epoch": 0.7057832678270635, "grad_norm": 0.25743672251701355, "learning_rate": 9.45414198291833e-06, "loss": 0.5245, "step": 838 }, { "epoch": 0.7066254912970241, "grad_norm": 0.25638890266418457, "learning_rate": 9.451912390537828e-06, "loss": 0.5285, "step": 839 }, { "epoch": 0.7074677147669849, "grad_norm": 0.2825080156326294, "learning_rate": 9.449678517984503e-06, "loss": 0.5171, "step": 840 }, { "epoch": 0.7083099382369455, "grad_norm": 0.24943333864212036, "learning_rate": 9.447440367406053e-06, "loss": 0.5085, "step": 841 }, { "epoch": 0.7091521617069062, "grad_norm": 0.2641408443450928, "learning_rate": 9.445197940954292e-06, "loss": 0.5345, "step": 842 }, { "epoch": 0.709994385176867, "grad_norm": 0.27245619893074036, "learning_rate": 9.442951240785135e-06, "loss": 0.5687, "step": 843 }, { "epoch": 0.7108366086468276, "grad_norm": 0.2528143525123596, "learning_rate": 9.440700269058617e-06, "loss": 0.5169, "step": 844 }, { "epoch": 0.7116788321167883, "grad_norm": 0.30088067054748535, "learning_rate": 9.438445027938873e-06, "loss": 0.5527, "step": 845 }, { "epoch": 0.712521055586749, "grad_norm": 0.30703431367874146, "learning_rate": 9.436185519594145e-06, "loss": 0.5243, "step": 846 }, { "epoch": 0.7133632790567097, "grad_norm": 0.2805617153644562, "learning_rate": 9.433921746196777e-06, "loss": 0.5453, "step": 847 }, { "epoch": 0.7142055025266704, "grad_norm": 0.32050612568855286, "learning_rate": 9.431653709923214e-06, "loss": 0.573, "step": 848 }, { "epoch": 0.7150477259966311, "grad_norm": 0.29783114790916443, "learning_rate": 9.429381412954e-06, "loss": 0.5362, "step": 849 }, { "epoch": 0.7158899494665918, "grad_norm": 0.281932532787323, "learning_rate": 9.427104857473773e-06, "loss": 0.5127, "step": 850 }, { "epoch": 0.7167321729365524, "grad_norm": 0.2548541724681854, "learning_rate": 9.424824045671267e-06, "loss": 0.5722, "step": 851 }, { "epoch": 0.7175743964065132, "grad_norm": 0.30742502212524414, "learning_rate": 9.422538979739307e-06, "loss": 0.5553, "step": 852 }, { "epoch": 0.7184166198764739, "grad_norm": 0.28410792350769043, "learning_rate": 9.420249661874812e-06, "loss": 0.5513, "step": 853 }, { "epoch": 0.7192588433464346, "grad_norm": 0.26795634627342224, "learning_rate": 9.417956094278784e-06, "loss": 0.4912, "step": 854 }, { "epoch": 0.7201010668163953, "grad_norm": 0.25963959097862244, "learning_rate": 9.415658279156312e-06, "loss": 0.5399, "step": 855 }, { "epoch": 0.720943290286356, "grad_norm": 0.2586013972759247, "learning_rate": 9.41335621871657e-06, "loss": 0.5419, "step": 856 }, { "epoch": 0.7217855137563167, "grad_norm": 0.26465001702308655, "learning_rate": 9.41104991517281e-06, "loss": 0.5056, "step": 857 }, { "epoch": 0.7226277372262774, "grad_norm": 0.26760923862457275, "learning_rate": 9.408739370742372e-06, "loss": 0.5142, "step": 858 }, { "epoch": 0.723469960696238, "grad_norm": 0.2878692150115967, "learning_rate": 9.406424587646664e-06, "loss": 0.5399, "step": 859 }, { "epoch": 0.7243121841661988, "grad_norm": 0.2679644525051117, "learning_rate": 9.404105568111173e-06, "loss": 0.528, "step": 860 }, { "epoch": 0.7251544076361595, "grad_norm": 0.28021249175071716, "learning_rate": 9.401782314365458e-06, "loss": 0.5165, "step": 861 }, { "epoch": 0.7259966311061201, "grad_norm": 0.2511672079563141, "learning_rate": 9.39945482864315e-06, "loss": 0.5362, "step": 862 }, { "epoch": 0.7268388545760809, "grad_norm": 0.26389527320861816, "learning_rate": 9.39712311318195e-06, "loss": 0.5261, "step": 863 }, { "epoch": 0.7276810780460415, "grad_norm": 0.29182079434394836, "learning_rate": 9.39478717022362e-06, "loss": 0.5262, "step": 864 }, { "epoch": 0.7285233015160022, "grad_norm": 0.26455703377723694, "learning_rate": 9.392447002013996e-06, "loss": 0.5082, "step": 865 }, { "epoch": 0.729365524985963, "grad_norm": 0.2523685395717621, "learning_rate": 9.390102610802965e-06, "loss": 0.5193, "step": 866 }, { "epoch": 0.7302077484559236, "grad_norm": 0.2558920681476593, "learning_rate": 9.387753998844482e-06, "loss": 0.4954, "step": 867 }, { "epoch": 0.7310499719258844, "grad_norm": 0.2697160542011261, "learning_rate": 9.385401168396558e-06, "loss": 0.508, "step": 868 }, { "epoch": 0.731892195395845, "grad_norm": 0.26095300912857056, "learning_rate": 9.383044121721257e-06, "loss": 0.4954, "step": 869 }, { "epoch": 0.7327344188658057, "grad_norm": 0.2691548764705658, "learning_rate": 9.380682861084703e-06, "loss": 0.5464, "step": 870 }, { "epoch": 0.7335766423357665, "grad_norm": 0.2676098942756653, "learning_rate": 9.378317388757062e-06, "loss": 0.5336, "step": 871 }, { "epoch": 0.7344188658057271, "grad_norm": 0.27561846375465393, "learning_rate": 9.375947707012558e-06, "loss": 0.4851, "step": 872 }, { "epoch": 0.7352610892756878, "grad_norm": 0.307340532541275, "learning_rate": 9.37357381812946e-06, "loss": 0.5246, "step": 873 }, { "epoch": 0.7361033127456486, "grad_norm": 0.2767391502857208, "learning_rate": 9.371195724390075e-06, "loss": 0.4955, "step": 874 }, { "epoch": 0.7369455362156092, "grad_norm": 0.2774464786052704, "learning_rate": 9.368813428080763e-06, "loss": 0.5332, "step": 875 }, { "epoch": 0.7377877596855699, "grad_norm": 0.25979113578796387, "learning_rate": 9.366426931491917e-06, "loss": 0.5108, "step": 876 }, { "epoch": 0.7386299831555306, "grad_norm": 0.2568913698196411, "learning_rate": 9.364036236917972e-06, "loss": 0.5412, "step": 877 }, { "epoch": 0.7394722066254913, "grad_norm": 0.2612326443195343, "learning_rate": 9.361641346657396e-06, "loss": 0.5247, "step": 878 }, { "epoch": 0.740314430095452, "grad_norm": 0.29878029227256775, "learning_rate": 9.359242263012693e-06, "loss": 0.5397, "step": 879 }, { "epoch": 0.7411566535654127, "grad_norm": 0.25059062242507935, "learning_rate": 9.356838988290401e-06, "loss": 0.521, "step": 880 }, { "epoch": 0.7419988770353734, "grad_norm": 0.260985791683197, "learning_rate": 9.354431524801082e-06, "loss": 0.5468, "step": 881 }, { "epoch": 0.742841100505334, "grad_norm": 0.266897588968277, "learning_rate": 9.352019874859326e-06, "loss": 0.5213, "step": 882 }, { "epoch": 0.7436833239752948, "grad_norm": 0.2500898241996765, "learning_rate": 9.349604040783754e-06, "loss": 0.5129, "step": 883 }, { "epoch": 0.7445255474452555, "grad_norm": 0.24773405492305756, "learning_rate": 9.347184024897003e-06, "loss": 0.534, "step": 884 }, { "epoch": 0.7453677709152162, "grad_norm": 0.3046588897705078, "learning_rate": 9.344759829525734e-06, "loss": 0.562, "step": 885 }, { "epoch": 0.7462099943851769, "grad_norm": 0.2873043715953827, "learning_rate": 9.342331457000621e-06, "loss": 0.5453, "step": 886 }, { "epoch": 0.7470522178551375, "grad_norm": 0.27233925461769104, "learning_rate": 9.339898909656364e-06, "loss": 0.5363, "step": 887 }, { "epoch": 0.7478944413250983, "grad_norm": 0.278315931558609, "learning_rate": 9.33746218983167e-06, "loss": 0.5435, "step": 888 }, { "epoch": 0.748736664795059, "grad_norm": 0.2837781310081482, "learning_rate": 9.335021299869256e-06, "loss": 0.5755, "step": 889 }, { "epoch": 0.7495788882650196, "grad_norm": 0.26151353120803833, "learning_rate": 9.332576242115852e-06, "loss": 0.5016, "step": 890 }, { "epoch": 0.7504211117349804, "grad_norm": 0.25589028000831604, "learning_rate": 9.330127018922195e-06, "loss": 0.5076, "step": 891 }, { "epoch": 0.751263335204941, "grad_norm": 0.27819058299064636, "learning_rate": 9.327673632643021e-06, "loss": 0.541, "step": 892 }, { "epoch": 0.7521055586749017, "grad_norm": 0.25694236159324646, "learning_rate": 9.32521608563708e-06, "loss": 0.5196, "step": 893 }, { "epoch": 0.7529477821448625, "grad_norm": 0.2595864236354828, "learning_rate": 9.32275438026711e-06, "loss": 0.5122, "step": 894 }, { "epoch": 0.7537900056148231, "grad_norm": 0.27614614367485046, "learning_rate": 9.320288518899853e-06, "loss": 0.5195, "step": 895 }, { "epoch": 0.7546322290847838, "grad_norm": 0.28381258249282837, "learning_rate": 9.317818503906046e-06, "loss": 0.5174, "step": 896 }, { "epoch": 0.7554744525547445, "grad_norm": 0.26316675543785095, "learning_rate": 9.315344337660422e-06, "loss": 0.5162, "step": 897 }, { "epoch": 0.7563166760247052, "grad_norm": 0.2511134147644043, "learning_rate": 9.312866022541697e-06, "loss": 0.5074, "step": 898 }, { "epoch": 0.757158899494666, "grad_norm": 0.29434195160865784, "learning_rate": 9.310383560932587e-06, "loss": 0.5104, "step": 899 }, { "epoch": 0.7580011229646266, "grad_norm": 0.2786406874656677, "learning_rate": 9.307896955219787e-06, "loss": 0.5043, "step": 900 }, { "epoch": 0.7588433464345873, "grad_norm": 0.30582770705223083, "learning_rate": 9.305406207793974e-06, "loss": 0.5614, "step": 901 }, { "epoch": 0.759685569904548, "grad_norm": 0.2651727497577667, "learning_rate": 9.302911321049818e-06, "loss": 0.5027, "step": 902 }, { "epoch": 0.7605277933745087, "grad_norm": 0.3081914186477661, "learning_rate": 9.300412297385954e-06, "loss": 0.521, "step": 903 }, { "epoch": 0.7613700168444694, "grad_norm": 0.344078928232193, "learning_rate": 9.297909139205005e-06, "loss": 0.5475, "step": 904 }, { "epoch": 0.7622122403144301, "grad_norm": 0.3318915367126465, "learning_rate": 9.295401848913569e-06, "loss": 0.5136, "step": 905 }, { "epoch": 0.7630544637843908, "grad_norm": 0.2659371495246887, "learning_rate": 9.29289042892221e-06, "loss": 0.521, "step": 906 }, { "epoch": 0.7638966872543514, "grad_norm": 0.28537461161613464, "learning_rate": 9.290374881645465e-06, "loss": 0.5342, "step": 907 }, { "epoch": 0.7647389107243122, "grad_norm": 0.27734076976776123, "learning_rate": 9.287855209501844e-06, "loss": 0.5445, "step": 908 }, { "epoch": 0.7655811341942729, "grad_norm": 0.3111424446105957, "learning_rate": 9.285331414913816e-06, "loss": 0.558, "step": 909 }, { "epoch": 0.7664233576642335, "grad_norm": 0.24878092110157013, "learning_rate": 9.282803500307818e-06, "loss": 0.4861, "step": 910 }, { "epoch": 0.7672655811341943, "grad_norm": 0.30805066227912903, "learning_rate": 9.280271468114243e-06, "loss": 0.5214, "step": 911 }, { "epoch": 0.768107804604155, "grad_norm": 0.2879858613014221, "learning_rate": 9.277735320767449e-06, "loss": 0.5432, "step": 912 }, { "epoch": 0.7689500280741156, "grad_norm": 0.2624782919883728, "learning_rate": 9.275195060705749e-06, "loss": 0.5042, "step": 913 }, { "epoch": 0.7697922515440764, "grad_norm": 0.3096674084663391, "learning_rate": 9.272650690371403e-06, "loss": 0.5178, "step": 914 }, { "epoch": 0.770634475014037, "grad_norm": 0.27192240953445435, "learning_rate": 9.270102212210632e-06, "loss": 0.5252, "step": 915 }, { "epoch": 0.7714766984839978, "grad_norm": 0.26907551288604736, "learning_rate": 9.267549628673603e-06, "loss": 0.5257, "step": 916 }, { "epoch": 0.7723189219539585, "grad_norm": 0.25610947608947754, "learning_rate": 9.264992942214427e-06, "loss": 0.543, "step": 917 }, { "epoch": 0.7731611454239191, "grad_norm": 0.4047147333621979, "learning_rate": 9.262432155291167e-06, "loss": 0.5012, "step": 918 }, { "epoch": 0.7740033688938799, "grad_norm": 0.31880781054496765, "learning_rate": 9.25986727036582e-06, "loss": 0.4946, "step": 919 }, { "epoch": 0.7748455923638405, "grad_norm": 0.3308691382408142, "learning_rate": 9.257298289904324e-06, "loss": 0.4979, "step": 920 }, { "epoch": 0.7756878158338012, "grad_norm": 0.26055172085762024, "learning_rate": 9.254725216376562e-06, "loss": 0.5148, "step": 921 }, { "epoch": 0.776530039303762, "grad_norm": 0.26787835359573364, "learning_rate": 9.252148052256343e-06, "loss": 0.5296, "step": 922 }, { "epoch": 0.7773722627737226, "grad_norm": 0.35683828592300415, "learning_rate": 9.249566800021417e-06, "loss": 0.5504, "step": 923 }, { "epoch": 0.7782144862436833, "grad_norm": 0.2828162610530853, "learning_rate": 9.246981462153456e-06, "loss": 0.5715, "step": 924 }, { "epoch": 0.779056709713644, "grad_norm": 0.320357084274292, "learning_rate": 9.244392041138068e-06, "loss": 0.4828, "step": 925 }, { "epoch": 0.7798989331836047, "grad_norm": 0.30529463291168213, "learning_rate": 9.24179853946478e-06, "loss": 0.5518, "step": 926 }, { "epoch": 0.7807411566535654, "grad_norm": 0.28834110498428345, "learning_rate": 9.239200959627048e-06, "loss": 0.5092, "step": 927 }, { "epoch": 0.7815833801235261, "grad_norm": 0.27639341354370117, "learning_rate": 9.236599304122246e-06, "loss": 0.4987, "step": 928 }, { "epoch": 0.7824256035934868, "grad_norm": 0.3466230630874634, "learning_rate": 9.233993575451663e-06, "loss": 0.5151, "step": 929 }, { "epoch": 0.7832678270634476, "grad_norm": 0.26943811774253845, "learning_rate": 9.231383776120512e-06, "loss": 0.5166, "step": 930 }, { "epoch": 0.7841100505334082, "grad_norm": 0.3739957809448242, "learning_rate": 9.228769908637912e-06, "loss": 0.5521, "step": 931 }, { "epoch": 0.7849522740033689, "grad_norm": 0.30110248923301697, "learning_rate": 9.226151975516897e-06, "loss": 0.5407, "step": 932 }, { "epoch": 0.7857944974733296, "grad_norm": 0.30617061257362366, "learning_rate": 9.223529979274411e-06, "loss": 0.5211, "step": 933 }, { "epoch": 0.7866367209432903, "grad_norm": 0.2747458517551422, "learning_rate": 9.220903922431302e-06, "loss": 0.5224, "step": 934 }, { "epoch": 0.787478944413251, "grad_norm": 0.2789233326911926, "learning_rate": 9.218273807512318e-06, "loss": 0.5528, "step": 935 }, { "epoch": 0.7883211678832117, "grad_norm": 0.3090609908103943, "learning_rate": 9.215639637046121e-06, "loss": 0.5264, "step": 936 }, { "epoch": 0.7891633913531724, "grad_norm": 0.2782890796661377, "learning_rate": 9.213001413565259e-06, "loss": 0.5372, "step": 937 }, { "epoch": 0.790005614823133, "grad_norm": 0.27642208337783813, "learning_rate": 9.210359139606183e-06, "loss": 0.5342, "step": 938 }, { "epoch": 0.7908478382930938, "grad_norm": 0.26705002784729004, "learning_rate": 9.207712817709237e-06, "loss": 0.525, "step": 939 }, { "epoch": 0.7916900617630545, "grad_norm": 0.26462623476982117, "learning_rate": 9.205062450418655e-06, "loss": 0.5022, "step": 940 }, { "epoch": 0.7925322852330151, "grad_norm": 0.27399033308029175, "learning_rate": 9.202408040282567e-06, "loss": 0.4885, "step": 941 }, { "epoch": 0.7933745087029759, "grad_norm": 0.29105493426322937, "learning_rate": 9.19974958985298e-06, "loss": 0.5342, "step": 942 }, { "epoch": 0.7942167321729365, "grad_norm": 0.27100351452827454, "learning_rate": 9.197087101685794e-06, "loss": 0.5183, "step": 943 }, { "epoch": 0.7950589556428973, "grad_norm": 0.2834257185459137, "learning_rate": 9.194420578340785e-06, "loss": 0.5233, "step": 944 }, { "epoch": 0.795901179112858, "grad_norm": 0.26956048607826233, "learning_rate": 9.191750022381613e-06, "loss": 0.5269, "step": 945 }, { "epoch": 0.7967434025828186, "grad_norm": 0.298949271440506, "learning_rate": 9.189075436375813e-06, "loss": 0.5609, "step": 946 }, { "epoch": 0.7975856260527794, "grad_norm": 0.2485303282737732, "learning_rate": 9.186396822894792e-06, "loss": 0.5217, "step": 947 }, { "epoch": 0.79842784952274, "grad_norm": 0.2531762421131134, "learning_rate": 9.183714184513832e-06, "loss": 0.4962, "step": 948 }, { "epoch": 0.7992700729927007, "grad_norm": 0.2855762541294098, "learning_rate": 9.181027523812088e-06, "loss": 0.5072, "step": 949 }, { "epoch": 0.8001122964626615, "grad_norm": 0.23555149137973785, "learning_rate": 9.178336843372576e-06, "loss": 0.5182, "step": 950 }, { "epoch": 0.8009545199326221, "grad_norm": 0.25049617886543274, "learning_rate": 9.175642145782179e-06, "loss": 0.516, "step": 951 }, { "epoch": 0.8017967434025828, "grad_norm": 0.2827242612838745, "learning_rate": 9.172943433631642e-06, "loss": 0.5199, "step": 952 }, { "epoch": 0.8026389668725435, "grad_norm": 0.3099197447299957, "learning_rate": 9.170240709515573e-06, "loss": 0.5272, "step": 953 }, { "epoch": 0.8034811903425042, "grad_norm": 0.28747087717056274, "learning_rate": 9.16753397603243e-06, "loss": 0.5223, "step": 954 }, { "epoch": 0.8043234138124649, "grad_norm": 0.28616851568222046, "learning_rate": 9.164823235784535e-06, "loss": 0.531, "step": 955 }, { "epoch": 0.8051656372824256, "grad_norm": 0.31229186058044434, "learning_rate": 9.162108491378051e-06, "loss": 0.5552, "step": 956 }, { "epoch": 0.8060078607523863, "grad_norm": 0.280046671628952, "learning_rate": 9.159389745423003e-06, "loss": 0.5319, "step": 957 }, { "epoch": 0.8068500842223469, "grad_norm": 0.2896442115306854, "learning_rate": 9.156667000533251e-06, "loss": 0.5212, "step": 958 }, { "epoch": 0.8076923076923077, "grad_norm": 0.29848065972328186, "learning_rate": 9.153940259326511e-06, "loss": 0.5488, "step": 959 }, { "epoch": 0.8085345311622684, "grad_norm": 0.281321257352829, "learning_rate": 9.151209524424333e-06, "loss": 0.4888, "step": 960 }, { "epoch": 0.8093767546322291, "grad_norm": 0.28328341245651245, "learning_rate": 9.14847479845211e-06, "loss": 0.5562, "step": 961 }, { "epoch": 0.8102189781021898, "grad_norm": 0.2625272870063782, "learning_rate": 9.145736084039073e-06, "loss": 0.525, "step": 962 }, { "epoch": 0.8110612015721504, "grad_norm": 0.2671937942504883, "learning_rate": 9.142993383818284e-06, "loss": 0.525, "step": 963 }, { "epoch": 0.8119034250421112, "grad_norm": 0.3134777843952179, "learning_rate": 9.14024670042664e-06, "loss": 0.5785, "step": 964 }, { "epoch": 0.8127456485120719, "grad_norm": 0.26303115487098694, "learning_rate": 9.137496036504868e-06, "loss": 0.495, "step": 965 }, { "epoch": 0.8135878719820325, "grad_norm": 0.2722974419593811, "learning_rate": 9.134741394697517e-06, "loss": 0.4981, "step": 966 }, { "epoch": 0.8144300954519933, "grad_norm": 0.29560592770576477, "learning_rate": 9.131982777652967e-06, "loss": 0.5275, "step": 967 }, { "epoch": 0.815272318921954, "grad_norm": 0.3064015209674835, "learning_rate": 9.129220188023419e-06, "loss": 0.5241, "step": 968 }, { "epoch": 0.8161145423919146, "grad_norm": 0.26863396167755127, "learning_rate": 9.126453628464889e-06, "loss": 0.5138, "step": 969 }, { "epoch": 0.8169567658618754, "grad_norm": 0.25996148586273193, "learning_rate": 9.12368310163721e-06, "loss": 0.5356, "step": 970 }, { "epoch": 0.817798989331836, "grad_norm": 0.3080447018146515, "learning_rate": 9.120908610204036e-06, "loss": 0.548, "step": 971 }, { "epoch": 0.8186412128017967, "grad_norm": 0.2954155206680298, "learning_rate": 9.118130156832823e-06, "loss": 0.5113, "step": 972 }, { "epoch": 0.8194834362717575, "grad_norm": 0.279940128326416, "learning_rate": 9.115347744194844e-06, "loss": 0.5331, "step": 973 }, { "epoch": 0.8203256597417181, "grad_norm": 0.29592153429985046, "learning_rate": 9.112561374965177e-06, "loss": 0.5477, "step": 974 }, { "epoch": 0.8211678832116789, "grad_norm": 0.3176156282424927, "learning_rate": 9.109771051822702e-06, "loss": 0.4833, "step": 975 }, { "epoch": 0.8220101066816395, "grad_norm": 0.2858186364173889, "learning_rate": 9.106976777450099e-06, "loss": 0.5359, "step": 976 }, { "epoch": 0.8228523301516002, "grad_norm": 0.3218308687210083, "learning_rate": 9.10417855453385e-06, "loss": 0.5216, "step": 977 }, { "epoch": 0.823694553621561, "grad_norm": 0.296644002199173, "learning_rate": 9.10137638576423e-06, "loss": 0.5385, "step": 978 }, { "epoch": 0.8245367770915216, "grad_norm": 0.28808072209358215, "learning_rate": 9.098570273835314e-06, "loss": 0.5225, "step": 979 }, { "epoch": 0.8253790005614823, "grad_norm": 0.27570444345474243, "learning_rate": 9.09576022144496e-06, "loss": 0.5132, "step": 980 }, { "epoch": 0.826221224031443, "grad_norm": 0.28410089015960693, "learning_rate": 9.09294623129482e-06, "loss": 0.4762, "step": 981 }, { "epoch": 0.8270634475014037, "grad_norm": 0.2990991175174713, "learning_rate": 9.090128306090329e-06, "loss": 0.5387, "step": 982 }, { "epoch": 0.8279056709713644, "grad_norm": 0.26986634731292725, "learning_rate": 9.087306448540707e-06, "loss": 0.5311, "step": 983 }, { "epoch": 0.8287478944413251, "grad_norm": 0.2602924406528473, "learning_rate": 9.084480661358954e-06, "loss": 0.5061, "step": 984 }, { "epoch": 0.8295901179112858, "grad_norm": 0.26557275652885437, "learning_rate": 9.081650947261847e-06, "loss": 0.4783, "step": 985 }, { "epoch": 0.8304323413812464, "grad_norm": 0.2597447335720062, "learning_rate": 9.07881730896994e-06, "loss": 0.5287, "step": 986 }, { "epoch": 0.8312745648512072, "grad_norm": 0.2529871463775635, "learning_rate": 9.07597974920756e-06, "loss": 0.5259, "step": 987 }, { "epoch": 0.8321167883211679, "grad_norm": 0.2312147617340088, "learning_rate": 9.073138270702804e-06, "loss": 0.505, "step": 988 }, { "epoch": 0.8329590117911286, "grad_norm": 0.23826858401298523, "learning_rate": 9.070292876187532e-06, "loss": 0.4911, "step": 989 }, { "epoch": 0.8338012352610893, "grad_norm": 0.2860755920410156, "learning_rate": 9.067443568397378e-06, "loss": 0.5465, "step": 990 }, { "epoch": 0.83464345873105, "grad_norm": 0.2611585259437561, "learning_rate": 9.06459035007173e-06, "loss": 0.5409, "step": 991 }, { "epoch": 0.8354856822010107, "grad_norm": 0.2624194324016571, "learning_rate": 9.061733223953738e-06, "loss": 0.5099, "step": 992 }, { "epoch": 0.8363279056709714, "grad_norm": 0.24360141158103943, "learning_rate": 9.058872192790314e-06, "loss": 0.51, "step": 993 }, { "epoch": 0.837170129140932, "grad_norm": 0.3062153458595276, "learning_rate": 9.056007259332115e-06, "loss": 0.5572, "step": 994 }, { "epoch": 0.8380123526108928, "grad_norm": 0.2593601942062378, "learning_rate": 9.053138426333562e-06, "loss": 0.546, "step": 995 }, { "epoch": 0.8388545760808535, "grad_norm": 0.28551265597343445, "learning_rate": 9.05026569655281e-06, "loss": 0.5277, "step": 996 }, { "epoch": 0.8396967995508141, "grad_norm": 0.2667967975139618, "learning_rate": 9.047389072751777e-06, "loss": 0.4973, "step": 997 }, { "epoch": 0.8405390230207749, "grad_norm": 0.2676384449005127, "learning_rate": 9.044508557696111e-06, "loss": 0.4897, "step": 998 }, { "epoch": 0.8413812464907355, "grad_norm": 0.2680940628051758, "learning_rate": 9.041624154155208e-06, "loss": 0.5366, "step": 999 }, { "epoch": 0.8422234699606962, "grad_norm": 0.30755850672721863, "learning_rate": 9.038735864902201e-06, "loss": 0.5092, "step": 1000 }, { "epoch": 0.843065693430657, "grad_norm": 0.29476872086524963, "learning_rate": 9.035843692713961e-06, "loss": 0.529, "step": 1001 }, { "epoch": 0.8439079169006176, "grad_norm": 0.2624370753765106, "learning_rate": 9.032947640371086e-06, "loss": 0.5353, "step": 1002 }, { "epoch": 0.8447501403705783, "grad_norm": 0.3220387101173401, "learning_rate": 9.030047710657912e-06, "loss": 0.5207, "step": 1003 }, { "epoch": 0.845592363840539, "grad_norm": 0.30318212509155273, "learning_rate": 9.027143906362499e-06, "loss": 0.5134, "step": 1004 }, { "epoch": 0.8464345873104997, "grad_norm": 0.26170846819877625, "learning_rate": 9.02423623027663e-06, "loss": 0.509, "step": 1005 }, { "epoch": 0.8472768107804605, "grad_norm": 0.2849772274494171, "learning_rate": 9.021324685195814e-06, "loss": 0.5239, "step": 1006 }, { "epoch": 0.8481190342504211, "grad_norm": 0.2705881893634796, "learning_rate": 9.018409273919279e-06, "loss": 0.5007, "step": 1007 }, { "epoch": 0.8489612577203818, "grad_norm": 0.592229962348938, "learning_rate": 9.01548999924997e-06, "loss": 0.5394, "step": 1008 }, { "epoch": 0.8498034811903425, "grad_norm": 0.27070802450180054, "learning_rate": 9.012566863994548e-06, "loss": 0.5307, "step": 1009 }, { "epoch": 0.8506457046603032, "grad_norm": 0.2830778956413269, "learning_rate": 9.00963987096338e-06, "loss": 0.5348, "step": 1010 }, { "epoch": 0.8514879281302639, "grad_norm": 0.2614271342754364, "learning_rate": 9.006709022970547e-06, "loss": 0.5431, "step": 1011 }, { "epoch": 0.8523301516002246, "grad_norm": 0.28097134828567505, "learning_rate": 9.003774322833835e-06, "loss": 0.522, "step": 1012 }, { "epoch": 0.8531723750701853, "grad_norm": 0.2825312316417694, "learning_rate": 9.000835773374733e-06, "loss": 0.5319, "step": 1013 }, { "epoch": 0.8540145985401459, "grad_norm": 0.2914729416370392, "learning_rate": 8.997893377418432e-06, "loss": 0.5336, "step": 1014 }, { "epoch": 0.8548568220101067, "grad_norm": 0.25748324394226074, "learning_rate": 8.99494713779382e-06, "loss": 0.4925, "step": 1015 }, { "epoch": 0.8556990454800674, "grad_norm": 0.30522391200065613, "learning_rate": 8.991997057333481e-06, "loss": 0.5118, "step": 1016 }, { "epoch": 0.856541268950028, "grad_norm": 0.2790256142616272, "learning_rate": 8.98904313887369e-06, "loss": 0.5319, "step": 1017 }, { "epoch": 0.8573834924199888, "grad_norm": 0.3284727931022644, "learning_rate": 8.986085385254417e-06, "loss": 0.5711, "step": 1018 }, { "epoch": 0.8582257158899494, "grad_norm": 0.26617902517318726, "learning_rate": 8.983123799319312e-06, "loss": 0.5342, "step": 1019 }, { "epoch": 0.8590679393599102, "grad_norm": 0.3000130355358124, "learning_rate": 8.980158383915714e-06, "loss": 0.5125, "step": 1020 }, { "epoch": 0.8599101628298709, "grad_norm": 0.3121154308319092, "learning_rate": 8.977189141894645e-06, "loss": 0.5674, "step": 1021 }, { "epoch": 0.8607523862998315, "grad_norm": 0.2778458595275879, "learning_rate": 8.9742160761108e-06, "loss": 0.4992, "step": 1022 }, { "epoch": 0.8615946097697923, "grad_norm": 0.31009209156036377, "learning_rate": 8.971239189422555e-06, "loss": 0.4924, "step": 1023 }, { "epoch": 0.862436833239753, "grad_norm": 0.3196420967578888, "learning_rate": 8.968258484691961e-06, "loss": 0.5374, "step": 1024 }, { "epoch": 0.8632790567097136, "grad_norm": 0.28517118096351624, "learning_rate": 8.965273964784735e-06, "loss": 0.5065, "step": 1025 }, { "epoch": 0.8641212801796744, "grad_norm": 0.30996036529541016, "learning_rate": 8.962285632570266e-06, "loss": 0.5136, "step": 1026 }, { "epoch": 0.864963503649635, "grad_norm": 0.3184329867362976, "learning_rate": 8.959293490921606e-06, "loss": 0.5173, "step": 1027 }, { "epoch": 0.8658057271195957, "grad_norm": 5.426473140716553, "learning_rate": 8.956297542715469e-06, "loss": 0.569, "step": 1028 }, { "epoch": 0.8666479505895565, "grad_norm": 0.3818562924861908, "learning_rate": 8.953297790832231e-06, "loss": 0.5237, "step": 1029 }, { "epoch": 0.8674901740595171, "grad_norm": 0.2824288606643677, "learning_rate": 8.950294238155924e-06, "loss": 0.5066, "step": 1030 }, { "epoch": 0.8683323975294778, "grad_norm": 0.2561205327510834, "learning_rate": 8.947286887574234e-06, "loss": 0.531, "step": 1031 }, { "epoch": 0.8691746209994385, "grad_norm": 0.3025212585926056, "learning_rate": 8.944275741978495e-06, "loss": 0.5254, "step": 1032 }, { "epoch": 0.8700168444693992, "grad_norm": 0.3255753219127655, "learning_rate": 8.941260804263697e-06, "loss": 0.5738, "step": 1033 }, { "epoch": 0.87085906793936, "grad_norm": 0.31651684641838074, "learning_rate": 8.938242077328469e-06, "loss": 0.5458, "step": 1034 }, { "epoch": 0.8717012914093206, "grad_norm": 0.30347517132759094, "learning_rate": 8.935219564075087e-06, "loss": 0.5071, "step": 1035 }, { "epoch": 0.8725435148792813, "grad_norm": 0.26859143376350403, "learning_rate": 8.932193267409465e-06, "loss": 0.5154, "step": 1036 }, { "epoch": 0.873385738349242, "grad_norm": 0.3361642062664032, "learning_rate": 8.929163190241157e-06, "loss": 0.5079, "step": 1037 }, { "epoch": 0.8742279618192027, "grad_norm": 0.27315592765808105, "learning_rate": 8.92612933548335e-06, "loss": 0.5024, "step": 1038 }, { "epoch": 0.8750701852891634, "grad_norm": 0.254569947719574, "learning_rate": 8.923091706052863e-06, "loss": 0.4782, "step": 1039 }, { "epoch": 0.8759124087591241, "grad_norm": 0.2732038199901581, "learning_rate": 8.920050304870142e-06, "loss": 0.5093, "step": 1040 }, { "epoch": 0.8767546322290848, "grad_norm": 0.2811090052127838, "learning_rate": 8.917005134859263e-06, "loss": 0.5061, "step": 1041 }, { "epoch": 0.8775968556990454, "grad_norm": 0.273803174495697, "learning_rate": 8.913956198947923e-06, "loss": 0.4981, "step": 1042 }, { "epoch": 0.8784390791690062, "grad_norm": 0.2916918396949768, "learning_rate": 8.910903500067443e-06, "loss": 0.5173, "step": 1043 }, { "epoch": 0.8792813026389669, "grad_norm": 0.2519601583480835, "learning_rate": 8.907847041152757e-06, "loss": 0.5143, "step": 1044 }, { "epoch": 0.8801235261089275, "grad_norm": 0.2867165207862854, "learning_rate": 8.904786825142416e-06, "loss": 0.4958, "step": 1045 }, { "epoch": 0.8809657495788883, "grad_norm": 0.32696467638015747, "learning_rate": 8.901722854978582e-06, "loss": 0.5272, "step": 1046 }, { "epoch": 0.881807973048849, "grad_norm": 0.26765474677085876, "learning_rate": 8.89865513360703e-06, "loss": 0.5118, "step": 1047 }, { "epoch": 0.8826501965188096, "grad_norm": 0.28882232308387756, "learning_rate": 8.89558366397714e-06, "loss": 0.4966, "step": 1048 }, { "epoch": 0.8834924199887704, "grad_norm": 0.28051137924194336, "learning_rate": 8.892508449041893e-06, "loss": 0.5163, "step": 1049 }, { "epoch": 0.884334643458731, "grad_norm": 0.2658673822879791, "learning_rate": 8.889429491757872e-06, "loss": 0.5199, "step": 1050 }, { "epoch": 0.8851768669286918, "grad_norm": 0.28430694341659546, "learning_rate": 8.88634679508526e-06, "loss": 0.5163, "step": 1051 }, { "epoch": 0.8860190903986525, "grad_norm": 0.28242236375808716, "learning_rate": 8.883260361987833e-06, "loss": 0.5017, "step": 1052 }, { "epoch": 0.8868613138686131, "grad_norm": 0.26047077775001526, "learning_rate": 8.88017019543296e-06, "loss": 0.5018, "step": 1053 }, { "epoch": 0.8877035373385739, "grad_norm": 0.2861049175262451, "learning_rate": 8.8770762983916e-06, "loss": 0.5633, "step": 1054 }, { "epoch": 0.8885457608085345, "grad_norm": 0.31295421719551086, "learning_rate": 8.8739786738383e-06, "loss": 0.5139, "step": 1055 }, { "epoch": 0.8893879842784952, "grad_norm": 0.2635347843170166, "learning_rate": 8.870877324751186e-06, "loss": 0.5494, "step": 1056 }, { "epoch": 0.890230207748456, "grad_norm": 0.2500781714916229, "learning_rate": 8.867772254111966e-06, "loss": 0.4869, "step": 1057 }, { "epoch": 0.8910724312184166, "grad_norm": 0.31911179423332214, "learning_rate": 8.864663464905933e-06, "loss": 0.5077, "step": 1058 }, { "epoch": 0.8919146546883773, "grad_norm": 0.3214774429798126, "learning_rate": 8.861550960121946e-06, "loss": 0.5149, "step": 1059 }, { "epoch": 0.892756878158338, "grad_norm": 0.26300933957099915, "learning_rate": 8.85843474275244e-06, "loss": 0.5397, "step": 1060 }, { "epoch": 0.8935991016282987, "grad_norm": 0.2968040704727173, "learning_rate": 8.85531481579342e-06, "loss": 0.527, "step": 1061 }, { "epoch": 0.8944413250982594, "grad_norm": 0.2760694921016693, "learning_rate": 8.852191182244456e-06, "loss": 0.5192, "step": 1062 }, { "epoch": 0.8952835485682201, "grad_norm": 0.27794432640075684, "learning_rate": 8.849063845108685e-06, "loss": 0.5005, "step": 1063 }, { "epoch": 0.8961257720381808, "grad_norm": 0.26003462076187134, "learning_rate": 8.8459328073928e-06, "loss": 0.4823, "step": 1064 }, { "epoch": 0.8969679955081415, "grad_norm": 0.26606038212776184, "learning_rate": 8.842798072107055e-06, "loss": 0.5076, "step": 1065 }, { "epoch": 0.8978102189781022, "grad_norm": 0.29581573605537415, "learning_rate": 8.839659642265259e-06, "loss": 0.5022, "step": 1066 }, { "epoch": 0.8986524424480629, "grad_norm": 0.27086734771728516, "learning_rate": 8.836517520884768e-06, "loss": 0.4826, "step": 1067 }, { "epoch": 0.8994946659180236, "grad_norm": 0.2601306736469269, "learning_rate": 8.833371710986493e-06, "loss": 0.4924, "step": 1068 }, { "epoch": 0.9003368893879843, "grad_norm": 0.3044421970844269, "learning_rate": 8.83022221559489e-06, "loss": 0.5223, "step": 1069 }, { "epoch": 0.9011791128579449, "grad_norm": 0.27083179354667664, "learning_rate": 8.827069037737958e-06, "loss": 0.5272, "step": 1070 }, { "epoch": 0.9020213363279057, "grad_norm": 0.2835432291030884, "learning_rate": 8.823912180447237e-06, "loss": 0.4868, "step": 1071 }, { "epoch": 0.9028635597978664, "grad_norm": 0.2831193208694458, "learning_rate": 8.820751646757798e-06, "loss": 0.5295, "step": 1072 }, { "epoch": 0.903705783267827, "grad_norm": 0.2650393545627594, "learning_rate": 8.81758743970826e-06, "loss": 0.5254, "step": 1073 }, { "epoch": 0.9045480067377878, "grad_norm": 0.32293012738227844, "learning_rate": 8.81441956234076e-06, "loss": 0.5601, "step": 1074 }, { "epoch": 0.9053902302077484, "grad_norm": 0.29047584533691406, "learning_rate": 8.81124801770097e-06, "loss": 0.5343, "step": 1075 }, { "epoch": 0.9062324536777091, "grad_norm": 0.29274147748947144, "learning_rate": 8.80807280883809e-06, "loss": 0.5173, "step": 1076 }, { "epoch": 0.9070746771476699, "grad_norm": 0.2970353662967682, "learning_rate": 8.804893938804839e-06, "loss": 0.505, "step": 1077 }, { "epoch": 0.9079169006176305, "grad_norm": 0.3026399612426758, "learning_rate": 8.801711410657456e-06, "loss": 0.5422, "step": 1078 }, { "epoch": 0.9087591240875912, "grad_norm": 0.29471299052238464, "learning_rate": 8.7985252274557e-06, "loss": 0.5149, "step": 1079 }, { "epoch": 0.909601347557552, "grad_norm": 0.29917067289352417, "learning_rate": 8.795335392262841e-06, "loss": 0.5147, "step": 1080 }, { "epoch": 0.9104435710275126, "grad_norm": 0.3287378251552582, "learning_rate": 8.79214190814566e-06, "loss": 0.505, "step": 1081 }, { "epoch": 0.9112857944974734, "grad_norm": 0.3185473382472992, "learning_rate": 8.78894477817445e-06, "loss": 0.4777, "step": 1082 }, { "epoch": 0.912128017967434, "grad_norm": 0.25620511174201965, "learning_rate": 8.785744005423003e-06, "loss": 0.5152, "step": 1083 }, { "epoch": 0.9129702414373947, "grad_norm": 0.3176286816596985, "learning_rate": 8.78253959296862e-06, "loss": 0.5043, "step": 1084 }, { "epoch": 0.9138124649073555, "grad_norm": 0.33481892943382263, "learning_rate": 8.779331543892097e-06, "loss": 0.5137, "step": 1085 }, { "epoch": 0.9146546883773161, "grad_norm": 0.271247535943985, "learning_rate": 8.77611986127773e-06, "loss": 0.5043, "step": 1086 }, { "epoch": 0.9154969118472768, "grad_norm": 0.2754053473472595, "learning_rate": 8.772904548213301e-06, "loss": 0.4911, "step": 1087 }, { "epoch": 0.9163391353172375, "grad_norm": 0.300803005695343, "learning_rate": 8.769685607790091e-06, "loss": 0.5342, "step": 1088 }, { "epoch": 0.9171813587871982, "grad_norm": 0.29227790236473083, "learning_rate": 8.766463043102864e-06, "loss": 0.5067, "step": 1089 }, { "epoch": 0.9180235822571589, "grad_norm": 0.2986677587032318, "learning_rate": 8.76323685724987e-06, "loss": 0.5091, "step": 1090 }, { "epoch": 0.9188658057271196, "grad_norm": 0.3242659568786621, "learning_rate": 8.760007053332837e-06, "loss": 0.5232, "step": 1091 }, { "epoch": 0.9197080291970803, "grad_norm": 0.2903384566307068, "learning_rate": 8.756773634456975e-06, "loss": 0.5368, "step": 1092 }, { "epoch": 0.9205502526670409, "grad_norm": 0.3146713376045227, "learning_rate": 8.75353660373097e-06, "loss": 0.5426, "step": 1093 }, { "epoch": 0.9213924761370017, "grad_norm": 0.32357197999954224, "learning_rate": 8.750295964266979e-06, "loss": 0.5273, "step": 1094 }, { "epoch": 0.9222346996069624, "grad_norm": 0.29547402262687683, "learning_rate": 8.747051719180626e-06, "loss": 0.5435, "step": 1095 }, { "epoch": 0.9230769230769231, "grad_norm": 0.2693501114845276, "learning_rate": 8.743803871591008e-06, "loss": 0.5241, "step": 1096 }, { "epoch": 0.9239191465468838, "grad_norm": 0.2909233272075653, "learning_rate": 8.740552424620679e-06, "loss": 0.5076, "step": 1097 }, { "epoch": 0.9247613700168444, "grad_norm": 0.2732381522655487, "learning_rate": 8.737297381395657e-06, "loss": 0.5054, "step": 1098 }, { "epoch": 0.9256035934868052, "grad_norm": 0.24633647501468658, "learning_rate": 8.734038745045419e-06, "loss": 0.5144, "step": 1099 }, { "epoch": 0.9264458169567659, "grad_norm": 0.2972082197666168, "learning_rate": 8.730776518702891e-06, "loss": 0.5131, "step": 1100 }, { "epoch": 0.9272880404267265, "grad_norm": 0.28994646668434143, "learning_rate": 8.727510705504453e-06, "loss": 0.5059, "step": 1101 }, { "epoch": 0.9281302638966873, "grad_norm": 0.2939974069595337, "learning_rate": 8.72424130858994e-06, "loss": 0.5495, "step": 1102 }, { "epoch": 0.928972487366648, "grad_norm": 0.2383164018392563, "learning_rate": 8.72096833110262e-06, "loss": 0.4777, "step": 1103 }, { "epoch": 0.9298147108366086, "grad_norm": 0.30057772994041443, "learning_rate": 8.717691776189214e-06, "loss": 0.5245, "step": 1104 }, { "epoch": 0.9306569343065694, "grad_norm": 0.2478383630514145, "learning_rate": 8.714411646999878e-06, "loss": 0.4849, "step": 1105 }, { "epoch": 0.93149915777653, "grad_norm": 0.2696175277233124, "learning_rate": 8.711127946688207e-06, "loss": 0.5375, "step": 1106 }, { "epoch": 0.9323413812464907, "grad_norm": 0.2821519672870636, "learning_rate": 8.707840678411223e-06, "loss": 0.506, "step": 1107 }, { "epoch": 0.9331836047164515, "grad_norm": 0.2729851305484772, "learning_rate": 8.704549845329386e-06, "loss": 0.5117, "step": 1108 }, { "epoch": 0.9340258281864121, "grad_norm": 0.24856886267662048, "learning_rate": 8.701255450606579e-06, "loss": 0.4901, "step": 1109 }, { "epoch": 0.9348680516563729, "grad_norm": 0.27755337953567505, "learning_rate": 8.69795749741011e-06, "loss": 0.5415, "step": 1110 }, { "epoch": 0.9357102751263335, "grad_norm": 0.30170515179634094, "learning_rate": 8.694655988910707e-06, "loss": 0.553, "step": 1111 }, { "epoch": 0.9365524985962942, "grad_norm": 0.2664754092693329, "learning_rate": 8.69135092828252e-06, "loss": 0.518, "step": 1112 }, { "epoch": 0.937394722066255, "grad_norm": 0.2706061899662018, "learning_rate": 8.688042318703111e-06, "loss": 0.5156, "step": 1113 }, { "epoch": 0.9382369455362156, "grad_norm": 0.28203722834587097, "learning_rate": 8.684730163353457e-06, "loss": 0.4933, "step": 1114 }, { "epoch": 0.9390791690061763, "grad_norm": 0.2821604013442993, "learning_rate": 8.681414465417936e-06, "loss": 0.5009, "step": 1115 }, { "epoch": 0.939921392476137, "grad_norm": 0.29476234316825867, "learning_rate": 8.678095228084343e-06, "loss": 0.5426, "step": 1116 }, { "epoch": 0.9407636159460977, "grad_norm": 0.2935395836830139, "learning_rate": 8.674772454543869e-06, "loss": 0.5369, "step": 1117 }, { "epoch": 0.9416058394160584, "grad_norm": 0.29218339920043945, "learning_rate": 8.671446147991103e-06, "loss": 0.4702, "step": 1118 }, { "epoch": 0.9424480628860191, "grad_norm": 0.2953758239746094, "learning_rate": 8.66811631162404e-06, "loss": 0.506, "step": 1119 }, { "epoch": 0.9432902863559798, "grad_norm": 0.3538503050804138, "learning_rate": 8.664782948644058e-06, "loss": 0.5307, "step": 1120 }, { "epoch": 0.9441325098259404, "grad_norm": 0.2945340871810913, "learning_rate": 8.661446062255931e-06, "loss": 0.5149, "step": 1121 }, { "epoch": 0.9449747332959012, "grad_norm": 0.25228723883628845, "learning_rate": 8.65810565566782e-06, "loss": 0.4975, "step": 1122 }, { "epoch": 0.9458169567658619, "grad_norm": 0.31329214572906494, "learning_rate": 8.654761732091271e-06, "loss": 0.5466, "step": 1123 }, { "epoch": 0.9466591802358225, "grad_norm": 0.3104366660118103, "learning_rate": 8.65141429474121e-06, "loss": 0.5027, "step": 1124 }, { "epoch": 0.9475014037057833, "grad_norm": 0.29008805751800537, "learning_rate": 8.648063346835943e-06, "loss": 0.5259, "step": 1125 }, { "epoch": 0.9483436271757439, "grad_norm": 0.2853054702281952, "learning_rate": 8.644708891597147e-06, "loss": 0.5127, "step": 1126 }, { "epoch": 0.9491858506457047, "grad_norm": 0.27270349860191345, "learning_rate": 8.641350932249876e-06, "loss": 0.5113, "step": 1127 }, { "epoch": 0.9500280741156654, "grad_norm": 0.26318827271461487, "learning_rate": 8.637989472022548e-06, "loss": 0.5161, "step": 1128 }, { "epoch": 0.950870297585626, "grad_norm": 0.28094732761383057, "learning_rate": 8.634624514146954e-06, "loss": 0.518, "step": 1129 }, { "epoch": 0.9517125210555868, "grad_norm": 0.26232588291168213, "learning_rate": 8.631256061858238e-06, "loss": 0.4804, "step": 1130 }, { "epoch": 0.9525547445255474, "grad_norm": 0.2685281038284302, "learning_rate": 8.627884118394913e-06, "loss": 0.5434, "step": 1131 }, { "epoch": 0.9533969679955081, "grad_norm": 0.25896337628364563, "learning_rate": 8.624508686998846e-06, "loss": 0.5122, "step": 1132 }, { "epoch": 0.9542391914654689, "grad_norm": 0.28447291254997253, "learning_rate": 8.621129770915248e-06, "loss": 0.4704, "step": 1133 }, { "epoch": 0.9550814149354295, "grad_norm": 0.2987595796585083, "learning_rate": 8.617747373392697e-06, "loss": 0.5003, "step": 1134 }, { "epoch": 0.9559236384053902, "grad_norm": 0.28660279512405396, "learning_rate": 8.614361497683102e-06, "loss": 0.4939, "step": 1135 }, { "epoch": 0.956765861875351, "grad_norm": 0.2903859615325928, "learning_rate": 8.61097214704173e-06, "loss": 0.5204, "step": 1136 }, { "epoch": 0.9576080853453116, "grad_norm": 0.270903080701828, "learning_rate": 8.607579324727175e-06, "loss": 0.5312, "step": 1137 }, { "epoch": 0.9584503088152723, "grad_norm": 0.2767120599746704, "learning_rate": 8.60418303400138e-06, "loss": 0.5185, "step": 1138 }, { "epoch": 0.959292532285233, "grad_norm": 0.2829473614692688, "learning_rate": 8.600783278129617e-06, "loss": 0.5365, "step": 1139 }, { "epoch": 0.9601347557551937, "grad_norm": 0.26663389801979065, "learning_rate": 8.597380060380493e-06, "loss": 0.5417, "step": 1140 }, { "epoch": 0.9609769792251545, "grad_norm": 0.2922547459602356, "learning_rate": 8.59397338402594e-06, "loss": 0.484, "step": 1141 }, { "epoch": 0.9618192026951151, "grad_norm": 0.2750844657421112, "learning_rate": 8.590563252341216e-06, "loss": 0.4827, "step": 1142 }, { "epoch": 0.9626614261650758, "grad_norm": 0.24595698714256287, "learning_rate": 8.5871496686049e-06, "loss": 0.4699, "step": 1143 }, { "epoch": 0.9635036496350365, "grad_norm": 0.2853884994983673, "learning_rate": 8.583732636098895e-06, "loss": 0.5303, "step": 1144 }, { "epoch": 0.9643458731049972, "grad_norm": 0.30569061636924744, "learning_rate": 8.580312158108413e-06, "loss": 0.4966, "step": 1145 }, { "epoch": 0.9651880965749579, "grad_norm": 0.27694955468177795, "learning_rate": 8.576888237921983e-06, "loss": 0.4816, "step": 1146 }, { "epoch": 0.9660303200449186, "grad_norm": 0.29900193214416504, "learning_rate": 8.57346087883144e-06, "loss": 0.4908, "step": 1147 }, { "epoch": 0.9668725435148793, "grad_norm": 0.2581435739994049, "learning_rate": 8.570030084131933e-06, "loss": 0.5136, "step": 1148 }, { "epoch": 0.9677147669848399, "grad_norm": 0.2943570911884308, "learning_rate": 8.566595857121902e-06, "loss": 0.5112, "step": 1149 }, { "epoch": 0.9685569904548007, "grad_norm": 0.32825493812561035, "learning_rate": 8.563158201103096e-06, "loss": 0.5126, "step": 1150 }, { "epoch": 0.9693992139247614, "grad_norm": 0.2827991247177124, "learning_rate": 8.559717119380558e-06, "loss": 0.5087, "step": 1151 }, { "epoch": 0.970241437394722, "grad_norm": 0.28734272718429565, "learning_rate": 8.556272615262623e-06, "loss": 0.4848, "step": 1152 }, { "epoch": 0.9710836608646828, "grad_norm": 0.27075448632240295, "learning_rate": 8.55282469206092e-06, "loss": 0.4815, "step": 1153 }, { "epoch": 0.9719258843346434, "grad_norm": 0.2601170837879181, "learning_rate": 8.549373353090362e-06, "loss": 0.4988, "step": 1154 }, { "epoch": 0.9727681078046042, "grad_norm": 0.26569998264312744, "learning_rate": 8.545918601669147e-06, "loss": 0.473, "step": 1155 }, { "epoch": 0.9736103312745649, "grad_norm": 0.2744729518890381, "learning_rate": 8.542460441118756e-06, "loss": 0.5221, "step": 1156 }, { "epoch": 0.9744525547445255, "grad_norm": 0.28309130668640137, "learning_rate": 8.538998874763942e-06, "loss": 0.5379, "step": 1157 }, { "epoch": 0.9752947782144863, "grad_norm": 0.25224748253822327, "learning_rate": 8.535533905932739e-06, "loss": 0.5154, "step": 1158 }, { "epoch": 0.976137001684447, "grad_norm": 0.2499162256717682, "learning_rate": 8.532065537956446e-06, "loss": 0.5066, "step": 1159 }, { "epoch": 0.9769792251544076, "grad_norm": 0.27321192622184753, "learning_rate": 8.528593774169637e-06, "loss": 0.5364, "step": 1160 }, { "epoch": 0.9778214486243684, "grad_norm": 0.2518339157104492, "learning_rate": 8.525118617910144e-06, "loss": 0.5112, "step": 1161 }, { "epoch": 0.978663672094329, "grad_norm": 0.26582229137420654, "learning_rate": 8.521640072519066e-06, "loss": 0.5027, "step": 1162 }, { "epoch": 0.9795058955642897, "grad_norm": 0.26972490549087524, "learning_rate": 8.518158141340755e-06, "loss": 0.5416, "step": 1163 }, { "epoch": 0.9803481190342505, "grad_norm": 0.2585470676422119, "learning_rate": 8.514672827722824e-06, "loss": 0.5008, "step": 1164 }, { "epoch": 0.9811903425042111, "grad_norm": 0.270262748003006, "learning_rate": 8.511184135016134e-06, "loss": 0.5098, "step": 1165 }, { "epoch": 0.9820325659741718, "grad_norm": 0.2741972506046295, "learning_rate": 8.507692066574795e-06, "loss": 0.507, "step": 1166 }, { "epoch": 0.9828747894441325, "grad_norm": 0.2607283592224121, "learning_rate": 8.504196625756166e-06, "loss": 0.5004, "step": 1167 }, { "epoch": 0.9837170129140932, "grad_norm": 0.28498339653015137, "learning_rate": 8.500697815920843e-06, "loss": 0.4991, "step": 1168 }, { "epoch": 0.9845592363840538, "grad_norm": 0.2764589786529541, "learning_rate": 8.497195640432664e-06, "loss": 0.4974, "step": 1169 }, { "epoch": 0.9854014598540146, "grad_norm": 0.2914160192012787, "learning_rate": 8.493690102658703e-06, "loss": 0.5333, "step": 1170 }, { "epoch": 0.9862436833239753, "grad_norm": 0.2813591957092285, "learning_rate": 8.490181205969268e-06, "loss": 0.5463, "step": 1171 }, { "epoch": 0.987085906793936, "grad_norm": 0.2869223654270172, "learning_rate": 8.486668953737891e-06, "loss": 0.5138, "step": 1172 }, { "epoch": 0.9879281302638967, "grad_norm": 0.27986517548561096, "learning_rate": 8.483153349341336e-06, "loss": 0.5279, "step": 1173 }, { "epoch": 0.9887703537338574, "grad_norm": 0.27165326476097107, "learning_rate": 8.479634396159587e-06, "loss": 0.5323, "step": 1174 }, { "epoch": 0.9896125772038181, "grad_norm": 0.2613833248615265, "learning_rate": 8.476112097575845e-06, "loss": 0.5013, "step": 1175 }, { "epoch": 0.9904548006737788, "grad_norm": 0.25128450989723206, "learning_rate": 8.472586456976534e-06, "loss": 0.4796, "step": 1176 }, { "epoch": 0.9912970241437394, "grad_norm": 0.2501789331436157, "learning_rate": 8.46905747775129e-06, "loss": 0.5044, "step": 1177 }, { "epoch": 0.9921392476137002, "grad_norm": 0.28970757126808167, "learning_rate": 8.465525163292948e-06, "loss": 0.519, "step": 1178 }, { "epoch": 0.9929814710836609, "grad_norm": 0.26291942596435547, "learning_rate": 8.461989516997565e-06, "loss": 0.5306, "step": 1179 }, { "epoch": 0.9938236945536215, "grad_norm": 0.26036500930786133, "learning_rate": 8.458450542264391e-06, "loss": 0.5227, "step": 1180 }, { "epoch": 0.9946659180235823, "grad_norm": 0.2818317711353302, "learning_rate": 8.45490824249588e-06, "loss": 0.5012, "step": 1181 }, { "epoch": 0.9955081414935429, "grad_norm": 0.28614890575408936, "learning_rate": 8.45136262109768e-06, "loss": 0.4722, "step": 1182 }, { "epoch": 0.9963503649635036, "grad_norm": 0.2663925290107727, "learning_rate": 8.447813681478638e-06, "loss": 0.5278, "step": 1183 }, { "epoch": 0.9971925884334644, "grad_norm": 0.2640809416770935, "learning_rate": 8.444261427050786e-06, "loss": 0.54, "step": 1184 }, { "epoch": 0.998034811903425, "grad_norm": 0.30570313334465027, "learning_rate": 8.440705861229344e-06, "loss": 0.4965, "step": 1185 }, { "epoch": 0.9988770353733858, "grad_norm": 0.2607339024543762, "learning_rate": 8.437146987432717e-06, "loss": 0.5218, "step": 1186 }, { "epoch": 0.9997192588433464, "grad_norm": 0.27122095227241516, "learning_rate": 8.43358480908249e-06, "loss": 0.4789, "step": 1187 }, { "epoch": 1.000561482313307, "grad_norm": 0.47950249910354614, "learning_rate": 8.430019329603423e-06, "loss": 0.8047, "step": 1188 }, { "epoch": 1.0014037057832679, "grad_norm": 0.26870396733283997, "learning_rate": 8.426450552423451e-06, "loss": 0.5017, "step": 1189 }, { "epoch": 1.0022459292532284, "grad_norm": 0.2743620276451111, "learning_rate": 8.422878480973681e-06, "loss": 0.4713, "step": 1190 }, { "epoch": 1.0030881527231892, "grad_norm": 0.300184428691864, "learning_rate": 8.41930311868839e-06, "loss": 0.4922, "step": 1191 }, { "epoch": 1.00393037619315, "grad_norm": 0.27534767985343933, "learning_rate": 8.41572446900501e-06, "loss": 0.5173, "step": 1192 }, { "epoch": 1.0047725996631107, "grad_norm": 0.24081802368164062, "learning_rate": 8.412142535364139e-06, "loss": 0.4157, "step": 1193 }, { "epoch": 1.0056148231330713, "grad_norm": 0.2478712499141693, "learning_rate": 8.408557321209534e-06, "loss": 0.5013, "step": 1194 }, { "epoch": 1.006457046603032, "grad_norm": 0.2628666162490845, "learning_rate": 8.404968829988102e-06, "loss": 0.5099, "step": 1195 }, { "epoch": 1.0072992700729928, "grad_norm": 0.2632077634334564, "learning_rate": 8.401377065149904e-06, "loss": 0.4802, "step": 1196 }, { "epoch": 1.0081414935429533, "grad_norm": 0.25894999504089355, "learning_rate": 8.397782030148147e-06, "loss": 0.4949, "step": 1197 }, { "epoch": 1.0089837170129141, "grad_norm": 0.25566548109054565, "learning_rate": 8.39418372843918e-06, "loss": 0.4522, "step": 1198 }, { "epoch": 1.0098259404828749, "grad_norm": 0.25323349237442017, "learning_rate": 8.390582163482497e-06, "loss": 0.4773, "step": 1199 }, { "epoch": 1.0106681639528354, "grad_norm": 0.25757476687431335, "learning_rate": 8.386977338740724e-06, "loss": 0.4772, "step": 1200 }, { "epoch": 1.0115103874227962, "grad_norm": 0.32139402627944946, "learning_rate": 8.383369257679625e-06, "loss": 0.5389, "step": 1201 }, { "epoch": 1.012352610892757, "grad_norm": 0.2652696371078491, "learning_rate": 8.379757923768094e-06, "loss": 0.4517, "step": 1202 }, { "epoch": 1.0131948343627175, "grad_norm": 0.3158402144908905, "learning_rate": 8.376143340478153e-06, "loss": 0.5113, "step": 1203 }, { "epoch": 1.0140370578326783, "grad_norm": 0.28151413798332214, "learning_rate": 8.372525511284945e-06, "loss": 0.5013, "step": 1204 }, { "epoch": 1.014879281302639, "grad_norm": 0.27820125222206116, "learning_rate": 8.368904439666739e-06, "loss": 0.4868, "step": 1205 }, { "epoch": 1.0157215047725996, "grad_norm": 0.27826550602912903, "learning_rate": 8.365280129104912e-06, "loss": 0.4707, "step": 1206 }, { "epoch": 1.0165637282425604, "grad_norm": 0.252223402261734, "learning_rate": 8.361652583083968e-06, "loss": 0.4467, "step": 1207 }, { "epoch": 1.0174059517125211, "grad_norm": 0.2796793282032013, "learning_rate": 8.358021805091509e-06, "loss": 0.4905, "step": 1208 }, { "epoch": 1.0182481751824817, "grad_norm": 0.25153306126594543, "learning_rate": 8.354387798618254e-06, "loss": 0.4663, "step": 1209 }, { "epoch": 1.0190903986524424, "grad_norm": 0.24486416578292847, "learning_rate": 8.35075056715802e-06, "loss": 0.4661, "step": 1210 }, { "epoch": 1.0199326221224032, "grad_norm": 0.3036920428276062, "learning_rate": 8.347110114207727e-06, "loss": 0.5347, "step": 1211 }, { "epoch": 1.0207748455923638, "grad_norm": 0.25432732701301575, "learning_rate": 8.34346644326739e-06, "loss": 0.4563, "step": 1212 }, { "epoch": 1.0216170690623245, "grad_norm": 0.29234981536865234, "learning_rate": 8.339819557840124e-06, "loss": 0.5386, "step": 1213 }, { "epoch": 1.0224592925322853, "grad_norm": 0.2503797113895416, "learning_rate": 8.336169461432125e-06, "loss": 0.4867, "step": 1214 }, { "epoch": 1.0233015160022458, "grad_norm": 0.28468289971351624, "learning_rate": 8.332516157552684e-06, "loss": 0.5389, "step": 1215 }, { "epoch": 1.0241437394722066, "grad_norm": 0.24577568471431732, "learning_rate": 8.328859649714171e-06, "loss": 0.4524, "step": 1216 }, { "epoch": 1.0249859629421674, "grad_norm": 0.2555333971977234, "learning_rate": 8.32519994143204e-06, "loss": 0.5351, "step": 1217 }, { "epoch": 1.025828186412128, "grad_norm": 0.2426941692829132, "learning_rate": 8.321537036224822e-06, "loss": 0.4456, "step": 1218 }, { "epoch": 1.0266704098820887, "grad_norm": 0.27127930521965027, "learning_rate": 8.317870937614115e-06, "loss": 0.5061, "step": 1219 }, { "epoch": 1.0275126333520495, "grad_norm": 0.27275264263153076, "learning_rate": 8.314201649124595e-06, "loss": 0.4866, "step": 1220 }, { "epoch": 1.02835485682201, "grad_norm": 0.2618161737918854, "learning_rate": 8.310529174284004e-06, "loss": 0.4614, "step": 1221 }, { "epoch": 1.0291970802919708, "grad_norm": 0.2504390478134155, "learning_rate": 8.30685351662314e-06, "loss": 0.4799, "step": 1222 }, { "epoch": 1.0300393037619315, "grad_norm": 0.31729549169540405, "learning_rate": 8.30317467967587e-06, "loss": 0.503, "step": 1223 }, { "epoch": 1.0308815272318923, "grad_norm": 0.28788065910339355, "learning_rate": 8.299492666979114e-06, "loss": 0.5527, "step": 1224 }, { "epoch": 1.0317237507018528, "grad_norm": 0.2555885314941406, "learning_rate": 8.295807482072842e-06, "loss": 0.4851, "step": 1225 }, { "epoch": 1.0325659741718136, "grad_norm": 0.2923148274421692, "learning_rate": 8.292119128500082e-06, "loss": 0.5009, "step": 1226 }, { "epoch": 1.0334081976417744, "grad_norm": 0.2758176624774933, "learning_rate": 8.288427609806899e-06, "loss": 0.4931, "step": 1227 }, { "epoch": 1.034250421111735, "grad_norm": 0.2749158442020416, "learning_rate": 8.28473292954241e-06, "loss": 0.5178, "step": 1228 }, { "epoch": 1.0350926445816957, "grad_norm": 0.2915363907814026, "learning_rate": 8.281035091258762e-06, "loss": 0.5054, "step": 1229 }, { "epoch": 1.0359348680516565, "grad_norm": 0.27395522594451904, "learning_rate": 8.277334098511147e-06, "loss": 0.5131, "step": 1230 }, { "epoch": 1.036777091521617, "grad_norm": 0.23627036809921265, "learning_rate": 8.273629954857784e-06, "loss": 0.4665, "step": 1231 }, { "epoch": 1.0376193149915778, "grad_norm": 0.2609037458896637, "learning_rate": 8.269922663859926e-06, "loss": 0.4675, "step": 1232 }, { "epoch": 1.0384615384615385, "grad_norm": 0.2762940227985382, "learning_rate": 8.266212229081846e-06, "loss": 0.5493, "step": 1233 }, { "epoch": 1.039303761931499, "grad_norm": 0.24586625397205353, "learning_rate": 8.262498654090846e-06, "loss": 0.4804, "step": 1234 }, { "epoch": 1.0401459854014599, "grad_norm": 0.28218767046928406, "learning_rate": 8.258781942457244e-06, "loss": 0.4968, "step": 1235 }, { "epoch": 1.0409882088714206, "grad_norm": 0.2385130524635315, "learning_rate": 8.255062097754371e-06, "loss": 0.4472, "step": 1236 }, { "epoch": 1.0418304323413812, "grad_norm": 0.2639881670475006, "learning_rate": 8.251339123558573e-06, "loss": 0.5103, "step": 1237 }, { "epoch": 1.042672655811342, "grad_norm": 0.250960111618042, "learning_rate": 8.247613023449209e-06, "loss": 0.4621, "step": 1238 }, { "epoch": 1.0435148792813027, "grad_norm": 0.2943269908428192, "learning_rate": 8.243883801008632e-06, "loss": 0.5558, "step": 1239 }, { "epoch": 1.0443571027512633, "grad_norm": 0.26202598214149475, "learning_rate": 8.240151459822207e-06, "loss": 0.4532, "step": 1240 }, { "epoch": 1.045199326221224, "grad_norm": 0.2692597806453705, "learning_rate": 8.236416003478295e-06, "loss": 0.5084, "step": 1241 }, { "epoch": 1.0460415496911848, "grad_norm": 0.2976280748844147, "learning_rate": 8.232677435568252e-06, "loss": 0.5051, "step": 1242 }, { "epoch": 1.0468837731611453, "grad_norm": 0.2510835528373718, "learning_rate": 8.228935759686424e-06, "loss": 0.4277, "step": 1243 }, { "epoch": 1.047725996631106, "grad_norm": 0.2668900191783905, "learning_rate": 8.225190979430145e-06, "loss": 0.523, "step": 1244 }, { "epoch": 1.0485682201010669, "grad_norm": 0.2782909870147705, "learning_rate": 8.221443098399733e-06, "loss": 0.4986, "step": 1245 }, { "epoch": 1.0494104435710274, "grad_norm": 0.3013979494571686, "learning_rate": 8.217692120198492e-06, "loss": 0.5056, "step": 1246 }, { "epoch": 1.0502526670409882, "grad_norm": 0.25285816192626953, "learning_rate": 8.213938048432697e-06, "loss": 0.4929, "step": 1247 }, { "epoch": 1.051094890510949, "grad_norm": 0.26069334149360657, "learning_rate": 8.210180886711603e-06, "loss": 0.5199, "step": 1248 }, { "epoch": 1.0519371139809095, "grad_norm": 0.2803276777267456, "learning_rate": 8.206420638647433e-06, "loss": 0.49, "step": 1249 }, { "epoch": 1.0527793374508703, "grad_norm": 0.25456011295318604, "learning_rate": 8.202657307855376e-06, "loss": 0.4446, "step": 1250 }, { "epoch": 1.053621560920831, "grad_norm": 0.2769217789173126, "learning_rate": 8.198890897953586e-06, "loss": 0.4954, "step": 1251 }, { "epoch": 1.0544637843907916, "grad_norm": 0.2901635468006134, "learning_rate": 8.19512141256318e-06, "loss": 0.5324, "step": 1252 }, { "epoch": 1.0553060078607523, "grad_norm": 0.2660444378852844, "learning_rate": 8.191348855308229e-06, "loss": 0.4822, "step": 1253 }, { "epoch": 1.0561482313307131, "grad_norm": 0.2616792917251587, "learning_rate": 8.187573229815757e-06, "loss": 0.4571, "step": 1254 }, { "epoch": 1.0569904548006739, "grad_norm": 0.29134103655815125, "learning_rate": 8.18379453971574e-06, "loss": 0.5372, "step": 1255 }, { "epoch": 1.0578326782706344, "grad_norm": 0.23620973527431488, "learning_rate": 8.180012788641097e-06, "loss": 0.4675, "step": 1256 }, { "epoch": 1.0586749017405952, "grad_norm": 0.2700193226337433, "learning_rate": 8.176227980227693e-06, "loss": 0.4971, "step": 1257 }, { "epoch": 1.059517125210556, "grad_norm": 0.276531457901001, "learning_rate": 8.172440118114332e-06, "loss": 0.533, "step": 1258 }, { "epoch": 1.0603593486805165, "grad_norm": 0.23521380126476288, "learning_rate": 8.168649205942753e-06, "loss": 0.4635, "step": 1259 }, { "epoch": 1.0612015721504773, "grad_norm": 0.28151533007621765, "learning_rate": 8.164855247357628e-06, "loss": 0.4954, "step": 1260 }, { "epoch": 1.062043795620438, "grad_norm": 0.2417442351579666, "learning_rate": 8.161058246006558e-06, "loss": 0.4907, "step": 1261 }, { "epoch": 1.0628860190903986, "grad_norm": 0.25382816791534424, "learning_rate": 8.157258205540069e-06, "loss": 0.4751, "step": 1262 }, { "epoch": 1.0637282425603594, "grad_norm": 0.2967683970928192, "learning_rate": 8.153455129611605e-06, "loss": 0.4905, "step": 1263 }, { "epoch": 1.0645704660303201, "grad_norm": 0.27463826537132263, "learning_rate": 8.14964902187754e-06, "loss": 0.4988, "step": 1264 }, { "epoch": 1.0654126895002807, "grad_norm": 0.238150492310524, "learning_rate": 8.145839885997146e-06, "loss": 0.4457, "step": 1265 }, { "epoch": 1.0662549129702414, "grad_norm": 0.31667405366897583, "learning_rate": 8.142027725632622e-06, "loss": 0.4826, "step": 1266 }, { "epoch": 1.0670971364402022, "grad_norm": 0.28296101093292236, "learning_rate": 8.138212544449067e-06, "loss": 0.5035, "step": 1267 }, { "epoch": 1.0679393599101628, "grad_norm": 0.23936676979064941, "learning_rate": 8.134394346114486e-06, "loss": 0.4927, "step": 1268 }, { "epoch": 1.0687815833801235, "grad_norm": 0.30065807700157166, "learning_rate": 8.130573134299782e-06, "loss": 0.4904, "step": 1269 }, { "epoch": 1.0696238068500843, "grad_norm": 0.31204476952552795, "learning_rate": 8.126748912678757e-06, "loss": 0.5269, "step": 1270 }, { "epoch": 1.0704660303200448, "grad_norm": 0.24706806242465973, "learning_rate": 8.122921684928111e-06, "loss": 0.4341, "step": 1271 }, { "epoch": 1.0713082537900056, "grad_norm": 0.25574401021003723, "learning_rate": 8.119091454727427e-06, "loss": 0.4902, "step": 1272 }, { "epoch": 1.0721504772599664, "grad_norm": 0.29619911313056946, "learning_rate": 8.11525822575918e-06, "loss": 0.5057, "step": 1273 }, { "epoch": 1.072992700729927, "grad_norm": 0.27129700779914856, "learning_rate": 8.111422001708725e-06, "loss": 0.4689, "step": 1274 }, { "epoch": 1.0738349241998877, "grad_norm": 0.2735674977302551, "learning_rate": 8.107582786264299e-06, "loss": 0.4884, "step": 1275 }, { "epoch": 1.0746771476698485, "grad_norm": 0.2975695729255676, "learning_rate": 8.10374058311701e-06, "loss": 0.5118, "step": 1276 }, { "epoch": 1.075519371139809, "grad_norm": 0.26207512617111206, "learning_rate": 8.099895395960847e-06, "loss": 0.4894, "step": 1277 }, { "epoch": 1.0763615946097698, "grad_norm": 0.2750672996044159, "learning_rate": 8.09604722849266e-06, "loss": 0.4872, "step": 1278 }, { "epoch": 1.0772038180797305, "grad_norm": 0.2707706391811371, "learning_rate": 8.092196084412167e-06, "loss": 0.5104, "step": 1279 }, { "epoch": 1.078046041549691, "grad_norm": 0.2825748026371002, "learning_rate": 8.08834196742195e-06, "loss": 0.4857, "step": 1280 }, { "epoch": 1.0788882650196518, "grad_norm": 0.254472941160202, "learning_rate": 8.084484881227449e-06, "loss": 0.4635, "step": 1281 }, { "epoch": 1.0797304884896126, "grad_norm": 0.2876235842704773, "learning_rate": 8.080624829536949e-06, "loss": 0.4942, "step": 1282 }, { "epoch": 1.0805727119595732, "grad_norm": 0.2555425465106964, "learning_rate": 8.076761816061603e-06, "loss": 0.4509, "step": 1283 }, { "epoch": 1.081414935429534, "grad_norm": 0.24022382497787476, "learning_rate": 8.072895844515398e-06, "loss": 0.5009, "step": 1284 }, { "epoch": 1.0822571588994947, "grad_norm": 0.289009153842926, "learning_rate": 8.069026918615173e-06, "loss": 0.4856, "step": 1285 }, { "epoch": 1.0830993823694555, "grad_norm": 0.28754937648773193, "learning_rate": 8.065155042080599e-06, "loss": 0.4882, "step": 1286 }, { "epoch": 1.083941605839416, "grad_norm": 0.2890631854534149, "learning_rate": 8.061280218634192e-06, "loss": 0.4728, "step": 1287 }, { "epoch": 1.0847838293093768, "grad_norm": 0.31688225269317627, "learning_rate": 8.057402452001298e-06, "loss": 0.5264, "step": 1288 }, { "epoch": 1.0856260527793375, "grad_norm": 0.3393896222114563, "learning_rate": 8.05352174591009e-06, "loss": 0.5071, "step": 1289 }, { "epoch": 1.086468276249298, "grad_norm": 0.26153868436813354, "learning_rate": 8.049638104091575e-06, "loss": 0.4849, "step": 1290 }, { "epoch": 1.0873104997192589, "grad_norm": 0.31423893570899963, "learning_rate": 8.04575153027957e-06, "loss": 0.5265, "step": 1291 }, { "epoch": 1.0881527231892196, "grad_norm": 0.27897655963897705, "learning_rate": 8.041862028210725e-06, "loss": 0.4539, "step": 1292 }, { "epoch": 1.0889949466591802, "grad_norm": 0.30081385374069214, "learning_rate": 8.037969601624495e-06, "loss": 0.4978, "step": 1293 }, { "epoch": 1.089837170129141, "grad_norm": 0.24179252982139587, "learning_rate": 8.034074254263152e-06, "loss": 0.4533, "step": 1294 }, { "epoch": 1.0906793935991017, "grad_norm": 0.3100072145462036, "learning_rate": 8.030175989871769e-06, "loss": 0.4955, "step": 1295 }, { "epoch": 1.0915216170690623, "grad_norm": 0.28106310963630676, "learning_rate": 8.026274812198235e-06, "loss": 0.4811, "step": 1296 }, { "epoch": 1.092363840539023, "grad_norm": 0.25135448575019836, "learning_rate": 8.022370724993229e-06, "loss": 0.4784, "step": 1297 }, { "epoch": 1.0932060640089838, "grad_norm": 0.2874135971069336, "learning_rate": 8.018463732010235e-06, "loss": 0.5541, "step": 1298 }, { "epoch": 1.0940482874789443, "grad_norm": 0.2468324452638626, "learning_rate": 8.014553837005527e-06, "loss": 0.4164, "step": 1299 }, { "epoch": 1.094890510948905, "grad_norm": 0.25179803371429443, "learning_rate": 8.010641043738167e-06, "loss": 0.5017, "step": 1300 }, { "epoch": 1.0957327344188659, "grad_norm": 0.2762503921985626, "learning_rate": 8.006725355970008e-06, "loss": 0.5267, "step": 1301 }, { "epoch": 1.0965749578888264, "grad_norm": 0.26368752121925354, "learning_rate": 8.002806777465685e-06, "loss": 0.4733, "step": 1302 }, { "epoch": 1.0974171813587872, "grad_norm": 0.24741004407405853, "learning_rate": 7.99888531199261e-06, "loss": 0.4479, "step": 1303 }, { "epoch": 1.098259404828748, "grad_norm": 0.2590891420841217, "learning_rate": 7.99496096332097e-06, "loss": 0.4891, "step": 1304 }, { "epoch": 1.0991016282987085, "grad_norm": 0.25630730390548706, "learning_rate": 7.99103373522373e-06, "loss": 0.4975, "step": 1305 }, { "epoch": 1.0999438517686693, "grad_norm": 0.24819126725196838, "learning_rate": 7.987103631476615e-06, "loss": 0.4628, "step": 1306 }, { "epoch": 1.10078607523863, "grad_norm": 0.2987162470817566, "learning_rate": 7.98317065585812e-06, "loss": 0.5164, "step": 1307 }, { "epoch": 1.1016282987085906, "grad_norm": 0.25178202986717224, "learning_rate": 7.9792348121495e-06, "loss": 0.4699, "step": 1308 }, { "epoch": 1.1024705221785513, "grad_norm": 0.30883368849754333, "learning_rate": 7.975296104134768e-06, "loss": 0.5572, "step": 1309 }, { "epoch": 1.1033127456485121, "grad_norm": 0.28644341230392456, "learning_rate": 7.97135453560069e-06, "loss": 0.501, "step": 1310 }, { "epoch": 1.1041549691184729, "grad_norm": 0.2594358026981354, "learning_rate": 7.967410110336782e-06, "loss": 0.448, "step": 1311 }, { "epoch": 1.1049971925884334, "grad_norm": 0.29231691360473633, "learning_rate": 7.963462832135307e-06, "loss": 0.5424, "step": 1312 }, { "epoch": 1.1058394160583942, "grad_norm": 0.24354399740695953, "learning_rate": 7.959512704791269e-06, "loss": 0.4656, "step": 1313 }, { "epoch": 1.1066816395283547, "grad_norm": 0.29599636793136597, "learning_rate": 7.955559732102414e-06, "loss": 0.5515, "step": 1314 }, { "epoch": 1.1075238629983155, "grad_norm": 0.2673192024230957, "learning_rate": 7.951603917869223e-06, "loss": 0.4725, "step": 1315 }, { "epoch": 1.1083660864682763, "grad_norm": 0.25360578298568726, "learning_rate": 7.94764526589491e-06, "loss": 0.4691, "step": 1316 }, { "epoch": 1.109208309938237, "grad_norm": 0.24341201782226562, "learning_rate": 7.943683779985412e-06, "loss": 0.4525, "step": 1317 }, { "epoch": 1.1100505334081976, "grad_norm": 0.2569681406021118, "learning_rate": 7.939719463949398e-06, "loss": 0.4601, "step": 1318 }, { "epoch": 1.1108927568781584, "grad_norm": 0.26451829075813293, "learning_rate": 7.93575232159825e-06, "loss": 0.4765, "step": 1319 }, { "epoch": 1.1117349803481191, "grad_norm": 0.26079991459846497, "learning_rate": 7.931782356746076e-06, "loss": 0.4824, "step": 1320 }, { "epoch": 1.1125772038180797, "grad_norm": 0.25029054284095764, "learning_rate": 7.927809573209691e-06, "loss": 0.4789, "step": 1321 }, { "epoch": 1.1134194272880404, "grad_norm": 0.2562805414199829, "learning_rate": 7.923833974808622e-06, "loss": 0.4486, "step": 1322 }, { "epoch": 1.1142616507580012, "grad_norm": 0.2927616834640503, "learning_rate": 7.919855565365102e-06, "loss": 0.5295, "step": 1323 }, { "epoch": 1.1151038742279618, "grad_norm": 0.24658368527889252, "learning_rate": 7.91587434870407e-06, "loss": 0.4997, "step": 1324 }, { "epoch": 1.1159460976979225, "grad_norm": 0.23201754689216614, "learning_rate": 7.911890328653156e-06, "loss": 0.4655, "step": 1325 }, { "epoch": 1.1167883211678833, "grad_norm": 0.3032664358615875, "learning_rate": 7.907903509042696e-06, "loss": 0.5122, "step": 1326 }, { "epoch": 1.1176305446378438, "grad_norm": 0.2591528296470642, "learning_rate": 7.903913893705706e-06, "loss": 0.46, "step": 1327 }, { "epoch": 1.1184727681078046, "grad_norm": 0.2667114734649658, "learning_rate": 7.899921486477899e-06, "loss": 0.5109, "step": 1328 }, { "epoch": 1.1193149915777654, "grad_norm": 0.27189770340919495, "learning_rate": 7.895926291197667e-06, "loss": 0.5083, "step": 1329 }, { "epoch": 1.120157215047726, "grad_norm": 0.28602808713912964, "learning_rate": 7.891928311706088e-06, "loss": 0.4656, "step": 1330 }, { "epoch": 1.1209994385176867, "grad_norm": 0.29398611187934875, "learning_rate": 7.887927551846908e-06, "loss": 0.4854, "step": 1331 }, { "epoch": 1.1218416619876475, "grad_norm": 0.2511121928691864, "learning_rate": 7.883924015466554e-06, "loss": 0.4646, "step": 1332 }, { "epoch": 1.122683885457608, "grad_norm": 0.2989715039730072, "learning_rate": 7.87991770641412e-06, "loss": 0.5375, "step": 1333 }, { "epoch": 1.1235261089275688, "grad_norm": 0.27495017647743225, "learning_rate": 7.875908628541363e-06, "loss": 0.5227, "step": 1334 }, { "epoch": 1.1243683323975295, "grad_norm": 0.27533674240112305, "learning_rate": 7.871896785702707e-06, "loss": 0.4946, "step": 1335 }, { "epoch": 1.12521055586749, "grad_norm": 0.2675479054450989, "learning_rate": 7.86788218175523e-06, "loss": 0.4653, "step": 1336 }, { "epoch": 1.1260527793374508, "grad_norm": 0.24898561835289001, "learning_rate": 7.863864820558669e-06, "loss": 0.446, "step": 1337 }, { "epoch": 1.1268950028074116, "grad_norm": 0.27794691920280457, "learning_rate": 7.859844705975405e-06, "loss": 0.5404, "step": 1338 }, { "epoch": 1.1277372262773722, "grad_norm": 0.2477172464132309, "learning_rate": 7.855821841870472e-06, "loss": 0.4833, "step": 1339 }, { "epoch": 1.128579449747333, "grad_norm": 0.25270986557006836, "learning_rate": 7.851796232111546e-06, "loss": 0.5078, "step": 1340 }, { "epoch": 1.1294216732172937, "grad_norm": 0.2690238952636719, "learning_rate": 7.847767880568944e-06, "loss": 0.4877, "step": 1341 }, { "epoch": 1.1302638966872545, "grad_norm": 0.2752285599708557, "learning_rate": 7.843736791115614e-06, "loss": 0.4832, "step": 1342 }, { "epoch": 1.131106120157215, "grad_norm": 0.28053146600723267, "learning_rate": 7.839702967627145e-06, "loss": 0.4991, "step": 1343 }, { "epoch": 1.1319483436271758, "grad_norm": 0.28123781085014343, "learning_rate": 7.835666413981744e-06, "loss": 0.4474, "step": 1344 }, { "epoch": 1.1327905670971363, "grad_norm": 0.2601757347583771, "learning_rate": 7.831627134060249e-06, "loss": 0.4786, "step": 1345 }, { "epoch": 1.133632790567097, "grad_norm": 0.27673688530921936, "learning_rate": 7.827585131746122e-06, "loss": 0.5253, "step": 1346 }, { "epoch": 1.1344750140370579, "grad_norm": 0.26718631386756897, "learning_rate": 7.823540410925434e-06, "loss": 0.5166, "step": 1347 }, { "epoch": 1.1353172375070186, "grad_norm": 0.2908550798892975, "learning_rate": 7.81949297548688e-06, "loss": 0.4903, "step": 1348 }, { "epoch": 1.1361594609769792, "grad_norm": 0.2730693221092224, "learning_rate": 7.815442829321754e-06, "loss": 0.4767, "step": 1349 }, { "epoch": 1.13700168444694, "grad_norm": 0.277336061000824, "learning_rate": 7.811389976323963e-06, "loss": 0.5487, "step": 1350 }, { "epoch": 1.1378439079169007, "grad_norm": 0.2639406621456146, "learning_rate": 7.807334420390014e-06, "loss": 0.4855, "step": 1351 }, { "epoch": 1.1386861313868613, "grad_norm": 0.25777140259742737, "learning_rate": 7.803276165419015e-06, "loss": 0.4923, "step": 1352 }, { "epoch": 1.139528354856822, "grad_norm": 0.2718185484409332, "learning_rate": 7.799215215312667e-06, "loss": 0.5232, "step": 1353 }, { "epoch": 1.1403705783267828, "grad_norm": 0.25214725732803345, "learning_rate": 7.795151573975262e-06, "loss": 0.4615, "step": 1354 }, { "epoch": 1.1412128017967433, "grad_norm": 0.24531804025173187, "learning_rate": 7.79108524531368e-06, "loss": 0.4591, "step": 1355 }, { "epoch": 1.142055025266704, "grad_norm": 0.3230135440826416, "learning_rate": 7.787016233237387e-06, "loss": 0.4623, "step": 1356 }, { "epoch": 1.1428972487366649, "grad_norm": 0.2862343490123749, "learning_rate": 7.782944541658423e-06, "loss": 0.4735, "step": 1357 }, { "epoch": 1.1437394722066254, "grad_norm": 0.2878127992153168, "learning_rate": 7.778870174491408e-06, "loss": 0.4874, "step": 1358 }, { "epoch": 1.1445816956765862, "grad_norm": 0.32911059260368347, "learning_rate": 7.774793135653537e-06, "loss": 0.4836, "step": 1359 }, { "epoch": 1.145423919146547, "grad_norm": 0.3135271668434143, "learning_rate": 7.770713429064567e-06, "loss": 0.4638, "step": 1360 }, { "epoch": 1.1462661426165075, "grad_norm": 0.25909462571144104, "learning_rate": 7.766631058646826e-06, "loss": 0.4951, "step": 1361 }, { "epoch": 1.1471083660864683, "grad_norm": 0.3350861668586731, "learning_rate": 7.7625460283252e-06, "loss": 0.5194, "step": 1362 }, { "epoch": 1.147950589556429, "grad_norm": 0.31338566541671753, "learning_rate": 7.75845834202713e-06, "loss": 0.4906, "step": 1363 }, { "epoch": 1.1487928130263896, "grad_norm": 0.263044536113739, "learning_rate": 7.754368003682617e-06, "loss": 0.4168, "step": 1364 }, { "epoch": 1.1496350364963503, "grad_norm": 0.2804284989833832, "learning_rate": 7.750275017224208e-06, "loss": 0.5218, "step": 1365 }, { "epoch": 1.1504772599663111, "grad_norm": 0.27582812309265137, "learning_rate": 7.746179386586994e-06, "loss": 0.4664, "step": 1366 }, { "epoch": 1.1513194834362717, "grad_norm": 0.30131247639656067, "learning_rate": 7.74208111570861e-06, "loss": 0.53, "step": 1367 }, { "epoch": 1.1521617069062324, "grad_norm": 0.26888802647590637, "learning_rate": 7.737980208529232e-06, "loss": 0.5018, "step": 1368 }, { "epoch": 1.1530039303761932, "grad_norm": 0.2710627317428589, "learning_rate": 7.733876668991565e-06, "loss": 0.4839, "step": 1369 }, { "epoch": 1.1538461538461537, "grad_norm": 0.2608696222305298, "learning_rate": 7.72977050104085e-06, "loss": 0.4829, "step": 1370 }, { "epoch": 1.1546883773161145, "grad_norm": 0.29602470993995667, "learning_rate": 7.725661708624855e-06, "loss": 0.4827, "step": 1371 }, { "epoch": 1.1555306007860753, "grad_norm": 0.26592186093330383, "learning_rate": 7.721550295693865e-06, "loss": 0.5199, "step": 1372 }, { "epoch": 1.156372824256036, "grad_norm": 0.27896401286125183, "learning_rate": 7.71743626620069e-06, "loss": 0.5085, "step": 1373 }, { "epoch": 1.1572150477259966, "grad_norm": 0.24746102094650269, "learning_rate": 7.713319624100657e-06, "loss": 0.4336, "step": 1374 }, { "epoch": 1.1580572711959574, "grad_norm": 0.27517345547676086, "learning_rate": 7.7092003733516e-06, "loss": 0.4556, "step": 1375 }, { "epoch": 1.158899494665918, "grad_norm": 0.29119405150413513, "learning_rate": 7.705078517913862e-06, "loss": 0.53, "step": 1376 }, { "epoch": 1.1597417181358787, "grad_norm": 0.24503324925899506, "learning_rate": 7.700954061750295e-06, "loss": 0.5022, "step": 1377 }, { "epoch": 1.1605839416058394, "grad_norm": 0.30053943395614624, "learning_rate": 7.696827008826242e-06, "loss": 0.5159, "step": 1378 }, { "epoch": 1.1614261650758002, "grad_norm": 0.3215782940387726, "learning_rate": 7.692697363109553e-06, "loss": 0.5272, "step": 1379 }, { "epoch": 1.1622683885457608, "grad_norm": 0.2552134096622467, "learning_rate": 7.688565128570564e-06, "loss": 0.4709, "step": 1380 }, { "epoch": 1.1631106120157215, "grad_norm": 0.24121862649917603, "learning_rate": 7.684430309182106e-06, "loss": 0.4626, "step": 1381 }, { "epoch": 1.1639528354856823, "grad_norm": 0.3254956007003784, "learning_rate": 7.680292908919485e-06, "loss": 0.5295, "step": 1382 }, { "epoch": 1.1647950589556428, "grad_norm": 0.2930539846420288, "learning_rate": 7.676152931760496e-06, "loss": 0.4717, "step": 1383 }, { "epoch": 1.1656372824256036, "grad_norm": 0.2765038311481476, "learning_rate": 7.672010381685416e-06, "loss": 0.5047, "step": 1384 }, { "epoch": 1.1664795058955644, "grad_norm": 0.28217676281929016, "learning_rate": 7.667865262676981e-06, "loss": 0.4561, "step": 1385 }, { "epoch": 1.167321729365525, "grad_norm": 0.3046446740627289, "learning_rate": 7.663717578720412e-06, "loss": 0.5008, "step": 1386 }, { "epoch": 1.1681639528354857, "grad_norm": 0.2403436154127121, "learning_rate": 7.659567333803386e-06, "loss": 0.4378, "step": 1387 }, { "epoch": 1.1690061763054465, "grad_norm": 0.28352391719818115, "learning_rate": 7.655414531916048e-06, "loss": 0.4846, "step": 1388 }, { "epoch": 1.169848399775407, "grad_norm": 0.24462343752384186, "learning_rate": 7.651259177050996e-06, "loss": 0.4424, "step": 1389 }, { "epoch": 1.1706906232453678, "grad_norm": 0.26550188660621643, "learning_rate": 7.647101273203289e-06, "loss": 0.4701, "step": 1390 }, { "epoch": 1.1715328467153285, "grad_norm": 0.24608036875724792, "learning_rate": 7.642940824370429e-06, "loss": 0.4513, "step": 1391 }, { "epoch": 1.172375070185289, "grad_norm": 0.27792564034461975, "learning_rate": 7.638777834552372e-06, "loss": 0.5189, "step": 1392 }, { "epoch": 1.1732172936552498, "grad_norm": 0.23271256685256958, "learning_rate": 7.634612307751513e-06, "loss": 0.444, "step": 1393 }, { "epoch": 1.1740595171252106, "grad_norm": 0.26528099179267883, "learning_rate": 7.630444247972688e-06, "loss": 0.5122, "step": 1394 }, { "epoch": 1.1749017405951712, "grad_norm": 0.23890940845012665, "learning_rate": 7.626273659223166e-06, "loss": 0.49, "step": 1395 }, { "epoch": 1.175743964065132, "grad_norm": 0.273708701133728, "learning_rate": 7.622100545512648e-06, "loss": 0.5174, "step": 1396 }, { "epoch": 1.1765861875350927, "grad_norm": 0.26235008239746094, "learning_rate": 7.617924910853266e-06, "loss": 0.5026, "step": 1397 }, { "epoch": 1.1774284110050532, "grad_norm": 0.2598300576210022, "learning_rate": 7.61374675925957e-06, "loss": 0.4914, "step": 1398 }, { "epoch": 1.178270634475014, "grad_norm": 0.24822638928890228, "learning_rate": 7.609566094748535e-06, "loss": 0.4849, "step": 1399 }, { "epoch": 1.1791128579449748, "grad_norm": 0.2925652861595154, "learning_rate": 7.605382921339548e-06, "loss": 0.4837, "step": 1400 }, { "epoch": 1.1799550814149353, "grad_norm": 0.28387361764907837, "learning_rate": 7.601197243054411e-06, "loss": 0.5265, "step": 1401 }, { "epoch": 1.180797304884896, "grad_norm": 0.23857703804969788, "learning_rate": 7.597009063917333e-06, "loss": 0.4419, "step": 1402 }, { "epoch": 1.1816395283548569, "grad_norm": 0.2641632854938507, "learning_rate": 7.5928183879549274e-06, "loss": 0.5194, "step": 1403 }, { "epoch": 1.1824817518248176, "grad_norm": 0.2593880891799927, "learning_rate": 7.588625219196208e-06, "loss": 0.5278, "step": 1404 }, { "epoch": 1.1833239752947782, "grad_norm": 0.25237536430358887, "learning_rate": 7.584429561672586e-06, "loss": 0.49, "step": 1405 }, { "epoch": 1.184166198764739, "grad_norm": 0.26864004135131836, "learning_rate": 7.580231419417863e-06, "loss": 0.5146, "step": 1406 }, { "epoch": 1.1850084222346995, "grad_norm": 0.2453378289937973, "learning_rate": 7.576030796468233e-06, "loss": 0.489, "step": 1407 }, { "epoch": 1.1858506457046603, "grad_norm": 0.2403554469347, "learning_rate": 7.571827696862274e-06, "loss": 0.4446, "step": 1408 }, { "epoch": 1.186692869174621, "grad_norm": 0.2665308117866516, "learning_rate": 7.567622124640942e-06, "loss": 0.5017, "step": 1409 }, { "epoch": 1.1875350926445818, "grad_norm": 0.292610764503479, "learning_rate": 7.563414083847573e-06, "loss": 0.5636, "step": 1410 }, { "epoch": 1.1883773161145423, "grad_norm": 0.23078353703022003, "learning_rate": 7.55920357852788e-06, "loss": 0.4517, "step": 1411 }, { "epoch": 1.189219539584503, "grad_norm": 0.254619300365448, "learning_rate": 7.554990612729936e-06, "loss": 0.4936, "step": 1412 }, { "epoch": 1.1900617630544639, "grad_norm": 0.24065718054771423, "learning_rate": 7.5507751905041885e-06, "loss": 0.4801, "step": 1413 }, { "epoch": 1.1909039865244244, "grad_norm": 0.2689630389213562, "learning_rate": 7.5465573159034396e-06, "loss": 0.4976, "step": 1414 }, { "epoch": 1.1917462099943852, "grad_norm": 0.26624974608421326, "learning_rate": 7.542336992982857e-06, "loss": 0.478, "step": 1415 }, { "epoch": 1.192588433464346, "grad_norm": 0.26114994287490845, "learning_rate": 7.538114225799955e-06, "loss": 0.5051, "step": 1416 }, { "epoch": 1.1934306569343065, "grad_norm": 0.2481708973646164, "learning_rate": 7.533889018414602e-06, "loss": 0.5253, "step": 1417 }, { "epoch": 1.1942728804042673, "grad_norm": 0.23741896450519562, "learning_rate": 7.529661374889011e-06, "loss": 0.4292, "step": 1418 }, { "epoch": 1.195115103874228, "grad_norm": 0.26251858472824097, "learning_rate": 7.525431299287737e-06, "loss": 0.4882, "step": 1419 }, { "epoch": 1.1959573273441886, "grad_norm": 0.2471136450767517, "learning_rate": 7.5211987956776755e-06, "loss": 0.4861, "step": 1420 }, { "epoch": 1.1967995508141493, "grad_norm": 0.2624058723449707, "learning_rate": 7.516963868128054e-06, "loss": 0.5184, "step": 1421 }, { "epoch": 1.1976417742841101, "grad_norm": 0.2781965136528015, "learning_rate": 7.512726520710429e-06, "loss": 0.4809, "step": 1422 }, { "epoch": 1.1984839977540707, "grad_norm": 0.2547696530818939, "learning_rate": 7.508486757498687e-06, "loss": 0.4623, "step": 1423 }, { "epoch": 1.1993262212240314, "grad_norm": 0.2191547006368637, "learning_rate": 7.5042445825690344e-06, "loss": 0.4452, "step": 1424 }, { "epoch": 1.2001684446939922, "grad_norm": 0.2602192461490631, "learning_rate": 7.500000000000001e-06, "loss": 0.5027, "step": 1425 }, { "epoch": 1.2010106681639527, "grad_norm": 0.25940102338790894, "learning_rate": 7.4957530138724245e-06, "loss": 0.4634, "step": 1426 }, { "epoch": 1.2018528916339135, "grad_norm": 0.26419907808303833, "learning_rate": 7.491503628269458e-06, "loss": 0.4989, "step": 1427 }, { "epoch": 1.2026951151038743, "grad_norm": 0.25661998987197876, "learning_rate": 7.4872518472765594e-06, "loss": 0.5453, "step": 1428 }, { "epoch": 1.203537338573835, "grad_norm": 0.2525199353694916, "learning_rate": 7.4829976749814935e-06, "loss": 0.4094, "step": 1429 }, { "epoch": 1.2043795620437956, "grad_norm": 0.24944117665290833, "learning_rate": 7.4787411154743175e-06, "loss": 0.4524, "step": 1430 }, { "epoch": 1.2052217855137564, "grad_norm": 0.28944623470306396, "learning_rate": 7.474482172847391e-06, "loss": 0.5057, "step": 1431 }, { "epoch": 1.206064008983717, "grad_norm": 0.259177565574646, "learning_rate": 7.470220851195356e-06, "loss": 0.4895, "step": 1432 }, { "epoch": 1.2069062324536777, "grad_norm": 0.2620944678783417, "learning_rate": 7.46595715461515e-06, "loss": 0.4936, "step": 1433 }, { "epoch": 1.2077484559236384, "grad_norm": 0.29046958684921265, "learning_rate": 7.461691087205993e-06, "loss": 0.4617, "step": 1434 }, { "epoch": 1.2085906793935992, "grad_norm": 0.27004650235176086, "learning_rate": 7.457422653069379e-06, "loss": 0.5049, "step": 1435 }, { "epoch": 1.2094329028635598, "grad_norm": 0.26012033224105835, "learning_rate": 7.45315185630908e-06, "loss": 0.4735, "step": 1436 }, { "epoch": 1.2102751263335205, "grad_norm": 0.24434496462345123, "learning_rate": 7.4488787010311425e-06, "loss": 0.476, "step": 1437 }, { "epoch": 1.211117349803481, "grad_norm": 0.24302519857883453, "learning_rate": 7.444603191343878e-06, "loss": 0.4511, "step": 1438 }, { "epoch": 1.2119595732734418, "grad_norm": 0.2689550220966339, "learning_rate": 7.440325331357858e-06, "loss": 0.4601, "step": 1439 }, { "epoch": 1.2128017967434026, "grad_norm": 0.2641448974609375, "learning_rate": 7.436045125185923e-06, "loss": 0.5071, "step": 1440 }, { "epoch": 1.2136440202133634, "grad_norm": 0.2527853548526764, "learning_rate": 7.431762576943157e-06, "loss": 0.4681, "step": 1441 }, { "epoch": 1.214486243683324, "grad_norm": 0.2483433485031128, "learning_rate": 7.427477690746906e-06, "loss": 0.4734, "step": 1442 }, { "epoch": 1.2153284671532847, "grad_norm": 0.26769644021987915, "learning_rate": 7.423190470716761e-06, "loss": 0.4574, "step": 1443 }, { "epoch": 1.2161706906232455, "grad_norm": 0.3024654686450958, "learning_rate": 7.418900920974552e-06, "loss": 0.5011, "step": 1444 }, { "epoch": 1.217012914093206, "grad_norm": 0.25458261370658875, "learning_rate": 7.414609045644356e-06, "loss": 0.5197, "step": 1445 }, { "epoch": 1.2178551375631668, "grad_norm": 0.23563776910305023, "learning_rate": 7.4103148488524824e-06, "loss": 0.4731, "step": 1446 }, { "epoch": 1.2186973610331275, "grad_norm": 0.2838168442249298, "learning_rate": 7.40601833472747e-06, "loss": 0.5087, "step": 1447 }, { "epoch": 1.219539584503088, "grad_norm": 0.27839285135269165, "learning_rate": 7.401719507400088e-06, "loss": 0.4486, "step": 1448 }, { "epoch": 1.2203818079730488, "grad_norm": 0.2659938633441925, "learning_rate": 7.3974183710033334e-06, "loss": 0.4864, "step": 1449 }, { "epoch": 1.2212240314430096, "grad_norm": 0.2917943298816681, "learning_rate": 7.393114929672414e-06, "loss": 0.5335, "step": 1450 }, { "epoch": 1.2220662549129702, "grad_norm": 0.252844899892807, "learning_rate": 7.388809187544764e-06, "loss": 0.4921, "step": 1451 }, { "epoch": 1.222908478382931, "grad_norm": 0.25147438049316406, "learning_rate": 7.384501148760024e-06, "loss": 0.4782, "step": 1452 }, { "epoch": 1.2237507018528917, "grad_norm": 0.24088972806930542, "learning_rate": 7.38019081746004e-06, "loss": 0.4678, "step": 1453 }, { "epoch": 1.2245929253228522, "grad_norm": 0.24127790331840515, "learning_rate": 7.3758781977888684e-06, "loss": 0.4776, "step": 1454 }, { "epoch": 1.225435148792813, "grad_norm": 0.268416166305542, "learning_rate": 7.371563293892761e-06, "loss": 0.4535, "step": 1455 }, { "epoch": 1.2262773722627738, "grad_norm": 0.27311310172080994, "learning_rate": 7.367246109920171e-06, "loss": 0.4457, "step": 1456 }, { "epoch": 1.2271195957327343, "grad_norm": 0.27863165736198425, "learning_rate": 7.362926650021736e-06, "loss": 0.5168, "step": 1457 }, { "epoch": 1.227961819202695, "grad_norm": 0.31702518463134766, "learning_rate": 7.3586049183502875e-06, "loss": 0.5229, "step": 1458 }, { "epoch": 1.2288040426726559, "grad_norm": 0.2683735489845276, "learning_rate": 7.354280919060839e-06, "loss": 0.4796, "step": 1459 }, { "epoch": 1.2296462661426166, "grad_norm": 0.24436862766742706, "learning_rate": 7.349954656310585e-06, "loss": 0.4326, "step": 1460 }, { "epoch": 1.2304884896125772, "grad_norm": 0.3138126730918884, "learning_rate": 7.345626134258897e-06, "loss": 0.55, "step": 1461 }, { "epoch": 1.231330713082538, "grad_norm": 0.2964186668395996, "learning_rate": 7.341295357067315e-06, "loss": 0.5471, "step": 1462 }, { "epoch": 1.2321729365524985, "grad_norm": 0.26007014513015747, "learning_rate": 7.336962328899553e-06, "loss": 0.4143, "step": 1463 }, { "epoch": 1.2330151600224593, "grad_norm": 0.31831294298171997, "learning_rate": 7.3326270539214826e-06, "loss": 0.5184, "step": 1464 }, { "epoch": 1.23385738349242, "grad_norm": 0.26267844438552856, "learning_rate": 7.3282895363011405e-06, "loss": 0.4617, "step": 1465 }, { "epoch": 1.2346996069623808, "grad_norm": 0.25274890661239624, "learning_rate": 7.323949780208717e-06, "loss": 0.479, "step": 1466 }, { "epoch": 1.2355418304323413, "grad_norm": 0.2982969582080841, "learning_rate": 7.319607789816555e-06, "loss": 0.5175, "step": 1467 }, { "epoch": 1.236384053902302, "grad_norm": 0.2912946939468384, "learning_rate": 7.315263569299147e-06, "loss": 0.4769, "step": 1468 }, { "epoch": 1.2372262773722627, "grad_norm": 0.2732820510864258, "learning_rate": 7.310917122833127e-06, "loss": 0.5592, "step": 1469 }, { "epoch": 1.2380685008422234, "grad_norm": 0.24414239823818207, "learning_rate": 7.306568454597269e-06, "loss": 0.4807, "step": 1470 }, { "epoch": 1.2389107243121842, "grad_norm": 0.2770892381668091, "learning_rate": 7.302217568772488e-06, "loss": 0.4667, "step": 1471 }, { "epoch": 1.239752947782145, "grad_norm": 0.3075725734233856, "learning_rate": 7.297864469541826e-06, "loss": 0.4847, "step": 1472 }, { "epoch": 1.2405951712521055, "grad_norm": 0.2653881311416626, "learning_rate": 7.293509161090453e-06, "loss": 0.5021, "step": 1473 }, { "epoch": 1.2414373947220663, "grad_norm": 0.25346940755844116, "learning_rate": 7.289151647605668e-06, "loss": 0.4376, "step": 1474 }, { "epoch": 1.242279618192027, "grad_norm": 0.2759556770324707, "learning_rate": 7.284791933276883e-06, "loss": 0.4879, "step": 1475 }, { "epoch": 1.2431218416619876, "grad_norm": 0.24478305876255035, "learning_rate": 7.28043002229563e-06, "loss": 0.478, "step": 1476 }, { "epoch": 1.2439640651319483, "grad_norm": 0.24486036598682404, "learning_rate": 7.276065918855554e-06, "loss": 0.4926, "step": 1477 }, { "epoch": 1.2448062886019091, "grad_norm": 0.2637145221233368, "learning_rate": 7.271699627152406e-06, "loss": 0.4574, "step": 1478 }, { "epoch": 1.2456485120718697, "grad_norm": 0.23206742107868195, "learning_rate": 7.2673311513840395e-06, "loss": 0.4793, "step": 1479 }, { "epoch": 1.2464907355418304, "grad_norm": 0.27162712812423706, "learning_rate": 7.26296049575041e-06, "loss": 0.518, "step": 1480 }, { "epoch": 1.2473329590117912, "grad_norm": 0.26764142513275146, "learning_rate": 7.2585876644535705e-06, "loss": 0.5239, "step": 1481 }, { "epoch": 1.2481751824817517, "grad_norm": 0.25708284974098206, "learning_rate": 7.2542126616976596e-06, "loss": 0.4489, "step": 1482 }, { "epoch": 1.2490174059517125, "grad_norm": 0.2414480596780777, "learning_rate": 7.24983549168891e-06, "loss": 0.4656, "step": 1483 }, { "epoch": 1.2498596294216733, "grad_norm": 0.2746238708496094, "learning_rate": 7.2454561586356355e-06, "loss": 0.4644, "step": 1484 }, { "epoch": 1.250701852891634, "grad_norm": 0.2658059298992157, "learning_rate": 7.241074666748228e-06, "loss": 0.4653, "step": 1485 }, { "epoch": 1.2515440763615946, "grad_norm": 0.25551167130470276, "learning_rate": 7.236691020239157e-06, "loss": 0.4862, "step": 1486 }, { "epoch": 1.2523862998315554, "grad_norm": 0.2554570734500885, "learning_rate": 7.232305223322963e-06, "loss": 0.4967, "step": 1487 }, { "epoch": 1.253228523301516, "grad_norm": 0.23625071346759796, "learning_rate": 7.227917280216254e-06, "loss": 0.4404, "step": 1488 }, { "epoch": 1.2540707467714767, "grad_norm": 0.27637526392936707, "learning_rate": 7.2235271951377005e-06, "loss": 0.5411, "step": 1489 }, { "epoch": 1.2549129702414374, "grad_norm": 0.24953800439834595, "learning_rate": 7.219134972308035e-06, "loss": 0.469, "step": 1490 }, { "epoch": 1.2557551937113982, "grad_norm": 0.2471066564321518, "learning_rate": 7.214740615950041e-06, "loss": 0.4824, "step": 1491 }, { "epoch": 1.2565974171813588, "grad_norm": 0.2605708837509155, "learning_rate": 7.210344130288558e-06, "loss": 0.4531, "step": 1492 }, { "epoch": 1.2574396406513195, "grad_norm": 0.24090851843357086, "learning_rate": 7.205945519550467e-06, "loss": 0.5071, "step": 1493 }, { "epoch": 1.25828186412128, "grad_norm": 0.24479755759239197, "learning_rate": 7.201544787964698e-06, "loss": 0.4946, "step": 1494 }, { "epoch": 1.2591240875912408, "grad_norm": 0.2507603168487549, "learning_rate": 7.197141939762217e-06, "loss": 0.4276, "step": 1495 }, { "epoch": 1.2599663110612016, "grad_norm": 0.27449530363082886, "learning_rate": 7.192736979176025e-06, "loss": 0.5225, "step": 1496 }, { "epoch": 1.2608085345311624, "grad_norm": 0.28063130378723145, "learning_rate": 7.188329910441154e-06, "loss": 0.4802, "step": 1497 }, { "epoch": 1.261650758001123, "grad_norm": 0.23446723818778992, "learning_rate": 7.183920737794663e-06, "loss": 0.4641, "step": 1498 }, { "epoch": 1.2624929814710837, "grad_norm": 0.2912273108959198, "learning_rate": 7.179509465475636e-06, "loss": 0.4767, "step": 1499 }, { "epoch": 1.2633352049410442, "grad_norm": 0.26745331287384033, "learning_rate": 7.175096097725169e-06, "loss": 0.5064, "step": 1500 }, { "epoch": 1.264177428411005, "grad_norm": 0.2523433566093445, "learning_rate": 7.170680638786383e-06, "loss": 0.4694, "step": 1501 }, { "epoch": 1.2650196518809658, "grad_norm": 0.25101587176322937, "learning_rate": 7.166263092904399e-06, "loss": 0.4645, "step": 1502 }, { "epoch": 1.2658618753509265, "grad_norm": 0.2552841007709503, "learning_rate": 7.161843464326349e-06, "loss": 0.4929, "step": 1503 }, { "epoch": 1.266704098820887, "grad_norm": 0.24196158349514008, "learning_rate": 7.157421757301371e-06, "loss": 0.4767, "step": 1504 }, { "epoch": 1.2675463222908478, "grad_norm": 0.24240978062152863, "learning_rate": 7.1529979760805946e-06, "loss": 0.4786, "step": 1505 }, { "epoch": 1.2683885457608086, "grad_norm": 0.2581647038459778, "learning_rate": 7.148572124917148e-06, "loss": 0.4734, "step": 1506 }, { "epoch": 1.2692307692307692, "grad_norm": 0.22856487333774567, "learning_rate": 7.144144208066148e-06, "loss": 0.4596, "step": 1507 }, { "epoch": 1.27007299270073, "grad_norm": 0.24044837057590485, "learning_rate": 7.1397142297846975e-06, "loss": 0.4802, "step": 1508 }, { "epoch": 1.2709152161706907, "grad_norm": 0.2671823799610138, "learning_rate": 7.135282194331881e-06, "loss": 0.5166, "step": 1509 }, { "epoch": 1.2717574396406512, "grad_norm": 0.2416498064994812, "learning_rate": 7.130848105968762e-06, "loss": 0.4783, "step": 1510 }, { "epoch": 1.272599663110612, "grad_norm": 0.23545511066913605, "learning_rate": 7.126411968958374e-06, "loss": 0.4737, "step": 1511 }, { "epoch": 1.2734418865805728, "grad_norm": 0.264239102602005, "learning_rate": 7.121973787565727e-06, "loss": 0.5182, "step": 1512 }, { "epoch": 1.2742841100505333, "grad_norm": 0.2653980255126953, "learning_rate": 7.1175335660577906e-06, "loss": 0.4999, "step": 1513 }, { "epoch": 1.275126333520494, "grad_norm": 0.24832101166248322, "learning_rate": 7.113091308703498e-06, "loss": 0.4976, "step": 1514 }, { "epoch": 1.2759685569904549, "grad_norm": 0.24185889959335327, "learning_rate": 7.1086470197737405e-06, "loss": 0.4803, "step": 1515 }, { "epoch": 1.2768107804604156, "grad_norm": 0.2695640027523041, "learning_rate": 7.104200703541358e-06, "loss": 0.4884, "step": 1516 }, { "epoch": 1.2776530039303762, "grad_norm": 0.2379835993051529, "learning_rate": 7.099752364281147e-06, "loss": 0.4848, "step": 1517 }, { "epoch": 1.278495227400337, "grad_norm": 0.2668949365615845, "learning_rate": 7.095302006269842e-06, "loss": 0.46, "step": 1518 }, { "epoch": 1.2793374508702975, "grad_norm": 0.2570989429950714, "learning_rate": 7.090849633786125e-06, "loss": 0.4952, "step": 1519 }, { "epoch": 1.2801796743402583, "grad_norm": 0.23110096156597137, "learning_rate": 7.0863952511106075e-06, "loss": 0.5093, "step": 1520 }, { "epoch": 1.281021897810219, "grad_norm": 0.2304736226797104, "learning_rate": 7.0819388625258385e-06, "loss": 0.4183, "step": 1521 }, { "epoch": 1.2818641212801798, "grad_norm": 0.30342116951942444, "learning_rate": 7.077480472316296e-06, "loss": 0.5584, "step": 1522 }, { "epoch": 1.2827063447501403, "grad_norm": 0.24792760610580444, "learning_rate": 7.0730200847683795e-06, "loss": 0.4417, "step": 1523 }, { "epoch": 1.283548568220101, "grad_norm": 0.26771119236946106, "learning_rate": 7.06855770417041e-06, "loss": 0.5246, "step": 1524 }, { "epoch": 1.2843907916900617, "grad_norm": 0.26909416913986206, "learning_rate": 7.0640933348126235e-06, "loss": 0.4847, "step": 1525 }, { "epoch": 1.2852330151600224, "grad_norm": 0.25534069538116455, "learning_rate": 7.059626980987172e-06, "loss": 0.4558, "step": 1526 }, { "epoch": 1.2860752386299832, "grad_norm": 0.26232805848121643, "learning_rate": 7.05515864698811e-06, "loss": 0.5147, "step": 1527 }, { "epoch": 1.286917462099944, "grad_norm": 0.26543840765953064, "learning_rate": 7.0506883371114e-06, "loss": 0.4785, "step": 1528 }, { "epoch": 1.2877596855699045, "grad_norm": 0.27721336483955383, "learning_rate": 7.046216055654902e-06, "loss": 0.4822, "step": 1529 }, { "epoch": 1.2886019090398653, "grad_norm": 0.24770015478134155, "learning_rate": 7.041741806918372e-06, "loss": 0.5099, "step": 1530 }, { "epoch": 1.2894441325098258, "grad_norm": 0.21358808875083923, "learning_rate": 7.0372655952034575e-06, "loss": 0.4324, "step": 1531 }, { "epoch": 1.2902863559797866, "grad_norm": 0.2809373736381531, "learning_rate": 7.032787424813694e-06, "loss": 0.5155, "step": 1532 }, { "epoch": 1.2911285794497473, "grad_norm": 0.25470876693725586, "learning_rate": 7.028307300054499e-06, "loss": 0.4924, "step": 1533 }, { "epoch": 1.2919708029197081, "grad_norm": 0.2645720839500427, "learning_rate": 7.023825225233169e-06, "loss": 0.4642, "step": 1534 }, { "epoch": 1.2928130263896687, "grad_norm": 0.2849072217941284, "learning_rate": 7.019341204658876e-06, "loss": 0.5155, "step": 1535 }, { "epoch": 1.2936552498596294, "grad_norm": 0.25961869955062866, "learning_rate": 7.014855242642662e-06, "loss": 0.5062, "step": 1536 }, { "epoch": 1.2944974733295902, "grad_norm": 0.24877537786960602, "learning_rate": 7.0103673434974375e-06, "loss": 0.4573, "step": 1537 }, { "epoch": 1.2953396967995507, "grad_norm": 0.2846996486186981, "learning_rate": 7.0058775115379705e-06, "loss": 0.509, "step": 1538 }, { "epoch": 1.2961819202695115, "grad_norm": 0.230912446975708, "learning_rate": 7.0013857510808934e-06, "loss": 0.4558, "step": 1539 }, { "epoch": 1.2970241437394723, "grad_norm": 0.24324123561382294, "learning_rate": 6.99689206644469e-06, "loss": 0.4309, "step": 1540 }, { "epoch": 1.2978663672094328, "grad_norm": 0.2616897523403168, "learning_rate": 6.992396461949693e-06, "loss": 0.5108, "step": 1541 }, { "epoch": 1.2987085906793936, "grad_norm": 0.2500344514846802, "learning_rate": 6.987898941918082e-06, "loss": 0.4988, "step": 1542 }, { "epoch": 1.2995508141493544, "grad_norm": 0.24906115233898163, "learning_rate": 6.9833995106738774e-06, "loss": 0.4642, "step": 1543 }, { "epoch": 1.300393037619315, "grad_norm": 0.2536349594593048, "learning_rate": 6.978898172542939e-06, "loss": 0.4905, "step": 1544 }, { "epoch": 1.3012352610892757, "grad_norm": 0.24975118041038513, "learning_rate": 6.974394931852957e-06, "loss": 0.4454, "step": 1545 }, { "epoch": 1.3020774845592364, "grad_norm": 0.272616446018219, "learning_rate": 6.969889792933454e-06, "loss": 0.4985, "step": 1546 }, { "epoch": 1.3029197080291972, "grad_norm": 0.23043309152126312, "learning_rate": 6.965382760115775e-06, "loss": 0.4458, "step": 1547 }, { "epoch": 1.3037619314991578, "grad_norm": 0.2563420236110687, "learning_rate": 6.960873837733089e-06, "loss": 0.5246, "step": 1548 }, { "epoch": 1.3046041549691185, "grad_norm": 0.2467152327299118, "learning_rate": 6.956363030120377e-06, "loss": 0.5024, "step": 1549 }, { "epoch": 1.305446378439079, "grad_norm": 0.25033146142959595, "learning_rate": 6.951850341614436e-06, "loss": 0.4767, "step": 1550 }, { "epoch": 1.3062886019090398, "grad_norm": 0.27767738699913025, "learning_rate": 6.94733577655387e-06, "loss": 0.4853, "step": 1551 }, { "epoch": 1.3071308253790006, "grad_norm": 0.24779146909713745, "learning_rate": 6.942819339279089e-06, "loss": 0.4714, "step": 1552 }, { "epoch": 1.3079730488489614, "grad_norm": 0.23222114145755768, "learning_rate": 6.9383010341323e-06, "loss": 0.4663, "step": 1553 }, { "epoch": 1.308815272318922, "grad_norm": 0.25075116753578186, "learning_rate": 6.933780865457508e-06, "loss": 0.4428, "step": 1554 }, { "epoch": 1.3096574957888827, "grad_norm": 0.28354814648628235, "learning_rate": 6.9292588376005095e-06, "loss": 0.5351, "step": 1555 }, { "epoch": 1.3104997192588432, "grad_norm": 0.26149824261665344, "learning_rate": 6.924734954908887e-06, "loss": 0.4373, "step": 1556 }, { "epoch": 1.311341942728804, "grad_norm": 0.3042897880077362, "learning_rate": 6.920209221732007e-06, "loss": 0.5248, "step": 1557 }, { "epoch": 1.3121841661987648, "grad_norm": 0.2226887047290802, "learning_rate": 6.9156816424210175e-06, "loss": 0.4166, "step": 1558 }, { "epoch": 1.3130263896687255, "grad_norm": 0.255575031042099, "learning_rate": 6.911152221328837e-06, "loss": 0.476, "step": 1559 }, { "epoch": 1.313868613138686, "grad_norm": 0.2860569953918457, "learning_rate": 6.90662096281016e-06, "loss": 0.4938, "step": 1560 }, { "epoch": 1.3147108366086468, "grad_norm": 0.28478285670280457, "learning_rate": 6.902087871221439e-06, "loss": 0.4703, "step": 1561 }, { "epoch": 1.3155530600786074, "grad_norm": 0.24288587272167206, "learning_rate": 6.897552950920898e-06, "loss": 0.4811, "step": 1562 }, { "epoch": 1.3163952835485682, "grad_norm": 0.25944191217422485, "learning_rate": 6.893016206268518e-06, "loss": 0.4929, "step": 1563 }, { "epoch": 1.317237507018529, "grad_norm": 0.2520546317100525, "learning_rate": 6.888477641626027e-06, "loss": 0.4382, "step": 1564 }, { "epoch": 1.3180797304884897, "grad_norm": 0.2706393599510193, "learning_rate": 6.88393726135691e-06, "loss": 0.4997, "step": 1565 }, { "epoch": 1.3189219539584502, "grad_norm": 0.2881643772125244, "learning_rate": 6.879395069826394e-06, "loss": 0.4984, "step": 1566 }, { "epoch": 1.319764177428411, "grad_norm": 0.25058284401893616, "learning_rate": 6.874851071401448e-06, "loss": 0.4647, "step": 1567 }, { "epoch": 1.3206064008983718, "grad_norm": 0.25014370679855347, "learning_rate": 6.870305270450779e-06, "loss": 0.4847, "step": 1568 }, { "epoch": 1.3214486243683323, "grad_norm": 0.2702711224555969, "learning_rate": 6.865757671344827e-06, "loss": 0.49, "step": 1569 }, { "epoch": 1.322290847838293, "grad_norm": 0.27079862356185913, "learning_rate": 6.861208278455759e-06, "loss": 0.5498, "step": 1570 }, { "epoch": 1.3231330713082539, "grad_norm": 0.25469863414764404, "learning_rate": 6.856657096157469e-06, "loss": 0.4207, "step": 1571 }, { "epoch": 1.3239752947782144, "grad_norm": 0.25752612948417664, "learning_rate": 6.85210412882557e-06, "loss": 0.506, "step": 1572 }, { "epoch": 1.3248175182481752, "grad_norm": 0.2658270597457886, "learning_rate": 6.8475493808373895e-06, "loss": 0.5043, "step": 1573 }, { "epoch": 1.325659741718136, "grad_norm": 0.26716962456703186, "learning_rate": 6.8429928565719724e-06, "loss": 0.5098, "step": 1574 }, { "epoch": 1.3265019651880965, "grad_norm": 0.26751160621643066, "learning_rate": 6.838434560410064e-06, "loss": 0.4793, "step": 1575 }, { "epoch": 1.3273441886580573, "grad_norm": 0.29317373037338257, "learning_rate": 6.833874496734122e-06, "loss": 0.5027, "step": 1576 }, { "epoch": 1.328186412128018, "grad_norm": 0.2632593512535095, "learning_rate": 6.829312669928293e-06, "loss": 0.472, "step": 1577 }, { "epoch": 1.3290286355979788, "grad_norm": 0.24566617608070374, "learning_rate": 6.824749084378428e-06, "loss": 0.4601, "step": 1578 }, { "epoch": 1.3298708590679393, "grad_norm": 0.2553458511829376, "learning_rate": 6.820183744472062e-06, "loss": 0.4951, "step": 1579 }, { "epoch": 1.3307130825379, "grad_norm": 0.2624034583568573, "learning_rate": 6.81561665459842e-06, "loss": 0.5034, "step": 1580 }, { "epoch": 1.3315553060078607, "grad_norm": 0.26907604932785034, "learning_rate": 6.811047819148413e-06, "loss": 0.4966, "step": 1581 }, { "epoch": 1.3323975294778214, "grad_norm": 0.26033350825309753, "learning_rate": 6.806477242514623e-06, "loss": 0.4211, "step": 1582 }, { "epoch": 1.3332397529477822, "grad_norm": 0.27940523624420166, "learning_rate": 6.801904929091311e-06, "loss": 0.5105, "step": 1583 }, { "epoch": 1.334081976417743, "grad_norm": 0.2805331349372864, "learning_rate": 6.7973308832744035e-06, "loss": 0.534, "step": 1584 }, { "epoch": 1.3349241998877035, "grad_norm": 0.2629338800907135, "learning_rate": 6.792755109461498e-06, "loss": 0.4578, "step": 1585 }, { "epoch": 1.3357664233576643, "grad_norm": 0.2871420383453369, "learning_rate": 6.78817761205185e-06, "loss": 0.4736, "step": 1586 }, { "epoch": 1.3366086468276248, "grad_norm": 0.25821858644485474, "learning_rate": 6.783598395446371e-06, "loss": 0.4742, "step": 1587 }, { "epoch": 1.3374508702975856, "grad_norm": 0.2395980805158615, "learning_rate": 6.779017464047629e-06, "loss": 0.4632, "step": 1588 }, { "epoch": 1.3382930937675463, "grad_norm": 0.30653828382492065, "learning_rate": 6.7744348222598386e-06, "loss": 0.5272, "step": 1589 }, { "epoch": 1.3391353172375071, "grad_norm": 0.2672419846057892, "learning_rate": 6.769850474488859e-06, "loss": 0.4927, "step": 1590 }, { "epoch": 1.3399775407074677, "grad_norm": 0.24619075655937195, "learning_rate": 6.7652644251421875e-06, "loss": 0.4248, "step": 1591 }, { "epoch": 1.3408197641774284, "grad_norm": 0.24904806911945343, "learning_rate": 6.7606766786289624e-06, "loss": 0.4783, "step": 1592 }, { "epoch": 1.341661987647389, "grad_norm": 0.26381221413612366, "learning_rate": 6.756087239359948e-06, "loss": 0.4703, "step": 1593 }, { "epoch": 1.3425042111173497, "grad_norm": 0.3002656400203705, "learning_rate": 6.75149611174754e-06, "loss": 0.4824, "step": 1594 }, { "epoch": 1.3433464345873105, "grad_norm": 0.26381823420524597, "learning_rate": 6.746903300205756e-06, "loss": 0.4988, "step": 1595 }, { "epoch": 1.3441886580572713, "grad_norm": 0.27803394198417664, "learning_rate": 6.742308809150232e-06, "loss": 0.5009, "step": 1596 }, { "epoch": 1.3450308815272318, "grad_norm": 0.25334930419921875, "learning_rate": 6.737712642998219e-06, "loss": 0.4614, "step": 1597 }, { "epoch": 1.3458731049971926, "grad_norm": 0.26449981331825256, "learning_rate": 6.7331148061685796e-06, "loss": 0.505, "step": 1598 }, { "epoch": 1.3467153284671534, "grad_norm": 0.25313830375671387, "learning_rate": 6.728515303081782e-06, "loss": 0.45, "step": 1599 }, { "epoch": 1.347557551937114, "grad_norm": 0.2503660321235657, "learning_rate": 6.723914138159895e-06, "loss": 0.4871, "step": 1600 }, { "epoch": 1.3483997754070747, "grad_norm": 0.29349786043167114, "learning_rate": 6.719311315826589e-06, "loss": 0.512, "step": 1601 }, { "epoch": 1.3492419988770354, "grad_norm": 0.2606284022331238, "learning_rate": 6.714706840507122e-06, "loss": 0.4375, "step": 1602 }, { "epoch": 1.350084222346996, "grad_norm": 0.2654425799846649, "learning_rate": 6.710100716628345e-06, "loss": 0.4984, "step": 1603 }, { "epoch": 1.3509264458169568, "grad_norm": 0.299533486366272, "learning_rate": 6.705492948618694e-06, "loss": 0.5079, "step": 1604 }, { "epoch": 1.3517686692869175, "grad_norm": 0.22936008870601654, "learning_rate": 6.700883540908185e-06, "loss": 0.4765, "step": 1605 }, { "epoch": 1.352610892756878, "grad_norm": 0.2604401111602783, "learning_rate": 6.696272497928411e-06, "loss": 0.5387, "step": 1606 }, { "epoch": 1.3534531162268388, "grad_norm": 0.25388434529304504, "learning_rate": 6.691659824112535e-06, "loss": 0.4414, "step": 1607 }, { "epoch": 1.3542953396967996, "grad_norm": 0.23398436605930328, "learning_rate": 6.687045523895292e-06, "loss": 0.4297, "step": 1608 }, { "epoch": 1.3551375631667604, "grad_norm": 0.2580506503582001, "learning_rate": 6.682429601712976e-06, "loss": 0.4988, "step": 1609 }, { "epoch": 1.355979786636721, "grad_norm": 0.23704153299331665, "learning_rate": 6.6778120620034455e-06, "loss": 0.4593, "step": 1610 }, { "epoch": 1.3568220101066817, "grad_norm": 0.3208867013454437, "learning_rate": 6.673192909206109e-06, "loss": 0.5353, "step": 1611 }, { "epoch": 1.3576642335766422, "grad_norm": 0.24589763581752777, "learning_rate": 6.668572147761929e-06, "loss": 0.4573, "step": 1612 }, { "epoch": 1.358506457046603, "grad_norm": 0.2476811707019806, "learning_rate": 6.663949782113413e-06, "loss": 0.4616, "step": 1613 }, { "epoch": 1.3593486805165638, "grad_norm": 0.2853114604949951, "learning_rate": 6.6593258167046115e-06, "loss": 0.4846, "step": 1614 }, { "epoch": 1.3601909039865245, "grad_norm": 0.23582874238491058, "learning_rate": 6.654700255981115e-06, "loss": 0.4343, "step": 1615 }, { "epoch": 1.361033127456485, "grad_norm": 0.2583693563938141, "learning_rate": 6.6500731043900425e-06, "loss": 0.4671, "step": 1616 }, { "epoch": 1.3618753509264458, "grad_norm": 0.244578018784523, "learning_rate": 6.64544436638005e-06, "loss": 0.4766, "step": 1617 }, { "epoch": 1.3627175743964064, "grad_norm": 0.2990475296974182, "learning_rate": 6.640814046401312e-06, "loss": 0.5473, "step": 1618 }, { "epoch": 1.3635597978663672, "grad_norm": 0.2515740394592285, "learning_rate": 6.6361821489055275e-06, "loss": 0.4884, "step": 1619 }, { "epoch": 1.364402021336328, "grad_norm": 0.27345091104507446, "learning_rate": 6.63154867834591e-06, "loss": 0.5109, "step": 1620 }, { "epoch": 1.3652442448062887, "grad_norm": 0.23216070234775543, "learning_rate": 6.626913639177189e-06, "loss": 0.428, "step": 1621 }, { "epoch": 1.3660864682762492, "grad_norm": 0.2838047742843628, "learning_rate": 6.622277035855596e-06, "loss": 0.5226, "step": 1622 }, { "epoch": 1.36692869174621, "grad_norm": 0.23887330293655396, "learning_rate": 6.617638872838874e-06, "loss": 0.436, "step": 1623 }, { "epoch": 1.3677709152161706, "grad_norm": 0.32898977398872375, "learning_rate": 6.61299915458626e-06, "loss": 0.4475, "step": 1624 }, { "epoch": 1.3686131386861313, "grad_norm": 0.2699831426143646, "learning_rate": 6.608357885558485e-06, "loss": 0.4814, "step": 1625 }, { "epoch": 1.369455362156092, "grad_norm": 0.25687894225120544, "learning_rate": 6.603715070217779e-06, "loss": 0.4944, "step": 1626 }, { "epoch": 1.3702975856260529, "grad_norm": 0.23249584436416626, "learning_rate": 6.599070713027849e-06, "loss": 0.4575, "step": 1627 }, { "epoch": 1.3711398090960134, "grad_norm": 0.2534252107143402, "learning_rate": 6.594424818453891e-06, "loss": 0.4633, "step": 1628 }, { "epoch": 1.3719820325659742, "grad_norm": 0.3180943727493286, "learning_rate": 6.589777390962575e-06, "loss": 0.4841, "step": 1629 }, { "epoch": 1.372824256035935, "grad_norm": 0.26884499192237854, "learning_rate": 6.58512843502205e-06, "loss": 0.4682, "step": 1630 }, { "epoch": 1.3736664795058955, "grad_norm": 0.24556021392345428, "learning_rate": 6.580477955101927e-06, "loss": 0.4497, "step": 1631 }, { "epoch": 1.3745087029758563, "grad_norm": 0.2950051724910736, "learning_rate": 6.5758259556732896e-06, "loss": 0.4895, "step": 1632 }, { "epoch": 1.375350926445817, "grad_norm": 0.2906494438648224, "learning_rate": 6.571172441208678e-06, "loss": 0.4922, "step": 1633 }, { "epoch": 1.3761931499157778, "grad_norm": 0.24155600368976593, "learning_rate": 6.566517416182088e-06, "loss": 0.4579, "step": 1634 }, { "epoch": 1.3770353733857383, "grad_norm": 0.22993499040603638, "learning_rate": 6.561860885068972e-06, "loss": 0.4365, "step": 1635 }, { "epoch": 1.377877596855699, "grad_norm": 0.26292091608047485, "learning_rate": 6.5572028523462275e-06, "loss": 0.4951, "step": 1636 }, { "epoch": 1.3787198203256597, "grad_norm": 0.23864281177520752, "learning_rate": 6.552543322492195e-06, "loss": 0.4598, "step": 1637 }, { "epoch": 1.3795620437956204, "grad_norm": 0.23401755094528198, "learning_rate": 6.547882299986658e-06, "loss": 0.4742, "step": 1638 }, { "epoch": 1.3804042672655812, "grad_norm": 0.25583621859550476, "learning_rate": 6.54321978931083e-06, "loss": 0.4828, "step": 1639 }, { "epoch": 1.381246490735542, "grad_norm": 0.22914354503154755, "learning_rate": 6.53855579494736e-06, "loss": 0.4787, "step": 1640 }, { "epoch": 1.3820887142055025, "grad_norm": 0.255979061126709, "learning_rate": 6.53389032138032e-06, "loss": 0.5061, "step": 1641 }, { "epoch": 1.3829309376754633, "grad_norm": 0.23105530440807343, "learning_rate": 6.5292233730952074e-06, "loss": 0.4718, "step": 1642 }, { "epoch": 1.3837731611454238, "grad_norm": 0.2645004391670227, "learning_rate": 6.5245549545789335e-06, "loss": 0.4913, "step": 1643 }, { "epoch": 1.3846153846153846, "grad_norm": 0.22742195427417755, "learning_rate": 6.519885070319827e-06, "loss": 0.4384, "step": 1644 }, { "epoch": 1.3854576080853453, "grad_norm": 0.24580222368240356, "learning_rate": 6.515213724807621e-06, "loss": 0.4914, "step": 1645 }, { "epoch": 1.3862998315553061, "grad_norm": 0.2949240803718567, "learning_rate": 6.51054092253346e-06, "loss": 0.5127, "step": 1646 }, { "epoch": 1.3871420550252667, "grad_norm": 0.2198009043931961, "learning_rate": 6.505866667989884e-06, "loss": 0.4354, "step": 1647 }, { "epoch": 1.3879842784952274, "grad_norm": 0.2510626018047333, "learning_rate": 6.5011909656708305e-06, "loss": 0.5034, "step": 1648 }, { "epoch": 1.388826501965188, "grad_norm": 0.2359648495912552, "learning_rate": 6.49651382007163e-06, "loss": 0.4242, "step": 1649 }, { "epoch": 1.3896687254351487, "grad_norm": 0.26217037439346313, "learning_rate": 6.491835235688999e-06, "loss": 0.4905, "step": 1650 }, { "epoch": 1.3905109489051095, "grad_norm": 0.2349315732717514, "learning_rate": 6.487155217021039e-06, "loss": 0.465, "step": 1651 }, { "epoch": 1.3913531723750703, "grad_norm": 0.236787810921669, "learning_rate": 6.482473768567228e-06, "loss": 0.4685, "step": 1652 }, { "epoch": 1.3921953958450308, "grad_norm": 0.2573763430118561, "learning_rate": 6.477790894828422e-06, "loss": 0.469, "step": 1653 }, { "epoch": 1.3930376193149916, "grad_norm": 0.26800739765167236, "learning_rate": 6.473106600306842e-06, "loss": 0.4871, "step": 1654 }, { "epoch": 1.3938798427849521, "grad_norm": 0.22743229568004608, "learning_rate": 6.468420889506084e-06, "loss": 0.4532, "step": 1655 }, { "epoch": 1.394722066254913, "grad_norm": 0.24076132476329803, "learning_rate": 6.463733766931096e-06, "loss": 0.4764, "step": 1656 }, { "epoch": 1.3955642897248737, "grad_norm": 0.274238258600235, "learning_rate": 6.459045237088189e-06, "loss": 0.496, "step": 1657 }, { "epoch": 1.3964065131948344, "grad_norm": 0.23892028629779816, "learning_rate": 6.454355304485024e-06, "loss": 0.445, "step": 1658 }, { "epoch": 1.397248736664795, "grad_norm": 0.23968744277954102, "learning_rate": 6.449663973630613e-06, "loss": 0.4456, "step": 1659 }, { "epoch": 1.3980909601347558, "grad_norm": 0.25141534209251404, "learning_rate": 6.444971249035312e-06, "loss": 0.4831, "step": 1660 }, { "epoch": 1.3989331836047165, "grad_norm": 0.262102335691452, "learning_rate": 6.440277135210815e-06, "loss": 0.5027, "step": 1661 }, { "epoch": 1.399775407074677, "grad_norm": 0.25355109572410583, "learning_rate": 6.435581636670154e-06, "loss": 0.475, "step": 1662 }, { "epoch": 1.4006176305446378, "grad_norm": 0.2663498818874359, "learning_rate": 6.43088475792769e-06, "loss": 0.4761, "step": 1663 }, { "epoch": 1.4014598540145986, "grad_norm": 0.24228806793689728, "learning_rate": 6.426186503499114e-06, "loss": 0.4652, "step": 1664 }, { "epoch": 1.4023020774845594, "grad_norm": 0.24927838146686554, "learning_rate": 6.421486877901436e-06, "loss": 0.4619, "step": 1665 }, { "epoch": 1.40314430095452, "grad_norm": 0.2569688856601715, "learning_rate": 6.4167858856529875e-06, "loss": 0.5114, "step": 1666 }, { "epoch": 1.4039865244244807, "grad_norm": 0.2334774136543274, "learning_rate": 6.412083531273411e-06, "loss": 0.493, "step": 1667 }, { "epoch": 1.4048287478944412, "grad_norm": 0.26208698749542236, "learning_rate": 6.407379819283661e-06, "loss": 0.4612, "step": 1668 }, { "epoch": 1.405670971364402, "grad_norm": 0.2711883783340454, "learning_rate": 6.402674754205998e-06, "loss": 0.4775, "step": 1669 }, { "epoch": 1.4065131948343628, "grad_norm": 0.2504367530345917, "learning_rate": 6.397968340563978e-06, "loss": 0.4934, "step": 1670 }, { "epoch": 1.4073554183043235, "grad_norm": 0.24861888587474823, "learning_rate": 6.393260582882462e-06, "loss": 0.4302, "step": 1671 }, { "epoch": 1.408197641774284, "grad_norm": 0.257630318403244, "learning_rate": 6.3885514856875945e-06, "loss": 0.5337, "step": 1672 }, { "epoch": 1.4090398652442448, "grad_norm": 0.24855968356132507, "learning_rate": 6.383841053506813e-06, "loss": 0.5158, "step": 1673 }, { "epoch": 1.4098820887142054, "grad_norm": 0.2498617321252823, "learning_rate": 6.379129290868837e-06, "loss": 0.4539, "step": 1674 }, { "epoch": 1.4107243121841662, "grad_norm": 0.2719767391681671, "learning_rate": 6.3744162023036685e-06, "loss": 0.521, "step": 1675 }, { "epoch": 1.411566535654127, "grad_norm": 0.23374223709106445, "learning_rate": 6.369701792342576e-06, "loss": 0.479, "step": 1676 }, { "epoch": 1.4124087591240877, "grad_norm": 0.24482077360153198, "learning_rate": 6.364986065518106e-06, "loss": 0.4411, "step": 1677 }, { "epoch": 1.4132509825940482, "grad_norm": 0.2616766095161438, "learning_rate": 6.360269026364071e-06, "loss": 0.5213, "step": 1678 }, { "epoch": 1.414093206064009, "grad_norm": 0.24608784914016724, "learning_rate": 6.35555067941554e-06, "loss": 0.4768, "step": 1679 }, { "epoch": 1.4149354295339696, "grad_norm": 0.31159549951553345, "learning_rate": 6.350831029208844e-06, "loss": 0.4668, "step": 1680 }, { "epoch": 1.4157776530039303, "grad_norm": 0.26355910301208496, "learning_rate": 6.3461100802815625e-06, "loss": 0.4799, "step": 1681 }, { "epoch": 1.416619876473891, "grad_norm": 0.285707950592041, "learning_rate": 6.34138783717253e-06, "loss": 0.4642, "step": 1682 }, { "epoch": 1.4174620999438519, "grad_norm": 0.22955606877803802, "learning_rate": 6.336664304421818e-06, "loss": 0.4033, "step": 1683 }, { "epoch": 1.4183043234138124, "grad_norm": 0.24209065735340118, "learning_rate": 6.331939486570745e-06, "loss": 0.5074, "step": 1684 }, { "epoch": 1.4191465468837732, "grad_norm": 0.22434142231941223, "learning_rate": 6.3272133881618596e-06, "loss": 0.466, "step": 1685 }, { "epoch": 1.4199887703537337, "grad_norm": 0.25292786955833435, "learning_rate": 6.322486013738942e-06, "loss": 0.4599, "step": 1686 }, { "epoch": 1.4208309938236945, "grad_norm": 0.2578209936618805, "learning_rate": 6.317757367847005e-06, "loss": 0.4251, "step": 1687 }, { "epoch": 1.4216732172936553, "grad_norm": 0.3514237105846405, "learning_rate": 6.313027455032274e-06, "loss": 0.5547, "step": 1688 }, { "epoch": 1.422515440763616, "grad_norm": 0.2430252879858017, "learning_rate": 6.308296279842204e-06, "loss": 0.4471, "step": 1689 }, { "epoch": 1.4233576642335766, "grad_norm": 0.30116620659828186, "learning_rate": 6.303563846825453e-06, "loss": 0.5035, "step": 1690 }, { "epoch": 1.4241998877035373, "grad_norm": 0.27001529932022095, "learning_rate": 6.298830160531895e-06, "loss": 0.4941, "step": 1691 }, { "epoch": 1.425042111173498, "grad_norm": 0.24336984753608704, "learning_rate": 6.294095225512604e-06, "loss": 0.4685, "step": 1692 }, { "epoch": 1.4258843346434587, "grad_norm": 0.2546102702617645, "learning_rate": 6.289359046319862e-06, "loss": 0.5516, "step": 1693 }, { "epoch": 1.4267265581134194, "grad_norm": 0.23157474398612976, "learning_rate": 6.2846216275071395e-06, "loss": 0.4253, "step": 1694 }, { "epoch": 1.4275687815833802, "grad_norm": 0.26054608821868896, "learning_rate": 6.279882973629101e-06, "loss": 0.4801, "step": 1695 }, { "epoch": 1.428411005053341, "grad_norm": 0.2527260184288025, "learning_rate": 6.275143089241603e-06, "loss": 0.4408, "step": 1696 }, { "epoch": 1.4292532285233015, "grad_norm": 0.26699066162109375, "learning_rate": 6.270401978901678e-06, "loss": 0.5469, "step": 1697 }, { "epoch": 1.4300954519932623, "grad_norm": 0.2563816010951996, "learning_rate": 6.265659647167542e-06, "loss": 0.4841, "step": 1698 }, { "epoch": 1.4309376754632228, "grad_norm": 0.23184800148010254, "learning_rate": 6.260916098598584e-06, "loss": 0.4203, "step": 1699 }, { "epoch": 1.4317798989331836, "grad_norm": 0.2794727087020874, "learning_rate": 6.256171337755362e-06, "loss": 0.4583, "step": 1700 }, { "epoch": 1.4326221224031443, "grad_norm": 0.28808194398880005, "learning_rate": 6.2514253691996e-06, "loss": 0.5104, "step": 1701 }, { "epoch": 1.4334643458731051, "grad_norm": 0.27513131499290466, "learning_rate": 6.246678197494185e-06, "loss": 0.524, "step": 1702 }, { "epoch": 1.4343065693430657, "grad_norm": 0.27593469619750977, "learning_rate": 6.241929827203156e-06, "loss": 0.4807, "step": 1703 }, { "epoch": 1.4351487928130264, "grad_norm": 0.32182741165161133, "learning_rate": 6.237180262891709e-06, "loss": 0.5402, "step": 1704 }, { "epoch": 1.435991016282987, "grad_norm": 0.27566930651664734, "learning_rate": 6.2324295091261885e-06, "loss": 0.4872, "step": 1705 }, { "epoch": 1.4368332397529477, "grad_norm": 0.2532426416873932, "learning_rate": 6.227677570474077e-06, "loss": 0.4911, "step": 1706 }, { "epoch": 1.4376754632229085, "grad_norm": 0.24899719655513763, "learning_rate": 6.222924451504001e-06, "loss": 0.4723, "step": 1707 }, { "epoch": 1.4385176866928693, "grad_norm": 0.26722055673599243, "learning_rate": 6.21817015678572e-06, "loss": 0.4433, "step": 1708 }, { "epoch": 1.4393599101628298, "grad_norm": 0.2936233878135681, "learning_rate": 6.213414690890125e-06, "loss": 0.5336, "step": 1709 }, { "epoch": 1.4402021336327906, "grad_norm": 0.24608315527439117, "learning_rate": 6.208658058389232e-06, "loss": 0.4389, "step": 1710 }, { "epoch": 1.4410443571027511, "grad_norm": 0.2820180058479309, "learning_rate": 6.203900263856177e-06, "loss": 0.4929, "step": 1711 }, { "epoch": 1.441886580572712, "grad_norm": 0.22745788097381592, "learning_rate": 6.19914131186522e-06, "loss": 0.4404, "step": 1712 }, { "epoch": 1.4427288040426727, "grad_norm": 0.2479826956987381, "learning_rate": 6.194381206991723e-06, "loss": 0.4923, "step": 1713 }, { "epoch": 1.4435710275126334, "grad_norm": 0.2517240047454834, "learning_rate": 6.189619953812167e-06, "loss": 0.4958, "step": 1714 }, { "epoch": 1.444413250982594, "grad_norm": 0.2638949155807495, "learning_rate": 6.184857556904129e-06, "loss": 0.4701, "step": 1715 }, { "epoch": 1.4452554744525548, "grad_norm": 0.2863232493400574, "learning_rate": 6.180094020846291e-06, "loss": 0.5061, "step": 1716 }, { "epoch": 1.4460976979225155, "grad_norm": 0.24833954870700836, "learning_rate": 6.175329350218426e-06, "loss": 0.4405, "step": 1717 }, { "epoch": 1.446939921392476, "grad_norm": 0.2631756663322449, "learning_rate": 6.170563549601402e-06, "loss": 0.4809, "step": 1718 }, { "epoch": 1.4477821448624368, "grad_norm": 0.24429740011692047, "learning_rate": 6.165796623577171e-06, "loss": 0.473, "step": 1719 }, { "epoch": 1.4486243683323976, "grad_norm": 0.27534031867980957, "learning_rate": 6.161028576728767e-06, "loss": 0.4775, "step": 1720 }, { "epoch": 1.4494665918023582, "grad_norm": 0.28510719537734985, "learning_rate": 6.156259413640302e-06, "loss": 0.5323, "step": 1721 }, { "epoch": 1.450308815272319, "grad_norm": 0.2575216293334961, "learning_rate": 6.15148913889696e-06, "loss": 0.461, "step": 1722 }, { "epoch": 1.4511510387422797, "grad_norm": 0.2952944040298462, "learning_rate": 6.146717757084995e-06, "loss": 0.5466, "step": 1723 }, { "epoch": 1.4519932622122402, "grad_norm": 0.23868177831172943, "learning_rate": 6.141945272791727e-06, "loss": 0.4796, "step": 1724 }, { "epoch": 1.452835485682201, "grad_norm": 0.24002686142921448, "learning_rate": 6.1371716906055336e-06, "loss": 0.4724, "step": 1725 }, { "epoch": 1.4536777091521618, "grad_norm": 0.24285222589969635, "learning_rate": 6.132397015115846e-06, "loss": 0.4465, "step": 1726 }, { "epoch": 1.4545199326221225, "grad_norm": 0.27442654967308044, "learning_rate": 6.127621250913152e-06, "loss": 0.5198, "step": 1727 }, { "epoch": 1.455362156092083, "grad_norm": 0.268309086561203, "learning_rate": 6.122844402588982e-06, "loss": 0.49, "step": 1728 }, { "epoch": 1.4562043795620438, "grad_norm": 0.2613791227340698, "learning_rate": 6.11806647473591e-06, "loss": 0.4778, "step": 1729 }, { "epoch": 1.4570466030320044, "grad_norm": 0.23474282026290894, "learning_rate": 6.113287471947547e-06, "loss": 0.4746, "step": 1730 }, { "epoch": 1.4578888265019652, "grad_norm": 0.2548374831676483, "learning_rate": 6.10850739881854e-06, "loss": 0.4817, "step": 1731 }, { "epoch": 1.458731049971926, "grad_norm": 0.2718791961669922, "learning_rate": 6.103726259944562e-06, "loss": 0.4709, "step": 1732 }, { "epoch": 1.4595732734418867, "grad_norm": 0.2719106078147888, "learning_rate": 6.098944059922311e-06, "loss": 0.4517, "step": 1733 }, { "epoch": 1.4604154969118472, "grad_norm": 0.257980614900589, "learning_rate": 6.094160803349508e-06, "loss": 0.4695, "step": 1734 }, { "epoch": 1.461257720381808, "grad_norm": 0.29096782207489014, "learning_rate": 6.089376494824886e-06, "loss": 0.5393, "step": 1735 }, { "epoch": 1.4620999438517686, "grad_norm": 0.22989623248577118, "learning_rate": 6.084591138948192e-06, "loss": 0.4316, "step": 1736 }, { "epoch": 1.4629421673217293, "grad_norm": 0.23434767127037048, "learning_rate": 6.079804740320181e-06, "loss": 0.4427, "step": 1737 }, { "epoch": 1.46378439079169, "grad_norm": 0.26117947697639465, "learning_rate": 6.075017303542605e-06, "loss": 0.5525, "step": 1738 }, { "epoch": 1.4646266142616509, "grad_norm": 0.24167770147323608, "learning_rate": 6.070228833218221e-06, "loss": 0.4323, "step": 1739 }, { "epoch": 1.4654688377316114, "grad_norm": 0.2948307991027832, "learning_rate": 6.065439333950776e-06, "loss": 0.52, "step": 1740 }, { "epoch": 1.4663110612015722, "grad_norm": 0.25685733556747437, "learning_rate": 6.060648810345006e-06, "loss": 0.4365, "step": 1741 }, { "epoch": 1.4671532846715327, "grad_norm": 0.24863596260547638, "learning_rate": 6.055857267006631e-06, "loss": 0.4847, "step": 1742 }, { "epoch": 1.4679955081414935, "grad_norm": 0.26202818751335144, "learning_rate": 6.051064708542357e-06, "loss": 0.5034, "step": 1743 }, { "epoch": 1.4688377316114543, "grad_norm": 0.22497782111167908, "learning_rate": 6.046271139559859e-06, "loss": 0.4578, "step": 1744 }, { "epoch": 1.469679955081415, "grad_norm": 0.22547268867492676, "learning_rate": 6.041476564667785e-06, "loss": 0.4689, "step": 1745 }, { "epoch": 1.4705221785513756, "grad_norm": 0.23064996302127838, "learning_rate": 6.036680988475756e-06, "loss": 0.4319, "step": 1746 }, { "epoch": 1.4713644020213363, "grad_norm": 0.2537640929222107, "learning_rate": 6.031884415594347e-06, "loss": 0.5054, "step": 1747 }, { "epoch": 1.472206625491297, "grad_norm": 0.24937893450260162, "learning_rate": 6.0270868506351e-06, "loss": 0.4389, "step": 1748 }, { "epoch": 1.4730488489612577, "grad_norm": 0.25662675499916077, "learning_rate": 6.022288298210502e-06, "loss": 0.5561, "step": 1749 }, { "epoch": 1.4738910724312184, "grad_norm": 0.27563199400901794, "learning_rate": 6.017488762933996e-06, "loss": 0.5088, "step": 1750 }, { "epoch": 1.4747332959011792, "grad_norm": 0.2220403552055359, "learning_rate": 6.012688249419966e-06, "loss": 0.4883, "step": 1751 }, { "epoch": 1.4755755193711397, "grad_norm": 0.2951650619506836, "learning_rate": 6.00788676228374e-06, "loss": 0.5071, "step": 1752 }, { "epoch": 1.4764177428411005, "grad_norm": 0.2666932940483093, "learning_rate": 6.003084306141579e-06, "loss": 0.4521, "step": 1753 }, { "epoch": 1.4772599663110613, "grad_norm": 0.24799270927906036, "learning_rate": 5.998280885610677e-06, "loss": 0.4437, "step": 1754 }, { "epoch": 1.4781021897810218, "grad_norm": 0.27791568636894226, "learning_rate": 5.993476505309154e-06, "loss": 0.5477, "step": 1755 }, { "epoch": 1.4789444132509826, "grad_norm": 0.24963220953941345, "learning_rate": 5.988671169856056e-06, "loss": 0.4438, "step": 1756 }, { "epoch": 1.4797866367209433, "grad_norm": 0.2684602439403534, "learning_rate": 5.983864883871344e-06, "loss": 0.4752, "step": 1757 }, { "epoch": 1.4806288601909041, "grad_norm": 0.285150945186615, "learning_rate": 5.979057651975893e-06, "loss": 0.5077, "step": 1758 }, { "epoch": 1.4814710836608647, "grad_norm": 0.26350918412208557, "learning_rate": 5.974249478791489e-06, "loss": 0.473, "step": 1759 }, { "epoch": 1.4823133071308254, "grad_norm": 0.26852312684059143, "learning_rate": 5.969440368940823e-06, "loss": 0.4604, "step": 1760 }, { "epoch": 1.483155530600786, "grad_norm": 0.2543701231479645, "learning_rate": 5.964630327047485e-06, "loss": 0.4928, "step": 1761 }, { "epoch": 1.4839977540707467, "grad_norm": 0.2555846571922302, "learning_rate": 5.9598193577359606e-06, "loss": 0.4735, "step": 1762 }, { "epoch": 1.4848399775407075, "grad_norm": 0.2270192950963974, "learning_rate": 5.955007465631632e-06, "loss": 0.4645, "step": 1763 }, { "epoch": 1.4856822010106683, "grad_norm": 0.25241661071777344, "learning_rate": 5.9501946553607615e-06, "loss": 0.489, "step": 1764 }, { "epoch": 1.4865244244806288, "grad_norm": 0.30182966589927673, "learning_rate": 5.945380931550497e-06, "loss": 0.5352, "step": 1765 }, { "epoch": 1.4873666479505896, "grad_norm": 0.2478613704442978, "learning_rate": 5.940566298828871e-06, "loss": 0.4699, "step": 1766 }, { "epoch": 1.4882088714205501, "grad_norm": 0.2640896439552307, "learning_rate": 5.935750761824777e-06, "loss": 0.4937, "step": 1767 }, { "epoch": 1.489051094890511, "grad_norm": 0.2500910460948944, "learning_rate": 5.93093432516799e-06, "loss": 0.4581, "step": 1768 }, { "epoch": 1.4898933183604717, "grad_norm": 0.2924606204032898, "learning_rate": 5.926116993489143e-06, "loss": 0.4623, "step": 1769 }, { "epoch": 1.4907355418304324, "grad_norm": 0.3028809726238251, "learning_rate": 5.921298771419731e-06, "loss": 0.4624, "step": 1770 }, { "epoch": 1.491577765300393, "grad_norm": 0.22979748249053955, "learning_rate": 5.916479663592107e-06, "loss": 0.4422, "step": 1771 }, { "epoch": 1.4924199887703538, "grad_norm": 0.28133314847946167, "learning_rate": 5.911659674639473e-06, "loss": 0.4793, "step": 1772 }, { "epoch": 1.4932622122403143, "grad_norm": 0.2907586991786957, "learning_rate": 5.906838809195879e-06, "loss": 0.5107, "step": 1773 }, { "epoch": 1.494104435710275, "grad_norm": 0.2834010422229767, "learning_rate": 5.90201707189622e-06, "loss": 0.4661, "step": 1774 }, { "epoch": 1.4949466591802358, "grad_norm": 0.27418527007102966, "learning_rate": 5.897194467376226e-06, "loss": 0.4708, "step": 1775 }, { "epoch": 1.4957888826501966, "grad_norm": 0.32390543818473816, "learning_rate": 5.8923710002724595e-06, "loss": 0.5002, "step": 1776 }, { "epoch": 1.4966311061201572, "grad_norm": 0.281429260969162, "learning_rate": 5.887546675222319e-06, "loss": 0.4675, "step": 1777 }, { "epoch": 1.497473329590118, "grad_norm": 0.3122255206108093, "learning_rate": 5.8827214968640215e-06, "loss": 0.5237, "step": 1778 }, { "epoch": 1.4983155530600787, "grad_norm": 0.2955033481121063, "learning_rate": 5.877895469836604e-06, "loss": 0.4856, "step": 1779 }, { "epoch": 1.4991577765300392, "grad_norm": 0.2839592397212982, "learning_rate": 5.873068598779926e-06, "loss": 0.477, "step": 1780 }, { "epoch": 1.5, "grad_norm": 0.28811055421829224, "learning_rate": 5.8682408883346535e-06, "loss": 0.4625, "step": 1781 }, { "epoch": 1.5008422234699608, "grad_norm": 0.3016136586666107, "learning_rate": 5.863412343142258e-06, "loss": 0.472, "step": 1782 }, { "epoch": 1.5016844469399215, "grad_norm": 0.2882804870605469, "learning_rate": 5.858582967845018e-06, "loss": 0.4896, "step": 1783 }, { "epoch": 1.502526670409882, "grad_norm": 0.32045626640319824, "learning_rate": 5.853752767086007e-06, "loss": 0.4928, "step": 1784 }, { "epoch": 1.5033688938798426, "grad_norm": 0.3231285810470581, "learning_rate": 5.848921745509094e-06, "loss": 0.4733, "step": 1785 }, { "epoch": 1.5042111173498034, "grad_norm": 0.2845571041107178, "learning_rate": 5.844089907758935e-06, "loss": 0.4887, "step": 1786 }, { "epoch": 1.5050533408197642, "grad_norm": 0.2962283194065094, "learning_rate": 5.839257258480974e-06, "loss": 0.5284, "step": 1787 }, { "epoch": 1.505895564289725, "grad_norm": 0.30280447006225586, "learning_rate": 5.8344238023214305e-06, "loss": 0.4401, "step": 1788 }, { "epoch": 1.5067377877596857, "grad_norm": 0.34830188751220703, "learning_rate": 5.829589543927305e-06, "loss": 0.535, "step": 1789 }, { "epoch": 1.5075800112296462, "grad_norm": 0.22128595411777496, "learning_rate": 5.824754487946366e-06, "loss": 0.4519, "step": 1790 }, { "epoch": 1.508422234699607, "grad_norm": 0.2612576484680176, "learning_rate": 5.819918639027149e-06, "loss": 0.5224, "step": 1791 }, { "epoch": 1.5092644581695676, "grad_norm": 0.23196659982204437, "learning_rate": 5.815082001818951e-06, "loss": 0.4276, "step": 1792 }, { "epoch": 1.5101066816395283, "grad_norm": 0.316211074590683, "learning_rate": 5.8102445809718325e-06, "loss": 0.5074, "step": 1793 }, { "epoch": 1.510948905109489, "grad_norm": 0.24991190433502197, "learning_rate": 5.805406381136598e-06, "loss": 0.4907, "step": 1794 }, { "epoch": 1.5117911285794499, "grad_norm": 0.2663284242153168, "learning_rate": 5.80056740696481e-06, "loss": 0.4753, "step": 1795 }, { "epoch": 1.5126333520494104, "grad_norm": 0.22733762860298157, "learning_rate": 5.79572766310877e-06, "loss": 0.3942, "step": 1796 }, { "epoch": 1.5134755755193712, "grad_norm": 0.2662948966026306, "learning_rate": 5.790887154221521e-06, "loss": 0.5136, "step": 1797 }, { "epoch": 1.5143177989893317, "grad_norm": 0.32026082277297974, "learning_rate": 5.7860458849568425e-06, "loss": 0.5152, "step": 1798 }, { "epoch": 1.5151600224592925, "grad_norm": 0.29845133423805237, "learning_rate": 5.781203859969242e-06, "loss": 0.4771, "step": 1799 }, { "epoch": 1.5160022459292533, "grad_norm": 0.2562868595123291, "learning_rate": 5.776361083913959e-06, "loss": 0.4644, "step": 1800 }, { "epoch": 1.516844469399214, "grad_norm": 0.24452854692935944, "learning_rate": 5.771517561446949e-06, "loss": 0.4278, "step": 1801 }, { "epoch": 1.5176866928691746, "grad_norm": 0.26284608244895935, "learning_rate": 5.766673297224889e-06, "loss": 0.4551, "step": 1802 }, { "epoch": 1.5185289163391353, "grad_norm": 0.3216002881526947, "learning_rate": 5.7618282959051685e-06, "loss": 0.5209, "step": 1803 }, { "epoch": 1.5193711398090959, "grad_norm": 0.25540193915367126, "learning_rate": 5.756982562145884e-06, "loss": 0.4893, "step": 1804 }, { "epoch": 1.5202133632790567, "grad_norm": 0.2595300078392029, "learning_rate": 5.75213610060584e-06, "loss": 0.4852, "step": 1805 }, { "epoch": 1.5210555867490174, "grad_norm": 0.22537751495838165, "learning_rate": 5.747288915944533e-06, "loss": 0.4328, "step": 1806 }, { "epoch": 1.5218978102189782, "grad_norm": 0.2734992802143097, "learning_rate": 5.742441012822166e-06, "loss": 0.4997, "step": 1807 }, { "epoch": 1.522740033688939, "grad_norm": 0.2463618963956833, "learning_rate": 5.737592395899623e-06, "loss": 0.4923, "step": 1808 }, { "epoch": 1.5235822571588995, "grad_norm": 0.2690679430961609, "learning_rate": 5.7327430698384775e-06, "loss": 0.4629, "step": 1809 }, { "epoch": 1.52442448062886, "grad_norm": 0.262220561504364, "learning_rate": 5.727893039300987e-06, "loss": 0.4873, "step": 1810 }, { "epoch": 1.5252667040988208, "grad_norm": 0.27451103925704956, "learning_rate": 5.7230423089500845e-06, "loss": 0.4802, "step": 1811 }, { "epoch": 1.5261089275687816, "grad_norm": 0.2719501852989197, "learning_rate": 5.718190883449373e-06, "loss": 0.5069, "step": 1812 }, { "epoch": 1.5269511510387423, "grad_norm": 0.25495198369026184, "learning_rate": 5.713338767463129e-06, "loss": 0.442, "step": 1813 }, { "epoch": 1.5277933745087031, "grad_norm": 0.29632821679115295, "learning_rate": 5.708485965656291e-06, "loss": 0.4983, "step": 1814 }, { "epoch": 1.5286355979786637, "grad_norm": 0.25878381729125977, "learning_rate": 5.703632482694453e-06, "loss": 0.4641, "step": 1815 }, { "epoch": 1.5294778214486242, "grad_norm": 0.2607625722885132, "learning_rate": 5.698778323243871e-06, "loss": 0.5219, "step": 1816 }, { "epoch": 1.530320044918585, "grad_norm": 0.2535508871078491, "learning_rate": 5.693923491971445e-06, "loss": 0.487, "step": 1817 }, { "epoch": 1.5311622683885457, "grad_norm": 0.2768633961677551, "learning_rate": 5.689067993544726e-06, "loss": 0.4806, "step": 1818 }, { "epoch": 1.5320044918585065, "grad_norm": 0.257542222738266, "learning_rate": 5.6842118326318996e-06, "loss": 0.5084, "step": 1819 }, { "epoch": 1.5328467153284673, "grad_norm": 0.28328055143356323, "learning_rate": 5.679355013901797e-06, "loss": 0.513, "step": 1820 }, { "epoch": 1.5336889387984278, "grad_norm": 0.2560610771179199, "learning_rate": 5.674497542023875e-06, "loss": 0.4623, "step": 1821 }, { "epoch": 1.5345311622683886, "grad_norm": 0.2662545442581177, "learning_rate": 5.669639421668221e-06, "loss": 0.509, "step": 1822 }, { "epoch": 1.5353733857383491, "grad_norm": 0.24711458384990692, "learning_rate": 5.664780657505547e-06, "loss": 0.4682, "step": 1823 }, { "epoch": 1.53621560920831, "grad_norm": 0.2424210160970688, "learning_rate": 5.659921254207183e-06, "loss": 0.5056, "step": 1824 }, { "epoch": 1.5370578326782707, "grad_norm": 0.23518222570419312, "learning_rate": 5.65506121644507e-06, "loss": 0.4227, "step": 1825 }, { "epoch": 1.5379000561482314, "grad_norm": 0.2589225471019745, "learning_rate": 5.650200548891764e-06, "loss": 0.4882, "step": 1826 }, { "epoch": 1.538742279618192, "grad_norm": 0.2337670624256134, "learning_rate": 5.645339256220427e-06, "loss": 0.4614, "step": 1827 }, { "epoch": 1.5395845030881528, "grad_norm": 0.24503163993358612, "learning_rate": 5.640477343104815e-06, "loss": 0.4962, "step": 1828 }, { "epoch": 1.5404267265581133, "grad_norm": 0.24026913940906525, "learning_rate": 5.635614814219289e-06, "loss": 0.4797, "step": 1829 }, { "epoch": 1.541268950028074, "grad_norm": 0.2496921569108963, "learning_rate": 5.630751674238796e-06, "loss": 0.4487, "step": 1830 }, { "epoch": 1.5421111734980348, "grad_norm": 0.2782098650932312, "learning_rate": 5.625887927838872e-06, "loss": 0.5112, "step": 1831 }, { "epoch": 1.5429533969679956, "grad_norm": 0.2562282681465149, "learning_rate": 5.6210235796956395e-06, "loss": 0.4423, "step": 1832 }, { "epoch": 1.5437956204379562, "grad_norm": 0.25320789217948914, "learning_rate": 5.616158634485793e-06, "loss": 0.535, "step": 1833 }, { "epoch": 1.544637843907917, "grad_norm": 0.26096171140670776, "learning_rate": 5.61129309688661e-06, "loss": 0.4629, "step": 1834 }, { "epoch": 1.5454800673778775, "grad_norm": 0.2720514237880707, "learning_rate": 5.606426971575926e-06, "loss": 0.52, "step": 1835 }, { "epoch": 1.5463222908478382, "grad_norm": 0.24554921686649323, "learning_rate": 5.601560263232153e-06, "loss": 0.4667, "step": 1836 }, { "epoch": 1.547164514317799, "grad_norm": 0.23600547015666962, "learning_rate": 5.596692976534256e-06, "loss": 0.492, "step": 1837 }, { "epoch": 1.5480067377877598, "grad_norm": 0.2511468529701233, "learning_rate": 5.591825116161758e-06, "loss": 0.5104, "step": 1838 }, { "epoch": 1.5488489612577205, "grad_norm": 0.2521441578865051, "learning_rate": 5.5869566867947344e-06, "loss": 0.4753, "step": 1839 }, { "epoch": 1.549691184727681, "grad_norm": 0.24649223685264587, "learning_rate": 5.582087693113808e-06, "loss": 0.4846, "step": 1840 }, { "epoch": 1.5505334081976416, "grad_norm": 0.22618024051189423, "learning_rate": 5.577218139800143e-06, "loss": 0.4518, "step": 1841 }, { "epoch": 1.5513756316676024, "grad_norm": 0.23207631707191467, "learning_rate": 5.572348031535442e-06, "loss": 0.4611, "step": 1842 }, { "epoch": 1.5522178551375632, "grad_norm": 0.2525484561920166, "learning_rate": 5.567477373001942e-06, "loss": 0.463, "step": 1843 }, { "epoch": 1.553060078607524, "grad_norm": 0.27067646384239197, "learning_rate": 5.562606168882404e-06, "loss": 0.5029, "step": 1844 }, { "epoch": 1.5539023020774847, "grad_norm": 0.25548219680786133, "learning_rate": 5.557734423860122e-06, "loss": 0.4777, "step": 1845 }, { "epoch": 1.5547445255474452, "grad_norm": 0.21819068491458893, "learning_rate": 5.552862142618906e-06, "loss": 0.4067, "step": 1846 }, { "epoch": 1.5555867490174058, "grad_norm": 0.25024688243865967, "learning_rate": 5.547989329843079e-06, "loss": 0.4982, "step": 1847 }, { "epoch": 1.5564289724873666, "grad_norm": 0.2694171369075775, "learning_rate": 5.543115990217478e-06, "loss": 0.5057, "step": 1848 }, { "epoch": 1.5572711959573273, "grad_norm": 0.3138599693775177, "learning_rate": 5.538242128427444e-06, "loss": 0.4782, "step": 1849 }, { "epoch": 1.558113419427288, "grad_norm": 0.25465336441993713, "learning_rate": 5.533367749158829e-06, "loss": 0.5042, "step": 1850 }, { "epoch": 1.5589556428972489, "grad_norm": 0.2542472779750824, "learning_rate": 5.528492857097966e-06, "loss": 0.4766, "step": 1851 }, { "epoch": 1.5597978663672094, "grad_norm": 0.28288909792900085, "learning_rate": 5.523617456931696e-06, "loss": 0.437, "step": 1852 }, { "epoch": 1.5606400898371702, "grad_norm": 0.25646159052848816, "learning_rate": 5.518741553347341e-06, "loss": 0.5217, "step": 1853 }, { "epoch": 1.5614823133071307, "grad_norm": 0.2634013593196869, "learning_rate": 5.513865151032709e-06, "loss": 0.4396, "step": 1854 }, { "epoch": 1.5623245367770915, "grad_norm": 0.223186194896698, "learning_rate": 5.508988254676087e-06, "loss": 0.4616, "step": 1855 }, { "epoch": 1.5631667602470523, "grad_norm": 0.2652823328971863, "learning_rate": 5.504110868966239e-06, "loss": 0.4495, "step": 1856 }, { "epoch": 1.564008983717013, "grad_norm": 0.24666236340999603, "learning_rate": 5.499232998592399e-06, "loss": 0.4619, "step": 1857 }, { "epoch": 1.5648512071869736, "grad_norm": 0.2436274290084839, "learning_rate": 5.49435464824426e-06, "loss": 0.4863, "step": 1858 }, { "epoch": 1.5656934306569343, "grad_norm": 0.21874786913394928, "learning_rate": 5.489475822611988e-06, "loss": 0.4439, "step": 1859 }, { "epoch": 1.5665356541268949, "grad_norm": 0.25331079959869385, "learning_rate": 5.484596526386198e-06, "loss": 0.4938, "step": 1860 }, { "epoch": 1.5673778775968557, "grad_norm": 0.246526837348938, "learning_rate": 5.479716764257961e-06, "loss": 0.484, "step": 1861 }, { "epoch": 1.5682201010668164, "grad_norm": 0.24246542155742645, "learning_rate": 5.474836540918791e-06, "loss": 0.4698, "step": 1862 }, { "epoch": 1.5690623245367772, "grad_norm": 0.2459675520658493, "learning_rate": 5.469955861060653e-06, "loss": 0.4669, "step": 1863 }, { "epoch": 1.5699045480067377, "grad_norm": 0.24595963954925537, "learning_rate": 5.465074729375944e-06, "loss": 0.4889, "step": 1864 }, { "epoch": 1.5707467714766985, "grad_norm": 0.27058717608451843, "learning_rate": 5.4601931505575e-06, "loss": 0.4891, "step": 1865 }, { "epoch": 1.571588994946659, "grad_norm": 0.22745418548583984, "learning_rate": 5.455311129298586e-06, "loss": 0.4258, "step": 1866 }, { "epoch": 1.5724312184166198, "grad_norm": 0.24998918175697327, "learning_rate": 5.450428670292889e-06, "loss": 0.4814, "step": 1867 }, { "epoch": 1.5732734418865806, "grad_norm": 0.24531488120555878, "learning_rate": 5.445545778234522e-06, "loss": 0.5162, "step": 1868 }, { "epoch": 1.5741156653565413, "grad_norm": 0.25102102756500244, "learning_rate": 5.44066245781801e-06, "loss": 0.5015, "step": 1869 }, { "epoch": 1.5749578888265021, "grad_norm": 0.2631896436214447, "learning_rate": 5.435778713738292e-06, "loss": 0.44, "step": 1870 }, { "epoch": 1.5758001122964627, "grad_norm": 0.24563737213611603, "learning_rate": 5.430894550690714e-06, "loss": 0.4781, "step": 1871 }, { "epoch": 1.5766423357664232, "grad_norm": 0.22482982277870178, "learning_rate": 5.426009973371026e-06, "loss": 0.4718, "step": 1872 }, { "epoch": 1.577484559236384, "grad_norm": 0.23750506341457367, "learning_rate": 5.421124986475371e-06, "loss": 0.4762, "step": 1873 }, { "epoch": 1.5783267827063447, "grad_norm": 0.26698100566864014, "learning_rate": 5.416239594700294e-06, "loss": 0.4915, "step": 1874 }, { "epoch": 1.5791690061763055, "grad_norm": 0.286347895860672, "learning_rate": 5.4113538027427245e-06, "loss": 0.511, "step": 1875 }, { "epoch": 1.5800112296462663, "grad_norm": 0.21996836364269257, "learning_rate": 5.4064676152999765e-06, "loss": 0.4251, "step": 1876 }, { "epoch": 1.5808534531162268, "grad_norm": 0.2259243130683899, "learning_rate": 5.4015810370697445e-06, "loss": 0.4467, "step": 1877 }, { "epoch": 1.5816956765861874, "grad_norm": 0.276563823223114, "learning_rate": 5.396694072750099e-06, "loss": 0.4945, "step": 1878 }, { "epoch": 1.5825379000561481, "grad_norm": 0.27084630727767944, "learning_rate": 5.391806727039484e-06, "loss": 0.509, "step": 1879 }, { "epoch": 1.583380123526109, "grad_norm": 0.23408527672290802, "learning_rate": 5.386919004636706e-06, "loss": 0.4613, "step": 1880 }, { "epoch": 1.5842223469960697, "grad_norm": 0.2540043890476227, "learning_rate": 5.382030910240936e-06, "loss": 0.4949, "step": 1881 }, { "epoch": 1.5850645704660304, "grad_norm": 0.21495820581912994, "learning_rate": 5.3771424485517034e-06, "loss": 0.431, "step": 1882 }, { "epoch": 1.585906793935991, "grad_norm": 0.24904629588127136, "learning_rate": 5.3722536242688895e-06, "loss": 0.4511, "step": 1883 }, { "epoch": 1.5867490174059518, "grad_norm": 0.2400527447462082, "learning_rate": 5.367364442092724e-06, "loss": 0.4612, "step": 1884 }, { "epoch": 1.5875912408759123, "grad_norm": 0.26573503017425537, "learning_rate": 5.362474906723781e-06, "loss": 0.4755, "step": 1885 }, { "epoch": 1.588433464345873, "grad_norm": 0.2640140652656555, "learning_rate": 5.357585022862977e-06, "loss": 0.4981, "step": 1886 }, { "epoch": 1.5892756878158338, "grad_norm": 0.254152774810791, "learning_rate": 5.352694795211555e-06, "loss": 0.503, "step": 1887 }, { "epoch": 1.5901179112857946, "grad_norm": 0.25700879096984863, "learning_rate": 5.347804228471101e-06, "loss": 0.4769, "step": 1888 }, { "epoch": 1.5909601347557552, "grad_norm": 0.2385103553533554, "learning_rate": 5.342913327343515e-06, "loss": 0.4658, "step": 1889 }, { "epoch": 1.591802358225716, "grad_norm": 0.28354698419570923, "learning_rate": 5.338022096531028e-06, "loss": 0.4983, "step": 1890 }, { "epoch": 1.5926445816956765, "grad_norm": 0.23742280900478363, "learning_rate": 5.33313054073618e-06, "loss": 0.4291, "step": 1891 }, { "epoch": 1.5934868051656372, "grad_norm": 0.30714526772499084, "learning_rate": 5.32823866466183e-06, "loss": 0.5498, "step": 1892 }, { "epoch": 1.594329028635598, "grad_norm": 0.2736995816230774, "learning_rate": 5.3233464730111426e-06, "loss": 0.4637, "step": 1893 }, { "epoch": 1.5951712521055588, "grad_norm": 0.2516764998435974, "learning_rate": 5.318453970487582e-06, "loss": 0.4429, "step": 1894 }, { "epoch": 1.5960134755755195, "grad_norm": 0.2516971826553345, "learning_rate": 5.31356116179492e-06, "loss": 0.4846, "step": 1895 }, { "epoch": 1.59685569904548, "grad_norm": 0.27679213881492615, "learning_rate": 5.308668051637213e-06, "loss": 0.4534, "step": 1896 }, { "epoch": 1.5976979225154406, "grad_norm": 0.2627556622028351, "learning_rate": 5.303774644718813e-06, "loss": 0.4673, "step": 1897 }, { "epoch": 1.5985401459854014, "grad_norm": 0.22050373256206512, "learning_rate": 5.298880945744356e-06, "loss": 0.4342, "step": 1898 }, { "epoch": 1.5993823694553622, "grad_norm": 0.2566528916358948, "learning_rate": 5.29398695941876e-06, "loss": 0.4994, "step": 1899 }, { "epoch": 1.600224592925323, "grad_norm": 0.2795170843601227, "learning_rate": 5.289092690447215e-06, "loss": 0.4777, "step": 1900 }, { "epoch": 1.6010668163952837, "grad_norm": 0.24378879368305206, "learning_rate": 5.284198143535188e-06, "loss": 0.4675, "step": 1901 }, { "epoch": 1.6019090398652442, "grad_norm": 0.2381599396467209, "learning_rate": 5.279303323388413e-06, "loss": 0.4479, "step": 1902 }, { "epoch": 1.6027512633352048, "grad_norm": 0.24433782696723938, "learning_rate": 5.274408234712881e-06, "loss": 0.475, "step": 1903 }, { "epoch": 1.6035934868051656, "grad_norm": 0.2567233741283417, "learning_rate": 5.2695128822148466e-06, "loss": 0.4392, "step": 1904 }, { "epoch": 1.6044357102751263, "grad_norm": 0.2829025387763977, "learning_rate": 5.2646172706008154e-06, "loss": 0.5383, "step": 1905 }, { "epoch": 1.605277933745087, "grad_norm": 0.2526053488254547, "learning_rate": 5.259721404577546e-06, "loss": 0.5048, "step": 1906 }, { "epoch": 1.6061201572150479, "grad_norm": 0.26718324422836304, "learning_rate": 5.254825288852033e-06, "loss": 0.4655, "step": 1907 }, { "epoch": 1.6069623806850084, "grad_norm": 0.27319657802581787, "learning_rate": 5.249928928131523e-06, "loss": 0.4628, "step": 1908 }, { "epoch": 1.607804604154969, "grad_norm": 0.2571570873260498, "learning_rate": 5.245032327123488e-06, "loss": 0.5325, "step": 1909 }, { "epoch": 1.6086468276249297, "grad_norm": 0.23908449709415436, "learning_rate": 5.240135490535635e-06, "loss": 0.4693, "step": 1910 }, { "epoch": 1.6094890510948905, "grad_norm": 0.24710819125175476, "learning_rate": 5.235238423075899e-06, "loss": 0.4575, "step": 1911 }, { "epoch": 1.6103312745648513, "grad_norm": 0.26572898030281067, "learning_rate": 5.230341129452434e-06, "loss": 0.4855, "step": 1912 }, { "epoch": 1.611173498034812, "grad_norm": 0.2543940842151642, "learning_rate": 5.225443614373614e-06, "loss": 0.4987, "step": 1913 }, { "epoch": 1.6120157215047726, "grad_norm": 0.2861873507499695, "learning_rate": 5.220545882548024e-06, "loss": 0.5041, "step": 1914 }, { "epoch": 1.6128579449747333, "grad_norm": 0.2772687077522278, "learning_rate": 5.215647938684458e-06, "loss": 0.4907, "step": 1915 }, { "epoch": 1.6137001684446939, "grad_norm": 0.23597025871276855, "learning_rate": 5.210749787491913e-06, "loss": 0.454, "step": 1916 }, { "epoch": 1.6145423919146547, "grad_norm": 0.24675235152244568, "learning_rate": 5.20585143367959e-06, "loss": 0.4554, "step": 1917 }, { "epoch": 1.6153846153846154, "grad_norm": 0.2565891146659851, "learning_rate": 5.200952881956875e-06, "loss": 0.4563, "step": 1918 }, { "epoch": 1.6162268388545762, "grad_norm": 0.23471416532993317, "learning_rate": 5.196054137033354e-06, "loss": 0.4432, "step": 1919 }, { "epoch": 1.6170690623245367, "grad_norm": 0.24037203192710876, "learning_rate": 5.191155203618796e-06, "loss": 0.4886, "step": 1920 }, { "epoch": 1.6179112857944975, "grad_norm": 0.22397062182426453, "learning_rate": 5.186256086423148e-06, "loss": 0.4673, "step": 1921 }, { "epoch": 1.618753509264458, "grad_norm": 0.25382331013679504, "learning_rate": 5.181356790156539e-06, "loss": 0.5211, "step": 1922 }, { "epoch": 1.6195957327344188, "grad_norm": 0.23827660083770752, "learning_rate": 5.176457319529264e-06, "loss": 0.4543, "step": 1923 }, { "epoch": 1.6204379562043796, "grad_norm": 0.24234288930892944, "learning_rate": 5.171557679251788e-06, "loss": 0.4879, "step": 1924 }, { "epoch": 1.6212801796743403, "grad_norm": 0.25108709931373596, "learning_rate": 5.166657874034745e-06, "loss": 0.4899, "step": 1925 }, { "epoch": 1.6221224031443011, "grad_norm": 0.2403172105550766, "learning_rate": 5.161757908588917e-06, "loss": 0.4706, "step": 1926 }, { "epoch": 1.6229646266142617, "grad_norm": 0.23360106348991394, "learning_rate": 5.156857787625249e-06, "loss": 0.4359, "step": 1927 }, { "epoch": 1.6238068500842222, "grad_norm": 0.2909252941608429, "learning_rate": 5.15195751585483e-06, "loss": 0.5192, "step": 1928 }, { "epoch": 1.624649073554183, "grad_norm": 0.24731476604938507, "learning_rate": 5.147057097988898e-06, "loss": 0.4194, "step": 1929 }, { "epoch": 1.6254912970241437, "grad_norm": 0.24169856309890747, "learning_rate": 5.142156538738827e-06, "loss": 0.517, "step": 1930 }, { "epoch": 1.6263335204941045, "grad_norm": 0.22618789970874786, "learning_rate": 5.137255842816132e-06, "loss": 0.4299, "step": 1931 }, { "epoch": 1.6271757439640653, "grad_norm": 0.24710828065872192, "learning_rate": 5.132355014932455e-06, "loss": 0.4803, "step": 1932 }, { "epoch": 1.6280179674340258, "grad_norm": 0.26915454864501953, "learning_rate": 5.127454059799567e-06, "loss": 0.5015, "step": 1933 }, { "epoch": 1.6288601909039864, "grad_norm": 0.2910863757133484, "learning_rate": 5.122552982129362e-06, "loss": 0.4882, "step": 1934 }, { "epoch": 1.6297024143739471, "grad_norm": 0.24038784205913544, "learning_rate": 5.1176517866338495e-06, "loss": 0.4455, "step": 1935 }, { "epoch": 1.630544637843908, "grad_norm": 0.23613595962524414, "learning_rate": 5.112750478025156e-06, "loss": 0.465, "step": 1936 }, { "epoch": 1.6313868613138687, "grad_norm": 0.22299955785274506, "learning_rate": 5.1078490610155105e-06, "loss": 0.408, "step": 1937 }, { "epoch": 1.6322290847838294, "grad_norm": 0.2718156576156616, "learning_rate": 5.102947540317254e-06, "loss": 0.5169, "step": 1938 }, { "epoch": 1.63307130825379, "grad_norm": 0.2451147437095642, "learning_rate": 5.09804592064282e-06, "loss": 0.456, "step": 1939 }, { "epoch": 1.6339135317237508, "grad_norm": 0.2693863809108734, "learning_rate": 5.093144206704743e-06, "loss": 0.4814, "step": 1940 }, { "epoch": 1.6347557551937113, "grad_norm": 0.2607814371585846, "learning_rate": 5.088242403215644e-06, "loss": 0.4708, "step": 1941 }, { "epoch": 1.635597978663672, "grad_norm": 0.23175933957099915, "learning_rate": 5.083340514888232e-06, "loss": 0.4625, "step": 1942 }, { "epoch": 1.6364402021336328, "grad_norm": 0.2722987234592438, "learning_rate": 5.078438546435298e-06, "loss": 0.5277, "step": 1943 }, { "epoch": 1.6372824256035936, "grad_norm": 0.22179189324378967, "learning_rate": 5.073536502569708e-06, "loss": 0.416, "step": 1944 }, { "epoch": 1.6381246490735542, "grad_norm": 0.2850702404975891, "learning_rate": 5.0686343880044044e-06, "loss": 0.5221, "step": 1945 }, { "epoch": 1.638966872543515, "grad_norm": 0.25036484003067017, "learning_rate": 5.063732207452391e-06, "loss": 0.4711, "step": 1946 }, { "epoch": 1.6398090960134755, "grad_norm": 0.23721033334732056, "learning_rate": 5.058829965626742e-06, "loss": 0.4414, "step": 1947 }, { "epoch": 1.6406513194834362, "grad_norm": 0.23568393290042877, "learning_rate": 5.053927667240585e-06, "loss": 0.4293, "step": 1948 }, { "epoch": 1.641493542953397, "grad_norm": 0.2530818283557892, "learning_rate": 5.049025317007108e-06, "loss": 0.4773, "step": 1949 }, { "epoch": 1.6423357664233578, "grad_norm": 0.24994856119155884, "learning_rate": 5.0441229196395416e-06, "loss": 0.498, "step": 1950 }, { "epoch": 1.6431779898933183, "grad_norm": 0.23636505007743835, "learning_rate": 5.039220479851167e-06, "loss": 0.459, "step": 1951 }, { "epoch": 1.644020213363279, "grad_norm": 0.25070124864578247, "learning_rate": 5.034318002355305e-06, "loss": 0.4829, "step": 1952 }, { "epoch": 1.6448624368332396, "grad_norm": 0.24547205865383148, "learning_rate": 5.029415491865311e-06, "loss": 0.484, "step": 1953 }, { "epoch": 1.6457046603032004, "grad_norm": 0.21958963572978973, "learning_rate": 5.024512953094577e-06, "loss": 0.446, "step": 1954 }, { "epoch": 1.6465468837731612, "grad_norm": 0.2321747988462448, "learning_rate": 5.019610390756513e-06, "loss": 0.4023, "step": 1955 }, { "epoch": 1.647389107243122, "grad_norm": 0.2657341957092285, "learning_rate": 5.014707809564562e-06, "loss": 0.5153, "step": 1956 }, { "epoch": 1.6482313307130827, "grad_norm": 0.22297105193138123, "learning_rate": 5.009805214232177e-06, "loss": 0.4318, "step": 1957 }, { "epoch": 1.6490735541830432, "grad_norm": 0.25263717770576477, "learning_rate": 5.004902609472831e-06, "loss": 0.4831, "step": 1958 }, { "epoch": 1.6499157776530038, "grad_norm": 0.2375510185956955, "learning_rate": 5e-06, "loss": 0.4985, "step": 1959 }, { "epoch": 1.6507580011229646, "grad_norm": 0.27207058668136597, "learning_rate": 4.995097390527171e-06, "loss": 0.5049, "step": 1960 }, { "epoch": 1.6516002245929253, "grad_norm": 0.2904370129108429, "learning_rate": 4.990194785767824e-06, "loss": 0.4762, "step": 1961 }, { "epoch": 1.652442448062886, "grad_norm": 0.24771693348884583, "learning_rate": 4.98529219043544e-06, "loss": 0.4463, "step": 1962 }, { "epoch": 1.6532846715328469, "grad_norm": 0.26332080364227295, "learning_rate": 4.980389609243488e-06, "loss": 0.507, "step": 1963 }, { "epoch": 1.6541268950028074, "grad_norm": 0.2572915554046631, "learning_rate": 4.975487046905426e-06, "loss": 0.431, "step": 1964 }, { "epoch": 1.654969118472768, "grad_norm": 0.2854689359664917, "learning_rate": 4.97058450813469e-06, "loss": 0.4778, "step": 1965 }, { "epoch": 1.6558113419427287, "grad_norm": 0.27358296513557434, "learning_rate": 4.9656819976446975e-06, "loss": 0.5004, "step": 1966 }, { "epoch": 1.6566535654126895, "grad_norm": 0.2921474874019623, "learning_rate": 4.960779520148835e-06, "loss": 0.5266, "step": 1967 }, { "epoch": 1.6574957888826503, "grad_norm": 0.24553194642066956, "learning_rate": 4.955877080360462e-06, "loss": 0.4274, "step": 1968 }, { "epoch": 1.658338012352611, "grad_norm": 0.2731702923774719, "learning_rate": 4.950974682992894e-06, "loss": 0.5333, "step": 1969 }, { "epoch": 1.6591802358225716, "grad_norm": 0.254703164100647, "learning_rate": 4.9460723327594175e-06, "loss": 0.4531, "step": 1970 }, { "epoch": 1.6600224592925323, "grad_norm": 0.22639574110507965, "learning_rate": 4.94117003437326e-06, "loss": 0.4534, "step": 1971 }, { "epoch": 1.6608646827624929, "grad_norm": 0.2423030585050583, "learning_rate": 4.9362677925476124e-06, "loss": 0.4545, "step": 1972 }, { "epoch": 1.6617069062324537, "grad_norm": 0.2738526165485382, "learning_rate": 4.931365611995598e-06, "loss": 0.4934, "step": 1973 }, { "epoch": 1.6625491297024144, "grad_norm": 0.24026601016521454, "learning_rate": 4.926463497430293e-06, "loss": 0.4404, "step": 1974 }, { "epoch": 1.6633913531723752, "grad_norm": 0.27125459909439087, "learning_rate": 4.921561453564704e-06, "loss": 0.4698, "step": 1975 }, { "epoch": 1.6642335766423357, "grad_norm": 0.2807263731956482, "learning_rate": 4.9166594851117696e-06, "loss": 0.4593, "step": 1976 }, { "epoch": 1.6650758001122965, "grad_norm": 0.2764016389846802, "learning_rate": 4.911757596784358e-06, "loss": 0.5038, "step": 1977 }, { "epoch": 1.665918023582257, "grad_norm": 0.25274550914764404, "learning_rate": 4.906855793295259e-06, "loss": 0.4715, "step": 1978 }, { "epoch": 1.6667602470522178, "grad_norm": 0.22888481616973877, "learning_rate": 4.901954079357182e-06, "loss": 0.3865, "step": 1979 }, { "epoch": 1.6676024705221786, "grad_norm": 0.2865358591079712, "learning_rate": 4.897052459682749e-06, "loss": 0.5132, "step": 1980 }, { "epoch": 1.6684446939921393, "grad_norm": 0.2585681974887848, "learning_rate": 4.892150938984491e-06, "loss": 0.4954, "step": 1981 }, { "epoch": 1.6692869174621, "grad_norm": 0.2315594106912613, "learning_rate": 4.887249521974848e-06, "loss": 0.4997, "step": 1982 }, { "epoch": 1.6701291409320607, "grad_norm": 0.2543342709541321, "learning_rate": 4.882348213366152e-06, "loss": 0.4542, "step": 1983 }, { "epoch": 1.6709713644020212, "grad_norm": 0.2756553292274475, "learning_rate": 4.8774470178706405e-06, "loss": 0.4972, "step": 1984 }, { "epoch": 1.671813587871982, "grad_norm": 0.2320590615272522, "learning_rate": 4.872545940200435e-06, "loss": 0.4458, "step": 1985 }, { "epoch": 1.6726558113419427, "grad_norm": 0.2724742889404297, "learning_rate": 4.867644985067548e-06, "loss": 0.4962, "step": 1986 }, { "epoch": 1.6734980348119035, "grad_norm": 0.28479263186454773, "learning_rate": 4.862744157183869e-06, "loss": 0.4998, "step": 1987 }, { "epoch": 1.6743402582818643, "grad_norm": 0.28115493059158325, "learning_rate": 4.857843461261176e-06, "loss": 0.5157, "step": 1988 }, { "epoch": 1.6751824817518248, "grad_norm": 0.25361353158950806, "learning_rate": 4.8529429020111035e-06, "loss": 0.459, "step": 1989 }, { "epoch": 1.6760247052217854, "grad_norm": 0.2361331582069397, "learning_rate": 4.8480424841451725e-06, "loss": 0.4991, "step": 1990 }, { "epoch": 1.6768669286917461, "grad_norm": 0.2616862952709198, "learning_rate": 4.8431422123747524e-06, "loss": 0.4847, "step": 1991 }, { "epoch": 1.677709152161707, "grad_norm": 0.24674531817436218, "learning_rate": 4.838242091411085e-06, "loss": 0.4203, "step": 1992 }, { "epoch": 1.6785513756316677, "grad_norm": 0.2514377534389496, "learning_rate": 4.833342125965257e-06, "loss": 0.4741, "step": 1993 }, { "epoch": 1.6793935991016284, "grad_norm": 0.253315269947052, "learning_rate": 4.828442320748213e-06, "loss": 0.4406, "step": 1994 }, { "epoch": 1.680235822571589, "grad_norm": 0.24072812497615814, "learning_rate": 4.823542680470738e-06, "loss": 0.4275, "step": 1995 }, { "epoch": 1.6810780460415495, "grad_norm": 0.26846417784690857, "learning_rate": 4.818643209843463e-06, "loss": 0.5223, "step": 1996 }, { "epoch": 1.6819202695115103, "grad_norm": 0.31028953194618225, "learning_rate": 4.813743913576852e-06, "loss": 0.4505, "step": 1997 }, { "epoch": 1.682762492981471, "grad_norm": 0.24616169929504395, "learning_rate": 4.808844796381205e-06, "loss": 0.503, "step": 1998 }, { "epoch": 1.6836047164514318, "grad_norm": 0.22284731268882751, "learning_rate": 4.803945862966646e-06, "loss": 0.4236, "step": 1999 }, { "epoch": 1.6844469399213926, "grad_norm": 0.2825278043746948, "learning_rate": 4.799047118043126e-06, "loss": 0.4983, "step": 2000 }, { "epoch": 1.6852891633913532, "grad_norm": 0.23764850199222565, "learning_rate": 4.794148566320412e-06, "loss": 0.4848, "step": 2001 }, { "epoch": 1.686131386861314, "grad_norm": 0.22966782748699188, "learning_rate": 4.789250212508088e-06, "loss": 0.4476, "step": 2002 }, { "epoch": 1.6869736103312745, "grad_norm": 0.2365970015525818, "learning_rate": 4.7843520613155434e-06, "loss": 0.5025, "step": 2003 }, { "epoch": 1.6878158338012352, "grad_norm": 0.2227955311536789, "learning_rate": 4.779454117451978e-06, "loss": 0.4778, "step": 2004 }, { "epoch": 1.688658057271196, "grad_norm": 0.22719447314739227, "learning_rate": 4.774556385626386e-06, "loss": 0.4855, "step": 2005 }, { "epoch": 1.6895002807411568, "grad_norm": 0.21644869446754456, "learning_rate": 4.769658870547567e-06, "loss": 0.4265, "step": 2006 }, { "epoch": 1.6903425042111173, "grad_norm": 0.22600358724594116, "learning_rate": 4.7647615769241e-06, "loss": 0.4652, "step": 2007 }, { "epoch": 1.691184727681078, "grad_norm": 0.23710064589977264, "learning_rate": 4.759864509464366e-06, "loss": 0.4509, "step": 2008 }, { "epoch": 1.6920269511510386, "grad_norm": 0.21621251106262207, "learning_rate": 4.754967672876513e-06, "loss": 0.4729, "step": 2009 }, { "epoch": 1.6928691746209994, "grad_norm": 0.2685050666332245, "learning_rate": 4.750071071868478e-06, "loss": 0.4932, "step": 2010 }, { "epoch": 1.6937113980909602, "grad_norm": 0.24373380839824677, "learning_rate": 4.745174711147967e-06, "loss": 0.4676, "step": 2011 }, { "epoch": 1.694553621560921, "grad_norm": 0.27241477370262146, "learning_rate": 4.7402785954224565e-06, "loss": 0.4854, "step": 2012 }, { "epoch": 1.6953958450308815, "grad_norm": 0.24667201936244965, "learning_rate": 4.7353827293991845e-06, "loss": 0.4803, "step": 2013 }, { "epoch": 1.6962380685008422, "grad_norm": 0.23230643570423126, "learning_rate": 4.730487117785155e-06, "loss": 0.4529, "step": 2014 }, { "epoch": 1.6970802919708028, "grad_norm": 0.2781843841075897, "learning_rate": 4.725591765287119e-06, "loss": 0.5175, "step": 2015 }, { "epoch": 1.6979225154407636, "grad_norm": 0.2518756687641144, "learning_rate": 4.720696676611589e-06, "loss": 0.4209, "step": 2016 }, { "epoch": 1.6987647389107243, "grad_norm": 0.29593271017074585, "learning_rate": 4.715801856464812e-06, "loss": 0.5124, "step": 2017 }, { "epoch": 1.699606962380685, "grad_norm": 0.2651813328266144, "learning_rate": 4.710907309552787e-06, "loss": 0.4543, "step": 2018 }, { "epoch": 1.7004491858506459, "grad_norm": 0.27470842003822327, "learning_rate": 4.706013040581242e-06, "loss": 0.4965, "step": 2019 }, { "epoch": 1.7012914093206064, "grad_norm": 0.25635066628456116, "learning_rate": 4.701119054255646e-06, "loss": 0.4637, "step": 2020 }, { "epoch": 1.702133632790567, "grad_norm": 0.23567962646484375, "learning_rate": 4.6962253552811885e-06, "loss": 0.4667, "step": 2021 }, { "epoch": 1.7029758562605277, "grad_norm": 0.21448321640491486, "learning_rate": 4.691331948362789e-06, "loss": 0.4076, "step": 2022 }, { "epoch": 1.7038180797304885, "grad_norm": 0.25385233759880066, "learning_rate": 4.6864388382050804e-06, "loss": 0.5075, "step": 2023 }, { "epoch": 1.7046603032004493, "grad_norm": 0.2564161717891693, "learning_rate": 4.6815460295124185e-06, "loss": 0.4984, "step": 2024 }, { "epoch": 1.70550252667041, "grad_norm": 0.2187940776348114, "learning_rate": 4.676653526988858e-06, "loss": 0.4506, "step": 2025 }, { "epoch": 1.7063447501403706, "grad_norm": 0.23352238535881042, "learning_rate": 4.671761335338171e-06, "loss": 0.4711, "step": 2026 }, { "epoch": 1.7071869736103311, "grad_norm": 0.24008113145828247, "learning_rate": 4.666869459263821e-06, "loss": 0.4996, "step": 2027 }, { "epoch": 1.7080291970802919, "grad_norm": 0.2463906854391098, "learning_rate": 4.661977903468974e-06, "loss": 0.4615, "step": 2028 }, { "epoch": 1.7088714205502527, "grad_norm": 0.2436271458864212, "learning_rate": 4.657086672656486e-06, "loss": 0.428, "step": 2029 }, { "epoch": 1.7097136440202134, "grad_norm": 0.24676041305065155, "learning_rate": 4.652195771528901e-06, "loss": 0.4834, "step": 2030 }, { "epoch": 1.7105558674901742, "grad_norm": 0.2483028918504715, "learning_rate": 4.647305204788445e-06, "loss": 0.4775, "step": 2031 }, { "epoch": 1.7113980909601347, "grad_norm": 0.2748636305332184, "learning_rate": 4.642414977137026e-06, "loss": 0.5018, "step": 2032 }, { "epoch": 1.7122403144300955, "grad_norm": 0.23401370644569397, "learning_rate": 4.63752509327622e-06, "loss": 0.4492, "step": 2033 }, { "epoch": 1.713082537900056, "grad_norm": 0.22928811609745026, "learning_rate": 4.632635557907277e-06, "loss": 0.4786, "step": 2034 }, { "epoch": 1.7139247613700168, "grad_norm": 0.28120723366737366, "learning_rate": 4.627746375731112e-06, "loss": 0.5378, "step": 2035 }, { "epoch": 1.7147669848399776, "grad_norm": 0.27036112546920776, "learning_rate": 4.622857551448297e-06, "loss": 0.4508, "step": 2036 }, { "epoch": 1.7156092083099383, "grad_norm": 0.25437602400779724, "learning_rate": 4.617969089759066e-06, "loss": 0.4526, "step": 2037 }, { "epoch": 1.716451431779899, "grad_norm": 0.2434171885251999, "learning_rate": 4.613080995363296e-06, "loss": 0.5213, "step": 2038 }, { "epoch": 1.7172936552498597, "grad_norm": 0.21452771127223969, "learning_rate": 4.608193272960519e-06, "loss": 0.4567, "step": 2039 }, { "epoch": 1.7181358787198202, "grad_norm": 0.2598097324371338, "learning_rate": 4.603305927249902e-06, "loss": 0.4664, "step": 2040 }, { "epoch": 1.718978102189781, "grad_norm": 0.244187131524086, "learning_rate": 4.598418962930258e-06, "loss": 0.4939, "step": 2041 }, { "epoch": 1.7198203256597417, "grad_norm": 0.2551167905330658, "learning_rate": 4.593532384700026e-06, "loss": 0.5373, "step": 2042 }, { "epoch": 1.7206625491297025, "grad_norm": 0.2712896168231964, "learning_rate": 4.588646197257278e-06, "loss": 0.5195, "step": 2043 }, { "epoch": 1.721504772599663, "grad_norm": 0.20474348962306976, "learning_rate": 4.583760405299707e-06, "loss": 0.4498, "step": 2044 }, { "epoch": 1.7223469960696238, "grad_norm": 0.23961833119392395, "learning_rate": 4.57887501352463e-06, "loss": 0.4466, "step": 2045 }, { "epoch": 1.7231892195395844, "grad_norm": 0.25548139214515686, "learning_rate": 4.573990026628976e-06, "loss": 0.4887, "step": 2046 }, { "epoch": 1.7240314430095451, "grad_norm": 0.22290314733982086, "learning_rate": 4.569105449309289e-06, "loss": 0.4859, "step": 2047 }, { "epoch": 1.724873666479506, "grad_norm": 0.23146553337574005, "learning_rate": 4.564221286261709e-06, "loss": 0.4323, "step": 2048 }, { "epoch": 1.7257158899494667, "grad_norm": 0.2553330659866333, "learning_rate": 4.559337542181993e-06, "loss": 0.5264, "step": 2049 }, { "epoch": 1.7265581134194274, "grad_norm": 0.22537465393543243, "learning_rate": 4.554454221765479e-06, "loss": 0.4312, "step": 2050 }, { "epoch": 1.727400336889388, "grad_norm": 0.23263882100582123, "learning_rate": 4.549571329707113e-06, "loss": 0.4629, "step": 2051 }, { "epoch": 1.7282425603593485, "grad_norm": 0.24165654182434082, "learning_rate": 4.544688870701416e-06, "loss": 0.4585, "step": 2052 }, { "epoch": 1.7290847838293093, "grad_norm": 0.26465511322021484, "learning_rate": 4.539806849442501e-06, "loss": 0.4553, "step": 2053 }, { "epoch": 1.72992700729927, "grad_norm": 0.2517217695713043, "learning_rate": 4.534925270624057e-06, "loss": 0.5164, "step": 2054 }, { "epoch": 1.7307692307692308, "grad_norm": 0.23587463796138763, "learning_rate": 4.53004413893935e-06, "loss": 0.4587, "step": 2055 }, { "epoch": 1.7316114542391916, "grad_norm": 0.2451951652765274, "learning_rate": 4.52516345908121e-06, "loss": 0.5025, "step": 2056 }, { "epoch": 1.7324536777091522, "grad_norm": 0.25360938906669617, "learning_rate": 4.520283235742042e-06, "loss": 0.525, "step": 2057 }, { "epoch": 1.7332959011791127, "grad_norm": 0.23567134141921997, "learning_rate": 4.5154034736138035e-06, "loss": 0.4415, "step": 2058 }, { "epoch": 1.7341381246490735, "grad_norm": 0.2337888926267624, "learning_rate": 4.510524177388014e-06, "loss": 0.4976, "step": 2059 }, { "epoch": 1.7349803481190342, "grad_norm": 0.23685359954833984, "learning_rate": 4.505645351755741e-06, "loss": 0.4825, "step": 2060 }, { "epoch": 1.735822571588995, "grad_norm": 0.24533124268054962, "learning_rate": 4.500767001407604e-06, "loss": 0.5193, "step": 2061 }, { "epoch": 1.7366647950589558, "grad_norm": 0.23314377665519714, "learning_rate": 4.495889131033762e-06, "loss": 0.4659, "step": 2062 }, { "epoch": 1.7375070185289163, "grad_norm": 0.24723856151103973, "learning_rate": 4.491011745323914e-06, "loss": 0.4526, "step": 2063 }, { "epoch": 1.738349241998877, "grad_norm": 0.26352548599243164, "learning_rate": 4.486134848967292e-06, "loss": 0.4489, "step": 2064 }, { "epoch": 1.7391914654688376, "grad_norm": 0.2554502785205841, "learning_rate": 4.481258446652662e-06, "loss": 0.487, "step": 2065 }, { "epoch": 1.7400336889387984, "grad_norm": 0.24739980697631836, "learning_rate": 4.4763825430683055e-06, "loss": 0.4769, "step": 2066 }, { "epoch": 1.7408759124087592, "grad_norm": 0.24096263945102692, "learning_rate": 4.471507142902036e-06, "loss": 0.4593, "step": 2067 }, { "epoch": 1.74171813587872, "grad_norm": 0.2620318830013275, "learning_rate": 4.466632250841173e-06, "loss": 0.4748, "step": 2068 }, { "epoch": 1.7425603593486805, "grad_norm": 0.2682340145111084, "learning_rate": 4.4617578715725565e-06, "loss": 0.4868, "step": 2069 }, { "epoch": 1.7434025828186412, "grad_norm": 0.2485581338405609, "learning_rate": 4.4568840097825225e-06, "loss": 0.4869, "step": 2070 }, { "epoch": 1.7442448062886018, "grad_norm": 0.2229103147983551, "learning_rate": 4.452010670156922e-06, "loss": 0.43, "step": 2071 }, { "epoch": 1.7450870297585626, "grad_norm": 0.23545841872692108, "learning_rate": 4.447137857381095e-06, "loss": 0.4877, "step": 2072 }, { "epoch": 1.7459292532285233, "grad_norm": 0.22514350712299347, "learning_rate": 4.4422655761398785e-06, "loss": 0.468, "step": 2073 }, { "epoch": 1.746771476698484, "grad_norm": 0.2534525394439697, "learning_rate": 4.437393831117596e-06, "loss": 0.5293, "step": 2074 }, { "epoch": 1.7476137001684446, "grad_norm": 0.2523089349269867, "learning_rate": 4.432522626998061e-06, "loss": 0.4956, "step": 2075 }, { "epoch": 1.7484559236384054, "grad_norm": 0.22219009697437286, "learning_rate": 4.427651968464559e-06, "loss": 0.4776, "step": 2076 }, { "epoch": 1.749298147108366, "grad_norm": 0.21809135377407074, "learning_rate": 4.4227818601998575e-06, "loss": 0.4218, "step": 2077 }, { "epoch": 1.7501403705783267, "grad_norm": 0.23955988883972168, "learning_rate": 4.417912306886192e-06, "loss": 0.4661, "step": 2078 }, { "epoch": 1.7509825940482875, "grad_norm": 0.2701292932033539, "learning_rate": 4.413043313205266e-06, "loss": 0.4883, "step": 2079 }, { "epoch": 1.7518248175182483, "grad_norm": 0.250299870967865, "learning_rate": 4.408174883838243e-06, "loss": 0.4308, "step": 2080 }, { "epoch": 1.752667040988209, "grad_norm": 0.24872496724128723, "learning_rate": 4.403307023465746e-06, "loss": 0.4585, "step": 2081 }, { "epoch": 1.7535092644581696, "grad_norm": 0.25244420766830444, "learning_rate": 4.3984397367678475e-06, "loss": 0.5064, "step": 2082 }, { "epoch": 1.7543514879281301, "grad_norm": 0.2547873854637146, "learning_rate": 4.393573028424075e-06, "loss": 0.4763, "step": 2083 }, { "epoch": 1.7551937113980909, "grad_norm": 0.26858794689178467, "learning_rate": 4.388706903113391e-06, "loss": 0.4781, "step": 2084 }, { "epoch": 1.7560359348680517, "grad_norm": 0.24431651830673218, "learning_rate": 4.383841365514208e-06, "loss": 0.4941, "step": 2085 }, { "epoch": 1.7568781583380124, "grad_norm": 0.25946319103240967, "learning_rate": 4.378976420304361e-06, "loss": 0.5239, "step": 2086 }, { "epoch": 1.7577203818079732, "grad_norm": 0.250829815864563, "learning_rate": 4.374112072161129e-06, "loss": 0.4676, "step": 2087 }, { "epoch": 1.7585626052779337, "grad_norm": 0.22532255947589874, "learning_rate": 4.369248325761205e-06, "loss": 0.4319, "step": 2088 }, { "epoch": 1.7594048287478943, "grad_norm": 0.2696113884449005, "learning_rate": 4.364385185780712e-06, "loss": 0.475, "step": 2089 }, { "epoch": 1.760247052217855, "grad_norm": 0.22161316871643066, "learning_rate": 4.359522656895185e-06, "loss": 0.3971, "step": 2090 }, { "epoch": 1.7610892756878158, "grad_norm": 0.25674107670783997, "learning_rate": 4.354660743779575e-06, "loss": 0.5257, "step": 2091 }, { "epoch": 1.7619314991577766, "grad_norm": 0.24941223859786987, "learning_rate": 4.349799451108236e-06, "loss": 0.4811, "step": 2092 }, { "epoch": 1.7627737226277373, "grad_norm": 0.2398083209991455, "learning_rate": 4.3449387835549305e-06, "loss": 0.4787, "step": 2093 }, { "epoch": 1.763615946097698, "grad_norm": 0.23516356945037842, "learning_rate": 4.340078745792818e-06, "loss": 0.4475, "step": 2094 }, { "epoch": 1.7644581695676587, "grad_norm": 0.23486120998859406, "learning_rate": 4.3352193424944535e-06, "loss": 0.4876, "step": 2095 }, { "epoch": 1.7653003930376192, "grad_norm": 0.2414945662021637, "learning_rate": 4.3303605783317794e-06, "loss": 0.4678, "step": 2096 }, { "epoch": 1.76614261650758, "grad_norm": 0.24568647146224976, "learning_rate": 4.325502457976126e-06, "loss": 0.4608, "step": 2097 }, { "epoch": 1.7669848399775407, "grad_norm": 0.25279858708381653, "learning_rate": 4.320644986098204e-06, "loss": 0.522, "step": 2098 }, { "epoch": 1.7678270634475015, "grad_norm": 0.22954513132572174, "learning_rate": 4.315788167368102e-06, "loss": 0.4468, "step": 2099 }, { "epoch": 1.768669286917462, "grad_norm": 0.2170470505952835, "learning_rate": 4.310932006455276e-06, "loss": 0.423, "step": 2100 }, { "epoch": 1.7695115103874228, "grad_norm": 0.24672669172286987, "learning_rate": 4.306076508028557e-06, "loss": 0.4512, "step": 2101 }, { "epoch": 1.7703537338573834, "grad_norm": 0.2549592852592468, "learning_rate": 4.301221676756129e-06, "loss": 0.4742, "step": 2102 }, { "epoch": 1.7711959573273441, "grad_norm": 0.2782382369041443, "learning_rate": 4.296367517305548e-06, "loss": 0.4607, "step": 2103 }, { "epoch": 1.772038180797305, "grad_norm": 0.23844127357006073, "learning_rate": 4.29151403434371e-06, "loss": 0.4285, "step": 2104 }, { "epoch": 1.7728804042672657, "grad_norm": 0.2491370588541031, "learning_rate": 4.286661232536873e-06, "loss": 0.4563, "step": 2105 }, { "epoch": 1.7737226277372264, "grad_norm": 0.24884110689163208, "learning_rate": 4.281809116550629e-06, "loss": 0.4761, "step": 2106 }, { "epoch": 1.774564851207187, "grad_norm": 0.21792751550674438, "learning_rate": 4.276957691049917e-06, "loss": 0.4148, "step": 2107 }, { "epoch": 1.7754070746771475, "grad_norm": 0.24464136362075806, "learning_rate": 4.272106960699015e-06, "loss": 0.5115, "step": 2108 }, { "epoch": 1.7762492981471083, "grad_norm": 0.21597449481487274, "learning_rate": 4.267256930161523e-06, "loss": 0.4221, "step": 2109 }, { "epoch": 1.777091521617069, "grad_norm": 0.25279051065444946, "learning_rate": 4.2624076041003794e-06, "loss": 0.4935, "step": 2110 }, { "epoch": 1.7779337450870298, "grad_norm": 0.22367523610591888, "learning_rate": 4.257558987177835e-06, "loss": 0.4502, "step": 2111 }, { "epoch": 1.7787759685569906, "grad_norm": 0.258608341217041, "learning_rate": 4.252711084055468e-06, "loss": 0.4831, "step": 2112 }, { "epoch": 1.7796181920269512, "grad_norm": 0.23281006515026093, "learning_rate": 4.247863899394162e-06, "loss": 0.4532, "step": 2113 }, { "epoch": 1.7804604154969117, "grad_norm": 0.24901798367500305, "learning_rate": 4.243017437854117e-06, "loss": 0.508, "step": 2114 }, { "epoch": 1.7813026389668725, "grad_norm": 0.25218674540519714, "learning_rate": 4.238171704094833e-06, "loss": 0.4417, "step": 2115 }, { "epoch": 1.7821448624368332, "grad_norm": 0.22459886968135834, "learning_rate": 4.2333267027751125e-06, "loss": 0.4471, "step": 2116 }, { "epoch": 1.782987085906794, "grad_norm": 0.22819103300571442, "learning_rate": 4.228482438553052e-06, "loss": 0.4955, "step": 2117 }, { "epoch": 1.7838293093767548, "grad_norm": 0.2266789674758911, "learning_rate": 4.223638916086044e-06, "loss": 0.465, "step": 2118 }, { "epoch": 1.7846715328467153, "grad_norm": 0.2116471230983734, "learning_rate": 4.218796140030759e-06, "loss": 0.427, "step": 2119 }, { "epoch": 1.7855137563166759, "grad_norm": 0.23195287585258484, "learning_rate": 4.21395411504316e-06, "loss": 0.4762, "step": 2120 }, { "epoch": 1.7863559797866366, "grad_norm": 0.23150375485420227, "learning_rate": 4.209112845778481e-06, "loss": 0.462, "step": 2121 }, { "epoch": 1.7871982032565974, "grad_norm": 0.26087597012519836, "learning_rate": 4.204272336891232e-06, "loss": 0.4616, "step": 2122 }, { "epoch": 1.7880404267265582, "grad_norm": 0.2432989478111267, "learning_rate": 4.199432593035192e-06, "loss": 0.5056, "step": 2123 }, { "epoch": 1.788882650196519, "grad_norm": 0.22448651492595673, "learning_rate": 4.194593618863404e-06, "loss": 0.4457, "step": 2124 }, { "epoch": 1.7897248736664795, "grad_norm": 0.2513195276260376, "learning_rate": 4.189755419028169e-06, "loss": 0.522, "step": 2125 }, { "epoch": 1.7905670971364402, "grad_norm": 0.24185556173324585, "learning_rate": 4.1849179981810506e-06, "loss": 0.5066, "step": 2126 }, { "epoch": 1.7914093206064008, "grad_norm": 0.24166759848594666, "learning_rate": 4.180081360972852e-06, "loss": 0.4398, "step": 2127 }, { "epoch": 1.7922515440763616, "grad_norm": 0.23057276010513306, "learning_rate": 4.175245512053637e-06, "loss": 0.4663, "step": 2128 }, { "epoch": 1.7930937675463223, "grad_norm": 0.25396135449409485, "learning_rate": 4.1704104560726955e-06, "loss": 0.4771, "step": 2129 }, { "epoch": 1.793935991016283, "grad_norm": 0.2549886107444763, "learning_rate": 4.165576197678571e-06, "loss": 0.4981, "step": 2130 }, { "epoch": 1.7947782144862436, "grad_norm": 0.2168891727924347, "learning_rate": 4.160742741519028e-06, "loss": 0.4546, "step": 2131 }, { "epoch": 1.7956204379562044, "grad_norm": 0.24747246503829956, "learning_rate": 4.1559100922410665e-06, "loss": 0.4694, "step": 2132 }, { "epoch": 1.796462661426165, "grad_norm": 0.27652689814567566, "learning_rate": 4.151078254490908e-06, "loss": 0.4658, "step": 2133 }, { "epoch": 1.7973048848961257, "grad_norm": 0.2780439853668213, "learning_rate": 4.146247232913996e-06, "loss": 0.4695, "step": 2134 }, { "epoch": 1.7981471083660865, "grad_norm": 0.2576582729816437, "learning_rate": 4.141417032154984e-06, "loss": 0.4919, "step": 2135 }, { "epoch": 1.7989893318360473, "grad_norm": 0.23514126241207123, "learning_rate": 4.136587656857744e-06, "loss": 0.4836, "step": 2136 }, { "epoch": 1.799831555306008, "grad_norm": 0.23796723783016205, "learning_rate": 4.131759111665349e-06, "loss": 0.4574, "step": 2137 }, { "epoch": 1.8006737787759686, "grad_norm": 0.2572718858718872, "learning_rate": 4.126931401220075e-06, "loss": 0.4885, "step": 2138 }, { "epoch": 1.8015160022459291, "grad_norm": 0.24792805314064026, "learning_rate": 4.122104530163397e-06, "loss": 0.4758, "step": 2139 }, { "epoch": 1.8023582257158899, "grad_norm": 0.278056263923645, "learning_rate": 4.117278503135981e-06, "loss": 0.4688, "step": 2140 }, { "epoch": 1.8032004491858507, "grad_norm": 0.24211832880973816, "learning_rate": 4.112453324777683e-06, "loss": 0.4336, "step": 2141 }, { "epoch": 1.8040426726558114, "grad_norm": 0.26367872953414917, "learning_rate": 4.107628999727542e-06, "loss": 0.5514, "step": 2142 }, { "epoch": 1.8048848961257722, "grad_norm": 0.23210996389389038, "learning_rate": 4.102805532623775e-06, "loss": 0.441, "step": 2143 }, { "epoch": 1.8057271195957327, "grad_norm": 0.23231682181358337, "learning_rate": 4.097982928103782e-06, "loss": 0.4724, "step": 2144 }, { "epoch": 1.8065693430656933, "grad_norm": 0.2784189283847809, "learning_rate": 4.09316119080412e-06, "loss": 0.5143, "step": 2145 }, { "epoch": 1.807411566535654, "grad_norm": 0.23337194323539734, "learning_rate": 4.088340325360529e-06, "loss": 0.3979, "step": 2146 }, { "epoch": 1.8082537900056148, "grad_norm": 0.23599334061145782, "learning_rate": 4.083520336407894e-06, "loss": 0.4842, "step": 2147 }, { "epoch": 1.8090960134755756, "grad_norm": 0.23311544954776764, "learning_rate": 4.0787012285802695e-06, "loss": 0.4594, "step": 2148 }, { "epoch": 1.8099382369455363, "grad_norm": 0.22791731357574463, "learning_rate": 4.073883006510858e-06, "loss": 0.4871, "step": 2149 }, { "epoch": 1.810780460415497, "grad_norm": 0.23026038706302643, "learning_rate": 4.069065674832011e-06, "loss": 0.4481, "step": 2150 }, { "epoch": 1.8116226838854577, "grad_norm": 0.22687843441963196, "learning_rate": 4.064249238175223e-06, "loss": 0.5196, "step": 2151 }, { "epoch": 1.8124649073554182, "grad_norm": 0.2442893236875534, "learning_rate": 4.059433701171131e-06, "loss": 0.5139, "step": 2152 }, { "epoch": 1.813307130825379, "grad_norm": 0.26727378368377686, "learning_rate": 4.054619068449502e-06, "loss": 0.4818, "step": 2153 }, { "epoch": 1.8141493542953397, "grad_norm": 0.26312392950057983, "learning_rate": 4.04980534463924e-06, "loss": 0.5232, "step": 2154 }, { "epoch": 1.8149915777653005, "grad_norm": 0.2423468679189682, "learning_rate": 4.044992534368369e-06, "loss": 0.4643, "step": 2155 }, { "epoch": 1.815833801235261, "grad_norm": 0.24071694910526276, "learning_rate": 4.04018064226404e-06, "loss": 0.45, "step": 2156 }, { "epoch": 1.8166760247052218, "grad_norm": 0.24079975485801697, "learning_rate": 4.035369672952516e-06, "loss": 0.4518, "step": 2157 }, { "epoch": 1.8175182481751824, "grad_norm": 0.23359672725200653, "learning_rate": 4.030559631059179e-06, "loss": 0.4744, "step": 2158 }, { "epoch": 1.8183604716451431, "grad_norm": 0.24040605127811432, "learning_rate": 4.025750521208512e-06, "loss": 0.4826, "step": 2159 }, { "epoch": 1.819202695115104, "grad_norm": 0.25223496556282043, "learning_rate": 4.020942348024108e-06, "loss": 0.5251, "step": 2160 }, { "epoch": 1.8200449185850647, "grad_norm": 0.2450094372034073, "learning_rate": 4.016135116128656e-06, "loss": 0.4558, "step": 2161 }, { "epoch": 1.8208871420550252, "grad_norm": 0.23229603469371796, "learning_rate": 4.011328830143945e-06, "loss": 0.4248, "step": 2162 }, { "epoch": 1.821729365524986, "grad_norm": 0.22150273621082306, "learning_rate": 4.0065234946908456e-06, "loss": 0.4466, "step": 2163 }, { "epoch": 1.8225715889949465, "grad_norm": 0.24507595598697662, "learning_rate": 4.001719114389325e-06, "loss": 0.4768, "step": 2164 }, { "epoch": 1.8234138124649073, "grad_norm": 0.25812336802482605, "learning_rate": 3.996915693858422e-06, "loss": 0.5185, "step": 2165 }, { "epoch": 1.824256035934868, "grad_norm": 0.23018379509449005, "learning_rate": 3.992113237716261e-06, "loss": 0.4507, "step": 2166 }, { "epoch": 1.8250982594048288, "grad_norm": 0.2388211041688919, "learning_rate": 3.987311750580035e-06, "loss": 0.4615, "step": 2167 }, { "epoch": 1.8259404828747896, "grad_norm": 0.2227044254541397, "learning_rate": 3.9825112370660055e-06, "loss": 0.4461, "step": 2168 }, { "epoch": 1.8267827063447502, "grad_norm": 0.2286769449710846, "learning_rate": 3.977711701789499e-06, "loss": 0.4789, "step": 2169 }, { "epoch": 1.8276249298147107, "grad_norm": 0.2515222430229187, "learning_rate": 3.972913149364902e-06, "loss": 0.5005, "step": 2170 }, { "epoch": 1.8284671532846715, "grad_norm": 0.2341202348470688, "learning_rate": 3.9681155844056525e-06, "loss": 0.4563, "step": 2171 }, { "epoch": 1.8293093767546322, "grad_norm": 0.23173372447490692, "learning_rate": 3.963319011524246e-06, "loss": 0.4592, "step": 2172 }, { "epoch": 1.830151600224593, "grad_norm": 0.23577053844928741, "learning_rate": 3.9585234353322155e-06, "loss": 0.4863, "step": 2173 }, { "epoch": 1.8309938236945538, "grad_norm": 0.24397295713424683, "learning_rate": 3.953728860440144e-06, "loss": 0.4486, "step": 2174 }, { "epoch": 1.8318360471645143, "grad_norm": 0.22254842519760132, "learning_rate": 3.948935291457645e-06, "loss": 0.4133, "step": 2175 }, { "epoch": 1.8326782706344749, "grad_norm": 0.23800231516361237, "learning_rate": 3.94414273299337e-06, "loss": 0.4579, "step": 2176 }, { "epoch": 1.8335204941044356, "grad_norm": 0.2660507559776306, "learning_rate": 3.939351189654996e-06, "loss": 0.4755, "step": 2177 }, { "epoch": 1.8343627175743964, "grad_norm": 0.2282324731349945, "learning_rate": 3.934560666049226e-06, "loss": 0.4631, "step": 2178 }, { "epoch": 1.8352049410443572, "grad_norm": 0.20728914439678192, "learning_rate": 3.929771166781781e-06, "loss": 0.4182, "step": 2179 }, { "epoch": 1.836047164514318, "grad_norm": 0.28055211901664734, "learning_rate": 3.9249826964573965e-06, "loss": 0.509, "step": 2180 }, { "epoch": 1.8368893879842785, "grad_norm": 0.24710287153720856, "learning_rate": 3.920195259679822e-06, "loss": 0.4772, "step": 2181 }, { "epoch": 1.8377316114542392, "grad_norm": 0.24627083539962769, "learning_rate": 3.915408861051809e-06, "loss": 0.4629, "step": 2182 }, { "epoch": 1.8385738349241998, "grad_norm": 0.2388210892677307, "learning_rate": 3.910623505175116e-06, "loss": 0.4454, "step": 2183 }, { "epoch": 1.8394160583941606, "grad_norm": 0.23326429724693298, "learning_rate": 3.905839196650494e-06, "loss": 0.4918, "step": 2184 }, { "epoch": 1.8402582818641213, "grad_norm": 0.271761029958725, "learning_rate": 3.901055940077691e-06, "loss": 0.4653, "step": 2185 }, { "epoch": 1.841100505334082, "grad_norm": 0.2580558657646179, "learning_rate": 3.8962737400554395e-06, "loss": 0.4901, "step": 2186 }, { "epoch": 1.8419427288040426, "grad_norm": 0.2472505122423172, "learning_rate": 3.891492601181462e-06, "loss": 0.4804, "step": 2187 }, { "epoch": 1.8427849522740034, "grad_norm": 0.2213340699672699, "learning_rate": 3.8867125280524535e-06, "loss": 0.4683, "step": 2188 }, { "epoch": 1.843627175743964, "grad_norm": 0.23129644989967346, "learning_rate": 3.881933525264092e-06, "loss": 0.4425, "step": 2189 }, { "epoch": 1.8444693992139247, "grad_norm": 0.2563159763813019, "learning_rate": 3.877155597411019e-06, "loss": 0.5367, "step": 2190 }, { "epoch": 1.8453116226838855, "grad_norm": 0.24226200580596924, "learning_rate": 3.87237874908685e-06, "loss": 0.4324, "step": 2191 }, { "epoch": 1.8461538461538463, "grad_norm": 0.28105345368385315, "learning_rate": 3.867602984884155e-06, "loss": 0.5196, "step": 2192 }, { "epoch": 1.8469960696238068, "grad_norm": 0.25595995783805847, "learning_rate": 3.862828309394469e-06, "loss": 0.4447, "step": 2193 }, { "epoch": 1.8478382930937676, "grad_norm": 0.21991926431655884, "learning_rate": 3.8580547272082746e-06, "loss": 0.4306, "step": 2194 }, { "epoch": 1.8486805165637281, "grad_norm": 0.2312726229429245, "learning_rate": 3.853282242915007e-06, "loss": 0.4752, "step": 2195 }, { "epoch": 1.8495227400336889, "grad_norm": 0.24545201659202576, "learning_rate": 3.8485108611030415e-06, "loss": 0.4903, "step": 2196 }, { "epoch": 1.8503649635036497, "grad_norm": 0.23864033818244934, "learning_rate": 3.843740586359701e-06, "loss": 0.4566, "step": 2197 }, { "epoch": 1.8512071869736104, "grad_norm": 0.23080633580684662, "learning_rate": 3.8389714232712346e-06, "loss": 0.4546, "step": 2198 }, { "epoch": 1.8520494104435712, "grad_norm": 0.28249937295913696, "learning_rate": 3.834203376422831e-06, "loss": 0.4358, "step": 2199 }, { "epoch": 1.8528916339135317, "grad_norm": 0.23615585267543793, "learning_rate": 3.829436450398599e-06, "loss": 0.4583, "step": 2200 }, { "epoch": 1.8537338573834923, "grad_norm": 0.22761037945747375, "learning_rate": 3.824670649781576e-06, "loss": 0.4946, "step": 2201 }, { "epoch": 1.854576080853453, "grad_norm": 0.23420101404190063, "learning_rate": 3.8199059791537105e-06, "loss": 0.4528, "step": 2202 }, { "epoch": 1.8554183043234138, "grad_norm": 0.23464335501194, "learning_rate": 3.815142443095873e-06, "loss": 0.4801, "step": 2203 }, { "epoch": 1.8562605277933746, "grad_norm": 0.2349872589111328, "learning_rate": 3.8103800461878344e-06, "loss": 0.4708, "step": 2204 }, { "epoch": 1.8571027512633353, "grad_norm": 0.24585379660129547, "learning_rate": 3.805618793008279e-06, "loss": 0.4748, "step": 2205 }, { "epoch": 1.857944974733296, "grad_norm": 0.24986626207828522, "learning_rate": 3.8008586881347815e-06, "loss": 0.4754, "step": 2206 }, { "epoch": 1.8587871982032564, "grad_norm": 0.23265676200389862, "learning_rate": 3.7960997361438235e-06, "loss": 0.4393, "step": 2207 }, { "epoch": 1.8596294216732172, "grad_norm": 0.22677557170391083, "learning_rate": 3.7913419416107692e-06, "loss": 0.4746, "step": 2208 }, { "epoch": 1.860471645143178, "grad_norm": 0.23419958353042603, "learning_rate": 3.786585309109877e-06, "loss": 0.4495, "step": 2209 }, { "epoch": 1.8613138686131387, "grad_norm": 0.24716877937316895, "learning_rate": 3.7818298432142814e-06, "loss": 0.4985, "step": 2210 }, { "epoch": 1.8621560920830995, "grad_norm": 0.2356598675251007, "learning_rate": 3.777075548496001e-06, "loss": 0.487, "step": 2211 }, { "epoch": 1.86299831555306, "grad_norm": 0.26035091280937195, "learning_rate": 3.7723224295259247e-06, "loss": 0.4629, "step": 2212 }, { "epoch": 1.8638405390230208, "grad_norm": 0.2498939335346222, "learning_rate": 3.7675704908738136e-06, "loss": 0.4886, "step": 2213 }, { "epoch": 1.8646827624929814, "grad_norm": 0.21181143820285797, "learning_rate": 3.7628197371082916e-06, "loss": 0.4207, "step": 2214 }, { "epoch": 1.8655249859629421, "grad_norm": 0.23479174077510834, "learning_rate": 3.758070172796846e-06, "loss": 0.4918, "step": 2215 }, { "epoch": 1.866367209432903, "grad_norm": 0.24509906768798828, "learning_rate": 3.753321802505817e-06, "loss": 0.5305, "step": 2216 }, { "epoch": 1.8672094329028637, "grad_norm": 0.27460622787475586, "learning_rate": 3.7485746308004013e-06, "loss": 0.4505, "step": 2217 }, { "epoch": 1.8680516563728242, "grad_norm": 0.25578269362449646, "learning_rate": 3.743828662244639e-06, "loss": 0.5221, "step": 2218 }, { "epoch": 1.868893879842785, "grad_norm": 0.2236570119857788, "learning_rate": 3.739083901401418e-06, "loss": 0.435, "step": 2219 }, { "epoch": 1.8697361033127455, "grad_norm": 0.2392781674861908, "learning_rate": 3.7343403528324574e-06, "loss": 0.485, "step": 2220 }, { "epoch": 1.8705783267827063, "grad_norm": 0.25104543566703796, "learning_rate": 3.7295980210983233e-06, "loss": 0.5052, "step": 2221 }, { "epoch": 1.871420550252667, "grad_norm": 0.23763924837112427, "learning_rate": 3.7248569107583976e-06, "loss": 0.4787, "step": 2222 }, { "epoch": 1.8722627737226278, "grad_norm": 0.21863232553005219, "learning_rate": 3.7201170263709004e-06, "loss": 0.4635, "step": 2223 }, { "epoch": 1.8731049971925884, "grad_norm": 0.2238796055316925, "learning_rate": 3.7153783724928617e-06, "loss": 0.4474, "step": 2224 }, { "epoch": 1.8739472206625492, "grad_norm": 0.24566030502319336, "learning_rate": 3.71064095368014e-06, "loss": 0.5063, "step": 2225 }, { "epoch": 1.8747894441325097, "grad_norm": 0.24029503762722015, "learning_rate": 3.705904774487396e-06, "loss": 0.4336, "step": 2226 }, { "epoch": 1.8756316676024705, "grad_norm": 0.259880393743515, "learning_rate": 3.7011698394681075e-06, "loss": 0.5107, "step": 2227 }, { "epoch": 1.8764738910724312, "grad_norm": 0.2057226449251175, "learning_rate": 3.696436153174548e-06, "loss": 0.4616, "step": 2228 }, { "epoch": 1.877316114542392, "grad_norm": 0.24683153629302979, "learning_rate": 3.6917037201577977e-06, "loss": 0.4594, "step": 2229 }, { "epoch": 1.8781583380123528, "grad_norm": 0.22176846861839294, "learning_rate": 3.6869725449677254e-06, "loss": 0.4736, "step": 2230 }, { "epoch": 1.8790005614823133, "grad_norm": 0.230673685669899, "learning_rate": 3.6822426321529967e-06, "loss": 0.4841, "step": 2231 }, { "epoch": 1.8798427849522739, "grad_norm": 0.2535287141799927, "learning_rate": 3.6775139862610577e-06, "loss": 0.4659, "step": 2232 }, { "epoch": 1.8806850084222346, "grad_norm": 0.21624226868152618, "learning_rate": 3.672786611838142e-06, "loss": 0.4295, "step": 2233 }, { "epoch": 1.8815272318921954, "grad_norm": 0.24784450232982635, "learning_rate": 3.668060513429256e-06, "loss": 0.4921, "step": 2234 }, { "epoch": 1.8823694553621562, "grad_norm": 0.2355220764875412, "learning_rate": 3.6633356955781827e-06, "loss": 0.4975, "step": 2235 }, { "epoch": 1.883211678832117, "grad_norm": 0.22048050165176392, "learning_rate": 3.658612162827472e-06, "loss": 0.47, "step": 2236 }, { "epoch": 1.8840539023020775, "grad_norm": 0.23743535578250885, "learning_rate": 3.653889919718439e-06, "loss": 0.4291, "step": 2237 }, { "epoch": 1.884896125772038, "grad_norm": 0.2380198836326599, "learning_rate": 3.649168970791157e-06, "loss": 0.4532, "step": 2238 }, { "epoch": 1.8857383492419988, "grad_norm": 0.22162163257598877, "learning_rate": 3.644449320584462e-06, "loss": 0.4588, "step": 2239 }, { "epoch": 1.8865805727119596, "grad_norm": 0.24349799752235413, "learning_rate": 3.639730973635929e-06, "loss": 0.5282, "step": 2240 }, { "epoch": 1.8874227961819203, "grad_norm": 0.22769512236118317, "learning_rate": 3.635013934481895e-06, "loss": 0.461, "step": 2241 }, { "epoch": 1.888265019651881, "grad_norm": 0.21868032217025757, "learning_rate": 3.6302982076574244e-06, "loss": 0.4369, "step": 2242 }, { "epoch": 1.8891072431218416, "grad_norm": 0.2511049211025238, "learning_rate": 3.6255837976963336e-06, "loss": 0.4575, "step": 2243 }, { "epoch": 1.8899494665918024, "grad_norm": 0.2520129084587097, "learning_rate": 3.620870709131163e-06, "loss": 0.4735, "step": 2244 }, { "epoch": 1.890791690061763, "grad_norm": 0.23167333006858826, "learning_rate": 3.616158946493188e-06, "loss": 0.4976, "step": 2245 }, { "epoch": 1.8916339135317237, "grad_norm": 0.21593600511550903, "learning_rate": 3.6114485143124068e-06, "loss": 0.4302, "step": 2246 }, { "epoch": 1.8924761370016845, "grad_norm": 0.2321697473526001, "learning_rate": 3.6067394171175397e-06, "loss": 0.4768, "step": 2247 }, { "epoch": 1.8933183604716453, "grad_norm": 0.23237182199954987, "learning_rate": 3.602031659436022e-06, "loss": 0.4783, "step": 2248 }, { "epoch": 1.8941605839416058, "grad_norm": 0.254102885723114, "learning_rate": 3.5973252457940034e-06, "loss": 0.4655, "step": 2249 }, { "epoch": 1.8950028074115666, "grad_norm": 0.2614903450012207, "learning_rate": 3.5926201807163384e-06, "loss": 0.4945, "step": 2250 }, { "epoch": 1.8958450308815271, "grad_norm": 0.24676762521266937, "learning_rate": 3.58791646872659e-06, "loss": 0.4951, "step": 2251 }, { "epoch": 1.8966872543514879, "grad_norm": 0.23170678317546844, "learning_rate": 3.5832141143470146e-06, "loss": 0.4687, "step": 2252 }, { "epoch": 1.8975294778214487, "grad_norm": 0.2281310260295868, "learning_rate": 3.578513122098566e-06, "loss": 0.4216, "step": 2253 }, { "epoch": 1.8983717012914094, "grad_norm": 0.2770254909992218, "learning_rate": 3.5738134965008885e-06, "loss": 0.515, "step": 2254 }, { "epoch": 1.89921392476137, "grad_norm": 0.2316131740808487, "learning_rate": 3.5691152420723115e-06, "loss": 0.4799, "step": 2255 }, { "epoch": 1.9000561482313307, "grad_norm": 0.2226591408252716, "learning_rate": 3.564418363329848e-06, "loss": 0.4612, "step": 2256 }, { "epoch": 1.9008983717012913, "grad_norm": 0.22595475614070892, "learning_rate": 3.559722864789187e-06, "loss": 0.4297, "step": 2257 }, { "epoch": 1.901740595171252, "grad_norm": 0.262628972530365, "learning_rate": 3.5550287509646902e-06, "loss": 0.4999, "step": 2258 }, { "epoch": 1.9025828186412128, "grad_norm": 0.21490174531936646, "learning_rate": 3.5503360263693887e-06, "loss": 0.4471, "step": 2259 }, { "epoch": 1.9034250421111736, "grad_norm": 0.24352744221687317, "learning_rate": 3.5456446955149783e-06, "loss": 0.4966, "step": 2260 }, { "epoch": 1.9042672655811343, "grad_norm": 0.2105543464422226, "learning_rate": 3.5409547629118124e-06, "loss": 0.4303, "step": 2261 }, { "epoch": 1.905109489051095, "grad_norm": 0.23113632202148438, "learning_rate": 3.5362662330689067e-06, "loss": 0.4756, "step": 2262 }, { "epoch": 1.9059517125210554, "grad_norm": 0.24818313121795654, "learning_rate": 3.531579110493917e-06, "loss": 0.4927, "step": 2263 }, { "epoch": 1.9067939359910162, "grad_norm": 0.23451022803783417, "learning_rate": 3.5268933996931596e-06, "loss": 0.4145, "step": 2264 }, { "epoch": 1.907636159460977, "grad_norm": 0.24803072214126587, "learning_rate": 3.5222091051715803e-06, "loss": 0.4714, "step": 2265 }, { "epoch": 1.9084783829309377, "grad_norm": 0.2636359632015228, "learning_rate": 3.517526231432775e-06, "loss": 0.4985, "step": 2266 }, { "epoch": 1.9093206064008985, "grad_norm": 0.22783705592155457, "learning_rate": 3.512844782978963e-06, "loss": 0.4476, "step": 2267 }, { "epoch": 1.910162829870859, "grad_norm": 0.2524937689304352, "learning_rate": 3.5081647643110028e-06, "loss": 0.4893, "step": 2268 }, { "epoch": 1.9110050533408196, "grad_norm": 0.22659601271152496, "learning_rate": 3.5034861799283713e-06, "loss": 0.4379, "step": 2269 }, { "epoch": 1.9118472768107804, "grad_norm": 0.21008813381195068, "learning_rate": 3.498809034329171e-06, "loss": 0.4601, "step": 2270 }, { "epoch": 1.9126895002807411, "grad_norm": 0.21374419331550598, "learning_rate": 3.4941333320101173e-06, "loss": 0.4506, "step": 2271 }, { "epoch": 1.913531723750702, "grad_norm": 0.28253480792045593, "learning_rate": 3.4894590774665414e-06, "loss": 0.4922, "step": 2272 }, { "epoch": 1.9143739472206627, "grad_norm": 0.24516262114048004, "learning_rate": 3.48478627519238e-06, "loss": 0.4562, "step": 2273 }, { "epoch": 1.9152161706906232, "grad_norm": 0.2221686691045761, "learning_rate": 3.480114929680176e-06, "loss": 0.473, "step": 2274 }, { "epoch": 1.916058394160584, "grad_norm": 0.2769831120967865, "learning_rate": 3.4754450454210686e-06, "loss": 0.512, "step": 2275 }, { "epoch": 1.9169006176305445, "grad_norm": 0.24484743177890778, "learning_rate": 3.470776626904795e-06, "loss": 0.4663, "step": 2276 }, { "epoch": 1.9177428411005053, "grad_norm": 0.23974156379699707, "learning_rate": 3.466109678619681e-06, "loss": 0.4831, "step": 2277 }, { "epoch": 1.918585064570466, "grad_norm": 0.23183496296405792, "learning_rate": 3.4614442050526424e-06, "loss": 0.4373, "step": 2278 }, { "epoch": 1.9194272880404268, "grad_norm": 0.26445215940475464, "learning_rate": 3.4567802106891724e-06, "loss": 0.5084, "step": 2279 }, { "epoch": 1.9202695115103874, "grad_norm": 0.24564138054847717, "learning_rate": 3.4521177000133456e-06, "loss": 0.4719, "step": 2280 }, { "epoch": 1.9211117349803482, "grad_norm": 0.26548781991004944, "learning_rate": 3.4474566775078055e-06, "loss": 0.4696, "step": 2281 }, { "epoch": 1.9219539584503087, "grad_norm": 0.2172420620918274, "learning_rate": 3.442797147653776e-06, "loss": 0.4136, "step": 2282 }, { "epoch": 1.9227961819202695, "grad_norm": 0.23656265437602997, "learning_rate": 3.4381391149310294e-06, "loss": 0.5098, "step": 2283 }, { "epoch": 1.9236384053902302, "grad_norm": 0.24886354804039001, "learning_rate": 3.4334825838179143e-06, "loss": 0.4915, "step": 2284 }, { "epoch": 1.924480628860191, "grad_norm": 0.2676074504852295, "learning_rate": 3.4288275587913235e-06, "loss": 0.479, "step": 2285 }, { "epoch": 1.9253228523301515, "grad_norm": 0.22299405932426453, "learning_rate": 3.4241740443267112e-06, "loss": 0.4541, "step": 2286 }, { "epoch": 1.9261650758001123, "grad_norm": 0.24628575146198273, "learning_rate": 3.419522044898073e-06, "loss": 0.4481, "step": 2287 }, { "epoch": 1.9270072992700729, "grad_norm": 0.22339026629924774, "learning_rate": 3.414871564977951e-06, "loss": 0.4985, "step": 2288 }, { "epoch": 1.9278495227400336, "grad_norm": 0.23601706326007843, "learning_rate": 3.4102226090374246e-06, "loss": 0.4978, "step": 2289 }, { "epoch": 1.9286917462099944, "grad_norm": 0.23136410117149353, "learning_rate": 3.4055751815461102e-06, "loss": 0.445, "step": 2290 }, { "epoch": 1.9295339696799552, "grad_norm": 0.22242151200771332, "learning_rate": 3.4009292869721516e-06, "loss": 0.4781, "step": 2291 }, { "epoch": 1.930376193149916, "grad_norm": 0.22979110479354858, "learning_rate": 3.3962849297822225e-06, "loss": 0.482, "step": 2292 }, { "epoch": 1.9312184166198765, "grad_norm": 0.2415197789669037, "learning_rate": 3.3916421144415146e-06, "loss": 0.4696, "step": 2293 }, { "epoch": 1.932060640089837, "grad_norm": 0.21571098268032074, "learning_rate": 3.387000845413742e-06, "loss": 0.4099, "step": 2294 }, { "epoch": 1.9329028635597978, "grad_norm": 0.2364652454853058, "learning_rate": 3.3823611271611266e-06, "loss": 0.5227, "step": 2295 }, { "epoch": 1.9337450870297586, "grad_norm": 0.2197917103767395, "learning_rate": 3.377722964144405e-06, "loss": 0.4387, "step": 2296 }, { "epoch": 1.9345873104997193, "grad_norm": 0.23961636424064636, "learning_rate": 3.3730863608228125e-06, "loss": 0.5009, "step": 2297 }, { "epoch": 1.93542953396968, "grad_norm": 0.240502268075943, "learning_rate": 3.368451321654091e-06, "loss": 0.4872, "step": 2298 }, { "epoch": 1.9362717574396406, "grad_norm": 0.2292996495962143, "learning_rate": 3.363817851094473e-06, "loss": 0.4225, "step": 2299 }, { "epoch": 1.9371139809096012, "grad_norm": 0.23704524338245392, "learning_rate": 3.3591859535986894e-06, "loss": 0.4908, "step": 2300 }, { "epoch": 1.937956204379562, "grad_norm": 0.2343161255121231, "learning_rate": 3.35455563361995e-06, "loss": 0.4869, "step": 2301 }, { "epoch": 1.9387984278495227, "grad_norm": 0.22588270902633667, "learning_rate": 3.3499268956099583e-06, "loss": 0.4616, "step": 2302 }, { "epoch": 1.9396406513194835, "grad_norm": 0.240634024143219, "learning_rate": 3.345299744018886e-06, "loss": 0.485, "step": 2303 }, { "epoch": 1.9404828747894443, "grad_norm": 0.24218091368675232, "learning_rate": 3.3406741832953893e-06, "loss": 0.4718, "step": 2304 }, { "epoch": 1.9413250982594048, "grad_norm": 0.24131685495376587, "learning_rate": 3.336050217886588e-06, "loss": 0.4795, "step": 2305 }, { "epoch": 1.9421673217293656, "grad_norm": 0.23108068108558655, "learning_rate": 3.331427852238073e-06, "loss": 0.4822, "step": 2306 }, { "epoch": 1.9430095451993261, "grad_norm": 0.2247542142868042, "learning_rate": 3.3268070907938915e-06, "loss": 0.4486, "step": 2307 }, { "epoch": 1.9438517686692869, "grad_norm": 0.2157062590122223, "learning_rate": 3.3221879379965553e-06, "loss": 0.4555, "step": 2308 }, { "epoch": 1.9446939921392477, "grad_norm": 0.26384252309799194, "learning_rate": 3.3175703982870232e-06, "loss": 0.4558, "step": 2309 }, { "epoch": 1.9455362156092084, "grad_norm": 0.23551000654697418, "learning_rate": 3.3129544761047093e-06, "loss": 0.4093, "step": 2310 }, { "epoch": 1.946378439079169, "grad_norm": 0.22781404852867126, "learning_rate": 3.3083401758874655e-06, "loss": 0.4846, "step": 2311 }, { "epoch": 1.9472206625491297, "grad_norm": 0.24994465708732605, "learning_rate": 3.303727502071591e-06, "loss": 0.5023, "step": 2312 }, { "epoch": 1.9480628860190903, "grad_norm": 0.216067835688591, "learning_rate": 3.2991164590918162e-06, "loss": 0.4404, "step": 2313 }, { "epoch": 1.948905109489051, "grad_norm": 0.22367478907108307, "learning_rate": 3.2945070513813082e-06, "loss": 0.4651, "step": 2314 }, { "epoch": 1.9497473329590118, "grad_norm": 0.24405960738658905, "learning_rate": 3.289899283371657e-06, "loss": 0.4952, "step": 2315 }, { "epoch": 1.9505895564289726, "grad_norm": 0.24524371325969696, "learning_rate": 3.2852931594928804e-06, "loss": 0.4497, "step": 2316 }, { "epoch": 1.9514317798989333, "grad_norm": 0.2377604842185974, "learning_rate": 3.280688684173412e-06, "loss": 0.4811, "step": 2317 }, { "epoch": 1.952274003368894, "grad_norm": 0.22415748238563538, "learning_rate": 3.276085861840106e-06, "loss": 0.4617, "step": 2318 }, { "epoch": 1.9531162268388544, "grad_norm": 0.22876887023448944, "learning_rate": 3.271484696918218e-06, "loss": 0.4228, "step": 2319 }, { "epoch": 1.9539584503088152, "grad_norm": 0.2776944935321808, "learning_rate": 3.2668851938314217e-06, "loss": 0.4942, "step": 2320 }, { "epoch": 1.954800673778776, "grad_norm": 0.24805109202861786, "learning_rate": 3.262287357001781e-06, "loss": 0.48, "step": 2321 }, { "epoch": 1.9556428972487367, "grad_norm": 0.24947591125965118, "learning_rate": 3.2576911908497695e-06, "loss": 0.4721, "step": 2322 }, { "epoch": 1.9564851207186975, "grad_norm": 0.23243258893489838, "learning_rate": 3.253096699794245e-06, "loss": 0.4849, "step": 2323 }, { "epoch": 1.957327344188658, "grad_norm": 0.2574131488800049, "learning_rate": 3.248503888252461e-06, "loss": 0.4578, "step": 2324 }, { "epoch": 1.9581695676586186, "grad_norm": 0.2409823089838028, "learning_rate": 3.2439127606400546e-06, "loss": 0.444, "step": 2325 }, { "epoch": 1.9590117911285794, "grad_norm": 0.23336327075958252, "learning_rate": 3.239323321371039e-06, "loss": 0.4671, "step": 2326 }, { "epoch": 1.9598540145985401, "grad_norm": 0.24250414967536926, "learning_rate": 3.2347355748578134e-06, "loss": 0.5309, "step": 2327 }, { "epoch": 1.960696238068501, "grad_norm": 0.21592780947685242, "learning_rate": 3.2301495255111426e-06, "loss": 0.4298, "step": 2328 }, { "epoch": 1.9615384615384617, "grad_norm": 0.23644445836544037, "learning_rate": 3.225565177740163e-06, "loss": 0.4828, "step": 2329 }, { "epoch": 1.9623806850084222, "grad_norm": 0.23241029679775238, "learning_rate": 3.2209825359523717e-06, "loss": 0.4446, "step": 2330 }, { "epoch": 1.9632229084783828, "grad_norm": 0.22448498010635376, "learning_rate": 3.2164016045536306e-06, "loss": 0.4781, "step": 2331 }, { "epoch": 1.9640651319483435, "grad_norm": 0.22267581522464752, "learning_rate": 3.2118223879481525e-06, "loss": 0.4622, "step": 2332 }, { "epoch": 1.9649073554183043, "grad_norm": 0.23176924884319305, "learning_rate": 3.2072448905385046e-06, "loss": 0.4725, "step": 2333 }, { "epoch": 1.965749578888265, "grad_norm": 0.24462567269802094, "learning_rate": 3.202669116725598e-06, "loss": 0.4842, "step": 2334 }, { "epoch": 1.9665918023582258, "grad_norm": 0.2311275750398636, "learning_rate": 3.1980950709086923e-06, "loss": 0.487, "step": 2335 }, { "epoch": 1.9674340258281864, "grad_norm": 0.22848251461982727, "learning_rate": 3.193522757485378e-06, "loss": 0.4842, "step": 2336 }, { "epoch": 1.9682762492981472, "grad_norm": 0.21176692843437195, "learning_rate": 3.1889521808515888e-06, "loss": 0.4284, "step": 2337 }, { "epoch": 1.9691184727681077, "grad_norm": 0.23995238542556763, "learning_rate": 3.1843833454015804e-06, "loss": 0.4782, "step": 2338 }, { "epoch": 1.9699606962380685, "grad_norm": 0.3006937801837921, "learning_rate": 3.179816255527941e-06, "loss": 0.4366, "step": 2339 }, { "epoch": 1.9708029197080292, "grad_norm": 0.2548455595970154, "learning_rate": 3.1752509156215738e-06, "loss": 0.4818, "step": 2340 }, { "epoch": 1.97164514317799, "grad_norm": 0.24040277302265167, "learning_rate": 3.1706873300717094e-06, "loss": 0.4525, "step": 2341 }, { "epoch": 1.9724873666479505, "grad_norm": 0.23224803805351257, "learning_rate": 3.16612550326588e-06, "loss": 0.4613, "step": 2342 }, { "epoch": 1.9733295901179113, "grad_norm": 0.23754388093948364, "learning_rate": 3.1615654395899377e-06, "loss": 0.4826, "step": 2343 }, { "epoch": 1.9741718135878719, "grad_norm": 0.22505979239940643, "learning_rate": 3.1570071434280292e-06, "loss": 0.4453, "step": 2344 }, { "epoch": 1.9750140370578326, "grad_norm": 0.23376987874507904, "learning_rate": 3.152450619162612e-06, "loss": 0.4402, "step": 2345 }, { "epoch": 1.9758562605277934, "grad_norm": 0.23378251492977142, "learning_rate": 3.1478958711744324e-06, "loss": 0.4895, "step": 2346 }, { "epoch": 1.9766984839977542, "grad_norm": 0.24050544202327728, "learning_rate": 3.1433429038425334e-06, "loss": 0.4403, "step": 2347 }, { "epoch": 1.977540707467715, "grad_norm": 0.25802502036094666, "learning_rate": 3.1387917215442427e-06, "loss": 0.4931, "step": 2348 }, { "epoch": 1.9783829309376755, "grad_norm": 0.25330525636672974, "learning_rate": 3.1342423286551756e-06, "loss": 0.4937, "step": 2349 }, { "epoch": 1.979225154407636, "grad_norm": 0.24163003265857697, "learning_rate": 3.1296947295492226e-06, "loss": 0.4728, "step": 2350 }, { "epoch": 1.9800673778775968, "grad_norm": 0.2694387137889862, "learning_rate": 3.125148928598554e-06, "loss": 0.4961, "step": 2351 }, { "epoch": 1.9809096013475576, "grad_norm": 0.2237536758184433, "learning_rate": 3.120604930173608e-06, "loss": 0.4229, "step": 2352 }, { "epoch": 1.9817518248175183, "grad_norm": 0.24758058786392212, "learning_rate": 3.116062738643092e-06, "loss": 0.5133, "step": 2353 }, { "epoch": 1.982594048287479, "grad_norm": 0.2267579883337021, "learning_rate": 3.1115223583739746e-06, "loss": 0.442, "step": 2354 }, { "epoch": 1.9834362717574396, "grad_norm": 0.2572832405567169, "learning_rate": 3.1069837937314846e-06, "loss": 0.4731, "step": 2355 }, { "epoch": 1.9842784952274002, "grad_norm": 0.26796436309814453, "learning_rate": 3.1024470490791027e-06, "loss": 0.4916, "step": 2356 }, { "epoch": 1.985120718697361, "grad_norm": 0.24975520372390747, "learning_rate": 3.097912128778563e-06, "loss": 0.4351, "step": 2357 }, { "epoch": 1.9859629421673217, "grad_norm": 0.2064504623413086, "learning_rate": 3.093379037189842e-06, "loss": 0.4514, "step": 2358 }, { "epoch": 1.9868051656372825, "grad_norm": 0.2245238870382309, "learning_rate": 3.0888477786711646e-06, "loss": 0.4638, "step": 2359 }, { "epoch": 1.9876473891072433, "grad_norm": 0.23739032447338104, "learning_rate": 3.0843183575789824e-06, "loss": 0.517, "step": 2360 }, { "epoch": 1.9884896125772038, "grad_norm": 0.23693303763866425, "learning_rate": 3.0797907782679944e-06, "loss": 0.4402, "step": 2361 }, { "epoch": 1.9893318360471643, "grad_norm": 0.22988222539424896, "learning_rate": 3.075265045091114e-06, "loss": 0.4265, "step": 2362 }, { "epoch": 1.9901740595171251, "grad_norm": 0.25686320662498474, "learning_rate": 3.070741162399492e-06, "loss": 0.5508, "step": 2363 }, { "epoch": 1.9910162829870859, "grad_norm": 0.24406404793262482, "learning_rate": 3.0662191345424925e-06, "loss": 0.4745, "step": 2364 }, { "epoch": 1.9918585064570467, "grad_norm": 0.23512901365756989, "learning_rate": 3.061698965867701e-06, "loss": 0.4857, "step": 2365 }, { "epoch": 1.9927007299270074, "grad_norm": 0.2306051403284073, "learning_rate": 3.057180660720912e-06, "loss": 0.4586, "step": 2366 }, { "epoch": 1.993542953396968, "grad_norm": 0.24025657773017883, "learning_rate": 3.0526642234461313e-06, "loss": 0.4331, "step": 2367 }, { "epoch": 1.9943851768669287, "grad_norm": 0.24769152700901031, "learning_rate": 3.048149658385565e-06, "loss": 0.4903, "step": 2368 }, { "epoch": 1.9952274003368893, "grad_norm": 0.2354225069284439, "learning_rate": 3.043636969879625e-06, "loss": 0.4727, "step": 2369 }, { "epoch": 1.99606962380685, "grad_norm": 0.23760485649108887, "learning_rate": 3.039126162266912e-06, "loss": 0.4797, "step": 2370 }, { "epoch": 1.9969118472768108, "grad_norm": 0.22512374818325043, "learning_rate": 3.0346172398842254e-06, "loss": 0.4732, "step": 2371 }, { "epoch": 1.9977540707467716, "grad_norm": 0.2514665126800537, "learning_rate": 3.0301102070665466e-06, "loss": 0.4821, "step": 2372 }, { "epoch": 1.9985962942167321, "grad_norm": 0.22923235595226288, "learning_rate": 3.0256050681470446e-06, "loss": 0.4305, "step": 2373 }, { "epoch": 1.999438517686693, "grad_norm": 0.24257679283618927, "learning_rate": 3.0211018274570625e-06, "loss": 0.4882, "step": 2374 }, { "epoch": 2.0002807411566534, "grad_norm": 0.48293599486351013, "learning_rate": 3.0166004893261247e-06, "loss": 0.7714, "step": 2375 }, { "epoch": 2.001122964626614, "grad_norm": 0.20712818205356598, "learning_rate": 3.012101058081919e-06, "loss": 0.4036, "step": 2376 }, { "epoch": 2.001965188096575, "grad_norm": 0.23958203196525574, "learning_rate": 3.007603538050309e-06, "loss": 0.4814, "step": 2377 }, { "epoch": 2.0028074115665357, "grad_norm": 0.22883524000644684, "learning_rate": 3.0031079335553097e-06, "loss": 0.3884, "step": 2378 }, { "epoch": 2.0036496350364965, "grad_norm": 0.24252818524837494, "learning_rate": 2.9986142489191074e-06, "loss": 0.5065, "step": 2379 }, { "epoch": 2.004491858506457, "grad_norm": 0.21617721021175385, "learning_rate": 2.994122488462029e-06, "loss": 0.4389, "step": 2380 }, { "epoch": 2.0053340819764176, "grad_norm": 0.2210903763771057, "learning_rate": 2.989632656502564e-06, "loss": 0.4277, "step": 2381 }, { "epoch": 2.0061763054463784, "grad_norm": 0.22908641397953033, "learning_rate": 2.9851447573573383e-06, "loss": 0.4625, "step": 2382 }, { "epoch": 2.007018528916339, "grad_norm": 0.24103571474552155, "learning_rate": 2.980658795341125e-06, "loss": 0.4359, "step": 2383 }, { "epoch": 2.0078607523863, "grad_norm": 0.2347957044839859, "learning_rate": 2.9761747747668314e-06, "loss": 0.4874, "step": 2384 }, { "epoch": 2.0087029758562607, "grad_norm": 0.24842455983161926, "learning_rate": 2.971692699945502e-06, "loss": 0.4919, "step": 2385 }, { "epoch": 2.0095451993262214, "grad_norm": 0.22086718678474426, "learning_rate": 2.9672125751863067e-06, "loss": 0.4376, "step": 2386 }, { "epoch": 2.0103874227961818, "grad_norm": 0.22541478276252747, "learning_rate": 2.9627344047965433e-06, "loss": 0.4135, "step": 2387 }, { "epoch": 2.0112296462661425, "grad_norm": 0.27595090866088867, "learning_rate": 2.958258193081629e-06, "loss": 0.5341, "step": 2388 }, { "epoch": 2.0120718697361033, "grad_norm": 0.20768007636070251, "learning_rate": 2.9537839443451e-06, "loss": 0.3978, "step": 2389 }, { "epoch": 2.012914093206064, "grad_norm": 0.2637966573238373, "learning_rate": 2.949311662888601e-06, "loss": 0.4581, "step": 2390 }, { "epoch": 2.013756316676025, "grad_norm": 0.24614278972148895, "learning_rate": 2.9448413530118912e-06, "loss": 0.4587, "step": 2391 }, { "epoch": 2.0145985401459856, "grad_norm": 0.26177600026130676, "learning_rate": 2.94037301901283e-06, "loss": 0.4659, "step": 2392 }, { "epoch": 2.015440763615946, "grad_norm": 0.20723330974578857, "learning_rate": 2.935906665187378e-06, "loss": 0.4031, "step": 2393 }, { "epoch": 2.0162829870859067, "grad_norm": 0.21708457171916962, "learning_rate": 2.9314422958295906e-06, "loss": 0.4582, "step": 2394 }, { "epoch": 2.0171252105558675, "grad_norm": 0.22536545991897583, "learning_rate": 2.9269799152316226e-06, "loss": 0.4781, "step": 2395 }, { "epoch": 2.0179674340258282, "grad_norm": 0.24768893420696259, "learning_rate": 2.922519527683706e-06, "loss": 0.4913, "step": 2396 }, { "epoch": 2.018809657495789, "grad_norm": 0.2274412363767624, "learning_rate": 2.9180611374741623e-06, "loss": 0.4184, "step": 2397 }, { "epoch": 2.0196518809657498, "grad_norm": 0.2155018299818039, "learning_rate": 2.913604748889395e-06, "loss": 0.4162, "step": 2398 }, { "epoch": 2.02049410443571, "grad_norm": 0.2427009642124176, "learning_rate": 2.9091503662138764e-06, "loss": 0.5116, "step": 2399 }, { "epoch": 2.021336327905671, "grad_norm": 0.21371640264987946, "learning_rate": 2.904697993730159e-06, "loss": 0.4566, "step": 2400 }, { "epoch": 2.0221785513756316, "grad_norm": 0.23852357268333435, "learning_rate": 2.900247635718856e-06, "loss": 0.4733, "step": 2401 }, { "epoch": 2.0230207748455924, "grad_norm": 0.2277056723833084, "learning_rate": 2.8957992964586445e-06, "loss": 0.4016, "step": 2402 }, { "epoch": 2.023862998315553, "grad_norm": 0.24434834718704224, "learning_rate": 2.891352980226262e-06, "loss": 0.4563, "step": 2403 }, { "epoch": 2.024705221785514, "grad_norm": 0.21470028162002563, "learning_rate": 2.886908691296504e-06, "loss": 0.3877, "step": 2404 }, { "epoch": 2.0255474452554743, "grad_norm": 0.20441220700740814, "learning_rate": 2.8824664339422115e-06, "loss": 0.4393, "step": 2405 }, { "epoch": 2.026389668725435, "grad_norm": 0.21229788661003113, "learning_rate": 2.8780262124342755e-06, "loss": 0.3875, "step": 2406 }, { "epoch": 2.027231892195396, "grad_norm": 0.2415217161178589, "learning_rate": 2.873588031041627e-06, "loss": 0.5148, "step": 2407 }, { "epoch": 2.0280741156653566, "grad_norm": 0.22217142581939697, "learning_rate": 2.8691518940312413e-06, "loss": 0.4837, "step": 2408 }, { "epoch": 2.0289163391353173, "grad_norm": 0.20931269228458405, "learning_rate": 2.8647178056681197e-06, "loss": 0.4431, "step": 2409 }, { "epoch": 2.029758562605278, "grad_norm": 0.25923463702201843, "learning_rate": 2.8602857702153054e-06, "loss": 0.4678, "step": 2410 }, { "epoch": 2.0306007860752384, "grad_norm": 0.26442405581474304, "learning_rate": 2.8558557919338537e-06, "loss": 0.4541, "step": 2411 }, { "epoch": 2.031443009545199, "grad_norm": 0.22383743524551392, "learning_rate": 2.8514278750828537e-06, "loss": 0.4268, "step": 2412 }, { "epoch": 2.03228523301516, "grad_norm": 0.2324676364660263, "learning_rate": 2.847002023919406e-06, "loss": 0.4975, "step": 2413 }, { "epoch": 2.0331274564851207, "grad_norm": 0.26658114790916443, "learning_rate": 2.8425782426986304e-06, "loss": 0.4888, "step": 2414 }, { "epoch": 2.0339696799550815, "grad_norm": 0.22974053025245667, "learning_rate": 2.838156535673652e-06, "loss": 0.4519, "step": 2415 }, { "epoch": 2.0348119034250423, "grad_norm": 0.22945885360240936, "learning_rate": 2.833736907095604e-06, "loss": 0.4589, "step": 2416 }, { "epoch": 2.035654126895003, "grad_norm": 0.22392810881137848, "learning_rate": 2.8293193612136183e-06, "loss": 0.4527, "step": 2417 }, { "epoch": 2.0364963503649633, "grad_norm": 0.28359919786453247, "learning_rate": 2.8249039022748315e-06, "loss": 0.467, "step": 2418 }, { "epoch": 2.037338573834924, "grad_norm": 0.2464691549539566, "learning_rate": 2.8204905345243664e-06, "loss": 0.4538, "step": 2419 }, { "epoch": 2.038180797304885, "grad_norm": 0.2072964459657669, "learning_rate": 2.816079262205339e-06, "loss": 0.4329, "step": 2420 }, { "epoch": 2.0390230207748457, "grad_norm": 0.22565150260925293, "learning_rate": 2.8116700895588473e-06, "loss": 0.4625, "step": 2421 }, { "epoch": 2.0398652442448064, "grad_norm": 0.21480637788772583, "learning_rate": 2.807263020823977e-06, "loss": 0.4512, "step": 2422 }, { "epoch": 2.040707467714767, "grad_norm": 0.22486989200115204, "learning_rate": 2.8028580602377852e-06, "loss": 0.4343, "step": 2423 }, { "epoch": 2.0415496911847275, "grad_norm": 0.23390941321849823, "learning_rate": 2.798455212035305e-06, "loss": 0.4675, "step": 2424 }, { "epoch": 2.0423919146546883, "grad_norm": 0.21503818035125732, "learning_rate": 2.7940544804495345e-06, "loss": 0.4362, "step": 2425 }, { "epoch": 2.043234138124649, "grad_norm": 0.20344333350658417, "learning_rate": 2.789655869711445e-06, "loss": 0.4015, "step": 2426 }, { "epoch": 2.04407636159461, "grad_norm": 0.2343718558549881, "learning_rate": 2.785259384049959e-06, "loss": 0.4898, "step": 2427 }, { "epoch": 2.0449185850645706, "grad_norm": 0.2462015002965927, "learning_rate": 2.780865027691968e-06, "loss": 0.4276, "step": 2428 }, { "epoch": 2.0457608085345313, "grad_norm": 0.23467113077640533, "learning_rate": 2.7764728048623003e-06, "loss": 0.4781, "step": 2429 }, { "epoch": 2.0466030320044917, "grad_norm": 0.20888268947601318, "learning_rate": 2.7720827197837475e-06, "loss": 0.4781, "step": 2430 }, { "epoch": 2.0474452554744524, "grad_norm": 0.24126654863357544, "learning_rate": 2.7676947766770367e-06, "loss": 0.4465, "step": 2431 }, { "epoch": 2.048287478944413, "grad_norm": 0.2366095632314682, "learning_rate": 2.7633089797608435e-06, "loss": 0.4803, "step": 2432 }, { "epoch": 2.049129702414374, "grad_norm": 0.22462713718414307, "learning_rate": 2.7589253332517736e-06, "loss": 0.4279, "step": 2433 }, { "epoch": 2.0499719258843347, "grad_norm": 0.2404090017080307, "learning_rate": 2.7545438413643666e-06, "loss": 0.4577, "step": 2434 }, { "epoch": 2.0508141493542955, "grad_norm": 0.23112651705741882, "learning_rate": 2.7501645083110893e-06, "loss": 0.4654, "step": 2435 }, { "epoch": 2.051656372824256, "grad_norm": 0.21822121739387512, "learning_rate": 2.745787338302341e-06, "loss": 0.4639, "step": 2436 }, { "epoch": 2.0524985962942166, "grad_norm": 0.22690536081790924, "learning_rate": 2.741412335546431e-06, "loss": 0.4677, "step": 2437 }, { "epoch": 2.0533408197641774, "grad_norm": 0.20652322471141815, "learning_rate": 2.7370395042495913e-06, "loss": 0.4355, "step": 2438 }, { "epoch": 2.054183043234138, "grad_norm": 0.20191927254199982, "learning_rate": 2.7326688486159613e-06, "loss": 0.4136, "step": 2439 }, { "epoch": 2.055025266704099, "grad_norm": 0.2241477072238922, "learning_rate": 2.7283003728475952e-06, "loss": 0.4413, "step": 2440 }, { "epoch": 2.0558674901740597, "grad_norm": 0.23900043964385986, "learning_rate": 2.7239340811444476e-06, "loss": 0.4847, "step": 2441 }, { "epoch": 2.05670971364402, "grad_norm": 0.24208380281925201, "learning_rate": 2.7195699777043723e-06, "loss": 0.4893, "step": 2442 }, { "epoch": 2.0575519371139808, "grad_norm": 0.22890697419643402, "learning_rate": 2.7152080667231185e-06, "loss": 0.4476, "step": 2443 }, { "epoch": 2.0583941605839415, "grad_norm": 0.2154841423034668, "learning_rate": 2.710848352394334e-06, "loss": 0.4581, "step": 2444 }, { "epoch": 2.0592363840539023, "grad_norm": 0.21848052740097046, "learning_rate": 2.706490838909547e-06, "loss": 0.3985, "step": 2445 }, { "epoch": 2.060078607523863, "grad_norm": 0.23574206233024597, "learning_rate": 2.7021355304581765e-06, "loss": 0.4612, "step": 2446 }, { "epoch": 2.060920830993824, "grad_norm": 0.24119287729263306, "learning_rate": 2.6977824312275123e-06, "loss": 0.4503, "step": 2447 }, { "epoch": 2.0617630544637846, "grad_norm": 0.23082804679870605, "learning_rate": 2.6934315454027323e-06, "loss": 0.4886, "step": 2448 }, { "epoch": 2.062605277933745, "grad_norm": 0.2255963236093521, "learning_rate": 2.6890828771668742e-06, "loss": 0.4862, "step": 2449 }, { "epoch": 2.0634475014037057, "grad_norm": 0.20872530341148376, "learning_rate": 2.684736430700854e-06, "loss": 0.4546, "step": 2450 }, { "epoch": 2.0642897248736665, "grad_norm": 0.21779081225395203, "learning_rate": 2.680392210183446e-06, "loss": 0.4444, "step": 2451 }, { "epoch": 2.0651319483436272, "grad_norm": 0.2511299252510071, "learning_rate": 2.6760502197912842e-06, "loss": 0.4541, "step": 2452 }, { "epoch": 2.065974171813588, "grad_norm": 0.2397814244031906, "learning_rate": 2.671710463698859e-06, "loss": 0.4545, "step": 2453 }, { "epoch": 2.0668163952835488, "grad_norm": 0.2288566529750824, "learning_rate": 2.6673729460785174e-06, "loss": 0.3998, "step": 2454 }, { "epoch": 2.067658618753509, "grad_norm": 0.24336493015289307, "learning_rate": 2.663037671100448e-06, "loss": 0.4854, "step": 2455 }, { "epoch": 2.06850084222347, "grad_norm": 0.23682641983032227, "learning_rate": 2.6587046429326855e-06, "loss": 0.4288, "step": 2456 }, { "epoch": 2.0693430656934306, "grad_norm": 0.21317684650421143, "learning_rate": 2.6543738657411033e-06, "loss": 0.4177, "step": 2457 }, { "epoch": 2.0701852891633914, "grad_norm": 0.22909586131572723, "learning_rate": 2.6500453436894157e-06, "loss": 0.4582, "step": 2458 }, { "epoch": 2.071027512633352, "grad_norm": 0.22728373110294342, "learning_rate": 2.6457190809391627e-06, "loss": 0.4282, "step": 2459 }, { "epoch": 2.071869736103313, "grad_norm": 0.2338845580816269, "learning_rate": 2.6413950816497146e-06, "loss": 0.4686, "step": 2460 }, { "epoch": 2.0727119595732733, "grad_norm": 0.22616741061210632, "learning_rate": 2.6370733499782654e-06, "loss": 0.4559, "step": 2461 }, { "epoch": 2.073554183043234, "grad_norm": 0.2314969301223755, "learning_rate": 2.6327538900798306e-06, "loss": 0.4059, "step": 2462 }, { "epoch": 2.074396406513195, "grad_norm": 0.23194873332977295, "learning_rate": 2.628436706107238e-06, "loss": 0.4766, "step": 2463 }, { "epoch": 2.0752386299831556, "grad_norm": 0.22997866570949554, "learning_rate": 2.6241218022111336e-06, "loss": 0.4728, "step": 2464 }, { "epoch": 2.0760808534531163, "grad_norm": 0.2403036206960678, "learning_rate": 2.6198091825399606e-06, "loss": 0.4562, "step": 2465 }, { "epoch": 2.076923076923077, "grad_norm": 0.2239658534526825, "learning_rate": 2.6154988512399784e-06, "loss": 0.4238, "step": 2466 }, { "epoch": 2.0777653003930374, "grad_norm": 0.23092861473560333, "learning_rate": 2.6111908124552355e-06, "loss": 0.4312, "step": 2467 }, { "epoch": 2.078607523862998, "grad_norm": 0.24298061430454254, "learning_rate": 2.6068850703275856e-06, "loss": 0.4527, "step": 2468 }, { "epoch": 2.079449747332959, "grad_norm": 0.24510498344898224, "learning_rate": 2.6025816289966703e-06, "loss": 0.4926, "step": 2469 }, { "epoch": 2.0802919708029197, "grad_norm": 0.20606239140033722, "learning_rate": 2.598280492599913e-06, "loss": 0.4279, "step": 2470 }, { "epoch": 2.0811341942728805, "grad_norm": 0.21954689919948578, "learning_rate": 2.5939816652725324e-06, "loss": 0.4527, "step": 2471 }, { "epoch": 2.0819764177428413, "grad_norm": 0.2350880354642868, "learning_rate": 2.5896851511475184e-06, "loss": 0.4738, "step": 2472 }, { "epoch": 2.0828186412128016, "grad_norm": 0.21325266361236572, "learning_rate": 2.5853909543556444e-06, "loss": 0.3987, "step": 2473 }, { "epoch": 2.0836608646827623, "grad_norm": 0.2476065456867218, "learning_rate": 2.5810990790254486e-06, "loss": 0.4836, "step": 2474 }, { "epoch": 2.084503088152723, "grad_norm": 0.21705298125743866, "learning_rate": 2.5768095292832412e-06, "loss": 0.4369, "step": 2475 }, { "epoch": 2.085345311622684, "grad_norm": 0.2251669019460678, "learning_rate": 2.5725223092530937e-06, "loss": 0.4467, "step": 2476 }, { "epoch": 2.0861875350926447, "grad_norm": 0.23440448939800262, "learning_rate": 2.568237423056844e-06, "loss": 0.4254, "step": 2477 }, { "epoch": 2.0870297585626054, "grad_norm": 0.25948014855384827, "learning_rate": 2.5639548748140803e-06, "loss": 0.4429, "step": 2478 }, { "epoch": 2.087871982032566, "grad_norm": 0.23411008715629578, "learning_rate": 2.5596746686421436e-06, "loss": 0.4505, "step": 2479 }, { "epoch": 2.0887142055025265, "grad_norm": 0.2615049183368683, "learning_rate": 2.5553968086561244e-06, "loss": 0.4923, "step": 2480 }, { "epoch": 2.0895564289724873, "grad_norm": 0.22435131669044495, "learning_rate": 2.5511212989688587e-06, "loss": 0.4138, "step": 2481 }, { "epoch": 2.090398652442448, "grad_norm": 0.22865146398544312, "learning_rate": 2.546848143690922e-06, "loss": 0.455, "step": 2482 }, { "epoch": 2.091240875912409, "grad_norm": 0.22732031345367432, "learning_rate": 2.5425773469306247e-06, "loss": 0.4206, "step": 2483 }, { "epoch": 2.0920830993823696, "grad_norm": 0.2407584935426712, "learning_rate": 2.5383089127940087e-06, "loss": 0.4284, "step": 2484 }, { "epoch": 2.0929253228523303, "grad_norm": 0.22615496814250946, "learning_rate": 2.534042845384851e-06, "loss": 0.4038, "step": 2485 }, { "epoch": 2.0937675463222907, "grad_norm": 0.23467832803726196, "learning_rate": 2.5297791488046445e-06, "loss": 0.4449, "step": 2486 }, { "epoch": 2.0946097697922514, "grad_norm": 0.21393455564975739, "learning_rate": 2.525517827152614e-06, "loss": 0.4834, "step": 2487 }, { "epoch": 2.095451993262212, "grad_norm": 0.23691707849502563, "learning_rate": 2.5212588845256837e-06, "loss": 0.4172, "step": 2488 }, { "epoch": 2.096294216732173, "grad_norm": 0.24893760681152344, "learning_rate": 2.517002325018508e-06, "loss": 0.431, "step": 2489 }, { "epoch": 2.0971364402021337, "grad_norm": 0.2392166405916214, "learning_rate": 2.5127481527234397e-06, "loss": 0.4393, "step": 2490 }, { "epoch": 2.0979786636720945, "grad_norm": 0.25762611627578735, "learning_rate": 2.508496371730543e-06, "loss": 0.5471, "step": 2491 }, { "epoch": 2.098820887142055, "grad_norm": 0.20623528957366943, "learning_rate": 2.5042469861275768e-06, "loss": 0.385, "step": 2492 }, { "epoch": 2.0996631106120156, "grad_norm": 0.22389215230941772, "learning_rate": 2.5000000000000015e-06, "loss": 0.4059, "step": 2493 }, { "epoch": 2.1005053340819764, "grad_norm": 0.23286154866218567, "learning_rate": 2.4957554174309655e-06, "loss": 0.4746, "step": 2494 }, { "epoch": 2.101347557551937, "grad_norm": 0.2427096962928772, "learning_rate": 2.491513242501315e-06, "loss": 0.461, "step": 2495 }, { "epoch": 2.102189781021898, "grad_norm": 0.23572784662246704, "learning_rate": 2.487273479289574e-06, "loss": 0.4253, "step": 2496 }, { "epoch": 2.1030320044918587, "grad_norm": 0.22471363842487335, "learning_rate": 2.4830361318719493e-06, "loss": 0.4685, "step": 2497 }, { "epoch": 2.103874227961819, "grad_norm": 0.20033560693264008, "learning_rate": 2.4788012043223253e-06, "loss": 0.4051, "step": 2498 }, { "epoch": 2.1047164514317798, "grad_norm": 0.26713186502456665, "learning_rate": 2.4745687007122636e-06, "loss": 0.5254, "step": 2499 }, { "epoch": 2.1055586749017405, "grad_norm": 0.24324576556682587, "learning_rate": 2.470338625110991e-06, "loss": 0.4624, "step": 2500 }, { "epoch": 2.1064008983717013, "grad_norm": 0.2230074256658554, "learning_rate": 2.4661109815854005e-06, "loss": 0.4466, "step": 2501 }, { "epoch": 2.107243121841662, "grad_norm": 0.22644412517547607, "learning_rate": 2.4618857742000463e-06, "loss": 0.4306, "step": 2502 }, { "epoch": 2.108085345311623, "grad_norm": 0.2364521026611328, "learning_rate": 2.4576630070171447e-06, "loss": 0.4463, "step": 2503 }, { "epoch": 2.108927568781583, "grad_norm": 0.24017439782619476, "learning_rate": 2.4534426840965604e-06, "loss": 0.436, "step": 2504 }, { "epoch": 2.109769792251544, "grad_norm": 0.24138575792312622, "learning_rate": 2.449224809495815e-06, "loss": 0.4797, "step": 2505 }, { "epoch": 2.1106120157215047, "grad_norm": 0.2099464386701584, "learning_rate": 2.4450093872700648e-06, "loss": 0.4272, "step": 2506 }, { "epoch": 2.1114542391914655, "grad_norm": 0.22252784669399261, "learning_rate": 2.440796421472122e-06, "loss": 0.47, "step": 2507 }, { "epoch": 2.1122964626614262, "grad_norm": 0.21156218647956848, "learning_rate": 2.436585916152426e-06, "loss": 0.4527, "step": 2508 }, { "epoch": 2.113138686131387, "grad_norm": 0.2469550520181656, "learning_rate": 2.4323778753590582e-06, "loss": 0.4799, "step": 2509 }, { "epoch": 2.1139809096013478, "grad_norm": 0.22976253926753998, "learning_rate": 2.4281723031377275e-06, "loss": 0.4327, "step": 2510 }, { "epoch": 2.114823133071308, "grad_norm": 0.22530746459960938, "learning_rate": 2.423969203531768e-06, "loss": 0.452, "step": 2511 }, { "epoch": 2.115665356541269, "grad_norm": 0.2401057928800583, "learning_rate": 2.419768580582137e-06, "loss": 0.4309, "step": 2512 }, { "epoch": 2.1165075800112296, "grad_norm": 0.24255669116973877, "learning_rate": 2.4155704383274154e-06, "loss": 0.465, "step": 2513 }, { "epoch": 2.1173498034811904, "grad_norm": 0.24360138177871704, "learning_rate": 2.411374780803793e-06, "loss": 0.4972, "step": 2514 }, { "epoch": 2.118192026951151, "grad_norm": 0.2239701747894287, "learning_rate": 2.4071816120450742e-06, "loss": 0.4656, "step": 2515 }, { "epoch": 2.119034250421112, "grad_norm": 0.2242681384086609, "learning_rate": 2.402990936082667e-06, "loss": 0.4155, "step": 2516 }, { "epoch": 2.1198764738910723, "grad_norm": 0.23750342428684235, "learning_rate": 2.3988027569455895e-06, "loss": 0.4887, "step": 2517 }, { "epoch": 2.120718697361033, "grad_norm": 0.23305754363536835, "learning_rate": 2.3946170786604526e-06, "loss": 0.4555, "step": 2518 }, { "epoch": 2.121560920830994, "grad_norm": 0.21899771690368652, "learning_rate": 2.390433905251467e-06, "loss": 0.4762, "step": 2519 }, { "epoch": 2.1224031443009546, "grad_norm": 0.25981372594833374, "learning_rate": 2.3862532407404306e-06, "loss": 0.4577, "step": 2520 }, { "epoch": 2.1232453677709153, "grad_norm": 0.226762056350708, "learning_rate": 2.3820750891467355e-06, "loss": 0.4468, "step": 2521 }, { "epoch": 2.124087591240876, "grad_norm": 0.2500561475753784, "learning_rate": 2.377899454487351e-06, "loss": 0.4459, "step": 2522 }, { "epoch": 2.1249298147108364, "grad_norm": 0.23626849055290222, "learning_rate": 2.373726340776837e-06, "loss": 0.4498, "step": 2523 }, { "epoch": 2.125772038180797, "grad_norm": 0.29681235551834106, "learning_rate": 2.369555752027313e-06, "loss": 0.4788, "step": 2524 }, { "epoch": 2.126614261650758, "grad_norm": 0.25273439288139343, "learning_rate": 2.365387692248488e-06, "loss": 0.4847, "step": 2525 }, { "epoch": 2.1274564851207187, "grad_norm": 0.21956391632556915, "learning_rate": 2.361222165447628e-06, "loss": 0.4159, "step": 2526 }, { "epoch": 2.1282987085906795, "grad_norm": 0.24703717231750488, "learning_rate": 2.3570591756295717e-06, "loss": 0.4725, "step": 2527 }, { "epoch": 2.1291409320606403, "grad_norm": 0.2385459691286087, "learning_rate": 2.3528987267967135e-06, "loss": 0.3955, "step": 2528 }, { "epoch": 2.1299831555306006, "grad_norm": 0.24787484109401703, "learning_rate": 2.348740822949006e-06, "loss": 0.4838, "step": 2529 }, { "epoch": 2.1308253790005613, "grad_norm": 0.22598832845687866, "learning_rate": 2.3445854680839534e-06, "loss": 0.434, "step": 2530 }, { "epoch": 2.131667602470522, "grad_norm": 0.20984116196632385, "learning_rate": 2.3404326661966148e-06, "loss": 0.4172, "step": 2531 }, { "epoch": 2.132509825940483, "grad_norm": 0.23548419773578644, "learning_rate": 2.33628242127959e-06, "loss": 0.4842, "step": 2532 }, { "epoch": 2.1333520494104437, "grad_norm": 0.21929802000522614, "learning_rate": 2.33213473732302e-06, "loss": 0.4704, "step": 2533 }, { "epoch": 2.1341942728804044, "grad_norm": 0.2415885478258133, "learning_rate": 2.3279896183145857e-06, "loss": 0.4188, "step": 2534 }, { "epoch": 2.1350364963503647, "grad_norm": 0.20752763748168945, "learning_rate": 2.323847068239504e-06, "loss": 0.4373, "step": 2535 }, { "epoch": 2.1358787198203255, "grad_norm": 0.24832738935947418, "learning_rate": 2.319707091080517e-06, "loss": 0.4862, "step": 2536 }, { "epoch": 2.1367209432902863, "grad_norm": 0.2423706203699112, "learning_rate": 2.3155696908178974e-06, "loss": 0.4427, "step": 2537 }, { "epoch": 2.137563166760247, "grad_norm": 0.22580891847610474, "learning_rate": 2.3114348714294355e-06, "loss": 0.4259, "step": 2538 }, { "epoch": 2.138405390230208, "grad_norm": 0.2483510673046112, "learning_rate": 2.3073026368904478e-06, "loss": 0.4281, "step": 2539 }, { "epoch": 2.1392476137001686, "grad_norm": 0.22319741547107697, "learning_rate": 2.3031729911737576e-06, "loss": 0.4263, "step": 2540 }, { "epoch": 2.1400898371701293, "grad_norm": 0.25614792108535767, "learning_rate": 2.2990459382497086e-06, "loss": 0.471, "step": 2541 }, { "epoch": 2.1409320606400897, "grad_norm": 0.2186392992734909, "learning_rate": 2.2949214820861403e-06, "loss": 0.4642, "step": 2542 }, { "epoch": 2.1417742841100504, "grad_norm": 0.21955475211143494, "learning_rate": 2.290799626648402e-06, "loss": 0.415, "step": 2543 }, { "epoch": 2.142616507580011, "grad_norm": 0.26484572887420654, "learning_rate": 2.2866803758993446e-06, "loss": 0.5056, "step": 2544 }, { "epoch": 2.143458731049972, "grad_norm": 0.2315676212310791, "learning_rate": 2.2825637337993094e-06, "loss": 0.4362, "step": 2545 }, { "epoch": 2.1443009545199327, "grad_norm": 0.25867605209350586, "learning_rate": 2.2784497043061384e-06, "loss": 0.4683, "step": 2546 }, { "epoch": 2.1451431779898935, "grad_norm": 0.21167941391468048, "learning_rate": 2.274338291375147e-06, "loss": 0.4273, "step": 2547 }, { "epoch": 2.145985401459854, "grad_norm": 0.23586876690387726, "learning_rate": 2.2702294989591513e-06, "loss": 0.4945, "step": 2548 }, { "epoch": 2.1468276249298146, "grad_norm": 0.2557182013988495, "learning_rate": 2.266123331008436e-06, "loss": 0.4411, "step": 2549 }, { "epoch": 2.1476698483997754, "grad_norm": 0.2767774164676666, "learning_rate": 2.262019791470772e-06, "loss": 0.4762, "step": 2550 }, { "epoch": 2.148512071869736, "grad_norm": 0.22382551431655884, "learning_rate": 2.257918884291392e-06, "loss": 0.4415, "step": 2551 }, { "epoch": 2.149354295339697, "grad_norm": 0.24649542570114136, "learning_rate": 2.253820613413009e-06, "loss": 0.4741, "step": 2552 }, { "epoch": 2.1501965188096577, "grad_norm": 0.2294304221868515, "learning_rate": 2.2497249827757933e-06, "loss": 0.4632, "step": 2553 }, { "epoch": 2.151038742279618, "grad_norm": 0.20797529816627502, "learning_rate": 2.245631996317384e-06, "loss": 0.3846, "step": 2554 }, { "epoch": 2.1518809657495788, "grad_norm": 0.24905717372894287, "learning_rate": 2.2415416579728714e-06, "loss": 0.5011, "step": 2555 }, { "epoch": 2.1527231892195395, "grad_norm": 0.23170393705368042, "learning_rate": 2.2374539716748034e-06, "loss": 0.4576, "step": 2556 }, { "epoch": 2.1535654126895003, "grad_norm": 0.22377340495586395, "learning_rate": 2.233368941353175e-06, "loss": 0.4336, "step": 2557 }, { "epoch": 2.154407636159461, "grad_norm": 0.2319219559431076, "learning_rate": 2.2292865709354346e-06, "loss": 0.4502, "step": 2558 }, { "epoch": 2.155249859629422, "grad_norm": 0.22596853971481323, "learning_rate": 2.225206864346465e-06, "loss": 0.4503, "step": 2559 }, { "epoch": 2.156092083099382, "grad_norm": 0.24754388630390167, "learning_rate": 2.221129825508593e-06, "loss": 0.4707, "step": 2560 }, { "epoch": 2.156934306569343, "grad_norm": 0.2050655335187912, "learning_rate": 2.2170554583415782e-06, "loss": 0.382, "step": 2561 }, { "epoch": 2.1577765300393037, "grad_norm": 0.2229257971048355, "learning_rate": 2.2129837667626147e-06, "loss": 0.4816, "step": 2562 }, { "epoch": 2.1586187535092645, "grad_norm": 0.23722471296787262, "learning_rate": 2.2089147546863187e-06, "loss": 0.4978, "step": 2563 }, { "epoch": 2.1594609769792252, "grad_norm": 0.21584592759609222, "learning_rate": 2.20484842602474e-06, "loss": 0.4394, "step": 2564 }, { "epoch": 2.160303200449186, "grad_norm": 0.22543089091777802, "learning_rate": 2.2007847846873342e-06, "loss": 0.4748, "step": 2565 }, { "epoch": 2.1611454239191463, "grad_norm": 0.20939874649047852, "learning_rate": 2.196723834580987e-06, "loss": 0.4193, "step": 2566 }, { "epoch": 2.161987647389107, "grad_norm": 0.23541796207427979, "learning_rate": 2.1926655796099873e-06, "loss": 0.4704, "step": 2567 }, { "epoch": 2.162829870859068, "grad_norm": 0.285149484872818, "learning_rate": 2.188610023676041e-06, "loss": 0.4233, "step": 2568 }, { "epoch": 2.1636720943290286, "grad_norm": 0.2412305474281311, "learning_rate": 2.1845571706782486e-06, "loss": 0.4683, "step": 2569 }, { "epoch": 2.1645143177989894, "grad_norm": 0.2424529492855072, "learning_rate": 2.1805070245131234e-06, "loss": 0.4611, "step": 2570 }, { "epoch": 2.16535654126895, "grad_norm": 0.20474351942539215, "learning_rate": 2.176459589074566e-06, "loss": 0.3985, "step": 2571 }, { "epoch": 2.166198764738911, "grad_norm": 0.25247058272361755, "learning_rate": 2.17241486825388e-06, "loss": 0.4829, "step": 2572 }, { "epoch": 2.1670409882088713, "grad_norm": 0.21647696197032928, "learning_rate": 2.1683728659397517e-06, "loss": 0.397, "step": 2573 }, { "epoch": 2.167883211678832, "grad_norm": 0.2209479659795761, "learning_rate": 2.164333586018259e-06, "loss": 0.44, "step": 2574 }, { "epoch": 2.168725435148793, "grad_norm": 0.2338055968284607, "learning_rate": 2.160297032372857e-06, "loss": 0.4705, "step": 2575 }, { "epoch": 2.1695676586187536, "grad_norm": 0.24021084606647491, "learning_rate": 2.156263208884386e-06, "loss": 0.5165, "step": 2576 }, { "epoch": 2.1704098820887143, "grad_norm": 0.211451455950737, "learning_rate": 2.1522321194310577e-06, "loss": 0.436, "step": 2577 }, { "epoch": 2.171252105558675, "grad_norm": 0.24177387356758118, "learning_rate": 2.148203767888455e-06, "loss": 0.4863, "step": 2578 }, { "epoch": 2.1720943290286354, "grad_norm": 0.21642626821994781, "learning_rate": 2.1441781581295286e-06, "loss": 0.4333, "step": 2579 }, { "epoch": 2.172936552498596, "grad_norm": 0.22844117879867554, "learning_rate": 2.1401552940245962e-06, "loss": 0.4472, "step": 2580 }, { "epoch": 2.173778775968557, "grad_norm": 0.21919569373130798, "learning_rate": 2.1361351794413334e-06, "loss": 0.4468, "step": 2581 }, { "epoch": 2.1746209994385177, "grad_norm": 0.22261032462120056, "learning_rate": 2.132117818244771e-06, "loss": 0.4504, "step": 2582 }, { "epoch": 2.1754632229084785, "grad_norm": 0.23934446275234222, "learning_rate": 2.1281032142972933e-06, "loss": 0.4866, "step": 2583 }, { "epoch": 2.1763054463784393, "grad_norm": 0.2313029170036316, "learning_rate": 2.124091371458638e-06, "loss": 0.4693, "step": 2584 }, { "epoch": 2.1771476698483996, "grad_norm": 0.3898187279701233, "learning_rate": 2.1200822935858807e-06, "loss": 0.5119, "step": 2585 }, { "epoch": 2.1779898933183603, "grad_norm": 0.20589038729667664, "learning_rate": 2.1160759845334483e-06, "loss": 0.4527, "step": 2586 }, { "epoch": 2.178832116788321, "grad_norm": 0.22007058560848236, "learning_rate": 2.1120724481530937e-06, "loss": 0.4264, "step": 2587 }, { "epoch": 2.179674340258282, "grad_norm": 0.23823048174381256, "learning_rate": 2.1080716882939145e-06, "loss": 0.4727, "step": 2588 }, { "epoch": 2.1805165637282427, "grad_norm": 0.22157374024391174, "learning_rate": 2.1040737088023323e-06, "loss": 0.4579, "step": 2589 }, { "epoch": 2.1813587871982034, "grad_norm": 0.2201603353023529, "learning_rate": 2.100078513522102e-06, "loss": 0.3984, "step": 2590 }, { "epoch": 2.182201010668164, "grad_norm": 0.21784386038780212, "learning_rate": 2.0960861062942956e-06, "loss": 0.4431, "step": 2591 }, { "epoch": 2.1830432341381245, "grad_norm": 0.22476977109909058, "learning_rate": 2.0920964909573065e-06, "loss": 0.4728, "step": 2592 }, { "epoch": 2.1838854576080853, "grad_norm": 0.2174328863620758, "learning_rate": 2.0881096713468435e-06, "loss": 0.4437, "step": 2593 }, { "epoch": 2.184727681078046, "grad_norm": 0.257700115442276, "learning_rate": 2.0841256512959314e-06, "loss": 0.4909, "step": 2594 }, { "epoch": 2.185569904548007, "grad_norm": 0.23174285888671875, "learning_rate": 2.080144434634898e-06, "loss": 0.4283, "step": 2595 }, { "epoch": 2.1864121280179676, "grad_norm": 0.2456558793783188, "learning_rate": 2.0761660251913795e-06, "loss": 0.4499, "step": 2596 }, { "epoch": 2.187254351487928, "grad_norm": 0.21995964646339417, "learning_rate": 2.0721904267903097e-06, "loss": 0.416, "step": 2597 }, { "epoch": 2.1880965749578887, "grad_norm": 0.2584898769855499, "learning_rate": 2.068217643253925e-06, "loss": 0.4512, "step": 2598 }, { "epoch": 2.1889387984278494, "grad_norm": 0.25370872020721436, "learning_rate": 2.0642476784017507e-06, "loss": 0.4836, "step": 2599 }, { "epoch": 2.18978102189781, "grad_norm": 0.22863511741161346, "learning_rate": 2.0602805360506044e-06, "loss": 0.4771, "step": 2600 }, { "epoch": 2.190623245367771, "grad_norm": 0.21971666812896729, "learning_rate": 2.056316220014588e-06, "loss": 0.4195, "step": 2601 }, { "epoch": 2.1914654688377317, "grad_norm": 0.2119145691394806, "learning_rate": 2.0523547341050913e-06, "loss": 0.4315, "step": 2602 }, { "epoch": 2.1923076923076925, "grad_norm": 0.21800579130649567, "learning_rate": 2.0483960821307757e-06, "loss": 0.4489, "step": 2603 }, { "epoch": 2.193149915777653, "grad_norm": 0.23993311822414398, "learning_rate": 2.0444402678975876e-06, "loss": 0.5027, "step": 2604 }, { "epoch": 2.1939921392476136, "grad_norm": 0.25369441509246826, "learning_rate": 2.040487295208732e-06, "loss": 0.4639, "step": 2605 }, { "epoch": 2.1948343627175744, "grad_norm": 0.23978962004184723, "learning_rate": 2.036537167864695e-06, "loss": 0.497, "step": 2606 }, { "epoch": 2.195676586187535, "grad_norm": 0.21981124579906464, "learning_rate": 2.0325898896632178e-06, "loss": 0.4363, "step": 2607 }, { "epoch": 2.196518809657496, "grad_norm": 0.2213098406791687, "learning_rate": 2.0286454643993097e-06, "loss": 0.4677, "step": 2608 }, { "epoch": 2.1973610331274567, "grad_norm": 0.21256223320960999, "learning_rate": 2.024703895865232e-06, "loss": 0.4486, "step": 2609 }, { "epoch": 2.198203256597417, "grad_norm": 0.22332869470119476, "learning_rate": 2.0207651878505e-06, "loss": 0.4316, "step": 2610 }, { "epoch": 2.1990454800673778, "grad_norm": 0.2228483259677887, "learning_rate": 2.0168293441418798e-06, "loss": 0.4331, "step": 2611 }, { "epoch": 2.1998877035373385, "grad_norm": 0.22422662377357483, "learning_rate": 2.012896368523386e-06, "loss": 0.4167, "step": 2612 }, { "epoch": 2.2007299270072993, "grad_norm": 0.26360195875167847, "learning_rate": 2.0089662647762716e-06, "loss": 0.4831, "step": 2613 }, { "epoch": 2.20157215047726, "grad_norm": 0.22850310802459717, "learning_rate": 2.0050390366790307e-06, "loss": 0.4319, "step": 2614 }, { "epoch": 2.202414373947221, "grad_norm": 0.22522534430027008, "learning_rate": 2.001114688007393e-06, "loss": 0.4345, "step": 2615 }, { "epoch": 2.203256597417181, "grad_norm": 0.2405281960964203, "learning_rate": 1.997193222534316e-06, "loss": 0.4621, "step": 2616 }, { "epoch": 2.204098820887142, "grad_norm": 0.2264600247144699, "learning_rate": 1.9932746440299926e-06, "loss": 0.4301, "step": 2617 }, { "epoch": 2.2049410443571027, "grad_norm": 0.23317106068134308, "learning_rate": 1.989358956261835e-06, "loss": 0.4355, "step": 2618 }, { "epoch": 2.2057832678270635, "grad_norm": 0.2321927547454834, "learning_rate": 1.9854461629944764e-06, "loss": 0.4455, "step": 2619 }, { "epoch": 2.2066254912970242, "grad_norm": 0.23446126282215118, "learning_rate": 1.981536267989766e-06, "loss": 0.464, "step": 2620 }, { "epoch": 2.207467714766985, "grad_norm": 0.22699198126792908, "learning_rate": 1.977629275006772e-06, "loss": 0.4594, "step": 2621 }, { "epoch": 2.2083099382369458, "grad_norm": 0.22004449367523193, "learning_rate": 1.9737251878017678e-06, "loss": 0.4217, "step": 2622 }, { "epoch": 2.209152161706906, "grad_norm": 0.22280080616474152, "learning_rate": 1.969824010128233e-06, "loss": 0.4546, "step": 2623 }, { "epoch": 2.209994385176867, "grad_norm": 0.2157144397497177, "learning_rate": 1.9659257457368503e-06, "loss": 0.4455, "step": 2624 }, { "epoch": 2.2108366086468276, "grad_norm": 0.22796675562858582, "learning_rate": 1.962030398375506e-06, "loss": 0.4848, "step": 2625 }, { "epoch": 2.2116788321167884, "grad_norm": 0.25354745984077454, "learning_rate": 1.9581379717892748e-06, "loss": 0.4711, "step": 2626 }, { "epoch": 2.212521055586749, "grad_norm": 0.2213258594274521, "learning_rate": 1.954248469720431e-06, "loss": 0.4304, "step": 2627 }, { "epoch": 2.2133632790567095, "grad_norm": 0.22778984904289246, "learning_rate": 1.950361895908427e-06, "loss": 0.4509, "step": 2628 }, { "epoch": 2.2142055025266703, "grad_norm": 0.21401827037334442, "learning_rate": 1.946478254089911e-06, "loss": 0.4379, "step": 2629 }, { "epoch": 2.215047725996631, "grad_norm": 0.2008509486913681, "learning_rate": 1.942597547998703e-06, "loss": 0.4054, "step": 2630 }, { "epoch": 2.215889949466592, "grad_norm": 0.2488488256931305, "learning_rate": 1.9387197813658092e-06, "loss": 0.5111, "step": 2631 }, { "epoch": 2.2167321729365526, "grad_norm": 0.2370956689119339, "learning_rate": 1.934844957919403e-06, "loss": 0.446, "step": 2632 }, { "epoch": 2.2175743964065133, "grad_norm": 0.22533829510211945, "learning_rate": 1.9309730813848302e-06, "loss": 0.4393, "step": 2633 }, { "epoch": 2.218416619876474, "grad_norm": 0.21753352880477905, "learning_rate": 1.927104155484602e-06, "loss": 0.4431, "step": 2634 }, { "epoch": 2.2192588433464344, "grad_norm": 0.21712705492973328, "learning_rate": 1.923238183938398e-06, "loss": 0.4058, "step": 2635 }, { "epoch": 2.220101066816395, "grad_norm": 0.2563003599643707, "learning_rate": 1.919375170463052e-06, "loss": 0.4822, "step": 2636 }, { "epoch": 2.220943290286356, "grad_norm": 0.23763945698738098, "learning_rate": 1.915515118772555e-06, "loss": 0.4436, "step": 2637 }, { "epoch": 2.2217855137563167, "grad_norm": 0.2260843813419342, "learning_rate": 1.9116580325780505e-06, "loss": 0.4424, "step": 2638 }, { "epoch": 2.2226277372262775, "grad_norm": 0.2255467176437378, "learning_rate": 1.9078039155878338e-06, "loss": 0.4331, "step": 2639 }, { "epoch": 2.2234699606962383, "grad_norm": 0.2240259349346161, "learning_rate": 1.9039527715073424e-06, "loss": 0.3976, "step": 2640 }, { "epoch": 2.2243121841661986, "grad_norm": 0.20436307787895203, "learning_rate": 1.9001046040391558e-06, "loss": 0.4504, "step": 2641 }, { "epoch": 2.2251544076361593, "grad_norm": 0.22403733432292938, "learning_rate": 1.8962594168829907e-06, "loss": 0.4642, "step": 2642 }, { "epoch": 2.22599663110612, "grad_norm": 0.2124016284942627, "learning_rate": 1.8924172137357038e-06, "loss": 0.4527, "step": 2643 }, { "epoch": 2.226838854576081, "grad_norm": 0.2040032595396042, "learning_rate": 1.8885779982912756e-06, "loss": 0.4207, "step": 2644 }, { "epoch": 2.2276810780460417, "grad_norm": 0.20801329612731934, "learning_rate": 1.884741774240823e-06, "loss": 0.4382, "step": 2645 }, { "epoch": 2.2285233015160024, "grad_norm": 0.21499739587306976, "learning_rate": 1.8809085452725744e-06, "loss": 0.4606, "step": 2646 }, { "epoch": 2.2293655249859627, "grad_norm": 0.2179976999759674, "learning_rate": 1.8770783150718913e-06, "loss": 0.4312, "step": 2647 }, { "epoch": 2.2302077484559235, "grad_norm": 0.21196310222148895, "learning_rate": 1.8732510873212428e-06, "loss": 0.4288, "step": 2648 }, { "epoch": 2.2310499719258843, "grad_norm": 0.20845340192317963, "learning_rate": 1.8694268657002197e-06, "loss": 0.3998, "step": 2649 }, { "epoch": 2.231892195395845, "grad_norm": 0.23754556477069855, "learning_rate": 1.865605653885516e-06, "loss": 0.4403, "step": 2650 }, { "epoch": 2.232734418865806, "grad_norm": 0.24576745927333832, "learning_rate": 1.8617874555509342e-06, "loss": 0.4533, "step": 2651 }, { "epoch": 2.2335766423357666, "grad_norm": 0.2329178750514984, "learning_rate": 1.8579722743673773e-06, "loss": 0.4249, "step": 2652 }, { "epoch": 2.2344188658057273, "grad_norm": 0.22083207964897156, "learning_rate": 1.8541601140028542e-06, "loss": 0.4886, "step": 2653 }, { "epoch": 2.2352610892756877, "grad_norm": 0.20678941905498505, "learning_rate": 1.8503509781224627e-06, "loss": 0.4174, "step": 2654 }, { "epoch": 2.2361033127456484, "grad_norm": 0.2378537505865097, "learning_rate": 1.8465448703883959e-06, "loss": 0.4627, "step": 2655 }, { "epoch": 2.236945536215609, "grad_norm": 0.2391277253627777, "learning_rate": 1.8427417944599325e-06, "loss": 0.4673, "step": 2656 }, { "epoch": 2.23778775968557, "grad_norm": 0.1945500671863556, "learning_rate": 1.8389417539934428e-06, "loss": 0.406, "step": 2657 }, { "epoch": 2.2386299831555307, "grad_norm": 0.23040063679218292, "learning_rate": 1.8351447526423728e-06, "loss": 0.47, "step": 2658 }, { "epoch": 2.239472206625491, "grad_norm": 0.242579847574234, "learning_rate": 1.8313507940572477e-06, "loss": 0.4669, "step": 2659 }, { "epoch": 2.240314430095452, "grad_norm": 0.22608298063278198, "learning_rate": 1.8275598818856682e-06, "loss": 0.4678, "step": 2660 }, { "epoch": 2.2411566535654126, "grad_norm": 0.21431788802146912, "learning_rate": 1.8237720197723075e-06, "loss": 0.4473, "step": 2661 }, { "epoch": 2.2419988770353734, "grad_norm": 0.2069939225912094, "learning_rate": 1.819987211358903e-06, "loss": 0.4449, "step": 2662 }, { "epoch": 2.242841100505334, "grad_norm": 0.22116661071777344, "learning_rate": 1.8162054602842621e-06, "loss": 0.4486, "step": 2663 }, { "epoch": 2.243683323975295, "grad_norm": 0.21528081595897675, "learning_rate": 1.812426770184243e-06, "loss": 0.3963, "step": 2664 }, { "epoch": 2.2445255474452557, "grad_norm": 0.2184889018535614, "learning_rate": 1.8086511446917715e-06, "loss": 0.4423, "step": 2665 }, { "epoch": 2.245367770915216, "grad_norm": 0.21412664651870728, "learning_rate": 1.8048785874368191e-06, "loss": 0.422, "step": 2666 }, { "epoch": 2.2462099943851768, "grad_norm": 0.23331928253173828, "learning_rate": 1.8011091020464138e-06, "loss": 0.4885, "step": 2667 }, { "epoch": 2.2470522178551375, "grad_norm": 0.20584741234779358, "learning_rate": 1.7973426921446258e-06, "loss": 0.4404, "step": 2668 }, { "epoch": 2.2478944413250983, "grad_norm": 0.22839532792568207, "learning_rate": 1.7935793613525693e-06, "loss": 0.459, "step": 2669 }, { "epoch": 2.248736664795059, "grad_norm": 0.21240678429603577, "learning_rate": 1.789819113288397e-06, "loss": 0.4346, "step": 2670 }, { "epoch": 2.24957888826502, "grad_norm": 0.21887412667274475, "learning_rate": 1.7860619515673034e-06, "loss": 0.4614, "step": 2671 }, { "epoch": 2.25042111173498, "grad_norm": 0.23613284528255463, "learning_rate": 1.7823078798015098e-06, "loss": 0.4708, "step": 2672 }, { "epoch": 2.251263335204941, "grad_norm": 0.20250287652015686, "learning_rate": 1.7785569016002686e-06, "loss": 0.3812, "step": 2673 }, { "epoch": 2.2521055586749017, "grad_norm": 0.2235361784696579, "learning_rate": 1.7748090205698565e-06, "loss": 0.48, "step": 2674 }, { "epoch": 2.2529477821448625, "grad_norm": 0.21719956398010254, "learning_rate": 1.7710642403135768e-06, "loss": 0.4603, "step": 2675 }, { "epoch": 2.2537900056148232, "grad_norm": 0.21418072283267975, "learning_rate": 1.7673225644317487e-06, "loss": 0.464, "step": 2676 }, { "epoch": 2.254632229084784, "grad_norm": 0.23324021697044373, "learning_rate": 1.7635839965217055e-06, "loss": 0.4399, "step": 2677 }, { "epoch": 2.2554744525547443, "grad_norm": 0.2531759440898895, "learning_rate": 1.7598485401777932e-06, "loss": 0.4578, "step": 2678 }, { "epoch": 2.256316676024705, "grad_norm": 0.22851026058197021, "learning_rate": 1.75611619899137e-06, "loss": 0.5035, "step": 2679 }, { "epoch": 2.257158899494666, "grad_norm": 0.19862686097621918, "learning_rate": 1.7523869765507928e-06, "loss": 0.4284, "step": 2680 }, { "epoch": 2.2580011229646266, "grad_norm": 0.2257077395915985, "learning_rate": 1.748660876441428e-06, "loss": 0.4892, "step": 2681 }, { "epoch": 2.2588433464345874, "grad_norm": 0.22375568747520447, "learning_rate": 1.7449379022456297e-06, "loss": 0.4538, "step": 2682 }, { "epoch": 2.259685569904548, "grad_norm": 0.24779602885246277, "learning_rate": 1.7412180575427572e-06, "loss": 0.4884, "step": 2683 }, { "epoch": 2.260527793374509, "grad_norm": 0.23613165318965912, "learning_rate": 1.7375013459091529e-06, "loss": 0.4767, "step": 2684 }, { "epoch": 2.2613700168444693, "grad_norm": 0.20717568695545197, "learning_rate": 1.7337877709181527e-06, "loss": 0.4213, "step": 2685 }, { "epoch": 2.26221224031443, "grad_norm": 0.2418050915002823, "learning_rate": 1.7300773361400746e-06, "loss": 0.4062, "step": 2686 }, { "epoch": 2.263054463784391, "grad_norm": 0.23134247958660126, "learning_rate": 1.7263700451422166e-06, "loss": 0.4946, "step": 2687 }, { "epoch": 2.2638966872543516, "grad_norm": 0.23937517404556274, "learning_rate": 1.7226659014888548e-06, "loss": 0.4578, "step": 2688 }, { "epoch": 2.2647389107243123, "grad_norm": 0.23540329933166504, "learning_rate": 1.7189649087412385e-06, "loss": 0.4861, "step": 2689 }, { "epoch": 2.2655811341942727, "grad_norm": 0.21406607329845428, "learning_rate": 1.7152670704575919e-06, "loss": 0.4255, "step": 2690 }, { "epoch": 2.2664233576642334, "grad_norm": 0.2364763766527176, "learning_rate": 1.711572390193102e-06, "loss": 0.4609, "step": 2691 }, { "epoch": 2.267265581134194, "grad_norm": 0.23578055202960968, "learning_rate": 1.7078808714999207e-06, "loss": 0.5032, "step": 2692 }, { "epoch": 2.268107804604155, "grad_norm": 0.2197696417570114, "learning_rate": 1.7041925179271584e-06, "loss": 0.4283, "step": 2693 }, { "epoch": 2.2689500280741157, "grad_norm": 0.23121149837970734, "learning_rate": 1.7005073330208881e-06, "loss": 0.4718, "step": 2694 }, { "epoch": 2.2697922515440765, "grad_norm": 0.21392446756362915, "learning_rate": 1.696825320324132e-06, "loss": 0.4661, "step": 2695 }, { "epoch": 2.2706344750140373, "grad_norm": 0.2154620885848999, "learning_rate": 1.6931464833768624e-06, "loss": 0.3998, "step": 2696 }, { "epoch": 2.2714766984839976, "grad_norm": 0.22883859276771545, "learning_rate": 1.689470825715998e-06, "loss": 0.4745, "step": 2697 }, { "epoch": 2.2723189219539583, "grad_norm": 0.22363759577274323, "learning_rate": 1.6857983508754056e-06, "loss": 0.435, "step": 2698 }, { "epoch": 2.273161145423919, "grad_norm": 0.2119286209344864, "learning_rate": 1.6821290623858865e-06, "loss": 0.4427, "step": 2699 }, { "epoch": 2.27400336889388, "grad_norm": 0.23841655254364014, "learning_rate": 1.6784629637751814e-06, "loss": 0.4959, "step": 2700 }, { "epoch": 2.2748455923638407, "grad_norm": 0.2380737066268921, "learning_rate": 1.6748000585679602e-06, "loss": 0.4505, "step": 2701 }, { "epoch": 2.2756878158338014, "grad_norm": 0.2227126657962799, "learning_rate": 1.6711403502858302e-06, "loss": 0.4366, "step": 2702 }, { "epoch": 2.2765300393037617, "grad_norm": 0.2185884714126587, "learning_rate": 1.6674838424473172e-06, "loss": 0.4321, "step": 2703 }, { "epoch": 2.2773722627737225, "grad_norm": 0.23536622524261475, "learning_rate": 1.6638305385678783e-06, "loss": 0.47, "step": 2704 }, { "epoch": 2.2782144862436833, "grad_norm": 0.21442575752735138, "learning_rate": 1.6601804421598787e-06, "loss": 0.4101, "step": 2705 }, { "epoch": 2.279056709713644, "grad_norm": 0.22663520276546478, "learning_rate": 1.6565335567326112e-06, "loss": 0.507, "step": 2706 }, { "epoch": 2.279898933183605, "grad_norm": 0.21832779049873352, "learning_rate": 1.6528898857922747e-06, "loss": 0.4097, "step": 2707 }, { "epoch": 2.2807411566535656, "grad_norm": 0.2377646118402481, "learning_rate": 1.6492494328419816e-06, "loss": 0.4791, "step": 2708 }, { "epoch": 2.281583380123526, "grad_norm": 0.22777646780014038, "learning_rate": 1.6456122013817477e-06, "loss": 0.509, "step": 2709 }, { "epoch": 2.2824256035934867, "grad_norm": 0.21122081577777863, "learning_rate": 1.6419781949084928e-06, "loss": 0.4613, "step": 2710 }, { "epoch": 2.2832678270634474, "grad_norm": 0.22042138874530792, "learning_rate": 1.6383474169160334e-06, "loss": 0.4771, "step": 2711 }, { "epoch": 2.284110050533408, "grad_norm": 0.21312664449214935, "learning_rate": 1.6347198708950884e-06, "loss": 0.4393, "step": 2712 }, { "epoch": 2.284952274003369, "grad_norm": 0.20261482894420624, "learning_rate": 1.631095560333264e-06, "loss": 0.4464, "step": 2713 }, { "epoch": 2.2857944974733297, "grad_norm": 0.2264494001865387, "learning_rate": 1.6274744887150562e-06, "loss": 0.4651, "step": 2714 }, { "epoch": 2.2866367209432905, "grad_norm": 0.22457510232925415, "learning_rate": 1.6238566595218475e-06, "loss": 0.4685, "step": 2715 }, { "epoch": 2.287478944413251, "grad_norm": 0.22349314391613007, "learning_rate": 1.6202420762319065e-06, "loss": 0.4585, "step": 2716 }, { "epoch": 2.2883211678832116, "grad_norm": 0.22813555598258972, "learning_rate": 1.6166307423203765e-06, "loss": 0.4415, "step": 2717 }, { "epoch": 2.2891633913531724, "grad_norm": 0.23154975473880768, "learning_rate": 1.6130226612592787e-06, "loss": 0.4567, "step": 2718 }, { "epoch": 2.290005614823133, "grad_norm": 0.24155132472515106, "learning_rate": 1.6094178365175044e-06, "loss": 0.4717, "step": 2719 }, { "epoch": 2.290847838293094, "grad_norm": 0.23024789988994598, "learning_rate": 1.6058162715608205e-06, "loss": 0.4304, "step": 2720 }, { "epoch": 2.2916900617630542, "grad_norm": 0.22668318450450897, "learning_rate": 1.6022179698518525e-06, "loss": 0.4672, "step": 2721 }, { "epoch": 2.292532285233015, "grad_norm": 0.2475437968969345, "learning_rate": 1.598622934850097e-06, "loss": 0.5047, "step": 2722 }, { "epoch": 2.2933745087029758, "grad_norm": 0.23964282870292664, "learning_rate": 1.595031170011898e-06, "loss": 0.46, "step": 2723 }, { "epoch": 2.2942167321729365, "grad_norm": 0.21064236760139465, "learning_rate": 1.591442678790467e-06, "loss": 0.4292, "step": 2724 }, { "epoch": 2.2950589556428973, "grad_norm": 0.24782468378543854, "learning_rate": 1.5878574646358608e-06, "loss": 0.5228, "step": 2725 }, { "epoch": 2.295901179112858, "grad_norm": 0.2155902236700058, "learning_rate": 1.584275530994991e-06, "loss": 0.4253, "step": 2726 }, { "epoch": 2.296743402582819, "grad_norm": 0.2140742540359497, "learning_rate": 1.580696881311611e-06, "loss": 0.494, "step": 2727 }, { "epoch": 2.297585626052779, "grad_norm": 0.20949889719486237, "learning_rate": 1.5771215190263183e-06, "loss": 0.4028, "step": 2728 }, { "epoch": 2.29842784952274, "grad_norm": 0.23589280247688293, "learning_rate": 1.573549447576549e-06, "loss": 0.4426, "step": 2729 }, { "epoch": 2.2992700729927007, "grad_norm": 0.21044345200061798, "learning_rate": 1.5699806703965787e-06, "loss": 0.4521, "step": 2730 }, { "epoch": 2.3001122964626615, "grad_norm": 0.2316303849220276, "learning_rate": 1.5664151909175124e-06, "loss": 0.4692, "step": 2731 }, { "epoch": 2.3009545199326222, "grad_norm": 0.24272464215755463, "learning_rate": 1.5628530125672848e-06, "loss": 0.4706, "step": 2732 }, { "epoch": 2.301796743402583, "grad_norm": 0.20875270664691925, "learning_rate": 1.5592941387706562e-06, "loss": 0.4345, "step": 2733 }, { "epoch": 2.3026389668725433, "grad_norm": 0.22224783897399902, "learning_rate": 1.555738572949214e-06, "loss": 0.465, "step": 2734 }, { "epoch": 2.303481190342504, "grad_norm": 0.2641976475715637, "learning_rate": 1.5521863185213626e-06, "loss": 0.5491, "step": 2735 }, { "epoch": 2.304323413812465, "grad_norm": 0.21460959315299988, "learning_rate": 1.5486373789023206e-06, "loss": 0.4115, "step": 2736 }, { "epoch": 2.3051656372824256, "grad_norm": 0.22224272787570953, "learning_rate": 1.5450917575041209e-06, "loss": 0.4539, "step": 2737 }, { "epoch": 2.3060078607523864, "grad_norm": 0.2087388038635254, "learning_rate": 1.54154945773561e-06, "loss": 0.4197, "step": 2738 }, { "epoch": 2.306850084222347, "grad_norm": 0.2051694542169571, "learning_rate": 1.538010483002435e-06, "loss": 0.4626, "step": 2739 }, { "epoch": 2.3076923076923075, "grad_norm": 0.2121002972126007, "learning_rate": 1.5344748367070534e-06, "loss": 0.441, "step": 2740 }, { "epoch": 2.3085345311622683, "grad_norm": 0.23802872002124786, "learning_rate": 1.5309425222487119e-06, "loss": 0.5235, "step": 2741 }, { "epoch": 2.309376754632229, "grad_norm": 0.22116675972938538, "learning_rate": 1.5274135430234654e-06, "loss": 0.4378, "step": 2742 }, { "epoch": 2.31021897810219, "grad_norm": 0.22681930661201477, "learning_rate": 1.5238879024241544e-06, "loss": 0.4264, "step": 2743 }, { "epoch": 2.3110612015721506, "grad_norm": 0.20690928399562836, "learning_rate": 1.5203656038404146e-06, "loss": 0.4234, "step": 2744 }, { "epoch": 2.3119034250421113, "grad_norm": 0.209259033203125, "learning_rate": 1.5168466506586654e-06, "loss": 0.4327, "step": 2745 }, { "epoch": 2.312745648512072, "grad_norm": 0.24167883396148682, "learning_rate": 1.5133310462621103e-06, "loss": 0.5191, "step": 2746 }, { "epoch": 2.3135878719820324, "grad_norm": 0.22800850868225098, "learning_rate": 1.509818794030733e-06, "loss": 0.4821, "step": 2747 }, { "epoch": 2.314430095451993, "grad_norm": 0.21234053373336792, "learning_rate": 1.506309897341297e-06, "loss": 0.3901, "step": 2748 }, { "epoch": 2.315272318921954, "grad_norm": 0.22482189536094666, "learning_rate": 1.502804359567337e-06, "loss": 0.4649, "step": 2749 }, { "epoch": 2.3161145423919147, "grad_norm": 0.22691449522972107, "learning_rate": 1.499302184079159e-06, "loss": 0.4373, "step": 2750 }, { "epoch": 2.3169567658618755, "grad_norm": 0.22310692071914673, "learning_rate": 1.4958033742438348e-06, "loss": 0.4373, "step": 2751 }, { "epoch": 2.317798989331836, "grad_norm": 0.21015821397304535, "learning_rate": 1.492307933425205e-06, "loss": 0.4068, "step": 2752 }, { "epoch": 2.3186412128017966, "grad_norm": 0.21774540841579437, "learning_rate": 1.4888158649838675e-06, "loss": 0.4485, "step": 2753 }, { "epoch": 2.3194834362717573, "grad_norm": 0.22782184183597565, "learning_rate": 1.4853271722771772e-06, "loss": 0.455, "step": 2754 }, { "epoch": 2.320325659741718, "grad_norm": 0.2116689234972, "learning_rate": 1.4818418586592448e-06, "loss": 0.4376, "step": 2755 }, { "epoch": 2.321167883211679, "grad_norm": 0.230124369263649, "learning_rate": 1.478359927480935e-06, "loss": 0.4638, "step": 2756 }, { "epoch": 2.3220101066816397, "grad_norm": 0.21921810507774353, "learning_rate": 1.4748813820898554e-06, "loss": 0.4312, "step": 2757 }, { "epoch": 2.3228523301516004, "grad_norm": 0.2241123616695404, "learning_rate": 1.4714062258303653e-06, "loss": 0.4808, "step": 2758 }, { "epoch": 2.3236945536215607, "grad_norm": 0.24824385344982147, "learning_rate": 1.4679344620435543e-06, "loss": 0.4844, "step": 2759 }, { "epoch": 2.3245367770915215, "grad_norm": 0.22403553128242493, "learning_rate": 1.4644660940672628e-06, "loss": 0.4523, "step": 2760 }, { "epoch": 2.3253790005614823, "grad_norm": 0.23550719022750854, "learning_rate": 1.4610011252360594e-06, "loss": 0.4784, "step": 2761 }, { "epoch": 2.326221224031443, "grad_norm": 0.1978815495967865, "learning_rate": 1.4575395588812452e-06, "loss": 0.4154, "step": 2762 }, { "epoch": 2.327063447501404, "grad_norm": 0.21280786395072937, "learning_rate": 1.454081398330855e-06, "loss": 0.48, "step": 2763 }, { "epoch": 2.3279056709713646, "grad_norm": 0.2087947279214859, "learning_rate": 1.450626646909639e-06, "loss": 0.4156, "step": 2764 }, { "epoch": 2.328747894441325, "grad_norm": 0.2108825296163559, "learning_rate": 1.4471753079390815e-06, "loss": 0.4352, "step": 2765 }, { "epoch": 2.3295901179112857, "grad_norm": 0.21255429089069366, "learning_rate": 1.4437273847373778e-06, "loss": 0.4754, "step": 2766 }, { "epoch": 2.3304323413812464, "grad_norm": 0.20749181509017944, "learning_rate": 1.4402828806194436e-06, "loss": 0.434, "step": 2767 }, { "epoch": 2.331274564851207, "grad_norm": 0.19894064962863922, "learning_rate": 1.4368417988969058e-06, "loss": 0.386, "step": 2768 }, { "epoch": 2.332116788321168, "grad_norm": 0.23795415461063385, "learning_rate": 1.4334041428781003e-06, "loss": 0.5171, "step": 2769 }, { "epoch": 2.3329590117911287, "grad_norm": 0.19911989569664001, "learning_rate": 1.429969915868068e-06, "loss": 0.4274, "step": 2770 }, { "epoch": 2.333801235261089, "grad_norm": 0.21800339221954346, "learning_rate": 1.4265391211685597e-06, "loss": 0.4509, "step": 2771 }, { "epoch": 2.33464345873105, "grad_norm": 0.2385159134864807, "learning_rate": 1.4231117620780188e-06, "loss": 0.4343, "step": 2772 }, { "epoch": 2.3354856822010106, "grad_norm": 0.21365594863891602, "learning_rate": 1.4196878418915894e-06, "loss": 0.4351, "step": 2773 }, { "epoch": 2.3363279056709714, "grad_norm": 0.22822968661785126, "learning_rate": 1.4162673639011065e-06, "loss": 0.4748, "step": 2774 }, { "epoch": 2.337170129140932, "grad_norm": 0.23817989230155945, "learning_rate": 1.4128503313951008e-06, "loss": 0.5103, "step": 2775 }, { "epoch": 2.338012352610893, "grad_norm": 0.2209175080060959, "learning_rate": 1.4094367476587867e-06, "loss": 0.4592, "step": 2776 }, { "epoch": 2.3388545760808537, "grad_norm": 0.23550374805927277, "learning_rate": 1.4060266159740627e-06, "loss": 0.427, "step": 2777 }, { "epoch": 2.339696799550814, "grad_norm": 0.19737476110458374, "learning_rate": 1.4026199396195078e-06, "loss": 0.4175, "step": 2778 }, { "epoch": 2.3405390230207748, "grad_norm": 0.22374357283115387, "learning_rate": 1.399216721870384e-06, "loss": 0.4956, "step": 2779 }, { "epoch": 2.3413812464907355, "grad_norm": 0.21555621922016144, "learning_rate": 1.3958169659986204e-06, "loss": 0.4468, "step": 2780 }, { "epoch": 2.3422234699606963, "grad_norm": 0.23126178979873657, "learning_rate": 1.3924206752728282e-06, "loss": 0.4473, "step": 2781 }, { "epoch": 2.343065693430657, "grad_norm": 0.2712365686893463, "learning_rate": 1.389027852958273e-06, "loss": 0.5407, "step": 2782 }, { "epoch": 2.3439079169006174, "grad_norm": 0.20937930047512054, "learning_rate": 1.385638502316899e-06, "loss": 0.4126, "step": 2783 }, { "epoch": 2.344750140370578, "grad_norm": 0.24225324392318726, "learning_rate": 1.3822526266073044e-06, "loss": 0.475, "step": 2784 }, { "epoch": 2.345592363840539, "grad_norm": 0.220139741897583, "learning_rate": 1.3788702290847517e-06, "loss": 0.3792, "step": 2785 }, { "epoch": 2.3464345873104997, "grad_norm": 0.23663264513015747, "learning_rate": 1.3754913130011566e-06, "loss": 0.536, "step": 2786 }, { "epoch": 2.3472768107804605, "grad_norm": 0.20419010519981384, "learning_rate": 1.3721158816050872e-06, "loss": 0.3968, "step": 2787 }, { "epoch": 2.3481190342504212, "grad_norm": 0.22789226472377777, "learning_rate": 1.3687439381417616e-06, "loss": 0.4493, "step": 2788 }, { "epoch": 2.348961257720382, "grad_norm": 0.22449129819869995, "learning_rate": 1.3653754858530477e-06, "loss": 0.4719, "step": 2789 }, { "epoch": 2.3498034811903423, "grad_norm": 0.212175652384758, "learning_rate": 1.3620105279774532e-06, "loss": 0.4529, "step": 2790 }, { "epoch": 2.350645704660303, "grad_norm": 0.23109833896160126, "learning_rate": 1.3586490677501269e-06, "loss": 0.4732, "step": 2791 }, { "epoch": 2.351487928130264, "grad_norm": 0.2255665361881256, "learning_rate": 1.3552911084028536e-06, "loss": 0.4291, "step": 2792 }, { "epoch": 2.3523301516002246, "grad_norm": 0.22925271093845367, "learning_rate": 1.3519366531640589e-06, "loss": 0.4485, "step": 2793 }, { "epoch": 2.3531723750701854, "grad_norm": 0.2320573776960373, "learning_rate": 1.3485857052587908e-06, "loss": 0.4799, "step": 2794 }, { "epoch": 2.354014598540146, "grad_norm": 0.20761503279209137, "learning_rate": 1.3452382679087307e-06, "loss": 0.4636, "step": 2795 }, { "epoch": 2.3548568220101065, "grad_norm": 0.2336365431547165, "learning_rate": 1.3418943443321807e-06, "loss": 0.4577, "step": 2796 }, { "epoch": 2.3556990454800673, "grad_norm": 0.2128058224916458, "learning_rate": 1.3385539377440709e-06, "loss": 0.3836, "step": 2797 }, { "epoch": 2.356541268950028, "grad_norm": 0.22745098173618317, "learning_rate": 1.3352170513559432e-06, "loss": 0.4906, "step": 2798 }, { "epoch": 2.357383492419989, "grad_norm": 0.23147542774677277, "learning_rate": 1.3318836883759634e-06, "loss": 0.463, "step": 2799 }, { "epoch": 2.3582257158899496, "grad_norm": 0.2209130972623825, "learning_rate": 1.3285538520088976e-06, "loss": 0.4375, "step": 2800 }, { "epoch": 2.3590679393599103, "grad_norm": 0.23008860647678375, "learning_rate": 1.3252275454561337e-06, "loss": 0.4871, "step": 2801 }, { "epoch": 2.3599101628298707, "grad_norm": 0.21160376071929932, "learning_rate": 1.3219047719156575e-06, "loss": 0.4506, "step": 2802 }, { "epoch": 2.3607523862998314, "grad_norm": 0.19938921928405762, "learning_rate": 1.318585534582064e-06, "loss": 0.4138, "step": 2803 }, { "epoch": 2.361594609769792, "grad_norm": 0.2405799776315689, "learning_rate": 1.3152698366465449e-06, "loss": 0.497, "step": 2804 }, { "epoch": 2.362436833239753, "grad_norm": 0.21737796068191528, "learning_rate": 1.3119576812968893e-06, "loss": 0.4694, "step": 2805 }, { "epoch": 2.3632790567097137, "grad_norm": 0.23105907440185547, "learning_rate": 1.30864907171748e-06, "loss": 0.4068, "step": 2806 }, { "epoch": 2.3641212801796745, "grad_norm": 0.2165161669254303, "learning_rate": 1.305344011089294e-06, "loss": 0.4432, "step": 2807 }, { "epoch": 2.3649635036496353, "grad_norm": 0.22276388108730316, "learning_rate": 1.3020425025898926e-06, "loss": 0.4447, "step": 2808 }, { "epoch": 2.3658057271195956, "grad_norm": 0.23289793729782104, "learning_rate": 1.2987445493934236e-06, "loss": 0.4646, "step": 2809 }, { "epoch": 2.3666479505895563, "grad_norm": 0.22700735926628113, "learning_rate": 1.295450154670615e-06, "loss": 0.4824, "step": 2810 }, { "epoch": 2.367490174059517, "grad_norm": 0.2119935154914856, "learning_rate": 1.292159321588778e-06, "loss": 0.4733, "step": 2811 }, { "epoch": 2.368332397529478, "grad_norm": 0.2053123116493225, "learning_rate": 1.288872053311795e-06, "loss": 0.4168, "step": 2812 }, { "epoch": 2.3691746209994387, "grad_norm": 0.21128658950328827, "learning_rate": 1.2855883530001228e-06, "loss": 0.4421, "step": 2813 }, { "epoch": 2.370016844469399, "grad_norm": 0.2092086672782898, "learning_rate": 1.282308223810786e-06, "loss": 0.4011, "step": 2814 }, { "epoch": 2.3708590679393597, "grad_norm": 0.22134679555892944, "learning_rate": 1.2790316688973809e-06, "loss": 0.4549, "step": 2815 }, { "epoch": 2.3717012914093205, "grad_norm": 0.2254960834980011, "learning_rate": 1.2757586914100612e-06, "loss": 0.5011, "step": 2816 }, { "epoch": 2.3725435148792813, "grad_norm": 0.23400871455669403, "learning_rate": 1.272489294495548e-06, "loss": 0.465, "step": 2817 }, { "epoch": 2.373385738349242, "grad_norm": 0.21689380705356598, "learning_rate": 1.2692234812971106e-06, "loss": 0.4575, "step": 2818 }, { "epoch": 2.374227961819203, "grad_norm": 0.21822021901607513, "learning_rate": 1.265961254954583e-06, "loss": 0.439, "step": 2819 }, { "epoch": 2.3750701852891636, "grad_norm": 0.21324209868907928, "learning_rate": 1.2627026186043423e-06, "loss": 0.4535, "step": 2820 }, { "epoch": 2.375912408759124, "grad_norm": 0.21242012083530426, "learning_rate": 1.2594475753793211e-06, "loss": 0.4402, "step": 2821 }, { "epoch": 2.3767546322290847, "grad_norm": 0.22092178463935852, "learning_rate": 1.256196128408993e-06, "loss": 0.4536, "step": 2822 }, { "epoch": 2.3775968556990454, "grad_norm": 0.22295038402080536, "learning_rate": 1.252948280819375e-06, "loss": 0.4585, "step": 2823 }, { "epoch": 2.378439079169006, "grad_norm": 0.2223598062992096, "learning_rate": 1.249704035733022e-06, "loss": 0.4629, "step": 2824 }, { "epoch": 2.379281302638967, "grad_norm": 0.23258928954601288, "learning_rate": 1.2464633962690304e-06, "loss": 0.4734, "step": 2825 }, { "epoch": 2.3801235261089277, "grad_norm": 0.2153090089559555, "learning_rate": 1.243226365543026e-06, "loss": 0.4737, "step": 2826 }, { "epoch": 2.3809657495788885, "grad_norm": 0.22382834553718567, "learning_rate": 1.239992946667165e-06, "loss": 0.4542, "step": 2827 }, { "epoch": 2.381807973048849, "grad_norm": 0.22419168055057526, "learning_rate": 1.2367631427501308e-06, "loss": 0.4641, "step": 2828 }, { "epoch": 2.3826501965188096, "grad_norm": 0.2243785262107849, "learning_rate": 1.2335369568971362e-06, "loss": 0.4734, "step": 2829 }, { "epoch": 2.3834924199887704, "grad_norm": 0.22873510420322418, "learning_rate": 1.2303143922099092e-06, "loss": 0.495, "step": 2830 }, { "epoch": 2.384334643458731, "grad_norm": 0.22396348416805267, "learning_rate": 1.2270954517867e-06, "loss": 0.448, "step": 2831 }, { "epoch": 2.385176866928692, "grad_norm": 0.21435068547725677, "learning_rate": 1.2238801387222716e-06, "loss": 0.4365, "step": 2832 }, { "epoch": 2.3860190903986522, "grad_norm": 0.21422205865383148, "learning_rate": 1.2206684561079035e-06, "loss": 0.4162, "step": 2833 }, { "epoch": 2.386861313868613, "grad_norm": 0.2292875200510025, "learning_rate": 1.2174604070313811e-06, "loss": 0.4486, "step": 2834 }, { "epoch": 2.3877035373385738, "grad_norm": 0.2135428488254547, "learning_rate": 1.2142559945769995e-06, "loss": 0.4364, "step": 2835 }, { "epoch": 2.3885457608085345, "grad_norm": 0.22384756803512573, "learning_rate": 1.211055221825554e-06, "loss": 0.5013, "step": 2836 }, { "epoch": 2.3893879842784953, "grad_norm": 0.23870252072811127, "learning_rate": 1.207858091854342e-06, "loss": 0.5024, "step": 2837 }, { "epoch": 2.390230207748456, "grad_norm": 0.22278691828250885, "learning_rate": 1.2046646077371615e-06, "loss": 0.4105, "step": 2838 }, { "epoch": 2.391072431218417, "grad_norm": 0.2292664796113968, "learning_rate": 1.2014747725443004e-06, "loss": 0.4534, "step": 2839 }, { "epoch": 2.391914654688377, "grad_norm": 0.23718619346618652, "learning_rate": 1.1982885893425455e-06, "loss": 0.499, "step": 2840 }, { "epoch": 2.392756878158338, "grad_norm": 0.20456264913082123, "learning_rate": 1.1951060611951615e-06, "loss": 0.4346, "step": 2841 }, { "epoch": 2.3935991016282987, "grad_norm": 0.2342398464679718, "learning_rate": 1.1919271911619106e-06, "loss": 0.4596, "step": 2842 }, { "epoch": 2.3944413250982595, "grad_norm": 0.22733108699321747, "learning_rate": 1.1887519822990296e-06, "loss": 0.4752, "step": 2843 }, { "epoch": 2.3952835485682202, "grad_norm": 0.21320438385009766, "learning_rate": 1.185580437659241e-06, "loss": 0.4483, "step": 2844 }, { "epoch": 2.3961257720381806, "grad_norm": 0.2183854579925537, "learning_rate": 1.1824125602917414e-06, "loss": 0.4788, "step": 2845 }, { "epoch": 2.3969679955081413, "grad_norm": 0.24755306541919708, "learning_rate": 1.1792483532422021e-06, "loss": 0.4483, "step": 2846 }, { "epoch": 2.397810218978102, "grad_norm": 0.23176369071006775, "learning_rate": 1.1760878195527642e-06, "loss": 0.4777, "step": 2847 }, { "epoch": 2.398652442448063, "grad_norm": 0.22684980928897858, "learning_rate": 1.1729309622620422e-06, "loss": 0.4178, "step": 2848 }, { "epoch": 2.3994946659180236, "grad_norm": 0.20656734704971313, "learning_rate": 1.1697777844051105e-06, "loss": 0.4212, "step": 2849 }, { "epoch": 2.4003368893879844, "grad_norm": 0.22643929719924927, "learning_rate": 1.1666282890135083e-06, "loss": 0.4753, "step": 2850 }, { "epoch": 2.401179112857945, "grad_norm": 0.21947984397411346, "learning_rate": 1.1634824791152334e-06, "loss": 0.4807, "step": 2851 }, { "epoch": 2.4020213363279055, "grad_norm": 0.214324951171875, "learning_rate": 1.1603403577347434e-06, "loss": 0.4024, "step": 2852 }, { "epoch": 2.4028635597978663, "grad_norm": 0.21958878636360168, "learning_rate": 1.1572019278929457e-06, "loss": 0.4388, "step": 2853 }, { "epoch": 2.403705783267827, "grad_norm": 0.23396573960781097, "learning_rate": 1.1540671926072012e-06, "loss": 0.4756, "step": 2854 }, { "epoch": 2.404548006737788, "grad_norm": 0.22927935421466827, "learning_rate": 1.1509361548913151e-06, "loss": 0.4649, "step": 2855 }, { "epoch": 2.4053902302077486, "grad_norm": 0.21047605574131012, "learning_rate": 1.147808817755544e-06, "loss": 0.4384, "step": 2856 }, { "epoch": 2.4062324536777093, "grad_norm": 0.22546419501304626, "learning_rate": 1.1446851842065804e-06, "loss": 0.4588, "step": 2857 }, { "epoch": 2.40707467714767, "grad_norm": 0.22898390889167786, "learning_rate": 1.1415652572475628e-06, "loss": 0.4325, "step": 2858 }, { "epoch": 2.4079169006176304, "grad_norm": 0.23377209901809692, "learning_rate": 1.1384490398780563e-06, "loss": 0.4748, "step": 2859 }, { "epoch": 2.408759124087591, "grad_norm": 0.22115401923656464, "learning_rate": 1.1353365350940688e-06, "loss": 0.4175, "step": 2860 }, { "epoch": 2.409601347557552, "grad_norm": 0.21956577897071838, "learning_rate": 1.1322277458880337e-06, "loss": 0.4547, "step": 2861 }, { "epoch": 2.4104435710275127, "grad_norm": 0.218822181224823, "learning_rate": 1.129122675248816e-06, "loss": 0.4634, "step": 2862 }, { "epoch": 2.4112857944974735, "grad_norm": 0.20647048950195312, "learning_rate": 1.1260213261617015e-06, "loss": 0.4325, "step": 2863 }, { "epoch": 2.412128017967434, "grad_norm": 0.2264227718114853, "learning_rate": 1.1229237016084005e-06, "loss": 0.4979, "step": 2864 }, { "epoch": 2.4129702414373946, "grad_norm": 0.2119198739528656, "learning_rate": 1.1198298045670402e-06, "loss": 0.4034, "step": 2865 }, { "epoch": 2.4138124649073553, "grad_norm": 0.2198447436094284, "learning_rate": 1.116739638012168e-06, "loss": 0.4471, "step": 2866 }, { "epoch": 2.414654688377316, "grad_norm": 0.21063226461410522, "learning_rate": 1.113653204914742e-06, "loss": 0.4504, "step": 2867 }, { "epoch": 2.415496911847277, "grad_norm": 0.20452992618083954, "learning_rate": 1.1105705082421303e-06, "loss": 0.4455, "step": 2868 }, { "epoch": 2.4163391353172377, "grad_norm": 0.22006185352802277, "learning_rate": 1.1074915509581086e-06, "loss": 0.4417, "step": 2869 }, { "epoch": 2.4171813587871984, "grad_norm": 0.2325064241886139, "learning_rate": 1.104416336022861e-06, "loss": 0.4757, "step": 2870 }, { "epoch": 2.4180235822571587, "grad_norm": 0.2233905792236328, "learning_rate": 1.1013448663929704e-06, "loss": 0.4923, "step": 2871 }, { "epoch": 2.4188658057271195, "grad_norm": 0.21179546415805817, "learning_rate": 1.0982771450214197e-06, "loss": 0.462, "step": 2872 }, { "epoch": 2.4197080291970803, "grad_norm": 0.21884697675704956, "learning_rate": 1.0952131748575855e-06, "loss": 0.4324, "step": 2873 }, { "epoch": 2.420550252667041, "grad_norm": 0.2328515350818634, "learning_rate": 1.0921529588472446e-06, "loss": 0.4428, "step": 2874 }, { "epoch": 2.421392476137002, "grad_norm": 0.22742263972759247, "learning_rate": 1.0890964999325566e-06, "loss": 0.4509, "step": 2875 }, { "epoch": 2.422234699606962, "grad_norm": 0.23213602602481842, "learning_rate": 1.0860438010520773e-06, "loss": 0.4186, "step": 2876 }, { "epoch": 2.423076923076923, "grad_norm": 0.23050396144390106, "learning_rate": 1.0829948651407374e-06, "loss": 0.4674, "step": 2877 }, { "epoch": 2.4239191465468837, "grad_norm": 0.21073007583618164, "learning_rate": 1.0799496951298595e-06, "loss": 0.4346, "step": 2878 }, { "epoch": 2.4247613700168444, "grad_norm": 0.21420925855636597, "learning_rate": 1.0769082939471382e-06, "loss": 0.4594, "step": 2879 }, { "epoch": 2.425603593486805, "grad_norm": 0.2333727926015854, "learning_rate": 1.0738706645166508e-06, "loss": 0.4678, "step": 2880 }, { "epoch": 2.426445816956766, "grad_norm": 0.20445296168327332, "learning_rate": 1.0708368097588435e-06, "loss": 0.4409, "step": 2881 }, { "epoch": 2.4272880404267267, "grad_norm": 0.23423421382904053, "learning_rate": 1.0678067325905362e-06, "loss": 0.4672, "step": 2882 }, { "epoch": 2.428130263896687, "grad_norm": 0.2212824821472168, "learning_rate": 1.0647804359249143e-06, "loss": 0.436, "step": 2883 }, { "epoch": 2.428972487366648, "grad_norm": 0.21209730207920074, "learning_rate": 1.0617579226715324e-06, "loss": 0.4217, "step": 2884 }, { "epoch": 2.4298147108366086, "grad_norm": 0.24588413536548615, "learning_rate": 1.0587391957363053e-06, "loss": 0.5099, "step": 2885 }, { "epoch": 2.4306569343065694, "grad_norm": 0.20285753905773163, "learning_rate": 1.0557242580215066e-06, "loss": 0.4433, "step": 2886 }, { "epoch": 2.43149915777653, "grad_norm": 0.2123468518257141, "learning_rate": 1.0527131124257677e-06, "loss": 0.4084, "step": 2887 }, { "epoch": 2.432341381246491, "grad_norm": 0.2119874507188797, "learning_rate": 1.0497057618440765e-06, "loss": 0.4422, "step": 2888 }, { "epoch": 2.4331836047164517, "grad_norm": 0.21684275567531586, "learning_rate": 1.0467022091677692e-06, "loss": 0.4509, "step": 2889 }, { "epoch": 2.434025828186412, "grad_norm": 0.2217414826154709, "learning_rate": 1.0437024572845317e-06, "loss": 0.4389, "step": 2890 }, { "epoch": 2.4348680516563728, "grad_norm": 0.2293258011341095, "learning_rate": 1.040706509078394e-06, "loss": 0.4594, "step": 2891 }, { "epoch": 2.4357102751263335, "grad_norm": 0.24163495004177094, "learning_rate": 1.037714367429734e-06, "loss": 0.4721, "step": 2892 }, { "epoch": 2.4365524985962943, "grad_norm": 0.21625536680221558, "learning_rate": 1.0347260352152644e-06, "loss": 0.4136, "step": 2893 }, { "epoch": 2.437394722066255, "grad_norm": 0.22497281432151794, "learning_rate": 1.0317415153080406e-06, "loss": 0.476, "step": 2894 }, { "epoch": 2.4382369455362154, "grad_norm": 0.21880336105823517, "learning_rate": 1.0287608105774456e-06, "loss": 0.4869, "step": 2895 }, { "epoch": 2.439079169006176, "grad_norm": 0.21237826347351074, "learning_rate": 1.025783923889202e-06, "loss": 0.4202, "step": 2896 }, { "epoch": 2.439921392476137, "grad_norm": 0.21691353619098663, "learning_rate": 1.0228108581053565e-06, "loss": 0.4887, "step": 2897 }, { "epoch": 2.4407636159460977, "grad_norm": 0.22694803774356842, "learning_rate": 1.019841616084286e-06, "loss": 0.4339, "step": 2898 }, { "epoch": 2.4416058394160585, "grad_norm": 0.21116051077842712, "learning_rate": 1.0168762006806886e-06, "loss": 0.4185, "step": 2899 }, { "epoch": 2.4424480628860192, "grad_norm": 0.2274029701948166, "learning_rate": 1.0139146147455842e-06, "loss": 0.4699, "step": 2900 }, { "epoch": 2.44329028635598, "grad_norm": 0.22564122080802917, "learning_rate": 1.0109568611263094e-06, "loss": 0.4152, "step": 2901 }, { "epoch": 2.4441325098259403, "grad_norm": 0.21974700689315796, "learning_rate": 1.0080029426665194e-06, "loss": 0.4201, "step": 2902 }, { "epoch": 2.444974733295901, "grad_norm": 0.2307429164648056, "learning_rate": 1.0050528622061805e-06, "loss": 0.4182, "step": 2903 }, { "epoch": 2.445816956765862, "grad_norm": 0.22359402477741241, "learning_rate": 1.002106622581569e-06, "loss": 0.4997, "step": 2904 }, { "epoch": 2.4466591802358226, "grad_norm": 0.2140013575553894, "learning_rate": 9.991642266252672e-07, "loss": 0.4086, "step": 2905 }, { "epoch": 2.4475014037057834, "grad_norm": 0.23775893449783325, "learning_rate": 9.96225677166166e-07, "loss": 0.4788, "step": 2906 }, { "epoch": 2.4483436271757437, "grad_norm": 0.21264496445655823, "learning_rate": 9.932909770294542e-07, "loss": 0.4333, "step": 2907 }, { "epoch": 2.4491858506457045, "grad_norm": 0.2076517790555954, "learning_rate": 9.903601290366217e-07, "loss": 0.4324, "step": 2908 }, { "epoch": 2.4500280741156653, "grad_norm": 0.2204224020242691, "learning_rate": 9.87433136005454e-07, "loss": 0.4521, "step": 2909 }, { "epoch": 2.450870297585626, "grad_norm": 0.23424261808395386, "learning_rate": 9.845100007500292e-07, "loss": 0.5205, "step": 2910 }, { "epoch": 2.451712521055587, "grad_norm": 0.18993841111660004, "learning_rate": 9.81590726080721e-07, "loss": 0.3636, "step": 2911 }, { "epoch": 2.4525547445255476, "grad_norm": 0.23375554382801056, "learning_rate": 9.786753148041871e-07, "loss": 0.4705, "step": 2912 }, { "epoch": 2.4533969679955083, "grad_norm": 0.23353996872901917, "learning_rate": 9.757637697233723e-07, "loss": 0.4649, "step": 2913 }, { "epoch": 2.4542391914654687, "grad_norm": 0.2393742799758911, "learning_rate": 9.728560936375032e-07, "loss": 0.4735, "step": 2914 }, { "epoch": 2.4550814149354294, "grad_norm": 0.23265178501605988, "learning_rate": 9.699522893420894e-07, "loss": 0.4495, "step": 2915 }, { "epoch": 2.45592363840539, "grad_norm": 0.20930179953575134, "learning_rate": 9.670523596289138e-07, "loss": 0.4285, "step": 2916 }, { "epoch": 2.456765861875351, "grad_norm": 0.20196101069450378, "learning_rate": 9.641563072860416e-07, "loss": 0.4485, "step": 2917 }, { "epoch": 2.4576080853453117, "grad_norm": 0.22859099507331848, "learning_rate": 9.61264135097799e-07, "loss": 0.4763, "step": 2918 }, { "epoch": 2.4584503088152725, "grad_norm": 0.21594616770744324, "learning_rate": 9.58375845844793e-07, "loss": 0.4322, "step": 2919 }, { "epoch": 2.4592925322852333, "grad_norm": 0.21915721893310547, "learning_rate": 9.55491442303889e-07, "loss": 0.4445, "step": 2920 }, { "epoch": 2.4601347557551936, "grad_norm": 0.21573758125305176, "learning_rate": 9.526109272482237e-07, "loss": 0.4402, "step": 2921 }, { "epoch": 2.4609769792251543, "grad_norm": 0.21713483333587646, "learning_rate": 9.497343034471896e-07, "loss": 0.5065, "step": 2922 }, { "epoch": 2.461819202695115, "grad_norm": 0.21134702861309052, "learning_rate": 9.468615736664405e-07, "loss": 0.4335, "step": 2923 }, { "epoch": 2.462661426165076, "grad_norm": 0.24229125678539276, "learning_rate": 9.439927406678845e-07, "loss": 0.5122, "step": 2924 }, { "epoch": 2.4635036496350367, "grad_norm": 0.22388233244419098, "learning_rate": 9.41127807209688e-07, "loss": 0.4797, "step": 2925 }, { "epoch": 2.464345873104997, "grad_norm": 0.19717565178871155, "learning_rate": 9.382667760462633e-07, "loss": 0.3763, "step": 2926 }, { "epoch": 2.4651880965749577, "grad_norm": 0.2242230474948883, "learning_rate": 9.354096499282728e-07, "loss": 0.4775, "step": 2927 }, { "epoch": 2.4660303200449185, "grad_norm": 0.21561893820762634, "learning_rate": 9.325564316026236e-07, "loss": 0.3865, "step": 2928 }, { "epoch": 2.4668725435148793, "grad_norm": 0.2625260651111603, "learning_rate": 9.297071238124683e-07, "loss": 0.4567, "step": 2929 }, { "epoch": 2.46771476698484, "grad_norm": 0.2294732630252838, "learning_rate": 9.268617292971982e-07, "loss": 0.4774, "step": 2930 }, { "epoch": 2.468556990454801, "grad_norm": 0.23237639665603638, "learning_rate": 9.240202507924412e-07, "loss": 0.4566, "step": 2931 }, { "epoch": 2.4693992139247616, "grad_norm": 0.23163242638111115, "learning_rate": 9.211826910300598e-07, "loss": 0.4513, "step": 2932 }, { "epoch": 2.470241437394722, "grad_norm": 0.22733403742313385, "learning_rate": 9.183490527381539e-07, "loss": 0.4837, "step": 2933 }, { "epoch": 2.4710836608646827, "grad_norm": 0.20523874461650848, "learning_rate": 9.155193386410466e-07, "loss": 0.439, "step": 2934 }, { "epoch": 2.4719258843346434, "grad_norm": 0.21069003641605377, "learning_rate": 9.126935514592949e-07, "loss": 0.4475, "step": 2935 }, { "epoch": 2.472768107804604, "grad_norm": 0.23113186657428741, "learning_rate": 9.098716939096719e-07, "loss": 0.4606, "step": 2936 }, { "epoch": 2.473610331274565, "grad_norm": 0.23173217475414276, "learning_rate": 9.070537687051817e-07, "loss": 0.5216, "step": 2937 }, { "epoch": 2.4744525547445253, "grad_norm": 0.21517056226730347, "learning_rate": 9.042397785550405e-07, "loss": 0.4667, "step": 2938 }, { "epoch": 2.475294778214486, "grad_norm": 0.23170128464698792, "learning_rate": 9.014297261646876e-07, "loss": 0.4474, "step": 2939 }, { "epoch": 2.476137001684447, "grad_norm": 0.22112274169921875, "learning_rate": 8.986236142357707e-07, "loss": 0.4283, "step": 2940 }, { "epoch": 2.4769792251544076, "grad_norm": 0.22455130517482758, "learning_rate": 8.958214454661529e-07, "loss": 0.4846, "step": 2941 }, { "epoch": 2.4778214486243684, "grad_norm": 0.22343380749225616, "learning_rate": 8.930232225499025e-07, "loss": 0.4687, "step": 2942 }, { "epoch": 2.478663672094329, "grad_norm": 0.22266903519630432, "learning_rate": 8.902289481772996e-07, "loss": 0.4556, "step": 2943 }, { "epoch": 2.47950589556429, "grad_norm": 0.22243323922157288, "learning_rate": 8.874386250348232e-07, "loss": 0.4603, "step": 2944 }, { "epoch": 2.4803481190342502, "grad_norm": 0.20521578192710876, "learning_rate": 8.846522558051563e-07, "loss": 0.4692, "step": 2945 }, { "epoch": 2.481190342504211, "grad_norm": 0.2179163545370102, "learning_rate": 8.818698431671774e-07, "loss": 0.4653, "step": 2946 }, { "epoch": 2.4820325659741718, "grad_norm": 0.23489288985729218, "learning_rate": 8.790913897959663e-07, "loss": 0.4353, "step": 2947 }, { "epoch": 2.4828747894441325, "grad_norm": 0.23459428548812866, "learning_rate": 8.763168983627912e-07, "loss": 0.467, "step": 2948 }, { "epoch": 2.4837170129140933, "grad_norm": 0.2222980111837387, "learning_rate": 8.735463715351139e-07, "loss": 0.4093, "step": 2949 }, { "epoch": 2.484559236384054, "grad_norm": 0.22328777611255646, "learning_rate": 8.70779811976582e-07, "loss": 0.4759, "step": 2950 }, { "epoch": 2.485401459854015, "grad_norm": 0.21624572575092316, "learning_rate": 8.680172223470329e-07, "loss": 0.4511, "step": 2951 }, { "epoch": 2.486243683323975, "grad_norm": 0.22853726148605347, "learning_rate": 8.652586053024836e-07, "loss": 0.4303, "step": 2952 }, { "epoch": 2.487085906793936, "grad_norm": 0.22206854820251465, "learning_rate": 8.625039634951354e-07, "loss": 0.4798, "step": 2953 }, { "epoch": 2.4879281302638967, "grad_norm": 0.20907264947891235, "learning_rate": 8.597532995733615e-07, "loss": 0.4066, "step": 2954 }, { "epoch": 2.4887703537338575, "grad_norm": 0.22885018587112427, "learning_rate": 8.570066161817176e-07, "loss": 0.4636, "step": 2955 }, { "epoch": 2.4896125772038182, "grad_norm": 0.22020630538463593, "learning_rate": 8.542639159609278e-07, "loss": 0.4488, "step": 2956 }, { "epoch": 2.4904548006737786, "grad_norm": 0.20688983798027039, "learning_rate": 8.515252015478915e-07, "loss": 0.4354, "step": 2957 }, { "epoch": 2.4912970241437393, "grad_norm": 0.22598083317279816, "learning_rate": 8.487904755756676e-07, "loss": 0.4608, "step": 2958 }, { "epoch": 2.4921392476137, "grad_norm": 0.23113945126533508, "learning_rate": 8.460597406734905e-07, "loss": 0.4471, "step": 2959 }, { "epoch": 2.492981471083661, "grad_norm": 0.21485038101673126, "learning_rate": 8.433329994667488e-07, "loss": 0.4389, "step": 2960 }, { "epoch": 2.4938236945536216, "grad_norm": 0.22982186079025269, "learning_rate": 8.406102545769989e-07, "loss": 0.4892, "step": 2961 }, { "epoch": 2.4946659180235824, "grad_norm": 0.1976630985736847, "learning_rate": 8.378915086219497e-07, "loss": 0.4053, "step": 2962 }, { "epoch": 2.495508141493543, "grad_norm": 0.21494325995445251, "learning_rate": 8.351767642154673e-07, "loss": 0.455, "step": 2963 }, { "epoch": 2.4963503649635035, "grad_norm": 0.22562402486801147, "learning_rate": 8.324660239675697e-07, "loss": 0.4639, "step": 2964 }, { "epoch": 2.4971925884334643, "grad_norm": 0.21593697369098663, "learning_rate": 8.297592904844282e-07, "loss": 0.4241, "step": 2965 }, { "epoch": 2.498034811903425, "grad_norm": 0.2131444662809372, "learning_rate": 8.270565663683583e-07, "loss": 0.4432, "step": 2966 }, { "epoch": 2.498877035373386, "grad_norm": 0.21333451569080353, "learning_rate": 8.243578542178227e-07, "loss": 0.4119, "step": 2967 }, { "epoch": 2.4997192588433466, "grad_norm": 0.22894816100597382, "learning_rate": 8.216631566274252e-07, "loss": 0.4484, "step": 2968 }, { "epoch": 2.500561482313307, "grad_norm": 0.2555752396583557, "learning_rate": 8.189724761879131e-07, "loss": 0.4769, "step": 2969 }, { "epoch": 2.501403705783268, "grad_norm": 0.21407730877399445, "learning_rate": 8.16285815486168e-07, "loss": 0.4544, "step": 2970 }, { "epoch": 2.5022459292532284, "grad_norm": 0.2127041071653366, "learning_rate": 8.13603177105211e-07, "loss": 0.4767, "step": 2971 }, { "epoch": 2.503088152723189, "grad_norm": 0.21714483201503754, "learning_rate": 8.109245636241892e-07, "loss": 0.4667, "step": 2972 }, { "epoch": 2.50393037619315, "grad_norm": 0.20753134787082672, "learning_rate": 8.082499776183883e-07, "loss": 0.367, "step": 2973 }, { "epoch": 2.5047725996631107, "grad_norm": 0.23908479511737823, "learning_rate": 8.05579421659215e-07, "loss": 0.5021, "step": 2974 }, { "epoch": 2.5056148231330715, "grad_norm": 0.22070732712745667, "learning_rate": 8.029128983142076e-07, "loss": 0.4316, "step": 2975 }, { "epoch": 2.506457046603032, "grad_norm": 0.20883601903915405, "learning_rate": 8.002504101470204e-07, "loss": 0.4117, "step": 2976 }, { "epoch": 2.5072992700729926, "grad_norm": 0.2151171863079071, "learning_rate": 7.975919597174342e-07, "loss": 0.4363, "step": 2977 }, { "epoch": 2.5081414935429533, "grad_norm": 0.2177562713623047, "learning_rate": 7.949375495813439e-07, "loss": 0.4384, "step": 2978 }, { "epoch": 2.508983717012914, "grad_norm": 0.20052863657474518, "learning_rate": 7.922871822907641e-07, "loss": 0.4392, "step": 2979 }, { "epoch": 2.509825940482875, "grad_norm": 0.19423501193523407, "learning_rate": 7.896408603938194e-07, "loss": 0.4016, "step": 2980 }, { "epoch": 2.5106681639528357, "grad_norm": 0.23165856301784515, "learning_rate": 7.869985864347424e-07, "loss": 0.51, "step": 2981 }, { "epoch": 2.5115103874227964, "grad_norm": 0.21626028418540955, "learning_rate": 7.843603629538804e-07, "loss": 0.4667, "step": 2982 }, { "epoch": 2.5123526108927567, "grad_norm": 0.2524791657924652, "learning_rate": 7.817261924876812e-07, "loss": 0.4003, "step": 2983 }, { "epoch": 2.5131948343627175, "grad_norm": 0.21956191956996918, "learning_rate": 7.790960775687001e-07, "loss": 0.462, "step": 2984 }, { "epoch": 2.5140370578326783, "grad_norm": 0.2243330031633377, "learning_rate": 7.764700207255904e-07, "loss": 0.4555, "step": 2985 }, { "epoch": 2.514879281302639, "grad_norm": 0.21564090251922607, "learning_rate": 7.738480244831042e-07, "loss": 0.452, "step": 2986 }, { "epoch": 2.5157215047726, "grad_norm": 0.201605886220932, "learning_rate": 7.71230091362089e-07, "loss": 0.4475, "step": 2987 }, { "epoch": 2.51656372824256, "grad_norm": 0.20789620280265808, "learning_rate": 7.686162238794898e-07, "loss": 0.4277, "step": 2988 }, { "epoch": 2.517405951712521, "grad_norm": 0.23049284517765045, "learning_rate": 7.660064245483384e-07, "loss": 0.4523, "step": 2989 }, { "epoch": 2.5182481751824817, "grad_norm": 0.2123447209596634, "learning_rate": 7.634006958777568e-07, "loss": 0.4275, "step": 2990 }, { "epoch": 2.5190903986524424, "grad_norm": 0.21662916243076324, "learning_rate": 7.607990403729526e-07, "loss": 0.4649, "step": 2991 }, { "epoch": 2.519932622122403, "grad_norm": 0.20006492733955383, "learning_rate": 7.582014605352206e-07, "loss": 0.4288, "step": 2992 }, { "epoch": 2.520774845592364, "grad_norm": 0.2165728062391281, "learning_rate": 7.556079588619341e-07, "loss": 0.4471, "step": 2993 }, { "epoch": 2.5216170690623247, "grad_norm": 0.20059868693351746, "learning_rate": 7.530185378465459e-07, "loss": 0.4197, "step": 2994 }, { "epoch": 2.522459292532285, "grad_norm": 0.24049893021583557, "learning_rate": 7.504331999785852e-07, "loss": 0.4863, "step": 2995 }, { "epoch": 2.523301516002246, "grad_norm": 0.20732012391090393, "learning_rate": 7.47851947743658e-07, "loss": 0.4281, "step": 2996 }, { "epoch": 2.5241437394722066, "grad_norm": 0.22026260197162628, "learning_rate": 7.452747836234392e-07, "loss": 0.4531, "step": 2997 }, { "epoch": 2.5249859629421674, "grad_norm": 0.1953624039888382, "learning_rate": 7.427017100956779e-07, "loss": 0.3904, "step": 2998 }, { "epoch": 2.525828186412128, "grad_norm": 0.2331678867340088, "learning_rate": 7.401327296341826e-07, "loss": 0.4362, "step": 2999 }, { "epoch": 2.5266704098820885, "grad_norm": 0.2290327548980713, "learning_rate": 7.375678447088347e-07, "loss": 0.4853, "step": 3000 }, { "epoch": 2.5275126333520497, "grad_norm": 0.2145089954137802, "learning_rate": 7.350070577855716e-07, "loss": 0.4681, "step": 3001 }, { "epoch": 2.52835485682201, "grad_norm": 0.21874764561653137, "learning_rate": 7.324503713263975e-07, "loss": 0.4581, "step": 3002 }, { "epoch": 2.5291970802919708, "grad_norm": 0.20200660824775696, "learning_rate": 7.298977877893688e-07, "loss": 0.4607, "step": 3003 }, { "epoch": 2.5300393037619315, "grad_norm": 0.21366097033023834, "learning_rate": 7.273493096285989e-07, "loss": 0.4435, "step": 3004 }, { "epoch": 2.5308815272318923, "grad_norm": 0.22368764877319336, "learning_rate": 7.24804939294253e-07, "loss": 0.4328, "step": 3005 }, { "epoch": 2.531723750701853, "grad_norm": 0.21564538776874542, "learning_rate": 7.222646792325516e-07, "loss": 0.4318, "step": 3006 }, { "epoch": 2.5325659741718134, "grad_norm": 0.23654761910438538, "learning_rate": 7.197285318857584e-07, "loss": 0.4876, "step": 3007 }, { "epoch": 2.533408197641774, "grad_norm": 0.24405063688755035, "learning_rate": 7.171964996921848e-07, "loss": 0.4772, "step": 3008 }, { "epoch": 2.534250421111735, "grad_norm": 0.21160666644573212, "learning_rate": 7.146685850861851e-07, "loss": 0.4502, "step": 3009 }, { "epoch": 2.5350926445816957, "grad_norm": 0.23612913489341736, "learning_rate": 7.121447904981571e-07, "loss": 0.4831, "step": 3010 }, { "epoch": 2.5359348680516565, "grad_norm": 0.21519958972930908, "learning_rate": 7.096251183545355e-07, "loss": 0.4339, "step": 3011 }, { "epoch": 2.5367770915216172, "grad_norm": 0.22563189268112183, "learning_rate": 7.071095710777925e-07, "loss": 0.4858, "step": 3012 }, { "epoch": 2.537619314991578, "grad_norm": 0.2238023728132248, "learning_rate": 7.045981510864319e-07, "loss": 0.4977, "step": 3013 }, { "epoch": 2.5384615384615383, "grad_norm": 0.2190610021352768, "learning_rate": 7.02090860794995e-07, "loss": 0.4599, "step": 3014 }, { "epoch": 2.539303761931499, "grad_norm": 0.19810840487480164, "learning_rate": 6.995877026140468e-07, "loss": 0.4551, "step": 3015 }, { "epoch": 2.54014598540146, "grad_norm": 0.20426884293556213, "learning_rate": 6.970886789501851e-07, "loss": 0.4184, "step": 3016 }, { "epoch": 2.5409882088714206, "grad_norm": 0.23381103575229645, "learning_rate": 6.945937922060259e-07, "loss": 0.504, "step": 3017 }, { "epoch": 2.5418304323413814, "grad_norm": 0.22545810043811798, "learning_rate": 6.921030447802146e-07, "loss": 0.4565, "step": 3018 }, { "epoch": 2.5426726558113417, "grad_norm": 0.22985078394412994, "learning_rate": 6.896164390674125e-07, "loss": 0.4471, "step": 3019 }, { "epoch": 2.5435148792813025, "grad_norm": 0.20497766137123108, "learning_rate": 6.871339774583025e-07, "loss": 0.4317, "step": 3020 }, { "epoch": 2.5443571027512633, "grad_norm": 0.21279610693454742, "learning_rate": 6.846556623395795e-07, "loss": 0.4677, "step": 3021 }, { "epoch": 2.545199326221224, "grad_norm": 0.21326163411140442, "learning_rate": 6.821814960939549e-07, "loss": 0.4513, "step": 3022 }, { "epoch": 2.546041549691185, "grad_norm": 0.19330334663391113, "learning_rate": 6.797114811001482e-07, "loss": 0.4112, "step": 3023 }, { "epoch": 2.5468837731611456, "grad_norm": 0.2364133596420288, "learning_rate": 6.772456197328919e-07, "loss": 0.5047, "step": 3024 }, { "epoch": 2.5477259966311063, "grad_norm": 0.21376802027225494, "learning_rate": 6.74783914362922e-07, "loss": 0.4585, "step": 3025 }, { "epoch": 2.5485682201010667, "grad_norm": 0.20153576135635376, "learning_rate": 6.723263673569796e-07, "loss": 0.403, "step": 3026 }, { "epoch": 2.5494104435710274, "grad_norm": 0.22697269916534424, "learning_rate": 6.698729810778065e-07, "loss": 0.4483, "step": 3027 }, { "epoch": 2.550252667040988, "grad_norm": 0.22424349188804626, "learning_rate": 6.674237578841486e-07, "loss": 0.4983, "step": 3028 }, { "epoch": 2.551094890510949, "grad_norm": 0.20160838961601257, "learning_rate": 6.649787001307451e-07, "loss": 0.3968, "step": 3029 }, { "epoch": 2.5519371139809097, "grad_norm": 0.23022699356079102, "learning_rate": 6.625378101683317e-07, "loss": 0.4559, "step": 3030 }, { "epoch": 2.55277933745087, "grad_norm": 0.2061414271593094, "learning_rate": 6.601010903436355e-07, "loss": 0.4378, "step": 3031 }, { "epoch": 2.5536215609208313, "grad_norm": 0.20317454636096954, "learning_rate": 6.57668542999379e-07, "loss": 0.4228, "step": 3032 }, { "epoch": 2.5544637843907916, "grad_norm": 0.22320309281349182, "learning_rate": 6.552401704742678e-07, "loss": 0.4476, "step": 3033 }, { "epoch": 2.5553060078607523, "grad_norm": 0.22818531095981598, "learning_rate": 6.528159751029988e-07, "loss": 0.5235, "step": 3034 }, { "epoch": 2.556148231330713, "grad_norm": 0.21867649257183075, "learning_rate": 6.503959592162468e-07, "loss": 0.4481, "step": 3035 }, { "epoch": 2.556990454800674, "grad_norm": 0.25423040986061096, "learning_rate": 6.479801251406748e-07, "loss": 0.5466, "step": 3036 }, { "epoch": 2.5578326782706347, "grad_norm": 0.20367974042892456, "learning_rate": 6.455684751989194e-07, "loss": 0.4456, "step": 3037 }, { "epoch": 2.558674901740595, "grad_norm": 0.2432841956615448, "learning_rate": 6.431610117095999e-07, "loss": 0.4755, "step": 3038 }, { "epoch": 2.5595171252105557, "grad_norm": 0.2185555398464203, "learning_rate": 6.40757736987307e-07, "loss": 0.4525, "step": 3039 }, { "epoch": 2.5603593486805165, "grad_norm": 0.2128622978925705, "learning_rate": 6.383586533426051e-07, "loss": 0.442, "step": 3040 }, { "epoch": 2.5612015721504773, "grad_norm": 0.2205616533756256, "learning_rate": 6.359637630820292e-07, "loss": 0.486, "step": 3041 }, { "epoch": 2.562043795620438, "grad_norm": 0.21860095858573914, "learning_rate": 6.335730685080838e-07, "loss": 0.4396, "step": 3042 }, { "epoch": 2.562886019090399, "grad_norm": 0.21992284059524536, "learning_rate": 6.311865719192384e-07, "loss": 0.4888, "step": 3043 }, { "epoch": 2.5637282425603596, "grad_norm": 0.19925783574581146, "learning_rate": 6.28804275609926e-07, "loss": 0.4109, "step": 3044 }, { "epoch": 2.56457046603032, "grad_norm": 0.22550450265407562, "learning_rate": 6.26426181870542e-07, "loss": 0.4542, "step": 3045 }, { "epoch": 2.5654126895002807, "grad_norm": 0.22123433649539948, "learning_rate": 6.24052292987442e-07, "loss": 0.4539, "step": 3046 }, { "epoch": 2.5662549129702414, "grad_norm": 0.21344059705734253, "learning_rate": 6.216826112429391e-07, "loss": 0.4636, "step": 3047 }, { "epoch": 2.567097136440202, "grad_norm": 0.21459849178791046, "learning_rate": 6.193171389152996e-07, "loss": 0.4586, "step": 3048 }, { "epoch": 2.567939359910163, "grad_norm": 0.22050870954990387, "learning_rate": 6.169558782787438e-07, "loss": 0.461, "step": 3049 }, { "epoch": 2.5687815833801233, "grad_norm": 0.22548720240592957, "learning_rate": 6.145988316034441e-07, "loss": 0.4697, "step": 3050 }, { "epoch": 2.569623806850084, "grad_norm": 0.21141058206558228, "learning_rate": 6.122460011555187e-07, "loss": 0.4619, "step": 3051 }, { "epoch": 2.570466030320045, "grad_norm": 0.21433334052562714, "learning_rate": 6.098973891970373e-07, "loss": 0.431, "step": 3052 }, { "epoch": 2.5713082537900056, "grad_norm": 0.20310567319393158, "learning_rate": 6.075529979860068e-07, "loss": 0.4185, "step": 3053 }, { "epoch": 2.5721504772599664, "grad_norm": 0.2397291511297226, "learning_rate": 6.052128297763804e-07, "loss": 0.4547, "step": 3054 }, { "epoch": 2.572992700729927, "grad_norm": 0.21833814680576324, "learning_rate": 6.028768868180523e-07, "loss": 0.4689, "step": 3055 }, { "epoch": 2.573834924199888, "grad_norm": 0.22176292538642883, "learning_rate": 6.005451713568505e-07, "loss": 0.4374, "step": 3056 }, { "epoch": 2.5746771476698482, "grad_norm": 0.22804497182369232, "learning_rate": 5.982176856345445e-07, "loss": 0.4526, "step": 3057 }, { "epoch": 2.575519371139809, "grad_norm": 0.22202150523662567, "learning_rate": 5.958944318888287e-07, "loss": 0.453, "step": 3058 }, { "epoch": 2.5763615946097698, "grad_norm": 0.22391051054000854, "learning_rate": 5.935754123533378e-07, "loss": 0.4387, "step": 3059 }, { "epoch": 2.5772038180797305, "grad_norm": 0.2108563631772995, "learning_rate": 5.912606292576284e-07, "loss": 0.4436, "step": 3060 }, { "epoch": 2.5780460415496913, "grad_norm": 0.22444257140159607, "learning_rate": 5.889500848271901e-07, "loss": 0.4095, "step": 3061 }, { "epoch": 2.5788882650196516, "grad_norm": 0.2147800475358963, "learning_rate": 5.866437812834325e-07, "loss": 0.4839, "step": 3062 }, { "epoch": 2.579730488489613, "grad_norm": 0.2193182408809662, "learning_rate": 5.843417208436908e-07, "loss": 0.4751, "step": 3063 }, { "epoch": 2.580572711959573, "grad_norm": 0.2143584042787552, "learning_rate": 5.82043905721218e-07, "loss": 0.4779, "step": 3064 }, { "epoch": 2.581414935429534, "grad_norm": 0.21984893083572388, "learning_rate": 5.797503381251896e-07, "loss": 0.4566, "step": 3065 }, { "epoch": 2.5822571588994947, "grad_norm": 0.21764080226421356, "learning_rate": 5.774610202606939e-07, "loss": 0.4686, "step": 3066 }, { "epoch": 2.5830993823694555, "grad_norm": 0.2286331057548523, "learning_rate": 5.751759543287355e-07, "loss": 0.4753, "step": 3067 }, { "epoch": 2.5839416058394162, "grad_norm": 0.2097063660621643, "learning_rate": 5.728951425262292e-07, "loss": 0.4004, "step": 3068 }, { "epoch": 2.5847838293093766, "grad_norm": 0.20624986290931702, "learning_rate": 5.706185870460018e-07, "loss": 0.4357, "step": 3069 }, { "epoch": 2.5856260527793373, "grad_norm": 0.21064163744449615, "learning_rate": 5.683462900767873e-07, "loss": 0.4506, "step": 3070 }, { "epoch": 2.586468276249298, "grad_norm": 0.20571066439151764, "learning_rate": 5.660782538032245e-07, "loss": 0.4418, "step": 3071 }, { "epoch": 2.587310499719259, "grad_norm": 0.2190343141555786, "learning_rate": 5.63814480405856e-07, "loss": 0.4508, "step": 3072 }, { "epoch": 2.5881527231892196, "grad_norm": 0.2067871391773224, "learning_rate": 5.61554972061128e-07, "loss": 0.432, "step": 3073 }, { "epoch": 2.5889949466591804, "grad_norm": 0.19997844099998474, "learning_rate": 5.592997309413834e-07, "loss": 0.4258, "step": 3074 }, { "epoch": 2.589837170129141, "grad_norm": 0.22065198421478271, "learning_rate": 5.570487592148666e-07, "loss": 0.479, "step": 3075 }, { "epoch": 2.5906793935991015, "grad_norm": 0.20657001435756683, "learning_rate": 5.548020590457098e-07, "loss": 0.4471, "step": 3076 }, { "epoch": 2.5915216170690623, "grad_norm": 0.21454353630542755, "learning_rate": 5.525596325939469e-07, "loss": 0.4588, "step": 3077 }, { "epoch": 2.592363840539023, "grad_norm": 0.21188409626483917, "learning_rate": 5.503214820154979e-07, "loss": 0.4195, "step": 3078 }, { "epoch": 2.593206064008984, "grad_norm": 0.2236805558204651, "learning_rate": 5.480876094621734e-07, "loss": 0.4438, "step": 3079 }, { "epoch": 2.5940482874789446, "grad_norm": 0.21946339309215546, "learning_rate": 5.458580170816713e-07, "loss": 0.4782, "step": 3080 }, { "epoch": 2.594890510948905, "grad_norm": 0.20850329101085663, "learning_rate": 5.436327070175729e-07, "loss": 0.4078, "step": 3081 }, { "epoch": 2.5957327344188657, "grad_norm": 0.26670852303504944, "learning_rate": 5.414116814093434e-07, "loss": 0.4915, "step": 3082 }, { "epoch": 2.5965749578888264, "grad_norm": 0.21706858277320862, "learning_rate": 5.391949423923298e-07, "loss": 0.4078, "step": 3083 }, { "epoch": 2.597417181358787, "grad_norm": 0.22960534691810608, "learning_rate": 5.369824920977567e-07, "loss": 0.4382, "step": 3084 }, { "epoch": 2.598259404828748, "grad_norm": 0.2265661209821701, "learning_rate": 5.347743326527255e-07, "loss": 0.4748, "step": 3085 }, { "epoch": 2.5991016282987087, "grad_norm": 0.22000598907470703, "learning_rate": 5.325704661802106e-07, "loss": 0.4843, "step": 3086 }, { "epoch": 2.5999438517686695, "grad_norm": 0.2288132607936859, "learning_rate": 5.303708947990638e-07, "loss": 0.4665, "step": 3087 }, { "epoch": 2.60078607523863, "grad_norm": 0.21303462982177734, "learning_rate": 5.281756206240035e-07, "loss": 0.4202, "step": 3088 }, { "epoch": 2.6016282987085906, "grad_norm": 0.21691040694713593, "learning_rate": 5.25984645765617e-07, "loss": 0.4741, "step": 3089 }, { "epoch": 2.6024705221785513, "grad_norm": 0.2075762301683426, "learning_rate": 5.237979723303582e-07, "loss": 0.4447, "step": 3090 }, { "epoch": 2.603312745648512, "grad_norm": 0.2183479517698288, "learning_rate": 5.216156024205482e-07, "loss": 0.4543, "step": 3091 }, { "epoch": 2.604154969118473, "grad_norm": 0.22463111579418182, "learning_rate": 5.194375381343664e-07, "loss": 0.433, "step": 3092 }, { "epoch": 2.604997192588433, "grad_norm": 0.24642163515090942, "learning_rate": 5.172637815658583e-07, "loss": 0.4704, "step": 3093 }, { "epoch": 2.6058394160583944, "grad_norm": 0.2192465364933014, "learning_rate": 5.150943348049198e-07, "loss": 0.4284, "step": 3094 }, { "epoch": 2.6066816395283547, "grad_norm": 0.23213103413581848, "learning_rate": 5.129291999373109e-07, "loss": 0.516, "step": 3095 }, { "epoch": 2.6075238629983155, "grad_norm": 0.20792293548583984, "learning_rate": 5.107683790446411e-07, "loss": 0.4246, "step": 3096 }, { "epoch": 2.6083660864682763, "grad_norm": 0.21721170842647552, "learning_rate": 5.086118742043761e-07, "loss": 0.4821, "step": 3097 }, { "epoch": 2.609208309938237, "grad_norm": 0.20097333192825317, "learning_rate": 5.064596874898292e-07, "loss": 0.4019, "step": 3098 }, { "epoch": 2.610050533408198, "grad_norm": 0.2210666537284851, "learning_rate": 5.04311820970163e-07, "loss": 0.4882, "step": 3099 }, { "epoch": 2.610892756878158, "grad_norm": 0.21812084317207336, "learning_rate": 5.021682767103858e-07, "loss": 0.4514, "step": 3100 }, { "epoch": 2.611734980348119, "grad_norm": 0.20964927971363068, "learning_rate": 5.000290567713533e-07, "loss": 0.4508, "step": 3101 }, { "epoch": 2.6125772038180797, "grad_norm": 0.2203534096479416, "learning_rate": 4.978941632097612e-07, "loss": 0.4334, "step": 3102 }, { "epoch": 2.6134194272880404, "grad_norm": 0.22487181425094604, "learning_rate": 4.957635980781445e-07, "loss": 0.4337, "step": 3103 }, { "epoch": 2.614261650758001, "grad_norm": 0.22125323116779327, "learning_rate": 4.936373634248792e-07, "loss": 0.4373, "step": 3104 }, { "epoch": 2.615103874227962, "grad_norm": 0.2111453264951706, "learning_rate": 4.915154612941781e-07, "loss": 0.4268, "step": 3105 }, { "epoch": 2.6159460976979227, "grad_norm": 0.23265127837657928, "learning_rate": 4.893978937260868e-07, "loss": 0.462, "step": 3106 }, { "epoch": 2.616788321167883, "grad_norm": 0.21938958764076233, "learning_rate": 4.872846627564842e-07, "loss": 0.4841, "step": 3107 }, { "epoch": 2.617630544637844, "grad_norm": 0.21520966291427612, "learning_rate": 4.851757704170796e-07, "loss": 0.4755, "step": 3108 }, { "epoch": 2.6184727681078046, "grad_norm": 0.23749597370624542, "learning_rate": 4.830712187354125e-07, "loss": 0.4707, "step": 3109 }, { "epoch": 2.6193149915777654, "grad_norm": 0.21514321863651276, "learning_rate": 4.809710097348469e-07, "loss": 0.4051, "step": 3110 }, { "epoch": 2.620157215047726, "grad_norm": 0.22925396263599396, "learning_rate": 4.788751454345763e-07, "loss": 0.4322, "step": 3111 }, { "epoch": 2.6209994385176865, "grad_norm": 0.2313118875026703, "learning_rate": 4.767836278496085e-07, "loss": 0.4593, "step": 3112 }, { "epoch": 2.6218416619876472, "grad_norm": 0.20012398064136505, "learning_rate": 4.7469645899078153e-07, "loss": 0.4328, "step": 3113 }, { "epoch": 2.622683885457608, "grad_norm": 0.21433241665363312, "learning_rate": 4.726136408647464e-07, "loss": 0.4568, "step": 3114 }, { "epoch": 2.6235261089275688, "grad_norm": 0.20200945436954498, "learning_rate": 4.7053517547397454e-07, "loss": 0.443, "step": 3115 }, { "epoch": 2.6243683323975295, "grad_norm": 0.20266477763652802, "learning_rate": 4.6846106481675035e-07, "loss": 0.4516, "step": 3116 }, { "epoch": 2.6252105558674903, "grad_norm": 0.19698618352413177, "learning_rate": 4.663913108871726e-07, "loss": 0.4322, "step": 3117 }, { "epoch": 2.626052779337451, "grad_norm": 0.21505902707576752, "learning_rate": 4.643259156751506e-07, "loss": 0.4676, "step": 3118 }, { "epoch": 2.6268950028074114, "grad_norm": 0.22126758098602295, "learning_rate": 4.622648811664049e-07, "loss": 0.4823, "step": 3119 }, { "epoch": 2.627737226277372, "grad_norm": 0.2000422477722168, "learning_rate": 4.60208209342462e-07, "loss": 0.431, "step": 3120 }, { "epoch": 2.628579449747333, "grad_norm": 0.21291717886924744, "learning_rate": 4.581559021806542e-07, "loss": 0.4637, "step": 3121 }, { "epoch": 2.6294216732172937, "grad_norm": 0.22120904922485352, "learning_rate": 4.561079616541164e-07, "loss": 0.426, "step": 3122 }, { "epoch": 2.6302638966872545, "grad_norm": 0.21242111921310425, "learning_rate": 4.540643897317887e-07, "loss": 0.4441, "step": 3123 }, { "epoch": 2.631106120157215, "grad_norm": 0.20309428870677948, "learning_rate": 4.520251883784077e-07, "loss": 0.4161, "step": 3124 }, { "epoch": 2.631948343627176, "grad_norm": 0.22438295185565948, "learning_rate": 4.4999035955450964e-07, "loss": 0.523, "step": 3125 }, { "epoch": 2.6327905670971363, "grad_norm": 0.21269455552101135, "learning_rate": 4.4795990521642684e-07, "loss": 0.4392, "step": 3126 }, { "epoch": 2.633632790567097, "grad_norm": 0.21824797987937927, "learning_rate": 4.459338273162844e-07, "loss": 0.4958, "step": 3127 }, { "epoch": 2.634475014037058, "grad_norm": 0.20907874405384064, "learning_rate": 4.439121278020031e-07, "loss": 0.47, "step": 3128 }, { "epoch": 2.6353172375070186, "grad_norm": 0.20027025043964386, "learning_rate": 4.4189480861729137e-07, "loss": 0.4214, "step": 3129 }, { "epoch": 2.6361594609769794, "grad_norm": 0.20055273175239563, "learning_rate": 4.3988187170164673e-07, "loss": 0.4242, "step": 3130 }, { "epoch": 2.6370016844469397, "grad_norm": 0.20994049310684204, "learning_rate": 4.378733189903528e-07, "loss": 0.4587, "step": 3131 }, { "epoch": 2.6378439079169005, "grad_norm": 0.2281000167131424, "learning_rate": 4.35869152414482e-07, "loss": 0.4691, "step": 3132 }, { "epoch": 2.6386861313868613, "grad_norm": 0.23304086923599243, "learning_rate": 4.3386937390088366e-07, "loss": 0.5011, "step": 3133 }, { "epoch": 2.639528354856822, "grad_norm": 0.21226024627685547, "learning_rate": 4.3187398537219593e-07, "loss": 0.426, "step": 3134 }, { "epoch": 2.640370578326783, "grad_norm": 0.20565003156661987, "learning_rate": 4.2988298874682754e-07, "loss": 0.4298, "step": 3135 }, { "epoch": 2.6412128017967436, "grad_norm": 0.2134355902671814, "learning_rate": 4.278963859389723e-07, "loss": 0.472, "step": 3136 }, { "epoch": 2.6420550252667043, "grad_norm": 0.20791460573673248, "learning_rate": 4.259141788585947e-07, "loss": 0.4638, "step": 3137 }, { "epoch": 2.6428972487366647, "grad_norm": 0.20309992134571075, "learning_rate": 4.239363694114368e-07, "loss": 0.4491, "step": 3138 }, { "epoch": 2.6437394722066254, "grad_norm": 0.2237853854894638, "learning_rate": 4.2196295949901044e-07, "loss": 0.4337, "step": 3139 }, { "epoch": 2.644581695676586, "grad_norm": 0.19258077442646027, "learning_rate": 4.1999395101859796e-07, "loss": 0.427, "step": 3140 }, { "epoch": 2.645423919146547, "grad_norm": 0.2164791375398636, "learning_rate": 4.1802934586324897e-07, "loss": 0.4735, "step": 3141 }, { "epoch": 2.6462661426165077, "grad_norm": 0.22637014091014862, "learning_rate": 4.160691459217825e-07, "loss": 0.442, "step": 3142 }, { "epoch": 2.647108366086468, "grad_norm": 0.21842758357524872, "learning_rate": 4.1411335307878056e-07, "loss": 0.4389, "step": 3143 }, { "epoch": 2.647950589556429, "grad_norm": 0.22120113670825958, "learning_rate": 4.1216196921458786e-07, "loss": 0.464, "step": 3144 }, { "epoch": 2.6487928130263896, "grad_norm": 0.2194875329732895, "learning_rate": 4.102149962053098e-07, "loss": 0.4612, "step": 3145 }, { "epoch": 2.6496350364963503, "grad_norm": 0.2001914083957672, "learning_rate": 4.0827243592281294e-07, "loss": 0.4422, "step": 3146 }, { "epoch": 2.650477259966311, "grad_norm": 0.24458694458007812, "learning_rate": 4.0633429023472004e-07, "loss": 0.4905, "step": 3147 }, { "epoch": 2.651319483436272, "grad_norm": 0.20347802340984344, "learning_rate": 4.044005610044094e-07, "loss": 0.4366, "step": 3148 }, { "epoch": 2.6521617069062327, "grad_norm": 0.21278584003448486, "learning_rate": 4.0247125009101275e-07, "loss": 0.4814, "step": 3149 }, { "epoch": 2.653003930376193, "grad_norm": 0.20905163884162903, "learning_rate": 4.0054635934941633e-07, "loss": 0.4742, "step": 3150 }, { "epoch": 2.6538461538461537, "grad_norm": 0.19769923388957977, "learning_rate": 3.986258906302543e-07, "loss": 0.4293, "step": 3151 }, { "epoch": 2.6546883773161145, "grad_norm": 0.22431741654872894, "learning_rate": 3.967098457799118e-07, "loss": 0.4894, "step": 3152 }, { "epoch": 2.6555306007860753, "grad_norm": 0.21026840806007385, "learning_rate": 3.947982266405159e-07, "loss": 0.4542, "step": 3153 }, { "epoch": 2.656372824256036, "grad_norm": 0.23965893685817719, "learning_rate": 3.928910350499454e-07, "loss": 0.4694, "step": 3154 }, { "epoch": 2.6572150477259964, "grad_norm": 0.19927382469177246, "learning_rate": 3.9098827284181683e-07, "loss": 0.411, "step": 3155 }, { "epoch": 2.6580572711959576, "grad_norm": 0.22473274171352386, "learning_rate": 3.890899418454913e-07, "loss": 0.4614, "step": 3156 }, { "epoch": 2.658899494665918, "grad_norm": 0.2284875363111496, "learning_rate": 3.871960438860689e-07, "loss": 0.4852, "step": 3157 }, { "epoch": 2.6597417181358787, "grad_norm": 0.213897243142128, "learning_rate": 3.8530658078438754e-07, "loss": 0.4523, "step": 3158 }, { "epoch": 2.6605839416058394, "grad_norm": 0.21387426555156708, "learning_rate": 3.834215543570191e-07, "loss": 0.4549, "step": 3159 }, { "epoch": 2.6614261650758, "grad_norm": 0.22125476598739624, "learning_rate": 3.81540966416275e-07, "loss": 0.4527, "step": 3160 }, { "epoch": 2.662268388545761, "grad_norm": 0.1979571431875229, "learning_rate": 3.796648187701957e-07, "loss": 0.4286, "step": 3161 }, { "epoch": 2.6631106120157213, "grad_norm": 0.22439590096473694, "learning_rate": 3.777931132225526e-07, "loss": 0.4607, "step": 3162 }, { "epoch": 2.663952835485682, "grad_norm": 0.21610695123672485, "learning_rate": 3.75925851572847e-07, "loss": 0.4452, "step": 3163 }, { "epoch": 2.664795058955643, "grad_norm": 0.2129330039024353, "learning_rate": 3.7406303561630996e-07, "loss": 0.4482, "step": 3164 }, { "epoch": 2.6656372824256036, "grad_norm": 0.21321649849414825, "learning_rate": 3.72204667143895e-07, "loss": 0.4248, "step": 3165 }, { "epoch": 2.6664795058955644, "grad_norm": 0.2157229632139206, "learning_rate": 3.703507479422813e-07, "loss": 0.4654, "step": 3166 }, { "epoch": 2.667321729365525, "grad_norm": 0.2200181782245636, "learning_rate": 3.6850127979386917e-07, "loss": 0.454, "step": 3167 }, { "epoch": 2.668163952835486, "grad_norm": 0.22381281852722168, "learning_rate": 3.666562644767824e-07, "loss": 0.4786, "step": 3168 }, { "epoch": 2.6690061763054462, "grad_norm": 0.22186662256717682, "learning_rate": 3.648157037648598e-07, "loss": 0.4887, "step": 3169 }, { "epoch": 2.669848399775407, "grad_norm": 0.22181908786296844, "learning_rate": 3.6297959942766303e-07, "loss": 0.4566, "step": 3170 }, { "epoch": 2.6706906232453678, "grad_norm": 0.21996498107910156, "learning_rate": 3.611479532304618e-07, "loss": 0.415, "step": 3171 }, { "epoch": 2.6715328467153285, "grad_norm": 0.22350367903709412, "learning_rate": 3.593207669342463e-07, "loss": 0.4326, "step": 3172 }, { "epoch": 2.6723750701852893, "grad_norm": 0.2166353315114975, "learning_rate": 3.574980422957147e-07, "loss": 0.4905, "step": 3173 }, { "epoch": 2.6732172936552496, "grad_norm": 0.2370651364326477, "learning_rate": 3.556797810672785e-07, "loss": 0.4626, "step": 3174 }, { "epoch": 2.6740595171252104, "grad_norm": 0.21759885549545288, "learning_rate": 3.538659849970555e-07, "loss": 0.4423, "step": 3175 }, { "epoch": 2.674901740595171, "grad_norm": 0.21770113706588745, "learning_rate": 3.5205665582887296e-07, "loss": 0.496, "step": 3176 }, { "epoch": 2.675743964065132, "grad_norm": 0.2164364606142044, "learning_rate": 3.5025179530225995e-07, "loss": 0.4219, "step": 3177 }, { "epoch": 2.6765861875350927, "grad_norm": 0.2290583997964859, "learning_rate": 3.484514051524546e-07, "loss": 0.4272, "step": 3178 }, { "epoch": 2.6774284110050535, "grad_norm": 0.22789859771728516, "learning_rate": 3.466554871103922e-07, "loss": 0.4329, "step": 3179 }, { "epoch": 2.6782706344750142, "grad_norm": 0.21482837200164795, "learning_rate": 3.4486404290271115e-07, "loss": 0.4752, "step": 3180 }, { "epoch": 2.6791128579449746, "grad_norm": 0.20594723522663116, "learning_rate": 3.43077074251747e-07, "loss": 0.442, "step": 3181 }, { "epoch": 2.6799550814149353, "grad_norm": 0.23200154304504395, "learning_rate": 3.4129458287553487e-07, "loss": 0.4618, "step": 3182 }, { "epoch": 2.680797304884896, "grad_norm": 0.23049423098564148, "learning_rate": 3.395165704878023e-07, "loss": 0.4715, "step": 3183 }, { "epoch": 2.681639528354857, "grad_norm": 0.21865615248680115, "learning_rate": 3.3774303879797297e-07, "loss": 0.4312, "step": 3184 }, { "epoch": 2.6824817518248176, "grad_norm": 0.20830515027046204, "learning_rate": 3.359739895111602e-07, "loss": 0.4343, "step": 3185 }, { "epoch": 2.683323975294778, "grad_norm": 0.22058694064617157, "learning_rate": 3.3420942432817127e-07, "loss": 0.4489, "step": 3186 }, { "epoch": 2.684166198764739, "grad_norm": 0.23171240091323853, "learning_rate": 3.324493449454991e-07, "loss": 0.4785, "step": 3187 }, { "epoch": 2.6850084222346995, "grad_norm": 0.21331998705863953, "learning_rate": 3.3069375305532725e-07, "loss": 0.446, "step": 3188 }, { "epoch": 2.6858506457046603, "grad_norm": 0.21295368671417236, "learning_rate": 3.289426503455201e-07, "loss": 0.4601, "step": 3189 }, { "epoch": 2.686692869174621, "grad_norm": 0.21003054082393646, "learning_rate": 3.271960384996309e-07, "loss": 0.4216, "step": 3190 }, { "epoch": 2.687535092644582, "grad_norm": 0.2239973098039627, "learning_rate": 3.2545391919689193e-07, "loss": 0.4546, "step": 3191 }, { "epoch": 2.6883773161145426, "grad_norm": 0.23115502297878265, "learning_rate": 3.237162941122185e-07, "loss": 0.4717, "step": 3192 }, { "epoch": 2.689219539584503, "grad_norm": 0.2013714462518692, "learning_rate": 3.2198316491620305e-07, "loss": 0.4082, "step": 3193 }, { "epoch": 2.6900617630544637, "grad_norm": 0.22289757430553436, "learning_rate": 3.202545332751178e-07, "loss": 0.4668, "step": 3194 }, { "epoch": 2.6909039865244244, "grad_norm": 0.20874209702014923, "learning_rate": 3.185304008509077e-07, "loss": 0.4255, "step": 3195 }, { "epoch": 2.691746209994385, "grad_norm": 0.1961173713207245, "learning_rate": 3.1681076930119626e-07, "loss": 0.4515, "step": 3196 }, { "epoch": 2.692588433464346, "grad_norm": 0.22621840238571167, "learning_rate": 3.150956402792765e-07, "loss": 0.472, "step": 3197 }, { "epoch": 2.6934306569343067, "grad_norm": 0.2015330046415329, "learning_rate": 3.133850154341139e-07, "loss": 0.4262, "step": 3198 }, { "epoch": 2.6942728804042675, "grad_norm": 0.20254331827163696, "learning_rate": 3.116788964103429e-07, "loss": 0.4444, "step": 3199 }, { "epoch": 2.695115103874228, "grad_norm": 0.21927085518836975, "learning_rate": 3.099772848482657e-07, "loss": 0.4398, "step": 3200 }, { "epoch": 2.6959573273441886, "grad_norm": 0.2045278549194336, "learning_rate": 3.082801823838527e-07, "loss": 0.4295, "step": 3201 }, { "epoch": 2.6967995508141493, "grad_norm": 0.2186572551727295, "learning_rate": 3.0658759064873755e-07, "loss": 0.4982, "step": 3202 }, { "epoch": 2.69764177428411, "grad_norm": 0.21246546506881714, "learning_rate": 3.0489951127021744e-07, "loss": 0.4408, "step": 3203 }, { "epoch": 2.698483997754071, "grad_norm": 0.1996469348669052, "learning_rate": 3.0321594587125083e-07, "loss": 0.3948, "step": 3204 }, { "epoch": 2.699326221224031, "grad_norm": 0.19090184569358826, "learning_rate": 3.015368960704584e-07, "loss": 0.4543, "step": 3205 }, { "epoch": 2.700168444693992, "grad_norm": 0.20154762268066406, "learning_rate": 2.9986236348211684e-07, "loss": 0.4751, "step": 3206 }, { "epoch": 2.7010106681639527, "grad_norm": 0.18929973244667053, "learning_rate": 2.9819234971616154e-07, "loss": 0.4258, "step": 3207 }, { "epoch": 2.7018528916339135, "grad_norm": 0.2118244618177414, "learning_rate": 2.9652685637818147e-07, "loss": 0.4372, "step": 3208 }, { "epoch": 2.7026951151038743, "grad_norm": 0.22121693193912506, "learning_rate": 2.9486588506942303e-07, "loss": 0.5015, "step": 3209 }, { "epoch": 2.703537338573835, "grad_norm": 0.21671797335147858, "learning_rate": 2.932094373867811e-07, "loss": 0.4602, "step": 3210 }, { "epoch": 2.704379562043796, "grad_norm": 0.2241819053888321, "learning_rate": 2.915575149228056e-07, "loss": 0.4463, "step": 3211 }, { "epoch": 2.705221785513756, "grad_norm": 0.23354129493236542, "learning_rate": 2.8991011926569003e-07, "loss": 0.4354, "step": 3212 }, { "epoch": 2.706064008983717, "grad_norm": 0.2079884111881256, "learning_rate": 2.882672519992824e-07, "loss": 0.4222, "step": 3213 }, { "epoch": 2.7069062324536777, "grad_norm": 0.21936790645122528, "learning_rate": 2.8662891470307154e-07, "loss": 0.465, "step": 3214 }, { "epoch": 2.7077484559236384, "grad_norm": 0.2030920535326004, "learning_rate": 2.8499510895219464e-07, "loss": 0.4164, "step": 3215 }, { "epoch": 2.708590679393599, "grad_norm": 0.18852412700653076, "learning_rate": 2.833658363174302e-07, "loss": 0.4322, "step": 3216 }, { "epoch": 2.7094329028635595, "grad_norm": 0.21277570724487305, "learning_rate": 2.817410983651997e-07, "loss": 0.4558, "step": 3217 }, { "epoch": 2.7102751263335207, "grad_norm": 0.2165585458278656, "learning_rate": 2.80120896657563e-07, "loss": 0.4701, "step": 3218 }, { "epoch": 2.711117349803481, "grad_norm": 0.2051648050546646, "learning_rate": 2.785052327522214e-07, "loss": 0.439, "step": 3219 }, { "epoch": 2.711959573273442, "grad_norm": 0.21937356889247894, "learning_rate": 2.768941082025112e-07, "loss": 0.4629, "step": 3220 }, { "epoch": 2.7128017967434026, "grad_norm": 0.21671779453754425, "learning_rate": 2.7528752455740606e-07, "loss": 0.4566, "step": 3221 }, { "epoch": 2.7136440202133634, "grad_norm": 0.22105757892131805, "learning_rate": 2.73685483361511e-07, "loss": 0.466, "step": 3222 }, { "epoch": 2.714486243683324, "grad_norm": 0.20988062024116516, "learning_rate": 2.720879861550685e-07, "loss": 0.4234, "step": 3223 }, { "epoch": 2.7153284671532845, "grad_norm": 0.21928894519805908, "learning_rate": 2.7049503447394874e-07, "loss": 0.4499, "step": 3224 }, { "epoch": 2.7161706906232452, "grad_norm": 0.2182295024394989, "learning_rate": 2.6890662984965234e-07, "loss": 0.4754, "step": 3225 }, { "epoch": 2.717012914093206, "grad_norm": 0.21740038692951202, "learning_rate": 2.6732277380930873e-07, "loss": 0.4517, "step": 3226 }, { "epoch": 2.7178551375631668, "grad_norm": 0.20217464864253998, "learning_rate": 2.657434678756754e-07, "loss": 0.4141, "step": 3227 }, { "epoch": 2.7186973610331275, "grad_norm": 0.2232842892408371, "learning_rate": 2.6416871356713224e-07, "loss": 0.4711, "step": 3228 }, { "epoch": 2.7195395845030883, "grad_norm": 0.23006275296211243, "learning_rate": 2.625985123976876e-07, "loss": 0.4671, "step": 3229 }, { "epoch": 2.720381807973049, "grad_norm": 0.22877484560012817, "learning_rate": 2.6103286587696674e-07, "loss": 0.4898, "step": 3230 }, { "epoch": 2.7212240314430094, "grad_norm": 0.20481570065021515, "learning_rate": 2.594717755102205e-07, "loss": 0.4567, "step": 3231 }, { "epoch": 2.72206625491297, "grad_norm": 0.2161417007446289, "learning_rate": 2.5791524279831613e-07, "loss": 0.4589, "step": 3232 }, { "epoch": 2.722908478382931, "grad_norm": 0.2081775665283203, "learning_rate": 2.5636326923774325e-07, "loss": 0.4321, "step": 3233 }, { "epoch": 2.7237507018528917, "grad_norm": 0.22723042964935303, "learning_rate": 2.548158563206038e-07, "loss": 0.4074, "step": 3234 }, { "epoch": 2.7245929253228525, "grad_norm": 0.21104645729064941, "learning_rate": 2.532730055346172e-07, "loss": 0.4306, "step": 3235 }, { "epoch": 2.725435148792813, "grad_norm": 0.210785910487175, "learning_rate": 2.517347183631158e-07, "loss": 0.4466, "step": 3236 }, { "epoch": 2.7262773722627736, "grad_norm": 0.20528872311115265, "learning_rate": 2.5020099628504603e-07, "loss": 0.4638, "step": 3237 }, { "epoch": 2.7271195957327343, "grad_norm": 0.2019481062889099, "learning_rate": 2.4867184077496333e-07, "loss": 0.4092, "step": 3238 }, { "epoch": 2.727961819202695, "grad_norm": 0.20898960530757904, "learning_rate": 2.471472533030339e-07, "loss": 0.471, "step": 3239 }, { "epoch": 2.728804042672656, "grad_norm": 0.207711860537529, "learning_rate": 2.4562723533503084e-07, "loss": 0.4044, "step": 3240 }, { "epoch": 2.7296462661426166, "grad_norm": 0.21202552318572998, "learning_rate": 2.441117883323374e-07, "loss": 0.4758, "step": 3241 }, { "epoch": 2.7304884896125774, "grad_norm": 0.2049902379512787, "learning_rate": 2.426009137519375e-07, "loss": 0.4614, "step": 3242 }, { "epoch": 2.7313307130825377, "grad_norm": 0.20932377874851227, "learning_rate": 2.4109461304642254e-07, "loss": 0.4488, "step": 3243 }, { "epoch": 2.7321729365524985, "grad_norm": 0.20634062588214874, "learning_rate": 2.395928876639847e-07, "loss": 0.4237, "step": 3244 }, { "epoch": 2.7330151600224593, "grad_norm": 0.20403897762298584, "learning_rate": 2.3809573904841844e-07, "loss": 0.416, "step": 3245 }, { "epoch": 2.73385738349242, "grad_norm": 0.2069391906261444, "learning_rate": 2.3660316863911682e-07, "loss": 0.4381, "step": 3246 }, { "epoch": 2.734699606962381, "grad_norm": 0.21885669231414795, "learning_rate": 2.3511517787107363e-07, "loss": 0.4788, "step": 3247 }, { "epoch": 2.735541830432341, "grad_norm": 0.20975841581821442, "learning_rate": 2.336317681748751e-07, "loss": 0.4239, "step": 3248 }, { "epoch": 2.7363840539023023, "grad_norm": 0.21098175644874573, "learning_rate": 2.3215294097670927e-07, "loss": 0.4091, "step": 3249 }, { "epoch": 2.7372262773722627, "grad_norm": 0.21726255118846893, "learning_rate": 2.3067869769835215e-07, "loss": 0.4567, "step": 3250 }, { "epoch": 2.7380685008422234, "grad_norm": 0.22506387531757355, "learning_rate": 2.292090397571789e-07, "loss": 0.4867, "step": 3251 }, { "epoch": 2.738910724312184, "grad_norm": 0.21078060567378998, "learning_rate": 2.277439685661509e-07, "loss": 0.4103, "step": 3252 }, { "epoch": 2.739752947782145, "grad_norm": 0.20390331745147705, "learning_rate": 2.262834855338225e-07, "loss": 0.4139, "step": 3253 }, { "epoch": 2.7405951712521057, "grad_norm": 0.21276633441448212, "learning_rate": 2.2482759206433613e-07, "loss": 0.4643, "step": 3254 }, { "epoch": 2.741437394722066, "grad_norm": 0.2178574502468109, "learning_rate": 2.2337628955742263e-07, "loss": 0.4636, "step": 3255 }, { "epoch": 2.742279618192027, "grad_norm": 0.21260902285575867, "learning_rate": 2.21929579408397e-07, "loss": 0.4778, "step": 3256 }, { "epoch": 2.7431218416619876, "grad_norm": 0.2052346020936966, "learning_rate": 2.204874630081616e-07, "loss": 0.4361, "step": 3257 }, { "epoch": 2.7439640651319483, "grad_norm": 0.21604369580745697, "learning_rate": 2.1904994174319903e-07, "loss": 0.4801, "step": 3258 }, { "epoch": 2.744806288601909, "grad_norm": 0.21535059809684753, "learning_rate": 2.1761701699557824e-07, "loss": 0.4226, "step": 3259 }, { "epoch": 2.74564851207187, "grad_norm": 0.21637174487113953, "learning_rate": 2.1618869014294498e-07, "loss": 0.4391, "step": 3260 }, { "epoch": 2.7464907355418307, "grad_norm": 0.22134044766426086, "learning_rate": 2.1476496255852685e-07, "loss": 0.449, "step": 3261 }, { "epoch": 2.747332959011791, "grad_norm": 0.21490821242332458, "learning_rate": 2.1334583561112786e-07, "loss": 0.4525, "step": 3262 }, { "epoch": 2.7481751824817517, "grad_norm": 0.22257821261882782, "learning_rate": 2.1193131066513107e-07, "loss": 0.4145, "step": 3263 }, { "epoch": 2.7490174059517125, "grad_norm": 0.2000625729560852, "learning_rate": 2.1052138908049303e-07, "loss": 0.4507, "step": 3264 }, { "epoch": 2.7498596294216733, "grad_norm": 0.20687691867351532, "learning_rate": 2.091160722127472e-07, "loss": 0.4074, "step": 3265 }, { "epoch": 2.750701852891634, "grad_norm": 0.23321890830993652, "learning_rate": 2.0771536141299565e-07, "loss": 0.4613, "step": 3266 }, { "epoch": 2.7515440763615944, "grad_norm": 0.21059465408325195, "learning_rate": 2.0631925802791608e-07, "loss": 0.4381, "step": 3267 }, { "epoch": 2.7523862998315556, "grad_norm": 0.21999216079711914, "learning_rate": 2.0492776339975374e-07, "loss": 0.4866, "step": 3268 }, { "epoch": 2.753228523301516, "grad_norm": 0.2358987033367157, "learning_rate": 2.0354087886632623e-07, "loss": 0.445, "step": 3269 }, { "epoch": 2.7540707467714767, "grad_norm": 0.19829314947128296, "learning_rate": 2.0215860576101532e-07, "loss": 0.4362, "step": 3270 }, { "epoch": 2.7549129702414374, "grad_norm": 0.2108384519815445, "learning_rate": 2.0078094541277016e-07, "loss": 0.4309, "step": 3271 }, { "epoch": 2.755755193711398, "grad_norm": 0.21728195250034332, "learning_rate": 1.9940789914610682e-07, "loss": 0.4628, "step": 3272 }, { "epoch": 2.756597417181359, "grad_norm": 0.1971876472234726, "learning_rate": 1.9803946828110376e-07, "loss": 0.433, "step": 3273 }, { "epoch": 2.7574396406513193, "grad_norm": 0.2151990532875061, "learning_rate": 1.966756541334025e-07, "loss": 0.4766, "step": 3274 }, { "epoch": 2.75828186412128, "grad_norm": 0.22942999005317688, "learning_rate": 1.953164580142064e-07, "loss": 0.4744, "step": 3275 }, { "epoch": 2.759124087591241, "grad_norm": 0.20308056473731995, "learning_rate": 1.9396188123027736e-07, "loss": 0.4326, "step": 3276 }, { "epoch": 2.7599663110612016, "grad_norm": 0.20088258385658264, "learning_rate": 1.9261192508393755e-07, "loss": 0.4079, "step": 3277 }, { "epoch": 2.7608085345311624, "grad_norm": 0.24038101732730865, "learning_rate": 1.912665908730671e-07, "loss": 0.4906, "step": 3278 }, { "epoch": 2.7616507580011227, "grad_norm": 0.2347426563501358, "learning_rate": 1.8992587989110133e-07, "loss": 0.4717, "step": 3279 }, { "epoch": 2.762492981471084, "grad_norm": 0.22140753269195557, "learning_rate": 1.8858979342703088e-07, "loss": 0.4483, "step": 3280 }, { "epoch": 2.7633352049410442, "grad_norm": 0.20641572773456573, "learning_rate": 1.8725833276540095e-07, "loss": 0.4381, "step": 3281 }, { "epoch": 2.764177428411005, "grad_norm": 0.21701404452323914, "learning_rate": 1.8593149918630927e-07, "loss": 0.5052, "step": 3282 }, { "epoch": 2.7650196518809658, "grad_norm": 0.21970410645008087, "learning_rate": 1.8460929396540428e-07, "loss": 0.4325, "step": 3283 }, { "epoch": 2.7658618753509265, "grad_norm": 0.21557782590389252, "learning_rate": 1.8329171837388527e-07, "loss": 0.4479, "step": 3284 }, { "epoch": 2.7667040988208873, "grad_norm": 0.22146831452846527, "learning_rate": 1.8197877367849948e-07, "loss": 0.4586, "step": 3285 }, { "epoch": 2.7675463222908476, "grad_norm": 0.20745447278022766, "learning_rate": 1.8067046114154386e-07, "loss": 0.4082, "step": 3286 }, { "epoch": 2.7683885457608084, "grad_norm": 0.22729450464248657, "learning_rate": 1.7936678202085945e-07, "loss": 0.48, "step": 3287 }, { "epoch": 2.769230769230769, "grad_norm": 0.21474407613277435, "learning_rate": 1.7806773756983641e-07, "loss": 0.4082, "step": 3288 }, { "epoch": 2.77007299270073, "grad_norm": 0.22894220054149628, "learning_rate": 1.7677332903740296e-07, "loss": 0.4688, "step": 3289 }, { "epoch": 2.7709152161706907, "grad_norm": 0.20574726164340973, "learning_rate": 1.7548355766803638e-07, "loss": 0.4528, "step": 3290 }, { "epoch": 2.7717574396406515, "grad_norm": 0.21582724153995514, "learning_rate": 1.7419842470175196e-07, "loss": 0.4757, "step": 3291 }, { "epoch": 2.7725996631106122, "grad_norm": 0.21540994942188263, "learning_rate": 1.7291793137410695e-07, "loss": 0.4759, "step": 3292 }, { "epoch": 2.7734418865805726, "grad_norm": 0.19602712988853455, "learning_rate": 1.7164207891619823e-07, "loss": 0.4428, "step": 3293 }, { "epoch": 2.7742841100505333, "grad_norm": 0.20655936002731323, "learning_rate": 1.7037086855465902e-07, "loss": 0.4485, "step": 3294 }, { "epoch": 2.775126333520494, "grad_norm": 0.22025364637374878, "learning_rate": 1.6910430151166058e-07, "loss": 0.4503, "step": 3295 }, { "epoch": 2.775968556990455, "grad_norm": 0.20780010521411896, "learning_rate": 1.6784237900491163e-07, "loss": 0.4643, "step": 3296 }, { "epoch": 2.7768107804604156, "grad_norm": 0.20008914172649384, "learning_rate": 1.6658510224765333e-07, "loss": 0.4621, "step": 3297 }, { "epoch": 2.777653003930376, "grad_norm": 0.19976972043514252, "learning_rate": 1.6533247244866102e-07, "loss": 0.4033, "step": 3298 }, { "epoch": 2.778495227400337, "grad_norm": 0.21648092567920685, "learning_rate": 1.6408449081224131e-07, "loss": 0.4739, "step": 3299 }, { "epoch": 2.7793374508702975, "grad_norm": 0.2208261936903, "learning_rate": 1.6284115853823445e-07, "loss": 0.4301, "step": 3300 }, { "epoch": 2.7801796743402583, "grad_norm": 0.20695707201957703, "learning_rate": 1.6160247682200813e-07, "loss": 0.4389, "step": 3301 }, { "epoch": 2.781021897810219, "grad_norm": 0.22153273224830627, "learning_rate": 1.6036844685446084e-07, "loss": 0.463, "step": 3302 }, { "epoch": 2.78186412128018, "grad_norm": 0.19083759188652039, "learning_rate": 1.5913906982201744e-07, "loss": 0.397, "step": 3303 }, { "epoch": 2.7827063447501406, "grad_norm": 0.2165522426366806, "learning_rate": 1.5791434690662966e-07, "loss": 0.491, "step": 3304 }, { "epoch": 2.783548568220101, "grad_norm": 0.22006559371948242, "learning_rate": 1.566942792857745e-07, "loss": 0.4559, "step": 3305 }, { "epoch": 2.7843907916900617, "grad_norm": 0.3007187843322754, "learning_rate": 1.554788681324554e-07, "loss": 0.4792, "step": 3306 }, { "epoch": 2.7852330151600224, "grad_norm": 0.21197086572647095, "learning_rate": 1.5426811461519419e-07, "loss": 0.4274, "step": 3307 }, { "epoch": 2.786075238629983, "grad_norm": 0.20490634441375732, "learning_rate": 1.530620198980398e-07, "loss": 0.4265, "step": 3308 }, { "epoch": 2.786917462099944, "grad_norm": 0.2233913391828537, "learning_rate": 1.5186058514055912e-07, "loss": 0.4808, "step": 3309 }, { "epoch": 2.7877596855699043, "grad_norm": 0.20144042372703552, "learning_rate": 1.506638114978398e-07, "loss": 0.4046, "step": 3310 }, { "epoch": 2.7886019090398655, "grad_norm": 0.21582743525505066, "learning_rate": 1.4947170012048872e-07, "loss": 0.4585, "step": 3311 }, { "epoch": 2.789444132509826, "grad_norm": 0.20355187356472015, "learning_rate": 1.482842521546285e-07, "loss": 0.4138, "step": 3312 }, { "epoch": 2.7902863559797866, "grad_norm": 0.22934609651565552, "learning_rate": 1.471014687418998e-07, "loss": 0.4969, "step": 3313 }, { "epoch": 2.7911285794497473, "grad_norm": 0.23003067076206207, "learning_rate": 1.4592335101945855e-07, "loss": 0.4735, "step": 3314 }, { "epoch": 2.791970802919708, "grad_norm": 0.20134614408016205, "learning_rate": 1.447499001199748e-07, "loss": 0.4051, "step": 3315 }, { "epoch": 2.792813026389669, "grad_norm": 0.23109528422355652, "learning_rate": 1.435811171716317e-07, "loss": 0.4995, "step": 3316 }, { "epoch": 2.793655249859629, "grad_norm": 0.22043763101100922, "learning_rate": 1.4241700329812368e-07, "loss": 0.4631, "step": 3317 }, { "epoch": 2.79449747332959, "grad_norm": 0.20721164345741272, "learning_rate": 1.4125755961865827e-07, "loss": 0.4271, "step": 3318 }, { "epoch": 2.7953396967995507, "grad_norm": 0.2181190848350525, "learning_rate": 1.4010278724795157e-07, "loss": 0.4109, "step": 3319 }, { "epoch": 2.7961819202695115, "grad_norm": 0.22932249307632446, "learning_rate": 1.3895268729622824e-07, "loss": 0.5028, "step": 3320 }, { "epoch": 2.7970241437394723, "grad_norm": 0.19980956614017487, "learning_rate": 1.3780726086922103e-07, "loss": 0.4258, "step": 3321 }, { "epoch": 2.797866367209433, "grad_norm": 0.20675943791866302, "learning_rate": 1.366665090681707e-07, "loss": 0.436, "step": 3322 }, { "epoch": 2.798708590679394, "grad_norm": 0.22191688418388367, "learning_rate": 1.355304329898216e-07, "loss": 0.4765, "step": 3323 }, { "epoch": 2.799550814149354, "grad_norm": 0.22678667306900024, "learning_rate": 1.3439903372642615e-07, "loss": 0.4055, "step": 3324 }, { "epoch": 2.800393037619315, "grad_norm": 0.20481416583061218, "learning_rate": 1.332723123657348e-07, "loss": 0.4677, "step": 3325 }, { "epoch": 2.8012352610892757, "grad_norm": 0.20951667428016663, "learning_rate": 1.3215026999100655e-07, "loss": 0.4868, "step": 3326 }, { "epoch": 2.8020774845592364, "grad_norm": 0.1918957680463791, "learning_rate": 1.3103290768099796e-07, "loss": 0.4142, "step": 3327 }, { "epoch": 2.802919708029197, "grad_norm": 0.23809050023555756, "learning_rate": 1.299202265099675e-07, "loss": 0.4502, "step": 3328 }, { "epoch": 2.8037619314991575, "grad_norm": 0.22177831828594208, "learning_rate": 1.288122275476733e-07, "loss": 0.4568, "step": 3329 }, { "epoch": 2.8046041549691187, "grad_norm": 0.22692367434501648, "learning_rate": 1.2770891185937106e-07, "loss": 0.4647, "step": 3330 }, { "epoch": 2.805446378439079, "grad_norm": 0.2068193405866623, "learning_rate": 1.2661028050581446e-07, "loss": 0.4771, "step": 3331 }, { "epoch": 2.80628860190904, "grad_norm": 0.2122860848903656, "learning_rate": 1.2551633454325362e-07, "loss": 0.4475, "step": 3332 }, { "epoch": 2.8071308253790006, "grad_norm": 0.20646287500858307, "learning_rate": 1.244270750234333e-07, "loss": 0.4616, "step": 3333 }, { "epoch": 2.8079730488489614, "grad_norm": 0.20550426840782166, "learning_rate": 1.2334250299359362e-07, "loss": 0.4043, "step": 3334 }, { "epoch": 2.808815272318922, "grad_norm": 0.22794756293296814, "learning_rate": 1.2226261949646656e-07, "loss": 0.482, "step": 3335 }, { "epoch": 2.8096574957888825, "grad_norm": 0.20333942770957947, "learning_rate": 1.2118742557027885e-07, "loss": 0.4331, "step": 3336 }, { "epoch": 2.8104997192588432, "grad_norm": 0.20841817557811737, "learning_rate": 1.201169222487464e-07, "loss": 0.4437, "step": 3337 }, { "epoch": 2.811341942728804, "grad_norm": 0.2069125473499298, "learning_rate": 1.1905111056107644e-07, "loss": 0.4675, "step": 3338 }, { "epoch": 2.8121841661987648, "grad_norm": 0.21505212783813477, "learning_rate": 1.1798999153196433e-07, "loss": 0.4589, "step": 3339 }, { "epoch": 2.8130263896687255, "grad_norm": 0.22416800260543823, "learning_rate": 1.1693356618159568e-07, "loss": 0.4216, "step": 3340 }, { "epoch": 2.813868613138686, "grad_norm": 0.21791650354862213, "learning_rate": 1.1588183552564247e-07, "loss": 0.4617, "step": 3341 }, { "epoch": 2.814710836608647, "grad_norm": 0.20381474494934082, "learning_rate": 1.1483480057526364e-07, "loss": 0.4652, "step": 3342 }, { "epoch": 2.8155530600786074, "grad_norm": 0.20153506100177765, "learning_rate": 1.1379246233710172e-07, "loss": 0.4575, "step": 3343 }, { "epoch": 2.816395283548568, "grad_norm": 0.23261725902557373, "learning_rate": 1.1275482181328568e-07, "loss": 0.4953, "step": 3344 }, { "epoch": 2.817237507018529, "grad_norm": 0.22672612965106964, "learning_rate": 1.1172188000142803e-07, "loss": 0.4468, "step": 3345 }, { "epoch": 2.8180797304884897, "grad_norm": 0.2177150994539261, "learning_rate": 1.1069363789462273e-07, "loss": 0.45, "step": 3346 }, { "epoch": 2.8189219539584505, "grad_norm": 0.18826892971992493, "learning_rate": 1.0967009648144621e-07, "loss": 0.3825, "step": 3347 }, { "epoch": 2.819764177428411, "grad_norm": 0.22237713634967804, "learning_rate": 1.0865125674595467e-07, "loss": 0.4619, "step": 3348 }, { "epoch": 2.8206064008983716, "grad_norm": 0.22334639728069305, "learning_rate": 1.0763711966768453e-07, "loss": 0.4783, "step": 3349 }, { "epoch": 2.8214486243683323, "grad_norm": 0.2130933254957199, "learning_rate": 1.0662768622165087e-07, "loss": 0.4261, "step": 3350 }, { "epoch": 2.822290847838293, "grad_norm": 0.20765499770641327, "learning_rate": 1.0562295737834738e-07, "loss": 0.4783, "step": 3351 }, { "epoch": 2.823133071308254, "grad_norm": 0.2238711565732956, "learning_rate": 1.0462293410374303e-07, "loss": 0.4628, "step": 3352 }, { "epoch": 2.8239752947782146, "grad_norm": 0.21880851686000824, "learning_rate": 1.0362761735928372e-07, "loss": 0.4623, "step": 3353 }, { "epoch": 2.8248175182481754, "grad_norm": 0.19876381754875183, "learning_rate": 1.026370081018907e-07, "loss": 0.4077, "step": 3354 }, { "epoch": 2.8256597417181357, "grad_norm": 0.21881850063800812, "learning_rate": 1.0165110728395878e-07, "loss": 0.444, "step": 3355 }, { "epoch": 2.8265019651880965, "grad_norm": 0.21453146636486053, "learning_rate": 1.0066991585335583e-07, "loss": 0.4494, "step": 3356 }, { "epoch": 2.8273441886580573, "grad_norm": 0.20412549376487732, "learning_rate": 9.969343475342285e-08, "loss": 0.4552, "step": 3357 }, { "epoch": 2.828186412128018, "grad_norm": 0.2223614752292633, "learning_rate": 9.872166492297052e-08, "loss": 0.4611, "step": 3358 }, { "epoch": 2.829028635597979, "grad_norm": 0.20876282453536987, "learning_rate": 9.775460729628262e-08, "loss": 0.4344, "step": 3359 }, { "epoch": 2.829870859067939, "grad_norm": 0.20215468108654022, "learning_rate": 9.679226280310982e-08, "loss": 0.4315, "step": 3360 }, { "epoch": 2.8307130825379003, "grad_norm": 0.21494834125041962, "learning_rate": 9.583463236867318e-08, "loss": 0.4495, "step": 3361 }, { "epoch": 2.8315553060078607, "grad_norm": 0.2065226286649704, "learning_rate": 9.48817169136601e-08, "loss": 0.4402, "step": 3362 }, { "epoch": 2.8323975294778214, "grad_norm": 0.2367149293422699, "learning_rate": 9.393351735422773e-08, "loss": 0.4833, "step": 3363 }, { "epoch": 2.833239752947782, "grad_norm": 0.1941768378019333, "learning_rate": 9.299003460199519e-08, "loss": 0.4395, "step": 3364 }, { "epoch": 2.834081976417743, "grad_norm": 0.23437419533729553, "learning_rate": 9.205126956405075e-08, "loss": 0.4486, "step": 3365 }, { "epoch": 2.8349241998877037, "grad_norm": 0.21141263842582703, "learning_rate": 9.111722314294358e-08, "loss": 0.4745, "step": 3366 }, { "epoch": 2.835766423357664, "grad_norm": 0.2238515019416809, "learning_rate": 9.018789623668866e-08, "loss": 0.4972, "step": 3367 }, { "epoch": 2.836608646827625, "grad_norm": 0.20354215800762177, "learning_rate": 8.926328973876242e-08, "loss": 0.4247, "step": 3368 }, { "epoch": 2.8374508702975856, "grad_norm": 0.22804734110832214, "learning_rate": 8.834340453810375e-08, "loss": 0.4933, "step": 3369 }, { "epoch": 2.8382930937675463, "grad_norm": 0.2254832237958908, "learning_rate": 8.742824151911022e-08, "loss": 0.4324, "step": 3370 }, { "epoch": 2.839135317237507, "grad_norm": 0.22040994465351105, "learning_rate": 8.651780156164302e-08, "loss": 0.4471, "step": 3371 }, { "epoch": 2.8399775407074674, "grad_norm": 0.21051527559757233, "learning_rate": 8.561208554101863e-08, "loss": 0.4764, "step": 3372 }, { "epoch": 2.8408197641774287, "grad_norm": 0.2103731781244278, "learning_rate": 8.471109432801494e-08, "loss": 0.4479, "step": 3373 }, { "epoch": 2.841661987647389, "grad_norm": 0.18601827323436737, "learning_rate": 8.381482878886571e-08, "loss": 0.4068, "step": 3374 }, { "epoch": 2.8425042111173497, "grad_norm": 0.2205764800310135, "learning_rate": 8.29232897852611e-08, "loss": 0.5322, "step": 3375 }, { "epoch": 2.8433464345873105, "grad_norm": 0.22605444490909576, "learning_rate": 8.203647817434823e-08, "loss": 0.4085, "step": 3376 }, { "epoch": 2.8441886580572713, "grad_norm": 0.2025367170572281, "learning_rate": 8.11543948087279e-08, "loss": 0.4013, "step": 3377 }, { "epoch": 2.845030881527232, "grad_norm": 0.22066029906272888, "learning_rate": 8.027704053645613e-08, "loss": 0.4875, "step": 3378 }, { "epoch": 2.8458731049971924, "grad_norm": 0.21926726400852203, "learning_rate": 7.94044162010421e-08, "loss": 0.4274, "step": 3379 }, { "epoch": 2.846715328467153, "grad_norm": 0.21057423949241638, "learning_rate": 7.85365226414464e-08, "loss": 0.4657, "step": 3380 }, { "epoch": 2.847557551937114, "grad_norm": 0.2143968939781189, "learning_rate": 7.76733606920832e-08, "loss": 0.5071, "step": 3381 }, { "epoch": 2.8483997754070747, "grad_norm": 0.2317008674144745, "learning_rate": 7.681493118281646e-08, "loss": 0.4367, "step": 3382 }, { "epoch": 2.8492419988770354, "grad_norm": 0.19140447676181793, "learning_rate": 7.59612349389599e-08, "loss": 0.4017, "step": 3383 }, { "epoch": 2.850084222346996, "grad_norm": 0.22115440666675568, "learning_rate": 7.511227278127697e-08, "loss": 0.4673, "step": 3384 }, { "epoch": 2.850926445816957, "grad_norm": 0.21155628561973572, "learning_rate": 7.426804552598088e-08, "loss": 0.4979, "step": 3385 }, { "epoch": 2.8517686692869173, "grad_norm": 0.2264985591173172, "learning_rate": 7.342855398472958e-08, "loss": 0.4703, "step": 3386 }, { "epoch": 2.852610892756878, "grad_norm": 0.19891738891601562, "learning_rate": 7.259379896463248e-08, "loss": 0.4341, "step": 3387 }, { "epoch": 2.853453116226839, "grad_norm": 0.21259985864162445, "learning_rate": 7.176378126824035e-08, "loss": 0.4663, "step": 3388 }, { "epoch": 2.8542953396967996, "grad_norm": 0.22603082656860352, "learning_rate": 7.093850169355266e-08, "loss": 0.4244, "step": 3389 }, { "epoch": 2.8551375631667604, "grad_norm": 0.2117343544960022, "learning_rate": 7.011796103401192e-08, "loss": 0.4659, "step": 3390 }, { "epoch": 2.8559797866367207, "grad_norm": 0.21496547758579254, "learning_rate": 6.930216007850598e-08, "loss": 0.4726, "step": 3391 }, { "epoch": 2.856822010106682, "grad_norm": 0.22398146986961365, "learning_rate": 6.849109961136468e-08, "loss": 0.4476, "step": 3392 }, { "epoch": 2.8576642335766422, "grad_norm": 0.20802558958530426, "learning_rate": 6.768478041236037e-08, "loss": 0.4315, "step": 3393 }, { "epoch": 2.858506457046603, "grad_norm": 0.1978541910648346, "learning_rate": 6.688320325670628e-08, "loss": 0.4414, "step": 3394 }, { "epoch": 2.8593486805165638, "grad_norm": 0.20445135235786438, "learning_rate": 6.608636891505982e-08, "loss": 0.4457, "step": 3395 }, { "epoch": 2.8601909039865245, "grad_norm": 0.24126103520393372, "learning_rate": 6.529427815351374e-08, "loss": 0.4409, "step": 3396 }, { "epoch": 2.8610331274564853, "grad_norm": 0.22536629438400269, "learning_rate": 6.450693173360445e-08, "loss": 0.4525, "step": 3397 }, { "epoch": 2.8618753509264456, "grad_norm": 0.2152460217475891, "learning_rate": 6.372433041230364e-08, "loss": 0.4677, "step": 3398 }, { "epoch": 2.8627175743964064, "grad_norm": 0.19877557456493378, "learning_rate": 6.294647494202444e-08, "loss": 0.4383, "step": 3399 }, { "epoch": 2.863559797866367, "grad_norm": 0.20739176869392395, "learning_rate": 6.217336607061364e-08, "loss": 0.4327, "step": 3400 }, { "epoch": 2.864402021336328, "grad_norm": 0.2135617434978485, "learning_rate": 6.140500454135668e-08, "loss": 0.4409, "step": 3401 }, { "epoch": 2.8652442448062887, "grad_norm": 0.23944738507270813, "learning_rate": 6.064139109297485e-08, "loss": 0.5447, "step": 3402 }, { "epoch": 2.866086468276249, "grad_norm": 0.2034779042005539, "learning_rate": 5.988252645962367e-08, "loss": 0.4234, "step": 3403 }, { "epoch": 2.8669286917462102, "grad_norm": 0.20496268570423126, "learning_rate": 5.912841137089287e-08, "loss": 0.4518, "step": 3404 }, { "epoch": 2.8677709152161706, "grad_norm": 0.20473258197307587, "learning_rate": 5.8379046551807486e-08, "loss": 0.4221, "step": 3405 }, { "epoch": 2.8686131386861313, "grad_norm": 0.20575886964797974, "learning_rate": 5.7634432722822875e-08, "loss": 0.4517, "step": 3406 }, { "epoch": 2.869455362156092, "grad_norm": 0.19624333083629608, "learning_rate": 5.6894570599829726e-08, "loss": 0.4253, "step": 3407 }, { "epoch": 2.870297585626053, "grad_norm": 0.21904800832271576, "learning_rate": 5.615946089414737e-08, "loss": 0.4622, "step": 3408 }, { "epoch": 2.8711398090960136, "grad_norm": 0.21903851628303528, "learning_rate": 5.542910431252935e-08, "loss": 0.4456, "step": 3409 }, { "epoch": 2.871982032565974, "grad_norm": 0.21252381801605225, "learning_rate": 5.470350155715565e-08, "loss": 0.4829, "step": 3410 }, { "epoch": 2.8728242560359347, "grad_norm": 0.21248923242092133, "learning_rate": 5.398265332563935e-08, "loss": 0.4565, "step": 3411 }, { "epoch": 2.8736664795058955, "grad_norm": 0.21892540156841278, "learning_rate": 5.32665603110194e-08, "loss": 0.482, "step": 3412 }, { "epoch": 2.8745087029758563, "grad_norm": 0.21474826335906982, "learning_rate": 5.255522320176565e-08, "loss": 0.4126, "step": 3413 }, { "epoch": 2.875350926445817, "grad_norm": 0.20215652883052826, "learning_rate": 5.1848642681773254e-08, "loss": 0.4208, "step": 3414 }, { "epoch": 2.876193149915778, "grad_norm": 0.21085378527641296, "learning_rate": 5.114681943036603e-08, "loss": 0.4412, "step": 3415 }, { "epoch": 2.8770353733857386, "grad_norm": 0.21306855976581573, "learning_rate": 5.0449754122292585e-08, "loss": 0.4971, "step": 3416 }, { "epoch": 2.877877596855699, "grad_norm": 0.20438706874847412, "learning_rate": 4.975744742772848e-08, "loss": 0.4485, "step": 3417 }, { "epoch": 2.8787198203256597, "grad_norm": 0.2262209951877594, "learning_rate": 4.906990001227296e-08, "loss": 0.4279, "step": 3418 }, { "epoch": 2.8795620437956204, "grad_norm": 0.22449712455272675, "learning_rate": 4.838711253695061e-08, "loss": 0.4982, "step": 3419 }, { "epoch": 2.880404267265581, "grad_norm": 0.21073214709758759, "learning_rate": 4.770908565820964e-08, "loss": 0.4369, "step": 3420 }, { "epoch": 2.881246490735542, "grad_norm": 0.23326878249645233, "learning_rate": 4.7035820027920284e-08, "loss": 0.409, "step": 3421 }, { "epoch": 2.8820887142055023, "grad_norm": 0.2274300754070282, "learning_rate": 4.636731629337587e-08, "loss": 0.4758, "step": 3422 }, { "epoch": 2.8829309376754635, "grad_norm": 0.20621298253536224, "learning_rate": 4.5703575097292286e-08, "loss": 0.4515, "step": 3423 }, { "epoch": 2.883773161145424, "grad_norm": 0.1926681101322174, "learning_rate": 4.5044597077805175e-08, "loss": 0.4316, "step": 3424 }, { "epoch": 2.8846153846153846, "grad_norm": 0.20255707204341888, "learning_rate": 4.439038286847164e-08, "loss": 0.4362, "step": 3425 }, { "epoch": 2.8854576080853453, "grad_norm": 0.20678085088729858, "learning_rate": 4.37409330982691e-08, "loss": 0.4742, "step": 3426 }, { "epoch": 2.886299831555306, "grad_norm": 0.2155006229877472, "learning_rate": 4.309624839159254e-08, "loss": 0.4943, "step": 3427 }, { "epoch": 2.887142055025267, "grad_norm": 0.2099962681531906, "learning_rate": 4.245632936825783e-08, "loss": 0.4782, "step": 3428 }, { "epoch": 2.887984278495227, "grad_norm": 0.20619666576385498, "learning_rate": 4.182117664349783e-08, "loss": 0.4019, "step": 3429 }, { "epoch": 2.888826501965188, "grad_norm": 0.24425427615642548, "learning_rate": 4.119079082796351e-08, "loss": 0.5019, "step": 3430 }, { "epoch": 2.8896687254351487, "grad_norm": 0.20036248862743378, "learning_rate": 4.056517252772229e-08, "loss": 0.4554, "step": 3431 }, { "epoch": 2.8905109489051095, "grad_norm": 0.20433729887008667, "learning_rate": 3.99443223442586e-08, "loss": 0.4401, "step": 3432 }, { "epoch": 2.8913531723750703, "grad_norm": 0.22625938057899475, "learning_rate": 3.9328240874471624e-08, "loss": 0.4783, "step": 3433 }, { "epoch": 2.892195395845031, "grad_norm": 0.19878585636615753, "learning_rate": 3.871692871067756e-08, "loss": 0.4244, "step": 3434 }, { "epoch": 2.893037619314992, "grad_norm": 0.21925108134746552, "learning_rate": 3.8110386440605164e-08, "loss": 0.4703, "step": 3435 }, { "epoch": 2.893879842784952, "grad_norm": 0.21508198976516724, "learning_rate": 3.750861464739908e-08, "loss": 0.441, "step": 3436 }, { "epoch": 2.894722066254913, "grad_norm": 0.22031207382678986, "learning_rate": 3.6911613909616505e-08, "loss": 0.4384, "step": 3437 }, { "epoch": 2.8955642897248737, "grad_norm": 0.22088637948036194, "learning_rate": 3.631938480122777e-08, "loss": 0.5001, "step": 3438 }, { "epoch": 2.8964065131948344, "grad_norm": 0.20095793902873993, "learning_rate": 3.573192789161628e-08, "loss": 0.3995, "step": 3439 }, { "epoch": 2.897248736664795, "grad_norm": 0.21288371086120605, "learning_rate": 3.514924374557638e-08, "loss": 0.4586, "step": 3440 }, { "epoch": 2.8980909601347555, "grad_norm": 0.23002006113529205, "learning_rate": 3.457133292331494e-08, "loss": 0.5029, "step": 3441 }, { "epoch": 2.8989331836047163, "grad_norm": 0.22111749649047852, "learning_rate": 3.3998195980448065e-08, "loss": 0.4383, "step": 3442 }, { "epoch": 2.899775407074677, "grad_norm": 0.21851050853729248, "learning_rate": 3.342983346800388e-08, "loss": 0.442, "step": 3443 }, { "epoch": 2.900617630544638, "grad_norm": 0.18614870309829712, "learning_rate": 3.2866245932418606e-08, "loss": 0.3966, "step": 3444 }, { "epoch": 2.9014598540145986, "grad_norm": 0.21642069518566132, "learning_rate": 3.230743391553881e-08, "loss": 0.4844, "step": 3445 }, { "epoch": 2.9023020774845594, "grad_norm": 0.20319654047489166, "learning_rate": 3.175339795462029e-08, "loss": 0.4278, "step": 3446 }, { "epoch": 2.90314430095452, "grad_norm": 0.2078375518321991, "learning_rate": 3.120413858232474e-08, "loss": 0.4597, "step": 3447 }, { "epoch": 2.9039865244244805, "grad_norm": 0.2184380143880844, "learning_rate": 3.0659656326724186e-08, "loss": 0.4612, "step": 3448 }, { "epoch": 2.9048287478944412, "grad_norm": 0.22082063555717468, "learning_rate": 3.011995171129545e-08, "loss": 0.4682, "step": 3449 }, { "epoch": 2.905670971364402, "grad_norm": 0.20876628160476685, "learning_rate": 2.9585025254924572e-08, "loss": 0.4599, "step": 3450 }, { "epoch": 2.9065131948343628, "grad_norm": 0.20629727840423584, "learning_rate": 2.9054877471901277e-08, "loss": 0.4518, "step": 3451 }, { "epoch": 2.9073554183043235, "grad_norm": 0.2066953033208847, "learning_rate": 2.852950887192285e-08, "loss": 0.4053, "step": 3452 }, { "epoch": 2.908197641774284, "grad_norm": 0.2207322120666504, "learning_rate": 2.8008919960090253e-08, "loss": 0.4845, "step": 3453 }, { "epoch": 2.909039865244245, "grad_norm": 0.1965230107307434, "learning_rate": 2.7493111236909787e-08, "loss": 0.4041, "step": 3454 }, { "epoch": 2.9098820887142054, "grad_norm": 0.19916747510433197, "learning_rate": 2.6982083198293096e-08, "loss": 0.4302, "step": 3455 }, { "epoch": 2.910724312184166, "grad_norm": 0.20160463452339172, "learning_rate": 2.6475836335553838e-08, "loss": 0.4464, "step": 3456 }, { "epoch": 2.911566535654127, "grad_norm": 0.20421305298805237, "learning_rate": 2.5974371135408792e-08, "loss": 0.4608, "step": 3457 }, { "epoch": 2.9124087591240877, "grad_norm": 0.21746154129505157, "learning_rate": 2.5477688079979522e-08, "loss": 0.4532, "step": 3458 }, { "epoch": 2.9132509825940485, "grad_norm": 0.20154322683811188, "learning_rate": 2.4985787646788497e-08, "loss": 0.4471, "step": 3459 }, { "epoch": 2.914093206064009, "grad_norm": 0.20532746613025665, "learning_rate": 2.4498670308760742e-08, "loss": 0.4067, "step": 3460 }, { "epoch": 2.9149354295339696, "grad_norm": 0.21404492855072021, "learning_rate": 2.401633653422053e-08, "loss": 0.4438, "step": 3461 }, { "epoch": 2.9157776530039303, "grad_norm": 0.229471817612648, "learning_rate": 2.3538786786896918e-08, "loss": 0.481, "step": 3462 }, { "epoch": 2.916619876473891, "grad_norm": 0.2054324448108673, "learning_rate": 2.306602152591597e-08, "loss": 0.4476, "step": 3463 }, { "epoch": 2.917462099943852, "grad_norm": 0.20869798958301544, "learning_rate": 2.2598041205806333e-08, "loss": 0.4229, "step": 3464 }, { "epoch": 2.9183043234138126, "grad_norm": 0.2150871902704239, "learning_rate": 2.2134846276494205e-08, "loss": 0.5271, "step": 3465 }, { "epoch": 2.9191465468837734, "grad_norm": 0.2209031581878662, "learning_rate": 2.1676437183306697e-08, "loss": 0.445, "step": 3466 }, { "epoch": 2.9199887703537337, "grad_norm": 0.19971045851707458, "learning_rate": 2.1222814366969048e-08, "loss": 0.4515, "step": 3467 }, { "epoch": 2.9208309938236945, "grad_norm": 0.2114984095096588, "learning_rate": 2.0773978263605164e-08, "loss": 0.4767, "step": 3468 }, { "epoch": 2.9216732172936553, "grad_norm": 0.23916718363761902, "learning_rate": 2.032992930473543e-08, "loss": 0.5074, "step": 3469 }, { "epoch": 2.922515440763616, "grad_norm": 0.2053539752960205, "learning_rate": 1.9890667917280006e-08, "loss": 0.4089, "step": 3470 }, { "epoch": 2.923357664233577, "grad_norm": 0.22927771508693695, "learning_rate": 1.9456194523554404e-08, "loss": 0.4795, "step": 3471 }, { "epoch": 2.924199887703537, "grad_norm": 0.21072590351104736, "learning_rate": 1.9026509541272276e-08, "loss": 0.4631, "step": 3472 }, { "epoch": 2.925042111173498, "grad_norm": 0.20538070797920227, "learning_rate": 1.860161338354205e-08, "loss": 0.4599, "step": 3473 }, { "epoch": 2.9258843346434587, "grad_norm": 0.21530789136886597, "learning_rate": 1.8181506458869735e-08, "loss": 0.4392, "step": 3474 }, { "epoch": 2.9267265581134194, "grad_norm": 0.20487457513809204, "learning_rate": 1.7766189171154468e-08, "loss": 0.4148, "step": 3475 }, { "epoch": 2.92756878158338, "grad_norm": 0.21693313121795654, "learning_rate": 1.7355661919693513e-08, "loss": 0.4512, "step": 3476 }, { "epoch": 2.928411005053341, "grad_norm": 0.1896018087863922, "learning_rate": 1.69499250991767e-08, "loss": 0.457, "step": 3477 }, { "epoch": 2.9292532285233017, "grad_norm": 0.2085801213979721, "learning_rate": 1.654897909968922e-08, "loss": 0.4215, "step": 3478 }, { "epoch": 2.930095451993262, "grad_norm": 0.20534156262874603, "learning_rate": 1.6152824306709392e-08, "loss": 0.461, "step": 3479 }, { "epoch": 2.930937675463223, "grad_norm": 0.2158776968717575, "learning_rate": 1.576146110111032e-08, "loss": 0.4652, "step": 3480 }, { "epoch": 2.9317798989331836, "grad_norm": 0.20524828135967255, "learning_rate": 1.5374889859157137e-08, "loss": 0.4373, "step": 3481 }, { "epoch": 2.9326221224031443, "grad_norm": 0.21023164689540863, "learning_rate": 1.4993110952509215e-08, "loss": 0.4275, "step": 3482 }, { "epoch": 2.933464345873105, "grad_norm": 0.21646900475025177, "learning_rate": 1.4616124748217387e-08, "loss": 0.501, "step": 3483 }, { "epoch": 2.9343065693430654, "grad_norm": 0.23689782619476318, "learning_rate": 1.424393160872506e-08, "loss": 0.4646, "step": 3484 }, { "epoch": 2.9351487928130267, "grad_norm": 0.20327641069889069, "learning_rate": 1.3876531891867106e-08, "loss": 0.4064, "step": 3485 }, { "epoch": 2.935991016282987, "grad_norm": 0.212618887424469, "learning_rate": 1.351392595087042e-08, "loss": 0.4133, "step": 3486 }, { "epoch": 2.9368332397529477, "grad_norm": 0.20986142754554749, "learning_rate": 1.3156114134352805e-08, "loss": 0.4567, "step": 3487 }, { "epoch": 2.9376754632229085, "grad_norm": 0.20672649145126343, "learning_rate": 1.2803096786323521e-08, "loss": 0.4375, "step": 3488 }, { "epoch": 2.9385176866928693, "grad_norm": 0.21226145327091217, "learning_rate": 1.2454874246181081e-08, "loss": 0.4483, "step": 3489 }, { "epoch": 2.93935991016283, "grad_norm": 0.21443919837474823, "learning_rate": 1.2111446848714347e-08, "loss": 0.4369, "step": 3490 }, { "epoch": 2.9402021336327904, "grad_norm": 0.224628284573555, "learning_rate": 1.1772814924103649e-08, "loss": 0.4831, "step": 3491 }, { "epoch": 2.941044357102751, "grad_norm": 0.199407696723938, "learning_rate": 1.1438978797916888e-08, "loss": 0.405, "step": 3492 }, { "epoch": 2.941886580572712, "grad_norm": 0.19767601788043976, "learning_rate": 1.1109938791112328e-08, "loss": 0.4427, "step": 3493 }, { "epoch": 2.9427288040426727, "grad_norm": 0.20052117109298706, "learning_rate": 1.0785695220035809e-08, "loss": 0.4418, "step": 3494 }, { "epoch": 2.9435710275126334, "grad_norm": 0.22425921261310577, "learning_rate": 1.0466248396424072e-08, "loss": 0.4329, "step": 3495 }, { "epoch": 2.944413250982594, "grad_norm": 0.21120692789554596, "learning_rate": 1.0151598627399784e-08, "loss": 0.4353, "step": 3496 }, { "epoch": 2.945255474452555, "grad_norm": 0.217824324965477, "learning_rate": 9.841746215474845e-09, "loss": 0.4163, "step": 3497 }, { "epoch": 2.9460976979225153, "grad_norm": 0.21929295361042023, "learning_rate": 9.536691458548741e-09, "loss": 0.4491, "step": 3498 }, { "epoch": 2.946939921392476, "grad_norm": 0.20783324539661407, "learning_rate": 9.236434649908532e-09, "loss": 0.4636, "step": 3499 }, { "epoch": 2.947782144862437, "grad_norm": 0.21476887166500092, "learning_rate": 8.940976078227193e-09, "loss": 0.4981, "step": 3500 }, { "epoch": 2.9486243683323976, "grad_norm": 0.2267165333032608, "learning_rate": 8.650316027566386e-09, "loss": 0.4166, "step": 3501 }, { "epoch": 2.9494665918023584, "grad_norm": 0.2204739898443222, "learning_rate": 8.364454777373132e-09, "loss": 0.4925, "step": 3502 }, { "epoch": 2.9503088152723187, "grad_norm": 0.21559306979179382, "learning_rate": 8.083392602481477e-09, "loss": 0.4555, "step": 3503 }, { "epoch": 2.9511510387422795, "grad_norm": 0.20753251016139984, "learning_rate": 7.807129773110822e-09, "loss": 0.4634, "step": 3504 }, { "epoch": 2.9519932622122402, "grad_norm": 0.2113957703113556, "learning_rate": 7.535666554866483e-09, "loss": 0.447, "step": 3505 }, { "epoch": 2.952835485682201, "grad_norm": 0.21507149934768677, "learning_rate": 7.269003208740244e-09, "loss": 0.4452, "step": 3506 }, { "epoch": 2.9536777091521618, "grad_norm": 0.19183401763439178, "learning_rate": 7.007139991108136e-09, "loss": 0.4279, "step": 3507 }, { "epoch": 2.9545199326221225, "grad_norm": 0.2206534743309021, "learning_rate": 6.750077153731549e-09, "loss": 0.4637, "step": 3508 }, { "epoch": 2.9553621560920833, "grad_norm": 0.2114880532026291, "learning_rate": 6.497814943756675e-09, "loss": 0.4523, "step": 3509 }, { "epoch": 2.9562043795620436, "grad_norm": 0.21355877816677094, "learning_rate": 6.25035360371451e-09, "loss": 0.481, "step": 3510 }, { "epoch": 2.9570466030320044, "grad_norm": 0.21932551264762878, "learning_rate": 6.00769337151974e-09, "loss": 0.4706, "step": 3511 }, { "epoch": 2.957888826501965, "grad_norm": 0.23873278498649597, "learning_rate": 5.769834480472414e-09, "loss": 0.407, "step": 3512 }, { "epoch": 2.958731049971926, "grad_norm": 0.21279257535934448, "learning_rate": 5.536777159254603e-09, "loss": 0.4514, "step": 3513 }, { "epoch": 2.9595732734418867, "grad_norm": 0.22771210968494415, "learning_rate": 5.308521631934294e-09, "loss": 0.5102, "step": 3514 }, { "epoch": 2.960415496911847, "grad_norm": 0.19968640804290771, "learning_rate": 5.08506811796039e-09, "loss": 0.4266, "step": 3515 }, { "epoch": 2.9612577203818082, "grad_norm": 0.22548070549964905, "learning_rate": 4.866416832167153e-09, "loss": 0.4717, "step": 3516 }, { "epoch": 2.9620999438517686, "grad_norm": 0.2218323051929474, "learning_rate": 4.652567984770873e-09, "loss": 0.466, "step": 3517 }, { "epoch": 2.9629421673217293, "grad_norm": 0.2366809993982315, "learning_rate": 4.443521781370974e-09, "loss": 0.4447, "step": 3518 }, { "epoch": 2.96378439079169, "grad_norm": 0.22608348727226257, "learning_rate": 4.239278422948911e-09, "loss": 0.4826, "step": 3519 }, { "epoch": 2.964626614261651, "grad_norm": 0.20553439855575562, "learning_rate": 4.0398381058692755e-09, "loss": 0.418, "step": 3520 }, { "epoch": 2.9654688377316116, "grad_norm": 0.2012038677930832, "learning_rate": 3.845201021879241e-09, "loss": 0.4342, "step": 3521 }, { "epoch": 2.966311061201572, "grad_norm": 0.2102879136800766, "learning_rate": 3.655367358106343e-09, "loss": 0.4457, "step": 3522 }, { "epoch": 2.9671532846715327, "grad_norm": 0.21517322957515717, "learning_rate": 3.470337297062365e-09, "loss": 0.4785, "step": 3523 }, { "epoch": 2.9679955081414935, "grad_norm": 0.21983104944229126, "learning_rate": 3.290111016638342e-09, "loss": 0.4343, "step": 3524 }, { "epoch": 2.9688377316114543, "grad_norm": 0.19521774351596832, "learning_rate": 3.1146886901090024e-09, "loss": 0.4358, "step": 3525 }, { "epoch": 2.969679955081415, "grad_norm": 0.19644705951213837, "learning_rate": 2.9440704861288804e-09, "loss": 0.4281, "step": 3526 }, { "epoch": 2.970522178551376, "grad_norm": 0.20724806189537048, "learning_rate": 2.7782565687339836e-09, "loss": 0.459, "step": 3527 }, { "epoch": 2.9713644020213366, "grad_norm": 0.21975044906139374, "learning_rate": 2.617247097342901e-09, "loss": 0.4498, "step": 3528 }, { "epoch": 2.972206625491297, "grad_norm": 0.22195209562778473, "learning_rate": 2.461042226752919e-09, "loss": 0.4198, "step": 3529 }, { "epoch": 2.9730488489612577, "grad_norm": 0.20503677427768707, "learning_rate": 2.3096421071433508e-09, "loss": 0.4723, "step": 3530 }, { "epoch": 2.9738910724312184, "grad_norm": 0.24516940116882324, "learning_rate": 2.1630468840738716e-09, "loss": 0.4345, "step": 3531 }, { "epoch": 2.974733295901179, "grad_norm": 0.22163571417331696, "learning_rate": 2.0212566984845194e-09, "loss": 0.4497, "step": 3532 }, { "epoch": 2.97557551937114, "grad_norm": 0.19674459099769592, "learning_rate": 1.8842716866956935e-09, "loss": 0.4229, "step": 3533 }, { "epoch": 2.9764177428411003, "grad_norm": 0.21590568125247955, "learning_rate": 1.7520919804075997e-09, "loss": 0.4801, "step": 3534 }, { "epoch": 2.977259966311061, "grad_norm": 0.19893136620521545, "learning_rate": 1.624717706701917e-09, "loss": 0.3907, "step": 3535 }, { "epoch": 2.978102189781022, "grad_norm": 0.2576192021369934, "learning_rate": 1.5021489880384653e-09, "loss": 0.4782, "step": 3536 }, { "epoch": 2.9789444132509826, "grad_norm": 0.18867576122283936, "learning_rate": 1.3843859422574269e-09, "loss": 0.4322, "step": 3537 }, { "epoch": 2.9797866367209433, "grad_norm": 0.22247786819934845, "learning_rate": 1.2714286825793453e-09, "loss": 0.4822, "step": 3538 }, { "epoch": 2.980628860190904, "grad_norm": 0.21036291122436523, "learning_rate": 1.163277317604572e-09, "loss": 0.4615, "step": 3539 }, { "epoch": 2.981471083660865, "grad_norm": 0.20375515520572662, "learning_rate": 1.0599319513115992e-09, "loss": 0.413, "step": 3540 }, { "epoch": 2.982313307130825, "grad_norm": 0.20990204811096191, "learning_rate": 9.613926830587262e-10, "loss": 0.3921, "step": 3541 }, { "epoch": 2.983155530600786, "grad_norm": 0.24520505964756012, "learning_rate": 8.676596075851696e-10, "loss": 0.5072, "step": 3542 }, { "epoch": 2.9839977540707467, "grad_norm": 0.19878950715065002, "learning_rate": 7.787328150071771e-10, "loss": 0.4341, "step": 3543 }, { "epoch": 2.9848399775407075, "grad_norm": 0.21862483024597168, "learning_rate": 6.946123908208036e-10, "loss": 0.4933, "step": 3544 }, { "epoch": 2.9856822010106683, "grad_norm": 0.19871605932712555, "learning_rate": 6.152984159024655e-10, "loss": 0.4052, "step": 3545 }, { "epoch": 2.9865244244806286, "grad_norm": 0.2167184203863144, "learning_rate": 5.40790966505611e-10, "loss": 0.4664, "step": 3546 }, { "epoch": 2.98736664795059, "grad_norm": 0.2018672674894333, "learning_rate": 4.710901142634949e-10, "loss": 0.4178, "step": 3547 }, { "epoch": 2.98820887142055, "grad_norm": 0.21038153767585754, "learning_rate": 4.061959261886239e-10, "loss": 0.4786, "step": 3548 }, { "epoch": 2.989051094890511, "grad_norm": 0.23533689975738525, "learning_rate": 3.4610846467109106e-10, "loss": 0.4942, "step": 3549 }, { "epoch": 2.9898933183604717, "grad_norm": 0.21003089845180511, "learning_rate": 2.9082778748135146e-10, "loss": 0.4314, "step": 3550 }, { "epoch": 2.9907355418304324, "grad_norm": 0.2149553745985031, "learning_rate": 2.403539477668915e-10, "loss": 0.419, "step": 3551 }, { "epoch": 2.991577765300393, "grad_norm": 0.22793185710906982, "learning_rate": 1.9468699405444936e-10, "loss": 0.4702, "step": 3552 }, { "epoch": 2.9924199887703535, "grad_norm": 0.21543332934379578, "learning_rate": 1.538269702494599e-10, "loss": 0.4682, "step": 3553 }, { "epoch": 2.9932622122403143, "grad_norm": 0.1920558214187622, "learning_rate": 1.1777391563549956e-10, "loss": 0.3955, "step": 3554 }, { "epoch": 2.994104435710275, "grad_norm": 0.21087360382080078, "learning_rate": 8.652786487484133e-11, "loss": 0.4368, "step": 3555 }, { "epoch": 2.994946659180236, "grad_norm": 0.23648805916309357, "learning_rate": 6.008884800845494e-11, "loss": 0.4981, "step": 3556 }, { "epoch": 2.9957888826501966, "grad_norm": 0.20154471695423126, "learning_rate": 3.8456890455451646e-11, "loss": 0.4429, "step": 3557 }, { "epoch": 2.9966311061201574, "grad_norm": 0.19853216409683228, "learning_rate": 2.1632013013084265e-11, "loss": 0.4119, "step": 3558 }, { "epoch": 2.997473329590118, "grad_norm": 0.2058398723602295, "learning_rate": 9.614231857302258e-12, "loss": 0.4608, "step": 3559 }, { "epoch": 2.9983155530600785, "grad_norm": 0.22598962485790253, "learning_rate": 2.403558542196649e-12, "loss": 0.4199, "step": 3560 }, { "epoch": 2.9991577765300392, "grad_norm": 0.20271840691566467, "learning_rate": 0.0, "loss": 0.4545, "step": 3561 }, { "epoch": 2.9991577765300392, "step": 3561, "total_flos": 4618853558419456.0, "train_loss": 0.565116970877539, "train_runtime": 71303.2223, "train_samples_per_second": 4.795, "train_steps_per_second": 0.05 } ], "logging_steps": 1.0, "max_steps": 3561, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4618853558419456.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }