{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 8790, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00034129692832764505, "grad_norm": 8.071894645690918, "learning_rate": 0.0009998862343572241, "loss": 12.1418, "step": 1 }, { "epoch": 0.0006825938566552901, "grad_norm": 5.984577178955078, "learning_rate": 0.0009997724687144482, "loss": 10.9212, "step": 2 }, { "epoch": 0.0010238907849829352, "grad_norm": 5.766450881958008, "learning_rate": 0.0009996587030716723, "loss": 10.3976, "step": 3 }, { "epoch": 0.0013651877133105802, "grad_norm": 4.486149787902832, "learning_rate": 0.0009995449374288964, "loss": 9.6927, "step": 4 }, { "epoch": 0.0017064846416382253, "grad_norm": 4.054387092590332, "learning_rate": 0.0009994311717861205, "loss": 9.4759, "step": 5 }, { "epoch": 0.0020477815699658703, "grad_norm": 4.101088523864746, "learning_rate": 0.0009993174061433449, "loss": 9.2064, "step": 6 }, { "epoch": 0.002389078498293515, "grad_norm": 4.606311798095703, "learning_rate": 0.000999203640500569, "loss": 8.5577, "step": 7 }, { "epoch": 0.0027303754266211604, "grad_norm": 4.251020431518555, "learning_rate": 0.000999089874857793, "loss": 8.6099, "step": 8 }, { "epoch": 0.0030716723549488053, "grad_norm": 4.296706199645996, "learning_rate": 0.0009989761092150172, "loss": 8.6605, "step": 9 }, { "epoch": 0.0034129692832764505, "grad_norm": 4.185203552246094, "learning_rate": 0.000998862343572241, "loss": 8.4278, "step": 10 }, { "epoch": 0.0037542662116040954, "grad_norm": 19.910564422607422, "learning_rate": 0.0009987485779294652, "loss": 6.7563, "step": 11 }, { "epoch": 0.004095563139931741, "grad_norm": 4.908084392547607, "learning_rate": 0.0009986348122866895, "loss": 8.5025, "step": 12 }, { "epoch": 0.004436860068259386, "grad_norm": 4.213999271392822, "learning_rate": 0.0009985210466439136, "loss": 8.3884, "step": 13 }, { "epoch": 0.00477815699658703, "grad_norm": 5.283620834350586, "learning_rate": 0.0009984072810011377, "loss": 7.6757, "step": 14 }, { "epoch": 0.005119453924914676, "grad_norm": 4.11642599105835, "learning_rate": 0.0009982935153583618, "loss": 8.2126, "step": 15 }, { "epoch": 0.005460750853242321, "grad_norm": 3.9162049293518066, "learning_rate": 0.000998179749715586, "loss": 8.1421, "step": 16 }, { "epoch": 0.005802047781569966, "grad_norm": 3.7291955947875977, "learning_rate": 0.00099806598407281, "loss": 8.4332, "step": 17 }, { "epoch": 0.0061433447098976105, "grad_norm": 4.422487258911133, "learning_rate": 0.0009979522184300341, "loss": 7.373, "step": 18 }, { "epoch": 0.006484641638225256, "grad_norm": 3.9871609210968018, "learning_rate": 0.0009978384527872582, "loss": 7.8555, "step": 19 }, { "epoch": 0.006825938566552901, "grad_norm": 4.012209415435791, "learning_rate": 0.0009977246871444823, "loss": 7.9322, "step": 20 }, { "epoch": 0.007167235494880546, "grad_norm": 3.9194388389587402, "learning_rate": 0.0009976109215017064, "loss": 7.5508, "step": 21 }, { "epoch": 0.007508532423208191, "grad_norm": 3.990365743637085, "learning_rate": 0.0009974971558589305, "loss": 7.8027, "step": 22 }, { "epoch": 0.007849829351535836, "grad_norm": 4.236116409301758, "learning_rate": 0.0009973833902161549, "loss": 7.9234, "step": 23 }, { "epoch": 0.008191126279863481, "grad_norm": 9.924948692321777, "learning_rate": 0.000997269624573379, "loss": 7.0817, "step": 24 }, { "epoch": 0.008532423208191127, "grad_norm": 7.816974639892578, "learning_rate": 0.000997155858930603, "loss": 6.7681, "step": 25 }, { "epoch": 0.008873720136518772, "grad_norm": 4.295772075653076, "learning_rate": 0.0009970420932878272, "loss": 7.8656, "step": 26 }, { "epoch": 0.009215017064846417, "grad_norm": 8.219182968139648, "learning_rate": 0.0009969283276450513, "loss": 5.6737, "step": 27 }, { "epoch": 0.00955631399317406, "grad_norm": 6.722366809844971, "learning_rate": 0.0009968145620022754, "loss": 6.44, "step": 28 }, { "epoch": 0.009897610921501706, "grad_norm": 4.0717597007751465, "learning_rate": 0.0009967007963594995, "loss": 7.4495, "step": 29 }, { "epoch": 0.010238907849829351, "grad_norm": 3.9176180362701416, "learning_rate": 0.0009965870307167236, "loss": 7.2357, "step": 30 }, { "epoch": 0.010580204778156996, "grad_norm": 4.101375102996826, "learning_rate": 0.0009964732650739477, "loss": 7.068, "step": 31 }, { "epoch": 0.010921501706484642, "grad_norm": 3.5856902599334717, "learning_rate": 0.0009963594994311718, "loss": 7.6736, "step": 32 }, { "epoch": 0.011262798634812287, "grad_norm": 4.2619194984436035, "learning_rate": 0.000996245733788396, "loss": 7.0183, "step": 33 }, { "epoch": 0.011604095563139932, "grad_norm": 3.7279083728790283, "learning_rate": 0.00099613196814562, "loss": 7.1387, "step": 34 }, { "epoch": 0.011945392491467578, "grad_norm": 3.478987216949463, "learning_rate": 0.0009960182025028441, "loss": 7.4298, "step": 35 }, { "epoch": 0.012286689419795221, "grad_norm": 3.570570707321167, "learning_rate": 0.0009959044368600682, "loss": 7.0718, "step": 36 }, { "epoch": 0.012627986348122866, "grad_norm": 3.5319583415985107, "learning_rate": 0.0009957906712172923, "loss": 7.0964, "step": 37 }, { "epoch": 0.012969283276450512, "grad_norm": 3.5056099891662598, "learning_rate": 0.0009956769055745164, "loss": 7.1669, "step": 38 }, { "epoch": 0.013310580204778157, "grad_norm": 3.48520827293396, "learning_rate": 0.0009955631399317405, "loss": 7.5944, "step": 39 }, { "epoch": 0.013651877133105802, "grad_norm": 3.7257206439971924, "learning_rate": 0.0009954493742889647, "loss": 7.0551, "step": 40 }, { "epoch": 0.013993174061433447, "grad_norm": 4.222928524017334, "learning_rate": 0.000995335608646189, "loss": 6.9363, "step": 41 }, { "epoch": 0.014334470989761093, "grad_norm": 4.023824214935303, "learning_rate": 0.000995221843003413, "loss": 7.2418, "step": 42 }, { "epoch": 0.014675767918088738, "grad_norm": 3.970456600189209, "learning_rate": 0.0009951080773606372, "loss": 6.9394, "step": 43 }, { "epoch": 0.015017064846416382, "grad_norm": 3.697723627090454, "learning_rate": 0.0009949943117178613, "loss": 7.1638, "step": 44 }, { "epoch": 0.015358361774744027, "grad_norm": 9.966476440429688, "learning_rate": 0.0009948805460750854, "loss": 6.5215, "step": 45 }, { "epoch": 0.015699658703071672, "grad_norm": 4.007918357849121, "learning_rate": 0.0009947667804323095, "loss": 7.0861, "step": 46 }, { "epoch": 0.016040955631399317, "grad_norm": 4.113846778869629, "learning_rate": 0.0009946530147895336, "loss": 7.4096, "step": 47 }, { "epoch": 0.016382252559726963, "grad_norm": 3.8716022968292236, "learning_rate": 0.0009945392491467577, "loss": 7.1557, "step": 48 }, { "epoch": 0.016723549488054608, "grad_norm": 3.5497727394104004, "learning_rate": 0.0009944254835039818, "loss": 7.3205, "step": 49 }, { "epoch": 0.017064846416382253, "grad_norm": 3.501861810684204, "learning_rate": 0.000994311717861206, "loss": 7.1354, "step": 50 }, { "epoch": 0.0174061433447099, "grad_norm": 3.8758769035339355, "learning_rate": 0.00099419795221843, "loss": 6.743, "step": 51 }, { "epoch": 0.017747440273037544, "grad_norm": 4.309325218200684, "learning_rate": 0.0009940841865756541, "loss": 6.7807, "step": 52 }, { "epoch": 0.01808873720136519, "grad_norm": 3.619455337524414, "learning_rate": 0.0009939704209328782, "loss": 7.1105, "step": 53 }, { "epoch": 0.018430034129692834, "grad_norm": 3.5727694034576416, "learning_rate": 0.0009938566552901023, "loss": 7.1482, "step": 54 }, { "epoch": 0.01877133105802048, "grad_norm": 11.776286125183105, "learning_rate": 0.0009937428896473264, "loss": 5.6497, "step": 55 }, { "epoch": 0.01911262798634812, "grad_norm": 4.078109264373779, "learning_rate": 0.0009936291240045505, "loss": 7.2197, "step": 56 }, { "epoch": 0.019453924914675767, "grad_norm": 4.522604942321777, "learning_rate": 0.0009935153583617747, "loss": 6.3764, "step": 57 }, { "epoch": 0.019795221843003412, "grad_norm": 3.437112808227539, "learning_rate": 0.000993401592718999, "loss": 6.8425, "step": 58 }, { "epoch": 0.020136518771331057, "grad_norm": 3.7361385822296143, "learning_rate": 0.000993287827076223, "loss": 7.3883, "step": 59 }, { "epoch": 0.020477815699658702, "grad_norm": 3.4807488918304443, "learning_rate": 0.0009931740614334472, "loss": 7.353, "step": 60 }, { "epoch": 0.020819112627986348, "grad_norm": 3.5185515880584717, "learning_rate": 0.0009930602957906713, "loss": 7.1191, "step": 61 }, { "epoch": 0.021160409556313993, "grad_norm": 3.42722749710083, "learning_rate": 0.0009929465301478954, "loss": 7.0395, "step": 62 }, { "epoch": 0.021501706484641638, "grad_norm": 3.5530056953430176, "learning_rate": 0.0009928327645051195, "loss": 7.005, "step": 63 }, { "epoch": 0.021843003412969283, "grad_norm": 3.577638626098633, "learning_rate": 0.0009927189988623436, "loss": 7.1476, "step": 64 }, { "epoch": 0.02218430034129693, "grad_norm": 4.04377555847168, "learning_rate": 0.0009926052332195677, "loss": 7.1881, "step": 65 }, { "epoch": 0.022525597269624574, "grad_norm": 4.380084037780762, "learning_rate": 0.0009924914675767918, "loss": 6.6989, "step": 66 }, { "epoch": 0.02286689419795222, "grad_norm": 4.379724502563477, "learning_rate": 0.000992377701934016, "loss": 7.276, "step": 67 }, { "epoch": 0.023208191126279865, "grad_norm": 3.8870277404785156, "learning_rate": 0.00099226393629124, "loss": 7.2336, "step": 68 }, { "epoch": 0.02354948805460751, "grad_norm": 3.6682233810424805, "learning_rate": 0.0009921501706484641, "loss": 6.7899, "step": 69 }, { "epoch": 0.023890784982935155, "grad_norm": 3.732855796813965, "learning_rate": 0.0009920364050056882, "loss": 6.5689, "step": 70 }, { "epoch": 0.024232081911262797, "grad_norm": 3.7168567180633545, "learning_rate": 0.0009919226393629123, "loss": 6.9181, "step": 71 }, { "epoch": 0.024573378839590442, "grad_norm": 3.8344621658325195, "learning_rate": 0.0009918088737201364, "loss": 7.4924, "step": 72 }, { "epoch": 0.024914675767918087, "grad_norm": 5.3828606605529785, "learning_rate": 0.0009916951080773605, "loss": 6.9739, "step": 73 }, { "epoch": 0.025255972696245733, "grad_norm": 3.8628880977630615, "learning_rate": 0.0009915813424345847, "loss": 7.7034, "step": 74 }, { "epoch": 0.025597269624573378, "grad_norm": 4.138500213623047, "learning_rate": 0.000991467576791809, "loss": 7.1116, "step": 75 }, { "epoch": 0.025938566552901023, "grad_norm": 3.994074821472168, "learning_rate": 0.000991353811149033, "loss": 6.6436, "step": 76 }, { "epoch": 0.02627986348122867, "grad_norm": 3.761909246444702, "learning_rate": 0.0009912400455062572, "loss": 6.9494, "step": 77 }, { "epoch": 0.026621160409556314, "grad_norm": 3.97804594039917, "learning_rate": 0.0009911262798634813, "loss": 6.7642, "step": 78 }, { "epoch": 0.02696245733788396, "grad_norm": 12.43903923034668, "learning_rate": 0.0009910125142207054, "loss": 7.3236, "step": 79 }, { "epoch": 0.027303754266211604, "grad_norm": 4.261969566345215, "learning_rate": 0.0009908987485779295, "loss": 6.7744, "step": 80 }, { "epoch": 0.02764505119453925, "grad_norm": 7.615736484527588, "learning_rate": 0.0009907849829351536, "loss": 6.103, "step": 81 }, { "epoch": 0.027986348122866895, "grad_norm": 3.833221197128296, "learning_rate": 0.0009906712172923777, "loss": 6.8201, "step": 82 }, { "epoch": 0.02832764505119454, "grad_norm": 3.7682363986968994, "learning_rate": 0.0009905574516496018, "loss": 7.2732, "step": 83 }, { "epoch": 0.028668941979522185, "grad_norm": 3.788194179534912, "learning_rate": 0.000990443686006826, "loss": 6.8951, "step": 84 }, { "epoch": 0.02901023890784983, "grad_norm": 3.743475914001465, "learning_rate": 0.00099032992036405, "loss": 7.4113, "step": 85 }, { "epoch": 0.029351535836177476, "grad_norm": 6.027090072631836, "learning_rate": 0.0009902161547212743, "loss": 6.7511, "step": 86 }, { "epoch": 0.029692832764505118, "grad_norm": 11.415129661560059, "learning_rate": 0.0009901023890784985, "loss": 6.9999, "step": 87 }, { "epoch": 0.030034129692832763, "grad_norm": 3.9674999713897705, "learning_rate": 0.0009899886234357223, "loss": 7.0998, "step": 88 }, { "epoch": 0.03037542662116041, "grad_norm": 2.7847208976745605, "learning_rate": 0.0009898748577929464, "loss": 3.5909, "step": 89 }, { "epoch": 0.030716723549488054, "grad_norm": 4.207309722900391, "learning_rate": 0.0009897610921501705, "loss": 6.5897, "step": 90 }, { "epoch": 0.0310580204778157, "grad_norm": 3.6647539138793945, "learning_rate": 0.0009896473265073947, "loss": 7.064, "step": 91 }, { "epoch": 0.031399317406143344, "grad_norm": 3.664642572402954, "learning_rate": 0.000989533560864619, "loss": 6.9888, "step": 92 }, { "epoch": 0.03174061433447099, "grad_norm": 3.5732812881469727, "learning_rate": 0.000989419795221843, "loss": 6.7395, "step": 93 }, { "epoch": 0.032081911262798635, "grad_norm": 3.5871763229370117, "learning_rate": 0.0009893060295790672, "loss": 7.214, "step": 94 }, { "epoch": 0.032423208191126277, "grad_norm": 7.773305892944336, "learning_rate": 0.0009891922639362913, "loss": 6.2924, "step": 95 }, { "epoch": 0.032764505119453925, "grad_norm": 4.1082563400268555, "learning_rate": 0.0009890784982935154, "loss": 7.0112, "step": 96 }, { "epoch": 0.03310580204778157, "grad_norm": 3.709672212600708, "learning_rate": 0.0009889647326507395, "loss": 6.6266, "step": 97 }, { "epoch": 0.033447098976109216, "grad_norm": 5.293731212615967, "learning_rate": 0.0009888509670079636, "loss": 5.1656, "step": 98 }, { "epoch": 0.03378839590443686, "grad_norm": 3.9541916847229004, "learning_rate": 0.0009887372013651877, "loss": 7.2169, "step": 99 }, { "epoch": 0.034129692832764506, "grad_norm": 4.045575141906738, "learning_rate": 0.0009886234357224118, "loss": 6.9998, "step": 100 }, { "epoch": 0.03447098976109215, "grad_norm": 3.9652297496795654, "learning_rate": 0.000988509670079636, "loss": 7.1183, "step": 101 }, { "epoch": 0.0348122866894198, "grad_norm": 3.934983730316162, "learning_rate": 0.00098839590443686, "loss": 7.133, "step": 102 }, { "epoch": 0.03515358361774744, "grad_norm": 3.6193904876708984, "learning_rate": 0.0009882821387940843, "loss": 7.1528, "step": 103 }, { "epoch": 0.03549488054607509, "grad_norm": 3.532212018966675, "learning_rate": 0.0009881683731513085, "loss": 7.3217, "step": 104 }, { "epoch": 0.03583617747440273, "grad_norm": 5.649550437927246, "learning_rate": 0.0009880546075085326, "loss": 6.6923, "step": 105 }, { "epoch": 0.03617747440273038, "grad_norm": 3.867431402206421, "learning_rate": 0.0009879408418657567, "loss": 6.7362, "step": 106 }, { "epoch": 0.03651877133105802, "grad_norm": 3.762444019317627, "learning_rate": 0.0009878270762229806, "loss": 7.6837, "step": 107 }, { "epoch": 0.03686006825938567, "grad_norm": 3.932126522064209, "learning_rate": 0.0009877133105802047, "loss": 7.0018, "step": 108 }, { "epoch": 0.03720136518771331, "grad_norm": 5.752960205078125, "learning_rate": 0.000987599544937429, "loss": 6.8083, "step": 109 }, { "epoch": 0.03754266211604096, "grad_norm": 3.6438000202178955, "learning_rate": 0.000987485779294653, "loss": 6.7968, "step": 110 }, { "epoch": 0.0378839590443686, "grad_norm": 3.9477944374084473, "learning_rate": 0.0009873720136518772, "loss": 6.9717, "step": 111 }, { "epoch": 0.03822525597269624, "grad_norm": 3.776455879211426, "learning_rate": 0.0009872582480091013, "loss": 6.5417, "step": 112 }, { "epoch": 0.03856655290102389, "grad_norm": 4.05007791519165, "learning_rate": 0.0009871444823663254, "loss": 7.0061, "step": 113 }, { "epoch": 0.03890784982935153, "grad_norm": 3.8773951530456543, "learning_rate": 0.0009870307167235495, "loss": 7.1258, "step": 114 }, { "epoch": 0.03924914675767918, "grad_norm": 3.6618783473968506, "learning_rate": 0.0009869169510807736, "loss": 6.6578, "step": 115 }, { "epoch": 0.039590443686006824, "grad_norm": 3.705451726913452, "learning_rate": 0.0009868031854379977, "loss": 7.15, "step": 116 }, { "epoch": 0.03993174061433447, "grad_norm": 3.6103193759918213, "learning_rate": 0.0009866894197952218, "loss": 7.2358, "step": 117 }, { "epoch": 0.040273037542662114, "grad_norm": 3.8052639961242676, "learning_rate": 0.000986575654152446, "loss": 6.7413, "step": 118 }, { "epoch": 0.04061433447098976, "grad_norm": 3.7359206676483154, "learning_rate": 0.00098646188850967, "loss": 6.8834, "step": 119 }, { "epoch": 0.040955631399317405, "grad_norm": 3.7333805561065674, "learning_rate": 0.0009863481228668941, "loss": 6.7141, "step": 120 }, { "epoch": 0.041296928327645054, "grad_norm": 3.592845916748047, "learning_rate": 0.0009862343572241185, "loss": 6.9167, "step": 121 }, { "epoch": 0.041638225255972695, "grad_norm": 3.8049752712249756, "learning_rate": 0.0009861205915813426, "loss": 7.1732, "step": 122 }, { "epoch": 0.041979522184300344, "grad_norm": 3.7017672061920166, "learning_rate": 0.0009860068259385667, "loss": 7.1409, "step": 123 }, { "epoch": 0.042320819112627986, "grad_norm": 3.581944704055786, "learning_rate": 0.0009858930602957908, "loss": 7.401, "step": 124 }, { "epoch": 0.042662116040955635, "grad_norm": 3.60552716255188, "learning_rate": 0.0009857792946530149, "loss": 7.1492, "step": 125 }, { "epoch": 0.043003412969283276, "grad_norm": 3.5588576793670654, "learning_rate": 0.000985665529010239, "loss": 7.2996, "step": 126 }, { "epoch": 0.04334470989761092, "grad_norm": 3.566678524017334, "learning_rate": 0.000985551763367463, "loss": 6.7319, "step": 127 }, { "epoch": 0.04368600682593857, "grad_norm": 5.839478492736816, "learning_rate": 0.0009854379977246872, "loss": 6.2529, "step": 128 }, { "epoch": 0.04402730375426621, "grad_norm": 5.420695781707764, "learning_rate": 0.0009853242320819113, "loss": 6.3413, "step": 129 }, { "epoch": 0.04436860068259386, "grad_norm": 4.142327785491943, "learning_rate": 0.0009852104664391354, "loss": 7.3027, "step": 130 }, { "epoch": 0.0447098976109215, "grad_norm": 3.7694830894470215, "learning_rate": 0.0009850967007963595, "loss": 6.5612, "step": 131 }, { "epoch": 0.04505119453924915, "grad_norm": 3.7021262645721436, "learning_rate": 0.0009849829351535836, "loss": 7.1486, "step": 132 }, { "epoch": 0.04539249146757679, "grad_norm": 3.4137067794799805, "learning_rate": 0.0009848691695108077, "loss": 7.3512, "step": 133 }, { "epoch": 0.04573378839590444, "grad_norm": 3.9189088344573975, "learning_rate": 0.0009847554038680318, "loss": 6.877, "step": 134 }, { "epoch": 0.04607508532423208, "grad_norm": 3.598252534866333, "learning_rate": 0.000984641638225256, "loss": 7.1513, "step": 135 }, { "epoch": 0.04641638225255973, "grad_norm": 3.869384527206421, "learning_rate": 0.00098452787258248, "loss": 6.7868, "step": 136 }, { "epoch": 0.04675767918088737, "grad_norm": 4.416536808013916, "learning_rate": 0.0009844141069397041, "loss": 6.3465, "step": 137 }, { "epoch": 0.04709897610921502, "grad_norm": 3.654963493347168, "learning_rate": 0.0009843003412969285, "loss": 7.4472, "step": 138 }, { "epoch": 0.04744027303754266, "grad_norm": 3.6647439002990723, "learning_rate": 0.0009841865756541526, "loss": 7.0507, "step": 139 }, { "epoch": 0.04778156996587031, "grad_norm": 3.4522414207458496, "learning_rate": 0.0009840728100113767, "loss": 7.1764, "step": 140 }, { "epoch": 0.04812286689419795, "grad_norm": 3.3954365253448486, "learning_rate": 0.0009839590443686008, "loss": 6.9876, "step": 141 }, { "epoch": 0.048464163822525594, "grad_norm": 3.482323169708252, "learning_rate": 0.0009838452787258249, "loss": 6.5746, "step": 142 }, { "epoch": 0.04880546075085324, "grad_norm": 3.477513551712036, "learning_rate": 0.000983731513083049, "loss": 7.2068, "step": 143 }, { "epoch": 0.049146757679180884, "grad_norm": 3.6850838661193848, "learning_rate": 0.000983617747440273, "loss": 6.8805, "step": 144 }, { "epoch": 0.04948805460750853, "grad_norm": 6.7132673263549805, "learning_rate": 0.0009835039817974972, "loss": 6.5654, "step": 145 }, { "epoch": 0.049829351535836175, "grad_norm": 3.8716394901275635, "learning_rate": 0.0009833902161547213, "loss": 6.9398, "step": 146 }, { "epoch": 0.050170648464163824, "grad_norm": 3.562126874923706, "learning_rate": 0.0009832764505119454, "loss": 6.804, "step": 147 }, { "epoch": 0.050511945392491465, "grad_norm": 3.6806352138519287, "learning_rate": 0.0009831626848691695, "loss": 7.0423, "step": 148 }, { "epoch": 0.050853242320819114, "grad_norm": 5.191007614135742, "learning_rate": 0.0009830489192263936, "loss": 5.9842, "step": 149 }, { "epoch": 0.051194539249146756, "grad_norm": 3.838003396987915, "learning_rate": 0.0009829351535836177, "loss": 7.0365, "step": 150 }, { "epoch": 0.051535836177474405, "grad_norm": 4.209732532501221, "learning_rate": 0.0009828213879408418, "loss": 6.7704, "step": 151 }, { "epoch": 0.05187713310580205, "grad_norm": 3.973942995071411, "learning_rate": 0.000982707622298066, "loss": 6.5295, "step": 152 }, { "epoch": 0.052218430034129695, "grad_norm": 3.3907649517059326, "learning_rate": 0.00098259385665529, "loss": 7.0298, "step": 153 }, { "epoch": 0.05255972696245734, "grad_norm": 3.6388776302337646, "learning_rate": 0.0009824800910125141, "loss": 6.7746, "step": 154 }, { "epoch": 0.052901023890784986, "grad_norm": 3.418466091156006, "learning_rate": 0.0009823663253697385, "loss": 6.9274, "step": 155 }, { "epoch": 0.05324232081911263, "grad_norm": 4.016181945800781, "learning_rate": 0.0009822525597269626, "loss": 6.647, "step": 156 }, { "epoch": 0.053583617747440276, "grad_norm": 3.4281997680664062, "learning_rate": 0.0009821387940841867, "loss": 6.594, "step": 157 }, { "epoch": 0.05392491467576792, "grad_norm": 3.6327078342437744, "learning_rate": 0.0009820250284414108, "loss": 6.8371, "step": 158 }, { "epoch": 0.05426621160409556, "grad_norm": 3.937331438064575, "learning_rate": 0.0009819112627986349, "loss": 6.6783, "step": 159 }, { "epoch": 0.05460750853242321, "grad_norm": 5.26201057434082, "learning_rate": 0.000981797497155859, "loss": 5.6406, "step": 160 }, { "epoch": 0.05494880546075085, "grad_norm": 3.728435516357422, "learning_rate": 0.000981683731513083, "loss": 7.1284, "step": 161 }, { "epoch": 0.0552901023890785, "grad_norm": 3.79921555519104, "learning_rate": 0.0009815699658703072, "loss": 6.7235, "step": 162 }, { "epoch": 0.05563139931740614, "grad_norm": 3.5397472381591797, "learning_rate": 0.0009814562002275313, "loss": 6.6777, "step": 163 }, { "epoch": 0.05597269624573379, "grad_norm": 3.5530202388763428, "learning_rate": 0.0009813424345847554, "loss": 6.4635, "step": 164 }, { "epoch": 0.05631399317406143, "grad_norm": 4.582598686218262, "learning_rate": 0.0009812286689419795, "loss": 6.3199, "step": 165 }, { "epoch": 0.05665529010238908, "grad_norm": 4.460880279541016, "learning_rate": 0.0009811149032992036, "loss": 5.5054, "step": 166 }, { "epoch": 0.05699658703071672, "grad_norm": 3.812800168991089, "learning_rate": 0.0009810011376564277, "loss": 6.8298, "step": 167 }, { "epoch": 0.05733788395904437, "grad_norm": 3.747919797897339, "learning_rate": 0.0009808873720136518, "loss": 6.7751, "step": 168 }, { "epoch": 0.05767918088737201, "grad_norm": 6.039458274841309, "learning_rate": 0.000980773606370876, "loss": 5.7825, "step": 169 }, { "epoch": 0.05802047781569966, "grad_norm": 3.8591084480285645, "learning_rate": 0.0009806598407281, "loss": 6.8238, "step": 170 }, { "epoch": 0.0583617747440273, "grad_norm": 3.8271124362945557, "learning_rate": 0.0009805460750853241, "loss": 6.3032, "step": 171 }, { "epoch": 0.05870307167235495, "grad_norm": 3.730949640274048, "learning_rate": 0.0009804323094425485, "loss": 6.9856, "step": 172 }, { "epoch": 0.059044368600682594, "grad_norm": 5.848387718200684, "learning_rate": 0.0009803185437997726, "loss": 5.8503, "step": 173 }, { "epoch": 0.059385665529010236, "grad_norm": 5.451254844665527, "learning_rate": 0.0009802047781569967, "loss": 6.6104, "step": 174 }, { "epoch": 0.059726962457337884, "grad_norm": 4.356447219848633, "learning_rate": 0.0009800910125142208, "loss": 7.2519, "step": 175 }, { "epoch": 0.060068259385665526, "grad_norm": 3.5853893756866455, "learning_rate": 0.0009799772468714449, "loss": 6.8436, "step": 176 }, { "epoch": 0.060409556313993175, "grad_norm": 3.9135236740112305, "learning_rate": 0.000979863481228669, "loss": 6.7005, "step": 177 }, { "epoch": 0.06075085324232082, "grad_norm": 3.59726881980896, "learning_rate": 0.000979749715585893, "loss": 6.747, "step": 178 }, { "epoch": 0.061092150170648465, "grad_norm": 4.7263593673706055, "learning_rate": 0.0009796359499431172, "loss": 5.5305, "step": 179 }, { "epoch": 0.06143344709897611, "grad_norm": 3.808228015899658, "learning_rate": 0.0009795221843003413, "loss": 7.0182, "step": 180 }, { "epoch": 0.061774744027303756, "grad_norm": 3.7145042419433594, "learning_rate": 0.0009794084186575654, "loss": 6.8614, "step": 181 }, { "epoch": 0.0621160409556314, "grad_norm": 3.560530424118042, "learning_rate": 0.0009792946530147895, "loss": 6.8768, "step": 182 }, { "epoch": 0.062457337883959047, "grad_norm": 3.480214834213257, "learning_rate": 0.0009791808873720136, "loss": 6.9845, "step": 183 }, { "epoch": 0.06279863481228669, "grad_norm": 3.4854085445404053, "learning_rate": 0.000979067121729238, "loss": 6.9272, "step": 184 }, { "epoch": 0.06313993174061433, "grad_norm": 3.636730432510376, "learning_rate": 0.0009789533560864618, "loss": 6.84, "step": 185 }, { "epoch": 0.06348122866894199, "grad_norm": 3.543924331665039, "learning_rate": 0.000978839590443686, "loss": 6.9787, "step": 186 }, { "epoch": 0.06382252559726963, "grad_norm": 3.629248857498169, "learning_rate": 0.00097872582480091, "loss": 6.7263, "step": 187 }, { "epoch": 0.06416382252559727, "grad_norm": 3.5820744037628174, "learning_rate": 0.0009786120591581341, "loss": 6.6454, "step": 188 }, { "epoch": 0.06450511945392491, "grad_norm": 6.293400764465332, "learning_rate": 0.0009784982935153585, "loss": 4.73, "step": 189 }, { "epoch": 0.06484641638225255, "grad_norm": 6.925587177276611, "learning_rate": 0.0009783845278725826, "loss": 4.6797, "step": 190 }, { "epoch": 0.06518771331058021, "grad_norm": 4.270597457885742, "learning_rate": 0.0009782707622298067, "loss": 6.9059, "step": 191 }, { "epoch": 0.06552901023890785, "grad_norm": 3.773348808288574, "learning_rate": 0.0009781569965870308, "loss": 7.0965, "step": 192 }, { "epoch": 0.06587030716723549, "grad_norm": 3.767024517059326, "learning_rate": 0.0009780432309442549, "loss": 6.7144, "step": 193 }, { "epoch": 0.06621160409556313, "grad_norm": 3.9505908489227295, "learning_rate": 0.000977929465301479, "loss": 6.8606, "step": 194 }, { "epoch": 0.06655290102389079, "grad_norm": 3.617802381515503, "learning_rate": 0.000977815699658703, "loss": 6.7616, "step": 195 }, { "epoch": 0.06689419795221843, "grad_norm": 3.433324098587036, "learning_rate": 0.0009777019340159272, "loss": 6.7047, "step": 196 }, { "epoch": 0.06723549488054607, "grad_norm": 3.5975823402404785, "learning_rate": 0.0009775881683731513, "loss": 6.5858, "step": 197 }, { "epoch": 0.06757679180887372, "grad_norm": 3.548548936843872, "learning_rate": 0.0009774744027303754, "loss": 7.1213, "step": 198 }, { "epoch": 0.06791808873720137, "grad_norm": 3.8470911979675293, "learning_rate": 0.0009773606370875995, "loss": 6.3278, "step": 199 }, { "epoch": 0.06825938566552901, "grad_norm": 3.527210235595703, "learning_rate": 0.0009772468714448236, "loss": 6.8947, "step": 200 }, { "epoch": 0.06860068259385665, "grad_norm": 3.4403419494628906, "learning_rate": 0.000977133105802048, "loss": 6.738, "step": 201 }, { "epoch": 0.0689419795221843, "grad_norm": 3.733076810836792, "learning_rate": 0.000977019340159272, "loss": 6.9729, "step": 202 }, { "epoch": 0.06928327645051195, "grad_norm": 3.4491777420043945, "learning_rate": 0.0009769055745164961, "loss": 7.0452, "step": 203 }, { "epoch": 0.0696245733788396, "grad_norm": 12.765624046325684, "learning_rate": 0.0009767918088737202, "loss": 6.6967, "step": 204 }, { "epoch": 0.06996587030716724, "grad_norm": 4.026791095733643, "learning_rate": 0.0009766780432309441, "loss": 6.9201, "step": 205 }, { "epoch": 0.07030716723549488, "grad_norm": 6.290328502655029, "learning_rate": 0.0009765642775881682, "loss": 6.3519, "step": 206 }, { "epoch": 0.07064846416382252, "grad_norm": 3.867109775543213, "learning_rate": 0.0009764505119453925, "loss": 7.309, "step": 207 }, { "epoch": 0.07098976109215017, "grad_norm": 3.9246108531951904, "learning_rate": 0.0009763367463026166, "loss": 6.7805, "step": 208 }, { "epoch": 0.07133105802047782, "grad_norm": 3.378211498260498, "learning_rate": 0.0009762229806598408, "loss": 7.007, "step": 209 }, { "epoch": 0.07167235494880546, "grad_norm": 3.3665435314178467, "learning_rate": 0.0009761092150170649, "loss": 6.7892, "step": 210 }, { "epoch": 0.0720136518771331, "grad_norm": 3.457585096359253, "learning_rate": 0.000975995449374289, "loss": 6.8816, "step": 211 }, { "epoch": 0.07235494880546076, "grad_norm": 3.859353542327881, "learning_rate": 0.0009758816837315131, "loss": 5.9202, "step": 212 }, { "epoch": 0.0726962457337884, "grad_norm": 5.0818190574646, "learning_rate": 0.0009757679180887372, "loss": 6.0906, "step": 213 }, { "epoch": 0.07303754266211604, "grad_norm": 3.940415620803833, "learning_rate": 0.0009756541524459613, "loss": 7.1606, "step": 214 }, { "epoch": 0.07337883959044368, "grad_norm": 9.891190528869629, "learning_rate": 0.0009755403868031855, "loss": 4.2678, "step": 215 }, { "epoch": 0.07372013651877134, "grad_norm": 4.019010543823242, "learning_rate": 0.0009754266211604096, "loss": 6.3178, "step": 216 }, { "epoch": 0.07406143344709898, "grad_norm": 3.7409043312072754, "learning_rate": 0.0009753128555176337, "loss": 6.9846, "step": 217 }, { "epoch": 0.07440273037542662, "grad_norm": 3.7472164630889893, "learning_rate": 0.0009751990898748578, "loss": 6.3952, "step": 218 }, { "epoch": 0.07474402730375426, "grad_norm": 3.981301784515381, "learning_rate": 0.0009750853242320819, "loss": 6.888, "step": 219 }, { "epoch": 0.07508532423208192, "grad_norm": 7.142719268798828, "learning_rate": 0.000974971558589306, "loss": 6.4133, "step": 220 }, { "epoch": 0.07542662116040956, "grad_norm": 3.712938070297241, "learning_rate": 0.0009748577929465302, "loss": 7.2119, "step": 221 }, { "epoch": 0.0757679180887372, "grad_norm": 3.7851550579071045, "learning_rate": 0.0009747440273037544, "loss": 6.5805, "step": 222 }, { "epoch": 0.07610921501706484, "grad_norm": 3.5392749309539795, "learning_rate": 0.0009746302616609785, "loss": 6.6098, "step": 223 }, { "epoch": 0.07645051194539249, "grad_norm": 4.032394886016846, "learning_rate": 0.0009745164960182025, "loss": 6.7128, "step": 224 }, { "epoch": 0.07679180887372014, "grad_norm": 3.6618125438690186, "learning_rate": 0.0009744027303754266, "loss": 6.6606, "step": 225 }, { "epoch": 0.07713310580204778, "grad_norm": 3.458958148956299, "learning_rate": 0.0009742889647326507, "loss": 6.8682, "step": 226 }, { "epoch": 0.07747440273037542, "grad_norm": 4.641584396362305, "learning_rate": 0.0009741751990898749, "loss": 6.1076, "step": 227 }, { "epoch": 0.07781569965870307, "grad_norm": 3.7606310844421387, "learning_rate": 0.000974061433447099, "loss": 6.9889, "step": 228 }, { "epoch": 0.07815699658703072, "grad_norm": 3.448821783065796, "learning_rate": 0.0009739476678043231, "loss": 6.9313, "step": 229 }, { "epoch": 0.07849829351535836, "grad_norm": 3.49123477935791, "learning_rate": 0.0009738339021615472, "loss": 6.9526, "step": 230 }, { "epoch": 0.078839590443686, "grad_norm": 3.5688812732696533, "learning_rate": 0.0009737201365187713, "loss": 6.5122, "step": 231 }, { "epoch": 0.07918088737201365, "grad_norm": 3.416281223297119, "learning_rate": 0.0009736063708759955, "loss": 6.7054, "step": 232 }, { "epoch": 0.0795221843003413, "grad_norm": 3.3496880531311035, "learning_rate": 0.0009734926052332196, "loss": 6.6948, "step": 233 }, { "epoch": 0.07986348122866894, "grad_norm": 3.9867589473724365, "learning_rate": 0.0009733788395904437, "loss": 6.8821, "step": 234 }, { "epoch": 0.08020477815699659, "grad_norm": 3.6225593090057373, "learning_rate": 0.0009732650739476678, "loss": 6.706, "step": 235 }, { "epoch": 0.08054607508532423, "grad_norm": 3.7255892753601074, "learning_rate": 0.0009731513083048919, "loss": 7.0775, "step": 236 }, { "epoch": 0.08088737201365187, "grad_norm": 8.664634704589844, "learning_rate": 0.000973037542662116, "loss": 5.4271, "step": 237 }, { "epoch": 0.08122866894197953, "grad_norm": 4.9100117683410645, "learning_rate": 0.0009729237770193402, "loss": 6.4401, "step": 238 }, { "epoch": 0.08156996587030717, "grad_norm": 3.7028720378875732, "learning_rate": 0.0009728100113765644, "loss": 6.3939, "step": 239 }, { "epoch": 0.08191126279863481, "grad_norm": 3.713730573654175, "learning_rate": 0.0009726962457337885, "loss": 6.8395, "step": 240 }, { "epoch": 0.08225255972696245, "grad_norm": 3.7303857803344727, "learning_rate": 0.0009725824800910126, "loss": 6.5482, "step": 241 }, { "epoch": 0.08259385665529011, "grad_norm": 3.4762961864471436, "learning_rate": 0.0009724687144482367, "loss": 7.0664, "step": 242 }, { "epoch": 0.08293515358361775, "grad_norm": 3.4974660873413086, "learning_rate": 0.0009723549488054608, "loss": 6.7011, "step": 243 }, { "epoch": 0.08327645051194539, "grad_norm": 3.515742540359497, "learning_rate": 0.0009722411831626849, "loss": 7.2582, "step": 244 }, { "epoch": 0.08361774744027303, "grad_norm": 3.654632806777954, "learning_rate": 0.000972127417519909, "loss": 6.6722, "step": 245 }, { "epoch": 0.08395904436860069, "grad_norm": 3.580826759338379, "learning_rate": 0.0009720136518771331, "loss": 6.6658, "step": 246 }, { "epoch": 0.08430034129692833, "grad_norm": 3.44158673286438, "learning_rate": 0.0009718998862343572, "loss": 6.6069, "step": 247 }, { "epoch": 0.08464163822525597, "grad_norm": 3.856389045715332, "learning_rate": 0.0009717861205915813, "loss": 6.7927, "step": 248 }, { "epoch": 0.08498293515358361, "grad_norm": 3.6192235946655273, "learning_rate": 0.0009716723549488055, "loss": 6.9655, "step": 249 }, { "epoch": 0.08532423208191127, "grad_norm": 3.7248499393463135, "learning_rate": 0.0009715585893060296, "loss": 7.0733, "step": 250 }, { "epoch": 0.08566552901023891, "grad_norm": 3.485734701156616, "learning_rate": 0.0009714448236632537, "loss": 6.7725, "step": 251 }, { "epoch": 0.08600682593856655, "grad_norm": 3.442128896713257, "learning_rate": 0.0009713310580204778, "loss": 6.8307, "step": 252 }, { "epoch": 0.0863481228668942, "grad_norm": 4.0273308753967285, "learning_rate": 0.0009712172923777019, "loss": 6.2919, "step": 253 }, { "epoch": 0.08668941979522184, "grad_norm": 3.7515664100646973, "learning_rate": 0.000971103526734926, "loss": 6.7646, "step": 254 }, { "epoch": 0.08703071672354949, "grad_norm": 3.6255035400390625, "learning_rate": 0.0009709897610921503, "loss": 7.1922, "step": 255 }, { "epoch": 0.08737201365187713, "grad_norm": 3.9632773399353027, "learning_rate": 0.0009708759954493744, "loss": 6.6199, "step": 256 }, { "epoch": 0.08771331058020478, "grad_norm": 3.5362298488616943, "learning_rate": 0.0009707622298065985, "loss": 6.9201, "step": 257 }, { "epoch": 0.08805460750853242, "grad_norm": 3.4530086517333984, "learning_rate": 0.0009706484641638226, "loss": 6.8544, "step": 258 }, { "epoch": 0.08839590443686007, "grad_norm": 3.586812973022461, "learning_rate": 0.0009705346985210467, "loss": 6.6807, "step": 259 }, { "epoch": 0.08873720136518772, "grad_norm": 3.553516387939453, "learning_rate": 0.0009704209328782708, "loss": 7.0369, "step": 260 }, { "epoch": 0.08907849829351536, "grad_norm": 3.6874141693115234, "learning_rate": 0.000970307167235495, "loss": 6.7392, "step": 261 }, { "epoch": 0.089419795221843, "grad_norm": 3.5124759674072266, "learning_rate": 0.0009701934015927191, "loss": 6.6912, "step": 262 }, { "epoch": 0.08976109215017065, "grad_norm": 3.7072601318359375, "learning_rate": 0.0009700796359499431, "loss": 6.5064, "step": 263 }, { "epoch": 0.0901023890784983, "grad_norm": 4.079402446746826, "learning_rate": 0.0009699658703071672, "loss": 6.487, "step": 264 }, { "epoch": 0.09044368600682594, "grad_norm": 3.6425037384033203, "learning_rate": 0.0009698521046643913, "loss": 6.7205, "step": 265 }, { "epoch": 0.09078498293515358, "grad_norm": 3.5927422046661377, "learning_rate": 0.0009697383390216154, "loss": 6.6607, "step": 266 }, { "epoch": 0.09112627986348124, "grad_norm": 3.606675624847412, "learning_rate": 0.0009696245733788396, "loss": 7.0384, "step": 267 }, { "epoch": 0.09146757679180888, "grad_norm": 4.241934776306152, "learning_rate": 0.0009695108077360637, "loss": 6.8748, "step": 268 }, { "epoch": 0.09180887372013652, "grad_norm": 3.801959276199341, "learning_rate": 0.0009693970420932878, "loss": 7.1313, "step": 269 }, { "epoch": 0.09215017064846416, "grad_norm": 3.597132921218872, "learning_rate": 0.0009692832764505119, "loss": 6.954, "step": 270 }, { "epoch": 0.0924914675767918, "grad_norm": 3.9115519523620605, "learning_rate": 0.000969169510807736, "loss": 6.5418, "step": 271 }, { "epoch": 0.09283276450511946, "grad_norm": 4.029146671295166, "learning_rate": 0.0009690557451649603, "loss": 6.626, "step": 272 }, { "epoch": 0.0931740614334471, "grad_norm": 3.7334775924682617, "learning_rate": 0.0009689419795221844, "loss": 6.3663, "step": 273 }, { "epoch": 0.09351535836177474, "grad_norm": 3.6482112407684326, "learning_rate": 0.0009688282138794085, "loss": 6.9092, "step": 274 }, { "epoch": 0.09385665529010238, "grad_norm": 8.627721786499023, "learning_rate": 0.0009687144482366326, "loss": 5.9086, "step": 275 }, { "epoch": 0.09419795221843004, "grad_norm": 3.9426357746124268, "learning_rate": 0.0009686006825938567, "loss": 7.169, "step": 276 }, { "epoch": 0.09453924914675768, "grad_norm": 4.135240077972412, "learning_rate": 0.0009684869169510808, "loss": 6.7811, "step": 277 }, { "epoch": 0.09488054607508532, "grad_norm": 4.837364673614502, "learning_rate": 0.000968373151308305, "loss": 6.8342, "step": 278 }, { "epoch": 0.09522184300341296, "grad_norm": 3.7553555965423584, "learning_rate": 0.0009682593856655291, "loss": 6.4767, "step": 279 }, { "epoch": 0.09556313993174062, "grad_norm": 3.947141647338867, "learning_rate": 0.0009681456200227532, "loss": 6.2992, "step": 280 }, { "epoch": 0.09590443686006826, "grad_norm": 4.0404486656188965, "learning_rate": 0.0009680318543799773, "loss": 6.9428, "step": 281 }, { "epoch": 0.0962457337883959, "grad_norm": 3.603900194168091, "learning_rate": 0.0009679180887372013, "loss": 7.2719, "step": 282 }, { "epoch": 0.09658703071672355, "grad_norm": 5.454912185668945, "learning_rate": 0.0009678043230944254, "loss": 6.1625, "step": 283 }, { "epoch": 0.09692832764505119, "grad_norm": 4.6887736320495605, "learning_rate": 0.0009676905574516496, "loss": 4.2823, "step": 284 }, { "epoch": 0.09726962457337884, "grad_norm": 4.27936315536499, "learning_rate": 0.0009675767918088737, "loss": 6.8959, "step": 285 }, { "epoch": 0.09761092150170649, "grad_norm": 3.739509105682373, "learning_rate": 0.0009674630261660978, "loss": 7.2816, "step": 286 }, { "epoch": 0.09795221843003413, "grad_norm": 3.4555413722991943, "learning_rate": 0.0009673492605233219, "loss": 7.0285, "step": 287 }, { "epoch": 0.09829351535836177, "grad_norm": 3.6354432106018066, "learning_rate": 0.000967235494880546, "loss": 6.5368, "step": 288 }, { "epoch": 0.09863481228668942, "grad_norm": 3.558715343475342, "learning_rate": 0.0009671217292377701, "loss": 7.1075, "step": 289 }, { "epoch": 0.09897610921501707, "grad_norm": 9.740402221679688, "learning_rate": 0.0009670079635949944, "loss": 6.4126, "step": 290 }, { "epoch": 0.09931740614334471, "grad_norm": 3.963900089263916, "learning_rate": 0.0009668941979522185, "loss": 6.8788, "step": 291 }, { "epoch": 0.09965870307167235, "grad_norm": 3.8590564727783203, "learning_rate": 0.0009667804323094426, "loss": 7.1995, "step": 292 }, { "epoch": 0.1, "grad_norm": 3.5563805103302, "learning_rate": 0.0009666666666666667, "loss": 7.3654, "step": 293 }, { "epoch": 0.10034129692832765, "grad_norm": 3.6024012565612793, "learning_rate": 0.0009665529010238908, "loss": 6.7006, "step": 294 }, { "epoch": 0.10068259385665529, "grad_norm": 3.463623523712158, "learning_rate": 0.000966439135381115, "loss": 6.8185, "step": 295 }, { "epoch": 0.10102389078498293, "grad_norm": 5.106781005859375, "learning_rate": 0.0009663253697383391, "loss": 5.5867, "step": 296 }, { "epoch": 0.10136518771331059, "grad_norm": 3.8914577960968018, "learning_rate": 0.0009662116040955632, "loss": 7.6307, "step": 297 }, { "epoch": 0.10170648464163823, "grad_norm": 4.023569583892822, "learning_rate": 0.0009660978384527873, "loss": 6.4419, "step": 298 }, { "epoch": 0.10204778156996587, "grad_norm": 4.755953788757324, "learning_rate": 0.0009659840728100114, "loss": 6.7173, "step": 299 }, { "epoch": 0.10238907849829351, "grad_norm": 3.9473185539245605, "learning_rate": 0.0009658703071672355, "loss": 4.9165, "step": 300 }, { "epoch": 0.10273037542662115, "grad_norm": 4.2228684425354, "learning_rate": 0.0009657565415244597, "loss": 6.7526, "step": 301 }, { "epoch": 0.10307167235494881, "grad_norm": 3.5894923210144043, "learning_rate": 0.0009656427758816837, "loss": 7.1576, "step": 302 }, { "epoch": 0.10341296928327645, "grad_norm": 4.090893745422363, "learning_rate": 0.0009655290102389078, "loss": 6.4097, "step": 303 }, { "epoch": 0.1037542662116041, "grad_norm": 3.7067759037017822, "learning_rate": 0.0009654152445961319, "loss": 7.3378, "step": 304 }, { "epoch": 0.10409556313993173, "grad_norm": 3.431490182876587, "learning_rate": 0.000965301478953356, "loss": 7.1981, "step": 305 }, { "epoch": 0.10443686006825939, "grad_norm": 3.6670467853546143, "learning_rate": 0.0009651877133105801, "loss": 6.5654, "step": 306 }, { "epoch": 0.10477815699658703, "grad_norm": 3.554365634918213, "learning_rate": 0.0009650739476678044, "loss": 6.481, "step": 307 }, { "epoch": 0.10511945392491467, "grad_norm": 3.5014779567718506, "learning_rate": 0.0009649601820250285, "loss": 6.8385, "step": 308 }, { "epoch": 0.10546075085324232, "grad_norm": 10.873976707458496, "learning_rate": 0.0009648464163822526, "loss": 6.5274, "step": 309 }, { "epoch": 0.10580204778156997, "grad_norm": 3.9372551441192627, "learning_rate": 0.0009647326507394767, "loss": 7.2976, "step": 310 }, { "epoch": 0.10614334470989761, "grad_norm": 3.7556705474853516, "learning_rate": 0.0009646188850967008, "loss": 7.2823, "step": 311 }, { "epoch": 0.10648464163822526, "grad_norm": 5.166213512420654, "learning_rate": 0.000964505119453925, "loss": 6.1935, "step": 312 }, { "epoch": 0.1068259385665529, "grad_norm": 4.041624546051025, "learning_rate": 0.0009643913538111491, "loss": 7.1861, "step": 313 }, { "epoch": 0.10716723549488055, "grad_norm": 4.318609714508057, "learning_rate": 0.0009642775881683732, "loss": 6.4683, "step": 314 }, { "epoch": 0.1075085324232082, "grad_norm": 3.769338607788086, "learning_rate": 0.0009641638225255973, "loss": 6.7583, "step": 315 }, { "epoch": 0.10784982935153584, "grad_norm": 3.6845760345458984, "learning_rate": 0.0009640500568828214, "loss": 6.8184, "step": 316 }, { "epoch": 0.10819112627986348, "grad_norm": 3.5176687240600586, "learning_rate": 0.0009639362912400455, "loss": 6.7214, "step": 317 }, { "epoch": 0.10853242320819112, "grad_norm": 3.728588581085205, "learning_rate": 0.0009638225255972697, "loss": 7.0418, "step": 318 }, { "epoch": 0.10887372013651878, "grad_norm": 3.9855856895446777, "learning_rate": 0.0009637087599544938, "loss": 6.6959, "step": 319 }, { "epoch": 0.10921501706484642, "grad_norm": 4.362178802490234, "learning_rate": 0.0009635949943117179, "loss": 6.0196, "step": 320 }, { "epoch": 0.10955631399317406, "grad_norm": 5.292174816131592, "learning_rate": 0.0009634812286689419, "loss": 5.8822, "step": 321 }, { "epoch": 0.1098976109215017, "grad_norm": 4.121539115905762, "learning_rate": 0.000963367463026166, "loss": 7.0859, "step": 322 }, { "epoch": 0.11023890784982936, "grad_norm": 4.680109024047852, "learning_rate": 0.0009632536973833901, "loss": 6.4441, "step": 323 }, { "epoch": 0.110580204778157, "grad_norm": 3.6892054080963135, "learning_rate": 0.0009631399317406144, "loss": 7.0459, "step": 324 }, { "epoch": 0.11092150170648464, "grad_norm": 3.639540672302246, "learning_rate": 0.0009630261660978385, "loss": 7.1954, "step": 325 }, { "epoch": 0.11126279863481228, "grad_norm": 3.4080095291137695, "learning_rate": 0.0009629124004550626, "loss": 7.0554, "step": 326 }, { "epoch": 0.11160409556313994, "grad_norm": 3.4967737197875977, "learning_rate": 0.0009627986348122867, "loss": 7.17, "step": 327 }, { "epoch": 0.11194539249146758, "grad_norm": 3.8794071674346924, "learning_rate": 0.0009626848691695108, "loss": 6.759, "step": 328 }, { "epoch": 0.11228668941979522, "grad_norm": 3.408348321914673, "learning_rate": 0.0009625711035267349, "loss": 6.7917, "step": 329 }, { "epoch": 0.11262798634812286, "grad_norm": 3.614760637283325, "learning_rate": 0.0009624573378839591, "loss": 6.6399, "step": 330 }, { "epoch": 0.1129692832764505, "grad_norm": 3.6130752563476562, "learning_rate": 0.0009623435722411832, "loss": 6.8108, "step": 331 }, { "epoch": 0.11331058020477816, "grad_norm": 3.7819409370422363, "learning_rate": 0.0009622298065984073, "loss": 7.3848, "step": 332 }, { "epoch": 0.1136518771331058, "grad_norm": 3.626450300216675, "learning_rate": 0.0009621160409556314, "loss": 7.0753, "step": 333 }, { "epoch": 0.11399317406143344, "grad_norm": 3.5669236183166504, "learning_rate": 0.0009620022753128555, "loss": 6.5975, "step": 334 }, { "epoch": 0.11433447098976109, "grad_norm": 3.4628782272338867, "learning_rate": 0.0009618885096700797, "loss": 6.7684, "step": 335 }, { "epoch": 0.11467576791808874, "grad_norm": 7.344601154327393, "learning_rate": 0.0009617747440273038, "loss": 6.0625, "step": 336 }, { "epoch": 0.11501706484641638, "grad_norm": 3.9138948917388916, "learning_rate": 0.0009616609783845279, "loss": 6.9233, "step": 337 }, { "epoch": 0.11535836177474403, "grad_norm": 3.7263455390930176, "learning_rate": 0.000961547212741752, "loss": 7.1069, "step": 338 }, { "epoch": 0.11569965870307167, "grad_norm": 3.638993740081787, "learning_rate": 0.0009614334470989762, "loss": 6.5446, "step": 339 }, { "epoch": 0.11604095563139932, "grad_norm": 3.507223129272461, "learning_rate": 0.0009613196814562003, "loss": 6.8566, "step": 340 }, { "epoch": 0.11638225255972696, "grad_norm": 3.5542092323303223, "learning_rate": 0.0009612059158134244, "loss": 7.013, "step": 341 }, { "epoch": 0.1167235494880546, "grad_norm": 3.579059600830078, "learning_rate": 0.0009610921501706485, "loss": 7.4816, "step": 342 }, { "epoch": 0.11706484641638225, "grad_norm": 3.462669849395752, "learning_rate": 0.0009609783845278726, "loss": 6.7983, "step": 343 }, { "epoch": 0.1174061433447099, "grad_norm": 3.597429037094116, "learning_rate": 0.0009608646188850967, "loss": 6.9345, "step": 344 }, { "epoch": 0.11774744027303755, "grad_norm": 15.800237655639648, "learning_rate": 0.0009607508532423208, "loss": 6.5234, "step": 345 }, { "epoch": 0.11808873720136519, "grad_norm": 4.462514400482178, "learning_rate": 0.0009606370875995449, "loss": 7.2368, "step": 346 }, { "epoch": 0.11843003412969283, "grad_norm": 4.498167514801025, "learning_rate": 0.0009605233219567691, "loss": 6.6885, "step": 347 }, { "epoch": 0.11877133105802047, "grad_norm": 4.717837810516357, "learning_rate": 0.0009604095563139932, "loss": 5.435, "step": 348 }, { "epoch": 0.11911262798634813, "grad_norm": 3.9620487689971924, "learning_rate": 0.0009602957906712173, "loss": 7.6661, "step": 349 }, { "epoch": 0.11945392491467577, "grad_norm": 3.7153236865997314, "learning_rate": 0.0009601820250284414, "loss": 6.6377, "step": 350 }, { "epoch": 0.11979522184300341, "grad_norm": 4.2678632736206055, "learning_rate": 0.0009600682593856655, "loss": 6.5929, "step": 351 }, { "epoch": 0.12013651877133105, "grad_norm": 3.488931894302368, "learning_rate": 0.0009599544937428897, "loss": 6.7701, "step": 352 }, { "epoch": 0.12047781569965871, "grad_norm": 3.7950785160064697, "learning_rate": 0.0009598407281001138, "loss": 6.3283, "step": 353 }, { "epoch": 0.12081911262798635, "grad_norm": 3.611812114715576, "learning_rate": 0.0009597269624573379, "loss": 7.0418, "step": 354 }, { "epoch": 0.12116040955631399, "grad_norm": 3.4913434982299805, "learning_rate": 0.000959613196814562, "loss": 7.1764, "step": 355 }, { "epoch": 0.12150170648464163, "grad_norm": 3.7057766914367676, "learning_rate": 0.0009594994311717862, "loss": 6.6347, "step": 356 }, { "epoch": 0.12184300341296929, "grad_norm": 3.8991594314575195, "learning_rate": 0.0009593856655290103, "loss": 6.7696, "step": 357 }, { "epoch": 0.12218430034129693, "grad_norm": 3.863154172897339, "learning_rate": 0.0009592718998862345, "loss": 7.317, "step": 358 }, { "epoch": 0.12252559726962457, "grad_norm": 3.521376848220825, "learning_rate": 0.0009591581342434586, "loss": 7.5248, "step": 359 }, { "epoch": 0.12286689419795221, "grad_norm": 3.6891329288482666, "learning_rate": 0.0009590443686006826, "loss": 6.9287, "step": 360 }, { "epoch": 0.12320819112627987, "grad_norm": 3.7071328163146973, "learning_rate": 0.0009589306029579067, "loss": 7.086, "step": 361 }, { "epoch": 0.12354948805460751, "grad_norm": 5.102900981903076, "learning_rate": 0.0009588168373151308, "loss": 6.361, "step": 362 }, { "epoch": 0.12389078498293515, "grad_norm": 4.640048503875732, "learning_rate": 0.0009587030716723549, "loss": 5.9508, "step": 363 }, { "epoch": 0.1242320819112628, "grad_norm": 7.667442321777344, "learning_rate": 0.0009585893060295791, "loss": 7.4653, "step": 364 }, { "epoch": 0.12457337883959044, "grad_norm": 4.129282474517822, "learning_rate": 0.0009584755403868032, "loss": 7.1397, "step": 365 }, { "epoch": 0.12491467576791809, "grad_norm": 3.7277729511260986, "learning_rate": 0.0009583617747440273, "loss": 6.2993, "step": 366 }, { "epoch": 0.12525597269624572, "grad_norm": 3.472531795501709, "learning_rate": 0.0009582480091012514, "loss": 6.7378, "step": 367 }, { "epoch": 0.12559726962457338, "grad_norm": 4.953517913818359, "learning_rate": 0.0009581342434584755, "loss": 6.3897, "step": 368 }, { "epoch": 0.12593856655290103, "grad_norm": 3.745403289794922, "learning_rate": 0.0009580204778156996, "loss": 6.8016, "step": 369 }, { "epoch": 0.12627986348122866, "grad_norm": 3.9581098556518555, "learning_rate": 0.0009579067121729238, "loss": 6.9358, "step": 370 }, { "epoch": 0.12662116040955632, "grad_norm": 3.588125467300415, "learning_rate": 0.0009577929465301479, "loss": 6.5954, "step": 371 }, { "epoch": 0.12696245733788397, "grad_norm": 3.592097282409668, "learning_rate": 0.000957679180887372, "loss": 7.2236, "step": 372 }, { "epoch": 0.1273037542662116, "grad_norm": 3.409364700317383, "learning_rate": 0.0009575654152445962, "loss": 7.0846, "step": 373 }, { "epoch": 0.12764505119453926, "grad_norm": 4.143648147583008, "learning_rate": 0.0009574516496018203, "loss": 6.6434, "step": 374 }, { "epoch": 0.12798634812286688, "grad_norm": 4.170565128326416, "learning_rate": 0.0009573378839590445, "loss": 6.5485, "step": 375 }, { "epoch": 0.12832764505119454, "grad_norm": 3.7150259017944336, "learning_rate": 0.0009572241183162686, "loss": 6.7944, "step": 376 }, { "epoch": 0.1286689419795222, "grad_norm": 9.187422752380371, "learning_rate": 0.0009571103526734927, "loss": 5.1412, "step": 377 }, { "epoch": 0.12901023890784982, "grad_norm": 3.895047187805176, "learning_rate": 0.0009569965870307168, "loss": 6.4227, "step": 378 }, { "epoch": 0.12935153583617748, "grad_norm": 3.7599642276763916, "learning_rate": 0.0009568828213879409, "loss": 6.9761, "step": 379 }, { "epoch": 0.1296928327645051, "grad_norm": 3.6376614570617676, "learning_rate": 0.0009567690557451649, "loss": 7.0387, "step": 380 }, { "epoch": 0.13003412969283276, "grad_norm": 3.673825263977051, "learning_rate": 0.0009566552901023891, "loss": 6.6699, "step": 381 }, { "epoch": 0.13037542662116042, "grad_norm": 3.4586920738220215, "learning_rate": 0.0009565415244596132, "loss": 6.8109, "step": 382 }, { "epoch": 0.13071672354948805, "grad_norm": 3.3592073917388916, "learning_rate": 0.0009564277588168373, "loss": 7.0395, "step": 383 }, { "epoch": 0.1310580204778157, "grad_norm": 3.409188985824585, "learning_rate": 0.0009563139931740614, "loss": 6.7086, "step": 384 }, { "epoch": 0.13139931740614336, "grad_norm": 3.571364164352417, "learning_rate": 0.0009562002275312855, "loss": 7.0613, "step": 385 }, { "epoch": 0.13174061433447098, "grad_norm": 3.6313486099243164, "learning_rate": 0.0009560864618885096, "loss": 6.8737, "step": 386 }, { "epoch": 0.13208191126279864, "grad_norm": 5.386178970336914, "learning_rate": 0.0009559726962457338, "loss": 6.8494, "step": 387 }, { "epoch": 0.13242320819112627, "grad_norm": 3.736448287963867, "learning_rate": 0.0009558589306029579, "loss": 6.805, "step": 388 }, { "epoch": 0.13276450511945392, "grad_norm": 4.280710697174072, "learning_rate": 0.000955745164960182, "loss": 6.5263, "step": 389 }, { "epoch": 0.13310580204778158, "grad_norm": 3.7368850708007812, "learning_rate": 0.0009556313993174062, "loss": 6.8247, "step": 390 }, { "epoch": 0.1334470989761092, "grad_norm": 3.579983711242676, "learning_rate": 0.0009555176336746303, "loss": 7.1728, "step": 391 }, { "epoch": 0.13378839590443686, "grad_norm": 3.969280242919922, "learning_rate": 0.0009554038680318545, "loss": 6.2745, "step": 392 }, { "epoch": 0.13412969283276452, "grad_norm": 3.6488418579101562, "learning_rate": 0.0009552901023890786, "loss": 7.0, "step": 393 }, { "epoch": 0.13447098976109215, "grad_norm": 3.3817570209503174, "learning_rate": 0.0009551763367463027, "loss": 6.2211, "step": 394 }, { "epoch": 0.1348122866894198, "grad_norm": 3.5612990856170654, "learning_rate": 0.0009550625711035268, "loss": 6.9262, "step": 395 }, { "epoch": 0.13515358361774743, "grad_norm": 3.475904703140259, "learning_rate": 0.0009549488054607509, "loss": 7.1462, "step": 396 }, { "epoch": 0.13549488054607509, "grad_norm": 3.9977428913116455, "learning_rate": 0.000954835039817975, "loss": 6.8524, "step": 397 }, { "epoch": 0.13583617747440274, "grad_norm": 3.5686051845550537, "learning_rate": 0.0009547212741751992, "loss": 7.1196, "step": 398 }, { "epoch": 0.13617747440273037, "grad_norm": 3.616010904312134, "learning_rate": 0.0009546075085324232, "loss": 6.8889, "step": 399 }, { "epoch": 0.13651877133105803, "grad_norm": 3.5153353214263916, "learning_rate": 0.0009544937428896473, "loss": 7.0783, "step": 400 }, { "epoch": 0.13686006825938565, "grad_norm": 3.46384596824646, "learning_rate": 0.0009543799772468714, "loss": 7.2027, "step": 401 }, { "epoch": 0.1372013651877133, "grad_norm": 3.6632463932037354, "learning_rate": 0.0009542662116040955, "loss": 6.8579, "step": 402 }, { "epoch": 0.13754266211604096, "grad_norm": 3.654392719268799, "learning_rate": 0.0009541524459613196, "loss": 6.9971, "step": 403 }, { "epoch": 0.1378839590443686, "grad_norm": 3.7651548385620117, "learning_rate": 0.0009540386803185438, "loss": 6.9558, "step": 404 }, { "epoch": 0.13822525597269625, "grad_norm": 3.590897560119629, "learning_rate": 0.0009539249146757679, "loss": 6.9441, "step": 405 }, { "epoch": 0.1385665529010239, "grad_norm": 3.5330758094787598, "learning_rate": 0.000953811149032992, "loss": 7.3093, "step": 406 }, { "epoch": 0.13890784982935153, "grad_norm": 3.9493212699890137, "learning_rate": 0.0009536973833902162, "loss": 6.1789, "step": 407 }, { "epoch": 0.1392491467576792, "grad_norm": 3.6030216217041016, "learning_rate": 0.0009535836177474403, "loss": 6.7268, "step": 408 }, { "epoch": 0.13959044368600682, "grad_norm": 3.7132041454315186, "learning_rate": 0.0009534698521046644, "loss": 6.8771, "step": 409 }, { "epoch": 0.13993174061433447, "grad_norm": 3.7219643592834473, "learning_rate": 0.0009533560864618886, "loss": 6.7741, "step": 410 }, { "epoch": 0.14027303754266213, "grad_norm": 3.658827066421509, "learning_rate": 0.0009532423208191127, "loss": 7.0821, "step": 411 }, { "epoch": 0.14061433447098975, "grad_norm": 3.503436326980591, "learning_rate": 0.0009531285551763368, "loss": 7.0266, "step": 412 }, { "epoch": 0.1409556313993174, "grad_norm": 3.4202098846435547, "learning_rate": 0.0009530147895335609, "loss": 6.7716, "step": 413 }, { "epoch": 0.14129692832764504, "grad_norm": 3.497220993041992, "learning_rate": 0.000952901023890785, "loss": 7.1303, "step": 414 }, { "epoch": 0.1416382252559727, "grad_norm": 3.7073585987091064, "learning_rate": 0.0009527872582480092, "loss": 6.9418, "step": 415 }, { "epoch": 0.14197952218430035, "grad_norm": 3.451277256011963, "learning_rate": 0.0009526734926052333, "loss": 6.9495, "step": 416 }, { "epoch": 0.14232081911262798, "grad_norm": 3.564136505126953, "learning_rate": 0.0009525597269624574, "loss": 6.8177, "step": 417 }, { "epoch": 0.14266211604095563, "grad_norm": 4.293824672698975, "learning_rate": 0.0009524459613196815, "loss": 7.1537, "step": 418 }, { "epoch": 0.1430034129692833, "grad_norm": 4.002580165863037, "learning_rate": 0.0009523321956769055, "loss": 7.0574, "step": 419 }, { "epoch": 0.14334470989761092, "grad_norm": 3.631091833114624, "learning_rate": 0.0009522184300341296, "loss": 6.552, "step": 420 }, { "epoch": 0.14368600682593857, "grad_norm": 3.5741636753082275, "learning_rate": 0.0009521046643913538, "loss": 6.7028, "step": 421 }, { "epoch": 0.1440273037542662, "grad_norm": 3.8410189151763916, "learning_rate": 0.0009519908987485779, "loss": 7.0462, "step": 422 }, { "epoch": 0.14436860068259386, "grad_norm": 3.5977299213409424, "learning_rate": 0.000951877133105802, "loss": 7.1202, "step": 423 }, { "epoch": 0.1447098976109215, "grad_norm": 3.541287899017334, "learning_rate": 0.0009517633674630262, "loss": 6.6661, "step": 424 }, { "epoch": 0.14505119453924914, "grad_norm": 3.6210668087005615, "learning_rate": 0.0009516496018202503, "loss": 7.1838, "step": 425 }, { "epoch": 0.1453924914675768, "grad_norm": 4.462888717651367, "learning_rate": 0.0009515358361774744, "loss": 5.8725, "step": 426 }, { "epoch": 0.14573378839590442, "grad_norm": 3.566049814224243, "learning_rate": 0.0009514220705346986, "loss": 6.7938, "step": 427 }, { "epoch": 0.14607508532423208, "grad_norm": 3.4675862789154053, "learning_rate": 0.0009513083048919227, "loss": 6.5886, "step": 428 }, { "epoch": 0.14641638225255973, "grad_norm": 3.4165515899658203, "learning_rate": 0.0009511945392491468, "loss": 6.9825, "step": 429 }, { "epoch": 0.14675767918088736, "grad_norm": 3.5711846351623535, "learning_rate": 0.0009510807736063709, "loss": 6.9881, "step": 430 }, { "epoch": 0.14709897610921502, "grad_norm": 3.523361921310425, "learning_rate": 0.000950967007963595, "loss": 6.6169, "step": 431 }, { "epoch": 0.14744027303754267, "grad_norm": 3.6487302780151367, "learning_rate": 0.0009508532423208191, "loss": 7.0033, "step": 432 }, { "epoch": 0.1477815699658703, "grad_norm": 4.30921745300293, "learning_rate": 0.0009507394766780433, "loss": 6.4524, "step": 433 }, { "epoch": 0.14812286689419796, "grad_norm": 3.7454891204833984, "learning_rate": 0.0009506257110352674, "loss": 6.6826, "step": 434 }, { "epoch": 0.14846416382252559, "grad_norm": 3.676464796066284, "learning_rate": 0.0009505119453924915, "loss": 6.8758, "step": 435 }, { "epoch": 0.14880546075085324, "grad_norm": 3.738009452819824, "learning_rate": 0.0009503981797497156, "loss": 6.9021, "step": 436 }, { "epoch": 0.1491467576791809, "grad_norm": 3.388455629348755, "learning_rate": 0.0009502844141069397, "loss": 6.7852, "step": 437 }, { "epoch": 0.14948805460750852, "grad_norm": 5.163676738739014, "learning_rate": 0.0009501706484641638, "loss": 6.4292, "step": 438 }, { "epoch": 0.14982935153583618, "grad_norm": 3.6834332942962646, "learning_rate": 0.0009500568828213879, "loss": 6.8296, "step": 439 }, { "epoch": 0.15017064846416384, "grad_norm": 3.6888158321380615, "learning_rate": 0.000949943117178612, "loss": 6.7824, "step": 440 }, { "epoch": 0.15051194539249146, "grad_norm": 3.5720479488372803, "learning_rate": 0.0009498293515358362, "loss": 6.973, "step": 441 }, { "epoch": 0.15085324232081912, "grad_norm": 3.5570552349090576, "learning_rate": 0.0009497155858930603, "loss": 6.264, "step": 442 }, { "epoch": 0.15119453924914675, "grad_norm": 3.610084056854248, "learning_rate": 0.0009496018202502844, "loss": 6.2526, "step": 443 }, { "epoch": 0.1515358361774744, "grad_norm": 3.445772647857666, "learning_rate": 0.0009494880546075086, "loss": 6.888, "step": 444 }, { "epoch": 0.15187713310580206, "grad_norm": 3.443215847015381, "learning_rate": 0.0009493742889647327, "loss": 7.2213, "step": 445 }, { "epoch": 0.1522184300341297, "grad_norm": 3.4744131565093994, "learning_rate": 0.0009492605233219568, "loss": 6.1855, "step": 446 }, { "epoch": 0.15255972696245734, "grad_norm": 3.616422176361084, "learning_rate": 0.0009491467576791809, "loss": 6.7537, "step": 447 }, { "epoch": 0.15290102389078497, "grad_norm": 3.427217483520508, "learning_rate": 0.000949032992036405, "loss": 6.5635, "step": 448 }, { "epoch": 0.15324232081911263, "grad_norm": 3.655622720718384, "learning_rate": 0.0009489192263936291, "loss": 6.4325, "step": 449 }, { "epoch": 0.15358361774744028, "grad_norm": 3.9160969257354736, "learning_rate": 0.0009488054607508533, "loss": 6.4151, "step": 450 }, { "epoch": 0.1539249146757679, "grad_norm": 4.333205223083496, "learning_rate": 0.0009486916951080774, "loss": 6.0077, "step": 451 }, { "epoch": 0.15426621160409557, "grad_norm": 3.7891650199890137, "learning_rate": 0.0009485779294653015, "loss": 6.3463, "step": 452 }, { "epoch": 0.15460750853242322, "grad_norm": 3.907245635986328, "learning_rate": 0.0009484641638225256, "loss": 5.503, "step": 453 }, { "epoch": 0.15494880546075085, "grad_norm": 3.9546966552734375, "learning_rate": 0.0009483503981797497, "loss": 6.6111, "step": 454 }, { "epoch": 0.1552901023890785, "grad_norm": 3.712440252304077, "learning_rate": 0.000948236632536974, "loss": 6.6064, "step": 455 }, { "epoch": 0.15563139931740613, "grad_norm": 3.5256028175354004, "learning_rate": 0.0009481228668941981, "loss": 6.9209, "step": 456 }, { "epoch": 0.1559726962457338, "grad_norm": 4.240937232971191, "learning_rate": 0.0009480091012514222, "loss": 6.3623, "step": 457 }, { "epoch": 0.15631399317406144, "grad_norm": 3.398322820663452, "learning_rate": 0.0009478953356086462, "loss": 6.574, "step": 458 }, { "epoch": 0.15665529010238907, "grad_norm": 3.7059226036071777, "learning_rate": 0.0009477815699658703, "loss": 6.9253, "step": 459 }, { "epoch": 0.15699658703071673, "grad_norm": 4.11593770980835, "learning_rate": 0.0009476678043230944, "loss": 6.4204, "step": 460 }, { "epoch": 0.15733788395904436, "grad_norm": 3.5810751914978027, "learning_rate": 0.0009475540386803186, "loss": 6.8083, "step": 461 }, { "epoch": 0.157679180887372, "grad_norm": 3.538257360458374, "learning_rate": 0.0009474402730375427, "loss": 7.1334, "step": 462 }, { "epoch": 0.15802047781569967, "grad_norm": 3.456045627593994, "learning_rate": 0.0009473265073947668, "loss": 7.0928, "step": 463 }, { "epoch": 0.1583617747440273, "grad_norm": 3.6278865337371826, "learning_rate": 0.0009472127417519909, "loss": 6.8821, "step": 464 }, { "epoch": 0.15870307167235495, "grad_norm": 3.6520791053771973, "learning_rate": 0.000947098976109215, "loss": 6.5972, "step": 465 }, { "epoch": 0.1590443686006826, "grad_norm": 3.5707054138183594, "learning_rate": 0.0009469852104664391, "loss": 7.1507, "step": 466 }, { "epoch": 0.15938566552901023, "grad_norm": 3.7022809982299805, "learning_rate": 0.0009468714448236633, "loss": 6.5752, "step": 467 }, { "epoch": 0.1597269624573379, "grad_norm": 3.426180839538574, "learning_rate": 0.0009467576791808874, "loss": 6.6443, "step": 468 }, { "epoch": 0.16006825938566552, "grad_norm": 3.5428926944732666, "learning_rate": 0.0009466439135381115, "loss": 6.5234, "step": 469 }, { "epoch": 0.16040955631399317, "grad_norm": 3.7080366611480713, "learning_rate": 0.0009465301478953356, "loss": 7.1171, "step": 470 }, { "epoch": 0.16075085324232083, "grad_norm": 3.589245080947876, "learning_rate": 0.0009464163822525597, "loss": 6.8297, "step": 471 }, { "epoch": 0.16109215017064846, "grad_norm": 3.5839226245880127, "learning_rate": 0.0009463026166097838, "loss": 6.6766, "step": 472 }, { "epoch": 0.1614334470989761, "grad_norm": 3.460501194000244, "learning_rate": 0.0009461888509670081, "loss": 6.9474, "step": 473 }, { "epoch": 0.16177474402730374, "grad_norm": 3.6447677612304688, "learning_rate": 0.0009460750853242322, "loss": 7.2087, "step": 474 }, { "epoch": 0.1621160409556314, "grad_norm": 6.186277866363525, "learning_rate": 0.0009459613196814563, "loss": 5.5688, "step": 475 }, { "epoch": 0.16245733788395905, "grad_norm": 3.7447400093078613, "learning_rate": 0.0009458475540386804, "loss": 6.5, "step": 476 }, { "epoch": 0.16279863481228668, "grad_norm": 3.6991195678710938, "learning_rate": 0.0009457337883959044, "loss": 6.7563, "step": 477 }, { "epoch": 0.16313993174061434, "grad_norm": 3.4451916217803955, "learning_rate": 0.0009456200227531286, "loss": 7.0189, "step": 478 }, { "epoch": 0.163481228668942, "grad_norm": 17.707456588745117, "learning_rate": 0.0009455062571103527, "loss": 5.872, "step": 479 }, { "epoch": 0.16382252559726962, "grad_norm": 4.85144567489624, "learning_rate": 0.0009453924914675768, "loss": 6.0977, "step": 480 }, { "epoch": 0.16416382252559727, "grad_norm": 3.997605085372925, "learning_rate": 0.0009452787258248009, "loss": 7.2862, "step": 481 }, { "epoch": 0.1645051194539249, "grad_norm": 4.731887340545654, "learning_rate": 0.000945164960182025, "loss": 7.1271, "step": 482 }, { "epoch": 0.16484641638225256, "grad_norm": 3.7682464122772217, "learning_rate": 0.0009450511945392491, "loss": 7.1285, "step": 483 }, { "epoch": 0.16518771331058021, "grad_norm": 3.5759470462799072, "learning_rate": 0.0009449374288964733, "loss": 7.0162, "step": 484 }, { "epoch": 0.16552901023890784, "grad_norm": 3.409883499145508, "learning_rate": 0.0009448236632536974, "loss": 7.237, "step": 485 }, { "epoch": 0.1658703071672355, "grad_norm": 3.6246159076690674, "learning_rate": 0.0009447098976109215, "loss": 6.7235, "step": 486 }, { "epoch": 0.16621160409556315, "grad_norm": 3.5993361473083496, "learning_rate": 0.0009445961319681456, "loss": 6.7617, "step": 487 }, { "epoch": 0.16655290102389078, "grad_norm": 3.4608912467956543, "learning_rate": 0.0009444823663253697, "loss": 6.7189, "step": 488 }, { "epoch": 0.16689419795221844, "grad_norm": 4.949087619781494, "learning_rate": 0.0009443686006825938, "loss": 6.1499, "step": 489 }, { "epoch": 0.16723549488054607, "grad_norm": 3.6994855403900146, "learning_rate": 0.0009442548350398181, "loss": 7.1528, "step": 490 }, { "epoch": 0.16757679180887372, "grad_norm": 3.868940591812134, "learning_rate": 0.0009441410693970422, "loss": 7.0506, "step": 491 }, { "epoch": 0.16791808873720138, "grad_norm": 3.722750663757324, "learning_rate": 0.0009440273037542663, "loss": 7.0174, "step": 492 }, { "epoch": 0.168259385665529, "grad_norm": 7.372501373291016, "learning_rate": 0.0009439135381114904, "loss": 5.1276, "step": 493 }, { "epoch": 0.16860068259385666, "grad_norm": 5.211971282958984, "learning_rate": 0.0009437997724687145, "loss": 6.1224, "step": 494 }, { "epoch": 0.1689419795221843, "grad_norm": 3.662050485610962, "learning_rate": 0.0009436860068259387, "loss": 6.9951, "step": 495 }, { "epoch": 0.16928327645051194, "grad_norm": 3.8950295448303223, "learning_rate": 0.0009435722411831627, "loss": 6.9776, "step": 496 }, { "epoch": 0.1696245733788396, "grad_norm": 3.697416067123413, "learning_rate": 0.0009434584755403868, "loss": 6.5273, "step": 497 }, { "epoch": 0.16996587030716723, "grad_norm": 3.4824891090393066, "learning_rate": 0.0009433447098976109, "loss": 6.9454, "step": 498 }, { "epoch": 0.17030716723549488, "grad_norm": 3.859316825866699, "learning_rate": 0.000943230944254835, "loss": 5.7186, "step": 499 }, { "epoch": 0.17064846416382254, "grad_norm": 8.799308776855469, "learning_rate": 0.0009431171786120591, "loss": 6.3941, "step": 500 }, { "epoch": 0.17098976109215017, "grad_norm": 3.745943069458008, "learning_rate": 0.0009430034129692833, "loss": 7.0287, "step": 501 }, { "epoch": 0.17133105802047782, "grad_norm": 3.7031750679016113, "learning_rate": 0.0009428896473265074, "loss": 6.6921, "step": 502 }, { "epoch": 0.17167235494880545, "grad_norm": 3.5170814990997314, "learning_rate": 0.0009427758816837315, "loss": 7.0773, "step": 503 }, { "epoch": 0.1720136518771331, "grad_norm": 3.4774396419525146, "learning_rate": 0.0009426621160409556, "loss": 6.9226, "step": 504 }, { "epoch": 0.17235494880546076, "grad_norm": 3.5223052501678467, "learning_rate": 0.0009425483503981797, "loss": 6.7229, "step": 505 }, { "epoch": 0.1726962457337884, "grad_norm": 4.429044723510742, "learning_rate": 0.0009424345847554038, "loss": 6.1489, "step": 506 }, { "epoch": 0.17303754266211605, "grad_norm": 3.946762800216675, "learning_rate": 0.0009423208191126281, "loss": 6.3177, "step": 507 }, { "epoch": 0.17337883959044367, "grad_norm": 4.3716278076171875, "learning_rate": 0.0009422070534698522, "loss": 7.4142, "step": 508 }, { "epoch": 0.17372013651877133, "grad_norm": 4.144838809967041, "learning_rate": 0.0009420932878270763, "loss": 6.5615, "step": 509 }, { "epoch": 0.17406143344709898, "grad_norm": 3.442716121673584, "learning_rate": 0.0009419795221843004, "loss": 7.1919, "step": 510 }, { "epoch": 0.1744027303754266, "grad_norm": 3.616173505783081, "learning_rate": 0.0009418657565415245, "loss": 7.0364, "step": 511 }, { "epoch": 0.17474402730375427, "grad_norm": 3.2651257514953613, "learning_rate": 0.0009417519908987486, "loss": 7.1869, "step": 512 }, { "epoch": 0.17508532423208192, "grad_norm": 3.429654836654663, "learning_rate": 0.0009416382252559728, "loss": 7.3759, "step": 513 }, { "epoch": 0.17542662116040955, "grad_norm": 3.8366029262542725, "learning_rate": 0.0009415244596131969, "loss": 6.5422, "step": 514 }, { "epoch": 0.1757679180887372, "grad_norm": 6.700514793395996, "learning_rate": 0.000941410693970421, "loss": 5.9699, "step": 515 }, { "epoch": 0.17610921501706484, "grad_norm": 3.9318792819976807, "learning_rate": 0.000941296928327645, "loss": 7.1496, "step": 516 }, { "epoch": 0.1764505119453925, "grad_norm": 3.7701475620269775, "learning_rate": 0.0009411831626848691, "loss": 7.1363, "step": 517 }, { "epoch": 0.17679180887372015, "grad_norm": 3.5054924488067627, "learning_rate": 0.0009410693970420933, "loss": 6.9795, "step": 518 }, { "epoch": 0.17713310580204777, "grad_norm": 3.7713727951049805, "learning_rate": 0.0009409556313993174, "loss": 6.6172, "step": 519 }, { "epoch": 0.17747440273037543, "grad_norm": 8.710354804992676, "learning_rate": 0.0009408418657565415, "loss": 6.0337, "step": 520 }, { "epoch": 0.17781569965870306, "grad_norm": 4.916598320007324, "learning_rate": 0.0009407281001137656, "loss": 6.0356, "step": 521 }, { "epoch": 0.1781569965870307, "grad_norm": 3.8350720405578613, "learning_rate": 0.0009406143344709897, "loss": 7.201, "step": 522 }, { "epoch": 0.17849829351535837, "grad_norm": 3.7248024940490723, "learning_rate": 0.0009405005688282138, "loss": 6.5138, "step": 523 }, { "epoch": 0.178839590443686, "grad_norm": 3.730102062225342, "learning_rate": 0.0009403868031854381, "loss": 6.3692, "step": 524 }, { "epoch": 0.17918088737201365, "grad_norm": 3.4296181201934814, "learning_rate": 0.0009402730375426622, "loss": 6.7243, "step": 525 }, { "epoch": 0.1795221843003413, "grad_norm": 3.4603865146636963, "learning_rate": 0.0009401592718998863, "loss": 7.2087, "step": 526 }, { "epoch": 0.17986348122866894, "grad_norm": 3.4255411624908447, "learning_rate": 0.0009400455062571104, "loss": 6.6606, "step": 527 }, { "epoch": 0.1802047781569966, "grad_norm": 4.901156902313232, "learning_rate": 0.0009399317406143345, "loss": 6.4545, "step": 528 }, { "epoch": 0.18054607508532422, "grad_norm": 3.5273005962371826, "learning_rate": 0.0009398179749715586, "loss": 6.8521, "step": 529 }, { "epoch": 0.18088737201365188, "grad_norm": 3.794410467147827, "learning_rate": 0.0009397042093287828, "loss": 6.5531, "step": 530 }, { "epoch": 0.18122866894197953, "grad_norm": 3.8027398586273193, "learning_rate": 0.0009395904436860069, "loss": 6.9897, "step": 531 }, { "epoch": 0.18156996587030716, "grad_norm": 3.6400845050811768, "learning_rate": 0.000939476678043231, "loss": 6.4599, "step": 532 }, { "epoch": 0.18191126279863482, "grad_norm": 3.6795458793640137, "learning_rate": 0.0009393629124004551, "loss": 7.0436, "step": 533 }, { "epoch": 0.18225255972696247, "grad_norm": 3.549872398376465, "learning_rate": 0.0009392491467576792, "loss": 6.8872, "step": 534 }, { "epoch": 0.1825938566552901, "grad_norm": 3.6060047149658203, "learning_rate": 0.0009391353811149032, "loss": 6.8889, "step": 535 }, { "epoch": 0.18293515358361775, "grad_norm": 3.5164926052093506, "learning_rate": 0.0009390216154721274, "loss": 7.2238, "step": 536 }, { "epoch": 0.18327645051194538, "grad_norm": 4.204543590545654, "learning_rate": 0.0009389078498293515, "loss": 6.8331, "step": 537 }, { "epoch": 0.18361774744027304, "grad_norm": 3.713944673538208, "learning_rate": 0.0009387940841865756, "loss": 6.3832, "step": 538 }, { "epoch": 0.1839590443686007, "grad_norm": 3.504955530166626, "learning_rate": 0.0009386803185437997, "loss": 6.9558, "step": 539 }, { "epoch": 0.18430034129692832, "grad_norm": 3.7779481410980225, "learning_rate": 0.0009385665529010238, "loss": 6.7189, "step": 540 }, { "epoch": 0.18464163822525598, "grad_norm": 3.7830755710601807, "learning_rate": 0.0009384527872582481, "loss": 7.0508, "step": 541 }, { "epoch": 0.1849829351535836, "grad_norm": 3.6746010780334473, "learning_rate": 0.0009383390216154722, "loss": 6.6615, "step": 542 }, { "epoch": 0.18532423208191126, "grad_norm": 3.6559464931488037, "learning_rate": 0.0009382252559726963, "loss": 7.2021, "step": 543 }, { "epoch": 0.18566552901023892, "grad_norm": 3.4099745750427246, "learning_rate": 0.0009381114903299204, "loss": 6.8867, "step": 544 }, { "epoch": 0.18600682593856654, "grad_norm": 3.7949607372283936, "learning_rate": 0.0009379977246871445, "loss": 6.3174, "step": 545 }, { "epoch": 0.1863481228668942, "grad_norm": 3.6113579273223877, "learning_rate": 0.0009378839590443686, "loss": 7.2956, "step": 546 }, { "epoch": 0.18668941979522186, "grad_norm": 4.320201396942139, "learning_rate": 0.0009377701934015928, "loss": 7.0727, "step": 547 }, { "epoch": 0.18703071672354948, "grad_norm": 3.824106454849243, "learning_rate": 0.0009376564277588169, "loss": 6.5791, "step": 548 }, { "epoch": 0.18737201365187714, "grad_norm": 3.5648560523986816, "learning_rate": 0.000937542662116041, "loss": 7.0372, "step": 549 }, { "epoch": 0.18771331058020477, "grad_norm": 6.90482234954834, "learning_rate": 0.0009374288964732651, "loss": 6.3724, "step": 550 }, { "epoch": 0.18805460750853242, "grad_norm": 3.8881046772003174, "learning_rate": 0.0009373151308304892, "loss": 6.7275, "step": 551 }, { "epoch": 0.18839590443686008, "grad_norm": 3.4772567749023438, "learning_rate": 0.0009372013651877133, "loss": 7.1439, "step": 552 }, { "epoch": 0.1887372013651877, "grad_norm": 3.674238681793213, "learning_rate": 0.0009370875995449375, "loss": 7.3201, "step": 553 }, { "epoch": 0.18907849829351536, "grad_norm": 3.3719608783721924, "learning_rate": 0.0009369738339021616, "loss": 6.646, "step": 554 }, { "epoch": 0.189419795221843, "grad_norm": 3.6703298091888428, "learning_rate": 0.0009368600682593856, "loss": 6.7161, "step": 555 }, { "epoch": 0.18976109215017065, "grad_norm": 3.6245388984680176, "learning_rate": 0.0009367463026166097, "loss": 6.3849, "step": 556 }, { "epoch": 0.1901023890784983, "grad_norm": 3.5778818130493164, "learning_rate": 0.0009366325369738338, "loss": 7.4046, "step": 557 }, { "epoch": 0.19044368600682593, "grad_norm": 4.9418253898620605, "learning_rate": 0.000936518771331058, "loss": 6.5024, "step": 558 }, { "epoch": 0.19078498293515359, "grad_norm": 3.6862545013427734, "learning_rate": 0.0009364050056882822, "loss": 6.7619, "step": 559 }, { "epoch": 0.19112627986348124, "grad_norm": 4.22722864151001, "learning_rate": 0.0009362912400455063, "loss": 6.5308, "step": 560 }, { "epoch": 0.19146757679180887, "grad_norm": 3.733459949493408, "learning_rate": 0.0009361774744027304, "loss": 6.5565, "step": 561 }, { "epoch": 0.19180887372013652, "grad_norm": 9.069499015808105, "learning_rate": 0.0009360637087599545, "loss": 4.9708, "step": 562 }, { "epoch": 0.19215017064846415, "grad_norm": 3.968690872192383, "learning_rate": 0.0009359499431171786, "loss": 7.3263, "step": 563 }, { "epoch": 0.1924914675767918, "grad_norm": 3.5820865631103516, "learning_rate": 0.0009358361774744028, "loss": 6.7364, "step": 564 }, { "epoch": 0.19283276450511946, "grad_norm": 3.6914074420928955, "learning_rate": 0.0009357224118316269, "loss": 6.8361, "step": 565 }, { "epoch": 0.1931740614334471, "grad_norm": 3.580321788787842, "learning_rate": 0.000935608646188851, "loss": 6.4783, "step": 566 }, { "epoch": 0.19351535836177475, "grad_norm": 3.5576207637786865, "learning_rate": 0.0009354948805460751, "loss": 6.5367, "step": 567 }, { "epoch": 0.19385665529010238, "grad_norm": 3.515730619430542, "learning_rate": 0.0009353811149032992, "loss": 6.481, "step": 568 }, { "epoch": 0.19419795221843003, "grad_norm": 3.8840041160583496, "learning_rate": 0.0009352673492605233, "loss": 6.4732, "step": 569 }, { "epoch": 0.1945392491467577, "grad_norm": 3.75571346282959, "learning_rate": 0.0009351535836177475, "loss": 7.1131, "step": 570 }, { "epoch": 0.19488054607508531, "grad_norm": 4.804229259490967, "learning_rate": 0.0009350398179749716, "loss": 6.1646, "step": 571 }, { "epoch": 0.19522184300341297, "grad_norm": 3.6777968406677246, "learning_rate": 0.0009349260523321957, "loss": 6.8409, "step": 572 }, { "epoch": 0.19556313993174063, "grad_norm": 3.6401546001434326, "learning_rate": 0.0009348122866894199, "loss": 7.1515, "step": 573 }, { "epoch": 0.19590443686006825, "grad_norm": 3.532172679901123, "learning_rate": 0.0009346985210466438, "loss": 6.3835, "step": 574 }, { "epoch": 0.1962457337883959, "grad_norm": 3.4800662994384766, "learning_rate": 0.000934584755403868, "loss": 7.2508, "step": 575 }, { "epoch": 0.19658703071672354, "grad_norm": 3.7157084941864014, "learning_rate": 0.0009344709897610922, "loss": 6.508, "step": 576 }, { "epoch": 0.1969283276450512, "grad_norm": 3.7525947093963623, "learning_rate": 0.0009343572241183163, "loss": 6.3877, "step": 577 }, { "epoch": 0.19726962457337885, "grad_norm": 3.6418375968933105, "learning_rate": 0.0009342434584755404, "loss": 6.7508, "step": 578 }, { "epoch": 0.19761092150170648, "grad_norm": 4.113407135009766, "learning_rate": 0.0009341296928327645, "loss": 6.676, "step": 579 }, { "epoch": 0.19795221843003413, "grad_norm": 3.6704797744750977, "learning_rate": 0.0009340159271899886, "loss": 6.9985, "step": 580 }, { "epoch": 0.1982935153583618, "grad_norm": 3.590165138244629, "learning_rate": 0.0009339021615472128, "loss": 6.6784, "step": 581 }, { "epoch": 0.19863481228668942, "grad_norm": 3.575233221054077, "learning_rate": 0.0009337883959044369, "loss": 6.4992, "step": 582 }, { "epoch": 0.19897610921501707, "grad_norm": 3.404895782470703, "learning_rate": 0.000933674630261661, "loss": 6.9338, "step": 583 }, { "epoch": 0.1993174061433447, "grad_norm": 3.5058460235595703, "learning_rate": 0.0009335608646188851, "loss": 7.2116, "step": 584 }, { "epoch": 0.19965870307167236, "grad_norm": 3.462622880935669, "learning_rate": 0.0009334470989761092, "loss": 7.0462, "step": 585 }, { "epoch": 0.2, "grad_norm": 4.042704105377197, "learning_rate": 0.0009333333333333333, "loss": 5.7069, "step": 586 }, { "epoch": 0.20034129692832764, "grad_norm": 3.7907888889312744, "learning_rate": 0.0009332195676905575, "loss": 6.7271, "step": 587 }, { "epoch": 0.2006825938566553, "grad_norm": 3.531925916671753, "learning_rate": 0.0009331058020477816, "loss": 7.0678, "step": 588 }, { "epoch": 0.20102389078498292, "grad_norm": 3.567275285720825, "learning_rate": 0.0009329920364050057, "loss": 6.9811, "step": 589 }, { "epoch": 0.20136518771331058, "grad_norm": 3.8175978660583496, "learning_rate": 0.0009328782707622299, "loss": 6.6779, "step": 590 }, { "epoch": 0.20170648464163823, "grad_norm": 3.672842502593994, "learning_rate": 0.000932764505119454, "loss": 6.6118, "step": 591 }, { "epoch": 0.20204778156996586, "grad_norm": 3.625286102294922, "learning_rate": 0.0009326507394766781, "loss": 7.1495, "step": 592 }, { "epoch": 0.20238907849829352, "grad_norm": 3.4718549251556396, "learning_rate": 0.0009325369738339023, "loss": 7.5187, "step": 593 }, { "epoch": 0.20273037542662117, "grad_norm": 3.5792765617370605, "learning_rate": 0.0009324232081911263, "loss": 6.7316, "step": 594 }, { "epoch": 0.2030716723549488, "grad_norm": 19.332000732421875, "learning_rate": 0.0009323094425483504, "loss": 6.0635, "step": 595 }, { "epoch": 0.20341296928327646, "grad_norm": 3.741669178009033, "learning_rate": 0.0009321956769055745, "loss": 7.1053, "step": 596 }, { "epoch": 0.20375426621160408, "grad_norm": 4.053689002990723, "learning_rate": 0.0009320819112627986, "loss": 6.9079, "step": 597 }, { "epoch": 0.20409556313993174, "grad_norm": 3.5358211994171143, "learning_rate": 0.0009319681456200227, "loss": 6.9465, "step": 598 }, { "epoch": 0.2044368600682594, "grad_norm": 3.4627411365509033, "learning_rate": 0.0009318543799772469, "loss": 7.1842, "step": 599 }, { "epoch": 0.20477815699658702, "grad_norm": 3.443898916244507, "learning_rate": 0.000931740614334471, "loss": 6.5099, "step": 600 }, { "epoch": 0.20511945392491468, "grad_norm": 3.4302115440368652, "learning_rate": 0.0009316268486916951, "loss": 6.896, "step": 601 }, { "epoch": 0.2054607508532423, "grad_norm": 3.346616268157959, "learning_rate": 0.0009315130830489192, "loss": 6.8985, "step": 602 }, { "epoch": 0.20580204778156996, "grad_norm": 3.753207206726074, "learning_rate": 0.0009313993174061433, "loss": 6.4971, "step": 603 }, { "epoch": 0.20614334470989762, "grad_norm": 3.675739049911499, "learning_rate": 0.0009312855517633675, "loss": 6.6612, "step": 604 }, { "epoch": 0.20648464163822525, "grad_norm": 3.6538069248199463, "learning_rate": 0.0009311717861205916, "loss": 7.0008, "step": 605 }, { "epoch": 0.2068259385665529, "grad_norm": 3.7137982845306396, "learning_rate": 0.0009310580204778157, "loss": 7.0696, "step": 606 }, { "epoch": 0.20716723549488056, "grad_norm": 3.7337138652801514, "learning_rate": 0.0009309442548350399, "loss": 6.686, "step": 607 }, { "epoch": 0.2075085324232082, "grad_norm": 7.5925374031066895, "learning_rate": 0.000930830489192264, "loss": 6.9714, "step": 608 }, { "epoch": 0.20784982935153584, "grad_norm": 3.7731120586395264, "learning_rate": 0.0009307167235494881, "loss": 6.924, "step": 609 }, { "epoch": 0.20819112627986347, "grad_norm": 6.881464004516602, "learning_rate": 0.0009306029579067123, "loss": 6.9183, "step": 610 }, { "epoch": 0.20853242320819113, "grad_norm": 3.973215103149414, "learning_rate": 0.0009304891922639364, "loss": 6.7891, "step": 611 }, { "epoch": 0.20887372013651878, "grad_norm": 3.4739882946014404, "learning_rate": 0.0009303754266211605, "loss": 7.1653, "step": 612 }, { "epoch": 0.2092150170648464, "grad_norm": 3.3610455989837646, "learning_rate": 0.0009302616609783845, "loss": 6.6231, "step": 613 }, { "epoch": 0.20955631399317406, "grad_norm": 3.4226975440979004, "learning_rate": 0.0009301478953356086, "loss": 7.0169, "step": 614 }, { "epoch": 0.2098976109215017, "grad_norm": 4.903579235076904, "learning_rate": 0.0009300341296928327, "loss": 6.194, "step": 615 }, { "epoch": 0.21023890784982935, "grad_norm": 3.5574898719787598, "learning_rate": 0.0009299203640500569, "loss": 7.0527, "step": 616 }, { "epoch": 0.210580204778157, "grad_norm": 3.566174030303955, "learning_rate": 0.000929806598407281, "loss": 6.8922, "step": 617 }, { "epoch": 0.21092150170648463, "grad_norm": 4.031206130981445, "learning_rate": 0.0009296928327645051, "loss": 5.8383, "step": 618 }, { "epoch": 0.2112627986348123, "grad_norm": 3.552882671356201, "learning_rate": 0.0009295790671217292, "loss": 6.6364, "step": 619 }, { "epoch": 0.21160409556313994, "grad_norm": 6.967950820922852, "learning_rate": 0.0009294653014789533, "loss": 6.5468, "step": 620 }, { "epoch": 0.21194539249146757, "grad_norm": 3.7092936038970947, "learning_rate": 0.0009293515358361775, "loss": 7.2022, "step": 621 }, { "epoch": 0.21228668941979523, "grad_norm": 3.346576690673828, "learning_rate": 0.0009292377701934016, "loss": 6.4981, "step": 622 }, { "epoch": 0.21262798634812285, "grad_norm": 3.6055033206939697, "learning_rate": 0.0009291240045506257, "loss": 6.9378, "step": 623 }, { "epoch": 0.2129692832764505, "grad_norm": 3.5705137252807617, "learning_rate": 0.0009290102389078499, "loss": 7.0167, "step": 624 }, { "epoch": 0.21331058020477817, "grad_norm": 4.07545280456543, "learning_rate": 0.000928896473265074, "loss": 6.4161, "step": 625 }, { "epoch": 0.2136518771331058, "grad_norm": 3.458583116531372, "learning_rate": 0.0009287827076222981, "loss": 6.7419, "step": 626 }, { "epoch": 0.21399317406143345, "grad_norm": 3.4590044021606445, "learning_rate": 0.0009286689419795223, "loss": 6.5505, "step": 627 }, { "epoch": 0.2143344709897611, "grad_norm": 3.5659596920013428, "learning_rate": 0.0009285551763367464, "loss": 6.6099, "step": 628 }, { "epoch": 0.21467576791808873, "grad_norm": 3.5950722694396973, "learning_rate": 0.0009284414106939705, "loss": 6.6846, "step": 629 }, { "epoch": 0.2150170648464164, "grad_norm": 3.622309923171997, "learning_rate": 0.0009283276450511946, "loss": 6.7184, "step": 630 }, { "epoch": 0.21535836177474402, "grad_norm": 3.5090525150299072, "learning_rate": 0.0009282138794084187, "loss": 7.0747, "step": 631 }, { "epoch": 0.21569965870307167, "grad_norm": 3.4692866802215576, "learning_rate": 0.0009281001137656428, "loss": 7.4671, "step": 632 }, { "epoch": 0.21604095563139933, "grad_norm": 3.454890489578247, "learning_rate": 0.0009279863481228669, "loss": 7.0538, "step": 633 }, { "epoch": 0.21638225255972696, "grad_norm": 5.090976715087891, "learning_rate": 0.000927872582480091, "loss": 6.5268, "step": 634 }, { "epoch": 0.2167235494880546, "grad_norm": 3.852503776550293, "learning_rate": 0.0009277588168373151, "loss": 6.9396, "step": 635 }, { "epoch": 0.21706484641638224, "grad_norm": 3.7737314701080322, "learning_rate": 0.0009276450511945392, "loss": 6.7992, "step": 636 }, { "epoch": 0.2174061433447099, "grad_norm": 3.6202504634857178, "learning_rate": 0.0009275312855517633, "loss": 7.4372, "step": 637 }, { "epoch": 0.21774744027303755, "grad_norm": 3.5939218997955322, "learning_rate": 0.0009274175199089874, "loss": 6.6953, "step": 638 }, { "epoch": 0.21808873720136518, "grad_norm": 3.323547601699829, "learning_rate": 0.0009273037542662116, "loss": 6.7816, "step": 639 }, { "epoch": 0.21843003412969283, "grad_norm": 3.6134841442108154, "learning_rate": 0.0009271899886234357, "loss": 6.6858, "step": 640 }, { "epoch": 0.2187713310580205, "grad_norm": 3.660320520401001, "learning_rate": 0.0009270762229806599, "loss": 7.0058, "step": 641 }, { "epoch": 0.21911262798634812, "grad_norm": 3.380748748779297, "learning_rate": 0.000926962457337884, "loss": 6.735, "step": 642 }, { "epoch": 0.21945392491467577, "grad_norm": 6.264308452606201, "learning_rate": 0.0009268486916951081, "loss": 5.8211, "step": 643 }, { "epoch": 0.2197952218430034, "grad_norm": 3.7562389373779297, "learning_rate": 0.0009267349260523323, "loss": 6.7083, "step": 644 }, { "epoch": 0.22013651877133106, "grad_norm": 3.7538766860961914, "learning_rate": 0.0009266211604095564, "loss": 6.8536, "step": 645 }, { "epoch": 0.2204778156996587, "grad_norm": 3.648890972137451, "learning_rate": 0.0009265073947667805, "loss": 6.6039, "step": 646 }, { "epoch": 0.22081911262798634, "grad_norm": 3.616142988204956, "learning_rate": 0.0009263936291240046, "loss": 6.0385, "step": 647 }, { "epoch": 0.221160409556314, "grad_norm": 3.6815671920776367, "learning_rate": 0.0009262798634812287, "loss": 7.124, "step": 648 }, { "epoch": 0.22150170648464163, "grad_norm": 3.596876382827759, "learning_rate": 0.0009261660978384528, "loss": 6.5317, "step": 649 }, { "epoch": 0.22184300341296928, "grad_norm": 3.4325168132781982, "learning_rate": 0.000926052332195677, "loss": 6.7055, "step": 650 }, { "epoch": 0.22218430034129694, "grad_norm": 3.43967604637146, "learning_rate": 0.0009259385665529011, "loss": 6.9872, "step": 651 }, { "epoch": 0.22252559726962456, "grad_norm": 3.4720215797424316, "learning_rate": 0.0009258248009101251, "loss": 6.807, "step": 652 }, { "epoch": 0.22286689419795222, "grad_norm": 4.370832920074463, "learning_rate": 0.0009257110352673492, "loss": 5.8649, "step": 653 }, { "epoch": 0.22320819112627988, "grad_norm": 3.566333293914795, "learning_rate": 0.0009255972696245733, "loss": 6.6208, "step": 654 }, { "epoch": 0.2235494880546075, "grad_norm": 3.5970399379730225, "learning_rate": 0.0009254835039817974, "loss": 7.0027, "step": 655 }, { "epoch": 0.22389078498293516, "grad_norm": 3.698763370513916, "learning_rate": 0.0009253697383390216, "loss": 7.0716, "step": 656 }, { "epoch": 0.2242320819112628, "grad_norm": 3.4119129180908203, "learning_rate": 0.0009252559726962458, "loss": 6.8046, "step": 657 }, { "epoch": 0.22457337883959044, "grad_norm": 3.6655516624450684, "learning_rate": 0.0009251422070534699, "loss": 6.9696, "step": 658 }, { "epoch": 0.2249146757679181, "grad_norm": 5.750580787658691, "learning_rate": 0.000925028441410694, "loss": 5.6451, "step": 659 }, { "epoch": 0.22525597269624573, "grad_norm": 3.9216561317443848, "learning_rate": 0.0009249146757679181, "loss": 6.4134, "step": 660 }, { "epoch": 0.22559726962457338, "grad_norm": 4.239558696746826, "learning_rate": 0.0009248009101251423, "loss": 5.6066, "step": 661 }, { "epoch": 0.225938566552901, "grad_norm": 3.7342917919158936, "learning_rate": 0.0009246871444823664, "loss": 7.0818, "step": 662 }, { "epoch": 0.22627986348122867, "grad_norm": 5.248586177825928, "learning_rate": 0.0009245733788395905, "loss": 5.4571, "step": 663 }, { "epoch": 0.22662116040955632, "grad_norm": 3.6075022220611572, "learning_rate": 0.0009244596131968146, "loss": 6.9643, "step": 664 }, { "epoch": 0.22696245733788395, "grad_norm": 3.528815269470215, "learning_rate": 0.0009243458475540387, "loss": 7.1822, "step": 665 }, { "epoch": 0.2273037542662116, "grad_norm": 3.4905171394348145, "learning_rate": 0.0009242320819112628, "loss": 7.0671, "step": 666 }, { "epoch": 0.22764505119453926, "grad_norm": 3.64202880859375, "learning_rate": 0.000924118316268487, "loss": 6.6831, "step": 667 }, { "epoch": 0.2279863481228669, "grad_norm": 6.533376216888428, "learning_rate": 0.0009240045506257111, "loss": 6.1337, "step": 668 }, { "epoch": 0.22832764505119454, "grad_norm": 3.9284870624542236, "learning_rate": 0.0009238907849829352, "loss": 6.5213, "step": 669 }, { "epoch": 0.22866894197952217, "grad_norm": 3.7156975269317627, "learning_rate": 0.0009237770193401593, "loss": 6.5675, "step": 670 }, { "epoch": 0.22901023890784983, "grad_norm": 4.064757347106934, "learning_rate": 0.0009236632536973833, "loss": 7.1138, "step": 671 }, { "epoch": 0.22935153583617748, "grad_norm": 5.838778018951416, "learning_rate": 0.0009235494880546074, "loss": 6.6873, "step": 672 }, { "epoch": 0.2296928327645051, "grad_norm": 3.700157642364502, "learning_rate": 0.0009234357224118316, "loss": 6.5795, "step": 673 }, { "epoch": 0.23003412969283277, "grad_norm": 3.6335175037384033, "learning_rate": 0.0009233219567690558, "loss": 6.5419, "step": 674 }, { "epoch": 0.23037542662116042, "grad_norm": 3.073715925216675, "learning_rate": 0.0009232081911262799, "loss": 6.6049, "step": 675 }, { "epoch": 0.23071672354948805, "grad_norm": 3.373309850692749, "learning_rate": 0.000923094425483504, "loss": 6.7097, "step": 676 }, { "epoch": 0.2310580204778157, "grad_norm": 3.3596081733703613, "learning_rate": 0.0009229806598407281, "loss": 6.8117, "step": 677 }, { "epoch": 0.23139931740614333, "grad_norm": 3.4036943912506104, "learning_rate": 0.0009228668941979522, "loss": 7.1459, "step": 678 }, { "epoch": 0.231740614334471, "grad_norm": 5.635969638824463, "learning_rate": 0.0009227531285551764, "loss": 5.9685, "step": 679 }, { "epoch": 0.23208191126279865, "grad_norm": 4.183934688568115, "learning_rate": 0.0009226393629124005, "loss": 6.5457, "step": 680 }, { "epoch": 0.23242320819112627, "grad_norm": 3.8760788440704346, "learning_rate": 0.0009225255972696246, "loss": 7.0518, "step": 681 }, { "epoch": 0.23276450511945393, "grad_norm": 3.5754668712615967, "learning_rate": 0.0009224118316268487, "loss": 6.5852, "step": 682 }, { "epoch": 0.23310580204778156, "grad_norm": 3.5699808597564697, "learning_rate": 0.0009222980659840728, "loss": 7.1735, "step": 683 }, { "epoch": 0.2334470989761092, "grad_norm": 3.3643417358398438, "learning_rate": 0.000922184300341297, "loss": 6.8809, "step": 684 }, { "epoch": 0.23378839590443687, "grad_norm": 4.393078804016113, "learning_rate": 0.0009220705346985211, "loss": 6.2643, "step": 685 }, { "epoch": 0.2341296928327645, "grad_norm": 3.7724368572235107, "learning_rate": 0.0009219567690557452, "loss": 6.6099, "step": 686 }, { "epoch": 0.23447098976109215, "grad_norm": 3.7385144233703613, "learning_rate": 0.0009218430034129693, "loss": 6.8247, "step": 687 }, { "epoch": 0.2348122866894198, "grad_norm": 3.5062074661254883, "learning_rate": 0.0009217292377701934, "loss": 7.1594, "step": 688 }, { "epoch": 0.23515358361774744, "grad_norm": 4.198519229888916, "learning_rate": 0.0009216154721274175, "loss": 6.0545, "step": 689 }, { "epoch": 0.2354948805460751, "grad_norm": 3.814720392227173, "learning_rate": 0.0009215017064846418, "loss": 7.1292, "step": 690 }, { "epoch": 0.23583617747440272, "grad_norm": 3.642256498336792, "learning_rate": 0.0009213879408418658, "loss": 7.0849, "step": 691 }, { "epoch": 0.23617747440273038, "grad_norm": 4.872190475463867, "learning_rate": 0.0009212741751990899, "loss": 6.8024, "step": 692 }, { "epoch": 0.23651877133105803, "grad_norm": 3.5935611724853516, "learning_rate": 0.000921160409556314, "loss": 6.6592, "step": 693 }, { "epoch": 0.23686006825938566, "grad_norm": 4.270242214202881, "learning_rate": 0.0009210466439135381, "loss": 6.636, "step": 694 }, { "epoch": 0.23720136518771331, "grad_norm": 3.73964524269104, "learning_rate": 0.0009209328782707622, "loss": 7.3186, "step": 695 }, { "epoch": 0.23754266211604094, "grad_norm": 3.55539870262146, "learning_rate": 0.0009208191126279864, "loss": 7.008, "step": 696 }, { "epoch": 0.2378839590443686, "grad_norm": 3.5897581577301025, "learning_rate": 0.0009207053469852105, "loss": 6.8969, "step": 697 }, { "epoch": 0.23822525597269625, "grad_norm": 3.5224783420562744, "learning_rate": 0.0009205915813424346, "loss": 6.6032, "step": 698 }, { "epoch": 0.23856655290102388, "grad_norm": 3.5760772228240967, "learning_rate": 0.0009204778156996587, "loss": 6.8774, "step": 699 }, { "epoch": 0.23890784982935154, "grad_norm": 3.4625370502471924, "learning_rate": 0.0009203640500568828, "loss": 6.934, "step": 700 }, { "epoch": 0.2392491467576792, "grad_norm": 3.7003684043884277, "learning_rate": 0.0009202502844141069, "loss": 6.7796, "step": 701 }, { "epoch": 0.23959044368600682, "grad_norm": 3.4892773628234863, "learning_rate": 0.0009201365187713311, "loss": 6.6771, "step": 702 }, { "epoch": 0.23993174061433448, "grad_norm": 3.626009941101074, "learning_rate": 0.0009200227531285552, "loss": 6.4784, "step": 703 }, { "epoch": 0.2402730375426621, "grad_norm": 4.080516815185547, "learning_rate": 0.0009199089874857793, "loss": 6.844, "step": 704 }, { "epoch": 0.24061433447098976, "grad_norm": 3.7570433616638184, "learning_rate": 0.0009197952218430034, "loss": 6.3753, "step": 705 }, { "epoch": 0.24095563139931742, "grad_norm": 3.6478946208953857, "learning_rate": 0.0009196814562002275, "loss": 6.4808, "step": 706 }, { "epoch": 0.24129692832764504, "grad_norm": 3.6579360961914062, "learning_rate": 0.0009195676905574518, "loss": 6.345, "step": 707 }, { "epoch": 0.2416382252559727, "grad_norm": 3.3532590866088867, "learning_rate": 0.0009194539249146759, "loss": 6.8791, "step": 708 }, { "epoch": 0.24197952218430033, "grad_norm": 3.424499273300171, "learning_rate": 0.0009193401592719, "loss": 6.5488, "step": 709 }, { "epoch": 0.24232081911262798, "grad_norm": 3.450228452682495, "learning_rate": 0.000919226393629124, "loss": 6.8405, "step": 710 }, { "epoch": 0.24266211604095564, "grad_norm": 3.4876630306243896, "learning_rate": 0.0009191126279863481, "loss": 6.9152, "step": 711 }, { "epoch": 0.24300341296928327, "grad_norm": 3.635850429534912, "learning_rate": 0.0009189988623435722, "loss": 6.6264, "step": 712 }, { "epoch": 0.24334470989761092, "grad_norm": 3.780963897705078, "learning_rate": 0.0009188850967007964, "loss": 6.1287, "step": 713 }, { "epoch": 0.24368600682593858, "grad_norm": 3.4603798389434814, "learning_rate": 0.0009187713310580205, "loss": 6.6569, "step": 714 }, { "epoch": 0.2440273037542662, "grad_norm": 3.917168378829956, "learning_rate": 0.0009186575654152446, "loss": 6.2551, "step": 715 }, { "epoch": 0.24436860068259386, "grad_norm": 3.537386417388916, "learning_rate": 0.0009185437997724687, "loss": 7.0423, "step": 716 }, { "epoch": 0.2447098976109215, "grad_norm": 3.577162027359009, "learning_rate": 0.0009184300341296928, "loss": 6.9338, "step": 717 }, { "epoch": 0.24505119453924915, "grad_norm": 3.8202831745147705, "learning_rate": 0.0009183162684869169, "loss": 6.0808, "step": 718 }, { "epoch": 0.2453924914675768, "grad_norm": 3.6418538093566895, "learning_rate": 0.0009182025028441411, "loss": 7.1966, "step": 719 }, { "epoch": 0.24573378839590443, "grad_norm": 3.42926025390625, "learning_rate": 0.0009180887372013652, "loss": 7.0308, "step": 720 }, { "epoch": 0.24607508532423208, "grad_norm": 3.68467378616333, "learning_rate": 0.0009179749715585893, "loss": 6.9812, "step": 721 }, { "epoch": 0.24641638225255974, "grad_norm": 3.54465651512146, "learning_rate": 0.0009178612059158134, "loss": 6.8175, "step": 722 }, { "epoch": 0.24675767918088737, "grad_norm": 3.897510051727295, "learning_rate": 0.0009177474402730375, "loss": 6.6669, "step": 723 }, { "epoch": 0.24709897610921502, "grad_norm": 3.5380194187164307, "learning_rate": 0.0009176336746302618, "loss": 6.7135, "step": 724 }, { "epoch": 0.24744027303754265, "grad_norm": 4.017455577850342, "learning_rate": 0.0009175199089874859, "loss": 6.2163, "step": 725 }, { "epoch": 0.2477815699658703, "grad_norm": 3.646085739135742, "learning_rate": 0.00091740614334471, "loss": 6.6526, "step": 726 }, { "epoch": 0.24812286689419796, "grad_norm": 3.8709123134613037, "learning_rate": 0.0009172923777019341, "loss": 6.7876, "step": 727 }, { "epoch": 0.2484641638225256, "grad_norm": 3.960822582244873, "learning_rate": 0.0009171786120591582, "loss": 6.7348, "step": 728 }, { "epoch": 0.24880546075085325, "grad_norm": 3.8198039531707764, "learning_rate": 0.0009170648464163823, "loss": 6.4908, "step": 729 }, { "epoch": 0.24914675767918087, "grad_norm": 4.048033237457275, "learning_rate": 0.0009169510807736064, "loss": 7.083, "step": 730 }, { "epoch": 0.24948805460750853, "grad_norm": 3.5977671146392822, "learning_rate": 0.0009168373151308305, "loss": 7.1354, "step": 731 }, { "epoch": 0.24982935153583619, "grad_norm": 3.9307358264923096, "learning_rate": 0.0009167235494880546, "loss": 6.8698, "step": 732 }, { "epoch": 0.2501706484641638, "grad_norm": 3.6373484134674072, "learning_rate": 0.0009166097838452787, "loss": 6.9548, "step": 733 }, { "epoch": 0.25051194539249144, "grad_norm": 3.8144493103027344, "learning_rate": 0.0009164960182025028, "loss": 6.875, "step": 734 }, { "epoch": 0.2508532423208191, "grad_norm": 3.9613707065582275, "learning_rate": 0.0009163822525597269, "loss": 6.3705, "step": 735 }, { "epoch": 0.25119453924914675, "grad_norm": 4.828775882720947, "learning_rate": 0.0009162684869169511, "loss": 6.0426, "step": 736 }, { "epoch": 0.2515358361774744, "grad_norm": 3.8135440349578857, "learning_rate": 0.0009161547212741752, "loss": 6.7698, "step": 737 }, { "epoch": 0.25187713310580206, "grad_norm": 3.477957248687744, "learning_rate": 0.0009160409556313993, "loss": 6.7878, "step": 738 }, { "epoch": 0.2522184300341297, "grad_norm": 3.6808841228485107, "learning_rate": 0.0009159271899886234, "loss": 6.9381, "step": 739 }, { "epoch": 0.2525597269624573, "grad_norm": 3.500927448272705, "learning_rate": 0.0009158134243458475, "loss": 6.7481, "step": 740 }, { "epoch": 0.252901023890785, "grad_norm": 3.291139602661133, "learning_rate": 0.0009156996587030717, "loss": 6.6917, "step": 741 }, { "epoch": 0.25324232081911263, "grad_norm": 3.652759552001953, "learning_rate": 0.0009155858930602959, "loss": 7.108, "step": 742 }, { "epoch": 0.25358361774744026, "grad_norm": 3.6527302265167236, "learning_rate": 0.00091547212741752, "loss": 6.6711, "step": 743 }, { "epoch": 0.25392491467576794, "grad_norm": 3.4871606826782227, "learning_rate": 0.0009153583617747441, "loss": 7.0744, "step": 744 }, { "epoch": 0.25426621160409557, "grad_norm": 5.465394973754883, "learning_rate": 0.0009152445961319682, "loss": 6.4962, "step": 745 }, { "epoch": 0.2546075085324232, "grad_norm": 3.8844666481018066, "learning_rate": 0.0009151308304891923, "loss": 6.8589, "step": 746 }, { "epoch": 0.2549488054607508, "grad_norm": 3.7383275032043457, "learning_rate": 0.0009150170648464165, "loss": 7.1996, "step": 747 }, { "epoch": 0.2552901023890785, "grad_norm": 3.918121814727783, "learning_rate": 0.0009149032992036406, "loss": 6.5667, "step": 748 }, { "epoch": 0.25563139931740614, "grad_norm": 3.829987049102783, "learning_rate": 0.0009147895335608646, "loss": 6.5303, "step": 749 }, { "epoch": 0.25597269624573377, "grad_norm": 3.3914873600006104, "learning_rate": 0.0009146757679180887, "loss": 7.1671, "step": 750 }, { "epoch": 0.25631399317406145, "grad_norm": 3.518707275390625, "learning_rate": 0.0009145620022753128, "loss": 6.7905, "step": 751 }, { "epoch": 0.2566552901023891, "grad_norm": 3.641456127166748, "learning_rate": 0.0009144482366325369, "loss": 6.7043, "step": 752 }, { "epoch": 0.2569965870307167, "grad_norm": 3.5336403846740723, "learning_rate": 0.0009143344709897611, "loss": 6.4704, "step": 753 }, { "epoch": 0.2573378839590444, "grad_norm": 3.45283579826355, "learning_rate": 0.0009142207053469852, "loss": 6.7629, "step": 754 }, { "epoch": 0.257679180887372, "grad_norm": 3.4973959922790527, "learning_rate": 0.0009141069397042093, "loss": 6.82, "step": 755 }, { "epoch": 0.25802047781569964, "grad_norm": 3.7503435611724854, "learning_rate": 0.0009139931740614334, "loss": 6.2377, "step": 756 }, { "epoch": 0.25836177474402733, "grad_norm": 3.7249867916107178, "learning_rate": 0.0009138794084186575, "loss": 7.3, "step": 757 }, { "epoch": 0.25870307167235496, "grad_norm": 3.5649001598358154, "learning_rate": 0.0009137656427758817, "loss": 6.6075, "step": 758 }, { "epoch": 0.2590443686006826, "grad_norm": 4.057753562927246, "learning_rate": 0.0009136518771331059, "loss": 6.6559, "step": 759 }, { "epoch": 0.2593856655290102, "grad_norm": 3.949220657348633, "learning_rate": 0.00091353811149033, "loss": 6.8383, "step": 760 }, { "epoch": 0.2597269624573379, "grad_norm": 3.460750102996826, "learning_rate": 0.0009134243458475541, "loss": 6.892, "step": 761 }, { "epoch": 0.2600682593856655, "grad_norm": 7.605457305908203, "learning_rate": 0.0009133105802047782, "loss": 6.766, "step": 762 }, { "epoch": 0.26040955631399315, "grad_norm": 3.6430115699768066, "learning_rate": 0.0009131968145620023, "loss": 6.9554, "step": 763 }, { "epoch": 0.26075085324232083, "grad_norm": 3.728748083114624, "learning_rate": 0.0009130830489192265, "loss": 7.2722, "step": 764 }, { "epoch": 0.26109215017064846, "grad_norm": 3.6627445220947266, "learning_rate": 0.0009129692832764506, "loss": 6.8527, "step": 765 }, { "epoch": 0.2614334470989761, "grad_norm": 3.310154676437378, "learning_rate": 0.0009128555176336747, "loss": 7.1432, "step": 766 }, { "epoch": 0.2617747440273038, "grad_norm": 3.2910642623901367, "learning_rate": 0.0009127417519908988, "loss": 7.0282, "step": 767 }, { "epoch": 0.2621160409556314, "grad_norm": 3.301368236541748, "learning_rate": 0.0009126279863481229, "loss": 6.8306, "step": 768 }, { "epoch": 0.26245733788395903, "grad_norm": 6.695905685424805, "learning_rate": 0.0009125142207053469, "loss": 5.8919, "step": 769 }, { "epoch": 0.2627986348122867, "grad_norm": 3.5930447578430176, "learning_rate": 0.0009124004550625711, "loss": 6.3776, "step": 770 }, { "epoch": 0.26313993174061434, "grad_norm": 3.598477363586426, "learning_rate": 0.0009122866894197952, "loss": 6.9116, "step": 771 }, { "epoch": 0.26348122866894197, "grad_norm": 3.46647572517395, "learning_rate": 0.0009121729237770193, "loss": 6.6275, "step": 772 }, { "epoch": 0.2638225255972696, "grad_norm": 3.6713485717773438, "learning_rate": 0.0009120591581342434, "loss": 6.3535, "step": 773 }, { "epoch": 0.2641638225255973, "grad_norm": 4.576082229614258, "learning_rate": 0.0009119453924914675, "loss": 6.585, "step": 774 }, { "epoch": 0.2645051194539249, "grad_norm": 3.976733922958374, "learning_rate": 0.0009118316268486917, "loss": 6.479, "step": 775 }, { "epoch": 0.26484641638225254, "grad_norm": 3.6250696182250977, "learning_rate": 0.0009117178612059159, "loss": 6.4861, "step": 776 }, { "epoch": 0.2651877133105802, "grad_norm": 3.517406702041626, "learning_rate": 0.00091160409556314, "loss": 7.2256, "step": 777 }, { "epoch": 0.26552901023890785, "grad_norm": 3.710671901702881, "learning_rate": 0.0009114903299203641, "loss": 6.5771, "step": 778 }, { "epoch": 0.2658703071672355, "grad_norm": 3.4462730884552, "learning_rate": 0.0009113765642775882, "loss": 6.6247, "step": 779 }, { "epoch": 0.26621160409556316, "grad_norm": 3.5399367809295654, "learning_rate": 0.0009112627986348123, "loss": 6.4941, "step": 780 }, { "epoch": 0.2665529010238908, "grad_norm": 3.5168278217315674, "learning_rate": 0.0009111490329920364, "loss": 6.8313, "step": 781 }, { "epoch": 0.2668941979522184, "grad_norm": 3.445139169692993, "learning_rate": 0.0009110352673492606, "loss": 6.7253, "step": 782 }, { "epoch": 0.2672354948805461, "grad_norm": 3.9971537590026855, "learning_rate": 0.0009109215017064847, "loss": 6.8024, "step": 783 }, { "epoch": 0.2675767918088737, "grad_norm": 3.8834123611450195, "learning_rate": 0.0009108077360637088, "loss": 6.729, "step": 784 }, { "epoch": 0.26791808873720135, "grad_norm": 4.05747652053833, "learning_rate": 0.0009106939704209329, "loss": 6.3846, "step": 785 }, { "epoch": 0.26825938566552904, "grad_norm": 3.5583267211914062, "learning_rate": 0.000910580204778157, "loss": 7.0615, "step": 786 }, { "epoch": 0.26860068259385667, "grad_norm": 3.9900383949279785, "learning_rate": 0.0009104664391353812, "loss": 6.3194, "step": 787 }, { "epoch": 0.2689419795221843, "grad_norm": 3.564735174179077, "learning_rate": 0.0009103526734926052, "loss": 6.736, "step": 788 }, { "epoch": 0.2692832764505119, "grad_norm": 3.4203550815582275, "learning_rate": 0.0009102389078498293, "loss": 6.8528, "step": 789 }, { "epoch": 0.2696245733788396, "grad_norm": 3.507297992706299, "learning_rate": 0.0009101251422070534, "loss": 6.7446, "step": 790 }, { "epoch": 0.26996587030716723, "grad_norm": 3.489607095718384, "learning_rate": 0.0009100113765642775, "loss": 7.1027, "step": 791 }, { "epoch": 0.27030716723549486, "grad_norm": 3.3406436443328857, "learning_rate": 0.0009098976109215017, "loss": 6.9971, "step": 792 }, { "epoch": 0.27064846416382254, "grad_norm": 3.441478967666626, "learning_rate": 0.0009097838452787259, "loss": 6.8602, "step": 793 }, { "epoch": 0.27098976109215017, "grad_norm": 3.7188546657562256, "learning_rate": 0.00090967007963595, "loss": 6.4872, "step": 794 }, { "epoch": 0.2713310580204778, "grad_norm": 3.50281023979187, "learning_rate": 0.0009095563139931741, "loss": 6.6612, "step": 795 }, { "epoch": 0.2716723549488055, "grad_norm": 3.8761279582977295, "learning_rate": 0.0009094425483503982, "loss": 6.5065, "step": 796 }, { "epoch": 0.2720136518771331, "grad_norm": 5.031599521636963, "learning_rate": 0.0009093287827076223, "loss": 5.0877, "step": 797 }, { "epoch": 0.27235494880546074, "grad_norm": 3.8323252201080322, "learning_rate": 0.0009092150170648464, "loss": 6.7722, "step": 798 }, { "epoch": 0.2726962457337884, "grad_norm": 5.560990333557129, "learning_rate": 0.0009091012514220706, "loss": 5.4916, "step": 799 }, { "epoch": 0.27303754266211605, "grad_norm": 3.516458511352539, "learning_rate": 0.0009089874857792947, "loss": 6.7099, "step": 800 }, { "epoch": 0.2733788395904437, "grad_norm": 3.5895726680755615, "learning_rate": 0.0009088737201365188, "loss": 6.3197, "step": 801 }, { "epoch": 0.2737201365187713, "grad_norm": 3.489635467529297, "learning_rate": 0.0009087599544937429, "loss": 7.0153, "step": 802 }, { "epoch": 0.274061433447099, "grad_norm": 3.5357353687286377, "learning_rate": 0.000908646188850967, "loss": 6.392, "step": 803 }, { "epoch": 0.2744027303754266, "grad_norm": 3.435361623764038, "learning_rate": 0.0009085324232081912, "loss": 6.7289, "step": 804 }, { "epoch": 0.27474402730375425, "grad_norm": 3.4798481464385986, "learning_rate": 0.0009084186575654153, "loss": 7.1502, "step": 805 }, { "epoch": 0.27508532423208193, "grad_norm": 4.012742042541504, "learning_rate": 0.0009083048919226394, "loss": 5.9575, "step": 806 }, { "epoch": 0.27542662116040956, "grad_norm": 3.503770112991333, "learning_rate": 0.0009081911262798636, "loss": 6.7986, "step": 807 }, { "epoch": 0.2757679180887372, "grad_norm": 3.8158676624298096, "learning_rate": 0.0009080773606370875, "loss": 6.6379, "step": 808 }, { "epoch": 0.27610921501706487, "grad_norm": 3.9384515285491943, "learning_rate": 0.0009079635949943117, "loss": 6.6877, "step": 809 }, { "epoch": 0.2764505119453925, "grad_norm": 4.289785861968994, "learning_rate": 0.0009078498293515359, "loss": 6.4728, "step": 810 }, { "epoch": 0.2767918088737201, "grad_norm": 3.6413400173187256, "learning_rate": 0.00090773606370876, "loss": 6.9374, "step": 811 }, { "epoch": 0.2771331058020478, "grad_norm": 3.771023988723755, "learning_rate": 0.0009076222980659841, "loss": 7.0035, "step": 812 }, { "epoch": 0.27747440273037544, "grad_norm": 3.7076187133789062, "learning_rate": 0.0009075085324232082, "loss": 6.5376, "step": 813 }, { "epoch": 0.27781569965870306, "grad_norm": 4.858947277069092, "learning_rate": 0.0009073947667804323, "loss": 6.2922, "step": 814 }, { "epoch": 0.2781569965870307, "grad_norm": 3.7647488117218018, "learning_rate": 0.0009072810011376564, "loss": 7.0143, "step": 815 }, { "epoch": 0.2784982935153584, "grad_norm": 4.246391773223877, "learning_rate": 0.0009071672354948806, "loss": 6.8741, "step": 816 }, { "epoch": 0.278839590443686, "grad_norm": 3.448207139968872, "learning_rate": 0.0009070534698521047, "loss": 7.1626, "step": 817 }, { "epoch": 0.27918088737201363, "grad_norm": 3.4277663230895996, "learning_rate": 0.0009069397042093288, "loss": 7.1784, "step": 818 }, { "epoch": 0.2795221843003413, "grad_norm": 3.3655548095703125, "learning_rate": 0.0009068259385665529, "loss": 6.8989, "step": 819 }, { "epoch": 0.27986348122866894, "grad_norm": 15.125733375549316, "learning_rate": 0.000906712172923777, "loss": 6.2618, "step": 820 }, { "epoch": 0.28020477815699657, "grad_norm": 3.803508996963501, "learning_rate": 0.0009065984072810011, "loss": 7.2742, "step": 821 }, { "epoch": 0.28054607508532425, "grad_norm": 4.941516399383545, "learning_rate": 0.0009064846416382253, "loss": 6.3995, "step": 822 }, { "epoch": 0.2808873720136519, "grad_norm": 4.072309494018555, "learning_rate": 0.0009063708759954494, "loss": 6.865, "step": 823 }, { "epoch": 0.2812286689419795, "grad_norm": 3.5273547172546387, "learning_rate": 0.0009062571103526736, "loss": 6.8719, "step": 824 }, { "epoch": 0.2815699658703072, "grad_norm": 3.4187939167022705, "learning_rate": 0.0009061433447098977, "loss": 6.6182, "step": 825 }, { "epoch": 0.2819112627986348, "grad_norm": 5.67510461807251, "learning_rate": 0.0009060295790671218, "loss": 5.9724, "step": 826 }, { "epoch": 0.28225255972696245, "grad_norm": 3.5598597526550293, "learning_rate": 0.0009059158134243459, "loss": 7.0266, "step": 827 }, { "epoch": 0.2825938566552901, "grad_norm": 4.407433986663818, "learning_rate": 0.00090580204778157, "loss": 6.3405, "step": 828 }, { "epoch": 0.28293515358361776, "grad_norm": 3.8481311798095703, "learning_rate": 0.0009056882821387941, "loss": 6.6051, "step": 829 }, { "epoch": 0.2832764505119454, "grad_norm": 3.4790523052215576, "learning_rate": 0.0009055745164960182, "loss": 6.5085, "step": 830 }, { "epoch": 0.283617747440273, "grad_norm": 3.397590398788452, "learning_rate": 0.0009054607508532423, "loss": 7.1648, "step": 831 }, { "epoch": 0.2839590443686007, "grad_norm": 3.374161958694458, "learning_rate": 0.0009053469852104664, "loss": 6.8825, "step": 832 }, { "epoch": 0.2843003412969283, "grad_norm": 3.5871407985687256, "learning_rate": 0.0009052332195676906, "loss": 6.7742, "step": 833 }, { "epoch": 0.28464163822525596, "grad_norm": 5.533285140991211, "learning_rate": 0.0009051194539249147, "loss": 6.1539, "step": 834 }, { "epoch": 0.28498293515358364, "grad_norm": 4.258869647979736, "learning_rate": 0.0009050056882821388, "loss": 6.2765, "step": 835 }, { "epoch": 0.28532423208191127, "grad_norm": 3.785416603088379, "learning_rate": 0.0009048919226393629, "loss": 7.2967, "step": 836 }, { "epoch": 0.2856655290102389, "grad_norm": 3.6271724700927734, "learning_rate": 0.000904778156996587, "loss": 7.1172, "step": 837 }, { "epoch": 0.2860068259385666, "grad_norm": 3.934699296951294, "learning_rate": 0.0009046643913538111, "loss": 6.4742, "step": 838 }, { "epoch": 0.2863481228668942, "grad_norm": 3.78275990486145, "learning_rate": 0.0009045506257110353, "loss": 6.3398, "step": 839 }, { "epoch": 0.28668941979522183, "grad_norm": 4.096293926239014, "learning_rate": 0.0009044368600682594, "loss": 6.7516, "step": 840 }, { "epoch": 0.28703071672354946, "grad_norm": 9.30138111114502, "learning_rate": 0.0009043230944254836, "loss": 7.8543, "step": 841 }, { "epoch": 0.28737201365187715, "grad_norm": 3.7339835166931152, "learning_rate": 0.0009042093287827077, "loss": 7.1888, "step": 842 }, { "epoch": 0.2877133105802048, "grad_norm": 3.5420281887054443, "learning_rate": 0.0009040955631399318, "loss": 6.9893, "step": 843 }, { "epoch": 0.2880546075085324, "grad_norm": 3.5827724933624268, "learning_rate": 0.0009039817974971559, "loss": 6.8226, "step": 844 }, { "epoch": 0.2883959044368601, "grad_norm": 22.785751342773438, "learning_rate": 0.0009038680318543801, "loss": 6.6574, "step": 845 }, { "epoch": 0.2887372013651877, "grad_norm": 3.686525583267212, "learning_rate": 0.0009037542662116041, "loss": 6.6054, "step": 846 }, { "epoch": 0.28907849829351534, "grad_norm": 3.8692002296447754, "learning_rate": 0.0009036405005688282, "loss": 6.785, "step": 847 }, { "epoch": 0.289419795221843, "grad_norm": 5.953486919403076, "learning_rate": 0.0009035267349260523, "loss": 4.0383, "step": 848 }, { "epoch": 0.28976109215017065, "grad_norm": 3.760619878768921, "learning_rate": 0.0009034129692832764, "loss": 6.9499, "step": 849 }, { "epoch": 0.2901023890784983, "grad_norm": 4.419182300567627, "learning_rate": 0.0009032992036405006, "loss": 6.7118, "step": 850 }, { "epoch": 0.29044368600682596, "grad_norm": 3.5752365589141846, "learning_rate": 0.0009031854379977247, "loss": 6.9143, "step": 851 }, { "epoch": 0.2907849829351536, "grad_norm": 3.5920283794403076, "learning_rate": 0.0009030716723549488, "loss": 7.0298, "step": 852 }, { "epoch": 0.2911262798634812, "grad_norm": 4.226795196533203, "learning_rate": 0.0009029579067121729, "loss": 5.9417, "step": 853 }, { "epoch": 0.29146757679180885, "grad_norm": 3.6170947551727295, "learning_rate": 0.000902844141069397, "loss": 6.926, "step": 854 }, { "epoch": 0.29180887372013653, "grad_norm": 3.58585786819458, "learning_rate": 0.0009027303754266211, "loss": 6.7011, "step": 855 }, { "epoch": 0.29215017064846416, "grad_norm": 3.7158362865448, "learning_rate": 0.0009026166097838453, "loss": 6.3346, "step": 856 }, { "epoch": 0.2924914675767918, "grad_norm": 3.4805827140808105, "learning_rate": 0.0009025028441410694, "loss": 7.1524, "step": 857 }, { "epoch": 0.29283276450511947, "grad_norm": 4.309206485748291, "learning_rate": 0.0009023890784982936, "loss": 6.79, "step": 858 }, { "epoch": 0.2931740614334471, "grad_norm": 4.295877456665039, "learning_rate": 0.0009022753128555177, "loss": 6.5865, "step": 859 }, { "epoch": 0.2935153583617747, "grad_norm": 3.572010040283203, "learning_rate": 0.0009021615472127418, "loss": 6.9353, "step": 860 }, { "epoch": 0.2938566552901024, "grad_norm": 3.4424901008605957, "learning_rate": 0.0009020477815699659, "loss": 6.7194, "step": 861 }, { "epoch": 0.29419795221843004, "grad_norm": 3.855348825454712, "learning_rate": 0.0009019340159271901, "loss": 6.9222, "step": 862 }, { "epoch": 0.29453924914675766, "grad_norm": 6.261025428771973, "learning_rate": 0.0009018202502844142, "loss": 5.8834, "step": 863 }, { "epoch": 0.29488054607508535, "grad_norm": 4.185757160186768, "learning_rate": 0.0009017064846416383, "loss": 6.8247, "step": 864 }, { "epoch": 0.295221843003413, "grad_norm": 3.7310314178466797, "learning_rate": 0.0009015927189988624, "loss": 7.1868, "step": 865 }, { "epoch": 0.2955631399317406, "grad_norm": 3.530855655670166, "learning_rate": 0.0009014789533560864, "loss": 7.2622, "step": 866 }, { "epoch": 0.29590443686006823, "grad_norm": 7.029987335205078, "learning_rate": 0.0009013651877133105, "loss": 6.3624, "step": 867 }, { "epoch": 0.2962457337883959, "grad_norm": 3.599027633666992, "learning_rate": 0.0009012514220705347, "loss": 7.0364, "step": 868 }, { "epoch": 0.29658703071672354, "grad_norm": 3.781937599182129, "learning_rate": 0.0009011376564277588, "loss": 6.8376, "step": 869 }, { "epoch": 0.29692832764505117, "grad_norm": 3.625401258468628, "learning_rate": 0.0009010238907849829, "loss": 7.1427, "step": 870 }, { "epoch": 0.29726962457337885, "grad_norm": 3.468726396560669, "learning_rate": 0.000900910125142207, "loss": 7.1632, "step": 871 }, { "epoch": 0.2976109215017065, "grad_norm": 5.193005084991455, "learning_rate": 0.0009007963594994311, "loss": 6.2791, "step": 872 }, { "epoch": 0.2979522184300341, "grad_norm": 3.5303850173950195, "learning_rate": 0.0009006825938566553, "loss": 6.9128, "step": 873 }, { "epoch": 0.2982935153583618, "grad_norm": 3.5984513759613037, "learning_rate": 0.0009005688282138795, "loss": 6.8501, "step": 874 }, { "epoch": 0.2986348122866894, "grad_norm": 13.128771781921387, "learning_rate": 0.0009004550625711036, "loss": 4.9908, "step": 875 }, { "epoch": 0.29897610921501705, "grad_norm": 4.513108730316162, "learning_rate": 0.0009003412969283277, "loss": 6.2214, "step": 876 }, { "epoch": 0.29931740614334473, "grad_norm": 3.9287192821502686, "learning_rate": 0.0009002275312855518, "loss": 6.5736, "step": 877 }, { "epoch": 0.29965870307167236, "grad_norm": 3.6785993576049805, "learning_rate": 0.0009001137656427759, "loss": 6.5955, "step": 878 }, { "epoch": 0.3, "grad_norm": 3.5705087184906006, "learning_rate": 0.0009000000000000001, "loss": 6.6474, "step": 879 }, { "epoch": 0.3003412969283277, "grad_norm": 3.4256176948547363, "learning_rate": 0.0008998862343572242, "loss": 7.1297, "step": 880 }, { "epoch": 0.3006825938566553, "grad_norm": 3.45796275138855, "learning_rate": 0.0008997724687144483, "loss": 7.0026, "step": 881 }, { "epoch": 0.30102389078498293, "grad_norm": 3.569291353225708, "learning_rate": 0.0008996587030716724, "loss": 6.8185, "step": 882 }, { "epoch": 0.30136518771331056, "grad_norm": 4.46466588973999, "learning_rate": 0.0008995449374288965, "loss": 6.4208, "step": 883 }, { "epoch": 0.30170648464163824, "grad_norm": 3.295574903488159, "learning_rate": 0.0008994311717861206, "loss": 6.6947, "step": 884 }, { "epoch": 0.30204778156996587, "grad_norm": 3.447225570678711, "learning_rate": 0.0008993174061433447, "loss": 7.3734, "step": 885 }, { "epoch": 0.3023890784982935, "grad_norm": 3.348236322402954, "learning_rate": 0.0008992036405005688, "loss": 6.6871, "step": 886 }, { "epoch": 0.3027303754266212, "grad_norm": 3.5283188819885254, "learning_rate": 0.0008990898748577929, "loss": 6.9201, "step": 887 }, { "epoch": 0.3030716723549488, "grad_norm": 3.4299886226654053, "learning_rate": 0.000898976109215017, "loss": 6.8192, "step": 888 }, { "epoch": 0.30341296928327643, "grad_norm": 3.3818650245666504, "learning_rate": 0.0008988623435722411, "loss": 6.4265, "step": 889 }, { "epoch": 0.3037542662116041, "grad_norm": 3.4895620346069336, "learning_rate": 0.0008987485779294653, "loss": 6.9777, "step": 890 }, { "epoch": 0.30409556313993175, "grad_norm": 3.668978691101074, "learning_rate": 0.0008986348122866895, "loss": 6.9788, "step": 891 }, { "epoch": 0.3044368600682594, "grad_norm": 3.538581132888794, "learning_rate": 0.0008985210466439136, "loss": 6.7965, "step": 892 }, { "epoch": 0.30477815699658706, "grad_norm": 3.531942844390869, "learning_rate": 0.0008984072810011377, "loss": 7.2188, "step": 893 }, { "epoch": 0.3051194539249147, "grad_norm": 3.566673755645752, "learning_rate": 0.0008982935153583618, "loss": 6.6428, "step": 894 }, { "epoch": 0.3054607508532423, "grad_norm": 3.5514180660247803, "learning_rate": 0.0008981797497155859, "loss": 7.0462, "step": 895 }, { "epoch": 0.30580204778156994, "grad_norm": 9.032792091369629, "learning_rate": 0.0008980659840728101, "loss": 8.609, "step": 896 }, { "epoch": 0.3061433447098976, "grad_norm": 3.692873001098633, "learning_rate": 0.0008979522184300342, "loss": 6.7491, "step": 897 }, { "epoch": 0.30648464163822525, "grad_norm": 3.670551061630249, "learning_rate": 0.0008978384527872583, "loss": 6.3838, "step": 898 }, { "epoch": 0.3068259385665529, "grad_norm": 5.768166542053223, "learning_rate": 0.0008977246871444824, "loss": 6.4646, "step": 899 }, { "epoch": 0.30716723549488056, "grad_norm": 3.830822706222534, "learning_rate": 0.0008976109215017065, "loss": 6.4905, "step": 900 }, { "epoch": 0.3075085324232082, "grad_norm": 3.606807231903076, "learning_rate": 0.0008974971558589306, "loss": 6.8074, "step": 901 }, { "epoch": 0.3078498293515358, "grad_norm": 3.6105079650878906, "learning_rate": 0.0008973833902161548, "loss": 6.8121, "step": 902 }, { "epoch": 0.3081911262798635, "grad_norm": 3.3105735778808594, "learning_rate": 0.0008972696245733789, "loss": 6.5316, "step": 903 }, { "epoch": 0.30853242320819113, "grad_norm": 4.859966278076172, "learning_rate": 0.000897155858930603, "loss": 6.0759, "step": 904 }, { "epoch": 0.30887372013651876, "grad_norm": 3.6393473148345947, "learning_rate": 0.000897042093287827, "loss": 6.8395, "step": 905 }, { "epoch": 0.30921501706484644, "grad_norm": 3.572521686553955, "learning_rate": 0.0008969283276450511, "loss": 6.8854, "step": 906 }, { "epoch": 0.30955631399317407, "grad_norm": 3.761270046234131, "learning_rate": 0.0008968145620022752, "loss": 6.2211, "step": 907 }, { "epoch": 0.3098976109215017, "grad_norm": 4.0054192543029785, "learning_rate": 0.0008967007963594995, "loss": 6.4794, "step": 908 }, { "epoch": 0.3102389078498293, "grad_norm": 3.401998519897461, "learning_rate": 0.0008965870307167236, "loss": 7.0889, "step": 909 }, { "epoch": 0.310580204778157, "grad_norm": 7.148408889770508, "learning_rate": 0.0008964732650739477, "loss": 6.3543, "step": 910 }, { "epoch": 0.31092150170648464, "grad_norm": 3.945793628692627, "learning_rate": 0.0008963594994311718, "loss": 7.0554, "step": 911 }, { "epoch": 0.31126279863481227, "grad_norm": 4.088592052459717, "learning_rate": 0.0008962457337883959, "loss": 6.5995, "step": 912 }, { "epoch": 0.31160409556313995, "grad_norm": 4.394853591918945, "learning_rate": 0.0008961319681456201, "loss": 7.0796, "step": 913 }, { "epoch": 0.3119453924914676, "grad_norm": 3.678943395614624, "learning_rate": 0.0008960182025028442, "loss": 7.3075, "step": 914 }, { "epoch": 0.3122866894197952, "grad_norm": 3.424969434738159, "learning_rate": 0.0008959044368600683, "loss": 6.9823, "step": 915 }, { "epoch": 0.3126279863481229, "grad_norm": 3.4337804317474365, "learning_rate": 0.0008957906712172924, "loss": 6.8313, "step": 916 }, { "epoch": 0.3129692832764505, "grad_norm": 3.492877960205078, "learning_rate": 0.0008956769055745165, "loss": 6.7776, "step": 917 }, { "epoch": 0.31331058020477814, "grad_norm": 3.4652328491210938, "learning_rate": 0.0008955631399317406, "loss": 6.5177, "step": 918 }, { "epoch": 0.3136518771331058, "grad_norm": 3.4624969959259033, "learning_rate": 0.0008954493742889648, "loss": 6.8393, "step": 919 }, { "epoch": 0.31399317406143346, "grad_norm": 3.3327879905700684, "learning_rate": 0.0008953356086461889, "loss": 6.9991, "step": 920 }, { "epoch": 0.3143344709897611, "grad_norm": 3.756620168685913, "learning_rate": 0.000895221843003413, "loss": 6.8865, "step": 921 }, { "epoch": 0.3146757679180887, "grad_norm": 8.067540168762207, "learning_rate": 0.0008951080773606371, "loss": 6.0514, "step": 922 }, { "epoch": 0.3150170648464164, "grad_norm": 3.782060384750366, "learning_rate": 0.0008949943117178612, "loss": 7.1974, "step": 923 }, { "epoch": 0.315358361774744, "grad_norm": 3.9047977924346924, "learning_rate": 0.0008948805460750852, "loss": 6.9444, "step": 924 }, { "epoch": 0.31569965870307165, "grad_norm": 3.4596681594848633, "learning_rate": 0.0008947667804323095, "loss": 6.9334, "step": 925 }, { "epoch": 0.31604095563139933, "grad_norm": 3.472452163696289, "learning_rate": 0.0008946530147895336, "loss": 6.5, "step": 926 }, { "epoch": 0.31638225255972696, "grad_norm": 3.3752787113189697, "learning_rate": 0.0008945392491467577, "loss": 6.3694, "step": 927 }, { "epoch": 0.3167235494880546, "grad_norm": 15.716917037963867, "learning_rate": 0.0008944254835039818, "loss": 9.5123, "step": 928 }, { "epoch": 0.3170648464163823, "grad_norm": 4.5218353271484375, "learning_rate": 0.0008943117178612059, "loss": 6.7683, "step": 929 }, { "epoch": 0.3174061433447099, "grad_norm": 3.797961711883545, "learning_rate": 0.0008941979522184301, "loss": 6.4766, "step": 930 }, { "epoch": 0.31774744027303753, "grad_norm": 5.070075035095215, "learning_rate": 0.0008940841865756542, "loss": 6.3685, "step": 931 }, { "epoch": 0.3180887372013652, "grad_norm": 3.9309263229370117, "learning_rate": 0.0008939704209328783, "loss": 5.8371, "step": 932 }, { "epoch": 0.31843003412969284, "grad_norm": 4.169372081756592, "learning_rate": 0.0008938566552901024, "loss": 7.4385, "step": 933 }, { "epoch": 0.31877133105802047, "grad_norm": 3.563783884048462, "learning_rate": 0.0008937428896473265, "loss": 6.448, "step": 934 }, { "epoch": 0.3191126279863481, "grad_norm": 3.728722333908081, "learning_rate": 0.0008936291240045506, "loss": 6.3533, "step": 935 }, { "epoch": 0.3194539249146758, "grad_norm": 3.3905997276306152, "learning_rate": 0.0008935153583617748, "loss": 6.6507, "step": 936 }, { "epoch": 0.3197952218430034, "grad_norm": 3.2747488021850586, "learning_rate": 0.0008934015927189989, "loss": 7.0518, "step": 937 }, { "epoch": 0.32013651877133104, "grad_norm": 3.3199024200439453, "learning_rate": 0.000893287827076223, "loss": 6.4047, "step": 938 }, { "epoch": 0.3204778156996587, "grad_norm": 6.756965637207031, "learning_rate": 0.0008931740614334471, "loss": 6.8004, "step": 939 }, { "epoch": 0.32081911262798635, "grad_norm": 3.5635828971862793, "learning_rate": 0.0008930602957906712, "loss": 6.7656, "step": 940 }, { "epoch": 0.321160409556314, "grad_norm": 3.9271492958068848, "learning_rate": 0.0008929465301478953, "loss": 6.823, "step": 941 }, { "epoch": 0.32150170648464166, "grad_norm": 3.3149237632751465, "learning_rate": 0.0008928327645051196, "loss": 4.3894, "step": 942 }, { "epoch": 0.3218430034129693, "grad_norm": 4.048464298248291, "learning_rate": 0.0008927189988623437, "loss": 7.0178, "step": 943 }, { "epoch": 0.3221843003412969, "grad_norm": 4.173979759216309, "learning_rate": 0.0008926052332195677, "loss": 6.5905, "step": 944 }, { "epoch": 0.3225255972696246, "grad_norm": 3.4944701194763184, "learning_rate": 0.0008924914675767918, "loss": 6.7574, "step": 945 }, { "epoch": 0.3228668941979522, "grad_norm": 3.6318819522857666, "learning_rate": 0.0008923777019340159, "loss": 6.7549, "step": 946 }, { "epoch": 0.32320819112627985, "grad_norm": 3.452150821685791, "learning_rate": 0.00089226393629124, "loss": 6.6196, "step": 947 }, { "epoch": 0.3235494880546075, "grad_norm": 5.811069488525391, "learning_rate": 0.0008921501706484642, "loss": 5.2026, "step": 948 }, { "epoch": 0.32389078498293516, "grad_norm": 3.539780378341675, "learning_rate": 0.0008920364050056883, "loss": 6.6506, "step": 949 }, { "epoch": 0.3242320819112628, "grad_norm": 4.116481304168701, "learning_rate": 0.0008919226393629124, "loss": 6.993, "step": 950 }, { "epoch": 0.3245733788395904, "grad_norm": 3.6477298736572266, "learning_rate": 0.0008918088737201365, "loss": 6.3687, "step": 951 }, { "epoch": 0.3249146757679181, "grad_norm": 3.5045762062072754, "learning_rate": 0.0008916951080773606, "loss": 6.5074, "step": 952 }, { "epoch": 0.32525597269624573, "grad_norm": 4.331373691558838, "learning_rate": 0.0008915813424345848, "loss": 6.8514, "step": 953 }, { "epoch": 0.32559726962457336, "grad_norm": 3.383375644683838, "learning_rate": 0.0008914675767918089, "loss": 6.7091, "step": 954 }, { "epoch": 0.32593856655290104, "grad_norm": 3.8869664669036865, "learning_rate": 0.000891353811149033, "loss": 6.7753, "step": 955 }, { "epoch": 0.32627986348122867, "grad_norm": 5.349417209625244, "learning_rate": 0.0008912400455062571, "loss": 6.3789, "step": 956 }, { "epoch": 0.3266211604095563, "grad_norm": 3.5549848079681396, "learning_rate": 0.0008911262798634812, "loss": 6.8974, "step": 957 }, { "epoch": 0.326962457337884, "grad_norm": 3.521125555038452, "learning_rate": 0.0008910125142207054, "loss": 6.947, "step": 958 }, { "epoch": 0.3273037542662116, "grad_norm": 5.296824932098389, "learning_rate": 0.0008908987485779296, "loss": 5.9691, "step": 959 }, { "epoch": 0.32764505119453924, "grad_norm": 3.733386516571045, "learning_rate": 0.0008907849829351537, "loss": 6.672, "step": 960 }, { "epoch": 0.32798634812286687, "grad_norm": 4.317715644836426, "learning_rate": 0.0008906712172923778, "loss": 6.1089, "step": 961 }, { "epoch": 0.32832764505119455, "grad_norm": 3.5799124240875244, "learning_rate": 0.0008905574516496019, "loss": 6.7116, "step": 962 }, { "epoch": 0.3286689419795222, "grad_norm": 3.54365873336792, "learning_rate": 0.0008904436860068259, "loss": 6.6934, "step": 963 }, { "epoch": 0.3290102389078498, "grad_norm": 3.48354172706604, "learning_rate": 0.00089032992036405, "loss": 6.7771, "step": 964 }, { "epoch": 0.3293515358361775, "grad_norm": 3.8075222969055176, "learning_rate": 0.0008902161547212742, "loss": 6.726, "step": 965 }, { "epoch": 0.3296928327645051, "grad_norm": 3.549077033996582, "learning_rate": 0.0008901023890784983, "loss": 6.8491, "step": 966 }, { "epoch": 0.33003412969283275, "grad_norm": 3.487042188644409, "learning_rate": 0.0008899886234357224, "loss": 6.9951, "step": 967 }, { "epoch": 0.33037542662116043, "grad_norm": 3.4379420280456543, "learning_rate": 0.0008898748577929465, "loss": 6.7213, "step": 968 }, { "epoch": 0.33071672354948806, "grad_norm": 3.3688578605651855, "learning_rate": 0.0008897610921501706, "loss": 6.7468, "step": 969 }, { "epoch": 0.3310580204778157, "grad_norm": 4.88122034072876, "learning_rate": 0.0008896473265073947, "loss": 6.8637, "step": 970 }, { "epoch": 0.33139931740614337, "grad_norm": 3.590115547180176, "learning_rate": 0.0008895335608646189, "loss": 6.4559, "step": 971 }, { "epoch": 0.331740614334471, "grad_norm": 3.8333377838134766, "learning_rate": 0.000889419795221843, "loss": 6.0111, "step": 972 }, { "epoch": 0.3320819112627986, "grad_norm": 3.6459643840789795, "learning_rate": 0.0008893060295790671, "loss": 5.3953, "step": 973 }, { "epoch": 0.3324232081911263, "grad_norm": 3.799060583114624, "learning_rate": 0.0008891922639362912, "loss": 6.8401, "step": 974 }, { "epoch": 0.33276450511945393, "grad_norm": 3.553802728652954, "learning_rate": 0.0008890784982935154, "loss": 6.763, "step": 975 }, { "epoch": 0.33310580204778156, "grad_norm": 5.171194553375244, "learning_rate": 0.0008889647326507396, "loss": 6.4421, "step": 976 }, { "epoch": 0.3334470989761092, "grad_norm": 4.729731559753418, "learning_rate": 0.0008888509670079637, "loss": 4.2545, "step": 977 }, { "epoch": 0.3337883959044369, "grad_norm": 4.018588066101074, "learning_rate": 0.0008887372013651878, "loss": 6.8516, "step": 978 }, { "epoch": 0.3341296928327645, "grad_norm": 3.8841042518615723, "learning_rate": 0.0008886234357224119, "loss": 7.2299, "step": 979 }, { "epoch": 0.33447098976109213, "grad_norm": 3.4637234210968018, "learning_rate": 0.000888509670079636, "loss": 6.5481, "step": 980 }, { "epoch": 0.3348122866894198, "grad_norm": 3.2626147270202637, "learning_rate": 0.0008883959044368601, "loss": 7.2605, "step": 981 }, { "epoch": 0.33515358361774744, "grad_norm": 3.390256643295288, "learning_rate": 0.0008882821387940843, "loss": 6.8236, "step": 982 }, { "epoch": 0.33549488054607507, "grad_norm": 3.666612386703491, "learning_rate": 0.0008881683731513083, "loss": 6.838, "step": 983 }, { "epoch": 0.33583617747440275, "grad_norm": 3.5521507263183594, "learning_rate": 0.0008880546075085324, "loss": 6.6424, "step": 984 }, { "epoch": 0.3361774744027304, "grad_norm": 4.2973551750183105, "learning_rate": 0.0008879408418657565, "loss": 6.7528, "step": 985 }, { "epoch": 0.336518771331058, "grad_norm": 3.663874864578247, "learning_rate": 0.0008878270762229806, "loss": 6.7709, "step": 986 }, { "epoch": 0.3368600682593857, "grad_norm": 3.8758318424224854, "learning_rate": 0.0008877133105802047, "loss": 6.5531, "step": 987 }, { "epoch": 0.3372013651877133, "grad_norm": 3.551164388656616, "learning_rate": 0.0008875995449374289, "loss": 6.9101, "step": 988 }, { "epoch": 0.33754266211604095, "grad_norm": 3.607560873031616, "learning_rate": 0.000887485779294653, "loss": 6.5752, "step": 989 }, { "epoch": 0.3378839590443686, "grad_norm": 3.6682469844818115, "learning_rate": 0.0008873720136518771, "loss": 6.3309, "step": 990 }, { "epoch": 0.33822525597269626, "grad_norm": 3.57940411567688, "learning_rate": 0.0008872582480091012, "loss": 7.2817, "step": 991 }, { "epoch": 0.3385665529010239, "grad_norm": 3.5571844577789307, "learning_rate": 0.0008871444823663254, "loss": 6.6641, "step": 992 }, { "epoch": 0.3389078498293515, "grad_norm": 3.4492499828338623, "learning_rate": 0.0008870307167235496, "loss": 6.9242, "step": 993 }, { "epoch": 0.3392491467576792, "grad_norm": 4.031134128570557, "learning_rate": 0.0008869169510807737, "loss": 6.8263, "step": 994 }, { "epoch": 0.3395904436860068, "grad_norm": 3.3851895332336426, "learning_rate": 0.0008868031854379978, "loss": 6.847, "step": 995 }, { "epoch": 0.33993174061433445, "grad_norm": 4.721614837646484, "learning_rate": 0.0008866894197952219, "loss": 6.7878, "step": 996 }, { "epoch": 0.34027303754266214, "grad_norm": 3.3920321464538574, "learning_rate": 0.000886575654152446, "loss": 6.6165, "step": 997 }, { "epoch": 0.34061433447098977, "grad_norm": 3.5880556106567383, "learning_rate": 0.0008864618885096701, "loss": 6.3648, "step": 998 }, { "epoch": 0.3409556313993174, "grad_norm": 3.7084357738494873, "learning_rate": 0.0008863481228668943, "loss": 6.5777, "step": 999 }, { "epoch": 0.3412969283276451, "grad_norm": 3.5012943744659424, "learning_rate": 0.0008862343572241184, "loss": 6.7863, "step": 1000 }, { "epoch": 0.3416382252559727, "grad_norm": 3.5657830238342285, "learning_rate": 0.0008861205915813425, "loss": 7.3395, "step": 1001 }, { "epoch": 0.34197952218430033, "grad_norm": 4.714262008666992, "learning_rate": 0.0008860068259385665, "loss": 6.4638, "step": 1002 }, { "epoch": 0.34232081911262796, "grad_norm": 3.793743848800659, "learning_rate": 0.0008858930602957906, "loss": 6.6217, "step": 1003 }, { "epoch": 0.34266211604095564, "grad_norm": 6.239543914794922, "learning_rate": 0.0008857792946530147, "loss": 6.3399, "step": 1004 }, { "epoch": 0.3430034129692833, "grad_norm": 3.838210344314575, "learning_rate": 0.0008856655290102389, "loss": 7.1438, "step": 1005 }, { "epoch": 0.3433447098976109, "grad_norm": 4.131673812866211, "learning_rate": 0.000885551763367463, "loss": 6.3229, "step": 1006 }, { "epoch": 0.3436860068259386, "grad_norm": 3.4897634983062744, "learning_rate": 0.0008854379977246871, "loss": 7.3418, "step": 1007 }, { "epoch": 0.3440273037542662, "grad_norm": 3.3156917095184326, "learning_rate": 0.0008853242320819112, "loss": 6.6131, "step": 1008 }, { "epoch": 0.34436860068259384, "grad_norm": 5.101655006408691, "learning_rate": 0.0008852104664391354, "loss": 6.4129, "step": 1009 }, { "epoch": 0.3447098976109215, "grad_norm": 3.4084413051605225, "learning_rate": 0.0008850967007963595, "loss": 6.8648, "step": 1010 }, { "epoch": 0.34505119453924915, "grad_norm": 3.558993101119995, "learning_rate": 0.0008849829351535837, "loss": 6.5622, "step": 1011 }, { "epoch": 0.3453924914675768, "grad_norm": 3.5753562450408936, "learning_rate": 0.0008848691695108078, "loss": 6.7141, "step": 1012 }, { "epoch": 0.34573378839590446, "grad_norm": 3.5802392959594727, "learning_rate": 0.0008847554038680319, "loss": 6.9107, "step": 1013 }, { "epoch": 0.3460750853242321, "grad_norm": 3.4724674224853516, "learning_rate": 0.000884641638225256, "loss": 6.6967, "step": 1014 }, { "epoch": 0.3464163822525597, "grad_norm": 3.293440103530884, "learning_rate": 0.0008845278725824801, "loss": 6.4138, "step": 1015 }, { "epoch": 0.34675767918088735, "grad_norm": 3.4762954711914062, "learning_rate": 0.0008844141069397043, "loss": 6.8232, "step": 1016 }, { "epoch": 0.34709897610921503, "grad_norm": 3.6370632648468018, "learning_rate": 0.0008843003412969284, "loss": 6.9722, "step": 1017 }, { "epoch": 0.34744027303754266, "grad_norm": 3.511749029159546, "learning_rate": 0.0008841865756541525, "loss": 6.8501, "step": 1018 }, { "epoch": 0.3477815699658703, "grad_norm": 5.163912296295166, "learning_rate": 0.0008840728100113766, "loss": 6.1093, "step": 1019 }, { "epoch": 0.34812286689419797, "grad_norm": 8.192817687988281, "learning_rate": 0.0008839590443686007, "loss": 6.5018, "step": 1020 }, { "epoch": 0.3484641638225256, "grad_norm": 3.8490147590637207, "learning_rate": 0.0008838452787258247, "loss": 6.8653, "step": 1021 }, { "epoch": 0.3488054607508532, "grad_norm": 3.7160000801086426, "learning_rate": 0.0008837315130830489, "loss": 6.9512, "step": 1022 }, { "epoch": 0.3491467576791809, "grad_norm": 3.8101956844329834, "learning_rate": 0.000883617747440273, "loss": 6.2376, "step": 1023 }, { "epoch": 0.34948805460750854, "grad_norm": 3.3378794193267822, "learning_rate": 0.0008835039817974971, "loss": 6.8794, "step": 1024 }, { "epoch": 0.34982935153583616, "grad_norm": 3.5934832096099854, "learning_rate": 0.0008833902161547212, "loss": 6.1176, "step": 1025 }, { "epoch": 0.35017064846416385, "grad_norm": 3.461996078491211, "learning_rate": 0.0008832764505119454, "loss": 6.5096, "step": 1026 }, { "epoch": 0.3505119453924915, "grad_norm": 3.272123098373413, "learning_rate": 0.0008831626848691695, "loss": 6.7895, "step": 1027 }, { "epoch": 0.3508532423208191, "grad_norm": 3.3164021968841553, "learning_rate": 0.0008830489192263937, "loss": 6.7558, "step": 1028 }, { "epoch": 0.35119453924914673, "grad_norm": 3.2223832607269287, "learning_rate": 0.0008829351535836178, "loss": 6.5788, "step": 1029 }, { "epoch": 0.3515358361774744, "grad_norm": 5.8226237297058105, "learning_rate": 0.0008828213879408419, "loss": 3.4241, "step": 1030 }, { "epoch": 0.35187713310580204, "grad_norm": 3.6470441818237305, "learning_rate": 0.000882707622298066, "loss": 6.7667, "step": 1031 }, { "epoch": 0.35221843003412967, "grad_norm": 3.8023180961608887, "learning_rate": 0.0008825938566552901, "loss": 6.8807, "step": 1032 }, { "epoch": 0.35255972696245735, "grad_norm": 3.8079946041107178, "learning_rate": 0.0008824800910125143, "loss": 6.7188, "step": 1033 }, { "epoch": 0.352901023890785, "grad_norm": 3.5138330459594727, "learning_rate": 0.0008823663253697384, "loss": 7.1953, "step": 1034 }, { "epoch": 0.3532423208191126, "grad_norm": 3.8133950233459473, "learning_rate": 0.0008822525597269625, "loss": 7.1188, "step": 1035 }, { "epoch": 0.3535836177474403, "grad_norm": 3.640202522277832, "learning_rate": 0.0008821387940841866, "loss": 6.739, "step": 1036 }, { "epoch": 0.3539249146757679, "grad_norm": 3.518843173980713, "learning_rate": 0.0008820250284414107, "loss": 6.8397, "step": 1037 }, { "epoch": 0.35426621160409555, "grad_norm": 3.5482702255249023, "learning_rate": 0.0008819112627986348, "loss": 6.618, "step": 1038 }, { "epoch": 0.35460750853242323, "grad_norm": 3.419344902038574, "learning_rate": 0.000881797497155859, "loss": 6.9977, "step": 1039 }, { "epoch": 0.35494880546075086, "grad_norm": 3.4673240184783936, "learning_rate": 0.0008816837315130831, "loss": 6.4868, "step": 1040 }, { "epoch": 0.3552901023890785, "grad_norm": 8.60751724243164, "learning_rate": 0.0008815699658703071, "loss": 4.6019, "step": 1041 }, { "epoch": 0.3556313993174061, "grad_norm": 4.347542762756348, "learning_rate": 0.0008814562002275312, "loss": 6.7114, "step": 1042 }, { "epoch": 0.3559726962457338, "grad_norm": 5.797741413116455, "learning_rate": 0.0008813424345847554, "loss": 4.9746, "step": 1043 }, { "epoch": 0.3563139931740614, "grad_norm": 4.145489692687988, "learning_rate": 0.0008812286689419795, "loss": 6.8995, "step": 1044 }, { "epoch": 0.35665529010238906, "grad_norm": 4.579039096832275, "learning_rate": 0.0008811149032992037, "loss": 7.1214, "step": 1045 }, { "epoch": 0.35699658703071674, "grad_norm": 3.6013543605804443, "learning_rate": 0.0008810011376564278, "loss": 6.9495, "step": 1046 }, { "epoch": 0.35733788395904437, "grad_norm": 3.570963144302368, "learning_rate": 0.0008808873720136519, "loss": 7.0384, "step": 1047 }, { "epoch": 0.357679180887372, "grad_norm": 3.448312759399414, "learning_rate": 0.000880773606370876, "loss": 6.8964, "step": 1048 }, { "epoch": 0.3580204778156997, "grad_norm": 3.4679174423217773, "learning_rate": 0.0008806598407281001, "loss": 6.5713, "step": 1049 }, { "epoch": 0.3583617747440273, "grad_norm": 3.8704769611358643, "learning_rate": 0.0008805460750853242, "loss": 6.424, "step": 1050 }, { "epoch": 0.35870307167235493, "grad_norm": 3.3437047004699707, "learning_rate": 0.0008804323094425484, "loss": 7.1021, "step": 1051 }, { "epoch": 0.3590443686006826, "grad_norm": 7.765682220458984, "learning_rate": 0.0008803185437997725, "loss": 5.6959, "step": 1052 }, { "epoch": 0.35938566552901025, "grad_norm": 4.041772842407227, "learning_rate": 0.0008802047781569966, "loss": 6.5353, "step": 1053 }, { "epoch": 0.3597269624573379, "grad_norm": 4.038043022155762, "learning_rate": 0.0008800910125142207, "loss": 6.8547, "step": 1054 }, { "epoch": 0.36006825938566556, "grad_norm": 3.7071590423583984, "learning_rate": 0.0008799772468714448, "loss": 6.4563, "step": 1055 }, { "epoch": 0.3604095563139932, "grad_norm": 4.166858196258545, "learning_rate": 0.000879863481228669, "loss": 6.431, "step": 1056 }, { "epoch": 0.3607508532423208, "grad_norm": 3.6435773372650146, "learning_rate": 0.0008797497155858931, "loss": 6.6037, "step": 1057 }, { "epoch": 0.36109215017064844, "grad_norm": 14.491841316223145, "learning_rate": 0.0008796359499431173, "loss": 7.0323, "step": 1058 }, { "epoch": 0.3614334470989761, "grad_norm": 3.81062388420105, "learning_rate": 0.0008795221843003414, "loss": 7.0748, "step": 1059 }, { "epoch": 0.36177474402730375, "grad_norm": 3.9405431747436523, "learning_rate": 0.0008794084186575654, "loss": 6.9883, "step": 1060 }, { "epoch": 0.3621160409556314, "grad_norm": 3.7249462604522705, "learning_rate": 0.0008792946530147895, "loss": 6.7057, "step": 1061 }, { "epoch": 0.36245733788395906, "grad_norm": 4.359280109405518, "learning_rate": 0.0008791808873720137, "loss": 6.2134, "step": 1062 }, { "epoch": 0.3627986348122867, "grad_norm": 3.8895323276519775, "learning_rate": 0.0008790671217292378, "loss": 6.3199, "step": 1063 }, { "epoch": 0.3631399317406143, "grad_norm": 3.3901989459991455, "learning_rate": 0.0008789533560864619, "loss": 6.822, "step": 1064 }, { "epoch": 0.363481228668942, "grad_norm": 3.3694682121276855, "learning_rate": 0.000878839590443686, "loss": 6.8209, "step": 1065 }, { "epoch": 0.36382252559726963, "grad_norm": 3.7551913261413574, "learning_rate": 0.0008787258248009101, "loss": 6.7343, "step": 1066 }, { "epoch": 0.36416382252559726, "grad_norm": 3.3077569007873535, "learning_rate": 0.0008786120591581342, "loss": 6.944, "step": 1067 }, { "epoch": 0.36450511945392494, "grad_norm": 3.292142629623413, "learning_rate": 0.0008784982935153584, "loss": 7.1756, "step": 1068 }, { "epoch": 0.36484641638225257, "grad_norm": 4.133096218109131, "learning_rate": 0.0008783845278725825, "loss": 6.7308, "step": 1069 }, { "epoch": 0.3651877133105802, "grad_norm": 3.361743927001953, "learning_rate": 0.0008782707622298066, "loss": 7.252, "step": 1070 }, { "epoch": 0.3655290102389078, "grad_norm": 3.582728624343872, "learning_rate": 0.0008781569965870307, "loss": 6.8102, "step": 1071 }, { "epoch": 0.3658703071672355, "grad_norm": 4.287242412567139, "learning_rate": 0.0008780432309442548, "loss": 6.4504, "step": 1072 }, { "epoch": 0.36621160409556314, "grad_norm": 3.59372878074646, "learning_rate": 0.000877929465301479, "loss": 7.1913, "step": 1073 }, { "epoch": 0.36655290102389076, "grad_norm": 3.4123799800872803, "learning_rate": 0.0008778156996587031, "loss": 6.5238, "step": 1074 }, { "epoch": 0.36689419795221845, "grad_norm": 3.3152260780334473, "learning_rate": 0.0008777019340159273, "loss": 6.9748, "step": 1075 }, { "epoch": 0.3672354948805461, "grad_norm": 3.3980987071990967, "learning_rate": 0.0008775881683731514, "loss": 6.8588, "step": 1076 }, { "epoch": 0.3675767918088737, "grad_norm": 3.447199583053589, "learning_rate": 0.0008774744027303755, "loss": 6.3821, "step": 1077 }, { "epoch": 0.3679180887372014, "grad_norm": 3.4618401527404785, "learning_rate": 0.0008773606370875996, "loss": 6.95, "step": 1078 }, { "epoch": 0.368259385665529, "grad_norm": 3.597135543823242, "learning_rate": 0.0008772468714448238, "loss": 6.4812, "step": 1079 }, { "epoch": 0.36860068259385664, "grad_norm": 3.396935224533081, "learning_rate": 0.0008771331058020478, "loss": 6.7553, "step": 1080 }, { "epoch": 0.3689419795221843, "grad_norm": 3.4151859283447266, "learning_rate": 0.0008770193401592719, "loss": 6.4082, "step": 1081 }, { "epoch": 0.36928327645051195, "grad_norm": 3.4455440044403076, "learning_rate": 0.000876905574516496, "loss": 6.7333, "step": 1082 }, { "epoch": 0.3696245733788396, "grad_norm": 3.539586067199707, "learning_rate": 0.0008767918088737201, "loss": 6.5843, "step": 1083 }, { "epoch": 0.3699658703071672, "grad_norm": 6.984851360321045, "learning_rate": 0.0008766780432309442, "loss": 5.8875, "step": 1084 }, { "epoch": 0.3703071672354949, "grad_norm": 3.949312448501587, "learning_rate": 0.0008765642775881684, "loss": 6.2822, "step": 1085 }, { "epoch": 0.3706484641638225, "grad_norm": 3.787013530731201, "learning_rate": 0.0008764505119453925, "loss": 6.7024, "step": 1086 }, { "epoch": 0.37098976109215015, "grad_norm": 3.797456741333008, "learning_rate": 0.0008763367463026166, "loss": 6.6641, "step": 1087 }, { "epoch": 0.37133105802047783, "grad_norm": 3.9768006801605225, "learning_rate": 0.0008762229806598407, "loss": 6.8357, "step": 1088 }, { "epoch": 0.37167235494880546, "grad_norm": 3.4279286861419678, "learning_rate": 0.0008761092150170648, "loss": 6.5971, "step": 1089 }, { "epoch": 0.3720136518771331, "grad_norm": 3.417083501815796, "learning_rate": 0.0008759954493742889, "loss": 6.818, "step": 1090 }, { "epoch": 0.3723549488054608, "grad_norm": 3.553903818130493, "learning_rate": 0.0008758816837315132, "loss": 6.8396, "step": 1091 }, { "epoch": 0.3726962457337884, "grad_norm": 3.620215892791748, "learning_rate": 0.0008757679180887373, "loss": 6.2553, "step": 1092 }, { "epoch": 0.37303754266211603, "grad_norm": 5.6876935958862305, "learning_rate": 0.0008756541524459614, "loss": 6.035, "step": 1093 }, { "epoch": 0.3733788395904437, "grad_norm": 3.6916663646698, "learning_rate": 0.0008755403868031855, "loss": 6.9809, "step": 1094 }, { "epoch": 0.37372013651877134, "grad_norm": 3.829458236694336, "learning_rate": 0.0008754266211604096, "loss": 6.3949, "step": 1095 }, { "epoch": 0.37406143344709897, "grad_norm": 3.719882011413574, "learning_rate": 0.0008753128555176338, "loss": 7.0602, "step": 1096 }, { "epoch": 0.3744027303754266, "grad_norm": 4.063767433166504, "learning_rate": 0.0008751990898748579, "loss": 6.713, "step": 1097 }, { "epoch": 0.3747440273037543, "grad_norm": 3.6009387969970703, "learning_rate": 0.000875085324232082, "loss": 6.6959, "step": 1098 }, { "epoch": 0.3750853242320819, "grad_norm": 3.6646664142608643, "learning_rate": 0.000874971558589306, "loss": 6.8591, "step": 1099 }, { "epoch": 0.37542662116040953, "grad_norm": 3.4815189838409424, "learning_rate": 0.0008748577929465301, "loss": 6.6657, "step": 1100 }, { "epoch": 0.3757679180887372, "grad_norm": 3.6855881214141846, "learning_rate": 0.0008747440273037542, "loss": 6.7854, "step": 1101 }, { "epoch": 0.37610921501706485, "grad_norm": 3.5180046558380127, "learning_rate": 0.0008746302616609784, "loss": 6.8922, "step": 1102 }, { "epoch": 0.3764505119453925, "grad_norm": 3.3709070682525635, "learning_rate": 0.0008745164960182025, "loss": 7.0109, "step": 1103 }, { "epoch": 0.37679180887372016, "grad_norm": 3.4868783950805664, "learning_rate": 0.0008744027303754266, "loss": 7.1033, "step": 1104 }, { "epoch": 0.3771331058020478, "grad_norm": 3.5890045166015625, "learning_rate": 0.0008742889647326507, "loss": 6.5034, "step": 1105 }, { "epoch": 0.3774744027303754, "grad_norm": 5.1739726066589355, "learning_rate": 0.0008741751990898748, "loss": 6.2063, "step": 1106 }, { "epoch": 0.3778156996587031, "grad_norm": 3.525132179260254, "learning_rate": 0.0008740614334470989, "loss": 7.3972, "step": 1107 }, { "epoch": 0.3781569965870307, "grad_norm": 3.622143268585205, "learning_rate": 0.0008739476678043232, "loss": 7.0089, "step": 1108 }, { "epoch": 0.37849829351535835, "grad_norm": 3.494596481323242, "learning_rate": 0.0008738339021615473, "loss": 6.5724, "step": 1109 }, { "epoch": 0.378839590443686, "grad_norm": 3.3533830642700195, "learning_rate": 0.0008737201365187714, "loss": 7.1964, "step": 1110 }, { "epoch": 0.37918088737201366, "grad_norm": 3.518562078475952, "learning_rate": 0.0008736063708759955, "loss": 7.2307, "step": 1111 }, { "epoch": 0.3795221843003413, "grad_norm": 3.4765775203704834, "learning_rate": 0.0008734926052332196, "loss": 6.6891, "step": 1112 }, { "epoch": 0.3798634812286689, "grad_norm": 3.379734754562378, "learning_rate": 0.0008733788395904437, "loss": 6.883, "step": 1113 }, { "epoch": 0.3802047781569966, "grad_norm": 3.9166646003723145, "learning_rate": 0.0008732650739476679, "loss": 6.7689, "step": 1114 }, { "epoch": 0.38054607508532423, "grad_norm": 3.344590663909912, "learning_rate": 0.000873151308304892, "loss": 6.579, "step": 1115 }, { "epoch": 0.38088737201365186, "grad_norm": 3.7421436309814453, "learning_rate": 0.0008730375426621161, "loss": 7.2903, "step": 1116 }, { "epoch": 0.38122866894197954, "grad_norm": 3.9341204166412354, "learning_rate": 0.0008729237770193402, "loss": 6.4053, "step": 1117 }, { "epoch": 0.38156996587030717, "grad_norm": 3.6717798709869385, "learning_rate": 0.0008728100113765643, "loss": 6.56, "step": 1118 }, { "epoch": 0.3819112627986348, "grad_norm": 3.7355589866638184, "learning_rate": 0.0008726962457337884, "loss": 6.1227, "step": 1119 }, { "epoch": 0.3822525597269625, "grad_norm": 3.6662402153015137, "learning_rate": 0.0008725824800910125, "loss": 6.8174, "step": 1120 }, { "epoch": 0.3825938566552901, "grad_norm": 3.7742650508880615, "learning_rate": 0.0008724687144482366, "loss": 6.9281, "step": 1121 }, { "epoch": 0.38293515358361774, "grad_norm": 3.4731431007385254, "learning_rate": 0.0008723549488054607, "loss": 6.5173, "step": 1122 }, { "epoch": 0.38327645051194537, "grad_norm": 3.55572247505188, "learning_rate": 0.0008722411831626848, "loss": 6.8068, "step": 1123 }, { "epoch": 0.38361774744027305, "grad_norm": 3.307126522064209, "learning_rate": 0.0008721274175199089, "loss": 6.2692, "step": 1124 }, { "epoch": 0.3839590443686007, "grad_norm": 3.444089889526367, "learning_rate": 0.0008720136518771332, "loss": 6.4222, "step": 1125 }, { "epoch": 0.3843003412969283, "grad_norm": 3.731123685836792, "learning_rate": 0.0008718998862343573, "loss": 6.7032, "step": 1126 }, { "epoch": 0.384641638225256, "grad_norm": 3.5673561096191406, "learning_rate": 0.0008717861205915814, "loss": 6.7617, "step": 1127 }, { "epoch": 0.3849829351535836, "grad_norm": 4.335822105407715, "learning_rate": 0.0008716723549488055, "loss": 6.0413, "step": 1128 }, { "epoch": 0.38532423208191124, "grad_norm": 3.483842372894287, "learning_rate": 0.0008715585893060296, "loss": 6.902, "step": 1129 }, { "epoch": 0.3856655290102389, "grad_norm": 3.3141043186187744, "learning_rate": 0.0008714448236632537, "loss": 6.5926, "step": 1130 }, { "epoch": 0.38600682593856656, "grad_norm": 3.5295164585113525, "learning_rate": 0.0008713310580204779, "loss": 7.411, "step": 1131 }, { "epoch": 0.3863481228668942, "grad_norm": 3.6749589443206787, "learning_rate": 0.000871217292377702, "loss": 6.4381, "step": 1132 }, { "epoch": 0.38668941979522187, "grad_norm": 3.7729690074920654, "learning_rate": 0.0008711035267349261, "loss": 6.2913, "step": 1133 }, { "epoch": 0.3870307167235495, "grad_norm": 3.4314284324645996, "learning_rate": 0.0008709897610921502, "loss": 7.1253, "step": 1134 }, { "epoch": 0.3873720136518771, "grad_norm": 3.633718252182007, "learning_rate": 0.0008708759954493743, "loss": 6.378, "step": 1135 }, { "epoch": 0.38771331058020475, "grad_norm": 3.4309566020965576, "learning_rate": 0.0008707622298065985, "loss": 6.7567, "step": 1136 }, { "epoch": 0.38805460750853243, "grad_norm": 5.337145805358887, "learning_rate": 0.0008706484641638226, "loss": 6.1113, "step": 1137 }, { "epoch": 0.38839590443686006, "grad_norm": 3.6700313091278076, "learning_rate": 0.0008705346985210466, "loss": 7.0188, "step": 1138 }, { "epoch": 0.3887372013651877, "grad_norm": 3.756471633911133, "learning_rate": 0.0008704209328782707, "loss": 6.3016, "step": 1139 }, { "epoch": 0.3890784982935154, "grad_norm": 3.3960928916931152, "learning_rate": 0.0008703071672354948, "loss": 6.6751, "step": 1140 }, { "epoch": 0.389419795221843, "grad_norm": 3.354475736618042, "learning_rate": 0.0008701934015927189, "loss": 6.9203, "step": 1141 }, { "epoch": 0.38976109215017063, "grad_norm": 4.095553398132324, "learning_rate": 0.0008700796359499432, "loss": 6.2154, "step": 1142 }, { "epoch": 0.3901023890784983, "grad_norm": 3.592214584350586, "learning_rate": 0.0008699658703071673, "loss": 6.5693, "step": 1143 }, { "epoch": 0.39044368600682594, "grad_norm": 3.5223283767700195, "learning_rate": 0.0008698521046643914, "loss": 7.3168, "step": 1144 }, { "epoch": 0.39078498293515357, "grad_norm": 3.3714346885681152, "learning_rate": 0.0008697383390216155, "loss": 6.5233, "step": 1145 }, { "epoch": 0.39112627986348125, "grad_norm": 4.475141525268555, "learning_rate": 0.0008696245733788396, "loss": 6.3844, "step": 1146 }, { "epoch": 0.3914675767918089, "grad_norm": 3.5419039726257324, "learning_rate": 0.0008695108077360637, "loss": 6.718, "step": 1147 }, { "epoch": 0.3918088737201365, "grad_norm": 3.6940994262695312, "learning_rate": 0.0008693970420932879, "loss": 6.9183, "step": 1148 }, { "epoch": 0.3921501706484642, "grad_norm": 3.406237840652466, "learning_rate": 0.000869283276450512, "loss": 6.817, "step": 1149 }, { "epoch": 0.3924914675767918, "grad_norm": 3.5237555503845215, "learning_rate": 0.0008691695108077361, "loss": 6.565, "step": 1150 }, { "epoch": 0.39283276450511945, "grad_norm": 3.400852918624878, "learning_rate": 0.0008690557451649602, "loss": 6.5612, "step": 1151 }, { "epoch": 0.3931740614334471, "grad_norm": 3.451266050338745, "learning_rate": 0.0008689419795221843, "loss": 6.5092, "step": 1152 }, { "epoch": 0.39351535836177476, "grad_norm": 4.814857006072998, "learning_rate": 0.0008688282138794084, "loss": 6.1531, "step": 1153 }, { "epoch": 0.3938566552901024, "grad_norm": 3.619635820388794, "learning_rate": 0.0008687144482366326, "loss": 6.4214, "step": 1154 }, { "epoch": 0.39419795221843, "grad_norm": 2.678661346435547, "learning_rate": 0.0008686006825938567, "loss": 3.9171, "step": 1155 }, { "epoch": 0.3945392491467577, "grad_norm": 4.169159889221191, "learning_rate": 0.0008684869169510808, "loss": 6.2438, "step": 1156 }, { "epoch": 0.3948805460750853, "grad_norm": 3.618849515914917, "learning_rate": 0.0008683731513083049, "loss": 6.7297, "step": 1157 }, { "epoch": 0.39522184300341295, "grad_norm": 5.10459566116333, "learning_rate": 0.0008682593856655289, "loss": 6.2313, "step": 1158 }, { "epoch": 0.39556313993174064, "grad_norm": 3.637228012084961, "learning_rate": 0.0008681456200227532, "loss": 6.6383, "step": 1159 }, { "epoch": 0.39590443686006827, "grad_norm": 3.539729356765747, "learning_rate": 0.0008680318543799773, "loss": 6.6497, "step": 1160 }, { "epoch": 0.3962457337883959, "grad_norm": 3.3988003730773926, "learning_rate": 0.0008679180887372014, "loss": 6.2502, "step": 1161 }, { "epoch": 0.3965870307167236, "grad_norm": 3.7295820713043213, "learning_rate": 0.0008678043230944255, "loss": 6.354, "step": 1162 }, { "epoch": 0.3969283276450512, "grad_norm": 3.539332151412964, "learning_rate": 0.0008676905574516496, "loss": 7.1846, "step": 1163 }, { "epoch": 0.39726962457337883, "grad_norm": 3.5238754749298096, "learning_rate": 0.0008675767918088737, "loss": 6.2985, "step": 1164 }, { "epoch": 0.39761092150170646, "grad_norm": 7.77903938293457, "learning_rate": 0.0008674630261660979, "loss": 4.739, "step": 1165 }, { "epoch": 0.39795221843003414, "grad_norm": 3.561558485031128, "learning_rate": 0.000867349260523322, "loss": 6.7075, "step": 1166 }, { "epoch": 0.39829351535836177, "grad_norm": 3.6842966079711914, "learning_rate": 0.0008672354948805461, "loss": 6.934, "step": 1167 }, { "epoch": 0.3986348122866894, "grad_norm": 3.6015913486480713, "learning_rate": 0.0008671217292377702, "loss": 7.0431, "step": 1168 }, { "epoch": 0.3989761092150171, "grad_norm": 3.3261101245880127, "learning_rate": 0.0008670079635949943, "loss": 6.5901, "step": 1169 }, { "epoch": 0.3993174061433447, "grad_norm": 3.6874048709869385, "learning_rate": 0.0008668941979522184, "loss": 6.8816, "step": 1170 }, { "epoch": 0.39965870307167234, "grad_norm": 3.367588996887207, "learning_rate": 0.0008667804323094426, "loss": 6.8432, "step": 1171 }, { "epoch": 0.4, "grad_norm": 3.2957537174224854, "learning_rate": 0.0008666666666666667, "loss": 6.7904, "step": 1172 }, { "epoch": 0.40034129692832765, "grad_norm": 3.3574087619781494, "learning_rate": 0.0008665529010238908, "loss": 6.7804, "step": 1173 }, { "epoch": 0.4006825938566553, "grad_norm": 3.550178050994873, "learning_rate": 0.0008664391353811149, "loss": 6.5741, "step": 1174 }, { "epoch": 0.40102389078498296, "grad_norm": 3.4762377738952637, "learning_rate": 0.000866325369738339, "loss": 7.1227, "step": 1175 }, { "epoch": 0.4013651877133106, "grad_norm": 3.523291826248169, "learning_rate": 0.0008662116040955633, "loss": 6.8638, "step": 1176 }, { "epoch": 0.4017064846416382, "grad_norm": 3.267287492752075, "learning_rate": 0.0008660978384527873, "loss": 6.6402, "step": 1177 }, { "epoch": 0.40204778156996585, "grad_norm": 3.369250774383545, "learning_rate": 0.0008659840728100114, "loss": 6.683, "step": 1178 }, { "epoch": 0.40238907849829353, "grad_norm": 3.4328765869140625, "learning_rate": 0.0008658703071672355, "loss": 6.6393, "step": 1179 }, { "epoch": 0.40273037542662116, "grad_norm": 3.6571414470672607, "learning_rate": 0.0008657565415244596, "loss": 6.5947, "step": 1180 }, { "epoch": 0.4030716723549488, "grad_norm": 3.311383008956909, "learning_rate": 0.0008656427758816837, "loss": 6.7028, "step": 1181 }, { "epoch": 0.40341296928327647, "grad_norm": 3.5350563526153564, "learning_rate": 0.0008655290102389079, "loss": 6.2399, "step": 1182 }, { "epoch": 0.4037542662116041, "grad_norm": 3.58445143699646, "learning_rate": 0.000865415244596132, "loss": 6.4364, "step": 1183 }, { "epoch": 0.4040955631399317, "grad_norm": 3.439987897872925, "learning_rate": 0.0008653014789533561, "loss": 6.5951, "step": 1184 }, { "epoch": 0.4044368600682594, "grad_norm": 8.74139404296875, "learning_rate": 0.0008651877133105802, "loss": 6.3451, "step": 1185 }, { "epoch": 0.40477815699658704, "grad_norm": 3.636200428009033, "learning_rate": 0.0008650739476678043, "loss": 7.1951, "step": 1186 }, { "epoch": 0.40511945392491466, "grad_norm": 3.539865493774414, "learning_rate": 0.0008649601820250284, "loss": 7.0252, "step": 1187 }, { "epoch": 0.40546075085324235, "grad_norm": 3.5502421855926514, "learning_rate": 0.0008648464163822526, "loss": 6.9277, "step": 1188 }, { "epoch": 0.40580204778157, "grad_norm": 6.065408706665039, "learning_rate": 0.0008647326507394767, "loss": 6.2756, "step": 1189 }, { "epoch": 0.4061433447098976, "grad_norm": 3.5880606174468994, "learning_rate": 0.0008646188850967008, "loss": 6.609, "step": 1190 }, { "epoch": 0.40648464163822523, "grad_norm": 3.5269694328308105, "learning_rate": 0.000864505119453925, "loss": 6.6192, "step": 1191 }, { "epoch": 0.4068259385665529, "grad_norm": 3.216411828994751, "learning_rate": 0.000864391353811149, "loss": 6.6329, "step": 1192 }, { "epoch": 0.40716723549488054, "grad_norm": 3.540984630584717, "learning_rate": 0.0008642775881683732, "loss": 6.8023, "step": 1193 }, { "epoch": 0.40750853242320817, "grad_norm": 3.430344581604004, "learning_rate": 0.0008641638225255974, "loss": 6.4494, "step": 1194 }, { "epoch": 0.40784982935153585, "grad_norm": 3.64399790763855, "learning_rate": 0.0008640500568828215, "loss": 6.0932, "step": 1195 }, { "epoch": 0.4081911262798635, "grad_norm": 3.293858528137207, "learning_rate": 0.0008639362912400455, "loss": 6.4266, "step": 1196 }, { "epoch": 0.4085324232081911, "grad_norm": 3.498997449874878, "learning_rate": 0.0008638225255972696, "loss": 6.4859, "step": 1197 }, { "epoch": 0.4088737201365188, "grad_norm": 3.547935724258423, "learning_rate": 0.0008637087599544937, "loss": 7.0903, "step": 1198 }, { "epoch": 0.4092150170648464, "grad_norm": 3.3775041103363037, "learning_rate": 0.0008635949943117179, "loss": 6.6445, "step": 1199 }, { "epoch": 0.40955631399317405, "grad_norm": 3.678861379623413, "learning_rate": 0.000863481228668942, "loss": 6.3223, "step": 1200 }, { "epoch": 0.40989761092150173, "grad_norm": 5.478918075561523, "learning_rate": 0.0008633674630261661, "loss": 6.726, "step": 1201 }, { "epoch": 0.41023890784982936, "grad_norm": 5.6262407302856445, "learning_rate": 0.0008632536973833902, "loss": 6.1447, "step": 1202 }, { "epoch": 0.410580204778157, "grad_norm": 3.8595380783081055, "learning_rate": 0.0008631399317406143, "loss": 6.7759, "step": 1203 }, { "epoch": 0.4109215017064846, "grad_norm": 3.590745449066162, "learning_rate": 0.0008630261660978384, "loss": 6.6065, "step": 1204 }, { "epoch": 0.4112627986348123, "grad_norm": 3.531480550765991, "learning_rate": 0.0008629124004550626, "loss": 6.3746, "step": 1205 }, { "epoch": 0.4116040955631399, "grad_norm": 3.606606960296631, "learning_rate": 0.0008627986348122867, "loss": 6.3677, "step": 1206 }, { "epoch": 0.41194539249146755, "grad_norm": 3.5986878871917725, "learning_rate": 0.0008626848691695108, "loss": 6.2718, "step": 1207 }, { "epoch": 0.41228668941979524, "grad_norm": 3.399188995361328, "learning_rate": 0.000862571103526735, "loss": 6.8167, "step": 1208 }, { "epoch": 0.41262798634812287, "grad_norm": 3.2899904251098633, "learning_rate": 0.000862457337883959, "loss": 7.0081, "step": 1209 }, { "epoch": 0.4129692832764505, "grad_norm": 3.522502899169922, "learning_rate": 0.0008623435722411832, "loss": 6.6561, "step": 1210 }, { "epoch": 0.4133105802047782, "grad_norm": 3.5413362979888916, "learning_rate": 0.0008622298065984074, "loss": 6.8759, "step": 1211 }, { "epoch": 0.4136518771331058, "grad_norm": 3.4126598834991455, "learning_rate": 0.0008621160409556315, "loss": 7.0551, "step": 1212 }, { "epoch": 0.41399317406143343, "grad_norm": 3.658048391342163, "learning_rate": 0.0008620022753128556, "loss": 6.7476, "step": 1213 }, { "epoch": 0.4143344709897611, "grad_norm": 3.4803097248077393, "learning_rate": 0.0008618885096700797, "loss": 6.717, "step": 1214 }, { "epoch": 0.41467576791808874, "grad_norm": 3.52303147315979, "learning_rate": 0.0008617747440273038, "loss": 6.6623, "step": 1215 }, { "epoch": 0.4150170648464164, "grad_norm": 3.362966537475586, "learning_rate": 0.0008616609783845278, "loss": 6.8483, "step": 1216 }, { "epoch": 0.415358361774744, "grad_norm": 4.339871883392334, "learning_rate": 0.000861547212741752, "loss": 6.3404, "step": 1217 }, { "epoch": 0.4156996587030717, "grad_norm": 3.4630043506622314, "learning_rate": 0.0008614334470989761, "loss": 7.0581, "step": 1218 }, { "epoch": 0.4160409556313993, "grad_norm": 4.27215576171875, "learning_rate": 0.0008613196814562002, "loss": 6.5148, "step": 1219 }, { "epoch": 0.41638225255972694, "grad_norm": 3.6531379222869873, "learning_rate": 0.0008612059158134243, "loss": 6.1321, "step": 1220 }, { "epoch": 0.4167235494880546, "grad_norm": 3.4010026454925537, "learning_rate": 0.0008610921501706484, "loss": 6.827, "step": 1221 }, { "epoch": 0.41706484641638225, "grad_norm": 3.467449903488159, "learning_rate": 0.0008609783845278726, "loss": 7.0066, "step": 1222 }, { "epoch": 0.4174061433447099, "grad_norm": 3.3873372077941895, "learning_rate": 0.0008608646188850967, "loss": 6.1582, "step": 1223 }, { "epoch": 0.41774744027303756, "grad_norm": 3.483137607574463, "learning_rate": 0.0008607508532423208, "loss": 6.3962, "step": 1224 }, { "epoch": 0.4180887372013652, "grad_norm": 3.4427895545959473, "learning_rate": 0.000860637087599545, "loss": 6.5905, "step": 1225 }, { "epoch": 0.4184300341296928, "grad_norm": 3.545475721359253, "learning_rate": 0.000860523321956769, "loss": 7.2328, "step": 1226 }, { "epoch": 0.4187713310580205, "grad_norm": 3.3596596717834473, "learning_rate": 0.0008604095563139932, "loss": 6.6329, "step": 1227 }, { "epoch": 0.41911262798634813, "grad_norm": 3.347614049911499, "learning_rate": 0.0008602957906712174, "loss": 6.6236, "step": 1228 }, { "epoch": 0.41945392491467576, "grad_norm": 3.284127950668335, "learning_rate": 0.0008601820250284415, "loss": 6.7784, "step": 1229 }, { "epoch": 0.4197952218430034, "grad_norm": 3.3968138694763184, "learning_rate": 0.0008600682593856656, "loss": 6.8942, "step": 1230 }, { "epoch": 0.42013651877133107, "grad_norm": 3.375905752182007, "learning_rate": 0.0008599544937428897, "loss": 6.6425, "step": 1231 }, { "epoch": 0.4204778156996587, "grad_norm": 3.409838914871216, "learning_rate": 0.0008598407281001138, "loss": 6.6804, "step": 1232 }, { "epoch": 0.4208191126279863, "grad_norm": 4.324724197387695, "learning_rate": 0.0008597269624573379, "loss": 6.1592, "step": 1233 }, { "epoch": 0.421160409556314, "grad_norm": 5.707208156585693, "learning_rate": 0.0008596131968145621, "loss": 5.9943, "step": 1234 }, { "epoch": 0.42150170648464164, "grad_norm": 3.6807117462158203, "learning_rate": 0.0008594994311717861, "loss": 6.7764, "step": 1235 }, { "epoch": 0.42184300341296926, "grad_norm": 3.467085361480713, "learning_rate": 0.0008593856655290102, "loss": 6.5814, "step": 1236 }, { "epoch": 0.42218430034129695, "grad_norm": 3.4565463066101074, "learning_rate": 0.0008592718998862343, "loss": 6.9017, "step": 1237 }, { "epoch": 0.4225255972696246, "grad_norm": 4.344884395599365, "learning_rate": 0.0008591581342434584, "loss": 6.5987, "step": 1238 }, { "epoch": 0.4228668941979522, "grad_norm": 4.411989212036133, "learning_rate": 0.0008590443686006826, "loss": 6.741, "step": 1239 }, { "epoch": 0.4232081911262799, "grad_norm": 3.775130271911621, "learning_rate": 0.0008589306029579067, "loss": 6.3065, "step": 1240 }, { "epoch": 0.4235494880546075, "grad_norm": 3.5393505096435547, "learning_rate": 0.0008588168373151308, "loss": 6.9163, "step": 1241 }, { "epoch": 0.42389078498293514, "grad_norm": 3.7094571590423584, "learning_rate": 0.000858703071672355, "loss": 5.9449, "step": 1242 }, { "epoch": 0.4242320819112628, "grad_norm": 3.3689160346984863, "learning_rate": 0.000858589306029579, "loss": 6.5366, "step": 1243 }, { "epoch": 0.42457337883959045, "grad_norm": 4.066009521484375, "learning_rate": 0.0008584755403868032, "loss": 6.2055, "step": 1244 }, { "epoch": 0.4249146757679181, "grad_norm": 3.611774444580078, "learning_rate": 0.0008583617747440274, "loss": 6.6451, "step": 1245 }, { "epoch": 0.4252559726962457, "grad_norm": 3.587705373764038, "learning_rate": 0.0008582480091012515, "loss": 6.6259, "step": 1246 }, { "epoch": 0.4255972696245734, "grad_norm": 3.442603349685669, "learning_rate": 0.0008581342434584756, "loss": 6.8989, "step": 1247 }, { "epoch": 0.425938566552901, "grad_norm": 4.580451488494873, "learning_rate": 0.0008580204778156997, "loss": 6.2377, "step": 1248 }, { "epoch": 0.42627986348122865, "grad_norm": 3.4543490409851074, "learning_rate": 0.0008579067121729238, "loss": 6.2214, "step": 1249 }, { "epoch": 0.42662116040955633, "grad_norm": 5.877060890197754, "learning_rate": 0.0008577929465301479, "loss": 5.4147, "step": 1250 }, { "epoch": 0.42696245733788396, "grad_norm": 4.635700225830078, "learning_rate": 0.0008576791808873721, "loss": 6.1787, "step": 1251 }, { "epoch": 0.4273037542662116, "grad_norm": 3.6666946411132812, "learning_rate": 0.0008575654152445962, "loss": 7.006, "step": 1252 }, { "epoch": 0.42764505119453927, "grad_norm": 3.5804553031921387, "learning_rate": 0.0008574516496018203, "loss": 6.8813, "step": 1253 }, { "epoch": 0.4279863481228669, "grad_norm": 3.4612207412719727, "learning_rate": 0.0008573378839590444, "loss": 6.6291, "step": 1254 }, { "epoch": 0.4283276450511945, "grad_norm": 3.6497762203216553, "learning_rate": 0.0008572241183162684, "loss": 6.9736, "step": 1255 }, { "epoch": 0.4286689419795222, "grad_norm": 3.296250104904175, "learning_rate": 0.0008571103526734925, "loss": 6.5129, "step": 1256 }, { "epoch": 0.42901023890784984, "grad_norm": 3.4279532432556152, "learning_rate": 0.0008569965870307167, "loss": 6.4369, "step": 1257 }, { "epoch": 0.42935153583617747, "grad_norm": 3.448242664337158, "learning_rate": 0.0008568828213879408, "loss": 6.6171, "step": 1258 }, { "epoch": 0.4296928327645051, "grad_norm": 3.353353977203369, "learning_rate": 0.000856769055745165, "loss": 6.5279, "step": 1259 }, { "epoch": 0.4300341296928328, "grad_norm": 3.488964319229126, "learning_rate": 0.0008566552901023891, "loss": 6.8257, "step": 1260 }, { "epoch": 0.4303754266211604, "grad_norm": 3.5172197818756104, "learning_rate": 0.0008565415244596132, "loss": 6.1531, "step": 1261 }, { "epoch": 0.43071672354948803, "grad_norm": 3.3878061771392822, "learning_rate": 0.0008564277588168374, "loss": 6.8535, "step": 1262 }, { "epoch": 0.4310580204778157, "grad_norm": 3.500743865966797, "learning_rate": 0.0008563139931740615, "loss": 6.9892, "step": 1263 }, { "epoch": 0.43139931740614335, "grad_norm": 7.8660736083984375, "learning_rate": 0.0008562002275312856, "loss": 5.9798, "step": 1264 }, { "epoch": 0.431740614334471, "grad_norm": 3.737638473510742, "learning_rate": 0.0008560864618885097, "loss": 6.8881, "step": 1265 }, { "epoch": 0.43208191126279866, "grad_norm": 3.8500828742980957, "learning_rate": 0.0008559726962457338, "loss": 6.8304, "step": 1266 }, { "epoch": 0.4324232081911263, "grad_norm": 3.5614750385284424, "learning_rate": 0.0008558589306029579, "loss": 6.4568, "step": 1267 }, { "epoch": 0.4327645051194539, "grad_norm": 3.2859628200531006, "learning_rate": 0.0008557451649601821, "loss": 6.977, "step": 1268 }, { "epoch": 0.4331058020477816, "grad_norm": 3.4138572216033936, "learning_rate": 0.0008556313993174062, "loss": 5.7698, "step": 1269 }, { "epoch": 0.4334470989761092, "grad_norm": 3.3377716541290283, "learning_rate": 0.0008555176336746303, "loss": 6.5715, "step": 1270 }, { "epoch": 0.43378839590443685, "grad_norm": 3.5361649990081787, "learning_rate": 0.0008554038680318544, "loss": 6.546, "step": 1271 }, { "epoch": 0.4341296928327645, "grad_norm": 3.4464402198791504, "learning_rate": 0.0008552901023890785, "loss": 6.2318, "step": 1272 }, { "epoch": 0.43447098976109216, "grad_norm": 3.289402484893799, "learning_rate": 0.0008551763367463026, "loss": 6.7913, "step": 1273 }, { "epoch": 0.4348122866894198, "grad_norm": 3.665785312652588, "learning_rate": 0.0008550625711035267, "loss": 6.6228, "step": 1274 }, { "epoch": 0.4351535836177474, "grad_norm": 5.519403457641602, "learning_rate": 0.0008549488054607508, "loss": 6.0677, "step": 1275 }, { "epoch": 0.4354948805460751, "grad_norm": 3.639775276184082, "learning_rate": 0.000854835039817975, "loss": 6.8561, "step": 1276 }, { "epoch": 0.43583617747440273, "grad_norm": 3.529599905014038, "learning_rate": 0.0008547212741751991, "loss": 6.6314, "step": 1277 }, { "epoch": 0.43617747440273036, "grad_norm": 3.8909149169921875, "learning_rate": 0.0008546075085324232, "loss": 6.7449, "step": 1278 }, { "epoch": 0.43651877133105804, "grad_norm": 4.182558059692383, "learning_rate": 0.0008544937428896473, "loss": 6.1402, "step": 1279 }, { "epoch": 0.43686006825938567, "grad_norm": 3.6773123741149902, "learning_rate": 0.0008543799772468715, "loss": 6.5567, "step": 1280 }, { "epoch": 0.4372013651877133, "grad_norm": 3.5389983654022217, "learning_rate": 0.0008542662116040956, "loss": 6.847, "step": 1281 }, { "epoch": 0.437542662116041, "grad_norm": 3.5662002563476562, "learning_rate": 0.0008541524459613197, "loss": 6.242, "step": 1282 }, { "epoch": 0.4378839590443686, "grad_norm": 3.623596668243408, "learning_rate": 0.0008540386803185438, "loss": 6.4278, "step": 1283 }, { "epoch": 0.43822525597269624, "grad_norm": 3.6582117080688477, "learning_rate": 0.0008539249146757679, "loss": 6.266, "step": 1284 }, { "epoch": 0.43856655290102387, "grad_norm": 5.415266990661621, "learning_rate": 0.0008538111490329921, "loss": 4.8563, "step": 1285 }, { "epoch": 0.43890784982935155, "grad_norm": 3.88291335105896, "learning_rate": 0.0008536973833902162, "loss": 7.0351, "step": 1286 }, { "epoch": 0.4392491467576792, "grad_norm": 3.7798752784729004, "learning_rate": 0.0008535836177474403, "loss": 6.9712, "step": 1287 }, { "epoch": 0.4395904436860068, "grad_norm": 4.600677490234375, "learning_rate": 0.0008534698521046644, "loss": 6.4982, "step": 1288 }, { "epoch": 0.4399317406143345, "grad_norm": 3.6498169898986816, "learning_rate": 0.0008533560864618885, "loss": 6.7925, "step": 1289 }, { "epoch": 0.4402730375426621, "grad_norm": 3.712674617767334, "learning_rate": 0.0008532423208191126, "loss": 6.5274, "step": 1290 }, { "epoch": 0.44061433447098974, "grad_norm": 3.2611215114593506, "learning_rate": 0.0008531285551763368, "loss": 6.954, "step": 1291 }, { "epoch": 0.4409556313993174, "grad_norm": 3.1427042484283447, "learning_rate": 0.000853014789533561, "loss": 6.6816, "step": 1292 }, { "epoch": 0.44129692832764505, "grad_norm": 3.4534733295440674, "learning_rate": 0.0008529010238907851, "loss": 6.4714, "step": 1293 }, { "epoch": 0.4416382252559727, "grad_norm": 3.25132155418396, "learning_rate": 0.0008527872582480091, "loss": 6.3441, "step": 1294 }, { "epoch": 0.44197952218430037, "grad_norm": 3.3785088062286377, "learning_rate": 0.0008526734926052332, "loss": 6.8156, "step": 1295 }, { "epoch": 0.442320819112628, "grad_norm": 3.79790997505188, "learning_rate": 0.0008525597269624573, "loss": 6.3941, "step": 1296 }, { "epoch": 0.4426621160409556, "grad_norm": 10.423567771911621, "learning_rate": 0.0008524459613196815, "loss": 5.7454, "step": 1297 }, { "epoch": 0.44300341296928325, "grad_norm": 6.871647357940674, "learning_rate": 0.0008523321956769056, "loss": 6.844, "step": 1298 }, { "epoch": 0.44334470989761093, "grad_norm": 4.372817039489746, "learning_rate": 0.0008522184300341297, "loss": 6.4617, "step": 1299 }, { "epoch": 0.44368600682593856, "grad_norm": 3.725336790084839, "learning_rate": 0.0008521046643913538, "loss": 6.9746, "step": 1300 }, { "epoch": 0.4440273037542662, "grad_norm": 3.9999923706054688, "learning_rate": 0.0008519908987485779, "loss": 6.1832, "step": 1301 }, { "epoch": 0.4443686006825939, "grad_norm": 8.845232963562012, "learning_rate": 0.0008518771331058021, "loss": 4.6078, "step": 1302 }, { "epoch": 0.4447098976109215, "grad_norm": 3.548273801803589, "learning_rate": 0.0008517633674630262, "loss": 7.1117, "step": 1303 }, { "epoch": 0.44505119453924913, "grad_norm": 3.508362054824829, "learning_rate": 0.0008516496018202503, "loss": 7.0278, "step": 1304 }, { "epoch": 0.4453924914675768, "grad_norm": 5.351772785186768, "learning_rate": 0.0008515358361774744, "loss": 6.3568, "step": 1305 }, { "epoch": 0.44573378839590444, "grad_norm": 3.509801149368286, "learning_rate": 0.0008514220705346985, "loss": 6.6052, "step": 1306 }, { "epoch": 0.44607508532423207, "grad_norm": 3.358114719390869, "learning_rate": 0.0008513083048919226, "loss": 6.3912, "step": 1307 }, { "epoch": 0.44641638225255975, "grad_norm": 5.949875354766846, "learning_rate": 0.0008511945392491469, "loss": 6.5835, "step": 1308 }, { "epoch": 0.4467576791808874, "grad_norm": 3.4979476928710938, "learning_rate": 0.000851080773606371, "loss": 6.1771, "step": 1309 }, { "epoch": 0.447098976109215, "grad_norm": 3.5438613891601562, "learning_rate": 0.0008509670079635951, "loss": 6.2541, "step": 1310 }, { "epoch": 0.44744027303754264, "grad_norm": 3.3947629928588867, "learning_rate": 0.0008508532423208192, "loss": 6.2373, "step": 1311 }, { "epoch": 0.4477815699658703, "grad_norm": 3.5880777835845947, "learning_rate": 0.0008507394766780433, "loss": 6.3093, "step": 1312 }, { "epoch": 0.44812286689419795, "grad_norm": 3.6189653873443604, "learning_rate": 0.0008506257110352673, "loss": 6.1852, "step": 1313 }, { "epoch": 0.4484641638225256, "grad_norm": 3.5925283432006836, "learning_rate": 0.0008505119453924915, "loss": 6.7835, "step": 1314 }, { "epoch": 0.44880546075085326, "grad_norm": 3.7551400661468506, "learning_rate": 0.0008503981797497156, "loss": 6.3399, "step": 1315 }, { "epoch": 0.4491467576791809, "grad_norm": 3.489022731781006, "learning_rate": 0.0008502844141069397, "loss": 6.8905, "step": 1316 }, { "epoch": 0.4494880546075085, "grad_norm": 3.6786651611328125, "learning_rate": 0.0008501706484641638, "loss": 6.74, "step": 1317 }, { "epoch": 0.4498293515358362, "grad_norm": 3.3341808319091797, "learning_rate": 0.0008500568828213879, "loss": 6.4478, "step": 1318 }, { "epoch": 0.4501706484641638, "grad_norm": 8.599390983581543, "learning_rate": 0.000849943117178612, "loss": 6.5547, "step": 1319 }, { "epoch": 0.45051194539249145, "grad_norm": 4.72025728225708, "learning_rate": 0.0008498293515358362, "loss": 6.7761, "step": 1320 }, { "epoch": 0.45085324232081914, "grad_norm": 3.8403704166412354, "learning_rate": 0.0008497155858930603, "loss": 6.6878, "step": 1321 }, { "epoch": 0.45119453924914676, "grad_norm": 3.509477376937866, "learning_rate": 0.0008496018202502844, "loss": 7.0436, "step": 1322 }, { "epoch": 0.4515358361774744, "grad_norm": 3.821585178375244, "learning_rate": 0.0008494880546075085, "loss": 6.573, "step": 1323 }, { "epoch": 0.451877133105802, "grad_norm": 3.739370584487915, "learning_rate": 0.0008493742889647326, "loss": 6.3931, "step": 1324 }, { "epoch": 0.4522184300341297, "grad_norm": 3.2959489822387695, "learning_rate": 0.0008492605233219569, "loss": 6.8188, "step": 1325 }, { "epoch": 0.45255972696245733, "grad_norm": 3.6209750175476074, "learning_rate": 0.000849146757679181, "loss": 6.6649, "step": 1326 }, { "epoch": 0.45290102389078496, "grad_norm": 3.411741018295288, "learning_rate": 0.0008490329920364051, "loss": 6.2213, "step": 1327 }, { "epoch": 0.45324232081911264, "grad_norm": 3.494786262512207, "learning_rate": 0.0008489192263936292, "loss": 6.9988, "step": 1328 }, { "epoch": 0.45358361774744027, "grad_norm": 3.3790512084960938, "learning_rate": 0.0008488054607508533, "loss": 6.7258, "step": 1329 }, { "epoch": 0.4539249146757679, "grad_norm": 3.412260055541992, "learning_rate": 0.0008486916951080774, "loss": 6.8724, "step": 1330 }, { "epoch": 0.4542662116040956, "grad_norm": 3.655362844467163, "learning_rate": 0.0008485779294653016, "loss": 6.644, "step": 1331 }, { "epoch": 0.4546075085324232, "grad_norm": 3.687267780303955, "learning_rate": 0.0008484641638225257, "loss": 6.2437, "step": 1332 }, { "epoch": 0.45494880546075084, "grad_norm": 3.81823468208313, "learning_rate": 0.0008483503981797497, "loss": 6.98, "step": 1333 }, { "epoch": 0.4552901023890785, "grad_norm": 4.4424967765808105, "learning_rate": 0.0008482366325369738, "loss": 6.6895, "step": 1334 }, { "epoch": 0.45563139931740615, "grad_norm": 3.6372294425964355, "learning_rate": 0.0008481228668941979, "loss": 6.4528, "step": 1335 }, { "epoch": 0.4559726962457338, "grad_norm": 4.353541851043701, "learning_rate": 0.000848009101251422, "loss": 6.8672, "step": 1336 }, { "epoch": 0.45631399317406146, "grad_norm": 3.73286509513855, "learning_rate": 0.0008478953356086462, "loss": 6.2034, "step": 1337 }, { "epoch": 0.4566552901023891, "grad_norm": 3.300739049911499, "learning_rate": 0.0008477815699658703, "loss": 6.6602, "step": 1338 }, { "epoch": 0.4569965870307167, "grad_norm": 3.4340107440948486, "learning_rate": 0.0008476678043230944, "loss": 6.3554, "step": 1339 }, { "epoch": 0.45733788395904434, "grad_norm": 3.3781628608703613, "learning_rate": 0.0008475540386803185, "loss": 6.4238, "step": 1340 }, { "epoch": 0.45767918088737203, "grad_norm": 3.5280208587646484, "learning_rate": 0.0008474402730375426, "loss": 6.8517, "step": 1341 }, { "epoch": 0.45802047781569966, "grad_norm": 3.4973056316375732, "learning_rate": 0.0008473265073947669, "loss": 6.3462, "step": 1342 }, { "epoch": 0.4583617747440273, "grad_norm": 3.7946386337280273, "learning_rate": 0.000847212741751991, "loss": 6.7902, "step": 1343 }, { "epoch": 0.45870307167235497, "grad_norm": 3.5301756858825684, "learning_rate": 0.0008470989761092151, "loss": 6.9557, "step": 1344 }, { "epoch": 0.4590443686006826, "grad_norm": 3.576582431793213, "learning_rate": 0.0008469852104664392, "loss": 7.1152, "step": 1345 }, { "epoch": 0.4593856655290102, "grad_norm": 3.645031452178955, "learning_rate": 0.0008468714448236633, "loss": 6.5057, "step": 1346 }, { "epoch": 0.4597269624573379, "grad_norm": 3.4032137393951416, "learning_rate": 0.0008467576791808874, "loss": 6.8364, "step": 1347 }, { "epoch": 0.46006825938566553, "grad_norm": 3.5544800758361816, "learning_rate": 0.0008466439135381116, "loss": 6.8789, "step": 1348 }, { "epoch": 0.46040955631399316, "grad_norm": 3.475367784500122, "learning_rate": 0.0008465301478953357, "loss": 6.9583, "step": 1349 }, { "epoch": 0.46075085324232085, "grad_norm": 3.5934853553771973, "learning_rate": 0.0008464163822525598, "loss": 7.1558, "step": 1350 }, { "epoch": 0.4610921501706485, "grad_norm": 3.3591737747192383, "learning_rate": 0.0008463026166097839, "loss": 6.709, "step": 1351 }, { "epoch": 0.4614334470989761, "grad_norm": 3.4733426570892334, "learning_rate": 0.0008461888509670079, "loss": 6.7051, "step": 1352 }, { "epoch": 0.46177474402730373, "grad_norm": 3.335206985473633, "learning_rate": 0.000846075085324232, "loss": 6.4163, "step": 1353 }, { "epoch": 0.4621160409556314, "grad_norm": 3.442255973815918, "learning_rate": 0.0008459613196814562, "loss": 6.4429, "step": 1354 }, { "epoch": 0.46245733788395904, "grad_norm": 3.5777900218963623, "learning_rate": 0.0008458475540386803, "loss": 6.9031, "step": 1355 }, { "epoch": 0.46279863481228667, "grad_norm": 3.4293618202209473, "learning_rate": 0.0008457337883959044, "loss": 6.5918, "step": 1356 }, { "epoch": 0.46313993174061435, "grad_norm": 3.3813259601593018, "learning_rate": 0.0008456200227531285, "loss": 6.4456, "step": 1357 }, { "epoch": 0.463481228668942, "grad_norm": 4.471978664398193, "learning_rate": 0.0008455062571103526, "loss": 5.7945, "step": 1358 }, { "epoch": 0.4638225255972696, "grad_norm": 4.107702255249023, "learning_rate": 0.0008453924914675767, "loss": 6.4136, "step": 1359 }, { "epoch": 0.4641638225255973, "grad_norm": 3.807112216949463, "learning_rate": 0.000845278725824801, "loss": 6.8553, "step": 1360 }, { "epoch": 0.4645051194539249, "grad_norm": 3.7887256145477295, "learning_rate": 0.0008451649601820251, "loss": 6.4577, "step": 1361 }, { "epoch": 0.46484641638225255, "grad_norm": 3.356083393096924, "learning_rate": 0.0008450511945392492, "loss": 6.4253, "step": 1362 }, { "epoch": 0.46518771331058023, "grad_norm": 3.5079097747802734, "learning_rate": 0.0008449374288964733, "loss": 6.9637, "step": 1363 }, { "epoch": 0.46552901023890786, "grad_norm": 3.414142370223999, "learning_rate": 0.0008448236632536974, "loss": 6.5443, "step": 1364 }, { "epoch": 0.4658703071672355, "grad_norm": 7.6356587409973145, "learning_rate": 0.0008447098976109216, "loss": 6.1728, "step": 1365 }, { "epoch": 0.4662116040955631, "grad_norm": 3.9965884685516357, "learning_rate": 0.0008445961319681457, "loss": 6.5908, "step": 1366 }, { "epoch": 0.4665529010238908, "grad_norm": 3.6052424907684326, "learning_rate": 0.0008444823663253698, "loss": 6.8923, "step": 1367 }, { "epoch": 0.4668941979522184, "grad_norm": 3.6835129261016846, "learning_rate": 0.0008443686006825939, "loss": 6.4614, "step": 1368 }, { "epoch": 0.46723549488054605, "grad_norm": 3.5373635292053223, "learning_rate": 0.000844254835039818, "loss": 6.6617, "step": 1369 }, { "epoch": 0.46757679180887374, "grad_norm": 4.537879467010498, "learning_rate": 0.0008441410693970421, "loss": 5.4222, "step": 1370 }, { "epoch": 0.46791808873720137, "grad_norm": 3.356199026107788, "learning_rate": 0.0008440273037542663, "loss": 6.792, "step": 1371 }, { "epoch": 0.468259385665529, "grad_norm": 3.830232858657837, "learning_rate": 0.0008439135381114903, "loss": 6.6182, "step": 1372 }, { "epoch": 0.4686006825938567, "grad_norm": 3.4203712940216064, "learning_rate": 0.0008437997724687144, "loss": 6.5533, "step": 1373 }, { "epoch": 0.4689419795221843, "grad_norm": 3.5441665649414062, "learning_rate": 0.0008436860068259385, "loss": 6.5479, "step": 1374 }, { "epoch": 0.46928327645051193, "grad_norm": 3.4909310340881348, "learning_rate": 0.0008435722411831626, "loss": 6.1958, "step": 1375 }, { "epoch": 0.4696245733788396, "grad_norm": 3.6683506965637207, "learning_rate": 0.0008434584755403867, "loss": 6.6273, "step": 1376 }, { "epoch": 0.46996587030716724, "grad_norm": 4.192461013793945, "learning_rate": 0.000843344709897611, "loss": 6.0141, "step": 1377 }, { "epoch": 0.47030716723549487, "grad_norm": 3.5298233032226562, "learning_rate": 0.0008432309442548351, "loss": 6.9406, "step": 1378 }, { "epoch": 0.4706484641638225, "grad_norm": 3.549668550491333, "learning_rate": 0.0008431171786120592, "loss": 6.8979, "step": 1379 }, { "epoch": 0.4709897610921502, "grad_norm": 3.3617653846740723, "learning_rate": 0.0008430034129692833, "loss": 6.4427, "step": 1380 }, { "epoch": 0.4713310580204778, "grad_norm": 3.317580461502075, "learning_rate": 0.0008428896473265074, "loss": 7.0131, "step": 1381 }, { "epoch": 0.47167235494880544, "grad_norm": 3.481401205062866, "learning_rate": 0.0008427758816837315, "loss": 6.6665, "step": 1382 }, { "epoch": 0.4720136518771331, "grad_norm": 3.4528825283050537, "learning_rate": 0.0008426621160409557, "loss": 6.6673, "step": 1383 }, { "epoch": 0.47235494880546075, "grad_norm": 4.272742748260498, "learning_rate": 0.0008425483503981798, "loss": 5.9544, "step": 1384 }, { "epoch": 0.4726962457337884, "grad_norm": 3.5958762168884277, "learning_rate": 0.0008424345847554039, "loss": 6.8734, "step": 1385 }, { "epoch": 0.47303754266211606, "grad_norm": 3.564652681350708, "learning_rate": 0.000842320819112628, "loss": 6.846, "step": 1386 }, { "epoch": 0.4733788395904437, "grad_norm": 3.3910999298095703, "learning_rate": 0.0008422070534698521, "loss": 6.3894, "step": 1387 }, { "epoch": 0.4737201365187713, "grad_norm": 4.498713493347168, "learning_rate": 0.0008420932878270763, "loss": 6.7253, "step": 1388 }, { "epoch": 0.474061433447099, "grad_norm": 3.539781332015991, "learning_rate": 0.0008419795221843004, "loss": 7.1748, "step": 1389 }, { "epoch": 0.47440273037542663, "grad_norm": 3.6499907970428467, "learning_rate": 0.0008418657565415245, "loss": 7.0704, "step": 1390 }, { "epoch": 0.47474402730375426, "grad_norm": 3.4689838886260986, "learning_rate": 0.0008417519908987485, "loss": 6.6226, "step": 1391 }, { "epoch": 0.4750853242320819, "grad_norm": 3.3279478549957275, "learning_rate": 0.0008416382252559726, "loss": 6.6018, "step": 1392 }, { "epoch": 0.47542662116040957, "grad_norm": 3.3303651809692383, "learning_rate": 0.0008415244596131967, "loss": 6.6632, "step": 1393 }, { "epoch": 0.4757679180887372, "grad_norm": 3.5589680671691895, "learning_rate": 0.000841410693970421, "loss": 6.7845, "step": 1394 }, { "epoch": 0.4761092150170648, "grad_norm": 3.484956741333008, "learning_rate": 0.0008412969283276451, "loss": 6.8862, "step": 1395 }, { "epoch": 0.4764505119453925, "grad_norm": 4.03617525100708, "learning_rate": 0.0008411831626848692, "loss": 6.6096, "step": 1396 }, { "epoch": 0.47679180887372014, "grad_norm": 3.343886375427246, "learning_rate": 0.0008410693970420933, "loss": 6.322, "step": 1397 }, { "epoch": 0.47713310580204776, "grad_norm": 3.6128275394439697, "learning_rate": 0.0008409556313993174, "loss": 6.9015, "step": 1398 }, { "epoch": 0.47747440273037545, "grad_norm": 3.4812707901000977, "learning_rate": 0.0008408418657565415, "loss": 6.8565, "step": 1399 }, { "epoch": 0.4778156996587031, "grad_norm": 3.590444326400757, "learning_rate": 0.0008407281001137657, "loss": 6.534, "step": 1400 }, { "epoch": 0.4781569965870307, "grad_norm": 3.251206159591675, "learning_rate": 0.0008406143344709898, "loss": 6.7066, "step": 1401 }, { "epoch": 0.4784982935153584, "grad_norm": 3.3419792652130127, "learning_rate": 0.0008405005688282139, "loss": 6.9371, "step": 1402 }, { "epoch": 0.478839590443686, "grad_norm": 3.3254618644714355, "learning_rate": 0.000840386803185438, "loss": 6.8618, "step": 1403 }, { "epoch": 0.47918088737201364, "grad_norm": 5.28602409362793, "learning_rate": 0.0008402730375426621, "loss": 5.7656, "step": 1404 }, { "epoch": 0.47952218430034127, "grad_norm": 4.599923610687256, "learning_rate": 0.0008401592718998863, "loss": 5.8085, "step": 1405 }, { "epoch": 0.47986348122866895, "grad_norm": 3.606318712234497, "learning_rate": 0.0008400455062571104, "loss": 6.6817, "step": 1406 }, { "epoch": 0.4802047781569966, "grad_norm": 3.839956283569336, "learning_rate": 0.0008399317406143345, "loss": 7.2384, "step": 1407 }, { "epoch": 0.4805460750853242, "grad_norm": 3.5408711433410645, "learning_rate": 0.0008398179749715586, "loss": 6.7327, "step": 1408 }, { "epoch": 0.4808873720136519, "grad_norm": 3.496644973754883, "learning_rate": 0.0008397042093287828, "loss": 6.9381, "step": 1409 }, { "epoch": 0.4812286689419795, "grad_norm": 3.4285078048706055, "learning_rate": 0.0008395904436860067, "loss": 6.5183, "step": 1410 }, { "epoch": 0.48156996587030715, "grad_norm": 3.322774648666382, "learning_rate": 0.000839476678043231, "loss": 6.3513, "step": 1411 }, { "epoch": 0.48191126279863483, "grad_norm": 3.334423542022705, "learning_rate": 0.0008393629124004551, "loss": 6.9284, "step": 1412 }, { "epoch": 0.48225255972696246, "grad_norm": 3.4194326400756836, "learning_rate": 0.0008392491467576792, "loss": 7.0258, "step": 1413 }, { "epoch": 0.4825938566552901, "grad_norm": 5.742277145385742, "learning_rate": 0.0008391353811149033, "loss": 5.9465, "step": 1414 }, { "epoch": 0.48293515358361777, "grad_norm": 3.6702628135681152, "learning_rate": 0.0008390216154721274, "loss": 6.7509, "step": 1415 }, { "epoch": 0.4832764505119454, "grad_norm": 3.5624711513519287, "learning_rate": 0.0008389078498293515, "loss": 7.3361, "step": 1416 }, { "epoch": 0.483617747440273, "grad_norm": 3.3893463611602783, "learning_rate": 0.0008387940841865757, "loss": 6.5935, "step": 1417 }, { "epoch": 0.48395904436860065, "grad_norm": 4.454052448272705, "learning_rate": 0.0008386803185437998, "loss": 6.352, "step": 1418 }, { "epoch": 0.48430034129692834, "grad_norm": 3.6366562843322754, "learning_rate": 0.0008385665529010239, "loss": 6.7991, "step": 1419 }, { "epoch": 0.48464163822525597, "grad_norm": 3.4529809951782227, "learning_rate": 0.000838452787258248, "loss": 6.6057, "step": 1420 }, { "epoch": 0.4849829351535836, "grad_norm": 3.475712776184082, "learning_rate": 0.0008383390216154721, "loss": 5.9796, "step": 1421 }, { "epoch": 0.4853242320819113, "grad_norm": 3.537789821624756, "learning_rate": 0.0008382252559726962, "loss": 7.112, "step": 1422 }, { "epoch": 0.4856655290102389, "grad_norm": 3.504626750946045, "learning_rate": 0.0008381114903299204, "loss": 6.8114, "step": 1423 }, { "epoch": 0.48600682593856653, "grad_norm": 3.580801486968994, "learning_rate": 0.0008379977246871445, "loss": 6.4932, "step": 1424 }, { "epoch": 0.4863481228668942, "grad_norm": 3.4925484657287598, "learning_rate": 0.0008378839590443686, "loss": 6.7872, "step": 1425 }, { "epoch": 0.48668941979522184, "grad_norm": 3.814373254776001, "learning_rate": 0.0008377701934015928, "loss": 6.9086, "step": 1426 }, { "epoch": 0.4870307167235495, "grad_norm": 3.2810380458831787, "learning_rate": 0.0008376564277588169, "loss": 6.6771, "step": 1427 }, { "epoch": 0.48737201365187716, "grad_norm": 3.3674893379211426, "learning_rate": 0.0008375426621160411, "loss": 6.7597, "step": 1428 }, { "epoch": 0.4877133105802048, "grad_norm": 3.3947198390960693, "learning_rate": 0.0008374288964732652, "loss": 6.5867, "step": 1429 }, { "epoch": 0.4880546075085324, "grad_norm": 3.6930675506591797, "learning_rate": 0.0008373151308304892, "loss": 6.7218, "step": 1430 }, { "epoch": 0.4883959044368601, "grad_norm": 3.436976909637451, "learning_rate": 0.0008372013651877133, "loss": 6.906, "step": 1431 }, { "epoch": 0.4887372013651877, "grad_norm": 3.462581157684326, "learning_rate": 0.0008370875995449374, "loss": 6.7478, "step": 1432 }, { "epoch": 0.48907849829351535, "grad_norm": 3.483604907989502, "learning_rate": 0.0008369738339021615, "loss": 6.3906, "step": 1433 }, { "epoch": 0.489419795221843, "grad_norm": 3.3858816623687744, "learning_rate": 0.0008368600682593857, "loss": 6.3999, "step": 1434 }, { "epoch": 0.48976109215017066, "grad_norm": 3.541221857070923, "learning_rate": 0.0008367463026166098, "loss": 7.3143, "step": 1435 }, { "epoch": 0.4901023890784983, "grad_norm": 3.380033254623413, "learning_rate": 0.0008366325369738339, "loss": 6.7311, "step": 1436 }, { "epoch": 0.4904436860068259, "grad_norm": 3.423128604888916, "learning_rate": 0.000836518771331058, "loss": 6.5591, "step": 1437 }, { "epoch": 0.4907849829351536, "grad_norm": 3.3924520015716553, "learning_rate": 0.0008364050056882821, "loss": 6.8674, "step": 1438 }, { "epoch": 0.49112627986348123, "grad_norm": 3.7471725940704346, "learning_rate": 0.0008362912400455062, "loss": 6.6909, "step": 1439 }, { "epoch": 0.49146757679180886, "grad_norm": 3.535061836242676, "learning_rate": 0.0008361774744027304, "loss": 6.5537, "step": 1440 }, { "epoch": 0.49180887372013654, "grad_norm": 3.317924976348877, "learning_rate": 0.0008360637087599545, "loss": 6.6799, "step": 1441 }, { "epoch": 0.49215017064846417, "grad_norm": 5.021495342254639, "learning_rate": 0.0008359499431171786, "loss": 5.363, "step": 1442 }, { "epoch": 0.4924914675767918, "grad_norm": 3.5335588455200195, "learning_rate": 0.0008358361774744028, "loss": 6.8348, "step": 1443 }, { "epoch": 0.4928327645051195, "grad_norm": 3.5306732654571533, "learning_rate": 0.0008357224118316269, "loss": 6.6957, "step": 1444 }, { "epoch": 0.4931740614334471, "grad_norm": 3.4954781532287598, "learning_rate": 0.0008356086461888511, "loss": 6.8805, "step": 1445 }, { "epoch": 0.49351535836177474, "grad_norm": 3.4456214904785156, "learning_rate": 0.0008354948805460752, "loss": 6.6546, "step": 1446 }, { "epoch": 0.49385665529010236, "grad_norm": 3.323965311050415, "learning_rate": 0.0008353811149032993, "loss": 6.8501, "step": 1447 }, { "epoch": 0.49419795221843005, "grad_norm": 3.692003011703491, "learning_rate": 0.0008352673492605234, "loss": 6.282, "step": 1448 }, { "epoch": 0.4945392491467577, "grad_norm": 3.3409910202026367, "learning_rate": 0.0008351535836177474, "loss": 6.6912, "step": 1449 }, { "epoch": 0.4948805460750853, "grad_norm": 3.5133414268493652, "learning_rate": 0.0008350398179749715, "loss": 6.0178, "step": 1450 }, { "epoch": 0.495221843003413, "grad_norm": 4.341811656951904, "learning_rate": 0.0008349260523321957, "loss": 6.4182, "step": 1451 }, { "epoch": 0.4955631399317406, "grad_norm": 4.636251926422119, "learning_rate": 0.0008348122866894198, "loss": 6.824, "step": 1452 }, { "epoch": 0.49590443686006824, "grad_norm": 5.0915632247924805, "learning_rate": 0.0008346985210466439, "loss": 5.3702, "step": 1453 }, { "epoch": 0.4962457337883959, "grad_norm": 3.465383529663086, "learning_rate": 0.000834584755403868, "loss": 6.642, "step": 1454 }, { "epoch": 0.49658703071672355, "grad_norm": 3.462819814682007, "learning_rate": 0.0008344709897610921, "loss": 6.5499, "step": 1455 }, { "epoch": 0.4969283276450512, "grad_norm": 3.4404056072235107, "learning_rate": 0.0008343572241183162, "loss": 6.8499, "step": 1456 }, { "epoch": 0.49726962457337887, "grad_norm": 3.342381238937378, "learning_rate": 0.0008342434584755404, "loss": 6.5637, "step": 1457 }, { "epoch": 0.4976109215017065, "grad_norm": 3.599907875061035, "learning_rate": 0.0008341296928327645, "loss": 6.5076, "step": 1458 }, { "epoch": 0.4979522184300341, "grad_norm": 3.521761178970337, "learning_rate": 0.0008340159271899886, "loss": 6.557, "step": 1459 }, { "epoch": 0.49829351535836175, "grad_norm": 4.715250015258789, "learning_rate": 0.0008339021615472128, "loss": 5.1495, "step": 1460 }, { "epoch": 0.49863481228668943, "grad_norm": 3.4854562282562256, "learning_rate": 0.0008337883959044369, "loss": 6.7593, "step": 1461 }, { "epoch": 0.49897610921501706, "grad_norm": 3.903296947479248, "learning_rate": 0.000833674630261661, "loss": 6.1174, "step": 1462 }, { "epoch": 0.4993174061433447, "grad_norm": 3.4270193576812744, "learning_rate": 0.0008335608646188852, "loss": 6.5968, "step": 1463 }, { "epoch": 0.49965870307167237, "grad_norm": 8.496833801269531, "learning_rate": 0.0008334470989761093, "loss": 6.2964, "step": 1464 }, { "epoch": 0.5, "grad_norm": 3.6175074577331543, "learning_rate": 0.0008333333333333334, "loss": 6.989, "step": 1465 }, { "epoch": 0.5003412969283276, "grad_norm": 3.5683434009552, "learning_rate": 0.0008332195676905575, "loss": 7.0006, "step": 1466 }, { "epoch": 0.5006825938566553, "grad_norm": 3.887437582015991, "learning_rate": 0.0008331058020477816, "loss": 6.1925, "step": 1467 }, { "epoch": 0.5010238907849829, "grad_norm": 3.302295207977295, "learning_rate": 0.0008329920364050058, "loss": 6.4899, "step": 1468 }, { "epoch": 0.5013651877133106, "grad_norm": 3.6436431407928467, "learning_rate": 0.0008328782707622298, "loss": 6.103, "step": 1469 }, { "epoch": 0.5017064846416383, "grad_norm": 3.531428337097168, "learning_rate": 0.0008327645051194539, "loss": 6.1611, "step": 1470 }, { "epoch": 0.5020477815699659, "grad_norm": 3.350801944732666, "learning_rate": 0.000832650739476678, "loss": 6.4045, "step": 1471 }, { "epoch": 0.5023890784982935, "grad_norm": 4.109683513641357, "learning_rate": 0.0008325369738339021, "loss": 3.9738, "step": 1472 }, { "epoch": 0.5027303754266211, "grad_norm": 3.4948461055755615, "learning_rate": 0.0008324232081911262, "loss": 6.8257, "step": 1473 }, { "epoch": 0.5030716723549488, "grad_norm": 3.710834264755249, "learning_rate": 0.0008323094425483504, "loss": 6.6345, "step": 1474 }, { "epoch": 0.5034129692832765, "grad_norm": 3.6197144985198975, "learning_rate": 0.0008321956769055745, "loss": 6.386, "step": 1475 }, { "epoch": 0.5037542662116041, "grad_norm": 3.6927449703216553, "learning_rate": 0.0008320819112627986, "loss": 6.3214, "step": 1476 }, { "epoch": 0.5040955631399318, "grad_norm": 3.6893727779388428, "learning_rate": 0.0008319681456200228, "loss": 6.8485, "step": 1477 }, { "epoch": 0.5044368600682594, "grad_norm": 4.398456573486328, "learning_rate": 0.0008318543799772469, "loss": 6.3134, "step": 1478 }, { "epoch": 0.504778156996587, "grad_norm": 3.5371034145355225, "learning_rate": 0.000831740614334471, "loss": 6.685, "step": 1479 }, { "epoch": 0.5051194539249146, "grad_norm": 3.924985885620117, "learning_rate": 0.0008316268486916952, "loss": 6.4405, "step": 1480 }, { "epoch": 0.5054607508532423, "grad_norm": 3.409550666809082, "learning_rate": 0.0008315130830489193, "loss": 6.5593, "step": 1481 }, { "epoch": 0.50580204778157, "grad_norm": 3.491574287414551, "learning_rate": 0.0008313993174061434, "loss": 6.2359, "step": 1482 }, { "epoch": 0.5061433447098976, "grad_norm": 3.836836338043213, "learning_rate": 0.0008312855517633675, "loss": 6.3855, "step": 1483 }, { "epoch": 0.5064846416382253, "grad_norm": 5.168152809143066, "learning_rate": 0.0008311717861205916, "loss": 4.9784, "step": 1484 }, { "epoch": 0.5068259385665529, "grad_norm": 9.451974868774414, "learning_rate": 0.0008310580204778158, "loss": 6.8587, "step": 1485 }, { "epoch": 0.5071672354948805, "grad_norm": 3.954226493835449, "learning_rate": 0.0008309442548350399, "loss": 7.1898, "step": 1486 }, { "epoch": 0.5075085324232081, "grad_norm": 5.50667142868042, "learning_rate": 0.000830830489192264, "loss": 6.3041, "step": 1487 }, { "epoch": 0.5078498293515359, "grad_norm": 3.840693235397339, "learning_rate": 0.000830716723549488, "loss": 6.9871, "step": 1488 }, { "epoch": 0.5081911262798635, "grad_norm": 8.774356842041016, "learning_rate": 0.0008306029579067121, "loss": 8.2422, "step": 1489 }, { "epoch": 0.5085324232081911, "grad_norm": 3.4625966548919678, "learning_rate": 0.0008304891922639362, "loss": 6.9081, "step": 1490 }, { "epoch": 0.5088737201365188, "grad_norm": 3.4914445877075195, "learning_rate": 0.0008303754266211604, "loss": 6.7927, "step": 1491 }, { "epoch": 0.5092150170648464, "grad_norm": 3.4084482192993164, "learning_rate": 0.0008302616609783845, "loss": 7.4133, "step": 1492 }, { "epoch": 0.509556313993174, "grad_norm": 7.066652774810791, "learning_rate": 0.0008301478953356087, "loss": 6.348, "step": 1493 }, { "epoch": 0.5098976109215017, "grad_norm": 4.37141227722168, "learning_rate": 0.0008300341296928328, "loss": 6.462, "step": 1494 }, { "epoch": 0.5102389078498294, "grad_norm": 5.614762783050537, "learning_rate": 0.0008299203640500569, "loss": 6.7, "step": 1495 }, { "epoch": 0.510580204778157, "grad_norm": 4.1260271072387695, "learning_rate": 0.000829806598407281, "loss": 6.0831, "step": 1496 }, { "epoch": 0.5109215017064846, "grad_norm": 3.777806282043457, "learning_rate": 0.0008296928327645052, "loss": 7.0977, "step": 1497 }, { "epoch": 0.5112627986348123, "grad_norm": 3.448523759841919, "learning_rate": 0.0008295790671217293, "loss": 6.7309, "step": 1498 }, { "epoch": 0.5116040955631399, "grad_norm": 3.3855607509613037, "learning_rate": 0.0008294653014789534, "loss": 6.2709, "step": 1499 }, { "epoch": 0.5119453924914675, "grad_norm": 3.435232162475586, "learning_rate": 0.0008293515358361775, "loss": 6.3555, "step": 1500 }, { "epoch": 0.5122866894197953, "grad_norm": 3.2398738861083984, "learning_rate": 0.0008292377701934016, "loss": 3.8611, "step": 1501 }, { "epoch": 0.5126279863481229, "grad_norm": 4.865748882293701, "learning_rate": 0.0008291240045506257, "loss": 6.4543, "step": 1502 }, { "epoch": 0.5129692832764505, "grad_norm": 4.460921287536621, "learning_rate": 0.0008290102389078499, "loss": 6.4598, "step": 1503 }, { "epoch": 0.5133105802047782, "grad_norm": 4.03560209274292, "learning_rate": 0.000828896473265074, "loss": 6.5586, "step": 1504 }, { "epoch": 0.5136518771331058, "grad_norm": 3.64315128326416, "learning_rate": 0.0008287827076222981, "loss": 7.1097, "step": 1505 }, { "epoch": 0.5139931740614334, "grad_norm": 4.334589004516602, "learning_rate": 0.0008286689419795222, "loss": 6.402, "step": 1506 }, { "epoch": 0.514334470989761, "grad_norm": 27.29024314880371, "learning_rate": 0.0008285551763367463, "loss": 7.7064, "step": 1507 }, { "epoch": 0.5146757679180888, "grad_norm": 4.106762886047363, "learning_rate": 0.0008284414106939704, "loss": 6.5886, "step": 1508 }, { "epoch": 0.5150170648464164, "grad_norm": 4.14035701751709, "learning_rate": 0.0008283276450511945, "loss": 7.2057, "step": 1509 }, { "epoch": 0.515358361774744, "grad_norm": 3.5756776332855225, "learning_rate": 0.0008282138794084187, "loss": 6.7014, "step": 1510 }, { "epoch": 0.5156996587030717, "grad_norm": 3.2851498126983643, "learning_rate": 0.0008281001137656428, "loss": 6.6437, "step": 1511 }, { "epoch": 0.5160409556313993, "grad_norm": 3.490445375442505, "learning_rate": 0.0008279863481228669, "loss": 6.464, "step": 1512 }, { "epoch": 0.5163822525597269, "grad_norm": 3.3551950454711914, "learning_rate": 0.000827872582480091, "loss": 6.5594, "step": 1513 }, { "epoch": 0.5167235494880547, "grad_norm": 3.4405341148376465, "learning_rate": 0.0008277588168373152, "loss": 6.8159, "step": 1514 }, { "epoch": 0.5170648464163823, "grad_norm": 3.30784010887146, "learning_rate": 0.0008276450511945393, "loss": 6.8372, "step": 1515 }, { "epoch": 0.5174061433447099, "grad_norm": 3.6393938064575195, "learning_rate": 0.0008275312855517634, "loss": 6.9488, "step": 1516 }, { "epoch": 0.5177474402730375, "grad_norm": 3.558758497238159, "learning_rate": 0.0008274175199089875, "loss": 6.452, "step": 1517 }, { "epoch": 0.5180887372013652, "grad_norm": 3.8486268520355225, "learning_rate": 0.0008273037542662116, "loss": 6.601, "step": 1518 }, { "epoch": 0.5184300341296928, "grad_norm": 3.421262741088867, "learning_rate": 0.0008271899886234357, "loss": 6.6491, "step": 1519 }, { "epoch": 0.5187713310580204, "grad_norm": 3.4998815059661865, "learning_rate": 0.0008270762229806599, "loss": 6.5504, "step": 1520 }, { "epoch": 0.5191126279863482, "grad_norm": 3.533386707305908, "learning_rate": 0.000826962457337884, "loss": 6.797, "step": 1521 }, { "epoch": 0.5194539249146758, "grad_norm": 3.6013705730438232, "learning_rate": 0.0008268486916951081, "loss": 7.1215, "step": 1522 }, { "epoch": 0.5197952218430034, "grad_norm": 3.9131691455841064, "learning_rate": 0.0008267349260523322, "loss": 6.4156, "step": 1523 }, { "epoch": 0.520136518771331, "grad_norm": 3.614016532897949, "learning_rate": 0.0008266211604095563, "loss": 6.9913, "step": 1524 }, { "epoch": 0.5204778156996587, "grad_norm": 4.081565856933594, "learning_rate": 0.0008265073947667804, "loss": 6.7432, "step": 1525 }, { "epoch": 0.5208191126279863, "grad_norm": 3.426879405975342, "learning_rate": 0.0008263936291240047, "loss": 7.093, "step": 1526 }, { "epoch": 0.521160409556314, "grad_norm": 3.377384662628174, "learning_rate": 0.0008262798634812287, "loss": 6.8175, "step": 1527 }, { "epoch": 0.5215017064846417, "grad_norm": 3.400791883468628, "learning_rate": 0.0008261660978384528, "loss": 6.8663, "step": 1528 }, { "epoch": 0.5218430034129693, "grad_norm": 3.3176791667938232, "learning_rate": 0.0008260523321956769, "loss": 7.0355, "step": 1529 }, { "epoch": 0.5221843003412969, "grad_norm": 3.3764326572418213, "learning_rate": 0.000825938566552901, "loss": 6.8315, "step": 1530 }, { "epoch": 0.5225255972696246, "grad_norm": 3.2921946048736572, "learning_rate": 0.0008258248009101252, "loss": 6.869, "step": 1531 }, { "epoch": 0.5228668941979522, "grad_norm": 3.3354740142822266, "learning_rate": 0.0008257110352673493, "loss": 6.8916, "step": 1532 }, { "epoch": 0.5232081911262798, "grad_norm": 7.8725199699401855, "learning_rate": 0.0008255972696245734, "loss": 5.065, "step": 1533 }, { "epoch": 0.5235494880546075, "grad_norm": 3.697795867919922, "learning_rate": 0.0008254835039817975, "loss": 6.9579, "step": 1534 }, { "epoch": 0.5238907849829352, "grad_norm": 8.069738388061523, "learning_rate": 0.0008253697383390216, "loss": 6.3024, "step": 1535 }, { "epoch": 0.5242320819112628, "grad_norm": 4.431558132171631, "learning_rate": 0.0008252559726962457, "loss": 6.1488, "step": 1536 }, { "epoch": 0.5245733788395904, "grad_norm": 4.185642719268799, "learning_rate": 0.0008251422070534699, "loss": 7.1044, "step": 1537 }, { "epoch": 0.5249146757679181, "grad_norm": 3.705733060836792, "learning_rate": 0.000825028441410694, "loss": 6.8109, "step": 1538 }, { "epoch": 0.5252559726962457, "grad_norm": 3.4032537937164307, "learning_rate": 0.0008249146757679181, "loss": 6.6804, "step": 1539 }, { "epoch": 0.5255972696245734, "grad_norm": 3.183061122894287, "learning_rate": 0.0008248009101251422, "loss": 6.3562, "step": 1540 }, { "epoch": 0.525938566552901, "grad_norm": 3.406890630722046, "learning_rate": 0.0008246871444823663, "loss": 6.4419, "step": 1541 }, { "epoch": 0.5262798634812287, "grad_norm": 3.5061683654785156, "learning_rate": 0.0008245733788395904, "loss": 6.7721, "step": 1542 }, { "epoch": 0.5266211604095563, "grad_norm": 3.441354751586914, "learning_rate": 0.0008244596131968147, "loss": 6.1766, "step": 1543 }, { "epoch": 0.5269624573378839, "grad_norm": 4.2335429191589355, "learning_rate": 0.0008243458475540388, "loss": 6.2502, "step": 1544 }, { "epoch": 0.5273037542662116, "grad_norm": 3.5456383228302, "learning_rate": 0.0008242320819112629, "loss": 6.5149, "step": 1545 }, { "epoch": 0.5276450511945392, "grad_norm": 3.58383846282959, "learning_rate": 0.000824118316268487, "loss": 6.4269, "step": 1546 }, { "epoch": 0.5279863481228669, "grad_norm": 4.371920585632324, "learning_rate": 0.000824004550625711, "loss": 6.4354, "step": 1547 }, { "epoch": 0.5283276450511946, "grad_norm": 3.588916063308716, "learning_rate": 0.0008238907849829351, "loss": 6.6855, "step": 1548 }, { "epoch": 0.5286689419795222, "grad_norm": 4.130184650421143, "learning_rate": 0.0008237770193401593, "loss": 6.1499, "step": 1549 }, { "epoch": 0.5290102389078498, "grad_norm": 4.080915927886963, "learning_rate": 0.0008236632536973834, "loss": 5.4587, "step": 1550 }, { "epoch": 0.5293515358361774, "grad_norm": 3.7122843265533447, "learning_rate": 0.0008235494880546075, "loss": 6.3315, "step": 1551 }, { "epoch": 0.5296928327645051, "grad_norm": 3.8710949420928955, "learning_rate": 0.0008234357224118316, "loss": 6.4841, "step": 1552 }, { "epoch": 0.5300341296928328, "grad_norm": 3.440357208251953, "learning_rate": 0.0008233219567690557, "loss": 6.9585, "step": 1553 }, { "epoch": 0.5303754266211604, "grad_norm": 3.3393759727478027, "learning_rate": 0.0008232081911262799, "loss": 6.8411, "step": 1554 }, { "epoch": 0.5307167235494881, "grad_norm": 3.5455715656280518, "learning_rate": 0.000823094425483504, "loss": 6.6823, "step": 1555 }, { "epoch": 0.5310580204778157, "grad_norm": 3.815316677093506, "learning_rate": 0.0008229806598407281, "loss": 6.1607, "step": 1556 }, { "epoch": 0.5313993174061433, "grad_norm": 3.953657388687134, "learning_rate": 0.0008228668941979522, "loss": 6.6442, "step": 1557 }, { "epoch": 0.531740614334471, "grad_norm": 3.4265329837799072, "learning_rate": 0.0008227531285551763, "loss": 6.6399, "step": 1558 }, { "epoch": 0.5320819112627987, "grad_norm": 3.6078526973724365, "learning_rate": 0.0008226393629124004, "loss": 6.654, "step": 1559 }, { "epoch": 0.5324232081911263, "grad_norm": 3.5532915592193604, "learning_rate": 0.0008225255972696247, "loss": 6.0045, "step": 1560 }, { "epoch": 0.532764505119454, "grad_norm": 3.4579410552978516, "learning_rate": 0.0008224118316268488, "loss": 6.6127, "step": 1561 }, { "epoch": 0.5331058020477816, "grad_norm": 3.489680051803589, "learning_rate": 0.0008222980659840729, "loss": 7.2289, "step": 1562 }, { "epoch": 0.5334470989761092, "grad_norm": 3.981506824493408, "learning_rate": 0.000822184300341297, "loss": 6.8315, "step": 1563 }, { "epoch": 0.5337883959044368, "grad_norm": 3.5434958934783936, "learning_rate": 0.0008220705346985211, "loss": 6.4467, "step": 1564 }, { "epoch": 0.5341296928327645, "grad_norm": 20.380250930786133, "learning_rate": 0.0008219567690557452, "loss": 5.3638, "step": 1565 }, { "epoch": 0.5344709897610922, "grad_norm": 3.482201337814331, "learning_rate": 0.0008218430034129693, "loss": 6.8673, "step": 1566 }, { "epoch": 0.5348122866894198, "grad_norm": 3.6969149112701416, "learning_rate": 0.0008217292377701934, "loss": 6.7083, "step": 1567 }, { "epoch": 0.5351535836177475, "grad_norm": 3.4104528427124023, "learning_rate": 0.0008216154721274175, "loss": 6.7722, "step": 1568 }, { "epoch": 0.5354948805460751, "grad_norm": 3.611492872238159, "learning_rate": 0.0008215017064846416, "loss": 6.3124, "step": 1569 }, { "epoch": 0.5358361774744027, "grad_norm": 16.186843872070312, "learning_rate": 0.0008213879408418657, "loss": 8.7558, "step": 1570 }, { "epoch": 0.5361774744027303, "grad_norm": 3.5169475078582764, "learning_rate": 0.0008212741751990899, "loss": 7.0439, "step": 1571 }, { "epoch": 0.5365187713310581, "grad_norm": 3.61405611038208, "learning_rate": 0.000821160409556314, "loss": 6.2176, "step": 1572 }, { "epoch": 0.5368600682593857, "grad_norm": 3.3224093914031982, "learning_rate": 0.0008210466439135381, "loss": 6.693, "step": 1573 }, { "epoch": 0.5372013651877133, "grad_norm": 3.339376211166382, "learning_rate": 0.0008209328782707622, "loss": 6.3451, "step": 1574 }, { "epoch": 0.537542662116041, "grad_norm": 3.3219499588012695, "learning_rate": 0.0008208191126279863, "loss": 6.9464, "step": 1575 }, { "epoch": 0.5378839590443686, "grad_norm": 3.3583548069000244, "learning_rate": 0.0008207053469852104, "loss": 6.9545, "step": 1576 }, { "epoch": 0.5382252559726962, "grad_norm": 3.8789596557617188, "learning_rate": 0.0008205915813424347, "loss": 6.5906, "step": 1577 }, { "epoch": 0.5385665529010238, "grad_norm": 3.4868991374969482, "learning_rate": 0.0008204778156996588, "loss": 6.8179, "step": 1578 }, { "epoch": 0.5389078498293516, "grad_norm": 3.5278048515319824, "learning_rate": 0.0008203640500568829, "loss": 6.8537, "step": 1579 }, { "epoch": 0.5392491467576792, "grad_norm": 3.1563076972961426, "learning_rate": 0.000820250284414107, "loss": 6.7337, "step": 1580 }, { "epoch": 0.5395904436860068, "grad_norm": 3.347745418548584, "learning_rate": 0.0008201365187713311, "loss": 6.8519, "step": 1581 }, { "epoch": 0.5399317406143345, "grad_norm": 3.3623850345611572, "learning_rate": 0.0008200227531285552, "loss": 6.7128, "step": 1582 }, { "epoch": 0.5402730375426621, "grad_norm": 3.4226489067077637, "learning_rate": 0.0008199089874857794, "loss": 6.9467, "step": 1583 }, { "epoch": 0.5406143344709897, "grad_norm": 3.467519521713257, "learning_rate": 0.0008197952218430035, "loss": 7.0993, "step": 1584 }, { "epoch": 0.5409556313993175, "grad_norm": 4.463557720184326, "learning_rate": 0.0008196814562002275, "loss": 6.207, "step": 1585 }, { "epoch": 0.5412969283276451, "grad_norm": 7.1371846199035645, "learning_rate": 0.0008195676905574516, "loss": 6.115, "step": 1586 }, { "epoch": 0.5416382252559727, "grad_norm": 4.31298828125, "learning_rate": 0.0008194539249146757, "loss": 6.4027, "step": 1587 }, { "epoch": 0.5419795221843003, "grad_norm": 3.982283115386963, "learning_rate": 0.0008193401592718998, "loss": 6.9022, "step": 1588 }, { "epoch": 0.542320819112628, "grad_norm": 3.6711089611053467, "learning_rate": 0.000819226393629124, "loss": 6.7532, "step": 1589 }, { "epoch": 0.5426621160409556, "grad_norm": 3.598482370376587, "learning_rate": 0.0008191126279863481, "loss": 6.0211, "step": 1590 }, { "epoch": 0.5430034129692832, "grad_norm": 3.518918514251709, "learning_rate": 0.0008189988623435722, "loss": 6.7096, "step": 1591 }, { "epoch": 0.543344709897611, "grad_norm": 3.3870673179626465, "learning_rate": 0.0008188850967007963, "loss": 6.8977, "step": 1592 }, { "epoch": 0.5436860068259386, "grad_norm": 3.3564047813415527, "learning_rate": 0.0008187713310580204, "loss": 6.5459, "step": 1593 }, { "epoch": 0.5440273037542662, "grad_norm": 3.4745521545410156, "learning_rate": 0.0008186575654152447, "loss": 6.3275, "step": 1594 }, { "epoch": 0.5443686006825939, "grad_norm": 3.4397785663604736, "learning_rate": 0.0008185437997724688, "loss": 6.644, "step": 1595 }, { "epoch": 0.5447098976109215, "grad_norm": 3.6341989040374756, "learning_rate": 0.0008184300341296929, "loss": 6.3724, "step": 1596 }, { "epoch": 0.5450511945392491, "grad_norm": 3.4546432495117188, "learning_rate": 0.000818316268486917, "loss": 6.0687, "step": 1597 }, { "epoch": 0.5453924914675768, "grad_norm": 3.2902228832244873, "learning_rate": 0.0008182025028441411, "loss": 6.7731, "step": 1598 }, { "epoch": 0.5457337883959045, "grad_norm": 21.545167922973633, "learning_rate": 0.0008180887372013652, "loss": 9.0693, "step": 1599 }, { "epoch": 0.5460750853242321, "grad_norm": 4.197141170501709, "learning_rate": 0.0008179749715585894, "loss": 6.2883, "step": 1600 }, { "epoch": 0.5464163822525597, "grad_norm": 3.763883113861084, "learning_rate": 0.0008178612059158135, "loss": 6.7896, "step": 1601 }, { "epoch": 0.5467576791808874, "grad_norm": 3.678478717803955, "learning_rate": 0.0008177474402730376, "loss": 6.6968, "step": 1602 }, { "epoch": 0.547098976109215, "grad_norm": 4.786699295043945, "learning_rate": 0.0008176336746302617, "loss": 6.342, "step": 1603 }, { "epoch": 0.5474402730375426, "grad_norm": 2.2955498695373535, "learning_rate": 0.0008175199089874858, "loss": 3.6378, "step": 1604 }, { "epoch": 0.5477815699658704, "grad_norm": 3.439753293991089, "learning_rate": 0.0008174061433447098, "loss": 6.7798, "step": 1605 }, { "epoch": 0.548122866894198, "grad_norm": 4.1457839012146, "learning_rate": 0.000817292377701934, "loss": 6.3028, "step": 1606 }, { "epoch": 0.5484641638225256, "grad_norm": 3.4417805671691895, "learning_rate": 0.0008171786120591581, "loss": 7.0572, "step": 1607 }, { "epoch": 0.5488054607508532, "grad_norm": 3.348179578781128, "learning_rate": 0.0008170648464163822, "loss": 6.7471, "step": 1608 }, { "epoch": 0.5491467576791809, "grad_norm": 3.4743990898132324, "learning_rate": 0.0008169510807736063, "loss": 7.0257, "step": 1609 }, { "epoch": 0.5494880546075085, "grad_norm": 3.894490957260132, "learning_rate": 0.0008168373151308304, "loss": 6.1222, "step": 1610 }, { "epoch": 0.5498293515358362, "grad_norm": 3.6673929691314697, "learning_rate": 0.0008167235494880547, "loss": 6.5822, "step": 1611 }, { "epoch": 0.5501706484641639, "grad_norm": 3.5163426399230957, "learning_rate": 0.0008166097838452788, "loss": 6.7198, "step": 1612 }, { "epoch": 0.5505119453924915, "grad_norm": 4.025699138641357, "learning_rate": 0.0008164960182025029, "loss": 6.3125, "step": 1613 }, { "epoch": 0.5508532423208191, "grad_norm": 4.222406387329102, "learning_rate": 0.000816382252559727, "loss": 7.1646, "step": 1614 }, { "epoch": 0.5511945392491467, "grad_norm": 6.100660800933838, "learning_rate": 0.0008162684869169511, "loss": 6.3055, "step": 1615 }, { "epoch": 0.5515358361774744, "grad_norm": 3.6640098094940186, "learning_rate": 0.0008161547212741752, "loss": 6.3726, "step": 1616 }, { "epoch": 0.551877133105802, "grad_norm": 3.549154281616211, "learning_rate": 0.0008160409556313994, "loss": 6.8686, "step": 1617 }, { "epoch": 0.5522184300341297, "grad_norm": 3.3445968627929688, "learning_rate": 0.0008159271899886235, "loss": 6.7752, "step": 1618 }, { "epoch": 0.5525597269624574, "grad_norm": 3.3909804821014404, "learning_rate": 0.0008158134243458476, "loss": 6.7392, "step": 1619 }, { "epoch": 0.552901023890785, "grad_norm": 3.3068623542785645, "learning_rate": 0.0008156996587030717, "loss": 7.0023, "step": 1620 }, { "epoch": 0.5532423208191126, "grad_norm": 5.286759376525879, "learning_rate": 0.0008155858930602958, "loss": 5.7817, "step": 1621 }, { "epoch": 0.5535836177474402, "grad_norm": 4.21416711807251, "learning_rate": 0.0008154721274175199, "loss": 6.6314, "step": 1622 }, { "epoch": 0.5539249146757679, "grad_norm": 3.586740016937256, "learning_rate": 0.0008153583617747441, "loss": 6.7685, "step": 1623 }, { "epoch": 0.5542662116040956, "grad_norm": 3.7087645530700684, "learning_rate": 0.0008152445961319681, "loss": 5.8739, "step": 1624 }, { "epoch": 0.5546075085324232, "grad_norm": 3.9392848014831543, "learning_rate": 0.0008151308304891922, "loss": 6.6611, "step": 1625 }, { "epoch": 0.5549488054607509, "grad_norm": 3.521493673324585, "learning_rate": 0.0008150170648464163, "loss": 7.1034, "step": 1626 }, { "epoch": 0.5552901023890785, "grad_norm": 4.224700927734375, "learning_rate": 0.0008149032992036404, "loss": 5.8669, "step": 1627 }, { "epoch": 0.5556313993174061, "grad_norm": 3.757660388946533, "learning_rate": 0.0008147895335608646, "loss": 6.5412, "step": 1628 }, { "epoch": 0.5559726962457338, "grad_norm": 3.58388614654541, "learning_rate": 0.0008146757679180888, "loss": 6.7925, "step": 1629 }, { "epoch": 0.5563139931740614, "grad_norm": 3.373109817504883, "learning_rate": 0.0008145620022753129, "loss": 6.5338, "step": 1630 }, { "epoch": 0.5566552901023891, "grad_norm": 3.3776378631591797, "learning_rate": 0.000814448236632537, "loss": 6.6037, "step": 1631 }, { "epoch": 0.5569965870307167, "grad_norm": 3.9116930961608887, "learning_rate": 0.0008143344709897611, "loss": 5.6358, "step": 1632 }, { "epoch": 0.5573378839590444, "grad_norm": 3.494377851486206, "learning_rate": 0.0008142207053469852, "loss": 6.9675, "step": 1633 }, { "epoch": 0.557679180887372, "grad_norm": 3.6268115043640137, "learning_rate": 0.0008141069397042094, "loss": 6.7061, "step": 1634 }, { "epoch": 0.5580204778156996, "grad_norm": 4.0463409423828125, "learning_rate": 0.0008139931740614335, "loss": 5.8411, "step": 1635 }, { "epoch": 0.5583617747440273, "grad_norm": 3.519866466522217, "learning_rate": 0.0008138794084186576, "loss": 6.6951, "step": 1636 }, { "epoch": 0.558703071672355, "grad_norm": 3.481750965118408, "learning_rate": 0.0008137656427758817, "loss": 7.011, "step": 1637 }, { "epoch": 0.5590443686006826, "grad_norm": 3.268798828125, "learning_rate": 0.0008136518771331058, "loss": 6.7461, "step": 1638 }, { "epoch": 0.5593856655290103, "grad_norm": 3.8919827938079834, "learning_rate": 0.0008135381114903299, "loss": 6.4497, "step": 1639 }, { "epoch": 0.5597269624573379, "grad_norm": 3.408388614654541, "learning_rate": 0.0008134243458475541, "loss": 7.0488, "step": 1640 }, { "epoch": 0.5600682593856655, "grad_norm": 3.337852954864502, "learning_rate": 0.0008133105802047782, "loss": 6.6489, "step": 1641 }, { "epoch": 0.5604095563139931, "grad_norm": 3.176896095275879, "learning_rate": 0.0008131968145620023, "loss": 7.0354, "step": 1642 }, { "epoch": 0.5607508532423208, "grad_norm": 3.4171290397644043, "learning_rate": 0.0008130830489192265, "loss": 6.6106, "step": 1643 }, { "epoch": 0.5610921501706485, "grad_norm": 3.348039150238037, "learning_rate": 0.0008129692832764504, "loss": 6.4115, "step": 1644 }, { "epoch": 0.5614334470989761, "grad_norm": 3.897597074508667, "learning_rate": 0.0008128555176336746, "loss": 6.5893, "step": 1645 }, { "epoch": 0.5617747440273038, "grad_norm": 3.424443006515503, "learning_rate": 0.0008127417519908988, "loss": 6.4034, "step": 1646 }, { "epoch": 0.5621160409556314, "grad_norm": 3.577786445617676, "learning_rate": 0.0008126279863481229, "loss": 6.916, "step": 1647 }, { "epoch": 0.562457337883959, "grad_norm": 3.442438840866089, "learning_rate": 0.000812514220705347, "loss": 7.1346, "step": 1648 }, { "epoch": 0.5627986348122866, "grad_norm": 3.3798537254333496, "learning_rate": 0.0008124004550625711, "loss": 6.4942, "step": 1649 }, { "epoch": 0.5631399317406144, "grad_norm": 3.885824680328369, "learning_rate": 0.0008122866894197952, "loss": 6.8912, "step": 1650 }, { "epoch": 0.563481228668942, "grad_norm": 3.910099744796753, "learning_rate": 0.0008121729237770193, "loss": 6.3676, "step": 1651 }, { "epoch": 0.5638225255972696, "grad_norm": 3.5344743728637695, "learning_rate": 0.0008120591581342435, "loss": 6.8041, "step": 1652 }, { "epoch": 0.5641638225255973, "grad_norm": 7.247477054595947, "learning_rate": 0.0008119453924914676, "loss": 5.6654, "step": 1653 }, { "epoch": 0.5645051194539249, "grad_norm": 3.664292097091675, "learning_rate": 0.0008118316268486917, "loss": 7.0846, "step": 1654 }, { "epoch": 0.5648464163822525, "grad_norm": 5.331287384033203, "learning_rate": 0.0008117178612059158, "loss": 5.1035, "step": 1655 }, { "epoch": 0.5651877133105802, "grad_norm": 4.2963175773620605, "learning_rate": 0.0008116040955631399, "loss": 6.543, "step": 1656 }, { "epoch": 0.5655290102389079, "grad_norm": 3.7694435119628906, "learning_rate": 0.0008114903299203641, "loss": 6.5782, "step": 1657 }, { "epoch": 0.5658703071672355, "grad_norm": 3.3305928707122803, "learning_rate": 0.0008113765642775882, "loss": 6.992, "step": 1658 }, { "epoch": 0.5662116040955631, "grad_norm": 3.3793208599090576, "learning_rate": 0.0008112627986348123, "loss": 6.3376, "step": 1659 }, { "epoch": 0.5665529010238908, "grad_norm": 3.6285388469696045, "learning_rate": 0.0008111490329920365, "loss": 6.6463, "step": 1660 }, { "epoch": 0.5668941979522184, "grad_norm": 3.3783798217773438, "learning_rate": 0.0008110352673492606, "loss": 6.4926, "step": 1661 }, { "epoch": 0.567235494880546, "grad_norm": 3.489332675933838, "learning_rate": 0.0008109215017064847, "loss": 6.9067, "step": 1662 }, { "epoch": 0.5675767918088738, "grad_norm": 3.4197375774383545, "learning_rate": 0.0008108077360637088, "loss": 6.8896, "step": 1663 }, { "epoch": 0.5679180887372014, "grad_norm": 5.391114234924316, "learning_rate": 0.0008106939704209329, "loss": 6.6774, "step": 1664 }, { "epoch": 0.568259385665529, "grad_norm": 3.594270706176758, "learning_rate": 0.000810580204778157, "loss": 7.0118, "step": 1665 }, { "epoch": 0.5686006825938567, "grad_norm": 3.6209936141967773, "learning_rate": 0.0008104664391353811, "loss": 5.8403, "step": 1666 }, { "epoch": 0.5689419795221843, "grad_norm": 3.4565608501434326, "learning_rate": 0.0008103526734926052, "loss": 7.2969, "step": 1667 }, { "epoch": 0.5692832764505119, "grad_norm": 3.351238489151001, "learning_rate": 0.0008102389078498293, "loss": 7.1469, "step": 1668 }, { "epoch": 0.5696245733788395, "grad_norm": 3.340491771697998, "learning_rate": 0.0008101251422070535, "loss": 6.5831, "step": 1669 }, { "epoch": 0.5699658703071673, "grad_norm": 3.3697030544281006, "learning_rate": 0.0008100113765642776, "loss": 6.5447, "step": 1670 }, { "epoch": 0.5703071672354949, "grad_norm": 3.3213913440704346, "learning_rate": 0.0008098976109215017, "loss": 6.7253, "step": 1671 }, { "epoch": 0.5706484641638225, "grad_norm": 3.31199312210083, "learning_rate": 0.0008097838452787258, "loss": 6.7037, "step": 1672 }, { "epoch": 0.5709897610921502, "grad_norm": 3.239823341369629, "learning_rate": 0.0008096700796359499, "loss": 6.2837, "step": 1673 }, { "epoch": 0.5713310580204778, "grad_norm": 3.233081102371216, "learning_rate": 0.0008095563139931741, "loss": 6.4038, "step": 1674 }, { "epoch": 0.5716723549488054, "grad_norm": 3.2402610778808594, "learning_rate": 0.0008094425483503982, "loss": 6.8486, "step": 1675 }, { "epoch": 0.5720136518771332, "grad_norm": 3.5634925365448, "learning_rate": 0.0008093287827076223, "loss": 5.8242, "step": 1676 }, { "epoch": 0.5723549488054608, "grad_norm": 3.4806721210479736, "learning_rate": 0.0008092150170648465, "loss": 7.0927, "step": 1677 }, { "epoch": 0.5726962457337884, "grad_norm": 3.4350132942199707, "learning_rate": 0.0008091012514220706, "loss": 6.6534, "step": 1678 }, { "epoch": 0.573037542662116, "grad_norm": 3.3814663887023926, "learning_rate": 0.0008089874857792947, "loss": 6.8434, "step": 1679 }, { "epoch": 0.5733788395904437, "grad_norm": 3.420792579650879, "learning_rate": 0.0008088737201365189, "loss": 6.8301, "step": 1680 }, { "epoch": 0.5737201365187713, "grad_norm": 3.6617331504821777, "learning_rate": 0.000808759954493743, "loss": 7.1931, "step": 1681 }, { "epoch": 0.5740614334470989, "grad_norm": 3.417377233505249, "learning_rate": 0.0008086461888509671, "loss": 6.8856, "step": 1682 }, { "epoch": 0.5744027303754267, "grad_norm": 3.4047882556915283, "learning_rate": 0.0008085324232081911, "loss": 6.7193, "step": 1683 }, { "epoch": 0.5747440273037543, "grad_norm": 5.160583019256592, "learning_rate": 0.0008084186575654152, "loss": 6.3314, "step": 1684 }, { "epoch": 0.5750853242320819, "grad_norm": 3.442270517349243, "learning_rate": 0.0008083048919226393, "loss": 6.5523, "step": 1685 }, { "epoch": 0.5754266211604095, "grad_norm": 3.4969444274902344, "learning_rate": 0.0008081911262798635, "loss": 6.1442, "step": 1686 }, { "epoch": 0.5757679180887372, "grad_norm": 3.283874988555908, "learning_rate": 0.0008080773606370876, "loss": 6.5447, "step": 1687 }, { "epoch": 0.5761092150170648, "grad_norm": 3.373081922531128, "learning_rate": 0.0008079635949943117, "loss": 6.9056, "step": 1688 }, { "epoch": 0.5764505119453925, "grad_norm": 3.2919609546661377, "learning_rate": 0.0008078498293515358, "loss": 6.6918, "step": 1689 }, { "epoch": 0.5767918088737202, "grad_norm": 3.302234411239624, "learning_rate": 0.0008077360637087599, "loss": 6.5728, "step": 1690 }, { "epoch": 0.5771331058020478, "grad_norm": 3.6976983547210693, "learning_rate": 0.000807622298065984, "loss": 6.1227, "step": 1691 }, { "epoch": 0.5774744027303754, "grad_norm": 3.5474138259887695, "learning_rate": 0.0008075085324232082, "loss": 6.2463, "step": 1692 }, { "epoch": 0.577815699658703, "grad_norm": 3.680959701538086, "learning_rate": 0.0008073947667804323, "loss": 6.6472, "step": 1693 }, { "epoch": 0.5781569965870307, "grad_norm": 3.4441518783569336, "learning_rate": 0.0008072810011376565, "loss": 6.5921, "step": 1694 }, { "epoch": 0.5784982935153583, "grad_norm": 4.723526477813721, "learning_rate": 0.0008071672354948806, "loss": 6.2168, "step": 1695 }, { "epoch": 0.578839590443686, "grad_norm": 3.377566337585449, "learning_rate": 0.0008070534698521047, "loss": 6.7912, "step": 1696 }, { "epoch": 0.5791808873720137, "grad_norm": 3.3538033962249756, "learning_rate": 0.0008069397042093289, "loss": 6.7811, "step": 1697 }, { "epoch": 0.5795221843003413, "grad_norm": 3.2516143321990967, "learning_rate": 0.000806825938566553, "loss": 6.6028, "step": 1698 }, { "epoch": 0.5798634812286689, "grad_norm": 3.5221121311187744, "learning_rate": 0.0008067121729237771, "loss": 6.8532, "step": 1699 }, { "epoch": 0.5802047781569966, "grad_norm": 3.5948259830474854, "learning_rate": 0.0008065984072810012, "loss": 6.5451, "step": 1700 }, { "epoch": 0.5805460750853242, "grad_norm": 3.5543367862701416, "learning_rate": 0.0008064846416382253, "loss": 6.5479, "step": 1701 }, { "epoch": 0.5808873720136519, "grad_norm": 3.5869266986846924, "learning_rate": 0.0008063708759954493, "loss": 6.4638, "step": 1702 }, { "epoch": 0.5812286689419796, "grad_norm": 3.233880043029785, "learning_rate": 0.0008062571103526735, "loss": 6.6942, "step": 1703 }, { "epoch": 0.5815699658703072, "grad_norm": 3.558081865310669, "learning_rate": 0.0008061433447098976, "loss": 6.5371, "step": 1704 }, { "epoch": 0.5819112627986348, "grad_norm": 3.4222469329833984, "learning_rate": 0.0008060295790671217, "loss": 6.667, "step": 1705 }, { "epoch": 0.5822525597269624, "grad_norm": 3.447845697402954, "learning_rate": 0.0008059158134243458, "loss": 6.8876, "step": 1706 }, { "epoch": 0.5825938566552901, "grad_norm": 5.436509132385254, "learning_rate": 0.0008058020477815699, "loss": 6.4207, "step": 1707 }, { "epoch": 0.5829351535836177, "grad_norm": 3.4778072834014893, "learning_rate": 0.000805688282138794, "loss": 6.5555, "step": 1708 }, { "epoch": 0.5832764505119454, "grad_norm": 3.7661755084991455, "learning_rate": 0.0008055745164960182, "loss": 6.6308, "step": 1709 }, { "epoch": 0.5836177474402731, "grad_norm": 3.373845100402832, "learning_rate": 0.0008054607508532424, "loss": 7.044, "step": 1710 }, { "epoch": 0.5839590443686007, "grad_norm": 3.3945181369781494, "learning_rate": 0.0008053469852104665, "loss": 6.8728, "step": 1711 }, { "epoch": 0.5843003412969283, "grad_norm": 3.244920015335083, "learning_rate": 0.0008052332195676906, "loss": 6.4727, "step": 1712 }, { "epoch": 0.5846416382252559, "grad_norm": 3.3084375858306885, "learning_rate": 0.0008051194539249147, "loss": 6.6563, "step": 1713 }, { "epoch": 0.5849829351535836, "grad_norm": 3.3623743057250977, "learning_rate": 0.0008050056882821389, "loss": 6.8908, "step": 1714 }, { "epoch": 0.5853242320819113, "grad_norm": 3.3226821422576904, "learning_rate": 0.000804891922639363, "loss": 6.7584, "step": 1715 }, { "epoch": 0.5856655290102389, "grad_norm": 3.3474953174591064, "learning_rate": 0.0008047781569965871, "loss": 7.0349, "step": 1716 }, { "epoch": 0.5860068259385666, "grad_norm": 3.4366891384124756, "learning_rate": 0.0008046643913538112, "loss": 6.7988, "step": 1717 }, { "epoch": 0.5863481228668942, "grad_norm": 3.3742077350616455, "learning_rate": 0.0008045506257110353, "loss": 6.3303, "step": 1718 }, { "epoch": 0.5866894197952218, "grad_norm": 3.367166042327881, "learning_rate": 0.0008044368600682594, "loss": 6.9229, "step": 1719 }, { "epoch": 0.5870307167235495, "grad_norm": 3.3801698684692383, "learning_rate": 0.0008043230944254836, "loss": 6.4958, "step": 1720 }, { "epoch": 0.5873720136518771, "grad_norm": 3.513958692550659, "learning_rate": 0.0008042093287827077, "loss": 6.4753, "step": 1721 }, { "epoch": 0.5877133105802048, "grad_norm": 3.514188051223755, "learning_rate": 0.0008040955631399317, "loss": 6.6555, "step": 1722 }, { "epoch": 0.5880546075085324, "grad_norm": 3.736156702041626, "learning_rate": 0.0008039817974971558, "loss": 6.2383, "step": 1723 }, { "epoch": 0.5883959044368601, "grad_norm": 3.569303274154663, "learning_rate": 0.0008038680318543799, "loss": 6.6994, "step": 1724 }, { "epoch": 0.5887372013651877, "grad_norm": 3.693387031555176, "learning_rate": 0.000803754266211604, "loss": 7.2297, "step": 1725 }, { "epoch": 0.5890784982935153, "grad_norm": 3.4730892181396484, "learning_rate": 0.0008036405005688282, "loss": 6.9965, "step": 1726 }, { "epoch": 0.589419795221843, "grad_norm": 3.8345372676849365, "learning_rate": 0.0008035267349260524, "loss": 6.1234, "step": 1727 }, { "epoch": 0.5897610921501707, "grad_norm": 3.4538676738739014, "learning_rate": 0.0008034129692832765, "loss": 6.5552, "step": 1728 }, { "epoch": 0.5901023890784983, "grad_norm": 3.578291654586792, "learning_rate": 0.0008032992036405006, "loss": 7.2901, "step": 1729 }, { "epoch": 0.590443686006826, "grad_norm": 3.2786262035369873, "learning_rate": 0.0008031854379977247, "loss": 6.9642, "step": 1730 }, { "epoch": 0.5907849829351536, "grad_norm": 3.3056552410125732, "learning_rate": 0.0008030716723549488, "loss": 6.7187, "step": 1731 }, { "epoch": 0.5911262798634812, "grad_norm": 3.6989173889160156, "learning_rate": 0.000802957906712173, "loss": 6.7993, "step": 1732 }, { "epoch": 0.5914675767918088, "grad_norm": 3.3782973289489746, "learning_rate": 0.0008028441410693971, "loss": 6.8576, "step": 1733 }, { "epoch": 0.5918088737201365, "grad_norm": 3.2711713314056396, "learning_rate": 0.0008027303754266212, "loss": 6.8393, "step": 1734 }, { "epoch": 0.5921501706484642, "grad_norm": 3.265575885772705, "learning_rate": 0.0008026166097838453, "loss": 6.5095, "step": 1735 }, { "epoch": 0.5924914675767918, "grad_norm": 3.287745237350464, "learning_rate": 0.0008025028441410694, "loss": 6.8591, "step": 1736 }, { "epoch": 0.5928327645051195, "grad_norm": 4.958430290222168, "learning_rate": 0.0008023890784982936, "loss": 6.7966, "step": 1737 }, { "epoch": 0.5931740614334471, "grad_norm": 4.398024082183838, "learning_rate": 0.0008022753128555177, "loss": 5.6212, "step": 1738 }, { "epoch": 0.5935153583617747, "grad_norm": 3.486335277557373, "learning_rate": 0.0008021615472127418, "loss": 6.9339, "step": 1739 }, { "epoch": 0.5938566552901023, "grad_norm": 3.2780847549438477, "learning_rate": 0.0008020477815699659, "loss": 6.4291, "step": 1740 }, { "epoch": 0.5941979522184301, "grad_norm": 8.394953727722168, "learning_rate": 0.0008019340159271899, "loss": 6.1876, "step": 1741 }, { "epoch": 0.5945392491467577, "grad_norm": 3.4329168796539307, "learning_rate": 0.000801820250284414, "loss": 4.1804, "step": 1742 }, { "epoch": 0.5948805460750853, "grad_norm": 4.278625011444092, "learning_rate": 0.0008017064846416382, "loss": 6.3612, "step": 1743 }, { "epoch": 0.595221843003413, "grad_norm": 3.5409955978393555, "learning_rate": 0.0008015927189988624, "loss": 6.4157, "step": 1744 }, { "epoch": 0.5955631399317406, "grad_norm": 5.1169891357421875, "learning_rate": 0.0008014789533560865, "loss": 5.6104, "step": 1745 }, { "epoch": 0.5959044368600682, "grad_norm": 3.445643901824951, "learning_rate": 0.0008013651877133106, "loss": 6.0491, "step": 1746 }, { "epoch": 0.596245733788396, "grad_norm": 3.8756022453308105, "learning_rate": 0.0008012514220705347, "loss": 5.9952, "step": 1747 }, { "epoch": 0.5965870307167236, "grad_norm": 3.3911144733428955, "learning_rate": 0.0008011376564277588, "loss": 7.2222, "step": 1748 }, { "epoch": 0.5969283276450512, "grad_norm": 3.516190528869629, "learning_rate": 0.000801023890784983, "loss": 6.4552, "step": 1749 }, { "epoch": 0.5972696245733788, "grad_norm": 3.3756418228149414, "learning_rate": 0.0008009101251422071, "loss": 6.643, "step": 1750 }, { "epoch": 0.5976109215017065, "grad_norm": 6.02211856842041, "learning_rate": 0.0008007963594994312, "loss": 6.1575, "step": 1751 }, { "epoch": 0.5979522184300341, "grad_norm": 3.416879177093506, "learning_rate": 0.0008006825938566553, "loss": 6.7652, "step": 1752 }, { "epoch": 0.5982935153583617, "grad_norm": 4.541624069213867, "learning_rate": 0.0008005688282138794, "loss": 6.4852, "step": 1753 }, { "epoch": 0.5986348122866895, "grad_norm": 12.214677810668945, "learning_rate": 0.0008004550625711036, "loss": 6.6869, "step": 1754 }, { "epoch": 0.5989761092150171, "grad_norm": 3.7498533725738525, "learning_rate": 0.0008003412969283277, "loss": 6.6797, "step": 1755 }, { "epoch": 0.5993174061433447, "grad_norm": 7.0607523918151855, "learning_rate": 0.0008002275312855518, "loss": 6.0239, "step": 1756 }, { "epoch": 0.5996587030716723, "grad_norm": 5.875295639038086, "learning_rate": 0.0008001137656427759, "loss": 6.777, "step": 1757 }, { "epoch": 0.6, "grad_norm": 3.4712796211242676, "learning_rate": 0.0008, "loss": 6.5113, "step": 1758 }, { "epoch": 0.6003412969283276, "grad_norm": 3.289867877960205, "learning_rate": 0.0007998862343572241, "loss": 6.271, "step": 1759 }, { "epoch": 0.6006825938566553, "grad_norm": 3.281529664993286, "learning_rate": 0.0007997724687144482, "loss": 6.068, "step": 1760 }, { "epoch": 0.601023890784983, "grad_norm": 3.141273260116577, "learning_rate": 0.0007996587030716724, "loss": 6.6534, "step": 1761 }, { "epoch": 0.6013651877133106, "grad_norm": 3.538984537124634, "learning_rate": 0.0007995449374288965, "loss": 6.7176, "step": 1762 }, { "epoch": 0.6017064846416382, "grad_norm": 3.208937644958496, "learning_rate": 0.0007994311717861206, "loss": 6.2085, "step": 1763 }, { "epoch": 0.6020477815699659, "grad_norm": 3.4167351722717285, "learning_rate": 0.0007993174061433447, "loss": 6.4306, "step": 1764 }, { "epoch": 0.6023890784982935, "grad_norm": 3.342594861984253, "learning_rate": 0.0007992036405005688, "loss": 6.9312, "step": 1765 }, { "epoch": 0.6027303754266211, "grad_norm": 3.4137015342712402, "learning_rate": 0.000799089874857793, "loss": 6.6204, "step": 1766 }, { "epoch": 0.6030716723549489, "grad_norm": 3.446514368057251, "learning_rate": 0.0007989761092150171, "loss": 6.866, "step": 1767 }, { "epoch": 0.6034129692832765, "grad_norm": 3.3566482067108154, "learning_rate": 0.0007988623435722412, "loss": 6.1879, "step": 1768 }, { "epoch": 0.6037542662116041, "grad_norm": 3.4932174682617188, "learning_rate": 0.0007987485779294653, "loss": 7.2527, "step": 1769 }, { "epoch": 0.6040955631399317, "grad_norm": 3.385852098464966, "learning_rate": 0.0007986348122866894, "loss": 7.0579, "step": 1770 }, { "epoch": 0.6044368600682594, "grad_norm": 3.311905860900879, "learning_rate": 0.0007985210466439135, "loss": 7.0279, "step": 1771 }, { "epoch": 0.604778156996587, "grad_norm": 3.295506238937378, "learning_rate": 0.0007984072810011377, "loss": 6.3771, "step": 1772 }, { "epoch": 0.6051194539249147, "grad_norm": 3.4223151206970215, "learning_rate": 0.0007982935153583618, "loss": 6.1649, "step": 1773 }, { "epoch": 0.6054607508532424, "grad_norm": 3.4446609020233154, "learning_rate": 0.0007981797497155859, "loss": 5.7184, "step": 1774 }, { "epoch": 0.60580204778157, "grad_norm": 3.3032233715057373, "learning_rate": 0.00079806598407281, "loss": 6.4993, "step": 1775 }, { "epoch": 0.6061433447098976, "grad_norm": 4.309022426605225, "learning_rate": 0.0007979522184300341, "loss": 5.8551, "step": 1776 }, { "epoch": 0.6064846416382252, "grad_norm": 5.289666175842285, "learning_rate": 0.0007978384527872584, "loss": 6.1384, "step": 1777 }, { "epoch": 0.6068259385665529, "grad_norm": 3.589069366455078, "learning_rate": 0.0007977246871444825, "loss": 6.6301, "step": 1778 }, { "epoch": 0.6071672354948805, "grad_norm": 4.165103435516357, "learning_rate": 0.0007976109215017066, "loss": 6.2209, "step": 1779 }, { "epoch": 0.6075085324232082, "grad_norm": 3.654651641845703, "learning_rate": 0.0007974971558589306, "loss": 6.8522, "step": 1780 }, { "epoch": 0.6078498293515359, "grad_norm": 3.5410244464874268, "learning_rate": 0.0007973833902161547, "loss": 6.7159, "step": 1781 }, { "epoch": 0.6081911262798635, "grad_norm": 3.301004648208618, "learning_rate": 0.0007972696245733788, "loss": 6.7727, "step": 1782 }, { "epoch": 0.6085324232081911, "grad_norm": 8.049127578735352, "learning_rate": 0.000797155858930603, "loss": 5.6872, "step": 1783 }, { "epoch": 0.6088737201365187, "grad_norm": 3.7068381309509277, "learning_rate": 0.0007970420932878271, "loss": 6.6709, "step": 1784 }, { "epoch": 0.6092150170648464, "grad_norm": 3.3772456645965576, "learning_rate": 0.0007969283276450512, "loss": 6.8441, "step": 1785 }, { "epoch": 0.6095563139931741, "grad_norm": 3.3938705921173096, "learning_rate": 0.0007968145620022753, "loss": 6.7171, "step": 1786 }, { "epoch": 0.6098976109215017, "grad_norm": 3.674920082092285, "learning_rate": 0.0007967007963594994, "loss": 6.424, "step": 1787 }, { "epoch": 0.6102389078498294, "grad_norm": 5.99582576751709, "learning_rate": 0.0007965870307167235, "loss": 6.1769, "step": 1788 }, { "epoch": 0.610580204778157, "grad_norm": 3.5924954414367676, "learning_rate": 0.0007964732650739477, "loss": 6.5125, "step": 1789 }, { "epoch": 0.6109215017064846, "grad_norm": 3.4204015731811523, "learning_rate": 0.0007963594994311718, "loss": 6.6597, "step": 1790 }, { "epoch": 0.6112627986348123, "grad_norm": 3.3368844985961914, "learning_rate": 0.0007962457337883959, "loss": 6.2667, "step": 1791 }, { "epoch": 0.6116040955631399, "grad_norm": 3.3641443252563477, "learning_rate": 0.00079613196814562, "loss": 6.5083, "step": 1792 }, { "epoch": 0.6119453924914676, "grad_norm": 8.113044738769531, "learning_rate": 0.0007960182025028441, "loss": 6.1718, "step": 1793 }, { "epoch": 0.6122866894197952, "grad_norm": 3.4572935104370117, "learning_rate": 0.0007959044368600682, "loss": 6.3586, "step": 1794 }, { "epoch": 0.6126279863481229, "grad_norm": 3.6194283962249756, "learning_rate": 0.0007957906712172925, "loss": 6.8134, "step": 1795 }, { "epoch": 0.6129692832764505, "grad_norm": 3.5017333030700684, "learning_rate": 0.0007956769055745166, "loss": 7.0632, "step": 1796 }, { "epoch": 0.6133105802047781, "grad_norm": 3.5431251525878906, "learning_rate": 0.0007955631399317407, "loss": 7.0487, "step": 1797 }, { "epoch": 0.6136518771331058, "grad_norm": 3.3673455715179443, "learning_rate": 0.0007954493742889648, "loss": 6.7595, "step": 1798 }, { "epoch": 0.6139931740614335, "grad_norm": 5.616215705871582, "learning_rate": 0.0007953356086461888, "loss": 5.7906, "step": 1799 }, { "epoch": 0.6143344709897611, "grad_norm": 3.828660488128662, "learning_rate": 0.000795221843003413, "loss": 6.9296, "step": 1800 }, { "epoch": 0.6146757679180888, "grad_norm": 3.748854637145996, "learning_rate": 0.0007951080773606371, "loss": 6.3692, "step": 1801 }, { "epoch": 0.6150170648464164, "grad_norm": 3.4253838062286377, "learning_rate": 0.0007949943117178612, "loss": 6.5113, "step": 1802 }, { "epoch": 0.615358361774744, "grad_norm": 5.2447357177734375, "learning_rate": 0.0007948805460750853, "loss": 6.4785, "step": 1803 }, { "epoch": 0.6156996587030716, "grad_norm": 3.3891453742980957, "learning_rate": 0.0007947667804323094, "loss": 7.1645, "step": 1804 }, { "epoch": 0.6160409556313993, "grad_norm": 3.5664119720458984, "learning_rate": 0.0007946530147895335, "loss": 6.733, "step": 1805 }, { "epoch": 0.616382252559727, "grad_norm": 4.1726298332214355, "learning_rate": 0.0007945392491467577, "loss": 5.8798, "step": 1806 }, { "epoch": 0.6167235494880546, "grad_norm": 4.10631799697876, "learning_rate": 0.0007944254835039818, "loss": 6.4036, "step": 1807 }, { "epoch": 0.6170648464163823, "grad_norm": 4.00150728225708, "learning_rate": 0.0007943117178612059, "loss": 6.7822, "step": 1808 }, { "epoch": 0.6174061433447099, "grad_norm": 3.687084674835205, "learning_rate": 0.00079419795221843, "loss": 6.653, "step": 1809 }, { "epoch": 0.6177474402730375, "grad_norm": 3.6721670627593994, "learning_rate": 0.0007940841865756541, "loss": 6.7528, "step": 1810 }, { "epoch": 0.6180887372013651, "grad_norm": 3.374575138092041, "learning_rate": 0.0007939704209328783, "loss": 6.6044, "step": 1811 }, { "epoch": 0.6184300341296929, "grad_norm": 3.891878843307495, "learning_rate": 0.0007938566552901025, "loss": 6.495, "step": 1812 }, { "epoch": 0.6187713310580205, "grad_norm": 3.5438005924224854, "learning_rate": 0.0007937428896473266, "loss": 6.2664, "step": 1813 }, { "epoch": 0.6191126279863481, "grad_norm": 3.501797676086426, "learning_rate": 0.0007936291240045507, "loss": 6.7247, "step": 1814 }, { "epoch": 0.6194539249146758, "grad_norm": 3.600700855255127, "learning_rate": 0.0007935153583617748, "loss": 6.8725, "step": 1815 }, { "epoch": 0.6197952218430034, "grad_norm": 3.210486650466919, "learning_rate": 0.0007934015927189989, "loss": 6.9718, "step": 1816 }, { "epoch": 0.620136518771331, "grad_norm": 3.1832642555236816, "learning_rate": 0.0007932878270762231, "loss": 6.8543, "step": 1817 }, { "epoch": 0.6204778156996587, "grad_norm": 4.302059173583984, "learning_rate": 0.0007931740614334472, "loss": 5.7968, "step": 1818 }, { "epoch": 0.6208191126279864, "grad_norm": 3.688284158706665, "learning_rate": 0.0007930602957906712, "loss": 6.4013, "step": 1819 }, { "epoch": 0.621160409556314, "grad_norm": 3.592975616455078, "learning_rate": 0.0007929465301478953, "loss": 7.0964, "step": 1820 }, { "epoch": 0.6215017064846416, "grad_norm": 3.5165815353393555, "learning_rate": 0.0007928327645051194, "loss": 7.1356, "step": 1821 }, { "epoch": 0.6218430034129693, "grad_norm": 3.4153037071228027, "learning_rate": 0.0007927189988623435, "loss": 6.2598, "step": 1822 }, { "epoch": 0.6221843003412969, "grad_norm": 3.3510231971740723, "learning_rate": 0.0007926052332195677, "loss": 6.149, "step": 1823 }, { "epoch": 0.6225255972696245, "grad_norm": 3.2553350925445557, "learning_rate": 0.0007924914675767918, "loss": 6.9447, "step": 1824 }, { "epoch": 0.6228668941979523, "grad_norm": 3.4982099533081055, "learning_rate": 0.0007923777019340159, "loss": 6.8259, "step": 1825 }, { "epoch": 0.6232081911262799, "grad_norm": 3.446209192276001, "learning_rate": 0.00079226393629124, "loss": 6.4785, "step": 1826 }, { "epoch": 0.6235494880546075, "grad_norm": 3.815865993499756, "learning_rate": 0.0007921501706484641, "loss": 6.1361, "step": 1827 }, { "epoch": 0.6238907849829352, "grad_norm": 3.4647111892700195, "learning_rate": 0.0007920364050056883, "loss": 6.4204, "step": 1828 }, { "epoch": 0.6242320819112628, "grad_norm": 5.167119026184082, "learning_rate": 0.0007919226393629125, "loss": 6.0301, "step": 1829 }, { "epoch": 0.6245733788395904, "grad_norm": 3.3964574337005615, "learning_rate": 0.0007918088737201366, "loss": 6.1847, "step": 1830 }, { "epoch": 0.624914675767918, "grad_norm": 3.4840471744537354, "learning_rate": 0.0007916951080773607, "loss": 6.8604, "step": 1831 }, { "epoch": 0.6252559726962458, "grad_norm": 3.4988765716552734, "learning_rate": 0.0007915813424345848, "loss": 6.5158, "step": 1832 }, { "epoch": 0.6255972696245734, "grad_norm": 3.5883636474609375, "learning_rate": 0.0007914675767918089, "loss": 6.6659, "step": 1833 }, { "epoch": 0.625938566552901, "grad_norm": 3.3029944896698, "learning_rate": 0.000791353811149033, "loss": 6.7029, "step": 1834 }, { "epoch": 0.6262798634812287, "grad_norm": 3.6475093364715576, "learning_rate": 0.0007912400455062572, "loss": 6.6529, "step": 1835 }, { "epoch": 0.6266211604095563, "grad_norm": 3.320028305053711, "learning_rate": 0.0007911262798634813, "loss": 6.3984, "step": 1836 }, { "epoch": 0.6269624573378839, "grad_norm": 3.4347753524780273, "learning_rate": 0.0007910125142207054, "loss": 6.7391, "step": 1837 }, { "epoch": 0.6273037542662117, "grad_norm": 3.7779273986816406, "learning_rate": 0.0007908987485779294, "loss": 6.4521, "step": 1838 }, { "epoch": 0.6276450511945393, "grad_norm": 3.466188907623291, "learning_rate": 0.0007907849829351535, "loss": 6.3165, "step": 1839 }, { "epoch": 0.6279863481228669, "grad_norm": 3.9799110889434814, "learning_rate": 0.0007906712172923777, "loss": 6.4507, "step": 1840 }, { "epoch": 0.6283276450511945, "grad_norm": 3.497555732727051, "learning_rate": 0.0007905574516496018, "loss": 6.819, "step": 1841 }, { "epoch": 0.6286689419795222, "grad_norm": 3.5930631160736084, "learning_rate": 0.0007904436860068259, "loss": 6.5014, "step": 1842 }, { "epoch": 0.6290102389078498, "grad_norm": 3.517838478088379, "learning_rate": 0.00079032992036405, "loss": 6.4191, "step": 1843 }, { "epoch": 0.6293515358361774, "grad_norm": 3.4117469787597656, "learning_rate": 0.0007902161547212741, "loss": 6.6453, "step": 1844 }, { "epoch": 0.6296928327645052, "grad_norm": 3.3217053413391113, "learning_rate": 0.0007901023890784983, "loss": 6.5161, "step": 1845 }, { "epoch": 0.6300341296928328, "grad_norm": 3.3784968852996826, "learning_rate": 0.0007899886234357225, "loss": 6.4929, "step": 1846 }, { "epoch": 0.6303754266211604, "grad_norm": 3.2478907108306885, "learning_rate": 0.0007898748577929466, "loss": 6.4198, "step": 1847 }, { "epoch": 0.630716723549488, "grad_norm": 3.4220783710479736, "learning_rate": 0.0007897610921501707, "loss": 6.9195, "step": 1848 }, { "epoch": 0.6310580204778157, "grad_norm": 3.5167126655578613, "learning_rate": 0.0007896473265073948, "loss": 6.4107, "step": 1849 }, { "epoch": 0.6313993174061433, "grad_norm": 3.388826608657837, "learning_rate": 0.0007895335608646189, "loss": 6.7458, "step": 1850 }, { "epoch": 0.631740614334471, "grad_norm": 3.440612316131592, "learning_rate": 0.000789419795221843, "loss": 6.2142, "step": 1851 }, { "epoch": 0.6320819112627987, "grad_norm": 3.389021873474121, "learning_rate": 0.0007893060295790672, "loss": 6.7827, "step": 1852 }, { "epoch": 0.6324232081911263, "grad_norm": 3.3972480297088623, "learning_rate": 0.0007891922639362913, "loss": 6.7742, "step": 1853 }, { "epoch": 0.6327645051194539, "grad_norm": 4.108426094055176, "learning_rate": 0.0007890784982935154, "loss": 6.5065, "step": 1854 }, { "epoch": 0.6331058020477816, "grad_norm": 3.433762311935425, "learning_rate": 0.0007889647326507395, "loss": 6.8187, "step": 1855 }, { "epoch": 0.6334470989761092, "grad_norm": 4.6445746421813965, "learning_rate": 0.0007888509670079636, "loss": 5.8341, "step": 1856 }, { "epoch": 0.6337883959044368, "grad_norm": 4.046677589416504, "learning_rate": 0.0007887372013651878, "loss": 5.9867, "step": 1857 }, { "epoch": 0.6341296928327645, "grad_norm": 3.4539291858673096, "learning_rate": 0.0007886234357224118, "loss": 6.8548, "step": 1858 }, { "epoch": 0.6344709897610922, "grad_norm": 8.063450813293457, "learning_rate": 0.0007885096700796359, "loss": 7.0799, "step": 1859 }, { "epoch": 0.6348122866894198, "grad_norm": 4.95775032043457, "learning_rate": 0.00078839590443686, "loss": 6.2372, "step": 1860 }, { "epoch": 0.6351535836177474, "grad_norm": 3.3882105350494385, "learning_rate": 0.0007882821387940841, "loss": 6.8213, "step": 1861 }, { "epoch": 0.6354948805460751, "grad_norm": 3.2759015560150146, "learning_rate": 0.0007881683731513083, "loss": 6.717, "step": 1862 }, { "epoch": 0.6358361774744027, "grad_norm": 3.300447463989258, "learning_rate": 0.0007880546075085325, "loss": 6.194, "step": 1863 }, { "epoch": 0.6361774744027304, "grad_norm": 3.613112688064575, "learning_rate": 0.0007879408418657566, "loss": 6.6778, "step": 1864 }, { "epoch": 0.636518771331058, "grad_norm": 3.3438189029693604, "learning_rate": 0.0007878270762229807, "loss": 6.2512, "step": 1865 }, { "epoch": 0.6368600682593857, "grad_norm": 3.257805347442627, "learning_rate": 0.0007877133105802048, "loss": 6.7676, "step": 1866 }, { "epoch": 0.6372013651877133, "grad_norm": 3.257349729537964, "learning_rate": 0.0007875995449374289, "loss": 6.7774, "step": 1867 }, { "epoch": 0.6375426621160409, "grad_norm": 9.708579063415527, "learning_rate": 0.000787485779294653, "loss": 6.7028, "step": 1868 }, { "epoch": 0.6378839590443686, "grad_norm": 3.499809503555298, "learning_rate": 0.0007873720136518772, "loss": 6.5104, "step": 1869 }, { "epoch": 0.6382252559726962, "grad_norm": 3.5874903202056885, "learning_rate": 0.0007872582480091013, "loss": 7.1038, "step": 1870 }, { "epoch": 0.6385665529010239, "grad_norm": 3.483231544494629, "learning_rate": 0.0007871444823663254, "loss": 6.7159, "step": 1871 }, { "epoch": 0.6389078498293516, "grad_norm": 3.406229257583618, "learning_rate": 0.0007870307167235495, "loss": 6.5912, "step": 1872 }, { "epoch": 0.6392491467576792, "grad_norm": 3.2390456199645996, "learning_rate": 0.0007869169510807736, "loss": 6.7661, "step": 1873 }, { "epoch": 0.6395904436860068, "grad_norm": 5.70449161529541, "learning_rate": 0.0007868031854379977, "loss": 5.6305, "step": 1874 }, { "epoch": 0.6399317406143344, "grad_norm": 3.646918535232544, "learning_rate": 0.0007866894197952219, "loss": 6.772, "step": 1875 }, { "epoch": 0.6402730375426621, "grad_norm": 3.5219454765319824, "learning_rate": 0.000786575654152446, "loss": 6.6647, "step": 1876 }, { "epoch": 0.6406143344709898, "grad_norm": 3.4784984588623047, "learning_rate": 0.00078646188850967, "loss": 6.8152, "step": 1877 }, { "epoch": 0.6409556313993174, "grad_norm": 3.503469467163086, "learning_rate": 0.0007863481228668941, "loss": 6.5573, "step": 1878 }, { "epoch": 0.6412969283276451, "grad_norm": 3.3382554054260254, "learning_rate": 0.0007862343572241183, "loss": 7.0561, "step": 1879 }, { "epoch": 0.6416382252559727, "grad_norm": 3.2195024490356445, "learning_rate": 0.0007861205915813425, "loss": 6.5636, "step": 1880 }, { "epoch": 0.6419795221843003, "grad_norm": 3.3817391395568848, "learning_rate": 0.0007860068259385666, "loss": 6.0123, "step": 1881 }, { "epoch": 0.642320819112628, "grad_norm": 5.903642177581787, "learning_rate": 0.0007858930602957907, "loss": 5.0916, "step": 1882 }, { "epoch": 0.6426621160409556, "grad_norm": 3.8153889179229736, "learning_rate": 0.0007857792946530148, "loss": 6.7254, "step": 1883 }, { "epoch": 0.6430034129692833, "grad_norm": 3.6918563842773438, "learning_rate": 0.0007856655290102389, "loss": 6.8791, "step": 1884 }, { "epoch": 0.643344709897611, "grad_norm": 3.476710796356201, "learning_rate": 0.000785551763367463, "loss": 6.2294, "step": 1885 }, { "epoch": 0.6436860068259386, "grad_norm": 3.537196397781372, "learning_rate": 0.0007854379977246872, "loss": 6.5357, "step": 1886 }, { "epoch": 0.6440273037542662, "grad_norm": 4.697029113769531, "learning_rate": 0.0007853242320819113, "loss": 6.3114, "step": 1887 }, { "epoch": 0.6443686006825938, "grad_norm": 3.5125951766967773, "learning_rate": 0.0007852104664391354, "loss": 6.6103, "step": 1888 }, { "epoch": 0.6447098976109215, "grad_norm": 3.5048978328704834, "learning_rate": 0.0007850967007963595, "loss": 7.0019, "step": 1889 }, { "epoch": 0.6450511945392492, "grad_norm": 3.2914698123931885, "learning_rate": 0.0007849829351535836, "loss": 6.8293, "step": 1890 }, { "epoch": 0.6453924914675768, "grad_norm": 5.254654407501221, "learning_rate": 0.0007848691695108077, "loss": 6.2791, "step": 1891 }, { "epoch": 0.6457337883959045, "grad_norm": 3.4875900745391846, "learning_rate": 0.0007847554038680319, "loss": 7.0699, "step": 1892 }, { "epoch": 0.6460750853242321, "grad_norm": 7.53113317489624, "learning_rate": 0.000784641638225256, "loss": 5.7548, "step": 1893 }, { "epoch": 0.6464163822525597, "grad_norm": 3.497556209564209, "learning_rate": 0.0007845278725824802, "loss": 6.2634, "step": 1894 }, { "epoch": 0.6467576791808873, "grad_norm": 5.653488636016846, "learning_rate": 0.0007844141069397043, "loss": 5.9736, "step": 1895 }, { "epoch": 0.647098976109215, "grad_norm": 3.3323814868927, "learning_rate": 0.0007843003412969284, "loss": 6.4086, "step": 1896 }, { "epoch": 0.6474402730375427, "grad_norm": 5.373404026031494, "learning_rate": 0.0007841865756541524, "loss": 6.2502, "step": 1897 }, { "epoch": 0.6477815699658703, "grad_norm": 4.002843379974365, "learning_rate": 0.0007840728100113766, "loss": 6.3939, "step": 1898 }, { "epoch": 0.648122866894198, "grad_norm": 3.4836678504943848, "learning_rate": 0.0007839590443686007, "loss": 6.5778, "step": 1899 }, { "epoch": 0.6484641638225256, "grad_norm": 5.268570899963379, "learning_rate": 0.0007838452787258248, "loss": 6.1417, "step": 1900 }, { "epoch": 0.6488054607508532, "grad_norm": 3.3893330097198486, "learning_rate": 0.0007837315130830489, "loss": 6.579, "step": 1901 }, { "epoch": 0.6491467576791808, "grad_norm": 3.871875762939453, "learning_rate": 0.000783617747440273, "loss": 6.7902, "step": 1902 }, { "epoch": 0.6494880546075086, "grad_norm": 3.5524959564208984, "learning_rate": 0.0007835039817974972, "loss": 6.6698, "step": 1903 }, { "epoch": 0.6498293515358362, "grad_norm": 3.273620128631592, "learning_rate": 0.0007833902161547213, "loss": 6.2347, "step": 1904 }, { "epoch": 0.6501706484641638, "grad_norm": 3.283205986022949, "learning_rate": 0.0007832764505119454, "loss": 5.978, "step": 1905 }, { "epoch": 0.6505119453924915, "grad_norm": 3.333651304244995, "learning_rate": 0.0007831626848691695, "loss": 6.5876, "step": 1906 }, { "epoch": 0.6508532423208191, "grad_norm": 6.1999030113220215, "learning_rate": 0.0007830489192263936, "loss": 6.6631, "step": 1907 }, { "epoch": 0.6511945392491467, "grad_norm": 3.410543203353882, "learning_rate": 0.0007829351535836177, "loss": 6.5243, "step": 1908 }, { "epoch": 0.6515358361774743, "grad_norm": 4.422999858856201, "learning_rate": 0.0007828213879408419, "loss": 6.4633, "step": 1909 }, { "epoch": 0.6518771331058021, "grad_norm": 3.5655972957611084, "learning_rate": 0.000782707622298066, "loss": 6.524, "step": 1910 }, { "epoch": 0.6522184300341297, "grad_norm": 3.3885929584503174, "learning_rate": 0.0007825938566552902, "loss": 6.7883, "step": 1911 }, { "epoch": 0.6525597269624573, "grad_norm": 3.3488664627075195, "learning_rate": 0.0007824800910125143, "loss": 6.917, "step": 1912 }, { "epoch": 0.652901023890785, "grad_norm": 3.5068624019622803, "learning_rate": 0.0007823663253697384, "loss": 6.6262, "step": 1913 }, { "epoch": 0.6532423208191126, "grad_norm": 3.233506202697754, "learning_rate": 0.0007822525597269625, "loss": 6.6697, "step": 1914 }, { "epoch": 0.6535836177474402, "grad_norm": 3.320382833480835, "learning_rate": 0.0007821387940841867, "loss": 6.9184, "step": 1915 }, { "epoch": 0.653924914675768, "grad_norm": 3.2479734420776367, "learning_rate": 0.0007820250284414107, "loss": 6.8682, "step": 1916 }, { "epoch": 0.6542662116040956, "grad_norm": 3.2496206760406494, "learning_rate": 0.0007819112627986348, "loss": 6.945, "step": 1917 }, { "epoch": 0.6546075085324232, "grad_norm": 3.3230035305023193, "learning_rate": 0.0007817974971558589, "loss": 6.6453, "step": 1918 }, { "epoch": 0.6549488054607508, "grad_norm": 4.4280619621276855, "learning_rate": 0.000781683731513083, "loss": 5.9717, "step": 1919 }, { "epoch": 0.6552901023890785, "grad_norm": 3.414978265762329, "learning_rate": 0.0007815699658703072, "loss": 7.0985, "step": 1920 }, { "epoch": 0.6556313993174061, "grad_norm": 3.3314380645751953, "learning_rate": 0.0007814562002275313, "loss": 6.9524, "step": 1921 }, { "epoch": 0.6559726962457337, "grad_norm": 3.4111015796661377, "learning_rate": 0.0007813424345847554, "loss": 6.455, "step": 1922 }, { "epoch": 0.6563139931740615, "grad_norm": 3.4409379959106445, "learning_rate": 0.0007812286689419795, "loss": 6.6308, "step": 1923 }, { "epoch": 0.6566552901023891, "grad_norm": 3.246084451675415, "learning_rate": 0.0007811149032992036, "loss": 6.8811, "step": 1924 }, { "epoch": 0.6569965870307167, "grad_norm": 3.421894073486328, "learning_rate": 0.0007810011376564277, "loss": 6.9111, "step": 1925 }, { "epoch": 0.6573378839590444, "grad_norm": 3.2279820442199707, "learning_rate": 0.0007808873720136519, "loss": 6.8361, "step": 1926 }, { "epoch": 0.657679180887372, "grad_norm": 3.337752103805542, "learning_rate": 0.000780773606370876, "loss": 7.0658, "step": 1927 }, { "epoch": 0.6580204778156996, "grad_norm": 3.4158434867858887, "learning_rate": 0.0007806598407281002, "loss": 6.6466, "step": 1928 }, { "epoch": 0.6583617747440274, "grad_norm": 3.2831523418426514, "learning_rate": 0.0007805460750853243, "loss": 6.6213, "step": 1929 }, { "epoch": 0.658703071672355, "grad_norm": 3.8880624771118164, "learning_rate": 0.0007804323094425484, "loss": 6.2975, "step": 1930 }, { "epoch": 0.6590443686006826, "grad_norm": 3.35299015045166, "learning_rate": 0.0007803185437997725, "loss": 7.1754, "step": 1931 }, { "epoch": 0.6593856655290102, "grad_norm": 3.4394142627716064, "learning_rate": 0.0007802047781569967, "loss": 6.6023, "step": 1932 }, { "epoch": 0.6597269624573379, "grad_norm": 3.386638641357422, "learning_rate": 0.0007800910125142208, "loss": 6.9677, "step": 1933 }, { "epoch": 0.6600682593856655, "grad_norm": 3.344113826751709, "learning_rate": 0.0007799772468714449, "loss": 6.9545, "step": 1934 }, { "epoch": 0.6604095563139932, "grad_norm": 3.3587405681610107, "learning_rate": 0.0007798634812286689, "loss": 7.0659, "step": 1935 }, { "epoch": 0.6607508532423209, "grad_norm": 3.9267964363098145, "learning_rate": 0.000779749715585893, "loss": 4.5715, "step": 1936 }, { "epoch": 0.6610921501706485, "grad_norm": 3.7344627380371094, "learning_rate": 0.0007796359499431171, "loss": 6.0021, "step": 1937 }, { "epoch": 0.6614334470989761, "grad_norm": 3.6474874019622803, "learning_rate": 0.0007795221843003413, "loss": 6.2885, "step": 1938 }, { "epoch": 0.6617747440273037, "grad_norm": 6.553812026977539, "learning_rate": 0.0007794084186575654, "loss": 5.7511, "step": 1939 }, { "epoch": 0.6621160409556314, "grad_norm": 3.5463573932647705, "learning_rate": 0.0007792946530147895, "loss": 6.4974, "step": 1940 }, { "epoch": 0.662457337883959, "grad_norm": 3.536761999130249, "learning_rate": 0.0007791808873720136, "loss": 6.4656, "step": 1941 }, { "epoch": 0.6627986348122867, "grad_norm": 3.480790615081787, "learning_rate": 0.0007790671217292377, "loss": 5.9657, "step": 1942 }, { "epoch": 0.6631399317406144, "grad_norm": 3.640864849090576, "learning_rate": 0.000778953356086462, "loss": 7.0483, "step": 1943 }, { "epoch": 0.663481228668942, "grad_norm": 3.7452423572540283, "learning_rate": 0.000778839590443686, "loss": 6.3805, "step": 1944 }, { "epoch": 0.6638225255972696, "grad_norm": 3.4025330543518066, "learning_rate": 0.0007787258248009102, "loss": 6.5517, "step": 1945 }, { "epoch": 0.6641638225255972, "grad_norm": 3.4373252391815186, "learning_rate": 0.0007786120591581343, "loss": 6.8531, "step": 1946 }, { "epoch": 0.6645051194539249, "grad_norm": 3.367783546447754, "learning_rate": 0.0007784982935153584, "loss": 6.8904, "step": 1947 }, { "epoch": 0.6648464163822526, "grad_norm": 3.365324020385742, "learning_rate": 0.0007783845278725825, "loss": 6.4844, "step": 1948 }, { "epoch": 0.6651877133105802, "grad_norm": 3.2752928733825684, "learning_rate": 0.0007782707622298067, "loss": 6.9888, "step": 1949 }, { "epoch": 0.6655290102389079, "grad_norm": 3.411865711212158, "learning_rate": 0.0007781569965870308, "loss": 6.9369, "step": 1950 }, { "epoch": 0.6658703071672355, "grad_norm": 3.8111231327056885, "learning_rate": 0.0007780432309442549, "loss": 5.9984, "step": 1951 }, { "epoch": 0.6662116040955631, "grad_norm": 3.5093679428100586, "learning_rate": 0.000777929465301479, "loss": 7.1369, "step": 1952 }, { "epoch": 0.6665529010238908, "grad_norm": 8.49619197845459, "learning_rate": 0.0007778156996587031, "loss": 6.2056, "step": 1953 }, { "epoch": 0.6668941979522184, "grad_norm": 3.572247266769409, "learning_rate": 0.0007777019340159272, "loss": 7.0313, "step": 1954 }, { "epoch": 0.6672354948805461, "grad_norm": 6.142834663391113, "learning_rate": 0.0007775881683731513, "loss": 4.7668, "step": 1955 }, { "epoch": 0.6675767918088737, "grad_norm": 3.7518608570098877, "learning_rate": 0.0007774744027303754, "loss": 7.0454, "step": 1956 }, { "epoch": 0.6679180887372014, "grad_norm": 3.596379280090332, "learning_rate": 0.0007773606370875995, "loss": 6.8423, "step": 1957 }, { "epoch": 0.668259385665529, "grad_norm": 3.454772710800171, "learning_rate": 0.0007772468714448236, "loss": 6.3305, "step": 1958 }, { "epoch": 0.6686006825938566, "grad_norm": 3.3043911457061768, "learning_rate": 0.0007771331058020477, "loss": 6.575, "step": 1959 }, { "epoch": 0.6689419795221843, "grad_norm": 3.314772844314575, "learning_rate": 0.0007770193401592718, "loss": 6.6896, "step": 1960 }, { "epoch": 0.669283276450512, "grad_norm": 3.414109230041504, "learning_rate": 0.000776905574516496, "loss": 6.3657, "step": 1961 }, { "epoch": 0.6696245733788396, "grad_norm": 8.866144180297852, "learning_rate": 0.0007767918088737202, "loss": 6.4915, "step": 1962 }, { "epoch": 0.6699658703071673, "grad_norm": 5.978751182556152, "learning_rate": 0.0007766780432309443, "loss": 4.808, "step": 1963 }, { "epoch": 0.6703071672354949, "grad_norm": 4.224075794219971, "learning_rate": 0.0007765642775881684, "loss": 6.2096, "step": 1964 }, { "epoch": 0.6706484641638225, "grad_norm": 3.7597286701202393, "learning_rate": 0.0007764505119453925, "loss": 6.6962, "step": 1965 }, { "epoch": 0.6709897610921501, "grad_norm": 3.6143290996551514, "learning_rate": 0.0007763367463026167, "loss": 6.2249, "step": 1966 }, { "epoch": 0.6713310580204778, "grad_norm": 3.733955144882202, "learning_rate": 0.0007762229806598408, "loss": 6.1922, "step": 1967 }, { "epoch": 0.6716723549488055, "grad_norm": 3.268876791000366, "learning_rate": 0.0007761092150170649, "loss": 6.6224, "step": 1968 }, { "epoch": 0.6720136518771331, "grad_norm": 3.7937519550323486, "learning_rate": 0.000775995449374289, "loss": 6.1537, "step": 1969 }, { "epoch": 0.6723549488054608, "grad_norm": 3.96634578704834, "learning_rate": 0.0007758816837315131, "loss": 5.2715, "step": 1970 }, { "epoch": 0.6726962457337884, "grad_norm": 3.32902455329895, "learning_rate": 0.0007757679180887372, "loss": 7.1139, "step": 1971 }, { "epoch": 0.673037542662116, "grad_norm": 3.3868730068206787, "learning_rate": 0.0007756541524459614, "loss": 7.0923, "step": 1972 }, { "epoch": 0.6733788395904436, "grad_norm": 3.4353487491607666, "learning_rate": 0.0007755403868031855, "loss": 6.6146, "step": 1973 }, { "epoch": 0.6737201365187714, "grad_norm": 4.369273662567139, "learning_rate": 0.0007754266211604095, "loss": 6.2917, "step": 1974 }, { "epoch": 0.674061433447099, "grad_norm": 3.72794508934021, "learning_rate": 0.0007753128555176336, "loss": 6.1588, "step": 1975 }, { "epoch": 0.6744027303754266, "grad_norm": 3.5048928260803223, "learning_rate": 0.0007751990898748577, "loss": 6.603, "step": 1976 }, { "epoch": 0.6747440273037543, "grad_norm": 4.048648834228516, "learning_rate": 0.0007750853242320818, "loss": 5.5782, "step": 1977 }, { "epoch": 0.6750853242320819, "grad_norm": 3.9075467586517334, "learning_rate": 0.000774971558589306, "loss": 5.8681, "step": 1978 }, { "epoch": 0.6754266211604095, "grad_norm": 3.3288211822509766, "learning_rate": 0.0007748577929465302, "loss": 6.6653, "step": 1979 }, { "epoch": 0.6757679180887372, "grad_norm": 3.535240650177002, "learning_rate": 0.0007747440273037543, "loss": 6.4881, "step": 1980 }, { "epoch": 0.6761092150170649, "grad_norm": 3.2548129558563232, "learning_rate": 0.0007746302616609784, "loss": 6.792, "step": 1981 }, { "epoch": 0.6764505119453925, "grad_norm": 3.5258853435516357, "learning_rate": 0.0007745164960182025, "loss": 6.5358, "step": 1982 }, { "epoch": 0.6767918088737201, "grad_norm": 5.013880729675293, "learning_rate": 0.0007744027303754267, "loss": 6.1146, "step": 1983 }, { "epoch": 0.6771331058020478, "grad_norm": 3.550227165222168, "learning_rate": 0.0007742889647326508, "loss": 6.9435, "step": 1984 }, { "epoch": 0.6774744027303754, "grad_norm": 3.3671066761016846, "learning_rate": 0.0007741751990898749, "loss": 6.4325, "step": 1985 }, { "epoch": 0.677815699658703, "grad_norm": 4.051577091217041, "learning_rate": 0.000774061433447099, "loss": 6.3684, "step": 1986 }, { "epoch": 0.6781569965870308, "grad_norm": 3.442668914794922, "learning_rate": 0.0007739476678043231, "loss": 6.7317, "step": 1987 }, { "epoch": 0.6784982935153584, "grad_norm": 3.2804269790649414, "learning_rate": 0.0007738339021615472, "loss": 6.6793, "step": 1988 }, { "epoch": 0.678839590443686, "grad_norm": 4.920018196105957, "learning_rate": 0.0007737201365187714, "loss": 5.9878, "step": 1989 }, { "epoch": 0.6791808873720137, "grad_norm": 3.5679967403411865, "learning_rate": 0.0007736063708759955, "loss": 6.9321, "step": 1990 }, { "epoch": 0.6795221843003413, "grad_norm": 3.628213405609131, "learning_rate": 0.0007734926052332196, "loss": 6.5297, "step": 1991 }, { "epoch": 0.6798634812286689, "grad_norm": 3.5422585010528564, "learning_rate": 0.0007733788395904437, "loss": 6.1211, "step": 1992 }, { "epoch": 0.6802047781569965, "grad_norm": 3.3573875427246094, "learning_rate": 0.0007732650739476678, "loss": 7.0201, "step": 1993 }, { "epoch": 0.6805460750853243, "grad_norm": 3.2984368801116943, "learning_rate": 0.0007731513083048918, "loss": 6.5626, "step": 1994 }, { "epoch": 0.6808873720136519, "grad_norm": 3.394038200378418, "learning_rate": 0.000773037542662116, "loss": 6.9126, "step": 1995 }, { "epoch": 0.6812286689419795, "grad_norm": 3.2731823921203613, "learning_rate": 0.0007729237770193402, "loss": 6.8367, "step": 1996 }, { "epoch": 0.6815699658703072, "grad_norm": 3.3242337703704834, "learning_rate": 0.0007728100113765643, "loss": 6.6835, "step": 1997 }, { "epoch": 0.6819112627986348, "grad_norm": 3.444890260696411, "learning_rate": 0.0007726962457337884, "loss": 6.7718, "step": 1998 }, { "epoch": 0.6822525597269624, "grad_norm": 3.4120771884918213, "learning_rate": 0.0007725824800910125, "loss": 6.6572, "step": 1999 }, { "epoch": 0.6825938566552902, "grad_norm": 4.183150768280029, "learning_rate": 0.0007724687144482366, "loss": 6.0941, "step": 2000 }, { "epoch": 0.6829351535836178, "grad_norm": 3.6355671882629395, "learning_rate": 0.0007723549488054608, "loss": 5.8741, "step": 2001 }, { "epoch": 0.6832764505119454, "grad_norm": 6.971808433532715, "learning_rate": 0.0007722411831626849, "loss": 4.9892, "step": 2002 }, { "epoch": 0.683617747440273, "grad_norm": 4.916726589202881, "learning_rate": 0.000772127417519909, "loss": 6.2013, "step": 2003 }, { "epoch": 0.6839590443686007, "grad_norm": 3.724529981613159, "learning_rate": 0.0007720136518771331, "loss": 6.086, "step": 2004 }, { "epoch": 0.6843003412969283, "grad_norm": 3.7401139736175537, "learning_rate": 0.0007718998862343572, "loss": 6.875, "step": 2005 }, { "epoch": 0.6846416382252559, "grad_norm": 3.41847562789917, "learning_rate": 0.0007717861205915814, "loss": 6.9408, "step": 2006 }, { "epoch": 0.6849829351535837, "grad_norm": 3.425471782684326, "learning_rate": 0.0007716723549488055, "loss": 6.7579, "step": 2007 }, { "epoch": 0.6853242320819113, "grad_norm": 3.197573661804199, "learning_rate": 0.0007715585893060296, "loss": 6.6664, "step": 2008 }, { "epoch": 0.6856655290102389, "grad_norm": 3.4145679473876953, "learning_rate": 0.0007714448236632537, "loss": 6.515, "step": 2009 }, { "epoch": 0.6860068259385665, "grad_norm": 3.9534404277801514, "learning_rate": 0.0007713310580204778, "loss": 6.2037, "step": 2010 }, { "epoch": 0.6863481228668942, "grad_norm": 3.274080514907837, "learning_rate": 0.000771217292377702, "loss": 6.4559, "step": 2011 }, { "epoch": 0.6866894197952218, "grad_norm": 3.604434013366699, "learning_rate": 0.0007711035267349262, "loss": 7.1227, "step": 2012 }, { "epoch": 0.6870307167235495, "grad_norm": 3.635999917984009, "learning_rate": 0.0007709897610921502, "loss": 6.5486, "step": 2013 }, { "epoch": 0.6873720136518772, "grad_norm": 3.22017765045166, "learning_rate": 0.0007708759954493743, "loss": 6.9555, "step": 2014 }, { "epoch": 0.6877133105802048, "grad_norm": 3.5242180824279785, "learning_rate": 0.0007707622298065984, "loss": 6.6827, "step": 2015 }, { "epoch": 0.6880546075085324, "grad_norm": 3.7955944538116455, "learning_rate": 0.0007706484641638225, "loss": 5.7521, "step": 2016 }, { "epoch": 0.68839590443686, "grad_norm": 3.422865629196167, "learning_rate": 0.0007705346985210466, "loss": 6.7181, "step": 2017 }, { "epoch": 0.6887372013651877, "grad_norm": 3.7344348430633545, "learning_rate": 0.0007704209328782708, "loss": 6.6848, "step": 2018 }, { "epoch": 0.6890784982935153, "grad_norm": 3.6247267723083496, "learning_rate": 0.0007703071672354949, "loss": 6.3779, "step": 2019 }, { "epoch": 0.689419795221843, "grad_norm": 3.669395923614502, "learning_rate": 0.000770193401592719, "loss": 6.2553, "step": 2020 }, { "epoch": 0.6897610921501707, "grad_norm": 3.4517054557800293, "learning_rate": 0.0007700796359499431, "loss": 6.3794, "step": 2021 }, { "epoch": 0.6901023890784983, "grad_norm": 3.477140188217163, "learning_rate": 0.0007699658703071672, "loss": 7.0087, "step": 2022 }, { "epoch": 0.6904436860068259, "grad_norm": 3.354229688644409, "learning_rate": 0.0007698521046643914, "loss": 6.3737, "step": 2023 }, { "epoch": 0.6907849829351536, "grad_norm": 3.4395453929901123, "learning_rate": 0.0007697383390216155, "loss": 6.6592, "step": 2024 }, { "epoch": 0.6911262798634812, "grad_norm": 3.5157663822174072, "learning_rate": 0.0007696245733788396, "loss": 6.6477, "step": 2025 }, { "epoch": 0.6914675767918089, "grad_norm": 3.3607208728790283, "learning_rate": 0.0007695108077360637, "loss": 6.3353, "step": 2026 }, { "epoch": 0.6918088737201366, "grad_norm": 5.396403789520264, "learning_rate": 0.0007693970420932878, "loss": 6.0246, "step": 2027 }, { "epoch": 0.6921501706484642, "grad_norm": 3.4905757904052734, "learning_rate": 0.000769283276450512, "loss": 6.1745, "step": 2028 }, { "epoch": 0.6924914675767918, "grad_norm": 3.5405080318450928, "learning_rate": 0.0007691695108077362, "loss": 6.5764, "step": 2029 }, { "epoch": 0.6928327645051194, "grad_norm": 4.243968963623047, "learning_rate": 0.0007690557451649603, "loss": 6.2635, "step": 2030 }, { "epoch": 0.6931740614334471, "grad_norm": 6.531406402587891, "learning_rate": 0.0007689419795221844, "loss": 5.7165, "step": 2031 }, { "epoch": 0.6935153583617747, "grad_norm": 3.5809173583984375, "learning_rate": 0.0007688282138794085, "loss": 6.8084, "step": 2032 }, { "epoch": 0.6938566552901024, "grad_norm": 3.7270376682281494, "learning_rate": 0.0007687144482366325, "loss": 6.7852, "step": 2033 }, { "epoch": 0.6941979522184301, "grad_norm": 3.384864330291748, "learning_rate": 0.0007686006825938566, "loss": 6.6244, "step": 2034 }, { "epoch": 0.6945392491467577, "grad_norm": 4.850109100341797, "learning_rate": 0.0007684869169510808, "loss": 6.7117, "step": 2035 }, { "epoch": 0.6948805460750853, "grad_norm": 6.860106468200684, "learning_rate": 0.0007683731513083049, "loss": 6.1666, "step": 2036 }, { "epoch": 0.6952218430034129, "grad_norm": 3.350128412246704, "learning_rate": 0.000768259385665529, "loss": 6.9472, "step": 2037 }, { "epoch": 0.6955631399317406, "grad_norm": 3.3778512477874756, "learning_rate": 0.0007681456200227531, "loss": 6.8923, "step": 2038 }, { "epoch": 0.6959044368600683, "grad_norm": 3.257622003555298, "learning_rate": 0.0007680318543799772, "loss": 6.6449, "step": 2039 }, { "epoch": 0.6962457337883959, "grad_norm": 3.383254051208496, "learning_rate": 0.0007679180887372013, "loss": 5.932, "step": 2040 }, { "epoch": 0.6965870307167236, "grad_norm": 3.2423672676086426, "learning_rate": 0.0007678043230944255, "loss": 6.2983, "step": 2041 }, { "epoch": 0.6969283276450512, "grad_norm": 3.3049263954162598, "learning_rate": 0.0007676905574516496, "loss": 6.4907, "step": 2042 }, { "epoch": 0.6972696245733788, "grad_norm": 3.3618760108947754, "learning_rate": 0.0007675767918088737, "loss": 6.322, "step": 2043 }, { "epoch": 0.6976109215017064, "grad_norm": 3.2208092212677, "learning_rate": 0.0007674630261660978, "loss": 6.8044, "step": 2044 }, { "epoch": 0.6979522184300341, "grad_norm": 3.3235998153686523, "learning_rate": 0.000767349260523322, "loss": 5.9484, "step": 2045 }, { "epoch": 0.6982935153583618, "grad_norm": 3.5273094177246094, "learning_rate": 0.0007672354948805462, "loss": 6.9184, "step": 2046 }, { "epoch": 0.6986348122866894, "grad_norm": 3.4016122817993164, "learning_rate": 0.0007671217292377703, "loss": 6.5703, "step": 2047 }, { "epoch": 0.6989761092150171, "grad_norm": 5.7711405754089355, "learning_rate": 0.0007670079635949944, "loss": 5.5968, "step": 2048 }, { "epoch": 0.6993174061433447, "grad_norm": 7.544925689697266, "learning_rate": 0.0007668941979522185, "loss": 6.2352, "step": 2049 }, { "epoch": 0.6996587030716723, "grad_norm": 3.5243000984191895, "learning_rate": 0.0007667804323094426, "loss": 6.3483, "step": 2050 }, { "epoch": 0.7, "grad_norm": 3.510413885116577, "learning_rate": 0.0007666666666666667, "loss": 6.7339, "step": 2051 }, { "epoch": 0.7003412969283277, "grad_norm": 3.272040843963623, "learning_rate": 0.0007665529010238908, "loss": 6.3808, "step": 2052 }, { "epoch": 0.7006825938566553, "grad_norm": 3.230138063430786, "learning_rate": 0.0007664391353811149, "loss": 6.4907, "step": 2053 }, { "epoch": 0.701023890784983, "grad_norm": 3.2666988372802734, "learning_rate": 0.000766325369738339, "loss": 6.5341, "step": 2054 }, { "epoch": 0.7013651877133106, "grad_norm": 3.246366262435913, "learning_rate": 0.0007662116040955631, "loss": 6.4526, "step": 2055 }, { "epoch": 0.7017064846416382, "grad_norm": 3.516317844390869, "learning_rate": 0.0007660978384527872, "loss": 7.0432, "step": 2056 }, { "epoch": 0.7020477815699658, "grad_norm": 3.530691146850586, "learning_rate": 0.0007659840728100113, "loss": 6.7314, "step": 2057 }, { "epoch": 0.7023890784982935, "grad_norm": 3.3396496772766113, "learning_rate": 0.0007658703071672355, "loss": 6.9605, "step": 2058 }, { "epoch": 0.7027303754266212, "grad_norm": 3.306985378265381, "learning_rate": 0.0007657565415244596, "loss": 6.488, "step": 2059 }, { "epoch": 0.7030716723549488, "grad_norm": 3.3087706565856934, "learning_rate": 0.0007656427758816837, "loss": 5.9937, "step": 2060 }, { "epoch": 0.7034129692832765, "grad_norm": 3.159797191619873, "learning_rate": 0.0007655290102389078, "loss": 6.5233, "step": 2061 }, { "epoch": 0.7037542662116041, "grad_norm": 3.4056601524353027, "learning_rate": 0.000765415244596132, "loss": 6.7224, "step": 2062 }, { "epoch": 0.7040955631399317, "grad_norm": 3.676870822906494, "learning_rate": 0.0007653014789533561, "loss": 6.3446, "step": 2063 }, { "epoch": 0.7044368600682593, "grad_norm": 3.3793466091156006, "learning_rate": 0.0007651877133105803, "loss": 6.8083, "step": 2064 }, { "epoch": 0.7047781569965871, "grad_norm": 3.507800340652466, "learning_rate": 0.0007650739476678044, "loss": 7.0639, "step": 2065 }, { "epoch": 0.7051194539249147, "grad_norm": 6.222537994384766, "learning_rate": 0.0007649601820250285, "loss": 6.6979, "step": 2066 }, { "epoch": 0.7054607508532423, "grad_norm": 3.477949380874634, "learning_rate": 0.0007648464163822526, "loss": 6.6638, "step": 2067 }, { "epoch": 0.70580204778157, "grad_norm": 3.4923906326293945, "learning_rate": 0.0007647326507394767, "loss": 6.8871, "step": 2068 }, { "epoch": 0.7061433447098976, "grad_norm": 3.818601608276367, "learning_rate": 0.0007646188850967009, "loss": 6.0797, "step": 2069 }, { "epoch": 0.7064846416382252, "grad_norm": 3.1661059856414795, "learning_rate": 0.000764505119453925, "loss": 6.2969, "step": 2070 }, { "epoch": 0.7068259385665528, "grad_norm": 3.237128496170044, "learning_rate": 0.0007643913538111491, "loss": 6.4024, "step": 2071 }, { "epoch": 0.7071672354948806, "grad_norm": 3.7169055938720703, "learning_rate": 0.0007642775881683731, "loss": 5.803, "step": 2072 }, { "epoch": 0.7075085324232082, "grad_norm": 3.3643290996551514, "learning_rate": 0.0007641638225255972, "loss": 6.5204, "step": 2073 }, { "epoch": 0.7078498293515358, "grad_norm": 3.5020577907562256, "learning_rate": 0.0007640500568828213, "loss": 6.4471, "step": 2074 }, { "epoch": 0.7081911262798635, "grad_norm": 3.174459934234619, "learning_rate": 0.0007639362912400455, "loss": 6.4608, "step": 2075 }, { "epoch": 0.7085324232081911, "grad_norm": 3.408348321914673, "learning_rate": 0.0007638225255972696, "loss": 6.2846, "step": 2076 }, { "epoch": 0.7088737201365187, "grad_norm": 3.218968152999878, "learning_rate": 0.0007637087599544937, "loss": 6.5383, "step": 2077 }, { "epoch": 0.7092150170648465, "grad_norm": 3.2198712825775146, "learning_rate": 0.0007635949943117178, "loss": 6.9809, "step": 2078 }, { "epoch": 0.7095563139931741, "grad_norm": 3.364302635192871, "learning_rate": 0.000763481228668942, "loss": 6.0809, "step": 2079 }, { "epoch": 0.7098976109215017, "grad_norm": 3.22308611869812, "learning_rate": 0.0007633674630261661, "loss": 6.7824, "step": 2080 }, { "epoch": 0.7102389078498293, "grad_norm": 3.4846742153167725, "learning_rate": 0.0007632536973833903, "loss": 6.8048, "step": 2081 }, { "epoch": 0.710580204778157, "grad_norm": 3.32481050491333, "learning_rate": 0.0007631399317406144, "loss": 6.4347, "step": 2082 }, { "epoch": 0.7109215017064846, "grad_norm": 3.340768814086914, "learning_rate": 0.0007630261660978385, "loss": 6.7707, "step": 2083 }, { "epoch": 0.7112627986348122, "grad_norm": 5.2757086753845215, "learning_rate": 0.0007629124004550626, "loss": 5.6813, "step": 2084 }, { "epoch": 0.71160409556314, "grad_norm": 3.4196841716766357, "learning_rate": 0.0007627986348122867, "loss": 6.8647, "step": 2085 }, { "epoch": 0.7119453924914676, "grad_norm": 3.5735483169555664, "learning_rate": 0.0007626848691695109, "loss": 6.4418, "step": 2086 }, { "epoch": 0.7122866894197952, "grad_norm": 3.366319179534912, "learning_rate": 0.000762571103526735, "loss": 6.5476, "step": 2087 }, { "epoch": 0.7126279863481229, "grad_norm": 3.2403366565704346, "learning_rate": 0.0007624573378839591, "loss": 6.5685, "step": 2088 }, { "epoch": 0.7129692832764505, "grad_norm": 3.317497730255127, "learning_rate": 0.0007623435722411832, "loss": 6.5655, "step": 2089 }, { "epoch": 0.7133105802047781, "grad_norm": 3.3788607120513916, "learning_rate": 0.0007622298065984073, "loss": 6.5866, "step": 2090 }, { "epoch": 0.7136518771331058, "grad_norm": 7.484004020690918, "learning_rate": 0.0007621160409556313, "loss": 5.0795, "step": 2091 }, { "epoch": 0.7139931740614335, "grad_norm": 3.5781850814819336, "learning_rate": 0.0007620022753128555, "loss": 6.0668, "step": 2092 }, { "epoch": 0.7143344709897611, "grad_norm": 3.5423996448516846, "learning_rate": 0.0007618885096700796, "loss": 6.4591, "step": 2093 }, { "epoch": 0.7146757679180887, "grad_norm": 3.2912189960479736, "learning_rate": 0.0007617747440273037, "loss": 6.3944, "step": 2094 }, { "epoch": 0.7150170648464164, "grad_norm": 3.3964452743530273, "learning_rate": 0.0007616609783845278, "loss": 6.0996, "step": 2095 }, { "epoch": 0.715358361774744, "grad_norm": 3.4383544921875, "learning_rate": 0.000761547212741752, "loss": 6.8264, "step": 2096 }, { "epoch": 0.7156996587030716, "grad_norm": 3.80399227142334, "learning_rate": 0.0007614334470989761, "loss": 5.6848, "step": 2097 }, { "epoch": 0.7160409556313994, "grad_norm": 3.4346396923065186, "learning_rate": 0.0007613196814562003, "loss": 6.3825, "step": 2098 }, { "epoch": 0.716382252559727, "grad_norm": 4.535429000854492, "learning_rate": 0.0007612059158134244, "loss": 5.9877, "step": 2099 }, { "epoch": 0.7167235494880546, "grad_norm": 4.623420238494873, "learning_rate": 0.0007610921501706485, "loss": 6.6088, "step": 2100 }, { "epoch": 0.7170648464163822, "grad_norm": 3.520456552505493, "learning_rate": 0.0007609783845278726, "loss": 6.8445, "step": 2101 }, { "epoch": 0.7174061433447099, "grad_norm": 3.4850854873657227, "learning_rate": 0.0007608646188850967, "loss": 6.5181, "step": 2102 }, { "epoch": 0.7177474402730375, "grad_norm": 3.35722279548645, "learning_rate": 0.0007607508532423208, "loss": 7.1329, "step": 2103 }, { "epoch": 0.7180887372013652, "grad_norm": 3.2173945903778076, "learning_rate": 0.000760637087599545, "loss": 6.9774, "step": 2104 }, { "epoch": 0.7184300341296929, "grad_norm": 3.337705373764038, "learning_rate": 0.0007605233219567691, "loss": 7.1209, "step": 2105 }, { "epoch": 0.7187713310580205, "grad_norm": 3.361863136291504, "learning_rate": 0.0007604095563139932, "loss": 6.4386, "step": 2106 }, { "epoch": 0.7191126279863481, "grad_norm": 3.371455669403076, "learning_rate": 0.0007602957906712173, "loss": 7.0502, "step": 2107 }, { "epoch": 0.7194539249146757, "grad_norm": 3.6178102493286133, "learning_rate": 0.0007601820250284414, "loss": 6.33, "step": 2108 }, { "epoch": 0.7197952218430034, "grad_norm": 3.337318181991577, "learning_rate": 0.0007600682593856656, "loss": 7.0613, "step": 2109 }, { "epoch": 0.7201365187713311, "grad_norm": 3.516834020614624, "learning_rate": 0.0007599544937428896, "loss": 6.2131, "step": 2110 }, { "epoch": 0.7204778156996587, "grad_norm": 3.4340057373046875, "learning_rate": 0.0007598407281001137, "loss": 6.4354, "step": 2111 }, { "epoch": 0.7208191126279864, "grad_norm": 3.5667688846588135, "learning_rate": 0.0007597269624573379, "loss": 6.841, "step": 2112 }, { "epoch": 0.721160409556314, "grad_norm": 3.2933144569396973, "learning_rate": 0.000759613196814562, "loss": 6.542, "step": 2113 }, { "epoch": 0.7215017064846416, "grad_norm": 3.4875848293304443, "learning_rate": 0.0007594994311717861, "loss": 6.9857, "step": 2114 }, { "epoch": 0.7218430034129693, "grad_norm": 4.947911739349365, "learning_rate": 0.0007593856655290103, "loss": 5.5756, "step": 2115 }, { "epoch": 0.7221843003412969, "grad_norm": 3.511719226837158, "learning_rate": 0.0007592718998862344, "loss": 6.6189, "step": 2116 }, { "epoch": 0.7225255972696246, "grad_norm": 3.4902215003967285, "learning_rate": 0.0007591581342434585, "loss": 6.9783, "step": 2117 }, { "epoch": 0.7228668941979522, "grad_norm": 3.5169901847839355, "learning_rate": 0.0007590443686006826, "loss": 6.8164, "step": 2118 }, { "epoch": 0.7232081911262799, "grad_norm": 3.3471617698669434, "learning_rate": 0.0007589306029579067, "loss": 6.8412, "step": 2119 }, { "epoch": 0.7235494880546075, "grad_norm": 3.3342645168304443, "learning_rate": 0.0007588168373151308, "loss": 6.4882, "step": 2120 }, { "epoch": 0.7238907849829351, "grad_norm": 3.29974627494812, "learning_rate": 0.000758703071672355, "loss": 6.5342, "step": 2121 }, { "epoch": 0.7242320819112628, "grad_norm": 5.739980697631836, "learning_rate": 0.0007585893060295791, "loss": 6.6463, "step": 2122 }, { "epoch": 0.7245733788395905, "grad_norm": 4.314258575439453, "learning_rate": 0.0007584755403868032, "loss": 5.9307, "step": 2123 }, { "epoch": 0.7249146757679181, "grad_norm": 5.394867897033691, "learning_rate": 0.0007583617747440273, "loss": 6.2618, "step": 2124 }, { "epoch": 0.7252559726962458, "grad_norm": 3.7781617641448975, "learning_rate": 0.0007582480091012514, "loss": 6.4021, "step": 2125 }, { "epoch": 0.7255972696245734, "grad_norm": 3.883280038833618, "learning_rate": 0.0007581342434584756, "loss": 6.2862, "step": 2126 }, { "epoch": 0.725938566552901, "grad_norm": 3.35640025138855, "learning_rate": 0.0007580204778156997, "loss": 6.9783, "step": 2127 }, { "epoch": 0.7262798634812286, "grad_norm": 3.3483872413635254, "learning_rate": 0.0007579067121729239, "loss": 6.6387, "step": 2128 }, { "epoch": 0.7266211604095563, "grad_norm": 3.295193672180176, "learning_rate": 0.000757792946530148, "loss": 6.7598, "step": 2129 }, { "epoch": 0.726962457337884, "grad_norm": 5.115669250488281, "learning_rate": 0.000757679180887372, "loss": 5.9118, "step": 2130 }, { "epoch": 0.7273037542662116, "grad_norm": 3.72269606590271, "learning_rate": 0.0007575654152445961, "loss": 6.5912, "step": 2131 }, { "epoch": 0.7276450511945393, "grad_norm": 3.6045024394989014, "learning_rate": 0.0007574516496018203, "loss": 6.7721, "step": 2132 }, { "epoch": 0.7279863481228669, "grad_norm": 3.3702828884124756, "learning_rate": 0.0007573378839590444, "loss": 6.1741, "step": 2133 }, { "epoch": 0.7283276450511945, "grad_norm": 3.3547866344451904, "learning_rate": 0.0007572241183162685, "loss": 6.5913, "step": 2134 }, { "epoch": 0.7286689419795221, "grad_norm": 3.366942882537842, "learning_rate": 0.0007571103526734926, "loss": 6.519, "step": 2135 }, { "epoch": 0.7290102389078499, "grad_norm": 3.3425605297088623, "learning_rate": 0.0007569965870307167, "loss": 7.0345, "step": 2136 }, { "epoch": 0.7293515358361775, "grad_norm": 6.479069232940674, "learning_rate": 0.0007568828213879408, "loss": 6.3204, "step": 2137 }, { "epoch": 0.7296928327645051, "grad_norm": 3.5541889667510986, "learning_rate": 0.000756769055745165, "loss": 6.6065, "step": 2138 }, { "epoch": 0.7300341296928328, "grad_norm": 3.4596619606018066, "learning_rate": 0.0007566552901023891, "loss": 6.6794, "step": 2139 }, { "epoch": 0.7303754266211604, "grad_norm": 3.4551479816436768, "learning_rate": 0.0007565415244596132, "loss": 6.7369, "step": 2140 }, { "epoch": 0.730716723549488, "grad_norm": 3.431898832321167, "learning_rate": 0.0007564277588168373, "loss": 6.1183, "step": 2141 }, { "epoch": 0.7310580204778157, "grad_norm": 3.323843002319336, "learning_rate": 0.0007563139931740614, "loss": 6.919, "step": 2142 }, { "epoch": 0.7313993174061434, "grad_norm": 3.4173998832702637, "learning_rate": 0.0007562002275312855, "loss": 6.7127, "step": 2143 }, { "epoch": 0.731740614334471, "grad_norm": 3.3746798038482666, "learning_rate": 0.0007560864618885098, "loss": 6.8423, "step": 2144 }, { "epoch": 0.7320819112627986, "grad_norm": 3.6130669116973877, "learning_rate": 0.0007559726962457339, "loss": 6.3245, "step": 2145 }, { "epoch": 0.7324232081911263, "grad_norm": 3.3271877765655518, "learning_rate": 0.000755858930602958, "loss": 6.885, "step": 2146 }, { "epoch": 0.7327645051194539, "grad_norm": 4.328009605407715, "learning_rate": 0.0007557451649601821, "loss": 6.3472, "step": 2147 }, { "epoch": 0.7331058020477815, "grad_norm": 3.4987595081329346, "learning_rate": 0.0007556313993174062, "loss": 6.632, "step": 2148 }, { "epoch": 0.7334470989761093, "grad_norm": 3.967522144317627, "learning_rate": 0.0007555176336746303, "loss": 6.3009, "step": 2149 }, { "epoch": 0.7337883959044369, "grad_norm": 4.190941333770752, "learning_rate": 0.0007554038680318544, "loss": 5.315, "step": 2150 }, { "epoch": 0.7341296928327645, "grad_norm": 3.2750062942504883, "learning_rate": 0.0007552901023890785, "loss": 6.4132, "step": 2151 }, { "epoch": 0.7344709897610922, "grad_norm": 3.1912786960601807, "learning_rate": 0.0007551763367463026, "loss": 6.4303, "step": 2152 }, { "epoch": 0.7348122866894198, "grad_norm": 4.60476016998291, "learning_rate": 0.0007550625711035267, "loss": 5.7562, "step": 2153 }, { "epoch": 0.7351535836177474, "grad_norm": 3.1308608055114746, "learning_rate": 0.0007549488054607508, "loss": 6.6797, "step": 2154 }, { "epoch": 0.735494880546075, "grad_norm": 3.2094202041625977, "learning_rate": 0.000754835039817975, "loss": 6.6746, "step": 2155 }, { "epoch": 0.7358361774744028, "grad_norm": 3.3604657649993896, "learning_rate": 0.0007547212741751991, "loss": 7.061, "step": 2156 }, { "epoch": 0.7361774744027304, "grad_norm": 3.310800075531006, "learning_rate": 0.0007546075085324232, "loss": 6.4595, "step": 2157 }, { "epoch": 0.736518771331058, "grad_norm": 4.224414348602295, "learning_rate": 0.0007544937428896473, "loss": 6.3834, "step": 2158 }, { "epoch": 0.7368600682593857, "grad_norm": 3.4265730381011963, "learning_rate": 0.0007543799772468714, "loss": 6.8672, "step": 2159 }, { "epoch": 0.7372013651877133, "grad_norm": 3.1829991340637207, "learning_rate": 0.0007542662116040955, "loss": 6.7422, "step": 2160 }, { "epoch": 0.7375426621160409, "grad_norm": 3.3258779048919678, "learning_rate": 0.0007541524459613198, "loss": 6.8123, "step": 2161 }, { "epoch": 0.7378839590443687, "grad_norm": 3.4259634017944336, "learning_rate": 0.0007540386803185439, "loss": 7.0327, "step": 2162 }, { "epoch": 0.7382252559726963, "grad_norm": 6.536485195159912, "learning_rate": 0.000753924914675768, "loss": 6.6945, "step": 2163 }, { "epoch": 0.7385665529010239, "grad_norm": 3.6783645153045654, "learning_rate": 0.0007538111490329921, "loss": 7.1871, "step": 2164 }, { "epoch": 0.7389078498293515, "grad_norm": 3.5718960762023926, "learning_rate": 0.0007536973833902162, "loss": 6.322, "step": 2165 }, { "epoch": 0.7392491467576792, "grad_norm": 3.513333320617676, "learning_rate": 0.0007535836177474404, "loss": 6.5015, "step": 2166 }, { "epoch": 0.7395904436860068, "grad_norm": 3.306096315383911, "learning_rate": 0.0007534698521046645, "loss": 6.2813, "step": 2167 }, { "epoch": 0.7399317406143344, "grad_norm": 4.2289557456970215, "learning_rate": 0.0007533560864618886, "loss": 6.128, "step": 2168 }, { "epoch": 0.7402730375426622, "grad_norm": 3.4186182022094727, "learning_rate": 0.0007532423208191126, "loss": 6.4153, "step": 2169 }, { "epoch": 0.7406143344709898, "grad_norm": 3.4397335052490234, "learning_rate": 0.0007531285551763367, "loss": 6.4691, "step": 2170 }, { "epoch": 0.7409556313993174, "grad_norm": 3.2782340049743652, "learning_rate": 0.0007530147895335608, "loss": 6.8505, "step": 2171 }, { "epoch": 0.741296928327645, "grad_norm": 3.247593879699707, "learning_rate": 0.000752901023890785, "loss": 6.429, "step": 2172 }, { "epoch": 0.7416382252559727, "grad_norm": 3.29437518119812, "learning_rate": 0.0007527872582480091, "loss": 6.7258, "step": 2173 }, { "epoch": 0.7419795221843003, "grad_norm": 3.4087376594543457, "learning_rate": 0.0007526734926052332, "loss": 7.0698, "step": 2174 }, { "epoch": 0.742320819112628, "grad_norm": 3.289987564086914, "learning_rate": 0.0007525597269624573, "loss": 6.3238, "step": 2175 }, { "epoch": 0.7426621160409557, "grad_norm": 3.370927095413208, "learning_rate": 0.0007524459613196814, "loss": 6.6566, "step": 2176 }, { "epoch": 0.7430034129692833, "grad_norm": 3.518704891204834, "learning_rate": 0.0007523321956769055, "loss": 6.1031, "step": 2177 }, { "epoch": 0.7433447098976109, "grad_norm": 3.2359862327575684, "learning_rate": 0.0007522184300341298, "loss": 6.4414, "step": 2178 }, { "epoch": 0.7436860068259386, "grad_norm": 3.503610134124756, "learning_rate": 0.0007521046643913539, "loss": 6.473, "step": 2179 }, { "epoch": 0.7440273037542662, "grad_norm": 3.644157886505127, "learning_rate": 0.000751990898748578, "loss": 6.7016, "step": 2180 }, { "epoch": 0.7443686006825938, "grad_norm": 3.6388585567474365, "learning_rate": 0.0007518771331058021, "loss": 6.0456, "step": 2181 }, { "epoch": 0.7447098976109215, "grad_norm": 3.494711399078369, "learning_rate": 0.0007517633674630262, "loss": 6.3996, "step": 2182 }, { "epoch": 0.7450511945392492, "grad_norm": 3.3580422401428223, "learning_rate": 0.0007516496018202503, "loss": 6.791, "step": 2183 }, { "epoch": 0.7453924914675768, "grad_norm": 3.2778096199035645, "learning_rate": 0.0007515358361774745, "loss": 6.8585, "step": 2184 }, { "epoch": 0.7457337883959044, "grad_norm": 4.171144485473633, "learning_rate": 0.0007514220705346986, "loss": 6.5811, "step": 2185 }, { "epoch": 0.7460750853242321, "grad_norm": 3.2704648971557617, "learning_rate": 0.0007513083048919227, "loss": 6.4436, "step": 2186 }, { "epoch": 0.7464163822525597, "grad_norm": 3.2986881732940674, "learning_rate": 0.0007511945392491468, "loss": 6.3978, "step": 2187 }, { "epoch": 0.7467576791808874, "grad_norm": 3.2707314491271973, "learning_rate": 0.0007510807736063708, "loss": 6.8469, "step": 2188 }, { "epoch": 0.747098976109215, "grad_norm": 3.3278586864471436, "learning_rate": 0.000750967007963595, "loss": 6.7223, "step": 2189 }, { "epoch": 0.7474402730375427, "grad_norm": 3.3085014820098877, "learning_rate": 0.0007508532423208191, "loss": 6.732, "step": 2190 }, { "epoch": 0.7477815699658703, "grad_norm": 3.2527832984924316, "learning_rate": 0.0007507394766780432, "loss": 6.7635, "step": 2191 }, { "epoch": 0.7481228668941979, "grad_norm": 4.035043239593506, "learning_rate": 0.0007506257110352673, "loss": 6.5922, "step": 2192 }, { "epoch": 0.7484641638225256, "grad_norm": 3.383103370666504, "learning_rate": 0.0007505119453924914, "loss": 6.6097, "step": 2193 }, { "epoch": 0.7488054607508532, "grad_norm": 3.518967628479004, "learning_rate": 0.0007503981797497155, "loss": 6.7073, "step": 2194 }, { "epoch": 0.7491467576791809, "grad_norm": 3.142347574234009, "learning_rate": 0.0007502844141069398, "loss": 6.7196, "step": 2195 }, { "epoch": 0.7494880546075086, "grad_norm": 3.3072586059570312, "learning_rate": 0.0007501706484641639, "loss": 7.2352, "step": 2196 }, { "epoch": 0.7498293515358362, "grad_norm": 3.449594020843506, "learning_rate": 0.000750056882821388, "loss": 6.746, "step": 2197 }, { "epoch": 0.7501706484641638, "grad_norm": 3.179112672805786, "learning_rate": 0.0007499431171786121, "loss": 6.3185, "step": 2198 }, { "epoch": 0.7505119453924914, "grad_norm": 3.2998242378234863, "learning_rate": 0.0007498293515358362, "loss": 6.7703, "step": 2199 }, { "epoch": 0.7508532423208191, "grad_norm": 5.338634967803955, "learning_rate": 0.0007497155858930603, "loss": 5.8161, "step": 2200 }, { "epoch": 0.7511945392491468, "grad_norm": 4.291819095611572, "learning_rate": 0.0007496018202502845, "loss": 6.3253, "step": 2201 }, { "epoch": 0.7515358361774744, "grad_norm": 3.579894542694092, "learning_rate": 0.0007494880546075086, "loss": 6.7618, "step": 2202 }, { "epoch": 0.7518771331058021, "grad_norm": 3.397998809814453, "learning_rate": 0.0007493742889647327, "loss": 6.4193, "step": 2203 }, { "epoch": 0.7522184300341297, "grad_norm": 3.181931972503662, "learning_rate": 0.0007492605233219568, "loss": 6.1566, "step": 2204 }, { "epoch": 0.7525597269624573, "grad_norm": 3.316357374191284, "learning_rate": 0.0007491467576791809, "loss": 6.9517, "step": 2205 }, { "epoch": 0.752901023890785, "grad_norm": 3.6711394786834717, "learning_rate": 0.000749032992036405, "loss": 6.699, "step": 2206 }, { "epoch": 0.7532423208191126, "grad_norm": 3.2248027324676514, "learning_rate": 0.0007489192263936292, "loss": 6.7637, "step": 2207 }, { "epoch": 0.7535836177474403, "grad_norm": 6.611885070800781, "learning_rate": 0.0007488054607508532, "loss": 5.6556, "step": 2208 }, { "epoch": 0.7539249146757679, "grad_norm": 3.5705573558807373, "learning_rate": 0.0007486916951080773, "loss": 6.6186, "step": 2209 }, { "epoch": 0.7542662116040956, "grad_norm": 3.3954081535339355, "learning_rate": 0.0007485779294653014, "loss": 6.6294, "step": 2210 }, { "epoch": 0.7546075085324232, "grad_norm": 4.374399185180664, "learning_rate": 0.0007484641638225255, "loss": 5.2589, "step": 2211 }, { "epoch": 0.7549488054607508, "grad_norm": 3.399153232574463, "learning_rate": 0.0007483503981797498, "loss": 7.0186, "step": 2212 }, { "epoch": 0.7552901023890785, "grad_norm": 3.427119016647339, "learning_rate": 0.0007482366325369739, "loss": 6.4384, "step": 2213 }, { "epoch": 0.7556313993174062, "grad_norm": 3.3054943084716797, "learning_rate": 0.000748122866894198, "loss": 6.8928, "step": 2214 }, { "epoch": 0.7559726962457338, "grad_norm": 3.1258208751678467, "learning_rate": 0.0007480091012514221, "loss": 7.0228, "step": 2215 }, { "epoch": 0.7563139931740614, "grad_norm": 3.197582721710205, "learning_rate": 0.0007478953356086462, "loss": 6.4303, "step": 2216 }, { "epoch": 0.7566552901023891, "grad_norm": 3.482532262802124, "learning_rate": 0.0007477815699658703, "loss": 6.5166, "step": 2217 }, { "epoch": 0.7569965870307167, "grad_norm": 3.3333423137664795, "learning_rate": 0.0007476678043230945, "loss": 6.8339, "step": 2218 }, { "epoch": 0.7573378839590443, "grad_norm": 3.8367319107055664, "learning_rate": 0.0007475540386803186, "loss": 4.8938, "step": 2219 }, { "epoch": 0.757679180887372, "grad_norm": 3.483358383178711, "learning_rate": 0.0007474402730375427, "loss": 6.3541, "step": 2220 }, { "epoch": 0.7580204778156997, "grad_norm": 3.646254539489746, "learning_rate": 0.0007473265073947668, "loss": 6.2515, "step": 2221 }, { "epoch": 0.7583617747440273, "grad_norm": 3.3919055461883545, "learning_rate": 0.0007472127417519909, "loss": 6.4064, "step": 2222 }, { "epoch": 0.758703071672355, "grad_norm": 3.8102240562438965, "learning_rate": 0.000747098976109215, "loss": 6.0644, "step": 2223 }, { "epoch": 0.7590443686006826, "grad_norm": 4.608096599578857, "learning_rate": 0.0007469852104664392, "loss": 6.7343, "step": 2224 }, { "epoch": 0.7593856655290102, "grad_norm": 4.500402450561523, "learning_rate": 0.0007468714448236633, "loss": 6.7885, "step": 2225 }, { "epoch": 0.7597269624573378, "grad_norm": 3.641897439956665, "learning_rate": 0.0007467576791808874, "loss": 7.0954, "step": 2226 }, { "epoch": 0.7600682593856656, "grad_norm": 3.584850788116455, "learning_rate": 0.0007466439135381114, "loss": 6.7046, "step": 2227 }, { "epoch": 0.7604095563139932, "grad_norm": 3.864504337310791, "learning_rate": 0.0007465301478953355, "loss": 6.564, "step": 2228 }, { "epoch": 0.7607508532423208, "grad_norm": 3.3200836181640625, "learning_rate": 0.0007464163822525596, "loss": 6.8344, "step": 2229 }, { "epoch": 0.7610921501706485, "grad_norm": 3.698155641555786, "learning_rate": 0.0007463026166097839, "loss": 6.0784, "step": 2230 }, { "epoch": 0.7614334470989761, "grad_norm": 3.2318496704101562, "learning_rate": 0.000746188850967008, "loss": 6.6715, "step": 2231 }, { "epoch": 0.7617747440273037, "grad_norm": 3.3073530197143555, "learning_rate": 0.0007460750853242321, "loss": 6.779, "step": 2232 }, { "epoch": 0.7621160409556313, "grad_norm": 3.1656546592712402, "learning_rate": 0.0007459613196814562, "loss": 6.1155, "step": 2233 }, { "epoch": 0.7624573378839591, "grad_norm": 3.251887798309326, "learning_rate": 0.0007458475540386803, "loss": 6.747, "step": 2234 }, { "epoch": 0.7627986348122867, "grad_norm": 3.452404022216797, "learning_rate": 0.0007457337883959045, "loss": 6.3328, "step": 2235 }, { "epoch": 0.7631399317406143, "grad_norm": 10.201861381530762, "learning_rate": 0.0007456200227531286, "loss": 5.8861, "step": 2236 }, { "epoch": 0.763481228668942, "grad_norm": 6.577554225921631, "learning_rate": 0.0007455062571103527, "loss": 6.1329, "step": 2237 }, { "epoch": 0.7638225255972696, "grad_norm": 4.222815036773682, "learning_rate": 0.0007453924914675768, "loss": 6.9536, "step": 2238 }, { "epoch": 0.7641638225255972, "grad_norm": 4.045281887054443, "learning_rate": 0.0007452787258248009, "loss": 6.7163, "step": 2239 }, { "epoch": 0.764505119453925, "grad_norm": 3.9914534091949463, "learning_rate": 0.000745164960182025, "loss": 6.5436, "step": 2240 }, { "epoch": 0.7648464163822526, "grad_norm": 3.4769484996795654, "learning_rate": 0.0007450511945392492, "loss": 6.695, "step": 2241 }, { "epoch": 0.7651877133105802, "grad_norm": 3.3896267414093018, "learning_rate": 0.0007449374288964733, "loss": 6.5388, "step": 2242 }, { "epoch": 0.7655290102389078, "grad_norm": 3.0643441677093506, "learning_rate": 0.0007448236632536974, "loss": 6.0324, "step": 2243 }, { "epoch": 0.7658703071672355, "grad_norm": 3.558314800262451, "learning_rate": 0.0007447098976109215, "loss": 6.1635, "step": 2244 }, { "epoch": 0.7662116040955631, "grad_norm": 3.5593554973602295, "learning_rate": 0.0007445961319681457, "loss": 7.1386, "step": 2245 }, { "epoch": 0.7665529010238907, "grad_norm": 7.941109657287598, "learning_rate": 0.0007444823663253698, "loss": 5.8192, "step": 2246 }, { "epoch": 0.7668941979522185, "grad_norm": 3.61783504486084, "learning_rate": 0.0007443686006825939, "loss": 6.484, "step": 2247 }, { "epoch": 0.7672354948805461, "grad_norm": 3.6058130264282227, "learning_rate": 0.000744254835039818, "loss": 6.908, "step": 2248 }, { "epoch": 0.7675767918088737, "grad_norm": 3.377002477645874, "learning_rate": 0.0007441410693970421, "loss": 6.7491, "step": 2249 }, { "epoch": 0.7679180887372014, "grad_norm": 3.3072400093078613, "learning_rate": 0.0007440273037542662, "loss": 6.5535, "step": 2250 }, { "epoch": 0.768259385665529, "grad_norm": 5.5078020095825195, "learning_rate": 0.0007439135381114903, "loss": 6.8116, "step": 2251 }, { "epoch": 0.7686006825938566, "grad_norm": 3.3346757888793945, "learning_rate": 0.0007437997724687145, "loss": 6.6911, "step": 2252 }, { "epoch": 0.7689419795221843, "grad_norm": 3.3978588581085205, "learning_rate": 0.0007436860068259386, "loss": 6.6, "step": 2253 }, { "epoch": 0.769283276450512, "grad_norm": 3.0739238262176514, "learning_rate": 0.0007435722411831627, "loss": 6.3782, "step": 2254 }, { "epoch": 0.7696245733788396, "grad_norm": 3.2551000118255615, "learning_rate": 0.0007434584755403868, "loss": 6.6849, "step": 2255 }, { "epoch": 0.7699658703071672, "grad_norm": 3.300963878631592, "learning_rate": 0.0007433447098976109, "loss": 6.8587, "step": 2256 }, { "epoch": 0.7703071672354949, "grad_norm": 3.314493179321289, "learning_rate": 0.000743230944254835, "loss": 6.3657, "step": 2257 }, { "epoch": 0.7706484641638225, "grad_norm": 3.863924264907837, "learning_rate": 0.0007431171786120592, "loss": 6.2232, "step": 2258 }, { "epoch": 0.7709897610921501, "grad_norm": 3.727370023727417, "learning_rate": 0.0007430034129692833, "loss": 6.1062, "step": 2259 }, { "epoch": 0.7713310580204779, "grad_norm": 3.3532567024230957, "learning_rate": 0.0007428896473265074, "loss": 6.0705, "step": 2260 }, { "epoch": 0.7716723549488055, "grad_norm": 3.544203281402588, "learning_rate": 0.0007427758816837315, "loss": 6.8618, "step": 2261 }, { "epoch": 0.7720136518771331, "grad_norm": 3.5925631523132324, "learning_rate": 0.0007426621160409557, "loss": 6.7984, "step": 2262 }, { "epoch": 0.7723549488054607, "grad_norm": 11.028677940368652, "learning_rate": 0.0007425483503981798, "loss": 7.319, "step": 2263 }, { "epoch": 0.7726962457337884, "grad_norm": 4.054958343505859, "learning_rate": 0.000742434584755404, "loss": 3.7261, "step": 2264 }, { "epoch": 0.773037542662116, "grad_norm": 3.978557825088501, "learning_rate": 0.0007423208191126281, "loss": 6.3767, "step": 2265 }, { "epoch": 0.7733788395904437, "grad_norm": 3.5264103412628174, "learning_rate": 0.0007422070534698521, "loss": 6.3895, "step": 2266 }, { "epoch": 0.7737201365187714, "grad_norm": 5.270613193511963, "learning_rate": 0.0007420932878270762, "loss": 7.091, "step": 2267 }, { "epoch": 0.774061433447099, "grad_norm": 9.161282539367676, "learning_rate": 0.0007419795221843003, "loss": 6.3023, "step": 2268 }, { "epoch": 0.7744027303754266, "grad_norm": 3.5067977905273438, "learning_rate": 0.0007418657565415244, "loss": 7.0963, "step": 2269 }, { "epoch": 0.7747440273037542, "grad_norm": 3.9335129261016846, "learning_rate": 0.0007417519908987486, "loss": 6.6442, "step": 2270 }, { "epoch": 0.7750853242320819, "grad_norm": 3.2506704330444336, "learning_rate": 0.0007416382252559727, "loss": 6.592, "step": 2271 }, { "epoch": 0.7754266211604095, "grad_norm": 3.383512258529663, "learning_rate": 0.0007415244596131968, "loss": 6.3122, "step": 2272 }, { "epoch": 0.7757679180887372, "grad_norm": 4.869744300842285, "learning_rate": 0.0007414106939704209, "loss": 6.1108, "step": 2273 }, { "epoch": 0.7761092150170649, "grad_norm": 9.724971771240234, "learning_rate": 0.000741296928327645, "loss": 6.1679, "step": 2274 }, { "epoch": 0.7764505119453925, "grad_norm": 3.5860729217529297, "learning_rate": 0.0007411831626848692, "loss": 6.8647, "step": 2275 }, { "epoch": 0.7767918088737201, "grad_norm": 3.8942580223083496, "learning_rate": 0.0007410693970420933, "loss": 6.8968, "step": 2276 }, { "epoch": 0.7771331058020478, "grad_norm": 3.5359604358673096, "learning_rate": 0.0007409556313993174, "loss": 6.515, "step": 2277 }, { "epoch": 0.7774744027303754, "grad_norm": 5.767436981201172, "learning_rate": 0.0007408418657565415, "loss": 6.4321, "step": 2278 }, { "epoch": 0.7778156996587031, "grad_norm": 3.3587779998779297, "learning_rate": 0.0007407281001137657, "loss": 6.6002, "step": 2279 }, { "epoch": 0.7781569965870307, "grad_norm": 3.4271020889282227, "learning_rate": 0.0007406143344709898, "loss": 6.5293, "step": 2280 }, { "epoch": 0.7784982935153584, "grad_norm": 3.1101016998291016, "learning_rate": 0.000740500568828214, "loss": 6.7376, "step": 2281 }, { "epoch": 0.778839590443686, "grad_norm": 9.123248100280762, "learning_rate": 0.0007403868031854381, "loss": 6.5933, "step": 2282 }, { "epoch": 0.7791808873720136, "grad_norm": 3.5987279415130615, "learning_rate": 0.0007402730375426622, "loss": 5.8846, "step": 2283 }, { "epoch": 0.7795221843003413, "grad_norm": 3.3567636013031006, "learning_rate": 0.0007401592718998863, "loss": 6.7901, "step": 2284 }, { "epoch": 0.7798634812286689, "grad_norm": 3.2160773277282715, "learning_rate": 0.0007400455062571104, "loss": 6.4096, "step": 2285 }, { "epoch": 0.7802047781569966, "grad_norm": 3.893007278442383, "learning_rate": 0.0007399317406143344, "loss": 6.9421, "step": 2286 }, { "epoch": 0.7805460750853243, "grad_norm": 4.227537631988525, "learning_rate": 0.0007398179749715586, "loss": 6.5151, "step": 2287 }, { "epoch": 0.7808873720136519, "grad_norm": 3.350599527359009, "learning_rate": 0.0007397042093287827, "loss": 6.3571, "step": 2288 }, { "epoch": 0.7812286689419795, "grad_norm": 3.571381092071533, "learning_rate": 0.0007395904436860068, "loss": 6.446, "step": 2289 }, { "epoch": 0.7815699658703071, "grad_norm": 3.227069139480591, "learning_rate": 0.0007394766780432309, "loss": 6.7134, "step": 2290 }, { "epoch": 0.7819112627986348, "grad_norm": 5.10248327255249, "learning_rate": 0.000739362912400455, "loss": 6.0105, "step": 2291 }, { "epoch": 0.7822525597269625, "grad_norm": 12.228768348693848, "learning_rate": 0.0007392491467576792, "loss": 6.8161, "step": 2292 }, { "epoch": 0.7825938566552901, "grad_norm": 3.5377376079559326, "learning_rate": 0.0007391353811149033, "loss": 6.5361, "step": 2293 }, { "epoch": 0.7829351535836178, "grad_norm": 3.385505437850952, "learning_rate": 0.0007390216154721274, "loss": 6.5231, "step": 2294 }, { "epoch": 0.7832764505119454, "grad_norm": 3.796246290206909, "learning_rate": 0.0007389078498293515, "loss": 6.8123, "step": 2295 }, { "epoch": 0.783617747440273, "grad_norm": 3.4816348552703857, "learning_rate": 0.0007387940841865757, "loss": 6.6924, "step": 2296 }, { "epoch": 0.7839590443686006, "grad_norm": 3.1583478450775146, "learning_rate": 0.0007386803185437998, "loss": 6.6573, "step": 2297 }, { "epoch": 0.7843003412969284, "grad_norm": 3.1136934757232666, "learning_rate": 0.000738566552901024, "loss": 6.679, "step": 2298 }, { "epoch": 0.784641638225256, "grad_norm": 3.1441729068756104, "learning_rate": 0.0007384527872582481, "loss": 6.7502, "step": 2299 }, { "epoch": 0.7849829351535836, "grad_norm": 3.35772442817688, "learning_rate": 0.0007383390216154722, "loss": 6.6187, "step": 2300 }, { "epoch": 0.7853242320819113, "grad_norm": 3.171649694442749, "learning_rate": 0.0007382252559726963, "loss": 6.6412, "step": 2301 }, { "epoch": 0.7856655290102389, "grad_norm": 3.1992034912109375, "learning_rate": 0.0007381114903299204, "loss": 6.5126, "step": 2302 }, { "epoch": 0.7860068259385665, "grad_norm": 3.29266619682312, "learning_rate": 0.0007379977246871445, "loss": 6.8787, "step": 2303 }, { "epoch": 0.7863481228668942, "grad_norm": 3.2478480339050293, "learning_rate": 0.0007378839590443687, "loss": 6.6584, "step": 2304 }, { "epoch": 0.7866894197952219, "grad_norm": 4.051445007324219, "learning_rate": 0.0007377701934015927, "loss": 5.3193, "step": 2305 }, { "epoch": 0.7870307167235495, "grad_norm": 3.5747928619384766, "learning_rate": 0.0007376564277588168, "loss": 6.6164, "step": 2306 }, { "epoch": 0.7873720136518771, "grad_norm": 3.6790921688079834, "learning_rate": 0.0007375426621160409, "loss": 6.2069, "step": 2307 }, { "epoch": 0.7877133105802048, "grad_norm": 3.410109519958496, "learning_rate": 0.000737428896473265, "loss": 6.6221, "step": 2308 }, { "epoch": 0.7880546075085324, "grad_norm": 3.273460626602173, "learning_rate": 0.0007373151308304891, "loss": 6.8308, "step": 2309 }, { "epoch": 0.78839590443686, "grad_norm": 3.7715392112731934, "learning_rate": 0.0007372013651877133, "loss": 6.3757, "step": 2310 }, { "epoch": 0.7887372013651878, "grad_norm": 3.6163434982299805, "learning_rate": 0.0007370875995449374, "loss": 6.4616, "step": 2311 }, { "epoch": 0.7890784982935154, "grad_norm": 3.2773282527923584, "learning_rate": 0.0007369738339021615, "loss": 6.6696, "step": 2312 }, { "epoch": 0.789419795221843, "grad_norm": 3.1374948024749756, "learning_rate": 0.0007368600682593857, "loss": 6.7808, "step": 2313 }, { "epoch": 0.7897610921501707, "grad_norm": 3.342297315597534, "learning_rate": 0.0007367463026166098, "loss": 6.3873, "step": 2314 }, { "epoch": 0.7901023890784983, "grad_norm": 3.1699624061584473, "learning_rate": 0.000736632536973834, "loss": 6.458, "step": 2315 }, { "epoch": 0.7904436860068259, "grad_norm": 3.4422099590301514, "learning_rate": 0.0007365187713310581, "loss": 6.8495, "step": 2316 }, { "epoch": 0.7907849829351535, "grad_norm": 3.30281400680542, "learning_rate": 0.0007364050056882822, "loss": 6.2791, "step": 2317 }, { "epoch": 0.7911262798634813, "grad_norm": 3.281485080718994, "learning_rate": 0.0007362912400455063, "loss": 6.733, "step": 2318 }, { "epoch": 0.7914675767918089, "grad_norm": 3.2476160526275635, "learning_rate": 0.0007361774744027304, "loss": 7.0849, "step": 2319 }, { "epoch": 0.7918088737201365, "grad_norm": 5.255532264709473, "learning_rate": 0.0007360637087599545, "loss": 5.6159, "step": 2320 }, { "epoch": 0.7921501706484642, "grad_norm": 3.1768462657928467, "learning_rate": 0.0007359499431171787, "loss": 6.4978, "step": 2321 }, { "epoch": 0.7924914675767918, "grad_norm": 3.386536121368408, "learning_rate": 0.0007358361774744028, "loss": 6.5782, "step": 2322 }, { "epoch": 0.7928327645051194, "grad_norm": 3.406545877456665, "learning_rate": 0.0007357224118316269, "loss": 6.5096, "step": 2323 }, { "epoch": 0.7931740614334472, "grad_norm": 3.1887378692626953, "learning_rate": 0.0007356086461888509, "loss": 6.5397, "step": 2324 }, { "epoch": 0.7935153583617748, "grad_norm": 3.7656588554382324, "learning_rate": 0.000735494880546075, "loss": 6.4162, "step": 2325 }, { "epoch": 0.7938566552901024, "grad_norm": 4.206430912017822, "learning_rate": 0.0007353811149032991, "loss": 6.1527, "step": 2326 }, { "epoch": 0.79419795221843, "grad_norm": 3.5633111000061035, "learning_rate": 0.0007352673492605233, "loss": 6.1671, "step": 2327 }, { "epoch": 0.7945392491467577, "grad_norm": 3.431990623474121, "learning_rate": 0.0007351535836177474, "loss": 6.4424, "step": 2328 }, { "epoch": 0.7948805460750853, "grad_norm": 3.3288986682891846, "learning_rate": 0.0007350398179749716, "loss": 6.4327, "step": 2329 }, { "epoch": 0.7952218430034129, "grad_norm": 5.098928928375244, "learning_rate": 0.0007349260523321957, "loss": 6.9967, "step": 2330 }, { "epoch": 0.7955631399317407, "grad_norm": 10.33311939239502, "learning_rate": 0.0007348122866894198, "loss": 7.4892, "step": 2331 }, { "epoch": 0.7959044368600683, "grad_norm": 3.6178691387176514, "learning_rate": 0.000734698521046644, "loss": 6.2764, "step": 2332 }, { "epoch": 0.7962457337883959, "grad_norm": 3.6678335666656494, "learning_rate": 0.0007345847554038681, "loss": 6.4699, "step": 2333 }, { "epoch": 0.7965870307167235, "grad_norm": 3.2506070137023926, "learning_rate": 0.0007344709897610922, "loss": 6.8747, "step": 2334 }, { "epoch": 0.7969283276450512, "grad_norm": 12.323904037475586, "learning_rate": 0.0007343572241183163, "loss": 7.0181, "step": 2335 }, { "epoch": 0.7972696245733788, "grad_norm": 3.266211986541748, "learning_rate": 0.0007342434584755404, "loss": 6.9827, "step": 2336 }, { "epoch": 0.7976109215017065, "grad_norm": 3.527411699295044, "learning_rate": 0.0007341296928327645, "loss": 7.1384, "step": 2337 }, { "epoch": 0.7979522184300342, "grad_norm": 3.395819902420044, "learning_rate": 0.0007340159271899887, "loss": 6.9804, "step": 2338 }, { "epoch": 0.7982935153583618, "grad_norm": 3.189687728881836, "learning_rate": 0.0007339021615472128, "loss": 6.748, "step": 2339 }, { "epoch": 0.7986348122866894, "grad_norm": 3.2057018280029297, "learning_rate": 0.0007337883959044369, "loss": 7.0284, "step": 2340 }, { "epoch": 0.798976109215017, "grad_norm": 3.4170916080474854, "learning_rate": 0.000733674630261661, "loss": 6.6718, "step": 2341 }, { "epoch": 0.7993174061433447, "grad_norm": 5.019681930541992, "learning_rate": 0.0007335608646188851, "loss": 4.0973, "step": 2342 }, { "epoch": 0.7996587030716723, "grad_norm": 3.8981494903564453, "learning_rate": 0.0007334470989761092, "loss": 6.8531, "step": 2343 }, { "epoch": 0.8, "grad_norm": 3.444851875305176, "learning_rate": 0.0007333333333333333, "loss": 6.7379, "step": 2344 }, { "epoch": 0.8003412969283277, "grad_norm": 3.404766082763672, "learning_rate": 0.0007332195676905574, "loss": 6.8825, "step": 2345 }, { "epoch": 0.8006825938566553, "grad_norm": 3.660458564758301, "learning_rate": 0.0007331058020477816, "loss": 6.5001, "step": 2346 }, { "epoch": 0.8010238907849829, "grad_norm": 3.563992738723755, "learning_rate": 0.0007329920364050057, "loss": 7.2701, "step": 2347 }, { "epoch": 0.8013651877133106, "grad_norm": 3.2425544261932373, "learning_rate": 0.0007328782707622298, "loss": 6.3414, "step": 2348 }, { "epoch": 0.8017064846416382, "grad_norm": 3.2002296447753906, "learning_rate": 0.0007327645051194539, "loss": 6.5811, "step": 2349 }, { "epoch": 0.8020477815699659, "grad_norm": 6.365078926086426, "learning_rate": 0.0007326507394766781, "loss": 5.5698, "step": 2350 }, { "epoch": 0.8023890784982936, "grad_norm": 3.3614845275878906, "learning_rate": 0.0007325369738339022, "loss": 6.7194, "step": 2351 }, { "epoch": 0.8027303754266212, "grad_norm": 3.386559247970581, "learning_rate": 0.0007324232081911263, "loss": 6.7674, "step": 2352 }, { "epoch": 0.8030716723549488, "grad_norm": 3.318051338195801, "learning_rate": 0.0007323094425483504, "loss": 6.8814, "step": 2353 }, { "epoch": 0.8034129692832764, "grad_norm": 3.2678332328796387, "learning_rate": 0.0007321956769055745, "loss": 6.7354, "step": 2354 }, { "epoch": 0.8037542662116041, "grad_norm": 3.1607394218444824, "learning_rate": 0.0007320819112627987, "loss": 6.4444, "step": 2355 }, { "epoch": 0.8040955631399317, "grad_norm": 3.887561798095703, "learning_rate": 0.0007319681456200228, "loss": 6.402, "step": 2356 }, { "epoch": 0.8044368600682594, "grad_norm": 3.33502459526062, "learning_rate": 0.0007318543799772469, "loss": 6.8184, "step": 2357 }, { "epoch": 0.8047781569965871, "grad_norm": 3.411867380142212, "learning_rate": 0.000731740614334471, "loss": 6.712, "step": 2358 }, { "epoch": 0.8051194539249147, "grad_norm": 3.4380221366882324, "learning_rate": 0.0007316268486916951, "loss": 6.7389, "step": 2359 }, { "epoch": 0.8054607508532423, "grad_norm": 4.988135814666748, "learning_rate": 0.0007315130830489192, "loss": 5.3108, "step": 2360 }, { "epoch": 0.8058020477815699, "grad_norm": 3.4688353538513184, "learning_rate": 0.0007313993174061435, "loss": 6.6813, "step": 2361 }, { "epoch": 0.8061433447098976, "grad_norm": 3.7119762897491455, "learning_rate": 0.0007312855517633676, "loss": 6.419, "step": 2362 }, { "epoch": 0.8064846416382253, "grad_norm": 4.025672912597656, "learning_rate": 0.0007311717861205916, "loss": 6.0607, "step": 2363 }, { "epoch": 0.8068259385665529, "grad_norm": 3.4323062896728516, "learning_rate": 0.0007310580204778157, "loss": 6.8558, "step": 2364 }, { "epoch": 0.8071672354948806, "grad_norm": 3.3362526893615723, "learning_rate": 0.0007309442548350398, "loss": 6.2351, "step": 2365 }, { "epoch": 0.8075085324232082, "grad_norm": 3.44800066947937, "learning_rate": 0.0007308304891922639, "loss": 6.2169, "step": 2366 }, { "epoch": 0.8078498293515358, "grad_norm": 3.3122382164001465, "learning_rate": 0.0007307167235494881, "loss": 7.1405, "step": 2367 }, { "epoch": 0.8081911262798634, "grad_norm": 3.3105790615081787, "learning_rate": 0.0007306029579067122, "loss": 6.7058, "step": 2368 }, { "epoch": 0.8085324232081911, "grad_norm": 3.940232038497925, "learning_rate": 0.0007304891922639363, "loss": 6.7658, "step": 2369 }, { "epoch": 0.8088737201365188, "grad_norm": 3.7123522758483887, "learning_rate": 0.0007303754266211604, "loss": 6.2908, "step": 2370 }, { "epoch": 0.8092150170648464, "grad_norm": 3.907029151916504, "learning_rate": 0.0007302616609783845, "loss": 6.2083, "step": 2371 }, { "epoch": 0.8095563139931741, "grad_norm": 3.455446481704712, "learning_rate": 0.0007301478953356086, "loss": 6.6809, "step": 2372 }, { "epoch": 0.8098976109215017, "grad_norm": 3.3250789642333984, "learning_rate": 0.0007300341296928328, "loss": 6.6732, "step": 2373 }, { "epoch": 0.8102389078498293, "grad_norm": 3.413405656814575, "learning_rate": 0.0007299203640500569, "loss": 6.0811, "step": 2374 }, { "epoch": 0.810580204778157, "grad_norm": 3.436432361602783, "learning_rate": 0.000729806598407281, "loss": 6.8951, "step": 2375 }, { "epoch": 0.8109215017064847, "grad_norm": 3.539832592010498, "learning_rate": 0.0007296928327645051, "loss": 6.9036, "step": 2376 }, { "epoch": 0.8112627986348123, "grad_norm": 3.5589230060577393, "learning_rate": 0.0007295790671217292, "loss": 6.6657, "step": 2377 }, { "epoch": 0.81160409556314, "grad_norm": 5.298614025115967, "learning_rate": 0.0007294653014789535, "loss": 5.9221, "step": 2378 }, { "epoch": 0.8119453924914676, "grad_norm": 3.4280951023101807, "learning_rate": 0.0007293515358361776, "loss": 6.775, "step": 2379 }, { "epoch": 0.8122866894197952, "grad_norm": 3.2987239360809326, "learning_rate": 0.0007292377701934017, "loss": 6.5316, "step": 2380 }, { "epoch": 0.8126279863481228, "grad_norm": 4.67313289642334, "learning_rate": 0.0007291240045506258, "loss": 6.9242, "step": 2381 }, { "epoch": 0.8129692832764505, "grad_norm": 3.3883895874023438, "learning_rate": 0.0007290102389078499, "loss": 6.9792, "step": 2382 }, { "epoch": 0.8133105802047782, "grad_norm": 3.6097097396850586, "learning_rate": 0.0007288964732650739, "loss": 6.0067, "step": 2383 }, { "epoch": 0.8136518771331058, "grad_norm": 3.708559989929199, "learning_rate": 0.0007287827076222981, "loss": 6.6121, "step": 2384 }, { "epoch": 0.8139931740614335, "grad_norm": 3.4102509021759033, "learning_rate": 0.0007286689419795222, "loss": 6.7611, "step": 2385 }, { "epoch": 0.8143344709897611, "grad_norm": 3.181281566619873, "learning_rate": 0.0007285551763367463, "loss": 6.5479, "step": 2386 }, { "epoch": 0.8146757679180887, "grad_norm": 3.2941017150878906, "learning_rate": 0.0007284414106939704, "loss": 6.9345, "step": 2387 }, { "epoch": 0.8150170648464163, "grad_norm": 3.4857585430145264, "learning_rate": 0.0007283276450511945, "loss": 6.2137, "step": 2388 }, { "epoch": 0.8153583617747441, "grad_norm": 3.3792757987976074, "learning_rate": 0.0007282138794084186, "loss": 6.9875, "step": 2389 }, { "epoch": 0.8156996587030717, "grad_norm": 3.3115673065185547, "learning_rate": 0.0007281001137656428, "loss": 6.9524, "step": 2390 }, { "epoch": 0.8160409556313993, "grad_norm": 3.5123884677886963, "learning_rate": 0.0007279863481228669, "loss": 6.3792, "step": 2391 }, { "epoch": 0.816382252559727, "grad_norm": 3.4690353870391846, "learning_rate": 0.000727872582480091, "loss": 7.0668, "step": 2392 }, { "epoch": 0.8167235494880546, "grad_norm": 3.470229387283325, "learning_rate": 0.0007277588168373151, "loss": 6.5134, "step": 2393 }, { "epoch": 0.8170648464163822, "grad_norm": 3.3032960891723633, "learning_rate": 0.0007276450511945392, "loss": 7.0573, "step": 2394 }, { "epoch": 0.8174061433447098, "grad_norm": 6.7078094482421875, "learning_rate": 0.0007275312855517635, "loss": 6.4783, "step": 2395 }, { "epoch": 0.8177474402730376, "grad_norm": 3.469536304473877, "learning_rate": 0.0007274175199089876, "loss": 6.6776, "step": 2396 }, { "epoch": 0.8180887372013652, "grad_norm": 3.5298237800598145, "learning_rate": 0.0007273037542662117, "loss": 6.3748, "step": 2397 }, { "epoch": 0.8184300341296928, "grad_norm": 3.280069589614868, "learning_rate": 0.0007271899886234358, "loss": 6.5264, "step": 2398 }, { "epoch": 0.8187713310580205, "grad_norm": 3.1752591133117676, "learning_rate": 0.0007270762229806599, "loss": 6.656, "step": 2399 }, { "epoch": 0.8191126279863481, "grad_norm": 3.6475753784179688, "learning_rate": 0.000726962457337884, "loss": 6.703, "step": 2400 }, { "epoch": 0.8194539249146757, "grad_norm": 3.4017770290374756, "learning_rate": 0.0007268486916951082, "loss": 6.6566, "step": 2401 }, { "epoch": 0.8197952218430035, "grad_norm": 3.364267349243164, "learning_rate": 0.0007267349260523322, "loss": 6.5412, "step": 2402 }, { "epoch": 0.8201365187713311, "grad_norm": 3.2577364444732666, "learning_rate": 0.0007266211604095563, "loss": 6.4592, "step": 2403 }, { "epoch": 0.8204778156996587, "grad_norm": 3.6079461574554443, "learning_rate": 0.0007265073947667804, "loss": 6.4137, "step": 2404 }, { "epoch": 0.8208191126279863, "grad_norm": 3.259861946105957, "learning_rate": 0.0007263936291240045, "loss": 6.5796, "step": 2405 }, { "epoch": 0.821160409556314, "grad_norm": 3.173610210418701, "learning_rate": 0.0007262798634812286, "loss": 6.9958, "step": 2406 }, { "epoch": 0.8215017064846416, "grad_norm": 3.753018856048584, "learning_rate": 0.0007261660978384528, "loss": 6.7856, "step": 2407 }, { "epoch": 0.8218430034129692, "grad_norm": 4.427280426025391, "learning_rate": 0.0007260523321956769, "loss": 6.5343, "step": 2408 }, { "epoch": 0.822184300341297, "grad_norm": 3.921279191970825, "learning_rate": 0.000725938566552901, "loss": 6.1591, "step": 2409 }, { "epoch": 0.8225255972696246, "grad_norm": 3.6298179626464844, "learning_rate": 0.0007258248009101251, "loss": 6.6855, "step": 2410 }, { "epoch": 0.8228668941979522, "grad_norm": 4.846485137939453, "learning_rate": 0.0007257110352673492, "loss": 5.8797, "step": 2411 }, { "epoch": 0.8232081911262799, "grad_norm": 3.467717409133911, "learning_rate": 0.0007255972696245733, "loss": 6.3911, "step": 2412 }, { "epoch": 0.8235494880546075, "grad_norm": 3.697725534439087, "learning_rate": 0.0007254835039817976, "loss": 6.4286, "step": 2413 }, { "epoch": 0.8238907849829351, "grad_norm": 3.694342851638794, "learning_rate": 0.0007253697383390217, "loss": 6.1186, "step": 2414 }, { "epoch": 0.8242320819112628, "grad_norm": 3.3936257362365723, "learning_rate": 0.0007252559726962458, "loss": 6.7766, "step": 2415 }, { "epoch": 0.8245733788395905, "grad_norm": 3.418151617050171, "learning_rate": 0.0007251422070534699, "loss": 6.3386, "step": 2416 }, { "epoch": 0.8249146757679181, "grad_norm": 3.6463325023651123, "learning_rate": 0.000725028441410694, "loss": 6.1039, "step": 2417 }, { "epoch": 0.8252559726962457, "grad_norm": 3.3325319290161133, "learning_rate": 0.0007249146757679182, "loss": 6.4488, "step": 2418 }, { "epoch": 0.8255972696245734, "grad_norm": 3.4702980518341064, "learning_rate": 0.0007248009101251423, "loss": 5.9765, "step": 2419 }, { "epoch": 0.825938566552901, "grad_norm": 3.6678733825683594, "learning_rate": 0.0007246871444823664, "loss": 6.4048, "step": 2420 }, { "epoch": 0.8262798634812286, "grad_norm": 3.4993784427642822, "learning_rate": 0.0007245733788395905, "loss": 7.3037, "step": 2421 }, { "epoch": 0.8266211604095564, "grad_norm": 3.750591516494751, "learning_rate": 0.0007244596131968145, "loss": 6.4685, "step": 2422 }, { "epoch": 0.826962457337884, "grad_norm": 3.406001567840576, "learning_rate": 0.0007243458475540386, "loss": 6.5744, "step": 2423 }, { "epoch": 0.8273037542662116, "grad_norm": 3.754840135574341, "learning_rate": 0.0007242320819112628, "loss": 6.0161, "step": 2424 }, { "epoch": 0.8276450511945392, "grad_norm": 4.734508514404297, "learning_rate": 0.0007241183162684869, "loss": 6.0864, "step": 2425 }, { "epoch": 0.8279863481228669, "grad_norm": 3.3035054206848145, "learning_rate": 0.000724004550625711, "loss": 6.7669, "step": 2426 }, { "epoch": 0.8283276450511945, "grad_norm": 3.275688648223877, "learning_rate": 0.0007238907849829351, "loss": 6.5271, "step": 2427 }, { "epoch": 0.8286689419795222, "grad_norm": 3.2510900497436523, "learning_rate": 0.0007237770193401592, "loss": 6.3089, "step": 2428 }, { "epoch": 0.8290102389078499, "grad_norm": 4.378868579864502, "learning_rate": 0.0007236632536973833, "loss": 5.3725, "step": 2429 }, { "epoch": 0.8293515358361775, "grad_norm": 3.4092857837677, "learning_rate": 0.0007235494880546076, "loss": 6.4763, "step": 2430 }, { "epoch": 0.8296928327645051, "grad_norm": 3.2129056453704834, "learning_rate": 0.0007234357224118317, "loss": 6.4405, "step": 2431 }, { "epoch": 0.8300341296928327, "grad_norm": 3.3604624271392822, "learning_rate": 0.0007233219567690558, "loss": 6.5199, "step": 2432 }, { "epoch": 0.8303754266211604, "grad_norm": 3.6818511486053467, "learning_rate": 0.0007232081911262799, "loss": 6.1862, "step": 2433 }, { "epoch": 0.830716723549488, "grad_norm": 3.174171209335327, "learning_rate": 0.000723094425483504, "loss": 6.5523, "step": 2434 }, { "epoch": 0.8310580204778157, "grad_norm": 3.3606784343719482, "learning_rate": 0.0007229806598407282, "loss": 6.6429, "step": 2435 }, { "epoch": 0.8313993174061434, "grad_norm": 3.436796188354492, "learning_rate": 0.0007228668941979523, "loss": 6.749, "step": 2436 }, { "epoch": 0.831740614334471, "grad_norm": 3.20074462890625, "learning_rate": 0.0007227531285551764, "loss": 6.6947, "step": 2437 }, { "epoch": 0.8320819112627986, "grad_norm": 3.3540759086608887, "learning_rate": 0.0007226393629124005, "loss": 6.7936, "step": 2438 }, { "epoch": 0.8324232081911263, "grad_norm": 12.003594398498535, "learning_rate": 0.0007225255972696246, "loss": 6.054, "step": 2439 }, { "epoch": 0.8327645051194539, "grad_norm": 3.418896198272705, "learning_rate": 0.0007224118316268487, "loss": 6.8313, "step": 2440 }, { "epoch": 0.8331058020477816, "grad_norm": 3.5334503650665283, "learning_rate": 0.0007222980659840728, "loss": 6.7967, "step": 2441 }, { "epoch": 0.8334470989761092, "grad_norm": 3.854482412338257, "learning_rate": 0.0007221843003412969, "loss": 5.6593, "step": 2442 }, { "epoch": 0.8337883959044369, "grad_norm": 3.3690783977508545, "learning_rate": 0.000722070534698521, "loss": 6.9381, "step": 2443 }, { "epoch": 0.8341296928327645, "grad_norm": 4.998669624328613, "learning_rate": 0.0007219567690557451, "loss": 5.7073, "step": 2444 }, { "epoch": 0.8344709897610921, "grad_norm": 3.3754959106445312, "learning_rate": 0.0007218430034129692, "loss": 6.9455, "step": 2445 }, { "epoch": 0.8348122866894198, "grad_norm": 3.3395328521728516, "learning_rate": 0.0007217292377701933, "loss": 6.6405, "step": 2446 }, { "epoch": 0.8351535836177474, "grad_norm": 3.234178066253662, "learning_rate": 0.0007216154721274176, "loss": 7.0086, "step": 2447 }, { "epoch": 0.8354948805460751, "grad_norm": 3.2380173206329346, "learning_rate": 0.0007215017064846417, "loss": 6.4784, "step": 2448 }, { "epoch": 0.8358361774744028, "grad_norm": 3.333740472793579, "learning_rate": 0.0007213879408418658, "loss": 6.5965, "step": 2449 }, { "epoch": 0.8361774744027304, "grad_norm": 3.263247489929199, "learning_rate": 0.0007212741751990899, "loss": 6.589, "step": 2450 }, { "epoch": 0.836518771331058, "grad_norm": 3.6883716583251953, "learning_rate": 0.000721160409556314, "loss": 6.2031, "step": 2451 }, { "epoch": 0.8368600682593856, "grad_norm": 3.3364152908325195, "learning_rate": 0.0007210466439135381, "loss": 6.6068, "step": 2452 }, { "epoch": 0.8372013651877133, "grad_norm": 3.7093443870544434, "learning_rate": 0.0007209328782707623, "loss": 6.4188, "step": 2453 }, { "epoch": 0.837542662116041, "grad_norm": 3.4039225578308105, "learning_rate": 0.0007208191126279864, "loss": 6.5616, "step": 2454 }, { "epoch": 0.8378839590443686, "grad_norm": 3.357640027999878, "learning_rate": 0.0007207053469852105, "loss": 6.5854, "step": 2455 }, { "epoch": 0.8382252559726963, "grad_norm": 4.375115871429443, "learning_rate": 0.0007205915813424346, "loss": 5.801, "step": 2456 }, { "epoch": 0.8385665529010239, "grad_norm": 3.4280662536621094, "learning_rate": 0.0007204778156996587, "loss": 6.8118, "step": 2457 }, { "epoch": 0.8389078498293515, "grad_norm": 3.6396632194519043, "learning_rate": 0.0007203640500568829, "loss": 6.6882, "step": 2458 }, { "epoch": 0.8392491467576791, "grad_norm": 3.238856792449951, "learning_rate": 0.000720250284414107, "loss": 6.3282, "step": 2459 }, { "epoch": 0.8395904436860068, "grad_norm": 3.151153087615967, "learning_rate": 0.0007201365187713311, "loss": 6.816, "step": 2460 }, { "epoch": 0.8399317406143345, "grad_norm": 4.557154178619385, "learning_rate": 0.0007200227531285551, "loss": 6.6596, "step": 2461 }, { "epoch": 0.8402730375426621, "grad_norm": 3.225273609161377, "learning_rate": 0.0007199089874857792, "loss": 6.5537, "step": 2462 }, { "epoch": 0.8406143344709898, "grad_norm": 3.4817261695861816, "learning_rate": 0.0007197952218430033, "loss": 6.567, "step": 2463 }, { "epoch": 0.8409556313993174, "grad_norm": 3.3386945724487305, "learning_rate": 0.0007196814562002276, "loss": 7.1524, "step": 2464 }, { "epoch": 0.841296928327645, "grad_norm": 3.4559755325317383, "learning_rate": 0.0007195676905574517, "loss": 6.4858, "step": 2465 }, { "epoch": 0.8416382252559726, "grad_norm": 3.230203866958618, "learning_rate": 0.0007194539249146758, "loss": 6.6456, "step": 2466 }, { "epoch": 0.8419795221843004, "grad_norm": 3.321943998336792, "learning_rate": 0.0007193401592718999, "loss": 6.6494, "step": 2467 }, { "epoch": 0.842320819112628, "grad_norm": 3.304291248321533, "learning_rate": 0.000719226393629124, "loss": 6.7956, "step": 2468 }, { "epoch": 0.8426621160409556, "grad_norm": 3.3571488857269287, "learning_rate": 0.0007191126279863481, "loss": 6.8112, "step": 2469 }, { "epoch": 0.8430034129692833, "grad_norm": 3.388721466064453, "learning_rate": 0.0007189988623435723, "loss": 6.8564, "step": 2470 }, { "epoch": 0.8433447098976109, "grad_norm": 3.430446147918701, "learning_rate": 0.0007188850967007964, "loss": 6.7175, "step": 2471 }, { "epoch": 0.8436860068259385, "grad_norm": 3.3103549480438232, "learning_rate": 0.0007187713310580205, "loss": 6.5384, "step": 2472 }, { "epoch": 0.8440273037542663, "grad_norm": 7.369909763336182, "learning_rate": 0.0007186575654152446, "loss": 5.398, "step": 2473 }, { "epoch": 0.8443686006825939, "grad_norm": 3.683732748031616, "learning_rate": 0.0007185437997724687, "loss": 6.4451, "step": 2474 }, { "epoch": 0.8447098976109215, "grad_norm": 4.542445659637451, "learning_rate": 0.0007184300341296928, "loss": 6.101, "step": 2475 }, { "epoch": 0.8450511945392492, "grad_norm": 4.248722553253174, "learning_rate": 0.000718316268486917, "loss": 6.0299, "step": 2476 }, { "epoch": 0.8453924914675768, "grad_norm": 3.940197467803955, "learning_rate": 0.0007182025028441411, "loss": 5.5218, "step": 2477 }, { "epoch": 0.8457337883959044, "grad_norm": 3.645324945449829, "learning_rate": 0.0007180887372013652, "loss": 6.9645, "step": 2478 }, { "epoch": 0.846075085324232, "grad_norm": 5.713097095489502, "learning_rate": 0.0007179749715585894, "loss": 5.7437, "step": 2479 }, { "epoch": 0.8464163822525598, "grad_norm": 3.8885574340820312, "learning_rate": 0.0007178612059158133, "loss": 6.7958, "step": 2480 }, { "epoch": 0.8467576791808874, "grad_norm": 3.430229902267456, "learning_rate": 0.0007177474402730376, "loss": 6.7616, "step": 2481 }, { "epoch": 0.847098976109215, "grad_norm": 3.286057472229004, "learning_rate": 0.0007176336746302617, "loss": 6.6936, "step": 2482 }, { "epoch": 0.8474402730375427, "grad_norm": 6.800687313079834, "learning_rate": 0.0007175199089874858, "loss": 5.4928, "step": 2483 }, { "epoch": 0.8477815699658703, "grad_norm": 6.051274299621582, "learning_rate": 0.0007174061433447099, "loss": 6.3143, "step": 2484 }, { "epoch": 0.8481228668941979, "grad_norm": 3.3994340896606445, "learning_rate": 0.000717292377701934, "loss": 6.5889, "step": 2485 }, { "epoch": 0.8484641638225257, "grad_norm": 3.665647029876709, "learning_rate": 0.0007171786120591581, "loss": 5.8222, "step": 2486 }, { "epoch": 0.8488054607508533, "grad_norm": 3.3410794734954834, "learning_rate": 0.0007170648464163823, "loss": 6.8781, "step": 2487 }, { "epoch": 0.8491467576791809, "grad_norm": 3.42598819732666, "learning_rate": 0.0007169510807736064, "loss": 6.8923, "step": 2488 }, { "epoch": 0.8494880546075085, "grad_norm": 7.492861270904541, "learning_rate": 0.0007168373151308305, "loss": 7.0957, "step": 2489 }, { "epoch": 0.8498293515358362, "grad_norm": 3.137038469314575, "learning_rate": 0.0007167235494880546, "loss": 6.6471, "step": 2490 }, { "epoch": 0.8501706484641638, "grad_norm": 3.3882358074188232, "learning_rate": 0.0007166097838452787, "loss": 6.782, "step": 2491 }, { "epoch": 0.8505119453924914, "grad_norm": 3.2121896743774414, "learning_rate": 0.0007164960182025028, "loss": 7.047, "step": 2492 }, { "epoch": 0.8508532423208192, "grad_norm": 3.2568767070770264, "learning_rate": 0.000716382252559727, "loss": 6.6304, "step": 2493 }, { "epoch": 0.8511945392491468, "grad_norm": 3.2692127227783203, "learning_rate": 0.0007162684869169511, "loss": 6.4022, "step": 2494 }, { "epoch": 0.8515358361774744, "grad_norm": 3.5089681148529053, "learning_rate": 0.0007161547212741752, "loss": 6.5384, "step": 2495 }, { "epoch": 0.851877133105802, "grad_norm": 3.226477861404419, "learning_rate": 0.0007160409556313994, "loss": 6.6573, "step": 2496 }, { "epoch": 0.8522184300341297, "grad_norm": 3.2870302200317383, "learning_rate": 0.0007159271899886235, "loss": 6.9091, "step": 2497 }, { "epoch": 0.8525597269624573, "grad_norm": 3.368769884109497, "learning_rate": 0.0007158134243458477, "loss": 6.54, "step": 2498 }, { "epoch": 0.852901023890785, "grad_norm": 7.26986837387085, "learning_rate": 0.0007156996587030717, "loss": 5.2813, "step": 2499 }, { "epoch": 0.8532423208191127, "grad_norm": 3.4235429763793945, "learning_rate": 0.0007155858930602958, "loss": 6.5904, "step": 2500 }, { "epoch": 0.8535836177474403, "grad_norm": 3.4683103561401367, "learning_rate": 0.0007154721274175199, "loss": 6.8818, "step": 2501 }, { "epoch": 0.8539249146757679, "grad_norm": 3.259256362915039, "learning_rate": 0.000715358361774744, "loss": 6.5643, "step": 2502 }, { "epoch": 0.8542662116040955, "grad_norm": 3.19038724899292, "learning_rate": 0.0007152445961319681, "loss": 6.4574, "step": 2503 }, { "epoch": 0.8546075085324232, "grad_norm": 4.036156177520752, "learning_rate": 0.0007151308304891923, "loss": 6.0228, "step": 2504 }, { "epoch": 0.8549488054607508, "grad_norm": 3.6171653270721436, "learning_rate": 0.0007150170648464164, "loss": 6.4731, "step": 2505 }, { "epoch": 0.8552901023890785, "grad_norm": 3.450333595275879, "learning_rate": 0.0007149032992036405, "loss": 6.3563, "step": 2506 }, { "epoch": 0.8556313993174062, "grad_norm": 3.4450273513793945, "learning_rate": 0.0007147895335608646, "loss": 6.9957, "step": 2507 }, { "epoch": 0.8559726962457338, "grad_norm": 3.228724241256714, "learning_rate": 0.0007146757679180887, "loss": 6.4982, "step": 2508 }, { "epoch": 0.8563139931740614, "grad_norm": 3.283799409866333, "learning_rate": 0.0007145620022753128, "loss": 6.301, "step": 2509 }, { "epoch": 0.856655290102389, "grad_norm": 3.258451223373413, "learning_rate": 0.000714448236632537, "loss": 6.2914, "step": 2510 }, { "epoch": 0.8569965870307167, "grad_norm": 5.3775153160095215, "learning_rate": 0.0007143344709897611, "loss": 6.2267, "step": 2511 }, { "epoch": 0.8573378839590444, "grad_norm": 3.5916664600372314, "learning_rate": 0.0007142207053469852, "loss": 6.7227, "step": 2512 }, { "epoch": 0.857679180887372, "grad_norm": 5.10945987701416, "learning_rate": 0.0007141069397042094, "loss": 6.2029, "step": 2513 }, { "epoch": 0.8580204778156997, "grad_norm": 3.6190226078033447, "learning_rate": 0.0007139931740614335, "loss": 6.0994, "step": 2514 }, { "epoch": 0.8583617747440273, "grad_norm": 3.3382725715637207, "learning_rate": 0.0007138794084186576, "loss": 6.891, "step": 2515 }, { "epoch": 0.8587030716723549, "grad_norm": 3.399120569229126, "learning_rate": 0.0007137656427758818, "loss": 6.8522, "step": 2516 }, { "epoch": 0.8590443686006826, "grad_norm": 3.6390433311462402, "learning_rate": 0.0007136518771331059, "loss": 6.1696, "step": 2517 }, { "epoch": 0.8593856655290102, "grad_norm": 9.467489242553711, "learning_rate": 0.00071353811149033, "loss": 6.8595, "step": 2518 }, { "epoch": 0.8597269624573379, "grad_norm": 3.805907726287842, "learning_rate": 0.000713424345847554, "loss": 6.7477, "step": 2519 }, { "epoch": 0.8600682593856656, "grad_norm": 3.396374464035034, "learning_rate": 0.0007133105802047781, "loss": 6.3055, "step": 2520 }, { "epoch": 0.8604095563139932, "grad_norm": 3.43021821975708, "learning_rate": 0.0007131968145620023, "loss": 6.5407, "step": 2521 }, { "epoch": 0.8607508532423208, "grad_norm": 6.201378345489502, "learning_rate": 0.0007130830489192264, "loss": 5.8928, "step": 2522 }, { "epoch": 0.8610921501706484, "grad_norm": 3.4402365684509277, "learning_rate": 0.0007129692832764505, "loss": 6.5171, "step": 2523 }, { "epoch": 0.8614334470989761, "grad_norm": 3.8701541423797607, "learning_rate": 0.0007128555176336746, "loss": 6.4958, "step": 2524 }, { "epoch": 0.8617747440273038, "grad_norm": 3.2781641483306885, "learning_rate": 0.0007127417519908987, "loss": 6.8188, "step": 2525 }, { "epoch": 0.8621160409556314, "grad_norm": 3.67041277885437, "learning_rate": 0.0007126279863481228, "loss": 5.8762, "step": 2526 }, { "epoch": 0.8624573378839591, "grad_norm": 3.6243369579315186, "learning_rate": 0.000712514220705347, "loss": 6.3403, "step": 2527 }, { "epoch": 0.8627986348122867, "grad_norm": 3.2596120834350586, "learning_rate": 0.0007124004550625711, "loss": 6.812, "step": 2528 }, { "epoch": 0.8631399317406143, "grad_norm": 4.878437519073486, "learning_rate": 0.0007122866894197952, "loss": 6.3827, "step": 2529 }, { "epoch": 0.863481228668942, "grad_norm": 2.523153305053711, "learning_rate": 0.0007121729237770194, "loss": 3.6518, "step": 2530 }, { "epoch": 0.8638225255972696, "grad_norm": 3.9644103050231934, "learning_rate": 0.0007120591581342435, "loss": 5.9803, "step": 2531 }, { "epoch": 0.8641638225255973, "grad_norm": 3.464620590209961, "learning_rate": 0.0007119453924914676, "loss": 6.6308, "step": 2532 }, { "epoch": 0.8645051194539249, "grad_norm": 3.587636709213257, "learning_rate": 0.0007118316268486918, "loss": 7.2469, "step": 2533 }, { "epoch": 0.8648464163822526, "grad_norm": 3.188854217529297, "learning_rate": 0.0007117178612059159, "loss": 6.3745, "step": 2534 }, { "epoch": 0.8651877133105802, "grad_norm": 3.3936569690704346, "learning_rate": 0.00071160409556314, "loss": 7.0373, "step": 2535 }, { "epoch": 0.8655290102389078, "grad_norm": 3.3409032821655273, "learning_rate": 0.0007114903299203641, "loss": 6.6337, "step": 2536 }, { "epoch": 0.8658703071672355, "grad_norm": 5.52333402633667, "learning_rate": 0.0007113765642775882, "loss": 4.6133, "step": 2537 }, { "epoch": 0.8662116040955632, "grad_norm": 3.3822033405303955, "learning_rate": 0.0007112627986348122, "loss": 6.1598, "step": 2538 }, { "epoch": 0.8665529010238908, "grad_norm": 3.8111062049865723, "learning_rate": 0.0007111490329920364, "loss": 6.0937, "step": 2539 }, { "epoch": 0.8668941979522184, "grad_norm": 4.7204909324646, "learning_rate": 0.0007110352673492605, "loss": 5.8796, "step": 2540 }, { "epoch": 0.8672354948805461, "grad_norm": 3.3463664054870605, "learning_rate": 0.0007109215017064846, "loss": 6.7194, "step": 2541 }, { "epoch": 0.8675767918088737, "grad_norm": 3.3839924335479736, "learning_rate": 0.0007108077360637087, "loss": 6.8464, "step": 2542 }, { "epoch": 0.8679180887372013, "grad_norm": 4.8690571784973145, "learning_rate": 0.0007106939704209328, "loss": 5.8016, "step": 2543 }, { "epoch": 0.868259385665529, "grad_norm": 3.3318896293640137, "learning_rate": 0.000710580204778157, "loss": 6.4526, "step": 2544 }, { "epoch": 0.8686006825938567, "grad_norm": 3.221559762954712, "learning_rate": 0.0007104664391353811, "loss": 6.6463, "step": 2545 }, { "epoch": 0.8689419795221843, "grad_norm": 3.1767921447753906, "learning_rate": 0.0007103526734926053, "loss": 6.4654, "step": 2546 }, { "epoch": 0.869283276450512, "grad_norm": 3.4493680000305176, "learning_rate": 0.0007102389078498294, "loss": 5.9502, "step": 2547 }, { "epoch": 0.8696245733788396, "grad_norm": 3.9862265586853027, "learning_rate": 0.0007101251422070535, "loss": 6.3229, "step": 2548 }, { "epoch": 0.8699658703071672, "grad_norm": 3.4914379119873047, "learning_rate": 0.0007100113765642776, "loss": 6.9688, "step": 2549 }, { "epoch": 0.8703071672354948, "grad_norm": 3.332296371459961, "learning_rate": 0.0007098976109215018, "loss": 6.8452, "step": 2550 }, { "epoch": 0.8706484641638226, "grad_norm": 4.199538230895996, "learning_rate": 0.0007097838452787259, "loss": 6.881, "step": 2551 }, { "epoch": 0.8709897610921502, "grad_norm": 3.5314183235168457, "learning_rate": 0.00070967007963595, "loss": 5.8921, "step": 2552 }, { "epoch": 0.8713310580204778, "grad_norm": 3.459620237350464, "learning_rate": 0.0007095563139931741, "loss": 6.258, "step": 2553 }, { "epoch": 0.8716723549488055, "grad_norm": 3.3095757961273193, "learning_rate": 0.0007094425483503982, "loss": 6.7562, "step": 2554 }, { "epoch": 0.8720136518771331, "grad_norm": 3.2606141567230225, "learning_rate": 0.0007093287827076223, "loss": 6.7756, "step": 2555 }, { "epoch": 0.8723549488054607, "grad_norm": 3.6110377311706543, "learning_rate": 0.0007092150170648465, "loss": 6.3181, "step": 2556 }, { "epoch": 0.8726962457337883, "grad_norm": 3.273369550704956, "learning_rate": 0.0007091012514220706, "loss": 6.9579, "step": 2557 }, { "epoch": 0.8730375426621161, "grad_norm": 5.260756492614746, "learning_rate": 0.0007089874857792946, "loss": 5.2098, "step": 2558 }, { "epoch": 0.8733788395904437, "grad_norm": 3.3652751445770264, "learning_rate": 0.0007088737201365187, "loss": 6.8058, "step": 2559 }, { "epoch": 0.8737201365187713, "grad_norm": 3.266148805618286, "learning_rate": 0.0007087599544937428, "loss": 7.0574, "step": 2560 }, { "epoch": 0.874061433447099, "grad_norm": 3.58514666557312, "learning_rate": 0.000708646188850967, "loss": 6.6256, "step": 2561 }, { "epoch": 0.8744027303754266, "grad_norm": 3.195068597793579, "learning_rate": 0.0007085324232081911, "loss": 6.6367, "step": 2562 }, { "epoch": 0.8747440273037542, "grad_norm": 4.282649040222168, "learning_rate": 0.0007084186575654153, "loss": 4.9499, "step": 2563 }, { "epoch": 0.875085324232082, "grad_norm": 3.2996411323547363, "learning_rate": 0.0007083048919226394, "loss": 6.4127, "step": 2564 }, { "epoch": 0.8754266211604096, "grad_norm": 3.3839046955108643, "learning_rate": 0.0007081911262798635, "loss": 6.6752, "step": 2565 }, { "epoch": 0.8757679180887372, "grad_norm": 3.211970806121826, "learning_rate": 0.0007080773606370876, "loss": 6.1393, "step": 2566 }, { "epoch": 0.8761092150170648, "grad_norm": 3.1798558235168457, "learning_rate": 0.0007079635949943118, "loss": 6.6951, "step": 2567 }, { "epoch": 0.8764505119453925, "grad_norm": 3.2988522052764893, "learning_rate": 0.0007078498293515359, "loss": 6.2898, "step": 2568 }, { "epoch": 0.8767918088737201, "grad_norm": 3.8183929920196533, "learning_rate": 0.00070773606370876, "loss": 6.2697, "step": 2569 }, { "epoch": 0.8771331058020477, "grad_norm": 3.168560266494751, "learning_rate": 0.0007076222980659841, "loss": 6.2589, "step": 2570 }, { "epoch": 0.8774744027303755, "grad_norm": 3.605886459350586, "learning_rate": 0.0007075085324232082, "loss": 5.4431, "step": 2571 }, { "epoch": 0.8778156996587031, "grad_norm": 3.5225412845611572, "learning_rate": 0.0007073947667804323, "loss": 6.9552, "step": 2572 }, { "epoch": 0.8781569965870307, "grad_norm": 3.3253746032714844, "learning_rate": 0.0007072810011376565, "loss": 6.5634, "step": 2573 }, { "epoch": 0.8784982935153584, "grad_norm": 3.3605477809906006, "learning_rate": 0.0007071672354948806, "loss": 6.4666, "step": 2574 }, { "epoch": 0.878839590443686, "grad_norm": 3.1568098068237305, "learning_rate": 0.0007070534698521047, "loss": 6.6565, "step": 2575 }, { "epoch": 0.8791808873720136, "grad_norm": 3.7349367141723633, "learning_rate": 0.0007069397042093288, "loss": 6.1776, "step": 2576 }, { "epoch": 0.8795221843003413, "grad_norm": 3.3942160606384277, "learning_rate": 0.0007068259385665528, "loss": 6.32, "step": 2577 }, { "epoch": 0.879863481228669, "grad_norm": 3.2961456775665283, "learning_rate": 0.0007067121729237769, "loss": 6.7801, "step": 2578 }, { "epoch": 0.8802047781569966, "grad_norm": 3.3948254585266113, "learning_rate": 0.0007065984072810011, "loss": 6.0769, "step": 2579 }, { "epoch": 0.8805460750853242, "grad_norm": 3.3471062183380127, "learning_rate": 0.0007064846416382253, "loss": 5.8324, "step": 2580 }, { "epoch": 0.8808873720136519, "grad_norm": 3.5792579650878906, "learning_rate": 0.0007063708759954494, "loss": 6.313, "step": 2581 }, { "epoch": 0.8812286689419795, "grad_norm": 3.430663824081421, "learning_rate": 0.0007062571103526735, "loss": 6.2877, "step": 2582 }, { "epoch": 0.8815699658703071, "grad_norm": 5.736737251281738, "learning_rate": 0.0007061433447098976, "loss": 6.0753, "step": 2583 }, { "epoch": 0.8819112627986349, "grad_norm": 3.3799870014190674, "learning_rate": 0.0007060295790671218, "loss": 6.7596, "step": 2584 }, { "epoch": 0.8822525597269625, "grad_norm": 6.616389751434326, "learning_rate": 0.0007059158134243459, "loss": 5.9919, "step": 2585 }, { "epoch": 0.8825938566552901, "grad_norm": 3.429597854614258, "learning_rate": 0.00070580204778157, "loss": 6.3736, "step": 2586 }, { "epoch": 0.8829351535836177, "grad_norm": 3.608384847640991, "learning_rate": 0.0007056882821387941, "loss": 6.3896, "step": 2587 }, { "epoch": 0.8832764505119454, "grad_norm": 3.358494281768799, "learning_rate": 0.0007055745164960182, "loss": 7.1418, "step": 2588 }, { "epoch": 0.883617747440273, "grad_norm": 3.164523124694824, "learning_rate": 0.0007054607508532423, "loss": 6.8558, "step": 2589 }, { "epoch": 0.8839590443686007, "grad_norm": 3.5418014526367188, "learning_rate": 0.0007053469852104665, "loss": 6.2485, "step": 2590 }, { "epoch": 0.8843003412969284, "grad_norm": 3.181857109069824, "learning_rate": 0.0007052332195676906, "loss": 6.9945, "step": 2591 }, { "epoch": 0.884641638225256, "grad_norm": 3.445446014404297, "learning_rate": 0.0007051194539249147, "loss": 6.6083, "step": 2592 }, { "epoch": 0.8849829351535836, "grad_norm": 3.159823417663574, "learning_rate": 0.0007050056882821388, "loss": 6.6446, "step": 2593 }, { "epoch": 0.8853242320819112, "grad_norm": 5.284470081329346, "learning_rate": 0.0007048919226393629, "loss": 5.6566, "step": 2594 }, { "epoch": 0.8856655290102389, "grad_norm": 5.081126689910889, "learning_rate": 0.000704778156996587, "loss": 5.952, "step": 2595 }, { "epoch": 0.8860068259385665, "grad_norm": 3.5319552421569824, "learning_rate": 0.0007046643913538113, "loss": 6.2608, "step": 2596 }, { "epoch": 0.8863481228668942, "grad_norm": 3.5452115535736084, "learning_rate": 0.0007045506257110353, "loss": 7.0805, "step": 2597 }, { "epoch": 0.8866894197952219, "grad_norm": 3.5976240634918213, "learning_rate": 0.0007044368600682594, "loss": 6.3286, "step": 2598 }, { "epoch": 0.8870307167235495, "grad_norm": 3.242631435394287, "learning_rate": 0.0007043230944254835, "loss": 6.8221, "step": 2599 }, { "epoch": 0.8873720136518771, "grad_norm": 3.2306206226348877, "learning_rate": 0.0007042093287827076, "loss": 6.2416, "step": 2600 }, { "epoch": 0.8877133105802048, "grad_norm": 3.222269296646118, "learning_rate": 0.0007040955631399318, "loss": 6.9457, "step": 2601 }, { "epoch": 0.8880546075085324, "grad_norm": 3.364189863204956, "learning_rate": 0.0007039817974971559, "loss": 6.3123, "step": 2602 }, { "epoch": 0.8883959044368601, "grad_norm": 7.864152908325195, "learning_rate": 0.00070386803185438, "loss": 5.3723, "step": 2603 }, { "epoch": 0.8887372013651877, "grad_norm": 3.3091676235198975, "learning_rate": 0.0007037542662116041, "loss": 6.5701, "step": 2604 }, { "epoch": 0.8890784982935154, "grad_norm": 3.4476070404052734, "learning_rate": 0.0007036405005688282, "loss": 6.4836, "step": 2605 }, { "epoch": 0.889419795221843, "grad_norm": 3.41062593460083, "learning_rate": 0.0007035267349260523, "loss": 6.7782, "step": 2606 }, { "epoch": 0.8897610921501706, "grad_norm": 3.3153281211853027, "learning_rate": 0.0007034129692832765, "loss": 6.5789, "step": 2607 }, { "epoch": 0.8901023890784983, "grad_norm": 3.2125444412231445, "learning_rate": 0.0007032992036405006, "loss": 6.57, "step": 2608 }, { "epoch": 0.8904436860068259, "grad_norm": 3.210056781768799, "learning_rate": 0.0007031854379977247, "loss": 6.4377, "step": 2609 }, { "epoch": 0.8907849829351536, "grad_norm": 3.1639184951782227, "learning_rate": 0.0007030716723549488, "loss": 6.4598, "step": 2610 }, { "epoch": 0.8911262798634813, "grad_norm": 3.7884199619293213, "learning_rate": 0.0007029579067121729, "loss": 6.4981, "step": 2611 }, { "epoch": 0.8914675767918089, "grad_norm": 3.2961177825927734, "learning_rate": 0.000702844141069397, "loss": 6.8189, "step": 2612 }, { "epoch": 0.8918088737201365, "grad_norm": 3.3489444255828857, "learning_rate": 0.0007027303754266213, "loss": 6.3774, "step": 2613 }, { "epoch": 0.8921501706484641, "grad_norm": 3.563915967941284, "learning_rate": 0.0007026166097838454, "loss": 6.4961, "step": 2614 }, { "epoch": 0.8924914675767918, "grad_norm": 3.3189163208007812, "learning_rate": 0.0007025028441410695, "loss": 6.6371, "step": 2615 }, { "epoch": 0.8928327645051195, "grad_norm": 3.447849750518799, "learning_rate": 0.0007023890784982935, "loss": 5.8723, "step": 2616 }, { "epoch": 0.8931740614334471, "grad_norm": 3.2559688091278076, "learning_rate": 0.0007022753128555176, "loss": 6.4424, "step": 2617 }, { "epoch": 0.8935153583617748, "grad_norm": 4.270260334014893, "learning_rate": 0.0007021615472127417, "loss": 5.9736, "step": 2618 }, { "epoch": 0.8938566552901024, "grad_norm": 3.3759734630584717, "learning_rate": 0.0007020477815699659, "loss": 6.6424, "step": 2619 }, { "epoch": 0.89419795221843, "grad_norm": 3.839401960372925, "learning_rate": 0.00070193401592719, "loss": 5.8343, "step": 2620 }, { "epoch": 0.8945392491467576, "grad_norm": 3.4402377605438232, "learning_rate": 0.0007018202502844141, "loss": 5.9067, "step": 2621 }, { "epoch": 0.8948805460750853, "grad_norm": 3.8046011924743652, "learning_rate": 0.0007017064846416382, "loss": 6.205, "step": 2622 }, { "epoch": 0.895221843003413, "grad_norm": 3.5362050533294678, "learning_rate": 0.0007015927189988623, "loss": 6.9003, "step": 2623 }, { "epoch": 0.8955631399317406, "grad_norm": 3.417820930480957, "learning_rate": 0.0007014789533560865, "loss": 6.9618, "step": 2624 }, { "epoch": 0.8959044368600683, "grad_norm": 3.3132388591766357, "learning_rate": 0.0007013651877133106, "loss": 6.8951, "step": 2625 }, { "epoch": 0.8962457337883959, "grad_norm": 3.25205659866333, "learning_rate": 0.0007012514220705347, "loss": 6.361, "step": 2626 }, { "epoch": 0.8965870307167235, "grad_norm": 3.5494885444641113, "learning_rate": 0.0007011376564277588, "loss": 6.2906, "step": 2627 }, { "epoch": 0.8969283276450511, "grad_norm": 3.837991714477539, "learning_rate": 0.0007010238907849829, "loss": 5.26, "step": 2628 }, { "epoch": 0.8972696245733789, "grad_norm": 3.9714996814727783, "learning_rate": 0.000700910125142207, "loss": 6.2808, "step": 2629 }, { "epoch": 0.8976109215017065, "grad_norm": 3.556457042694092, "learning_rate": 0.0007007963594994313, "loss": 6.3822, "step": 2630 }, { "epoch": 0.8979522184300341, "grad_norm": 3.464637517929077, "learning_rate": 0.0007006825938566554, "loss": 6.9314, "step": 2631 }, { "epoch": 0.8982935153583618, "grad_norm": 6.831556797027588, "learning_rate": 0.0007005688282138795, "loss": 6.4999, "step": 2632 }, { "epoch": 0.8986348122866894, "grad_norm": 3.62048602104187, "learning_rate": 0.0007004550625711036, "loss": 6.0907, "step": 2633 }, { "epoch": 0.898976109215017, "grad_norm": 3.393540620803833, "learning_rate": 0.0007003412969283277, "loss": 6.192, "step": 2634 }, { "epoch": 0.8993174061433447, "grad_norm": 3.4911301136016846, "learning_rate": 0.0007002275312855518, "loss": 6.8963, "step": 2635 }, { "epoch": 0.8996587030716724, "grad_norm": 3.324739694595337, "learning_rate": 0.0007001137656427759, "loss": 6.5604, "step": 2636 }, { "epoch": 0.9, "grad_norm": 3.236996650695801, "learning_rate": 0.0007, "loss": 6.8306, "step": 2637 }, { "epoch": 0.9003412969283277, "grad_norm": 6.527902603149414, "learning_rate": 0.0006998862343572241, "loss": 4.9167, "step": 2638 }, { "epoch": 0.9006825938566553, "grad_norm": 3.5526578426361084, "learning_rate": 0.0006997724687144482, "loss": 6.7842, "step": 2639 }, { "epoch": 0.9010238907849829, "grad_norm": 3.3249645233154297, "learning_rate": 0.0006996587030716723, "loss": 6.6712, "step": 2640 }, { "epoch": 0.9013651877133105, "grad_norm": 3.207120656967163, "learning_rate": 0.0006995449374288964, "loss": 6.8788, "step": 2641 }, { "epoch": 0.9017064846416383, "grad_norm": 3.128549098968506, "learning_rate": 0.0006994311717861206, "loss": 6.5011, "step": 2642 }, { "epoch": 0.9020477815699659, "grad_norm": 3.4323368072509766, "learning_rate": 0.0006993174061433447, "loss": 6.3345, "step": 2643 }, { "epoch": 0.9023890784982935, "grad_norm": 3.2746779918670654, "learning_rate": 0.0006992036405005688, "loss": 6.3755, "step": 2644 }, { "epoch": 0.9027303754266212, "grad_norm": 8.446745872497559, "learning_rate": 0.0006990898748577929, "loss": 6.7535, "step": 2645 }, { "epoch": 0.9030716723549488, "grad_norm": 3.5905816555023193, "learning_rate": 0.000698976109215017, "loss": 6.7017, "step": 2646 }, { "epoch": 0.9034129692832764, "grad_norm": 3.572148084640503, "learning_rate": 0.0006988623435722413, "loss": 6.6064, "step": 2647 }, { "epoch": 0.903754266211604, "grad_norm": 5.641802787780762, "learning_rate": 0.0006987485779294654, "loss": 5.9658, "step": 2648 }, { "epoch": 0.9040955631399318, "grad_norm": 3.906459331512451, "learning_rate": 0.0006986348122866895, "loss": 6.1134, "step": 2649 }, { "epoch": 0.9044368600682594, "grad_norm": 3.3307929039001465, "learning_rate": 0.0006985210466439136, "loss": 6.8376, "step": 2650 }, { "epoch": 0.904778156996587, "grad_norm": 3.302304267883301, "learning_rate": 0.0006984072810011377, "loss": 6.762, "step": 2651 }, { "epoch": 0.9051194539249147, "grad_norm": 3.249640941619873, "learning_rate": 0.0006982935153583618, "loss": 6.7481, "step": 2652 }, { "epoch": 0.9054607508532423, "grad_norm": 5.366897106170654, "learning_rate": 0.000698179749715586, "loss": 6.1722, "step": 2653 }, { "epoch": 0.9058020477815699, "grad_norm": 3.280503749847412, "learning_rate": 0.0006980659840728101, "loss": 6.5592, "step": 2654 }, { "epoch": 0.9061433447098977, "grad_norm": 3.4887964725494385, "learning_rate": 0.0006979522184300341, "loss": 6.6677, "step": 2655 }, { "epoch": 0.9064846416382253, "grad_norm": 3.333489418029785, "learning_rate": 0.0006978384527872582, "loss": 5.8702, "step": 2656 }, { "epoch": 0.9068259385665529, "grad_norm": 3.1572794914245605, "learning_rate": 0.0006977246871444823, "loss": 6.6554, "step": 2657 }, { "epoch": 0.9071672354948805, "grad_norm": 3.2989954948425293, "learning_rate": 0.0006976109215017064, "loss": 6.5196, "step": 2658 }, { "epoch": 0.9075085324232082, "grad_norm": 5.108197212219238, "learning_rate": 0.0006974971558589306, "loss": 5.6353, "step": 2659 }, { "epoch": 0.9078498293515358, "grad_norm": 3.3741981983184814, "learning_rate": 0.0006973833902161547, "loss": 6.3594, "step": 2660 }, { "epoch": 0.9081911262798635, "grad_norm": 3.180333137512207, "learning_rate": 0.0006972696245733788, "loss": 6.3406, "step": 2661 }, { "epoch": 0.9085324232081912, "grad_norm": 3.803738832473755, "learning_rate": 0.0006971558589306029, "loss": 6.3439, "step": 2662 }, { "epoch": 0.9088737201365188, "grad_norm": 4.112486362457275, "learning_rate": 0.000697042093287827, "loss": 5.6222, "step": 2663 }, { "epoch": 0.9092150170648464, "grad_norm": 3.6349542140960693, "learning_rate": 0.0006969283276450513, "loss": 6.0037, "step": 2664 }, { "epoch": 0.909556313993174, "grad_norm": 3.6072964668273926, "learning_rate": 0.0006968145620022754, "loss": 6.6496, "step": 2665 }, { "epoch": 0.9098976109215017, "grad_norm": 3.841954469680786, "learning_rate": 0.0006967007963594995, "loss": 7.091, "step": 2666 }, { "epoch": 0.9102389078498293, "grad_norm": 3.223397970199585, "learning_rate": 0.0006965870307167236, "loss": 6.9502, "step": 2667 }, { "epoch": 0.910580204778157, "grad_norm": 3.3219428062438965, "learning_rate": 0.0006964732650739477, "loss": 6.7914, "step": 2668 }, { "epoch": 0.9109215017064847, "grad_norm": 3.22790265083313, "learning_rate": 0.0006963594994311718, "loss": 6.8888, "step": 2669 }, { "epoch": 0.9112627986348123, "grad_norm": 3.58799409866333, "learning_rate": 0.000696245733788396, "loss": 6.5231, "step": 2670 }, { "epoch": 0.9116040955631399, "grad_norm": 3.4284210205078125, "learning_rate": 0.0006961319681456201, "loss": 6.4324, "step": 2671 }, { "epoch": 0.9119453924914676, "grad_norm": 3.6052560806274414, "learning_rate": 0.0006960182025028442, "loss": 6.6546, "step": 2672 }, { "epoch": 0.9122866894197952, "grad_norm": 3.6261966228485107, "learning_rate": 0.0006959044368600683, "loss": 6.6569, "step": 2673 }, { "epoch": 0.9126279863481229, "grad_norm": 3.32460880279541, "learning_rate": 0.0006957906712172923, "loss": 6.5463, "step": 2674 }, { "epoch": 0.9129692832764505, "grad_norm": 3.767965078353882, "learning_rate": 0.0006956769055745164, "loss": 6.153, "step": 2675 }, { "epoch": 0.9133105802047782, "grad_norm": 3.2888388633728027, "learning_rate": 0.0006955631399317406, "loss": 6.5947, "step": 2676 }, { "epoch": 0.9136518771331058, "grad_norm": 3.235499620437622, "learning_rate": 0.0006954493742889647, "loss": 6.6224, "step": 2677 }, { "epoch": 0.9139931740614334, "grad_norm": 3.1884522438049316, "learning_rate": 0.0006953356086461888, "loss": 6.8578, "step": 2678 }, { "epoch": 0.9143344709897611, "grad_norm": 3.4016973972320557, "learning_rate": 0.0006952218430034129, "loss": 6.9575, "step": 2679 }, { "epoch": 0.9146757679180887, "grad_norm": 3.2565393447875977, "learning_rate": 0.000695108077360637, "loss": 7.086, "step": 2680 }, { "epoch": 0.9150170648464164, "grad_norm": 3.359285593032837, "learning_rate": 0.0006949943117178612, "loss": 6.695, "step": 2681 }, { "epoch": 0.9153583617747441, "grad_norm": 3.5385429859161377, "learning_rate": 0.0006948805460750854, "loss": 6.2872, "step": 2682 }, { "epoch": 0.9156996587030717, "grad_norm": 3.284391164779663, "learning_rate": 0.0006947667804323095, "loss": 6.5209, "step": 2683 }, { "epoch": 0.9160409556313993, "grad_norm": 3.260636329650879, "learning_rate": 0.0006946530147895336, "loss": 6.4423, "step": 2684 }, { "epoch": 0.9163822525597269, "grad_norm": 5.266411304473877, "learning_rate": 0.0006945392491467577, "loss": 5.6748, "step": 2685 }, { "epoch": 0.9167235494880546, "grad_norm": 3.5976195335388184, "learning_rate": 0.0006944254835039818, "loss": 6.3951, "step": 2686 }, { "epoch": 0.9170648464163823, "grad_norm": 3.503249406814575, "learning_rate": 0.000694311717861206, "loss": 6.8257, "step": 2687 }, { "epoch": 0.9174061433447099, "grad_norm": 3.3149096965789795, "learning_rate": 0.0006941979522184301, "loss": 6.1159, "step": 2688 }, { "epoch": 0.9177474402730376, "grad_norm": 3.2986981868743896, "learning_rate": 0.0006940841865756542, "loss": 6.1175, "step": 2689 }, { "epoch": 0.9180887372013652, "grad_norm": 3.357754707336426, "learning_rate": 0.0006939704209328783, "loss": 6.8085, "step": 2690 }, { "epoch": 0.9184300341296928, "grad_norm": 3.491800308227539, "learning_rate": 0.0006938566552901024, "loss": 6.4161, "step": 2691 }, { "epoch": 0.9187713310580204, "grad_norm": 3.2512035369873047, "learning_rate": 0.0006937428896473265, "loss": 6.8517, "step": 2692 }, { "epoch": 0.9191126279863481, "grad_norm": 3.1842398643493652, "learning_rate": 0.0006936291240045507, "loss": 7.1578, "step": 2693 }, { "epoch": 0.9194539249146758, "grad_norm": 3.4947540760040283, "learning_rate": 0.0006935153583617747, "loss": 6.0914, "step": 2694 }, { "epoch": 0.9197952218430034, "grad_norm": 8.686001777648926, "learning_rate": 0.0006934015927189988, "loss": 5.7208, "step": 2695 }, { "epoch": 0.9201365187713311, "grad_norm": 3.50386643409729, "learning_rate": 0.0006932878270762229, "loss": 6.766, "step": 2696 }, { "epoch": 0.9204778156996587, "grad_norm": 3.275815486907959, "learning_rate": 0.000693174061433447, "loss": 6.6614, "step": 2697 }, { "epoch": 0.9208191126279863, "grad_norm": 3.2420060634613037, "learning_rate": 0.0006930602957906712, "loss": 6.4655, "step": 2698 }, { "epoch": 0.921160409556314, "grad_norm": 3.386936902999878, "learning_rate": 0.0006929465301478954, "loss": 6.318, "step": 2699 }, { "epoch": 0.9215017064846417, "grad_norm": 5.493778705596924, "learning_rate": 0.0006928327645051195, "loss": 5.9732, "step": 2700 }, { "epoch": 0.9218430034129693, "grad_norm": 3.2191455364227295, "learning_rate": 0.0006927189988623436, "loss": 6.5306, "step": 2701 }, { "epoch": 0.922184300341297, "grad_norm": 3.3493263721466064, "learning_rate": 0.0006926052332195677, "loss": 6.4363, "step": 2702 }, { "epoch": 0.9225255972696246, "grad_norm": 3.3574280738830566, "learning_rate": 0.0006924914675767918, "loss": 6.9716, "step": 2703 }, { "epoch": 0.9228668941979522, "grad_norm": 3.3387627601623535, "learning_rate": 0.000692377701934016, "loss": 6.9231, "step": 2704 }, { "epoch": 0.9232081911262798, "grad_norm": 3.2208516597747803, "learning_rate": 0.0006922639362912401, "loss": 6.4118, "step": 2705 }, { "epoch": 0.9235494880546075, "grad_norm": 3.2908411026000977, "learning_rate": 0.0006921501706484642, "loss": 6.4098, "step": 2706 }, { "epoch": 0.9238907849829352, "grad_norm": 3.3244807720184326, "learning_rate": 0.0006920364050056883, "loss": 6.8572, "step": 2707 }, { "epoch": 0.9242320819112628, "grad_norm": 3.2083566188812256, "learning_rate": 0.0006919226393629124, "loss": 6.8654, "step": 2708 }, { "epoch": 0.9245733788395905, "grad_norm": 3.230299472808838, "learning_rate": 0.0006918088737201365, "loss": 6.6024, "step": 2709 }, { "epoch": 0.9249146757679181, "grad_norm": 3.518018960952759, "learning_rate": 0.0006916951080773607, "loss": 5.8393, "step": 2710 }, { "epoch": 0.9252559726962457, "grad_norm": 3.445234775543213, "learning_rate": 0.0006915813424345848, "loss": 6.0757, "step": 2711 }, { "epoch": 0.9255972696245733, "grad_norm": 3.329686403274536, "learning_rate": 0.000691467576791809, "loss": 6.8527, "step": 2712 }, { "epoch": 0.9259385665529011, "grad_norm": 3.224597215652466, "learning_rate": 0.0006913538111490329, "loss": 6.5644, "step": 2713 }, { "epoch": 0.9262798634812287, "grad_norm": 3.4149997234344482, "learning_rate": 0.000691240045506257, "loss": 6.7847, "step": 2714 }, { "epoch": 0.9266211604095563, "grad_norm": 3.3255600929260254, "learning_rate": 0.0006911262798634812, "loss": 6.5404, "step": 2715 }, { "epoch": 0.926962457337884, "grad_norm": 5.029857635498047, "learning_rate": 0.0006910125142207054, "loss": 5.6074, "step": 2716 }, { "epoch": 0.9273037542662116, "grad_norm": 4.0226149559021, "learning_rate": 0.0006908987485779295, "loss": 6.6629, "step": 2717 }, { "epoch": 0.9276450511945392, "grad_norm": 3.5831539630889893, "learning_rate": 0.0006907849829351536, "loss": 5.8483, "step": 2718 }, { "epoch": 0.9279863481228668, "grad_norm": 3.3607494831085205, "learning_rate": 0.0006906712172923777, "loss": 6.7091, "step": 2719 }, { "epoch": 0.9283276450511946, "grad_norm": 5.548701763153076, "learning_rate": 0.0006905574516496018, "loss": 5.6986, "step": 2720 }, { "epoch": 0.9286689419795222, "grad_norm": 3.317486047744751, "learning_rate": 0.0006904436860068259, "loss": 6.4468, "step": 2721 }, { "epoch": 0.9290102389078498, "grad_norm": 3.3964345455169678, "learning_rate": 0.0006903299203640501, "loss": 6.7458, "step": 2722 }, { "epoch": 0.9293515358361775, "grad_norm": 3.141096591949463, "learning_rate": 0.0006902161547212742, "loss": 6.8357, "step": 2723 }, { "epoch": 0.9296928327645051, "grad_norm": 3.1272401809692383, "learning_rate": 0.0006901023890784983, "loss": 6.2205, "step": 2724 }, { "epoch": 0.9300341296928327, "grad_norm": 3.82816743850708, "learning_rate": 0.0006899886234357224, "loss": 6.151, "step": 2725 }, { "epoch": 0.9303754266211605, "grad_norm": 3.0288915634155273, "learning_rate": 0.0006898748577929465, "loss": 6.5263, "step": 2726 }, { "epoch": 0.9307167235494881, "grad_norm": 3.344312906265259, "learning_rate": 0.0006897610921501707, "loss": 6.6451, "step": 2727 }, { "epoch": 0.9310580204778157, "grad_norm": 3.386958122253418, "learning_rate": 0.0006896473265073948, "loss": 6.5277, "step": 2728 }, { "epoch": 0.9313993174061433, "grad_norm": 3.474781036376953, "learning_rate": 0.000689533560864619, "loss": 6.0832, "step": 2729 }, { "epoch": 0.931740614334471, "grad_norm": 3.3939759731292725, "learning_rate": 0.000689419795221843, "loss": 7.0273, "step": 2730 }, { "epoch": 0.9320819112627986, "grad_norm": 3.276217460632324, "learning_rate": 0.0006893060295790672, "loss": 5.9832, "step": 2731 }, { "epoch": 0.9324232081911262, "grad_norm": 3.2596402168273926, "learning_rate": 0.0006891922639362913, "loss": 6.8106, "step": 2732 }, { "epoch": 0.932764505119454, "grad_norm": 3.262134313583374, "learning_rate": 0.0006890784982935154, "loss": 6.5364, "step": 2733 }, { "epoch": 0.9331058020477816, "grad_norm": 4.899016380310059, "learning_rate": 0.0006889647326507395, "loss": 6.1792, "step": 2734 }, { "epoch": 0.9334470989761092, "grad_norm": 4.251522064208984, "learning_rate": 0.0006888509670079636, "loss": 6.3958, "step": 2735 }, { "epoch": 0.9337883959044369, "grad_norm": 4.013020992279053, "learning_rate": 0.0006887372013651877, "loss": 6.1985, "step": 2736 }, { "epoch": 0.9341296928327645, "grad_norm": 3.3481905460357666, "learning_rate": 0.0006886234357224118, "loss": 7.0557, "step": 2737 }, { "epoch": 0.9344709897610921, "grad_norm": 3.201247215270996, "learning_rate": 0.0006885096700796359, "loss": 6.2043, "step": 2738 }, { "epoch": 0.9348122866894198, "grad_norm": 3.4401636123657227, "learning_rate": 0.0006883959044368601, "loss": 7.097, "step": 2739 }, { "epoch": 0.9351535836177475, "grad_norm": 3.1851375102996826, "learning_rate": 0.0006882821387940842, "loss": 6.761, "step": 2740 }, { "epoch": 0.9354948805460751, "grad_norm": 3.280695915222168, "learning_rate": 0.0006881683731513083, "loss": 7.2953, "step": 2741 }, { "epoch": 0.9358361774744027, "grad_norm": 3.2717418670654297, "learning_rate": 0.0006880546075085324, "loss": 6.71, "step": 2742 }, { "epoch": 0.9361774744027304, "grad_norm": 3.8960654735565186, "learning_rate": 0.0006879408418657565, "loss": 5.5993, "step": 2743 }, { "epoch": 0.936518771331058, "grad_norm": 3.261302947998047, "learning_rate": 0.0006878270762229807, "loss": 6.2783, "step": 2744 }, { "epoch": 0.9368600682593856, "grad_norm": 3.3171653747558594, "learning_rate": 0.0006877133105802048, "loss": 5.9773, "step": 2745 }, { "epoch": 0.9372013651877134, "grad_norm": 3.2578046321868896, "learning_rate": 0.000687599544937429, "loss": 6.0005, "step": 2746 }, { "epoch": 0.937542662116041, "grad_norm": 3.587801933288574, "learning_rate": 0.0006874857792946531, "loss": 6.7843, "step": 2747 }, { "epoch": 0.9378839590443686, "grad_norm": 3.3838870525360107, "learning_rate": 0.0006873720136518772, "loss": 6.8512, "step": 2748 }, { "epoch": 0.9382252559726962, "grad_norm": 3.3424386978149414, "learning_rate": 0.0006872582480091013, "loss": 6.5035, "step": 2749 }, { "epoch": 0.9385665529010239, "grad_norm": 4.673618793487549, "learning_rate": 0.0006871444823663255, "loss": 4.9139, "step": 2750 }, { "epoch": 0.9389078498293515, "grad_norm": 3.2579727172851562, "learning_rate": 0.0006870307167235496, "loss": 6.4092, "step": 2751 }, { "epoch": 0.9392491467576792, "grad_norm": 3.674421787261963, "learning_rate": 0.0006869169510807736, "loss": 6.4869, "step": 2752 }, { "epoch": 0.9395904436860069, "grad_norm": 3.2421398162841797, "learning_rate": 0.0006868031854379977, "loss": 6.3196, "step": 2753 }, { "epoch": 0.9399317406143345, "grad_norm": 3.4086074829101562, "learning_rate": 0.0006866894197952218, "loss": 6.2654, "step": 2754 }, { "epoch": 0.9402730375426621, "grad_norm": 3.3109261989593506, "learning_rate": 0.0006865756541524459, "loss": 6.3046, "step": 2755 }, { "epoch": 0.9406143344709897, "grad_norm": 3.2505524158477783, "learning_rate": 0.0006864618885096701, "loss": 6.3068, "step": 2756 }, { "epoch": 0.9409556313993174, "grad_norm": 3.0877838134765625, "learning_rate": 0.0006863481228668942, "loss": 6.4801, "step": 2757 }, { "epoch": 0.941296928327645, "grad_norm": 3.232034206390381, "learning_rate": 0.0006862343572241183, "loss": 6.1997, "step": 2758 }, { "epoch": 0.9416382252559727, "grad_norm": 3.7556653022766113, "learning_rate": 0.0006861205915813424, "loss": 6.0221, "step": 2759 }, { "epoch": 0.9419795221843004, "grad_norm": 3.408632278442383, "learning_rate": 0.0006860068259385665, "loss": 6.4331, "step": 2760 }, { "epoch": 0.942320819112628, "grad_norm": 6.045160293579102, "learning_rate": 0.0006858930602957906, "loss": 5.5115, "step": 2761 }, { "epoch": 0.9426621160409556, "grad_norm": 3.6110312938690186, "learning_rate": 0.0006857792946530148, "loss": 6.957, "step": 2762 }, { "epoch": 0.9430034129692833, "grad_norm": 3.6176974773406982, "learning_rate": 0.000685665529010239, "loss": 6.2635, "step": 2763 }, { "epoch": 0.9433447098976109, "grad_norm": 3.2292110919952393, "learning_rate": 0.0006855517633674631, "loss": 6.4325, "step": 2764 }, { "epoch": 0.9436860068259386, "grad_norm": 3.226062536239624, "learning_rate": 0.0006854379977246872, "loss": 6.1578, "step": 2765 }, { "epoch": 0.9440273037542662, "grad_norm": 3.0933258533477783, "learning_rate": 0.0006853242320819113, "loss": 6.3193, "step": 2766 }, { "epoch": 0.9443686006825939, "grad_norm": 3.3063440322875977, "learning_rate": 0.0006852104664391355, "loss": 6.4354, "step": 2767 }, { "epoch": 0.9447098976109215, "grad_norm": 7.300811290740967, "learning_rate": 0.0006850967007963596, "loss": 5.611, "step": 2768 }, { "epoch": 0.9450511945392491, "grad_norm": 3.557973623275757, "learning_rate": 0.0006849829351535837, "loss": 6.5173, "step": 2769 }, { "epoch": 0.9453924914675768, "grad_norm": 2.229107141494751, "learning_rate": 0.0006848691695108078, "loss": 3.489, "step": 2770 }, { "epoch": 0.9457337883959044, "grad_norm": 3.4767768383026123, "learning_rate": 0.0006847554038680319, "loss": 6.4509, "step": 2771 }, { "epoch": 0.9460750853242321, "grad_norm": 3.309904098510742, "learning_rate": 0.0006846416382252559, "loss": 6.3209, "step": 2772 }, { "epoch": 0.9464163822525598, "grad_norm": 3.6992416381835938, "learning_rate": 0.0006845278725824801, "loss": 6.4455, "step": 2773 }, { "epoch": 0.9467576791808874, "grad_norm": 3.4120285511016846, "learning_rate": 0.0006844141069397042, "loss": 6.5287, "step": 2774 }, { "epoch": 0.947098976109215, "grad_norm": 3.3059194087982178, "learning_rate": 0.0006843003412969283, "loss": 6.4193, "step": 2775 }, { "epoch": 0.9474402730375426, "grad_norm": 3.727429151535034, "learning_rate": 0.0006841865756541524, "loss": 6.0036, "step": 2776 }, { "epoch": 0.9477815699658703, "grad_norm": 5.650639057159424, "learning_rate": 0.0006840728100113765, "loss": 5.4394, "step": 2777 }, { "epoch": 0.948122866894198, "grad_norm": 3.2986202239990234, "learning_rate": 0.0006839590443686006, "loss": 6.7952, "step": 2778 }, { "epoch": 0.9484641638225256, "grad_norm": 4.086126804351807, "learning_rate": 0.0006838452787258248, "loss": 4.7384, "step": 2779 }, { "epoch": 0.9488054607508533, "grad_norm": 3.3148763179779053, "learning_rate": 0.000683731513083049, "loss": 6.6121, "step": 2780 }, { "epoch": 0.9491467576791809, "grad_norm": 3.115473508834839, "learning_rate": 0.0006836177474402731, "loss": 6.375, "step": 2781 }, { "epoch": 0.9494880546075085, "grad_norm": 4.739993572235107, "learning_rate": 0.0006835039817974972, "loss": 6.0525, "step": 2782 }, { "epoch": 0.9498293515358361, "grad_norm": 3.0348520278930664, "learning_rate": 0.0006833902161547213, "loss": 6.6161, "step": 2783 }, { "epoch": 0.9501706484641638, "grad_norm": 4.478902339935303, "learning_rate": 0.0006832764505119454, "loss": 6.0643, "step": 2784 }, { "epoch": 0.9505119453924915, "grad_norm": 3.3169867992401123, "learning_rate": 0.0006831626848691696, "loss": 6.4104, "step": 2785 }, { "epoch": 0.9508532423208191, "grad_norm": 3.3145945072174072, "learning_rate": 0.0006830489192263937, "loss": 6.6644, "step": 2786 }, { "epoch": 0.9511945392491468, "grad_norm": 3.361513614654541, "learning_rate": 0.0006829351535836178, "loss": 6.4348, "step": 2787 }, { "epoch": 0.9515358361774744, "grad_norm": 3.221954584121704, "learning_rate": 0.0006828213879408419, "loss": 6.8069, "step": 2788 }, { "epoch": 0.951877133105802, "grad_norm": 3.3777694702148438, "learning_rate": 0.000682707622298066, "loss": 5.9127, "step": 2789 }, { "epoch": 0.9522184300341296, "grad_norm": 4.066497325897217, "learning_rate": 0.0006825938566552902, "loss": 5.7392, "step": 2790 }, { "epoch": 0.9525597269624574, "grad_norm": 3.3121917247772217, "learning_rate": 0.0006824800910125142, "loss": 6.9516, "step": 2791 }, { "epoch": 0.952901023890785, "grad_norm": 3.3323752880096436, "learning_rate": 0.0006823663253697383, "loss": 6.7219, "step": 2792 }, { "epoch": 0.9532423208191126, "grad_norm": 3.7000746726989746, "learning_rate": 0.0006822525597269624, "loss": 6.541, "step": 2793 }, { "epoch": 0.9535836177474403, "grad_norm": 3.4040660858154297, "learning_rate": 0.0006821387940841865, "loss": 6.7015, "step": 2794 }, { "epoch": 0.9539249146757679, "grad_norm": 3.9156670570373535, "learning_rate": 0.0006820250284414106, "loss": 6.447, "step": 2795 }, { "epoch": 0.9542662116040955, "grad_norm": 3.4194421768188477, "learning_rate": 0.0006819112627986348, "loss": 6.6974, "step": 2796 }, { "epoch": 0.9546075085324232, "grad_norm": 4.750198841094971, "learning_rate": 0.000681797497155859, "loss": 6.2898, "step": 2797 }, { "epoch": 0.9549488054607509, "grad_norm": 3.528273582458496, "learning_rate": 0.0006816837315130831, "loss": 6.2098, "step": 2798 }, { "epoch": 0.9552901023890785, "grad_norm": 3.286182403564453, "learning_rate": 0.0006815699658703072, "loss": 6.7786, "step": 2799 }, { "epoch": 0.9556313993174061, "grad_norm": 3.299617052078247, "learning_rate": 0.0006814562002275313, "loss": 6.6818, "step": 2800 }, { "epoch": 0.9559726962457338, "grad_norm": 3.2190589904785156, "learning_rate": 0.0006813424345847554, "loss": 6.15, "step": 2801 }, { "epoch": 0.9563139931740614, "grad_norm": 3.2283973693847656, "learning_rate": 0.0006812286689419796, "loss": 5.7856, "step": 2802 }, { "epoch": 0.956655290102389, "grad_norm": 3.121652364730835, "learning_rate": 0.0006811149032992037, "loss": 6.5968, "step": 2803 }, { "epoch": 0.9569965870307168, "grad_norm": 3.343250274658203, "learning_rate": 0.0006810011376564278, "loss": 6.9726, "step": 2804 }, { "epoch": 0.9573378839590444, "grad_norm": 3.670442819595337, "learning_rate": 0.0006808873720136519, "loss": 5.8369, "step": 2805 }, { "epoch": 0.957679180887372, "grad_norm": 3.4261021614074707, "learning_rate": 0.000680773606370876, "loss": 6.7033, "step": 2806 }, { "epoch": 0.9580204778156997, "grad_norm": 3.3535799980163574, "learning_rate": 0.0006806598407281002, "loss": 5.2909, "step": 2807 }, { "epoch": 0.9583617747440273, "grad_norm": 3.462440252304077, "learning_rate": 0.0006805460750853243, "loss": 5.9633, "step": 2808 }, { "epoch": 0.9587030716723549, "grad_norm": 3.2325925827026367, "learning_rate": 0.0006804323094425484, "loss": 6.4796, "step": 2809 }, { "epoch": 0.9590443686006825, "grad_norm": 3.415300130844116, "learning_rate": 0.0006803185437997725, "loss": 6.298, "step": 2810 }, { "epoch": 0.9593856655290103, "grad_norm": 3.2603628635406494, "learning_rate": 0.0006802047781569965, "loss": 6.9542, "step": 2811 }, { "epoch": 0.9597269624573379, "grad_norm": 3.158876657485962, "learning_rate": 0.0006800910125142206, "loss": 6.7321, "step": 2812 }, { "epoch": 0.9600682593856655, "grad_norm": 3.137831687927246, "learning_rate": 0.0006799772468714448, "loss": 6.1844, "step": 2813 }, { "epoch": 0.9604095563139932, "grad_norm": 3.162550687789917, "learning_rate": 0.000679863481228669, "loss": 6.5377, "step": 2814 }, { "epoch": 0.9607508532423208, "grad_norm": 3.1445960998535156, "learning_rate": 0.0006797497155858931, "loss": 6.4721, "step": 2815 }, { "epoch": 0.9610921501706484, "grad_norm": 3.4892373085021973, "learning_rate": 0.0006796359499431172, "loss": 6.4251, "step": 2816 }, { "epoch": 0.9614334470989762, "grad_norm": 3.36893892288208, "learning_rate": 0.0006795221843003413, "loss": 5.9855, "step": 2817 }, { "epoch": 0.9617747440273038, "grad_norm": 3.3693227767944336, "learning_rate": 0.0006794084186575654, "loss": 6.8197, "step": 2818 }, { "epoch": 0.9621160409556314, "grad_norm": 3.2168221473693848, "learning_rate": 0.0006792946530147896, "loss": 6.6614, "step": 2819 }, { "epoch": 0.962457337883959, "grad_norm": 3.355677604675293, "learning_rate": 0.0006791808873720137, "loss": 6.1324, "step": 2820 }, { "epoch": 0.9627986348122867, "grad_norm": 3.28233003616333, "learning_rate": 0.0006790671217292378, "loss": 6.5953, "step": 2821 }, { "epoch": 0.9631399317406143, "grad_norm": 3.175426959991455, "learning_rate": 0.0006789533560864619, "loss": 6.4139, "step": 2822 }, { "epoch": 0.9634812286689419, "grad_norm": 3.2525055408477783, "learning_rate": 0.000678839590443686, "loss": 6.8946, "step": 2823 }, { "epoch": 0.9638225255972697, "grad_norm": 3.2364585399627686, "learning_rate": 0.0006787258248009101, "loss": 6.5195, "step": 2824 }, { "epoch": 0.9641638225255973, "grad_norm": 3.8817360401153564, "learning_rate": 0.0006786120591581343, "loss": 5.2455, "step": 2825 }, { "epoch": 0.9645051194539249, "grad_norm": 3.36077618598938, "learning_rate": 0.0006784982935153584, "loss": 5.7979, "step": 2826 }, { "epoch": 0.9648464163822525, "grad_norm": 3.421419620513916, "learning_rate": 0.0006783845278725825, "loss": 6.6912, "step": 2827 }, { "epoch": 0.9651877133105802, "grad_norm": 3.2399160861968994, "learning_rate": 0.0006782707622298066, "loss": 6.7359, "step": 2828 }, { "epoch": 0.9655290102389078, "grad_norm": 3.5829226970672607, "learning_rate": 0.0006781569965870307, "loss": 6.447, "step": 2829 }, { "epoch": 0.9658703071672355, "grad_norm": 3.9516849517822266, "learning_rate": 0.0006780432309442548, "loss": 6.1873, "step": 2830 }, { "epoch": 0.9662116040955632, "grad_norm": 3.371225595474243, "learning_rate": 0.000677929465301479, "loss": 6.8074, "step": 2831 }, { "epoch": 0.9665529010238908, "grad_norm": 3.1678028106689453, "learning_rate": 0.0006778156996587031, "loss": 6.8476, "step": 2832 }, { "epoch": 0.9668941979522184, "grad_norm": 3.185497283935547, "learning_rate": 0.0006777019340159272, "loss": 6.7461, "step": 2833 }, { "epoch": 0.967235494880546, "grad_norm": 6.401412487030029, "learning_rate": 0.0006775881683731513, "loss": 6.6854, "step": 2834 }, { "epoch": 0.9675767918088737, "grad_norm": 4.533454418182373, "learning_rate": 0.0006774744027303754, "loss": 5.6879, "step": 2835 }, { "epoch": 0.9679180887372013, "grad_norm": 4.112688064575195, "learning_rate": 0.0006773606370875996, "loss": 6.5312, "step": 2836 }, { "epoch": 0.968259385665529, "grad_norm": 3.4008233547210693, "learning_rate": 0.0006772468714448237, "loss": 6.5382, "step": 2837 }, { "epoch": 0.9686006825938567, "grad_norm": 3.319769859313965, "learning_rate": 0.0006771331058020478, "loss": 6.3743, "step": 2838 }, { "epoch": 0.9689419795221843, "grad_norm": 3.506112813949585, "learning_rate": 0.0006770193401592719, "loss": 5.7598, "step": 2839 }, { "epoch": 0.9692832764505119, "grad_norm": 3.2987313270568848, "learning_rate": 0.000676905574516496, "loss": 6.7194, "step": 2840 }, { "epoch": 0.9696245733788396, "grad_norm": 3.148381233215332, "learning_rate": 0.0006767918088737201, "loss": 6.5178, "step": 2841 }, { "epoch": 0.9699658703071672, "grad_norm": 3.224257230758667, "learning_rate": 0.0006766780432309443, "loss": 6.1293, "step": 2842 }, { "epoch": 0.9703071672354949, "grad_norm": 3.166309356689453, "learning_rate": 0.0006765642775881684, "loss": 6.7261, "step": 2843 }, { "epoch": 0.9706484641638226, "grad_norm": 3.201484441757202, "learning_rate": 0.0006764505119453925, "loss": 6.6831, "step": 2844 }, { "epoch": 0.9709897610921502, "grad_norm": 3.366837501525879, "learning_rate": 0.0006763367463026166, "loss": 6.6644, "step": 2845 }, { "epoch": 0.9713310580204778, "grad_norm": 3.205770969390869, "learning_rate": 0.0006762229806598407, "loss": 6.787, "step": 2846 }, { "epoch": 0.9716723549488054, "grad_norm": 3.400615692138672, "learning_rate": 0.000676109215017065, "loss": 6.4538, "step": 2847 }, { "epoch": 0.9720136518771331, "grad_norm": 3.295478343963623, "learning_rate": 0.0006759954493742891, "loss": 7.0216, "step": 2848 }, { "epoch": 0.9723549488054608, "grad_norm": 3.9448795318603516, "learning_rate": 0.0006758816837315131, "loss": 6.3134, "step": 2849 }, { "epoch": 0.9726962457337884, "grad_norm": 3.3810625076293945, "learning_rate": 0.0006757679180887372, "loss": 6.5216, "step": 2850 }, { "epoch": 0.9730375426621161, "grad_norm": 3.364837169647217, "learning_rate": 0.0006756541524459613, "loss": 6.4476, "step": 2851 }, { "epoch": 0.9733788395904437, "grad_norm": 3.2336301803588867, "learning_rate": 0.0006755403868031854, "loss": 6.19, "step": 2852 }, { "epoch": 0.9737201365187713, "grad_norm": 3.081376314163208, "learning_rate": 0.0006754266211604096, "loss": 6.6487, "step": 2853 }, { "epoch": 0.974061433447099, "grad_norm": 3.2751305103302, "learning_rate": 0.0006753128555176337, "loss": 6.3007, "step": 2854 }, { "epoch": 0.9744027303754266, "grad_norm": 6.972997665405273, "learning_rate": 0.0006751990898748578, "loss": 5.3055, "step": 2855 }, { "epoch": 0.9747440273037543, "grad_norm": 3.2816948890686035, "learning_rate": 0.0006750853242320819, "loss": 6.5868, "step": 2856 }, { "epoch": 0.9750853242320819, "grad_norm": 3.3620402812957764, "learning_rate": 0.000674971558589306, "loss": 6.5817, "step": 2857 }, { "epoch": 0.9754266211604096, "grad_norm": 3.404201030731201, "learning_rate": 0.0006748577929465301, "loss": 6.3418, "step": 2858 }, { "epoch": 0.9757679180887372, "grad_norm": 3.4882445335388184, "learning_rate": 0.0006747440273037543, "loss": 5.5313, "step": 2859 }, { "epoch": 0.9761092150170648, "grad_norm": 3.3639590740203857, "learning_rate": 0.0006746302616609784, "loss": 6.7607, "step": 2860 }, { "epoch": 0.9764505119453925, "grad_norm": 3.0582730770111084, "learning_rate": 0.0006745164960182025, "loss": 6.4016, "step": 2861 }, { "epoch": 0.9767918088737202, "grad_norm": 3.409369945526123, "learning_rate": 0.0006744027303754266, "loss": 6.8788, "step": 2862 }, { "epoch": 0.9771331058020478, "grad_norm": 3.20253849029541, "learning_rate": 0.0006742889647326507, "loss": 6.3911, "step": 2863 }, { "epoch": 0.9774744027303754, "grad_norm": 3.5735726356506348, "learning_rate": 0.0006741751990898749, "loss": 6.0568, "step": 2864 }, { "epoch": 0.9778156996587031, "grad_norm": 3.4013800621032715, "learning_rate": 0.0006740614334470991, "loss": 6.2177, "step": 2865 }, { "epoch": 0.9781569965870307, "grad_norm": 5.85659646987915, "learning_rate": 0.0006739476678043232, "loss": 5.692, "step": 2866 }, { "epoch": 0.9784982935153583, "grad_norm": 3.534287214279175, "learning_rate": 0.0006738339021615473, "loss": 6.3442, "step": 2867 }, { "epoch": 0.978839590443686, "grad_norm": 3.368276357650757, "learning_rate": 0.0006737201365187714, "loss": 6.6924, "step": 2868 }, { "epoch": 0.9791808873720137, "grad_norm": 3.3042497634887695, "learning_rate": 0.0006736063708759954, "loss": 6.2767, "step": 2869 }, { "epoch": 0.9795221843003413, "grad_norm": 3.2396910190582275, "learning_rate": 0.0006734926052332196, "loss": 6.6282, "step": 2870 }, { "epoch": 0.979863481228669, "grad_norm": 3.2214739322662354, "learning_rate": 0.0006733788395904437, "loss": 6.5097, "step": 2871 }, { "epoch": 0.9802047781569966, "grad_norm": 3.2005984783172607, "learning_rate": 0.0006732650739476678, "loss": 6.2032, "step": 2872 }, { "epoch": 0.9805460750853242, "grad_norm": 4.3526105880737305, "learning_rate": 0.0006731513083048919, "loss": 6.1474, "step": 2873 }, { "epoch": 0.9808873720136518, "grad_norm": 3.500005006790161, "learning_rate": 0.000673037542662116, "loss": 6.3671, "step": 2874 }, { "epoch": 0.9812286689419796, "grad_norm": 4.856510162353516, "learning_rate": 0.0006729237770193401, "loss": 5.3342, "step": 2875 }, { "epoch": 0.9815699658703072, "grad_norm": 3.3631317615509033, "learning_rate": 0.0006728100113765643, "loss": 6.9552, "step": 2876 }, { "epoch": 0.9819112627986348, "grad_norm": 4.488306999206543, "learning_rate": 0.0006726962457337884, "loss": 5.2524, "step": 2877 }, { "epoch": 0.9822525597269625, "grad_norm": 3.190113067626953, "learning_rate": 0.0006725824800910125, "loss": 6.5939, "step": 2878 }, { "epoch": 0.9825938566552901, "grad_norm": 3.2531468868255615, "learning_rate": 0.0006724687144482366, "loss": 6.8033, "step": 2879 }, { "epoch": 0.9829351535836177, "grad_norm": 3.396819591522217, "learning_rate": 0.0006723549488054607, "loss": 6.5321, "step": 2880 }, { "epoch": 0.9832764505119453, "grad_norm": 3.1268954277038574, "learning_rate": 0.0006722411831626849, "loss": 6.7329, "step": 2881 }, { "epoch": 0.9836177474402731, "grad_norm": 3.4589643478393555, "learning_rate": 0.0006721274175199091, "loss": 6.5176, "step": 2882 }, { "epoch": 0.9839590443686007, "grad_norm": 3.242070198059082, "learning_rate": 0.0006720136518771332, "loss": 6.6396, "step": 2883 }, { "epoch": 0.9843003412969283, "grad_norm": 3.2771873474121094, "learning_rate": 0.0006718998862343573, "loss": 6.3999, "step": 2884 }, { "epoch": 0.984641638225256, "grad_norm": 3.497062921524048, "learning_rate": 0.0006717861205915814, "loss": 6.2288, "step": 2885 }, { "epoch": 0.9849829351535836, "grad_norm": 3.2468268871307373, "learning_rate": 0.0006716723549488055, "loss": 6.7244, "step": 2886 }, { "epoch": 0.9853242320819112, "grad_norm": 3.181048631668091, "learning_rate": 0.0006715585893060296, "loss": 6.9261, "step": 2887 }, { "epoch": 0.985665529010239, "grad_norm": 3.220104455947876, "learning_rate": 0.0006714448236632537, "loss": 6.4713, "step": 2888 }, { "epoch": 0.9860068259385666, "grad_norm": 3.5669116973876953, "learning_rate": 0.0006713310580204778, "loss": 5.612, "step": 2889 }, { "epoch": 0.9863481228668942, "grad_norm": 3.239811897277832, "learning_rate": 0.0006712172923777019, "loss": 6.8196, "step": 2890 }, { "epoch": 0.9866894197952218, "grad_norm": 3.4331865310668945, "learning_rate": 0.000671103526734926, "loss": 6.6693, "step": 2891 }, { "epoch": 0.9870307167235495, "grad_norm": 3.3848814964294434, "learning_rate": 0.0006709897610921501, "loss": 7.0898, "step": 2892 }, { "epoch": 0.9873720136518771, "grad_norm": 3.2575674057006836, "learning_rate": 0.0006708759954493743, "loss": 6.7033, "step": 2893 }, { "epoch": 0.9877133105802047, "grad_norm": 3.440713882446289, "learning_rate": 0.0006707622298065984, "loss": 6.4287, "step": 2894 }, { "epoch": 0.9880546075085325, "grad_norm": 3.2217304706573486, "learning_rate": 0.0006706484641638225, "loss": 6.721, "step": 2895 }, { "epoch": 0.9883959044368601, "grad_norm": 3.2484257221221924, "learning_rate": 0.0006705346985210466, "loss": 6.8919, "step": 2896 }, { "epoch": 0.9887372013651877, "grad_norm": 3.206747055053711, "learning_rate": 0.0006704209328782707, "loss": 6.5002, "step": 2897 }, { "epoch": 0.9890784982935154, "grad_norm": 3.30210280418396, "learning_rate": 0.0006703071672354949, "loss": 6.0187, "step": 2898 }, { "epoch": 0.989419795221843, "grad_norm": 2.3465044498443604, "learning_rate": 0.0006701934015927191, "loss": 3.2803, "step": 2899 }, { "epoch": 0.9897610921501706, "grad_norm": 3.455019950866699, "learning_rate": 0.0006700796359499432, "loss": 6.937, "step": 2900 }, { "epoch": 0.9901023890784983, "grad_norm": 3.3712611198425293, "learning_rate": 0.0006699658703071673, "loss": 6.6826, "step": 2901 }, { "epoch": 0.990443686006826, "grad_norm": 3.372312068939209, "learning_rate": 0.0006698521046643914, "loss": 6.529, "step": 2902 }, { "epoch": 0.9907849829351536, "grad_norm": 3.750188112258911, "learning_rate": 0.0006697383390216155, "loss": 6.4456, "step": 2903 }, { "epoch": 0.9911262798634812, "grad_norm": 3.316756010055542, "learning_rate": 0.0006696245733788396, "loss": 6.311, "step": 2904 }, { "epoch": 0.9914675767918089, "grad_norm": 3.249242067337036, "learning_rate": 0.0006695108077360638, "loss": 6.9939, "step": 2905 }, { "epoch": 0.9918088737201365, "grad_norm": 3.5320539474487305, "learning_rate": 0.0006693970420932879, "loss": 6.3807, "step": 2906 }, { "epoch": 0.9921501706484641, "grad_norm": 3.2891733646392822, "learning_rate": 0.000669283276450512, "loss": 6.5872, "step": 2907 }, { "epoch": 0.9924914675767919, "grad_norm": 3.310703754425049, "learning_rate": 0.000669169510807736, "loss": 6.7432, "step": 2908 }, { "epoch": 0.9928327645051195, "grad_norm": 3.4947831630706787, "learning_rate": 0.0006690557451649601, "loss": 6.6943, "step": 2909 }, { "epoch": 0.9931740614334471, "grad_norm": 6.251656532287598, "learning_rate": 0.0006689419795221842, "loss": 5.9758, "step": 2910 }, { "epoch": 0.9935153583617747, "grad_norm": 3.236522912979126, "learning_rate": 0.0006688282138794084, "loss": 6.7063, "step": 2911 }, { "epoch": 0.9938566552901024, "grad_norm": 3.4270472526550293, "learning_rate": 0.0006687144482366325, "loss": 6.9056, "step": 2912 }, { "epoch": 0.99419795221843, "grad_norm": 3.3885722160339355, "learning_rate": 0.0006686006825938566, "loss": 6.6033, "step": 2913 }, { "epoch": 0.9945392491467577, "grad_norm": 3.3846211433410645, "learning_rate": 0.0006684869169510807, "loss": 6.8574, "step": 2914 }, { "epoch": 0.9948805460750854, "grad_norm": 3.4704039096832275, "learning_rate": 0.0006683731513083049, "loss": 6.2599, "step": 2915 }, { "epoch": 0.995221843003413, "grad_norm": 3.076455593109131, "learning_rate": 0.0006682593856655291, "loss": 6.8536, "step": 2916 }, { "epoch": 0.9955631399317406, "grad_norm": 3.1758480072021484, "learning_rate": 0.0006681456200227532, "loss": 6.7996, "step": 2917 }, { "epoch": 0.9959044368600682, "grad_norm": 4.0002851486206055, "learning_rate": 0.0006680318543799773, "loss": 5.2598, "step": 2918 }, { "epoch": 0.9962457337883959, "grad_norm": 5.545552730560303, "learning_rate": 0.0006679180887372014, "loss": 5.8838, "step": 2919 }, { "epoch": 0.9965870307167235, "grad_norm": 3.345064163208008, "learning_rate": 0.0006678043230944255, "loss": 6.566, "step": 2920 }, { "epoch": 0.9969283276450512, "grad_norm": 3.4302754402160645, "learning_rate": 0.0006676905574516496, "loss": 7.0484, "step": 2921 }, { "epoch": 0.9972696245733789, "grad_norm": 3.445356607437134, "learning_rate": 0.0006675767918088738, "loss": 6.7516, "step": 2922 }, { "epoch": 0.9976109215017065, "grad_norm": 3.258596181869507, "learning_rate": 0.0006674630261660979, "loss": 6.5921, "step": 2923 }, { "epoch": 0.9979522184300341, "grad_norm": 3.594090700149536, "learning_rate": 0.000667349260523322, "loss": 6.6817, "step": 2924 }, { "epoch": 0.9982935153583617, "grad_norm": 4.468551158905029, "learning_rate": 0.0006672354948805461, "loss": 6.0565, "step": 2925 }, { "epoch": 0.9986348122866894, "grad_norm": 3.330815076828003, "learning_rate": 0.0006671217292377702, "loss": 6.2681, "step": 2926 }, { "epoch": 0.9989761092150171, "grad_norm": 3.3018605709075928, "learning_rate": 0.0006670079635949942, "loss": 6.4861, "step": 2927 }, { "epoch": 0.9993174061433447, "grad_norm": 3.5475411415100098, "learning_rate": 0.0006668941979522184, "loss": 6.3887, "step": 2928 }, { "epoch": 0.9996587030716724, "grad_norm": 3.245288133621216, "learning_rate": 0.0006667804323094425, "loss": 6.7998, "step": 2929 }, { "epoch": 1.0, "grad_norm": 3.472442388534546, "learning_rate": 0.0006666666666666666, "loss": 6.3197, "step": 2930 }, { "epoch": 1.0003412969283276, "grad_norm": 3.2570908069610596, "learning_rate": 0.0006665529010238907, "loss": 6.4722, "step": 2931 }, { "epoch": 1.0006825938566553, "grad_norm": 3.1390857696533203, "learning_rate": 0.0006664391353811149, "loss": 6.8016, "step": 2932 }, { "epoch": 1.0010238907849829, "grad_norm": 3.2926814556121826, "learning_rate": 0.0006663253697383391, "loss": 6.5122, "step": 2933 }, { "epoch": 1.0013651877133105, "grad_norm": 4.434762477874756, "learning_rate": 0.0006662116040955632, "loss": 4.9442, "step": 2934 }, { "epoch": 1.0017064846416381, "grad_norm": 3.5702288150787354, "learning_rate": 0.0006660978384527873, "loss": 5.8109, "step": 2935 }, { "epoch": 1.0020477815699658, "grad_norm": 3.399153709411621, "learning_rate": 0.0006659840728100114, "loss": 7.0274, "step": 2936 }, { "epoch": 1.0023890784982936, "grad_norm": 3.211782455444336, "learning_rate": 0.0006658703071672355, "loss": 6.4682, "step": 2937 }, { "epoch": 1.0027303754266212, "grad_norm": 3.3782007694244385, "learning_rate": 0.0006657565415244596, "loss": 5.5324, "step": 2938 }, { "epoch": 1.0030716723549489, "grad_norm": 3.238039493560791, "learning_rate": 0.0006656427758816838, "loss": 6.6212, "step": 2939 }, { "epoch": 1.0034129692832765, "grad_norm": 3.5137596130371094, "learning_rate": 0.0006655290102389079, "loss": 6.15, "step": 2940 }, { "epoch": 1.0037542662116041, "grad_norm": 4.969235897064209, "learning_rate": 0.000665415244596132, "loss": 6.0722, "step": 2941 }, { "epoch": 1.0040955631399318, "grad_norm": 3.3056838512420654, "learning_rate": 0.0006653014789533561, "loss": 7.2703, "step": 2942 }, { "epoch": 1.0044368600682594, "grad_norm": 3.27779221534729, "learning_rate": 0.0006651877133105802, "loss": 6.3094, "step": 2943 }, { "epoch": 1.004778156996587, "grad_norm": 3.4663007259368896, "learning_rate": 0.0006650739476678043, "loss": 6.4624, "step": 2944 }, { "epoch": 1.0051194539249146, "grad_norm": 3.294342279434204, "learning_rate": 0.0006649601820250285, "loss": 6.1171, "step": 2945 }, { "epoch": 1.0054607508532423, "grad_norm": 3.324336051940918, "learning_rate": 0.0006648464163822526, "loss": 6.8682, "step": 2946 }, { "epoch": 1.00580204778157, "grad_norm": 3.631251335144043, "learning_rate": 0.0006647326507394766, "loss": 5.8187, "step": 2947 }, { "epoch": 1.0061433447098975, "grad_norm": 4.85386848449707, "learning_rate": 0.0006646188850967008, "loss": 5.7936, "step": 2948 }, { "epoch": 1.0064846416382252, "grad_norm": 3.450941324234009, "learning_rate": 0.0006645051194539249, "loss": 6.8983, "step": 2949 }, { "epoch": 1.006825938566553, "grad_norm": 3.294748544692993, "learning_rate": 0.000664391353811149, "loss": 5.9347, "step": 2950 }, { "epoch": 1.0071672354948806, "grad_norm": 3.2384605407714844, "learning_rate": 0.0006642775881683732, "loss": 6.8953, "step": 2951 }, { "epoch": 1.0075085324232083, "grad_norm": 3.294921875, "learning_rate": 0.0006641638225255973, "loss": 6.7133, "step": 2952 }, { "epoch": 1.0078498293515359, "grad_norm": 3.22776198387146, "learning_rate": 0.0006640500568828214, "loss": 6.7304, "step": 2953 }, { "epoch": 1.0081911262798635, "grad_norm": 4.246631145477295, "learning_rate": 0.0006639362912400455, "loss": 4.3664, "step": 2954 }, { "epoch": 1.0085324232081911, "grad_norm": 3.2894020080566406, "learning_rate": 0.0006638225255972696, "loss": 6.795, "step": 2955 }, { "epoch": 1.0088737201365188, "grad_norm": 3.3010354042053223, "learning_rate": 0.0006637087599544938, "loss": 6.5039, "step": 2956 }, { "epoch": 1.0092150170648464, "grad_norm": 3.403155565261841, "learning_rate": 0.0006635949943117179, "loss": 6.5983, "step": 2957 }, { "epoch": 1.009556313993174, "grad_norm": 3.287907123565674, "learning_rate": 0.000663481228668942, "loss": 5.8215, "step": 2958 }, { "epoch": 1.0098976109215017, "grad_norm": 3.528432607650757, "learning_rate": 0.0006633674630261661, "loss": 5.889, "step": 2959 }, { "epoch": 1.0102389078498293, "grad_norm": 3.363466739654541, "learning_rate": 0.0006632536973833902, "loss": 6.8933, "step": 2960 }, { "epoch": 1.010580204778157, "grad_norm": 3.36714243888855, "learning_rate": 0.0006631399317406143, "loss": 6.4269, "step": 2961 }, { "epoch": 1.0109215017064845, "grad_norm": 3.2045083045959473, "learning_rate": 0.0006630261660978385, "loss": 6.6594, "step": 2962 }, { "epoch": 1.0112627986348124, "grad_norm": 3.256004571914673, "learning_rate": 0.0006629124004550626, "loss": 6.5809, "step": 2963 }, { "epoch": 1.01160409556314, "grad_norm": 3.3133018016815186, "learning_rate": 0.0006627986348122868, "loss": 6.3003, "step": 2964 }, { "epoch": 1.0119453924914676, "grad_norm": 3.8093559741973877, "learning_rate": 0.0006626848691695109, "loss": 6.2441, "step": 2965 }, { "epoch": 1.0122866894197953, "grad_norm": 3.227069139480591, "learning_rate": 0.0006625711035267349, "loss": 6.5614, "step": 2966 }, { "epoch": 1.012627986348123, "grad_norm": 3.215060234069824, "learning_rate": 0.000662457337883959, "loss": 6.2936, "step": 2967 }, { "epoch": 1.0129692832764505, "grad_norm": 5.511234283447266, "learning_rate": 0.0006623435722411832, "loss": 6.3446, "step": 2968 }, { "epoch": 1.0133105802047782, "grad_norm": 3.6093709468841553, "learning_rate": 0.0006622298065984073, "loss": 7.026, "step": 2969 }, { "epoch": 1.0136518771331058, "grad_norm": 9.460335731506348, "learning_rate": 0.0006621160409556314, "loss": 8.476, "step": 2970 }, { "epoch": 1.0139931740614334, "grad_norm": 3.619230031967163, "learning_rate": 0.0006620022753128555, "loss": 6.4041, "step": 2971 }, { "epoch": 1.014334470989761, "grad_norm": 3.284637928009033, "learning_rate": 0.0006618885096700796, "loss": 6.6486, "step": 2972 }, { "epoch": 1.0146757679180887, "grad_norm": 3.4304845333099365, "learning_rate": 0.0006617747440273038, "loss": 5.978, "step": 2973 }, { "epoch": 1.0150170648464163, "grad_norm": 3.1258368492126465, "learning_rate": 0.0006616609783845279, "loss": 6.6721, "step": 2974 }, { "epoch": 1.015358361774744, "grad_norm": 3.1004960536956787, "learning_rate": 0.000661547212741752, "loss": 6.6683, "step": 2975 }, { "epoch": 1.0156996587030718, "grad_norm": 3.346923351287842, "learning_rate": 0.0006614334470989761, "loss": 6.4257, "step": 2976 }, { "epoch": 1.0160409556313994, "grad_norm": 3.8876302242279053, "learning_rate": 0.0006613196814562002, "loss": 6.0568, "step": 2977 }, { "epoch": 1.016382252559727, "grad_norm": 3.5111100673675537, "learning_rate": 0.0006612059158134243, "loss": 6.9239, "step": 2978 }, { "epoch": 1.0167235494880547, "grad_norm": 3.2837634086608887, "learning_rate": 0.0006610921501706485, "loss": 6.5992, "step": 2979 }, { "epoch": 1.0170648464163823, "grad_norm": 3.1712963581085205, "learning_rate": 0.0006609783845278727, "loss": 6.7537, "step": 2980 }, { "epoch": 1.01740614334471, "grad_norm": 3.12092924118042, "learning_rate": 0.0006608646188850968, "loss": 6.6126, "step": 2981 }, { "epoch": 1.0177474402730375, "grad_norm": 3.164799928665161, "learning_rate": 0.0006607508532423209, "loss": 6.2507, "step": 2982 }, { "epoch": 1.0180887372013652, "grad_norm": 3.2638497352600098, "learning_rate": 0.000660637087599545, "loss": 6.1015, "step": 2983 }, { "epoch": 1.0184300341296928, "grad_norm": 3.560190200805664, "learning_rate": 0.0006605233219567691, "loss": 6.388, "step": 2984 }, { "epoch": 1.0187713310580204, "grad_norm": 3.2741174697875977, "learning_rate": 0.0006604095563139933, "loss": 6.9384, "step": 2985 }, { "epoch": 1.019112627986348, "grad_norm": 6.3302788734436035, "learning_rate": 0.0006602957906712173, "loss": 5.3906, "step": 2986 }, { "epoch": 1.0194539249146757, "grad_norm": 3.5244359970092773, "learning_rate": 0.0006601820250284414, "loss": 6.3372, "step": 2987 }, { "epoch": 1.0197952218430033, "grad_norm": 3.4077272415161133, "learning_rate": 0.0006600682593856655, "loss": 6.0502, "step": 2988 }, { "epoch": 1.0201365187713312, "grad_norm": 3.206965446472168, "learning_rate": 0.0006599544937428896, "loss": 6.3437, "step": 2989 }, { "epoch": 1.0204778156996588, "grad_norm": 3.215886116027832, "learning_rate": 0.0006598407281001137, "loss": 6.6096, "step": 2990 }, { "epoch": 1.0208191126279864, "grad_norm": 3.386878728866577, "learning_rate": 0.0006597269624573379, "loss": 6.5003, "step": 2991 }, { "epoch": 1.021160409556314, "grad_norm": 3.419936180114746, "learning_rate": 0.000659613196814562, "loss": 6.5968, "step": 2992 }, { "epoch": 1.0215017064846417, "grad_norm": 3.197819471359253, "learning_rate": 0.0006594994311717861, "loss": 6.9043, "step": 2993 }, { "epoch": 1.0218430034129693, "grad_norm": 3.7721807956695557, "learning_rate": 0.0006593856655290102, "loss": 5.7042, "step": 2994 }, { "epoch": 1.022184300341297, "grad_norm": 3.4664015769958496, "learning_rate": 0.0006592718998862343, "loss": 6.1596, "step": 2995 }, { "epoch": 1.0225255972696246, "grad_norm": 3.259398937225342, "learning_rate": 0.0006591581342434585, "loss": 6.705, "step": 2996 }, { "epoch": 1.0228668941979522, "grad_norm": 3.5420732498168945, "learning_rate": 0.0006590443686006827, "loss": 5.5468, "step": 2997 }, { "epoch": 1.0232081911262798, "grad_norm": 3.332111120223999, "learning_rate": 0.0006589306029579068, "loss": 6.9041, "step": 2998 }, { "epoch": 1.0235494880546074, "grad_norm": 3.29581618309021, "learning_rate": 0.0006588168373151309, "loss": 6.0205, "step": 2999 }, { "epoch": 1.023890784982935, "grad_norm": 3.214153289794922, "learning_rate": 0.000658703071672355, "loss": 6.4507, "step": 3000 }, { "epoch": 1.0242320819112627, "grad_norm": 3.5588912963867188, "learning_rate": 0.0006585893060295791, "loss": 6.4715, "step": 3001 }, { "epoch": 1.0245733788395905, "grad_norm": 3.5324950218200684, "learning_rate": 0.0006584755403868033, "loss": 6.2474, "step": 3002 }, { "epoch": 1.0249146757679182, "grad_norm": 3.397630453109741, "learning_rate": 0.0006583617747440274, "loss": 6.9605, "step": 3003 }, { "epoch": 1.0252559726962458, "grad_norm": 3.475452423095703, "learning_rate": 0.0006582480091012515, "loss": 6.6207, "step": 3004 }, { "epoch": 1.0255972696245734, "grad_norm": 3.4406540393829346, "learning_rate": 0.0006581342434584755, "loss": 6.6004, "step": 3005 }, { "epoch": 1.025938566552901, "grad_norm": 5.332218170166016, "learning_rate": 0.0006580204778156996, "loss": 5.9674, "step": 3006 }, { "epoch": 1.0262798634812287, "grad_norm": 3.4023447036743164, "learning_rate": 0.0006579067121729237, "loss": 6.3335, "step": 3007 }, { "epoch": 1.0266211604095563, "grad_norm": 3.25628399848938, "learning_rate": 0.0006577929465301479, "loss": 6.2624, "step": 3008 }, { "epoch": 1.026962457337884, "grad_norm": 3.3259971141815186, "learning_rate": 0.000657679180887372, "loss": 6.3255, "step": 3009 }, { "epoch": 1.0273037542662116, "grad_norm": 4.986135482788086, "learning_rate": 0.0006575654152445961, "loss": 5.4787, "step": 3010 }, { "epoch": 1.0276450511945392, "grad_norm": 3.862529754638672, "learning_rate": 0.0006574516496018202, "loss": 5.2939, "step": 3011 }, { "epoch": 1.0279863481228668, "grad_norm": 3.2690622806549072, "learning_rate": 0.0006573378839590443, "loss": 6.5039, "step": 3012 }, { "epoch": 1.0283276450511944, "grad_norm": 3.9144980907440186, "learning_rate": 0.0006572241183162685, "loss": 5.961, "step": 3013 }, { "epoch": 1.028668941979522, "grad_norm": 3.58562970161438, "learning_rate": 0.0006571103526734927, "loss": 5.8142, "step": 3014 }, { "epoch": 1.02901023890785, "grad_norm": 3.2888505458831787, "learning_rate": 0.0006569965870307168, "loss": 6.5148, "step": 3015 }, { "epoch": 1.0293515358361776, "grad_norm": 3.2708396911621094, "learning_rate": 0.0006568828213879409, "loss": 6.4559, "step": 3016 }, { "epoch": 1.0296928327645052, "grad_norm": 3.5158915519714355, "learning_rate": 0.000656769055745165, "loss": 6.3544, "step": 3017 }, { "epoch": 1.0300341296928328, "grad_norm": 3.263108968734741, "learning_rate": 0.0006566552901023891, "loss": 6.443, "step": 3018 }, { "epoch": 1.0303754266211604, "grad_norm": 3.9215052127838135, "learning_rate": 0.0006565415244596133, "loss": 5.9249, "step": 3019 }, { "epoch": 1.030716723549488, "grad_norm": 3.202061653137207, "learning_rate": 0.0006564277588168374, "loss": 6.5584, "step": 3020 }, { "epoch": 1.0310580204778157, "grad_norm": 14.988799095153809, "learning_rate": 0.0006563139931740615, "loss": 6.4804, "step": 3021 }, { "epoch": 1.0313993174061433, "grad_norm": 11.352145195007324, "learning_rate": 0.0006562002275312856, "loss": 7.3238, "step": 3022 }, { "epoch": 1.031740614334471, "grad_norm": 3.537874221801758, "learning_rate": 0.0006560864618885097, "loss": 7.0763, "step": 3023 }, { "epoch": 1.0320819112627986, "grad_norm": 3.5525970458984375, "learning_rate": 0.0006559726962457337, "loss": 6.6374, "step": 3024 }, { "epoch": 1.0324232081911262, "grad_norm": 3.3168790340423584, "learning_rate": 0.0006558589306029579, "loss": 6.5269, "step": 3025 }, { "epoch": 1.0327645051194538, "grad_norm": 5.2062835693359375, "learning_rate": 0.000655745164960182, "loss": 6.9295, "step": 3026 }, { "epoch": 1.0331058020477815, "grad_norm": 3.208059310913086, "learning_rate": 0.0006556313993174061, "loss": 7.0108, "step": 3027 }, { "epoch": 1.0334470989761093, "grad_norm": 3.500671148300171, "learning_rate": 0.0006555176336746302, "loss": 6.1865, "step": 3028 }, { "epoch": 1.033788395904437, "grad_norm": 5.409757137298584, "learning_rate": 0.0006554038680318543, "loss": 6.2085, "step": 3029 }, { "epoch": 1.0341296928327646, "grad_norm": 3.258284091949463, "learning_rate": 0.0006552901023890784, "loss": 6.0585, "step": 3030 }, { "epoch": 1.0344709897610922, "grad_norm": 3.4100899696350098, "learning_rate": 0.0006551763367463027, "loss": 6.069, "step": 3031 }, { "epoch": 1.0348122866894198, "grad_norm": 3.146129846572876, "learning_rate": 0.0006550625711035268, "loss": 5.9704, "step": 3032 }, { "epoch": 1.0351535836177475, "grad_norm": 3.3420863151550293, "learning_rate": 0.0006549488054607509, "loss": 7.3534, "step": 3033 }, { "epoch": 1.035494880546075, "grad_norm": 3.4072580337524414, "learning_rate": 0.000654835039817975, "loss": 6.6606, "step": 3034 }, { "epoch": 1.0358361774744027, "grad_norm": 3.3108816146850586, "learning_rate": 0.0006547212741751991, "loss": 6.3785, "step": 3035 }, { "epoch": 1.0361774744027303, "grad_norm": 3.2887344360351562, "learning_rate": 0.0006546075085324233, "loss": 7.0955, "step": 3036 }, { "epoch": 1.036518771331058, "grad_norm": 6.6958842277526855, "learning_rate": 0.0006544937428896474, "loss": 5.6834, "step": 3037 }, { "epoch": 1.0368600682593856, "grad_norm": 3.3041765689849854, "learning_rate": 0.0006543799772468715, "loss": 6.5706, "step": 3038 }, { "epoch": 1.0372013651877132, "grad_norm": 3.6999776363372803, "learning_rate": 0.0006542662116040956, "loss": 6.5919, "step": 3039 }, { "epoch": 1.0375426621160408, "grad_norm": 3.507978916168213, "learning_rate": 0.0006541524459613197, "loss": 6.4322, "step": 3040 }, { "epoch": 1.0378839590443687, "grad_norm": 3.151499032974243, "learning_rate": 0.0006540386803185438, "loss": 6.5041, "step": 3041 }, { "epoch": 1.0382252559726963, "grad_norm": 3.339322805404663, "learning_rate": 0.000653924914675768, "loss": 6.2779, "step": 3042 }, { "epoch": 1.038566552901024, "grad_norm": 4.641025543212891, "learning_rate": 0.0006538111490329921, "loss": 5.4317, "step": 3043 }, { "epoch": 1.0389078498293516, "grad_norm": 3.3344995975494385, "learning_rate": 0.0006536973833902161, "loss": 6.5923, "step": 3044 }, { "epoch": 1.0392491467576792, "grad_norm": 3.2587029933929443, "learning_rate": 0.0006535836177474402, "loss": 6.4993, "step": 3045 }, { "epoch": 1.0395904436860068, "grad_norm": 4.462654113769531, "learning_rate": 0.0006534698521046643, "loss": 6.0649, "step": 3046 }, { "epoch": 1.0399317406143345, "grad_norm": 3.293430805206299, "learning_rate": 0.0006533560864618884, "loss": 5.8758, "step": 3047 }, { "epoch": 1.040273037542662, "grad_norm": 3.2306947708129883, "learning_rate": 0.0006532423208191127, "loss": 6.3128, "step": 3048 }, { "epoch": 1.0406143344709897, "grad_norm": 3.3286349773406982, "learning_rate": 0.0006531285551763368, "loss": 6.7096, "step": 3049 }, { "epoch": 1.0409556313993173, "grad_norm": 3.1706297397613525, "learning_rate": 0.0006530147895335609, "loss": 6.7695, "step": 3050 }, { "epoch": 1.041296928327645, "grad_norm": 3.0948450565338135, "learning_rate": 0.000652901023890785, "loss": 6.4126, "step": 3051 }, { "epoch": 1.0416382252559726, "grad_norm": 3.1173272132873535, "learning_rate": 0.0006527872582480091, "loss": 6.961, "step": 3052 }, { "epoch": 1.0419795221843002, "grad_norm": 3.5899710655212402, "learning_rate": 0.0006526734926052332, "loss": 5.9567, "step": 3053 }, { "epoch": 1.042320819112628, "grad_norm": 3.2167882919311523, "learning_rate": 0.0006525597269624574, "loss": 6.2195, "step": 3054 }, { "epoch": 1.0426621160409557, "grad_norm": 3.2807223796844482, "learning_rate": 0.0006524459613196815, "loss": 6.4943, "step": 3055 }, { "epoch": 1.0430034129692833, "grad_norm": 3.2400906085968018, "learning_rate": 0.0006523321956769056, "loss": 6.4222, "step": 3056 }, { "epoch": 1.043344709897611, "grad_norm": 3.420193672180176, "learning_rate": 0.0006522184300341297, "loss": 6.3227, "step": 3057 }, { "epoch": 1.0436860068259386, "grad_norm": 3.231858015060425, "learning_rate": 0.0006521046643913538, "loss": 7.0397, "step": 3058 }, { "epoch": 1.0440273037542662, "grad_norm": 3.291337490081787, "learning_rate": 0.000651990898748578, "loss": 5.907, "step": 3059 }, { "epoch": 1.0443686006825939, "grad_norm": 3.354321241378784, "learning_rate": 0.0006518771331058021, "loss": 6.532, "step": 3060 }, { "epoch": 1.0447098976109215, "grad_norm": 3.303192377090454, "learning_rate": 0.0006517633674630262, "loss": 6.2837, "step": 3061 }, { "epoch": 1.045051194539249, "grad_norm": 3.2552897930145264, "learning_rate": 0.0006516496018202503, "loss": 6.9191, "step": 3062 }, { "epoch": 1.0453924914675767, "grad_norm": 3.7623484134674072, "learning_rate": 0.0006515358361774743, "loss": 6.0883, "step": 3063 }, { "epoch": 1.0457337883959044, "grad_norm": 3.314925193786621, "learning_rate": 0.0006514220705346984, "loss": 6.4738, "step": 3064 }, { "epoch": 1.046075085324232, "grad_norm": 3.2855207920074463, "learning_rate": 0.0006513083048919227, "loss": 5.971, "step": 3065 }, { "epoch": 1.0464163822525596, "grad_norm": 3.406338930130005, "learning_rate": 0.0006511945392491468, "loss": 6.8355, "step": 3066 }, { "epoch": 1.0467576791808875, "grad_norm": 3.4004054069519043, "learning_rate": 0.0006510807736063709, "loss": 6.2127, "step": 3067 }, { "epoch": 1.047098976109215, "grad_norm": 3.392861843109131, "learning_rate": 0.000650967007963595, "loss": 6.3045, "step": 3068 }, { "epoch": 1.0474402730375427, "grad_norm": 3.3509092330932617, "learning_rate": 0.0006508532423208191, "loss": 6.2746, "step": 3069 }, { "epoch": 1.0477815699658704, "grad_norm": 3.2432587146759033, "learning_rate": 0.0006507394766780432, "loss": 6.21, "step": 3070 }, { "epoch": 1.048122866894198, "grad_norm": 8.192042350769043, "learning_rate": 0.0006506257110352674, "loss": 6.7381, "step": 3071 }, { "epoch": 1.0484641638225256, "grad_norm": 5.284655570983887, "learning_rate": 0.0006505119453924915, "loss": 6.5578, "step": 3072 }, { "epoch": 1.0488054607508532, "grad_norm": 3.650186538696289, "learning_rate": 0.0006503981797497156, "loss": 6.2315, "step": 3073 }, { "epoch": 1.0491467576791809, "grad_norm": 3.323975086212158, "learning_rate": 0.0006502844141069397, "loss": 6.7739, "step": 3074 }, { "epoch": 1.0494880546075085, "grad_norm": 3.2510526180267334, "learning_rate": 0.0006501706484641638, "loss": 6.4504, "step": 3075 }, { "epoch": 1.0498293515358361, "grad_norm": 7.123908996582031, "learning_rate": 0.000650056882821388, "loss": 7.3726, "step": 3076 }, { "epoch": 1.0501706484641637, "grad_norm": 3.3244235515594482, "learning_rate": 0.0006499431171786121, "loss": 5.9337, "step": 3077 }, { "epoch": 1.0505119453924914, "grad_norm": 4.019181728363037, "learning_rate": 0.0006498293515358362, "loss": 5.851, "step": 3078 }, { "epoch": 1.050853242320819, "grad_norm": 3.2663567066192627, "learning_rate": 0.0006497155858930603, "loss": 6.808, "step": 3079 }, { "epoch": 1.0511945392491469, "grad_norm": 3.2824909687042236, "learning_rate": 0.0006496018202502844, "loss": 6.8706, "step": 3080 }, { "epoch": 1.0515358361774745, "grad_norm": 3.404869794845581, "learning_rate": 0.0006494880546075086, "loss": 6.5542, "step": 3081 }, { "epoch": 1.051877133105802, "grad_norm": 3.04349684715271, "learning_rate": 0.0006493742889647328, "loss": 6.3294, "step": 3082 }, { "epoch": 1.0522184300341297, "grad_norm": 3.5680627822875977, "learning_rate": 0.0006492605233219568, "loss": 6.0419, "step": 3083 }, { "epoch": 1.0525597269624574, "grad_norm": 3.081714153289795, "learning_rate": 0.0006491467576791809, "loss": 6.4956, "step": 3084 }, { "epoch": 1.052901023890785, "grad_norm": 3.4470624923706055, "learning_rate": 0.000649032992036405, "loss": 6.9409, "step": 3085 }, { "epoch": 1.0532423208191126, "grad_norm": 3.1926770210266113, "learning_rate": 0.0006489192263936291, "loss": 6.461, "step": 3086 }, { "epoch": 1.0535836177474402, "grad_norm": 10.409677505493164, "learning_rate": 0.0006488054607508532, "loss": 5.6452, "step": 3087 }, { "epoch": 1.0539249146757679, "grad_norm": 3.4879887104034424, "learning_rate": 0.0006486916951080774, "loss": 6.7937, "step": 3088 }, { "epoch": 1.0542662116040955, "grad_norm": 3.3193447589874268, "learning_rate": 0.0006485779294653015, "loss": 6.813, "step": 3089 }, { "epoch": 1.0546075085324231, "grad_norm": 3.5224204063415527, "learning_rate": 0.0006484641638225256, "loss": 6.5331, "step": 3090 }, { "epoch": 1.0549488054607508, "grad_norm": 3.5668528079986572, "learning_rate": 0.0006483503981797497, "loss": 5.8162, "step": 3091 }, { "epoch": 1.0552901023890784, "grad_norm": 3.3257477283477783, "learning_rate": 0.0006482366325369738, "loss": 5.6231, "step": 3092 }, { "epoch": 1.0556313993174062, "grad_norm": 3.734152317047119, "learning_rate": 0.0006481228668941979, "loss": 6.3863, "step": 3093 }, { "epoch": 1.0559726962457339, "grad_norm": 3.2832605838775635, "learning_rate": 0.0006480091012514221, "loss": 6.9209, "step": 3094 }, { "epoch": 1.0563139931740615, "grad_norm": 3.4488635063171387, "learning_rate": 0.0006478953356086462, "loss": 6.6284, "step": 3095 }, { "epoch": 1.0566552901023891, "grad_norm": 3.6133999824523926, "learning_rate": 0.0006477815699658703, "loss": 6.6132, "step": 3096 }, { "epoch": 1.0569965870307167, "grad_norm": 3.2564053535461426, "learning_rate": 0.0006476678043230944, "loss": 6.4362, "step": 3097 }, { "epoch": 1.0573378839590444, "grad_norm": 3.2632007598876953, "learning_rate": 0.0006475540386803186, "loss": 6.6581, "step": 3098 }, { "epoch": 1.057679180887372, "grad_norm": 3.2787528038024902, "learning_rate": 0.0006474402730375428, "loss": 6.1405, "step": 3099 }, { "epoch": 1.0580204778156996, "grad_norm": 3.1381187438964844, "learning_rate": 0.0006473265073947669, "loss": 6.5018, "step": 3100 }, { "epoch": 1.0583617747440273, "grad_norm": 6.568216800689697, "learning_rate": 0.000647212741751991, "loss": 5.6483, "step": 3101 }, { "epoch": 1.058703071672355, "grad_norm": 3.281235694885254, "learning_rate": 0.000647098976109215, "loss": 6.5081, "step": 3102 }, { "epoch": 1.0590443686006825, "grad_norm": 3.3020272254943848, "learning_rate": 0.0006469852104664391, "loss": 6.0661, "step": 3103 }, { "epoch": 1.0593856655290101, "grad_norm": 3.2067642211914062, "learning_rate": 0.0006468714448236632, "loss": 6.4818, "step": 3104 }, { "epoch": 1.0597269624573378, "grad_norm": 8.392196655273438, "learning_rate": 0.0006467576791808874, "loss": 6.1493, "step": 3105 }, { "epoch": 1.0600682593856656, "grad_norm": 4.222563743591309, "learning_rate": 0.0006466439135381115, "loss": 6.2424, "step": 3106 }, { "epoch": 1.0604095563139933, "grad_norm": 3.43169903755188, "learning_rate": 0.0006465301478953356, "loss": 6.2598, "step": 3107 }, { "epoch": 1.0607508532423209, "grad_norm": 3.4820284843444824, "learning_rate": 0.0006464163822525597, "loss": 6.6193, "step": 3108 }, { "epoch": 1.0610921501706485, "grad_norm": 3.227473497390747, "learning_rate": 0.0006463026166097838, "loss": 6.5877, "step": 3109 }, { "epoch": 1.0614334470989761, "grad_norm": 3.3051085472106934, "learning_rate": 0.0006461888509670079, "loss": 6.1434, "step": 3110 }, { "epoch": 1.0617747440273038, "grad_norm": 4.006912708282471, "learning_rate": 0.0006460750853242321, "loss": 6.5416, "step": 3111 }, { "epoch": 1.0621160409556314, "grad_norm": 3.184049129486084, "learning_rate": 0.0006459613196814562, "loss": 6.7029, "step": 3112 }, { "epoch": 1.062457337883959, "grad_norm": 3.2582833766937256, "learning_rate": 0.0006458475540386803, "loss": 6.4838, "step": 3113 }, { "epoch": 1.0627986348122866, "grad_norm": 3.350537061691284, "learning_rate": 0.0006457337883959044, "loss": 6.6411, "step": 3114 }, { "epoch": 1.0631399317406143, "grad_norm": 4.094681739807129, "learning_rate": 0.0006456200227531286, "loss": 5.957, "step": 3115 }, { "epoch": 1.063481228668942, "grad_norm": 3.8731682300567627, "learning_rate": 0.0006455062571103528, "loss": 6.3783, "step": 3116 }, { "epoch": 1.0638225255972695, "grad_norm": 3.384065866470337, "learning_rate": 0.0006453924914675769, "loss": 6.6837, "step": 3117 }, { "epoch": 1.0641638225255972, "grad_norm": 3.379866361618042, "learning_rate": 0.000645278725824801, "loss": 6.4046, "step": 3118 }, { "epoch": 1.064505119453925, "grad_norm": 3.2628586292266846, "learning_rate": 0.0006451649601820251, "loss": 6.209, "step": 3119 }, { "epoch": 1.0648464163822526, "grad_norm": 3.4711813926696777, "learning_rate": 0.0006450511945392492, "loss": 6.1973, "step": 3120 }, { "epoch": 1.0651877133105803, "grad_norm": 3.7682406902313232, "learning_rate": 0.0006449374288964733, "loss": 6.1473, "step": 3121 }, { "epoch": 1.065529010238908, "grad_norm": 8.02403450012207, "learning_rate": 0.0006448236632536974, "loss": 5.49, "step": 3122 }, { "epoch": 1.0658703071672355, "grad_norm": 3.402454376220703, "learning_rate": 0.0006447098976109215, "loss": 6.6287, "step": 3123 }, { "epoch": 1.0662116040955631, "grad_norm": 4.036313056945801, "learning_rate": 0.0006445961319681456, "loss": 5.7235, "step": 3124 }, { "epoch": 1.0665529010238908, "grad_norm": 7.888774394989014, "learning_rate": 0.0006444823663253697, "loss": 4.691, "step": 3125 }, { "epoch": 1.0668941979522184, "grad_norm": 3.3642637729644775, "learning_rate": 0.0006443686006825938, "loss": 6.7421, "step": 3126 }, { "epoch": 1.067235494880546, "grad_norm": 3.545217990875244, "learning_rate": 0.0006442548350398179, "loss": 5.7046, "step": 3127 }, { "epoch": 1.0675767918088737, "grad_norm": 3.4235167503356934, "learning_rate": 0.0006441410693970421, "loss": 6.7823, "step": 3128 }, { "epoch": 1.0679180887372013, "grad_norm": 3.252849578857422, "learning_rate": 0.0006440273037542662, "loss": 7.001, "step": 3129 }, { "epoch": 1.068259385665529, "grad_norm": 5.600972652435303, "learning_rate": 0.0006439135381114903, "loss": 6.5534, "step": 3130 }, { "epoch": 1.0686006825938565, "grad_norm": 3.1004841327667236, "learning_rate": 0.0006437997724687144, "loss": 6.7081, "step": 3131 }, { "epoch": 1.0689419795221844, "grad_norm": 8.813851356506348, "learning_rate": 0.0006436860068259386, "loss": 4.8907, "step": 3132 }, { "epoch": 1.069283276450512, "grad_norm": 3.3041679859161377, "learning_rate": 0.0006435722411831627, "loss": 6.594, "step": 3133 }, { "epoch": 1.0696245733788396, "grad_norm": 3.500278949737549, "learning_rate": 0.0006434584755403869, "loss": 6.3211, "step": 3134 }, { "epoch": 1.0699658703071673, "grad_norm": 3.4029455184936523, "learning_rate": 0.000643344709897611, "loss": 6.3552, "step": 3135 }, { "epoch": 1.070307167235495, "grad_norm": 3.182002305984497, "learning_rate": 0.0006432309442548351, "loss": 6.8919, "step": 3136 }, { "epoch": 1.0706484641638225, "grad_norm": 3.758815050125122, "learning_rate": 0.0006431171786120592, "loss": 5.6043, "step": 3137 }, { "epoch": 1.0709897610921502, "grad_norm": 3.294699192047119, "learning_rate": 0.0006430034129692833, "loss": 6.3708, "step": 3138 }, { "epoch": 1.0713310580204778, "grad_norm": 3.3556272983551025, "learning_rate": 0.0006428896473265075, "loss": 6.6939, "step": 3139 }, { "epoch": 1.0716723549488054, "grad_norm": 3.1853625774383545, "learning_rate": 0.0006427758816837316, "loss": 6.1872, "step": 3140 }, { "epoch": 1.072013651877133, "grad_norm": 3.300252676010132, "learning_rate": 0.0006426621160409556, "loss": 6.771, "step": 3141 }, { "epoch": 1.0723549488054607, "grad_norm": 3.111130714416504, "learning_rate": 0.0006425483503981797, "loss": 6.3194, "step": 3142 }, { "epoch": 1.0726962457337883, "grad_norm": 3.808727741241455, "learning_rate": 0.0006424345847554038, "loss": 5.9753, "step": 3143 }, { "epoch": 1.073037542662116, "grad_norm": 3.226867437362671, "learning_rate": 0.0006423208191126279, "loss": 6.5513, "step": 3144 }, { "epoch": 1.0733788395904438, "grad_norm": 3.2645046710968018, "learning_rate": 0.0006422070534698521, "loss": 6.7096, "step": 3145 }, { "epoch": 1.0737201365187714, "grad_norm": 3.1545019149780273, "learning_rate": 0.0006420932878270762, "loss": 6.5052, "step": 3146 }, { "epoch": 1.074061433447099, "grad_norm": 3.2468883991241455, "learning_rate": 0.0006419795221843003, "loss": 6.8057, "step": 3147 }, { "epoch": 1.0744027303754267, "grad_norm": 5.185894966125488, "learning_rate": 0.0006418657565415244, "loss": 4.2064, "step": 3148 }, { "epoch": 1.0747440273037543, "grad_norm": 3.218177318572998, "learning_rate": 0.0006417519908987486, "loss": 6.056, "step": 3149 }, { "epoch": 1.075085324232082, "grad_norm": 3.383833646774292, "learning_rate": 0.0006416382252559727, "loss": 6.7475, "step": 3150 }, { "epoch": 1.0754266211604095, "grad_norm": 3.697319507598877, "learning_rate": 0.0006415244596131969, "loss": 6.663, "step": 3151 }, { "epoch": 1.0757679180887372, "grad_norm": 3.122490644454956, "learning_rate": 0.000641410693970421, "loss": 6.6991, "step": 3152 }, { "epoch": 1.0761092150170648, "grad_norm": 3.148451089859009, "learning_rate": 0.0006412969283276451, "loss": 6.6486, "step": 3153 }, { "epoch": 1.0764505119453924, "grad_norm": 3.321945905685425, "learning_rate": 0.0006411831626848692, "loss": 6.1962, "step": 3154 }, { "epoch": 1.07679180887372, "grad_norm": 3.590642213821411, "learning_rate": 0.0006410693970420933, "loss": 5.5254, "step": 3155 }, { "epoch": 1.0771331058020477, "grad_norm": 3.300290584564209, "learning_rate": 0.0006409556313993174, "loss": 5.7448, "step": 3156 }, { "epoch": 1.0774744027303753, "grad_norm": 3.4239137172698975, "learning_rate": 0.0006408418657565416, "loss": 6.2855, "step": 3157 }, { "epoch": 1.0778156996587032, "grad_norm": 3.3473432064056396, "learning_rate": 0.0006407281001137657, "loss": 6.6531, "step": 3158 }, { "epoch": 1.0781569965870308, "grad_norm": 3.0467193126678467, "learning_rate": 0.0006406143344709898, "loss": 6.3434, "step": 3159 }, { "epoch": 1.0784982935153584, "grad_norm": 3.6349399089813232, "learning_rate": 0.0006405005688282139, "loss": 6.3252, "step": 3160 }, { "epoch": 1.078839590443686, "grad_norm": 3.4417903423309326, "learning_rate": 0.0006403868031854379, "loss": 6.9098, "step": 3161 }, { "epoch": 1.0791808873720137, "grad_norm": 3.258446455001831, "learning_rate": 0.0006402730375426621, "loss": 6.3267, "step": 3162 }, { "epoch": 1.0795221843003413, "grad_norm": 3.3519585132598877, "learning_rate": 0.0006401592718998862, "loss": 5.89, "step": 3163 }, { "epoch": 1.079863481228669, "grad_norm": 3.009504795074463, "learning_rate": 0.0006400455062571103, "loss": 6.2854, "step": 3164 }, { "epoch": 1.0802047781569966, "grad_norm": 3.346721649169922, "learning_rate": 0.0006399317406143345, "loss": 6.9105, "step": 3165 }, { "epoch": 1.0805460750853242, "grad_norm": 3.1614081859588623, "learning_rate": 0.0006398179749715586, "loss": 6.4212, "step": 3166 }, { "epoch": 1.0808873720136518, "grad_norm": 3.280531406402588, "learning_rate": 0.0006397042093287827, "loss": 6.22, "step": 3167 }, { "epoch": 1.0812286689419794, "grad_norm": 3.3301196098327637, "learning_rate": 0.0006395904436860069, "loss": 6.7717, "step": 3168 }, { "epoch": 1.081569965870307, "grad_norm": 3.3172807693481445, "learning_rate": 0.000639476678043231, "loss": 6.6153, "step": 3169 }, { "epoch": 1.0819112627986347, "grad_norm": 3.3986637592315674, "learning_rate": 0.0006393629124004551, "loss": 6.0319, "step": 3170 }, { "epoch": 1.0822525597269625, "grad_norm": 3.199392318725586, "learning_rate": 0.0006392491467576792, "loss": 6.2352, "step": 3171 }, { "epoch": 1.0825938566552902, "grad_norm": 5.347891330718994, "learning_rate": 0.0006391353811149033, "loss": 5.6896, "step": 3172 }, { "epoch": 1.0829351535836178, "grad_norm": 3.7180368900299072, "learning_rate": 0.0006390216154721274, "loss": 6.2303, "step": 3173 }, { "epoch": 1.0832764505119454, "grad_norm": 3.3773393630981445, "learning_rate": 0.0006389078498293516, "loss": 6.0189, "step": 3174 }, { "epoch": 1.083617747440273, "grad_norm": 6.927037715911865, "learning_rate": 0.0006387940841865757, "loss": 4.5765, "step": 3175 }, { "epoch": 1.0839590443686007, "grad_norm": 4.274540901184082, "learning_rate": 0.0006386803185437998, "loss": 5.871, "step": 3176 }, { "epoch": 1.0843003412969283, "grad_norm": 3.52830171585083, "learning_rate": 0.0006385665529010239, "loss": 6.474, "step": 3177 }, { "epoch": 1.084641638225256, "grad_norm": 3.9919192790985107, "learning_rate": 0.000638452787258248, "loss": 6.2379, "step": 3178 }, { "epoch": 1.0849829351535836, "grad_norm": 3.2457633018493652, "learning_rate": 0.0006383390216154722, "loss": 7.1027, "step": 3179 }, { "epoch": 1.0853242320819112, "grad_norm": 3.2085020542144775, "learning_rate": 0.0006382252559726962, "loss": 6.7974, "step": 3180 }, { "epoch": 1.0856655290102388, "grad_norm": 3.2093658447265625, "learning_rate": 0.0006381114903299203, "loss": 6.5593, "step": 3181 }, { "epoch": 1.0860068259385665, "grad_norm": 4.833531856536865, "learning_rate": 0.0006379977246871445, "loss": 6.0521, "step": 3182 }, { "epoch": 1.086348122866894, "grad_norm": 3.516695499420166, "learning_rate": 0.0006378839590443686, "loss": 6.3321, "step": 3183 }, { "epoch": 1.086689419795222, "grad_norm": 3.375440835952759, "learning_rate": 0.0006377701934015927, "loss": 6.4715, "step": 3184 }, { "epoch": 1.0870307167235496, "grad_norm": 3.308617115020752, "learning_rate": 0.0006376564277588169, "loss": 6.5871, "step": 3185 }, { "epoch": 1.0873720136518772, "grad_norm": 3.2866013050079346, "learning_rate": 0.000637542662116041, "loss": 6.5732, "step": 3186 }, { "epoch": 1.0877133105802048, "grad_norm": 3.3857619762420654, "learning_rate": 0.0006374288964732651, "loss": 6.3731, "step": 3187 }, { "epoch": 1.0880546075085324, "grad_norm": 3.51200795173645, "learning_rate": 0.0006373151308304892, "loss": 5.6781, "step": 3188 }, { "epoch": 1.08839590443686, "grad_norm": 3.297863006591797, "learning_rate": 0.0006372013651877133, "loss": 6.4584, "step": 3189 }, { "epoch": 1.0887372013651877, "grad_norm": 3.261662244796753, "learning_rate": 0.0006370875995449374, "loss": 6.2231, "step": 3190 }, { "epoch": 1.0890784982935153, "grad_norm": 3.8961715698242188, "learning_rate": 0.0006369738339021616, "loss": 6.607, "step": 3191 }, { "epoch": 1.089419795221843, "grad_norm": 3.399348020553589, "learning_rate": 0.0006368600682593857, "loss": 4.4051, "step": 3192 }, { "epoch": 1.0897610921501706, "grad_norm": 4.216414928436279, "learning_rate": 0.0006367463026166098, "loss": 4.1361, "step": 3193 }, { "epoch": 1.0901023890784982, "grad_norm": 3.367846727371216, "learning_rate": 0.0006366325369738339, "loss": 6.8591, "step": 3194 }, { "epoch": 1.0904436860068258, "grad_norm": 3.9558677673339844, "learning_rate": 0.000636518771331058, "loss": 6.0432, "step": 3195 }, { "epoch": 1.0907849829351535, "grad_norm": 3.5588676929473877, "learning_rate": 0.0006364050056882821, "loss": 6.5362, "step": 3196 }, { "epoch": 1.0911262798634813, "grad_norm": 4.590184688568115, "learning_rate": 0.0006362912400455064, "loss": 6.3793, "step": 3197 }, { "epoch": 1.091467576791809, "grad_norm": 3.1657865047454834, "learning_rate": 0.0006361774744027305, "loss": 6.3938, "step": 3198 }, { "epoch": 1.0918088737201366, "grad_norm": 3.1333377361297607, "learning_rate": 0.0006360637087599546, "loss": 6.9106, "step": 3199 }, { "epoch": 1.0921501706484642, "grad_norm": 3.446474313735962, "learning_rate": 0.0006359499431171786, "loss": 6.3406, "step": 3200 }, { "epoch": 1.0924914675767918, "grad_norm": 4.603214263916016, "learning_rate": 0.0006358361774744027, "loss": 5.921, "step": 3201 }, { "epoch": 1.0928327645051195, "grad_norm": 3.366414785385132, "learning_rate": 0.0006357224118316269, "loss": 6.5776, "step": 3202 }, { "epoch": 1.093174061433447, "grad_norm": 3.351638078689575, "learning_rate": 0.000635608646188851, "loss": 6.0564, "step": 3203 }, { "epoch": 1.0935153583617747, "grad_norm": 3.204946517944336, "learning_rate": 0.0006354948805460751, "loss": 6.5415, "step": 3204 }, { "epoch": 1.0938566552901023, "grad_norm": 3.1665844917297363, "learning_rate": 0.0006353811149032992, "loss": 6.6033, "step": 3205 }, { "epoch": 1.09419795221843, "grad_norm": 3.453909158706665, "learning_rate": 0.0006352673492605233, "loss": 6.2195, "step": 3206 }, { "epoch": 1.0945392491467576, "grad_norm": 3.191018581390381, "learning_rate": 0.0006351535836177474, "loss": 6.3949, "step": 3207 }, { "epoch": 1.0948805460750852, "grad_norm": 3.1959285736083984, "learning_rate": 0.0006350398179749716, "loss": 6.2866, "step": 3208 }, { "epoch": 1.0952218430034129, "grad_norm": 3.368140459060669, "learning_rate": 0.0006349260523321957, "loss": 6.4439, "step": 3209 }, { "epoch": 1.0955631399317407, "grad_norm": 3.4131994247436523, "learning_rate": 0.0006348122866894198, "loss": 6.5307, "step": 3210 }, { "epoch": 1.0959044368600683, "grad_norm": 3.310483455657959, "learning_rate": 0.0006346985210466439, "loss": 6.8359, "step": 3211 }, { "epoch": 1.096245733788396, "grad_norm": 3.1908204555511475, "learning_rate": 0.000634584755403868, "loss": 6.6964, "step": 3212 }, { "epoch": 1.0965870307167236, "grad_norm": 3.5243823528289795, "learning_rate": 0.0006344709897610921, "loss": 6.4033, "step": 3213 }, { "epoch": 1.0969283276450512, "grad_norm": 3.324995756149292, "learning_rate": 0.0006343572241183164, "loss": 6.8275, "step": 3214 }, { "epoch": 1.0972696245733788, "grad_norm": 3.591324806213379, "learning_rate": 0.0006342434584755405, "loss": 6.5428, "step": 3215 }, { "epoch": 1.0976109215017065, "grad_norm": 3.3508944511413574, "learning_rate": 0.0006341296928327646, "loss": 6.4207, "step": 3216 }, { "epoch": 1.097952218430034, "grad_norm": 3.258124589920044, "learning_rate": 0.0006340159271899887, "loss": 6.369, "step": 3217 }, { "epoch": 1.0982935153583617, "grad_norm": 3.1361072063446045, "learning_rate": 0.0006339021615472128, "loss": 6.3561, "step": 3218 }, { "epoch": 1.0986348122866894, "grad_norm": 3.4941697120666504, "learning_rate": 0.0006337883959044368, "loss": 5.6931, "step": 3219 }, { "epoch": 1.098976109215017, "grad_norm": 3.129948854446411, "learning_rate": 0.000633674630261661, "loss": 6.4307, "step": 3220 }, { "epoch": 1.0993174061433446, "grad_norm": 6.091549396514893, "learning_rate": 0.0006335608646188851, "loss": 5.7017, "step": 3221 }, { "epoch": 1.0996587030716722, "grad_norm": 3.1629409790039062, "learning_rate": 0.0006334470989761092, "loss": 6.0674, "step": 3222 }, { "epoch": 1.1, "grad_norm": 6.926721572875977, "learning_rate": 0.0006333333333333333, "loss": 5.3602, "step": 3223 }, { "epoch": 1.1003412969283277, "grad_norm": 4.425708770751953, "learning_rate": 0.0006332195676905574, "loss": 6.9397, "step": 3224 }, { "epoch": 1.1006825938566553, "grad_norm": 3.5748658180236816, "learning_rate": 0.0006331058020477816, "loss": 7.2775, "step": 3225 }, { "epoch": 1.101023890784983, "grad_norm": 3.3313708305358887, "learning_rate": 0.0006329920364050057, "loss": 6.7344, "step": 3226 }, { "epoch": 1.1013651877133106, "grad_norm": 4.757328987121582, "learning_rate": 0.0006328782707622298, "loss": 5.769, "step": 3227 }, { "epoch": 1.1017064846416382, "grad_norm": 3.1376912593841553, "learning_rate": 0.0006327645051194539, "loss": 6.7527, "step": 3228 }, { "epoch": 1.1020477815699659, "grad_norm": 3.682274341583252, "learning_rate": 0.000632650739476678, "loss": 6.7461, "step": 3229 }, { "epoch": 1.1023890784982935, "grad_norm": 3.245393991470337, "learning_rate": 0.0006325369738339021, "loss": 6.5636, "step": 3230 }, { "epoch": 1.1027303754266211, "grad_norm": 3.133185625076294, "learning_rate": 0.0006324232081911264, "loss": 6.2898, "step": 3231 }, { "epoch": 1.1030716723549487, "grad_norm": 3.1336660385131836, "learning_rate": 0.0006323094425483505, "loss": 6.4333, "step": 3232 }, { "epoch": 1.1034129692832764, "grad_norm": 3.263469934463501, "learning_rate": 0.0006321956769055746, "loss": 6.4195, "step": 3233 }, { "epoch": 1.103754266211604, "grad_norm": 3.146401882171631, "learning_rate": 0.0006320819112627987, "loss": 6.7663, "step": 3234 }, { "epoch": 1.1040955631399316, "grad_norm": 3.2967708110809326, "learning_rate": 0.0006319681456200228, "loss": 6.8334, "step": 3235 }, { "epoch": 1.1044368600682595, "grad_norm": 3.167576551437378, "learning_rate": 0.0006318543799772469, "loss": 6.619, "step": 3236 }, { "epoch": 1.104778156996587, "grad_norm": 6.352286338806152, "learning_rate": 0.0006317406143344711, "loss": 6.0053, "step": 3237 }, { "epoch": 1.1051194539249147, "grad_norm": 3.420991897583008, "learning_rate": 0.0006316268486916951, "loss": 6.2279, "step": 3238 }, { "epoch": 1.1054607508532424, "grad_norm": 3.3597917556762695, "learning_rate": 0.0006315130830489192, "loss": 6.4583, "step": 3239 }, { "epoch": 1.10580204778157, "grad_norm": 3.377300262451172, "learning_rate": 0.0006313993174061433, "loss": 6.5939, "step": 3240 }, { "epoch": 1.1061433447098976, "grad_norm": 3.4640071392059326, "learning_rate": 0.0006312855517633674, "loss": 6.1728, "step": 3241 }, { "epoch": 1.1064846416382252, "grad_norm": 3.410388708114624, "learning_rate": 0.0006311717861205916, "loss": 6.1942, "step": 3242 }, { "epoch": 1.1068259385665529, "grad_norm": 3.2869067192077637, "learning_rate": 0.0006310580204778157, "loss": 6.6288, "step": 3243 }, { "epoch": 1.1071672354948805, "grad_norm": 3.073132038116455, "learning_rate": 0.0006309442548350398, "loss": 6.4948, "step": 3244 }, { "epoch": 1.1075085324232081, "grad_norm": 3.332573175430298, "learning_rate": 0.0006308304891922639, "loss": 6.2622, "step": 3245 }, { "epoch": 1.1078498293515358, "grad_norm": 3.1241261959075928, "learning_rate": 0.000630716723549488, "loss": 6.4462, "step": 3246 }, { "epoch": 1.1081911262798634, "grad_norm": 3.215703248977661, "learning_rate": 0.0006306029579067121, "loss": 6.9699, "step": 3247 }, { "epoch": 1.108532423208191, "grad_norm": 3.1730904579162598, "learning_rate": 0.0006304891922639364, "loss": 6.5032, "step": 3248 }, { "epoch": 1.1088737201365189, "grad_norm": 3.286179542541504, "learning_rate": 0.0006303754266211605, "loss": 6.4786, "step": 3249 }, { "epoch": 1.1092150170648465, "grad_norm": 3.2079620361328125, "learning_rate": 0.0006302616609783846, "loss": 5.9624, "step": 3250 }, { "epoch": 1.1095563139931741, "grad_norm": 3.4078316688537598, "learning_rate": 0.0006301478953356087, "loss": 5.7281, "step": 3251 }, { "epoch": 1.1098976109215017, "grad_norm": 3.154865264892578, "learning_rate": 0.0006300341296928328, "loss": 6.2634, "step": 3252 }, { "epoch": 1.1102389078498294, "grad_norm": 4.15884256362915, "learning_rate": 0.0006299203640500569, "loss": 5.9743, "step": 3253 }, { "epoch": 1.110580204778157, "grad_norm": 3.244605302810669, "learning_rate": 0.0006298065984072811, "loss": 6.3037, "step": 3254 }, { "epoch": 1.1109215017064846, "grad_norm": 3.354834794998169, "learning_rate": 0.0006296928327645052, "loss": 6.2177, "step": 3255 }, { "epoch": 1.1112627986348123, "grad_norm": 3.270688056945801, "learning_rate": 0.0006295790671217293, "loss": 6.2083, "step": 3256 }, { "epoch": 1.1116040955631399, "grad_norm": 3.2896766662597656, "learning_rate": 0.0006294653014789534, "loss": 6.3809, "step": 3257 }, { "epoch": 1.1119453924914675, "grad_norm": 3.2658944129943848, "learning_rate": 0.0006293515358361774, "loss": 5.7977, "step": 3258 }, { "epoch": 1.1122866894197951, "grad_norm": 3.716240644454956, "learning_rate": 0.0006292377701934015, "loss": 6.0917, "step": 3259 }, { "epoch": 1.1126279863481228, "grad_norm": 3.223571538925171, "learning_rate": 0.0006291240045506257, "loss": 4.5392, "step": 3260 }, { "epoch": 1.1129692832764504, "grad_norm": 3.6493613719940186, "learning_rate": 0.0006290102389078498, "loss": 6.3741, "step": 3261 }, { "epoch": 1.1133105802047782, "grad_norm": 3.8748128414154053, "learning_rate": 0.0006288964732650739, "loss": 5.9105, "step": 3262 }, { "epoch": 1.1136518771331059, "grad_norm": 3.320779323577881, "learning_rate": 0.000628782707622298, "loss": 6.4, "step": 3263 }, { "epoch": 1.1139931740614335, "grad_norm": 3.4547297954559326, "learning_rate": 0.0006286689419795221, "loss": 6.1842, "step": 3264 }, { "epoch": 1.1143344709897611, "grad_norm": 3.3176543712615967, "learning_rate": 0.0006285551763367464, "loss": 6.8925, "step": 3265 }, { "epoch": 1.1146757679180888, "grad_norm": 3.095383882522583, "learning_rate": 0.0006284414106939705, "loss": 6.7177, "step": 3266 }, { "epoch": 1.1150170648464164, "grad_norm": 3.1322736740112305, "learning_rate": 0.0006283276450511946, "loss": 7.11, "step": 3267 }, { "epoch": 1.115358361774744, "grad_norm": 3.179048538208008, "learning_rate": 0.0006282138794084187, "loss": 6.6664, "step": 3268 }, { "epoch": 1.1156996587030716, "grad_norm": 3.2446703910827637, "learning_rate": 0.0006281001137656428, "loss": 6.2555, "step": 3269 }, { "epoch": 1.1160409556313993, "grad_norm": 3.4410719871520996, "learning_rate": 0.0006279863481228669, "loss": 6.3639, "step": 3270 }, { "epoch": 1.116382252559727, "grad_norm": 3.151151657104492, "learning_rate": 0.0006278725824800911, "loss": 6.548, "step": 3271 }, { "epoch": 1.1167235494880545, "grad_norm": 3.394761085510254, "learning_rate": 0.0006277588168373152, "loss": 6.7941, "step": 3272 }, { "epoch": 1.1170648464163822, "grad_norm": 3.4084866046905518, "learning_rate": 0.0006276450511945393, "loss": 6.0128, "step": 3273 }, { "epoch": 1.11740614334471, "grad_norm": 3.2266435623168945, "learning_rate": 0.0006275312855517634, "loss": 6.3047, "step": 3274 }, { "epoch": 1.1177474402730376, "grad_norm": 3.2533907890319824, "learning_rate": 0.0006274175199089875, "loss": 6.4112, "step": 3275 }, { "epoch": 1.1180887372013653, "grad_norm": 3.1983823776245117, "learning_rate": 0.0006273037542662116, "loss": 6.8299, "step": 3276 }, { "epoch": 1.1184300341296929, "grad_norm": 3.781130075454712, "learning_rate": 0.0006271899886234357, "loss": 6.1287, "step": 3277 }, { "epoch": 1.1187713310580205, "grad_norm": 3.3088560104370117, "learning_rate": 0.0006270762229806598, "loss": 6.1003, "step": 3278 }, { "epoch": 1.1191126279863481, "grad_norm": 3.457223653793335, "learning_rate": 0.0006269624573378839, "loss": 6.1749, "step": 3279 }, { "epoch": 1.1194539249146758, "grad_norm": 3.4108994007110596, "learning_rate": 0.000626848691695108, "loss": 6.4882, "step": 3280 }, { "epoch": 1.1197952218430034, "grad_norm": 3.191033363342285, "learning_rate": 0.0006267349260523321, "loss": 6.386, "step": 3281 }, { "epoch": 1.120136518771331, "grad_norm": 3.2378880977630615, "learning_rate": 0.0006266211604095564, "loss": 6.5682, "step": 3282 }, { "epoch": 1.1204778156996587, "grad_norm": 3.0198142528533936, "learning_rate": 0.0006265073947667805, "loss": 6.6885, "step": 3283 }, { "epoch": 1.1208191126279863, "grad_norm": 3.2005209922790527, "learning_rate": 0.0006263936291240046, "loss": 6.7357, "step": 3284 }, { "epoch": 1.121160409556314, "grad_norm": 3.2650158405303955, "learning_rate": 0.0006262798634812287, "loss": 6.8819, "step": 3285 }, { "epoch": 1.1215017064846415, "grad_norm": 3.4467949867248535, "learning_rate": 0.0006261660978384528, "loss": 6.327, "step": 3286 }, { "epoch": 1.1218430034129694, "grad_norm": 3.51582932472229, "learning_rate": 0.0006260523321956769, "loss": 6.4275, "step": 3287 }, { "epoch": 1.122184300341297, "grad_norm": 3.2341697216033936, "learning_rate": 0.0006259385665529011, "loss": 6.6526, "step": 3288 }, { "epoch": 1.1225255972696246, "grad_norm": 3.0597870349884033, "learning_rate": 0.0006258248009101252, "loss": 6.3242, "step": 3289 }, { "epoch": 1.1228668941979523, "grad_norm": 3.1777312755584717, "learning_rate": 0.0006257110352673493, "loss": 6.7306, "step": 3290 }, { "epoch": 1.12320819112628, "grad_norm": 3.1080517768859863, "learning_rate": 0.0006255972696245734, "loss": 6.5034, "step": 3291 }, { "epoch": 1.1235494880546075, "grad_norm": 3.166882276535034, "learning_rate": 0.0006254835039817975, "loss": 6.6549, "step": 3292 }, { "epoch": 1.1238907849829352, "grad_norm": 3.2958264350891113, "learning_rate": 0.0006253697383390216, "loss": 6.3964, "step": 3293 }, { "epoch": 1.1242320819112628, "grad_norm": 3.2784876823425293, "learning_rate": 0.0006252559726962458, "loss": 6.7125, "step": 3294 }, { "epoch": 1.1245733788395904, "grad_norm": 3.357529878616333, "learning_rate": 0.0006251422070534699, "loss": 6.2972, "step": 3295 }, { "epoch": 1.124914675767918, "grad_norm": 3.1187024116516113, "learning_rate": 0.000625028441410694, "loss": 6.4141, "step": 3296 }, { "epoch": 1.1252559726962457, "grad_norm": 3.192850112915039, "learning_rate": 0.000624914675767918, "loss": 6.4792, "step": 3297 }, { "epoch": 1.1255972696245733, "grad_norm": 3.124603509902954, "learning_rate": 0.0006248009101251421, "loss": 6.084, "step": 3298 }, { "epoch": 1.1259385665529011, "grad_norm": 3.316884756088257, "learning_rate": 0.0006246871444823662, "loss": 6.8159, "step": 3299 }, { "epoch": 1.1262798634812285, "grad_norm": 3.2244949340820312, "learning_rate": 0.0006245733788395905, "loss": 6.0295, "step": 3300 }, { "epoch": 1.1266211604095564, "grad_norm": 3.214632749557495, "learning_rate": 0.0006244596131968146, "loss": 6.0611, "step": 3301 }, { "epoch": 1.126962457337884, "grad_norm": 3.4261796474456787, "learning_rate": 0.0006243458475540387, "loss": 6.371, "step": 3302 }, { "epoch": 1.1273037542662117, "grad_norm": 3.2653138637542725, "learning_rate": 0.0006242320819112628, "loss": 6.4282, "step": 3303 }, { "epoch": 1.1276450511945393, "grad_norm": 3.3320472240448, "learning_rate": 0.0006241183162684869, "loss": 6.7758, "step": 3304 }, { "epoch": 1.127986348122867, "grad_norm": 3.2385928630828857, "learning_rate": 0.0006240045506257111, "loss": 6.7638, "step": 3305 }, { "epoch": 1.1283276450511945, "grad_norm": 3.225954055786133, "learning_rate": 0.0006238907849829352, "loss": 6.3909, "step": 3306 }, { "epoch": 1.1286689419795222, "grad_norm": 3.2931458950042725, "learning_rate": 0.0006237770193401593, "loss": 6.7229, "step": 3307 }, { "epoch": 1.1290102389078498, "grad_norm": 3.082092761993408, "learning_rate": 0.0006236632536973834, "loss": 6.673, "step": 3308 }, { "epoch": 1.1293515358361774, "grad_norm": 4.754388809204102, "learning_rate": 0.0006235494880546075, "loss": 6.1331, "step": 3309 }, { "epoch": 1.129692832764505, "grad_norm": 3.1302919387817383, "learning_rate": 0.0006234357224118316, "loss": 6.512, "step": 3310 }, { "epoch": 1.1300341296928327, "grad_norm": 5.564751148223877, "learning_rate": 0.0006233219567690558, "loss": 6.6001, "step": 3311 }, { "epoch": 1.1303754266211605, "grad_norm": 2.1751673221588135, "learning_rate": 0.0006232081911262799, "loss": 3.1882, "step": 3312 }, { "epoch": 1.130716723549488, "grad_norm": 3.3056774139404297, "learning_rate": 0.000623094425483504, "loss": 6.0435, "step": 3313 }, { "epoch": 1.1310580204778158, "grad_norm": 3.5110793113708496, "learning_rate": 0.0006229806598407281, "loss": 6.7698, "step": 3314 }, { "epoch": 1.1313993174061434, "grad_norm": 3.8492422103881836, "learning_rate": 0.0006228668941979523, "loss": 5.9606, "step": 3315 }, { "epoch": 1.131740614334471, "grad_norm": 3.3324551582336426, "learning_rate": 0.0006227531285551762, "loss": 6.1918, "step": 3316 }, { "epoch": 1.1320819112627987, "grad_norm": 3.3533132076263428, "learning_rate": 0.0006226393629124005, "loss": 6.8835, "step": 3317 }, { "epoch": 1.1324232081911263, "grad_norm": 3.216062068939209, "learning_rate": 0.0006225255972696246, "loss": 6.1811, "step": 3318 }, { "epoch": 1.132764505119454, "grad_norm": 3.2382802963256836, "learning_rate": 0.0006224118316268487, "loss": 6.3388, "step": 3319 }, { "epoch": 1.1331058020477816, "grad_norm": 3.712456226348877, "learning_rate": 0.0006222980659840728, "loss": 6.0058, "step": 3320 }, { "epoch": 1.1334470989761092, "grad_norm": 3.3816704750061035, "learning_rate": 0.0006221843003412969, "loss": 6.3261, "step": 3321 }, { "epoch": 1.1337883959044368, "grad_norm": 3.733504295349121, "learning_rate": 0.000622070534698521, "loss": 4.9361, "step": 3322 }, { "epoch": 1.1341296928327644, "grad_norm": 3.436828851699829, "learning_rate": 0.0006219567690557452, "loss": 6.4064, "step": 3323 }, { "epoch": 1.134470989761092, "grad_norm": 3.211754083633423, "learning_rate": 0.0006218430034129693, "loss": 6.9981, "step": 3324 }, { "epoch": 1.13481228668942, "grad_norm": 3.2073490619659424, "learning_rate": 0.0006217292377701934, "loss": 6.2856, "step": 3325 }, { "epoch": 1.1351535836177473, "grad_norm": 3.1387381553649902, "learning_rate": 0.0006216154721274175, "loss": 6.6068, "step": 3326 }, { "epoch": 1.1354948805460752, "grad_norm": 5.683926582336426, "learning_rate": 0.0006215017064846416, "loss": 6.1509, "step": 3327 }, { "epoch": 1.1358361774744028, "grad_norm": 3.4648234844207764, "learning_rate": 0.0006213879408418658, "loss": 6.0164, "step": 3328 }, { "epoch": 1.1361774744027304, "grad_norm": 3.3188838958740234, "learning_rate": 0.0006212741751990899, "loss": 6.6927, "step": 3329 }, { "epoch": 1.136518771331058, "grad_norm": 5.669882297515869, "learning_rate": 0.000621160409556314, "loss": 5.7178, "step": 3330 }, { "epoch": 1.1368600682593857, "grad_norm": 3.321120023727417, "learning_rate": 0.0006210466439135381, "loss": 6.3605, "step": 3331 }, { "epoch": 1.1372013651877133, "grad_norm": 3.19608473777771, "learning_rate": 0.0006209328782707623, "loss": 7.1759, "step": 3332 }, { "epoch": 1.137542662116041, "grad_norm": 3.3638601303100586, "learning_rate": 0.0006208191126279864, "loss": 6.4738, "step": 3333 }, { "epoch": 1.1378839590443686, "grad_norm": 3.9922034740448, "learning_rate": 0.0006207053469852106, "loss": 6.4603, "step": 3334 }, { "epoch": 1.1382252559726962, "grad_norm": 3.5877456665039062, "learning_rate": 0.0006205915813424347, "loss": 5.6385, "step": 3335 }, { "epoch": 1.1385665529010238, "grad_norm": 3.1927621364593506, "learning_rate": 0.0006204778156996587, "loss": 5.9584, "step": 3336 }, { "epoch": 1.1389078498293514, "grad_norm": 7.028829574584961, "learning_rate": 0.0006203640500568828, "loss": 4.9702, "step": 3337 }, { "epoch": 1.1392491467576793, "grad_norm": 3.2814626693725586, "learning_rate": 0.0006202502844141069, "loss": 6.8503, "step": 3338 }, { "epoch": 1.1395904436860067, "grad_norm": 3.5198097229003906, "learning_rate": 0.000620136518771331, "loss": 6.2619, "step": 3339 }, { "epoch": 1.1399317406143346, "grad_norm": 3.3513224124908447, "learning_rate": 0.0006200227531285552, "loss": 6.749, "step": 3340 }, { "epoch": 1.1402730375426622, "grad_norm": 3.736387014389038, "learning_rate": 0.0006199089874857793, "loss": 5.8229, "step": 3341 }, { "epoch": 1.1406143344709898, "grad_norm": 3.531832456588745, "learning_rate": 0.0006197952218430034, "loss": 5.5888, "step": 3342 }, { "epoch": 1.1409556313993174, "grad_norm": 3.2506160736083984, "learning_rate": 0.0006196814562002275, "loss": 6.379, "step": 3343 }, { "epoch": 1.141296928327645, "grad_norm": 5.999919414520264, "learning_rate": 0.0006195676905574516, "loss": 5.3758, "step": 3344 }, { "epoch": 1.1416382252559727, "grad_norm": 3.500176191329956, "learning_rate": 0.0006194539249146758, "loss": 6.2346, "step": 3345 }, { "epoch": 1.1419795221843003, "grad_norm": 2.100086212158203, "learning_rate": 0.0006193401592718999, "loss": 3.3155, "step": 3346 }, { "epoch": 1.142320819112628, "grad_norm": 5.4075798988342285, "learning_rate": 0.000619226393629124, "loss": 4.6973, "step": 3347 }, { "epoch": 1.1426621160409556, "grad_norm": 3.206171989440918, "learning_rate": 0.0006191126279863481, "loss": 6.4632, "step": 3348 }, { "epoch": 1.1430034129692832, "grad_norm": 3.123758316040039, "learning_rate": 0.0006189988623435723, "loss": 6.6153, "step": 3349 }, { "epoch": 1.1433447098976108, "grad_norm": 3.109133243560791, "learning_rate": 0.0006188850967007964, "loss": 6.9189, "step": 3350 }, { "epoch": 1.1436860068259387, "grad_norm": 3.7639737129211426, "learning_rate": 0.0006187713310580206, "loss": 5.0232, "step": 3351 }, { "epoch": 1.144027303754266, "grad_norm": 3.2191059589385986, "learning_rate": 0.0006186575654152447, "loss": 6.3187, "step": 3352 }, { "epoch": 1.144368600682594, "grad_norm": 3.2868123054504395, "learning_rate": 0.0006185437997724688, "loss": 6.3158, "step": 3353 }, { "epoch": 1.1447098976109216, "grad_norm": 3.318932294845581, "learning_rate": 0.0006184300341296929, "loss": 6.2764, "step": 3354 }, { "epoch": 1.1450511945392492, "grad_norm": 3.06105899810791, "learning_rate": 0.0006183162684869169, "loss": 6.4749, "step": 3355 }, { "epoch": 1.1453924914675768, "grad_norm": 3.42996883392334, "learning_rate": 0.000618202502844141, "loss": 6.438, "step": 3356 }, { "epoch": 1.1457337883959045, "grad_norm": 3.3737375736236572, "learning_rate": 0.0006180887372013652, "loss": 6.6715, "step": 3357 }, { "epoch": 1.146075085324232, "grad_norm": 3.796318531036377, "learning_rate": 0.0006179749715585893, "loss": 5.5668, "step": 3358 }, { "epoch": 1.1464163822525597, "grad_norm": 3.3401639461517334, "learning_rate": 0.0006178612059158134, "loss": 6.3617, "step": 3359 }, { "epoch": 1.1467576791808873, "grad_norm": 3.4352896213531494, "learning_rate": 0.0006177474402730375, "loss": 6.105, "step": 3360 }, { "epoch": 1.147098976109215, "grad_norm": 3.1488349437713623, "learning_rate": 0.0006176336746302616, "loss": 6.588, "step": 3361 }, { "epoch": 1.1474402730375426, "grad_norm": 3.108013868331909, "learning_rate": 0.0006175199089874857, "loss": 6.22, "step": 3362 }, { "epoch": 1.1477815699658702, "grad_norm": 3.5639657974243164, "learning_rate": 0.0006174061433447099, "loss": 6.7161, "step": 3363 }, { "epoch": 1.148122866894198, "grad_norm": 4.529385566711426, "learning_rate": 0.000617292377701934, "loss": 5.2737, "step": 3364 }, { "epoch": 1.1484641638225255, "grad_norm": 5.763156414031982, "learning_rate": 0.0006171786120591581, "loss": 5.3901, "step": 3365 }, { "epoch": 1.1488054607508533, "grad_norm": 3.319201707839966, "learning_rate": 0.0006170648464163823, "loss": 6.2037, "step": 3366 }, { "epoch": 1.149146757679181, "grad_norm": 3.429962635040283, "learning_rate": 0.0006169510807736064, "loss": 6.1665, "step": 3367 }, { "epoch": 1.1494880546075086, "grad_norm": 3.73044490814209, "learning_rate": 0.0006168373151308306, "loss": 6.0798, "step": 3368 }, { "epoch": 1.1498293515358362, "grad_norm": 5.758520126342773, "learning_rate": 0.0006167235494880547, "loss": 5.322, "step": 3369 }, { "epoch": 1.1501706484641638, "grad_norm": 3.261571168899536, "learning_rate": 0.0006166097838452788, "loss": 6.9592, "step": 3370 }, { "epoch": 1.1505119453924915, "grad_norm": 3.1305222511291504, "learning_rate": 0.0006164960182025029, "loss": 6.0327, "step": 3371 }, { "epoch": 1.150853242320819, "grad_norm": 3.285921335220337, "learning_rate": 0.000616382252559727, "loss": 6.7096, "step": 3372 }, { "epoch": 1.1511945392491467, "grad_norm": 3.030154228210449, "learning_rate": 0.0006162684869169511, "loss": 6.5408, "step": 3373 }, { "epoch": 1.1515358361774743, "grad_norm": 3.1218512058258057, "learning_rate": 0.0006161547212741753, "loss": 6.5412, "step": 3374 }, { "epoch": 1.151877133105802, "grad_norm": 3.093693733215332, "learning_rate": 0.0006160409556313993, "loss": 6.7893, "step": 3375 }, { "epoch": 1.1522184300341296, "grad_norm": 8.688549041748047, "learning_rate": 0.0006159271899886234, "loss": 5.4964, "step": 3376 }, { "epoch": 1.1525597269624575, "grad_norm": 3.335278272628784, "learning_rate": 0.0006158134243458475, "loss": 6.1672, "step": 3377 }, { "epoch": 1.1529010238907849, "grad_norm": 3.6452267169952393, "learning_rate": 0.0006156996587030716, "loss": 5.9682, "step": 3378 }, { "epoch": 1.1532423208191127, "grad_norm": 3.369281530380249, "learning_rate": 0.0006155858930602957, "loss": 6.0847, "step": 3379 }, { "epoch": 1.1535836177474403, "grad_norm": 3.305046319961548, "learning_rate": 0.0006154721274175199, "loss": 6.8861, "step": 3380 }, { "epoch": 1.153924914675768, "grad_norm": 4.961038112640381, "learning_rate": 0.000615358361774744, "loss": 4.7492, "step": 3381 }, { "epoch": 1.1542662116040956, "grad_norm": 3.369922161102295, "learning_rate": 0.0006152445961319682, "loss": 6.3582, "step": 3382 }, { "epoch": 1.1546075085324232, "grad_norm": 3.3537933826446533, "learning_rate": 0.0006151308304891923, "loss": 6.2107, "step": 3383 }, { "epoch": 1.1549488054607508, "grad_norm": 3.241588830947876, "learning_rate": 0.0006150170648464164, "loss": 6.0113, "step": 3384 }, { "epoch": 1.1552901023890785, "grad_norm": 3.1636621952056885, "learning_rate": 0.0006149032992036406, "loss": 6.7975, "step": 3385 }, { "epoch": 1.155631399317406, "grad_norm": 3.1564855575561523, "learning_rate": 0.0006147895335608647, "loss": 6.2154, "step": 3386 }, { "epoch": 1.1559726962457337, "grad_norm": 3.18015456199646, "learning_rate": 0.0006146757679180888, "loss": 6.7107, "step": 3387 }, { "epoch": 1.1563139931740614, "grad_norm": 3.214262008666992, "learning_rate": 0.0006145620022753129, "loss": 6.2703, "step": 3388 }, { "epoch": 1.156655290102389, "grad_norm": 3.171013593673706, "learning_rate": 0.000614448236632537, "loss": 6.5638, "step": 3389 }, { "epoch": 1.1569965870307168, "grad_norm": 3.184718608856201, "learning_rate": 0.0006143344709897611, "loss": 6.9765, "step": 3390 }, { "epoch": 1.1573378839590442, "grad_norm": 3.161717414855957, "learning_rate": 0.0006142207053469853, "loss": 6.8384, "step": 3391 }, { "epoch": 1.157679180887372, "grad_norm": 5.287087440490723, "learning_rate": 0.0006141069397042094, "loss": 6.4128, "step": 3392 }, { "epoch": 1.1580204778156997, "grad_norm": 3.629075765609741, "learning_rate": 0.0006139931740614335, "loss": 6.171, "step": 3393 }, { "epoch": 1.1583617747440274, "grad_norm": 3.260854721069336, "learning_rate": 0.0006138794084186575, "loss": 6.6395, "step": 3394 }, { "epoch": 1.158703071672355, "grad_norm": 3.2226295471191406, "learning_rate": 0.0006137656427758816, "loss": 6.533, "step": 3395 }, { "epoch": 1.1590443686006826, "grad_norm": 3.1546380519866943, "learning_rate": 0.0006136518771331057, "loss": 6.5444, "step": 3396 }, { "epoch": 1.1593856655290102, "grad_norm": 3.138427257537842, "learning_rate": 0.0006135381114903299, "loss": 6.7728, "step": 3397 }, { "epoch": 1.1597269624573379, "grad_norm": 3.0224416255950928, "learning_rate": 0.000613424345847554, "loss": 6.6283, "step": 3398 }, { "epoch": 1.1600682593856655, "grad_norm": 3.074084997177124, "learning_rate": 0.0006133105802047782, "loss": 6.5736, "step": 3399 }, { "epoch": 1.1604095563139931, "grad_norm": 4.4774909019470215, "learning_rate": 0.0006131968145620023, "loss": 5.5653, "step": 3400 }, { "epoch": 1.1607508532423207, "grad_norm": 3.481534004211426, "learning_rate": 0.0006130830489192264, "loss": 6.0247, "step": 3401 }, { "epoch": 1.1610921501706484, "grad_norm": 3.528672695159912, "learning_rate": 0.0006129692832764505, "loss": 6.7388, "step": 3402 }, { "epoch": 1.1614334470989762, "grad_norm": 3.561124324798584, "learning_rate": 0.0006128555176336747, "loss": 6.303, "step": 3403 }, { "epoch": 1.1617747440273036, "grad_norm": 3.158127546310425, "learning_rate": 0.0006127417519908988, "loss": 6.2118, "step": 3404 }, { "epoch": 1.1621160409556315, "grad_norm": 3.1194143295288086, "learning_rate": 0.0006126279863481229, "loss": 6.4867, "step": 3405 }, { "epoch": 1.162457337883959, "grad_norm": 3.337048292160034, "learning_rate": 0.000612514220705347, "loss": 6.7671, "step": 3406 }, { "epoch": 1.1627986348122867, "grad_norm": 3.0700247287750244, "learning_rate": 0.0006124004550625711, "loss": 6.1681, "step": 3407 }, { "epoch": 1.1631399317406144, "grad_norm": 3.1669013500213623, "learning_rate": 0.0006122866894197953, "loss": 6.8375, "step": 3408 }, { "epoch": 1.163481228668942, "grad_norm": 3.3716862201690674, "learning_rate": 0.0006121729237770194, "loss": 6.3905, "step": 3409 }, { "epoch": 1.1638225255972696, "grad_norm": 7.003607273101807, "learning_rate": 0.0006120591581342435, "loss": 6.1162, "step": 3410 }, { "epoch": 1.1641638225255972, "grad_norm": 3.2531354427337646, "learning_rate": 0.0006119453924914676, "loss": 6.4429, "step": 3411 }, { "epoch": 1.1645051194539249, "grad_norm": 3.7305870056152344, "learning_rate": 0.0006118316268486917, "loss": 6.4708, "step": 3412 }, { "epoch": 1.1648464163822525, "grad_norm": 4.199586868286133, "learning_rate": 0.0006117178612059157, "loss": 6.2342, "step": 3413 }, { "epoch": 1.1651877133105801, "grad_norm": 3.2192304134368896, "learning_rate": 0.0006116040955631399, "loss": 6.3102, "step": 3414 }, { "epoch": 1.1655290102389078, "grad_norm": 3.2440237998962402, "learning_rate": 0.000611490329920364, "loss": 7.0148, "step": 3415 }, { "epoch": 1.1658703071672356, "grad_norm": 3.203922748565674, "learning_rate": 0.0006113765642775882, "loss": 6.0749, "step": 3416 }, { "epoch": 1.1662116040955632, "grad_norm": 3.4446823596954346, "learning_rate": 0.0006112627986348123, "loss": 6.4998, "step": 3417 }, { "epoch": 1.1665529010238909, "grad_norm": 3.079411506652832, "learning_rate": 0.0006111490329920364, "loss": 6.3895, "step": 3418 }, { "epoch": 1.1668941979522185, "grad_norm": 3.134840488433838, "learning_rate": 0.0006110352673492605, "loss": 6.3418, "step": 3419 }, { "epoch": 1.1672354948805461, "grad_norm": 3.2079269886016846, "learning_rate": 0.0006109215017064847, "loss": 6.4572, "step": 3420 }, { "epoch": 1.1675767918088737, "grad_norm": 4.834338665008545, "learning_rate": 0.0006108077360637088, "loss": 5.9253, "step": 3421 }, { "epoch": 1.1679180887372014, "grad_norm": 3.2818708419799805, "learning_rate": 0.0006106939704209329, "loss": 6.789, "step": 3422 }, { "epoch": 1.168259385665529, "grad_norm": 3.2917566299438477, "learning_rate": 0.000610580204778157, "loss": 6.8878, "step": 3423 }, { "epoch": 1.1686006825938566, "grad_norm": 3.199425458908081, "learning_rate": 0.0006104664391353811, "loss": 6.4318, "step": 3424 }, { "epoch": 1.1689419795221843, "grad_norm": 4.149702548980713, "learning_rate": 0.0006103526734926053, "loss": 5.2789, "step": 3425 }, { "epoch": 1.1692832764505119, "grad_norm": 3.1588354110717773, "learning_rate": 0.0006102389078498294, "loss": 6.5981, "step": 3426 }, { "epoch": 1.1696245733788395, "grad_norm": 3.282426595687866, "learning_rate": 0.0006101251422070535, "loss": 6.3488, "step": 3427 }, { "epoch": 1.1699658703071671, "grad_norm": 3.071763515472412, "learning_rate": 0.0006100113765642776, "loss": 6.6647, "step": 3428 }, { "epoch": 1.170307167235495, "grad_norm": 3.4117684364318848, "learning_rate": 0.0006098976109215017, "loss": 6.057, "step": 3429 }, { "epoch": 1.1706484641638226, "grad_norm": 3.132619857788086, "learning_rate": 0.0006097838452787258, "loss": 6.3089, "step": 3430 }, { "epoch": 1.1709897610921502, "grad_norm": 3.3119728565216064, "learning_rate": 0.00060967007963595, "loss": 6.6789, "step": 3431 }, { "epoch": 1.1713310580204779, "grad_norm": 3.4108917713165283, "learning_rate": 0.0006095563139931742, "loss": 6.664, "step": 3432 }, { "epoch": 1.1716723549488055, "grad_norm": 3.4007740020751953, "learning_rate": 0.0006094425483503982, "loss": 6.6983, "step": 3433 }, { "epoch": 1.1720136518771331, "grad_norm": 3.2750303745269775, "learning_rate": 0.0006093287827076223, "loss": 6.7191, "step": 3434 }, { "epoch": 1.1723549488054608, "grad_norm": 3.2517144680023193, "learning_rate": 0.0006092150170648464, "loss": 6.6333, "step": 3435 }, { "epoch": 1.1726962457337884, "grad_norm": 3.0657079219818115, "learning_rate": 0.0006091012514220705, "loss": 7.0866, "step": 3436 }, { "epoch": 1.173037542662116, "grad_norm": 3.8574588298797607, "learning_rate": 0.0006089874857792947, "loss": 6.2078, "step": 3437 }, { "epoch": 1.1733788395904436, "grad_norm": 3.447795867919922, "learning_rate": 0.0006088737201365188, "loss": 6.5433, "step": 3438 }, { "epoch": 1.1737201365187713, "grad_norm": 3.2688255310058594, "learning_rate": 0.0006087599544937429, "loss": 6.0859, "step": 3439 }, { "epoch": 1.174061433447099, "grad_norm": 3.0544850826263428, "learning_rate": 0.000608646188850967, "loss": 6.6904, "step": 3440 }, { "epoch": 1.1744027303754265, "grad_norm": 3.1975157260894775, "learning_rate": 0.0006085324232081911, "loss": 6.3464, "step": 3441 }, { "epoch": 1.1747440273037544, "grad_norm": 3.1595888137817383, "learning_rate": 0.0006084186575654152, "loss": 6.721, "step": 3442 }, { "epoch": 1.175085324232082, "grad_norm": 3.2810568809509277, "learning_rate": 0.0006083048919226394, "loss": 5.9367, "step": 3443 }, { "epoch": 1.1754266211604096, "grad_norm": 3.1587705612182617, "learning_rate": 0.0006081911262798635, "loss": 6.2515, "step": 3444 }, { "epoch": 1.1757679180887373, "grad_norm": 3.1385536193847656, "learning_rate": 0.0006080773606370876, "loss": 6.3264, "step": 3445 }, { "epoch": 1.176109215017065, "grad_norm": 3.262084722518921, "learning_rate": 0.0006079635949943117, "loss": 6.7922, "step": 3446 }, { "epoch": 1.1764505119453925, "grad_norm": 3.3084287643432617, "learning_rate": 0.0006078498293515358, "loss": 6.5934, "step": 3447 }, { "epoch": 1.1767918088737201, "grad_norm": 7.798718452453613, "learning_rate": 0.00060773606370876, "loss": 5.7443, "step": 3448 }, { "epoch": 1.1771331058020478, "grad_norm": 3.4016666412353516, "learning_rate": 0.0006076222980659842, "loss": 7.0521, "step": 3449 }, { "epoch": 1.1774744027303754, "grad_norm": 3.541292905807495, "learning_rate": 0.0006075085324232083, "loss": 6.5505, "step": 3450 }, { "epoch": 1.177815699658703, "grad_norm": 3.220148801803589, "learning_rate": 0.0006073947667804324, "loss": 6.8404, "step": 3451 }, { "epoch": 1.1781569965870307, "grad_norm": 8.531793594360352, "learning_rate": 0.0006072810011376564, "loss": 5.1038, "step": 3452 }, { "epoch": 1.1784982935153583, "grad_norm": 3.7234575748443604, "learning_rate": 0.0006071672354948805, "loss": 6.5902, "step": 3453 }, { "epoch": 1.178839590443686, "grad_norm": 3.364846706390381, "learning_rate": 0.0006070534698521047, "loss": 6.1682, "step": 3454 }, { "epoch": 1.1791808873720138, "grad_norm": 3.291454315185547, "learning_rate": 0.0006069397042093288, "loss": 6.5433, "step": 3455 }, { "epoch": 1.1795221843003414, "grad_norm": 3.3431570529937744, "learning_rate": 0.0006068259385665529, "loss": 6.5502, "step": 3456 }, { "epoch": 1.179863481228669, "grad_norm": 5.842983722686768, "learning_rate": 0.000606712172923777, "loss": 5.9161, "step": 3457 }, { "epoch": 1.1802047781569966, "grad_norm": 3.1439208984375, "learning_rate": 0.0006065984072810011, "loss": 6.7444, "step": 3458 }, { "epoch": 1.1805460750853243, "grad_norm": 3.290973424911499, "learning_rate": 0.0006064846416382252, "loss": 6.1831, "step": 3459 }, { "epoch": 1.180887372013652, "grad_norm": 3.0965545177459717, "learning_rate": 0.0006063708759954494, "loss": 6.2298, "step": 3460 }, { "epoch": 1.1812286689419795, "grad_norm": 3.1501073837280273, "learning_rate": 0.0006062571103526735, "loss": 5.6735, "step": 3461 }, { "epoch": 1.1815699658703072, "grad_norm": 3.1459507942199707, "learning_rate": 0.0006061433447098976, "loss": 6.6602, "step": 3462 }, { "epoch": 1.1819112627986348, "grad_norm": 6.302970886230469, "learning_rate": 0.0006060295790671217, "loss": 6.541, "step": 3463 }, { "epoch": 1.1822525597269624, "grad_norm": 3.364511251449585, "learning_rate": 0.0006059158134243458, "loss": 6.4438, "step": 3464 }, { "epoch": 1.18259385665529, "grad_norm": 3.289085626602173, "learning_rate": 0.0006058020477815699, "loss": 6.512, "step": 3465 }, { "epoch": 1.1829351535836177, "grad_norm": 3.2350807189941406, "learning_rate": 0.0006056882821387942, "loss": 6.6411, "step": 3466 }, { "epoch": 1.1832764505119453, "grad_norm": 3.8296823501586914, "learning_rate": 0.0006055745164960183, "loss": 6.2768, "step": 3467 }, { "epoch": 1.1836177474402731, "grad_norm": 3.2926394939422607, "learning_rate": 0.0006054607508532424, "loss": 6.6457, "step": 3468 }, { "epoch": 1.1839590443686008, "grad_norm": 3.056105613708496, "learning_rate": 0.0006053469852104665, "loss": 6.1748, "step": 3469 }, { "epoch": 1.1843003412969284, "grad_norm": 3.2163851261138916, "learning_rate": 0.0006052332195676906, "loss": 6.1277, "step": 3470 }, { "epoch": 1.184641638225256, "grad_norm": 3.086276054382324, "learning_rate": 0.0006051194539249148, "loss": 6.2933, "step": 3471 }, { "epoch": 1.1849829351535837, "grad_norm": 5.672454833984375, "learning_rate": 0.0006050056882821388, "loss": 5.7082, "step": 3472 }, { "epoch": 1.1853242320819113, "grad_norm": 3.3687098026275635, "learning_rate": 0.0006048919226393629, "loss": 6.9588, "step": 3473 }, { "epoch": 1.185665529010239, "grad_norm": 3.393932580947876, "learning_rate": 0.000604778156996587, "loss": 6.7387, "step": 3474 }, { "epoch": 1.1860068259385665, "grad_norm": 3.1261706352233887, "learning_rate": 0.0006046643913538111, "loss": 6.5774, "step": 3475 }, { "epoch": 1.1863481228668942, "grad_norm": 3.0573582649230957, "learning_rate": 0.0006045506257110352, "loss": 6.3902, "step": 3476 }, { "epoch": 1.1866894197952218, "grad_norm": 3.2194995880126953, "learning_rate": 0.0006044368600682594, "loss": 6.3269, "step": 3477 }, { "epoch": 1.1870307167235494, "grad_norm": 3.1429195404052734, "learning_rate": 0.0006043230944254835, "loss": 6.4637, "step": 3478 }, { "epoch": 1.187372013651877, "grad_norm": 3.0701262950897217, "learning_rate": 0.0006042093287827076, "loss": 6.6339, "step": 3479 }, { "epoch": 1.1877133105802047, "grad_norm": 3.701120138168335, "learning_rate": 0.0006040955631399317, "loss": 6.075, "step": 3480 }, { "epoch": 1.1880546075085325, "grad_norm": 3.3148467540740967, "learning_rate": 0.0006039817974971558, "loss": 6.1615, "step": 3481 }, { "epoch": 1.1883959044368602, "grad_norm": 3.282397985458374, "learning_rate": 0.0006038680318543799, "loss": 6.6828, "step": 3482 }, { "epoch": 1.1887372013651878, "grad_norm": 3.1746835708618164, "learning_rate": 0.0006037542662116042, "loss": 6.7092, "step": 3483 }, { "epoch": 1.1890784982935154, "grad_norm": 3.044534206390381, "learning_rate": 0.0006036405005688283, "loss": 5.9796, "step": 3484 }, { "epoch": 1.189419795221843, "grad_norm": 3.210864543914795, "learning_rate": 0.0006035267349260524, "loss": 6.522, "step": 3485 }, { "epoch": 1.1897610921501707, "grad_norm": 3.328636646270752, "learning_rate": 0.0006034129692832765, "loss": 6.472, "step": 3486 }, { "epoch": 1.1901023890784983, "grad_norm": 3.153980255126953, "learning_rate": 0.0006032992036405006, "loss": 7.1446, "step": 3487 }, { "epoch": 1.190443686006826, "grad_norm": 3.198655128479004, "learning_rate": 0.0006031854379977248, "loss": 6.6485, "step": 3488 }, { "epoch": 1.1907849829351536, "grad_norm": 3.2300875186920166, "learning_rate": 0.0006030716723549489, "loss": 6.5407, "step": 3489 }, { "epoch": 1.1911262798634812, "grad_norm": 3.580975294113159, "learning_rate": 0.000602957906712173, "loss": 6.4401, "step": 3490 }, { "epoch": 1.1914675767918088, "grad_norm": 3.525164842605591, "learning_rate": 0.000602844141069397, "loss": 6.486, "step": 3491 }, { "epoch": 1.1918088737201364, "grad_norm": 3.407437562942505, "learning_rate": 0.0006027303754266211, "loss": 6.3749, "step": 3492 }, { "epoch": 1.192150170648464, "grad_norm": 3.6547207832336426, "learning_rate": 0.0006026166097838452, "loss": 5.8585, "step": 3493 }, { "epoch": 1.192491467576792, "grad_norm": 3.2446658611297607, "learning_rate": 0.0006025028441410694, "loss": 6.5614, "step": 3494 }, { "epoch": 1.1928327645051195, "grad_norm": 3.150815963745117, "learning_rate": 0.0006023890784982935, "loss": 6.0707, "step": 3495 }, { "epoch": 1.1931740614334472, "grad_norm": 5.976104736328125, "learning_rate": 0.0006022753128555176, "loss": 6.0327, "step": 3496 }, { "epoch": 1.1935153583617748, "grad_norm": 3.18243408203125, "learning_rate": 0.0006021615472127417, "loss": 6.7489, "step": 3497 }, { "epoch": 1.1938566552901024, "grad_norm": 3.4427740573883057, "learning_rate": 0.0006020477815699658, "loss": 5.9822, "step": 3498 }, { "epoch": 1.19419795221843, "grad_norm": 3.2316274642944336, "learning_rate": 0.00060193401592719, "loss": 6.4285, "step": 3499 }, { "epoch": 1.1945392491467577, "grad_norm": 3.2206499576568604, "learning_rate": 0.0006018202502844142, "loss": 6.5806, "step": 3500 }, { "epoch": 1.1948805460750853, "grad_norm": 3.1856689453125, "learning_rate": 0.0006017064846416383, "loss": 6.5702, "step": 3501 }, { "epoch": 1.195221843003413, "grad_norm": 3.3193821907043457, "learning_rate": 0.0006015927189988624, "loss": 6.6808, "step": 3502 }, { "epoch": 1.1955631399317406, "grad_norm": 4.13479471206665, "learning_rate": 0.0006014789533560865, "loss": 5.7823, "step": 3503 }, { "epoch": 1.1959044368600682, "grad_norm": 4.107855319976807, "learning_rate": 0.0006013651877133106, "loss": 5.8399, "step": 3504 }, { "epoch": 1.1962457337883958, "grad_norm": 3.282972574234009, "learning_rate": 0.0006012514220705347, "loss": 6.0828, "step": 3505 }, { "epoch": 1.1965870307167235, "grad_norm": 3.2156879901885986, "learning_rate": 0.0006011376564277589, "loss": 6.3889, "step": 3506 }, { "epoch": 1.1969283276450513, "grad_norm": 3.4601330757141113, "learning_rate": 0.000601023890784983, "loss": 5.7612, "step": 3507 }, { "epoch": 1.197269624573379, "grad_norm": 3.1823012828826904, "learning_rate": 0.0006009101251422071, "loss": 6.3664, "step": 3508 }, { "epoch": 1.1976109215017066, "grad_norm": 3.098069906234741, "learning_rate": 0.0006007963594994312, "loss": 6.0548, "step": 3509 }, { "epoch": 1.1979522184300342, "grad_norm": 3.121912956237793, "learning_rate": 0.0006006825938566553, "loss": 6.4154, "step": 3510 }, { "epoch": 1.1982935153583618, "grad_norm": 3.1986167430877686, "learning_rate": 0.0006005688282138794, "loss": 6.5749, "step": 3511 }, { "epoch": 1.1986348122866894, "grad_norm": 3.2935080528259277, "learning_rate": 0.0006004550625711035, "loss": 6.0803, "step": 3512 }, { "epoch": 1.198976109215017, "grad_norm": 4.030786991119385, "learning_rate": 0.0006003412969283276, "loss": 6.0646, "step": 3513 }, { "epoch": 1.1993174061433447, "grad_norm": 3.3741588592529297, "learning_rate": 0.0006002275312855517, "loss": 6.4553, "step": 3514 }, { "epoch": 1.1996587030716723, "grad_norm": 3.256930112838745, "learning_rate": 0.0006001137656427758, "loss": 6.6533, "step": 3515 }, { "epoch": 1.2, "grad_norm": 3.3063952922821045, "learning_rate": 0.0006, "loss": 6.369, "step": 3516 }, { "epoch": 1.2003412969283276, "grad_norm": 3.2215917110443115, "learning_rate": 0.0005998862343572242, "loss": 6.4497, "step": 3517 }, { "epoch": 1.2006825938566552, "grad_norm": 4.139420032501221, "learning_rate": 0.0005997724687144483, "loss": 6.2639, "step": 3518 }, { "epoch": 1.2010238907849828, "grad_norm": 3.161328077316284, "learning_rate": 0.0005996587030716724, "loss": 6.4553, "step": 3519 }, { "epoch": 1.2013651877133107, "grad_norm": 3.1102607250213623, "learning_rate": 0.0005995449374288965, "loss": 6.6922, "step": 3520 }, { "epoch": 1.2017064846416383, "grad_norm": 3.2522287368774414, "learning_rate": 0.0005994311717861206, "loss": 6.8216, "step": 3521 }, { "epoch": 1.202047781569966, "grad_norm": 3.175919771194458, "learning_rate": 0.0005993174061433447, "loss": 6.2899, "step": 3522 }, { "epoch": 1.2023890784982936, "grad_norm": 3.2448034286499023, "learning_rate": 0.0005992036405005689, "loss": 6.7938, "step": 3523 }, { "epoch": 1.2027303754266212, "grad_norm": 3.362255811691284, "learning_rate": 0.000599089874857793, "loss": 6.4267, "step": 3524 }, { "epoch": 1.2030716723549488, "grad_norm": 3.1305243968963623, "learning_rate": 0.0005989761092150171, "loss": 6.7481, "step": 3525 }, { "epoch": 1.2034129692832765, "grad_norm": 3.1050209999084473, "learning_rate": 0.0005988623435722412, "loss": 6.5467, "step": 3526 }, { "epoch": 1.203754266211604, "grad_norm": 3.1616594791412354, "learning_rate": 0.0005987485779294653, "loss": 6.7965, "step": 3527 }, { "epoch": 1.2040955631399317, "grad_norm": 3.2334542274475098, "learning_rate": 0.0005986348122866895, "loss": 6.6105, "step": 3528 }, { "epoch": 1.2044368600682593, "grad_norm": 4.107026100158691, "learning_rate": 0.0005985210466439136, "loss": 6.1539, "step": 3529 }, { "epoch": 1.204778156996587, "grad_norm": 3.437368392944336, "learning_rate": 0.0005984072810011376, "loss": 6.7131, "step": 3530 }, { "epoch": 1.2051194539249146, "grad_norm": 3.2274441719055176, "learning_rate": 0.0005982935153583617, "loss": 6.4707, "step": 3531 }, { "epoch": 1.2054607508532422, "grad_norm": 3.5051920413970947, "learning_rate": 0.0005981797497155858, "loss": 6.4562, "step": 3532 }, { "epoch": 1.20580204778157, "grad_norm": 3.2286267280578613, "learning_rate": 0.00059806598407281, "loss": 6.3122, "step": 3533 }, { "epoch": 1.2061433447098977, "grad_norm": 3.3295505046844482, "learning_rate": 0.0005979522184300342, "loss": 6.7408, "step": 3534 }, { "epoch": 1.2064846416382253, "grad_norm": 3.2205469608306885, "learning_rate": 0.0005978384527872583, "loss": 6.4856, "step": 3535 }, { "epoch": 1.206825938566553, "grad_norm": 3.2709555625915527, "learning_rate": 0.0005977246871444824, "loss": 6.2622, "step": 3536 }, { "epoch": 1.2071672354948806, "grad_norm": 3.1822428703308105, "learning_rate": 0.0005976109215017065, "loss": 6.2327, "step": 3537 }, { "epoch": 1.2075085324232082, "grad_norm": 3.1136908531188965, "learning_rate": 0.0005974971558589306, "loss": 6.6299, "step": 3538 }, { "epoch": 1.2078498293515358, "grad_norm": 3.7378649711608887, "learning_rate": 0.0005973833902161547, "loss": 5.8328, "step": 3539 }, { "epoch": 1.2081911262798635, "grad_norm": 3.2474365234375, "learning_rate": 0.0005972696245733789, "loss": 6.47, "step": 3540 }, { "epoch": 1.208532423208191, "grad_norm": 3.283991575241089, "learning_rate": 0.000597155858930603, "loss": 6.2577, "step": 3541 }, { "epoch": 1.2088737201365187, "grad_norm": 3.1167514324188232, "learning_rate": 0.0005970420932878271, "loss": 6.4208, "step": 3542 }, { "epoch": 1.2092150170648464, "grad_norm": 3.0923006534576416, "learning_rate": 0.0005969283276450512, "loss": 6.7528, "step": 3543 }, { "epoch": 1.209556313993174, "grad_norm": 3.308056354522705, "learning_rate": 0.0005968145620022753, "loss": 7.0382, "step": 3544 }, { "epoch": 1.2098976109215016, "grad_norm": 3.772918701171875, "learning_rate": 0.0005967007963594994, "loss": 6.3507, "step": 3545 }, { "epoch": 1.2102389078498295, "grad_norm": 3.130896806716919, "learning_rate": 0.0005965870307167236, "loss": 6.9891, "step": 3546 }, { "epoch": 1.210580204778157, "grad_norm": 6.454501152038574, "learning_rate": 0.0005964732650739477, "loss": 6.0827, "step": 3547 }, { "epoch": 1.2109215017064847, "grad_norm": 3.259948968887329, "learning_rate": 0.0005963594994311718, "loss": 5.4018, "step": 3548 }, { "epoch": 1.2112627986348123, "grad_norm": 3.4372003078460693, "learning_rate": 0.000596245733788396, "loss": 6.5116, "step": 3549 }, { "epoch": 1.21160409556314, "grad_norm": 3.244441509246826, "learning_rate": 0.00059613196814562, "loss": 6.7611, "step": 3550 }, { "epoch": 1.2119453924914676, "grad_norm": 3.4917261600494385, "learning_rate": 0.0005960182025028442, "loss": 5.7466, "step": 3551 }, { "epoch": 1.2122866894197952, "grad_norm": 3.247199058532715, "learning_rate": 0.0005959044368600683, "loss": 6.3, "step": 3552 }, { "epoch": 1.2126279863481229, "grad_norm": 3.103398323059082, "learning_rate": 0.0005957906712172924, "loss": 6.9337, "step": 3553 }, { "epoch": 1.2129692832764505, "grad_norm": 3.120478868484497, "learning_rate": 0.0005956769055745165, "loss": 6.3149, "step": 3554 }, { "epoch": 1.213310580204778, "grad_norm": 3.190842866897583, "learning_rate": 0.0005955631399317406, "loss": 6.078, "step": 3555 }, { "epoch": 1.2136518771331057, "grad_norm": 3.1261842250823975, "learning_rate": 0.0005954493742889647, "loss": 6.4843, "step": 3556 }, { "epoch": 1.2139931740614334, "grad_norm": 3.333153486251831, "learning_rate": 0.0005953356086461889, "loss": 6.1083, "step": 3557 }, { "epoch": 1.214334470989761, "grad_norm": 3.2019073963165283, "learning_rate": 0.000595221843003413, "loss": 6.4916, "step": 3558 }, { "epoch": 1.2146757679180888, "grad_norm": 3.232032060623169, "learning_rate": 0.0005951080773606371, "loss": 6.5154, "step": 3559 }, { "epoch": 1.2150170648464165, "grad_norm": 3.299344301223755, "learning_rate": 0.0005949943117178612, "loss": 6.6801, "step": 3560 }, { "epoch": 1.215358361774744, "grad_norm": 3.3514556884765625, "learning_rate": 0.0005948805460750853, "loss": 6.742, "step": 3561 }, { "epoch": 1.2156996587030717, "grad_norm": 3.3210854530334473, "learning_rate": 0.0005947667804323094, "loss": 5.9867, "step": 3562 }, { "epoch": 1.2160409556313994, "grad_norm": 3.2020204067230225, "learning_rate": 0.0005946530147895336, "loss": 6.715, "step": 3563 }, { "epoch": 1.216382252559727, "grad_norm": 3.123915433883667, "learning_rate": 0.0005945392491467577, "loss": 6.6287, "step": 3564 }, { "epoch": 1.2167235494880546, "grad_norm": 3.2689836025238037, "learning_rate": 0.0005944254835039818, "loss": 6.6374, "step": 3565 }, { "epoch": 1.2170648464163822, "grad_norm": 4.918799877166748, "learning_rate": 0.000594311717861206, "loss": 5.6419, "step": 3566 }, { "epoch": 1.2174061433447099, "grad_norm": 3.0726449489593506, "learning_rate": 0.0005941979522184301, "loss": 6.2995, "step": 3567 }, { "epoch": 1.2177474402730375, "grad_norm": 3.210508108139038, "learning_rate": 0.0005940841865756542, "loss": 6.058, "step": 3568 }, { "epoch": 1.2180887372013651, "grad_norm": 3.5676910877227783, "learning_rate": 0.0005939704209328783, "loss": 5.5099, "step": 3569 }, { "epoch": 1.2184300341296928, "grad_norm": 3.3140625953674316, "learning_rate": 0.0005938566552901024, "loss": 6.7096, "step": 3570 }, { "epoch": 1.2187713310580204, "grad_norm": 3.6696548461914062, "learning_rate": 0.0005937428896473265, "loss": 4.3158, "step": 3571 }, { "epoch": 1.2191126279863482, "grad_norm": 3.3248839378356934, "learning_rate": 0.0005936291240045506, "loss": 6.9934, "step": 3572 }, { "epoch": 1.2194539249146759, "grad_norm": 3.5815327167510986, "learning_rate": 0.0005935153583617747, "loss": 5.5062, "step": 3573 }, { "epoch": 1.2197952218430035, "grad_norm": 3.4461398124694824, "learning_rate": 0.0005934015927189989, "loss": 6.7242, "step": 3574 }, { "epoch": 1.2201365187713311, "grad_norm": 3.182681083679199, "learning_rate": 0.000593287827076223, "loss": 6.8025, "step": 3575 }, { "epoch": 1.2204778156996587, "grad_norm": 3.6863200664520264, "learning_rate": 0.0005931740614334471, "loss": 6.4275, "step": 3576 }, { "epoch": 1.2208191126279864, "grad_norm": 3.2513556480407715, "learning_rate": 0.0005930602957906712, "loss": 6.3153, "step": 3577 }, { "epoch": 1.221160409556314, "grad_norm": 5.446213722229004, "learning_rate": 0.0005929465301478953, "loss": 6.1899, "step": 3578 }, { "epoch": 1.2215017064846416, "grad_norm": 5.742640972137451, "learning_rate": 0.0005928327645051194, "loss": 4.5945, "step": 3579 }, { "epoch": 1.2218430034129693, "grad_norm": 3.372920036315918, "learning_rate": 0.0005927189988623436, "loss": 6.4181, "step": 3580 }, { "epoch": 1.2221843003412969, "grad_norm": 3.323343515396118, "learning_rate": 0.0005926052332195677, "loss": 6.5921, "step": 3581 }, { "epoch": 1.2225255972696245, "grad_norm": 3.2678415775299072, "learning_rate": 0.0005924914675767918, "loss": 6.544, "step": 3582 }, { "epoch": 1.2228668941979521, "grad_norm": 3.0852653980255127, "learning_rate": 0.000592377701934016, "loss": 6.5581, "step": 3583 }, { "epoch": 1.2232081911262798, "grad_norm": 3.8021903038024902, "learning_rate": 0.0005922639362912401, "loss": 5.3767, "step": 3584 }, { "epoch": 1.2235494880546076, "grad_norm": 3.143950939178467, "learning_rate": 0.0005921501706484642, "loss": 6.1757, "step": 3585 }, { "epoch": 1.2238907849829352, "grad_norm": 3.0818896293640137, "learning_rate": 0.0005920364050056884, "loss": 6.837, "step": 3586 }, { "epoch": 1.2242320819112629, "grad_norm": 2.336493730545044, "learning_rate": 0.0005919226393629125, "loss": 3.1151, "step": 3587 }, { "epoch": 1.2245733788395905, "grad_norm": 3.16595721244812, "learning_rate": 0.0005918088737201365, "loss": 6.2877, "step": 3588 }, { "epoch": 1.2249146757679181, "grad_norm": 4.046875, "learning_rate": 0.0005916951080773606, "loss": 5.9708, "step": 3589 }, { "epoch": 1.2252559726962458, "grad_norm": 3.3334009647369385, "learning_rate": 0.0005915813424345847, "loss": 6.8327, "step": 3590 }, { "epoch": 1.2255972696245734, "grad_norm": 3.452604055404663, "learning_rate": 0.0005914675767918088, "loss": 6.5917, "step": 3591 }, { "epoch": 1.225938566552901, "grad_norm": 3.072746753692627, "learning_rate": 0.000591353811149033, "loss": 6.3403, "step": 3592 }, { "epoch": 1.2262798634812286, "grad_norm": 3.248479127883911, "learning_rate": 0.0005912400455062571, "loss": 6.3434, "step": 3593 }, { "epoch": 1.2266211604095563, "grad_norm": 3.0548229217529297, "learning_rate": 0.0005911262798634812, "loss": 6.6718, "step": 3594 }, { "epoch": 1.226962457337884, "grad_norm": 3.1804940700531006, "learning_rate": 0.0005910125142207053, "loss": 6.3622, "step": 3595 }, { "epoch": 1.2273037542662115, "grad_norm": 3.511061668395996, "learning_rate": 0.0005908987485779294, "loss": 6.1706, "step": 3596 }, { "epoch": 1.2276450511945391, "grad_norm": 3.2657320499420166, "learning_rate": 0.0005907849829351536, "loss": 6.4201, "step": 3597 }, { "epoch": 1.227986348122867, "grad_norm": 3.191793918609619, "learning_rate": 0.0005906712172923777, "loss": 6.2123, "step": 3598 }, { "epoch": 1.2283276450511946, "grad_norm": 3.1244120597839355, "learning_rate": 0.0005905574516496019, "loss": 6.2795, "step": 3599 }, { "epoch": 1.2286689419795223, "grad_norm": 3.2375831604003906, "learning_rate": 0.000590443686006826, "loss": 6.175, "step": 3600 }, { "epoch": 1.2290102389078499, "grad_norm": 3.0911331176757812, "learning_rate": 0.0005903299203640501, "loss": 5.3192, "step": 3601 }, { "epoch": 1.2293515358361775, "grad_norm": 3.3348145484924316, "learning_rate": 0.0005902161547212742, "loss": 6.614, "step": 3602 }, { "epoch": 1.2296928327645051, "grad_norm": 3.407654047012329, "learning_rate": 0.0005901023890784984, "loss": 5.4858, "step": 3603 }, { "epoch": 1.2300341296928328, "grad_norm": 3.8060050010681152, "learning_rate": 0.0005899886234357225, "loss": 6.4772, "step": 3604 }, { "epoch": 1.2303754266211604, "grad_norm": 3.2755227088928223, "learning_rate": 0.0005898748577929466, "loss": 6.4768, "step": 3605 }, { "epoch": 1.230716723549488, "grad_norm": 3.256535530090332, "learning_rate": 0.0005897610921501707, "loss": 6.8184, "step": 3606 }, { "epoch": 1.2310580204778157, "grad_norm": 3.2337422370910645, "learning_rate": 0.0005896473265073948, "loss": 6.2524, "step": 3607 }, { "epoch": 1.2313993174061433, "grad_norm": 3.262669801712036, "learning_rate": 0.0005895335608646188, "loss": 5.987, "step": 3608 }, { "epoch": 1.231740614334471, "grad_norm": 3.1079490184783936, "learning_rate": 0.000589419795221843, "loss": 6.9693, "step": 3609 }, { "epoch": 1.2320819112627985, "grad_norm": 3.214324951171875, "learning_rate": 0.0005893060295790671, "loss": 6.4031, "step": 3610 }, { "epoch": 1.2324232081911264, "grad_norm": 3.1826834678649902, "learning_rate": 0.0005891922639362912, "loss": 6.5096, "step": 3611 }, { "epoch": 1.232764505119454, "grad_norm": 3.2287216186523438, "learning_rate": 0.0005890784982935153, "loss": 6.587, "step": 3612 }, { "epoch": 1.2331058020477816, "grad_norm": 3.313389778137207, "learning_rate": 0.0005889647326507394, "loss": 6.2603, "step": 3613 }, { "epoch": 1.2334470989761093, "grad_norm": 3.2733147144317627, "learning_rate": 0.0005888509670079636, "loss": 6.7555, "step": 3614 }, { "epoch": 1.233788395904437, "grad_norm": 3.265892505645752, "learning_rate": 0.0005887372013651877, "loss": 6.1515, "step": 3615 }, { "epoch": 1.2341296928327645, "grad_norm": 3.340308904647827, "learning_rate": 0.0005886234357224119, "loss": 6.3667, "step": 3616 }, { "epoch": 1.2344709897610922, "grad_norm": 3.3036296367645264, "learning_rate": 0.000588509670079636, "loss": 7.1784, "step": 3617 }, { "epoch": 1.2348122866894198, "grad_norm": 3.338773250579834, "learning_rate": 0.0005883959044368601, "loss": 6.5319, "step": 3618 }, { "epoch": 1.2351535836177474, "grad_norm": 3.15885329246521, "learning_rate": 0.0005882821387940842, "loss": 6.8377, "step": 3619 }, { "epoch": 1.235494880546075, "grad_norm": 3.140784978866577, "learning_rate": 0.0005881683731513084, "loss": 6.2892, "step": 3620 }, { "epoch": 1.2358361774744027, "grad_norm": 3.2073025703430176, "learning_rate": 0.0005880546075085325, "loss": 5.9276, "step": 3621 }, { "epoch": 1.2361774744027303, "grad_norm": 4.180164813995361, "learning_rate": 0.0005879408418657566, "loss": 5.3202, "step": 3622 }, { "epoch": 1.236518771331058, "grad_norm": 3.397432327270508, "learning_rate": 0.0005878270762229807, "loss": 6.0951, "step": 3623 }, { "epoch": 1.2368600682593858, "grad_norm": 3.3566372394561768, "learning_rate": 0.0005877133105802048, "loss": 6.4745, "step": 3624 }, { "epoch": 1.2372013651877134, "grad_norm": 3.236814260482788, "learning_rate": 0.0005875995449374289, "loss": 6.5627, "step": 3625 }, { "epoch": 1.237542662116041, "grad_norm": 3.453336238861084, "learning_rate": 0.0005874857792946531, "loss": 5.8774, "step": 3626 }, { "epoch": 1.2378839590443687, "grad_norm": 6.298210620880127, "learning_rate": 0.0005873720136518771, "loss": 5.7175, "step": 3627 }, { "epoch": 1.2382252559726963, "grad_norm": 3.396085023880005, "learning_rate": 0.0005872582480091012, "loss": 6.5425, "step": 3628 }, { "epoch": 1.238566552901024, "grad_norm": 3.876892566680908, "learning_rate": 0.0005871444823663253, "loss": 6.0154, "step": 3629 }, { "epoch": 1.2389078498293515, "grad_norm": 3.221203088760376, "learning_rate": 0.0005870307167235494, "loss": 6.6467, "step": 3630 }, { "epoch": 1.2392491467576792, "grad_norm": 4.505368232727051, "learning_rate": 0.0005869169510807735, "loss": 5.1206, "step": 3631 }, { "epoch": 1.2395904436860068, "grad_norm": 3.1172850131988525, "learning_rate": 0.0005868031854379977, "loss": 6.6021, "step": 3632 }, { "epoch": 1.2399317406143344, "grad_norm": 3.2955751419067383, "learning_rate": 0.0005866894197952219, "loss": 5.3394, "step": 3633 }, { "epoch": 1.240273037542662, "grad_norm": 3.096254348754883, "learning_rate": 0.000586575654152446, "loss": 6.3385, "step": 3634 }, { "epoch": 1.2406143344709897, "grad_norm": 3.1540637016296387, "learning_rate": 0.0005864618885096701, "loss": 6.8789, "step": 3635 }, { "epoch": 1.2409556313993173, "grad_norm": 3.2090256214141846, "learning_rate": 0.0005863481228668942, "loss": 5.8644, "step": 3636 }, { "epoch": 1.2412969283276452, "grad_norm": 3.128225564956665, "learning_rate": 0.0005862343572241184, "loss": 6.6566, "step": 3637 }, { "epoch": 1.2416382252559728, "grad_norm": 4.46166467666626, "learning_rate": 0.0005861205915813425, "loss": 5.8012, "step": 3638 }, { "epoch": 1.2419795221843004, "grad_norm": 3.2098820209503174, "learning_rate": 0.0005860068259385666, "loss": 6.0215, "step": 3639 }, { "epoch": 1.242320819112628, "grad_norm": 4.331230163574219, "learning_rate": 0.0005858930602957907, "loss": 5.8321, "step": 3640 }, { "epoch": 1.2426621160409557, "grad_norm": 3.4035027027130127, "learning_rate": 0.0005857792946530148, "loss": 6.608, "step": 3641 }, { "epoch": 1.2430034129692833, "grad_norm": 3.085808515548706, "learning_rate": 0.0005856655290102389, "loss": 6.7047, "step": 3642 }, { "epoch": 1.243344709897611, "grad_norm": 3.15273380279541, "learning_rate": 0.0005855517633674631, "loss": 5.9884, "step": 3643 }, { "epoch": 1.2436860068259386, "grad_norm": 3.1481151580810547, "learning_rate": 0.0005854379977246872, "loss": 6.578, "step": 3644 }, { "epoch": 1.2440273037542662, "grad_norm": 3.1449875831604004, "learning_rate": 0.0005853242320819113, "loss": 6.3897, "step": 3645 }, { "epoch": 1.2443686006825938, "grad_norm": 3.1563587188720703, "learning_rate": 0.0005852104664391354, "loss": 6.6151, "step": 3646 }, { "epoch": 1.2447098976109214, "grad_norm": 4.4367146492004395, "learning_rate": 0.0005850967007963594, "loss": 5.5777, "step": 3647 }, { "epoch": 1.245051194539249, "grad_norm": 3.3431198596954346, "learning_rate": 0.0005849829351535835, "loss": 6.4409, "step": 3648 }, { "epoch": 1.2453924914675767, "grad_norm": 4.078390121459961, "learning_rate": 0.0005848691695108077, "loss": 5.2748, "step": 3649 }, { "epoch": 1.2457337883959045, "grad_norm": 3.3056936264038086, "learning_rate": 0.0005847554038680319, "loss": 6.1259, "step": 3650 }, { "epoch": 1.2460750853242322, "grad_norm": 3.204592227935791, "learning_rate": 0.000584641638225256, "loss": 6.6076, "step": 3651 }, { "epoch": 1.2464163822525598, "grad_norm": 3.014092445373535, "learning_rate": 0.0005845278725824801, "loss": 6.6634, "step": 3652 }, { "epoch": 1.2467576791808874, "grad_norm": 2.9963581562042236, "learning_rate": 0.0005844141069397042, "loss": 6.0894, "step": 3653 }, { "epoch": 1.247098976109215, "grad_norm": 3.0778584480285645, "learning_rate": 0.0005843003412969284, "loss": 6.6684, "step": 3654 }, { "epoch": 1.2474402730375427, "grad_norm": 3.172811985015869, "learning_rate": 0.0005841865756541525, "loss": 6.6487, "step": 3655 }, { "epoch": 1.2477815699658703, "grad_norm": 3.1092119216918945, "learning_rate": 0.0005840728100113766, "loss": 6.0495, "step": 3656 }, { "epoch": 1.248122866894198, "grad_norm": 3.1255362033843994, "learning_rate": 0.0005839590443686007, "loss": 6.6304, "step": 3657 }, { "epoch": 1.2484641638225256, "grad_norm": 3.0759193897247314, "learning_rate": 0.0005838452787258248, "loss": 6.9172, "step": 3658 }, { "epoch": 1.2488054607508532, "grad_norm": 3.2935664653778076, "learning_rate": 0.0005837315130830489, "loss": 5.9662, "step": 3659 }, { "epoch": 1.2491467576791808, "grad_norm": 3.370425224304199, "learning_rate": 0.0005836177474402731, "loss": 6.481, "step": 3660 }, { "epoch": 1.2494880546075084, "grad_norm": 3.1100013256073, "learning_rate": 0.0005835039817974972, "loss": 6.6346, "step": 3661 }, { "epoch": 1.249829351535836, "grad_norm": 3.2249128818511963, "learning_rate": 0.0005833902161547213, "loss": 6.2269, "step": 3662 }, { "epoch": 1.250170648464164, "grad_norm": 4.628625869750977, "learning_rate": 0.0005832764505119454, "loss": 5.2683, "step": 3663 }, { "epoch": 1.2505119453924913, "grad_norm": 3.3335788249969482, "learning_rate": 0.0005831626848691695, "loss": 5.914, "step": 3664 }, { "epoch": 1.2508532423208192, "grad_norm": 3.5215747356414795, "learning_rate": 0.0005830489192263936, "loss": 5.747, "step": 3665 }, { "epoch": 1.2511945392491468, "grad_norm": 3.2354788780212402, "learning_rate": 0.0005829351535836177, "loss": 5.9926, "step": 3666 }, { "epoch": 1.2515358361774744, "grad_norm": 3.289290428161621, "learning_rate": 0.0005828213879408419, "loss": 6.9403, "step": 3667 }, { "epoch": 1.251877133105802, "grad_norm": 3.2592785358428955, "learning_rate": 0.000582707622298066, "loss": 6.102, "step": 3668 }, { "epoch": 1.2522184300341297, "grad_norm": 3.279924154281616, "learning_rate": 0.0005825938566552901, "loss": 6.5536, "step": 3669 }, { "epoch": 1.2525597269624573, "grad_norm": 6.693105697631836, "learning_rate": 0.0005824800910125142, "loss": 4.9568, "step": 3670 }, { "epoch": 1.252901023890785, "grad_norm": 3.1594643592834473, "learning_rate": 0.0005823663253697383, "loss": 6.6421, "step": 3671 }, { "epoch": 1.2532423208191126, "grad_norm": 3.6135575771331787, "learning_rate": 0.0005822525597269625, "loss": 5.984, "step": 3672 }, { "epoch": 1.2535836177474402, "grad_norm": 3.2949254512786865, "learning_rate": 0.0005821387940841866, "loss": 6.6492, "step": 3673 }, { "epoch": 1.253924914675768, "grad_norm": 3.300008773803711, "learning_rate": 0.0005820250284414107, "loss": 5.9884, "step": 3674 }, { "epoch": 1.2542662116040955, "grad_norm": 3.126598596572876, "learning_rate": 0.0005819112627986348, "loss": 6.3651, "step": 3675 }, { "epoch": 1.2546075085324233, "grad_norm": 3.004107713699341, "learning_rate": 0.0005817974971558589, "loss": 6.3782, "step": 3676 }, { "epoch": 1.2549488054607507, "grad_norm": 3.3004016876220703, "learning_rate": 0.0005816837315130831, "loss": 5.9222, "step": 3677 }, { "epoch": 1.2552901023890786, "grad_norm": 3.169092893600464, "learning_rate": 0.0005815699658703072, "loss": 6.5686, "step": 3678 }, { "epoch": 1.2556313993174062, "grad_norm": 4.297203063964844, "learning_rate": 0.0005814562002275313, "loss": 5.7196, "step": 3679 }, { "epoch": 1.2559726962457338, "grad_norm": 3.2692508697509766, "learning_rate": 0.0005813424345847554, "loss": 6.7681, "step": 3680 }, { "epoch": 1.2563139931740614, "grad_norm": 3.4113283157348633, "learning_rate": 0.0005812286689419795, "loss": 5.9537, "step": 3681 }, { "epoch": 1.256655290102389, "grad_norm": 3.1349599361419678, "learning_rate": 0.0005811149032992036, "loss": 6.4065, "step": 3682 }, { "epoch": 1.2569965870307167, "grad_norm": 3.8762474060058594, "learning_rate": 0.0005810011376564279, "loss": 6.0212, "step": 3683 }, { "epoch": 1.2573378839590443, "grad_norm": 3.2077221870422363, "learning_rate": 0.000580887372013652, "loss": 6.3899, "step": 3684 }, { "epoch": 1.257679180887372, "grad_norm": 3.3499104976654053, "learning_rate": 0.0005807736063708761, "loss": 6.1954, "step": 3685 }, { "epoch": 1.2580204778156996, "grad_norm": 3.0911295413970947, "learning_rate": 0.0005806598407281001, "loss": 6.2744, "step": 3686 }, { "epoch": 1.2583617747440274, "grad_norm": 3.294782876968384, "learning_rate": 0.0005805460750853242, "loss": 6.4801, "step": 3687 }, { "epoch": 1.2587030716723548, "grad_norm": 3.7116618156433105, "learning_rate": 0.0005804323094425483, "loss": 5.8219, "step": 3688 }, { "epoch": 1.2590443686006827, "grad_norm": 3.1158978939056396, "learning_rate": 0.0005803185437997725, "loss": 6.1431, "step": 3689 }, { "epoch": 1.25938566552901, "grad_norm": 3.4460155963897705, "learning_rate": 0.0005802047781569966, "loss": 6.9438, "step": 3690 }, { "epoch": 1.259726962457338, "grad_norm": 3.296086311340332, "learning_rate": 0.0005800910125142207, "loss": 6.5641, "step": 3691 }, { "epoch": 1.2600682593856656, "grad_norm": 3.484635591506958, "learning_rate": 0.0005799772468714448, "loss": 6.6866, "step": 3692 }, { "epoch": 1.2604095563139932, "grad_norm": 3.001859664916992, "learning_rate": 0.0005798634812286689, "loss": 6.845, "step": 3693 }, { "epoch": 1.2607508532423208, "grad_norm": 3.264299154281616, "learning_rate": 0.0005797497155858931, "loss": 6.5779, "step": 3694 }, { "epoch": 1.2610921501706485, "grad_norm": 4.64964485168457, "learning_rate": 0.0005796359499431172, "loss": 6.1898, "step": 3695 }, { "epoch": 1.261433447098976, "grad_norm": 3.335503339767456, "learning_rate": 0.0005795221843003413, "loss": 6.6275, "step": 3696 }, { "epoch": 1.2617747440273037, "grad_norm": 3.441882610321045, "learning_rate": 0.0005794084186575654, "loss": 6.1924, "step": 3697 }, { "epoch": 1.2621160409556313, "grad_norm": 3.124157428741455, "learning_rate": 0.0005792946530147895, "loss": 6.4121, "step": 3698 }, { "epoch": 1.262457337883959, "grad_norm": 3.6481199264526367, "learning_rate": 0.0005791808873720136, "loss": 6.0989, "step": 3699 }, { "epoch": 1.2627986348122868, "grad_norm": 3.1325788497924805, "learning_rate": 0.0005790671217292379, "loss": 6.6209, "step": 3700 }, { "epoch": 1.2631399317406142, "grad_norm": 3.0426456928253174, "learning_rate": 0.000578953356086462, "loss": 6.2306, "step": 3701 }, { "epoch": 1.263481228668942, "grad_norm": 3.1229195594787598, "learning_rate": 0.0005788395904436861, "loss": 6.9213, "step": 3702 }, { "epoch": 1.2638225255972695, "grad_norm": 3.100487232208252, "learning_rate": 0.0005787258248009102, "loss": 6.5218, "step": 3703 }, { "epoch": 1.2641638225255973, "grad_norm": 3.2305166721343994, "learning_rate": 0.0005786120591581343, "loss": 6.4707, "step": 3704 }, { "epoch": 1.264505119453925, "grad_norm": 3.1007251739501953, "learning_rate": 0.0005784982935153583, "loss": 7.0472, "step": 3705 }, { "epoch": 1.2648464163822526, "grad_norm": 3.452636480331421, "learning_rate": 0.0005783845278725825, "loss": 6.0918, "step": 3706 }, { "epoch": 1.2651877133105802, "grad_norm": 3.7483577728271484, "learning_rate": 0.0005782707622298066, "loss": 5.3098, "step": 3707 }, { "epoch": 1.2655290102389078, "grad_norm": 3.2747714519500732, "learning_rate": 0.0005781569965870307, "loss": 6.4773, "step": 3708 }, { "epoch": 1.2658703071672355, "grad_norm": 3.334418773651123, "learning_rate": 0.0005780432309442548, "loss": 5.9476, "step": 3709 }, { "epoch": 1.266211604095563, "grad_norm": 3.388519287109375, "learning_rate": 0.0005779294653014789, "loss": 6.7832, "step": 3710 }, { "epoch": 1.2665529010238907, "grad_norm": 3.2145025730133057, "learning_rate": 0.000577815699658703, "loss": 7.1023, "step": 3711 }, { "epoch": 1.2668941979522184, "grad_norm": 3.1737163066864014, "learning_rate": 0.0005777019340159272, "loss": 6.2078, "step": 3712 }, { "epoch": 1.2672354948805462, "grad_norm": 3.514313220977783, "learning_rate": 0.0005775881683731513, "loss": 5.9245, "step": 3713 }, { "epoch": 1.2675767918088736, "grad_norm": 3.119048595428467, "learning_rate": 0.0005774744027303754, "loss": 6.1917, "step": 3714 }, { "epoch": 1.2679180887372015, "grad_norm": 3.2024073600769043, "learning_rate": 0.0005773606370875995, "loss": 6.2773, "step": 3715 }, { "epoch": 1.268259385665529, "grad_norm": 3.101339817047119, "learning_rate": 0.0005772468714448236, "loss": 6.6976, "step": 3716 }, { "epoch": 1.2686006825938567, "grad_norm": 4.5886616706848145, "learning_rate": 0.0005771331058020479, "loss": 5.4188, "step": 3717 }, { "epoch": 1.2689419795221843, "grad_norm": 3.2115495204925537, "learning_rate": 0.000577019340159272, "loss": 6.3656, "step": 3718 }, { "epoch": 1.269283276450512, "grad_norm": 3.105013608932495, "learning_rate": 0.0005769055745164961, "loss": 6.04, "step": 3719 }, { "epoch": 1.2696245733788396, "grad_norm": 3.715817451477051, "learning_rate": 0.0005767918088737202, "loss": 6.0363, "step": 3720 }, { "epoch": 1.2699658703071672, "grad_norm": 3.0130434036254883, "learning_rate": 0.0005766780432309443, "loss": 6.3197, "step": 3721 }, { "epoch": 1.2703071672354949, "grad_norm": 3.1294775009155273, "learning_rate": 0.0005765642775881684, "loss": 6.4423, "step": 3722 }, { "epoch": 1.2706484641638225, "grad_norm": 3.1962530612945557, "learning_rate": 0.0005764505119453926, "loss": 6.3955, "step": 3723 }, { "epoch": 1.2709897610921501, "grad_norm": 3.1761527061462402, "learning_rate": 0.0005763367463026167, "loss": 6.3929, "step": 3724 }, { "epoch": 1.2713310580204777, "grad_norm": 3.09688138961792, "learning_rate": 0.0005762229806598407, "loss": 6.5535, "step": 3725 }, { "epoch": 1.2716723549488056, "grad_norm": 3.14316463470459, "learning_rate": 0.0005761092150170648, "loss": 6.3527, "step": 3726 }, { "epoch": 1.272013651877133, "grad_norm": 2.774230480194092, "learning_rate": 0.0005759954493742889, "loss": 3.1619, "step": 3727 }, { "epoch": 1.2723549488054609, "grad_norm": 3.723099946975708, "learning_rate": 0.000575881683731513, "loss": 6.4016, "step": 3728 }, { "epoch": 1.2726962457337885, "grad_norm": 3.691892385482788, "learning_rate": 0.0005757679180887372, "loss": 5.958, "step": 3729 }, { "epoch": 1.273037542662116, "grad_norm": 3.364720582962036, "learning_rate": 0.0005756541524459613, "loss": 6.9903, "step": 3730 }, { "epoch": 1.2733788395904437, "grad_norm": 3.500404119491577, "learning_rate": 0.0005755403868031854, "loss": 6.1624, "step": 3731 }, { "epoch": 1.2737201365187714, "grad_norm": 4.1718597412109375, "learning_rate": 0.0005754266211604095, "loss": 6.2387, "step": 3732 }, { "epoch": 1.274061433447099, "grad_norm": 3.1191086769104004, "learning_rate": 0.0005753128555176336, "loss": 7.2727, "step": 3733 }, { "epoch": 1.2744027303754266, "grad_norm": 3.2956604957580566, "learning_rate": 0.0005751990898748578, "loss": 6.6461, "step": 3734 }, { "epoch": 1.2747440273037542, "grad_norm": 3.2914106845855713, "learning_rate": 0.000575085324232082, "loss": 6.8773, "step": 3735 }, { "epoch": 1.2750853242320819, "grad_norm": 3.343107223510742, "learning_rate": 0.0005749715585893061, "loss": 6.4053, "step": 3736 }, { "epoch": 1.2754266211604095, "grad_norm": 3.184845447540283, "learning_rate": 0.0005748577929465302, "loss": 5.8288, "step": 3737 }, { "epoch": 1.2757679180887371, "grad_norm": 3.4929275512695312, "learning_rate": 0.0005747440273037543, "loss": 6.2784, "step": 3738 }, { "epoch": 1.276109215017065, "grad_norm": 3.1286535263061523, "learning_rate": 0.0005746302616609784, "loss": 6.8593, "step": 3739 }, { "epoch": 1.2764505119453924, "grad_norm": 3.4388904571533203, "learning_rate": 0.0005745164960182026, "loss": 5.3746, "step": 3740 }, { "epoch": 1.2767918088737202, "grad_norm": 3.239522933959961, "learning_rate": 0.0005744027303754267, "loss": 5.9408, "step": 3741 }, { "epoch": 1.2771331058020479, "grad_norm": 3.316533088684082, "learning_rate": 0.0005742889647326508, "loss": 6.9465, "step": 3742 }, { "epoch": 1.2774744027303755, "grad_norm": 3.470165967941284, "learning_rate": 0.0005741751990898749, "loss": 5.991, "step": 3743 }, { "epoch": 1.2778156996587031, "grad_norm": 3.250302314758301, "learning_rate": 0.0005740614334470989, "loss": 6.1206, "step": 3744 }, { "epoch": 1.2781569965870307, "grad_norm": 3.4087443351745605, "learning_rate": 0.000573947667804323, "loss": 6.4992, "step": 3745 }, { "epoch": 1.2784982935153584, "grad_norm": 3.2577805519104004, "learning_rate": 0.0005738339021615472, "loss": 6.3207, "step": 3746 }, { "epoch": 1.278839590443686, "grad_norm": 4.152851104736328, "learning_rate": 0.0005737201365187713, "loss": 4.8242, "step": 3747 }, { "epoch": 1.2791808873720136, "grad_norm": 3.181995391845703, "learning_rate": 0.0005736063708759954, "loss": 6.548, "step": 3748 }, { "epoch": 1.2795221843003413, "grad_norm": 3.4868032932281494, "learning_rate": 0.0005734926052332195, "loss": 6.2143, "step": 3749 }, { "epoch": 1.2798634812286689, "grad_norm": 3.12666392326355, "learning_rate": 0.0005733788395904436, "loss": 6.9013, "step": 3750 }, { "epoch": 1.2802047781569965, "grad_norm": 3.1087594032287598, "learning_rate": 0.0005732650739476678, "loss": 6.8698, "step": 3751 }, { "epoch": 1.2805460750853244, "grad_norm": 3.2161049842834473, "learning_rate": 0.000573151308304892, "loss": 6.4872, "step": 3752 }, { "epoch": 1.2808873720136518, "grad_norm": 3.140566110610962, "learning_rate": 0.0005730375426621161, "loss": 6.7521, "step": 3753 }, { "epoch": 1.2812286689419796, "grad_norm": 3.375011920928955, "learning_rate": 0.0005729237770193402, "loss": 6.361, "step": 3754 }, { "epoch": 1.2815699658703072, "grad_norm": 3.8893067836761475, "learning_rate": 0.0005728100113765643, "loss": 6.0318, "step": 3755 }, { "epoch": 1.2819112627986349, "grad_norm": 3.1433284282684326, "learning_rate": 0.0005726962457337884, "loss": 6.0137, "step": 3756 }, { "epoch": 1.2822525597269625, "grad_norm": 3.2328553199768066, "learning_rate": 0.0005725824800910126, "loss": 6.3901, "step": 3757 }, { "epoch": 1.2825938566552901, "grad_norm": 3.0348122119903564, "learning_rate": 0.0005724687144482367, "loss": 6.2821, "step": 3758 }, { "epoch": 1.2829351535836178, "grad_norm": 3.163795232772827, "learning_rate": 0.0005723549488054608, "loss": 6.2773, "step": 3759 }, { "epoch": 1.2832764505119454, "grad_norm": 3.1185197830200195, "learning_rate": 0.0005722411831626849, "loss": 6.575, "step": 3760 }, { "epoch": 1.283617747440273, "grad_norm": 4.635683536529541, "learning_rate": 0.000572127417519909, "loss": 5.0669, "step": 3761 }, { "epoch": 1.2839590443686006, "grad_norm": 5.593306541442871, "learning_rate": 0.0005720136518771331, "loss": 5.0333, "step": 3762 }, { "epoch": 1.2843003412969283, "grad_norm": 3.199862241744995, "learning_rate": 0.0005718998862343572, "loss": 6.5938, "step": 3763 }, { "epoch": 1.284641638225256, "grad_norm": 3.554161787033081, "learning_rate": 0.0005717861205915813, "loss": 6.2362, "step": 3764 }, { "epoch": 1.2849829351535837, "grad_norm": 3.397326946258545, "learning_rate": 0.0005716723549488054, "loss": 6.0153, "step": 3765 }, { "epoch": 1.2853242320819112, "grad_norm": 3.148334264755249, "learning_rate": 0.0005715585893060295, "loss": 6.2978, "step": 3766 }, { "epoch": 1.285665529010239, "grad_norm": 4.568434715270996, "learning_rate": 0.0005714448236632536, "loss": 5.762, "step": 3767 }, { "epoch": 1.2860068259385666, "grad_norm": 3.284611225128174, "learning_rate": 0.0005713310580204778, "loss": 6.6268, "step": 3768 }, { "epoch": 1.2863481228668943, "grad_norm": 14.985135078430176, "learning_rate": 0.000571217292377702, "loss": 4.1334, "step": 3769 }, { "epoch": 1.286689419795222, "grad_norm": 3.237342119216919, "learning_rate": 0.0005711035267349261, "loss": 6.3474, "step": 3770 }, { "epoch": 1.2870307167235495, "grad_norm": 3.287044048309326, "learning_rate": 0.0005709897610921502, "loss": 6.2234, "step": 3771 }, { "epoch": 1.2873720136518771, "grad_norm": 3.0932626724243164, "learning_rate": 0.0005708759954493743, "loss": 6.8984, "step": 3772 }, { "epoch": 1.2877133105802048, "grad_norm": 4.495637893676758, "learning_rate": 0.0005707622298065984, "loss": 5.0791, "step": 3773 }, { "epoch": 1.2880546075085324, "grad_norm": 3.20120906829834, "learning_rate": 0.0005706484641638225, "loss": 6.4873, "step": 3774 }, { "epoch": 1.28839590443686, "grad_norm": 3.146425485610962, "learning_rate": 0.0005705346985210467, "loss": 6.2309, "step": 3775 }, { "epoch": 1.2887372013651877, "grad_norm": 3.152387857437134, "learning_rate": 0.0005704209328782708, "loss": 6.3663, "step": 3776 }, { "epoch": 1.2890784982935153, "grad_norm": 3.333812952041626, "learning_rate": 0.0005703071672354949, "loss": 6.1972, "step": 3777 }, { "epoch": 1.2894197952218431, "grad_norm": 4.746973037719727, "learning_rate": 0.000570193401592719, "loss": 4.9203, "step": 3778 }, { "epoch": 1.2897610921501705, "grad_norm": 5.360435962677002, "learning_rate": 0.0005700796359499431, "loss": 5.874, "step": 3779 }, { "epoch": 1.2901023890784984, "grad_norm": 3.3603954315185547, "learning_rate": 0.0005699658703071673, "loss": 6.2611, "step": 3780 }, { "epoch": 1.290443686006826, "grad_norm": 3.251047134399414, "learning_rate": 0.0005698521046643914, "loss": 5.6341, "step": 3781 }, { "epoch": 1.2907849829351536, "grad_norm": 3.316932201385498, "learning_rate": 0.0005697383390216155, "loss": 6.9426, "step": 3782 }, { "epoch": 1.2911262798634813, "grad_norm": 5.679823398590088, "learning_rate": 0.0005696245733788395, "loss": 5.4432, "step": 3783 }, { "epoch": 1.291467576791809, "grad_norm": 3.251844644546509, "learning_rate": 0.0005695108077360637, "loss": 6.9916, "step": 3784 }, { "epoch": 1.2918088737201365, "grad_norm": 3.1990809440612793, "learning_rate": 0.0005693970420932878, "loss": 6.6435, "step": 3785 }, { "epoch": 1.2921501706484642, "grad_norm": 3.5617237091064453, "learning_rate": 0.000569283276450512, "loss": 5.8256, "step": 3786 }, { "epoch": 1.2924914675767918, "grad_norm": 3.2828528881073, "learning_rate": 0.0005691695108077361, "loss": 3.0921, "step": 3787 }, { "epoch": 1.2928327645051194, "grad_norm": 4.945265769958496, "learning_rate": 0.0005690557451649602, "loss": 5.9462, "step": 3788 }, { "epoch": 1.293174061433447, "grad_norm": 4.702214241027832, "learning_rate": 0.0005689419795221843, "loss": 5.9888, "step": 3789 }, { "epoch": 1.2935153583617747, "grad_norm": 3.3534915447235107, "learning_rate": 0.0005688282138794084, "loss": 6.2144, "step": 3790 }, { "epoch": 1.2938566552901025, "grad_norm": 3.387922525405884, "learning_rate": 0.0005687144482366325, "loss": 5.9508, "step": 3791 }, { "epoch": 1.29419795221843, "grad_norm": 3.280163049697876, "learning_rate": 0.0005686006825938567, "loss": 7.1366, "step": 3792 }, { "epoch": 1.2945392491467578, "grad_norm": 3.0818960666656494, "learning_rate": 0.0005684869169510808, "loss": 6.249, "step": 3793 }, { "epoch": 1.2948805460750854, "grad_norm": 3.1594650745391846, "learning_rate": 0.0005683731513083049, "loss": 6.8152, "step": 3794 }, { "epoch": 1.295221843003413, "grad_norm": 3.0736000537872314, "learning_rate": 0.000568259385665529, "loss": 6.0188, "step": 3795 }, { "epoch": 1.2955631399317407, "grad_norm": 3.239778757095337, "learning_rate": 0.0005681456200227531, "loss": 6.2566, "step": 3796 }, { "epoch": 1.2959044368600683, "grad_norm": 3.039710521697998, "learning_rate": 0.0005680318543799773, "loss": 6.3336, "step": 3797 }, { "epoch": 1.296245733788396, "grad_norm": 3.1235196590423584, "learning_rate": 0.0005679180887372014, "loss": 6.0182, "step": 3798 }, { "epoch": 1.2965870307167235, "grad_norm": 3.3898894786834717, "learning_rate": 0.0005678043230944255, "loss": 5.9916, "step": 3799 }, { "epoch": 1.2969283276450512, "grad_norm": 3.1873528957366943, "learning_rate": 0.0005676905574516497, "loss": 6.7079, "step": 3800 }, { "epoch": 1.2972696245733788, "grad_norm": 3.133732557296753, "learning_rate": 0.0005675767918088738, "loss": 6.0902, "step": 3801 }, { "epoch": 1.2976109215017064, "grad_norm": 3.195161819458008, "learning_rate": 0.0005674630261660978, "loss": 6.3961, "step": 3802 }, { "epoch": 1.297952218430034, "grad_norm": 3.204354763031006, "learning_rate": 0.000567349260523322, "loss": 5.9973, "step": 3803 }, { "epoch": 1.298293515358362, "grad_norm": 3.1573190689086914, "learning_rate": 0.0005672354948805461, "loss": 6.5401, "step": 3804 }, { "epoch": 1.2986348122866893, "grad_norm": 3.1424853801727295, "learning_rate": 0.0005671217292377702, "loss": 6.5217, "step": 3805 }, { "epoch": 1.2989761092150172, "grad_norm": 3.2337796688079834, "learning_rate": 0.0005670079635949943, "loss": 6.818, "step": 3806 }, { "epoch": 1.2993174061433448, "grad_norm": 3.1818370819091797, "learning_rate": 0.0005668941979522184, "loss": 6.4562, "step": 3807 }, { "epoch": 1.2996587030716724, "grad_norm": 3.1999707221984863, "learning_rate": 0.0005667804323094425, "loss": 6.2445, "step": 3808 }, { "epoch": 1.3, "grad_norm": 3.0630509853363037, "learning_rate": 0.0005666666666666667, "loss": 6.6849, "step": 3809 }, { "epoch": 1.3003412969283277, "grad_norm": 3.165708541870117, "learning_rate": 0.0005665529010238908, "loss": 6.9432, "step": 3810 }, { "epoch": 1.3006825938566553, "grad_norm": 3.325191020965576, "learning_rate": 0.0005664391353811149, "loss": 6.0563, "step": 3811 }, { "epoch": 1.301023890784983, "grad_norm": 3.1484081745147705, "learning_rate": 0.000566325369738339, "loss": 6.8012, "step": 3812 }, { "epoch": 1.3013651877133106, "grad_norm": 3.0226058959960938, "learning_rate": 0.0005662116040955631, "loss": 6.4212, "step": 3813 }, { "epoch": 1.3017064846416382, "grad_norm": 3.7095937728881836, "learning_rate": 0.0005660978384527872, "loss": 5.5472, "step": 3814 }, { "epoch": 1.3020477815699658, "grad_norm": 8.578839302062988, "learning_rate": 0.0005659840728100114, "loss": 6.6667, "step": 3815 }, { "epoch": 1.3023890784982934, "grad_norm": 3.352297067642212, "learning_rate": 0.0005658703071672356, "loss": 6.7404, "step": 3816 }, { "epoch": 1.3027303754266213, "grad_norm": 3.2766904830932617, "learning_rate": 0.0005657565415244597, "loss": 5.9404, "step": 3817 }, { "epoch": 1.3030716723549487, "grad_norm": 3.1568057537078857, "learning_rate": 0.0005656427758816838, "loss": 6.3997, "step": 3818 }, { "epoch": 1.3034129692832765, "grad_norm": 3.079411029815674, "learning_rate": 0.0005655290102389079, "loss": 6.5721, "step": 3819 }, { "epoch": 1.3037542662116042, "grad_norm": 3.156961679458618, "learning_rate": 0.0005654152445961321, "loss": 6.3826, "step": 3820 }, { "epoch": 1.3040955631399318, "grad_norm": 3.102477788925171, "learning_rate": 0.0005653014789533562, "loss": 6.4791, "step": 3821 }, { "epoch": 1.3044368600682594, "grad_norm": 3.097460985183716, "learning_rate": 0.0005651877133105802, "loss": 6.6575, "step": 3822 }, { "epoch": 1.304778156996587, "grad_norm": 3.1785974502563477, "learning_rate": 0.0005650739476678043, "loss": 6.6686, "step": 3823 }, { "epoch": 1.3051194539249147, "grad_norm": 3.1115939617156982, "learning_rate": 0.0005649601820250284, "loss": 6.6035, "step": 3824 }, { "epoch": 1.3054607508532423, "grad_norm": 3.2212889194488525, "learning_rate": 0.0005648464163822525, "loss": 6.1973, "step": 3825 }, { "epoch": 1.30580204778157, "grad_norm": 3.3271737098693848, "learning_rate": 0.0005647326507394767, "loss": 5.9092, "step": 3826 }, { "epoch": 1.3061433447098976, "grad_norm": 3.7578651905059814, "learning_rate": 0.0005646188850967008, "loss": 5.8015, "step": 3827 }, { "epoch": 1.3064846416382252, "grad_norm": 3.2217273712158203, "learning_rate": 0.0005645051194539249, "loss": 6.6484, "step": 3828 }, { "epoch": 1.3068259385665528, "grad_norm": 3.182534694671631, "learning_rate": 0.000564391353811149, "loss": 6.494, "step": 3829 }, { "epoch": 1.3071672354948807, "grad_norm": 3.2115378379821777, "learning_rate": 0.0005642775881683731, "loss": 6.4137, "step": 3830 }, { "epoch": 1.307508532423208, "grad_norm": 3.060603618621826, "learning_rate": 0.0005641638225255972, "loss": 6.485, "step": 3831 }, { "epoch": 1.307849829351536, "grad_norm": 3.5397446155548096, "learning_rate": 0.0005640500568828214, "loss": 6.3132, "step": 3832 }, { "epoch": 1.3081911262798636, "grad_norm": 3.190903663635254, "learning_rate": 0.0005639362912400456, "loss": 5.9605, "step": 3833 }, { "epoch": 1.3085324232081912, "grad_norm": 6.479193210601807, "learning_rate": 0.0005638225255972697, "loss": 5.4088, "step": 3834 }, { "epoch": 1.3088737201365188, "grad_norm": 3.177168846130371, "learning_rate": 0.0005637087599544938, "loss": 6.6025, "step": 3835 }, { "epoch": 1.3092150170648464, "grad_norm": 3.4449946880340576, "learning_rate": 0.0005635949943117179, "loss": 5.3262, "step": 3836 }, { "epoch": 1.309556313993174, "grad_norm": 3.2236359119415283, "learning_rate": 0.0005634812286689421, "loss": 6.2613, "step": 3837 }, { "epoch": 1.3098976109215017, "grad_norm": 3.60327410697937, "learning_rate": 0.0005633674630261662, "loss": 5.2672, "step": 3838 }, { "epoch": 1.3102389078498293, "grad_norm": 2.984083890914917, "learning_rate": 0.0005632536973833903, "loss": 6.2554, "step": 3839 }, { "epoch": 1.310580204778157, "grad_norm": 3.2155168056488037, "learning_rate": 0.0005631399317406144, "loss": 6.54, "step": 3840 }, { "epoch": 1.3109215017064846, "grad_norm": 3.312378168106079, "learning_rate": 0.0005630261660978384, "loss": 6.4537, "step": 3841 }, { "epoch": 1.3112627986348122, "grad_norm": 3.047915458679199, "learning_rate": 0.0005629124004550625, "loss": 6.4776, "step": 3842 }, { "epoch": 1.31160409556314, "grad_norm": 3.2649431228637695, "learning_rate": 0.0005627986348122867, "loss": 6.1978, "step": 3843 }, { "epoch": 1.3119453924914675, "grad_norm": 3.579108238220215, "learning_rate": 0.0005626848691695108, "loss": 6.1248, "step": 3844 }, { "epoch": 1.3122866894197953, "grad_norm": 5.931406497955322, "learning_rate": 0.0005625711035267349, "loss": 5.7591, "step": 3845 }, { "epoch": 1.312627986348123, "grad_norm": 3.2167887687683105, "learning_rate": 0.000562457337883959, "loss": 6.8081, "step": 3846 }, { "epoch": 1.3129692832764506, "grad_norm": 3.454554557800293, "learning_rate": 0.0005623435722411831, "loss": 6.5649, "step": 3847 }, { "epoch": 1.3133105802047782, "grad_norm": 7.931427478790283, "learning_rate": 0.0005622298065984072, "loss": 4.5475, "step": 3848 }, { "epoch": 1.3136518771331058, "grad_norm": 3.3232662677764893, "learning_rate": 0.0005621160409556314, "loss": 6.3771, "step": 3849 }, { "epoch": 1.3139931740614335, "grad_norm": 3.6749861240386963, "learning_rate": 0.0005620022753128556, "loss": 6.2799, "step": 3850 }, { "epoch": 1.314334470989761, "grad_norm": 3.1684978008270264, "learning_rate": 0.0005618885096700797, "loss": 6.4631, "step": 3851 }, { "epoch": 1.3146757679180887, "grad_norm": 3.638622283935547, "learning_rate": 0.0005617747440273038, "loss": 5.7704, "step": 3852 }, { "epoch": 1.3150170648464163, "grad_norm": 3.2697503566741943, "learning_rate": 0.0005616609783845279, "loss": 5.7991, "step": 3853 }, { "epoch": 1.315358361774744, "grad_norm": 3.5689799785614014, "learning_rate": 0.000561547212741752, "loss": 5.2592, "step": 3854 }, { "epoch": 1.3156996587030716, "grad_norm": 3.1521520614624023, "learning_rate": 0.0005614334470989762, "loss": 6.3141, "step": 3855 }, { "epoch": 1.3160409556313994, "grad_norm": 3.137810468673706, "learning_rate": 0.0005613196814562003, "loss": 6.8293, "step": 3856 }, { "epoch": 1.3163822525597269, "grad_norm": 3.1368062496185303, "learning_rate": 0.0005612059158134244, "loss": 6.7367, "step": 3857 }, { "epoch": 1.3167235494880547, "grad_norm": 3.516813278198242, "learning_rate": 0.0005610921501706485, "loss": 6.4741, "step": 3858 }, { "epoch": 1.3170648464163823, "grad_norm": 5.221506118774414, "learning_rate": 0.0005609783845278726, "loss": 5.8297, "step": 3859 }, { "epoch": 1.31740614334471, "grad_norm": 3.2400710582733154, "learning_rate": 0.0005608646188850968, "loss": 7.1234, "step": 3860 }, { "epoch": 1.3177474402730376, "grad_norm": 3.2599074840545654, "learning_rate": 0.0005607508532423208, "loss": 6.3569, "step": 3861 }, { "epoch": 1.3180887372013652, "grad_norm": 3.250030994415283, "learning_rate": 0.0005606370875995449, "loss": 7.1309, "step": 3862 }, { "epoch": 1.3184300341296928, "grad_norm": 3.246612071990967, "learning_rate": 0.000560523321956769, "loss": 6.5661, "step": 3863 }, { "epoch": 1.3187713310580205, "grad_norm": 3.082751989364624, "learning_rate": 0.0005604095563139931, "loss": 6.5423, "step": 3864 }, { "epoch": 1.319112627986348, "grad_norm": 3.3138341903686523, "learning_rate": 0.0005602957906712172, "loss": 6.3662, "step": 3865 }, { "epoch": 1.3194539249146757, "grad_norm": 5.791090488433838, "learning_rate": 0.0005601820250284414, "loss": 5.5428, "step": 3866 }, { "epoch": 1.3197952218430034, "grad_norm": 3.186373710632324, "learning_rate": 0.0005600682593856656, "loss": 6.7147, "step": 3867 }, { "epoch": 1.320136518771331, "grad_norm": 3.214484930038452, "learning_rate": 0.0005599544937428897, "loss": 6.3017, "step": 3868 }, { "epoch": 1.3204778156996588, "grad_norm": 3.255305051803589, "learning_rate": 0.0005598407281001138, "loss": 6.3534, "step": 3869 }, { "epoch": 1.3208191126279862, "grad_norm": 3.318035125732422, "learning_rate": 0.0005597269624573379, "loss": 6.9723, "step": 3870 }, { "epoch": 1.321160409556314, "grad_norm": 3.18615984916687, "learning_rate": 0.000559613196814562, "loss": 6.6132, "step": 3871 }, { "epoch": 1.3215017064846417, "grad_norm": 3.249497652053833, "learning_rate": 0.0005594994311717862, "loss": 6.7708, "step": 3872 }, { "epoch": 1.3218430034129693, "grad_norm": 4.298493385314941, "learning_rate": 0.0005593856655290103, "loss": 6.0935, "step": 3873 }, { "epoch": 1.322184300341297, "grad_norm": 3.8184046745300293, "learning_rate": 0.0005592718998862344, "loss": 5.617, "step": 3874 }, { "epoch": 1.3225255972696246, "grad_norm": 3.3545162677764893, "learning_rate": 0.0005591581342434585, "loss": 6.2788, "step": 3875 }, { "epoch": 1.3228668941979522, "grad_norm": 3.2084295749664307, "learning_rate": 0.0005590443686006826, "loss": 6.5599, "step": 3876 }, { "epoch": 1.3232081911262799, "grad_norm": 3.0472261905670166, "learning_rate": 0.0005589306029579067, "loss": 6.0838, "step": 3877 }, { "epoch": 1.3235494880546075, "grad_norm": 3.785743236541748, "learning_rate": 0.0005588168373151309, "loss": 5.699, "step": 3878 }, { "epoch": 1.323890784982935, "grad_norm": 3.114495038986206, "learning_rate": 0.000558703071672355, "loss": 6.5545, "step": 3879 }, { "epoch": 1.3242320819112627, "grad_norm": 4.037391662597656, "learning_rate": 0.000558589306029579, "loss": 5.8293, "step": 3880 }, { "epoch": 1.3245733788395904, "grad_norm": 3.1285529136657715, "learning_rate": 0.0005584755403868031, "loss": 6.5584, "step": 3881 }, { "epoch": 1.3249146757679182, "grad_norm": 3.292494058609009, "learning_rate": 0.0005583617747440272, "loss": 6.8052, "step": 3882 }, { "epoch": 1.3252559726962456, "grad_norm": 5.072427272796631, "learning_rate": 0.0005582480091012514, "loss": 5.6256, "step": 3883 }, { "epoch": 1.3255972696245735, "grad_norm": 3.1769468784332275, "learning_rate": 0.0005581342434584756, "loss": 6.5001, "step": 3884 }, { "epoch": 1.325938566552901, "grad_norm": 6.59473180770874, "learning_rate": 0.0005580204778156997, "loss": 6.2227, "step": 3885 }, { "epoch": 1.3262798634812287, "grad_norm": 3.345031261444092, "learning_rate": 0.0005579067121729238, "loss": 6.3236, "step": 3886 }, { "epoch": 1.3266211604095564, "grad_norm": 3.1531882286071777, "learning_rate": 0.0005577929465301479, "loss": 6.8264, "step": 3887 }, { "epoch": 1.326962457337884, "grad_norm": 3.17917537689209, "learning_rate": 0.000557679180887372, "loss": 6.032, "step": 3888 }, { "epoch": 1.3273037542662116, "grad_norm": 3.783651351928711, "learning_rate": 0.0005575654152445962, "loss": 5.9765, "step": 3889 }, { "epoch": 1.3276450511945392, "grad_norm": 3.515580892562866, "learning_rate": 0.0005574516496018203, "loss": 5.7462, "step": 3890 }, { "epoch": 1.3279863481228669, "grad_norm": 2.9463393688201904, "learning_rate": 0.0005573378839590444, "loss": 6.4177, "step": 3891 }, { "epoch": 1.3283276450511945, "grad_norm": 3.109710216522217, "learning_rate": 0.0005572241183162685, "loss": 6.3956, "step": 3892 }, { "epoch": 1.3286689419795221, "grad_norm": 3.2260215282440186, "learning_rate": 0.0005571103526734926, "loss": 6.6147, "step": 3893 }, { "epoch": 1.3290102389078498, "grad_norm": 3.0810201168060303, "learning_rate": 0.0005569965870307167, "loss": 6.1781, "step": 3894 }, { "epoch": 1.3293515358361776, "grad_norm": 3.121894598007202, "learning_rate": 0.0005568828213879409, "loss": 6.7156, "step": 3895 }, { "epoch": 1.329692832764505, "grad_norm": 3.2574121952056885, "learning_rate": 0.000556769055745165, "loss": 6.3006, "step": 3896 }, { "epoch": 1.3300341296928329, "grad_norm": 3.250694990158081, "learning_rate": 0.0005566552901023891, "loss": 6.9496, "step": 3897 }, { "epoch": 1.3303754266211605, "grad_norm": 3.090780258178711, "learning_rate": 0.0005565415244596132, "loss": 6.5721, "step": 3898 }, { "epoch": 1.3307167235494881, "grad_norm": 3.135728120803833, "learning_rate": 0.0005564277588168373, "loss": 7.0426, "step": 3899 }, { "epoch": 1.3310580204778157, "grad_norm": 3.099902391433716, "learning_rate": 0.0005563139931740613, "loss": 6.4846, "step": 3900 }, { "epoch": 1.3313993174061434, "grad_norm": 3.4714698791503906, "learning_rate": 0.0005562002275312856, "loss": 6.1539, "step": 3901 }, { "epoch": 1.331740614334471, "grad_norm": 3.3556177616119385, "learning_rate": 0.0005560864618885097, "loss": 6.3849, "step": 3902 }, { "epoch": 1.3320819112627986, "grad_norm": 3.5183370113372803, "learning_rate": 0.0005559726962457338, "loss": 6.1542, "step": 3903 }, { "epoch": 1.3324232081911263, "grad_norm": 3.0659613609313965, "learning_rate": 0.0005558589306029579, "loss": 6.262, "step": 3904 }, { "epoch": 1.3327645051194539, "grad_norm": 3.1507010459899902, "learning_rate": 0.000555745164960182, "loss": 6.7591, "step": 3905 }, { "epoch": 1.3331058020477815, "grad_norm": 3.488461971282959, "learning_rate": 0.0005556313993174062, "loss": 6.2227, "step": 3906 }, { "epoch": 1.3334470989761091, "grad_norm": 3.122450828552246, "learning_rate": 0.0005555176336746303, "loss": 6.2148, "step": 3907 }, { "epoch": 1.333788395904437, "grad_norm": 3.227902412414551, "learning_rate": 0.0005554038680318544, "loss": 6.7408, "step": 3908 }, { "epoch": 1.3341296928327644, "grad_norm": 3.1958107948303223, "learning_rate": 0.0005552901023890785, "loss": 6.4788, "step": 3909 }, { "epoch": 1.3344709897610922, "grad_norm": 3.2183525562286377, "learning_rate": 0.0005551763367463026, "loss": 6.5644, "step": 3910 }, { "epoch": 1.3348122866894199, "grad_norm": 3.3385112285614014, "learning_rate": 0.0005550625711035267, "loss": 6.0782, "step": 3911 }, { "epoch": 1.3351535836177475, "grad_norm": 4.089425086975098, "learning_rate": 0.0005549488054607509, "loss": 5.6889, "step": 3912 }, { "epoch": 1.3354948805460751, "grad_norm": 3.097321033477783, "learning_rate": 0.000554835039817975, "loss": 6.336, "step": 3913 }, { "epoch": 1.3358361774744028, "grad_norm": 5.853010654449463, "learning_rate": 0.0005547212741751991, "loss": 4.3251, "step": 3914 }, { "epoch": 1.3361774744027304, "grad_norm": 3.582655191421509, "learning_rate": 0.0005546075085324232, "loss": 5.5901, "step": 3915 }, { "epoch": 1.336518771331058, "grad_norm": 3.233471155166626, "learning_rate": 0.0005544937428896473, "loss": 6.5578, "step": 3916 }, { "epoch": 1.3368600682593856, "grad_norm": 3.3025286197662354, "learning_rate": 0.0005543799772468715, "loss": 6.0288, "step": 3917 }, { "epoch": 1.3372013651877133, "grad_norm": 3.4599759578704834, "learning_rate": 0.0005542662116040957, "loss": 6.4112, "step": 3918 }, { "epoch": 1.337542662116041, "grad_norm": 3.0111966133117676, "learning_rate": 0.0005541524459613197, "loss": 6.3284, "step": 3919 }, { "epoch": 1.3378839590443685, "grad_norm": 4.267691135406494, "learning_rate": 0.0005540386803185438, "loss": 6.3674, "step": 3920 }, { "epoch": 1.3382252559726964, "grad_norm": 3.2080326080322266, "learning_rate": 0.0005539249146757679, "loss": 6.5919, "step": 3921 }, { "epoch": 1.3385665529010238, "grad_norm": 3.23378849029541, "learning_rate": 0.000553811149032992, "loss": 6.1681, "step": 3922 }, { "epoch": 1.3389078498293516, "grad_norm": 3.0529868602752686, "learning_rate": 0.0005536973833902162, "loss": 6.6733, "step": 3923 }, { "epoch": 1.3392491467576793, "grad_norm": 4.308516502380371, "learning_rate": 0.0005535836177474403, "loss": 5.9069, "step": 3924 }, { "epoch": 1.3395904436860069, "grad_norm": 6.184211254119873, "learning_rate": 0.0005534698521046644, "loss": 5.3009, "step": 3925 }, { "epoch": 1.3399317406143345, "grad_norm": 3.4031319618225098, "learning_rate": 0.0005533560864618885, "loss": 6.694, "step": 3926 }, { "epoch": 1.3402730375426621, "grad_norm": 3.15828275680542, "learning_rate": 0.0005532423208191126, "loss": 6.8263, "step": 3927 }, { "epoch": 1.3406143344709898, "grad_norm": 3.1163811683654785, "learning_rate": 0.0005531285551763367, "loss": 6.3808, "step": 3928 }, { "epoch": 1.3409556313993174, "grad_norm": 3.1072165966033936, "learning_rate": 0.0005530147895335609, "loss": 6.4251, "step": 3929 }, { "epoch": 1.341296928327645, "grad_norm": 3.2711892127990723, "learning_rate": 0.000552901023890785, "loss": 6.1466, "step": 3930 }, { "epoch": 1.3416382252559726, "grad_norm": 3.1679635047912598, "learning_rate": 0.0005527872582480091, "loss": 6.3643, "step": 3931 }, { "epoch": 1.3419795221843003, "grad_norm": 3.1142637729644775, "learning_rate": 0.0005526734926052332, "loss": 6.8845, "step": 3932 }, { "epoch": 1.342320819112628, "grad_norm": 3.082751989364624, "learning_rate": 0.0005525597269624573, "loss": 6.1942, "step": 3933 }, { "epoch": 1.3426621160409558, "grad_norm": 3.139395236968994, "learning_rate": 0.0005524459613196815, "loss": 6.6958, "step": 3934 }, { "epoch": 1.3430034129692832, "grad_norm": 3.3839261531829834, "learning_rate": 0.0005523321956769057, "loss": 5.6164, "step": 3935 }, { "epoch": 1.343344709897611, "grad_norm": 3.2381277084350586, "learning_rate": 0.0005522184300341298, "loss": 6.8543, "step": 3936 }, { "epoch": 1.3436860068259386, "grad_norm": 3.236882209777832, "learning_rate": 0.0005521046643913539, "loss": 6.2873, "step": 3937 }, { "epoch": 1.3440273037542663, "grad_norm": 3.085549831390381, "learning_rate": 0.0005519908987485779, "loss": 6.6844, "step": 3938 }, { "epoch": 1.344368600682594, "grad_norm": 3.2600576877593994, "learning_rate": 0.000551877133105802, "loss": 6.4399, "step": 3939 }, { "epoch": 1.3447098976109215, "grad_norm": 3.097806215286255, "learning_rate": 0.0005517633674630261, "loss": 6.825, "step": 3940 }, { "epoch": 1.3450511945392492, "grad_norm": 3.1946775913238525, "learning_rate": 0.0005516496018202503, "loss": 6.6171, "step": 3941 }, { "epoch": 1.3453924914675768, "grad_norm": 3.3030920028686523, "learning_rate": 0.0005515358361774744, "loss": 6.3477, "step": 3942 }, { "epoch": 1.3457337883959044, "grad_norm": 3.1355345249176025, "learning_rate": 0.0005514220705346985, "loss": 6.8327, "step": 3943 }, { "epoch": 1.346075085324232, "grad_norm": 3.1044278144836426, "learning_rate": 0.0005513083048919226, "loss": 6.3775, "step": 3944 }, { "epoch": 1.3464163822525597, "grad_norm": 3.12194561958313, "learning_rate": 0.0005511945392491467, "loss": 6.2563, "step": 3945 }, { "epoch": 1.3467576791808873, "grad_norm": 3.1160433292388916, "learning_rate": 0.0005510807736063709, "loss": 6.0824, "step": 3946 }, { "epoch": 1.3470989761092151, "grad_norm": 3.5935933589935303, "learning_rate": 0.000550967007963595, "loss": 3.7834, "step": 3947 }, { "epoch": 1.3474402730375425, "grad_norm": 3.4486262798309326, "learning_rate": 0.0005508532423208191, "loss": 6.0705, "step": 3948 }, { "epoch": 1.3477815699658704, "grad_norm": 3.192941427230835, "learning_rate": 0.0005507394766780432, "loss": 5.8078, "step": 3949 }, { "epoch": 1.348122866894198, "grad_norm": 8.985308647155762, "learning_rate": 0.0005506257110352673, "loss": 5.2075, "step": 3950 }, { "epoch": 1.3484641638225257, "grad_norm": 4.667975902557373, "learning_rate": 0.0005505119453924915, "loss": 6.0374, "step": 3951 }, { "epoch": 1.3488054607508533, "grad_norm": 3.4033608436584473, "learning_rate": 0.0005503981797497157, "loss": 6.1368, "step": 3952 }, { "epoch": 1.349146757679181, "grad_norm": 3.455477237701416, "learning_rate": 0.0005502844141069398, "loss": 6.2675, "step": 3953 }, { "epoch": 1.3494880546075085, "grad_norm": 3.2633755207061768, "learning_rate": 0.0005501706484641639, "loss": 6.7096, "step": 3954 }, { "epoch": 1.3498293515358362, "grad_norm": 4.205435752868652, "learning_rate": 0.000550056882821388, "loss": 5.0339, "step": 3955 }, { "epoch": 1.3501706484641638, "grad_norm": 3.1582601070404053, "learning_rate": 0.0005499431171786121, "loss": 7.1195, "step": 3956 }, { "epoch": 1.3505119453924914, "grad_norm": 3.342383623123169, "learning_rate": 0.0005498293515358362, "loss": 5.5482, "step": 3957 }, { "epoch": 1.350853242320819, "grad_norm": 3.2618634700775146, "learning_rate": 0.0005497155858930603, "loss": 6.5156, "step": 3958 }, { "epoch": 1.3511945392491467, "grad_norm": 3.0584144592285156, "learning_rate": 0.0005496018202502844, "loss": 6.5958, "step": 3959 }, { "epoch": 1.3515358361774745, "grad_norm": 3.212893009185791, "learning_rate": 0.0005494880546075085, "loss": 6.7449, "step": 3960 }, { "epoch": 1.351877133105802, "grad_norm": 3.140817403793335, "learning_rate": 0.0005493742889647326, "loss": 6.134, "step": 3961 }, { "epoch": 1.3522184300341298, "grad_norm": 3.27764630317688, "learning_rate": 0.0005492605233219567, "loss": 5.9015, "step": 3962 }, { "epoch": 1.3525597269624574, "grad_norm": 3.1260316371917725, "learning_rate": 0.0005491467576791809, "loss": 6.9168, "step": 3963 }, { "epoch": 1.352901023890785, "grad_norm": 4.186235427856445, "learning_rate": 0.000549032992036405, "loss": 5.7906, "step": 3964 }, { "epoch": 1.3532423208191127, "grad_norm": 3.5168609619140625, "learning_rate": 0.0005489192263936291, "loss": 6.4136, "step": 3965 }, { "epoch": 1.3535836177474403, "grad_norm": 3.287663698196411, "learning_rate": 0.0005488054607508532, "loss": 6.072, "step": 3966 }, { "epoch": 1.353924914675768, "grad_norm": 3.4218294620513916, "learning_rate": 0.0005486916951080773, "loss": 5.65, "step": 3967 }, { "epoch": 1.3542662116040955, "grad_norm": 3.3212225437164307, "learning_rate": 0.0005485779294653015, "loss": 6.4668, "step": 3968 }, { "epoch": 1.3546075085324232, "grad_norm": 3.127525568008423, "learning_rate": 0.0005484641638225257, "loss": 6.4388, "step": 3969 }, { "epoch": 1.3549488054607508, "grad_norm": 3.166355848312378, "learning_rate": 0.0005483503981797498, "loss": 6.3717, "step": 3970 }, { "epoch": 1.3552901023890784, "grad_norm": 3.090057134628296, "learning_rate": 0.0005482366325369739, "loss": 5.9435, "step": 3971 }, { "epoch": 1.355631399317406, "grad_norm": 3.0684850215911865, "learning_rate": 0.000548122866894198, "loss": 7.0271, "step": 3972 }, { "epoch": 1.355972696245734, "grad_norm": 3.6041576862335205, "learning_rate": 0.0005480091012514221, "loss": 6.0391, "step": 3973 }, { "epoch": 1.3563139931740613, "grad_norm": 5.369800567626953, "learning_rate": 0.0005478953356086462, "loss": 6.1618, "step": 3974 }, { "epoch": 1.3566552901023892, "grad_norm": 3.1870033740997314, "learning_rate": 0.0005477815699658704, "loss": 6.782, "step": 3975 }, { "epoch": 1.3569965870307168, "grad_norm": 3.168116331100464, "learning_rate": 0.0005476678043230945, "loss": 6.3682, "step": 3976 }, { "epoch": 1.3573378839590444, "grad_norm": 3.2741923332214355, "learning_rate": 0.0005475540386803185, "loss": 6.0431, "step": 3977 }, { "epoch": 1.357679180887372, "grad_norm": 3.190312147140503, "learning_rate": 0.0005474402730375426, "loss": 6.2224, "step": 3978 }, { "epoch": 1.3580204778156997, "grad_norm": 3.3220231533050537, "learning_rate": 0.0005473265073947667, "loss": 6.61, "step": 3979 }, { "epoch": 1.3583617747440273, "grad_norm": 3.0520358085632324, "learning_rate": 0.0005472127417519908, "loss": 6.6153, "step": 3980 }, { "epoch": 1.358703071672355, "grad_norm": 3.942625045776367, "learning_rate": 0.000547098976109215, "loss": 4.871, "step": 3981 }, { "epoch": 1.3590443686006826, "grad_norm": 3.148261785507202, "learning_rate": 0.0005469852104664391, "loss": 6.5981, "step": 3982 }, { "epoch": 1.3593856655290102, "grad_norm": 3.1184489727020264, "learning_rate": 0.0005468714448236632, "loss": 6.0445, "step": 3983 }, { "epoch": 1.3597269624573378, "grad_norm": 3.2603631019592285, "learning_rate": 0.0005467576791808873, "loss": 6.847, "step": 3984 }, { "epoch": 1.3600682593856654, "grad_norm": 3.4152493476867676, "learning_rate": 0.0005466439135381115, "loss": 5.5912, "step": 3985 }, { "epoch": 1.3604095563139933, "grad_norm": 3.3140194416046143, "learning_rate": 0.0005465301478953357, "loss": 6.3923, "step": 3986 }, { "epoch": 1.3607508532423207, "grad_norm": 3.526538610458374, "learning_rate": 0.0005464163822525598, "loss": 5.259, "step": 3987 }, { "epoch": 1.3610921501706486, "grad_norm": 3.1631855964660645, "learning_rate": 0.0005463026166097839, "loss": 6.5663, "step": 3988 }, { "epoch": 1.3614334470989762, "grad_norm": 3.11613392829895, "learning_rate": 0.000546188850967008, "loss": 6.3917, "step": 3989 }, { "epoch": 1.3617747440273038, "grad_norm": 3.1377980709075928, "learning_rate": 0.0005460750853242321, "loss": 6.2215, "step": 3990 }, { "epoch": 1.3621160409556314, "grad_norm": 2.9950690269470215, "learning_rate": 0.0005459613196814562, "loss": 6.8778, "step": 3991 }, { "epoch": 1.362457337883959, "grad_norm": 3.1760122776031494, "learning_rate": 0.0005458475540386804, "loss": 6.2066, "step": 3992 }, { "epoch": 1.3627986348122867, "grad_norm": 3.045168399810791, "learning_rate": 0.0005457337883959045, "loss": 6.3082, "step": 3993 }, { "epoch": 1.3631399317406143, "grad_norm": 3.054022789001465, "learning_rate": 0.0005456200227531286, "loss": 6.0026, "step": 3994 }, { "epoch": 1.363481228668942, "grad_norm": 3.007483959197998, "learning_rate": 0.0005455062571103527, "loss": 6.052, "step": 3995 }, { "epoch": 1.3638225255972696, "grad_norm": 3.831319570541382, "learning_rate": 0.0005453924914675768, "loss": 5.1814, "step": 3996 }, { "epoch": 1.3641638225255972, "grad_norm": 3.610045909881592, "learning_rate": 0.0005452787258248008, "loss": 6.2811, "step": 3997 }, { "epoch": 1.3645051194539248, "grad_norm": 3.2371370792388916, "learning_rate": 0.000545164960182025, "loss": 6.8801, "step": 3998 }, { "epoch": 1.3648464163822527, "grad_norm": 3.0516843795776367, "learning_rate": 0.0005450511945392491, "loss": 6.3365, "step": 3999 }, { "epoch": 1.36518771331058, "grad_norm": 3.2358148097991943, "learning_rate": 0.0005449374288964732, "loss": 6.708, "step": 4000 }, { "epoch": 1.365529010238908, "grad_norm": 3.3484854698181152, "learning_rate": 0.0005448236632536974, "loss": 5.9241, "step": 4001 }, { "epoch": 1.3658703071672356, "grad_norm": 3.215952157974243, "learning_rate": 0.0005447098976109215, "loss": 6.2136, "step": 4002 }, { "epoch": 1.3662116040955632, "grad_norm": 3.1921606063842773, "learning_rate": 0.0005445961319681456, "loss": 6.4693, "step": 4003 }, { "epoch": 1.3665529010238908, "grad_norm": 3.2230448722839355, "learning_rate": 0.0005444823663253698, "loss": 6.3591, "step": 4004 }, { "epoch": 1.3668941979522184, "grad_norm": 3.30001163482666, "learning_rate": 0.0005443686006825939, "loss": 7.2393, "step": 4005 }, { "epoch": 1.367235494880546, "grad_norm": 3.1565656661987305, "learning_rate": 0.000544254835039818, "loss": 6.7251, "step": 4006 }, { "epoch": 1.3675767918088737, "grad_norm": 2.9923689365386963, "learning_rate": 0.0005441410693970421, "loss": 6.4636, "step": 4007 }, { "epoch": 1.3679180887372013, "grad_norm": 3.189656972885132, "learning_rate": 0.0005440273037542662, "loss": 6.586, "step": 4008 }, { "epoch": 1.368259385665529, "grad_norm": 4.6809234619140625, "learning_rate": 0.0005439135381114904, "loss": 5.4901, "step": 4009 }, { "epoch": 1.3686006825938566, "grad_norm": 3.192366600036621, "learning_rate": 0.0005437997724687145, "loss": 6.0856, "step": 4010 }, { "epoch": 1.3689419795221842, "grad_norm": 3.0873725414276123, "learning_rate": 0.0005436860068259386, "loss": 6.8565, "step": 4011 }, { "epoch": 1.369283276450512, "grad_norm": 3.1170427799224854, "learning_rate": 0.0005435722411831627, "loss": 6.371, "step": 4012 }, { "epoch": 1.3696245733788395, "grad_norm": 3.2047770023345947, "learning_rate": 0.0005434584755403868, "loss": 6.8304, "step": 4013 }, { "epoch": 1.3699658703071673, "grad_norm": 3.132357120513916, "learning_rate": 0.0005433447098976109, "loss": 6.4195, "step": 4014 }, { "epoch": 1.370307167235495, "grad_norm": 3.173907518386841, "learning_rate": 0.0005432309442548351, "loss": 6.4185, "step": 4015 }, { "epoch": 1.3706484641638226, "grad_norm": 5.250078201293945, "learning_rate": 0.0005431171786120591, "loss": 5.0679, "step": 4016 }, { "epoch": 1.3709897610921502, "grad_norm": 3.5316922664642334, "learning_rate": 0.0005430034129692832, "loss": 6.0273, "step": 4017 }, { "epoch": 1.3713310580204778, "grad_norm": 3.2254114151000977, "learning_rate": 0.0005428896473265074, "loss": 6.6384, "step": 4018 }, { "epoch": 1.3716723549488055, "grad_norm": 3.242680788040161, "learning_rate": 0.0005427758816837315, "loss": 6.5512, "step": 4019 }, { "epoch": 1.372013651877133, "grad_norm": 3.2910420894622803, "learning_rate": 0.0005426621160409556, "loss": 6.7562, "step": 4020 }, { "epoch": 1.3723549488054607, "grad_norm": 3.184915065765381, "learning_rate": 0.0005425483503981798, "loss": 6.5688, "step": 4021 }, { "epoch": 1.3726962457337883, "grad_norm": 3.098815679550171, "learning_rate": 0.0005424345847554039, "loss": 6.6371, "step": 4022 }, { "epoch": 1.373037542662116, "grad_norm": 3.283712863922119, "learning_rate": 0.000542320819112628, "loss": 6.0917, "step": 4023 }, { "epoch": 1.3733788395904436, "grad_norm": 3.190316677093506, "learning_rate": 0.0005422070534698521, "loss": 6.1911, "step": 4024 }, { "epoch": 1.3737201365187715, "grad_norm": 3.122403621673584, "learning_rate": 0.0005420932878270762, "loss": 6.3405, "step": 4025 }, { "epoch": 1.3740614334470989, "grad_norm": 3.1362032890319824, "learning_rate": 0.0005419795221843004, "loss": 6.6867, "step": 4026 }, { "epoch": 1.3744027303754267, "grad_norm": 3.2245562076568604, "learning_rate": 0.0005418657565415245, "loss": 6.2402, "step": 4027 }, { "epoch": 1.3747440273037543, "grad_norm": 3.087644338607788, "learning_rate": 0.0005417519908987486, "loss": 6.16, "step": 4028 }, { "epoch": 1.375085324232082, "grad_norm": 3.1691482067108154, "learning_rate": 0.0005416382252559727, "loss": 6.5541, "step": 4029 }, { "epoch": 1.3754266211604096, "grad_norm": 3.053196668624878, "learning_rate": 0.0005415244596131968, "loss": 6.7324, "step": 4030 }, { "epoch": 1.3757679180887372, "grad_norm": 4.789836406707764, "learning_rate": 0.0005414106939704209, "loss": 5.2476, "step": 4031 }, { "epoch": 1.3761092150170648, "grad_norm": 3.1950435638427734, "learning_rate": 0.0005412969283276451, "loss": 6.7076, "step": 4032 }, { "epoch": 1.3764505119453925, "grad_norm": 3.2419111728668213, "learning_rate": 0.0005411831626848693, "loss": 6.1823, "step": 4033 }, { "epoch": 1.37679180887372, "grad_norm": 7.173503398895264, "learning_rate": 0.0005410693970420934, "loss": 4.7265, "step": 4034 }, { "epoch": 1.3771331058020477, "grad_norm": 3.251122236251831, "learning_rate": 0.0005409556313993175, "loss": 6.432, "step": 4035 }, { "epoch": 1.3774744027303754, "grad_norm": 3.205535888671875, "learning_rate": 0.0005408418657565415, "loss": 6.6821, "step": 4036 }, { "epoch": 1.377815699658703, "grad_norm": 3.3682541847229004, "learning_rate": 0.0005407281001137656, "loss": 6.1631, "step": 4037 }, { "epoch": 1.3781569965870308, "grad_norm": 3.0801639556884766, "learning_rate": 0.0005406143344709898, "loss": 6.1151, "step": 4038 }, { "epoch": 1.3784982935153582, "grad_norm": 3.063805341720581, "learning_rate": 0.0005405005688282139, "loss": 6.5842, "step": 4039 }, { "epoch": 1.378839590443686, "grad_norm": 2.9392287731170654, "learning_rate": 0.000540386803185438, "loss": 6.3864, "step": 4040 }, { "epoch": 1.3791808873720137, "grad_norm": 3.018649101257324, "learning_rate": 0.0005402730375426621, "loss": 6.542, "step": 4041 }, { "epoch": 1.3795221843003413, "grad_norm": 4.661662578582764, "learning_rate": 0.0005401592718998862, "loss": 5.7853, "step": 4042 }, { "epoch": 1.379863481228669, "grad_norm": 3.264328956604004, "learning_rate": 0.0005400455062571103, "loss": 6.3248, "step": 4043 }, { "epoch": 1.3802047781569966, "grad_norm": 3.175499200820923, "learning_rate": 0.0005399317406143345, "loss": 6.5147, "step": 4044 }, { "epoch": 1.3805460750853242, "grad_norm": 3.1350579261779785, "learning_rate": 0.0005398179749715586, "loss": 6.9947, "step": 4045 }, { "epoch": 1.3808873720136519, "grad_norm": 3.96781063079834, "learning_rate": 0.0005397042093287827, "loss": 5.874, "step": 4046 }, { "epoch": 1.3812286689419795, "grad_norm": 3.2071373462677, "learning_rate": 0.0005395904436860068, "loss": 6.668, "step": 4047 }, { "epoch": 1.3815699658703071, "grad_norm": 3.1385679244995117, "learning_rate": 0.0005394766780432309, "loss": 6.7825, "step": 4048 }, { "epoch": 1.3819112627986347, "grad_norm": 3.573509931564331, "learning_rate": 0.0005393629124004551, "loss": 5.5573, "step": 4049 }, { "epoch": 1.3822525597269624, "grad_norm": 3.066441059112549, "learning_rate": 0.0005392491467576793, "loss": 6.6349, "step": 4050 }, { "epoch": 1.3825938566552902, "grad_norm": 2.976698160171509, "learning_rate": 0.0005391353811149034, "loss": 6.6275, "step": 4051 }, { "epoch": 1.3829351535836176, "grad_norm": 3.050534963607788, "learning_rate": 0.0005390216154721275, "loss": 6.8268, "step": 4052 }, { "epoch": 1.3832764505119455, "grad_norm": 3.335890769958496, "learning_rate": 0.0005389078498293516, "loss": 6.1537, "step": 4053 }, { "epoch": 1.383617747440273, "grad_norm": 3.080980062484741, "learning_rate": 0.0005387940841865757, "loss": 6.7967, "step": 4054 }, { "epoch": 1.3839590443686007, "grad_norm": 3.0135996341705322, "learning_rate": 0.0005386803185437998, "loss": 7.1126, "step": 4055 }, { "epoch": 1.3843003412969284, "grad_norm": 3.079207420349121, "learning_rate": 0.0005385665529010239, "loss": 6.7536, "step": 4056 }, { "epoch": 1.384641638225256, "grad_norm": 3.1604158878326416, "learning_rate": 0.000538452787258248, "loss": 6.2925, "step": 4057 }, { "epoch": 1.3849829351535836, "grad_norm": 3.069831371307373, "learning_rate": 0.0005383390216154721, "loss": 6.494, "step": 4058 }, { "epoch": 1.3853242320819112, "grad_norm": 3.445303201675415, "learning_rate": 0.0005382252559726962, "loss": 6.0185, "step": 4059 }, { "epoch": 1.3856655290102389, "grad_norm": 3.107506513595581, "learning_rate": 0.0005381114903299203, "loss": 6.3259, "step": 4060 }, { "epoch": 1.3860068259385665, "grad_norm": 3.251415729522705, "learning_rate": 0.0005379977246871445, "loss": 6.1274, "step": 4061 }, { "epoch": 1.3863481228668941, "grad_norm": 3.4389657974243164, "learning_rate": 0.0005378839590443686, "loss": 5.9044, "step": 4062 }, { "epoch": 1.3866894197952218, "grad_norm": 2.960808515548706, "learning_rate": 0.0005377701934015927, "loss": 6.5314, "step": 4063 }, { "epoch": 1.3870307167235496, "grad_norm": 3.1747775077819824, "learning_rate": 0.0005376564277588168, "loss": 6.3545, "step": 4064 }, { "epoch": 1.387372013651877, "grad_norm": 3.289687156677246, "learning_rate": 0.0005375426621160409, "loss": 6.4242, "step": 4065 }, { "epoch": 1.3877133105802049, "grad_norm": 3.1048786640167236, "learning_rate": 0.0005374288964732651, "loss": 6.6646, "step": 4066 }, { "epoch": 1.3880546075085325, "grad_norm": 3.1970558166503906, "learning_rate": 0.0005373151308304893, "loss": 6.5832, "step": 4067 }, { "epoch": 1.3883959044368601, "grad_norm": 4.1045637130737305, "learning_rate": 0.0005372013651877134, "loss": 5.232, "step": 4068 }, { "epoch": 1.3887372013651877, "grad_norm": 3.392815589904785, "learning_rate": 0.0005370875995449375, "loss": 5.7788, "step": 4069 }, { "epoch": 1.3890784982935154, "grad_norm": 3.2663235664367676, "learning_rate": 0.0005369738339021616, "loss": 6.5488, "step": 4070 }, { "epoch": 1.389419795221843, "grad_norm": 4.128077030181885, "learning_rate": 0.0005368600682593857, "loss": 5.8966, "step": 4071 }, { "epoch": 1.3897610921501706, "grad_norm": 3.2271320819854736, "learning_rate": 0.0005367463026166099, "loss": 6.8354, "step": 4072 }, { "epoch": 1.3901023890784983, "grad_norm": 3.334517478942871, "learning_rate": 0.000536632536973834, "loss": 4.8601, "step": 4073 }, { "epoch": 1.3904436860068259, "grad_norm": 3.136608362197876, "learning_rate": 0.0005365187713310581, "loss": 6.6863, "step": 4074 }, { "epoch": 1.3907849829351535, "grad_norm": 3.104353904724121, "learning_rate": 0.0005364050056882821, "loss": 6.5178, "step": 4075 }, { "epoch": 1.3911262798634811, "grad_norm": 3.1452524662017822, "learning_rate": 0.0005362912400455062, "loss": 6.2725, "step": 4076 }, { "epoch": 1.391467576791809, "grad_norm": 3.041658639907837, "learning_rate": 0.0005361774744027303, "loss": 6.231, "step": 4077 }, { "epoch": 1.3918088737201364, "grad_norm": 3.1051037311553955, "learning_rate": 0.0005360637087599545, "loss": 6.0589, "step": 4078 }, { "epoch": 1.3921501706484642, "grad_norm": 3.173142433166504, "learning_rate": 0.0005359499431171786, "loss": 6.2059, "step": 4079 }, { "epoch": 1.3924914675767919, "grad_norm": 3.0535824298858643, "learning_rate": 0.0005358361774744027, "loss": 6.2619, "step": 4080 }, { "epoch": 1.3928327645051195, "grad_norm": 3.0652310848236084, "learning_rate": 0.0005357224118316268, "loss": 6.6454, "step": 4081 }, { "epoch": 1.3931740614334471, "grad_norm": 3.147127151489258, "learning_rate": 0.0005356086461888509, "loss": 6.4706, "step": 4082 }, { "epoch": 1.3935153583617748, "grad_norm": 3.9887466430664062, "learning_rate": 0.000535494880546075, "loss": 5.9159, "step": 4083 }, { "epoch": 1.3938566552901024, "grad_norm": 3.139221429824829, "learning_rate": 0.0005353811149032993, "loss": 6.4576, "step": 4084 }, { "epoch": 1.39419795221843, "grad_norm": 3.5257785320281982, "learning_rate": 0.0005352673492605234, "loss": 5.7365, "step": 4085 }, { "epoch": 1.3945392491467576, "grad_norm": 3.062695026397705, "learning_rate": 0.0005351535836177475, "loss": 6.4737, "step": 4086 }, { "epoch": 1.3948805460750853, "grad_norm": 3.23443603515625, "learning_rate": 0.0005350398179749716, "loss": 5.8095, "step": 4087 }, { "epoch": 1.395221843003413, "grad_norm": 3.249667167663574, "learning_rate": 0.0005349260523321957, "loss": 6.3928, "step": 4088 }, { "epoch": 1.3955631399317405, "grad_norm": 3.0842514038085938, "learning_rate": 0.0005348122866894199, "loss": 6.2861, "step": 4089 }, { "epoch": 1.3959044368600684, "grad_norm": 3.2530109882354736, "learning_rate": 0.000534698521046644, "loss": 5.9651, "step": 4090 }, { "epoch": 1.3962457337883958, "grad_norm": 4.654758930206299, "learning_rate": 0.0005345847554038681, "loss": 5.5245, "step": 4091 }, { "epoch": 1.3965870307167236, "grad_norm": 3.233858108520508, "learning_rate": 0.0005344709897610922, "loss": 6.6915, "step": 4092 }, { "epoch": 1.3969283276450513, "grad_norm": 3.159165382385254, "learning_rate": 0.0005343572241183163, "loss": 6.3194, "step": 4093 }, { "epoch": 1.3972696245733789, "grad_norm": 3.0182223320007324, "learning_rate": 0.0005342434584755403, "loss": 6.0745, "step": 4094 }, { "epoch": 1.3976109215017065, "grad_norm": 3.3083250522613525, "learning_rate": 0.0005341296928327645, "loss": 5.7519, "step": 4095 }, { "epoch": 1.3979522184300341, "grad_norm": 3.4524002075195312, "learning_rate": 0.0005340159271899886, "loss": 5.9577, "step": 4096 }, { "epoch": 1.3982935153583618, "grad_norm": 3.095432996749878, "learning_rate": 0.0005339021615472127, "loss": 6.4214, "step": 4097 }, { "epoch": 1.3986348122866894, "grad_norm": 3.4075958728790283, "learning_rate": 0.0005337883959044368, "loss": 5.6942, "step": 4098 }, { "epoch": 1.398976109215017, "grad_norm": 4.778470993041992, "learning_rate": 0.0005336746302616609, "loss": 5.1441, "step": 4099 }, { "epoch": 1.3993174061433447, "grad_norm": 4.083403587341309, "learning_rate": 0.000533560864618885, "loss": 5.8321, "step": 4100 }, { "epoch": 1.3996587030716723, "grad_norm": 3.242581367492676, "learning_rate": 0.0005334470989761093, "loss": 6.1174, "step": 4101 }, { "epoch": 1.4, "grad_norm": 3.0662283897399902, "learning_rate": 0.0005333333333333334, "loss": 6.7307, "step": 4102 }, { "epoch": 1.4003412969283278, "grad_norm": 3.119428873062134, "learning_rate": 0.0005332195676905575, "loss": 6.5688, "step": 4103 }, { "epoch": 1.4006825938566552, "grad_norm": 3.014010190963745, "learning_rate": 0.0005331058020477816, "loss": 6.5248, "step": 4104 }, { "epoch": 1.401023890784983, "grad_norm": 3.5993144512176514, "learning_rate": 0.0005329920364050057, "loss": 5.533, "step": 4105 }, { "epoch": 1.4013651877133106, "grad_norm": 3.0127501487731934, "learning_rate": 0.0005328782707622299, "loss": 6.3895, "step": 4106 }, { "epoch": 1.4017064846416383, "grad_norm": 3.770829916000366, "learning_rate": 0.000532764505119454, "loss": 5.6792, "step": 4107 }, { "epoch": 1.402047781569966, "grad_norm": 3.10686993598938, "learning_rate": 0.0005326507394766781, "loss": 6.5603, "step": 4108 }, { "epoch": 1.4023890784982935, "grad_norm": 3.173125982284546, "learning_rate": 0.0005325369738339022, "loss": 5.9965, "step": 4109 }, { "epoch": 1.4027303754266212, "grad_norm": 3.075287103652954, "learning_rate": 0.0005324232081911263, "loss": 6.0827, "step": 4110 }, { "epoch": 1.4030716723549488, "grad_norm": 3.0550026893615723, "learning_rate": 0.0005323094425483504, "loss": 6.2533, "step": 4111 }, { "epoch": 1.4034129692832764, "grad_norm": 4.698912620544434, "learning_rate": 0.0005321956769055746, "loss": 4.8645, "step": 4112 }, { "epoch": 1.403754266211604, "grad_norm": 3.12372088432312, "learning_rate": 0.0005320819112627987, "loss": 6.4869, "step": 4113 }, { "epoch": 1.4040955631399317, "grad_norm": 3.2088253498077393, "learning_rate": 0.0005319681456200227, "loss": 6.1782, "step": 4114 }, { "epoch": 1.4044368600682593, "grad_norm": 3.946972370147705, "learning_rate": 0.0005318543799772468, "loss": 5.4858, "step": 4115 }, { "epoch": 1.4047781569965871, "grad_norm": 3.758061408996582, "learning_rate": 0.0005317406143344709, "loss": 5.5092, "step": 4116 }, { "epoch": 1.4051194539249146, "grad_norm": 3.1338324546813965, "learning_rate": 0.000531626848691695, "loss": 6.4477, "step": 4117 }, { "epoch": 1.4054607508532424, "grad_norm": 3.124582529067993, "learning_rate": 0.0005315130830489193, "loss": 5.2994, "step": 4118 }, { "epoch": 1.40580204778157, "grad_norm": 3.2185561656951904, "learning_rate": 0.0005313993174061434, "loss": 6.29, "step": 4119 }, { "epoch": 1.4061433447098977, "grad_norm": 3.439194440841675, "learning_rate": 0.0005312855517633675, "loss": 5.8294, "step": 4120 }, { "epoch": 1.4064846416382253, "grad_norm": 3.0680735111236572, "learning_rate": 0.0005311717861205916, "loss": 6.3791, "step": 4121 }, { "epoch": 1.406825938566553, "grad_norm": 3.082590341567993, "learning_rate": 0.0005310580204778157, "loss": 6.5443, "step": 4122 }, { "epoch": 1.4071672354948805, "grad_norm": 3.0105934143066406, "learning_rate": 0.0005309442548350398, "loss": 6.4141, "step": 4123 }, { "epoch": 1.4075085324232082, "grad_norm": 3.0490424633026123, "learning_rate": 0.000530830489192264, "loss": 6.5894, "step": 4124 }, { "epoch": 1.4078498293515358, "grad_norm": 3.1783745288848877, "learning_rate": 0.0005307167235494881, "loss": 6.3531, "step": 4125 }, { "epoch": 1.4081911262798634, "grad_norm": 3.387235641479492, "learning_rate": 0.0005306029579067122, "loss": 6.4013, "step": 4126 }, { "epoch": 1.408532423208191, "grad_norm": 3.4885988235473633, "learning_rate": 0.0005304891922639363, "loss": 6.4457, "step": 4127 }, { "epoch": 1.4088737201365187, "grad_norm": 3.323430061340332, "learning_rate": 0.0005303754266211604, "loss": 6.1437, "step": 4128 }, { "epoch": 1.4092150170648465, "grad_norm": 5.294995307922363, "learning_rate": 0.0005302616609783846, "loss": 5.1343, "step": 4129 }, { "epoch": 1.409556313993174, "grad_norm": 3.099463701248169, "learning_rate": 0.0005301478953356087, "loss": 6.3936, "step": 4130 }, { "epoch": 1.4098976109215018, "grad_norm": 3.1338493824005127, "learning_rate": 0.0005300341296928328, "loss": 6.8859, "step": 4131 }, { "epoch": 1.4102389078498294, "grad_norm": 3.034945011138916, "learning_rate": 0.0005299203640500569, "loss": 6.1265, "step": 4132 }, { "epoch": 1.410580204778157, "grad_norm": 3.033184051513672, "learning_rate": 0.0005298065984072809, "loss": 6.3814, "step": 4133 }, { "epoch": 1.4109215017064847, "grad_norm": 2.992227554321289, "learning_rate": 0.000529692832764505, "loss": 6.4773, "step": 4134 }, { "epoch": 1.4112627986348123, "grad_norm": 3.4906859397888184, "learning_rate": 0.0005295790671217293, "loss": 5.8243, "step": 4135 }, { "epoch": 1.41160409556314, "grad_norm": 3.140052318572998, "learning_rate": 0.0005294653014789534, "loss": 6.3378, "step": 4136 }, { "epoch": 1.4119453924914676, "grad_norm": 4.089855670928955, "learning_rate": 0.0005293515358361775, "loss": 6.1393, "step": 4137 }, { "epoch": 1.4122866894197952, "grad_norm": 3.1651060581207275, "learning_rate": 0.0005292377701934016, "loss": 6.5695, "step": 4138 }, { "epoch": 1.4126279863481228, "grad_norm": 3.1677775382995605, "learning_rate": 0.0005291240045506257, "loss": 6.1942, "step": 4139 }, { "epoch": 1.4129692832764504, "grad_norm": 3.1922414302825928, "learning_rate": 0.0005290102389078498, "loss": 6.1814, "step": 4140 }, { "epoch": 1.413310580204778, "grad_norm": 3.341269016265869, "learning_rate": 0.000528896473265074, "loss": 6.1189, "step": 4141 }, { "epoch": 1.413651877133106, "grad_norm": 3.1737465858459473, "learning_rate": 0.0005287827076222981, "loss": 6.324, "step": 4142 }, { "epoch": 1.4139931740614333, "grad_norm": 3.0902111530303955, "learning_rate": 0.0005286689419795222, "loss": 6.0682, "step": 4143 }, { "epoch": 1.4143344709897612, "grad_norm": 3.198076009750366, "learning_rate": 0.0005285551763367463, "loss": 6.6576, "step": 4144 }, { "epoch": 1.4146757679180888, "grad_norm": 3.349595785140991, "learning_rate": 0.0005284414106939704, "loss": 5.8567, "step": 4145 }, { "epoch": 1.4150170648464164, "grad_norm": 3.0596208572387695, "learning_rate": 0.0005283276450511945, "loss": 6.6378, "step": 4146 }, { "epoch": 1.415358361774744, "grad_norm": 3.092496633529663, "learning_rate": 0.0005282138794084187, "loss": 6.8541, "step": 4147 }, { "epoch": 1.4156996587030717, "grad_norm": 3.092621088027954, "learning_rate": 0.0005281001137656428, "loss": 6.2547, "step": 4148 }, { "epoch": 1.4160409556313993, "grad_norm": 3.077143907546997, "learning_rate": 0.0005279863481228669, "loss": 6.9397, "step": 4149 }, { "epoch": 1.416382252559727, "grad_norm": 3.1491472721099854, "learning_rate": 0.000527872582480091, "loss": 6.3133, "step": 4150 }, { "epoch": 1.4167235494880546, "grad_norm": 3.1234889030456543, "learning_rate": 0.0005277588168373152, "loss": 6.5563, "step": 4151 }, { "epoch": 1.4170648464163822, "grad_norm": 3.086249828338623, "learning_rate": 0.0005276450511945393, "loss": 6.5361, "step": 4152 }, { "epoch": 1.4174061433447098, "grad_norm": 3.6380252838134766, "learning_rate": 0.0005275312855517634, "loss": 6.0914, "step": 4153 }, { "epoch": 1.4177474402730375, "grad_norm": 4.482765197753906, "learning_rate": 0.0005274175199089875, "loss": 6.6235, "step": 4154 }, { "epoch": 1.4180887372013653, "grad_norm": 3.4601078033447266, "learning_rate": 0.0005273037542662116, "loss": 6.413, "step": 4155 }, { "epoch": 1.4184300341296927, "grad_norm": 3.319906234741211, "learning_rate": 0.0005271899886234357, "loss": 6.8646, "step": 4156 }, { "epoch": 1.4187713310580206, "grad_norm": 3.219625473022461, "learning_rate": 0.0005270762229806598, "loss": 6.1368, "step": 4157 }, { "epoch": 1.4191126279863482, "grad_norm": 3.033139228820801, "learning_rate": 0.000526962457337884, "loss": 6.9841, "step": 4158 }, { "epoch": 1.4194539249146758, "grad_norm": 3.063420534133911, "learning_rate": 0.0005268486916951081, "loss": 6.2634, "step": 4159 }, { "epoch": 1.4197952218430034, "grad_norm": 6.298565864562988, "learning_rate": 0.0005267349260523322, "loss": 5.7468, "step": 4160 }, { "epoch": 1.420136518771331, "grad_norm": 3.0811851024627686, "learning_rate": 0.0005266211604095563, "loss": 6.4445, "step": 4161 }, { "epoch": 1.4204778156996587, "grad_norm": 3.210052967071533, "learning_rate": 0.0005265073947667804, "loss": 6.1846, "step": 4162 }, { "epoch": 1.4208191126279863, "grad_norm": 3.0224571228027344, "learning_rate": 0.0005263936291240045, "loss": 6.5175, "step": 4163 }, { "epoch": 1.421160409556314, "grad_norm": 3.2952568531036377, "learning_rate": 0.0005262798634812287, "loss": 6.3716, "step": 4164 }, { "epoch": 1.4215017064846416, "grad_norm": 3.3877005577087402, "learning_rate": 0.0005261660978384528, "loss": 6.2331, "step": 4165 }, { "epoch": 1.4218430034129692, "grad_norm": 3.012554168701172, "learning_rate": 0.0005260523321956769, "loss": 6.0445, "step": 4166 }, { "epoch": 1.4221843003412968, "grad_norm": 3.1453211307525635, "learning_rate": 0.000525938566552901, "loss": 6.6895, "step": 4167 }, { "epoch": 1.4225255972696247, "grad_norm": 3.146012783050537, "learning_rate": 0.0005258248009101252, "loss": 6.581, "step": 4168 }, { "epoch": 1.422866894197952, "grad_norm": 3.0287742614746094, "learning_rate": 0.0005257110352673494, "loss": 6.2703, "step": 4169 }, { "epoch": 1.42320819112628, "grad_norm": 3.0194754600524902, "learning_rate": 0.0005255972696245735, "loss": 6.4683, "step": 4170 }, { "epoch": 1.4235494880546076, "grad_norm": 2.9725260734558105, "learning_rate": 0.0005254835039817976, "loss": 6.3189, "step": 4171 }, { "epoch": 1.4238907849829352, "grad_norm": 3.101661443710327, "learning_rate": 0.0005253697383390216, "loss": 6.1797, "step": 4172 }, { "epoch": 1.4242320819112628, "grad_norm": 3.5125575065612793, "learning_rate": 0.0005252559726962457, "loss": 6.1972, "step": 4173 }, { "epoch": 1.4245733788395905, "grad_norm": 3.2034642696380615, "learning_rate": 0.0005251422070534698, "loss": 6.4367, "step": 4174 }, { "epoch": 1.424914675767918, "grad_norm": 3.173069715499878, "learning_rate": 0.000525028441410694, "loss": 6.1895, "step": 4175 }, { "epoch": 1.4252559726962457, "grad_norm": 3.3157718181610107, "learning_rate": 0.0005249146757679181, "loss": 5.2822, "step": 4176 }, { "epoch": 1.4255972696245733, "grad_norm": 3.15852952003479, "learning_rate": 0.0005248009101251422, "loss": 6.4959, "step": 4177 }, { "epoch": 1.425938566552901, "grad_norm": 2.8751838207244873, "learning_rate": 0.0005246871444823663, "loss": 4.8944, "step": 4178 }, { "epoch": 1.4262798634812286, "grad_norm": 3.131141185760498, "learning_rate": 0.0005245733788395904, "loss": 6.5915, "step": 4179 }, { "epoch": 1.4266211604095562, "grad_norm": 3.1316215991973877, "learning_rate": 0.0005244596131968145, "loss": 5.7723, "step": 4180 }, { "epoch": 1.426962457337884, "grad_norm": 4.314980506896973, "learning_rate": 0.0005243458475540387, "loss": 5.9181, "step": 4181 }, { "epoch": 1.4273037542662115, "grad_norm": 3.1601321697235107, "learning_rate": 0.0005242320819112628, "loss": 6.1492, "step": 4182 }, { "epoch": 1.4276450511945393, "grad_norm": 3.1197149753570557, "learning_rate": 0.0005241183162684869, "loss": 6.6196, "step": 4183 }, { "epoch": 1.427986348122867, "grad_norm": 3.109541893005371, "learning_rate": 0.000524004550625711, "loss": 6.5163, "step": 4184 }, { "epoch": 1.4283276450511946, "grad_norm": 3.131387233734131, "learning_rate": 0.0005238907849829352, "loss": 6.6058, "step": 4185 }, { "epoch": 1.4286689419795222, "grad_norm": 4.881186485290527, "learning_rate": 0.0005237770193401593, "loss": 6.105, "step": 4186 }, { "epoch": 1.4290102389078498, "grad_norm": 3.3822786808013916, "learning_rate": 0.0005236632536973835, "loss": 6.0654, "step": 4187 }, { "epoch": 1.4293515358361775, "grad_norm": 3.2397496700286865, "learning_rate": 0.0005235494880546076, "loss": 6.7056, "step": 4188 }, { "epoch": 1.429692832764505, "grad_norm": 3.1808483600616455, "learning_rate": 0.0005234357224118317, "loss": 6.3953, "step": 4189 }, { "epoch": 1.4300341296928327, "grad_norm": 3.1761183738708496, "learning_rate": 0.0005233219567690558, "loss": 6.3699, "step": 4190 }, { "epoch": 1.4303754266211604, "grad_norm": 3.019099712371826, "learning_rate": 0.0005232081911262798, "loss": 6.266, "step": 4191 }, { "epoch": 1.430716723549488, "grad_norm": 3.0634679794311523, "learning_rate": 0.000523094425483504, "loss": 6.5473, "step": 4192 }, { "epoch": 1.4310580204778156, "grad_norm": 3.0792815685272217, "learning_rate": 0.0005229806598407281, "loss": 6.3998, "step": 4193 }, { "epoch": 1.4313993174061435, "grad_norm": 3.048142433166504, "learning_rate": 0.0005228668941979522, "loss": 6.0785, "step": 4194 }, { "epoch": 1.4317406143344709, "grad_norm": 3.5605955123901367, "learning_rate": 0.0005227531285551763, "loss": 6.2169, "step": 4195 }, { "epoch": 1.4320819112627987, "grad_norm": 3.1062545776367188, "learning_rate": 0.0005226393629124004, "loss": 6.2116, "step": 4196 }, { "epoch": 1.4324232081911263, "grad_norm": 3.5202982425689697, "learning_rate": 0.0005225255972696245, "loss": 6.6248, "step": 4197 }, { "epoch": 1.432764505119454, "grad_norm": 4.942377090454102, "learning_rate": 0.0005224118316268487, "loss": 4.8293, "step": 4198 }, { "epoch": 1.4331058020477816, "grad_norm": 3.237367868423462, "learning_rate": 0.0005222980659840728, "loss": 6.9193, "step": 4199 }, { "epoch": 1.4334470989761092, "grad_norm": 3.33652400970459, "learning_rate": 0.0005221843003412969, "loss": 5.7304, "step": 4200 }, { "epoch": 1.4337883959044369, "grad_norm": 4.465225696563721, "learning_rate": 0.000522070534698521, "loss": 5.5101, "step": 4201 }, { "epoch": 1.4341296928327645, "grad_norm": 3.111236572265625, "learning_rate": 0.0005219567690557452, "loss": 6.8656, "step": 4202 }, { "epoch": 1.434470989761092, "grad_norm": 3.2016916275024414, "learning_rate": 0.0005218430034129693, "loss": 6.7554, "step": 4203 }, { "epoch": 1.4348122866894197, "grad_norm": 3.0454368591308594, "learning_rate": 0.0005217292377701935, "loss": 6.2635, "step": 4204 }, { "epoch": 1.4351535836177474, "grad_norm": 3.07362699508667, "learning_rate": 0.0005216154721274176, "loss": 5.867, "step": 4205 }, { "epoch": 1.435494880546075, "grad_norm": 3.0416789054870605, "learning_rate": 0.0005215017064846417, "loss": 6.4948, "step": 4206 }, { "epoch": 1.4358361774744028, "grad_norm": 3.082453966140747, "learning_rate": 0.0005213879408418658, "loss": 6.4393, "step": 4207 }, { "epoch": 1.4361774744027302, "grad_norm": 4.051252365112305, "learning_rate": 0.0005212741751990899, "loss": 5.6161, "step": 4208 }, { "epoch": 1.436518771331058, "grad_norm": 3.2135181427001953, "learning_rate": 0.0005211604095563141, "loss": 6.2619, "step": 4209 }, { "epoch": 1.4368600682593857, "grad_norm": 4.387260437011719, "learning_rate": 0.0005210466439135382, "loss": 5.5295, "step": 4210 }, { "epoch": 1.4372013651877134, "grad_norm": 3.195601463317871, "learning_rate": 0.0005209328782707622, "loss": 6.3014, "step": 4211 }, { "epoch": 1.437542662116041, "grad_norm": 3.236250877380371, "learning_rate": 0.0005208191126279863, "loss": 6.6634, "step": 4212 }, { "epoch": 1.4378839590443686, "grad_norm": 3.2227210998535156, "learning_rate": 0.0005207053469852104, "loss": 6.0613, "step": 4213 }, { "epoch": 1.4382252559726962, "grad_norm": 4.377707004547119, "learning_rate": 0.0005205915813424345, "loss": 4.7205, "step": 4214 }, { "epoch": 1.4385665529010239, "grad_norm": 3.1697402000427246, "learning_rate": 0.0005204778156996587, "loss": 6.6822, "step": 4215 }, { "epoch": 1.4389078498293515, "grad_norm": 3.424391746520996, "learning_rate": 0.0005203640500568828, "loss": 6.3304, "step": 4216 }, { "epoch": 1.4392491467576791, "grad_norm": 3.166321039199829, "learning_rate": 0.0005202502844141069, "loss": 6.5789, "step": 4217 }, { "epoch": 1.4395904436860067, "grad_norm": 3.058227062225342, "learning_rate": 0.000520136518771331, "loss": 6.3097, "step": 4218 }, { "epoch": 1.4399317406143344, "grad_norm": 3.0803213119506836, "learning_rate": 0.0005200227531285552, "loss": 6.195, "step": 4219 }, { "epoch": 1.4402730375426622, "grad_norm": 3.0819287300109863, "learning_rate": 0.0005199089874857793, "loss": 6.3407, "step": 4220 }, { "epoch": 1.4406143344709896, "grad_norm": 3.058324098587036, "learning_rate": 0.0005197952218430035, "loss": 6.1638, "step": 4221 }, { "epoch": 1.4409556313993175, "grad_norm": 3.040989398956299, "learning_rate": 0.0005196814562002276, "loss": 6.2791, "step": 4222 }, { "epoch": 1.441296928327645, "grad_norm": 3.0801877975463867, "learning_rate": 0.0005195676905574517, "loss": 5.9373, "step": 4223 }, { "epoch": 1.4416382252559727, "grad_norm": 3.13028883934021, "learning_rate": 0.0005194539249146758, "loss": 5.9899, "step": 4224 }, { "epoch": 1.4419795221843004, "grad_norm": 3.0730881690979004, "learning_rate": 0.0005193401592718999, "loss": 6.0214, "step": 4225 }, { "epoch": 1.442320819112628, "grad_norm": 3.320467233657837, "learning_rate": 0.000519226393629124, "loss": 6.4929, "step": 4226 }, { "epoch": 1.4426621160409556, "grad_norm": 3.381704330444336, "learning_rate": 0.0005191126279863482, "loss": 5.9312, "step": 4227 }, { "epoch": 1.4430034129692833, "grad_norm": 3.1299855709075928, "learning_rate": 0.0005189988623435723, "loss": 6.446, "step": 4228 }, { "epoch": 1.4433447098976109, "grad_norm": 3.0571303367614746, "learning_rate": 0.0005188850967007964, "loss": 6.3526, "step": 4229 }, { "epoch": 1.4436860068259385, "grad_norm": 3.1851227283477783, "learning_rate": 0.0005187713310580204, "loss": 6.7403, "step": 4230 }, { "epoch": 1.4440273037542661, "grad_norm": 3.1886208057403564, "learning_rate": 0.0005186575654152445, "loss": 6.0976, "step": 4231 }, { "epoch": 1.4443686006825938, "grad_norm": 3.0047943592071533, "learning_rate": 0.0005185437997724687, "loss": 6.605, "step": 4232 }, { "epoch": 1.4447098976109216, "grad_norm": 3.09120512008667, "learning_rate": 0.0005184300341296928, "loss": 6.6422, "step": 4233 }, { "epoch": 1.445051194539249, "grad_norm": 3.228661060333252, "learning_rate": 0.000518316268486917, "loss": 6.1845, "step": 4234 }, { "epoch": 1.4453924914675769, "grad_norm": 3.080799102783203, "learning_rate": 0.000518202502844141, "loss": 6.8722, "step": 4235 }, { "epoch": 1.4457337883959045, "grad_norm": 3.116177558898926, "learning_rate": 0.0005180887372013652, "loss": 6.7637, "step": 4236 }, { "epoch": 1.4460750853242321, "grad_norm": 3.183600664138794, "learning_rate": 0.0005179749715585893, "loss": 5.8418, "step": 4237 }, { "epoch": 1.4464163822525598, "grad_norm": 3.1986725330352783, "learning_rate": 0.0005178612059158135, "loss": 5.7188, "step": 4238 }, { "epoch": 1.4467576791808874, "grad_norm": 3.249572277069092, "learning_rate": 0.0005177474402730376, "loss": 5.8931, "step": 4239 }, { "epoch": 1.447098976109215, "grad_norm": 3.0727765560150146, "learning_rate": 0.0005176336746302617, "loss": 6.6941, "step": 4240 }, { "epoch": 1.4474402730375426, "grad_norm": 2.9626481533050537, "learning_rate": 0.0005175199089874858, "loss": 5.5147, "step": 4241 }, { "epoch": 1.4477815699658703, "grad_norm": 2.4201412200927734, "learning_rate": 0.0005174061433447099, "loss": 3.1755, "step": 4242 }, { "epoch": 1.448122866894198, "grad_norm": 3.06439471244812, "learning_rate": 0.000517292377701934, "loss": 6.101, "step": 4243 }, { "epoch": 1.4484641638225255, "grad_norm": 3.1331796646118164, "learning_rate": 0.0005171786120591582, "loss": 6.3183, "step": 4244 }, { "epoch": 1.4488054607508531, "grad_norm": 3.0797486305236816, "learning_rate": 0.0005170648464163823, "loss": 6.3455, "step": 4245 }, { "epoch": 1.449146757679181, "grad_norm": 3.1885828971862793, "learning_rate": 0.0005169510807736064, "loss": 6.6001, "step": 4246 }, { "epoch": 1.4494880546075084, "grad_norm": 4.609078884124756, "learning_rate": 0.0005168373151308305, "loss": 4.6183, "step": 4247 }, { "epoch": 1.4498293515358363, "grad_norm": 3.357383966445923, "learning_rate": 0.0005167235494880546, "loss": 6.4766, "step": 4248 }, { "epoch": 1.4501706484641639, "grad_norm": 3.172637701034546, "learning_rate": 0.0005166097838452787, "loss": 6.3503, "step": 4249 }, { "epoch": 1.4505119453924915, "grad_norm": 3.2504348754882812, "learning_rate": 0.0005164960182025028, "loss": 6.4823, "step": 4250 }, { "epoch": 1.4508532423208191, "grad_norm": 3.318784475326538, "learning_rate": 0.000516382252559727, "loss": 5.2902, "step": 4251 }, { "epoch": 1.4511945392491468, "grad_norm": 3.166903257369995, "learning_rate": 0.000516268486916951, "loss": 7.0076, "step": 4252 }, { "epoch": 1.4515358361774744, "grad_norm": 3.29073166847229, "learning_rate": 0.0005161547212741752, "loss": 5.809, "step": 4253 }, { "epoch": 1.451877133105802, "grad_norm": 3.2139933109283447, "learning_rate": 0.0005160409556313993, "loss": 6.6659, "step": 4254 }, { "epoch": 1.4522184300341296, "grad_norm": 2.99873685836792, "learning_rate": 0.0005159271899886235, "loss": 6.3525, "step": 4255 }, { "epoch": 1.4525597269624573, "grad_norm": 3.2787253856658936, "learning_rate": 0.0005158134243458476, "loss": 5.645, "step": 4256 }, { "epoch": 1.452901023890785, "grad_norm": 3.140230178833008, "learning_rate": 0.0005156996587030717, "loss": 6.1587, "step": 4257 }, { "epoch": 1.4532423208191125, "grad_norm": 4.204946994781494, "learning_rate": 0.0005155858930602958, "loss": 5.4848, "step": 4258 }, { "epoch": 1.4535836177474404, "grad_norm": 3.3353021144866943, "learning_rate": 0.0005154721274175199, "loss": 6.1801, "step": 4259 }, { "epoch": 1.4539249146757678, "grad_norm": 3.392322540283203, "learning_rate": 0.000515358361774744, "loss": 6.8188, "step": 4260 }, { "epoch": 1.4542662116040956, "grad_norm": 3.1940486431121826, "learning_rate": 0.0005152445961319682, "loss": 7.0783, "step": 4261 }, { "epoch": 1.4546075085324233, "grad_norm": 3.1675353050231934, "learning_rate": 0.0005151308304891923, "loss": 6.6465, "step": 4262 }, { "epoch": 1.454948805460751, "grad_norm": 3.268160581588745, "learning_rate": 0.0005150170648464164, "loss": 6.48, "step": 4263 }, { "epoch": 1.4552901023890785, "grad_norm": 3.4621543884277344, "learning_rate": 0.0005149032992036405, "loss": 6.2458, "step": 4264 }, { "epoch": 1.4556313993174061, "grad_norm": 3.037792682647705, "learning_rate": 0.0005147895335608646, "loss": 6.4756, "step": 4265 }, { "epoch": 1.4559726962457338, "grad_norm": 3.1558916568756104, "learning_rate": 0.0005146757679180887, "loss": 6.99, "step": 4266 }, { "epoch": 1.4563139931740614, "grad_norm": 3.1241016387939453, "learning_rate": 0.000514562002275313, "loss": 6.639, "step": 4267 }, { "epoch": 1.456655290102389, "grad_norm": 3.117509603500366, "learning_rate": 0.0005144482366325371, "loss": 6.2899, "step": 4268 }, { "epoch": 1.4569965870307167, "grad_norm": 3.071044683456421, "learning_rate": 0.000514334470989761, "loss": 6.1585, "step": 4269 }, { "epoch": 1.4573378839590443, "grad_norm": 3.1628692150115967, "learning_rate": 0.0005142207053469852, "loss": 6.4385, "step": 4270 }, { "epoch": 1.457679180887372, "grad_norm": 5.46381950378418, "learning_rate": 0.0005141069397042093, "loss": 4.9057, "step": 4271 }, { "epoch": 1.4580204778156998, "grad_norm": 3.0911929607391357, "learning_rate": 0.0005139931740614334, "loss": 6.5145, "step": 4272 }, { "epoch": 1.4583617747440272, "grad_norm": 3.1896402835845947, "learning_rate": 0.0005138794084186576, "loss": 5.9364, "step": 4273 }, { "epoch": 1.458703071672355, "grad_norm": 3.1629385948181152, "learning_rate": 0.0005137656427758817, "loss": 5.7098, "step": 4274 }, { "epoch": 1.4590443686006827, "grad_norm": 3.908473014831543, "learning_rate": 0.0005136518771331058, "loss": 6.1301, "step": 4275 }, { "epoch": 1.4593856655290103, "grad_norm": 3.4416491985321045, "learning_rate": 0.0005135381114903299, "loss": 5.7183, "step": 4276 }, { "epoch": 1.459726962457338, "grad_norm": 3.129523992538452, "learning_rate": 0.000513424345847554, "loss": 6.4804, "step": 4277 }, { "epoch": 1.4600682593856655, "grad_norm": 3.913393020629883, "learning_rate": 0.0005133105802047782, "loss": 5.0513, "step": 4278 }, { "epoch": 1.4604095563139932, "grad_norm": 3.2042019367218018, "learning_rate": 0.0005131968145620023, "loss": 5.7857, "step": 4279 }, { "epoch": 1.4607508532423208, "grad_norm": 3.098621368408203, "learning_rate": 0.0005130830489192264, "loss": 6.7836, "step": 4280 }, { "epoch": 1.4610921501706484, "grad_norm": 3.1765682697296143, "learning_rate": 0.0005129692832764505, "loss": 6.9352, "step": 4281 }, { "epoch": 1.461433447098976, "grad_norm": 3.0995981693267822, "learning_rate": 0.0005128555176336746, "loss": 6.4874, "step": 4282 }, { "epoch": 1.4617747440273037, "grad_norm": 3.3691446781158447, "learning_rate": 0.0005127417519908987, "loss": 6.0037, "step": 4283 }, { "epoch": 1.4621160409556313, "grad_norm": 4.908443450927734, "learning_rate": 0.000512627986348123, "loss": 4.7213, "step": 4284 }, { "epoch": 1.4624573378839592, "grad_norm": 3.1471049785614014, "learning_rate": 0.0005125142207053471, "loss": 6.9427, "step": 4285 }, { "epoch": 1.4627986348122866, "grad_norm": 3.119262456893921, "learning_rate": 0.0005124004550625712, "loss": 6.4854, "step": 4286 }, { "epoch": 1.4631399317406144, "grad_norm": 3.1892127990722656, "learning_rate": 0.0005122866894197953, "loss": 6.2065, "step": 4287 }, { "epoch": 1.463481228668942, "grad_norm": 3.0533523559570312, "learning_rate": 0.0005121729237770194, "loss": 6.6329, "step": 4288 }, { "epoch": 1.4638225255972697, "grad_norm": 4.774298191070557, "learning_rate": 0.0005120591581342434, "loss": 5.365, "step": 4289 }, { "epoch": 1.4641638225255973, "grad_norm": 4.030971050262451, "learning_rate": 0.0005119453924914676, "loss": 5.6187, "step": 4290 }, { "epoch": 1.464505119453925, "grad_norm": 3.2482876777648926, "learning_rate": 0.0005118316268486917, "loss": 6.1619, "step": 4291 }, { "epoch": 1.4648464163822525, "grad_norm": 2.9875893592834473, "learning_rate": 0.0005117178612059158, "loss": 6.4415, "step": 4292 }, { "epoch": 1.4651877133105802, "grad_norm": 4.620938301086426, "learning_rate": 0.0005116040955631399, "loss": 1.5726, "step": 4293 }, { "epoch": 1.4655290102389078, "grad_norm": 3.08268141746521, "learning_rate": 0.000511490329920364, "loss": 6.0083, "step": 4294 }, { "epoch": 1.4658703071672354, "grad_norm": 3.3414804935455322, "learning_rate": 0.0005113765642775882, "loss": 6.0082, "step": 4295 }, { "epoch": 1.466211604095563, "grad_norm": 4.3554792404174805, "learning_rate": 0.0005112627986348123, "loss": 4.8411, "step": 4296 }, { "epoch": 1.4665529010238907, "grad_norm": 3.1311569213867188, "learning_rate": 0.0005111490329920364, "loss": 6.3444, "step": 4297 }, { "epoch": 1.4668941979522185, "grad_norm": 3.0714824199676514, "learning_rate": 0.0005110352673492605, "loss": 6.2691, "step": 4298 }, { "epoch": 1.467235494880546, "grad_norm": 2.927114963531494, "learning_rate": 0.0005109215017064846, "loss": 6.2727, "step": 4299 }, { "epoch": 1.4675767918088738, "grad_norm": 2.885935068130493, "learning_rate": 0.0005108077360637087, "loss": 6.4839, "step": 4300 }, { "epoch": 1.4679180887372014, "grad_norm": 2.985340118408203, "learning_rate": 0.000510693970420933, "loss": 6.5858, "step": 4301 }, { "epoch": 1.468259385665529, "grad_norm": 3.0189003944396973, "learning_rate": 0.0005105802047781571, "loss": 6.4866, "step": 4302 }, { "epoch": 1.4686006825938567, "grad_norm": 3.324467658996582, "learning_rate": 0.0005104664391353812, "loss": 6.1826, "step": 4303 }, { "epoch": 1.4689419795221843, "grad_norm": 3.1696572303771973, "learning_rate": 0.0005103526734926053, "loss": 6.7885, "step": 4304 }, { "epoch": 1.469283276450512, "grad_norm": 3.074852705001831, "learning_rate": 0.0005102389078498294, "loss": 6.6288, "step": 4305 }, { "epoch": 1.4696245733788396, "grad_norm": 2.911682367324829, "learning_rate": 0.0005101251422070535, "loss": 6.4831, "step": 4306 }, { "epoch": 1.4699658703071672, "grad_norm": 3.0853192806243896, "learning_rate": 0.0005100113765642777, "loss": 6.6247, "step": 4307 }, { "epoch": 1.4703071672354948, "grad_norm": 3.0632660388946533, "learning_rate": 0.0005098976109215017, "loss": 6.4772, "step": 4308 }, { "epoch": 1.4706484641638224, "grad_norm": 3.191326379776001, "learning_rate": 0.0005097838452787258, "loss": 6.1043, "step": 4309 }, { "epoch": 1.47098976109215, "grad_norm": 2.7078959941864014, "learning_rate": 0.0005096700796359499, "loss": 3.3701, "step": 4310 }, { "epoch": 1.471331058020478, "grad_norm": 3.1352057456970215, "learning_rate": 0.000509556313993174, "loss": 6.237, "step": 4311 }, { "epoch": 1.4716723549488053, "grad_norm": 3.192700147628784, "learning_rate": 0.0005094425483503981, "loss": 6.6129, "step": 4312 }, { "epoch": 1.4720136518771332, "grad_norm": 2.962541103363037, "learning_rate": 0.0005093287827076223, "loss": 5.8705, "step": 4313 }, { "epoch": 1.4723549488054608, "grad_norm": 3.1734907627105713, "learning_rate": 0.0005092150170648464, "loss": 5.8751, "step": 4314 }, { "epoch": 1.4726962457337884, "grad_norm": 5.9300456047058105, "learning_rate": 0.0005091012514220705, "loss": 5.5791, "step": 4315 }, { "epoch": 1.473037542662116, "grad_norm": 2.9836061000823975, "learning_rate": 0.0005089874857792946, "loss": 6.6064, "step": 4316 }, { "epoch": 1.4733788395904437, "grad_norm": 3.1486117839813232, "learning_rate": 0.0005088737201365187, "loss": 6.6439, "step": 4317 }, { "epoch": 1.4737201365187713, "grad_norm": 3.243680000305176, "learning_rate": 0.000508759954493743, "loss": 6.4473, "step": 4318 }, { "epoch": 1.474061433447099, "grad_norm": 3.0161967277526855, "learning_rate": 0.0005086461888509671, "loss": 6.6844, "step": 4319 }, { "epoch": 1.4744027303754266, "grad_norm": 3.0081169605255127, "learning_rate": 0.0005085324232081912, "loss": 6.537, "step": 4320 }, { "epoch": 1.4747440273037542, "grad_norm": 2.991122245788574, "learning_rate": 0.0005084186575654153, "loss": 6.3377, "step": 4321 }, { "epoch": 1.4750853242320818, "grad_norm": 3.0482964515686035, "learning_rate": 0.0005083048919226394, "loss": 6.6401, "step": 4322 }, { "epoch": 1.4754266211604095, "grad_norm": 4.201125621795654, "learning_rate": 0.0005081911262798635, "loss": 6.4634, "step": 4323 }, { "epoch": 1.4757679180887373, "grad_norm": 3.031925916671753, "learning_rate": 0.0005080773606370877, "loss": 6.3946, "step": 4324 }, { "epoch": 1.4761092150170647, "grad_norm": 3.21101975440979, "learning_rate": 0.0005079635949943118, "loss": 6.258, "step": 4325 }, { "epoch": 1.4764505119453926, "grad_norm": 3.451096534729004, "learning_rate": 0.0005078498293515359, "loss": 5.4511, "step": 4326 }, { "epoch": 1.4767918088737202, "grad_norm": 3.0925445556640625, "learning_rate": 0.0005077360637087599, "loss": 6.7531, "step": 4327 }, { "epoch": 1.4771331058020478, "grad_norm": 3.206573963165283, "learning_rate": 0.000507622298065984, "loss": 6.8944, "step": 4328 }, { "epoch": 1.4774744027303754, "grad_norm": 3.1508631706237793, "learning_rate": 0.0005075085324232081, "loss": 6.8273, "step": 4329 }, { "epoch": 1.477815699658703, "grad_norm": 3.3948967456817627, "learning_rate": 0.0005073947667804323, "loss": 5.824, "step": 4330 }, { "epoch": 1.4781569965870307, "grad_norm": 3.199467182159424, "learning_rate": 0.0005072810011376564, "loss": 6.1532, "step": 4331 }, { "epoch": 1.4784982935153583, "grad_norm": 3.0080020427703857, "learning_rate": 0.0005071672354948805, "loss": 6.5633, "step": 4332 }, { "epoch": 1.478839590443686, "grad_norm": 3.230149030685425, "learning_rate": 0.0005070534698521046, "loss": 6.5214, "step": 4333 }, { "epoch": 1.4791808873720136, "grad_norm": 3.4948723316192627, "learning_rate": 0.0005069397042093287, "loss": 5.9058, "step": 4334 }, { "epoch": 1.4795221843003412, "grad_norm": 3.171333074569702, "learning_rate": 0.000506825938566553, "loss": 6.203, "step": 4335 }, { "epoch": 1.4798634812286688, "grad_norm": 3.0840699672698975, "learning_rate": 0.0005067121729237771, "loss": 6.1753, "step": 4336 }, { "epoch": 1.4802047781569967, "grad_norm": 3.062875747680664, "learning_rate": 0.0005065984072810012, "loss": 6.574, "step": 4337 }, { "epoch": 1.480546075085324, "grad_norm": 5.447630882263184, "learning_rate": 0.0005064846416382253, "loss": 4.2755, "step": 4338 }, { "epoch": 1.480887372013652, "grad_norm": 3.1661298274993896, "learning_rate": 0.0005063708759954494, "loss": 6.4025, "step": 4339 }, { "epoch": 1.4812286689419796, "grad_norm": 3.3090450763702393, "learning_rate": 0.0005062571103526735, "loss": 6.0994, "step": 4340 }, { "epoch": 1.4815699658703072, "grad_norm": 3.69329571723938, "learning_rate": 0.0005061433447098977, "loss": 4.7961, "step": 4341 }, { "epoch": 1.4819112627986348, "grad_norm": 3.3056480884552, "learning_rate": 0.0005060295790671218, "loss": 6.4119, "step": 4342 }, { "epoch": 1.4822525597269625, "grad_norm": 10.146489143371582, "learning_rate": 0.0005059158134243459, "loss": 6.9681, "step": 4343 }, { "epoch": 1.48259385665529, "grad_norm": 3.1964523792266846, "learning_rate": 0.00050580204778157, "loss": 6.9197, "step": 4344 }, { "epoch": 1.4829351535836177, "grad_norm": 3.3806843757629395, "learning_rate": 0.0005056882821387941, "loss": 6.3696, "step": 4345 }, { "epoch": 1.4832764505119453, "grad_norm": 3.172544002532959, "learning_rate": 0.0005055745164960182, "loss": 5.6425, "step": 4346 }, { "epoch": 1.483617747440273, "grad_norm": 3.494082450866699, "learning_rate": 0.0005054607508532423, "loss": 6.5567, "step": 4347 }, { "epoch": 1.4839590443686006, "grad_norm": 3.1376161575317383, "learning_rate": 0.0005053469852104664, "loss": 6.1425, "step": 4348 }, { "epoch": 1.4843003412969282, "grad_norm": 4.658734321594238, "learning_rate": 0.0005052332195676905, "loss": 4.829, "step": 4349 }, { "epoch": 1.484641638225256, "grad_norm": 3.0457332134246826, "learning_rate": 0.0005051194539249146, "loss": 6.6027, "step": 4350 }, { "epoch": 1.4849829351535835, "grad_norm": 3.252115249633789, "learning_rate": 0.0005050056882821387, "loss": 6.4487, "step": 4351 }, { "epoch": 1.4853242320819113, "grad_norm": 3.3878161907196045, "learning_rate": 0.0005048919226393628, "loss": 5.874, "step": 4352 }, { "epoch": 1.485665529010239, "grad_norm": 3.082244873046875, "learning_rate": 0.0005047781569965871, "loss": 6.3979, "step": 4353 }, { "epoch": 1.4860068259385666, "grad_norm": 3.0858993530273438, "learning_rate": 0.0005046643913538112, "loss": 6.2685, "step": 4354 }, { "epoch": 1.4863481228668942, "grad_norm": 3.532917022705078, "learning_rate": 0.0005045506257110353, "loss": 6.3543, "step": 4355 }, { "epoch": 1.4866894197952218, "grad_norm": 3.1342530250549316, "learning_rate": 0.0005044368600682594, "loss": 5.8409, "step": 4356 }, { "epoch": 1.4870307167235495, "grad_norm": 3.64797306060791, "learning_rate": 0.0005043230944254835, "loss": 6.3942, "step": 4357 }, { "epoch": 1.487372013651877, "grad_norm": 4.409688472747803, "learning_rate": 0.0005042093287827077, "loss": 5.6323, "step": 4358 }, { "epoch": 1.4877133105802047, "grad_norm": 3.2418742179870605, "learning_rate": 0.0005040955631399318, "loss": 6.3286, "step": 4359 }, { "epoch": 1.4880546075085324, "grad_norm": 3.112736225128174, "learning_rate": 0.0005039817974971559, "loss": 6.2084, "step": 4360 }, { "epoch": 1.4883959044368602, "grad_norm": 3.1486265659332275, "learning_rate": 0.00050386803185438, "loss": 6.2417, "step": 4361 }, { "epoch": 1.4887372013651876, "grad_norm": 5.536955833435059, "learning_rate": 0.0005037542662116041, "loss": 5.6501, "step": 4362 }, { "epoch": 1.4890784982935155, "grad_norm": 3.1049556732177734, "learning_rate": 0.0005036405005688282, "loss": 6.3247, "step": 4363 }, { "epoch": 1.4894197952218429, "grad_norm": 3.001415252685547, "learning_rate": 0.0005035267349260524, "loss": 6.7637, "step": 4364 }, { "epoch": 1.4897610921501707, "grad_norm": 2.8671603202819824, "learning_rate": 0.0005034129692832765, "loss": 6.3853, "step": 4365 }, { "epoch": 1.4901023890784983, "grad_norm": 4.22214937210083, "learning_rate": 0.0005032992036405005, "loss": 6.064, "step": 4366 }, { "epoch": 1.490443686006826, "grad_norm": 3.110260009765625, "learning_rate": 0.0005031854379977246, "loss": 6.2449, "step": 4367 }, { "epoch": 1.4907849829351536, "grad_norm": 3.0448741912841797, "learning_rate": 0.0005030716723549487, "loss": 6.6455, "step": 4368 }, { "epoch": 1.4911262798634812, "grad_norm": 3.040241003036499, "learning_rate": 0.0005029579067121728, "loss": 6.2379, "step": 4369 }, { "epoch": 1.4914675767918089, "grad_norm": 3.210784912109375, "learning_rate": 0.0005028441410693971, "loss": 5.9151, "step": 4370 }, { "epoch": 1.4918088737201365, "grad_norm": 4.361735820770264, "learning_rate": 0.0005027303754266212, "loss": 5.6326, "step": 4371 }, { "epoch": 1.4921501706484641, "grad_norm": 3.1003541946411133, "learning_rate": 0.0005026166097838453, "loss": 6.7568, "step": 4372 }, { "epoch": 1.4924914675767917, "grad_norm": 4.7033281326293945, "learning_rate": 0.0005025028441410694, "loss": 5.7138, "step": 4373 }, { "epoch": 1.4928327645051196, "grad_norm": 3.1924164295196533, "learning_rate": 0.0005023890784982935, "loss": 6.2701, "step": 4374 }, { "epoch": 1.493174061433447, "grad_norm": 3.1967172622680664, "learning_rate": 0.0005022753128555177, "loss": 6.2449, "step": 4375 }, { "epoch": 1.4935153583617748, "grad_norm": 3.1058359146118164, "learning_rate": 0.0005021615472127418, "loss": 6.567, "step": 4376 }, { "epoch": 1.4938566552901023, "grad_norm": 3.0760414600372314, "learning_rate": 0.0005020477815699659, "loss": 6.221, "step": 4377 }, { "epoch": 1.49419795221843, "grad_norm": 3.031233549118042, "learning_rate": 0.00050193401592719, "loss": 6.3645, "step": 4378 }, { "epoch": 1.4945392491467577, "grad_norm": 2.9455747604370117, "learning_rate": 0.0005018202502844141, "loss": 6.3219, "step": 4379 }, { "epoch": 1.4948805460750854, "grad_norm": 3.113830804824829, "learning_rate": 0.0005017064846416382, "loss": 6.1877, "step": 4380 }, { "epoch": 1.495221843003413, "grad_norm": 2.8928475379943848, "learning_rate": 0.0005015927189988624, "loss": 6.0893, "step": 4381 }, { "epoch": 1.4955631399317406, "grad_norm": 5.041830539703369, "learning_rate": 0.0005014789533560865, "loss": 5.6682, "step": 4382 }, { "epoch": 1.4959044368600682, "grad_norm": 3.120551109313965, "learning_rate": 0.0005013651877133106, "loss": 6.5289, "step": 4383 }, { "epoch": 1.4962457337883959, "grad_norm": 3.156862735748291, "learning_rate": 0.0005012514220705347, "loss": 6.1597, "step": 4384 }, { "epoch": 1.4965870307167235, "grad_norm": 2.876394510269165, "learning_rate": 0.0005011376564277589, "loss": 3.1941, "step": 4385 }, { "epoch": 1.4969283276450511, "grad_norm": 3.063058376312256, "learning_rate": 0.0005010238907849828, "loss": 6.0953, "step": 4386 }, { "epoch": 1.497269624573379, "grad_norm": 3.1170380115509033, "learning_rate": 0.0005009101251422071, "loss": 6.3125, "step": 4387 }, { "epoch": 1.4976109215017064, "grad_norm": 5.3596062660217285, "learning_rate": 0.0005007963594994312, "loss": 4.3308, "step": 4388 }, { "epoch": 1.4979522184300342, "grad_norm": 3.159137487411499, "learning_rate": 0.0005006825938566553, "loss": 6.5011, "step": 4389 }, { "epoch": 1.4982935153583616, "grad_norm": 3.204058885574341, "learning_rate": 0.0005005688282138794, "loss": 6.6059, "step": 4390 }, { "epoch": 1.4986348122866895, "grad_norm": 3.0355639457702637, "learning_rate": 0.0005004550625711035, "loss": 6.5694, "step": 4391 }, { "epoch": 1.4989761092150171, "grad_norm": 3.0069313049316406, "learning_rate": 0.0005003412969283276, "loss": 6.1367, "step": 4392 }, { "epoch": 1.4993174061433447, "grad_norm": 3.0549659729003906, "learning_rate": 0.0005002275312855518, "loss": 6.1733, "step": 4393 }, { "epoch": 1.4996587030716724, "grad_norm": 3.0711121559143066, "learning_rate": 0.0005001137656427759, "loss": 6.0055, "step": 4394 }, { "epoch": 1.5, "grad_norm": 3.120171546936035, "learning_rate": 0.0005, "loss": 6.6731, "step": 4395 }, { "epoch": 1.5003412969283276, "grad_norm": 3.0403029918670654, "learning_rate": 0.0004998862343572241, "loss": 6.1493, "step": 4396 }, { "epoch": 1.5006825938566553, "grad_norm": 3.202802896499634, "learning_rate": 0.0004997724687144482, "loss": 6.1068, "step": 4397 }, { "epoch": 1.5010238907849829, "grad_norm": 3.0703487396240234, "learning_rate": 0.0004996587030716724, "loss": 6.4405, "step": 4398 }, { "epoch": 1.5013651877133105, "grad_norm": 3.1371710300445557, "learning_rate": 0.0004995449374288965, "loss": 6.5393, "step": 4399 }, { "epoch": 1.5017064846416384, "grad_norm": 3.1856324672698975, "learning_rate": 0.0004994311717861205, "loss": 6.4713, "step": 4400 }, { "epoch": 1.5020477815699658, "grad_norm": 3.43066143989563, "learning_rate": 0.0004993174061433447, "loss": 5.8309, "step": 4401 }, { "epoch": 1.5023890784982936, "grad_norm": 3.037707567214966, "learning_rate": 0.0004992036405005689, "loss": 6.4918, "step": 4402 }, { "epoch": 1.502730375426621, "grad_norm": 3.1267921924591064, "learning_rate": 0.000499089874857793, "loss": 6.2709, "step": 4403 }, { "epoch": 1.5030716723549489, "grad_norm": 3.2483456134796143, "learning_rate": 0.0004989761092150171, "loss": 6.1242, "step": 4404 }, { "epoch": 1.5034129692832765, "grad_norm": 3.104841947555542, "learning_rate": 0.0004988623435722412, "loss": 6.3647, "step": 4405 }, { "epoch": 1.5037542662116041, "grad_norm": 3.147634267807007, "learning_rate": 0.0004987485779294653, "loss": 6.5279, "step": 4406 }, { "epoch": 1.5040955631399318, "grad_norm": 3.1218855381011963, "learning_rate": 0.0004986348122866895, "loss": 6.1442, "step": 4407 }, { "epoch": 1.5044368600682594, "grad_norm": 3.1170077323913574, "learning_rate": 0.0004985210466439136, "loss": 6.1911, "step": 4408 }, { "epoch": 1.504778156996587, "grad_norm": 4.791984558105469, "learning_rate": 0.0004984072810011377, "loss": 5.5688, "step": 4409 }, { "epoch": 1.5051194539249146, "grad_norm": 2.986837863922119, "learning_rate": 0.0004982935153583618, "loss": 6.2765, "step": 4410 }, { "epoch": 1.5054607508532423, "grad_norm": 3.148416757583618, "learning_rate": 0.0004981797497155859, "loss": 6.2501, "step": 4411 }, { "epoch": 1.50580204778157, "grad_norm": 3.0591113567352295, "learning_rate": 0.00049806598407281, "loss": 6.6508, "step": 4412 }, { "epoch": 1.5061433447098977, "grad_norm": 3.104835271835327, "learning_rate": 0.0004979522184300341, "loss": 6.0843, "step": 4413 }, { "epoch": 1.5064846416382252, "grad_norm": 3.147568464279175, "learning_rate": 0.0004978384527872582, "loss": 5.7625, "step": 4414 }, { "epoch": 1.506825938566553, "grad_norm": 3.17414927482605, "learning_rate": 0.0004977246871444823, "loss": 5.7501, "step": 4415 }, { "epoch": 1.5071672354948804, "grad_norm": 10.699484825134277, "learning_rate": 0.0004976109215017065, "loss": 5.8049, "step": 4416 }, { "epoch": 1.5075085324232083, "grad_norm": 3.1057991981506348, "learning_rate": 0.0004974971558589306, "loss": 6.3801, "step": 4417 }, { "epoch": 1.5078498293515359, "grad_norm": 3.1610498428344727, "learning_rate": 0.0004973833902161547, "loss": 6.239, "step": 4418 }, { "epoch": 1.5081911262798635, "grad_norm": 3.110896348953247, "learning_rate": 0.0004972696245733789, "loss": 6.927, "step": 4419 }, { "epoch": 1.5085324232081911, "grad_norm": 3.309873580932617, "learning_rate": 0.000497155858930603, "loss": 6.2068, "step": 4420 }, { "epoch": 1.5088737201365188, "grad_norm": 2.9430341720581055, "learning_rate": 0.0004970420932878271, "loss": 6.6838, "step": 4421 }, { "epoch": 1.5092150170648464, "grad_norm": 3.1343164443969727, "learning_rate": 0.0004969283276450512, "loss": 6.2543, "step": 4422 }, { "epoch": 1.509556313993174, "grad_norm": 3.106687068939209, "learning_rate": 0.0004968145620022753, "loss": 6.4328, "step": 4423 }, { "epoch": 1.5098976109215017, "grad_norm": 3.152050018310547, "learning_rate": 0.0004967007963594995, "loss": 5.92, "step": 4424 }, { "epoch": 1.5102389078498293, "grad_norm": 3.016350507736206, "learning_rate": 0.0004965870307167236, "loss": 6.5876, "step": 4425 }, { "epoch": 1.5105802047781571, "grad_norm": 3.782809257507324, "learning_rate": 0.0004964732650739477, "loss": 5.1762, "step": 4426 }, { "epoch": 1.5109215017064845, "grad_norm": 3.22169828414917, "learning_rate": 0.0004963594994311718, "loss": 5.9286, "step": 4427 }, { "epoch": 1.5112627986348124, "grad_norm": 3.2454440593719482, "learning_rate": 0.0004962457337883959, "loss": 6.5536, "step": 4428 }, { "epoch": 1.5116040955631398, "grad_norm": 3.292848587036133, "learning_rate": 0.00049613196814562, "loss": 5.9237, "step": 4429 }, { "epoch": 1.5119453924914676, "grad_norm": 3.0619893074035645, "learning_rate": 0.0004960182025028441, "loss": 6.0451, "step": 4430 }, { "epoch": 1.5122866894197953, "grad_norm": 3.2105019092559814, "learning_rate": 0.0004959044368600682, "loss": 6.5059, "step": 4431 }, { "epoch": 1.512627986348123, "grad_norm": 3.131542921066284, "learning_rate": 0.0004957906712172923, "loss": 6.6094, "step": 4432 }, { "epoch": 1.5129692832764505, "grad_norm": 3.2492835521698, "learning_rate": 0.0004956769055745165, "loss": 6.0207, "step": 4433 }, { "epoch": 1.5133105802047782, "grad_norm": 3.1075937747955322, "learning_rate": 0.0004955631399317406, "loss": 6.6052, "step": 4434 }, { "epoch": 1.5136518771331058, "grad_norm": 3.096921443939209, "learning_rate": 0.0004954493742889648, "loss": 6.0225, "step": 4435 }, { "epoch": 1.5139931740614334, "grad_norm": 3.1452181339263916, "learning_rate": 0.0004953356086461889, "loss": 5.4943, "step": 4436 }, { "epoch": 1.514334470989761, "grad_norm": 3.2415482997894287, "learning_rate": 0.000495221843003413, "loss": 6.7057, "step": 4437 }, { "epoch": 1.5146757679180887, "grad_norm": 3.020129442214966, "learning_rate": 0.0004951080773606372, "loss": 6.2966, "step": 4438 }, { "epoch": 1.5150170648464165, "grad_norm": 3.014113426208496, "learning_rate": 0.0004949943117178612, "loss": 5.4332, "step": 4439 }, { "epoch": 1.515358361774744, "grad_norm": 3.206845760345459, "learning_rate": 0.0004948805460750853, "loss": 5.9552, "step": 4440 }, { "epoch": 1.5156996587030718, "grad_norm": 3.4275362491607666, "learning_rate": 0.0004947667804323095, "loss": 5.7084, "step": 4441 }, { "epoch": 1.5160409556313992, "grad_norm": 3.051011085510254, "learning_rate": 0.0004946530147895336, "loss": 6.4459, "step": 4442 }, { "epoch": 1.516382252559727, "grad_norm": 4.869009017944336, "learning_rate": 0.0004945392491467577, "loss": 5.4903, "step": 4443 }, { "epoch": 1.5167235494880547, "grad_norm": 3.0409438610076904, "learning_rate": 0.0004944254835039818, "loss": 6.5647, "step": 4444 }, { "epoch": 1.5170648464163823, "grad_norm": 3.4340035915374756, "learning_rate": 0.0004943117178612059, "loss": 6.1847, "step": 4445 }, { "epoch": 1.51740614334471, "grad_norm": 3.105072021484375, "learning_rate": 0.00049419795221843, "loss": 6.2396, "step": 4446 }, { "epoch": 1.5177474402730375, "grad_norm": 7.7960638999938965, "learning_rate": 0.0004940841865756542, "loss": 5.791, "step": 4447 }, { "epoch": 1.5180887372013652, "grad_norm": 3.257929801940918, "learning_rate": 0.0004939704209328783, "loss": 6.3392, "step": 4448 }, { "epoch": 1.5184300341296928, "grad_norm": 3.071336269378662, "learning_rate": 0.0004938566552901023, "loss": 6.1078, "step": 4449 }, { "epoch": 1.5187713310580204, "grad_norm": 3.080676317214966, "learning_rate": 0.0004937428896473265, "loss": 6.3788, "step": 4450 }, { "epoch": 1.519112627986348, "grad_norm": 3.079050064086914, "learning_rate": 0.0004936291240045506, "loss": 6.1288, "step": 4451 }, { "epoch": 1.519453924914676, "grad_norm": 3.3807246685028076, "learning_rate": 0.0004935153583617748, "loss": 6.4449, "step": 4452 }, { "epoch": 1.5197952218430033, "grad_norm": 4.4379730224609375, "learning_rate": 0.0004934015927189989, "loss": 6.0084, "step": 4453 }, { "epoch": 1.5201365187713312, "grad_norm": 5.487371444702148, "learning_rate": 0.000493287827076223, "loss": 6.276, "step": 4454 }, { "epoch": 1.5204778156996586, "grad_norm": 3.2696280479431152, "learning_rate": 0.0004931740614334471, "loss": 6.928, "step": 4455 }, { "epoch": 1.5208191126279864, "grad_norm": 3.299424648284912, "learning_rate": 0.0004930602957906713, "loss": 5.6853, "step": 4456 }, { "epoch": 1.521160409556314, "grad_norm": 3.520862340927124, "learning_rate": 0.0004929465301478954, "loss": 6.0821, "step": 4457 }, { "epoch": 1.5215017064846417, "grad_norm": 3.142890691757202, "learning_rate": 0.0004928327645051195, "loss": 6.0354, "step": 4458 }, { "epoch": 1.5218430034129693, "grad_norm": 2.9746880531311035, "learning_rate": 0.0004927189988623436, "loss": 6.4439, "step": 4459 }, { "epoch": 1.522184300341297, "grad_norm": 4.289761066436768, "learning_rate": 0.0004926052332195677, "loss": 5.2062, "step": 4460 }, { "epoch": 1.5225255972696246, "grad_norm": 3.730030059814453, "learning_rate": 0.0004924914675767918, "loss": 4.8441, "step": 4461 }, { "epoch": 1.5228668941979522, "grad_norm": 3.0463922023773193, "learning_rate": 0.0004923777019340159, "loss": 6.1014, "step": 4462 }, { "epoch": 1.5232081911262798, "grad_norm": 3.1336638927459717, "learning_rate": 0.00049226393629124, "loss": 6.3418, "step": 4463 }, { "epoch": 1.5235494880546074, "grad_norm": 3.0351781845092773, "learning_rate": 0.0004921501706484642, "loss": 6.3558, "step": 4464 }, { "epoch": 1.5238907849829353, "grad_norm": 3.130140542984009, "learning_rate": 0.0004920364050056883, "loss": 6.5153, "step": 4465 }, { "epoch": 1.5242320819112627, "grad_norm": 3.104055404663086, "learning_rate": 0.0004919226393629124, "loss": 6.4679, "step": 4466 }, { "epoch": 1.5245733788395905, "grad_norm": 4.129725933074951, "learning_rate": 0.0004918088737201365, "loss": 5.9675, "step": 4467 }, { "epoch": 1.524914675767918, "grad_norm": 3.040032386779785, "learning_rate": 0.0004916951080773606, "loss": 6.476, "step": 4468 }, { "epoch": 1.5252559726962458, "grad_norm": 2.9849636554718018, "learning_rate": 0.0004915813424345848, "loss": 6.3094, "step": 4469 }, { "epoch": 1.5255972696245734, "grad_norm": 3.130922794342041, "learning_rate": 0.0004914675767918089, "loss": 6.2869, "step": 4470 }, { "epoch": 1.525938566552901, "grad_norm": 3.0397064685821533, "learning_rate": 0.000491353811149033, "loss": 6.6056, "step": 4471 }, { "epoch": 1.5262798634812287, "grad_norm": 3.0539159774780273, "learning_rate": 0.0004912400455062571, "loss": 6.4248, "step": 4472 }, { "epoch": 1.5266211604095563, "grad_norm": 2.9415955543518066, "learning_rate": 0.0004911262798634813, "loss": 6.1774, "step": 4473 }, { "epoch": 1.526962457337884, "grad_norm": 3.0912396907806396, "learning_rate": 0.0004910125142207054, "loss": 6.3592, "step": 4474 }, { "epoch": 1.5273037542662116, "grad_norm": 3.2052664756774902, "learning_rate": 0.0004908987485779295, "loss": 5.9968, "step": 4475 }, { "epoch": 1.5276450511945392, "grad_norm": 3.073906421661377, "learning_rate": 0.0004907849829351536, "loss": 6.3548, "step": 4476 }, { "epoch": 1.5279863481228668, "grad_norm": 3.0415360927581787, "learning_rate": 0.0004906712172923777, "loss": 5.5661, "step": 4477 }, { "epoch": 1.5283276450511947, "grad_norm": 2.9644320011138916, "learning_rate": 0.0004905574516496018, "loss": 5.781, "step": 4478 }, { "epoch": 1.528668941979522, "grad_norm": 3.074263095855713, "learning_rate": 0.0004904436860068259, "loss": 6.4673, "step": 4479 }, { "epoch": 1.52901023890785, "grad_norm": 3.1107773780822754, "learning_rate": 0.00049032992036405, "loss": 6.2733, "step": 4480 }, { "epoch": 1.5293515358361773, "grad_norm": 3.262098550796509, "learning_rate": 0.0004902161547212742, "loss": 5.7654, "step": 4481 }, { "epoch": 1.5296928327645052, "grad_norm": 7.093457221984863, "learning_rate": 0.0004901023890784983, "loss": 5.9941, "step": 4482 }, { "epoch": 1.5300341296928328, "grad_norm": 3.19046950340271, "learning_rate": 0.0004899886234357224, "loss": 6.786, "step": 4483 }, { "epoch": 1.5303754266211604, "grad_norm": 3.732973337173462, "learning_rate": 0.0004898748577929465, "loss": 5.1743, "step": 4484 }, { "epoch": 1.530716723549488, "grad_norm": 3.1745400428771973, "learning_rate": 0.0004897610921501706, "loss": 5.9867, "step": 4485 }, { "epoch": 1.5310580204778157, "grad_norm": 3.155008554458618, "learning_rate": 0.0004896473265073948, "loss": 6.6222, "step": 4486 }, { "epoch": 1.5313993174061433, "grad_norm": 4.09003210067749, "learning_rate": 0.000489533560864619, "loss": 5.6898, "step": 4487 }, { "epoch": 1.531740614334471, "grad_norm": 4.12489128112793, "learning_rate": 0.000489419795221843, "loss": 4.9424, "step": 4488 }, { "epoch": 1.5320819112627988, "grad_norm": 3.1861050128936768, "learning_rate": 0.0004893060295790671, "loss": 6.032, "step": 4489 }, { "epoch": 1.5324232081911262, "grad_norm": 3.163435935974121, "learning_rate": 0.0004891922639362913, "loss": 6.3353, "step": 4490 }, { "epoch": 1.532764505119454, "grad_norm": 2.9932167530059814, "learning_rate": 0.0004890784982935154, "loss": 6.5483, "step": 4491 }, { "epoch": 1.5331058020477815, "grad_norm": 3.199826955795288, "learning_rate": 0.0004889647326507395, "loss": 6.1149, "step": 4492 }, { "epoch": 1.5334470989761093, "grad_norm": 3.0402212142944336, "learning_rate": 0.0004888509670079636, "loss": 6.484, "step": 4493 }, { "epoch": 1.5337883959044367, "grad_norm": 3.0578010082244873, "learning_rate": 0.0004887372013651877, "loss": 6.7166, "step": 4494 }, { "epoch": 1.5341296928327646, "grad_norm": 3.0253403186798096, "learning_rate": 0.0004886234357224118, "loss": 6.8642, "step": 4495 }, { "epoch": 1.5344709897610922, "grad_norm": 3.225531578063965, "learning_rate": 0.000488509670079636, "loss": 5.9513, "step": 4496 }, { "epoch": 1.5348122866894198, "grad_norm": 3.0968496799468994, "learning_rate": 0.0004883959044368601, "loss": 6.327, "step": 4497 }, { "epoch": 1.5351535836177475, "grad_norm": 3.0139734745025635, "learning_rate": 0.0004882821387940841, "loss": 6.8722, "step": 4498 }, { "epoch": 1.535494880546075, "grad_norm": 3.061629056930542, "learning_rate": 0.0004881683731513083, "loss": 6.5616, "step": 4499 }, { "epoch": 1.5358361774744027, "grad_norm": 3.062117338180542, "learning_rate": 0.00048805460750853244, "loss": 6.8498, "step": 4500 }, { "epoch": 1.5361774744027303, "grad_norm": 3.0551111698150635, "learning_rate": 0.00048794084186575654, "loss": 6.2062, "step": 4501 }, { "epoch": 1.5365187713310582, "grad_norm": 3.084839105606079, "learning_rate": 0.00048782707622298065, "loss": 6.1916, "step": 4502 }, { "epoch": 1.5368600682593856, "grad_norm": 2.976888418197632, "learning_rate": 0.0004877133105802048, "loss": 6.6639, "step": 4503 }, { "epoch": 1.5372013651877134, "grad_norm": 2.9720981121063232, "learning_rate": 0.0004875995449374289, "loss": 6.4217, "step": 4504 }, { "epoch": 1.5375426621160408, "grad_norm": 3.941185712814331, "learning_rate": 0.000487485779294653, "loss": 5.9576, "step": 4505 }, { "epoch": 1.5378839590443687, "grad_norm": 3.0946285724639893, "learning_rate": 0.0004873720136518772, "loss": 6.3165, "step": 4506 }, { "epoch": 1.538225255972696, "grad_norm": 3.0190694332122803, "learning_rate": 0.00048725824800910123, "loss": 6.4502, "step": 4507 }, { "epoch": 1.538566552901024, "grad_norm": 3.0796146392822266, "learning_rate": 0.00048714448236632533, "loss": 6.1499, "step": 4508 }, { "epoch": 1.5389078498293516, "grad_norm": 4.050348281860352, "learning_rate": 0.0004870307167235495, "loss": 6.0962, "step": 4509 }, { "epoch": 1.5392491467576792, "grad_norm": 3.1888248920440674, "learning_rate": 0.0004869169510807736, "loss": 6.0087, "step": 4510 }, { "epoch": 1.5395904436860068, "grad_norm": 2.9527971744537354, "learning_rate": 0.00048680318543799776, "loss": 6.3688, "step": 4511 }, { "epoch": 1.5399317406143345, "grad_norm": 3.0263445377349854, "learning_rate": 0.00048668941979522186, "loss": 5.7253, "step": 4512 }, { "epoch": 1.540273037542662, "grad_norm": 3.0024619102478027, "learning_rate": 0.00048657565415244597, "loss": 6.0979, "step": 4513 }, { "epoch": 1.5406143344709897, "grad_norm": 3.0450499057769775, "learning_rate": 0.0004864618885096701, "loss": 6.0442, "step": 4514 }, { "epoch": 1.5409556313993176, "grad_norm": 3.137784242630005, "learning_rate": 0.00048634812286689423, "loss": 6.4424, "step": 4515 }, { "epoch": 1.541296928327645, "grad_norm": 3.5526490211486816, "learning_rate": 0.00048623435722411833, "loss": 6.2877, "step": 4516 }, { "epoch": 1.5416382252559728, "grad_norm": 3.030540704727173, "learning_rate": 0.00048612059158134244, "loss": 6.2586, "step": 4517 }, { "epoch": 1.5419795221843002, "grad_norm": 3.2171597480773926, "learning_rate": 0.00048600682593856654, "loss": 6.0998, "step": 4518 }, { "epoch": 1.542320819112628, "grad_norm": 2.999274730682373, "learning_rate": 0.00048589306029579065, "loss": 6.0791, "step": 4519 }, { "epoch": 1.5426621160409555, "grad_norm": 3.330794334411621, "learning_rate": 0.0004857792946530148, "loss": 5.9229, "step": 4520 }, { "epoch": 1.5430034129692833, "grad_norm": 3.1574463844299316, "learning_rate": 0.0004856655290102389, "loss": 5.7773, "step": 4521 }, { "epoch": 1.543344709897611, "grad_norm": 3.089190721511841, "learning_rate": 0.000485551763367463, "loss": 6.2909, "step": 4522 }, { "epoch": 1.5436860068259386, "grad_norm": 2.9726176261901855, "learning_rate": 0.0004854379977246872, "loss": 5.816, "step": 4523 }, { "epoch": 1.5440273037542662, "grad_norm": 3.124121904373169, "learning_rate": 0.0004853242320819113, "loss": 6.2375, "step": 4524 }, { "epoch": 1.5443686006825939, "grad_norm": 3.3395678997039795, "learning_rate": 0.0004852104664391354, "loss": 6.0664, "step": 4525 }, { "epoch": 1.5447098976109215, "grad_norm": 3.0564520359039307, "learning_rate": 0.00048509670079635955, "loss": 6.4603, "step": 4526 }, { "epoch": 1.545051194539249, "grad_norm": 3.5565192699432373, "learning_rate": 0.0004849829351535836, "loss": 6.0142, "step": 4527 }, { "epoch": 1.545392491467577, "grad_norm": 3.0865042209625244, "learning_rate": 0.0004848691695108077, "loss": 6.3314, "step": 4528 }, { "epoch": 1.5457337883959044, "grad_norm": 4.123645782470703, "learning_rate": 0.00048475540386803186, "loss": 5.3, "step": 4529 }, { "epoch": 1.5460750853242322, "grad_norm": 3.4055612087249756, "learning_rate": 0.00048464163822525597, "loss": 5.816, "step": 4530 }, { "epoch": 1.5464163822525596, "grad_norm": 3.080965042114258, "learning_rate": 0.0004845278725824801, "loss": 6.9478, "step": 4531 }, { "epoch": 1.5467576791808875, "grad_norm": 3.1426844596862793, "learning_rate": 0.00048441410693970423, "loss": 5.8243, "step": 4532 }, { "epoch": 1.5470989761092149, "grad_norm": 3.0503451824188232, "learning_rate": 0.00048430034129692834, "loss": 6.595, "step": 4533 }, { "epoch": 1.5474402730375427, "grad_norm": 2.9815714359283447, "learning_rate": 0.0004841865756541525, "loss": 6.7113, "step": 4534 }, { "epoch": 1.5477815699658704, "grad_norm": 2.955928325653076, "learning_rate": 0.0004840728100113766, "loss": 6.4205, "step": 4535 }, { "epoch": 1.548122866894198, "grad_norm": 3.0034239292144775, "learning_rate": 0.00048395904436860065, "loss": 6.4585, "step": 4536 }, { "epoch": 1.5484641638225256, "grad_norm": 3.0914156436920166, "learning_rate": 0.0004838452787258248, "loss": 6.3329, "step": 4537 }, { "epoch": 1.5488054607508532, "grad_norm": 3.089557409286499, "learning_rate": 0.0004837315130830489, "loss": 6.2133, "step": 4538 }, { "epoch": 1.5491467576791809, "grad_norm": 3.246049642562866, "learning_rate": 0.000483617747440273, "loss": 6.5331, "step": 4539 }, { "epoch": 1.5494880546075085, "grad_norm": 3.0475661754608154, "learning_rate": 0.0004835039817974972, "loss": 6.365, "step": 4540 }, { "epoch": 1.5498293515358363, "grad_norm": 3.186208486557007, "learning_rate": 0.0004833902161547213, "loss": 6.1348, "step": 4541 }, { "epoch": 1.5501706484641637, "grad_norm": 3.0234334468841553, "learning_rate": 0.0004832764505119454, "loss": 6.4846, "step": 4542 }, { "epoch": 1.5505119453924916, "grad_norm": 7.129222869873047, "learning_rate": 0.00048316268486916955, "loss": 5.4173, "step": 4543 }, { "epoch": 1.550853242320819, "grad_norm": 3.194537401199341, "learning_rate": 0.00048304891922639365, "loss": 6.3047, "step": 4544 }, { "epoch": 1.5511945392491469, "grad_norm": 3.1084678173065186, "learning_rate": 0.00048293515358361776, "loss": 6.288, "step": 4545 }, { "epoch": 1.5515358361774743, "grad_norm": 3.1638569831848145, "learning_rate": 0.00048282138794084186, "loss": 6.6077, "step": 4546 }, { "epoch": 1.551877133105802, "grad_norm": 3.195992946624756, "learning_rate": 0.00048270762229806597, "loss": 6.6632, "step": 4547 }, { "epoch": 1.5522184300341297, "grad_norm": 3.146247148513794, "learning_rate": 0.00048259385665529007, "loss": 6.5658, "step": 4548 }, { "epoch": 1.5525597269624574, "grad_norm": 3.0369887351989746, "learning_rate": 0.00048248009101251423, "loss": 6.1692, "step": 4549 }, { "epoch": 1.552901023890785, "grad_norm": 3.0904510021209717, "learning_rate": 0.00048236632536973834, "loss": 6.1782, "step": 4550 }, { "epoch": 1.5532423208191126, "grad_norm": 3.021652936935425, "learning_rate": 0.0004822525597269625, "loss": 6.3469, "step": 4551 }, { "epoch": 1.5535836177474402, "grad_norm": 3.161728858947754, "learning_rate": 0.0004821387940841866, "loss": 6.258, "step": 4552 }, { "epoch": 1.5539249146757679, "grad_norm": 3.3773646354675293, "learning_rate": 0.0004820250284414107, "loss": 5.8451, "step": 4553 }, { "epoch": 1.5542662116040957, "grad_norm": 3.128512144088745, "learning_rate": 0.00048191126279863486, "loss": 6.5491, "step": 4554 }, { "epoch": 1.5546075085324231, "grad_norm": 3.0670344829559326, "learning_rate": 0.00048179749715585897, "loss": 7.086, "step": 4555 }, { "epoch": 1.554948805460751, "grad_norm": 3.074653387069702, "learning_rate": 0.000481683731513083, "loss": 6.4795, "step": 4556 }, { "epoch": 1.5552901023890784, "grad_norm": 3.1203551292419434, "learning_rate": 0.0004815699658703072, "loss": 6.3373, "step": 4557 }, { "epoch": 1.5556313993174062, "grad_norm": 3.1555371284484863, "learning_rate": 0.0004814562002275313, "loss": 6.8241, "step": 4558 }, { "epoch": 1.5559726962457336, "grad_norm": 3.1487646102905273, "learning_rate": 0.0004813424345847554, "loss": 6.4579, "step": 4559 }, { "epoch": 1.5563139931740615, "grad_norm": 3.830792188644409, "learning_rate": 0.00048122866894197955, "loss": 6.1349, "step": 4560 }, { "epoch": 1.5566552901023891, "grad_norm": 3.0813772678375244, "learning_rate": 0.00048111490329920365, "loss": 6.9713, "step": 4561 }, { "epoch": 1.5569965870307167, "grad_norm": 3.344524621963501, "learning_rate": 0.00048100113765642776, "loss": 5.4983, "step": 4562 }, { "epoch": 1.5573378839590444, "grad_norm": 3.2821524143218994, "learning_rate": 0.0004808873720136519, "loss": 6.4667, "step": 4563 }, { "epoch": 1.557679180887372, "grad_norm": 3.1274075508117676, "learning_rate": 0.000480773606370876, "loss": 6.7131, "step": 4564 }, { "epoch": 1.5580204778156996, "grad_norm": 2.94832444190979, "learning_rate": 0.00048065984072810013, "loss": 6.3848, "step": 4565 }, { "epoch": 1.5583617747440273, "grad_norm": 3.0074613094329834, "learning_rate": 0.00048054607508532423, "loss": 6.2491, "step": 4566 }, { "epoch": 1.5587030716723551, "grad_norm": 3.0959746837615967, "learning_rate": 0.00048043230944254834, "loss": 5.8382, "step": 4567 }, { "epoch": 1.5590443686006825, "grad_norm": 3.2001736164093018, "learning_rate": 0.00048031854379977244, "loss": 6.5361, "step": 4568 }, { "epoch": 1.5593856655290104, "grad_norm": 3.8832218647003174, "learning_rate": 0.0004802047781569966, "loss": 5.3809, "step": 4569 }, { "epoch": 1.5597269624573378, "grad_norm": 3.133195638656616, "learning_rate": 0.0004800910125142207, "loss": 6.6976, "step": 4570 }, { "epoch": 1.5600682593856656, "grad_norm": 3.2037956714630127, "learning_rate": 0.00047997724687144487, "loss": 5.5344, "step": 4571 }, { "epoch": 1.560409556313993, "grad_norm": 3.157449722290039, "learning_rate": 0.00047986348122866897, "loss": 6.2814, "step": 4572 }, { "epoch": 1.5607508532423209, "grad_norm": 3.062404155731201, "learning_rate": 0.0004797497155858931, "loss": 6.316, "step": 4573 }, { "epoch": 1.5610921501706485, "grad_norm": 3.134361982345581, "learning_rate": 0.00047963594994311723, "loss": 6.5262, "step": 4574 }, { "epoch": 1.5614334470989761, "grad_norm": 3.2360646724700928, "learning_rate": 0.0004795221843003413, "loss": 6.2296, "step": 4575 }, { "epoch": 1.5617747440273038, "grad_norm": 3.012258529663086, "learning_rate": 0.0004794084186575654, "loss": 6.7743, "step": 4576 }, { "epoch": 1.5621160409556314, "grad_norm": 3.5233166217803955, "learning_rate": 0.00047929465301478955, "loss": 5.9291, "step": 4577 }, { "epoch": 1.562457337883959, "grad_norm": 4.878261566162109, "learning_rate": 0.00047918088737201365, "loss": 5.7263, "step": 4578 }, { "epoch": 1.5627986348122866, "grad_norm": 3.228557825088501, "learning_rate": 0.00047906712172923776, "loss": 6.0776, "step": 4579 }, { "epoch": 1.5631399317406145, "grad_norm": 3.1920437812805176, "learning_rate": 0.0004789533560864619, "loss": 6.7448, "step": 4580 }, { "epoch": 1.563481228668942, "grad_norm": 3.1803622245788574, "learning_rate": 0.000478839590443686, "loss": 6.0921, "step": 4581 }, { "epoch": 1.5638225255972698, "grad_norm": 5.843010902404785, "learning_rate": 0.00047872582480091013, "loss": 5.7007, "step": 4582 }, { "epoch": 1.5641638225255972, "grad_norm": 3.6518936157226562, "learning_rate": 0.0004786120591581343, "loss": 6.3155, "step": 4583 }, { "epoch": 1.564505119453925, "grad_norm": 3.08497953414917, "learning_rate": 0.0004784982935153584, "loss": 6.524, "step": 4584 }, { "epoch": 1.5648464163822524, "grad_norm": 2.1860828399658203, "learning_rate": 0.00047838452787258244, "loss": 3.6702, "step": 4585 }, { "epoch": 1.5651877133105803, "grad_norm": 3.152881145477295, "learning_rate": 0.0004782707622298066, "loss": 5.9726, "step": 4586 }, { "epoch": 1.565529010238908, "grad_norm": 3.119457721710205, "learning_rate": 0.0004781569965870307, "loss": 5.8977, "step": 4587 }, { "epoch": 1.5658703071672355, "grad_norm": 3.2837250232696533, "learning_rate": 0.0004780432309442548, "loss": 6.7914, "step": 4588 }, { "epoch": 1.5662116040955631, "grad_norm": 3.0338499546051025, "learning_rate": 0.00047792946530147897, "loss": 6.6875, "step": 4589 }, { "epoch": 1.5665529010238908, "grad_norm": 3.1285126209259033, "learning_rate": 0.0004778156996587031, "loss": 6.115, "step": 4590 }, { "epoch": 1.5668941979522184, "grad_norm": 3.540517568588257, "learning_rate": 0.00047770193401592724, "loss": 5.9633, "step": 4591 }, { "epoch": 1.567235494880546, "grad_norm": 3.079838991165161, "learning_rate": 0.00047758816837315134, "loss": 5.9609, "step": 4592 }, { "epoch": 1.5675767918088739, "grad_norm": 3.100437879562378, "learning_rate": 0.00047747440273037545, "loss": 5.9144, "step": 4593 }, { "epoch": 1.5679180887372013, "grad_norm": 3.044776439666748, "learning_rate": 0.0004773606370875996, "loss": 6.1149, "step": 4594 }, { "epoch": 1.5682593856655291, "grad_norm": 3.157855749130249, "learning_rate": 0.00047724687144482366, "loss": 6.7326, "step": 4595 }, { "epoch": 1.5686006825938565, "grad_norm": 6.666663646697998, "learning_rate": 0.00047713310580204776, "loss": 5.6815, "step": 4596 }, { "epoch": 1.5689419795221844, "grad_norm": 3.1114113330841064, "learning_rate": 0.0004770193401592719, "loss": 5.6961, "step": 4597 }, { "epoch": 1.5692832764505118, "grad_norm": 3.6068615913391113, "learning_rate": 0.000476905574516496, "loss": 4.9314, "step": 4598 }, { "epoch": 1.5696245733788396, "grad_norm": 3.157696008682251, "learning_rate": 0.00047679180887372013, "loss": 6.3051, "step": 4599 }, { "epoch": 1.5699658703071673, "grad_norm": 3.262202262878418, "learning_rate": 0.0004766780432309443, "loss": 6.2528, "step": 4600 }, { "epoch": 1.570307167235495, "grad_norm": 3.1908304691314697, "learning_rate": 0.0004765642775881684, "loss": 6.1413, "step": 4601 }, { "epoch": 1.5706484641638225, "grad_norm": 3.041872978210449, "learning_rate": 0.0004764505119453925, "loss": 6.4687, "step": 4602 }, { "epoch": 1.5709897610921502, "grad_norm": 3.907120943069458, "learning_rate": 0.00047633674630261666, "loss": 5.4988, "step": 4603 }, { "epoch": 1.5713310580204778, "grad_norm": 3.080669403076172, "learning_rate": 0.00047622298065984076, "loss": 6.2327, "step": 4604 }, { "epoch": 1.5716723549488054, "grad_norm": 3.0005602836608887, "learning_rate": 0.0004761092150170648, "loss": 6.2388, "step": 4605 }, { "epoch": 1.5720136518771333, "grad_norm": 3.291757345199585, "learning_rate": 0.00047599544937428897, "loss": 6.102, "step": 4606 }, { "epoch": 1.5723549488054607, "grad_norm": 3.188426971435547, "learning_rate": 0.0004758816837315131, "loss": 6.7411, "step": 4607 }, { "epoch": 1.5726962457337885, "grad_norm": 3.0288374423980713, "learning_rate": 0.0004757679180887372, "loss": 5.8925, "step": 4608 }, { "epoch": 1.573037542662116, "grad_norm": 3.1033778190612793, "learning_rate": 0.00047565415244596134, "loss": 6.0647, "step": 4609 }, { "epoch": 1.5733788395904438, "grad_norm": 3.0476388931274414, "learning_rate": 0.00047554038680318545, "loss": 6.4688, "step": 4610 }, { "epoch": 1.5737201365187712, "grad_norm": 4.029483795166016, "learning_rate": 0.00047542662116040955, "loss": 5.5514, "step": 4611 }, { "epoch": 1.574061433447099, "grad_norm": 3.1188085079193115, "learning_rate": 0.0004753128555176337, "loss": 6.6279, "step": 4612 }, { "epoch": 1.5744027303754267, "grad_norm": 3.5747570991516113, "learning_rate": 0.0004751990898748578, "loss": 6.019, "step": 4613 }, { "epoch": 1.5747440273037543, "grad_norm": 3.1913697719573975, "learning_rate": 0.0004750853242320819, "loss": 5.9978, "step": 4614 }, { "epoch": 1.575085324232082, "grad_norm": 3.071608543395996, "learning_rate": 0.000474971558589306, "loss": 6.2824, "step": 4615 }, { "epoch": 1.5754266211604095, "grad_norm": 3.110900402069092, "learning_rate": 0.00047485779294653013, "loss": 6.441, "step": 4616 }, { "epoch": 1.5757679180887372, "grad_norm": 3.037877321243286, "learning_rate": 0.0004747440273037543, "loss": 6.5409, "step": 4617 }, { "epoch": 1.5761092150170648, "grad_norm": 3.036573886871338, "learning_rate": 0.0004746302616609784, "loss": 6.5253, "step": 4618 }, { "epoch": 1.5764505119453927, "grad_norm": 3.087519645690918, "learning_rate": 0.0004745164960182025, "loss": 6.2424, "step": 4619 }, { "epoch": 1.57679180887372, "grad_norm": 2.925489902496338, "learning_rate": 0.00047440273037542666, "loss": 6.7424, "step": 4620 }, { "epoch": 1.577133105802048, "grad_norm": 2.996990442276001, "learning_rate": 0.00047428896473265076, "loss": 6.6232, "step": 4621 }, { "epoch": 1.5774744027303753, "grad_norm": 3.041271924972534, "learning_rate": 0.00047417519908987487, "loss": 6.5346, "step": 4622 }, { "epoch": 1.5778156996587032, "grad_norm": 3.0396969318389893, "learning_rate": 0.00047406143344709903, "loss": 6.3334, "step": 4623 }, { "epoch": 1.5781569965870306, "grad_norm": 2.9244205951690674, "learning_rate": 0.0004739476678043231, "loss": 6.4611, "step": 4624 }, { "epoch": 1.5784982935153584, "grad_norm": 3.119096517562866, "learning_rate": 0.0004738339021615472, "loss": 6.2721, "step": 4625 }, { "epoch": 1.578839590443686, "grad_norm": 3.0966577529907227, "learning_rate": 0.00047372013651877134, "loss": 6.0143, "step": 4626 }, { "epoch": 1.5791808873720137, "grad_norm": 2.9753429889678955, "learning_rate": 0.00047360637087599545, "loss": 6.1939, "step": 4627 }, { "epoch": 1.5795221843003413, "grad_norm": 3.2640163898468018, "learning_rate": 0.00047349260523321955, "loss": 6.7567, "step": 4628 }, { "epoch": 1.579863481228669, "grad_norm": 3.492380380630493, "learning_rate": 0.0004733788395904437, "loss": 5.9374, "step": 4629 }, { "epoch": 1.5802047781569966, "grad_norm": 3.118227481842041, "learning_rate": 0.0004732650739476678, "loss": 6.7009, "step": 4630 }, { "epoch": 1.5805460750853242, "grad_norm": 3.0515055656433105, "learning_rate": 0.0004731513083048919, "loss": 6.1687, "step": 4631 }, { "epoch": 1.580887372013652, "grad_norm": 2.957733154296875, "learning_rate": 0.0004730375426621161, "loss": 6.4641, "step": 4632 }, { "epoch": 1.5812286689419794, "grad_norm": 3.1707279682159424, "learning_rate": 0.0004729237770193402, "loss": 5.8587, "step": 4633 }, { "epoch": 1.5815699658703073, "grad_norm": 3.1099092960357666, "learning_rate": 0.0004728100113765643, "loss": 6.0575, "step": 4634 }, { "epoch": 1.5819112627986347, "grad_norm": 2.9342041015625, "learning_rate": 0.0004726962457337884, "loss": 5.9659, "step": 4635 }, { "epoch": 1.5822525597269625, "grad_norm": 3.138521909713745, "learning_rate": 0.0004725824800910125, "loss": 5.3131, "step": 4636 }, { "epoch": 1.58259385665529, "grad_norm": 3.0649149417877197, "learning_rate": 0.00047246871444823666, "loss": 6.2565, "step": 4637 }, { "epoch": 1.5829351535836178, "grad_norm": 5.445117950439453, "learning_rate": 0.00047235494880546076, "loss": 5.9185, "step": 4638 }, { "epoch": 1.5832764505119454, "grad_norm": 3.1243233680725098, "learning_rate": 0.00047224118316268487, "loss": 6.2452, "step": 4639 }, { "epoch": 1.583617747440273, "grad_norm": 3.1046507358551025, "learning_rate": 0.00047212741751990903, "loss": 6.2636, "step": 4640 }, { "epoch": 1.5839590443686007, "grad_norm": 3.108381509780884, "learning_rate": 0.00047201365187713313, "loss": 6.7736, "step": 4641 }, { "epoch": 1.5843003412969283, "grad_norm": 3.1656174659729004, "learning_rate": 0.00047189988623435724, "loss": 5.8736, "step": 4642 }, { "epoch": 1.584641638225256, "grad_norm": 2.9965546131134033, "learning_rate": 0.00047178612059158134, "loss": 5.8275, "step": 4643 }, { "epoch": 1.5849829351535836, "grad_norm": 2.997084617614746, "learning_rate": 0.00047167235494880545, "loss": 6.993, "step": 4644 }, { "epoch": 1.5853242320819114, "grad_norm": 2.2224056720733643, "learning_rate": 0.00047155858930602955, "loss": 2.9852, "step": 4645 }, { "epoch": 1.5856655290102388, "grad_norm": 3.1612727642059326, "learning_rate": 0.0004714448236632537, "loss": 6.3212, "step": 4646 }, { "epoch": 1.5860068259385667, "grad_norm": 3.2868475914001465, "learning_rate": 0.0004713310580204778, "loss": 6.2314, "step": 4647 }, { "epoch": 1.586348122866894, "grad_norm": 3.0663857460021973, "learning_rate": 0.0004712172923777019, "loss": 6.4269, "step": 4648 }, { "epoch": 1.586689419795222, "grad_norm": 3.672306537628174, "learning_rate": 0.0004711035267349261, "loss": 5.4267, "step": 4649 }, { "epoch": 1.5870307167235493, "grad_norm": 3.1437530517578125, "learning_rate": 0.0004709897610921502, "loss": 6.4256, "step": 4650 }, { "epoch": 1.5873720136518772, "grad_norm": 3.077855348587036, "learning_rate": 0.0004708759954493743, "loss": 6.1129, "step": 4651 }, { "epoch": 1.5877133105802048, "grad_norm": 3.188419818878174, "learning_rate": 0.00047076222980659845, "loss": 6.0257, "step": 4652 }, { "epoch": 1.5880546075085324, "grad_norm": 2.986910581588745, "learning_rate": 0.0004706484641638225, "loss": 6.3658, "step": 4653 }, { "epoch": 1.58839590443686, "grad_norm": 3.301227569580078, "learning_rate": 0.00047053469852104666, "loss": 5.64, "step": 4654 }, { "epoch": 1.5887372013651877, "grad_norm": 4.034798622131348, "learning_rate": 0.00047042093287827077, "loss": 5.0341, "step": 4655 }, { "epoch": 1.5890784982935153, "grad_norm": 3.077500104904175, "learning_rate": 0.00047030716723549487, "loss": 5.9843, "step": 4656 }, { "epoch": 1.589419795221843, "grad_norm": 2.99000883102417, "learning_rate": 0.00047019340159271903, "loss": 6.4999, "step": 4657 }, { "epoch": 1.5897610921501708, "grad_norm": 3.0318803787231445, "learning_rate": 0.00047007963594994313, "loss": 6.5713, "step": 4658 }, { "epoch": 1.5901023890784982, "grad_norm": 3.398522138595581, "learning_rate": 0.00046996587030716724, "loss": 5.365, "step": 4659 }, { "epoch": 1.590443686006826, "grad_norm": 2.9953789710998535, "learning_rate": 0.0004698521046643914, "loss": 6.1898, "step": 4660 }, { "epoch": 1.5907849829351535, "grad_norm": 3.6404740810394287, "learning_rate": 0.0004697383390216155, "loss": 6.2839, "step": 4661 }, { "epoch": 1.5911262798634813, "grad_norm": 3.0940699577331543, "learning_rate": 0.0004696245733788396, "loss": 6.2371, "step": 4662 }, { "epoch": 1.5914675767918087, "grad_norm": 2.9704160690307617, "learning_rate": 0.0004695108077360637, "loss": 6.0635, "step": 4663 }, { "epoch": 1.5918088737201366, "grad_norm": 3.2446553707122803, "learning_rate": 0.0004693970420932878, "loss": 6.2583, "step": 4664 }, { "epoch": 1.5921501706484642, "grad_norm": 2.948814868927002, "learning_rate": 0.0004692832764505119, "loss": 6.6138, "step": 4665 }, { "epoch": 1.5924914675767918, "grad_norm": 2.9930472373962402, "learning_rate": 0.0004691695108077361, "loss": 6.747, "step": 4666 }, { "epoch": 1.5928327645051195, "grad_norm": 3.368851661682129, "learning_rate": 0.0004690557451649602, "loss": 5.8773, "step": 4667 }, { "epoch": 1.593174061433447, "grad_norm": 2.977428674697876, "learning_rate": 0.0004689419795221843, "loss": 6.556, "step": 4668 }, { "epoch": 1.5935153583617747, "grad_norm": 3.041341543197632, "learning_rate": 0.00046882821387940845, "loss": 6.6892, "step": 4669 }, { "epoch": 1.5938566552901023, "grad_norm": 2.9362432956695557, "learning_rate": 0.00046871444823663256, "loss": 6.8953, "step": 4670 }, { "epoch": 1.5941979522184302, "grad_norm": 3.3086955547332764, "learning_rate": 0.00046860068259385666, "loss": 6.2347, "step": 4671 }, { "epoch": 1.5945392491467576, "grad_norm": 3.477236270904541, "learning_rate": 0.0004684869169510808, "loss": 2.9186, "step": 4672 }, { "epoch": 1.5948805460750854, "grad_norm": 3.360569477081299, "learning_rate": 0.00046837315130830487, "loss": 5.8384, "step": 4673 }, { "epoch": 1.5952218430034129, "grad_norm": 3.140916585922241, "learning_rate": 0.000468259385665529, "loss": 6.0714, "step": 4674 }, { "epoch": 1.5955631399317407, "grad_norm": 3.2916316986083984, "learning_rate": 0.00046814562002275314, "loss": 5.5684, "step": 4675 }, { "epoch": 1.595904436860068, "grad_norm": 3.8546245098114014, "learning_rate": 0.00046803185437997724, "loss": 6.0605, "step": 4676 }, { "epoch": 1.596245733788396, "grad_norm": 3.0560851097106934, "learning_rate": 0.0004679180887372014, "loss": 6.554, "step": 4677 }, { "epoch": 1.5965870307167236, "grad_norm": 2.926163911819458, "learning_rate": 0.0004678043230944255, "loss": 6.2362, "step": 4678 }, { "epoch": 1.5969283276450512, "grad_norm": 3.3139231204986572, "learning_rate": 0.0004676905574516496, "loss": 6.3501, "step": 4679 }, { "epoch": 1.5972696245733788, "grad_norm": 3.2012598514556885, "learning_rate": 0.00046757679180887377, "loss": 5.9128, "step": 4680 }, { "epoch": 1.5976109215017065, "grad_norm": 3.0795986652374268, "learning_rate": 0.0004674630261660979, "loss": 5.7596, "step": 4681 }, { "epoch": 1.597952218430034, "grad_norm": 3.0973923206329346, "learning_rate": 0.0004673492605233219, "loss": 6.176, "step": 4682 }, { "epoch": 1.5982935153583617, "grad_norm": 2.9738688468933105, "learning_rate": 0.0004672354948805461, "loss": 6.2366, "step": 4683 }, { "epoch": 1.5986348122866896, "grad_norm": 3.048534870147705, "learning_rate": 0.0004671217292377702, "loss": 6.2017, "step": 4684 }, { "epoch": 1.598976109215017, "grad_norm": 3.043328285217285, "learning_rate": 0.0004670079635949943, "loss": 6.1733, "step": 4685 }, { "epoch": 1.5993174061433448, "grad_norm": 3.086865186691284, "learning_rate": 0.00046689419795221845, "loss": 6.1099, "step": 4686 }, { "epoch": 1.5996587030716722, "grad_norm": 3.1289331912994385, "learning_rate": 0.00046678043230944256, "loss": 5.693, "step": 4687 }, { "epoch": 1.6, "grad_norm": 3.2024731636047363, "learning_rate": 0.00046666666666666666, "loss": 6.616, "step": 4688 }, { "epoch": 1.6003412969283275, "grad_norm": 3.0767719745635986, "learning_rate": 0.0004665529010238908, "loss": 5.9802, "step": 4689 }, { "epoch": 1.6006825938566553, "grad_norm": 3.016394853591919, "learning_rate": 0.0004664391353811149, "loss": 6.0161, "step": 4690 }, { "epoch": 1.601023890784983, "grad_norm": 2.9882636070251465, "learning_rate": 0.00046632536973833903, "loss": 6.7943, "step": 4691 }, { "epoch": 1.6013651877133106, "grad_norm": 3.6328351497650146, "learning_rate": 0.00046621160409556314, "loss": 5.8045, "step": 4692 }, { "epoch": 1.6017064846416382, "grad_norm": 3.0648207664489746, "learning_rate": 0.00046609783845278724, "loss": 6.569, "step": 4693 }, { "epoch": 1.6020477815699659, "grad_norm": 3.3391435146331787, "learning_rate": 0.00046598407281001135, "loss": 6.1539, "step": 4694 }, { "epoch": 1.6023890784982935, "grad_norm": 2.938784122467041, "learning_rate": 0.0004658703071672355, "loss": 6.4672, "step": 4695 }, { "epoch": 1.6027303754266211, "grad_norm": 3.095799684524536, "learning_rate": 0.0004657565415244596, "loss": 6.4475, "step": 4696 }, { "epoch": 1.603071672354949, "grad_norm": 3.8702211380004883, "learning_rate": 0.00046564277588168377, "loss": 5.1767, "step": 4697 }, { "epoch": 1.6034129692832764, "grad_norm": 3.0789310932159424, "learning_rate": 0.0004655290102389079, "loss": 5.2598, "step": 4698 }, { "epoch": 1.6037542662116042, "grad_norm": 3.129061698913574, "learning_rate": 0.000465415244596132, "loss": 6.486, "step": 4699 }, { "epoch": 1.6040955631399316, "grad_norm": 3.2048168182373047, "learning_rate": 0.00046530147895335614, "loss": 6.495, "step": 4700 }, { "epoch": 1.6044368600682595, "grad_norm": 3.1725878715515137, "learning_rate": 0.00046518771331058024, "loss": 6.2983, "step": 4701 }, { "epoch": 1.6047781569965869, "grad_norm": 3.1969106197357178, "learning_rate": 0.0004650739476678043, "loss": 5.2289, "step": 4702 }, { "epoch": 1.6051194539249147, "grad_norm": 3.340092658996582, "learning_rate": 0.00046496018202502845, "loss": 6.19, "step": 4703 }, { "epoch": 1.6054607508532424, "grad_norm": 3.10861873626709, "learning_rate": 0.00046484641638225256, "loss": 6.4409, "step": 4704 }, { "epoch": 1.60580204778157, "grad_norm": 2.947946310043335, "learning_rate": 0.00046473265073947666, "loss": 5.9036, "step": 4705 }, { "epoch": 1.6061433447098976, "grad_norm": 3.063727855682373, "learning_rate": 0.0004646188850967008, "loss": 6.3689, "step": 4706 }, { "epoch": 1.6064846416382252, "grad_norm": 3.1022796630859375, "learning_rate": 0.0004645051194539249, "loss": 6.3524, "step": 4707 }, { "epoch": 1.6068259385665529, "grad_norm": 3.0189907550811768, "learning_rate": 0.00046439135381114903, "loss": 6.7521, "step": 4708 }, { "epoch": 1.6071672354948805, "grad_norm": 3.067929744720459, "learning_rate": 0.0004642775881683732, "loss": 6.0322, "step": 4709 }, { "epoch": 1.6075085324232083, "grad_norm": 3.403313159942627, "learning_rate": 0.0004641638225255973, "loss": 5.9158, "step": 4710 }, { "epoch": 1.6078498293515358, "grad_norm": 3.100106954574585, "learning_rate": 0.0004640500568828214, "loss": 6.0732, "step": 4711 }, { "epoch": 1.6081911262798636, "grad_norm": 3.0627291202545166, "learning_rate": 0.0004639362912400455, "loss": 5.8361, "step": 4712 }, { "epoch": 1.608532423208191, "grad_norm": 3.6144957542419434, "learning_rate": 0.0004638225255972696, "loss": 6.1397, "step": 4713 }, { "epoch": 1.6088737201365189, "grad_norm": 3.0293452739715576, "learning_rate": 0.0004637087599544937, "loss": 6.5087, "step": 4714 }, { "epoch": 1.6092150170648463, "grad_norm": 3.0504560470581055, "learning_rate": 0.0004635949943117179, "loss": 6.5581, "step": 4715 }, { "epoch": 1.6095563139931741, "grad_norm": 3.043027877807617, "learning_rate": 0.000463481228668942, "loss": 6.8183, "step": 4716 }, { "epoch": 1.6098976109215017, "grad_norm": 3.135340452194214, "learning_rate": 0.00046336746302616614, "loss": 6.3905, "step": 4717 }, { "epoch": 1.6102389078498294, "grad_norm": 2.9933533668518066, "learning_rate": 0.00046325369738339024, "loss": 6.2646, "step": 4718 }, { "epoch": 1.610580204778157, "grad_norm": 3.0164849758148193, "learning_rate": 0.00046313993174061435, "loss": 6.5204, "step": 4719 }, { "epoch": 1.6109215017064846, "grad_norm": 2.9858627319335938, "learning_rate": 0.0004630261660978385, "loss": 6.2363, "step": 4720 }, { "epoch": 1.6112627986348123, "grad_norm": 2.998555898666382, "learning_rate": 0.00046291240045506256, "loss": 6.4151, "step": 4721 }, { "epoch": 1.6116040955631399, "grad_norm": 3.497677803039551, "learning_rate": 0.00046279863481228666, "loss": 5.135, "step": 4722 }, { "epoch": 1.6119453924914677, "grad_norm": 3.07749080657959, "learning_rate": 0.0004626848691695108, "loss": 6.402, "step": 4723 }, { "epoch": 1.6122866894197951, "grad_norm": 3.127281665802002, "learning_rate": 0.00046257110352673493, "loss": 6.8232, "step": 4724 }, { "epoch": 1.612627986348123, "grad_norm": 3.1097822189331055, "learning_rate": 0.00046245733788395903, "loss": 6.0242, "step": 4725 }, { "epoch": 1.6129692832764504, "grad_norm": 3.2166764736175537, "learning_rate": 0.0004623435722411832, "loss": 5.9335, "step": 4726 }, { "epoch": 1.6133105802047782, "grad_norm": 3.05297589302063, "learning_rate": 0.0004622298065984073, "loss": 6.4292, "step": 4727 }, { "epoch": 1.6136518771331056, "grad_norm": 2.0636985301971436, "learning_rate": 0.0004621160409556314, "loss": 3.1146, "step": 4728 }, { "epoch": 1.6139931740614335, "grad_norm": 3.0197324752807617, "learning_rate": 0.00046200227531285556, "loss": 6.0745, "step": 4729 }, { "epoch": 1.6143344709897611, "grad_norm": 3.0842418670654297, "learning_rate": 0.00046188850967007967, "loss": 6.6684, "step": 4730 }, { "epoch": 1.6146757679180888, "grad_norm": 3.0386857986450195, "learning_rate": 0.0004617747440273037, "loss": 6.3415, "step": 4731 }, { "epoch": 1.6150170648464164, "grad_norm": 3.1578781604766846, "learning_rate": 0.0004616609783845279, "loss": 5.7987, "step": 4732 }, { "epoch": 1.615358361774744, "grad_norm": 3.1602065563201904, "learning_rate": 0.000461547212741752, "loss": 6.5547, "step": 4733 }, { "epoch": 1.6156996587030716, "grad_norm": 3.2017452716827393, "learning_rate": 0.0004614334470989761, "loss": 6.0994, "step": 4734 }, { "epoch": 1.6160409556313993, "grad_norm": 3.114725112915039, "learning_rate": 0.00046131968145620024, "loss": 6.5211, "step": 4735 }, { "epoch": 1.6163822525597271, "grad_norm": 3.1060032844543457, "learning_rate": 0.00046120591581342435, "loss": 6.3797, "step": 4736 }, { "epoch": 1.6167235494880545, "grad_norm": 3.1103594303131104, "learning_rate": 0.0004610921501706485, "loss": 6.0067, "step": 4737 }, { "epoch": 1.6170648464163824, "grad_norm": 3.124356985092163, "learning_rate": 0.0004609783845278726, "loss": 6.7605, "step": 4738 }, { "epoch": 1.6174061433447098, "grad_norm": 3.092719793319702, "learning_rate": 0.0004608646188850967, "loss": 5.9352, "step": 4739 }, { "epoch": 1.6177474402730376, "grad_norm": 2.993748903274536, "learning_rate": 0.0004607508532423209, "loss": 6.4874, "step": 4740 }, { "epoch": 1.618088737201365, "grad_norm": 3.0820672512054443, "learning_rate": 0.00046063708759954493, "loss": 5.4731, "step": 4741 }, { "epoch": 1.6184300341296929, "grad_norm": 3.193246841430664, "learning_rate": 0.00046052332195676903, "loss": 6.4882, "step": 4742 }, { "epoch": 1.6187713310580205, "grad_norm": 3.0461337566375732, "learning_rate": 0.0004604095563139932, "loss": 6.7046, "step": 4743 }, { "epoch": 1.6191126279863481, "grad_norm": 3.1627285480499268, "learning_rate": 0.0004602957906712173, "loss": 5.9241, "step": 4744 }, { "epoch": 1.6194539249146758, "grad_norm": 3.004056215286255, "learning_rate": 0.0004601820250284414, "loss": 6.182, "step": 4745 }, { "epoch": 1.6197952218430034, "grad_norm": 3.3949027061462402, "learning_rate": 0.00046006825938566556, "loss": 6.1612, "step": 4746 }, { "epoch": 1.620136518771331, "grad_norm": 3.4996936321258545, "learning_rate": 0.00045995449374288967, "loss": 5.6995, "step": 4747 }, { "epoch": 1.6204778156996587, "grad_norm": 3.5690553188323975, "learning_rate": 0.00045984072810011377, "loss": 6.0195, "step": 4748 }, { "epoch": 1.6208191126279865, "grad_norm": 3.138127326965332, "learning_rate": 0.00045972696245733793, "loss": 6.9745, "step": 4749 }, { "epoch": 1.621160409556314, "grad_norm": 3.223238468170166, "learning_rate": 0.000459613196814562, "loss": 6.5723, "step": 4750 }, { "epoch": 1.6215017064846418, "grad_norm": 3.028900146484375, "learning_rate": 0.0004594994311717861, "loss": 6.8207, "step": 4751 }, { "epoch": 1.6218430034129692, "grad_norm": 3.1101443767547607, "learning_rate": 0.00045938566552901025, "loss": 6.2446, "step": 4752 }, { "epoch": 1.622184300341297, "grad_norm": 3.129108190536499, "learning_rate": 0.00045927189988623435, "loss": 6.6041, "step": 4753 }, { "epoch": 1.6225255972696244, "grad_norm": 2.9134464263916016, "learning_rate": 0.00045915813424345846, "loss": 6.674, "step": 4754 }, { "epoch": 1.6228668941979523, "grad_norm": 2.890514373779297, "learning_rate": 0.0004590443686006826, "loss": 6.4995, "step": 4755 }, { "epoch": 1.62320819112628, "grad_norm": 3.06087589263916, "learning_rate": 0.0004589306029579067, "loss": 5.8313, "step": 4756 }, { "epoch": 1.6235494880546075, "grad_norm": 2.98832631111145, "learning_rate": 0.0004588168373151309, "loss": 6.4396, "step": 4757 }, { "epoch": 1.6238907849829352, "grad_norm": 2.935410976409912, "learning_rate": 0.000458703071672355, "loss": 6.4128, "step": 4758 }, { "epoch": 1.6242320819112628, "grad_norm": 3.138000965118408, "learning_rate": 0.0004585893060295791, "loss": 6.956, "step": 4759 }, { "epoch": 1.6245733788395904, "grad_norm": 2.011180877685547, "learning_rate": 0.0004584755403868032, "loss": 3.2119, "step": 4760 }, { "epoch": 1.624914675767918, "grad_norm": 3.1553590297698975, "learning_rate": 0.0004583617747440273, "loss": 6.419, "step": 4761 }, { "epoch": 1.6252559726962459, "grad_norm": 3.162964344024658, "learning_rate": 0.0004582480091012514, "loss": 6.3694, "step": 4762 }, { "epoch": 1.6255972696245733, "grad_norm": 3.1152918338775635, "learning_rate": 0.00045813424345847556, "loss": 6.0361, "step": 4763 }, { "epoch": 1.6259385665529011, "grad_norm": 3.273135185241699, "learning_rate": 0.00045802047781569967, "loss": 6.0278, "step": 4764 }, { "epoch": 1.6262798634812285, "grad_norm": 4.751234531402588, "learning_rate": 0.00045790671217292377, "loss": 5.5492, "step": 4765 }, { "epoch": 1.6266211604095564, "grad_norm": 3.12723445892334, "learning_rate": 0.00045779294653014793, "loss": 6.8186, "step": 4766 }, { "epoch": 1.6269624573378838, "grad_norm": 3.2533812522888184, "learning_rate": 0.00045767918088737204, "loss": 6.105, "step": 4767 }, { "epoch": 1.6273037542662117, "grad_norm": 3.344517946243286, "learning_rate": 0.00045756541524459614, "loss": 5.5507, "step": 4768 }, { "epoch": 1.6276450511945393, "grad_norm": 3.1536638736724854, "learning_rate": 0.0004574516496018203, "loss": 5.9071, "step": 4769 }, { "epoch": 1.627986348122867, "grad_norm": 3.0342910289764404, "learning_rate": 0.00045733788395904435, "loss": 6.3558, "step": 4770 }, { "epoch": 1.6283276450511945, "grad_norm": 2.96907377243042, "learning_rate": 0.00045722411831626846, "loss": 6.4653, "step": 4771 }, { "epoch": 1.6286689419795222, "grad_norm": 3.0146501064300537, "learning_rate": 0.0004571103526734926, "loss": 6.3785, "step": 4772 }, { "epoch": 1.6290102389078498, "grad_norm": 3.0192205905914307, "learning_rate": 0.0004569965870307167, "loss": 6.1357, "step": 4773 }, { "epoch": 1.6293515358361774, "grad_norm": 2.924898862838745, "learning_rate": 0.0004568828213879408, "loss": 6.2558, "step": 4774 }, { "epoch": 1.6296928327645053, "grad_norm": 2.9523444175720215, "learning_rate": 0.000456769055745165, "loss": 6.6745, "step": 4775 }, { "epoch": 1.6300341296928327, "grad_norm": 3.029269218444824, "learning_rate": 0.0004566552901023891, "loss": 6.6144, "step": 4776 }, { "epoch": 1.6303754266211605, "grad_norm": 3.0683581829071045, "learning_rate": 0.00045654152445961325, "loss": 6.2914, "step": 4777 }, { "epoch": 1.630716723549488, "grad_norm": 3.100022554397583, "learning_rate": 0.00045642775881683735, "loss": 6.7963, "step": 4778 }, { "epoch": 1.6310580204778158, "grad_norm": 2.997330904006958, "learning_rate": 0.00045631399317406146, "loss": 6.5692, "step": 4779 }, { "epoch": 1.6313993174061432, "grad_norm": 3.10273814201355, "learning_rate": 0.00045620022753128556, "loss": 6.137, "step": 4780 }, { "epoch": 1.631740614334471, "grad_norm": 2.9892499446868896, "learning_rate": 0.00045608646188850967, "loss": 6.4949, "step": 4781 }, { "epoch": 1.6320819112627987, "grad_norm": 3.031235933303833, "learning_rate": 0.0004559726962457338, "loss": 6.3293, "step": 4782 }, { "epoch": 1.6324232081911263, "grad_norm": 3.0643863677978516, "learning_rate": 0.00045585893060295793, "loss": 6.279, "step": 4783 }, { "epoch": 1.632764505119454, "grad_norm": 3.4579365253448486, "learning_rate": 0.00045574516496018204, "loss": 6.1474, "step": 4784 }, { "epoch": 1.6331058020477816, "grad_norm": 3.1474268436431885, "learning_rate": 0.00045563139931740614, "loss": 6.3421, "step": 4785 }, { "epoch": 1.6334470989761092, "grad_norm": 3.405183792114258, "learning_rate": 0.0004555176336746303, "loss": 6.1607, "step": 4786 }, { "epoch": 1.6337883959044368, "grad_norm": 3.0848171710968018, "learning_rate": 0.0004554038680318544, "loss": 6.4013, "step": 4787 }, { "epoch": 1.6341296928327647, "grad_norm": 3.092343807220459, "learning_rate": 0.0004552901023890785, "loss": 6.4724, "step": 4788 }, { "epoch": 1.634470989761092, "grad_norm": 3.5067667961120605, "learning_rate": 0.0004551763367463026, "loss": 5.3636, "step": 4789 }, { "epoch": 1.63481228668942, "grad_norm": 3.0137641429901123, "learning_rate": 0.0004550625711035267, "loss": 6.6243, "step": 4790 }, { "epoch": 1.6351535836177473, "grad_norm": 3.554887533187866, "learning_rate": 0.0004549488054607508, "loss": 5.7127, "step": 4791 }, { "epoch": 1.6354948805460752, "grad_norm": 3.113088607788086, "learning_rate": 0.000454835039817975, "loss": 6.3377, "step": 4792 }, { "epoch": 1.6358361774744026, "grad_norm": 3.3043792247772217, "learning_rate": 0.0004547212741751991, "loss": 6.3576, "step": 4793 }, { "epoch": 1.6361774744027304, "grad_norm": 3.1431515216827393, "learning_rate": 0.0004546075085324232, "loss": 6.6704, "step": 4794 }, { "epoch": 1.636518771331058, "grad_norm": 2.899198293685913, "learning_rate": 0.00045449374288964735, "loss": 6.5668, "step": 4795 }, { "epoch": 1.6368600682593857, "grad_norm": 2.860257625579834, "learning_rate": 0.00045437997724687146, "loss": 6.3462, "step": 4796 }, { "epoch": 1.6372013651877133, "grad_norm": 2.903857946395874, "learning_rate": 0.0004542662116040956, "loss": 6.5885, "step": 4797 }, { "epoch": 1.637542662116041, "grad_norm": 3.1243457794189453, "learning_rate": 0.0004541524459613197, "loss": 6.5971, "step": 4798 }, { "epoch": 1.6378839590443686, "grad_norm": 2.9572298526763916, "learning_rate": 0.0004540386803185438, "loss": 6.5093, "step": 4799 }, { "epoch": 1.6382252559726962, "grad_norm": 2.8882224559783936, "learning_rate": 0.00045392491467576793, "loss": 6.5098, "step": 4800 }, { "epoch": 1.638566552901024, "grad_norm": 4.230573654174805, "learning_rate": 0.00045381114903299204, "loss": 5.5162, "step": 4801 }, { "epoch": 1.6389078498293514, "grad_norm": 3.288973093032837, "learning_rate": 0.00045369738339021614, "loss": 6.3791, "step": 4802 }, { "epoch": 1.6392491467576793, "grad_norm": 3.1963343620300293, "learning_rate": 0.0004535836177474403, "loss": 6.6689, "step": 4803 }, { "epoch": 1.6395904436860067, "grad_norm": 3.1104302406311035, "learning_rate": 0.0004534698521046644, "loss": 6.6137, "step": 4804 }, { "epoch": 1.6399317406143346, "grad_norm": 3.651779890060425, "learning_rate": 0.0004533560864618885, "loss": 5.9103, "step": 4805 }, { "epoch": 1.640273037542662, "grad_norm": 5.380152702331543, "learning_rate": 0.00045324232081911267, "loss": 5.7795, "step": 4806 }, { "epoch": 1.6406143344709898, "grad_norm": 3.1054184436798096, "learning_rate": 0.0004531285551763368, "loss": 6.1345, "step": 4807 }, { "epoch": 1.6409556313993174, "grad_norm": 3.150416851043701, "learning_rate": 0.0004530147895335609, "loss": 6.2737, "step": 4808 }, { "epoch": 1.641296928327645, "grad_norm": 6.0928053855896, "learning_rate": 0.000452901023890785, "loss": 5.2429, "step": 4809 }, { "epoch": 1.6416382252559727, "grad_norm": 3.073267698287964, "learning_rate": 0.0004527872582480091, "loss": 6.2179, "step": 4810 }, { "epoch": 1.6419795221843003, "grad_norm": 3.0769455432891846, "learning_rate": 0.0004526734926052332, "loss": 6.1769, "step": 4811 }, { "epoch": 1.642320819112628, "grad_norm": 4.3502888679504395, "learning_rate": 0.00045255972696245736, "loss": 5.1454, "step": 4812 }, { "epoch": 1.6426621160409556, "grad_norm": 3.0830323696136475, "learning_rate": 0.00045244596131968146, "loss": 6.0589, "step": 4813 }, { "epoch": 1.6430034129692834, "grad_norm": 3.1683781147003174, "learning_rate": 0.00045233219567690557, "loss": 6.5236, "step": 4814 }, { "epoch": 1.6433447098976108, "grad_norm": 3.1316497325897217, "learning_rate": 0.0004522184300341297, "loss": 6.4113, "step": 4815 }, { "epoch": 1.6436860068259387, "grad_norm": 3.0804522037506104, "learning_rate": 0.00045210466439135383, "loss": 5.9039, "step": 4816 }, { "epoch": 1.644027303754266, "grad_norm": 3.0023155212402344, "learning_rate": 0.00045199089874857793, "loss": 6.1391, "step": 4817 }, { "epoch": 1.644368600682594, "grad_norm": 2.9633994102478027, "learning_rate": 0.00045187713310580204, "loss": 6.148, "step": 4818 }, { "epoch": 1.6447098976109213, "grad_norm": 2.8795106410980225, "learning_rate": 0.00045176336746302614, "loss": 6.4872, "step": 4819 }, { "epoch": 1.6450511945392492, "grad_norm": 6.828871250152588, "learning_rate": 0.0004516496018202503, "loss": 5.7615, "step": 4820 }, { "epoch": 1.6453924914675768, "grad_norm": 3.061558485031128, "learning_rate": 0.0004515358361774744, "loss": 6.7772, "step": 4821 }, { "epoch": 1.6457337883959045, "grad_norm": 3.115145444869995, "learning_rate": 0.0004514220705346985, "loss": 6.3194, "step": 4822 }, { "epoch": 1.646075085324232, "grad_norm": 2.98173451423645, "learning_rate": 0.00045130830489192267, "loss": 6.3607, "step": 4823 }, { "epoch": 1.6464163822525597, "grad_norm": 3.000434637069702, "learning_rate": 0.0004511945392491468, "loss": 6.7843, "step": 4824 }, { "epoch": 1.6467576791808873, "grad_norm": 2.882544755935669, "learning_rate": 0.0004510807736063709, "loss": 6.2495, "step": 4825 }, { "epoch": 1.647098976109215, "grad_norm": 3.010756492614746, "learning_rate": 0.00045096700796359504, "loss": 6.589, "step": 4826 }, { "epoch": 1.6474402730375428, "grad_norm": 2.991612434387207, "learning_rate": 0.00045085324232081915, "loss": 5.9436, "step": 4827 }, { "epoch": 1.6477815699658702, "grad_norm": 3.1836135387420654, "learning_rate": 0.0004507394766780432, "loss": 6.5523, "step": 4828 }, { "epoch": 1.648122866894198, "grad_norm": 3.123823404312134, "learning_rate": 0.00045062571103526736, "loss": 6.2423, "step": 4829 }, { "epoch": 1.6484641638225255, "grad_norm": 3.108682632446289, "learning_rate": 0.00045051194539249146, "loss": 5.803, "step": 4830 }, { "epoch": 1.6488054607508533, "grad_norm": 2.941983938217163, "learning_rate": 0.00045039817974971557, "loss": 6.7919, "step": 4831 }, { "epoch": 1.6491467576791807, "grad_norm": 3.0571041107177734, "learning_rate": 0.0004502844141069397, "loss": 6.3158, "step": 4832 }, { "epoch": 1.6494880546075086, "grad_norm": 3.1755189895629883, "learning_rate": 0.00045017064846416383, "loss": 6.4594, "step": 4833 }, { "epoch": 1.6498293515358362, "grad_norm": 3.0600199699401855, "learning_rate": 0.00045005688282138794, "loss": 5.9204, "step": 4834 }, { "epoch": 1.6501706484641638, "grad_norm": 3.043494939804077, "learning_rate": 0.0004499431171786121, "loss": 6.3821, "step": 4835 }, { "epoch": 1.6505119453924915, "grad_norm": 3.1068077087402344, "learning_rate": 0.0004498293515358362, "loss": 6.4803, "step": 4836 }, { "epoch": 1.650853242320819, "grad_norm": 2.9238088130950928, "learning_rate": 0.0004497155858930603, "loss": 5.7569, "step": 4837 }, { "epoch": 1.6511945392491467, "grad_norm": 3.37758207321167, "learning_rate": 0.0004496018202502844, "loss": 6.4423, "step": 4838 }, { "epoch": 1.6515358361774743, "grad_norm": 2.9938437938690186, "learning_rate": 0.0004494880546075085, "loss": 6.492, "step": 4839 }, { "epoch": 1.6518771331058022, "grad_norm": 2.9494314193725586, "learning_rate": 0.0004493742889647327, "loss": 5.878, "step": 4840 }, { "epoch": 1.6522184300341296, "grad_norm": 3.7142817974090576, "learning_rate": 0.0004492605233219568, "loss": 5.7933, "step": 4841 }, { "epoch": 1.6525597269624575, "grad_norm": 3.0025503635406494, "learning_rate": 0.0004491467576791809, "loss": 6.2064, "step": 4842 }, { "epoch": 1.6529010238907849, "grad_norm": 3.326956272125244, "learning_rate": 0.00044903299203640504, "loss": 5.7267, "step": 4843 }, { "epoch": 1.6532423208191127, "grad_norm": 2.990328311920166, "learning_rate": 0.00044891922639362915, "loss": 5.7901, "step": 4844 }, { "epoch": 1.6535836177474401, "grad_norm": 3.0785820484161377, "learning_rate": 0.00044880546075085325, "loss": 5.9214, "step": 4845 }, { "epoch": 1.653924914675768, "grad_norm": 3.112694025039673, "learning_rate": 0.0004486916951080774, "loss": 6.3741, "step": 4846 }, { "epoch": 1.6542662116040956, "grad_norm": 3.4003303050994873, "learning_rate": 0.0004485779294653015, "loss": 5.736, "step": 4847 }, { "epoch": 1.6546075085324232, "grad_norm": 2.963510274887085, "learning_rate": 0.00044846416382252557, "loss": 6.0387, "step": 4848 }, { "epoch": 1.6549488054607508, "grad_norm": 3.8840463161468506, "learning_rate": 0.0004483503981797497, "loss": 5.371, "step": 4849 }, { "epoch": 1.6552901023890785, "grad_norm": 3.072937488555908, "learning_rate": 0.00044823663253697383, "loss": 6.6929, "step": 4850 }, { "epoch": 1.655631399317406, "grad_norm": 3.033498764038086, "learning_rate": 0.00044812286689419794, "loss": 5.939, "step": 4851 }, { "epoch": 1.6559726962457337, "grad_norm": 3.0038321018218994, "learning_rate": 0.0004480091012514221, "loss": 6.4066, "step": 4852 }, { "epoch": 1.6563139931740616, "grad_norm": 3.0644583702087402, "learning_rate": 0.0004478953356086462, "loss": 6.2456, "step": 4853 }, { "epoch": 1.656655290102389, "grad_norm": 3.0209004878997803, "learning_rate": 0.0004477815699658703, "loss": 6.2575, "step": 4854 }, { "epoch": 1.6569965870307168, "grad_norm": 2.9037437438964844, "learning_rate": 0.00044766780432309446, "loss": 6.1034, "step": 4855 }, { "epoch": 1.6573378839590442, "grad_norm": 2.919391393661499, "learning_rate": 0.00044755403868031857, "loss": 6.4839, "step": 4856 }, { "epoch": 1.657679180887372, "grad_norm": 2.8986079692840576, "learning_rate": 0.0004474402730375426, "loss": 6.5156, "step": 4857 }, { "epoch": 1.6580204778156995, "grad_norm": 3.268554210662842, "learning_rate": 0.0004473265073947668, "loss": 6.1534, "step": 4858 }, { "epoch": 1.6583617747440274, "grad_norm": 3.4434092044830322, "learning_rate": 0.0004472127417519909, "loss": 5.8377, "step": 4859 }, { "epoch": 1.658703071672355, "grad_norm": 2.988135814666748, "learning_rate": 0.00044709897610921504, "loss": 6.4622, "step": 4860 }, { "epoch": 1.6590443686006826, "grad_norm": 3.0992002487182617, "learning_rate": 0.00044698521046643915, "loss": 6.4363, "step": 4861 }, { "epoch": 1.6593856655290102, "grad_norm": 3.1471357345581055, "learning_rate": 0.00044687144482366325, "loss": 6.0289, "step": 4862 }, { "epoch": 1.6597269624573379, "grad_norm": 3.0062060356140137, "learning_rate": 0.0004467576791808874, "loss": 6.4248, "step": 4863 }, { "epoch": 1.6600682593856655, "grad_norm": 3.003093957901001, "learning_rate": 0.0004466439135381115, "loss": 6.4863, "step": 4864 }, { "epoch": 1.6604095563139931, "grad_norm": 3.077218770980835, "learning_rate": 0.0004465301478953356, "loss": 6.3079, "step": 4865 }, { "epoch": 1.660750853242321, "grad_norm": 3.4413108825683594, "learning_rate": 0.0004464163822525598, "loss": 5.9743, "step": 4866 }, { "epoch": 1.6610921501706484, "grad_norm": 3.0126073360443115, "learning_rate": 0.00044630261660978383, "loss": 6.3671, "step": 4867 }, { "epoch": 1.6614334470989762, "grad_norm": 3.0863595008850098, "learning_rate": 0.00044618885096700794, "loss": 6.6083, "step": 4868 }, { "epoch": 1.6617747440273036, "grad_norm": 3.1746888160705566, "learning_rate": 0.0004460750853242321, "loss": 6.5011, "step": 4869 }, { "epoch": 1.6621160409556315, "grad_norm": 3.1492929458618164, "learning_rate": 0.0004459613196814562, "loss": 6.4877, "step": 4870 }, { "epoch": 1.6624573378839589, "grad_norm": 3.422447443008423, "learning_rate": 0.0004458475540386803, "loss": 5.9579, "step": 4871 }, { "epoch": 1.6627986348122867, "grad_norm": 3.057371139526367, "learning_rate": 0.00044573378839590447, "loss": 6.0468, "step": 4872 }, { "epoch": 1.6631399317406144, "grad_norm": 4.449178695678711, "learning_rate": 0.00044562002275312857, "loss": 5.8584, "step": 4873 }, { "epoch": 1.663481228668942, "grad_norm": 3.1592650413513184, "learning_rate": 0.0004455062571103527, "loss": 6.3337, "step": 4874 }, { "epoch": 1.6638225255972696, "grad_norm": 3.210031509399414, "learning_rate": 0.00044539249146757683, "loss": 6.4076, "step": 4875 }, { "epoch": 1.6641638225255972, "grad_norm": 3.0083580017089844, "learning_rate": 0.00044527872582480094, "loss": 6.2168, "step": 4876 }, { "epoch": 1.6645051194539249, "grad_norm": 2.0590007305145264, "learning_rate": 0.000445164960182025, "loss": 3.1449, "step": 4877 }, { "epoch": 1.6648464163822525, "grad_norm": 3.020071506500244, "learning_rate": 0.00044505119453924915, "loss": 6.0866, "step": 4878 }, { "epoch": 1.6651877133105804, "grad_norm": 4.307253360748291, "learning_rate": 0.00044493742889647325, "loss": 6.2752, "step": 4879 }, { "epoch": 1.6655290102389078, "grad_norm": 3.5636332035064697, "learning_rate": 0.00044482366325369736, "loss": 4.8149, "step": 4880 }, { "epoch": 1.6658703071672356, "grad_norm": 3.1198694705963135, "learning_rate": 0.0004447098976109215, "loss": 6.368, "step": 4881 }, { "epoch": 1.666211604095563, "grad_norm": 3.0112080574035645, "learning_rate": 0.0004445961319681456, "loss": 6.3972, "step": 4882 }, { "epoch": 1.6665529010238909, "grad_norm": 3.0651512145996094, "learning_rate": 0.0004444823663253698, "loss": 4.845, "step": 4883 }, { "epoch": 1.6668941979522183, "grad_norm": 3.187875270843506, "learning_rate": 0.0004443686006825939, "loss": 6.1866, "step": 4884 }, { "epoch": 1.6672354948805461, "grad_norm": 3.1536598205566406, "learning_rate": 0.000444254835039818, "loss": 6.3055, "step": 4885 }, { "epoch": 1.6675767918088737, "grad_norm": 3.3544106483459473, "learning_rate": 0.00044414106939704215, "loss": 6.1421, "step": 4886 }, { "epoch": 1.6679180887372014, "grad_norm": 3.0838112831115723, "learning_rate": 0.0004440273037542662, "loss": 6.1572, "step": 4887 }, { "epoch": 1.668259385665529, "grad_norm": 2.9401392936706543, "learning_rate": 0.0004439135381114903, "loss": 6.5586, "step": 4888 }, { "epoch": 1.6686006825938566, "grad_norm": 3.06618595123291, "learning_rate": 0.00044379977246871447, "loss": 6.0765, "step": 4889 }, { "epoch": 1.6689419795221843, "grad_norm": 2.9306881427764893, "learning_rate": 0.00044368600682593857, "loss": 6.2415, "step": 4890 }, { "epoch": 1.6692832764505119, "grad_norm": 3.1481986045837402, "learning_rate": 0.0004435722411831627, "loss": 5.8938, "step": 4891 }, { "epoch": 1.6696245733788397, "grad_norm": 3.0431253910064697, "learning_rate": 0.00044345847554038684, "loss": 6.1017, "step": 4892 }, { "epoch": 1.6699658703071671, "grad_norm": 2.9972074031829834, "learning_rate": 0.00044334470989761094, "loss": 6.3889, "step": 4893 }, { "epoch": 1.670307167235495, "grad_norm": 3.1371490955352783, "learning_rate": 0.00044323094425483504, "loss": 6.6684, "step": 4894 }, { "epoch": 1.6706484641638224, "grad_norm": 3.0418312549591064, "learning_rate": 0.0004431171786120592, "loss": 6.478, "step": 4895 }, { "epoch": 1.6709897610921502, "grad_norm": 3.074183464050293, "learning_rate": 0.00044300341296928325, "loss": 6.388, "step": 4896 }, { "epoch": 1.6713310580204777, "grad_norm": 3.0589749813079834, "learning_rate": 0.00044288964732650736, "loss": 6.2, "step": 4897 }, { "epoch": 1.6716723549488055, "grad_norm": 3.161790609359741, "learning_rate": 0.0004427758816837315, "loss": 5.6493, "step": 4898 }, { "epoch": 1.6720136518771331, "grad_norm": 2.9835970401763916, "learning_rate": 0.0004426621160409556, "loss": 6.1388, "step": 4899 }, { "epoch": 1.6723549488054608, "grad_norm": 3.0649311542510986, "learning_rate": 0.00044254835039817973, "loss": 5.9025, "step": 4900 }, { "epoch": 1.6726962457337884, "grad_norm": 3.1138432025909424, "learning_rate": 0.0004424345847554039, "loss": 6.4551, "step": 4901 }, { "epoch": 1.673037542662116, "grad_norm": 3.0755131244659424, "learning_rate": 0.000442320819112628, "loss": 6.5686, "step": 4902 }, { "epoch": 1.6733788395904436, "grad_norm": 3.0821003913879395, "learning_rate": 0.00044220705346985215, "loss": 6.092, "step": 4903 }, { "epoch": 1.6737201365187713, "grad_norm": 2.937201499938965, "learning_rate": 0.00044209328782707626, "loss": 6.2059, "step": 4904 }, { "epoch": 1.6740614334470991, "grad_norm": 2.9311842918395996, "learning_rate": 0.00044197952218430036, "loss": 6.4499, "step": 4905 }, { "epoch": 1.6744027303754265, "grad_norm": 3.036937713623047, "learning_rate": 0.00044186575654152447, "loss": 6.1801, "step": 4906 }, { "epoch": 1.6747440273037544, "grad_norm": 3.5501644611358643, "learning_rate": 0.00044175199089874857, "loss": 5.8053, "step": 4907 }, { "epoch": 1.6750853242320818, "grad_norm": 3.023075580596924, "learning_rate": 0.0004416382252559727, "loss": 6.1232, "step": 4908 }, { "epoch": 1.6754266211604096, "grad_norm": 3.7344937324523926, "learning_rate": 0.00044152445961319684, "loss": 4.8391, "step": 4909 }, { "epoch": 1.675767918088737, "grad_norm": 4.388406276702881, "learning_rate": 0.00044141069397042094, "loss": 6.2587, "step": 4910 }, { "epoch": 1.676109215017065, "grad_norm": 3.0161640644073486, "learning_rate": 0.00044129692832764505, "loss": 6.2966, "step": 4911 }, { "epoch": 1.6764505119453925, "grad_norm": 3.0810742378234863, "learning_rate": 0.0004411831626848692, "loss": 6.915, "step": 4912 }, { "epoch": 1.6767918088737201, "grad_norm": 3.1341419219970703, "learning_rate": 0.0004410693970420933, "loss": 7.0646, "step": 4913 }, { "epoch": 1.6771331058020478, "grad_norm": 3.8616952896118164, "learning_rate": 0.0004409556313993174, "loss": 5.5981, "step": 4914 }, { "epoch": 1.6774744027303754, "grad_norm": 3.4659619331359863, "learning_rate": 0.0004408418657565416, "loss": 5.83, "step": 4915 }, { "epoch": 1.677815699658703, "grad_norm": 3.06744647026062, "learning_rate": 0.0004407281001137656, "loss": 6.2469, "step": 4916 }, { "epoch": 1.6781569965870307, "grad_norm": 3.057793140411377, "learning_rate": 0.00044061433447098973, "loss": 6.788, "step": 4917 }, { "epoch": 1.6784982935153585, "grad_norm": 2.971437931060791, "learning_rate": 0.0004405005688282139, "loss": 6.2344, "step": 4918 }, { "epoch": 1.678839590443686, "grad_norm": 3.025042772293091, "learning_rate": 0.000440386803185438, "loss": 6.3374, "step": 4919 }, { "epoch": 1.6791808873720138, "grad_norm": 3.0731565952301025, "learning_rate": 0.0004402730375426621, "loss": 5.7545, "step": 4920 }, { "epoch": 1.6795221843003412, "grad_norm": 2.9983479976654053, "learning_rate": 0.00044015927189988626, "loss": 6.1115, "step": 4921 }, { "epoch": 1.679863481228669, "grad_norm": 2.97145414352417, "learning_rate": 0.00044004550625711036, "loss": 6.2915, "step": 4922 }, { "epoch": 1.6802047781569964, "grad_norm": 3.1883833408355713, "learning_rate": 0.0004399317406143345, "loss": 5.9769, "step": 4923 }, { "epoch": 1.6805460750853243, "grad_norm": 2.9268434047698975, "learning_rate": 0.0004398179749715586, "loss": 6.5054, "step": 4924 }, { "epoch": 1.680887372013652, "grad_norm": 3.26137375831604, "learning_rate": 0.0004397042093287827, "loss": 6.3084, "step": 4925 }, { "epoch": 1.6812286689419795, "grad_norm": 3.009206533432007, "learning_rate": 0.00043959044368600684, "loss": 6.0177, "step": 4926 }, { "epoch": 1.6815699658703072, "grad_norm": 3.147836685180664, "learning_rate": 0.00043947667804323094, "loss": 5.1989, "step": 4927 }, { "epoch": 1.6819112627986348, "grad_norm": 3.239095687866211, "learning_rate": 0.00043936291240045505, "loss": 6.1885, "step": 4928 }, { "epoch": 1.6822525597269624, "grad_norm": 3.0361344814300537, "learning_rate": 0.0004392491467576792, "loss": 6.1484, "step": 4929 }, { "epoch": 1.68259385665529, "grad_norm": 2.998265504837036, "learning_rate": 0.0004391353811149033, "loss": 6.7742, "step": 4930 }, { "epoch": 1.682935153583618, "grad_norm": 3.0824317932128906, "learning_rate": 0.0004390216154721274, "loss": 6.5443, "step": 4931 }, { "epoch": 1.6832764505119453, "grad_norm": 5.664287090301514, "learning_rate": 0.0004389078498293516, "loss": 5.2706, "step": 4932 }, { "epoch": 1.6836177474402731, "grad_norm": 2.9945015907287598, "learning_rate": 0.0004387940841865757, "loss": 6.2064, "step": 4933 }, { "epoch": 1.6839590443686006, "grad_norm": 3.081329584121704, "learning_rate": 0.0004386803185437998, "loss": 6.5694, "step": 4934 }, { "epoch": 1.6843003412969284, "grad_norm": 3.1098530292510986, "learning_rate": 0.0004385665529010239, "loss": 5.8361, "step": 4935 }, { "epoch": 1.6846416382252558, "grad_norm": 3.166102170944214, "learning_rate": 0.000438452787258248, "loss": 6.0032, "step": 4936 }, { "epoch": 1.6849829351535837, "grad_norm": 3.0542194843292236, "learning_rate": 0.0004383390216154721, "loss": 5.7641, "step": 4937 }, { "epoch": 1.6853242320819113, "grad_norm": 2.8499386310577393, "learning_rate": 0.00043822525597269626, "loss": 6.0653, "step": 4938 }, { "epoch": 1.685665529010239, "grad_norm": 3.0446012020111084, "learning_rate": 0.00043811149032992036, "loss": 6.3366, "step": 4939 }, { "epoch": 1.6860068259385665, "grad_norm": 3.0523667335510254, "learning_rate": 0.00043799772468714447, "loss": 6.0754, "step": 4940 }, { "epoch": 1.6863481228668942, "grad_norm": 3.245394229888916, "learning_rate": 0.00043788395904436863, "loss": 6.1414, "step": 4941 }, { "epoch": 1.6866894197952218, "grad_norm": 3.0902605056762695, "learning_rate": 0.00043777019340159273, "loss": 6.5985, "step": 4942 }, { "epoch": 1.6870307167235494, "grad_norm": 2.9489684104919434, "learning_rate": 0.0004376564277588169, "loss": 6.3939, "step": 4943 }, { "epoch": 1.6873720136518773, "grad_norm": 3.0497970581054688, "learning_rate": 0.000437542662116041, "loss": 6.8189, "step": 4944 }, { "epoch": 1.6877133105802047, "grad_norm": 3.0870232582092285, "learning_rate": 0.00043742889647326505, "loss": 5.96, "step": 4945 }, { "epoch": 1.6880546075085325, "grad_norm": 3.0094494819641113, "learning_rate": 0.0004373151308304892, "loss": 6.2432, "step": 4946 }, { "epoch": 1.68839590443686, "grad_norm": 3.130962371826172, "learning_rate": 0.0004372013651877133, "loss": 6.3456, "step": 4947 }, { "epoch": 1.6887372013651878, "grad_norm": 2.9619531631469727, "learning_rate": 0.0004370875995449374, "loss": 6.666, "step": 4948 }, { "epoch": 1.6890784982935152, "grad_norm": 3.149620532989502, "learning_rate": 0.0004369738339021616, "loss": 6.0721, "step": 4949 }, { "epoch": 1.689419795221843, "grad_norm": 2.937761068344116, "learning_rate": 0.0004368600682593857, "loss": 6.7564, "step": 4950 }, { "epoch": 1.6897610921501707, "grad_norm": 3.5034937858581543, "learning_rate": 0.0004367463026166098, "loss": 5.4092, "step": 4951 }, { "epoch": 1.6901023890784983, "grad_norm": 3.117332935333252, "learning_rate": 0.00043663253697383394, "loss": 6.3505, "step": 4952 }, { "epoch": 1.690443686006826, "grad_norm": 5.035308361053467, "learning_rate": 0.00043651877133105805, "loss": 5.6475, "step": 4953 }, { "epoch": 1.6907849829351536, "grad_norm": 3.0669705867767334, "learning_rate": 0.00043640500568828215, "loss": 5.8391, "step": 4954 }, { "epoch": 1.6911262798634812, "grad_norm": 3.2862114906311035, "learning_rate": 0.00043629124004550626, "loss": 5.9049, "step": 4955 }, { "epoch": 1.6914675767918088, "grad_norm": 3.088000774383545, "learning_rate": 0.00043617747440273036, "loss": 6.5768, "step": 4956 }, { "epoch": 1.6918088737201367, "grad_norm": 3.001810073852539, "learning_rate": 0.00043606370875995447, "loss": 6.2097, "step": 4957 }, { "epoch": 1.692150170648464, "grad_norm": 3.081796407699585, "learning_rate": 0.00043594994311717863, "loss": 6.8779, "step": 4958 }, { "epoch": 1.692491467576792, "grad_norm": 3.0735950469970703, "learning_rate": 0.00043583617747440273, "loss": 5.9983, "step": 4959 }, { "epoch": 1.6928327645051193, "grad_norm": 3.0525989532470703, "learning_rate": 0.00043572241183162684, "loss": 6.4789, "step": 4960 }, { "epoch": 1.6931740614334472, "grad_norm": 2.9428293704986572, "learning_rate": 0.000435608646188851, "loss": 6.3712, "step": 4961 }, { "epoch": 1.6935153583617746, "grad_norm": 3.002599000930786, "learning_rate": 0.0004354948805460751, "loss": 6.2377, "step": 4962 }, { "epoch": 1.6938566552901024, "grad_norm": 3.1271250247955322, "learning_rate": 0.00043538111490329926, "loss": 6.357, "step": 4963 }, { "epoch": 1.69419795221843, "grad_norm": 2.927199125289917, "learning_rate": 0.0004352673492605233, "loss": 6.3556, "step": 4964 }, { "epoch": 1.6945392491467577, "grad_norm": 3.364306926727295, "learning_rate": 0.0004351535836177474, "loss": 6.1131, "step": 4965 }, { "epoch": 1.6948805460750853, "grad_norm": 3.0801026821136475, "learning_rate": 0.0004350398179749716, "loss": 5.7936, "step": 4966 }, { "epoch": 1.695221843003413, "grad_norm": 4.118381500244141, "learning_rate": 0.0004349260523321957, "loss": 4.5499, "step": 4967 }, { "epoch": 1.6955631399317406, "grad_norm": 3.0644776821136475, "learning_rate": 0.0004348122866894198, "loss": 6.8683, "step": 4968 }, { "epoch": 1.6959044368600682, "grad_norm": 3.154467821121216, "learning_rate": 0.00043469852104664395, "loss": 6.1359, "step": 4969 }, { "epoch": 1.696245733788396, "grad_norm": 2.9244272708892822, "learning_rate": 0.00043458475540386805, "loss": 5.8604, "step": 4970 }, { "epoch": 1.6965870307167235, "grad_norm": 3.170546054840088, "learning_rate": 0.00043447098976109216, "loss": 6.1517, "step": 4971 }, { "epoch": 1.6969283276450513, "grad_norm": 3.1615684032440186, "learning_rate": 0.0004343572241183163, "loss": 6.2409, "step": 4972 }, { "epoch": 1.6972696245733787, "grad_norm": 2.9471771717071533, "learning_rate": 0.0004342434584755404, "loss": 5.7621, "step": 4973 }, { "epoch": 1.6976109215017066, "grad_norm": 3.0422165393829346, "learning_rate": 0.00043412969283276447, "loss": 6.4401, "step": 4974 }, { "epoch": 1.697952218430034, "grad_norm": 3.3858096599578857, "learning_rate": 0.00043401592718998863, "loss": 5.916, "step": 4975 }, { "epoch": 1.6982935153583618, "grad_norm": 3.054570198059082, "learning_rate": 0.00043390216154721273, "loss": 6.6209, "step": 4976 }, { "epoch": 1.6986348122866894, "grad_norm": 3.7693064212799072, "learning_rate": 0.00043378839590443684, "loss": 5.5803, "step": 4977 }, { "epoch": 1.698976109215017, "grad_norm": 3.033956289291382, "learning_rate": 0.000433674630261661, "loss": 6.0595, "step": 4978 }, { "epoch": 1.6993174061433447, "grad_norm": 6.4478912353515625, "learning_rate": 0.0004335608646188851, "loss": 5.3277, "step": 4979 }, { "epoch": 1.6996587030716723, "grad_norm": 3.0002944469451904, "learning_rate": 0.0004334470989761092, "loss": 6.3766, "step": 4980 }, { "epoch": 1.7, "grad_norm": 3.1205992698669434, "learning_rate": 0.00043333333333333337, "loss": 6.3214, "step": 4981 }, { "epoch": 1.7003412969283276, "grad_norm": 3.3028411865234375, "learning_rate": 0.00043321956769055747, "loss": 6.2618, "step": 4982 }, { "epoch": 1.7006825938566554, "grad_norm": 3.0650041103363037, "learning_rate": 0.00043310580204778163, "loss": 6.48, "step": 4983 }, { "epoch": 1.7010238907849828, "grad_norm": 2.9815592765808105, "learning_rate": 0.0004329920364050057, "loss": 5.3141, "step": 4984 }, { "epoch": 1.7013651877133107, "grad_norm": 3.0242958068847656, "learning_rate": 0.0004328782707622298, "loss": 6.1838, "step": 4985 }, { "epoch": 1.701706484641638, "grad_norm": 4.361161708831787, "learning_rate": 0.00043276450511945395, "loss": 4.7251, "step": 4986 }, { "epoch": 1.702047781569966, "grad_norm": 2.9456946849823, "learning_rate": 0.00043265073947667805, "loss": 6.3513, "step": 4987 }, { "epoch": 1.7023890784982934, "grad_norm": 3.1762263774871826, "learning_rate": 0.00043253697383390216, "loss": 6.6752, "step": 4988 }, { "epoch": 1.7027303754266212, "grad_norm": 3.128908634185791, "learning_rate": 0.0004324232081911263, "loss": 6.6874, "step": 4989 }, { "epoch": 1.7030716723549488, "grad_norm": 2.998319625854492, "learning_rate": 0.0004323094425483504, "loss": 5.9913, "step": 4990 }, { "epoch": 1.7034129692832765, "grad_norm": 3.161729097366333, "learning_rate": 0.0004321956769055745, "loss": 6.0972, "step": 4991 }, { "epoch": 1.703754266211604, "grad_norm": 3.145906686782837, "learning_rate": 0.0004320819112627987, "loss": 6.0995, "step": 4992 }, { "epoch": 1.7040955631399317, "grad_norm": 2.9762144088745117, "learning_rate": 0.00043196814562002274, "loss": 6.3909, "step": 4993 }, { "epoch": 1.7044368600682593, "grad_norm": 3.2298426628112793, "learning_rate": 0.00043185437997724684, "loss": 5.4184, "step": 4994 }, { "epoch": 1.704778156996587, "grad_norm": 3.0982184410095215, "learning_rate": 0.000431740614334471, "loss": 5.8238, "step": 4995 }, { "epoch": 1.7051194539249148, "grad_norm": 2.9920384883880615, "learning_rate": 0.0004316268486916951, "loss": 5.8196, "step": 4996 }, { "epoch": 1.7054607508532422, "grad_norm": 4.23321533203125, "learning_rate": 0.0004315130830489192, "loss": 5.5014, "step": 4997 }, { "epoch": 1.70580204778157, "grad_norm": 3.3732895851135254, "learning_rate": 0.00043139931740614337, "loss": 5.9258, "step": 4998 }, { "epoch": 1.7061433447098975, "grad_norm": 3.0920844078063965, "learning_rate": 0.0004312855517633675, "loss": 6.3851, "step": 4999 }, { "epoch": 1.7064846416382253, "grad_norm": 3.1090736389160156, "learning_rate": 0.0004311717861205916, "loss": 6.5109, "step": 5000 }, { "epoch": 1.7068259385665527, "grad_norm": 2.9566802978515625, "learning_rate": 0.00043105802047781574, "loss": 6.5596, "step": 5001 }, { "epoch": 1.7071672354948806, "grad_norm": 2.936339855194092, "learning_rate": 0.00043094425483503984, "loss": 6.7089, "step": 5002 }, { "epoch": 1.7075085324232082, "grad_norm": 3.0246903896331787, "learning_rate": 0.0004308304891922639, "loss": 6.5555, "step": 5003 }, { "epoch": 1.7078498293515358, "grad_norm": 3.0471384525299072, "learning_rate": 0.00043071672354948805, "loss": 6.3993, "step": 5004 }, { "epoch": 1.7081911262798635, "grad_norm": 3.12800931930542, "learning_rate": 0.00043060295790671216, "loss": 6.0564, "step": 5005 }, { "epoch": 1.708532423208191, "grad_norm": 6.632503032684326, "learning_rate": 0.0004304891922639363, "loss": 5.0925, "step": 5006 }, { "epoch": 1.7088737201365187, "grad_norm": 3.128831624984741, "learning_rate": 0.0004303754266211604, "loss": 5.274, "step": 5007 }, { "epoch": 1.7092150170648464, "grad_norm": 3.365473508834839, "learning_rate": 0.0004302616609783845, "loss": 6.5734, "step": 5008 }, { "epoch": 1.7095563139931742, "grad_norm": 3.8749191761016846, "learning_rate": 0.0004301478953356087, "loss": 6.8053, "step": 5009 }, { "epoch": 1.7098976109215016, "grad_norm": 3.1307623386383057, "learning_rate": 0.0004300341296928328, "loss": 6.4477, "step": 5010 }, { "epoch": 1.7102389078498295, "grad_norm": 2.960883140563965, "learning_rate": 0.0004299203640500569, "loss": 6.4975, "step": 5011 }, { "epoch": 1.7105802047781569, "grad_norm": 2.9841647148132324, "learning_rate": 0.00042980659840728105, "loss": 6.5142, "step": 5012 }, { "epoch": 1.7109215017064847, "grad_norm": 3.0024847984313965, "learning_rate": 0.0004296928327645051, "loss": 6.8747, "step": 5013 }, { "epoch": 1.7112627986348121, "grad_norm": 3.0802016258239746, "learning_rate": 0.0004295790671217292, "loss": 5.6291, "step": 5014 }, { "epoch": 1.71160409556314, "grad_norm": 3.068802833557129, "learning_rate": 0.00042946530147895337, "loss": 5.808, "step": 5015 }, { "epoch": 1.7119453924914676, "grad_norm": 2.9802236557006836, "learning_rate": 0.0004293515358361775, "loss": 6.4699, "step": 5016 }, { "epoch": 1.7122866894197952, "grad_norm": 2.9703474044799805, "learning_rate": 0.0004292377701934016, "loss": 5.8941, "step": 5017 }, { "epoch": 1.7126279863481229, "grad_norm": 2.9278199672698975, "learning_rate": 0.00042912400455062574, "loss": 6.2988, "step": 5018 }, { "epoch": 1.7129692832764505, "grad_norm": 3.066082239151001, "learning_rate": 0.00042901023890784984, "loss": 6.5867, "step": 5019 }, { "epoch": 1.713310580204778, "grad_norm": 3.058166980743408, "learning_rate": 0.00042889647326507395, "loss": 6.0648, "step": 5020 }, { "epoch": 1.7136518771331057, "grad_norm": 3.1347908973693848, "learning_rate": 0.0004287827076222981, "loss": 5.8548, "step": 5021 }, { "epoch": 1.7139931740614336, "grad_norm": 3.1643781661987305, "learning_rate": 0.0004286689419795222, "loss": 5.9265, "step": 5022 }, { "epoch": 1.714334470989761, "grad_norm": 3.030947685241699, "learning_rate": 0.00042855517633674626, "loss": 5.989, "step": 5023 }, { "epoch": 1.7146757679180888, "grad_norm": 3.111829996109009, "learning_rate": 0.0004284414106939704, "loss": 6.5268, "step": 5024 }, { "epoch": 1.7150170648464163, "grad_norm": 2.8761911392211914, "learning_rate": 0.00042832764505119453, "loss": 6.5559, "step": 5025 }, { "epoch": 1.715358361774744, "grad_norm": 5.675846099853516, "learning_rate": 0.0004282138794084187, "loss": 5.3305, "step": 5026 }, { "epoch": 1.7156996587030715, "grad_norm": 3.131277322769165, "learning_rate": 0.0004281001137656428, "loss": 6.8141, "step": 5027 }, { "epoch": 1.7160409556313994, "grad_norm": 3.332944631576538, "learning_rate": 0.0004279863481228669, "loss": 6.1245, "step": 5028 }, { "epoch": 1.716382252559727, "grad_norm": 3.1765148639678955, "learning_rate": 0.00042787258248009106, "loss": 5.8214, "step": 5029 }, { "epoch": 1.7167235494880546, "grad_norm": 3.1236507892608643, "learning_rate": 0.00042775881683731516, "loss": 5.903, "step": 5030 }, { "epoch": 1.7170648464163822, "grad_norm": 3.129188060760498, "learning_rate": 0.00042764505119453927, "loss": 6.1272, "step": 5031 }, { "epoch": 1.7174061433447099, "grad_norm": 3.017763614654541, "learning_rate": 0.00042753128555176337, "loss": 6.2087, "step": 5032 }, { "epoch": 1.7177474402730375, "grad_norm": 3.383964776992798, "learning_rate": 0.0004274175199089875, "loss": 5.143, "step": 5033 }, { "epoch": 1.7180887372013651, "grad_norm": 3.011268377304077, "learning_rate": 0.0004273037542662116, "loss": 6.7169, "step": 5034 }, { "epoch": 1.718430034129693, "grad_norm": 3.053954601287842, "learning_rate": 0.00042718998862343574, "loss": 5.9654, "step": 5035 }, { "epoch": 1.7187713310580204, "grad_norm": 3.0908567905426025, "learning_rate": 0.00042707622298065984, "loss": 6.1534, "step": 5036 }, { "epoch": 1.7191126279863482, "grad_norm": 3.233099937438965, "learning_rate": 0.00042696245733788395, "loss": 5.985, "step": 5037 }, { "epoch": 1.7194539249146756, "grad_norm": 2.9370503425598145, "learning_rate": 0.0004268486916951081, "loss": 6.4747, "step": 5038 }, { "epoch": 1.7197952218430035, "grad_norm": 3.1536028385162354, "learning_rate": 0.0004267349260523322, "loss": 5.9485, "step": 5039 }, { "epoch": 1.7201365187713311, "grad_norm": 3.166968584060669, "learning_rate": 0.0004266211604095563, "loss": 6.7526, "step": 5040 }, { "epoch": 1.7204778156996587, "grad_norm": 3.0940346717834473, "learning_rate": 0.0004265073947667805, "loss": 6.4837, "step": 5041 }, { "epoch": 1.7208191126279864, "grad_norm": 2.989959955215454, "learning_rate": 0.00042639362912400453, "loss": 6.5663, "step": 5042 }, { "epoch": 1.721160409556314, "grad_norm": 2.976271152496338, "learning_rate": 0.00042627986348122863, "loss": 6.0443, "step": 5043 }, { "epoch": 1.7215017064846416, "grad_norm": 3.0593347549438477, "learning_rate": 0.0004261660978384528, "loss": 5.6778, "step": 5044 }, { "epoch": 1.7218430034129693, "grad_norm": 3.020432710647583, "learning_rate": 0.0004260523321956769, "loss": 6.2861, "step": 5045 }, { "epoch": 1.7221843003412969, "grad_norm": 3.6655890941619873, "learning_rate": 0.00042593856655290106, "loss": 5.8077, "step": 5046 }, { "epoch": 1.7225255972696245, "grad_norm": 2.9208641052246094, "learning_rate": 0.00042582480091012516, "loss": 6.405, "step": 5047 }, { "epoch": 1.7228668941979524, "grad_norm": 2.9895660877227783, "learning_rate": 0.00042571103526734927, "loss": 6.1637, "step": 5048 }, { "epoch": 1.7232081911262798, "grad_norm": 3.0086591243743896, "learning_rate": 0.0004255972696245734, "loss": 6.4718, "step": 5049 }, { "epoch": 1.7235494880546076, "grad_norm": 3.0350987911224365, "learning_rate": 0.00042548350398179753, "loss": 6.4154, "step": 5050 }, { "epoch": 1.723890784982935, "grad_norm": 3.0023648738861084, "learning_rate": 0.00042536973833902164, "loss": 6.7531, "step": 5051 }, { "epoch": 1.7242320819112629, "grad_norm": 2.9815728664398193, "learning_rate": 0.00042525597269624574, "loss": 5.6956, "step": 5052 }, { "epoch": 1.7245733788395905, "grad_norm": 3.007316827774048, "learning_rate": 0.00042514220705346985, "loss": 6.5796, "step": 5053 }, { "epoch": 1.7249146757679181, "grad_norm": 3.032801866531372, "learning_rate": 0.00042502844141069395, "loss": 6.1046, "step": 5054 }, { "epoch": 1.7252559726962458, "grad_norm": 2.990694284439087, "learning_rate": 0.0004249146757679181, "loss": 6.188, "step": 5055 }, { "epoch": 1.7255972696245734, "grad_norm": 3.120084285736084, "learning_rate": 0.0004248009101251422, "loss": 6.7109, "step": 5056 }, { "epoch": 1.725938566552901, "grad_norm": 3.544955253601074, "learning_rate": 0.0004246871444823663, "loss": 5.6435, "step": 5057 }, { "epoch": 1.7262798634812286, "grad_norm": 2.977841377258301, "learning_rate": 0.0004245733788395905, "loss": 5.9886, "step": 5058 }, { "epoch": 1.7266211604095563, "grad_norm": 3.0229032039642334, "learning_rate": 0.0004244596131968146, "loss": 6.4989, "step": 5059 }, { "epoch": 1.726962457337884, "grad_norm": 2.9560835361480713, "learning_rate": 0.0004243458475540387, "loss": 6.0739, "step": 5060 }, { "epoch": 1.7273037542662117, "grad_norm": 2.8804562091827393, "learning_rate": 0.00042423208191126285, "loss": 6.0115, "step": 5061 }, { "epoch": 1.7276450511945391, "grad_norm": 2.9489169120788574, "learning_rate": 0.0004241183162684869, "loss": 6.7899, "step": 5062 }, { "epoch": 1.727986348122867, "grad_norm": 3.0079617500305176, "learning_rate": 0.000424004550625711, "loss": 6.6014, "step": 5063 }, { "epoch": 1.7283276450511944, "grad_norm": 3.1168062686920166, "learning_rate": 0.00042389078498293516, "loss": 6.5893, "step": 5064 }, { "epoch": 1.7286689419795223, "grad_norm": 2.953608512878418, "learning_rate": 0.00042377701934015927, "loss": 5.8904, "step": 5065 }, { "epoch": 1.7290102389078499, "grad_norm": 3.050062417984009, "learning_rate": 0.0004236632536973834, "loss": 6.3134, "step": 5066 }, { "epoch": 1.7293515358361775, "grad_norm": 2.984161138534546, "learning_rate": 0.00042354948805460753, "loss": 6.3411, "step": 5067 }, { "epoch": 1.7296928327645051, "grad_norm": 3.0105361938476562, "learning_rate": 0.00042343572241183164, "loss": 6.1606, "step": 5068 }, { "epoch": 1.7300341296928328, "grad_norm": 2.9933488368988037, "learning_rate": 0.0004233219567690558, "loss": 6.2474, "step": 5069 }, { "epoch": 1.7303754266211604, "grad_norm": 3.0552947521209717, "learning_rate": 0.0004232081911262799, "loss": 5.894, "step": 5070 }, { "epoch": 1.730716723549488, "grad_norm": 3.3107733726501465, "learning_rate": 0.00042309442548350395, "loss": 6.2136, "step": 5071 }, { "epoch": 1.7310580204778157, "grad_norm": 3.0709095001220703, "learning_rate": 0.0004229806598407281, "loss": 5.9675, "step": 5072 }, { "epoch": 1.7313993174061433, "grad_norm": 3.0665359497070312, "learning_rate": 0.0004228668941979522, "loss": 6.2627, "step": 5073 }, { "epoch": 1.7317406143344711, "grad_norm": 3.0029146671295166, "learning_rate": 0.0004227531285551763, "loss": 6.1561, "step": 5074 }, { "epoch": 1.7320819112627985, "grad_norm": 3.0567500591278076, "learning_rate": 0.0004226393629124005, "loss": 6.0348, "step": 5075 }, { "epoch": 1.7324232081911264, "grad_norm": 3.08443546295166, "learning_rate": 0.0004225255972696246, "loss": 5.8998, "step": 5076 }, { "epoch": 1.7327645051194538, "grad_norm": 2.951045274734497, "learning_rate": 0.0004224118316268487, "loss": 6.1719, "step": 5077 }, { "epoch": 1.7331058020477816, "grad_norm": 3.0656325817108154, "learning_rate": 0.00042229806598407285, "loss": 6.3616, "step": 5078 }, { "epoch": 1.7334470989761093, "grad_norm": 2.9396920204162598, "learning_rate": 0.00042218430034129695, "loss": 6.2838, "step": 5079 }, { "epoch": 1.733788395904437, "grad_norm": 2.96988844871521, "learning_rate": 0.00042207053469852106, "loss": 6.6509, "step": 5080 }, { "epoch": 1.7341296928327645, "grad_norm": 2.8381896018981934, "learning_rate": 0.00042195676905574516, "loss": 6.1965, "step": 5081 }, { "epoch": 1.7344709897610922, "grad_norm": 6.883829116821289, "learning_rate": 0.00042184300341296927, "loss": 5.7458, "step": 5082 }, { "epoch": 1.7348122866894198, "grad_norm": 3.1585566997528076, "learning_rate": 0.0004217292377701934, "loss": 6.4391, "step": 5083 }, { "epoch": 1.7351535836177474, "grad_norm": 3.090667486190796, "learning_rate": 0.00042161547212741753, "loss": 6.7133, "step": 5084 }, { "epoch": 1.735494880546075, "grad_norm": 3.033228874206543, "learning_rate": 0.00042150170648464164, "loss": 6.2394, "step": 5085 }, { "epoch": 1.7358361774744027, "grad_norm": 4.285531520843506, "learning_rate": 0.00042138794084186574, "loss": 4.8546, "step": 5086 }, { "epoch": 1.7361774744027305, "grad_norm": 3.2763266563415527, "learning_rate": 0.0004212741751990899, "loss": 6.2258, "step": 5087 }, { "epoch": 1.736518771331058, "grad_norm": 3.614485025405884, "learning_rate": 0.000421160409556314, "loss": 5.6639, "step": 5088 }, { "epoch": 1.7368600682593858, "grad_norm": 3.088728666305542, "learning_rate": 0.00042104664391353817, "loss": 6.7367, "step": 5089 }, { "epoch": 1.7372013651877132, "grad_norm": 3.1327497959136963, "learning_rate": 0.00042093287827076227, "loss": 5.9986, "step": 5090 }, { "epoch": 1.737542662116041, "grad_norm": 2.9615418910980225, "learning_rate": 0.0004208191126279863, "loss": 6.1185, "step": 5091 }, { "epoch": 1.7378839590443687, "grad_norm": 3.1406466960906982, "learning_rate": 0.0004207053469852105, "loss": 6.3329, "step": 5092 }, { "epoch": 1.7382252559726963, "grad_norm": 2.9437146186828613, "learning_rate": 0.0004205915813424346, "loss": 6.3977, "step": 5093 }, { "epoch": 1.738566552901024, "grad_norm": 2.894519090652466, "learning_rate": 0.0004204778156996587, "loss": 6.7851, "step": 5094 }, { "epoch": 1.7389078498293515, "grad_norm": 3.03589129447937, "learning_rate": 0.00042036405005688285, "loss": 5.6625, "step": 5095 }, { "epoch": 1.7392491467576792, "grad_norm": 3.0257980823516846, "learning_rate": 0.00042025028441410695, "loss": 5.6283, "step": 5096 }, { "epoch": 1.7395904436860068, "grad_norm": 2.9698126316070557, "learning_rate": 0.00042013651877133106, "loss": 5.9327, "step": 5097 }, { "epoch": 1.7399317406143344, "grad_norm": 3.094011068344116, "learning_rate": 0.0004200227531285552, "loss": 6.1025, "step": 5098 }, { "epoch": 1.740273037542662, "grad_norm": 2.933558702468872, "learning_rate": 0.0004199089874857793, "loss": 6.3226, "step": 5099 }, { "epoch": 1.74061433447099, "grad_norm": 3.014883518218994, "learning_rate": 0.0004197952218430034, "loss": 6.1689, "step": 5100 }, { "epoch": 1.7409556313993173, "grad_norm": 2.9969851970672607, "learning_rate": 0.00041968145620022753, "loss": 6.4516, "step": 5101 }, { "epoch": 1.7412969283276452, "grad_norm": 2.958338737487793, "learning_rate": 0.00041956769055745164, "loss": 6.3338, "step": 5102 }, { "epoch": 1.7416382252559726, "grad_norm": 2.871307373046875, "learning_rate": 0.00041945392491467574, "loss": 6.3378, "step": 5103 }, { "epoch": 1.7419795221843004, "grad_norm": 3.485704183578491, "learning_rate": 0.0004193401592718999, "loss": 5.556, "step": 5104 }, { "epoch": 1.742320819112628, "grad_norm": 3.0032105445861816, "learning_rate": 0.000419226393629124, "loss": 5.7468, "step": 5105 }, { "epoch": 1.7426621160409557, "grad_norm": 3.2163028717041016, "learning_rate": 0.0004191126279863481, "loss": 6.1866, "step": 5106 }, { "epoch": 1.7430034129692833, "grad_norm": 3.722259044647217, "learning_rate": 0.00041899886234357227, "loss": 5.2801, "step": 5107 }, { "epoch": 1.743344709897611, "grad_norm": 3.0470032691955566, "learning_rate": 0.0004188850967007964, "loss": 6.1884, "step": 5108 }, { "epoch": 1.7436860068259386, "grad_norm": 3.0684938430786133, "learning_rate": 0.00041877133105802054, "loss": 6.9454, "step": 5109 }, { "epoch": 1.7440273037542662, "grad_norm": 2.902766227722168, "learning_rate": 0.0004186575654152446, "loss": 5.6917, "step": 5110 }, { "epoch": 1.7443686006825938, "grad_norm": 2.978318929672241, "learning_rate": 0.0004185437997724687, "loss": 5.6608, "step": 5111 }, { "epoch": 1.7447098976109214, "grad_norm": 2.9816012382507324, "learning_rate": 0.00041843003412969285, "loss": 6.0245, "step": 5112 }, { "epoch": 1.7450511945392493, "grad_norm": 3.0680909156799316, "learning_rate": 0.00041831626848691695, "loss": 5.9496, "step": 5113 }, { "epoch": 1.7453924914675767, "grad_norm": 2.8896589279174805, "learning_rate": 0.00041820250284414106, "loss": 6.1912, "step": 5114 }, { "epoch": 1.7457337883959045, "grad_norm": 2.889788866043091, "learning_rate": 0.0004180887372013652, "loss": 6.7443, "step": 5115 }, { "epoch": 1.746075085324232, "grad_norm": 3.0151519775390625, "learning_rate": 0.0004179749715585893, "loss": 6.5864, "step": 5116 }, { "epoch": 1.7464163822525598, "grad_norm": 2.864457368850708, "learning_rate": 0.00041786120591581343, "loss": 6.5104, "step": 5117 }, { "epoch": 1.7467576791808874, "grad_norm": 3.0471596717834473, "learning_rate": 0.0004177474402730376, "loss": 6.4869, "step": 5118 }, { "epoch": 1.747098976109215, "grad_norm": 3.1271932125091553, "learning_rate": 0.0004176336746302617, "loss": 6.1027, "step": 5119 }, { "epoch": 1.7474402730375427, "grad_norm": 3.2092442512512207, "learning_rate": 0.00041751990898748574, "loss": 6.081, "step": 5120 }, { "epoch": 1.7477815699658703, "grad_norm": 3.0421433448791504, "learning_rate": 0.0004174061433447099, "loss": 6.3332, "step": 5121 }, { "epoch": 1.748122866894198, "grad_norm": 2.9517619609832764, "learning_rate": 0.000417292377701934, "loss": 6.3718, "step": 5122 }, { "epoch": 1.7484641638225256, "grad_norm": 2.967085838317871, "learning_rate": 0.0004171786120591581, "loss": 6.2602, "step": 5123 }, { "epoch": 1.7488054607508532, "grad_norm": 5.240909576416016, "learning_rate": 0.00041706484641638227, "loss": 5.1849, "step": 5124 }, { "epoch": 1.7491467576791808, "grad_norm": 3.141845464706421, "learning_rate": 0.0004169510807736064, "loss": 5.9711, "step": 5125 }, { "epoch": 1.7494880546075087, "grad_norm": 3.0010454654693604, "learning_rate": 0.0004168373151308305, "loss": 6.1771, "step": 5126 }, { "epoch": 1.749829351535836, "grad_norm": 2.5118589401245117, "learning_rate": 0.00041672354948805464, "loss": 3.2816, "step": 5127 }, { "epoch": 1.750170648464164, "grad_norm": 3.0626370906829834, "learning_rate": 0.00041660978384527875, "loss": 6.3235, "step": 5128 }, { "epoch": 1.7505119453924913, "grad_norm": 3.541957378387451, "learning_rate": 0.0004164960182025029, "loss": 5.6468, "step": 5129 }, { "epoch": 1.7508532423208192, "grad_norm": 3.089536428451538, "learning_rate": 0.00041638225255972696, "loss": 6.5741, "step": 5130 }, { "epoch": 1.7511945392491468, "grad_norm": 3.0071637630462646, "learning_rate": 0.00041626848691695106, "loss": 6.1721, "step": 5131 }, { "epoch": 1.7515358361774744, "grad_norm": 3.3689115047454834, "learning_rate": 0.0004161547212741752, "loss": 6.32, "step": 5132 }, { "epoch": 1.751877133105802, "grad_norm": 3.6956567764282227, "learning_rate": 0.0004160409556313993, "loss": 5.7046, "step": 5133 }, { "epoch": 1.7522184300341297, "grad_norm": 4.705052375793457, "learning_rate": 0.00041592718998862343, "loss": 5.2505, "step": 5134 }, { "epoch": 1.7525597269624573, "grad_norm": 2.986630916595459, "learning_rate": 0.0004158134243458476, "loss": 6.1452, "step": 5135 }, { "epoch": 1.752901023890785, "grad_norm": 3.066112995147705, "learning_rate": 0.0004156996587030717, "loss": 6.1217, "step": 5136 }, { "epoch": 1.7532423208191126, "grad_norm": 3.2455801963806152, "learning_rate": 0.0004155858930602958, "loss": 6.1903, "step": 5137 }, { "epoch": 1.7535836177474402, "grad_norm": 4.036913871765137, "learning_rate": 0.00041547212741751996, "loss": 5.0287, "step": 5138 }, { "epoch": 1.753924914675768, "grad_norm": 2.882148265838623, "learning_rate": 0.000415358361774744, "loss": 6.9066, "step": 5139 }, { "epoch": 1.7542662116040955, "grad_norm": 3.251460552215576, "learning_rate": 0.0004152445961319681, "loss": 5.8373, "step": 5140 }, { "epoch": 1.7546075085324233, "grad_norm": 3.242882013320923, "learning_rate": 0.00041513083048919227, "loss": 5.5887, "step": 5141 }, { "epoch": 1.7549488054607507, "grad_norm": 2.918837070465088, "learning_rate": 0.0004150170648464164, "loss": 6.3827, "step": 5142 }, { "epoch": 1.7552901023890786, "grad_norm": 5.232894420623779, "learning_rate": 0.0004149032992036405, "loss": 4.7531, "step": 5143 }, { "epoch": 1.7556313993174062, "grad_norm": 2.9766147136688232, "learning_rate": 0.00041478953356086464, "loss": 6.5025, "step": 5144 }, { "epoch": 1.7559726962457338, "grad_norm": 3.3592119216918945, "learning_rate": 0.00041467576791808875, "loss": 5.1757, "step": 5145 }, { "epoch": 1.7563139931740614, "grad_norm": 3.114332437515259, "learning_rate": 0.00041456200227531285, "loss": 5.9303, "step": 5146 }, { "epoch": 1.756655290102389, "grad_norm": 3.0244557857513428, "learning_rate": 0.000414448236632537, "loss": 5.9333, "step": 5147 }, { "epoch": 1.7569965870307167, "grad_norm": 3.0684051513671875, "learning_rate": 0.0004143344709897611, "loss": 5.7864, "step": 5148 }, { "epoch": 1.7573378839590443, "grad_norm": 2.931272029876709, "learning_rate": 0.0004142207053469852, "loss": 6.2102, "step": 5149 }, { "epoch": 1.757679180887372, "grad_norm": 3.0313282012939453, "learning_rate": 0.0004141069397042093, "loss": 6.2017, "step": 5150 }, { "epoch": 1.7580204778156996, "grad_norm": 3.021868944168091, "learning_rate": 0.00041399317406143343, "loss": 6.8173, "step": 5151 }, { "epoch": 1.7583617747440274, "grad_norm": 2.8583309650421143, "learning_rate": 0.0004138794084186576, "loss": 6.4639, "step": 5152 }, { "epoch": 1.7587030716723548, "grad_norm": 5.182238578796387, "learning_rate": 0.0004137656427758817, "loss": 5.7794, "step": 5153 }, { "epoch": 1.7590443686006827, "grad_norm": 2.988149881362915, "learning_rate": 0.0004136518771331058, "loss": 6.5781, "step": 5154 }, { "epoch": 1.75938566552901, "grad_norm": 3.7683768272399902, "learning_rate": 0.00041353811149032996, "loss": 6.1431, "step": 5155 }, { "epoch": 1.759726962457338, "grad_norm": 3.9146881103515625, "learning_rate": 0.00041342434584755406, "loss": 5.6584, "step": 5156 }, { "epoch": 1.7600682593856656, "grad_norm": 2.989680290222168, "learning_rate": 0.00041331058020477817, "loss": 6.8671, "step": 5157 }, { "epoch": 1.7604095563139932, "grad_norm": 3.8746161460876465, "learning_rate": 0.00041319681456200233, "loss": 5.0076, "step": 5158 }, { "epoch": 1.7607508532423208, "grad_norm": 3.014831066131592, "learning_rate": 0.0004130830489192264, "loss": 6.3627, "step": 5159 }, { "epoch": 1.7610921501706485, "grad_norm": 2.946559429168701, "learning_rate": 0.0004129692832764505, "loss": 5.7226, "step": 5160 }, { "epoch": 1.761433447098976, "grad_norm": 2.9957525730133057, "learning_rate": 0.00041285551763367464, "loss": 6.5074, "step": 5161 }, { "epoch": 1.7617747440273037, "grad_norm": 2.896390438079834, "learning_rate": 0.00041274175199089875, "loss": 6.9841, "step": 5162 }, { "epoch": 1.7621160409556313, "grad_norm": 3.291266918182373, "learning_rate": 0.00041262798634812285, "loss": 6.0102, "step": 5163 }, { "epoch": 1.762457337883959, "grad_norm": 3.1446962356567383, "learning_rate": 0.000412514220705347, "loss": 5.8353, "step": 5164 }, { "epoch": 1.7627986348122868, "grad_norm": 2.8148372173309326, "learning_rate": 0.0004124004550625711, "loss": 6.1815, "step": 5165 }, { "epoch": 1.7631399317406142, "grad_norm": 3.0298731327056885, "learning_rate": 0.0004122866894197952, "loss": 6.2174, "step": 5166 }, { "epoch": 1.763481228668942, "grad_norm": 3.0104305744171143, "learning_rate": 0.0004121729237770194, "loss": 6.3785, "step": 5167 }, { "epoch": 1.7638225255972695, "grad_norm": 2.9257543087005615, "learning_rate": 0.0004120591581342435, "loss": 6.0503, "step": 5168 }, { "epoch": 1.7641638225255973, "grad_norm": 3.0766642093658447, "learning_rate": 0.00041194539249146754, "loss": 5.7167, "step": 5169 }, { "epoch": 1.764505119453925, "grad_norm": 2.8836543560028076, "learning_rate": 0.0004118316268486917, "loss": 6.0921, "step": 5170 }, { "epoch": 1.7648464163822526, "grad_norm": 3.0110116004943848, "learning_rate": 0.0004117178612059158, "loss": 6.3009, "step": 5171 }, { "epoch": 1.7651877133105802, "grad_norm": 2.9918863773345947, "learning_rate": 0.00041160409556313996, "loss": 6.0492, "step": 5172 }, { "epoch": 1.7655290102389078, "grad_norm": 4.458995342254639, "learning_rate": 0.00041149032992036406, "loss": 5.4651, "step": 5173 }, { "epoch": 1.7658703071672355, "grad_norm": 3.1331934928894043, "learning_rate": 0.00041137656427758817, "loss": 5.8811, "step": 5174 }, { "epoch": 1.766211604095563, "grad_norm": 2.99288010597229, "learning_rate": 0.00041126279863481233, "loss": 6.1899, "step": 5175 }, { "epoch": 1.7665529010238907, "grad_norm": 6.400101661682129, "learning_rate": 0.00041114903299203643, "loss": 4.2093, "step": 5176 }, { "epoch": 1.7668941979522184, "grad_norm": 3.309239387512207, "learning_rate": 0.00041103526734926054, "loss": 5.9248, "step": 5177 }, { "epoch": 1.7672354948805462, "grad_norm": 2.9934983253479004, "learning_rate": 0.00041092150170648464, "loss": 6.4463, "step": 5178 }, { "epoch": 1.7675767918088736, "grad_norm": 3.0152664184570312, "learning_rate": 0.00041080773606370875, "loss": 6.4454, "step": 5179 }, { "epoch": 1.7679180887372015, "grad_norm": 3.141155958175659, "learning_rate": 0.00041069397042093285, "loss": 5.738, "step": 5180 }, { "epoch": 1.7682593856655289, "grad_norm": 3.011068344116211, "learning_rate": 0.000410580204778157, "loss": 6.8537, "step": 5181 }, { "epoch": 1.7686006825938567, "grad_norm": 7.368420600891113, "learning_rate": 0.0004104664391353811, "loss": 5.6683, "step": 5182 }, { "epoch": 1.7689419795221843, "grad_norm": 3.043335199356079, "learning_rate": 0.0004103526734926052, "loss": 6.6035, "step": 5183 }, { "epoch": 1.769283276450512, "grad_norm": 3.1109790802001953, "learning_rate": 0.0004102389078498294, "loss": 6.0835, "step": 5184 }, { "epoch": 1.7696245733788396, "grad_norm": 3.0300333499908447, "learning_rate": 0.0004101251422070535, "loss": 6.4015, "step": 5185 }, { "epoch": 1.7699658703071672, "grad_norm": 2.9411182403564453, "learning_rate": 0.0004100113765642776, "loss": 6.0647, "step": 5186 }, { "epoch": 1.7703071672354949, "grad_norm": 3.1921133995056152, "learning_rate": 0.00040989761092150175, "loss": 5.8967, "step": 5187 }, { "epoch": 1.7706484641638225, "grad_norm": 2.9762604236602783, "learning_rate": 0.0004097838452787258, "loss": 6.4729, "step": 5188 }, { "epoch": 1.7709897610921501, "grad_norm": 2.857520341873169, "learning_rate": 0.0004096700796359499, "loss": 6.0116, "step": 5189 }, { "epoch": 1.7713310580204777, "grad_norm": 2.9309916496276855, "learning_rate": 0.00040955631399317407, "loss": 6.5562, "step": 5190 }, { "epoch": 1.7716723549488056, "grad_norm": 3.0163867473602295, "learning_rate": 0.00040944254835039817, "loss": 6.6054, "step": 5191 }, { "epoch": 1.772013651877133, "grad_norm": 2.942237377166748, "learning_rate": 0.00040932878270762233, "loss": 6.6192, "step": 5192 }, { "epoch": 1.7723549488054609, "grad_norm": 2.938898801803589, "learning_rate": 0.00040921501706484643, "loss": 6.3657, "step": 5193 }, { "epoch": 1.7726962457337883, "grad_norm": 3.5897812843322754, "learning_rate": 0.00040910125142207054, "loss": 5.9367, "step": 5194 }, { "epoch": 1.773037542662116, "grad_norm": 2.926736831665039, "learning_rate": 0.0004089874857792947, "loss": 6.6937, "step": 5195 }, { "epoch": 1.7733788395904437, "grad_norm": 3.1813864707946777, "learning_rate": 0.0004088737201365188, "loss": 5.4131, "step": 5196 }, { "epoch": 1.7737201365187714, "grad_norm": 3.0285255908966064, "learning_rate": 0.0004087599544937429, "loss": 6.0017, "step": 5197 }, { "epoch": 1.774061433447099, "grad_norm": 3.036341428756714, "learning_rate": 0.000408646188850967, "loss": 6.6581, "step": 5198 }, { "epoch": 1.7744027303754266, "grad_norm": 3.0285723209381104, "learning_rate": 0.0004085324232081911, "loss": 6.2925, "step": 5199 }, { "epoch": 1.7747440273037542, "grad_norm": 2.9925947189331055, "learning_rate": 0.0004084186575654152, "loss": 6.1965, "step": 5200 }, { "epoch": 1.7750853242320819, "grad_norm": 3.050577163696289, "learning_rate": 0.0004083048919226394, "loss": 6.3542, "step": 5201 }, { "epoch": 1.7754266211604095, "grad_norm": 2.938793897628784, "learning_rate": 0.0004081911262798635, "loss": 6.4952, "step": 5202 }, { "epoch": 1.7757679180887371, "grad_norm": 3.007162570953369, "learning_rate": 0.0004080773606370876, "loss": 5.9792, "step": 5203 }, { "epoch": 1.776109215017065, "grad_norm": 2.8875231742858887, "learning_rate": 0.00040796359499431175, "loss": 6.2729, "step": 5204 }, { "epoch": 1.7764505119453924, "grad_norm": 4.119450092315674, "learning_rate": 0.00040784982935153586, "loss": 4.7705, "step": 5205 }, { "epoch": 1.7767918088737202, "grad_norm": 3.139040231704712, "learning_rate": 0.00040773606370875996, "loss": 5.8157, "step": 5206 }, { "epoch": 1.7771331058020476, "grad_norm": 2.9930758476257324, "learning_rate": 0.00040762229806598407, "loss": 6.2695, "step": 5207 }, { "epoch": 1.7774744027303755, "grad_norm": 2.9738657474517822, "learning_rate": 0.00040750853242320817, "loss": 6.24, "step": 5208 }, { "epoch": 1.7778156996587031, "grad_norm": 4.560755252838135, "learning_rate": 0.0004073947667804323, "loss": 4.1842, "step": 5209 }, { "epoch": 1.7781569965870307, "grad_norm": 3.2815661430358887, "learning_rate": 0.00040728100113765644, "loss": 5.3795, "step": 5210 }, { "epoch": 1.7784982935153584, "grad_norm": 3.145738124847412, "learning_rate": 0.00040716723549488054, "loss": 5.5073, "step": 5211 }, { "epoch": 1.778839590443686, "grad_norm": 3.058608293533325, "learning_rate": 0.0004070534698521047, "loss": 5.9812, "step": 5212 }, { "epoch": 1.7791808873720136, "grad_norm": 2.994455337524414, "learning_rate": 0.0004069397042093288, "loss": 6.39, "step": 5213 }, { "epoch": 1.7795221843003413, "grad_norm": 2.96401309967041, "learning_rate": 0.0004068259385665529, "loss": 6.4591, "step": 5214 }, { "epoch": 1.7798634812286689, "grad_norm": 3.074064016342163, "learning_rate": 0.00040671217292377707, "loss": 5.9995, "step": 5215 }, { "epoch": 1.7802047781569965, "grad_norm": 2.9683525562286377, "learning_rate": 0.0004065984072810012, "loss": 6.4996, "step": 5216 }, { "epoch": 1.7805460750853244, "grad_norm": 2.9950125217437744, "learning_rate": 0.0004064846416382252, "loss": 6.6542, "step": 5217 }, { "epoch": 1.7808873720136518, "grad_norm": 2.9647035598754883, "learning_rate": 0.0004063708759954494, "loss": 5.9651, "step": 5218 }, { "epoch": 1.7812286689419796, "grad_norm": 9.434081077575684, "learning_rate": 0.0004062571103526735, "loss": 5.0819, "step": 5219 }, { "epoch": 1.781569965870307, "grad_norm": 3.1312179565429688, "learning_rate": 0.0004061433447098976, "loss": 6.6445, "step": 5220 }, { "epoch": 1.7819112627986349, "grad_norm": 3.0493083000183105, "learning_rate": 0.00040602957906712175, "loss": 6.2367, "step": 5221 }, { "epoch": 1.7822525597269625, "grad_norm": 3.070904493331909, "learning_rate": 0.00040591581342434586, "loss": 6.6448, "step": 5222 }, { "epoch": 1.7825938566552901, "grad_norm": 3.064748525619507, "learning_rate": 0.00040580204778156996, "loss": 6.255, "step": 5223 }, { "epoch": 1.7829351535836178, "grad_norm": 2.90639328956604, "learning_rate": 0.0004056882821387941, "loss": 6.2975, "step": 5224 }, { "epoch": 1.7832764505119454, "grad_norm": 2.9274797439575195, "learning_rate": 0.0004055745164960182, "loss": 6.3858, "step": 5225 }, { "epoch": 1.783617747440273, "grad_norm": 3.3938796520233154, "learning_rate": 0.00040546075085324233, "loss": 5.6207, "step": 5226 }, { "epoch": 1.7839590443686006, "grad_norm": 2.930269956588745, "learning_rate": 0.00040534698521046644, "loss": 6.4902, "step": 5227 }, { "epoch": 1.7843003412969285, "grad_norm": 2.949573040008545, "learning_rate": 0.00040523321956769054, "loss": 6.3274, "step": 5228 }, { "epoch": 1.784641638225256, "grad_norm": 3.765118360519409, "learning_rate": 0.00040511945392491465, "loss": 5.895, "step": 5229 }, { "epoch": 1.7849829351535837, "grad_norm": 3.8920938968658447, "learning_rate": 0.0004050056882821388, "loss": 5.751, "step": 5230 }, { "epoch": 1.7853242320819112, "grad_norm": 3.290802240371704, "learning_rate": 0.0004048919226393629, "loss": 5.801, "step": 5231 }, { "epoch": 1.785665529010239, "grad_norm": 3.1709654331207275, "learning_rate": 0.00040477815699658707, "loss": 5.6403, "step": 5232 }, { "epoch": 1.7860068259385664, "grad_norm": 3.0955452919006348, "learning_rate": 0.0004046643913538112, "loss": 6.5735, "step": 5233 }, { "epoch": 1.7863481228668943, "grad_norm": 2.9965522289276123, "learning_rate": 0.0004045506257110353, "loss": 6.1418, "step": 5234 }, { "epoch": 1.786689419795222, "grad_norm": 2.980431079864502, "learning_rate": 0.00040443686006825944, "loss": 5.5827, "step": 5235 }, { "epoch": 1.7870307167235495, "grad_norm": 2.8499867916107178, "learning_rate": 0.00040432309442548354, "loss": 5.8907, "step": 5236 }, { "epoch": 1.7873720136518771, "grad_norm": 2.8207998275756836, "learning_rate": 0.0004042093287827076, "loss": 6.0233, "step": 5237 }, { "epoch": 1.7877133105802048, "grad_norm": 3.0672411918640137, "learning_rate": 0.00040409556313993175, "loss": 5.8684, "step": 5238 }, { "epoch": 1.7880546075085324, "grad_norm": 3.0053303241729736, "learning_rate": 0.00040398179749715586, "loss": 6.1898, "step": 5239 }, { "epoch": 1.78839590443686, "grad_norm": 8.463170051574707, "learning_rate": 0.00040386803185437996, "loss": 5.5515, "step": 5240 }, { "epoch": 1.7887372013651879, "grad_norm": 9.868966102600098, "learning_rate": 0.0004037542662116041, "loss": 5.336, "step": 5241 }, { "epoch": 1.7890784982935153, "grad_norm": 3.046468496322632, "learning_rate": 0.00040364050056882823, "loss": 6.4945, "step": 5242 }, { "epoch": 1.7894197952218431, "grad_norm": 3.114753007888794, "learning_rate": 0.00040352673492605233, "loss": 6.645, "step": 5243 }, { "epoch": 1.7897610921501705, "grad_norm": 4.43574857711792, "learning_rate": 0.0004034129692832765, "loss": 6.1538, "step": 5244 }, { "epoch": 1.7901023890784984, "grad_norm": 3.092228889465332, "learning_rate": 0.0004032992036405006, "loss": 6.236, "step": 5245 }, { "epoch": 1.7904436860068258, "grad_norm": 3.4146082401275635, "learning_rate": 0.00040318543799772465, "loss": 6.3327, "step": 5246 }, { "epoch": 1.7907849829351536, "grad_norm": 3.112100124359131, "learning_rate": 0.0004030716723549488, "loss": 6.1249, "step": 5247 }, { "epoch": 1.7911262798634813, "grad_norm": 3.0014235973358154, "learning_rate": 0.0004029579067121729, "loss": 6.4472, "step": 5248 }, { "epoch": 1.791467576791809, "grad_norm": 3.3161520957946777, "learning_rate": 0.000402844141069397, "loss": 5.6153, "step": 5249 }, { "epoch": 1.7918088737201365, "grad_norm": 2.991649866104126, "learning_rate": 0.0004027303754266212, "loss": 6.4251, "step": 5250 }, { "epoch": 1.7921501706484642, "grad_norm": 2.9196534156799316, "learning_rate": 0.0004026166097838453, "loss": 6.4241, "step": 5251 }, { "epoch": 1.7924914675767918, "grad_norm": 4.535854339599609, "learning_rate": 0.00040250284414106944, "loss": 4.7127, "step": 5252 }, { "epoch": 1.7928327645051194, "grad_norm": 2.958326578140259, "learning_rate": 0.00040238907849829354, "loss": 6.3131, "step": 5253 }, { "epoch": 1.7931740614334473, "grad_norm": 3.1165990829467773, "learning_rate": 0.00040227531285551765, "loss": 5.3994, "step": 5254 }, { "epoch": 1.7935153583617747, "grad_norm": 3.234656572341919, "learning_rate": 0.0004021615472127418, "loss": 6.8472, "step": 5255 }, { "epoch": 1.7938566552901025, "grad_norm": 3.0498690605163574, "learning_rate": 0.00040204778156996586, "loss": 6.2153, "step": 5256 }, { "epoch": 1.79419795221843, "grad_norm": 2.997565507888794, "learning_rate": 0.00040193401592718996, "loss": 6.3802, "step": 5257 }, { "epoch": 1.7945392491467578, "grad_norm": 2.9837565422058105, "learning_rate": 0.0004018202502844141, "loss": 5.8155, "step": 5258 }, { "epoch": 1.7948805460750852, "grad_norm": 2.9351367950439453, "learning_rate": 0.00040170648464163823, "loss": 6.6041, "step": 5259 }, { "epoch": 1.795221843003413, "grad_norm": 2.891256332397461, "learning_rate": 0.00040159271899886233, "loss": 5.978, "step": 5260 }, { "epoch": 1.7955631399317407, "grad_norm": 2.9434101581573486, "learning_rate": 0.0004014789533560865, "loss": 6.2294, "step": 5261 }, { "epoch": 1.7959044368600683, "grad_norm": 3.0695149898529053, "learning_rate": 0.0004013651877133106, "loss": 6.538, "step": 5262 }, { "epoch": 1.796245733788396, "grad_norm": 3.029841661453247, "learning_rate": 0.0004012514220705347, "loss": 5.6806, "step": 5263 }, { "epoch": 1.7965870307167235, "grad_norm": 2.961660623550415, "learning_rate": 0.00040113765642775886, "loss": 6.4616, "step": 5264 }, { "epoch": 1.7969283276450512, "grad_norm": 4.678473472595215, "learning_rate": 0.00040102389078498297, "loss": 5.3174, "step": 5265 }, { "epoch": 1.7972696245733788, "grad_norm": 3.08498215675354, "learning_rate": 0.000400910125142207, "loss": 5.7922, "step": 5266 }, { "epoch": 1.7976109215017066, "grad_norm": 2.980827808380127, "learning_rate": 0.0004007963594994312, "loss": 6.1678, "step": 5267 }, { "epoch": 1.797952218430034, "grad_norm": 2.927297592163086, "learning_rate": 0.0004006825938566553, "loss": 6.192, "step": 5268 }, { "epoch": 1.798293515358362, "grad_norm": 3.01383638381958, "learning_rate": 0.0004005688282138794, "loss": 6.2782, "step": 5269 }, { "epoch": 1.7986348122866893, "grad_norm": 3.114114999771118, "learning_rate": 0.00040045506257110355, "loss": 5.8744, "step": 5270 }, { "epoch": 1.7989761092150172, "grad_norm": 3.1586034297943115, "learning_rate": 0.00040034129692832765, "loss": 6.3056, "step": 5271 }, { "epoch": 1.7993174061433446, "grad_norm": 2.895045518875122, "learning_rate": 0.0004002275312855518, "loss": 5.8402, "step": 5272 }, { "epoch": 1.7996587030716724, "grad_norm": 3.156750202178955, "learning_rate": 0.0004001137656427759, "loss": 5.9734, "step": 5273 }, { "epoch": 1.8, "grad_norm": 3.116347312927246, "learning_rate": 0.0004, "loss": 5.6309, "step": 5274 }, { "epoch": 1.8003412969283277, "grad_norm": 3.3794455528259277, "learning_rate": 0.0003998862343572241, "loss": 6.1649, "step": 5275 }, { "epoch": 1.8006825938566553, "grad_norm": 3.00872540473938, "learning_rate": 0.00039977246871444823, "loss": 6.449, "step": 5276 }, { "epoch": 1.801023890784983, "grad_norm": 3.6723685264587402, "learning_rate": 0.00039965870307167233, "loss": 6.0114, "step": 5277 }, { "epoch": 1.8013651877133106, "grad_norm": 2.9524502754211426, "learning_rate": 0.0003995449374288965, "loss": 6.1426, "step": 5278 }, { "epoch": 1.8017064846416382, "grad_norm": 2.987380266189575, "learning_rate": 0.0003994311717861206, "loss": 5.964, "step": 5279 }, { "epoch": 1.802047781569966, "grad_norm": 3.3028364181518555, "learning_rate": 0.0003993174061433447, "loss": 5.3033, "step": 5280 }, { "epoch": 1.8023890784982934, "grad_norm": 2.9730100631713867, "learning_rate": 0.00039920364050056886, "loss": 5.9817, "step": 5281 }, { "epoch": 1.8027303754266213, "grad_norm": 3.1205883026123047, "learning_rate": 0.00039908987485779297, "loss": 5.8908, "step": 5282 }, { "epoch": 1.8030716723549487, "grad_norm": 2.9819066524505615, "learning_rate": 0.00039897610921501707, "loss": 5.9874, "step": 5283 }, { "epoch": 1.8034129692832765, "grad_norm": 3.0491702556610107, "learning_rate": 0.00039886234357224123, "loss": 6.1682, "step": 5284 }, { "epoch": 1.803754266211604, "grad_norm": 3.0884945392608643, "learning_rate": 0.0003987485779294653, "loss": 6.6682, "step": 5285 }, { "epoch": 1.8040955631399318, "grad_norm": 3.4136314392089844, "learning_rate": 0.0003986348122866894, "loss": 5.6285, "step": 5286 }, { "epoch": 1.8044368600682594, "grad_norm": 3.038418769836426, "learning_rate": 0.00039852104664391355, "loss": 6.8089, "step": 5287 }, { "epoch": 1.804778156996587, "grad_norm": 2.9346542358398438, "learning_rate": 0.00039840728100113765, "loss": 5.9561, "step": 5288 }, { "epoch": 1.8051194539249147, "grad_norm": 4.106077671051025, "learning_rate": 0.00039829351535836176, "loss": 5.2572, "step": 5289 }, { "epoch": 1.8054607508532423, "grad_norm": 3.0919151306152344, "learning_rate": 0.0003981797497155859, "loss": 6.5056, "step": 5290 }, { "epoch": 1.80580204778157, "grad_norm": 3.0660130977630615, "learning_rate": 0.00039806598407281, "loss": 6.0935, "step": 5291 }, { "epoch": 1.8061433447098976, "grad_norm": 3.0275156497955322, "learning_rate": 0.0003979522184300341, "loss": 4.5541, "step": 5292 }, { "epoch": 1.8064846416382254, "grad_norm": 3.370018243789673, "learning_rate": 0.0003978384527872583, "loss": 5.0421, "step": 5293 }, { "epoch": 1.8068259385665528, "grad_norm": 3.184339761734009, "learning_rate": 0.0003977246871444824, "loss": 5.1131, "step": 5294 }, { "epoch": 1.8071672354948807, "grad_norm": 3.00337553024292, "learning_rate": 0.0003976109215017065, "loss": 6.6479, "step": 5295 }, { "epoch": 1.807508532423208, "grad_norm": 2.92510724067688, "learning_rate": 0.0003974971558589306, "loss": 6.097, "step": 5296 }, { "epoch": 1.807849829351536, "grad_norm": 3.030064105987549, "learning_rate": 0.0003973833902161547, "loss": 6.3918, "step": 5297 }, { "epoch": 1.8081911262798633, "grad_norm": 3.019908905029297, "learning_rate": 0.00039726962457337886, "loss": 5.8597, "step": 5298 }, { "epoch": 1.8085324232081912, "grad_norm": 2.938002109527588, "learning_rate": 0.00039715585893060297, "loss": 6.1059, "step": 5299 }, { "epoch": 1.8088737201365188, "grad_norm": 3.3193299770355225, "learning_rate": 0.0003970420932878271, "loss": 5.3933, "step": 5300 }, { "epoch": 1.8092150170648464, "grad_norm": 3.112321376800537, "learning_rate": 0.00039692832764505123, "loss": 6.1252, "step": 5301 }, { "epoch": 1.809556313993174, "grad_norm": 2.9885871410369873, "learning_rate": 0.00039681456200227534, "loss": 6.2435, "step": 5302 }, { "epoch": 1.8098976109215017, "grad_norm": 3.8620083332061768, "learning_rate": 0.00039670079635949944, "loss": 5.2043, "step": 5303 }, { "epoch": 1.8102389078498293, "grad_norm": 3.0883119106292725, "learning_rate": 0.0003965870307167236, "loss": 6.0623, "step": 5304 }, { "epoch": 1.810580204778157, "grad_norm": 2.965674877166748, "learning_rate": 0.00039647326507394765, "loss": 6.138, "step": 5305 }, { "epoch": 1.8109215017064848, "grad_norm": 2.917496681213379, "learning_rate": 0.00039635949943117176, "loss": 6.5031, "step": 5306 }, { "epoch": 1.8112627986348122, "grad_norm": 2.951918363571167, "learning_rate": 0.0003962457337883959, "loss": 6.0293, "step": 5307 }, { "epoch": 1.81160409556314, "grad_norm": 4.126885890960693, "learning_rate": 0.00039613196814562, "loss": 5.7647, "step": 5308 }, { "epoch": 1.8119453924914675, "grad_norm": 2.963454484939575, "learning_rate": 0.0003960182025028441, "loss": 6.4464, "step": 5309 }, { "epoch": 1.8122866894197953, "grad_norm": 3.006791591644287, "learning_rate": 0.0003959044368600683, "loss": 6.4746, "step": 5310 }, { "epoch": 1.8126279863481227, "grad_norm": 2.443580150604248, "learning_rate": 0.0003957906712172924, "loss": 3.2005, "step": 5311 }, { "epoch": 1.8129692832764506, "grad_norm": 3.0745327472686768, "learning_rate": 0.0003956769055745165, "loss": 6.3638, "step": 5312 }, { "epoch": 1.8133105802047782, "grad_norm": 3.1432762145996094, "learning_rate": 0.00039556313993174065, "loss": 5.9445, "step": 5313 }, { "epoch": 1.8136518771331058, "grad_norm": 3.154367685317993, "learning_rate": 0.0003954493742889647, "loss": 6.4095, "step": 5314 }, { "epoch": 1.8139931740614335, "grad_norm": 3.050010919570923, "learning_rate": 0.00039533560864618886, "loss": 6.4283, "step": 5315 }, { "epoch": 1.814334470989761, "grad_norm": 2.967775344848633, "learning_rate": 0.00039522184300341297, "loss": 6.1403, "step": 5316 }, { "epoch": 1.8146757679180887, "grad_norm": 3.014551877975464, "learning_rate": 0.0003951080773606371, "loss": 6.4165, "step": 5317 }, { "epoch": 1.8150170648464163, "grad_norm": 3.006186008453369, "learning_rate": 0.00039499431171786123, "loss": 7.0178, "step": 5318 }, { "epoch": 1.8153583617747442, "grad_norm": 2.8938446044921875, "learning_rate": 0.00039488054607508534, "loss": 6.7276, "step": 5319 }, { "epoch": 1.8156996587030716, "grad_norm": 2.9930148124694824, "learning_rate": 0.00039476678043230944, "loss": 6.5492, "step": 5320 }, { "epoch": 1.8160409556313994, "grad_norm": 3.3085694313049316, "learning_rate": 0.0003946530147895336, "loss": 6.1508, "step": 5321 }, { "epoch": 1.8163822525597269, "grad_norm": 3.0276403427124023, "learning_rate": 0.0003945392491467577, "loss": 5.6613, "step": 5322 }, { "epoch": 1.8167235494880547, "grad_norm": 2.933673620223999, "learning_rate": 0.0003944254835039818, "loss": 6.7415, "step": 5323 }, { "epoch": 1.817064846416382, "grad_norm": 2.8729095458984375, "learning_rate": 0.0003943117178612059, "loss": 5.8731, "step": 5324 }, { "epoch": 1.81740614334471, "grad_norm": 2.9451277256011963, "learning_rate": 0.00039419795221843, "loss": 5.8406, "step": 5325 }, { "epoch": 1.8177474402730376, "grad_norm": 2.9216501712799072, "learning_rate": 0.0003940841865756541, "loss": 5.6633, "step": 5326 }, { "epoch": 1.8180887372013652, "grad_norm": 3.156517744064331, "learning_rate": 0.0003939704209328783, "loss": 6.3426, "step": 5327 }, { "epoch": 1.8184300341296928, "grad_norm": 3.5125882625579834, "learning_rate": 0.0003938566552901024, "loss": 5.8847, "step": 5328 }, { "epoch": 1.8187713310580205, "grad_norm": 3.0242371559143066, "learning_rate": 0.0003937428896473265, "loss": 6.1378, "step": 5329 }, { "epoch": 1.819112627986348, "grad_norm": 3.081592559814453, "learning_rate": 0.00039362912400455065, "loss": 6.0888, "step": 5330 }, { "epoch": 1.8194539249146757, "grad_norm": 2.9294071197509766, "learning_rate": 0.00039351535836177476, "loss": 6.4891, "step": 5331 }, { "epoch": 1.8197952218430036, "grad_norm": 2.9522759914398193, "learning_rate": 0.00039340159271899886, "loss": 6.6007, "step": 5332 }, { "epoch": 1.820136518771331, "grad_norm": 2.9654457569122314, "learning_rate": 0.000393287827076223, "loss": 6.6926, "step": 5333 }, { "epoch": 1.8204778156996588, "grad_norm": 3.4901723861694336, "learning_rate": 0.0003931740614334471, "loss": 4.3765, "step": 5334 }, { "epoch": 1.8208191126279862, "grad_norm": 4.748300552368164, "learning_rate": 0.00039306029579067123, "loss": 5.744, "step": 5335 }, { "epoch": 1.821160409556314, "grad_norm": 2.887054204940796, "learning_rate": 0.00039294653014789534, "loss": 6.1389, "step": 5336 }, { "epoch": 1.8215017064846415, "grad_norm": 2.9811062812805176, "learning_rate": 0.00039283276450511944, "loss": 6.3022, "step": 5337 }, { "epoch": 1.8218430034129693, "grad_norm": 3.0545904636383057, "learning_rate": 0.0003927189988623436, "loss": 6.1094, "step": 5338 }, { "epoch": 1.822184300341297, "grad_norm": 3.0074126720428467, "learning_rate": 0.0003926052332195677, "loss": 6.274, "step": 5339 }, { "epoch": 1.8225255972696246, "grad_norm": 2.941384792327881, "learning_rate": 0.0003924914675767918, "loss": 6.2696, "step": 5340 }, { "epoch": 1.8228668941979522, "grad_norm": 2.8732473850250244, "learning_rate": 0.00039237770193401597, "loss": 6.7092, "step": 5341 }, { "epoch": 1.8232081911262799, "grad_norm": 2.9521396160125732, "learning_rate": 0.0003922639362912401, "loss": 6.506, "step": 5342 }, { "epoch": 1.8235494880546075, "grad_norm": 2.961531162261963, "learning_rate": 0.0003921501706484642, "loss": 6.3482, "step": 5343 }, { "epoch": 1.823890784982935, "grad_norm": 3.0271778106689453, "learning_rate": 0.0003920364050056883, "loss": 5.76, "step": 5344 }, { "epoch": 1.824232081911263, "grad_norm": 2.9559972286224365, "learning_rate": 0.0003919226393629124, "loss": 6.5723, "step": 5345 }, { "epoch": 1.8245733788395904, "grad_norm": 2.9276723861694336, "learning_rate": 0.0003918088737201365, "loss": 6.3815, "step": 5346 }, { "epoch": 1.8249146757679182, "grad_norm": 2.9702205657958984, "learning_rate": 0.00039169510807736066, "loss": 6.6194, "step": 5347 }, { "epoch": 1.8252559726962456, "grad_norm": 3.0158731937408447, "learning_rate": 0.00039158134243458476, "loss": 6.287, "step": 5348 }, { "epoch": 1.8255972696245735, "grad_norm": 3.1500322818756104, "learning_rate": 0.00039146757679180887, "loss": 6.433, "step": 5349 }, { "epoch": 1.8259385665529009, "grad_norm": 3.0445759296417236, "learning_rate": 0.000391353811149033, "loss": 6.2129, "step": 5350 }, { "epoch": 1.8262798634812287, "grad_norm": 3.0558390617370605, "learning_rate": 0.00039124004550625713, "loss": 6.182, "step": 5351 }, { "epoch": 1.8266211604095564, "grad_norm": 3.010730504989624, "learning_rate": 0.00039112627986348123, "loss": 5.7881, "step": 5352 }, { "epoch": 1.826962457337884, "grad_norm": 2.9294490814208984, "learning_rate": 0.00039101251422070534, "loss": 6.0147, "step": 5353 }, { "epoch": 1.8273037542662116, "grad_norm": 2.883665084838867, "learning_rate": 0.00039089874857792944, "loss": 6.3523, "step": 5354 }, { "epoch": 1.8276450511945392, "grad_norm": 3.021610975265503, "learning_rate": 0.0003907849829351536, "loss": 6.6719, "step": 5355 }, { "epoch": 1.8279863481228669, "grad_norm": 3.135248899459839, "learning_rate": 0.0003906712172923777, "loss": 6.1267, "step": 5356 }, { "epoch": 1.8283276450511945, "grad_norm": 3.0037925243377686, "learning_rate": 0.0003905574516496018, "loss": 5.9688, "step": 5357 }, { "epoch": 1.8286689419795223, "grad_norm": 3.0769035816192627, "learning_rate": 0.00039044368600682597, "loss": 5.994, "step": 5358 }, { "epoch": 1.8290102389078498, "grad_norm": 2.995614767074585, "learning_rate": 0.0003903299203640501, "loss": 6.2174, "step": 5359 }, { "epoch": 1.8293515358361776, "grad_norm": 3.0436437129974365, "learning_rate": 0.0003902161547212742, "loss": 5.6113, "step": 5360 }, { "epoch": 1.829692832764505, "grad_norm": 2.8753719329833984, "learning_rate": 0.00039010238907849834, "loss": 5.9463, "step": 5361 }, { "epoch": 1.8300341296928329, "grad_norm": 2.950981378555298, "learning_rate": 0.00038998862343572245, "loss": 5.9652, "step": 5362 }, { "epoch": 1.8303754266211603, "grad_norm": 3.0115854740142822, "learning_rate": 0.0003898748577929465, "loss": 6.1722, "step": 5363 }, { "epoch": 1.8307167235494881, "grad_norm": 3.0039737224578857, "learning_rate": 0.00038976109215017066, "loss": 6.8049, "step": 5364 }, { "epoch": 1.8310580204778157, "grad_norm": 2.8962740898132324, "learning_rate": 0.00038964732650739476, "loss": 6.1094, "step": 5365 }, { "epoch": 1.8313993174061434, "grad_norm": 3.1070163249969482, "learning_rate": 0.00038953356086461887, "loss": 5.8425, "step": 5366 }, { "epoch": 1.831740614334471, "grad_norm": 4.08699893951416, "learning_rate": 0.000389419795221843, "loss": 4.5633, "step": 5367 }, { "epoch": 1.8320819112627986, "grad_norm": 3.1629393100738525, "learning_rate": 0.00038930602957906713, "loss": 6.2469, "step": 5368 }, { "epoch": 1.8324232081911263, "grad_norm": 3.210838556289673, "learning_rate": 0.00038919226393629124, "loss": 6.6676, "step": 5369 }, { "epoch": 1.8327645051194539, "grad_norm": 3.3137311935424805, "learning_rate": 0.0003890784982935154, "loss": 5.3108, "step": 5370 }, { "epoch": 1.8331058020477817, "grad_norm": 2.9676313400268555, "learning_rate": 0.0003889647326507395, "loss": 6.4932, "step": 5371 }, { "epoch": 1.8334470989761091, "grad_norm": 2.9483461380004883, "learning_rate": 0.0003888509670079636, "loss": 6.2493, "step": 5372 }, { "epoch": 1.833788395904437, "grad_norm": 3.206331491470337, "learning_rate": 0.0003887372013651877, "loss": 5.3646, "step": 5373 }, { "epoch": 1.8341296928327644, "grad_norm": 3.0024166107177734, "learning_rate": 0.0003886234357224118, "loss": 6.3351, "step": 5374 }, { "epoch": 1.8344709897610922, "grad_norm": 3.38862943649292, "learning_rate": 0.0003885096700796359, "loss": 6.2358, "step": 5375 }, { "epoch": 1.8348122866894196, "grad_norm": 2.8507230281829834, "learning_rate": 0.0003883959044368601, "loss": 5.6975, "step": 5376 }, { "epoch": 1.8351535836177475, "grad_norm": 2.947876214981079, "learning_rate": 0.0003882821387940842, "loss": 6.2397, "step": 5377 }, { "epoch": 1.8354948805460751, "grad_norm": 3.339482307434082, "learning_rate": 0.00038816837315130834, "loss": 5.5517, "step": 5378 }, { "epoch": 1.8358361774744028, "grad_norm": 2.9442296028137207, "learning_rate": 0.00038805460750853245, "loss": 6.5012, "step": 5379 }, { "epoch": 1.8361774744027304, "grad_norm": 3.01493239402771, "learning_rate": 0.00038794084186575655, "loss": 6.1444, "step": 5380 }, { "epoch": 1.836518771331058, "grad_norm": 3.0220446586608887, "learning_rate": 0.0003878270762229807, "loss": 6.8928, "step": 5381 }, { "epoch": 1.8368600682593856, "grad_norm": 2.9548652172088623, "learning_rate": 0.00038771331058020476, "loss": 6.0756, "step": 5382 }, { "epoch": 1.8372013651877133, "grad_norm": 3.209747314453125, "learning_rate": 0.00038759954493742887, "loss": 5.5828, "step": 5383 }, { "epoch": 1.8375426621160411, "grad_norm": 3.493947744369507, "learning_rate": 0.000387485779294653, "loss": 6.0699, "step": 5384 }, { "epoch": 1.8378839590443685, "grad_norm": 3.8855106830596924, "learning_rate": 0.00038737201365187713, "loss": 4.7189, "step": 5385 }, { "epoch": 1.8382252559726964, "grad_norm": 3.2830464839935303, "learning_rate": 0.00038725824800910124, "loss": 5.7651, "step": 5386 }, { "epoch": 1.8385665529010238, "grad_norm": 3.040435314178467, "learning_rate": 0.0003871444823663254, "loss": 5.9345, "step": 5387 }, { "epoch": 1.8389078498293516, "grad_norm": 3.311544179916382, "learning_rate": 0.0003870307167235495, "loss": 5.9783, "step": 5388 }, { "epoch": 1.839249146757679, "grad_norm": 2.9888603687286377, "learning_rate": 0.0003869169510807736, "loss": 5.666, "step": 5389 }, { "epoch": 1.8395904436860069, "grad_norm": 3.0224196910858154, "learning_rate": 0.00038680318543799776, "loss": 6.6377, "step": 5390 }, { "epoch": 1.8399317406143345, "grad_norm": 3.0179591178894043, "learning_rate": 0.00038668941979522187, "loss": 6.0367, "step": 5391 }, { "epoch": 1.8402730375426621, "grad_norm": 2.922994375228882, "learning_rate": 0.0003865756541524459, "loss": 6.0782, "step": 5392 }, { "epoch": 1.8406143344709898, "grad_norm": 2.8612077236175537, "learning_rate": 0.0003864618885096701, "loss": 6.0352, "step": 5393 }, { "epoch": 1.8409556313993174, "grad_norm": 2.957252025604248, "learning_rate": 0.0003863481228668942, "loss": 6.5599, "step": 5394 }, { "epoch": 1.841296928327645, "grad_norm": 3.019355535507202, "learning_rate": 0.0003862343572241183, "loss": 5.697, "step": 5395 }, { "epoch": 1.8416382252559726, "grad_norm": 2.8055245876312256, "learning_rate": 0.00038612059158134245, "loss": 6.1868, "step": 5396 }, { "epoch": 1.8419795221843005, "grad_norm": 3.0640079975128174, "learning_rate": 0.00038600682593856655, "loss": 6.252, "step": 5397 }, { "epoch": 1.842320819112628, "grad_norm": 2.855372428894043, "learning_rate": 0.0003858930602957907, "loss": 6.6329, "step": 5398 }, { "epoch": 1.8426621160409558, "grad_norm": 2.9066994190216064, "learning_rate": 0.0003857792946530148, "loss": 6.5626, "step": 5399 }, { "epoch": 1.8430034129692832, "grad_norm": 2.871976852416992, "learning_rate": 0.0003856655290102389, "loss": 6.0375, "step": 5400 }, { "epoch": 1.843344709897611, "grad_norm": 3.041769504547119, "learning_rate": 0.0003855517633674631, "loss": 6.551, "step": 5401 }, { "epoch": 1.8436860068259384, "grad_norm": 3.0764667987823486, "learning_rate": 0.00038543799772468713, "loss": 6.187, "step": 5402 }, { "epoch": 1.8440273037542663, "grad_norm": 3.1484851837158203, "learning_rate": 0.00038532423208191124, "loss": 6.0588, "step": 5403 }, { "epoch": 1.844368600682594, "grad_norm": 3.08003306388855, "learning_rate": 0.0003852104664391354, "loss": 6.6638, "step": 5404 }, { "epoch": 1.8447098976109215, "grad_norm": 2.9924423694610596, "learning_rate": 0.0003850967007963595, "loss": 6.4312, "step": 5405 }, { "epoch": 1.8450511945392492, "grad_norm": 3.137577533721924, "learning_rate": 0.0003849829351535836, "loss": 6.4421, "step": 5406 }, { "epoch": 1.8453924914675768, "grad_norm": 2.974426507949829, "learning_rate": 0.00038486916951080777, "loss": 6.4856, "step": 5407 }, { "epoch": 1.8457337883959044, "grad_norm": 3.020400047302246, "learning_rate": 0.00038475540386803187, "loss": 6.534, "step": 5408 }, { "epoch": 1.846075085324232, "grad_norm": 2.938917636871338, "learning_rate": 0.000384641638225256, "loss": 6.6811, "step": 5409 }, { "epoch": 1.8464163822525599, "grad_norm": 3.0424704551696777, "learning_rate": 0.00038452787258248013, "loss": 6.4537, "step": 5410 }, { "epoch": 1.8467576791808873, "grad_norm": 3.186312198638916, "learning_rate": 0.00038441410693970424, "loss": 6.5699, "step": 5411 }, { "epoch": 1.8470989761092151, "grad_norm": 2.9400906562805176, "learning_rate": 0.0003843003412969283, "loss": 6.4694, "step": 5412 }, { "epoch": 1.8474402730375425, "grad_norm": 3.567128896713257, "learning_rate": 0.00038418657565415245, "loss": 4.6422, "step": 5413 }, { "epoch": 1.8477815699658704, "grad_norm": 5.326208591461182, "learning_rate": 0.00038407281001137655, "loss": 5.3053, "step": 5414 }, { "epoch": 1.8481228668941978, "grad_norm": 3.0531771183013916, "learning_rate": 0.00038395904436860066, "loss": 5.9597, "step": 5415 }, { "epoch": 1.8484641638225257, "grad_norm": 3.1060829162597656, "learning_rate": 0.0003838452787258248, "loss": 6.5521, "step": 5416 }, { "epoch": 1.8488054607508533, "grad_norm": 3.0353734493255615, "learning_rate": 0.0003837315130830489, "loss": 6.4911, "step": 5417 }, { "epoch": 1.849146757679181, "grad_norm": 2.9575839042663574, "learning_rate": 0.0003836177474402731, "loss": 6.3016, "step": 5418 }, { "epoch": 1.8494880546075085, "grad_norm": 2.9795007705688477, "learning_rate": 0.0003835039817974972, "loss": 6.3147, "step": 5419 }, { "epoch": 1.8498293515358362, "grad_norm": 2.8920490741729736, "learning_rate": 0.0003833902161547213, "loss": 6.7194, "step": 5420 }, { "epoch": 1.8501706484641638, "grad_norm": 3.0831782817840576, "learning_rate": 0.0003832764505119454, "loss": 6.2686, "step": 5421 }, { "epoch": 1.8505119453924914, "grad_norm": 2.8378100395202637, "learning_rate": 0.0003831626848691695, "loss": 6.5928, "step": 5422 }, { "epoch": 1.8508532423208193, "grad_norm": 2.8427324295043945, "learning_rate": 0.0003830489192263936, "loss": 6.1053, "step": 5423 }, { "epoch": 1.8511945392491467, "grad_norm": 3.0172784328460693, "learning_rate": 0.00038293515358361777, "loss": 6.1388, "step": 5424 }, { "epoch": 1.8515358361774745, "grad_norm": 2.933791160583496, "learning_rate": 0.00038282138794084187, "loss": 6.4189, "step": 5425 }, { "epoch": 1.851877133105802, "grad_norm": 2.942861557006836, "learning_rate": 0.000382707622298066, "loss": 6.5474, "step": 5426 }, { "epoch": 1.8522184300341298, "grad_norm": 6.984040260314941, "learning_rate": 0.00038259385665529014, "loss": 6.0704, "step": 5427 }, { "epoch": 1.8525597269624572, "grad_norm": 3.091132640838623, "learning_rate": 0.00038248009101251424, "loss": 6.6446, "step": 5428 }, { "epoch": 1.852901023890785, "grad_norm": 3.019805908203125, "learning_rate": 0.00038236632536973835, "loss": 5.9389, "step": 5429 }, { "epoch": 1.8532423208191127, "grad_norm": 3.0177457332611084, "learning_rate": 0.0003822525597269625, "loss": 6.3112, "step": 5430 }, { "epoch": 1.8535836177474403, "grad_norm": 2.8653550148010254, "learning_rate": 0.00038213879408418656, "loss": 6.0127, "step": 5431 }, { "epoch": 1.853924914675768, "grad_norm": 2.9369118213653564, "learning_rate": 0.00038202502844141066, "loss": 5.6815, "step": 5432 }, { "epoch": 1.8542662116040955, "grad_norm": 2.954627513885498, "learning_rate": 0.0003819112627986348, "loss": 6.5244, "step": 5433 }, { "epoch": 1.8546075085324232, "grad_norm": 2.8261237144470215, "learning_rate": 0.0003817974971558589, "loss": 6.4313, "step": 5434 }, { "epoch": 1.8549488054607508, "grad_norm": 3.0321621894836426, "learning_rate": 0.00038168373151308303, "loss": 6.5262, "step": 5435 }, { "epoch": 1.8552901023890787, "grad_norm": 3.0571656227111816, "learning_rate": 0.0003815699658703072, "loss": 6.4915, "step": 5436 }, { "epoch": 1.855631399317406, "grad_norm": 2.8418610095977783, "learning_rate": 0.0003814562002275313, "loss": 6.0628, "step": 5437 }, { "epoch": 1.855972696245734, "grad_norm": 3.0574285984039307, "learning_rate": 0.00038134243458475545, "loss": 6.2824, "step": 5438 }, { "epoch": 1.8563139931740613, "grad_norm": 2.951812267303467, "learning_rate": 0.00038122866894197956, "loss": 6.4227, "step": 5439 }, { "epoch": 1.8566552901023892, "grad_norm": 2.9646008014678955, "learning_rate": 0.00038111490329920366, "loss": 6.4111, "step": 5440 }, { "epoch": 1.8569965870307166, "grad_norm": 2.9364631175994873, "learning_rate": 0.00038100113765642777, "loss": 6.3866, "step": 5441 }, { "epoch": 1.8573378839590444, "grad_norm": 4.254305839538574, "learning_rate": 0.00038088737201365187, "loss": 5.721, "step": 5442 }, { "epoch": 1.857679180887372, "grad_norm": 2.9221060276031494, "learning_rate": 0.000380773606370876, "loss": 5.6808, "step": 5443 }, { "epoch": 1.8580204778156997, "grad_norm": 2.9992268085479736, "learning_rate": 0.00038065984072810014, "loss": 5.8649, "step": 5444 }, { "epoch": 1.8583617747440273, "grad_norm": 2.9220197200775146, "learning_rate": 0.00038054607508532424, "loss": 5.8822, "step": 5445 }, { "epoch": 1.858703071672355, "grad_norm": 2.9059829711914062, "learning_rate": 0.00038043230944254835, "loss": 6.4213, "step": 5446 }, { "epoch": 1.8590443686006826, "grad_norm": 3.104856252670288, "learning_rate": 0.0003803185437997725, "loss": 5.7524, "step": 5447 }, { "epoch": 1.8593856655290102, "grad_norm": 2.9057981967926025, "learning_rate": 0.0003802047781569966, "loss": 6.5828, "step": 5448 }, { "epoch": 1.859726962457338, "grad_norm": 2.952005624771118, "learning_rate": 0.0003800910125142207, "loss": 6.5241, "step": 5449 }, { "epoch": 1.8600682593856654, "grad_norm": 2.8778185844421387, "learning_rate": 0.0003799772468714448, "loss": 6.7302, "step": 5450 }, { "epoch": 1.8604095563139933, "grad_norm": 2.990785837173462, "learning_rate": 0.0003798634812286689, "loss": 5.8828, "step": 5451 }, { "epoch": 1.8607508532423207, "grad_norm": 2.9240405559539795, "learning_rate": 0.00037974971558589303, "loss": 6.4997, "step": 5452 }, { "epoch": 1.8610921501706486, "grad_norm": 3.0843167304992676, "learning_rate": 0.0003796359499431172, "loss": 5.7164, "step": 5453 }, { "epoch": 1.861433447098976, "grad_norm": 2.9468958377838135, "learning_rate": 0.0003795221843003413, "loss": 6.3823, "step": 5454 }, { "epoch": 1.8617747440273038, "grad_norm": 2.890228271484375, "learning_rate": 0.0003794084186575654, "loss": 6.1479, "step": 5455 }, { "epoch": 1.8621160409556314, "grad_norm": 7.07792329788208, "learning_rate": 0.00037929465301478956, "loss": 4.19, "step": 5456 }, { "epoch": 1.862457337883959, "grad_norm": 2.9772236347198486, "learning_rate": 0.00037918088737201366, "loss": 6.5797, "step": 5457 }, { "epoch": 1.8627986348122867, "grad_norm": 3.0900766849517822, "learning_rate": 0.0003790671217292378, "loss": 5.6404, "step": 5458 }, { "epoch": 1.8631399317406143, "grad_norm": 3.135882616043091, "learning_rate": 0.00037895335608646193, "loss": 6.2669, "step": 5459 }, { "epoch": 1.863481228668942, "grad_norm": 3.0472939014434814, "learning_rate": 0.000378839590443686, "loss": 5.6298, "step": 5460 }, { "epoch": 1.8638225255972696, "grad_norm": 3.0007853507995605, "learning_rate": 0.00037872582480091014, "loss": 5.4573, "step": 5461 }, { "epoch": 1.8641638225255974, "grad_norm": 2.992143154144287, "learning_rate": 0.00037861205915813424, "loss": 6.2351, "step": 5462 }, { "epoch": 1.8645051194539248, "grad_norm": 2.8720524311065674, "learning_rate": 0.00037849829351535835, "loss": 5.7451, "step": 5463 }, { "epoch": 1.8648464163822527, "grad_norm": 3.13032603263855, "learning_rate": 0.0003783845278725825, "loss": 5.7272, "step": 5464 }, { "epoch": 1.86518771331058, "grad_norm": 3.009556293487549, "learning_rate": 0.0003782707622298066, "loss": 5.9469, "step": 5465 }, { "epoch": 1.865529010238908, "grad_norm": 2.857898712158203, "learning_rate": 0.0003781569965870307, "loss": 5.6815, "step": 5466 }, { "epoch": 1.8658703071672353, "grad_norm": 2.863910675048828, "learning_rate": 0.0003780432309442549, "loss": 6.0975, "step": 5467 }, { "epoch": 1.8662116040955632, "grad_norm": 2.9575035572052, "learning_rate": 0.000377929465301479, "loss": 6.5311, "step": 5468 }, { "epoch": 1.8665529010238908, "grad_norm": 3.4162585735321045, "learning_rate": 0.0003778156996587031, "loss": 5.9707, "step": 5469 }, { "epoch": 1.8668941979522184, "grad_norm": 3.0944206714630127, "learning_rate": 0.0003777019340159272, "loss": 6.2304, "step": 5470 }, { "epoch": 1.867235494880546, "grad_norm": 3.3516929149627686, "learning_rate": 0.0003775881683731513, "loss": 5.7523, "step": 5471 }, { "epoch": 1.8675767918088737, "grad_norm": 2.970231294631958, "learning_rate": 0.0003774744027303754, "loss": 5.5873, "step": 5472 }, { "epoch": 1.8679180887372013, "grad_norm": 3.073521137237549, "learning_rate": 0.00037736063708759956, "loss": 6.0575, "step": 5473 }, { "epoch": 1.868259385665529, "grad_norm": 2.949779748916626, "learning_rate": 0.00037724687144482366, "loss": 6.1106, "step": 5474 }, { "epoch": 1.8686006825938568, "grad_norm": 2.9631776809692383, "learning_rate": 0.00037713310580204777, "loss": 5.8668, "step": 5475 }, { "epoch": 1.8689419795221842, "grad_norm": 2.9993419647216797, "learning_rate": 0.00037701934015927193, "loss": 6.5943, "step": 5476 }, { "epoch": 1.869283276450512, "grad_norm": 2.911229133605957, "learning_rate": 0.00037690557451649603, "loss": 6.1717, "step": 5477 }, { "epoch": 1.8696245733788395, "grad_norm": 3.7616472244262695, "learning_rate": 0.0003767918088737202, "loss": 4.3073, "step": 5478 }, { "epoch": 1.8699658703071673, "grad_norm": 3.202282190322876, "learning_rate": 0.0003766780432309443, "loss": 5.8356, "step": 5479 }, { "epoch": 1.8703071672354947, "grad_norm": 3.0891096591949463, "learning_rate": 0.00037656427758816835, "loss": 4.9737, "step": 5480 }, { "epoch": 1.8706484641638226, "grad_norm": 1.8888578414916992, "learning_rate": 0.0003764505119453925, "loss": 2.898, "step": 5481 }, { "epoch": 1.8709897610921502, "grad_norm": 3.4496963024139404, "learning_rate": 0.0003763367463026166, "loss": 5.7924, "step": 5482 }, { "epoch": 1.8713310580204778, "grad_norm": 3.0166397094726562, "learning_rate": 0.0003762229806598407, "loss": 6.2951, "step": 5483 }, { "epoch": 1.8716723549488055, "grad_norm": 3.0307724475860596, "learning_rate": 0.0003761092150170649, "loss": 6.5138, "step": 5484 }, { "epoch": 1.872013651877133, "grad_norm": 2.994663953781128, "learning_rate": 0.000375995449374289, "loss": 6.3472, "step": 5485 }, { "epoch": 1.8723549488054607, "grad_norm": 2.9022762775421143, "learning_rate": 0.0003758816837315131, "loss": 5.8626, "step": 5486 }, { "epoch": 1.8726962457337883, "grad_norm": 2.9295828342437744, "learning_rate": 0.00037576791808873725, "loss": 5.9134, "step": 5487 }, { "epoch": 1.8730375426621162, "grad_norm": 2.866713047027588, "learning_rate": 0.00037565415244596135, "loss": 6.403, "step": 5488 }, { "epoch": 1.8733788395904436, "grad_norm": 2.9316976070404053, "learning_rate": 0.0003755403868031854, "loss": 5.9694, "step": 5489 }, { "epoch": 1.8737201365187715, "grad_norm": 3.494288682937622, "learning_rate": 0.00037542662116040956, "loss": 6.0734, "step": 5490 }, { "epoch": 1.8740614334470989, "grad_norm": 7.175894260406494, "learning_rate": 0.00037531285551763367, "loss": 5.6944, "step": 5491 }, { "epoch": 1.8744027303754267, "grad_norm": 3.8673219680786133, "learning_rate": 0.00037519908987485777, "loss": 6.0276, "step": 5492 }, { "epoch": 1.8747440273037541, "grad_norm": 3.381589412689209, "learning_rate": 0.00037508532423208193, "loss": 5.8487, "step": 5493 }, { "epoch": 1.875085324232082, "grad_norm": 3.1894888877868652, "learning_rate": 0.00037497155858930603, "loss": 6.3183, "step": 5494 }, { "epoch": 1.8754266211604096, "grad_norm": 3.0183558464050293, "learning_rate": 0.00037485779294653014, "loss": 6.1961, "step": 5495 }, { "epoch": 1.8757679180887372, "grad_norm": 2.9099154472351074, "learning_rate": 0.0003747440273037543, "loss": 6.3843, "step": 5496 }, { "epoch": 1.8761092150170648, "grad_norm": 3.4501168727874756, "learning_rate": 0.0003746302616609784, "loss": 5.7031, "step": 5497 }, { "epoch": 1.8764505119453925, "grad_norm": 4.108460426330566, "learning_rate": 0.0003745164960182025, "loss": 5.5165, "step": 5498 }, { "epoch": 1.87679180887372, "grad_norm": 2.9657464027404785, "learning_rate": 0.0003744027303754266, "loss": 6.6402, "step": 5499 }, { "epoch": 1.8771331058020477, "grad_norm": 2.9928600788116455, "learning_rate": 0.0003742889647326507, "loss": 5.5103, "step": 5500 }, { "epoch": 1.8774744027303756, "grad_norm": 2.88197922706604, "learning_rate": 0.0003741751990898749, "loss": 6.0788, "step": 5501 }, { "epoch": 1.877815699658703, "grad_norm": 2.8990108966827393, "learning_rate": 0.000374061433447099, "loss": 5.9397, "step": 5502 }, { "epoch": 1.8781569965870308, "grad_norm": 6.239475250244141, "learning_rate": 0.0003739476678043231, "loss": 4.31, "step": 5503 }, { "epoch": 1.8784982935153582, "grad_norm": 2.9518520832061768, "learning_rate": 0.00037383390216154725, "loss": 6.4256, "step": 5504 }, { "epoch": 1.878839590443686, "grad_norm": 3.101850748062134, "learning_rate": 0.00037372013651877135, "loss": 6.2821, "step": 5505 }, { "epoch": 1.8791808873720135, "grad_norm": 2.954547643661499, "learning_rate": 0.00037360637087599546, "loss": 5.9507, "step": 5506 }, { "epoch": 1.8795221843003413, "grad_norm": 3.1224567890167236, "learning_rate": 0.0003734926052332196, "loss": 6.0534, "step": 5507 }, { "epoch": 1.879863481228669, "grad_norm": 2.9866816997528076, "learning_rate": 0.0003733788395904437, "loss": 6.8862, "step": 5508 }, { "epoch": 1.8802047781569966, "grad_norm": 2.98016095161438, "learning_rate": 0.00037326507394766777, "loss": 6.7283, "step": 5509 }, { "epoch": 1.8805460750853242, "grad_norm": 2.978584051132202, "learning_rate": 0.00037315130830489193, "loss": 5.4551, "step": 5510 }, { "epoch": 1.8808873720136519, "grad_norm": 3.2457802295684814, "learning_rate": 0.00037303754266211603, "loss": 6.5083, "step": 5511 }, { "epoch": 1.8812286689419795, "grad_norm": 2.834044933319092, "learning_rate": 0.00037292377701934014, "loss": 6.3825, "step": 5512 }, { "epoch": 1.8815699658703071, "grad_norm": 3.000852346420288, "learning_rate": 0.0003728100113765643, "loss": 5.9047, "step": 5513 }, { "epoch": 1.881911262798635, "grad_norm": 3.954674482345581, "learning_rate": 0.0003726962457337884, "loss": 5.7054, "step": 5514 }, { "epoch": 1.8822525597269624, "grad_norm": 3.103377103805542, "learning_rate": 0.0003725824800910125, "loss": 6.532, "step": 5515 }, { "epoch": 1.8825938566552902, "grad_norm": 3.011568546295166, "learning_rate": 0.00037246871444823667, "loss": 6.4145, "step": 5516 }, { "epoch": 1.8829351535836176, "grad_norm": 2.94783091545105, "learning_rate": 0.0003723549488054608, "loss": 5.7669, "step": 5517 }, { "epoch": 1.8832764505119455, "grad_norm": 3.0503170490264893, "learning_rate": 0.0003722411831626849, "loss": 6.1408, "step": 5518 }, { "epoch": 1.8836177474402729, "grad_norm": 2.829469919204712, "learning_rate": 0.000372127417519909, "loss": 6.3711, "step": 5519 }, { "epoch": 1.8839590443686007, "grad_norm": 2.903998374938965, "learning_rate": 0.0003720136518771331, "loss": 6.3651, "step": 5520 }, { "epoch": 1.8843003412969284, "grad_norm": 3.0705928802490234, "learning_rate": 0.00037189988623435725, "loss": 5.9707, "step": 5521 }, { "epoch": 1.884641638225256, "grad_norm": 3.0179994106292725, "learning_rate": 0.00037178612059158135, "loss": 5.813, "step": 5522 }, { "epoch": 1.8849829351535836, "grad_norm": 3.0558056831359863, "learning_rate": 0.00037167235494880546, "loss": 5.9914, "step": 5523 }, { "epoch": 1.8853242320819112, "grad_norm": 2.9075541496276855, "learning_rate": 0.0003715585893060296, "loss": 6.2369, "step": 5524 }, { "epoch": 1.8856655290102389, "grad_norm": 2.8797600269317627, "learning_rate": 0.0003714448236632537, "loss": 6.2321, "step": 5525 }, { "epoch": 1.8860068259385665, "grad_norm": 2.88400936126709, "learning_rate": 0.0003713310580204778, "loss": 5.2605, "step": 5526 }, { "epoch": 1.8863481228668944, "grad_norm": 3.1179773807525635, "learning_rate": 0.000371217292377702, "loss": 6.1447, "step": 5527 }, { "epoch": 1.8866894197952218, "grad_norm": 2.8629674911499023, "learning_rate": 0.00037110352673492604, "loss": 6.4715, "step": 5528 }, { "epoch": 1.8870307167235496, "grad_norm": 2.896420478820801, "learning_rate": 0.00037098976109215014, "loss": 6.3566, "step": 5529 }, { "epoch": 1.887372013651877, "grad_norm": 2.9856109619140625, "learning_rate": 0.0003708759954493743, "loss": 6.3204, "step": 5530 }, { "epoch": 1.8877133105802049, "grad_norm": 3.28257417678833, "learning_rate": 0.0003707622298065984, "loss": 5.3285, "step": 5531 }, { "epoch": 1.8880546075085323, "grad_norm": 2.9387850761413574, "learning_rate": 0.0003706484641638225, "loss": 6.0632, "step": 5532 }, { "epoch": 1.8883959044368601, "grad_norm": 2.9324870109558105, "learning_rate": 0.00037053469852104667, "loss": 5.8282, "step": 5533 }, { "epoch": 1.8887372013651877, "grad_norm": 2.979710340499878, "learning_rate": 0.0003704209328782708, "loss": 6.2292, "step": 5534 }, { "epoch": 1.8890784982935154, "grad_norm": 2.9082748889923096, "learning_rate": 0.0003703071672354949, "loss": 6.0311, "step": 5535 }, { "epoch": 1.889419795221843, "grad_norm": 2.8582496643066406, "learning_rate": 0.00037019340159271904, "loss": 6.5768, "step": 5536 }, { "epoch": 1.8897610921501706, "grad_norm": 3.1130855083465576, "learning_rate": 0.00037007963594994314, "loss": 5.4812, "step": 5537 }, { "epoch": 1.8901023890784983, "grad_norm": 3.0239624977111816, "learning_rate": 0.0003699658703071672, "loss": 6.754, "step": 5538 }, { "epoch": 1.8904436860068259, "grad_norm": 2.8626439571380615, "learning_rate": 0.00036985210466439135, "loss": 6.0451, "step": 5539 }, { "epoch": 1.8907849829351537, "grad_norm": 3.52713942527771, "learning_rate": 0.00036973833902161546, "loss": 5.5374, "step": 5540 }, { "epoch": 1.8911262798634811, "grad_norm": 3.0411159992218018, "learning_rate": 0.0003696245733788396, "loss": 6.253, "step": 5541 }, { "epoch": 1.891467576791809, "grad_norm": 3.1052937507629395, "learning_rate": 0.0003695108077360637, "loss": 5.3534, "step": 5542 }, { "epoch": 1.8918088737201364, "grad_norm": 3.007514715194702, "learning_rate": 0.0003693970420932878, "loss": 5.9601, "step": 5543 }, { "epoch": 1.8921501706484642, "grad_norm": 3.2602505683898926, "learning_rate": 0.000369283276450512, "loss": 5.6877, "step": 5544 }, { "epoch": 1.8924914675767917, "grad_norm": 3.1978507041931152, "learning_rate": 0.0003691695108077361, "loss": 5.6045, "step": 5545 }, { "epoch": 1.8928327645051195, "grad_norm": 2.927302360534668, "learning_rate": 0.0003690557451649602, "loss": 5.7842, "step": 5546 }, { "epoch": 1.8931740614334471, "grad_norm": 3.1250858306884766, "learning_rate": 0.00036894197952218435, "loss": 5.8389, "step": 5547 }, { "epoch": 1.8935153583617748, "grad_norm": 3.004615545272827, "learning_rate": 0.0003688282138794084, "loss": 6.1601, "step": 5548 }, { "epoch": 1.8938566552901024, "grad_norm": 3.0711987018585205, "learning_rate": 0.0003687144482366325, "loss": 6.316, "step": 5549 }, { "epoch": 1.89419795221843, "grad_norm": 3.4832606315612793, "learning_rate": 0.00036860068259385667, "loss": 5.1936, "step": 5550 }, { "epoch": 1.8945392491467576, "grad_norm": 3.176182508468628, "learning_rate": 0.0003684869169510808, "loss": 5.5715, "step": 5551 }, { "epoch": 1.8948805460750853, "grad_norm": 2.930396318435669, "learning_rate": 0.0003683731513083049, "loss": 6.3699, "step": 5552 }, { "epoch": 1.8952218430034131, "grad_norm": 2.8957557678222656, "learning_rate": 0.00036825938566552904, "loss": 5.121, "step": 5553 }, { "epoch": 1.8955631399317405, "grad_norm": 2.885211229324341, "learning_rate": 0.00036814562002275314, "loss": 6.0966, "step": 5554 }, { "epoch": 1.8959044368600684, "grad_norm": 3.3072848320007324, "learning_rate": 0.00036803185437997725, "loss": 5.6025, "step": 5555 }, { "epoch": 1.8962457337883958, "grad_norm": 3.156798839569092, "learning_rate": 0.0003679180887372014, "loss": 6.0641, "step": 5556 }, { "epoch": 1.8965870307167236, "grad_norm": 11.321859359741211, "learning_rate": 0.00036780432309442546, "loss": 5.4411, "step": 5557 }, { "epoch": 1.896928327645051, "grad_norm": 2.977379322052002, "learning_rate": 0.00036769055745164956, "loss": 6.6542, "step": 5558 }, { "epoch": 1.8972696245733789, "grad_norm": 3.0141522884368896, "learning_rate": 0.0003675767918088737, "loss": 6.0875, "step": 5559 }, { "epoch": 1.8976109215017065, "grad_norm": 2.9410688877105713, "learning_rate": 0.00036746302616609783, "loss": 6.4585, "step": 5560 }, { "epoch": 1.8979522184300341, "grad_norm": 3.0146484375, "learning_rate": 0.000367349260523322, "loss": 6.1579, "step": 5561 }, { "epoch": 1.8982935153583618, "grad_norm": 2.873905897140503, "learning_rate": 0.0003672354948805461, "loss": 5.932, "step": 5562 }, { "epoch": 1.8986348122866894, "grad_norm": 2.8637454509735107, "learning_rate": 0.0003671217292377702, "loss": 6.2974, "step": 5563 }, { "epoch": 1.898976109215017, "grad_norm": 3.0655555725097656, "learning_rate": 0.00036700796359499436, "loss": 5.8897, "step": 5564 }, { "epoch": 1.8993174061433447, "grad_norm": 2.8714025020599365, "learning_rate": 0.00036689419795221846, "loss": 6.1978, "step": 5565 }, { "epoch": 1.8996587030716725, "grad_norm": 2.971374034881592, "learning_rate": 0.00036678043230944257, "loss": 5.2991, "step": 5566 }, { "epoch": 1.9, "grad_norm": 2.9192891120910645, "learning_rate": 0.00036666666666666667, "loss": 5.9472, "step": 5567 }, { "epoch": 1.9003412969283278, "grad_norm": 3.0181756019592285, "learning_rate": 0.0003665529010238908, "loss": 6.4597, "step": 5568 }, { "epoch": 1.9006825938566552, "grad_norm": 2.9252567291259766, "learning_rate": 0.0003664391353811149, "loss": 6.3444, "step": 5569 }, { "epoch": 1.901023890784983, "grad_norm": 3.165065050125122, "learning_rate": 0.00036632536973833904, "loss": 5.9354, "step": 5570 }, { "epoch": 1.9013651877133104, "grad_norm": 2.935777425765991, "learning_rate": 0.00036621160409556314, "loss": 6.4579, "step": 5571 }, { "epoch": 1.9017064846416383, "grad_norm": 2.785609722137451, "learning_rate": 0.00036609783845278725, "loss": 5.7246, "step": 5572 }, { "epoch": 1.902047781569966, "grad_norm": 2.8585057258605957, "learning_rate": 0.0003659840728100114, "loss": 6.8029, "step": 5573 }, { "epoch": 1.9023890784982935, "grad_norm": 2.8563308715820312, "learning_rate": 0.0003658703071672355, "loss": 6.3342, "step": 5574 }, { "epoch": 1.9027303754266212, "grad_norm": 9.50164794921875, "learning_rate": 0.0003657565415244596, "loss": 5.5615, "step": 5575 }, { "epoch": 1.9030716723549488, "grad_norm": 2.983273983001709, "learning_rate": 0.0003656427758816838, "loss": 6.0595, "step": 5576 }, { "epoch": 1.9034129692832764, "grad_norm": 2.895350217819214, "learning_rate": 0.00036552901023890783, "loss": 6.2199, "step": 5577 }, { "epoch": 1.903754266211604, "grad_norm": 3.4141218662261963, "learning_rate": 0.00036541524459613193, "loss": 6.1079, "step": 5578 }, { "epoch": 1.904095563139932, "grad_norm": 3.1629059314727783, "learning_rate": 0.0003653014789533561, "loss": 5.895, "step": 5579 }, { "epoch": 1.9044368600682593, "grad_norm": 2.89243483543396, "learning_rate": 0.0003651877133105802, "loss": 5.857, "step": 5580 }, { "epoch": 1.9047781569965871, "grad_norm": 2.929072618484497, "learning_rate": 0.0003650739476678043, "loss": 6.2954, "step": 5581 }, { "epoch": 1.9051194539249146, "grad_norm": 3.005039691925049, "learning_rate": 0.00036496018202502846, "loss": 6.2573, "step": 5582 }, { "epoch": 1.9054607508532424, "grad_norm": 3.169196367263794, "learning_rate": 0.00036484641638225257, "loss": 5.6108, "step": 5583 }, { "epoch": 1.9058020477815698, "grad_norm": 2.895752429962158, "learning_rate": 0.0003647326507394767, "loss": 6.3867, "step": 5584 }, { "epoch": 1.9061433447098977, "grad_norm": 3.001361131668091, "learning_rate": 0.00036461888509670083, "loss": 6.3686, "step": 5585 }, { "epoch": 1.9064846416382253, "grad_norm": 2.938120126724243, "learning_rate": 0.00036450511945392494, "loss": 6.4083, "step": 5586 }, { "epoch": 1.906825938566553, "grad_norm": 2.791254758834839, "learning_rate": 0.00036439135381114904, "loss": 6.2613, "step": 5587 }, { "epoch": 1.9071672354948805, "grad_norm": 3.121710777282715, "learning_rate": 0.00036427758816837315, "loss": 5.9979, "step": 5588 }, { "epoch": 1.9075085324232082, "grad_norm": 3.0421764850616455, "learning_rate": 0.00036416382252559725, "loss": 5.7314, "step": 5589 }, { "epoch": 1.9078498293515358, "grad_norm": 3.0027949810028076, "learning_rate": 0.0003640500568828214, "loss": 6.3889, "step": 5590 }, { "epoch": 1.9081911262798634, "grad_norm": 2.965477466583252, "learning_rate": 0.0003639362912400455, "loss": 6.4909, "step": 5591 }, { "epoch": 1.9085324232081913, "grad_norm": 3.089905023574829, "learning_rate": 0.0003638225255972696, "loss": 6.4991, "step": 5592 }, { "epoch": 1.9088737201365187, "grad_norm": 2.832148551940918, "learning_rate": 0.0003637087599544938, "loss": 6.2179, "step": 5593 }, { "epoch": 1.9092150170648465, "grad_norm": 2.9781718254089355, "learning_rate": 0.0003635949943117179, "loss": 6.366, "step": 5594 }, { "epoch": 1.909556313993174, "grad_norm": 2.9966466426849365, "learning_rate": 0.000363481228668942, "loss": 6.1874, "step": 5595 }, { "epoch": 1.9098976109215018, "grad_norm": 2.9594128131866455, "learning_rate": 0.0003633674630261661, "loss": 6.2093, "step": 5596 }, { "epoch": 1.9102389078498292, "grad_norm": 2.8735949993133545, "learning_rate": 0.0003632536973833902, "loss": 6.6883, "step": 5597 }, { "epoch": 1.910580204778157, "grad_norm": 3.143655300140381, "learning_rate": 0.0003631399317406143, "loss": 5.5111, "step": 5598 }, { "epoch": 1.9109215017064847, "grad_norm": 2.9463629722595215, "learning_rate": 0.00036302616609783846, "loss": 6.3481, "step": 5599 }, { "epoch": 1.9112627986348123, "grad_norm": 3.0451953411102295, "learning_rate": 0.00036291240045506257, "loss": 6.3788, "step": 5600 }, { "epoch": 1.91160409556314, "grad_norm": 2.9045491218566895, "learning_rate": 0.00036279863481228667, "loss": 6.6246, "step": 5601 }, { "epoch": 1.9119453924914676, "grad_norm": 2.970602035522461, "learning_rate": 0.00036268486916951083, "loss": 6.1013, "step": 5602 }, { "epoch": 1.9122866894197952, "grad_norm": 2.88826847076416, "learning_rate": 0.00036257110352673494, "loss": 6.2451, "step": 5603 }, { "epoch": 1.9126279863481228, "grad_norm": 2.8405919075012207, "learning_rate": 0.0003624573378839591, "loss": 6.4149, "step": 5604 }, { "epoch": 1.9129692832764507, "grad_norm": 2.8814759254455566, "learning_rate": 0.0003623435722411832, "loss": 5.613, "step": 5605 }, { "epoch": 1.913310580204778, "grad_norm": 2.8033180236816406, "learning_rate": 0.00036222980659840725, "loss": 5.9609, "step": 5606 }, { "epoch": 1.913651877133106, "grad_norm": 2.9600167274475098, "learning_rate": 0.0003621160409556314, "loss": 6.6377, "step": 5607 }, { "epoch": 1.9139931740614333, "grad_norm": 2.917205572128296, "learning_rate": 0.0003620022753128555, "loss": 5.7594, "step": 5608 }, { "epoch": 1.9143344709897612, "grad_norm": 2.8245956897735596, "learning_rate": 0.0003618885096700796, "loss": 6.1144, "step": 5609 }, { "epoch": 1.9146757679180886, "grad_norm": 6.755703449249268, "learning_rate": 0.0003617747440273038, "loss": 5.8463, "step": 5610 }, { "epoch": 1.9150170648464164, "grad_norm": 2.968301773071289, "learning_rate": 0.0003616609783845279, "loss": 6.0244, "step": 5611 }, { "epoch": 1.915358361774744, "grad_norm": 3.3348543643951416, "learning_rate": 0.000361547212741752, "loss": 4.6684, "step": 5612 }, { "epoch": 1.9156996587030717, "grad_norm": 3.070195436477661, "learning_rate": 0.00036143344709897615, "loss": 5.955, "step": 5613 }, { "epoch": 1.9160409556313993, "grad_norm": 3.008603572845459, "learning_rate": 0.00036131968145620025, "loss": 6.47, "step": 5614 }, { "epoch": 1.916382252559727, "grad_norm": 3.069282293319702, "learning_rate": 0.00036120591581342436, "loss": 5.9866, "step": 5615 }, { "epoch": 1.9167235494880546, "grad_norm": 2.977428913116455, "learning_rate": 0.00036109215017064846, "loss": 6.5193, "step": 5616 }, { "epoch": 1.9170648464163822, "grad_norm": 2.9662117958068848, "learning_rate": 0.00036097838452787257, "loss": 5.8879, "step": 5617 }, { "epoch": 1.91740614334471, "grad_norm": 3.1188912391662598, "learning_rate": 0.0003608646188850967, "loss": 5.7213, "step": 5618 }, { "epoch": 1.9177474402730375, "grad_norm": 2.914436101913452, "learning_rate": 0.00036075085324232083, "loss": 6.7126, "step": 5619 }, { "epoch": 1.9180887372013653, "grad_norm": 2.956573247909546, "learning_rate": 0.00036063708759954494, "loss": 6.3765, "step": 5620 }, { "epoch": 1.9184300341296927, "grad_norm": 2.9372453689575195, "learning_rate": 0.00036052332195676904, "loss": 6.046, "step": 5621 }, { "epoch": 1.9187713310580206, "grad_norm": 2.8803412914276123, "learning_rate": 0.0003604095563139932, "loss": 7.05, "step": 5622 }, { "epoch": 1.919112627986348, "grad_norm": 3.022796392440796, "learning_rate": 0.0003602957906712173, "loss": 6.3435, "step": 5623 }, { "epoch": 1.9194539249146758, "grad_norm": 2.9055187702178955, "learning_rate": 0.00036018202502844147, "loss": 6.021, "step": 5624 }, { "epoch": 1.9197952218430034, "grad_norm": 3.274238348007202, "learning_rate": 0.00036006825938566557, "loss": 5.819, "step": 5625 }, { "epoch": 1.920136518771331, "grad_norm": 3.3062515258789062, "learning_rate": 0.0003599544937428896, "loss": 6.3418, "step": 5626 }, { "epoch": 1.9204778156996587, "grad_norm": 3.065157890319824, "learning_rate": 0.0003598407281001138, "loss": 6.9273, "step": 5627 }, { "epoch": 1.9208191126279863, "grad_norm": 3.0245931148529053, "learning_rate": 0.0003597269624573379, "loss": 6.6343, "step": 5628 }, { "epoch": 1.921160409556314, "grad_norm": 3.0174522399902344, "learning_rate": 0.000359613196814562, "loss": 5.9226, "step": 5629 }, { "epoch": 1.9215017064846416, "grad_norm": 2.8802099227905273, "learning_rate": 0.00035949943117178615, "loss": 6.4793, "step": 5630 }, { "epoch": 1.9218430034129694, "grad_norm": 3.4380247592926025, "learning_rate": 0.00035938566552901025, "loss": 5.8155, "step": 5631 }, { "epoch": 1.9221843003412968, "grad_norm": 2.8854217529296875, "learning_rate": 0.00035927189988623436, "loss": 6.4598, "step": 5632 }, { "epoch": 1.9225255972696247, "grad_norm": 3.8663971424102783, "learning_rate": 0.0003591581342434585, "loss": 4.1946, "step": 5633 }, { "epoch": 1.922866894197952, "grad_norm": 2.801706314086914, "learning_rate": 0.0003590443686006826, "loss": 4.3066, "step": 5634 }, { "epoch": 1.92320819112628, "grad_norm": 3.0731019973754883, "learning_rate": 0.0003589306029579067, "loss": 6.1641, "step": 5635 }, { "epoch": 1.9235494880546073, "grad_norm": 3.997176170349121, "learning_rate": 0.00035881683731513083, "loss": 5.2667, "step": 5636 }, { "epoch": 1.9238907849829352, "grad_norm": 3.5250935554504395, "learning_rate": 0.00035870307167235494, "loss": 3.3671, "step": 5637 }, { "epoch": 1.9242320819112628, "grad_norm": 3.1355345249176025, "learning_rate": 0.00035858930602957904, "loss": 5.3227, "step": 5638 }, { "epoch": 1.9245733788395905, "grad_norm": 3.095639228820801, "learning_rate": 0.0003584755403868032, "loss": 6.4683, "step": 5639 }, { "epoch": 1.924914675767918, "grad_norm": 3.36194109916687, "learning_rate": 0.0003583617747440273, "loss": 6.0146, "step": 5640 }, { "epoch": 1.9252559726962457, "grad_norm": 3.3805904388427734, "learning_rate": 0.0003582480091012514, "loss": 5.8779, "step": 5641 }, { "epoch": 1.9255972696245733, "grad_norm": 3.0124683380126953, "learning_rate": 0.00035813424345847557, "loss": 6.5908, "step": 5642 }, { "epoch": 1.925938566552901, "grad_norm": 3.2803475856781006, "learning_rate": 0.0003580204778156997, "loss": 5.7909, "step": 5643 }, { "epoch": 1.9262798634812288, "grad_norm": 4.804317951202393, "learning_rate": 0.00035790671217292384, "loss": 5.5523, "step": 5644 }, { "epoch": 1.9266211604095562, "grad_norm": 2.983696460723877, "learning_rate": 0.0003577929465301479, "loss": 6.0915, "step": 5645 }, { "epoch": 1.926962457337884, "grad_norm": 2.929771900177002, "learning_rate": 0.000357679180887372, "loss": 5.4666, "step": 5646 }, { "epoch": 1.9273037542662115, "grad_norm": 4.5054731369018555, "learning_rate": 0.00035756541524459615, "loss": 5.3908, "step": 5647 }, { "epoch": 1.9276450511945393, "grad_norm": 2.8603098392486572, "learning_rate": 0.00035745164960182026, "loss": 6.3809, "step": 5648 }, { "epoch": 1.9279863481228667, "grad_norm": 2.934943675994873, "learning_rate": 0.00035733788395904436, "loss": 5.8931, "step": 5649 }, { "epoch": 1.9283276450511946, "grad_norm": 3.016292095184326, "learning_rate": 0.0003572241183162685, "loss": 5.6018, "step": 5650 }, { "epoch": 1.9286689419795222, "grad_norm": 3.0841245651245117, "learning_rate": 0.0003571103526734926, "loss": 5.5162, "step": 5651 }, { "epoch": 1.9290102389078498, "grad_norm": 2.870736837387085, "learning_rate": 0.00035699658703071673, "loss": 6.1772, "step": 5652 }, { "epoch": 1.9293515358361775, "grad_norm": 6.509230136871338, "learning_rate": 0.0003568828213879409, "loss": 5.6765, "step": 5653 }, { "epoch": 1.929692832764505, "grad_norm": 2.972872257232666, "learning_rate": 0.000356769055745165, "loss": 6.6489, "step": 5654 }, { "epoch": 1.9300341296928327, "grad_norm": 3.467580556869507, "learning_rate": 0.00035665529010238904, "loss": 5.2504, "step": 5655 }, { "epoch": 1.9303754266211604, "grad_norm": 3.250774621963501, "learning_rate": 0.0003565415244596132, "loss": 5.4942, "step": 5656 }, { "epoch": 1.9307167235494882, "grad_norm": 2.9641740322113037, "learning_rate": 0.0003564277588168373, "loss": 6.0985, "step": 5657 }, { "epoch": 1.9310580204778156, "grad_norm": 2.939337730407715, "learning_rate": 0.0003563139931740614, "loss": 5.0818, "step": 5658 }, { "epoch": 1.9313993174061435, "grad_norm": 3.126194715499878, "learning_rate": 0.00035620022753128557, "loss": 6.1819, "step": 5659 }, { "epoch": 1.9317406143344709, "grad_norm": 5.255848407745361, "learning_rate": 0.0003560864618885097, "loss": 5.0863, "step": 5660 }, { "epoch": 1.9320819112627987, "grad_norm": 3.117389678955078, "learning_rate": 0.0003559726962457338, "loss": 6.127, "step": 5661 }, { "epoch": 1.9324232081911261, "grad_norm": 2.9117963314056396, "learning_rate": 0.00035585893060295794, "loss": 6.263, "step": 5662 }, { "epoch": 1.932764505119454, "grad_norm": 3.053349733352661, "learning_rate": 0.00035574516496018205, "loss": 6.0983, "step": 5663 }, { "epoch": 1.9331058020477816, "grad_norm": 3.072718381881714, "learning_rate": 0.0003556313993174061, "loss": 5.3886, "step": 5664 }, { "epoch": 1.9334470989761092, "grad_norm": 10.232709884643555, "learning_rate": 0.00035551763367463026, "loss": 6.7737, "step": 5665 }, { "epoch": 1.9337883959044369, "grad_norm": 2.86478328704834, "learning_rate": 0.00035540386803185436, "loss": 5.8319, "step": 5666 }, { "epoch": 1.9341296928327645, "grad_norm": 2.977330446243286, "learning_rate": 0.0003552901023890785, "loss": 6.3019, "step": 5667 }, { "epoch": 1.934470989761092, "grad_norm": 3.6822073459625244, "learning_rate": 0.0003551763367463026, "loss": 3.6634, "step": 5668 }, { "epoch": 1.9348122866894197, "grad_norm": 2.952073097229004, "learning_rate": 0.00035506257110352673, "loss": 6.0564, "step": 5669 }, { "epoch": 1.9351535836177476, "grad_norm": 3.094076156616211, "learning_rate": 0.0003549488054607509, "loss": 6.3278, "step": 5670 }, { "epoch": 1.935494880546075, "grad_norm": 3.062957763671875, "learning_rate": 0.000354835039817975, "loss": 6.9028, "step": 5671 }, { "epoch": 1.9358361774744028, "grad_norm": 3.019155502319336, "learning_rate": 0.0003547212741751991, "loss": 5.549, "step": 5672 }, { "epoch": 1.9361774744027302, "grad_norm": 2.855597734451294, "learning_rate": 0.00035460750853242326, "loss": 6.4601, "step": 5673 }, { "epoch": 1.936518771331058, "grad_norm": 2.83564829826355, "learning_rate": 0.0003544937428896473, "loss": 6.4995, "step": 5674 }, { "epoch": 1.9368600682593855, "grad_norm": 2.9659066200256348, "learning_rate": 0.0003543799772468714, "loss": 6.5664, "step": 5675 }, { "epoch": 1.9372013651877134, "grad_norm": 2.9743475914001465, "learning_rate": 0.0003542662116040956, "loss": 7.1904, "step": 5676 }, { "epoch": 1.937542662116041, "grad_norm": 8.094204902648926, "learning_rate": 0.0003541524459613197, "loss": 5.402, "step": 5677 }, { "epoch": 1.9378839590443686, "grad_norm": 3.036323070526123, "learning_rate": 0.0003540386803185438, "loss": 6.4258, "step": 5678 }, { "epoch": 1.9382252559726962, "grad_norm": 3.9837539196014404, "learning_rate": 0.00035392491467576794, "loss": 4.7752, "step": 5679 }, { "epoch": 1.9385665529010239, "grad_norm": 3.116345167160034, "learning_rate": 0.00035381114903299205, "loss": 6.3365, "step": 5680 }, { "epoch": 1.9389078498293515, "grad_norm": 2.885256290435791, "learning_rate": 0.00035369738339021615, "loss": 6.3768, "step": 5681 }, { "epoch": 1.9392491467576791, "grad_norm": 2.9523653984069824, "learning_rate": 0.0003535836177474403, "loss": 6.459, "step": 5682 }, { "epoch": 1.939590443686007, "grad_norm": 3.4799153804779053, "learning_rate": 0.0003534698521046644, "loss": 5.524, "step": 5683 }, { "epoch": 1.9399317406143344, "grad_norm": 2.8675577640533447, "learning_rate": 0.00035335608646188847, "loss": 6.0246, "step": 5684 }, { "epoch": 1.9402730375426622, "grad_norm": 2.8474411964416504, "learning_rate": 0.0003532423208191126, "loss": 6.6248, "step": 5685 }, { "epoch": 1.9406143344709896, "grad_norm": 2.8228769302368164, "learning_rate": 0.00035312855517633673, "loss": 5.826, "step": 5686 }, { "epoch": 1.9409556313993175, "grad_norm": 2.9075095653533936, "learning_rate": 0.0003530147895335609, "loss": 6.5578, "step": 5687 }, { "epoch": 1.9412969283276449, "grad_norm": 2.9296059608459473, "learning_rate": 0.000352901023890785, "loss": 6.1996, "step": 5688 }, { "epoch": 1.9416382252559727, "grad_norm": 3.073141098022461, "learning_rate": 0.0003527872582480091, "loss": 5.9504, "step": 5689 }, { "epoch": 1.9419795221843004, "grad_norm": 2.9059715270996094, "learning_rate": 0.00035267349260523326, "loss": 6.5117, "step": 5690 }, { "epoch": 1.942320819112628, "grad_norm": 2.9183788299560547, "learning_rate": 0.00035255972696245736, "loss": 6.7984, "step": 5691 }, { "epoch": 1.9426621160409556, "grad_norm": 4.353701591491699, "learning_rate": 0.00035244596131968147, "loss": 4.3046, "step": 5692 }, { "epoch": 1.9430034129692833, "grad_norm": 2.997781753540039, "learning_rate": 0.00035233219567690563, "loss": 6.7823, "step": 5693 }, { "epoch": 1.9433447098976109, "grad_norm": 3.029731512069702, "learning_rate": 0.0003522184300341297, "loss": 6.3343, "step": 5694 }, { "epoch": 1.9436860068259385, "grad_norm": 3.035346746444702, "learning_rate": 0.0003521046643913538, "loss": 6.6839, "step": 5695 }, { "epoch": 1.9440273037542664, "grad_norm": 2.9383420944213867, "learning_rate": 0.00035199089874857794, "loss": 5.8788, "step": 5696 }, { "epoch": 1.9443686006825938, "grad_norm": 2.8888626098632812, "learning_rate": 0.00035187713310580205, "loss": 6.6803, "step": 5697 }, { "epoch": 1.9447098976109216, "grad_norm": 2.8905954360961914, "learning_rate": 0.00035176336746302615, "loss": 6.7251, "step": 5698 }, { "epoch": 1.945051194539249, "grad_norm": 2.848459482192993, "learning_rate": 0.0003516496018202503, "loss": 6.3719, "step": 5699 }, { "epoch": 1.9453924914675769, "grad_norm": 6.845625877380371, "learning_rate": 0.0003515358361774744, "loss": 5.3028, "step": 5700 }, { "epoch": 1.9457337883959043, "grad_norm": 2.9232285022735596, "learning_rate": 0.0003514220705346985, "loss": 6.7182, "step": 5701 }, { "epoch": 1.9460750853242321, "grad_norm": 2.9307868480682373, "learning_rate": 0.0003513083048919227, "loss": 5.891, "step": 5702 }, { "epoch": 1.9464163822525598, "grad_norm": 4.067267894744873, "learning_rate": 0.00035119453924914673, "loss": 5.0473, "step": 5703 }, { "epoch": 1.9467576791808874, "grad_norm": 3.438600778579712, "learning_rate": 0.00035108077360637084, "loss": 4.1299, "step": 5704 }, { "epoch": 1.947098976109215, "grad_norm": 2.9375200271606445, "learning_rate": 0.000350967007963595, "loss": 6.3161, "step": 5705 }, { "epoch": 1.9474402730375426, "grad_norm": 3.0774381160736084, "learning_rate": 0.0003508532423208191, "loss": 6.826, "step": 5706 }, { "epoch": 1.9477815699658703, "grad_norm": 2.943984270095825, "learning_rate": 0.00035073947667804326, "loss": 6.4722, "step": 5707 }, { "epoch": 1.948122866894198, "grad_norm": 2.850332021713257, "learning_rate": 0.00035062571103526737, "loss": 6.3759, "step": 5708 }, { "epoch": 1.9484641638225257, "grad_norm": 2.926992177963257, "learning_rate": 0.00035051194539249147, "loss": 5.968, "step": 5709 }, { "epoch": 1.9488054607508531, "grad_norm": 2.8328535556793213, "learning_rate": 0.00035039817974971563, "loss": 6.1558, "step": 5710 }, { "epoch": 1.949146757679181, "grad_norm": 2.906438112258911, "learning_rate": 0.00035028441410693973, "loss": 6.7331, "step": 5711 }, { "epoch": 1.9494880546075084, "grad_norm": 2.8392958641052246, "learning_rate": 0.00035017064846416384, "loss": 6.4798, "step": 5712 }, { "epoch": 1.9498293515358363, "grad_norm": 3.147331476211548, "learning_rate": 0.00035005688282138794, "loss": 5.3819, "step": 5713 }, { "epoch": 1.9501706484641637, "grad_norm": 2.8738808631896973, "learning_rate": 0.00034994311717861205, "loss": 6.0735, "step": 5714 }, { "epoch": 1.9505119453924915, "grad_norm": 2.8813462257385254, "learning_rate": 0.00034982935153583615, "loss": 6.228, "step": 5715 }, { "epoch": 1.9508532423208191, "grad_norm": 2.9783530235290527, "learning_rate": 0.0003497155858930603, "loss": 6.3557, "step": 5716 }, { "epoch": 1.9511945392491468, "grad_norm": 2.8971831798553467, "learning_rate": 0.0003496018202502844, "loss": 6.5061, "step": 5717 }, { "epoch": 1.9515358361774744, "grad_norm": 7.339874267578125, "learning_rate": 0.0003494880546075085, "loss": 5.3234, "step": 5718 }, { "epoch": 1.951877133105802, "grad_norm": 2.992372751235962, "learning_rate": 0.0003493742889647327, "loss": 5.4487, "step": 5719 }, { "epoch": 1.9522184300341296, "grad_norm": 2.892254590988159, "learning_rate": 0.0003492605233219568, "loss": 6.1741, "step": 5720 }, { "epoch": 1.9525597269624573, "grad_norm": 2.97224497795105, "learning_rate": 0.0003491467576791809, "loss": 6.3184, "step": 5721 }, { "epoch": 1.9529010238907851, "grad_norm": 2.979079008102417, "learning_rate": 0.00034903299203640505, "loss": 6.1467, "step": 5722 }, { "epoch": 1.9532423208191125, "grad_norm": 3.279611825942993, "learning_rate": 0.0003489192263936291, "loss": 5.636, "step": 5723 }, { "epoch": 1.9535836177474404, "grad_norm": 2.8635432720184326, "learning_rate": 0.0003488054607508532, "loss": 6.1327, "step": 5724 }, { "epoch": 1.9539249146757678, "grad_norm": 2.876227855682373, "learning_rate": 0.00034869169510807737, "loss": 5.9195, "step": 5725 }, { "epoch": 1.9542662116040956, "grad_norm": 2.923100233078003, "learning_rate": 0.00034857792946530147, "loss": 5.9977, "step": 5726 }, { "epoch": 1.954607508532423, "grad_norm": 2.884539842605591, "learning_rate": 0.00034846416382252563, "loss": 6.6596, "step": 5727 }, { "epoch": 1.954948805460751, "grad_norm": 2.869551420211792, "learning_rate": 0.00034835039817974973, "loss": 6.7263, "step": 5728 }, { "epoch": 1.9552901023890785, "grad_norm": 3.0249316692352295, "learning_rate": 0.00034823663253697384, "loss": 5.9006, "step": 5729 }, { "epoch": 1.9556313993174061, "grad_norm": 3.008315086364746, "learning_rate": 0.000348122866894198, "loss": 6.1326, "step": 5730 }, { "epoch": 1.9559726962457338, "grad_norm": 2.924003839492798, "learning_rate": 0.0003480091012514221, "loss": 5.8715, "step": 5731 }, { "epoch": 1.9563139931740614, "grad_norm": 2.9903616905212402, "learning_rate": 0.00034789533560864615, "loss": 6.8732, "step": 5732 }, { "epoch": 1.956655290102389, "grad_norm": 3.197800636291504, "learning_rate": 0.0003477815699658703, "loss": 5.9274, "step": 5733 }, { "epoch": 1.9569965870307167, "grad_norm": 2.8411238193511963, "learning_rate": 0.0003476678043230944, "loss": 6.1661, "step": 5734 }, { "epoch": 1.9573378839590445, "grad_norm": 2.9080448150634766, "learning_rate": 0.0003475540386803185, "loss": 6.2348, "step": 5735 }, { "epoch": 1.957679180887372, "grad_norm": 2.778029441833496, "learning_rate": 0.0003474402730375427, "loss": 6.2891, "step": 5736 }, { "epoch": 1.9580204778156998, "grad_norm": 2.954080581665039, "learning_rate": 0.0003473265073947668, "loss": 6.1235, "step": 5737 }, { "epoch": 1.9583617747440272, "grad_norm": 2.9157159328460693, "learning_rate": 0.0003472127417519909, "loss": 6.2918, "step": 5738 }, { "epoch": 1.958703071672355, "grad_norm": 2.926003932952881, "learning_rate": 0.00034709897610921505, "loss": 6.6191, "step": 5739 }, { "epoch": 1.9590443686006824, "grad_norm": 3.026291847229004, "learning_rate": 0.00034698521046643916, "loss": 6.0096, "step": 5740 }, { "epoch": 1.9593856655290103, "grad_norm": 2.8232579231262207, "learning_rate": 0.00034687144482366326, "loss": 6.4494, "step": 5741 }, { "epoch": 1.959726962457338, "grad_norm": 3.837078332901001, "learning_rate": 0.00034675767918088737, "loss": 5.45, "step": 5742 }, { "epoch": 1.9600682593856655, "grad_norm": 2.8543741703033447, "learning_rate": 0.00034664391353811147, "loss": 6.1748, "step": 5743 }, { "epoch": 1.9604095563139932, "grad_norm": 3.0786311626434326, "learning_rate": 0.0003465301478953356, "loss": 5.7486, "step": 5744 }, { "epoch": 1.9607508532423208, "grad_norm": 3.161501407623291, "learning_rate": 0.00034641638225255974, "loss": 5.8587, "step": 5745 }, { "epoch": 1.9610921501706484, "grad_norm": 2.9017693996429443, "learning_rate": 0.00034630261660978384, "loss": 6.2358, "step": 5746 }, { "epoch": 1.961433447098976, "grad_norm": 2.9906439781188965, "learning_rate": 0.000346188850967008, "loss": 6.5953, "step": 5747 }, { "epoch": 1.961774744027304, "grad_norm": 2.825509786605835, "learning_rate": 0.0003460750853242321, "loss": 6.4981, "step": 5748 }, { "epoch": 1.9621160409556313, "grad_norm": 4.668010711669922, "learning_rate": 0.0003459613196814562, "loss": 4.1262, "step": 5749 }, { "epoch": 1.9624573378839592, "grad_norm": 2.996762990951538, "learning_rate": 0.00034584755403868037, "loss": 5.5406, "step": 5750 }, { "epoch": 1.9627986348122866, "grad_norm": 3.0203421115875244, "learning_rate": 0.0003457337883959045, "loss": 5.8989, "step": 5751 }, { "epoch": 1.9631399317406144, "grad_norm": 2.8792803287506104, "learning_rate": 0.0003456200227531285, "loss": 6.4116, "step": 5752 }, { "epoch": 1.9634812286689418, "grad_norm": 2.9204788208007812, "learning_rate": 0.0003455062571103527, "loss": 5.7728, "step": 5753 }, { "epoch": 1.9638225255972697, "grad_norm": 2.9870834350585938, "learning_rate": 0.0003453924914675768, "loss": 6.179, "step": 5754 }, { "epoch": 1.9641638225255973, "grad_norm": 3.1838748455047607, "learning_rate": 0.0003452787258248009, "loss": 5.5057, "step": 5755 }, { "epoch": 1.964505119453925, "grad_norm": 2.916292667388916, "learning_rate": 0.00034516496018202505, "loss": 6.7139, "step": 5756 }, { "epoch": 1.9648464163822525, "grad_norm": 2.948239326477051, "learning_rate": 0.00034505119453924916, "loss": 6.5786, "step": 5757 }, { "epoch": 1.9651877133105802, "grad_norm": 2.867830276489258, "learning_rate": 0.00034493742889647326, "loss": 6.4021, "step": 5758 }, { "epoch": 1.9655290102389078, "grad_norm": 3.0150563716888428, "learning_rate": 0.0003448236632536974, "loss": 5.8162, "step": 5759 }, { "epoch": 1.9658703071672354, "grad_norm": 3.1503114700317383, "learning_rate": 0.0003447098976109215, "loss": 5.9391, "step": 5760 }, { "epoch": 1.9662116040955633, "grad_norm": 2.8918957710266113, "learning_rate": 0.00034459613196814563, "loss": 6.6574, "step": 5761 }, { "epoch": 1.9665529010238907, "grad_norm": 3.020156145095825, "learning_rate": 0.00034448236632536974, "loss": 5.4734, "step": 5762 }, { "epoch": 1.9668941979522185, "grad_norm": 3.0351099967956543, "learning_rate": 0.00034436860068259384, "loss": 5.4351, "step": 5763 }, { "epoch": 1.967235494880546, "grad_norm": 2.8954880237579346, "learning_rate": 0.00034425483503981795, "loss": 6.2349, "step": 5764 }, { "epoch": 1.9675767918088738, "grad_norm": 2.7660810947418213, "learning_rate": 0.0003441410693970421, "loss": 6.1523, "step": 5765 }, { "epoch": 1.9679180887372012, "grad_norm": 11.300498962402344, "learning_rate": 0.0003440273037542662, "loss": 3.3175, "step": 5766 }, { "epoch": 1.968259385665529, "grad_norm": 9.648717880249023, "learning_rate": 0.00034391353811149037, "loss": 4.9295, "step": 5767 }, { "epoch": 1.9686006825938567, "grad_norm": 3.16329288482666, "learning_rate": 0.0003437997724687145, "loss": 5.9512, "step": 5768 }, { "epoch": 1.9689419795221843, "grad_norm": 3.4901375770568848, "learning_rate": 0.0003436860068259386, "loss": 5.3588, "step": 5769 }, { "epoch": 1.969283276450512, "grad_norm": 3.6523478031158447, "learning_rate": 0.00034357224118316274, "loss": 3.9288, "step": 5770 }, { "epoch": 1.9696245733788396, "grad_norm": 3.0849642753601074, "learning_rate": 0.0003434584755403868, "loss": 6.282, "step": 5771 }, { "epoch": 1.9699658703071672, "grad_norm": 3.0182032585144043, "learning_rate": 0.0003433447098976109, "loss": 6.6789, "step": 5772 }, { "epoch": 1.9703071672354948, "grad_norm": 2.9407904148101807, "learning_rate": 0.00034323094425483505, "loss": 6.3049, "step": 5773 }, { "epoch": 1.9706484641638227, "grad_norm": 2.94191837310791, "learning_rate": 0.00034311717861205916, "loss": 6.1876, "step": 5774 }, { "epoch": 1.97098976109215, "grad_norm": 2.8799357414245605, "learning_rate": 0.00034300341296928326, "loss": 6.0039, "step": 5775 }, { "epoch": 1.971331058020478, "grad_norm": 3.216027021408081, "learning_rate": 0.0003428896473265074, "loss": 5.3073, "step": 5776 }, { "epoch": 1.9716723549488053, "grad_norm": 2.9986681938171387, "learning_rate": 0.00034277588168373153, "loss": 5.4347, "step": 5777 }, { "epoch": 1.9720136518771332, "grad_norm": 2.878371477127075, "learning_rate": 0.00034266211604095563, "loss": 6.3747, "step": 5778 }, { "epoch": 1.9723549488054608, "grad_norm": 2.826388359069824, "learning_rate": 0.0003425483503981798, "loss": 6.0383, "step": 5779 }, { "epoch": 1.9726962457337884, "grad_norm": 2.887346029281616, "learning_rate": 0.0003424345847554039, "loss": 5.9541, "step": 5780 }, { "epoch": 1.973037542662116, "grad_norm": 3.0003409385681152, "learning_rate": 0.00034232081911262795, "loss": 5.4532, "step": 5781 }, { "epoch": 1.9733788395904437, "grad_norm": 2.9644722938537598, "learning_rate": 0.0003422070534698521, "loss": 6.1134, "step": 5782 }, { "epoch": 1.9737201365187713, "grad_norm": 3.0157651901245117, "learning_rate": 0.0003420932878270762, "loss": 6.7906, "step": 5783 }, { "epoch": 1.974061433447099, "grad_norm": 3.0431642532348633, "learning_rate": 0.0003419795221843003, "loss": 6.5403, "step": 5784 }, { "epoch": 1.9744027303754266, "grad_norm": 4.550543785095215, "learning_rate": 0.0003418657565415245, "loss": 5.8783, "step": 5785 }, { "epoch": 1.9747440273037542, "grad_norm": 3.095123052597046, "learning_rate": 0.0003417519908987486, "loss": 5.6745, "step": 5786 }, { "epoch": 1.975085324232082, "grad_norm": 2.822967290878296, "learning_rate": 0.0003416382252559727, "loss": 6.32, "step": 5787 }, { "epoch": 1.9754266211604095, "grad_norm": 3.229649066925049, "learning_rate": 0.00034152445961319684, "loss": 6.0626, "step": 5788 }, { "epoch": 1.9757679180887373, "grad_norm": 2.855510950088501, "learning_rate": 0.00034141069397042095, "loss": 5.828, "step": 5789 }, { "epoch": 1.9761092150170647, "grad_norm": 2.838651657104492, "learning_rate": 0.0003412969283276451, "loss": 6.1334, "step": 5790 }, { "epoch": 1.9764505119453926, "grad_norm": 2.9031753540039062, "learning_rate": 0.00034118316268486916, "loss": 6.1161, "step": 5791 }, { "epoch": 1.9767918088737202, "grad_norm": 3.6981256008148193, "learning_rate": 0.00034106939704209326, "loss": 5.6065, "step": 5792 }, { "epoch": 1.9771331058020478, "grad_norm": 2.8721656799316406, "learning_rate": 0.0003409556313993174, "loss": 6.2977, "step": 5793 }, { "epoch": 1.9774744027303754, "grad_norm": 2.9534177780151367, "learning_rate": 0.00034084186575654153, "loss": 5.5651, "step": 5794 }, { "epoch": 1.977815699658703, "grad_norm": 3.0801563262939453, "learning_rate": 0.00034072810011376563, "loss": 5.7325, "step": 5795 }, { "epoch": 1.9781569965870307, "grad_norm": 3.880596399307251, "learning_rate": 0.0003406143344709898, "loss": 4.5394, "step": 5796 }, { "epoch": 1.9784982935153583, "grad_norm": 2.8708319664001465, "learning_rate": 0.0003405005688282139, "loss": 6.3677, "step": 5797 }, { "epoch": 1.978839590443686, "grad_norm": 2.896517753601074, "learning_rate": 0.000340386803185438, "loss": 6.0279, "step": 5798 }, { "epoch": 1.9791808873720136, "grad_norm": 2.8943135738372803, "learning_rate": 0.00034027303754266216, "loss": 6.1411, "step": 5799 }, { "epoch": 1.9795221843003414, "grad_norm": 2.9464333057403564, "learning_rate": 0.00034015927189988627, "loss": 6.6108, "step": 5800 }, { "epoch": 1.9798634812286688, "grad_norm": 2.76023268699646, "learning_rate": 0.0003400455062571103, "loss": 6.2494, "step": 5801 }, { "epoch": 1.9802047781569967, "grad_norm": 2.8224380016326904, "learning_rate": 0.0003399317406143345, "loss": 6.3798, "step": 5802 }, { "epoch": 1.980546075085324, "grad_norm": 6.953861713409424, "learning_rate": 0.0003398179749715586, "loss": 6.2183, "step": 5803 }, { "epoch": 1.980887372013652, "grad_norm": 3.372380256652832, "learning_rate": 0.0003397042093287827, "loss": 5.2131, "step": 5804 }, { "epoch": 1.9812286689419796, "grad_norm": 2.964792490005493, "learning_rate": 0.00033959044368600685, "loss": 6.8906, "step": 5805 }, { "epoch": 1.9815699658703072, "grad_norm": 9.055648803710938, "learning_rate": 0.00033947667804323095, "loss": 7.0398, "step": 5806 }, { "epoch": 1.9819112627986348, "grad_norm": 3.136854648590088, "learning_rate": 0.00033936291240045506, "loss": 5.7341, "step": 5807 }, { "epoch": 1.9822525597269625, "grad_norm": 3.117483139038086, "learning_rate": 0.0003392491467576792, "loss": 6.1711, "step": 5808 }, { "epoch": 1.98259385665529, "grad_norm": 3.032315492630005, "learning_rate": 0.0003391353811149033, "loss": 5.4116, "step": 5809 }, { "epoch": 1.9829351535836177, "grad_norm": 2.858267068862915, "learning_rate": 0.0003390216154721274, "loss": 6.2049, "step": 5810 }, { "epoch": 1.9832764505119453, "grad_norm": 2.9900565147399902, "learning_rate": 0.00033890784982935153, "loss": 5.5319, "step": 5811 }, { "epoch": 1.983617747440273, "grad_norm": 4.313586711883545, "learning_rate": 0.00033879408418657563, "loss": 4.6686, "step": 5812 }, { "epoch": 1.9839590443686008, "grad_norm": 2.9994773864746094, "learning_rate": 0.0003386803185437998, "loss": 5.9989, "step": 5813 }, { "epoch": 1.9843003412969282, "grad_norm": 2.9691319465637207, "learning_rate": 0.0003385665529010239, "loss": 6.2986, "step": 5814 }, { "epoch": 1.984641638225256, "grad_norm": 2.8935835361480713, "learning_rate": 0.000338452787258248, "loss": 5.6747, "step": 5815 }, { "epoch": 1.9849829351535835, "grad_norm": 3.180413007736206, "learning_rate": 0.00033833902161547216, "loss": 5.2211, "step": 5816 }, { "epoch": 1.9853242320819113, "grad_norm": 3.1071324348449707, "learning_rate": 0.00033822525597269627, "loss": 6.0189, "step": 5817 }, { "epoch": 1.985665529010239, "grad_norm": 3.498764753341675, "learning_rate": 0.00033811149032992037, "loss": 5.6862, "step": 5818 }, { "epoch": 1.9860068259385666, "grad_norm": 2.8438425064086914, "learning_rate": 0.00033799772468714453, "loss": 6.1487, "step": 5819 }, { "epoch": 1.9863481228668942, "grad_norm": 2.9216058254241943, "learning_rate": 0.0003378839590443686, "loss": 6.1612, "step": 5820 }, { "epoch": 1.9866894197952218, "grad_norm": 2.917191743850708, "learning_rate": 0.0003377701934015927, "loss": 6.3926, "step": 5821 }, { "epoch": 1.9870307167235495, "grad_norm": 2.862936496734619, "learning_rate": 0.00033765642775881685, "loss": 6.1182, "step": 5822 }, { "epoch": 1.987372013651877, "grad_norm": 3.223823308944702, "learning_rate": 0.00033754266211604095, "loss": 5.9686, "step": 5823 }, { "epoch": 1.9877133105802047, "grad_norm": 3.052494764328003, "learning_rate": 0.00033742889647326506, "loss": 5.3687, "step": 5824 }, { "epoch": 1.9880546075085324, "grad_norm": 3.0097758769989014, "learning_rate": 0.0003373151308304892, "loss": 6.1109, "step": 5825 }, { "epoch": 1.9883959044368602, "grad_norm": 2.950054168701172, "learning_rate": 0.0003372013651877133, "loss": 6.4818, "step": 5826 }, { "epoch": 1.9887372013651876, "grad_norm": 2.8829801082611084, "learning_rate": 0.0003370875995449374, "loss": 6.2982, "step": 5827 }, { "epoch": 1.9890784982935155, "grad_norm": 2.908803701400757, "learning_rate": 0.0003369738339021616, "loss": 6.0992, "step": 5828 }, { "epoch": 1.9894197952218429, "grad_norm": 3.0301074981689453, "learning_rate": 0.0003368600682593857, "loss": 6.7445, "step": 5829 }, { "epoch": 1.9897610921501707, "grad_norm": 2.9564156532287598, "learning_rate": 0.0003367463026166098, "loss": 6.5264, "step": 5830 }, { "epoch": 1.9901023890784983, "grad_norm": 2.951019763946533, "learning_rate": 0.0003366325369738339, "loss": 5.8757, "step": 5831 }, { "epoch": 1.990443686006826, "grad_norm": 4.174768924713135, "learning_rate": 0.000336518771331058, "loss": 4.409, "step": 5832 }, { "epoch": 1.9907849829351536, "grad_norm": 6.287966251373291, "learning_rate": 0.00033640500568828216, "loss": 5.3929, "step": 5833 }, { "epoch": 1.9911262798634812, "grad_norm": 2.996116876602173, "learning_rate": 0.00033629124004550627, "loss": 6.4399, "step": 5834 }, { "epoch": 1.9914675767918089, "grad_norm": 3.3284964561462402, "learning_rate": 0.0003361774744027304, "loss": 5.0079, "step": 5835 }, { "epoch": 1.9918088737201365, "grad_norm": 2.8886935710906982, "learning_rate": 0.00033606370875995453, "loss": 6.3832, "step": 5836 }, { "epoch": 1.9921501706484641, "grad_norm": 2.967557907104492, "learning_rate": 0.00033594994311717864, "loss": 6.3409, "step": 5837 }, { "epoch": 1.9924914675767917, "grad_norm": 2.8980183601379395, "learning_rate": 0.00033583617747440274, "loss": 6.3379, "step": 5838 }, { "epoch": 1.9928327645051196, "grad_norm": 3.0234856605529785, "learning_rate": 0.00033572241183162685, "loss": 6.155, "step": 5839 }, { "epoch": 1.993174061433447, "grad_norm": 2.7687742710113525, "learning_rate": 0.00033560864618885095, "loss": 6.3193, "step": 5840 }, { "epoch": 1.9935153583617748, "grad_norm": 2.851742744445801, "learning_rate": 0.00033549488054607506, "loss": 6.1613, "step": 5841 }, { "epoch": 1.9938566552901023, "grad_norm": 2.8196914196014404, "learning_rate": 0.0003353811149032992, "loss": 6.0988, "step": 5842 }, { "epoch": 1.99419795221843, "grad_norm": 2.847468614578247, "learning_rate": 0.0003352673492605233, "loss": 6.8367, "step": 5843 }, { "epoch": 1.9945392491467577, "grad_norm": 2.88217830657959, "learning_rate": 0.0003351535836177474, "loss": 6.3181, "step": 5844 }, { "epoch": 1.9948805460750854, "grad_norm": 2.8297886848449707, "learning_rate": 0.0003350398179749716, "loss": 5.9361, "step": 5845 }, { "epoch": 1.995221843003413, "grad_norm": 2.9879589080810547, "learning_rate": 0.0003349260523321957, "loss": 6.2692, "step": 5846 }, { "epoch": 1.9955631399317406, "grad_norm": 2.923250198364258, "learning_rate": 0.0003348122866894198, "loss": 6.4919, "step": 5847 }, { "epoch": 1.9959044368600682, "grad_norm": 2.852890968322754, "learning_rate": 0.00033469852104664395, "loss": 5.8794, "step": 5848 }, { "epoch": 1.9962457337883959, "grad_norm": 4.5131001472473145, "learning_rate": 0.000334584755403868, "loss": 4.9364, "step": 5849 }, { "epoch": 1.9965870307167235, "grad_norm": 4.635778903961182, "learning_rate": 0.0003344709897610921, "loss": 5.363, "step": 5850 }, { "epoch": 1.9969283276450511, "grad_norm": 2.963787317276001, "learning_rate": 0.00033435722411831627, "loss": 6.3649, "step": 5851 }, { "epoch": 1.997269624573379, "grad_norm": 2.9836738109588623, "learning_rate": 0.0003342434584755404, "loss": 6.69, "step": 5852 }, { "epoch": 1.9976109215017064, "grad_norm": 2.863739252090454, "learning_rate": 0.00033412969283276453, "loss": 6.4131, "step": 5853 }, { "epoch": 1.9979522184300342, "grad_norm": 2.899862289428711, "learning_rate": 0.00033401592718998864, "loss": 6.0362, "step": 5854 }, { "epoch": 1.9982935153583616, "grad_norm": 2.9389188289642334, "learning_rate": 0.00033390216154721274, "loss": 6.5771, "step": 5855 }, { "epoch": 1.9986348122866895, "grad_norm": 5.9408183097839355, "learning_rate": 0.0003337883959044369, "loss": 5.5061, "step": 5856 }, { "epoch": 1.9989761092150171, "grad_norm": 2.952606201171875, "learning_rate": 0.000333674630261661, "loss": 6.0148, "step": 5857 }, { "epoch": 1.9993174061433447, "grad_norm": 2.8606393337249756, "learning_rate": 0.0003335608646188851, "loss": 6.0906, "step": 5858 }, { "epoch": 1.9996587030716724, "grad_norm": 2.852064371109009, "learning_rate": 0.0003334470989761092, "loss": 6.4203, "step": 5859 }, { "epoch": 2.0, "grad_norm": 2.8471901416778564, "learning_rate": 0.0003333333333333333, "loss": 6.2409, "step": 5860 }, { "epoch": 2.000341296928328, "grad_norm": 2.941194772720337, "learning_rate": 0.00033321956769055743, "loss": 6.3285, "step": 5861 }, { "epoch": 2.0006825938566553, "grad_norm": 3.083709955215454, "learning_rate": 0.0003331058020477816, "loss": 6.2642, "step": 5862 }, { "epoch": 2.001023890784983, "grad_norm": 3.8229711055755615, "learning_rate": 0.0003329920364050057, "loss": 3.7616, "step": 5863 }, { "epoch": 2.0013651877133105, "grad_norm": 2.923698902130127, "learning_rate": 0.0003328782707622298, "loss": 5.66, "step": 5864 }, { "epoch": 2.0017064846416384, "grad_norm": 3.1256601810455322, "learning_rate": 0.00033276450511945396, "loss": 5.7579, "step": 5865 }, { "epoch": 2.0020477815699658, "grad_norm": 3.4216883182525635, "learning_rate": 0.00033265073947667806, "loss": 5.9409, "step": 5866 }, { "epoch": 2.0023890784982936, "grad_norm": 3.03593373298645, "learning_rate": 0.00033253697383390217, "loss": 5.8932, "step": 5867 }, { "epoch": 2.002730375426621, "grad_norm": 2.919698715209961, "learning_rate": 0.0003324232081911263, "loss": 5.8919, "step": 5868 }, { "epoch": 2.003071672354949, "grad_norm": 2.8888025283813477, "learning_rate": 0.0003323094425483504, "loss": 5.7327, "step": 5869 }, { "epoch": 2.0034129692832763, "grad_norm": 2.8589115142822266, "learning_rate": 0.0003321956769055745, "loss": 5.6514, "step": 5870 }, { "epoch": 2.003754266211604, "grad_norm": 2.8871238231658936, "learning_rate": 0.00033208191126279864, "loss": 5.9583, "step": 5871 }, { "epoch": 2.0040955631399315, "grad_norm": 2.953996181488037, "learning_rate": 0.00033196814562002274, "loss": 5.6646, "step": 5872 }, { "epoch": 2.0044368600682594, "grad_norm": 2.8244423866271973, "learning_rate": 0.0003318543799772469, "loss": 6.174, "step": 5873 }, { "epoch": 2.0047781569965872, "grad_norm": 2.8501133918762207, "learning_rate": 0.000331740614334471, "loss": 5.4038, "step": 5874 }, { "epoch": 2.0051194539249146, "grad_norm": 2.8104753494262695, "learning_rate": 0.0003316268486916951, "loss": 6.5321, "step": 5875 }, { "epoch": 2.0054607508532425, "grad_norm": 2.9065990447998047, "learning_rate": 0.00033151308304891927, "loss": 6.6124, "step": 5876 }, { "epoch": 2.00580204778157, "grad_norm": 2.9225258827209473, "learning_rate": 0.0003313993174061434, "loss": 6.2582, "step": 5877 }, { "epoch": 2.0061433447098977, "grad_norm": 2.842495918273926, "learning_rate": 0.00033128555176336743, "loss": 6.2283, "step": 5878 }, { "epoch": 2.006484641638225, "grad_norm": 2.993636131286621, "learning_rate": 0.0003311717861205916, "loss": 6.1056, "step": 5879 }, { "epoch": 2.006825938566553, "grad_norm": 2.954638957977295, "learning_rate": 0.0003310580204778157, "loss": 6.1397, "step": 5880 }, { "epoch": 2.0071672354948804, "grad_norm": 2.83888840675354, "learning_rate": 0.0003309442548350398, "loss": 6.3731, "step": 5881 }, { "epoch": 2.0075085324232083, "grad_norm": 2.950007438659668, "learning_rate": 0.00033083048919226396, "loss": 5.9267, "step": 5882 }, { "epoch": 2.0078498293515357, "grad_norm": 2.8402795791625977, "learning_rate": 0.00033071672354948806, "loss": 6.5702, "step": 5883 }, { "epoch": 2.0081911262798635, "grad_norm": 2.827852964401245, "learning_rate": 0.00033060295790671217, "loss": 6.7476, "step": 5884 }, { "epoch": 2.008532423208191, "grad_norm": 2.843194007873535, "learning_rate": 0.0003304891922639363, "loss": 5.988, "step": 5885 }, { "epoch": 2.0088737201365188, "grad_norm": 2.8419055938720703, "learning_rate": 0.00033037542662116043, "loss": 5.5696, "step": 5886 }, { "epoch": 2.0092150170648466, "grad_norm": 2.968064308166504, "learning_rate": 0.00033026166097838454, "loss": 6.4374, "step": 5887 }, { "epoch": 2.009556313993174, "grad_norm": 3.4870381355285645, "learning_rate": 0.00033014789533560864, "loss": 4.7893, "step": 5888 }, { "epoch": 2.009897610921502, "grad_norm": 6.496885299682617, "learning_rate": 0.00033003412969283275, "loss": 4.7551, "step": 5889 }, { "epoch": 2.0102389078498293, "grad_norm": 2.9047958850860596, "learning_rate": 0.00032992036405005685, "loss": 5.9317, "step": 5890 }, { "epoch": 2.010580204778157, "grad_norm": 3.021362781524658, "learning_rate": 0.000329806598407281, "loss": 5.6877, "step": 5891 }, { "epoch": 2.0109215017064845, "grad_norm": 3.03979754447937, "learning_rate": 0.0003296928327645051, "loss": 5.5397, "step": 5892 }, { "epoch": 2.0112627986348124, "grad_norm": 3.0143215656280518, "learning_rate": 0.0003295790671217293, "loss": 6.1574, "step": 5893 }, { "epoch": 2.01160409556314, "grad_norm": 2.9029927253723145, "learning_rate": 0.0003294653014789534, "loss": 6.3871, "step": 5894 }, { "epoch": 2.0119453924914676, "grad_norm": 2.924941062927246, "learning_rate": 0.0003293515358361775, "loss": 5.6172, "step": 5895 }, { "epoch": 2.012286689419795, "grad_norm": 2.9214353561401367, "learning_rate": 0.00032923777019340164, "loss": 6.5576, "step": 5896 }, { "epoch": 2.012627986348123, "grad_norm": 2.9179751873016357, "learning_rate": 0.00032912400455062575, "loss": 6.3363, "step": 5897 }, { "epoch": 2.0129692832764503, "grad_norm": 2.8579938411712646, "learning_rate": 0.0003290102389078498, "loss": 6.3428, "step": 5898 }, { "epoch": 2.013310580204778, "grad_norm": 3.0229973793029785, "learning_rate": 0.00032889647326507396, "loss": 6.2619, "step": 5899 }, { "epoch": 2.013651877133106, "grad_norm": 4.09330940246582, "learning_rate": 0.00032878270762229806, "loss": 4.4237, "step": 5900 }, { "epoch": 2.0139931740614334, "grad_norm": 3.2335963249206543, "learning_rate": 0.00032866894197952217, "loss": 5.7218, "step": 5901 }, { "epoch": 2.0143344709897613, "grad_norm": 3.3706767559051514, "learning_rate": 0.0003285551763367463, "loss": 5.6349, "step": 5902 }, { "epoch": 2.0146757679180887, "grad_norm": 3.0019240379333496, "learning_rate": 0.00032844141069397043, "loss": 5.9354, "step": 5903 }, { "epoch": 2.0150170648464165, "grad_norm": 2.8847649097442627, "learning_rate": 0.00032832764505119454, "loss": 6.571, "step": 5904 }, { "epoch": 2.015358361774744, "grad_norm": 2.9488613605499268, "learning_rate": 0.0003282138794084187, "loss": 6.7779, "step": 5905 }, { "epoch": 2.0156996587030718, "grad_norm": 2.824702739715576, "learning_rate": 0.0003281001137656428, "loss": 6.7856, "step": 5906 }, { "epoch": 2.016040955631399, "grad_norm": 2.872587203979492, "learning_rate": 0.00032798634812286685, "loss": 6.1544, "step": 5907 }, { "epoch": 2.016382252559727, "grad_norm": 3.073383092880249, "learning_rate": 0.000327872582480091, "loss": 6.0585, "step": 5908 }, { "epoch": 2.0167235494880544, "grad_norm": 3.2500085830688477, "learning_rate": 0.0003277588168373151, "loss": 4.9903, "step": 5909 }, { "epoch": 2.0170648464163823, "grad_norm": 3.061755895614624, "learning_rate": 0.0003276450511945392, "loss": 5.9263, "step": 5910 }, { "epoch": 2.0174061433447097, "grad_norm": 3.07401442527771, "learning_rate": 0.0003275312855517634, "loss": 6.5287, "step": 5911 }, { "epoch": 2.0177474402730375, "grad_norm": 2.897965908050537, "learning_rate": 0.0003274175199089875, "loss": 6.4134, "step": 5912 }, { "epoch": 2.0180887372013654, "grad_norm": 3.0315048694610596, "learning_rate": 0.00032730375426621164, "loss": 5.2327, "step": 5913 }, { "epoch": 2.018430034129693, "grad_norm": 2.9312515258789062, "learning_rate": 0.00032718998862343575, "loss": 5.4876, "step": 5914 }, { "epoch": 2.0187713310580206, "grad_norm": 2.948835849761963, "learning_rate": 0.00032707622298065985, "loss": 6.1955, "step": 5915 }, { "epoch": 2.019112627986348, "grad_norm": 2.9583792686462402, "learning_rate": 0.000326962457337884, "loss": 4.9185, "step": 5916 }, { "epoch": 2.019453924914676, "grad_norm": 2.9608967304229736, "learning_rate": 0.00032684869169510806, "loss": 6.5537, "step": 5917 }, { "epoch": 2.0197952218430033, "grad_norm": 2.8537795543670654, "learning_rate": 0.00032673492605233217, "loss": 6.518, "step": 5918 }, { "epoch": 2.020136518771331, "grad_norm": 3.467559576034546, "learning_rate": 0.0003266211604095563, "loss": 5.4909, "step": 5919 }, { "epoch": 2.0204778156996586, "grad_norm": 2.9610671997070312, "learning_rate": 0.00032650739476678043, "loss": 5.8352, "step": 5920 }, { "epoch": 2.0208191126279864, "grad_norm": 2.9346728324890137, "learning_rate": 0.00032639362912400454, "loss": 5.7961, "step": 5921 }, { "epoch": 2.021160409556314, "grad_norm": 3.0085277557373047, "learning_rate": 0.0003262798634812287, "loss": 5.5517, "step": 5922 }, { "epoch": 2.0215017064846417, "grad_norm": 2.949371337890625, "learning_rate": 0.0003261660978384528, "loss": 6.364, "step": 5923 }, { "epoch": 2.021843003412969, "grad_norm": 4.0263752937316895, "learning_rate": 0.0003260523321956769, "loss": 3.3965, "step": 5924 }, { "epoch": 2.022184300341297, "grad_norm": 3.15136456489563, "learning_rate": 0.00032593856655290107, "loss": 5.744, "step": 5925 }, { "epoch": 2.0225255972696248, "grad_norm": 2.9317257404327393, "learning_rate": 0.00032582480091012517, "loss": 6.2774, "step": 5926 }, { "epoch": 2.022866894197952, "grad_norm": 2.901693344116211, "learning_rate": 0.0003257110352673492, "loss": 5.9736, "step": 5927 }, { "epoch": 2.02320819112628, "grad_norm": 2.804474353790283, "learning_rate": 0.0003255972696245734, "loss": 6.2335, "step": 5928 }, { "epoch": 2.0235494880546074, "grad_norm": 2.836078405380249, "learning_rate": 0.0003254835039817975, "loss": 6.5145, "step": 5929 }, { "epoch": 2.0238907849829353, "grad_norm": 2.8914759159088135, "learning_rate": 0.0003253697383390216, "loss": 5.9104, "step": 5930 }, { "epoch": 2.0242320819112627, "grad_norm": 3.001803398132324, "learning_rate": 0.00032525597269624575, "loss": 5.733, "step": 5931 }, { "epoch": 2.0245733788395905, "grad_norm": 3.05202054977417, "learning_rate": 0.00032514220705346985, "loss": 5.7444, "step": 5932 }, { "epoch": 2.024914675767918, "grad_norm": 2.8296566009521484, "learning_rate": 0.000325028441410694, "loss": 6.3294, "step": 5933 }, { "epoch": 2.025255972696246, "grad_norm": 2.734096050262451, "learning_rate": 0.0003249146757679181, "loss": 6.2183, "step": 5934 }, { "epoch": 2.025597269624573, "grad_norm": 2.8434300422668457, "learning_rate": 0.0003248009101251422, "loss": 5.6148, "step": 5935 }, { "epoch": 2.025938566552901, "grad_norm": 3.7026021480560303, "learning_rate": 0.0003246871444823664, "loss": 5.4888, "step": 5936 }, { "epoch": 2.0262798634812285, "grad_norm": 2.7599542140960693, "learning_rate": 0.00032457337883959043, "loss": 5.6935, "step": 5937 }, { "epoch": 2.0266211604095563, "grad_norm": 4.715770244598389, "learning_rate": 0.00032445961319681454, "loss": 5.3177, "step": 5938 }, { "epoch": 2.026962457337884, "grad_norm": 2.897782802581787, "learning_rate": 0.0003243458475540387, "loss": 5.7596, "step": 5939 }, { "epoch": 2.0273037542662116, "grad_norm": 2.934124708175659, "learning_rate": 0.0003242320819112628, "loss": 6.1473, "step": 5940 }, { "epoch": 2.0276450511945394, "grad_norm": 2.9181840419769287, "learning_rate": 0.0003241183162684869, "loss": 5.8151, "step": 5941 }, { "epoch": 2.027986348122867, "grad_norm": 2.937746286392212, "learning_rate": 0.00032400455062571107, "loss": 5.9434, "step": 5942 }, { "epoch": 2.0283276450511947, "grad_norm": 3.7255730628967285, "learning_rate": 0.00032389078498293517, "loss": 5.0394, "step": 5943 }, { "epoch": 2.028668941979522, "grad_norm": 2.898202896118164, "learning_rate": 0.0003237770193401593, "loss": 5.7591, "step": 5944 }, { "epoch": 2.02901023890785, "grad_norm": 3.671320915222168, "learning_rate": 0.00032366325369738344, "loss": 6.2918, "step": 5945 }, { "epoch": 2.0293515358361773, "grad_norm": 2.9321327209472656, "learning_rate": 0.0003235494880546075, "loss": 6.0174, "step": 5946 }, { "epoch": 2.029692832764505, "grad_norm": 2.786679744720459, "learning_rate": 0.0003234357224118316, "loss": 6.4303, "step": 5947 }, { "epoch": 2.0300341296928326, "grad_norm": 2.862600088119507, "learning_rate": 0.00032332195676905575, "loss": 6.0229, "step": 5948 }, { "epoch": 2.0303754266211604, "grad_norm": 2.9102251529693604, "learning_rate": 0.00032320819112627985, "loss": 5.6793, "step": 5949 }, { "epoch": 2.030716723549488, "grad_norm": 2.8339219093322754, "learning_rate": 0.00032309442548350396, "loss": 6.1311, "step": 5950 }, { "epoch": 2.0310580204778157, "grad_norm": 2.7801735401153564, "learning_rate": 0.0003229806598407281, "loss": 6.1714, "step": 5951 }, { "epoch": 2.0313993174061435, "grad_norm": 2.792600631713867, "learning_rate": 0.0003228668941979522, "loss": 6.4515, "step": 5952 }, { "epoch": 2.031740614334471, "grad_norm": 2.876671075820923, "learning_rate": 0.0003227531285551764, "loss": 6.3113, "step": 5953 }, { "epoch": 2.032081911262799, "grad_norm": 2.9454519748687744, "learning_rate": 0.0003226393629124005, "loss": 6.5142, "step": 5954 }, { "epoch": 2.032423208191126, "grad_norm": 2.833214044570923, "learning_rate": 0.0003225255972696246, "loss": 6.0907, "step": 5955 }, { "epoch": 2.032764505119454, "grad_norm": 4.851685047149658, "learning_rate": 0.0003224118316268487, "loss": 5.22, "step": 5956 }, { "epoch": 2.0331058020477815, "grad_norm": 3.4849393367767334, "learning_rate": 0.0003222980659840728, "loss": 5.0735, "step": 5957 }, { "epoch": 2.0334470989761093, "grad_norm": 3.021411180496216, "learning_rate": 0.0003221843003412969, "loss": 6.2273, "step": 5958 }, { "epoch": 2.0337883959044367, "grad_norm": 2.9004580974578857, "learning_rate": 0.00032207053469852107, "loss": 6.6881, "step": 5959 }, { "epoch": 2.0341296928327646, "grad_norm": 2.9186768531799316, "learning_rate": 0.00032195676905574517, "loss": 5.6578, "step": 5960 }, { "epoch": 2.034470989761092, "grad_norm": 2.9466514587402344, "learning_rate": 0.0003218430034129693, "loss": 6.1247, "step": 5961 }, { "epoch": 2.03481228668942, "grad_norm": 2.9205827713012695, "learning_rate": 0.00032172923777019344, "loss": 5.8445, "step": 5962 }, { "epoch": 2.0351535836177472, "grad_norm": 2.9399664402008057, "learning_rate": 0.00032161547212741754, "loss": 6.3061, "step": 5963 }, { "epoch": 2.035494880546075, "grad_norm": 2.7923221588134766, "learning_rate": 0.00032150170648464165, "loss": 6.9318, "step": 5964 }, { "epoch": 2.035836177474403, "grad_norm": 2.841721773147583, "learning_rate": 0.0003213879408418658, "loss": 6.2328, "step": 5965 }, { "epoch": 2.0361774744027303, "grad_norm": 2.805899143218994, "learning_rate": 0.00032127417519908986, "loss": 5.0354, "step": 5966 }, { "epoch": 2.036518771331058, "grad_norm": 2.8029048442840576, "learning_rate": 0.00032116040955631396, "loss": 6.3708, "step": 5967 }, { "epoch": 2.0368600682593856, "grad_norm": 2.7926340103149414, "learning_rate": 0.0003210466439135381, "loss": 5.9584, "step": 5968 }, { "epoch": 2.0372013651877134, "grad_norm": 2.774442672729492, "learning_rate": 0.0003209328782707622, "loss": 6.4848, "step": 5969 }, { "epoch": 2.037542662116041, "grad_norm": 4.830069065093994, "learning_rate": 0.00032081911262798633, "loss": 5.8823, "step": 5970 }, { "epoch": 2.0378839590443687, "grad_norm": 2.8296518325805664, "learning_rate": 0.0003207053469852105, "loss": 5.8528, "step": 5971 }, { "epoch": 2.038225255972696, "grad_norm": 2.9755330085754395, "learning_rate": 0.0003205915813424346, "loss": 6.5889, "step": 5972 }, { "epoch": 2.038566552901024, "grad_norm": 3.333840847015381, "learning_rate": 0.0003204778156996587, "loss": 5.3275, "step": 5973 }, { "epoch": 2.0389078498293514, "grad_norm": 2.9385743141174316, "learning_rate": 0.00032036405005688286, "loss": 6.0905, "step": 5974 }, { "epoch": 2.039249146757679, "grad_norm": 4.615128040313721, "learning_rate": 0.00032025028441410696, "loss": 5.379, "step": 5975 }, { "epoch": 2.0395904436860066, "grad_norm": 2.938904047012329, "learning_rate": 0.00032013651877133107, "loss": 6.5808, "step": 5976 }, { "epoch": 2.0399317406143345, "grad_norm": 2.9147865772247314, "learning_rate": 0.00032002275312855517, "loss": 6.7389, "step": 5977 }, { "epoch": 2.0402730375426623, "grad_norm": 2.8320443630218506, "learning_rate": 0.0003199089874857793, "loss": 5.7683, "step": 5978 }, { "epoch": 2.0406143344709897, "grad_norm": 3.8267099857330322, "learning_rate": 0.00031979522184300344, "loss": 4.3207, "step": 5979 }, { "epoch": 2.0409556313993176, "grad_norm": 3.4486052989959717, "learning_rate": 0.00031968145620022754, "loss": 6.0473, "step": 5980 }, { "epoch": 2.041296928327645, "grad_norm": 3.4830849170684814, "learning_rate": 0.00031956769055745165, "loss": 5.3791, "step": 5981 }, { "epoch": 2.041638225255973, "grad_norm": 2.9314374923706055, "learning_rate": 0.0003194539249146758, "loss": 6.1228, "step": 5982 }, { "epoch": 2.0419795221843002, "grad_norm": 3.42529034614563, "learning_rate": 0.0003193401592718999, "loss": 5.5453, "step": 5983 }, { "epoch": 2.042320819112628, "grad_norm": 2.903790235519409, "learning_rate": 0.000319226393629124, "loss": 5.9758, "step": 5984 }, { "epoch": 2.0426621160409555, "grad_norm": 3.009942054748535, "learning_rate": 0.0003191126279863481, "loss": 6.5988, "step": 5985 }, { "epoch": 2.0430034129692833, "grad_norm": 2.9778008460998535, "learning_rate": 0.0003189988623435722, "loss": 6.0532, "step": 5986 }, { "epoch": 2.0433447098976107, "grad_norm": 2.8316783905029297, "learning_rate": 0.00031888509670079633, "loss": 6.3487, "step": 5987 }, { "epoch": 2.0436860068259386, "grad_norm": 2.837663173675537, "learning_rate": 0.0003187713310580205, "loss": 6.4316, "step": 5988 }, { "epoch": 2.044027303754266, "grad_norm": 2.863551616668701, "learning_rate": 0.0003186575654152446, "loss": 5.7195, "step": 5989 }, { "epoch": 2.044368600682594, "grad_norm": 2.814051389694214, "learning_rate": 0.0003185437997724687, "loss": 6.0491, "step": 5990 }, { "epoch": 2.0447098976109217, "grad_norm": 2.8193931579589844, "learning_rate": 0.00031843003412969286, "loss": 6.4645, "step": 5991 }, { "epoch": 2.045051194539249, "grad_norm": 2.9294307231903076, "learning_rate": 0.00031831626848691696, "loss": 5.8573, "step": 5992 }, { "epoch": 2.045392491467577, "grad_norm": 2.906787395477295, "learning_rate": 0.00031820250284414107, "loss": 6.3565, "step": 5993 }, { "epoch": 2.0457337883959044, "grad_norm": 3.021745443344116, "learning_rate": 0.00031808873720136523, "loss": 5.4691, "step": 5994 }, { "epoch": 2.046075085324232, "grad_norm": 2.877673387527466, "learning_rate": 0.0003179749715585893, "loss": 5.5119, "step": 5995 }, { "epoch": 2.0464163822525596, "grad_norm": 3.2106802463531494, "learning_rate": 0.00031786120591581344, "loss": 5.6146, "step": 5996 }, { "epoch": 2.0467576791808875, "grad_norm": 2.90977144241333, "learning_rate": 0.00031774744027303754, "loss": 6.2368, "step": 5997 }, { "epoch": 2.047098976109215, "grad_norm": 2.888395309448242, "learning_rate": 0.00031763367463026165, "loss": 6.3792, "step": 5998 }, { "epoch": 2.0474402730375427, "grad_norm": 2.8118643760681152, "learning_rate": 0.0003175199089874858, "loss": 6.0576, "step": 5999 }, { "epoch": 2.04778156996587, "grad_norm": 2.768461227416992, "learning_rate": 0.0003174061433447099, "loss": 6.3125, "step": 6000 }, { "epoch": 2.048122866894198, "grad_norm": 2.7696850299835205, "learning_rate": 0.000317292377701934, "loss": 6.3775, "step": 6001 }, { "epoch": 2.0484641638225254, "grad_norm": 2.7902398109436035, "learning_rate": 0.0003171786120591582, "loss": 6.0764, "step": 6002 }, { "epoch": 2.0488054607508532, "grad_norm": 2.9154062271118164, "learning_rate": 0.0003170648464163823, "loss": 6.4892, "step": 6003 }, { "epoch": 2.049146757679181, "grad_norm": 2.908606767654419, "learning_rate": 0.0003169510807736064, "loss": 6.5462, "step": 6004 }, { "epoch": 2.0494880546075085, "grad_norm": 2.8876357078552246, "learning_rate": 0.0003168373151308305, "loss": 6.7508, "step": 6005 }, { "epoch": 2.0498293515358363, "grad_norm": 2.836838960647583, "learning_rate": 0.0003167235494880546, "loss": 6.5657, "step": 6006 }, { "epoch": 2.0501706484641637, "grad_norm": 2.8034183979034424, "learning_rate": 0.0003166097838452787, "loss": 6.3752, "step": 6007 }, { "epoch": 2.0505119453924916, "grad_norm": 2.8830206394195557, "learning_rate": 0.00031649601820250286, "loss": 6.294, "step": 6008 }, { "epoch": 2.050853242320819, "grad_norm": 2.811297655105591, "learning_rate": 0.00031638225255972696, "loss": 6.1428, "step": 6009 }, { "epoch": 2.051194539249147, "grad_norm": 2.8806638717651367, "learning_rate": 0.00031626848691695107, "loss": 6.192, "step": 6010 }, { "epoch": 2.0515358361774743, "grad_norm": 2.9297285079956055, "learning_rate": 0.00031615472127417523, "loss": 6.2655, "step": 6011 }, { "epoch": 2.051877133105802, "grad_norm": 3.0114855766296387, "learning_rate": 0.00031604095563139933, "loss": 5.8824, "step": 6012 }, { "epoch": 2.0522184300341295, "grad_norm": 3.1099727153778076, "learning_rate": 0.00031592718998862344, "loss": 5.416, "step": 6013 }, { "epoch": 2.0525597269624574, "grad_norm": 2.8879382610321045, "learning_rate": 0.00031581342434584754, "loss": 5.822, "step": 6014 }, { "epoch": 2.0529010238907848, "grad_norm": 2.9049463272094727, "learning_rate": 0.00031569965870307165, "loss": 5.4447, "step": 6015 }, { "epoch": 2.0532423208191126, "grad_norm": 2.8603172302246094, "learning_rate": 0.0003155858930602958, "loss": 6.007, "step": 6016 }, { "epoch": 2.0535836177474405, "grad_norm": 3.1179702281951904, "learning_rate": 0.0003154721274175199, "loss": 6.4848, "step": 6017 }, { "epoch": 2.053924914675768, "grad_norm": 3.008000612258911, "learning_rate": 0.000315358361774744, "loss": 6.0294, "step": 6018 }, { "epoch": 2.0542662116040957, "grad_norm": 2.8747920989990234, "learning_rate": 0.0003152445961319682, "loss": 6.0033, "step": 6019 }, { "epoch": 2.054607508532423, "grad_norm": 2.8617520332336426, "learning_rate": 0.0003151308304891923, "loss": 6.4272, "step": 6020 }, { "epoch": 2.054948805460751, "grad_norm": 2.861337900161743, "learning_rate": 0.0003150170648464164, "loss": 6.1443, "step": 6021 }, { "epoch": 2.0552901023890784, "grad_norm": 2.852907180786133, "learning_rate": 0.00031490329920364055, "loss": 5.7435, "step": 6022 }, { "epoch": 2.0556313993174062, "grad_norm": 3.9914369583129883, "learning_rate": 0.00031478953356086465, "loss": 5.5631, "step": 6023 }, { "epoch": 2.0559726962457336, "grad_norm": 3.1209657192230225, "learning_rate": 0.0003146757679180887, "loss": 5.8574, "step": 6024 }, { "epoch": 2.0563139931740615, "grad_norm": 2.9207725524902344, "learning_rate": 0.00031456200227531286, "loss": 6.2952, "step": 6025 }, { "epoch": 2.056655290102389, "grad_norm": 3.0054728984832764, "learning_rate": 0.00031444823663253697, "loss": 5.3445, "step": 6026 }, { "epoch": 2.0569965870307167, "grad_norm": 3.291370391845703, "learning_rate": 0.00031433447098976107, "loss": 4.9664, "step": 6027 }, { "epoch": 2.057337883959044, "grad_norm": 2.8859901428222656, "learning_rate": 0.00031422070534698523, "loss": 5.9449, "step": 6028 }, { "epoch": 2.057679180887372, "grad_norm": 3.0395874977111816, "learning_rate": 0.00031410693970420933, "loss": 6.0376, "step": 6029 }, { "epoch": 2.0580204778157, "grad_norm": 2.9008989334106445, "learning_rate": 0.00031399317406143344, "loss": 6.4105, "step": 6030 }, { "epoch": 2.0583617747440273, "grad_norm": 2.9411637783050537, "learning_rate": 0.0003138794084186576, "loss": 5.6366, "step": 6031 }, { "epoch": 2.058703071672355, "grad_norm": 2.858750104904175, "learning_rate": 0.0003137656427758817, "loss": 6.5325, "step": 6032 }, { "epoch": 2.0590443686006825, "grad_norm": 3.046865701675415, "learning_rate": 0.0003136518771331058, "loss": 6.317, "step": 6033 }, { "epoch": 2.0593856655290104, "grad_norm": 2.996433734893799, "learning_rate": 0.0003135381114903299, "loss": 6.144, "step": 6034 }, { "epoch": 2.0597269624573378, "grad_norm": 2.8240692615509033, "learning_rate": 0.000313424345847554, "loss": 6.3732, "step": 6035 }, { "epoch": 2.0600682593856656, "grad_norm": 2.915379047393799, "learning_rate": 0.0003133105802047782, "loss": 6.1081, "step": 6036 }, { "epoch": 2.060409556313993, "grad_norm": 6.734116077423096, "learning_rate": 0.0003131968145620023, "loss": 4.9619, "step": 6037 }, { "epoch": 2.060750853242321, "grad_norm": 2.909330129623413, "learning_rate": 0.0003130830489192264, "loss": 6.0949, "step": 6038 }, { "epoch": 2.0610921501706483, "grad_norm": 2.775883197784424, "learning_rate": 0.00031296928327645055, "loss": 6.0901, "step": 6039 }, { "epoch": 2.061433447098976, "grad_norm": 3.041524648666382, "learning_rate": 0.00031285551763367465, "loss": 6.8417, "step": 6040 }, { "epoch": 2.0617747440273035, "grad_norm": 2.883164882659912, "learning_rate": 0.00031274175199089876, "loss": 6.3803, "step": 6041 }, { "epoch": 2.0621160409556314, "grad_norm": 3.1183626651763916, "learning_rate": 0.0003126279863481229, "loss": 6.1928, "step": 6042 }, { "epoch": 2.0624573378839592, "grad_norm": 2.8333499431610107, "learning_rate": 0.000312514220705347, "loss": 6.2385, "step": 6043 }, { "epoch": 2.0627986348122866, "grad_norm": 2.8665261268615723, "learning_rate": 0.00031240045506257107, "loss": 6.8755, "step": 6044 }, { "epoch": 2.0631399317406145, "grad_norm": 4.331704139709473, "learning_rate": 0.00031228668941979523, "loss": 4.8018, "step": 6045 }, { "epoch": 2.063481228668942, "grad_norm": 2.9394123554229736, "learning_rate": 0.00031217292377701934, "loss": 6.2594, "step": 6046 }, { "epoch": 2.0638225255972698, "grad_norm": 2.87528395652771, "learning_rate": 0.00031205915813424344, "loss": 6.0032, "step": 6047 }, { "epoch": 2.064163822525597, "grad_norm": 2.8144853115081787, "learning_rate": 0.0003119453924914676, "loss": 6.0189, "step": 6048 }, { "epoch": 2.064505119453925, "grad_norm": 4.7918243408203125, "learning_rate": 0.0003118316268486917, "loss": 4.9166, "step": 6049 }, { "epoch": 2.0648464163822524, "grad_norm": 2.0117385387420654, "learning_rate": 0.0003117178612059158, "loss": 3.1662, "step": 6050 }, { "epoch": 2.0651877133105803, "grad_norm": 2.930635690689087, "learning_rate": 0.00031160409556313997, "loss": 6.7214, "step": 6051 }, { "epoch": 2.0655290102389077, "grad_norm": 2.864907741546631, "learning_rate": 0.0003114903299203641, "loss": 6.3194, "step": 6052 }, { "epoch": 2.0658703071672355, "grad_norm": 2.9267427921295166, "learning_rate": 0.0003113765642775881, "loss": 6.393, "step": 6053 }, { "epoch": 2.066211604095563, "grad_norm": 2.9477977752685547, "learning_rate": 0.0003112627986348123, "loss": 6.4259, "step": 6054 }, { "epoch": 2.0665529010238908, "grad_norm": 5.001644611358643, "learning_rate": 0.0003111490329920364, "loss": 4.6699, "step": 6055 }, { "epoch": 2.0668941979522186, "grad_norm": 2.8814961910247803, "learning_rate": 0.0003110352673492605, "loss": 6.1531, "step": 6056 }, { "epoch": 2.067235494880546, "grad_norm": 2.9188053607940674, "learning_rate": 0.00031092150170648465, "loss": 5.8409, "step": 6057 }, { "epoch": 2.067576791808874, "grad_norm": 2.8715567588806152, "learning_rate": 0.00031080773606370876, "loss": 5.4793, "step": 6058 }, { "epoch": 2.0679180887372013, "grad_norm": 9.393294334411621, "learning_rate": 0.0003106939704209329, "loss": 4.9354, "step": 6059 }, { "epoch": 2.068259385665529, "grad_norm": 2.8406503200531006, "learning_rate": 0.000310580204778157, "loss": 5.92, "step": 6060 }, { "epoch": 2.0686006825938565, "grad_norm": 3.1945927143096924, "learning_rate": 0.0003104664391353811, "loss": 5.5444, "step": 6061 }, { "epoch": 2.0689419795221844, "grad_norm": 2.9627017974853516, "learning_rate": 0.0003103526734926053, "loss": 6.2811, "step": 6062 }, { "epoch": 2.069283276450512, "grad_norm": 2.85520076751709, "learning_rate": 0.00031023890784982934, "loss": 6.3404, "step": 6063 }, { "epoch": 2.0696245733788396, "grad_norm": 3.261565685272217, "learning_rate": 0.00031012514220705344, "loss": 5.8875, "step": 6064 }, { "epoch": 2.069965870307167, "grad_norm": 2.8483152389526367, "learning_rate": 0.0003100113765642776, "loss": 6.4443, "step": 6065 }, { "epoch": 2.070307167235495, "grad_norm": 2.9961583614349365, "learning_rate": 0.0003098976109215017, "loss": 5.5248, "step": 6066 }, { "epoch": 2.0706484641638223, "grad_norm": 2.947376251220703, "learning_rate": 0.0003097838452787258, "loss": 5.3489, "step": 6067 }, { "epoch": 2.07098976109215, "grad_norm": 2.8761508464813232, "learning_rate": 0.00030967007963594997, "loss": 5.4662, "step": 6068 }, { "epoch": 2.071331058020478, "grad_norm": 2.8893637657165527, "learning_rate": 0.0003095563139931741, "loss": 5.9248, "step": 6069 }, { "epoch": 2.0716723549488054, "grad_norm": 2.9129505157470703, "learning_rate": 0.0003094425483503982, "loss": 6.376, "step": 6070 }, { "epoch": 2.0720136518771333, "grad_norm": 4.613974094390869, "learning_rate": 0.00030932878270762234, "loss": 5.5792, "step": 6071 }, { "epoch": 2.0723549488054607, "grad_norm": 2.938722610473633, "learning_rate": 0.00030921501706484644, "loss": 6.8475, "step": 6072 }, { "epoch": 2.0726962457337885, "grad_norm": 2.8759310245513916, "learning_rate": 0.0003091012514220705, "loss": 6.688, "step": 6073 }, { "epoch": 2.073037542662116, "grad_norm": 2.9209375381469727, "learning_rate": 0.00030898748577929465, "loss": 6.3908, "step": 6074 }, { "epoch": 2.073378839590444, "grad_norm": 2.9436604976654053, "learning_rate": 0.00030887372013651876, "loss": 6.0059, "step": 6075 }, { "epoch": 2.073720136518771, "grad_norm": 3.093762159347534, "learning_rate": 0.00030875995449374286, "loss": 5.5346, "step": 6076 }, { "epoch": 2.074061433447099, "grad_norm": 2.913107395172119, "learning_rate": 0.000308646188850967, "loss": 6.3079, "step": 6077 }, { "epoch": 2.0744027303754264, "grad_norm": 2.8740787506103516, "learning_rate": 0.00030853242320819113, "loss": 5.9121, "step": 6078 }, { "epoch": 2.0747440273037543, "grad_norm": 3.232553243637085, "learning_rate": 0.0003084186575654153, "loss": 5.6589, "step": 6079 }, { "epoch": 2.0750853242320817, "grad_norm": 2.818389654159546, "learning_rate": 0.0003083048919226394, "loss": 6.1773, "step": 6080 }, { "epoch": 2.0754266211604095, "grad_norm": 2.880063533782959, "learning_rate": 0.0003081911262798635, "loss": 6.005, "step": 6081 }, { "epoch": 2.0757679180887374, "grad_norm": 3.2499842643737793, "learning_rate": 0.00030807736063708766, "loss": 6.1325, "step": 6082 }, { "epoch": 2.076109215017065, "grad_norm": 2.889770746231079, "learning_rate": 0.0003079635949943117, "loss": 5.9898, "step": 6083 }, { "epoch": 2.0764505119453927, "grad_norm": 2.825451612472534, "learning_rate": 0.0003078498293515358, "loss": 6.3039, "step": 6084 }, { "epoch": 2.07679180887372, "grad_norm": 3.2816312313079834, "learning_rate": 0.00030773606370875997, "loss": 4.5501, "step": 6085 }, { "epoch": 2.077133105802048, "grad_norm": 2.836883068084717, "learning_rate": 0.0003076222980659841, "loss": 6.3832, "step": 6086 }, { "epoch": 2.0774744027303753, "grad_norm": 2.905423879623413, "learning_rate": 0.0003075085324232082, "loss": 6.6088, "step": 6087 }, { "epoch": 2.077815699658703, "grad_norm": 3.0025651454925537, "learning_rate": 0.00030739476678043234, "loss": 6.6776, "step": 6088 }, { "epoch": 2.0781569965870306, "grad_norm": 2.935211181640625, "learning_rate": 0.00030728100113765644, "loss": 5.9203, "step": 6089 }, { "epoch": 2.0784982935153584, "grad_norm": 3.124129295349121, "learning_rate": 0.00030716723549488055, "loss": 5.9244, "step": 6090 }, { "epoch": 2.078839590443686, "grad_norm": 2.805335283279419, "learning_rate": 0.0003070534698521047, "loss": 6.3963, "step": 6091 }, { "epoch": 2.0791808873720137, "grad_norm": 2.806159496307373, "learning_rate": 0.00030693970420932876, "loss": 6.1115, "step": 6092 }, { "epoch": 2.079522184300341, "grad_norm": 2.8614501953125, "learning_rate": 0.00030682593856655286, "loss": 6.4517, "step": 6093 }, { "epoch": 2.079863481228669, "grad_norm": 2.736070394515991, "learning_rate": 0.000306712172923777, "loss": 6.4008, "step": 6094 }, { "epoch": 2.080204778156997, "grad_norm": 2.861682176589966, "learning_rate": 0.00030659840728100113, "loss": 6.0243, "step": 6095 }, { "epoch": 2.080546075085324, "grad_norm": 2.79305100440979, "learning_rate": 0.00030648464163822523, "loss": 5.9656, "step": 6096 }, { "epoch": 2.080887372013652, "grad_norm": 2.781764030456543, "learning_rate": 0.0003063708759954494, "loss": 5.8456, "step": 6097 }, { "epoch": 2.0812286689419794, "grad_norm": 3.013824224472046, "learning_rate": 0.0003062571103526735, "loss": 6.0931, "step": 6098 }, { "epoch": 2.0815699658703073, "grad_norm": 2.9550583362579346, "learning_rate": 0.00030614334470989766, "loss": 6.5948, "step": 6099 }, { "epoch": 2.0819112627986347, "grad_norm": 2.903958320617676, "learning_rate": 0.00030602957906712176, "loss": 6.3639, "step": 6100 }, { "epoch": 2.0822525597269625, "grad_norm": 2.8269143104553223, "learning_rate": 0.00030591581342434587, "loss": 6.0978, "step": 6101 }, { "epoch": 2.08259385665529, "grad_norm": 2.9017698764801025, "learning_rate": 0.00030580204778156997, "loss": 6.2451, "step": 6102 }, { "epoch": 2.082935153583618, "grad_norm": 2.8492634296417236, "learning_rate": 0.0003056882821387941, "loss": 6.0594, "step": 6103 }, { "epoch": 2.083276450511945, "grad_norm": 2.8176167011260986, "learning_rate": 0.0003055745164960182, "loss": 6.4956, "step": 6104 }, { "epoch": 2.083617747440273, "grad_norm": 2.8858425617218018, "learning_rate": 0.00030546075085324234, "loss": 6.1588, "step": 6105 }, { "epoch": 2.0839590443686005, "grad_norm": 2.932605266571045, "learning_rate": 0.00030534698521046645, "loss": 5.9801, "step": 6106 }, { "epoch": 2.0843003412969283, "grad_norm": 2.9948959350585938, "learning_rate": 0.00030523321956769055, "loss": 6.1619, "step": 6107 }, { "epoch": 2.084641638225256, "grad_norm": 2.8777294158935547, "learning_rate": 0.0003051194539249147, "loss": 5.9433, "step": 6108 }, { "epoch": 2.0849829351535836, "grad_norm": 4.565830707550049, "learning_rate": 0.0003050056882821388, "loss": 5.8684, "step": 6109 }, { "epoch": 2.0853242320819114, "grad_norm": 2.942716360092163, "learning_rate": 0.0003048919226393629, "loss": 6.3863, "step": 6110 }, { "epoch": 2.085665529010239, "grad_norm": 3.1933271884918213, "learning_rate": 0.0003047781569965871, "loss": 4.8312, "step": 6111 }, { "epoch": 2.0860068259385667, "grad_norm": 2.8330047130584717, "learning_rate": 0.00030466439135381113, "loss": 6.6125, "step": 6112 }, { "epoch": 2.086348122866894, "grad_norm": 2.899695873260498, "learning_rate": 0.00030455062571103523, "loss": 5.9607, "step": 6113 }, { "epoch": 2.086689419795222, "grad_norm": 2.945712089538574, "learning_rate": 0.0003044368600682594, "loss": 6.2117, "step": 6114 }, { "epoch": 2.0870307167235493, "grad_norm": 3.072864532470703, "learning_rate": 0.0003043230944254835, "loss": 5.743, "step": 6115 }, { "epoch": 2.087372013651877, "grad_norm": 2.916473865509033, "learning_rate": 0.0003042093287827076, "loss": 6.4488, "step": 6116 }, { "epoch": 2.0877133105802046, "grad_norm": 2.8467938899993896, "learning_rate": 0.00030409556313993176, "loss": 6.1196, "step": 6117 }, { "epoch": 2.0880546075085324, "grad_norm": 2.8041112422943115, "learning_rate": 0.00030398179749715587, "loss": 6.3867, "step": 6118 }, { "epoch": 2.08839590443686, "grad_norm": 2.6718533039093018, "learning_rate": 0.00030386803185438, "loss": 6.1606, "step": 6119 }, { "epoch": 2.0887372013651877, "grad_norm": 3.134387254714966, "learning_rate": 0.00030375426621160413, "loss": 5.3715, "step": 6120 }, { "epoch": 2.0890784982935156, "grad_norm": 2.8396830558776855, "learning_rate": 0.0003036405005688282, "loss": 6.6257, "step": 6121 }, { "epoch": 2.089419795221843, "grad_norm": 2.89739990234375, "learning_rate": 0.00030352673492605234, "loss": 6.4547, "step": 6122 }, { "epoch": 2.089761092150171, "grad_norm": 2.870203971862793, "learning_rate": 0.00030341296928327645, "loss": 5.6783, "step": 6123 }, { "epoch": 2.090102389078498, "grad_norm": 6.795978546142578, "learning_rate": 0.00030329920364050055, "loss": 4.8163, "step": 6124 }, { "epoch": 2.090443686006826, "grad_norm": 2.864149570465088, "learning_rate": 0.0003031854379977247, "loss": 6.3274, "step": 6125 }, { "epoch": 2.0907849829351535, "grad_norm": 2.8654708862304688, "learning_rate": 0.0003030716723549488, "loss": 6.3438, "step": 6126 }, { "epoch": 2.0911262798634813, "grad_norm": 2.834479331970215, "learning_rate": 0.0003029579067121729, "loss": 6.1642, "step": 6127 }, { "epoch": 2.0914675767918087, "grad_norm": 2.9898204803466797, "learning_rate": 0.0003028441410693971, "loss": 6.4757, "step": 6128 }, { "epoch": 2.0918088737201366, "grad_norm": 2.8628549575805664, "learning_rate": 0.0003027303754266212, "loss": 6.5145, "step": 6129 }, { "epoch": 2.092150170648464, "grad_norm": 2.945585012435913, "learning_rate": 0.0003026166097838453, "loss": 6.4056, "step": 6130 }, { "epoch": 2.092491467576792, "grad_norm": 2.85774564743042, "learning_rate": 0.0003025028441410694, "loss": 5.6863, "step": 6131 }, { "epoch": 2.0928327645051192, "grad_norm": 3.0979244709014893, "learning_rate": 0.0003023890784982935, "loss": 6.1059, "step": 6132 }, { "epoch": 2.093174061433447, "grad_norm": 2.8489980697631836, "learning_rate": 0.0003022753128555176, "loss": 5.884, "step": 6133 }, { "epoch": 2.093515358361775, "grad_norm": 2.7917068004608154, "learning_rate": 0.00030216154721274176, "loss": 5.8435, "step": 6134 }, { "epoch": 2.0938566552901023, "grad_norm": 3.5278565883636475, "learning_rate": 0.00030204778156996587, "loss": 3.6562, "step": 6135 }, { "epoch": 2.09419795221843, "grad_norm": 2.9529824256896973, "learning_rate": 0.00030193401592718997, "loss": 5.6781, "step": 6136 }, { "epoch": 2.0945392491467576, "grad_norm": 7.044441223144531, "learning_rate": 0.00030182025028441413, "loss": 6.3167, "step": 6137 }, { "epoch": 2.0948805460750854, "grad_norm": 3.1856296062469482, "learning_rate": 0.00030170648464163824, "loss": 4.6697, "step": 6138 }, { "epoch": 2.095221843003413, "grad_norm": 2.9588096141815186, "learning_rate": 0.0003015927189988624, "loss": 6.2748, "step": 6139 }, { "epoch": 2.0955631399317407, "grad_norm": 3.0708155632019043, "learning_rate": 0.0003014789533560865, "loss": 6.2729, "step": 6140 }, { "epoch": 2.095904436860068, "grad_norm": 2.921039342880249, "learning_rate": 0.00030136518771331055, "loss": 5.9834, "step": 6141 }, { "epoch": 2.096245733788396, "grad_norm": 2.7934975624084473, "learning_rate": 0.0003012514220705347, "loss": 5.9395, "step": 6142 }, { "epoch": 2.0965870307167234, "grad_norm": 2.899808645248413, "learning_rate": 0.0003011376564277588, "loss": 6.1141, "step": 6143 }, { "epoch": 2.096928327645051, "grad_norm": 2.8703773021698, "learning_rate": 0.0003010238907849829, "loss": 6.3106, "step": 6144 }, { "epoch": 2.0972696245733786, "grad_norm": 2.8744289875030518, "learning_rate": 0.0003009101251422071, "loss": 6.4198, "step": 6145 }, { "epoch": 2.0976109215017065, "grad_norm": 2.801576852798462, "learning_rate": 0.0003007963594994312, "loss": 6.0057, "step": 6146 }, { "epoch": 2.0979522184300343, "grad_norm": 2.772566080093384, "learning_rate": 0.0003006825938566553, "loss": 5.799, "step": 6147 }, { "epoch": 2.0982935153583617, "grad_norm": 2.8693549633026123, "learning_rate": 0.00030056882821387945, "loss": 6.134, "step": 6148 }, { "epoch": 2.0986348122866896, "grad_norm": 1.9791274070739746, "learning_rate": 0.00030045506257110355, "loss": 3.122, "step": 6149 }, { "epoch": 2.098976109215017, "grad_norm": 2.8072032928466797, "learning_rate": 0.00030034129692832766, "loss": 5.8741, "step": 6150 }, { "epoch": 2.099317406143345, "grad_norm": 2.8439536094665527, "learning_rate": 0.00030022753128555176, "loss": 6.4318, "step": 6151 }, { "epoch": 2.0996587030716722, "grad_norm": 2.8671748638153076, "learning_rate": 0.00030011376564277587, "loss": 6.1086, "step": 6152 }, { "epoch": 2.1, "grad_norm": 2.7963056564331055, "learning_rate": 0.0003, "loss": 6.0888, "step": 6153 }, { "epoch": 2.1003412969283275, "grad_norm": 2.9660708904266357, "learning_rate": 0.00029988623435722413, "loss": 6.0207, "step": 6154 }, { "epoch": 2.1006825938566553, "grad_norm": 2.9538464546203613, "learning_rate": 0.00029977246871444824, "loss": 6.5961, "step": 6155 }, { "epoch": 2.1010238907849828, "grad_norm": 2.916219472885132, "learning_rate": 0.00029965870307167234, "loss": 6.4961, "step": 6156 }, { "epoch": 2.1013651877133106, "grad_norm": 3.0762720108032227, "learning_rate": 0.0002995449374288965, "loss": 5.8819, "step": 6157 }, { "epoch": 2.101706484641638, "grad_norm": 2.9753077030181885, "learning_rate": 0.0002994311717861206, "loss": 6.0732, "step": 6158 }, { "epoch": 2.102047781569966, "grad_norm": 3.163635492324829, "learning_rate": 0.00029931740614334477, "loss": 5.752, "step": 6159 }, { "epoch": 2.1023890784982937, "grad_norm": 2.8499581813812256, "learning_rate": 0.0002992036405005688, "loss": 6.1047, "step": 6160 }, { "epoch": 2.102730375426621, "grad_norm": 2.870997190475464, "learning_rate": 0.0002990898748577929, "loss": 6.2264, "step": 6161 }, { "epoch": 2.103071672354949, "grad_norm": 2.834073305130005, "learning_rate": 0.0002989761092150171, "loss": 6.223, "step": 6162 }, { "epoch": 2.1034129692832764, "grad_norm": 2.8211171627044678, "learning_rate": 0.0002988623435722412, "loss": 6.8218, "step": 6163 }, { "epoch": 2.103754266211604, "grad_norm": 2.9839437007904053, "learning_rate": 0.0002987485779294653, "loss": 6.177, "step": 6164 }, { "epoch": 2.1040955631399316, "grad_norm": 2.881976366043091, "learning_rate": 0.00029863481228668945, "loss": 6.3719, "step": 6165 }, { "epoch": 2.1044368600682595, "grad_norm": 2.873582601547241, "learning_rate": 0.00029852104664391355, "loss": 6.6528, "step": 6166 }, { "epoch": 2.104778156996587, "grad_norm": 2.877270460128784, "learning_rate": 0.00029840728100113766, "loss": 6.2307, "step": 6167 }, { "epoch": 2.1051194539249147, "grad_norm": 2.885765314102173, "learning_rate": 0.0002982935153583618, "loss": 6.1185, "step": 6168 }, { "epoch": 2.105460750853242, "grad_norm": 2.835249662399292, "learning_rate": 0.0002981797497155859, "loss": 6.2276, "step": 6169 }, { "epoch": 2.10580204778157, "grad_norm": 2.9260549545288086, "learning_rate": 0.00029806598407281, "loss": 6.3071, "step": 6170 }, { "epoch": 2.1061433447098974, "grad_norm": 2.8237810134887695, "learning_rate": 0.00029795221843003413, "loss": 6.1465, "step": 6171 }, { "epoch": 2.1064846416382252, "grad_norm": 2.822054862976074, "learning_rate": 0.00029783845278725824, "loss": 5.8652, "step": 6172 }, { "epoch": 2.106825938566553, "grad_norm": 2.801043748855591, "learning_rate": 0.00029772468714448234, "loss": 6.0659, "step": 6173 }, { "epoch": 2.1071672354948805, "grad_norm": 2.7532236576080322, "learning_rate": 0.0002976109215017065, "loss": 6.1017, "step": 6174 }, { "epoch": 2.1075085324232083, "grad_norm": 2.8876118659973145, "learning_rate": 0.0002974971558589306, "loss": 5.7315, "step": 6175 }, { "epoch": 2.1078498293515358, "grad_norm": 2.9598159790039062, "learning_rate": 0.0002973833902161547, "loss": 6.4794, "step": 6176 }, { "epoch": 2.1081911262798636, "grad_norm": 2.756242513656616, "learning_rate": 0.00029726962457337887, "loss": 6.6465, "step": 6177 }, { "epoch": 2.108532423208191, "grad_norm": 3.125593662261963, "learning_rate": 0.000297155858930603, "loss": 6.2238, "step": 6178 }, { "epoch": 2.108873720136519, "grad_norm": 2.907468557357788, "learning_rate": 0.0002970420932878271, "loss": 6.4868, "step": 6179 }, { "epoch": 2.1092150170648463, "grad_norm": 4.4180755615234375, "learning_rate": 0.0002969283276450512, "loss": 5.2227, "step": 6180 }, { "epoch": 2.109556313993174, "grad_norm": 2.873162269592285, "learning_rate": 0.0002968145620022753, "loss": 6.7799, "step": 6181 }, { "epoch": 2.1098976109215015, "grad_norm": 2.8836138248443604, "learning_rate": 0.00029670079635949945, "loss": 5.8275, "step": 6182 }, { "epoch": 2.1102389078498294, "grad_norm": 2.93564510345459, "learning_rate": 0.00029658703071672356, "loss": 5.3932, "step": 6183 }, { "epoch": 2.1105802047781568, "grad_norm": 2.858703374862671, "learning_rate": 0.00029647326507394766, "loss": 6.2049, "step": 6184 }, { "epoch": 2.1109215017064846, "grad_norm": 2.9091365337371826, "learning_rate": 0.0002963594994311718, "loss": 6.3892, "step": 6185 }, { "epoch": 2.1112627986348125, "grad_norm": 2.96952486038208, "learning_rate": 0.0002962457337883959, "loss": 5.8229, "step": 6186 }, { "epoch": 2.11160409556314, "grad_norm": 2.865198850631714, "learning_rate": 0.00029613196814562003, "loss": 6.4365, "step": 6187 }, { "epoch": 2.1119453924914677, "grad_norm": 2.7699646949768066, "learning_rate": 0.0002960182025028442, "loss": 6.4912, "step": 6188 }, { "epoch": 2.112286689419795, "grad_norm": 2.8436672687530518, "learning_rate": 0.00029590443686006824, "loss": 5.2912, "step": 6189 }, { "epoch": 2.112627986348123, "grad_norm": 2.8417892456054688, "learning_rate": 0.00029579067121729234, "loss": 6.3945, "step": 6190 }, { "epoch": 2.1129692832764504, "grad_norm": 6.457067966461182, "learning_rate": 0.0002956769055745165, "loss": 3.8569, "step": 6191 }, { "epoch": 2.1133105802047782, "grad_norm": 4.360135555267334, "learning_rate": 0.0002955631399317406, "loss": 5.5622, "step": 6192 }, { "epoch": 2.1136518771331056, "grad_norm": 3.0096099376678467, "learning_rate": 0.0002954493742889647, "loss": 5.8473, "step": 6193 }, { "epoch": 2.1139931740614335, "grad_norm": 3.391237497329712, "learning_rate": 0.00029533560864618887, "loss": 5.4066, "step": 6194 }, { "epoch": 2.114334470989761, "grad_norm": 2.9052274227142334, "learning_rate": 0.000295221843003413, "loss": 5.9522, "step": 6195 }, { "epoch": 2.1146757679180888, "grad_norm": 2.8499536514282227, "learning_rate": 0.0002951080773606371, "loss": 5.3388, "step": 6196 }, { "epoch": 2.115017064846416, "grad_norm": 2.8930346965789795, "learning_rate": 0.00029499431171786124, "loss": 6.1808, "step": 6197 }, { "epoch": 2.115358361774744, "grad_norm": 8.893372535705566, "learning_rate": 0.00029488054607508535, "loss": 5.8041, "step": 6198 }, { "epoch": 2.115699658703072, "grad_norm": 2.8673787117004395, "learning_rate": 0.0002947667804323094, "loss": 6.1321, "step": 6199 }, { "epoch": 2.1160409556313993, "grad_norm": 2.806966543197632, "learning_rate": 0.00029465301478953356, "loss": 6.6198, "step": 6200 }, { "epoch": 2.116382252559727, "grad_norm": 2.9241514205932617, "learning_rate": 0.00029453924914675766, "loss": 6.4543, "step": 6201 }, { "epoch": 2.1167235494880545, "grad_norm": 2.939549684524536, "learning_rate": 0.0002944254835039818, "loss": 5.8664, "step": 6202 }, { "epoch": 2.1170648464163824, "grad_norm": 2.817613124847412, "learning_rate": 0.0002943117178612059, "loss": 6.4852, "step": 6203 }, { "epoch": 2.11740614334471, "grad_norm": 2.9055352210998535, "learning_rate": 0.00029419795221843003, "loss": 6.1691, "step": 6204 }, { "epoch": 2.1177474402730376, "grad_norm": 2.756702184677124, "learning_rate": 0.0002940841865756542, "loss": 6.3137, "step": 6205 }, { "epoch": 2.118088737201365, "grad_norm": 2.957275152206421, "learning_rate": 0.0002939704209328783, "loss": 6.5439, "step": 6206 }, { "epoch": 2.118430034129693, "grad_norm": 2.8155031204223633, "learning_rate": 0.0002938566552901024, "loss": 6.0326, "step": 6207 }, { "epoch": 2.1187713310580203, "grad_norm": 12.575434684753418, "learning_rate": 0.00029374288964732656, "loss": 4.9286, "step": 6208 }, { "epoch": 2.119112627986348, "grad_norm": 5.587793350219727, "learning_rate": 0.0002936291240045506, "loss": 4.9998, "step": 6209 }, { "epoch": 2.1194539249146755, "grad_norm": 3.0160374641418457, "learning_rate": 0.0002935153583617747, "loss": 5.9745, "step": 6210 }, { "epoch": 2.1197952218430034, "grad_norm": 2.93703293800354, "learning_rate": 0.0002934015927189989, "loss": 6.4411, "step": 6211 }, { "epoch": 2.1201365187713312, "grad_norm": 2.9282970428466797, "learning_rate": 0.000293287827076223, "loss": 6.1064, "step": 6212 }, { "epoch": 2.1204778156996587, "grad_norm": 2.965182304382324, "learning_rate": 0.0002931740614334471, "loss": 6.5645, "step": 6213 }, { "epoch": 2.1208191126279865, "grad_norm": 2.8125925064086914, "learning_rate": 0.00029306029579067124, "loss": 6.1774, "step": 6214 }, { "epoch": 2.121160409556314, "grad_norm": 2.8314590454101562, "learning_rate": 0.00029294653014789535, "loss": 5.887, "step": 6215 }, { "epoch": 2.1215017064846418, "grad_norm": 2.745131492614746, "learning_rate": 0.00029283276450511945, "loss": 6.5378, "step": 6216 }, { "epoch": 2.121843003412969, "grad_norm": 2.8095803260803223, "learning_rate": 0.0002927189988623436, "loss": 6.3276, "step": 6217 }, { "epoch": 2.122184300341297, "grad_norm": 2.9379096031188965, "learning_rate": 0.0002926052332195677, "loss": 6.4891, "step": 6218 }, { "epoch": 2.1225255972696244, "grad_norm": 2.851707696914673, "learning_rate": 0.00029249146757679177, "loss": 6.2868, "step": 6219 }, { "epoch": 2.1228668941979523, "grad_norm": 3.480109214782715, "learning_rate": 0.0002923777019340159, "loss": 5.4423, "step": 6220 }, { "epoch": 2.1232081911262797, "grad_norm": 3.0105674266815186, "learning_rate": 0.00029226393629124003, "loss": 5.9214, "step": 6221 }, { "epoch": 2.1235494880546075, "grad_norm": 2.8100273609161377, "learning_rate": 0.0002921501706484642, "loss": 6.3837, "step": 6222 }, { "epoch": 2.123890784982935, "grad_norm": 2.886622190475464, "learning_rate": 0.0002920364050056883, "loss": 5.8483, "step": 6223 }, { "epoch": 2.124232081911263, "grad_norm": 2.9184138774871826, "learning_rate": 0.0002919226393629124, "loss": 6.2556, "step": 6224 }, { "epoch": 2.1245733788395906, "grad_norm": 3.3458776473999023, "learning_rate": 0.00029180887372013656, "loss": 4.7381, "step": 6225 }, { "epoch": 2.124914675767918, "grad_norm": 2.833589553833008, "learning_rate": 0.00029169510807736066, "loss": 6.5108, "step": 6226 }, { "epoch": 2.125255972696246, "grad_norm": 2.8403658866882324, "learning_rate": 0.00029158134243458477, "loss": 6.0265, "step": 6227 }, { "epoch": 2.1255972696245733, "grad_norm": 2.9905974864959717, "learning_rate": 0.0002914675767918089, "loss": 5.9997, "step": 6228 }, { "epoch": 2.125938566552901, "grad_norm": 3.724816083908081, "learning_rate": 0.000291353811149033, "loss": 5.0568, "step": 6229 }, { "epoch": 2.1262798634812285, "grad_norm": 2.8078196048736572, "learning_rate": 0.0002912400455062571, "loss": 6.1329, "step": 6230 }, { "epoch": 2.1266211604095564, "grad_norm": 2.8691749572753906, "learning_rate": 0.00029112627986348124, "loss": 6.2066, "step": 6231 }, { "epoch": 2.126962457337884, "grad_norm": 2.830688953399658, "learning_rate": 0.00029101251422070535, "loss": 6.0778, "step": 6232 }, { "epoch": 2.1273037542662117, "grad_norm": 2.865365505218506, "learning_rate": 0.00029089874857792945, "loss": 6.1605, "step": 6233 }, { "epoch": 2.127645051194539, "grad_norm": 2.6853408813476562, "learning_rate": 0.0002907849829351536, "loss": 5.9814, "step": 6234 }, { "epoch": 2.127986348122867, "grad_norm": 2.7538394927978516, "learning_rate": 0.0002906712172923777, "loss": 5.2563, "step": 6235 }, { "epoch": 2.1283276450511943, "grad_norm": 3.2029075622558594, "learning_rate": 0.0002905574516496018, "loss": 5.537, "step": 6236 }, { "epoch": 2.128668941979522, "grad_norm": 2.8655052185058594, "learning_rate": 0.000290443686006826, "loss": 6.3069, "step": 6237 }, { "epoch": 2.12901023890785, "grad_norm": 2.836099624633789, "learning_rate": 0.00029032992036405003, "loss": 6.1336, "step": 6238 }, { "epoch": 2.1293515358361774, "grad_norm": 2.8297688961029053, "learning_rate": 0.00029021615472127414, "loss": 5.9636, "step": 6239 }, { "epoch": 2.1296928327645053, "grad_norm": 2.8413031101226807, "learning_rate": 0.0002901023890784983, "loss": 6.4108, "step": 6240 }, { "epoch": 2.1300341296928327, "grad_norm": 2.8722383975982666, "learning_rate": 0.0002899886234357224, "loss": 6.5657, "step": 6241 }, { "epoch": 2.1303754266211605, "grad_norm": 2.8211677074432373, "learning_rate": 0.00028987485779294656, "loss": 6.3421, "step": 6242 }, { "epoch": 2.130716723549488, "grad_norm": 3.3281304836273193, "learning_rate": 0.00028976109215017067, "loss": 5.4648, "step": 6243 }, { "epoch": 2.131058020477816, "grad_norm": 2.8957037925720215, "learning_rate": 0.00028964732650739477, "loss": 6.5093, "step": 6244 }, { "epoch": 2.131399317406143, "grad_norm": 2.93902587890625, "learning_rate": 0.00028953356086461893, "loss": 6.1025, "step": 6245 }, { "epoch": 2.131740614334471, "grad_norm": 2.877804756164551, "learning_rate": 0.00028941979522184303, "loss": 5.8682, "step": 6246 }, { "epoch": 2.1320819112627984, "grad_norm": 2.8391692638397217, "learning_rate": 0.00028930602957906714, "loss": 6.1784, "step": 6247 }, { "epoch": 2.1324232081911263, "grad_norm": 2.83154559135437, "learning_rate": 0.00028919226393629124, "loss": 6.0821, "step": 6248 }, { "epoch": 2.1327645051194537, "grad_norm": 2.8575069904327393, "learning_rate": 0.00028907849829351535, "loss": 6.1532, "step": 6249 }, { "epoch": 2.1331058020477816, "grad_norm": 2.8776323795318604, "learning_rate": 0.00028896473265073945, "loss": 6.1705, "step": 6250 }, { "epoch": 2.1334470989761094, "grad_norm": 2.809744119644165, "learning_rate": 0.0002888509670079636, "loss": 5.6912, "step": 6251 }, { "epoch": 2.133788395904437, "grad_norm": 2.8350303173065186, "learning_rate": 0.0002887372013651877, "loss": 6.6106, "step": 6252 }, { "epoch": 2.1341296928327647, "grad_norm": 2.8744351863861084, "learning_rate": 0.0002886234357224118, "loss": 5.9893, "step": 6253 }, { "epoch": 2.134470989761092, "grad_norm": 2.87178897857666, "learning_rate": 0.000288509670079636, "loss": 5.9063, "step": 6254 }, { "epoch": 2.13481228668942, "grad_norm": 2.9190359115600586, "learning_rate": 0.0002883959044368601, "loss": 5.8424, "step": 6255 }, { "epoch": 2.1351535836177473, "grad_norm": 2.8117921352386475, "learning_rate": 0.0002882821387940842, "loss": 6.3904, "step": 6256 }, { "epoch": 2.135494880546075, "grad_norm": 2.7705960273742676, "learning_rate": 0.00028816837315130835, "loss": 6.1901, "step": 6257 }, { "epoch": 2.1358361774744026, "grad_norm": 2.7972941398620605, "learning_rate": 0.0002880546075085324, "loss": 6.4878, "step": 6258 }, { "epoch": 2.1361774744027304, "grad_norm": 2.915985345840454, "learning_rate": 0.0002879408418657565, "loss": 6.319, "step": 6259 }, { "epoch": 2.136518771331058, "grad_norm": 3.0406363010406494, "learning_rate": 0.00028782707622298067, "loss": 5.3564, "step": 6260 }, { "epoch": 2.1368600682593857, "grad_norm": 2.915806531906128, "learning_rate": 0.00028771331058020477, "loss": 6.8073, "step": 6261 }, { "epoch": 2.137201365187713, "grad_norm": 2.840094804763794, "learning_rate": 0.0002875995449374289, "loss": 6.0616, "step": 6262 }, { "epoch": 2.137542662116041, "grad_norm": 3.044546365737915, "learning_rate": 0.00028748577929465304, "loss": 5.9823, "step": 6263 }, { "epoch": 2.137883959044369, "grad_norm": 2.8069345951080322, "learning_rate": 0.00028737201365187714, "loss": 5.8899, "step": 6264 }, { "epoch": 2.138225255972696, "grad_norm": 2.8434646129608154, "learning_rate": 0.0002872582480091013, "loss": 6.471, "step": 6265 }, { "epoch": 2.138566552901024, "grad_norm": 2.9248239994049072, "learning_rate": 0.0002871444823663254, "loss": 5.8759, "step": 6266 }, { "epoch": 2.1389078498293514, "grad_norm": 2.838510036468506, "learning_rate": 0.00028703071672354946, "loss": 6.3492, "step": 6267 }, { "epoch": 2.1392491467576793, "grad_norm": 2.9336483478546143, "learning_rate": 0.0002869169510807736, "loss": 6.0126, "step": 6268 }, { "epoch": 2.1395904436860067, "grad_norm": 2.8124420642852783, "learning_rate": 0.0002868031854379977, "loss": 5.8763, "step": 6269 }, { "epoch": 2.1399317406143346, "grad_norm": 2.821249485015869, "learning_rate": 0.0002866894197952218, "loss": 6.0869, "step": 6270 }, { "epoch": 2.140273037542662, "grad_norm": 2.825366258621216, "learning_rate": 0.000286575654152446, "loss": 6.1345, "step": 6271 }, { "epoch": 2.14061433447099, "grad_norm": 2.799079418182373, "learning_rate": 0.0002864618885096701, "loss": 5.919, "step": 6272 }, { "epoch": 2.140955631399317, "grad_norm": 2.9337072372436523, "learning_rate": 0.0002863481228668942, "loss": 5.927, "step": 6273 }, { "epoch": 2.141296928327645, "grad_norm": 2.8965418338775635, "learning_rate": 0.00028623435722411835, "loss": 6.1504, "step": 6274 }, { "epoch": 2.1416382252559725, "grad_norm": 3.0523440837860107, "learning_rate": 0.00028612059158134246, "loss": 6.0062, "step": 6275 }, { "epoch": 2.1419795221843003, "grad_norm": 2.986406087875366, "learning_rate": 0.00028600682593856656, "loss": 5.8823, "step": 6276 }, { "epoch": 2.142320819112628, "grad_norm": 2.9808123111724854, "learning_rate": 0.00028589306029579067, "loss": 6.4025, "step": 6277 }, { "epoch": 2.1426621160409556, "grad_norm": 2.6804819107055664, "learning_rate": 0.00028577929465301477, "loss": 5.7797, "step": 6278 }, { "epoch": 2.1430034129692834, "grad_norm": 3.432440996170044, "learning_rate": 0.0002856655290102389, "loss": 4.9929, "step": 6279 }, { "epoch": 2.143344709897611, "grad_norm": 3.000901222229004, "learning_rate": 0.00028555176336746304, "loss": 6.2861, "step": 6280 }, { "epoch": 2.1436860068259387, "grad_norm": 2.8772642612457275, "learning_rate": 0.00028543799772468714, "loss": 6.4075, "step": 6281 }, { "epoch": 2.144027303754266, "grad_norm": 2.7788705825805664, "learning_rate": 0.00028532423208191125, "loss": 4.92, "step": 6282 }, { "epoch": 2.144368600682594, "grad_norm": 3.084559917449951, "learning_rate": 0.0002852104664391354, "loss": 5.9546, "step": 6283 }, { "epoch": 2.1447098976109213, "grad_norm": 3.004912853240967, "learning_rate": 0.0002850967007963595, "loss": 6.2623, "step": 6284 }, { "epoch": 2.145051194539249, "grad_norm": 4.325676441192627, "learning_rate": 0.00028498293515358367, "loss": 4.1026, "step": 6285 }, { "epoch": 2.1453924914675766, "grad_norm": 2.822035551071167, "learning_rate": 0.0002848691695108078, "loss": 6.1718, "step": 6286 }, { "epoch": 2.1457337883959045, "grad_norm": 3.587118148803711, "learning_rate": 0.0002847554038680318, "loss": 4.5978, "step": 6287 }, { "epoch": 2.146075085324232, "grad_norm": 3.637235403060913, "learning_rate": 0.000284641638225256, "loss": 5.1437, "step": 6288 }, { "epoch": 2.1464163822525597, "grad_norm": 3.1141090393066406, "learning_rate": 0.0002845278725824801, "loss": 4.2409, "step": 6289 }, { "epoch": 2.1467576791808876, "grad_norm": 2.9125001430511475, "learning_rate": 0.0002844141069397042, "loss": 6.4584, "step": 6290 }, { "epoch": 2.147098976109215, "grad_norm": 2.8799238204956055, "learning_rate": 0.00028430034129692835, "loss": 6.1453, "step": 6291 }, { "epoch": 2.147440273037543, "grad_norm": 2.945450782775879, "learning_rate": 0.00028418657565415246, "loss": 5.9845, "step": 6292 }, { "epoch": 2.14778156996587, "grad_norm": 2.812429428100586, "learning_rate": 0.00028407281001137656, "loss": 5.7118, "step": 6293 }, { "epoch": 2.148122866894198, "grad_norm": 2.8083250522613525, "learning_rate": 0.0002839590443686007, "loss": 6.1992, "step": 6294 }, { "epoch": 2.1484641638225255, "grad_norm": 2.8323144912719727, "learning_rate": 0.00028384527872582483, "loss": 6.9573, "step": 6295 }, { "epoch": 2.1488054607508533, "grad_norm": 2.777940273284912, "learning_rate": 0.0002837315130830489, "loss": 5.7647, "step": 6296 }, { "epoch": 2.1491467576791807, "grad_norm": 2.9180915355682373, "learning_rate": 0.00028361774744027304, "loss": 6.115, "step": 6297 }, { "epoch": 2.1494880546075086, "grad_norm": 2.861795663833618, "learning_rate": 0.00028350398179749714, "loss": 5.9888, "step": 6298 }, { "epoch": 2.149829351535836, "grad_norm": 2.804222822189331, "learning_rate": 0.00028339021615472125, "loss": 6.1902, "step": 6299 }, { "epoch": 2.150170648464164, "grad_norm": 2.753507375717163, "learning_rate": 0.0002832764505119454, "loss": 6.6358, "step": 6300 }, { "epoch": 2.1505119453924912, "grad_norm": 2.8499131202697754, "learning_rate": 0.0002831626848691695, "loss": 6.6611, "step": 6301 }, { "epoch": 2.150853242320819, "grad_norm": 2.9446632862091064, "learning_rate": 0.0002830489192263936, "loss": 6.9951, "step": 6302 }, { "epoch": 2.151194539249147, "grad_norm": 2.8921797275543213, "learning_rate": 0.0002829351535836178, "loss": 6.4307, "step": 6303 }, { "epoch": 2.1515358361774743, "grad_norm": 2.776242971420288, "learning_rate": 0.0002828213879408419, "loss": 6.5389, "step": 6304 }, { "epoch": 2.151877133105802, "grad_norm": 3.0470118522644043, "learning_rate": 0.00028270762229806604, "loss": 5.9021, "step": 6305 }, { "epoch": 2.1522184300341296, "grad_norm": 2.870765209197998, "learning_rate": 0.0002825938566552901, "loss": 5.767, "step": 6306 }, { "epoch": 2.1525597269624575, "grad_norm": 2.9112579822540283, "learning_rate": 0.0002824800910125142, "loss": 5.9856, "step": 6307 }, { "epoch": 2.152901023890785, "grad_norm": 2.8619847297668457, "learning_rate": 0.00028236632536973835, "loss": 6.447, "step": 6308 }, { "epoch": 2.1532423208191127, "grad_norm": 2.7564167976379395, "learning_rate": 0.00028225255972696246, "loss": 6.2936, "step": 6309 }, { "epoch": 2.15358361774744, "grad_norm": 2.8190064430236816, "learning_rate": 0.00028213879408418656, "loss": 6.035, "step": 6310 }, { "epoch": 2.153924914675768, "grad_norm": 2.4524943828582764, "learning_rate": 0.0002820250284414107, "loss": 3.2047, "step": 6311 }, { "epoch": 2.1542662116040954, "grad_norm": 2.8662970066070557, "learning_rate": 0.00028191126279863483, "loss": 4.9013, "step": 6312 }, { "epoch": 2.154607508532423, "grad_norm": 2.9255385398864746, "learning_rate": 0.00028179749715585893, "loss": 6.6828, "step": 6313 }, { "epoch": 2.1549488054607506, "grad_norm": 2.917696475982666, "learning_rate": 0.0002816837315130831, "loss": 4.9657, "step": 6314 }, { "epoch": 2.1552901023890785, "grad_norm": 2.835268259048462, "learning_rate": 0.0002815699658703072, "loss": 6.6023, "step": 6315 }, { "epoch": 2.1556313993174063, "grad_norm": 2.958221435546875, "learning_rate": 0.00028145620022753125, "loss": 5.9026, "step": 6316 }, { "epoch": 2.1559726962457337, "grad_norm": 2.8288214206695557, "learning_rate": 0.0002813424345847554, "loss": 5.8854, "step": 6317 }, { "epoch": 2.1563139931740616, "grad_norm": 3.5853185653686523, "learning_rate": 0.0002812286689419795, "loss": 5.0054, "step": 6318 }, { "epoch": 2.156655290102389, "grad_norm": 2.9272825717926025, "learning_rate": 0.0002811149032992036, "loss": 5.7341, "step": 6319 }, { "epoch": 2.156996587030717, "grad_norm": 2.8925740718841553, "learning_rate": 0.0002810011376564278, "loss": 6.3701, "step": 6320 }, { "epoch": 2.1573378839590442, "grad_norm": 2.798673152923584, "learning_rate": 0.0002808873720136519, "loss": 5.85, "step": 6321 }, { "epoch": 2.157679180887372, "grad_norm": 2.7680532932281494, "learning_rate": 0.000280773606370876, "loss": 6.048, "step": 6322 }, { "epoch": 2.1580204778156995, "grad_norm": 2.8404247760772705, "learning_rate": 0.00028065984072810015, "loss": 5.9817, "step": 6323 }, { "epoch": 2.1583617747440274, "grad_norm": 3.323213815689087, "learning_rate": 0.00028054607508532425, "loss": 4.1193, "step": 6324 }, { "epoch": 2.1587030716723548, "grad_norm": 3.4093360900878906, "learning_rate": 0.0002804323094425484, "loss": 4.3087, "step": 6325 }, { "epoch": 2.1590443686006826, "grad_norm": 2.8730340003967285, "learning_rate": 0.00028031854379977246, "loss": 6.0826, "step": 6326 }, { "epoch": 2.15938566552901, "grad_norm": 2.8777995109558105, "learning_rate": 0.00028020477815699656, "loss": 6.4141, "step": 6327 }, { "epoch": 2.159726962457338, "grad_norm": 2.9867589473724365, "learning_rate": 0.0002800910125142207, "loss": 6.3958, "step": 6328 }, { "epoch": 2.1600682593856657, "grad_norm": 2.889984369277954, "learning_rate": 0.00027997724687144483, "loss": 5.8593, "step": 6329 }, { "epoch": 2.160409556313993, "grad_norm": 2.9526164531707764, "learning_rate": 0.00027986348122866893, "loss": 5.9088, "step": 6330 }, { "epoch": 2.160750853242321, "grad_norm": 4.757111072540283, "learning_rate": 0.0002797497155858931, "loss": 5.8323, "step": 6331 }, { "epoch": 2.1610921501706484, "grad_norm": 2.8701725006103516, "learning_rate": 0.0002796359499431172, "loss": 6.536, "step": 6332 }, { "epoch": 2.1614334470989762, "grad_norm": 2.827101230621338, "learning_rate": 0.0002795221843003413, "loss": 5.6708, "step": 6333 }, { "epoch": 2.1617747440273036, "grad_norm": 3.0027883052825928, "learning_rate": 0.00027940841865756546, "loss": 5.5514, "step": 6334 }, { "epoch": 2.1621160409556315, "grad_norm": 3.244901418685913, "learning_rate": 0.0002792946530147895, "loss": 5.0491, "step": 6335 }, { "epoch": 2.162457337883959, "grad_norm": 2.8494021892547607, "learning_rate": 0.0002791808873720136, "loss": 6.3514, "step": 6336 }, { "epoch": 2.1627986348122867, "grad_norm": 2.9061434268951416, "learning_rate": 0.0002790671217292378, "loss": 6.184, "step": 6337 }, { "epoch": 2.163139931740614, "grad_norm": 2.848724126815796, "learning_rate": 0.0002789533560864619, "loss": 6.5265, "step": 6338 }, { "epoch": 2.163481228668942, "grad_norm": 3.203709602355957, "learning_rate": 0.000278839590443686, "loss": 6.0744, "step": 6339 }, { "epoch": 2.1638225255972694, "grad_norm": 2.862330198287964, "learning_rate": 0.00027872582480091015, "loss": 6.3895, "step": 6340 }, { "epoch": 2.1641638225255972, "grad_norm": 4.710589408874512, "learning_rate": 0.00027861205915813425, "loss": 4.9895, "step": 6341 }, { "epoch": 2.164505119453925, "grad_norm": 2.8820667266845703, "learning_rate": 0.00027849829351535836, "loss": 5.7645, "step": 6342 }, { "epoch": 2.1648464163822525, "grad_norm": 3.1783809661865234, "learning_rate": 0.0002783845278725825, "loss": 5.7725, "step": 6343 }, { "epoch": 2.1651877133105804, "grad_norm": 2.8277227878570557, "learning_rate": 0.0002782707622298066, "loss": 6.6817, "step": 6344 }, { "epoch": 2.1655290102389078, "grad_norm": 2.84521222114563, "learning_rate": 0.00027815699658703067, "loss": 5.903, "step": 6345 }, { "epoch": 2.1658703071672356, "grad_norm": 2.908289909362793, "learning_rate": 0.00027804323094425483, "loss": 5.6247, "step": 6346 }, { "epoch": 2.166211604095563, "grad_norm": 3.011632204055786, "learning_rate": 0.00027792946530147893, "loss": 6.108, "step": 6347 }, { "epoch": 2.166552901023891, "grad_norm": 2.8673269748687744, "learning_rate": 0.0002778156996587031, "loss": 6.2876, "step": 6348 }, { "epoch": 2.1668941979522183, "grad_norm": 2.8036434650421143, "learning_rate": 0.0002777019340159272, "loss": 6.3993, "step": 6349 }, { "epoch": 2.167235494880546, "grad_norm": 2.8587241172790527, "learning_rate": 0.0002775881683731513, "loss": 6.0677, "step": 6350 }, { "epoch": 2.1675767918088735, "grad_norm": 3.036465883255005, "learning_rate": 0.00027747440273037546, "loss": 5.7799, "step": 6351 }, { "epoch": 2.1679180887372014, "grad_norm": 2.8174219131469727, "learning_rate": 0.00027736063708759957, "loss": 5.7702, "step": 6352 }, { "epoch": 2.168259385665529, "grad_norm": 2.7940962314605713, "learning_rate": 0.00027724687144482367, "loss": 6.4102, "step": 6353 }, { "epoch": 2.1686006825938566, "grad_norm": 4.701676368713379, "learning_rate": 0.00027713310580204783, "loss": 4.1398, "step": 6354 }, { "epoch": 2.1689419795221845, "grad_norm": 2.993300676345825, "learning_rate": 0.0002770193401592719, "loss": 6.4199, "step": 6355 }, { "epoch": 2.169283276450512, "grad_norm": 2.90317440032959, "learning_rate": 0.000276905574516496, "loss": 6.436, "step": 6356 }, { "epoch": 2.1696245733788397, "grad_norm": 2.9535815715789795, "learning_rate": 0.00027679180887372015, "loss": 6.5237, "step": 6357 }, { "epoch": 2.169965870307167, "grad_norm": 2.869474172592163, "learning_rate": 0.00027667804323094425, "loss": 6.0359, "step": 6358 }, { "epoch": 2.170307167235495, "grad_norm": 2.888397693634033, "learning_rate": 0.00027656427758816836, "loss": 6.4275, "step": 6359 }, { "epoch": 2.1706484641638224, "grad_norm": 2.8712522983551025, "learning_rate": 0.0002764505119453925, "loss": 6.1997, "step": 6360 }, { "epoch": 2.1709897610921502, "grad_norm": 2.934269905090332, "learning_rate": 0.0002763367463026166, "loss": 6.4548, "step": 6361 }, { "epoch": 2.1713310580204777, "grad_norm": 2.752089262008667, "learning_rate": 0.0002762229806598407, "loss": 6.0606, "step": 6362 }, { "epoch": 2.1716723549488055, "grad_norm": 3.460238456726074, "learning_rate": 0.0002761092150170649, "loss": 4.7184, "step": 6363 }, { "epoch": 2.172013651877133, "grad_norm": 2.8735015392303467, "learning_rate": 0.00027599544937428894, "loss": 5.8198, "step": 6364 }, { "epoch": 2.1723549488054608, "grad_norm": 2.812506675720215, "learning_rate": 0.00027588168373151304, "loss": 5.6997, "step": 6365 }, { "epoch": 2.172696245733788, "grad_norm": 2.769024133682251, "learning_rate": 0.0002757679180887372, "loss": 6.1244, "step": 6366 }, { "epoch": 2.173037542662116, "grad_norm": 2.8082642555236816, "learning_rate": 0.0002756541524459613, "loss": 6.0761, "step": 6367 }, { "epoch": 2.173378839590444, "grad_norm": 2.7583067417144775, "learning_rate": 0.00027554038680318546, "loss": 5.7482, "step": 6368 }, { "epoch": 2.1737201365187713, "grad_norm": 2.982508659362793, "learning_rate": 0.00027542662116040957, "loss": 5.0801, "step": 6369 }, { "epoch": 2.174061433447099, "grad_norm": 2.7821574211120605, "learning_rate": 0.0002753128555176337, "loss": 6.4252, "step": 6370 }, { "epoch": 2.1744027303754265, "grad_norm": 2.783104419708252, "learning_rate": 0.00027519908987485783, "loss": 5.8358, "step": 6371 }, { "epoch": 2.1747440273037544, "grad_norm": 2.8212077617645264, "learning_rate": 0.00027508532423208194, "loss": 6.0909, "step": 6372 }, { "epoch": 2.175085324232082, "grad_norm": 2.8396553993225098, "learning_rate": 0.00027497155858930604, "loss": 6.2717, "step": 6373 }, { "epoch": 2.1754266211604096, "grad_norm": 4.277924060821533, "learning_rate": 0.00027485779294653015, "loss": 5.4965, "step": 6374 }, { "epoch": 2.175767918088737, "grad_norm": 2.8166744709014893, "learning_rate": 0.00027474402730375425, "loss": 5.7125, "step": 6375 }, { "epoch": 2.176109215017065, "grad_norm": 2.8981945514678955, "learning_rate": 0.00027463026166097836, "loss": 5.2261, "step": 6376 }, { "epoch": 2.1764505119453923, "grad_norm": 2.9114558696746826, "learning_rate": 0.0002745164960182025, "loss": 6.0895, "step": 6377 }, { "epoch": 2.17679180887372, "grad_norm": 3.33648681640625, "learning_rate": 0.0002744027303754266, "loss": 5.1431, "step": 6378 }, { "epoch": 2.1771331058020476, "grad_norm": 2.8457000255584717, "learning_rate": 0.0002742889647326507, "loss": 6.706, "step": 6379 }, { "epoch": 2.1774744027303754, "grad_norm": 2.829103946685791, "learning_rate": 0.0002741751990898749, "loss": 6.4637, "step": 6380 }, { "epoch": 2.1778156996587033, "grad_norm": 2.912069797515869, "learning_rate": 0.000274061433447099, "loss": 6.7109, "step": 6381 }, { "epoch": 2.1781569965870307, "grad_norm": 2.960214853286743, "learning_rate": 0.0002739476678043231, "loss": 5.9655, "step": 6382 }, { "epoch": 2.1784982935153585, "grad_norm": 3.0913033485412598, "learning_rate": 0.00027383390216154725, "loss": 5.843, "step": 6383 }, { "epoch": 2.178839590443686, "grad_norm": 2.850743293762207, "learning_rate": 0.0002737201365187713, "loss": 4.9693, "step": 6384 }, { "epoch": 2.1791808873720138, "grad_norm": 2.808077335357666, "learning_rate": 0.0002736063708759954, "loss": 6.0217, "step": 6385 }, { "epoch": 2.179522184300341, "grad_norm": 2.9761393070220947, "learning_rate": 0.00027349260523321957, "loss": 4.5135, "step": 6386 }, { "epoch": 2.179863481228669, "grad_norm": 2.9906089305877686, "learning_rate": 0.0002733788395904437, "loss": 6.1202, "step": 6387 }, { "epoch": 2.1802047781569964, "grad_norm": 3.4929113388061523, "learning_rate": 0.00027326507394766783, "loss": 5.8859, "step": 6388 }, { "epoch": 2.1805460750853243, "grad_norm": 2.846465826034546, "learning_rate": 0.00027315130830489194, "loss": 6.0745, "step": 6389 }, { "epoch": 2.1808873720136517, "grad_norm": 2.907808780670166, "learning_rate": 0.00027303754266211604, "loss": 5.983, "step": 6390 }, { "epoch": 2.1812286689419795, "grad_norm": 2.858454942703247, "learning_rate": 0.0002729237770193402, "loss": 6.0968, "step": 6391 }, { "epoch": 2.181569965870307, "grad_norm": 2.9495015144348145, "learning_rate": 0.0002728100113765643, "loss": 6.0371, "step": 6392 }, { "epoch": 2.181911262798635, "grad_norm": 3.2573885917663574, "learning_rate": 0.0002726962457337884, "loss": 5.8426, "step": 6393 }, { "epoch": 2.1822525597269626, "grad_norm": 4.468471527099609, "learning_rate": 0.0002725824800910125, "loss": 5.1957, "step": 6394 }, { "epoch": 2.18259385665529, "grad_norm": 2.860956907272339, "learning_rate": 0.0002724687144482366, "loss": 5.7918, "step": 6395 }, { "epoch": 2.182935153583618, "grad_norm": 2.8932464122772217, "learning_rate": 0.00027235494880546073, "loss": 6.7351, "step": 6396 }, { "epoch": 2.1832764505119453, "grad_norm": 3.92891001701355, "learning_rate": 0.0002722411831626849, "loss": 4.5889, "step": 6397 }, { "epoch": 2.183617747440273, "grad_norm": 1.9834781885147095, "learning_rate": 0.000272127417519909, "loss": 3.1828, "step": 6398 }, { "epoch": 2.1839590443686006, "grad_norm": 2.90181827545166, "learning_rate": 0.0002720136518771331, "loss": 6.1316, "step": 6399 }, { "epoch": 2.1843003412969284, "grad_norm": 3.912259817123413, "learning_rate": 0.00027189988623435726, "loss": 5.1916, "step": 6400 }, { "epoch": 2.184641638225256, "grad_norm": 2.8160667419433594, "learning_rate": 0.00027178612059158136, "loss": 5.7501, "step": 6401 }, { "epoch": 2.1849829351535837, "grad_norm": 2.872741222381592, "learning_rate": 0.00027167235494880547, "loss": 6.3647, "step": 6402 }, { "epoch": 2.185324232081911, "grad_norm": 3.7568233013153076, "learning_rate": 0.00027155858930602957, "loss": 5.5898, "step": 6403 }, { "epoch": 2.185665529010239, "grad_norm": 2.84272837638855, "learning_rate": 0.0002714448236632537, "loss": 5.3566, "step": 6404 }, { "epoch": 2.1860068259385663, "grad_norm": 2.7461118698120117, "learning_rate": 0.0002713310580204778, "loss": 5.7219, "step": 6405 }, { "epoch": 2.186348122866894, "grad_norm": 2.8305881023406982, "learning_rate": 0.00027121729237770194, "loss": 6.4276, "step": 6406 }, { "epoch": 2.186689419795222, "grad_norm": 2.7954049110412598, "learning_rate": 0.00027110352673492604, "loss": 6.167, "step": 6407 }, { "epoch": 2.1870307167235494, "grad_norm": 2.9570295810699463, "learning_rate": 0.0002709897610921502, "loss": 5.7226, "step": 6408 }, { "epoch": 2.1873720136518773, "grad_norm": 2.7967288494110107, "learning_rate": 0.0002708759954493743, "loss": 6.2319, "step": 6409 }, { "epoch": 2.1877133105802047, "grad_norm": 6.396007061004639, "learning_rate": 0.0002707622298065984, "loss": 5.3139, "step": 6410 }, { "epoch": 2.1880546075085325, "grad_norm": 2.839141607284546, "learning_rate": 0.00027064846416382257, "loss": 6.323, "step": 6411 }, { "epoch": 2.18839590443686, "grad_norm": 2.803288459777832, "learning_rate": 0.0002705346985210467, "loss": 6.2097, "step": 6412 }, { "epoch": 2.188737201365188, "grad_norm": 2.8960049152374268, "learning_rate": 0.00027042093287827073, "loss": 5.969, "step": 6413 }, { "epoch": 2.189078498293515, "grad_norm": 2.7831318378448486, "learning_rate": 0.0002703071672354949, "loss": 6.3809, "step": 6414 }, { "epoch": 2.189419795221843, "grad_norm": 2.8870432376861572, "learning_rate": 0.000270193401592719, "loss": 6.6029, "step": 6415 }, { "epoch": 2.1897610921501705, "grad_norm": 2.961520195007324, "learning_rate": 0.0002700796359499431, "loss": 6.0527, "step": 6416 }, { "epoch": 2.1901023890784983, "grad_norm": 3.088958978652954, "learning_rate": 0.00026996587030716726, "loss": 5.7707, "step": 6417 }, { "epoch": 2.1904436860068257, "grad_norm": 2.74513840675354, "learning_rate": 0.00026985210466439136, "loss": 5.923, "step": 6418 }, { "epoch": 2.1907849829351536, "grad_norm": 3.138380765914917, "learning_rate": 0.00026973833902161547, "loss": 5.148, "step": 6419 }, { "epoch": 2.1911262798634814, "grad_norm": 3.0224714279174805, "learning_rate": 0.0002696245733788396, "loss": 6.7062, "step": 6420 }, { "epoch": 2.191467576791809, "grad_norm": 2.8007893562316895, "learning_rate": 0.00026951080773606373, "loss": 6.122, "step": 6421 }, { "epoch": 2.1918088737201367, "grad_norm": 2.802536725997925, "learning_rate": 0.00026939704209328784, "loss": 6.1242, "step": 6422 }, { "epoch": 2.192150170648464, "grad_norm": 2.7604641914367676, "learning_rate": 0.00026928327645051194, "loss": 5.7296, "step": 6423 }, { "epoch": 2.192491467576792, "grad_norm": 3.111950159072876, "learning_rate": 0.00026916951080773605, "loss": 5.6642, "step": 6424 }, { "epoch": 2.1928327645051193, "grad_norm": 2.8447318077087402, "learning_rate": 0.00026905574516496015, "loss": 5.7298, "step": 6425 }, { "epoch": 2.193174061433447, "grad_norm": 2.7198376655578613, "learning_rate": 0.0002689419795221843, "loss": 5.9149, "step": 6426 }, { "epoch": 2.1935153583617746, "grad_norm": 2.7918670177459717, "learning_rate": 0.0002688282138794084, "loss": 6.2658, "step": 6427 }, { "epoch": 2.1938566552901024, "grad_norm": 3.1611740589141846, "learning_rate": 0.0002687144482366326, "loss": 4.734, "step": 6428 }, { "epoch": 2.19419795221843, "grad_norm": 2.8715784549713135, "learning_rate": 0.0002686006825938567, "loss": 6.3364, "step": 6429 }, { "epoch": 2.1945392491467577, "grad_norm": 2.958388328552246, "learning_rate": 0.0002684869169510808, "loss": 6.582, "step": 6430 }, { "epoch": 2.194880546075085, "grad_norm": 3.085158109664917, "learning_rate": 0.00026837315130830494, "loss": 5.759, "step": 6431 }, { "epoch": 2.195221843003413, "grad_norm": 2.9389808177948, "learning_rate": 0.00026825938566552905, "loss": 5.6247, "step": 6432 }, { "epoch": 2.195563139931741, "grad_norm": 4.009513854980469, "learning_rate": 0.0002681456200227531, "loss": 4.9773, "step": 6433 }, { "epoch": 2.195904436860068, "grad_norm": 2.732940196990967, "learning_rate": 0.00026803185437997726, "loss": 5.8776, "step": 6434 }, { "epoch": 2.196245733788396, "grad_norm": 2.8661234378814697, "learning_rate": 0.00026791808873720136, "loss": 6.5482, "step": 6435 }, { "epoch": 2.1965870307167235, "grad_norm": 2.915010690689087, "learning_rate": 0.00026780432309442547, "loss": 6.4809, "step": 6436 }, { "epoch": 2.1969283276450513, "grad_norm": 2.878875255584717, "learning_rate": 0.0002676905574516496, "loss": 6.1371, "step": 6437 }, { "epoch": 2.1972696245733787, "grad_norm": 3.3660991191864014, "learning_rate": 0.00026757679180887373, "loss": 6.0642, "step": 6438 }, { "epoch": 2.1976109215017066, "grad_norm": 2.9850966930389404, "learning_rate": 0.00026746302616609784, "loss": 5.561, "step": 6439 }, { "epoch": 2.197952218430034, "grad_norm": 2.8198862075805664, "learning_rate": 0.000267349260523322, "loss": 6.2445, "step": 6440 }, { "epoch": 2.198293515358362, "grad_norm": 3.0752429962158203, "learning_rate": 0.0002672354948805461, "loss": 5.486, "step": 6441 }, { "epoch": 2.198634812286689, "grad_norm": 2.9619510173797607, "learning_rate": 0.00026712172923777015, "loss": 5.3807, "step": 6442 }, { "epoch": 2.198976109215017, "grad_norm": 2.8163340091705322, "learning_rate": 0.0002670079635949943, "loss": 6.5359, "step": 6443 }, { "epoch": 2.1993174061433445, "grad_norm": 2.8125815391540527, "learning_rate": 0.0002668941979522184, "loss": 6.039, "step": 6444 }, { "epoch": 2.1996587030716723, "grad_norm": 2.7999801635742188, "learning_rate": 0.0002667804323094425, "loss": 6.1154, "step": 6445 }, { "epoch": 2.2, "grad_norm": 2.8857717514038086, "learning_rate": 0.0002666666666666667, "loss": 5.9834, "step": 6446 }, { "epoch": 2.2003412969283276, "grad_norm": 2.783299446105957, "learning_rate": 0.0002665529010238908, "loss": 6.3258, "step": 6447 }, { "epoch": 2.2006825938566554, "grad_norm": 2.7714779376983643, "learning_rate": 0.00026643913538111494, "loss": 6.0787, "step": 6448 }, { "epoch": 2.201023890784983, "grad_norm": 2.6970479488372803, "learning_rate": 0.00026632536973833905, "loss": 6.2198, "step": 6449 }, { "epoch": 2.2013651877133107, "grad_norm": 2.753514051437378, "learning_rate": 0.00026621160409556315, "loss": 5.8685, "step": 6450 }, { "epoch": 2.201706484641638, "grad_norm": 2.866901159286499, "learning_rate": 0.0002660978384527873, "loss": 6.4346, "step": 6451 }, { "epoch": 2.202047781569966, "grad_norm": 2.863551139831543, "learning_rate": 0.00026598407281001136, "loss": 5.7247, "step": 6452 }, { "epoch": 2.2023890784982934, "grad_norm": 2.88744854927063, "learning_rate": 0.00026587030716723547, "loss": 5.9772, "step": 6453 }, { "epoch": 2.202730375426621, "grad_norm": 3.1774702072143555, "learning_rate": 0.00026575654152445963, "loss": 5.8586, "step": 6454 }, { "epoch": 2.2030716723549486, "grad_norm": 2.86907958984375, "learning_rate": 0.00026564277588168373, "loss": 6.8104, "step": 6455 }, { "epoch": 2.2034129692832765, "grad_norm": 3.6283483505249023, "learning_rate": 0.00026552901023890784, "loss": 4.1912, "step": 6456 }, { "epoch": 2.203754266211604, "grad_norm": 2.87131404876709, "learning_rate": 0.000265415244596132, "loss": 6.1613, "step": 6457 }, { "epoch": 2.2040955631399317, "grad_norm": 2.80851149559021, "learning_rate": 0.0002653014789533561, "loss": 5.8281, "step": 6458 }, { "epoch": 2.2044368600682596, "grad_norm": 2.9386231899261475, "learning_rate": 0.0002651877133105802, "loss": 6.7991, "step": 6459 }, { "epoch": 2.204778156996587, "grad_norm": 2.9392945766448975, "learning_rate": 0.00026507394766780437, "loss": 6.1992, "step": 6460 }, { "epoch": 2.205119453924915, "grad_norm": 2.8204219341278076, "learning_rate": 0.00026496018202502847, "loss": 6.0136, "step": 6461 }, { "epoch": 2.2054607508532422, "grad_norm": 2.826657295227051, "learning_rate": 0.0002648464163822525, "loss": 5.8623, "step": 6462 }, { "epoch": 2.20580204778157, "grad_norm": 2.8905653953552246, "learning_rate": 0.0002647326507394767, "loss": 6.2452, "step": 6463 }, { "epoch": 2.2061433447098975, "grad_norm": 2.7385172843933105, "learning_rate": 0.0002646188850967008, "loss": 5.9435, "step": 6464 }, { "epoch": 2.2064846416382253, "grad_norm": 2.80544114112854, "learning_rate": 0.0002645051194539249, "loss": 5.9384, "step": 6465 }, { "epoch": 2.2068259385665527, "grad_norm": 2.877896785736084, "learning_rate": 0.00026439135381114905, "loss": 5.8879, "step": 6466 }, { "epoch": 2.2071672354948806, "grad_norm": 2.7671401500701904, "learning_rate": 0.00026427758816837315, "loss": 6.2006, "step": 6467 }, { "epoch": 2.207508532423208, "grad_norm": 2.8723201751708984, "learning_rate": 0.00026416382252559726, "loss": 6.475, "step": 6468 }, { "epoch": 2.207849829351536, "grad_norm": 2.728503704071045, "learning_rate": 0.0002640500568828214, "loss": 5.9604, "step": 6469 }, { "epoch": 2.2081911262798632, "grad_norm": 2.96976900100708, "learning_rate": 0.0002639362912400455, "loss": 5.7367, "step": 6470 }, { "epoch": 2.208532423208191, "grad_norm": 5.522859573364258, "learning_rate": 0.00026382252559726963, "loss": 4.7263, "step": 6471 }, { "epoch": 2.208873720136519, "grad_norm": 3.003938674926758, "learning_rate": 0.00026370875995449373, "loss": 5.9086, "step": 6472 }, { "epoch": 2.2092150170648464, "grad_norm": 2.7639968395233154, "learning_rate": 0.00026359499431171784, "loss": 5.8062, "step": 6473 }, { "epoch": 2.209556313993174, "grad_norm": 2.8196215629577637, "learning_rate": 0.000263481228668942, "loss": 6.236, "step": 6474 }, { "epoch": 2.2098976109215016, "grad_norm": 6.847387790679932, "learning_rate": 0.0002633674630261661, "loss": 5.3268, "step": 6475 }, { "epoch": 2.2102389078498295, "grad_norm": 2.8416600227355957, "learning_rate": 0.0002632536973833902, "loss": 5.8468, "step": 6476 }, { "epoch": 2.210580204778157, "grad_norm": 7.3218278884887695, "learning_rate": 0.00026313993174061437, "loss": 4.5238, "step": 6477 }, { "epoch": 2.2109215017064847, "grad_norm": 2.832921266555786, "learning_rate": 0.00026302616609783847, "loss": 5.7101, "step": 6478 }, { "epoch": 2.211262798634812, "grad_norm": 2.810177803039551, "learning_rate": 0.0002629124004550626, "loss": 5.9935, "step": 6479 }, { "epoch": 2.21160409556314, "grad_norm": 2.747629404067993, "learning_rate": 0.00026279863481228674, "loss": 6.4255, "step": 6480 }, { "epoch": 2.2119453924914674, "grad_norm": 2.820760726928711, "learning_rate": 0.0002626848691695108, "loss": 5.8886, "step": 6481 }, { "epoch": 2.2122866894197952, "grad_norm": 3.057654857635498, "learning_rate": 0.0002625711035267349, "loss": 5.4938, "step": 6482 }, { "epoch": 2.2126279863481226, "grad_norm": 3.943042755126953, "learning_rate": 0.00026245733788395905, "loss": 5.5298, "step": 6483 }, { "epoch": 2.2129692832764505, "grad_norm": 2.72080135345459, "learning_rate": 0.00026234357224118316, "loss": 6.1378, "step": 6484 }, { "epoch": 2.2133105802047783, "grad_norm": 2.7657735347747803, "learning_rate": 0.00026222980659840726, "loss": 6.1694, "step": 6485 }, { "epoch": 2.2136518771331057, "grad_norm": 2.795632839202881, "learning_rate": 0.0002621160409556314, "loss": 6.1866, "step": 6486 }, { "epoch": 2.2139931740614336, "grad_norm": 2.779759407043457, "learning_rate": 0.0002620022753128555, "loss": 5.7196, "step": 6487 }, { "epoch": 2.214334470989761, "grad_norm": 2.8835349082946777, "learning_rate": 0.00026188850967007963, "loss": 6.2627, "step": 6488 }, { "epoch": 2.214675767918089, "grad_norm": 5.428569316864014, "learning_rate": 0.0002617747440273038, "loss": 5.1553, "step": 6489 }, { "epoch": 2.2150170648464163, "grad_norm": 2.7858505249023438, "learning_rate": 0.0002616609783845279, "loss": 6.2648, "step": 6490 }, { "epoch": 2.215358361774744, "grad_norm": 2.827878475189209, "learning_rate": 0.000261547212741752, "loss": 6.1736, "step": 6491 }, { "epoch": 2.2156996587030715, "grad_norm": 2.7949113845825195, "learning_rate": 0.0002614334470989761, "loss": 6.4478, "step": 6492 }, { "epoch": 2.2160409556313994, "grad_norm": 3.1486802101135254, "learning_rate": 0.0002613196814562002, "loss": 5.8937, "step": 6493 }, { "epoch": 2.2163822525597268, "grad_norm": 2.8336243629455566, "learning_rate": 0.00026120591581342437, "loss": 6.8206, "step": 6494 }, { "epoch": 2.2167235494880546, "grad_norm": 2.8043153285980225, "learning_rate": 0.00026109215017064847, "loss": 6.1785, "step": 6495 }, { "epoch": 2.217064846416382, "grad_norm": 2.822309732437134, "learning_rate": 0.0002609783845278726, "loss": 5.5858, "step": 6496 }, { "epoch": 2.21740614334471, "grad_norm": 3.1378068923950195, "learning_rate": 0.00026086461888509674, "loss": 5.7572, "step": 6497 }, { "epoch": 2.2177474402730377, "grad_norm": 2.852703332901001, "learning_rate": 0.00026075085324232084, "loss": 6.8021, "step": 6498 }, { "epoch": 2.218088737201365, "grad_norm": 2.8655879497528076, "learning_rate": 0.00026063708759954495, "loss": 5.816, "step": 6499 }, { "epoch": 2.218430034129693, "grad_norm": 2.730862855911255, "learning_rate": 0.0002605233219567691, "loss": 6.2995, "step": 6500 }, { "epoch": 2.2187713310580204, "grad_norm": 2.8955562114715576, "learning_rate": 0.00026040955631399316, "loss": 6.3926, "step": 6501 }, { "epoch": 2.2191126279863482, "grad_norm": 2.799837350845337, "learning_rate": 0.00026029579067121726, "loss": 6.1799, "step": 6502 }, { "epoch": 2.2194539249146756, "grad_norm": 2.7385647296905518, "learning_rate": 0.0002601820250284414, "loss": 6.1926, "step": 6503 }, { "epoch": 2.2197952218430035, "grad_norm": 2.808077096939087, "learning_rate": 0.0002600682593856655, "loss": 6.1108, "step": 6504 }, { "epoch": 2.220136518771331, "grad_norm": 2.8860621452331543, "learning_rate": 0.00025995449374288963, "loss": 6.0537, "step": 6505 }, { "epoch": 2.2204778156996587, "grad_norm": 2.850010871887207, "learning_rate": 0.0002598407281001138, "loss": 6.5343, "step": 6506 }, { "epoch": 2.220819112627986, "grad_norm": 2.805081367492676, "learning_rate": 0.0002597269624573379, "loss": 6.2704, "step": 6507 }, { "epoch": 2.221160409556314, "grad_norm": 3.074279308319092, "learning_rate": 0.000259613196814562, "loss": 5.9484, "step": 6508 }, { "epoch": 2.2215017064846414, "grad_norm": 2.8078973293304443, "learning_rate": 0.00025949943117178616, "loss": 5.8503, "step": 6509 }, { "epoch": 2.2218430034129693, "grad_norm": 2.807574987411499, "learning_rate": 0.0002593856655290102, "loss": 5.7379, "step": 6510 }, { "epoch": 2.222184300341297, "grad_norm": 2.841683864593506, "learning_rate": 0.00025927189988623437, "loss": 6.2235, "step": 6511 }, { "epoch": 2.2225255972696245, "grad_norm": 2.873955726623535, "learning_rate": 0.0002591581342434585, "loss": 5.6057, "step": 6512 }, { "epoch": 2.2228668941979524, "grad_norm": 2.908351421356201, "learning_rate": 0.0002590443686006826, "loss": 5.904, "step": 6513 }, { "epoch": 2.2232081911262798, "grad_norm": 7.525314807891846, "learning_rate": 0.00025893060295790674, "loss": 5.3918, "step": 6514 }, { "epoch": 2.2235494880546076, "grad_norm": 2.931612253189087, "learning_rate": 0.00025881683731513084, "loss": 5.8895, "step": 6515 }, { "epoch": 2.223890784982935, "grad_norm": 2.975277900695801, "learning_rate": 0.00025870307167235495, "loss": 6.1299, "step": 6516 }, { "epoch": 2.224232081911263, "grad_norm": 2.8416590690612793, "learning_rate": 0.0002585893060295791, "loss": 6.2545, "step": 6517 }, { "epoch": 2.2245733788395903, "grad_norm": 2.888201951980591, "learning_rate": 0.0002584755403868032, "loss": 6.193, "step": 6518 }, { "epoch": 2.224914675767918, "grad_norm": 2.7630908489227295, "learning_rate": 0.0002583617747440273, "loss": 5.9981, "step": 6519 }, { "epoch": 2.2252559726962455, "grad_norm": 2.9234695434570312, "learning_rate": 0.0002582480091012514, "loss": 6.2954, "step": 6520 }, { "epoch": 2.2255972696245734, "grad_norm": 3.6878268718719482, "learning_rate": 0.0002581342434584755, "loss": 5.1989, "step": 6521 }, { "epoch": 2.225938566552901, "grad_norm": 2.8937203884124756, "learning_rate": 0.00025802047781569963, "loss": 6.15, "step": 6522 }, { "epoch": 2.2262798634812286, "grad_norm": 2.8810667991638184, "learning_rate": 0.0002579067121729238, "loss": 6.2176, "step": 6523 }, { "epoch": 2.2266211604095565, "grad_norm": 3.0311129093170166, "learning_rate": 0.0002577929465301479, "loss": 5.6466, "step": 6524 }, { "epoch": 2.226962457337884, "grad_norm": 3.263695001602173, "learning_rate": 0.000257679180887372, "loss": 4.1231, "step": 6525 }, { "epoch": 2.2273037542662117, "grad_norm": 2.8339765071868896, "learning_rate": 0.00025756541524459616, "loss": 6.2549, "step": 6526 }, { "epoch": 2.227645051194539, "grad_norm": 2.7960495948791504, "learning_rate": 0.00025745164960182026, "loss": 6.3462, "step": 6527 }, { "epoch": 2.227986348122867, "grad_norm": 2.9964165687561035, "learning_rate": 0.00025733788395904437, "loss": 5.2995, "step": 6528 }, { "epoch": 2.2283276450511944, "grad_norm": 2.8184449672698975, "learning_rate": 0.00025722411831626853, "loss": 6.5219, "step": 6529 }, { "epoch": 2.2286689419795223, "grad_norm": 2.7724874019622803, "learning_rate": 0.0002571103526734926, "loss": 6.0435, "step": 6530 }, { "epoch": 2.2290102389078497, "grad_norm": 3.961784601211548, "learning_rate": 0.0002569965870307167, "loss": 5.7937, "step": 6531 }, { "epoch": 2.2293515358361775, "grad_norm": 2.875619888305664, "learning_rate": 0.00025688282138794084, "loss": 6.6192, "step": 6532 }, { "epoch": 2.229692832764505, "grad_norm": 2.870389699935913, "learning_rate": 0.00025676905574516495, "loss": 6.4559, "step": 6533 }, { "epoch": 2.2300341296928328, "grad_norm": 3.843813896179199, "learning_rate": 0.0002566552901023891, "loss": 4.74, "step": 6534 }, { "epoch": 2.2303754266211606, "grad_norm": 3.107849597930908, "learning_rate": 0.0002565415244596132, "loss": 5.6221, "step": 6535 }, { "epoch": 2.230716723549488, "grad_norm": 2.7651352882385254, "learning_rate": 0.0002564277588168373, "loss": 6.2125, "step": 6536 }, { "epoch": 2.231058020477816, "grad_norm": 2.9481201171875, "learning_rate": 0.0002563139931740615, "loss": 5.7552, "step": 6537 }, { "epoch": 2.2313993174061433, "grad_norm": 2.9435575008392334, "learning_rate": 0.0002562002275312856, "loss": 6.1869, "step": 6538 }, { "epoch": 2.231740614334471, "grad_norm": 2.826810359954834, "learning_rate": 0.0002560864618885097, "loss": 6.1379, "step": 6539 }, { "epoch": 2.2320819112627985, "grad_norm": 2.7598679065704346, "learning_rate": 0.0002559726962457338, "loss": 6.0253, "step": 6540 }, { "epoch": 2.2324232081911264, "grad_norm": 2.83957576751709, "learning_rate": 0.0002558589306029579, "loss": 5.9333, "step": 6541 }, { "epoch": 2.232764505119454, "grad_norm": 3.761160135269165, "learning_rate": 0.000255745164960182, "loss": 5.4008, "step": 6542 }, { "epoch": 2.2331058020477816, "grad_norm": 2.7840142250061035, "learning_rate": 0.00025563139931740616, "loss": 6.4198, "step": 6543 }, { "epoch": 2.233447098976109, "grad_norm": 2.7931652069091797, "learning_rate": 0.00025551763367463026, "loss": 6.5151, "step": 6544 }, { "epoch": 2.233788395904437, "grad_norm": 2.733041286468506, "learning_rate": 0.00025540386803185437, "loss": 6.168, "step": 6545 }, { "epoch": 2.2341296928327643, "grad_norm": 3.0885894298553467, "learning_rate": 0.00025529010238907853, "loss": 5.4425, "step": 6546 }, { "epoch": 2.234470989761092, "grad_norm": 2.77445912361145, "learning_rate": 0.00025517633674630263, "loss": 6.0673, "step": 6547 }, { "epoch": 2.23481228668942, "grad_norm": 2.836225986480713, "learning_rate": 0.00025506257110352674, "loss": 6.3828, "step": 6548 }, { "epoch": 2.2351535836177474, "grad_norm": 2.814570903778076, "learning_rate": 0.00025494880546075084, "loss": 6.2877, "step": 6549 }, { "epoch": 2.2354948805460753, "grad_norm": 2.966297149658203, "learning_rate": 0.00025483503981797495, "loss": 6.2828, "step": 6550 }, { "epoch": 2.2358361774744027, "grad_norm": 2.8446590900421143, "learning_rate": 0.00025472127417519905, "loss": 5.9261, "step": 6551 }, { "epoch": 2.2361774744027305, "grad_norm": 2.9468929767608643, "learning_rate": 0.0002546075085324232, "loss": 6.1299, "step": 6552 }, { "epoch": 2.236518771331058, "grad_norm": 2.959243059158325, "learning_rate": 0.0002544937428896473, "loss": 5.9142, "step": 6553 }, { "epoch": 2.2368600682593858, "grad_norm": 2.908616781234741, "learning_rate": 0.0002543799772468715, "loss": 5.9464, "step": 6554 }, { "epoch": 2.237201365187713, "grad_norm": 2.857999563217163, "learning_rate": 0.0002542662116040956, "loss": 6.5652, "step": 6555 }, { "epoch": 2.237542662116041, "grad_norm": 2.829493522644043, "learning_rate": 0.0002541524459613197, "loss": 5.7772, "step": 6556 }, { "epoch": 2.2378839590443684, "grad_norm": 2.8293933868408203, "learning_rate": 0.00025403868031854385, "loss": 6.0328, "step": 6557 }, { "epoch": 2.2382252559726963, "grad_norm": 2.949575662612915, "learning_rate": 0.00025392491467576795, "loss": 6.5024, "step": 6558 }, { "epoch": 2.2385665529010237, "grad_norm": 3.105191946029663, "learning_rate": 0.000253811149032992, "loss": 6.3816, "step": 6559 }, { "epoch": 2.2389078498293515, "grad_norm": 2.836264133453369, "learning_rate": 0.00025369738339021616, "loss": 6.4266, "step": 6560 }, { "epoch": 2.2392491467576794, "grad_norm": 2.8078081607818604, "learning_rate": 0.00025358361774744027, "loss": 6.5147, "step": 6561 }, { "epoch": 2.239590443686007, "grad_norm": 2.8312695026397705, "learning_rate": 0.00025346985210466437, "loss": 6.6833, "step": 6562 }, { "epoch": 2.2399317406143346, "grad_norm": 3.007437229156494, "learning_rate": 0.00025335608646188853, "loss": 5.3381, "step": 6563 }, { "epoch": 2.240273037542662, "grad_norm": 3.071810722351074, "learning_rate": 0.00025324232081911263, "loss": 5.3215, "step": 6564 }, { "epoch": 2.24061433447099, "grad_norm": 2.7744386196136475, "learning_rate": 0.00025312855517633674, "loss": 6.1504, "step": 6565 }, { "epoch": 2.2409556313993173, "grad_norm": 2.710651159286499, "learning_rate": 0.0002530147895335609, "loss": 6.2226, "step": 6566 }, { "epoch": 2.241296928327645, "grad_norm": 2.8886449337005615, "learning_rate": 0.000252901023890785, "loss": 6.6402, "step": 6567 }, { "epoch": 2.2416382252559726, "grad_norm": 2.7804300785064697, "learning_rate": 0.0002527872582480091, "loss": 5.756, "step": 6568 }, { "epoch": 2.2419795221843004, "grad_norm": 2.8473620414733887, "learning_rate": 0.0002526734926052332, "loss": 6.4581, "step": 6569 }, { "epoch": 2.242320819112628, "grad_norm": 2.7306015491485596, "learning_rate": 0.0002525597269624573, "loss": 5.8941, "step": 6570 }, { "epoch": 2.2426621160409557, "grad_norm": 4.8221516609191895, "learning_rate": 0.0002524459613196814, "loss": 4.8643, "step": 6571 }, { "epoch": 2.243003412969283, "grad_norm": 2.7513792514801025, "learning_rate": 0.0002523321956769056, "loss": 5.9781, "step": 6572 }, { "epoch": 2.243344709897611, "grad_norm": 2.6935529708862305, "learning_rate": 0.0002522184300341297, "loss": 6.2471, "step": 6573 }, { "epoch": 2.2436860068259388, "grad_norm": 2.7685117721557617, "learning_rate": 0.00025210466439135385, "loss": 6.2081, "step": 6574 }, { "epoch": 2.244027303754266, "grad_norm": 2.770594596862793, "learning_rate": 0.00025199089874857795, "loss": 5.9571, "step": 6575 }, { "epoch": 2.244368600682594, "grad_norm": 2.826627731323242, "learning_rate": 0.00025187713310580206, "loss": 5.9076, "step": 6576 }, { "epoch": 2.2447098976109214, "grad_norm": 2.8154921531677246, "learning_rate": 0.0002517633674630262, "loss": 5.9773, "step": 6577 }, { "epoch": 2.2450511945392493, "grad_norm": 2.681960105895996, "learning_rate": 0.00025164960182025027, "loss": 5.4657, "step": 6578 }, { "epoch": 2.2453924914675767, "grad_norm": 2.780457019805908, "learning_rate": 0.00025153583617747437, "loss": 6.2352, "step": 6579 }, { "epoch": 2.2457337883959045, "grad_norm": 2.8633296489715576, "learning_rate": 0.00025142207053469853, "loss": 6.102, "step": 6580 }, { "epoch": 2.246075085324232, "grad_norm": 3.040717124938965, "learning_rate": 0.00025130830489192264, "loss": 5.2462, "step": 6581 }, { "epoch": 2.24641638225256, "grad_norm": 2.7666218280792236, "learning_rate": 0.00025119453924914674, "loss": 5.4919, "step": 6582 }, { "epoch": 2.246757679180887, "grad_norm": 2.838775157928467, "learning_rate": 0.0002510807736063709, "loss": 6.2391, "step": 6583 }, { "epoch": 2.247098976109215, "grad_norm": 2.8280463218688965, "learning_rate": 0.000250967007963595, "loss": 6.3613, "step": 6584 }, { "epoch": 2.2474402730375425, "grad_norm": 2.8792102336883545, "learning_rate": 0.0002508532423208191, "loss": 5.8355, "step": 6585 }, { "epoch": 2.2477815699658703, "grad_norm": 2.7618908882141113, "learning_rate": 0.00025073947667804327, "loss": 6.0076, "step": 6586 }, { "epoch": 2.248122866894198, "grad_norm": 2.7827494144439697, "learning_rate": 0.0002506257110352674, "loss": 4.3666, "step": 6587 }, { "epoch": 2.2484641638225256, "grad_norm": 2.926374673843384, "learning_rate": 0.0002505119453924914, "loss": 6.048, "step": 6588 }, { "epoch": 2.2488054607508534, "grad_norm": 2.982754945755005, "learning_rate": 0.0002503981797497156, "loss": 5.8397, "step": 6589 }, { "epoch": 2.249146757679181, "grad_norm": 2.973355770111084, "learning_rate": 0.0002502844141069397, "loss": 5.4961, "step": 6590 }, { "epoch": 2.2494880546075087, "grad_norm": 2.7500498294830322, "learning_rate": 0.0002501706484641638, "loss": 5.631, "step": 6591 }, { "epoch": 2.249829351535836, "grad_norm": 2.882366180419922, "learning_rate": 0.00025005688282138795, "loss": 5.8569, "step": 6592 }, { "epoch": 2.250170648464164, "grad_norm": 2.73225736618042, "learning_rate": 0.00024994311717861206, "loss": 6.4002, "step": 6593 }, { "epoch": 2.2505119453924913, "grad_norm": 2.800004720687866, "learning_rate": 0.0002498293515358362, "loss": 6.4107, "step": 6594 }, { "epoch": 2.250853242320819, "grad_norm": 2.6945483684539795, "learning_rate": 0.00024971558589306027, "loss": 6.414, "step": 6595 }, { "epoch": 2.2511945392491466, "grad_norm": 2.850060224533081, "learning_rate": 0.0002496018202502844, "loss": 5.8978, "step": 6596 }, { "epoch": 2.2515358361774744, "grad_norm": 2.810472249984741, "learning_rate": 0.00024948805460750853, "loss": 5.7288, "step": 6597 }, { "epoch": 2.2518771331058023, "grad_norm": 4.052561283111572, "learning_rate": 0.00024937428896473264, "loss": 5.1013, "step": 6598 }, { "epoch": 2.2522184300341297, "grad_norm": 3.0097126960754395, "learning_rate": 0.0002492605233219568, "loss": 5.8071, "step": 6599 }, { "epoch": 2.252559726962457, "grad_norm": 2.8566362857818604, "learning_rate": 0.0002491467576791809, "loss": 6.1921, "step": 6600 }, { "epoch": 2.252901023890785, "grad_norm": 3.514784336090088, "learning_rate": 0.000249032992036405, "loss": 4.8535, "step": 6601 }, { "epoch": 2.253242320819113, "grad_norm": 3.010627031326294, "learning_rate": 0.0002489192263936291, "loss": 6.0636, "step": 6602 }, { "epoch": 2.25358361774744, "grad_norm": 2.843026876449585, "learning_rate": 0.00024880546075085327, "loss": 5.8824, "step": 6603 }, { "epoch": 2.253924914675768, "grad_norm": 2.940535306930542, "learning_rate": 0.0002486916951080774, "loss": 5.8778, "step": 6604 }, { "epoch": 2.2542662116040955, "grad_norm": 2.917555570602417, "learning_rate": 0.0002485779294653015, "loss": 6.1297, "step": 6605 }, { "epoch": 2.2546075085324233, "grad_norm": 3.041822910308838, "learning_rate": 0.0002484641638225256, "loss": 5.5592, "step": 6606 }, { "epoch": 2.2549488054607507, "grad_norm": 2.8139913082122803, "learning_rate": 0.00024835039817974974, "loss": 6.2674, "step": 6607 }, { "epoch": 2.2552901023890786, "grad_norm": 2.7989606857299805, "learning_rate": 0.00024823663253697385, "loss": 5.9394, "step": 6608 }, { "epoch": 2.255631399317406, "grad_norm": 2.64013671875, "learning_rate": 0.00024812286689419795, "loss": 6.1189, "step": 6609 }, { "epoch": 2.255972696245734, "grad_norm": 2.7237131595611572, "learning_rate": 0.00024800910125142206, "loss": 5.7884, "step": 6610 }, { "epoch": 2.2563139931740617, "grad_norm": 4.8718719482421875, "learning_rate": 0.00024789533560864616, "loss": 6.2122, "step": 6611 }, { "epoch": 2.256655290102389, "grad_norm": 3.137288808822632, "learning_rate": 0.0002477815699658703, "loss": 6.0289, "step": 6612 }, { "epoch": 2.2569965870307165, "grad_norm": 2.8663458824157715, "learning_rate": 0.00024766780432309443, "loss": 5.903, "step": 6613 }, { "epoch": 2.2573378839590443, "grad_norm": 2.8547210693359375, "learning_rate": 0.0002475540386803186, "loss": 6.4314, "step": 6614 }, { "epoch": 2.257679180887372, "grad_norm": 2.7361135482788086, "learning_rate": 0.00024744027303754264, "loss": 5.6617, "step": 6615 }, { "epoch": 2.2580204778156996, "grad_norm": 3.2815380096435547, "learning_rate": 0.0002473265073947668, "loss": 5.6537, "step": 6616 }, { "epoch": 2.2583617747440274, "grad_norm": 2.7974159717559814, "learning_rate": 0.0002472127417519909, "loss": 6.072, "step": 6617 }, { "epoch": 2.258703071672355, "grad_norm": 2.8251793384552, "learning_rate": 0.000247098976109215, "loss": 6.4367, "step": 6618 }, { "epoch": 2.2590443686006827, "grad_norm": 2.9200313091278076, "learning_rate": 0.00024698521046643917, "loss": 5.9348, "step": 6619 }, { "epoch": 2.25938566552901, "grad_norm": 3.057755708694458, "learning_rate": 0.00024687144482366327, "loss": 5.5646, "step": 6620 }, { "epoch": 2.259726962457338, "grad_norm": 2.739042043685913, "learning_rate": 0.0002467576791808874, "loss": 6.5243, "step": 6621 }, { "epoch": 2.2600682593856654, "grad_norm": 2.8239619731903076, "learning_rate": 0.0002466439135381115, "loss": 5.7505, "step": 6622 }, { "epoch": 2.260409556313993, "grad_norm": 2.8290717601776123, "learning_rate": 0.00024653014789533564, "loss": 5.9059, "step": 6623 }, { "epoch": 2.260750853242321, "grad_norm": 3.1619184017181396, "learning_rate": 0.00024641638225255974, "loss": 5.6338, "step": 6624 }, { "epoch": 2.2610921501706485, "grad_norm": 2.940603256225586, "learning_rate": 0.00024630261660978385, "loss": 5.4515, "step": 6625 }, { "epoch": 2.261433447098976, "grad_norm": 3.02312970161438, "learning_rate": 0.00024618885096700795, "loss": 5.0054, "step": 6626 }, { "epoch": 2.2617747440273037, "grad_norm": 2.7911484241485596, "learning_rate": 0.0002460750853242321, "loss": 6.7703, "step": 6627 }, { "epoch": 2.2621160409556316, "grad_norm": 2.8118064403533936, "learning_rate": 0.0002459613196814562, "loss": 5.8083, "step": 6628 }, { "epoch": 2.262457337883959, "grad_norm": 2.7824995517730713, "learning_rate": 0.0002458475540386803, "loss": 5.9023, "step": 6629 }, { "epoch": 2.262798634812287, "grad_norm": 2.8783822059631348, "learning_rate": 0.00024573378839590443, "loss": 6.2359, "step": 6630 }, { "epoch": 2.2631399317406142, "grad_norm": 2.9949822425842285, "learning_rate": 0.00024562002275312853, "loss": 5.0531, "step": 6631 }, { "epoch": 2.263481228668942, "grad_norm": 2.8121888637542725, "learning_rate": 0.0002455062571103527, "loss": 5.86, "step": 6632 }, { "epoch": 2.2638225255972695, "grad_norm": 2.776623249053955, "learning_rate": 0.0002453924914675768, "loss": 6.3007, "step": 6633 }, { "epoch": 2.2641638225255973, "grad_norm": 2.7938060760498047, "learning_rate": 0.0002452787258248009, "loss": 6.3209, "step": 6634 }, { "epoch": 2.2645051194539247, "grad_norm": 2.750595808029175, "learning_rate": 0.000245164960182025, "loss": 6.1808, "step": 6635 }, { "epoch": 2.2648464163822526, "grad_norm": 2.7990636825561523, "learning_rate": 0.00024505119453924917, "loss": 6.4628, "step": 6636 }, { "epoch": 2.2651877133105804, "grad_norm": 2.674833059310913, "learning_rate": 0.00024493742889647327, "loss": 6.1304, "step": 6637 }, { "epoch": 2.265529010238908, "grad_norm": 2.6972098350524902, "learning_rate": 0.0002448236632536974, "loss": 6.4638, "step": 6638 }, { "epoch": 2.2658703071672353, "grad_norm": 2.8341851234436035, "learning_rate": 0.0002447098976109215, "loss": 6.2938, "step": 6639 }, { "epoch": 2.266211604095563, "grad_norm": 2.8352243900299072, "learning_rate": 0.00024459613196814564, "loss": 5.7276, "step": 6640 }, { "epoch": 2.266552901023891, "grad_norm": 2.8336145877838135, "learning_rate": 0.00024448236632536975, "loss": 5.9019, "step": 6641 }, { "epoch": 2.2668941979522184, "grad_norm": 2.872852087020874, "learning_rate": 0.00024436860068259385, "loss": 6.4402, "step": 6642 }, { "epoch": 2.267235494880546, "grad_norm": 2.7706029415130615, "learning_rate": 0.000244254835039818, "loss": 5.992, "step": 6643 }, { "epoch": 2.2675767918088736, "grad_norm": 2.805307149887085, "learning_rate": 0.00024414106939704206, "loss": 5.5684, "step": 6644 }, { "epoch": 2.2679180887372015, "grad_norm": 2.671431303024292, "learning_rate": 0.00024402730375426622, "loss": 5.8068, "step": 6645 }, { "epoch": 2.268259385665529, "grad_norm": 2.8474628925323486, "learning_rate": 0.00024391353811149032, "loss": 6.6582, "step": 6646 }, { "epoch": 2.2686006825938567, "grad_norm": 2.825212240219116, "learning_rate": 0.00024379977246871446, "loss": 6.1744, "step": 6647 }, { "epoch": 2.268941979522184, "grad_norm": 2.828723669052124, "learning_rate": 0.0002436860068259386, "loss": 5.794, "step": 6648 }, { "epoch": 2.269283276450512, "grad_norm": 2.701645612716675, "learning_rate": 0.00024357224118316267, "loss": 6.222, "step": 6649 }, { "epoch": 2.26962457337884, "grad_norm": 2.8495328426361084, "learning_rate": 0.0002434584755403868, "loss": 6.0397, "step": 6650 }, { "epoch": 2.2699658703071672, "grad_norm": 2.8856194019317627, "learning_rate": 0.00024334470989761093, "loss": 5.6817, "step": 6651 }, { "epoch": 2.2703071672354946, "grad_norm": 2.8644917011260986, "learning_rate": 0.00024323094425483506, "loss": 5.5872, "step": 6652 }, { "epoch": 2.2706484641638225, "grad_norm": 2.808652877807617, "learning_rate": 0.00024311717861205917, "loss": 5.4768, "step": 6653 }, { "epoch": 2.2709897610921503, "grad_norm": 3.034461259841919, "learning_rate": 0.00024300341296928327, "loss": 6.1287, "step": 6654 }, { "epoch": 2.2713310580204777, "grad_norm": 2.723137140274048, "learning_rate": 0.0002428896473265074, "loss": 6.1159, "step": 6655 }, { "epoch": 2.2716723549488056, "grad_norm": 2.77011775970459, "learning_rate": 0.0002427758816837315, "loss": 6.3846, "step": 6656 }, { "epoch": 2.272013651877133, "grad_norm": 2.8131182193756104, "learning_rate": 0.00024266211604095564, "loss": 6.6823, "step": 6657 }, { "epoch": 2.272354948805461, "grad_norm": 2.815347194671631, "learning_rate": 0.00024254835039817977, "loss": 6.3332, "step": 6658 }, { "epoch": 2.2726962457337883, "grad_norm": 2.8586032390594482, "learning_rate": 0.00024243458475540385, "loss": 6.2077, "step": 6659 }, { "epoch": 2.273037542662116, "grad_norm": 2.8561007976531982, "learning_rate": 0.00024232081911262798, "loss": 5.5016, "step": 6660 }, { "epoch": 2.2733788395904435, "grad_norm": 2.8218114376068115, "learning_rate": 0.00024220705346985212, "loss": 6.3183, "step": 6661 }, { "epoch": 2.2737201365187714, "grad_norm": 2.9033164978027344, "learning_rate": 0.00024209328782707625, "loss": 5.2998, "step": 6662 }, { "epoch": 2.274061433447099, "grad_norm": 2.784006357192993, "learning_rate": 0.00024197952218430033, "loss": 6.1072, "step": 6663 }, { "epoch": 2.2744027303754266, "grad_norm": 2.8394157886505127, "learning_rate": 0.00024186575654152446, "loss": 5.9447, "step": 6664 }, { "epoch": 2.274744027303754, "grad_norm": 2.8039710521698, "learning_rate": 0.0002417519908987486, "loss": 6.2788, "step": 6665 }, { "epoch": 2.275085324232082, "grad_norm": 2.781151533126831, "learning_rate": 0.0002416382252559727, "loss": 6.0061, "step": 6666 }, { "epoch": 2.2754266211604097, "grad_norm": 2.732513427734375, "learning_rate": 0.00024152445961319683, "loss": 6.1009, "step": 6667 }, { "epoch": 2.275767918088737, "grad_norm": 2.8295493125915527, "learning_rate": 0.00024141069397042093, "loss": 5.4378, "step": 6668 }, { "epoch": 2.276109215017065, "grad_norm": 2.9463069438934326, "learning_rate": 0.00024129692832764504, "loss": 5.8396, "step": 6669 }, { "epoch": 2.2764505119453924, "grad_norm": 2.698521375656128, "learning_rate": 0.00024118316268486917, "loss": 5.6336, "step": 6670 }, { "epoch": 2.2767918088737202, "grad_norm": 4.356855869293213, "learning_rate": 0.0002410693970420933, "loss": 4.6489, "step": 6671 }, { "epoch": 2.2771331058020476, "grad_norm": 2.87615704536438, "learning_rate": 0.00024095563139931743, "loss": 6.4171, "step": 6672 }, { "epoch": 2.2774744027303755, "grad_norm": 2.763654947280884, "learning_rate": 0.0002408418657565415, "loss": 6.1001, "step": 6673 }, { "epoch": 2.277815699658703, "grad_norm": 2.8707218170166016, "learning_rate": 0.00024072810011376564, "loss": 5.8888, "step": 6674 }, { "epoch": 2.2781569965870307, "grad_norm": 2.8277971744537354, "learning_rate": 0.00024061433447098977, "loss": 5.6618, "step": 6675 }, { "epoch": 2.2784982935153586, "grad_norm": 2.787092447280884, "learning_rate": 0.00024050056882821388, "loss": 6.5541, "step": 6676 }, { "epoch": 2.278839590443686, "grad_norm": 3.1452341079711914, "learning_rate": 0.000240386803185438, "loss": 4.8793, "step": 6677 }, { "epoch": 2.2791808873720134, "grad_norm": 2.8025355339050293, "learning_rate": 0.00024027303754266212, "loss": 5.8608, "step": 6678 }, { "epoch": 2.2795221843003413, "grad_norm": 2.7934341430664062, "learning_rate": 0.00024015927189988622, "loss": 6.2955, "step": 6679 }, { "epoch": 2.279863481228669, "grad_norm": 2.744135618209839, "learning_rate": 0.00024004550625711035, "loss": 6.4455, "step": 6680 }, { "epoch": 2.2802047781569965, "grad_norm": 2.76705002784729, "learning_rate": 0.00023993174061433449, "loss": 5.3531, "step": 6681 }, { "epoch": 2.2805460750853244, "grad_norm": 2.9204037189483643, "learning_rate": 0.00023981797497155862, "loss": 6.5813, "step": 6682 }, { "epoch": 2.2808873720136518, "grad_norm": 2.7960617542266846, "learning_rate": 0.0002397042093287827, "loss": 6.678, "step": 6683 }, { "epoch": 2.2812286689419796, "grad_norm": 2.736600399017334, "learning_rate": 0.00023959044368600683, "loss": 6.0339, "step": 6684 }, { "epoch": 2.281569965870307, "grad_norm": 3.3651063442230225, "learning_rate": 0.00023947667804323096, "loss": 5.7733, "step": 6685 }, { "epoch": 2.281911262798635, "grad_norm": 2.7875654697418213, "learning_rate": 0.00023936291240045506, "loss": 5.9666, "step": 6686 }, { "epoch": 2.2822525597269623, "grad_norm": 2.8029191493988037, "learning_rate": 0.0002392491467576792, "loss": 6.4682, "step": 6687 }, { "epoch": 2.28259385665529, "grad_norm": 2.731361150741577, "learning_rate": 0.0002391353811149033, "loss": 6.54, "step": 6688 }, { "epoch": 2.282935153583618, "grad_norm": 2.759530782699585, "learning_rate": 0.0002390216154721274, "loss": 6.4203, "step": 6689 }, { "epoch": 2.2832764505119454, "grad_norm": 2.8398633003234863, "learning_rate": 0.00023890784982935154, "loss": 6.7545, "step": 6690 }, { "epoch": 2.283617747440273, "grad_norm": 4.068755626678467, "learning_rate": 0.00023879408418657567, "loss": 5.7944, "step": 6691 }, { "epoch": 2.2839590443686006, "grad_norm": 2.7552263736724854, "learning_rate": 0.0002386803185437998, "loss": 6.4369, "step": 6692 }, { "epoch": 2.2843003412969285, "grad_norm": 2.8534114360809326, "learning_rate": 0.00023856655290102388, "loss": 5.6889, "step": 6693 }, { "epoch": 2.284641638225256, "grad_norm": 2.8475186824798584, "learning_rate": 0.000238452787258248, "loss": 5.7206, "step": 6694 }, { "epoch": 2.2849829351535837, "grad_norm": 2.8369054794311523, "learning_rate": 0.00023833902161547214, "loss": 5.6994, "step": 6695 }, { "epoch": 2.285324232081911, "grad_norm": 2.86474609375, "learning_rate": 0.00023822525597269625, "loss": 6.6088, "step": 6696 }, { "epoch": 2.285665529010239, "grad_norm": 2.8578531742095947, "learning_rate": 0.00023811149032992038, "loss": 6.2553, "step": 6697 }, { "epoch": 2.2860068259385664, "grad_norm": 2.844667673110962, "learning_rate": 0.00023799772468714449, "loss": 5.9709, "step": 6698 }, { "epoch": 2.2863481228668943, "grad_norm": 2.900826930999756, "learning_rate": 0.0002378839590443686, "loss": 6.7451, "step": 6699 }, { "epoch": 2.2866894197952217, "grad_norm": 2.819692373275757, "learning_rate": 0.00023777019340159272, "loss": 5.666, "step": 6700 }, { "epoch": 2.2870307167235495, "grad_norm": 2.7792670726776123, "learning_rate": 0.00023765642775881686, "loss": 6.7826, "step": 6701 }, { "epoch": 2.2873720136518774, "grad_norm": 2.857227087020874, "learning_rate": 0.00023754266211604096, "loss": 5.8068, "step": 6702 }, { "epoch": 2.2877133105802048, "grad_norm": 2.8718831539154053, "learning_rate": 0.00023742889647326507, "loss": 6.0465, "step": 6703 }, { "epoch": 2.288054607508532, "grad_norm": 2.8006720542907715, "learning_rate": 0.0002373151308304892, "loss": 6.0748, "step": 6704 }, { "epoch": 2.28839590443686, "grad_norm": 2.717714548110962, "learning_rate": 0.00023720136518771333, "loss": 6.2189, "step": 6705 }, { "epoch": 2.288737201365188, "grad_norm": 2.80198073387146, "learning_rate": 0.00023708759954493743, "loss": 6.7073, "step": 6706 }, { "epoch": 2.2890784982935153, "grad_norm": 2.960460901260376, "learning_rate": 0.00023697383390216154, "loss": 5.2394, "step": 6707 }, { "epoch": 2.289419795221843, "grad_norm": 2.737926483154297, "learning_rate": 0.00023686006825938567, "loss": 6.5359, "step": 6708 }, { "epoch": 2.2897610921501705, "grad_norm": 2.870438814163208, "learning_rate": 0.00023674630261660978, "loss": 6.5294, "step": 6709 }, { "epoch": 2.2901023890784984, "grad_norm": 2.9853978157043457, "learning_rate": 0.0002366325369738339, "loss": 6.5247, "step": 6710 }, { "epoch": 2.290443686006826, "grad_norm": 2.814044713973999, "learning_rate": 0.00023651877133105804, "loss": 6.4487, "step": 6711 }, { "epoch": 2.2907849829351536, "grad_norm": 2.9334306716918945, "learning_rate": 0.00023640500568828215, "loss": 5.1492, "step": 6712 }, { "epoch": 2.291126279863481, "grad_norm": 2.8538413047790527, "learning_rate": 0.00023629124004550625, "loss": 6.6194, "step": 6713 }, { "epoch": 2.291467576791809, "grad_norm": 2.9160053730010986, "learning_rate": 0.00023617747440273038, "loss": 5.5613, "step": 6714 }, { "epoch": 2.2918088737201368, "grad_norm": 2.795891046524048, "learning_rate": 0.00023606370875995451, "loss": 6.1532, "step": 6715 }, { "epoch": 2.292150170648464, "grad_norm": 2.7880423069000244, "learning_rate": 0.00023594994311717862, "loss": 5.6862, "step": 6716 }, { "epoch": 2.2924914675767916, "grad_norm": 2.863717794418335, "learning_rate": 0.00023583617747440272, "loss": 5.6751, "step": 6717 }, { "epoch": 2.2928327645051194, "grad_norm": 2.9119949340820312, "learning_rate": 0.00023572241183162686, "loss": 5.5249, "step": 6718 }, { "epoch": 2.2931740614334473, "grad_norm": 2.8489530086517334, "learning_rate": 0.00023560864618885096, "loss": 6.3403, "step": 6719 }, { "epoch": 2.2935153583617747, "grad_norm": 3.275228261947632, "learning_rate": 0.0002354948805460751, "loss": 5.1688, "step": 6720 }, { "epoch": 2.2938566552901025, "grad_norm": 2.758864164352417, "learning_rate": 0.00023538111490329923, "loss": 6.241, "step": 6721 }, { "epoch": 2.29419795221843, "grad_norm": 2.7460415363311768, "learning_rate": 0.00023526734926052333, "loss": 5.9875, "step": 6722 }, { "epoch": 2.2945392491467578, "grad_norm": 2.8659422397613525, "learning_rate": 0.00023515358361774744, "loss": 6.1308, "step": 6723 }, { "epoch": 2.294880546075085, "grad_norm": 8.827744483947754, "learning_rate": 0.00023503981797497157, "loss": 6.2413, "step": 6724 }, { "epoch": 2.295221843003413, "grad_norm": 2.7797117233276367, "learning_rate": 0.0002349260523321957, "loss": 6.1305, "step": 6725 }, { "epoch": 2.2955631399317404, "grad_norm": 2.6930477619171143, "learning_rate": 0.0002348122866894198, "loss": 6.202, "step": 6726 }, { "epoch": 2.2959044368600683, "grad_norm": 2.801419258117676, "learning_rate": 0.0002346985210466439, "loss": 6.2353, "step": 6727 }, { "epoch": 2.296245733788396, "grad_norm": 2.8180465698242188, "learning_rate": 0.00023458475540386804, "loss": 6.1544, "step": 6728 }, { "epoch": 2.2965870307167235, "grad_norm": 2.789066791534424, "learning_rate": 0.00023447098976109215, "loss": 5.9909, "step": 6729 }, { "epoch": 2.296928327645051, "grad_norm": 2.7527191638946533, "learning_rate": 0.00023435722411831628, "loss": 6.1873, "step": 6730 }, { "epoch": 2.297269624573379, "grad_norm": 2.798922061920166, "learning_rate": 0.0002342434584755404, "loss": 6.2598, "step": 6731 }, { "epoch": 2.2976109215017066, "grad_norm": 2.9935081005096436, "learning_rate": 0.0002341296928327645, "loss": 5.3781, "step": 6732 }, { "epoch": 2.297952218430034, "grad_norm": 2.8289272785186768, "learning_rate": 0.00023401592718998862, "loss": 6.3926, "step": 6733 }, { "epoch": 2.298293515358362, "grad_norm": 2.873661756515503, "learning_rate": 0.00023390216154721275, "loss": 5.2289, "step": 6734 }, { "epoch": 2.2986348122866893, "grad_norm": 2.7585761547088623, "learning_rate": 0.00023378839590443688, "loss": 6.4229, "step": 6735 }, { "epoch": 2.298976109215017, "grad_norm": 2.7429072856903076, "learning_rate": 0.00023367463026166096, "loss": 6.1628, "step": 6736 }, { "epoch": 2.2993174061433446, "grad_norm": 2.8091378211975098, "learning_rate": 0.0002335608646188851, "loss": 6.2183, "step": 6737 }, { "epoch": 2.2996587030716724, "grad_norm": 2.830545425415039, "learning_rate": 0.00023344709897610923, "loss": 6.1877, "step": 6738 }, { "epoch": 2.3, "grad_norm": 3.6861021518707275, "learning_rate": 0.00023333333333333333, "loss": 4.8636, "step": 6739 }, { "epoch": 2.3003412969283277, "grad_norm": 2.8198957443237305, "learning_rate": 0.00023321956769055746, "loss": 6.044, "step": 6740 }, { "epoch": 2.3006825938566555, "grad_norm": 2.8199527263641357, "learning_rate": 0.00023310580204778157, "loss": 6.2105, "step": 6741 }, { "epoch": 2.301023890784983, "grad_norm": 2.9623944759368896, "learning_rate": 0.00023299203640500567, "loss": 5.464, "step": 6742 }, { "epoch": 2.3013651877133103, "grad_norm": 2.8883421421051025, "learning_rate": 0.0002328782707622298, "loss": 5.7796, "step": 6743 }, { "epoch": 2.301706484641638, "grad_norm": 2.6974728107452393, "learning_rate": 0.00023276450511945394, "loss": 6.3523, "step": 6744 }, { "epoch": 2.302047781569966, "grad_norm": 2.7741858959198, "learning_rate": 0.00023265073947667807, "loss": 6.0041, "step": 6745 }, { "epoch": 2.3023890784982934, "grad_norm": 2.771564245223999, "learning_rate": 0.00023253697383390215, "loss": 6.3403, "step": 6746 }, { "epoch": 2.3027303754266213, "grad_norm": 2.801663875579834, "learning_rate": 0.00023242320819112628, "loss": 5.9708, "step": 6747 }, { "epoch": 2.3030716723549487, "grad_norm": 2.906155586242676, "learning_rate": 0.0002323094425483504, "loss": 5.9501, "step": 6748 }, { "epoch": 2.3034129692832765, "grad_norm": 2.7590839862823486, "learning_rate": 0.00023219567690557452, "loss": 6.2378, "step": 6749 }, { "epoch": 2.303754266211604, "grad_norm": 2.7589144706726074, "learning_rate": 0.00023208191126279865, "loss": 6.4537, "step": 6750 }, { "epoch": 2.304095563139932, "grad_norm": 2.7965896129608154, "learning_rate": 0.00023196814562002275, "loss": 5.9146, "step": 6751 }, { "epoch": 2.304436860068259, "grad_norm": 2.827988862991333, "learning_rate": 0.00023185437997724686, "loss": 6.109, "step": 6752 }, { "epoch": 2.304778156996587, "grad_norm": 2.7847177982330322, "learning_rate": 0.000231740614334471, "loss": 6.2527, "step": 6753 }, { "epoch": 2.305119453924915, "grad_norm": 2.7947700023651123, "learning_rate": 0.00023162684869169512, "loss": 5.7674, "step": 6754 }, { "epoch": 2.3054607508532423, "grad_norm": 2.828664779663086, "learning_rate": 0.00023151308304891925, "loss": 6.086, "step": 6755 }, { "epoch": 2.3058020477815697, "grad_norm": 2.7907068729400635, "learning_rate": 0.00023139931740614333, "loss": 6.1609, "step": 6756 }, { "epoch": 2.3061433447098976, "grad_norm": 2.880263566970825, "learning_rate": 0.00023128555176336746, "loss": 5.7513, "step": 6757 }, { "epoch": 2.3064846416382254, "grad_norm": 2.764763832092285, "learning_rate": 0.0002311717861205916, "loss": 6.418, "step": 6758 }, { "epoch": 2.306825938566553, "grad_norm": 5.498814105987549, "learning_rate": 0.0002310580204778157, "loss": 4.4087, "step": 6759 }, { "epoch": 2.3071672354948807, "grad_norm": 2.8534750938415527, "learning_rate": 0.00023094425483503983, "loss": 6.3137, "step": 6760 }, { "epoch": 2.307508532423208, "grad_norm": 2.6376616954803467, "learning_rate": 0.00023083048919226394, "loss": 6.0889, "step": 6761 }, { "epoch": 2.307849829351536, "grad_norm": 3.473785877227783, "learning_rate": 0.00023071672354948804, "loss": 5.482, "step": 6762 }, { "epoch": 2.3081911262798633, "grad_norm": 3.221046209335327, "learning_rate": 0.00023060295790671217, "loss": 5.6875, "step": 6763 }, { "epoch": 2.308532423208191, "grad_norm": 2.871093273162842, "learning_rate": 0.0002304891922639363, "loss": 6.1357, "step": 6764 }, { "epoch": 2.3088737201365186, "grad_norm": 4.9006667137146, "learning_rate": 0.00023037542662116044, "loss": 5.354, "step": 6765 }, { "epoch": 2.3092150170648464, "grad_norm": 5.541250228881836, "learning_rate": 0.00023026166097838452, "loss": 5.1683, "step": 6766 }, { "epoch": 2.3095563139931743, "grad_norm": 2.8012752532958984, "learning_rate": 0.00023014789533560865, "loss": 6.1139, "step": 6767 }, { "epoch": 2.3098976109215017, "grad_norm": 2.7383131980895996, "learning_rate": 0.00023003412969283278, "loss": 5.1907, "step": 6768 }, { "epoch": 2.310238907849829, "grad_norm": 2.7988059520721436, "learning_rate": 0.00022992036405005689, "loss": 5.9688, "step": 6769 }, { "epoch": 2.310580204778157, "grad_norm": 2.8070616722106934, "learning_rate": 0.000229806598407281, "loss": 6.0292, "step": 6770 }, { "epoch": 2.310921501706485, "grad_norm": 2.855241537094116, "learning_rate": 0.00022969283276450512, "loss": 5.8921, "step": 6771 }, { "epoch": 2.311262798634812, "grad_norm": 6.3953142166137695, "learning_rate": 0.00022957906712172923, "loss": 5.0532, "step": 6772 }, { "epoch": 2.31160409556314, "grad_norm": 6.097601890563965, "learning_rate": 0.00022946530147895336, "loss": 4.7717, "step": 6773 }, { "epoch": 2.3119453924914675, "grad_norm": 4.208084583282471, "learning_rate": 0.0002293515358361775, "loss": 3.4272, "step": 6774 }, { "epoch": 2.3122866894197953, "grad_norm": 2.8885231018066406, "learning_rate": 0.0002292377701934016, "loss": 6.0002, "step": 6775 }, { "epoch": 2.3126279863481227, "grad_norm": 2.9425899982452393, "learning_rate": 0.0002291240045506257, "loss": 5.8151, "step": 6776 }, { "epoch": 2.3129692832764506, "grad_norm": 2.7507827281951904, "learning_rate": 0.00022901023890784983, "loss": 6.2093, "step": 6777 }, { "epoch": 2.313310580204778, "grad_norm": 2.732609272003174, "learning_rate": 0.00022889647326507397, "loss": 5.5137, "step": 6778 }, { "epoch": 2.313651877133106, "grad_norm": 2.8268463611602783, "learning_rate": 0.00022878270762229807, "loss": 5.6401, "step": 6779 }, { "epoch": 2.3139931740614337, "grad_norm": 2.7519233226776123, "learning_rate": 0.00022866894197952218, "loss": 6.2487, "step": 6780 }, { "epoch": 2.314334470989761, "grad_norm": 2.7072274684906006, "learning_rate": 0.0002285551763367463, "loss": 5.4718, "step": 6781 }, { "epoch": 2.3146757679180885, "grad_norm": 2.7394325733184814, "learning_rate": 0.0002284414106939704, "loss": 6.1958, "step": 6782 }, { "epoch": 2.3150170648464163, "grad_norm": 2.812884569168091, "learning_rate": 0.00022832764505119454, "loss": 6.1972, "step": 6783 }, { "epoch": 2.315358361774744, "grad_norm": 2.7412703037261963, "learning_rate": 0.00022821387940841868, "loss": 6.1518, "step": 6784 }, { "epoch": 2.3156996587030716, "grad_norm": 2.686539888381958, "learning_rate": 0.00022810011376564278, "loss": 6.5791, "step": 6785 }, { "epoch": 2.3160409556313994, "grad_norm": 2.780839443206787, "learning_rate": 0.0002279863481228669, "loss": 5.8385, "step": 6786 }, { "epoch": 2.316382252559727, "grad_norm": 2.8812310695648193, "learning_rate": 0.00022787258248009102, "loss": 4.8785, "step": 6787 }, { "epoch": 2.3167235494880547, "grad_norm": 2.8832690715789795, "learning_rate": 0.00022775881683731515, "loss": 6.5055, "step": 6788 }, { "epoch": 2.317064846416382, "grad_norm": 2.7269794940948486, "learning_rate": 0.00022764505119453926, "loss": 6.2457, "step": 6789 }, { "epoch": 2.31740614334471, "grad_norm": 2.879974126815796, "learning_rate": 0.00022753128555176336, "loss": 5.7098, "step": 6790 }, { "epoch": 2.3177474402730374, "grad_norm": 2.8237853050231934, "learning_rate": 0.0002274175199089875, "loss": 5.9351, "step": 6791 }, { "epoch": 2.318088737201365, "grad_norm": 2.740983247756958, "learning_rate": 0.0002273037542662116, "loss": 6.1166, "step": 6792 }, { "epoch": 2.318430034129693, "grad_norm": 2.9319350719451904, "learning_rate": 0.00022718998862343573, "loss": 6.4873, "step": 6793 }, { "epoch": 2.3187713310580205, "grad_norm": 2.7880241870880127, "learning_rate": 0.00022707622298065986, "loss": 5.8139, "step": 6794 }, { "epoch": 2.319112627986348, "grad_norm": 2.693197727203369, "learning_rate": 0.00022696245733788397, "loss": 6.0517, "step": 6795 }, { "epoch": 2.3194539249146757, "grad_norm": 2.8118865489959717, "learning_rate": 0.00022684869169510807, "loss": 6.0578, "step": 6796 }, { "epoch": 2.3197952218430036, "grad_norm": 3.474181890487671, "learning_rate": 0.0002267349260523322, "loss": 4.9987, "step": 6797 }, { "epoch": 2.320136518771331, "grad_norm": 2.758629083633423, "learning_rate": 0.00022662116040955634, "loss": 6.036, "step": 6798 }, { "epoch": 2.320477815699659, "grad_norm": 2.87188720703125, "learning_rate": 0.00022650739476678044, "loss": 6.5413, "step": 6799 }, { "epoch": 2.3208191126279862, "grad_norm": 2.650592803955078, "learning_rate": 0.00022639362912400455, "loss": 6.0238, "step": 6800 }, { "epoch": 2.321160409556314, "grad_norm": 2.7691047191619873, "learning_rate": 0.00022627986348122868, "loss": 6.3138, "step": 6801 }, { "epoch": 2.3215017064846415, "grad_norm": 2.800083637237549, "learning_rate": 0.00022616609783845278, "loss": 6.0257, "step": 6802 }, { "epoch": 2.3218430034129693, "grad_norm": 2.9052236080169678, "learning_rate": 0.00022605233219567691, "loss": 5.8308, "step": 6803 }, { "epoch": 2.3221843003412967, "grad_norm": 3.039931058883667, "learning_rate": 0.00022593856655290102, "loss": 5.0397, "step": 6804 }, { "epoch": 2.3225255972696246, "grad_norm": 2.8269150257110596, "learning_rate": 0.00022582480091012515, "loss": 5.7589, "step": 6805 }, { "epoch": 2.3228668941979524, "grad_norm": 2.7746031284332275, "learning_rate": 0.00022571103526734926, "loss": 5.6901, "step": 6806 }, { "epoch": 2.32320819112628, "grad_norm": 1.9601713418960571, "learning_rate": 0.0002255972696245734, "loss": 3.2623, "step": 6807 }, { "epoch": 2.3235494880546073, "grad_norm": 2.8860151767730713, "learning_rate": 0.00022548350398179752, "loss": 5.5029, "step": 6808 }, { "epoch": 2.323890784982935, "grad_norm": 2.7598114013671875, "learning_rate": 0.0002253697383390216, "loss": 5.9686, "step": 6809 }, { "epoch": 2.324232081911263, "grad_norm": 2.8497135639190674, "learning_rate": 0.00022525597269624573, "loss": 6.0748, "step": 6810 }, { "epoch": 2.3245733788395904, "grad_norm": 2.8299918174743652, "learning_rate": 0.00022514220705346986, "loss": 6.3305, "step": 6811 }, { "epoch": 2.324914675767918, "grad_norm": 2.794346332550049, "learning_rate": 0.00022502844141069397, "loss": 5.8338, "step": 6812 }, { "epoch": 2.3252559726962456, "grad_norm": 2.809251546859741, "learning_rate": 0.0002249146757679181, "loss": 6.1905, "step": 6813 }, { "epoch": 2.3255972696245735, "grad_norm": 3.0918211936950684, "learning_rate": 0.0002248009101251422, "loss": 6.2205, "step": 6814 }, { "epoch": 2.325938566552901, "grad_norm": 6.421435832977295, "learning_rate": 0.00022468714448236634, "loss": 4.1736, "step": 6815 }, { "epoch": 2.3262798634812287, "grad_norm": 3.02647066116333, "learning_rate": 0.00022457337883959044, "loss": 5.8284, "step": 6816 }, { "epoch": 2.326621160409556, "grad_norm": 2.943453550338745, "learning_rate": 0.00022445961319681457, "loss": 5.329, "step": 6817 }, { "epoch": 2.326962457337884, "grad_norm": 2.718707323074341, "learning_rate": 0.0002243458475540387, "loss": 6.4537, "step": 6818 }, { "epoch": 2.327303754266212, "grad_norm": 2.7228763103485107, "learning_rate": 0.00022423208191126278, "loss": 5.5298, "step": 6819 }, { "epoch": 2.3276450511945392, "grad_norm": 2.7870371341705322, "learning_rate": 0.00022411831626848692, "loss": 5.9528, "step": 6820 }, { "epoch": 2.3279863481228666, "grad_norm": 2.7378652095794678, "learning_rate": 0.00022400455062571105, "loss": 5.9546, "step": 6821 }, { "epoch": 2.3283276450511945, "grad_norm": 2.816159725189209, "learning_rate": 0.00022389078498293515, "loss": 6.0227, "step": 6822 }, { "epoch": 2.3286689419795223, "grad_norm": 3.0372061729431152, "learning_rate": 0.00022377701934015928, "loss": 5.1104, "step": 6823 }, { "epoch": 2.3290102389078498, "grad_norm": 3.7163870334625244, "learning_rate": 0.0002236632536973834, "loss": 5.6614, "step": 6824 }, { "epoch": 2.3293515358361776, "grad_norm": 2.7196543216705322, "learning_rate": 0.00022354948805460752, "loss": 6.4318, "step": 6825 }, { "epoch": 2.329692832764505, "grad_norm": 2.8511674404144287, "learning_rate": 0.00022343572241183163, "loss": 5.7164, "step": 6826 }, { "epoch": 2.330034129692833, "grad_norm": 2.838578462600708, "learning_rate": 0.00022332195676905576, "loss": 5.8845, "step": 6827 }, { "epoch": 2.3303754266211603, "grad_norm": 3.3302481174468994, "learning_rate": 0.0002232081911262799, "loss": 5.2908, "step": 6828 }, { "epoch": 2.330716723549488, "grad_norm": 2.730325222015381, "learning_rate": 0.00022309442548350397, "loss": 6.1103, "step": 6829 }, { "epoch": 2.3310580204778155, "grad_norm": 2.734823226928711, "learning_rate": 0.0002229806598407281, "loss": 6.5294, "step": 6830 }, { "epoch": 2.3313993174061434, "grad_norm": 2.777035713195801, "learning_rate": 0.00022286689419795223, "loss": 6.0642, "step": 6831 }, { "epoch": 2.331740614334471, "grad_norm": 2.896202802658081, "learning_rate": 0.00022275312855517634, "loss": 5.5833, "step": 6832 }, { "epoch": 2.3320819112627986, "grad_norm": 2.8327341079711914, "learning_rate": 0.00022263936291240047, "loss": 5.7914, "step": 6833 }, { "epoch": 2.3324232081911265, "grad_norm": 7.732255935668945, "learning_rate": 0.00022252559726962457, "loss": 3.887, "step": 6834 }, { "epoch": 2.332764505119454, "grad_norm": 2.896789312362671, "learning_rate": 0.00022241183162684868, "loss": 6.0555, "step": 6835 }, { "epoch": 2.3331058020477817, "grad_norm": 2.7806904315948486, "learning_rate": 0.0002222980659840728, "loss": 6.4753, "step": 6836 }, { "epoch": 2.333447098976109, "grad_norm": 2.836212635040283, "learning_rate": 0.00022218430034129694, "loss": 6.3409, "step": 6837 }, { "epoch": 2.333788395904437, "grad_norm": 2.7770421504974365, "learning_rate": 0.00022207053469852108, "loss": 6.5263, "step": 6838 }, { "epoch": 2.3341296928327644, "grad_norm": 2.7537899017333984, "learning_rate": 0.00022195676905574515, "loss": 6.3918, "step": 6839 }, { "epoch": 2.3344709897610922, "grad_norm": 2.9366977214813232, "learning_rate": 0.00022184300341296929, "loss": 5.7525, "step": 6840 }, { "epoch": 2.3348122866894196, "grad_norm": 3.4186301231384277, "learning_rate": 0.00022172923777019342, "loss": 5.2059, "step": 6841 }, { "epoch": 2.3351535836177475, "grad_norm": 2.832503080368042, "learning_rate": 0.00022161547212741752, "loss": 5.9087, "step": 6842 }, { "epoch": 2.335494880546075, "grad_norm": 2.741806983947754, "learning_rate": 0.00022150170648464163, "loss": 6.3415, "step": 6843 }, { "epoch": 2.3358361774744028, "grad_norm": 2.8064048290252686, "learning_rate": 0.00022138794084186576, "loss": 6.0018, "step": 6844 }, { "epoch": 2.3361774744027306, "grad_norm": 2.7633867263793945, "learning_rate": 0.00022127417519908986, "loss": 6.0155, "step": 6845 }, { "epoch": 2.336518771331058, "grad_norm": 2.8978452682495117, "learning_rate": 0.000221160409556314, "loss": 5.8894, "step": 6846 }, { "epoch": 2.336860068259386, "grad_norm": 2.9551198482513428, "learning_rate": 0.00022104664391353813, "loss": 4.3913, "step": 6847 }, { "epoch": 2.3372013651877133, "grad_norm": 2.845735549926758, "learning_rate": 0.00022093287827076223, "loss": 5.5712, "step": 6848 }, { "epoch": 2.337542662116041, "grad_norm": 2.8292698860168457, "learning_rate": 0.00022081911262798634, "loss": 6.0293, "step": 6849 }, { "epoch": 2.3378839590443685, "grad_norm": 2.7965333461761475, "learning_rate": 0.00022070534698521047, "loss": 5.9881, "step": 6850 }, { "epoch": 2.3382252559726964, "grad_norm": 2.763213872909546, "learning_rate": 0.0002205915813424346, "loss": 6.1471, "step": 6851 }, { "epoch": 2.3385665529010238, "grad_norm": 3.1655666828155518, "learning_rate": 0.0002204778156996587, "loss": 5.4231, "step": 6852 }, { "epoch": 2.3389078498293516, "grad_norm": 2.8350963592529297, "learning_rate": 0.0002203640500568828, "loss": 5.9476, "step": 6853 }, { "epoch": 2.339249146757679, "grad_norm": 2.731945514678955, "learning_rate": 0.00022025028441410694, "loss": 5.8654, "step": 6854 }, { "epoch": 2.339590443686007, "grad_norm": 2.7220661640167236, "learning_rate": 0.00022013651877133105, "loss": 6.4109, "step": 6855 }, { "epoch": 2.3399317406143343, "grad_norm": 2.7597591876983643, "learning_rate": 0.00022002275312855518, "loss": 6.377, "step": 6856 }, { "epoch": 2.340273037542662, "grad_norm": 2.7260942459106445, "learning_rate": 0.0002199089874857793, "loss": 5.9268, "step": 6857 }, { "epoch": 2.34061433447099, "grad_norm": 2.8388872146606445, "learning_rate": 0.00021979522184300342, "loss": 5.8152, "step": 6858 }, { "epoch": 2.3409556313993174, "grad_norm": 2.754322052001953, "learning_rate": 0.00021968145620022752, "loss": 6.2267, "step": 6859 }, { "epoch": 2.3412969283276452, "grad_norm": 2.73994779586792, "learning_rate": 0.00021956769055745166, "loss": 6.0755, "step": 6860 }, { "epoch": 2.3416382252559726, "grad_norm": 2.6968204975128174, "learning_rate": 0.0002194539249146758, "loss": 6.0136, "step": 6861 }, { "epoch": 2.3419795221843005, "grad_norm": 2.7095813751220703, "learning_rate": 0.0002193401592718999, "loss": 5.9502, "step": 6862 }, { "epoch": 2.342320819112628, "grad_norm": 4.785019397735596, "learning_rate": 0.000219226393629124, "loss": 4.9616, "step": 6863 }, { "epoch": 2.3426621160409558, "grad_norm": 2.7408225536346436, "learning_rate": 0.00021911262798634813, "loss": 6.1393, "step": 6864 }, { "epoch": 2.343003412969283, "grad_norm": 2.8245584964752197, "learning_rate": 0.00021899886234357223, "loss": 6.7514, "step": 6865 }, { "epoch": 2.343344709897611, "grad_norm": 2.7768466472625732, "learning_rate": 0.00021888509670079637, "loss": 6.0515, "step": 6866 }, { "epoch": 2.3436860068259384, "grad_norm": 2.7474124431610107, "learning_rate": 0.0002187713310580205, "loss": 6.5632, "step": 6867 }, { "epoch": 2.3440273037542663, "grad_norm": 2.730879545211792, "learning_rate": 0.0002186575654152446, "loss": 6.0169, "step": 6868 }, { "epoch": 2.3443686006825937, "grad_norm": 2.721271514892578, "learning_rate": 0.0002185437997724687, "loss": 6.3651, "step": 6869 }, { "epoch": 2.3447098976109215, "grad_norm": 2.727299690246582, "learning_rate": 0.00021843003412969284, "loss": 6.0875, "step": 6870 }, { "epoch": 2.3450511945392494, "grad_norm": 2.7979040145874023, "learning_rate": 0.00021831626848691697, "loss": 5.3848, "step": 6871 }, { "epoch": 2.345392491467577, "grad_norm": 2.7405858039855957, "learning_rate": 0.00021820250284414108, "loss": 5.9585, "step": 6872 }, { "epoch": 2.3457337883959046, "grad_norm": 2.7488925457000732, "learning_rate": 0.00021808873720136518, "loss": 6.4375, "step": 6873 }, { "epoch": 2.346075085324232, "grad_norm": 2.80232310295105, "learning_rate": 0.00021797497155858931, "loss": 6.0547, "step": 6874 }, { "epoch": 2.34641638225256, "grad_norm": 2.803708791732788, "learning_rate": 0.00021786120591581342, "loss": 5.5016, "step": 6875 }, { "epoch": 2.3467576791808873, "grad_norm": 2.724703550338745, "learning_rate": 0.00021774744027303755, "loss": 6.1233, "step": 6876 }, { "epoch": 2.347098976109215, "grad_norm": 2.8196821212768555, "learning_rate": 0.00021763367463026166, "loss": 5.8877, "step": 6877 }, { "epoch": 2.3474402730375425, "grad_norm": 3.931154251098633, "learning_rate": 0.0002175199089874858, "loss": 5.5875, "step": 6878 }, { "epoch": 2.3477815699658704, "grad_norm": 3.5176618099212646, "learning_rate": 0.0002174061433447099, "loss": 4.2908, "step": 6879 }, { "epoch": 2.348122866894198, "grad_norm": 2.752065896987915, "learning_rate": 0.00021729237770193403, "loss": 5.6804, "step": 6880 }, { "epoch": 2.3484641638225257, "grad_norm": 2.8360445499420166, "learning_rate": 0.00021717861205915816, "loss": 6.1461, "step": 6881 }, { "epoch": 2.348805460750853, "grad_norm": 2.739427328109741, "learning_rate": 0.00021706484641638224, "loss": 6.4064, "step": 6882 }, { "epoch": 2.349146757679181, "grad_norm": 3.3373124599456787, "learning_rate": 0.00021695108077360637, "loss": 6.0023, "step": 6883 }, { "epoch": 2.3494880546075088, "grad_norm": 2.7712957859039307, "learning_rate": 0.0002168373151308305, "loss": 6.3348, "step": 6884 }, { "epoch": 2.349829351535836, "grad_norm": 3.1796998977661133, "learning_rate": 0.0002167235494880546, "loss": 5.1784, "step": 6885 }, { "epoch": 2.350170648464164, "grad_norm": 2.7970149517059326, "learning_rate": 0.00021660978384527874, "loss": 6.1688, "step": 6886 }, { "epoch": 2.3505119453924914, "grad_norm": 2.6617581844329834, "learning_rate": 0.00021649601820250284, "loss": 6.3794, "step": 6887 }, { "epoch": 2.3508532423208193, "grad_norm": 2.752316474914551, "learning_rate": 0.00021638225255972697, "loss": 5.9957, "step": 6888 }, { "epoch": 2.3511945392491467, "grad_norm": 2.7273192405700684, "learning_rate": 0.00021626848691695108, "loss": 6.0486, "step": 6889 }, { "epoch": 2.3515358361774745, "grad_norm": 2.755157947540283, "learning_rate": 0.0002161547212741752, "loss": 6.2515, "step": 6890 }, { "epoch": 2.351877133105802, "grad_norm": 2.757171154022217, "learning_rate": 0.00021604095563139934, "loss": 6.3392, "step": 6891 }, { "epoch": 2.35221843003413, "grad_norm": 2.830286979675293, "learning_rate": 0.00021592718998862342, "loss": 6.4345, "step": 6892 }, { "epoch": 2.352559726962457, "grad_norm": 2.98397159576416, "learning_rate": 0.00021581342434584755, "loss": 5.7659, "step": 6893 }, { "epoch": 2.352901023890785, "grad_norm": 2.784120798110962, "learning_rate": 0.00021569965870307168, "loss": 5.3936, "step": 6894 }, { "epoch": 2.3532423208191124, "grad_norm": 2.79896879196167, "learning_rate": 0.0002155858930602958, "loss": 6.6646, "step": 6895 }, { "epoch": 2.3535836177474403, "grad_norm": 2.7373616695404053, "learning_rate": 0.00021547212741751992, "loss": 6.4598, "step": 6896 }, { "epoch": 2.353924914675768, "grad_norm": 2.7969889640808105, "learning_rate": 0.00021535836177474403, "loss": 5.9267, "step": 6897 }, { "epoch": 2.3542662116040955, "grad_norm": 2.9162516593933105, "learning_rate": 0.00021524459613196816, "loss": 5.6167, "step": 6898 }, { "epoch": 2.3546075085324234, "grad_norm": 2.745880365371704, "learning_rate": 0.00021513083048919226, "loss": 5.9538, "step": 6899 }, { "epoch": 2.354948805460751, "grad_norm": 2.799854278564453, "learning_rate": 0.0002150170648464164, "loss": 4.8416, "step": 6900 }, { "epoch": 2.3552901023890787, "grad_norm": 2.84338116645813, "learning_rate": 0.00021490329920364053, "loss": 6.2332, "step": 6901 }, { "epoch": 2.355631399317406, "grad_norm": 2.713643789291382, "learning_rate": 0.0002147895335608646, "loss": 6.3292, "step": 6902 }, { "epoch": 2.355972696245734, "grad_norm": 2.7593953609466553, "learning_rate": 0.00021467576791808874, "loss": 6.1654, "step": 6903 }, { "epoch": 2.3563139931740613, "grad_norm": 2.7301793098449707, "learning_rate": 0.00021456200227531287, "loss": 5.8449, "step": 6904 }, { "epoch": 2.356655290102389, "grad_norm": 2.736713409423828, "learning_rate": 0.00021444823663253697, "loss": 6.0168, "step": 6905 }, { "epoch": 2.3569965870307166, "grad_norm": 2.8782119750976562, "learning_rate": 0.0002143344709897611, "loss": 6.1835, "step": 6906 }, { "epoch": 2.3573378839590444, "grad_norm": 2.7333459854125977, "learning_rate": 0.0002142207053469852, "loss": 6.9639, "step": 6907 }, { "epoch": 2.357679180887372, "grad_norm": 2.8486573696136475, "learning_rate": 0.00021410693970420934, "loss": 5.9174, "step": 6908 }, { "epoch": 2.3580204778156997, "grad_norm": 2.6959550380706787, "learning_rate": 0.00021399317406143345, "loss": 6.3947, "step": 6909 }, { "epoch": 2.3583617747440275, "grad_norm": 2.771045207977295, "learning_rate": 0.00021387940841865758, "loss": 5.9439, "step": 6910 }, { "epoch": 2.358703071672355, "grad_norm": 2.7466630935668945, "learning_rate": 0.00021376564277588169, "loss": 5.7553, "step": 6911 }, { "epoch": 2.359044368600683, "grad_norm": 2.725827217102051, "learning_rate": 0.0002136518771331058, "loss": 5.7062, "step": 6912 }, { "epoch": 2.35938566552901, "grad_norm": 2.763960123062134, "learning_rate": 0.00021353811149032992, "loss": 6.1409, "step": 6913 }, { "epoch": 2.359726962457338, "grad_norm": 2.7459399700164795, "learning_rate": 0.00021342434584755405, "loss": 6.2805, "step": 6914 }, { "epoch": 2.3600682593856654, "grad_norm": 2.6126558780670166, "learning_rate": 0.00021331058020477816, "loss": 5.6644, "step": 6915 }, { "epoch": 2.3604095563139933, "grad_norm": 2.730069160461426, "learning_rate": 0.00021319681456200226, "loss": 5.9614, "step": 6916 }, { "epoch": 2.3607508532423207, "grad_norm": 2.6941754817962646, "learning_rate": 0.0002130830489192264, "loss": 6.082, "step": 6917 }, { "epoch": 2.3610921501706486, "grad_norm": 2.685556173324585, "learning_rate": 0.00021296928327645053, "loss": 5.7493, "step": 6918 }, { "epoch": 2.361433447098976, "grad_norm": 2.748690366744995, "learning_rate": 0.00021285551763367463, "loss": 6.2632, "step": 6919 }, { "epoch": 2.361774744027304, "grad_norm": 2.6726412773132324, "learning_rate": 0.00021274175199089877, "loss": 5.5741, "step": 6920 }, { "epoch": 2.362116040955631, "grad_norm": 2.778284788131714, "learning_rate": 0.00021262798634812287, "loss": 5.3961, "step": 6921 }, { "epoch": 2.362457337883959, "grad_norm": 2.7935075759887695, "learning_rate": 0.00021251422070534698, "loss": 6.1253, "step": 6922 }, { "epoch": 2.362798634812287, "grad_norm": 2.765655279159546, "learning_rate": 0.0002124004550625711, "loss": 6.6871, "step": 6923 }, { "epoch": 2.3631399317406143, "grad_norm": 2.659959554672241, "learning_rate": 0.00021228668941979524, "loss": 6.0147, "step": 6924 }, { "epoch": 2.363481228668942, "grad_norm": 2.7546181678771973, "learning_rate": 0.00021217292377701934, "loss": 6.5698, "step": 6925 }, { "epoch": 2.3638225255972696, "grad_norm": 2.8323190212249756, "learning_rate": 0.00021205915813424345, "loss": 5.7636, "step": 6926 }, { "epoch": 2.3641638225255974, "grad_norm": 2.7773520946502686, "learning_rate": 0.00021194539249146758, "loss": 6.3198, "step": 6927 }, { "epoch": 2.364505119453925, "grad_norm": 2.6490156650543213, "learning_rate": 0.0002118316268486917, "loss": 5.7899, "step": 6928 }, { "epoch": 2.3648464163822527, "grad_norm": 2.878728151321411, "learning_rate": 0.00021171786120591582, "loss": 5.5224, "step": 6929 }, { "epoch": 2.36518771331058, "grad_norm": 2.7923343181610107, "learning_rate": 0.00021160409556313995, "loss": 6.871, "step": 6930 }, { "epoch": 2.365529010238908, "grad_norm": 4.340980052947998, "learning_rate": 0.00021149032992036406, "loss": 5.0883, "step": 6931 }, { "epoch": 2.3658703071672353, "grad_norm": 2.780867338180542, "learning_rate": 0.00021137656427758816, "loss": 5.614, "step": 6932 }, { "epoch": 2.366211604095563, "grad_norm": 2.7651314735412598, "learning_rate": 0.0002112627986348123, "loss": 6.4194, "step": 6933 }, { "epoch": 2.3665529010238906, "grad_norm": 2.7465429306030273, "learning_rate": 0.00021114903299203642, "loss": 5.8274, "step": 6934 }, { "epoch": 2.3668941979522184, "grad_norm": 2.8030600547790527, "learning_rate": 0.00021103526734926053, "loss": 5.9884, "step": 6935 }, { "epoch": 2.3672354948805463, "grad_norm": 2.78486704826355, "learning_rate": 0.00021092150170648463, "loss": 6.4359, "step": 6936 }, { "epoch": 2.3675767918088737, "grad_norm": 2.7584781646728516, "learning_rate": 0.00021080773606370877, "loss": 6.4127, "step": 6937 }, { "epoch": 2.3679180887372016, "grad_norm": 2.7212891578674316, "learning_rate": 0.00021069397042093287, "loss": 6.8108, "step": 6938 }, { "epoch": 2.368259385665529, "grad_norm": 2.8116931915283203, "learning_rate": 0.000210580204778157, "loss": 6.0632, "step": 6939 }, { "epoch": 2.368600682593857, "grad_norm": 2.847207546234131, "learning_rate": 0.00021046643913538114, "loss": 5.8893, "step": 6940 }, { "epoch": 2.368941979522184, "grad_norm": 2.7255427837371826, "learning_rate": 0.00021035267349260524, "loss": 6.3627, "step": 6941 }, { "epoch": 2.369283276450512, "grad_norm": 3.3348500728607178, "learning_rate": 0.00021023890784982934, "loss": 6.4244, "step": 6942 }, { "epoch": 2.3696245733788395, "grad_norm": 2.6929471492767334, "learning_rate": 0.00021012514220705348, "loss": 6.4524, "step": 6943 }, { "epoch": 2.3699658703071673, "grad_norm": 2.598615884780884, "learning_rate": 0.0002100113765642776, "loss": 5.8728, "step": 6944 }, { "epoch": 2.3703071672354947, "grad_norm": 2.8167927265167236, "learning_rate": 0.0002098976109215017, "loss": 5.5652, "step": 6945 }, { "epoch": 2.3706484641638226, "grad_norm": 3.9340548515319824, "learning_rate": 0.00020978384527872582, "loss": 5.4056, "step": 6946 }, { "epoch": 2.37098976109215, "grad_norm": 2.9168150424957275, "learning_rate": 0.00020967007963594995, "loss": 5.4847, "step": 6947 }, { "epoch": 2.371331058020478, "grad_norm": 2.80441951751709, "learning_rate": 0.00020955631399317406, "loss": 6.661, "step": 6948 }, { "epoch": 2.3716723549488057, "grad_norm": 2.7382166385650635, "learning_rate": 0.0002094425483503982, "loss": 5.718, "step": 6949 }, { "epoch": 2.372013651877133, "grad_norm": 2.878997802734375, "learning_rate": 0.0002093287827076223, "loss": 6.9364, "step": 6950 }, { "epoch": 2.372354948805461, "grad_norm": 2.7554514408111572, "learning_rate": 0.00020921501706484642, "loss": 6.7696, "step": 6951 }, { "epoch": 2.3726962457337883, "grad_norm": 2.6601312160491943, "learning_rate": 0.00020910125142207053, "loss": 6.2882, "step": 6952 }, { "epoch": 2.373037542662116, "grad_norm": 2.9576683044433594, "learning_rate": 0.00020898748577929466, "loss": 5.6812, "step": 6953 }, { "epoch": 2.3733788395904436, "grad_norm": 2.700512170791626, "learning_rate": 0.0002088737201365188, "loss": 6.0231, "step": 6954 }, { "epoch": 2.3737201365187715, "grad_norm": 2.6124114990234375, "learning_rate": 0.00020875995449374287, "loss": 5.9432, "step": 6955 }, { "epoch": 2.374061433447099, "grad_norm": 4.633599281311035, "learning_rate": 0.000208646188850967, "loss": 5.6296, "step": 6956 }, { "epoch": 2.3744027303754267, "grad_norm": 2.7569868564605713, "learning_rate": 0.00020853242320819114, "loss": 6.6165, "step": 6957 }, { "epoch": 2.374744027303754, "grad_norm": 2.804227590560913, "learning_rate": 0.00020841865756541524, "loss": 5.4564, "step": 6958 }, { "epoch": 2.375085324232082, "grad_norm": 2.7688915729522705, "learning_rate": 0.00020830489192263937, "loss": 6.1656, "step": 6959 }, { "epoch": 2.3754266211604094, "grad_norm": 2.7455203533172607, "learning_rate": 0.00020819112627986348, "loss": 6.0186, "step": 6960 }, { "epoch": 2.375767918088737, "grad_norm": 2.7759294509887695, "learning_rate": 0.0002080773606370876, "loss": 5.8901, "step": 6961 }, { "epoch": 2.376109215017065, "grad_norm": 2.660414218902588, "learning_rate": 0.00020796359499431171, "loss": 6.0713, "step": 6962 }, { "epoch": 2.3764505119453925, "grad_norm": 2.7604238986968994, "learning_rate": 0.00020784982935153585, "loss": 6.2344, "step": 6963 }, { "epoch": 2.3767918088737203, "grad_norm": 2.65087890625, "learning_rate": 0.00020773606370875998, "loss": 6.0383, "step": 6964 }, { "epoch": 2.3771331058020477, "grad_norm": 2.73121976852417, "learning_rate": 0.00020762229806598406, "loss": 5.8064, "step": 6965 }, { "epoch": 2.3774744027303756, "grad_norm": 11.710968017578125, "learning_rate": 0.0002075085324232082, "loss": 5.0296, "step": 6966 }, { "epoch": 2.377815699658703, "grad_norm": 2.5780153274536133, "learning_rate": 0.00020739476678043232, "loss": 6.0447, "step": 6967 }, { "epoch": 2.378156996587031, "grad_norm": 2.7825236320495605, "learning_rate": 0.00020728100113765643, "loss": 6.6443, "step": 6968 }, { "epoch": 2.3784982935153582, "grad_norm": 2.7635421752929688, "learning_rate": 0.00020716723549488056, "loss": 5.4036, "step": 6969 }, { "epoch": 2.378839590443686, "grad_norm": 2.749699592590332, "learning_rate": 0.00020705346985210466, "loss": 5.763, "step": 6970 }, { "epoch": 2.3791808873720135, "grad_norm": 2.686196804046631, "learning_rate": 0.0002069397042093288, "loss": 5.9363, "step": 6971 }, { "epoch": 2.3795221843003413, "grad_norm": 2.801814317703247, "learning_rate": 0.0002068259385665529, "loss": 6.4167, "step": 6972 }, { "epoch": 2.3798634812286688, "grad_norm": 2.927759885787964, "learning_rate": 0.00020671217292377703, "loss": 5.5384, "step": 6973 }, { "epoch": 2.3802047781569966, "grad_norm": 5.355523109436035, "learning_rate": 0.00020659840728100116, "loss": 4.3588, "step": 6974 }, { "epoch": 2.3805460750853245, "grad_norm": 6.205738544464111, "learning_rate": 0.00020648464163822524, "loss": 5.5929, "step": 6975 }, { "epoch": 2.380887372013652, "grad_norm": 2.8085031509399414, "learning_rate": 0.00020637087599544937, "loss": 6.4081, "step": 6976 }, { "epoch": 2.3812286689419797, "grad_norm": 2.7193679809570312, "learning_rate": 0.0002062571103526735, "loss": 6.0632, "step": 6977 }, { "epoch": 2.381569965870307, "grad_norm": 2.8042826652526855, "learning_rate": 0.0002061433447098976, "loss": 5.7394, "step": 6978 }, { "epoch": 2.381911262798635, "grad_norm": 2.7161612510681152, "learning_rate": 0.00020602957906712174, "loss": 6.55, "step": 6979 }, { "epoch": 2.3822525597269624, "grad_norm": 2.6074378490448, "learning_rate": 0.00020591581342434585, "loss": 5.7456, "step": 6980 }, { "epoch": 2.38259385665529, "grad_norm": 2.804840087890625, "learning_rate": 0.00020580204778156998, "loss": 4.9974, "step": 6981 }, { "epoch": 2.3829351535836176, "grad_norm": 2.746267795562744, "learning_rate": 0.00020568828213879408, "loss": 6.2011, "step": 6982 }, { "epoch": 2.3832764505119455, "grad_norm": 2.72701096534729, "learning_rate": 0.00020557451649601822, "loss": 6.2791, "step": 6983 }, { "epoch": 2.383617747440273, "grad_norm": 2.6555674076080322, "learning_rate": 0.00020546075085324232, "loss": 5.5011, "step": 6984 }, { "epoch": 2.3839590443686007, "grad_norm": 2.7322537899017334, "learning_rate": 0.00020534698521046643, "loss": 5.9383, "step": 6985 }, { "epoch": 2.384300341296928, "grad_norm": 2.779167652130127, "learning_rate": 0.00020523321956769056, "loss": 6.5133, "step": 6986 }, { "epoch": 2.384641638225256, "grad_norm": 2.8352394104003906, "learning_rate": 0.0002051194539249147, "loss": 6.4203, "step": 6987 }, { "epoch": 2.384982935153584, "grad_norm": 2.740598440170288, "learning_rate": 0.0002050056882821388, "loss": 5.7477, "step": 6988 }, { "epoch": 2.3853242320819112, "grad_norm": 2.779649496078491, "learning_rate": 0.0002048919226393629, "loss": 6.0869, "step": 6989 }, { "epoch": 2.385665529010239, "grad_norm": 2.779036521911621, "learning_rate": 0.00020477815699658703, "loss": 5.5761, "step": 6990 }, { "epoch": 2.3860068259385665, "grad_norm": 2.7772841453552246, "learning_rate": 0.00020466439135381116, "loss": 6.7393, "step": 6991 }, { "epoch": 2.3863481228668944, "grad_norm": 2.7716453075408936, "learning_rate": 0.00020455062571103527, "loss": 6.1316, "step": 6992 }, { "epoch": 2.3866894197952218, "grad_norm": 3.0707526206970215, "learning_rate": 0.0002044368600682594, "loss": 5.781, "step": 6993 }, { "epoch": 2.3870307167235496, "grad_norm": 2.663928270339966, "learning_rate": 0.0002043230944254835, "loss": 6.2102, "step": 6994 }, { "epoch": 2.387372013651877, "grad_norm": 2.7111122608184814, "learning_rate": 0.0002042093287827076, "loss": 6.2127, "step": 6995 }, { "epoch": 2.387713310580205, "grad_norm": 2.7795569896698, "learning_rate": 0.00020409556313993174, "loss": 5.7084, "step": 6996 }, { "epoch": 2.3880546075085323, "grad_norm": 2.881326198577881, "learning_rate": 0.00020398179749715588, "loss": 5.929, "step": 6997 }, { "epoch": 2.38839590443686, "grad_norm": 2.725304365158081, "learning_rate": 0.00020386803185437998, "loss": 6.2795, "step": 6998 }, { "epoch": 2.3887372013651875, "grad_norm": 2.7434685230255127, "learning_rate": 0.00020375426621160409, "loss": 6.0435, "step": 6999 }, { "epoch": 2.3890784982935154, "grad_norm": 2.676453113555908, "learning_rate": 0.00020364050056882822, "loss": 6.1874, "step": 7000 }, { "epoch": 2.3894197952218432, "grad_norm": 2.7269630432128906, "learning_rate": 0.00020352673492605235, "loss": 5.7898, "step": 7001 }, { "epoch": 2.3897610921501706, "grad_norm": 2.72902774810791, "learning_rate": 0.00020341296928327645, "loss": 5.5644, "step": 7002 }, { "epoch": 2.3901023890784985, "grad_norm": 2.750995397567749, "learning_rate": 0.0002032992036405006, "loss": 6.2748, "step": 7003 }, { "epoch": 2.390443686006826, "grad_norm": 2.7395596504211426, "learning_rate": 0.0002031854379977247, "loss": 6.0858, "step": 7004 }, { "epoch": 2.3907849829351537, "grad_norm": 2.73018217086792, "learning_rate": 0.0002030716723549488, "loss": 5.9797, "step": 7005 }, { "epoch": 2.391126279863481, "grad_norm": 2.739351511001587, "learning_rate": 0.00020295790671217293, "loss": 6.2691, "step": 7006 }, { "epoch": 2.391467576791809, "grad_norm": 2.804325580596924, "learning_rate": 0.00020284414106939706, "loss": 6.5061, "step": 7007 }, { "epoch": 2.3918088737201364, "grad_norm": 2.734001636505127, "learning_rate": 0.00020273037542662117, "loss": 6.3185, "step": 7008 }, { "epoch": 2.3921501706484642, "grad_norm": 2.743891954421997, "learning_rate": 0.00020261660978384527, "loss": 6.3436, "step": 7009 }, { "epoch": 2.3924914675767917, "grad_norm": 3.208683729171753, "learning_rate": 0.0002025028441410694, "loss": 4.3648, "step": 7010 }, { "epoch": 2.3928327645051195, "grad_norm": 2.7099032402038574, "learning_rate": 0.00020238907849829353, "loss": 5.1361, "step": 7011 }, { "epoch": 2.393174061433447, "grad_norm": 2.8060643672943115, "learning_rate": 0.00020227531285551764, "loss": 6.1124, "step": 7012 }, { "epoch": 2.3935153583617748, "grad_norm": 2.7645676136016846, "learning_rate": 0.00020216154721274177, "loss": 5.3761, "step": 7013 }, { "epoch": 2.3938566552901026, "grad_norm": 2.6478114128112793, "learning_rate": 0.00020204778156996588, "loss": 5.6484, "step": 7014 }, { "epoch": 2.39419795221843, "grad_norm": 2.8686115741729736, "learning_rate": 0.00020193401592718998, "loss": 5.5769, "step": 7015 }, { "epoch": 2.394539249146758, "grad_norm": 2.700092315673828, "learning_rate": 0.00020182025028441411, "loss": 6.5028, "step": 7016 }, { "epoch": 2.3948805460750853, "grad_norm": 2.810314655303955, "learning_rate": 0.00020170648464163825, "loss": 6.574, "step": 7017 }, { "epoch": 2.395221843003413, "grad_norm": 2.6746256351470947, "learning_rate": 0.00020159271899886232, "loss": 5.7884, "step": 7018 }, { "epoch": 2.3955631399317405, "grad_norm": 2.735712766647339, "learning_rate": 0.00020147895335608646, "loss": 6.5509, "step": 7019 }, { "epoch": 2.3959044368600684, "grad_norm": 2.7703819274902344, "learning_rate": 0.0002013651877133106, "loss": 6.1054, "step": 7020 }, { "epoch": 2.396245733788396, "grad_norm": 2.840043306350708, "learning_rate": 0.00020125142207053472, "loss": 6.2688, "step": 7021 }, { "epoch": 2.3965870307167236, "grad_norm": 2.789001226425171, "learning_rate": 0.00020113765642775882, "loss": 6.1622, "step": 7022 }, { "epoch": 2.396928327645051, "grad_norm": 2.6123547554016113, "learning_rate": 0.00020102389078498293, "loss": 5.9342, "step": 7023 }, { "epoch": 2.397269624573379, "grad_norm": 2.793732166290283, "learning_rate": 0.00020091012514220706, "loss": 5.487, "step": 7024 }, { "epoch": 2.3976109215017063, "grad_norm": 2.722079277038574, "learning_rate": 0.00020079635949943117, "loss": 6.1717, "step": 7025 }, { "epoch": 2.397952218430034, "grad_norm": 2.798908233642578, "learning_rate": 0.0002006825938566553, "loss": 6.3019, "step": 7026 }, { "epoch": 2.398293515358362, "grad_norm": 2.8467278480529785, "learning_rate": 0.00020056882821387943, "loss": 5.7511, "step": 7027 }, { "epoch": 2.3986348122866894, "grad_norm": 2.7143468856811523, "learning_rate": 0.0002004550625711035, "loss": 5.4081, "step": 7028 }, { "epoch": 2.3989761092150172, "grad_norm": 2.916228771209717, "learning_rate": 0.00020034129692832764, "loss": 5.913, "step": 7029 }, { "epoch": 2.3993174061433447, "grad_norm": 2.752556562423706, "learning_rate": 0.00020022753128555177, "loss": 6.2246, "step": 7030 }, { "epoch": 2.3996587030716725, "grad_norm": 2.748680591583252, "learning_rate": 0.0002001137656427759, "loss": 6.3106, "step": 7031 }, { "epoch": 2.4, "grad_norm": 2.7810096740722656, "learning_rate": 0.0002, "loss": 6.5981, "step": 7032 }, { "epoch": 2.4003412969283278, "grad_norm": 2.9772250652313232, "learning_rate": 0.00019988623435722411, "loss": 4.0665, "step": 7033 }, { "epoch": 2.400682593856655, "grad_norm": 2.8036811351776123, "learning_rate": 0.00019977246871444825, "loss": 6.0835, "step": 7034 }, { "epoch": 2.401023890784983, "grad_norm": 2.714700698852539, "learning_rate": 0.00019965870307167235, "loss": 6.4391, "step": 7035 }, { "epoch": 2.4013651877133104, "grad_norm": 2.8477370738983154, "learning_rate": 0.00019954493742889648, "loss": 5.4793, "step": 7036 }, { "epoch": 2.4017064846416383, "grad_norm": 2.8073480129241943, "learning_rate": 0.00019943117178612062, "loss": 5.7709, "step": 7037 }, { "epoch": 2.4020477815699657, "grad_norm": 2.8629024028778076, "learning_rate": 0.0001993174061433447, "loss": 5.7823, "step": 7038 }, { "epoch": 2.4023890784982935, "grad_norm": 2.7898664474487305, "learning_rate": 0.00019920364050056883, "loss": 6.2151, "step": 7039 }, { "epoch": 2.4027303754266214, "grad_norm": 3.052391290664673, "learning_rate": 0.00019908987485779296, "loss": 4.0887, "step": 7040 }, { "epoch": 2.403071672354949, "grad_norm": 2.876765489578247, "learning_rate": 0.00019897610921501706, "loss": 5.6112, "step": 7041 }, { "epoch": 2.4034129692832766, "grad_norm": 2.745173931121826, "learning_rate": 0.0001988623435722412, "loss": 6.1771, "step": 7042 }, { "epoch": 2.403754266211604, "grad_norm": 2.908230781555176, "learning_rate": 0.0001987485779294653, "loss": 5.1346, "step": 7043 }, { "epoch": 2.404095563139932, "grad_norm": 2.827298402786255, "learning_rate": 0.00019863481228668943, "loss": 6.1908, "step": 7044 }, { "epoch": 2.4044368600682593, "grad_norm": 2.6762778759002686, "learning_rate": 0.00019852104664391354, "loss": 6.1944, "step": 7045 }, { "epoch": 2.404778156996587, "grad_norm": 2.9926464557647705, "learning_rate": 0.00019840728100113767, "loss": 4.689, "step": 7046 }, { "epoch": 2.4051194539249146, "grad_norm": 2.697849750518799, "learning_rate": 0.0001982935153583618, "loss": 6.0584, "step": 7047 }, { "epoch": 2.4054607508532424, "grad_norm": 3.2848122119903564, "learning_rate": 0.00019817974971558588, "loss": 5.2301, "step": 7048 }, { "epoch": 2.40580204778157, "grad_norm": 3.0285191535949707, "learning_rate": 0.00019806598407281, "loss": 4.5672, "step": 7049 }, { "epoch": 2.4061433447098977, "grad_norm": 2.768855094909668, "learning_rate": 0.00019795221843003414, "loss": 6.4452, "step": 7050 }, { "epoch": 2.406484641638225, "grad_norm": 2.6719319820404053, "learning_rate": 0.00019783845278725825, "loss": 6.2495, "step": 7051 }, { "epoch": 2.406825938566553, "grad_norm": 2.812721014022827, "learning_rate": 0.00019772468714448235, "loss": 5.9707, "step": 7052 }, { "epoch": 2.4071672354948808, "grad_norm": 3.698657751083374, "learning_rate": 0.00019761092150170648, "loss": 5.3361, "step": 7053 }, { "epoch": 2.407508532423208, "grad_norm": 2.74291729927063, "learning_rate": 0.00019749715585893062, "loss": 6.0255, "step": 7054 }, { "epoch": 2.407849829351536, "grad_norm": 2.770951986312866, "learning_rate": 0.00019738339021615472, "loss": 6.6123, "step": 7055 }, { "epoch": 2.4081911262798634, "grad_norm": 2.8375635147094727, "learning_rate": 0.00019726962457337885, "loss": 6.4971, "step": 7056 }, { "epoch": 2.4085324232081913, "grad_norm": 2.986063003540039, "learning_rate": 0.00019715585893060296, "loss": 5.8929, "step": 7057 }, { "epoch": 2.4088737201365187, "grad_norm": 4.404201030731201, "learning_rate": 0.00019704209328782706, "loss": 4.4985, "step": 7058 }, { "epoch": 2.4092150170648465, "grad_norm": 2.6988513469696045, "learning_rate": 0.0001969283276450512, "loss": 5.791, "step": 7059 }, { "epoch": 2.409556313993174, "grad_norm": 2.7154700756073, "learning_rate": 0.00019681456200227533, "loss": 6.4737, "step": 7060 }, { "epoch": 2.409897610921502, "grad_norm": 2.815117597579956, "learning_rate": 0.00019670079635949943, "loss": 6.0718, "step": 7061 }, { "epoch": 2.410238907849829, "grad_norm": 2.831260919570923, "learning_rate": 0.00019658703071672354, "loss": 5.3611, "step": 7062 }, { "epoch": 2.410580204778157, "grad_norm": 1.9146329164505005, "learning_rate": 0.00019647326507394767, "loss": 2.7539, "step": 7063 }, { "epoch": 2.4109215017064844, "grad_norm": 2.497642755508423, "learning_rate": 0.0001963594994311718, "loss": 5.5512, "step": 7064 }, { "epoch": 2.4112627986348123, "grad_norm": 2.6266098022460938, "learning_rate": 0.0001962457337883959, "loss": 5.7939, "step": 7065 }, { "epoch": 2.41160409556314, "grad_norm": 2.7678215503692627, "learning_rate": 0.00019613196814562004, "loss": 6.1091, "step": 7066 }, { "epoch": 2.4119453924914676, "grad_norm": 3.060725212097168, "learning_rate": 0.00019601820250284414, "loss": 6.1383, "step": 7067 }, { "epoch": 2.4122866894197954, "grad_norm": 2.825890302658081, "learning_rate": 0.00019590443686006825, "loss": 6.1728, "step": 7068 }, { "epoch": 2.412627986348123, "grad_norm": 2.731437921524048, "learning_rate": 0.00019579067121729238, "loss": 6.2054, "step": 7069 }, { "epoch": 2.4129692832764507, "grad_norm": 2.756383180618286, "learning_rate": 0.0001956769055745165, "loss": 5.4519, "step": 7070 }, { "epoch": 2.413310580204778, "grad_norm": 2.7772936820983887, "learning_rate": 0.00019556313993174062, "loss": 5.739, "step": 7071 }, { "epoch": 2.413651877133106, "grad_norm": 2.736973285675049, "learning_rate": 0.00019544937428896472, "loss": 5.8474, "step": 7072 }, { "epoch": 2.4139931740614333, "grad_norm": 2.7424445152282715, "learning_rate": 0.00019533560864618885, "loss": 6.2036, "step": 7073 }, { "epoch": 2.414334470989761, "grad_norm": 2.801053285598755, "learning_rate": 0.00019522184300341299, "loss": 6.0991, "step": 7074 }, { "epoch": 2.4146757679180886, "grad_norm": 3.4473876953125, "learning_rate": 0.0001951080773606371, "loss": 5.6659, "step": 7075 }, { "epoch": 2.4150170648464164, "grad_norm": 2.782585620880127, "learning_rate": 0.00019499431171786122, "loss": 5.653, "step": 7076 }, { "epoch": 2.415358361774744, "grad_norm": 2.6526317596435547, "learning_rate": 0.00019488054607508533, "loss": 5.88, "step": 7077 }, { "epoch": 2.4156996587030717, "grad_norm": 2.770357608795166, "learning_rate": 0.00019476678043230943, "loss": 5.7505, "step": 7078 }, { "epoch": 2.4160409556313995, "grad_norm": 4.893315315246582, "learning_rate": 0.00019465301478953357, "loss": 4.9911, "step": 7079 }, { "epoch": 2.416382252559727, "grad_norm": 2.6807408332824707, "learning_rate": 0.0001945392491467577, "loss": 6.1131, "step": 7080 }, { "epoch": 2.416723549488055, "grad_norm": 2.761153221130371, "learning_rate": 0.0001944254835039818, "loss": 5.9863, "step": 7081 }, { "epoch": 2.417064846416382, "grad_norm": 2.669250965118408, "learning_rate": 0.0001943117178612059, "loss": 5.7527, "step": 7082 }, { "epoch": 2.41740614334471, "grad_norm": 2.7944650650024414, "learning_rate": 0.00019419795221843004, "loss": 6.0305, "step": 7083 }, { "epoch": 2.4177474402730375, "grad_norm": 2.851085662841797, "learning_rate": 0.00019408418657565417, "loss": 5.8564, "step": 7084 }, { "epoch": 2.4180887372013653, "grad_norm": 2.7912724018096924, "learning_rate": 0.00019397042093287828, "loss": 6.6317, "step": 7085 }, { "epoch": 2.4184300341296927, "grad_norm": 2.802370309829712, "learning_rate": 0.00019385665529010238, "loss": 6.2561, "step": 7086 }, { "epoch": 2.4187713310580206, "grad_norm": 2.675826072692871, "learning_rate": 0.0001937428896473265, "loss": 6.4923, "step": 7087 }, { "epoch": 2.419112627986348, "grad_norm": 2.732436418533325, "learning_rate": 0.00019362912400455062, "loss": 5.8842, "step": 7088 }, { "epoch": 2.419453924914676, "grad_norm": 2.6516783237457275, "learning_rate": 0.00019351535836177475, "loss": 6.1319, "step": 7089 }, { "epoch": 2.419795221843003, "grad_norm": 2.7802767753601074, "learning_rate": 0.00019340159271899888, "loss": 6.2859, "step": 7090 }, { "epoch": 2.420136518771331, "grad_norm": 2.645561695098877, "learning_rate": 0.00019328782707622296, "loss": 6.2186, "step": 7091 }, { "epoch": 2.420477815699659, "grad_norm": 2.762645721435547, "learning_rate": 0.0001931740614334471, "loss": 5.679, "step": 7092 }, { "epoch": 2.4208191126279863, "grad_norm": 2.7711853981018066, "learning_rate": 0.00019306029579067122, "loss": 6.269, "step": 7093 }, { "epoch": 2.421160409556314, "grad_norm": 2.817148208618164, "learning_rate": 0.00019294653014789536, "loss": 5.9867, "step": 7094 }, { "epoch": 2.4215017064846416, "grad_norm": 3.189115285873413, "learning_rate": 0.00019283276450511946, "loss": 5.4579, "step": 7095 }, { "epoch": 2.4218430034129694, "grad_norm": 2.7254624366760254, "learning_rate": 0.00019271899886234357, "loss": 6.1964, "step": 7096 }, { "epoch": 2.422184300341297, "grad_norm": 2.8384273052215576, "learning_rate": 0.0001926052332195677, "loss": 5.0824, "step": 7097 }, { "epoch": 2.4225255972696247, "grad_norm": 2.7569327354431152, "learning_rate": 0.0001924914675767918, "loss": 5.8933, "step": 7098 }, { "epoch": 2.422866894197952, "grad_norm": 2.6978225708007812, "learning_rate": 0.00019237770193401594, "loss": 4.7832, "step": 7099 }, { "epoch": 2.42320819112628, "grad_norm": 2.7177062034606934, "learning_rate": 0.00019226393629124007, "loss": 5.4131, "step": 7100 }, { "epoch": 2.4235494880546073, "grad_norm": 2.7541306018829346, "learning_rate": 0.00019215017064846415, "loss": 6.3098, "step": 7101 }, { "epoch": 2.423890784982935, "grad_norm": 2.7490100860595703, "learning_rate": 0.00019203640500568828, "loss": 6.5531, "step": 7102 }, { "epoch": 2.4242320819112626, "grad_norm": 2.74153208732605, "learning_rate": 0.0001919226393629124, "loss": 6.1171, "step": 7103 }, { "epoch": 2.4245733788395905, "grad_norm": 2.7653117179870605, "learning_rate": 0.00019180887372013654, "loss": 5.8081, "step": 7104 }, { "epoch": 2.4249146757679183, "grad_norm": 2.7591192722320557, "learning_rate": 0.00019169510807736065, "loss": 5.5008, "step": 7105 }, { "epoch": 2.4252559726962457, "grad_norm": 2.5488815307617188, "learning_rate": 0.00019158134243458475, "loss": 6.4281, "step": 7106 }, { "epoch": 2.4255972696245736, "grad_norm": 2.22991943359375, "learning_rate": 0.00019146757679180888, "loss": 2.1561, "step": 7107 }, { "epoch": 2.425938566552901, "grad_norm": 3.231452465057373, "learning_rate": 0.000191353811149033, "loss": 5.4485, "step": 7108 }, { "epoch": 2.426279863481229, "grad_norm": 3.1210076808929443, "learning_rate": 0.00019124004550625712, "loss": 4.5784, "step": 7109 }, { "epoch": 2.426621160409556, "grad_norm": 2.622694253921509, "learning_rate": 0.00019112627986348125, "loss": 5.6532, "step": 7110 }, { "epoch": 2.426962457337884, "grad_norm": 2.8294200897216797, "learning_rate": 0.00019101251422070533, "loss": 6.0965, "step": 7111 }, { "epoch": 2.4273037542662115, "grad_norm": 2.737205982208252, "learning_rate": 0.00019089874857792946, "loss": 6.3284, "step": 7112 }, { "epoch": 2.4276450511945393, "grad_norm": 2.6656839847564697, "learning_rate": 0.0001907849829351536, "loss": 6.3298, "step": 7113 }, { "epoch": 2.4279863481228667, "grad_norm": 2.7031219005584717, "learning_rate": 0.00019067121729237773, "loss": 6.1422, "step": 7114 }, { "epoch": 2.4283276450511946, "grad_norm": 2.792492389678955, "learning_rate": 0.00019055745164960183, "loss": 6.1246, "step": 7115 }, { "epoch": 2.428668941979522, "grad_norm": 2.8730626106262207, "learning_rate": 0.00019044368600682594, "loss": 5.2022, "step": 7116 }, { "epoch": 2.42901023890785, "grad_norm": 2.832547903060913, "learning_rate": 0.00019032992036405007, "loss": 5.8241, "step": 7117 }, { "epoch": 2.4293515358361777, "grad_norm": 2.7533719539642334, "learning_rate": 0.00019021615472127417, "loss": 6.1243, "step": 7118 }, { "epoch": 2.429692832764505, "grad_norm": 2.7654356956481934, "learning_rate": 0.0001901023890784983, "loss": 6.5303, "step": 7119 }, { "epoch": 2.430034129692833, "grad_norm": 2.7612063884735107, "learning_rate": 0.0001899886234357224, "loss": 6.8023, "step": 7120 }, { "epoch": 2.4303754266211604, "grad_norm": 2.8074557781219482, "learning_rate": 0.00018987485779294652, "loss": 5.4001, "step": 7121 }, { "epoch": 2.430716723549488, "grad_norm": 2.6893937587738037, "learning_rate": 0.00018976109215017065, "loss": 5.6546, "step": 7122 }, { "epoch": 2.4310580204778156, "grad_norm": 2.6178319454193115, "learning_rate": 0.00018964732650739478, "loss": 6.3508, "step": 7123 }, { "epoch": 2.4313993174061435, "grad_norm": 2.972472906112671, "learning_rate": 0.0001895335608646189, "loss": 5.156, "step": 7124 }, { "epoch": 2.431740614334471, "grad_norm": 2.7686290740966797, "learning_rate": 0.000189419795221843, "loss": 6.5945, "step": 7125 }, { "epoch": 2.4320819112627987, "grad_norm": 2.8814847469329834, "learning_rate": 0.00018930602957906712, "loss": 5.6923, "step": 7126 }, { "epoch": 2.432423208191126, "grad_norm": 1.9587899446487427, "learning_rate": 0.00018919226393629125, "loss": 3.0517, "step": 7127 }, { "epoch": 2.432764505119454, "grad_norm": 2.7158172130584717, "learning_rate": 0.00018907849829351536, "loss": 5.734, "step": 7128 }, { "epoch": 2.4331058020477814, "grad_norm": 2.719918727874756, "learning_rate": 0.0001889647326507395, "loss": 6.0725, "step": 7129 }, { "epoch": 2.4334470989761092, "grad_norm": 3.0201733112335205, "learning_rate": 0.0001888509670079636, "loss": 5.2013, "step": 7130 }, { "epoch": 2.433788395904437, "grad_norm": 2.701390266418457, "learning_rate": 0.0001887372013651877, "loss": 5.9561, "step": 7131 }, { "epoch": 2.4341296928327645, "grad_norm": 2.7162129878997803, "learning_rate": 0.00018862343572241183, "loss": 5.5845, "step": 7132 }, { "epoch": 2.4344709897610923, "grad_norm": 2.755629777908325, "learning_rate": 0.00018850967007963596, "loss": 5.9108, "step": 7133 }, { "epoch": 2.4348122866894197, "grad_norm": 2.744990110397339, "learning_rate": 0.0001883959044368601, "loss": 6.0868, "step": 7134 }, { "epoch": 2.4351535836177476, "grad_norm": 2.6877083778381348, "learning_rate": 0.00018828213879408417, "loss": 6.2003, "step": 7135 }, { "epoch": 2.435494880546075, "grad_norm": 2.8432061672210693, "learning_rate": 0.0001881683731513083, "loss": 4.8051, "step": 7136 }, { "epoch": 2.435836177474403, "grad_norm": 2.6555399894714355, "learning_rate": 0.00018805460750853244, "loss": 6.0686, "step": 7137 }, { "epoch": 2.4361774744027302, "grad_norm": 2.7505736351013184, "learning_rate": 0.00018794084186575654, "loss": 6.1821, "step": 7138 }, { "epoch": 2.436518771331058, "grad_norm": 2.7092573642730713, "learning_rate": 0.00018782707622298068, "loss": 6.1421, "step": 7139 }, { "epoch": 2.4368600682593855, "grad_norm": 2.7411575317382812, "learning_rate": 0.00018771331058020478, "loss": 5.4851, "step": 7140 }, { "epoch": 2.4372013651877134, "grad_norm": 2.853494882583618, "learning_rate": 0.00018759954493742888, "loss": 5.2069, "step": 7141 }, { "epoch": 2.4375426621160408, "grad_norm": 2.8510758876800537, "learning_rate": 0.00018748577929465302, "loss": 5.1078, "step": 7142 }, { "epoch": 2.4378839590443686, "grad_norm": 2.6476118564605713, "learning_rate": 0.00018737201365187715, "loss": 5.4156, "step": 7143 }, { "epoch": 2.4382252559726965, "grad_norm": 2.692403793334961, "learning_rate": 0.00018725824800910125, "loss": 5.7651, "step": 7144 }, { "epoch": 2.438566552901024, "grad_norm": 3.758758783340454, "learning_rate": 0.00018714448236632536, "loss": 5.061, "step": 7145 }, { "epoch": 2.4389078498293517, "grad_norm": 2.8809521198272705, "learning_rate": 0.0001870307167235495, "loss": 5.8695, "step": 7146 }, { "epoch": 2.439249146757679, "grad_norm": 3.381040573120117, "learning_rate": 0.00018691695108077362, "loss": 3.9914, "step": 7147 }, { "epoch": 2.439590443686007, "grad_norm": 2.7137439250946045, "learning_rate": 0.00018680318543799773, "loss": 6.4293, "step": 7148 }, { "epoch": 2.4399317406143344, "grad_norm": 2.7312674522399902, "learning_rate": 0.00018668941979522186, "loss": 5.4181, "step": 7149 }, { "epoch": 2.4402730375426622, "grad_norm": 2.8323845863342285, "learning_rate": 0.00018657565415244596, "loss": 6.078, "step": 7150 }, { "epoch": 2.4406143344709896, "grad_norm": 2.6376965045928955, "learning_rate": 0.00018646188850967007, "loss": 5.5007, "step": 7151 }, { "epoch": 2.4409556313993175, "grad_norm": 2.6789960861206055, "learning_rate": 0.0001863481228668942, "loss": 5.7487, "step": 7152 }, { "epoch": 2.441296928327645, "grad_norm": 2.624680995941162, "learning_rate": 0.00018623435722411833, "loss": 6.5413, "step": 7153 }, { "epoch": 2.4416382252559727, "grad_norm": 2.9074723720550537, "learning_rate": 0.00018612059158134244, "loss": 5.3683, "step": 7154 }, { "epoch": 2.4419795221843, "grad_norm": 2.71537446975708, "learning_rate": 0.00018600682593856654, "loss": 5.7032, "step": 7155 }, { "epoch": 2.442320819112628, "grad_norm": 3.114678144454956, "learning_rate": 0.00018589306029579068, "loss": 5.9269, "step": 7156 }, { "epoch": 2.442662116040956, "grad_norm": 2.7557051181793213, "learning_rate": 0.0001857792946530148, "loss": 6.3845, "step": 7157 }, { "epoch": 2.4430034129692833, "grad_norm": 2.7388687133789062, "learning_rate": 0.0001856655290102389, "loss": 6.0723, "step": 7158 }, { "epoch": 2.443344709897611, "grad_norm": 2.7414541244506836, "learning_rate": 0.00018555176336746302, "loss": 6.4165, "step": 7159 }, { "epoch": 2.4436860068259385, "grad_norm": 2.7356631755828857, "learning_rate": 0.00018543799772468715, "loss": 6.1855, "step": 7160 }, { "epoch": 2.4440273037542664, "grad_norm": 2.845184564590454, "learning_rate": 0.00018532423208191125, "loss": 5.8645, "step": 7161 }, { "epoch": 2.4443686006825938, "grad_norm": 2.6517839431762695, "learning_rate": 0.0001852104664391354, "loss": 6.2252, "step": 7162 }, { "epoch": 2.4447098976109216, "grad_norm": 2.7228269577026367, "learning_rate": 0.00018509670079635952, "loss": 6.3114, "step": 7163 }, { "epoch": 2.445051194539249, "grad_norm": 2.680921792984009, "learning_rate": 0.0001849829351535836, "loss": 6.3173, "step": 7164 }, { "epoch": 2.445392491467577, "grad_norm": 2.817765235900879, "learning_rate": 0.00018486916951080773, "loss": 6.0299, "step": 7165 }, { "epoch": 2.4457337883959043, "grad_norm": 4.926273822784424, "learning_rate": 0.00018475540386803186, "loss": 5.256, "step": 7166 }, { "epoch": 2.446075085324232, "grad_norm": 2.7189366817474365, "learning_rate": 0.000184641638225256, "loss": 5.6313, "step": 7167 }, { "epoch": 2.4464163822525595, "grad_norm": 6.03790807723999, "learning_rate": 0.0001845278725824801, "loss": 4.8653, "step": 7168 }, { "epoch": 2.4467576791808874, "grad_norm": 2.8209898471832275, "learning_rate": 0.0001844141069397042, "loss": 6.1202, "step": 7169 }, { "epoch": 2.4470989761092152, "grad_norm": 2.895895004272461, "learning_rate": 0.00018430034129692833, "loss": 5.8068, "step": 7170 }, { "epoch": 2.4474402730375426, "grad_norm": 2.669280529022217, "learning_rate": 0.00018418657565415244, "loss": 5.6466, "step": 7171 }, { "epoch": 2.4477815699658705, "grad_norm": 2.70436429977417, "learning_rate": 0.00018407281001137657, "loss": 5.8402, "step": 7172 }, { "epoch": 2.448122866894198, "grad_norm": 2.793987989425659, "learning_rate": 0.0001839590443686007, "loss": 6.507, "step": 7173 }, { "epoch": 2.4484641638225257, "grad_norm": 2.6550915241241455, "learning_rate": 0.00018384527872582478, "loss": 5.8959, "step": 7174 }, { "epoch": 2.448805460750853, "grad_norm": 2.6808414459228516, "learning_rate": 0.00018373151308304891, "loss": 6.6288, "step": 7175 }, { "epoch": 2.449146757679181, "grad_norm": 2.6597981452941895, "learning_rate": 0.00018361774744027305, "loss": 5.4804, "step": 7176 }, { "epoch": 2.4494880546075084, "grad_norm": 2.7056760787963867, "learning_rate": 0.00018350398179749718, "loss": 6.5242, "step": 7177 }, { "epoch": 2.4498293515358363, "grad_norm": 2.821537733078003, "learning_rate": 0.00018339021615472128, "loss": 5.9898, "step": 7178 }, { "epoch": 2.4501706484641637, "grad_norm": 4.618656635284424, "learning_rate": 0.0001832764505119454, "loss": 5.2683, "step": 7179 }, { "epoch": 2.4505119453924915, "grad_norm": 2.7883718013763428, "learning_rate": 0.00018316268486916952, "loss": 6.4302, "step": 7180 }, { "epoch": 2.450853242320819, "grad_norm": 2.782613515853882, "learning_rate": 0.00018304891922639362, "loss": 6.0204, "step": 7181 }, { "epoch": 2.4511945392491468, "grad_norm": 3.4366047382354736, "learning_rate": 0.00018293515358361776, "loss": 4.9788, "step": 7182 }, { "epoch": 2.4515358361774746, "grad_norm": 2.758683681488037, "learning_rate": 0.0001828213879408419, "loss": 6.1169, "step": 7183 }, { "epoch": 2.451877133105802, "grad_norm": 2.7406606674194336, "learning_rate": 0.00018270762229806597, "loss": 6.1129, "step": 7184 }, { "epoch": 2.45221843003413, "grad_norm": 2.7950448989868164, "learning_rate": 0.0001825938566552901, "loss": 6.2123, "step": 7185 }, { "epoch": 2.4525597269624573, "grad_norm": 2.6972243785858154, "learning_rate": 0.00018248009101251423, "loss": 5.9016, "step": 7186 }, { "epoch": 2.452901023890785, "grad_norm": 2.7879507541656494, "learning_rate": 0.00018236632536973836, "loss": 6.3663, "step": 7187 }, { "epoch": 2.4532423208191125, "grad_norm": 2.7534303665161133, "learning_rate": 0.00018225255972696247, "loss": 6.1968, "step": 7188 }, { "epoch": 2.4535836177474404, "grad_norm": 2.755431652069092, "learning_rate": 0.00018213879408418657, "loss": 5.782, "step": 7189 }, { "epoch": 2.453924914675768, "grad_norm": 2.6141881942749023, "learning_rate": 0.0001820250284414107, "loss": 5.532, "step": 7190 }, { "epoch": 2.4542662116040956, "grad_norm": 3.2307145595550537, "learning_rate": 0.0001819112627986348, "loss": 5.0953, "step": 7191 }, { "epoch": 2.454607508532423, "grad_norm": 2.6708171367645264, "learning_rate": 0.00018179749715585894, "loss": 6.0323, "step": 7192 }, { "epoch": 2.454948805460751, "grad_norm": 2.6066534519195557, "learning_rate": 0.00018168373151308305, "loss": 6.0203, "step": 7193 }, { "epoch": 2.4552901023890783, "grad_norm": 2.7354557514190674, "learning_rate": 0.00018156996587030715, "loss": 6.1075, "step": 7194 }, { "epoch": 2.455631399317406, "grad_norm": 2.2138519287109375, "learning_rate": 0.00018145620022753128, "loss": 4.2661, "step": 7195 }, { "epoch": 2.455972696245734, "grad_norm": 2.87853741645813, "learning_rate": 0.00018134243458475542, "loss": 6.0552, "step": 7196 }, { "epoch": 2.4563139931740614, "grad_norm": 2.7033181190490723, "learning_rate": 0.00018122866894197955, "loss": 6.5583, "step": 7197 }, { "epoch": 2.4566552901023893, "grad_norm": 2.6118319034576416, "learning_rate": 0.00018111490329920363, "loss": 6.1334, "step": 7198 }, { "epoch": 2.4569965870307167, "grad_norm": 2.641845941543579, "learning_rate": 0.00018100113765642776, "loss": 6.1401, "step": 7199 }, { "epoch": 2.4573378839590445, "grad_norm": 2.8019094467163086, "learning_rate": 0.0001808873720136519, "loss": 5.518, "step": 7200 }, { "epoch": 2.457679180887372, "grad_norm": 4.355568885803223, "learning_rate": 0.000180773606370876, "loss": 4.7298, "step": 7201 }, { "epoch": 2.4580204778156998, "grad_norm": 3.0501973628997803, "learning_rate": 0.00018065984072810013, "loss": 5.1193, "step": 7202 }, { "epoch": 2.458361774744027, "grad_norm": 2.716735363006592, "learning_rate": 0.00018054607508532423, "loss": 5.868, "step": 7203 }, { "epoch": 2.458703071672355, "grad_norm": 2.653223991394043, "learning_rate": 0.00018043230944254834, "loss": 5.8835, "step": 7204 }, { "epoch": 2.4590443686006824, "grad_norm": 2.7722156047821045, "learning_rate": 0.00018031854379977247, "loss": 6.1643, "step": 7205 }, { "epoch": 2.4593856655290103, "grad_norm": 4.1703619956970215, "learning_rate": 0.0001802047781569966, "loss": 5.0742, "step": 7206 }, { "epoch": 2.4597269624573377, "grad_norm": 2.799798011779785, "learning_rate": 0.00018009101251422073, "loss": 5.3693, "step": 7207 }, { "epoch": 2.4600682593856655, "grad_norm": 2.693559169769287, "learning_rate": 0.0001799772468714448, "loss": 6.1607, "step": 7208 }, { "epoch": 2.4604095563139934, "grad_norm": 2.786590576171875, "learning_rate": 0.00017986348122866894, "loss": 6.3078, "step": 7209 }, { "epoch": 2.460750853242321, "grad_norm": 6.34663200378418, "learning_rate": 0.00017974971558589307, "loss": 3.7846, "step": 7210 }, { "epoch": 2.4610921501706486, "grad_norm": 2.779407024383545, "learning_rate": 0.00017963594994311718, "loss": 6.0981, "step": 7211 }, { "epoch": 2.461433447098976, "grad_norm": 2.8530948162078857, "learning_rate": 0.0001795221843003413, "loss": 6.5747, "step": 7212 }, { "epoch": 2.461774744027304, "grad_norm": 2.747483491897583, "learning_rate": 0.00017940841865756542, "loss": 6.037, "step": 7213 }, { "epoch": 2.4621160409556313, "grad_norm": 2.689389228820801, "learning_rate": 0.00017929465301478952, "loss": 5.8297, "step": 7214 }, { "epoch": 2.462457337883959, "grad_norm": 2.734438419342041, "learning_rate": 0.00017918088737201365, "loss": 6.2479, "step": 7215 }, { "epoch": 2.4627986348122866, "grad_norm": 5.3711466789245605, "learning_rate": 0.00017906712172923779, "loss": 4.8514, "step": 7216 }, { "epoch": 2.4631399317406144, "grad_norm": 2.606839418411255, "learning_rate": 0.00017895335608646192, "loss": 6.2493, "step": 7217 }, { "epoch": 2.463481228668942, "grad_norm": 2.6620965003967285, "learning_rate": 0.000178839590443686, "loss": 6.1907, "step": 7218 }, { "epoch": 2.4638225255972697, "grad_norm": 2.8028299808502197, "learning_rate": 0.00017872582480091013, "loss": 6.3521, "step": 7219 }, { "epoch": 2.464163822525597, "grad_norm": 2.6073973178863525, "learning_rate": 0.00017861205915813426, "loss": 5.6279, "step": 7220 }, { "epoch": 2.464505119453925, "grad_norm": 3.001814603805542, "learning_rate": 0.00017849829351535836, "loss": 4.9883, "step": 7221 }, { "epoch": 2.4648464163822528, "grad_norm": 2.9250714778900146, "learning_rate": 0.0001783845278725825, "loss": 5.5733, "step": 7222 }, { "epoch": 2.46518771331058, "grad_norm": 2.743499994277954, "learning_rate": 0.0001782707622298066, "loss": 6.2804, "step": 7223 }, { "epoch": 2.465529010238908, "grad_norm": 2.7742860317230225, "learning_rate": 0.0001781569965870307, "loss": 6.0203, "step": 7224 }, { "epoch": 2.4658703071672354, "grad_norm": 2.818325996398926, "learning_rate": 0.00017804323094425484, "loss": 5.6781, "step": 7225 }, { "epoch": 2.4662116040955633, "grad_norm": 2.795591115951538, "learning_rate": 0.00017792946530147897, "loss": 5.602, "step": 7226 }, { "epoch": 2.4665529010238907, "grad_norm": 2.807055950164795, "learning_rate": 0.00017781569965870305, "loss": 5.1593, "step": 7227 }, { "epoch": 2.4668941979522185, "grad_norm": 2.7123217582702637, "learning_rate": 0.00017770193401592718, "loss": 6.4722, "step": 7228 }, { "epoch": 2.467235494880546, "grad_norm": 2.759861469268799, "learning_rate": 0.0001775881683731513, "loss": 5.4875, "step": 7229 }, { "epoch": 2.467576791808874, "grad_norm": 2.78224515914917, "learning_rate": 0.00017747440273037544, "loss": 5.79, "step": 7230 }, { "epoch": 2.467918088737201, "grad_norm": 2.7341177463531494, "learning_rate": 0.00017736063708759955, "loss": 5.9504, "step": 7231 }, { "epoch": 2.468259385665529, "grad_norm": 2.6890923976898193, "learning_rate": 0.00017724687144482365, "loss": 6.5827, "step": 7232 }, { "epoch": 2.4686006825938565, "grad_norm": 2.8176991939544678, "learning_rate": 0.0001771331058020478, "loss": 5.8202, "step": 7233 }, { "epoch": 2.4689419795221843, "grad_norm": 2.746105670928955, "learning_rate": 0.0001770193401592719, "loss": 6.5549, "step": 7234 }, { "epoch": 2.469283276450512, "grad_norm": 2.7601068019866943, "learning_rate": 0.00017690557451649602, "loss": 5.8594, "step": 7235 }, { "epoch": 2.4696245733788396, "grad_norm": 2.550632953643799, "learning_rate": 0.00017679180887372016, "loss": 4.9317, "step": 7236 }, { "epoch": 2.4699658703071674, "grad_norm": 2.6904959678649902, "learning_rate": 0.00017667804323094423, "loss": 6.5549, "step": 7237 }, { "epoch": 2.470307167235495, "grad_norm": 2.70029616355896, "learning_rate": 0.00017656427758816837, "loss": 5.7531, "step": 7238 }, { "epoch": 2.4706484641638227, "grad_norm": 2.799412250518799, "learning_rate": 0.0001764505119453925, "loss": 5.6873, "step": 7239 }, { "epoch": 2.47098976109215, "grad_norm": 2.8735549449920654, "learning_rate": 0.00017633674630261663, "loss": 4.2249, "step": 7240 }, { "epoch": 2.471331058020478, "grad_norm": 2.010611057281494, "learning_rate": 0.00017622298065984073, "loss": 2.7766, "step": 7241 }, { "epoch": 2.4716723549488053, "grad_norm": 2.699479579925537, "learning_rate": 0.00017610921501706484, "loss": 5.8529, "step": 7242 }, { "epoch": 2.472013651877133, "grad_norm": 2.7273783683776855, "learning_rate": 0.00017599544937428897, "loss": 6.4994, "step": 7243 }, { "epoch": 2.4723549488054606, "grad_norm": 2.768795967102051, "learning_rate": 0.00017588168373151308, "loss": 6.4167, "step": 7244 }, { "epoch": 2.4726962457337884, "grad_norm": 2.7882094383239746, "learning_rate": 0.0001757679180887372, "loss": 5.8934, "step": 7245 }, { "epoch": 2.473037542662116, "grad_norm": 2.8088643550872803, "learning_rate": 0.00017565415244596134, "loss": 6.0832, "step": 7246 }, { "epoch": 2.4733788395904437, "grad_norm": 2.7583882808685303, "learning_rate": 0.00017554038680318542, "loss": 6.4278, "step": 7247 }, { "epoch": 2.4737201365187715, "grad_norm": 2.710397720336914, "learning_rate": 0.00017542662116040955, "loss": 5.9064, "step": 7248 }, { "epoch": 2.474061433447099, "grad_norm": 3.8351919651031494, "learning_rate": 0.00017531285551763368, "loss": 5.1969, "step": 7249 }, { "epoch": 2.474402730375427, "grad_norm": 2.7258529663085938, "learning_rate": 0.00017519908987485781, "loss": 5.6392, "step": 7250 }, { "epoch": 2.474744027303754, "grad_norm": 2.758955717086792, "learning_rate": 0.00017508532423208192, "loss": 6.3959, "step": 7251 }, { "epoch": 2.475085324232082, "grad_norm": 2.727679967880249, "learning_rate": 0.00017497155858930602, "loss": 5.689, "step": 7252 }, { "epoch": 2.4754266211604095, "grad_norm": 2.7123825550079346, "learning_rate": 0.00017485779294653016, "loss": 6.4214, "step": 7253 }, { "epoch": 2.4757679180887373, "grad_norm": 2.7021186351776123, "learning_rate": 0.00017474402730375426, "loss": 5.8807, "step": 7254 }, { "epoch": 2.4761092150170647, "grad_norm": 2.6904120445251465, "learning_rate": 0.0001746302616609784, "loss": 6.3117, "step": 7255 }, { "epoch": 2.4764505119453926, "grad_norm": 2.68574595451355, "learning_rate": 0.00017451649601820253, "loss": 6.2527, "step": 7256 }, { "epoch": 2.47679180887372, "grad_norm": 2.733830213546753, "learning_rate": 0.0001744027303754266, "loss": 6.2249, "step": 7257 }, { "epoch": 2.477133105802048, "grad_norm": 2.7638471126556396, "learning_rate": 0.00017428896473265074, "loss": 6.5467, "step": 7258 }, { "epoch": 2.4774744027303752, "grad_norm": 2.627199172973633, "learning_rate": 0.00017417519908987487, "loss": 6.2666, "step": 7259 }, { "epoch": 2.477815699658703, "grad_norm": 2.6954307556152344, "learning_rate": 0.000174061433447099, "loss": 5.407, "step": 7260 }, { "epoch": 2.478156996587031, "grad_norm": 2.6798486709594727, "learning_rate": 0.00017394766780432308, "loss": 6.3605, "step": 7261 }, { "epoch": 2.4784982935153583, "grad_norm": 2.6780683994293213, "learning_rate": 0.0001738339021615472, "loss": 5.8869, "step": 7262 }, { "epoch": 2.478839590443686, "grad_norm": 2.620546340942383, "learning_rate": 0.00017372013651877134, "loss": 6.0429, "step": 7263 }, { "epoch": 2.4791808873720136, "grad_norm": 3.052701711654663, "learning_rate": 0.00017360637087599545, "loss": 5.4183, "step": 7264 }, { "epoch": 2.4795221843003414, "grad_norm": 2.6496641635894775, "learning_rate": 0.00017349260523321958, "loss": 5.8464, "step": 7265 }, { "epoch": 2.479863481228669, "grad_norm": 2.715848684310913, "learning_rate": 0.00017337883959044368, "loss": 6.6531, "step": 7266 }, { "epoch": 2.4802047781569967, "grad_norm": 2.6739721298217773, "learning_rate": 0.0001732650739476678, "loss": 6.264, "step": 7267 }, { "epoch": 2.480546075085324, "grad_norm": 2.7070438861846924, "learning_rate": 0.00017315130830489192, "loss": 6.047, "step": 7268 }, { "epoch": 2.480887372013652, "grad_norm": 2.752788782119751, "learning_rate": 0.00017303754266211605, "loss": 6.0782, "step": 7269 }, { "epoch": 2.4812286689419794, "grad_norm": 2.7496471405029297, "learning_rate": 0.00017292377701934018, "loss": 6.4748, "step": 7270 }, { "epoch": 2.481569965870307, "grad_norm": 2.7307982444763184, "learning_rate": 0.00017281001137656426, "loss": 6.6512, "step": 7271 }, { "epoch": 2.4819112627986346, "grad_norm": 2.725822925567627, "learning_rate": 0.0001726962457337884, "loss": 6.5072, "step": 7272 }, { "epoch": 2.4822525597269625, "grad_norm": 2.7123241424560547, "learning_rate": 0.00017258248009101253, "loss": 5.8875, "step": 7273 }, { "epoch": 2.4825938566552903, "grad_norm": 2.633855104446411, "learning_rate": 0.00017246871444823663, "loss": 6.0088, "step": 7274 }, { "epoch": 2.4829351535836177, "grad_norm": 4.553799629211426, "learning_rate": 0.00017235494880546076, "loss": 4.3472, "step": 7275 }, { "epoch": 2.4832764505119456, "grad_norm": 2.7347793579101562, "learning_rate": 0.00017224118316268487, "loss": 5.2237, "step": 7276 }, { "epoch": 2.483617747440273, "grad_norm": 2.723522424697876, "learning_rate": 0.00017212741751990897, "loss": 6.5071, "step": 7277 }, { "epoch": 2.483959044368601, "grad_norm": 2.6944844722747803, "learning_rate": 0.0001720136518771331, "loss": 6.1664, "step": 7278 }, { "epoch": 2.4843003412969282, "grad_norm": 2.7033700942993164, "learning_rate": 0.00017189988623435724, "loss": 6.2606, "step": 7279 }, { "epoch": 2.484641638225256, "grad_norm": 3.5506386756896973, "learning_rate": 0.00017178612059158137, "loss": 4.5449, "step": 7280 }, { "epoch": 2.4849829351535835, "grad_norm": 2.705688714981079, "learning_rate": 0.00017167235494880545, "loss": 6.3225, "step": 7281 }, { "epoch": 2.4853242320819113, "grad_norm": 2.7204155921936035, "learning_rate": 0.00017155858930602958, "loss": 6.047, "step": 7282 }, { "epoch": 2.4856655290102387, "grad_norm": 2.656323194503784, "learning_rate": 0.0001714448236632537, "loss": 6.1217, "step": 7283 }, { "epoch": 2.4860068259385666, "grad_norm": 3.497453451156616, "learning_rate": 0.00017133105802047782, "loss": 5.0882, "step": 7284 }, { "epoch": 2.486348122866894, "grad_norm": 2.69559383392334, "learning_rate": 0.00017121729237770195, "loss": 5.7036, "step": 7285 }, { "epoch": 2.486689419795222, "grad_norm": 2.734285354614258, "learning_rate": 0.00017110352673492605, "loss": 5.8757, "step": 7286 }, { "epoch": 2.4870307167235497, "grad_norm": 2.6888885498046875, "learning_rate": 0.00017098976109215016, "loss": 5.7562, "step": 7287 }, { "epoch": 2.487372013651877, "grad_norm": 2.7632334232330322, "learning_rate": 0.0001708759954493743, "loss": 6.3917, "step": 7288 }, { "epoch": 2.487713310580205, "grad_norm": 2.7298285961151123, "learning_rate": 0.00017076222980659842, "loss": 5.9822, "step": 7289 }, { "epoch": 2.4880546075085324, "grad_norm": 2.7448837757110596, "learning_rate": 0.00017064846416382255, "loss": 6.1619, "step": 7290 }, { "epoch": 2.48839590443686, "grad_norm": 2.6427669525146484, "learning_rate": 0.00017053469852104663, "loss": 5.762, "step": 7291 }, { "epoch": 2.4887372013651876, "grad_norm": 2.733144998550415, "learning_rate": 0.00017042093287827076, "loss": 6.4147, "step": 7292 }, { "epoch": 2.4890784982935155, "grad_norm": 2.6906228065490723, "learning_rate": 0.0001703071672354949, "loss": 6.2987, "step": 7293 }, { "epoch": 2.489419795221843, "grad_norm": 2.6950948238372803, "learning_rate": 0.000170193401592719, "loss": 5.6162, "step": 7294 }, { "epoch": 2.4897610921501707, "grad_norm": 2.7234177589416504, "learning_rate": 0.00017007963594994313, "loss": 5.8311, "step": 7295 }, { "epoch": 2.490102389078498, "grad_norm": 2.7544310092926025, "learning_rate": 0.00016996587030716724, "loss": 6.3541, "step": 7296 }, { "epoch": 2.490443686006826, "grad_norm": 2.595151424407959, "learning_rate": 0.00016985210466439134, "loss": 6.0401, "step": 7297 }, { "epoch": 2.4907849829351534, "grad_norm": 3.826329469680786, "learning_rate": 0.00016973833902161548, "loss": 4.7459, "step": 7298 }, { "epoch": 2.4911262798634812, "grad_norm": 2.8267500400543213, "learning_rate": 0.0001696245733788396, "loss": 5.6925, "step": 7299 }, { "epoch": 2.491467576791809, "grad_norm": 2.7589223384857178, "learning_rate": 0.0001695108077360637, "loss": 5.8866, "step": 7300 }, { "epoch": 2.4918088737201365, "grad_norm": 2.703211545944214, "learning_rate": 0.00016939704209328782, "loss": 6.3634, "step": 7301 }, { "epoch": 2.4921501706484643, "grad_norm": 2.917485237121582, "learning_rate": 0.00016928327645051195, "loss": 5.7224, "step": 7302 }, { "epoch": 2.4924914675767917, "grad_norm": 2.6582095623016357, "learning_rate": 0.00016916951080773608, "loss": 5.8915, "step": 7303 }, { "epoch": 2.4928327645051196, "grad_norm": 2.8652775287628174, "learning_rate": 0.00016905574516496019, "loss": 3.9218, "step": 7304 }, { "epoch": 2.493174061433447, "grad_norm": 2.739563226699829, "learning_rate": 0.0001689419795221843, "loss": 6.6156, "step": 7305 }, { "epoch": 2.493515358361775, "grad_norm": 2.928471803665161, "learning_rate": 0.00016882821387940842, "loss": 5.1992, "step": 7306 }, { "epoch": 2.4938566552901023, "grad_norm": 2.6852288246154785, "learning_rate": 0.00016871444823663253, "loss": 5.9716, "step": 7307 }, { "epoch": 2.49419795221843, "grad_norm": 2.7359983921051025, "learning_rate": 0.00016860068259385666, "loss": 5.5148, "step": 7308 }, { "epoch": 2.4945392491467575, "grad_norm": 2.796595335006714, "learning_rate": 0.0001684869169510808, "loss": 6.9963, "step": 7309 }, { "epoch": 2.4948805460750854, "grad_norm": 2.682931900024414, "learning_rate": 0.0001683731513083049, "loss": 5.4007, "step": 7310 }, { "epoch": 2.4952218430034128, "grad_norm": 1.8263013362884521, "learning_rate": 0.000168259385665529, "loss": 3.1275, "step": 7311 }, { "epoch": 2.4955631399317406, "grad_norm": 2.828277587890625, "learning_rate": 0.00016814562002275313, "loss": 6.0928, "step": 7312 }, { "epoch": 2.4959044368600685, "grad_norm": 2.7260549068450928, "learning_rate": 0.00016803185437997727, "loss": 5.7029, "step": 7313 }, { "epoch": 2.496245733788396, "grad_norm": 2.6961669921875, "learning_rate": 0.00016791808873720137, "loss": 5.9228, "step": 7314 }, { "epoch": 2.4965870307167237, "grad_norm": 2.813055992126465, "learning_rate": 0.00016780432309442548, "loss": 6.1379, "step": 7315 }, { "epoch": 2.496928327645051, "grad_norm": 2.6785166263580322, "learning_rate": 0.0001676905574516496, "loss": 5.7098, "step": 7316 }, { "epoch": 2.497269624573379, "grad_norm": 2.698068857192993, "learning_rate": 0.0001675767918088737, "loss": 6.0382, "step": 7317 }, { "epoch": 2.4976109215017064, "grad_norm": 2.680922508239746, "learning_rate": 0.00016746302616609785, "loss": 6.1616, "step": 7318 }, { "epoch": 2.4979522184300342, "grad_norm": 2.7005722522735596, "learning_rate": 0.00016734926052332198, "loss": 5.649, "step": 7319 }, { "epoch": 2.4982935153583616, "grad_norm": 2.744039535522461, "learning_rate": 0.00016723549488054606, "loss": 6.2951, "step": 7320 }, { "epoch": 2.4986348122866895, "grad_norm": 2.7696053981781006, "learning_rate": 0.0001671217292377702, "loss": 6.2824, "step": 7321 }, { "epoch": 2.498976109215017, "grad_norm": 2.9358487129211426, "learning_rate": 0.00016700796359499432, "loss": 5.1869, "step": 7322 }, { "epoch": 2.4993174061433447, "grad_norm": 2.740104913711548, "learning_rate": 0.00016689419795221845, "loss": 6.0786, "step": 7323 }, { "epoch": 2.499658703071672, "grad_norm": 2.702230215072632, "learning_rate": 0.00016678043230944256, "loss": 6.4051, "step": 7324 }, { "epoch": 2.5, "grad_norm": 4.933879375457764, "learning_rate": 0.00016666666666666666, "loss": 4.6992, "step": 7325 }, { "epoch": 2.500341296928328, "grad_norm": 2.6900596618652344, "learning_rate": 0.0001665529010238908, "loss": 5.8764, "step": 7326 }, { "epoch": 2.5006825938566553, "grad_norm": 2.7726457118988037, "learning_rate": 0.0001664391353811149, "loss": 5.6187, "step": 7327 }, { "epoch": 2.5010238907849827, "grad_norm": 1.9055275917053223, "learning_rate": 0.00016632536973833903, "loss": 3.4067, "step": 7328 }, { "epoch": 2.5013651877133105, "grad_norm": 2.7264151573181152, "learning_rate": 0.00016621160409556316, "loss": 6.0756, "step": 7329 }, { "epoch": 2.5017064846416384, "grad_norm": 2.759608030319214, "learning_rate": 0.00016609783845278724, "loss": 6.1258, "step": 7330 }, { "epoch": 2.5020477815699658, "grad_norm": 2.6936237812042236, "learning_rate": 0.00016598407281001137, "loss": 6.2095, "step": 7331 }, { "epoch": 2.5023890784982936, "grad_norm": 2.6739792823791504, "learning_rate": 0.0001658703071672355, "loss": 6.5549, "step": 7332 }, { "epoch": 2.502730375426621, "grad_norm": 2.725817918777466, "learning_rate": 0.00016575654152445964, "loss": 6.1217, "step": 7333 }, { "epoch": 2.503071672354949, "grad_norm": 2.785630702972412, "learning_rate": 0.00016564277588168371, "loss": 6.0126, "step": 7334 }, { "epoch": 2.5034129692832767, "grad_norm": 2.7225522994995117, "learning_rate": 0.00016552901023890785, "loss": 5.9088, "step": 7335 }, { "epoch": 2.503754266211604, "grad_norm": 2.7138113975524902, "learning_rate": 0.00016541524459613198, "loss": 6.5287, "step": 7336 }, { "epoch": 2.5040955631399315, "grad_norm": 2.659165143966675, "learning_rate": 0.00016530147895335608, "loss": 6.1559, "step": 7337 }, { "epoch": 2.5044368600682594, "grad_norm": 2.6596744060516357, "learning_rate": 0.00016518771331058022, "loss": 5.9957, "step": 7338 }, { "epoch": 2.5047781569965872, "grad_norm": 2.6613821983337402, "learning_rate": 0.00016507394766780432, "loss": 5.6435, "step": 7339 }, { "epoch": 2.5051194539249146, "grad_norm": 2.7147886753082275, "learning_rate": 0.00016496018202502842, "loss": 6.0681, "step": 7340 }, { "epoch": 2.505460750853242, "grad_norm": 2.6107845306396484, "learning_rate": 0.00016484641638225256, "loss": 5.9106, "step": 7341 }, { "epoch": 2.50580204778157, "grad_norm": 2.7579963207244873, "learning_rate": 0.0001647326507394767, "loss": 6.6005, "step": 7342 }, { "epoch": 2.5061433447098977, "grad_norm": 2.604501962661743, "learning_rate": 0.00016461888509670082, "loss": 5.8982, "step": 7343 }, { "epoch": 2.506484641638225, "grad_norm": 2.7372636795043945, "learning_rate": 0.0001645051194539249, "loss": 5.3334, "step": 7344 }, { "epoch": 2.506825938566553, "grad_norm": 2.69775390625, "learning_rate": 0.00016439135381114903, "loss": 6.0477, "step": 7345 }, { "epoch": 2.5071672354948804, "grad_norm": 2.698512077331543, "learning_rate": 0.00016427758816837316, "loss": 5.5654, "step": 7346 }, { "epoch": 2.5075085324232083, "grad_norm": 2.6294450759887695, "learning_rate": 0.00016416382252559727, "loss": 6.1314, "step": 7347 }, { "epoch": 2.507849829351536, "grad_norm": 2.6949124336242676, "learning_rate": 0.0001640500568828214, "loss": 6.6314, "step": 7348 }, { "epoch": 2.5081911262798635, "grad_norm": 2.7231664657592773, "learning_rate": 0.0001639362912400455, "loss": 6.1183, "step": 7349 }, { "epoch": 2.508532423208191, "grad_norm": 2.9584968090057373, "learning_rate": 0.0001638225255972696, "loss": 5.1231, "step": 7350 }, { "epoch": 2.5088737201365188, "grad_norm": 2.7133231163024902, "learning_rate": 0.00016370875995449374, "loss": 5.5295, "step": 7351 }, { "epoch": 2.5092150170648466, "grad_norm": 2.7822914123535156, "learning_rate": 0.00016359499431171787, "loss": 5.4579, "step": 7352 }, { "epoch": 2.509556313993174, "grad_norm": 2.6988959312438965, "learning_rate": 0.000163481228668942, "loss": 6.0726, "step": 7353 }, { "epoch": 2.5098976109215014, "grad_norm": 2.623605489730835, "learning_rate": 0.00016336746302616608, "loss": 6.1312, "step": 7354 }, { "epoch": 2.5102389078498293, "grad_norm": 2.7255163192749023, "learning_rate": 0.00016325369738339022, "loss": 5.9188, "step": 7355 }, { "epoch": 2.510580204778157, "grad_norm": 2.6522679328918457, "learning_rate": 0.00016313993174061435, "loss": 6.2107, "step": 7356 }, { "epoch": 2.5109215017064845, "grad_norm": 2.733840227127075, "learning_rate": 0.00016302616609783845, "loss": 6.0067, "step": 7357 }, { "epoch": 2.5112627986348124, "grad_norm": 2.6680688858032227, "learning_rate": 0.00016291240045506259, "loss": 6.2216, "step": 7358 }, { "epoch": 2.51160409556314, "grad_norm": 3.2532639503479004, "learning_rate": 0.0001627986348122867, "loss": 5.0936, "step": 7359 }, { "epoch": 2.5119453924914676, "grad_norm": 2.6566052436828613, "learning_rate": 0.0001626848691695108, "loss": 6.2296, "step": 7360 }, { "epoch": 2.5122866894197955, "grad_norm": 3.307908296585083, "learning_rate": 0.00016257110352673493, "loss": 4.3073, "step": 7361 }, { "epoch": 2.512627986348123, "grad_norm": 2.7235147953033447, "learning_rate": 0.00016245733788395906, "loss": 5.967, "step": 7362 }, { "epoch": 2.5129692832764503, "grad_norm": 2.7577085494995117, "learning_rate": 0.0001623435722411832, "loss": 4.6936, "step": 7363 }, { "epoch": 2.513310580204778, "grad_norm": 2.6942477226257324, "learning_rate": 0.00016222980659840727, "loss": 6.1181, "step": 7364 }, { "epoch": 2.513651877133106, "grad_norm": 2.6965997219085693, "learning_rate": 0.0001621160409556314, "loss": 4.4271, "step": 7365 }, { "epoch": 2.5139931740614334, "grad_norm": 2.5623295307159424, "learning_rate": 0.00016200227531285553, "loss": 5.8977, "step": 7366 }, { "epoch": 2.514334470989761, "grad_norm": 2.7341978549957275, "learning_rate": 0.00016188850967007964, "loss": 5.7355, "step": 7367 }, { "epoch": 2.5146757679180887, "grad_norm": 2.686908721923828, "learning_rate": 0.00016177474402730374, "loss": 5.5919, "step": 7368 }, { "epoch": 2.5150170648464165, "grad_norm": 2.768826961517334, "learning_rate": 0.00016166097838452787, "loss": 6.317, "step": 7369 }, { "epoch": 2.515358361774744, "grad_norm": 2.7448062896728516, "learning_rate": 0.00016154721274175198, "loss": 5.9812, "step": 7370 }, { "epoch": 2.5156996587030718, "grad_norm": 2.6533572673797607, "learning_rate": 0.0001614334470989761, "loss": 5.7661, "step": 7371 }, { "epoch": 2.516040955631399, "grad_norm": 2.7210354804992676, "learning_rate": 0.00016131968145620024, "loss": 5.9004, "step": 7372 }, { "epoch": 2.516382252559727, "grad_norm": 2.7211415767669678, "learning_rate": 0.00016120591581342435, "loss": 6.3239, "step": 7373 }, { "epoch": 2.516723549488055, "grad_norm": 2.718151092529297, "learning_rate": 0.00016109215017064845, "loss": 6.2132, "step": 7374 }, { "epoch": 2.5170648464163823, "grad_norm": 2.657224416732788, "learning_rate": 0.00016097838452787259, "loss": 5.9553, "step": 7375 }, { "epoch": 2.5174061433447097, "grad_norm": 2.738079071044922, "learning_rate": 0.00016086461888509672, "loss": 5.824, "step": 7376 }, { "epoch": 2.5177474402730375, "grad_norm": 2.7597906589508057, "learning_rate": 0.00016075085324232082, "loss": 6.2877, "step": 7377 }, { "epoch": 2.5180887372013654, "grad_norm": 2.7214534282684326, "learning_rate": 0.00016063708759954493, "loss": 6.262, "step": 7378 }, { "epoch": 2.518430034129693, "grad_norm": 2.705698013305664, "learning_rate": 0.00016052332195676906, "loss": 6.2512, "step": 7379 }, { "epoch": 2.51877133105802, "grad_norm": 2.636011838912964, "learning_rate": 0.00016040955631399316, "loss": 4.9735, "step": 7380 }, { "epoch": 2.519112627986348, "grad_norm": 3.66536021232605, "learning_rate": 0.0001602957906712173, "loss": 3.9323, "step": 7381 }, { "epoch": 2.519453924914676, "grad_norm": 2.7328991889953613, "learning_rate": 0.00016018202502844143, "loss": 4.5109, "step": 7382 }, { "epoch": 2.5197952218430033, "grad_norm": 2.689422369003296, "learning_rate": 0.00016006825938566553, "loss": 5.9128, "step": 7383 }, { "epoch": 2.520136518771331, "grad_norm": 3.5838780403137207, "learning_rate": 0.00015995449374288964, "loss": 4.2017, "step": 7384 }, { "epoch": 2.5204778156996586, "grad_norm": 2.720184087753296, "learning_rate": 0.00015984072810011377, "loss": 5.6297, "step": 7385 }, { "epoch": 2.5208191126279864, "grad_norm": 2.6871819496154785, "learning_rate": 0.0001597269624573379, "loss": 5.9124, "step": 7386 }, { "epoch": 2.5211604095563143, "grad_norm": 2.7035536766052246, "learning_rate": 0.000159613196814562, "loss": 6.2627, "step": 7387 }, { "epoch": 2.5215017064846417, "grad_norm": 2.7028918266296387, "learning_rate": 0.0001594994311717861, "loss": 5.8171, "step": 7388 }, { "epoch": 2.521843003412969, "grad_norm": 2.6863887310028076, "learning_rate": 0.00015938566552901024, "loss": 6.7491, "step": 7389 }, { "epoch": 2.522184300341297, "grad_norm": 2.64327073097229, "learning_rate": 0.00015927189988623435, "loss": 6.236, "step": 7390 }, { "epoch": 2.5225255972696248, "grad_norm": 2.628070592880249, "learning_rate": 0.00015915813424345848, "loss": 5.51, "step": 7391 }, { "epoch": 2.522866894197952, "grad_norm": 2.6996893882751465, "learning_rate": 0.00015904436860068261, "loss": 6.1538, "step": 7392 }, { "epoch": 2.5232081911262796, "grad_norm": 4.886688709259033, "learning_rate": 0.00015893060295790672, "loss": 4.6864, "step": 7393 }, { "epoch": 2.5235494880546074, "grad_norm": 2.6805944442749023, "learning_rate": 0.00015881683731513082, "loss": 6.105, "step": 7394 }, { "epoch": 2.5238907849829353, "grad_norm": 2.6910324096679688, "learning_rate": 0.00015870307167235496, "loss": 6.266, "step": 7395 }, { "epoch": 2.5242320819112627, "grad_norm": 2.6817996501922607, "learning_rate": 0.0001585893060295791, "loss": 6.4578, "step": 7396 }, { "epoch": 2.5245733788395905, "grad_norm": 3.1869544982910156, "learning_rate": 0.0001584755403868032, "loss": 5.4998, "step": 7397 }, { "epoch": 2.524914675767918, "grad_norm": 2.6462249755859375, "learning_rate": 0.0001583617747440273, "loss": 6.3286, "step": 7398 }, { "epoch": 2.525255972696246, "grad_norm": 2.711543321609497, "learning_rate": 0.00015824800910125143, "loss": 6.6815, "step": 7399 }, { "epoch": 2.5255972696245736, "grad_norm": 2.6534812450408936, "learning_rate": 0.00015813424345847553, "loss": 6.3155, "step": 7400 }, { "epoch": 2.525938566552901, "grad_norm": 2.68021559715271, "learning_rate": 0.00015802047781569967, "loss": 6.126, "step": 7401 }, { "epoch": 2.5262798634812285, "grad_norm": 2.7050492763519287, "learning_rate": 0.00015790671217292377, "loss": 5.7176, "step": 7402 }, { "epoch": 2.5266211604095563, "grad_norm": 2.6450881958007812, "learning_rate": 0.0001577929465301479, "loss": 6.5606, "step": 7403 }, { "epoch": 2.526962457337884, "grad_norm": 2.6857900619506836, "learning_rate": 0.000157679180887372, "loss": 6.2598, "step": 7404 }, { "epoch": 2.5273037542662116, "grad_norm": 2.6581039428710938, "learning_rate": 0.00015756541524459614, "loss": 6.5876, "step": 7405 }, { "epoch": 2.527645051194539, "grad_norm": 2.6261470317840576, "learning_rate": 0.00015745164960182027, "loss": 5.4513, "step": 7406 }, { "epoch": 2.527986348122867, "grad_norm": 2.684361696243286, "learning_rate": 0.00015733788395904435, "loss": 5.8607, "step": 7407 }, { "epoch": 2.5283276450511947, "grad_norm": 2.6788904666900635, "learning_rate": 0.00015722411831626848, "loss": 6.32, "step": 7408 }, { "epoch": 2.528668941979522, "grad_norm": 2.700685977935791, "learning_rate": 0.00015711035267349261, "loss": 5.7209, "step": 7409 }, { "epoch": 2.52901023890785, "grad_norm": 2.7809455394744873, "learning_rate": 0.00015699658703071672, "loss": 6.0776, "step": 7410 }, { "epoch": 2.5293515358361773, "grad_norm": 2.6642520427703857, "learning_rate": 0.00015688282138794085, "loss": 5.6266, "step": 7411 }, { "epoch": 2.529692832764505, "grad_norm": 2.7473788261413574, "learning_rate": 0.00015676905574516496, "loss": 5.0825, "step": 7412 }, { "epoch": 2.530034129692833, "grad_norm": 2.7476108074188232, "learning_rate": 0.0001566552901023891, "loss": 6.2024, "step": 7413 }, { "epoch": 2.5303754266211604, "grad_norm": 2.6110386848449707, "learning_rate": 0.0001565415244596132, "loss": 6.1209, "step": 7414 }, { "epoch": 2.530716723549488, "grad_norm": 2.9755313396453857, "learning_rate": 0.00015642775881683733, "loss": 5.5796, "step": 7415 }, { "epoch": 2.5310580204778157, "grad_norm": 2.710559844970703, "learning_rate": 0.00015631399317406146, "loss": 5.7508, "step": 7416 }, { "epoch": 2.5313993174061435, "grad_norm": 2.636697292327881, "learning_rate": 0.00015620022753128554, "loss": 6.1145, "step": 7417 }, { "epoch": 2.531740614334471, "grad_norm": 2.8945558071136475, "learning_rate": 0.00015608646188850967, "loss": 5.4822, "step": 7418 }, { "epoch": 2.532081911262799, "grad_norm": 2.7583422660827637, "learning_rate": 0.0001559726962457338, "loss": 6.3568, "step": 7419 }, { "epoch": 2.532423208191126, "grad_norm": 2.6713101863861084, "learning_rate": 0.0001558589306029579, "loss": 5.554, "step": 7420 }, { "epoch": 2.532764505119454, "grad_norm": 2.626268148422241, "learning_rate": 0.00015574516496018204, "loss": 5.8425, "step": 7421 }, { "epoch": 2.5331058020477815, "grad_norm": 2.9670071601867676, "learning_rate": 0.00015563139931740614, "loss": 5.2788, "step": 7422 }, { "epoch": 2.5334470989761093, "grad_norm": 2.694166660308838, "learning_rate": 0.00015551763367463025, "loss": 5.982, "step": 7423 }, { "epoch": 2.5337883959044367, "grad_norm": 2.6969735622406006, "learning_rate": 0.00015540386803185438, "loss": 6.4294, "step": 7424 }, { "epoch": 2.5341296928327646, "grad_norm": 2.7275049686431885, "learning_rate": 0.0001552901023890785, "loss": 6.0112, "step": 7425 }, { "epoch": 2.5344709897610924, "grad_norm": 2.7712514400482178, "learning_rate": 0.00015517633674630264, "loss": 6.2063, "step": 7426 }, { "epoch": 2.53481228668942, "grad_norm": 3.0031471252441406, "learning_rate": 0.00015506257110352672, "loss": 4.5911, "step": 7427 }, { "epoch": 2.5351535836177472, "grad_norm": 2.857637882232666, "learning_rate": 0.00015494880546075085, "loss": 4.5048, "step": 7428 }, { "epoch": 2.535494880546075, "grad_norm": 2.757772207260132, "learning_rate": 0.00015483503981797498, "loss": 5.9304, "step": 7429 }, { "epoch": 2.535836177474403, "grad_norm": 2.77624773979187, "learning_rate": 0.0001547212741751991, "loss": 5.2982, "step": 7430 }, { "epoch": 2.5361774744027303, "grad_norm": 2.9393277168273926, "learning_rate": 0.00015460750853242322, "loss": 5.1273, "step": 7431 }, { "epoch": 2.536518771331058, "grad_norm": 2.715731143951416, "learning_rate": 0.00015449374288964733, "loss": 6.2645, "step": 7432 }, { "epoch": 2.5368600682593856, "grad_norm": 2.7153773307800293, "learning_rate": 0.00015437997724687143, "loss": 5.8669, "step": 7433 }, { "epoch": 2.5372013651877134, "grad_norm": 2.890367031097412, "learning_rate": 0.00015426621160409556, "loss": 5.8867, "step": 7434 }, { "epoch": 2.537542662116041, "grad_norm": 2.7217259407043457, "learning_rate": 0.0001541524459613197, "loss": 6.2305, "step": 7435 }, { "epoch": 2.5378839590443687, "grad_norm": 5.296313285827637, "learning_rate": 0.00015403868031854383, "loss": 3.7479, "step": 7436 }, { "epoch": 2.538225255972696, "grad_norm": 2.703120231628418, "learning_rate": 0.0001539249146757679, "loss": 6.5971, "step": 7437 }, { "epoch": 2.538566552901024, "grad_norm": 3.4888710975646973, "learning_rate": 0.00015381114903299204, "loss": 4.5474, "step": 7438 }, { "epoch": 2.538907849829352, "grad_norm": 2.7433698177337646, "learning_rate": 0.00015369738339021617, "loss": 5.4895, "step": 7439 }, { "epoch": 2.539249146757679, "grad_norm": 2.715934991836548, "learning_rate": 0.00015358361774744027, "loss": 4.5857, "step": 7440 }, { "epoch": 2.5395904436860066, "grad_norm": 2.6325573921203613, "learning_rate": 0.00015346985210466438, "loss": 6.0054, "step": 7441 }, { "epoch": 2.5399317406143345, "grad_norm": 2.666834831237793, "learning_rate": 0.0001533560864618885, "loss": 6.4198, "step": 7442 }, { "epoch": 2.5402730375426623, "grad_norm": 2.6133065223693848, "learning_rate": 0.00015324232081911262, "loss": 6.1836, "step": 7443 }, { "epoch": 2.5406143344709897, "grad_norm": 2.7008965015411377, "learning_rate": 0.00015312855517633675, "loss": 6.5665, "step": 7444 }, { "epoch": 2.5409556313993176, "grad_norm": 2.5905418395996094, "learning_rate": 0.00015301478953356088, "loss": 5.7239, "step": 7445 }, { "epoch": 2.541296928327645, "grad_norm": 2.6478431224823, "learning_rate": 0.00015290102389078499, "loss": 6.0945, "step": 7446 }, { "epoch": 2.541638225255973, "grad_norm": 2.7686331272125244, "learning_rate": 0.0001527872582480091, "loss": 5.9458, "step": 7447 }, { "epoch": 2.5419795221843002, "grad_norm": 2.7820048332214355, "learning_rate": 0.00015267349260523322, "loss": 5.3187, "step": 7448 }, { "epoch": 2.542320819112628, "grad_norm": 2.7252564430236816, "learning_rate": 0.00015255972696245735, "loss": 6.1386, "step": 7449 }, { "epoch": 2.5426621160409555, "grad_norm": 3.9234542846679688, "learning_rate": 0.00015244596131968146, "loss": 4.9931, "step": 7450 }, { "epoch": 2.5430034129692833, "grad_norm": 2.7013142108917236, "learning_rate": 0.00015233219567690556, "loss": 6.5903, "step": 7451 }, { "epoch": 2.543344709897611, "grad_norm": 2.7234249114990234, "learning_rate": 0.0001522184300341297, "loss": 6.5083, "step": 7452 }, { "epoch": 2.5436860068259386, "grad_norm": 2.646691083908081, "learning_rate": 0.0001521046643913538, "loss": 5.8912, "step": 7453 }, { "epoch": 2.544027303754266, "grad_norm": 2.6651551723480225, "learning_rate": 0.00015199089874857793, "loss": 6.4359, "step": 7454 }, { "epoch": 2.544368600682594, "grad_norm": 2.7203242778778076, "learning_rate": 0.00015187713310580207, "loss": 6.1125, "step": 7455 }, { "epoch": 2.5447098976109217, "grad_norm": 2.7022976875305176, "learning_rate": 0.00015176336746302617, "loss": 6.3177, "step": 7456 }, { "epoch": 2.545051194539249, "grad_norm": 3.036482334136963, "learning_rate": 0.00015164960182025028, "loss": 5.8322, "step": 7457 }, { "epoch": 2.545392491467577, "grad_norm": 2.6736273765563965, "learning_rate": 0.0001515358361774744, "loss": 5.683, "step": 7458 }, { "epoch": 2.5457337883959044, "grad_norm": 2.6975038051605225, "learning_rate": 0.00015142207053469854, "loss": 5.9692, "step": 7459 }, { "epoch": 2.546075085324232, "grad_norm": 2.6708292961120605, "learning_rate": 0.00015130830489192264, "loss": 6.2471, "step": 7460 }, { "epoch": 2.5464163822525596, "grad_norm": 4.273525714874268, "learning_rate": 0.00015119453924914675, "loss": 5.0722, "step": 7461 }, { "epoch": 2.5467576791808875, "grad_norm": 2.717935085296631, "learning_rate": 0.00015108077360637088, "loss": 6.2979, "step": 7462 }, { "epoch": 2.547098976109215, "grad_norm": 2.620076894760132, "learning_rate": 0.00015096700796359499, "loss": 6.0223, "step": 7463 }, { "epoch": 2.5474402730375427, "grad_norm": 2.6915009021759033, "learning_rate": 0.00015085324232081912, "loss": 6.0069, "step": 7464 }, { "epoch": 2.5477815699658706, "grad_norm": 2.7233173847198486, "learning_rate": 0.00015073947667804325, "loss": 5.3867, "step": 7465 }, { "epoch": 2.548122866894198, "grad_norm": 2.6472926139831543, "learning_rate": 0.00015062571103526736, "loss": 6.8144, "step": 7466 }, { "epoch": 2.5484641638225254, "grad_norm": 2.7000956535339355, "learning_rate": 0.00015051194539249146, "loss": 5.9423, "step": 7467 }, { "epoch": 2.5488054607508532, "grad_norm": 2.6225948333740234, "learning_rate": 0.0001503981797497156, "loss": 6.0564, "step": 7468 }, { "epoch": 2.549146757679181, "grad_norm": 3.0721187591552734, "learning_rate": 0.00015028441410693972, "loss": 4.9398, "step": 7469 }, { "epoch": 2.5494880546075085, "grad_norm": 2.621379852294922, "learning_rate": 0.00015017064846416383, "loss": 6.065, "step": 7470 }, { "epoch": 2.5498293515358363, "grad_norm": 2.6615641117095947, "learning_rate": 0.00015005688282138793, "loss": 6.5425, "step": 7471 }, { "epoch": 2.5501706484641637, "grad_norm": 2.7236409187316895, "learning_rate": 0.00014994311717861207, "loss": 6.1923, "step": 7472 }, { "epoch": 2.5505119453924916, "grad_norm": 2.776329517364502, "learning_rate": 0.00014982935153583617, "loss": 6.3152, "step": 7473 }, { "epoch": 2.550853242320819, "grad_norm": 2.7233684062957764, "learning_rate": 0.0001497155858930603, "loss": 5.9222, "step": 7474 }, { "epoch": 2.551194539249147, "grad_norm": 2.578994035720825, "learning_rate": 0.0001496018202502844, "loss": 5.9317, "step": 7475 }, { "epoch": 2.5515358361774743, "grad_norm": 2.6251556873321533, "learning_rate": 0.00014948805460750854, "loss": 6.1575, "step": 7476 }, { "epoch": 2.551877133105802, "grad_norm": 2.599755048751831, "learning_rate": 0.00014937428896473265, "loss": 6.3228, "step": 7477 }, { "epoch": 2.55221843003413, "grad_norm": 2.6746912002563477, "learning_rate": 0.00014926052332195678, "loss": 6.0529, "step": 7478 }, { "epoch": 2.5525597269624574, "grad_norm": 2.6559457778930664, "learning_rate": 0.0001491467576791809, "loss": 5.4679, "step": 7479 }, { "epoch": 2.5529010238907848, "grad_norm": 2.8027467727661133, "learning_rate": 0.000149032992036405, "loss": 5.8367, "step": 7480 }, { "epoch": 2.5532423208191126, "grad_norm": 2.701709747314453, "learning_rate": 0.00014891922639362912, "loss": 6.8529, "step": 7481 }, { "epoch": 2.5535836177474405, "grad_norm": 2.6528892517089844, "learning_rate": 0.00014880546075085325, "loss": 5.5392, "step": 7482 }, { "epoch": 2.553924914675768, "grad_norm": 2.7285146713256836, "learning_rate": 0.00014869169510807736, "loss": 6.1253, "step": 7483 }, { "epoch": 2.5542662116040957, "grad_norm": 2.646544933319092, "learning_rate": 0.0001485779294653015, "loss": 6.2362, "step": 7484 }, { "epoch": 2.554607508532423, "grad_norm": 2.632233142852783, "learning_rate": 0.0001484641638225256, "loss": 5.9352, "step": 7485 }, { "epoch": 2.554948805460751, "grad_norm": 2.641140937805176, "learning_rate": 0.00014835039817974973, "loss": 6.2673, "step": 7486 }, { "epoch": 2.5552901023890784, "grad_norm": 2.7124555110931396, "learning_rate": 0.00014823663253697383, "loss": 5.4315, "step": 7487 }, { "epoch": 2.5556313993174062, "grad_norm": 2.6860406398773193, "learning_rate": 0.00014812286689419796, "loss": 6.3044, "step": 7488 }, { "epoch": 2.5559726962457336, "grad_norm": 2.6111605167388916, "learning_rate": 0.0001480091012514221, "loss": 6.2049, "step": 7489 }, { "epoch": 2.5563139931740615, "grad_norm": 2.7284209728240967, "learning_rate": 0.00014789533560864617, "loss": 6.7773, "step": 7490 }, { "epoch": 2.5566552901023893, "grad_norm": 2.732853651046753, "learning_rate": 0.0001477815699658703, "loss": 5.6464, "step": 7491 }, { "epoch": 2.5569965870307167, "grad_norm": 2.888129949569702, "learning_rate": 0.00014766780432309444, "loss": 5.4964, "step": 7492 }, { "epoch": 2.557337883959044, "grad_norm": 2.723909616470337, "learning_rate": 0.00014755403868031854, "loss": 6.3293, "step": 7493 }, { "epoch": 2.557679180887372, "grad_norm": 2.6410248279571533, "learning_rate": 0.00014744027303754267, "loss": 5.7546, "step": 7494 }, { "epoch": 2.5580204778157, "grad_norm": 2.708897829055786, "learning_rate": 0.00014732650739476678, "loss": 5.9368, "step": 7495 }, { "epoch": 2.5583617747440273, "grad_norm": 2.6864473819732666, "learning_rate": 0.0001472127417519909, "loss": 6.3272, "step": 7496 }, { "epoch": 2.558703071672355, "grad_norm": 2.6929290294647217, "learning_rate": 0.00014709897610921502, "loss": 6.0851, "step": 7497 }, { "epoch": 2.5590443686006825, "grad_norm": 2.725696325302124, "learning_rate": 0.00014698521046643915, "loss": 6.092, "step": 7498 }, { "epoch": 2.5593856655290104, "grad_norm": 2.703749418258667, "learning_rate": 0.00014687144482366328, "loss": 5.9719, "step": 7499 }, { "epoch": 2.5597269624573378, "grad_norm": 3.734013319015503, "learning_rate": 0.00014675767918088736, "loss": 4.0252, "step": 7500 }, { "epoch": 2.5600682593856656, "grad_norm": 2.6439578533172607, "learning_rate": 0.0001466439135381115, "loss": 5.7126, "step": 7501 }, { "epoch": 2.560409556313993, "grad_norm": 2.6712331771850586, "learning_rate": 0.00014653014789533562, "loss": 6.0862, "step": 7502 }, { "epoch": 2.560750853242321, "grad_norm": 2.6167094707489014, "learning_rate": 0.00014641638225255973, "loss": 6.5083, "step": 7503 }, { "epoch": 2.5610921501706487, "grad_norm": 2.631967544555664, "learning_rate": 0.00014630261660978386, "loss": 5.9288, "step": 7504 }, { "epoch": 2.561433447098976, "grad_norm": 2.8037562370300293, "learning_rate": 0.00014618885096700796, "loss": 5.0377, "step": 7505 }, { "epoch": 2.5617747440273035, "grad_norm": 2.6779751777648926, "learning_rate": 0.0001460750853242321, "loss": 6.2956, "step": 7506 }, { "epoch": 2.5621160409556314, "grad_norm": 2.6464788913726807, "learning_rate": 0.0001459613196814562, "loss": 5.9083, "step": 7507 }, { "epoch": 2.5624573378839592, "grad_norm": 2.7481160163879395, "learning_rate": 0.00014584755403868033, "loss": 6.2735, "step": 7508 }, { "epoch": 2.5627986348122866, "grad_norm": 2.6728515625, "learning_rate": 0.00014573378839590444, "loss": 6.0976, "step": 7509 }, { "epoch": 2.5631399317406145, "grad_norm": 3.7015953063964844, "learning_rate": 0.00014562002275312854, "loss": 3.9438, "step": 7510 }, { "epoch": 2.563481228668942, "grad_norm": 4.3790693283081055, "learning_rate": 0.00014550625711035267, "loss": 4.7137, "step": 7511 }, { "epoch": 2.5638225255972698, "grad_norm": 2.6874852180480957, "learning_rate": 0.0001453924914675768, "loss": 6.2475, "step": 7512 }, { "epoch": 2.564163822525597, "grad_norm": 2.693321943283081, "learning_rate": 0.0001452787258248009, "loss": 6.2983, "step": 7513 }, { "epoch": 2.564505119453925, "grad_norm": 2.621973752975464, "learning_rate": 0.00014516496018202502, "loss": 6.1115, "step": 7514 }, { "epoch": 2.5648464163822524, "grad_norm": 2.658691167831421, "learning_rate": 0.00014505119453924915, "loss": 5.8714, "step": 7515 }, { "epoch": 2.5651877133105803, "grad_norm": 2.660804271697998, "learning_rate": 0.00014493742889647328, "loss": 6.6816, "step": 7516 }, { "epoch": 2.565529010238908, "grad_norm": 2.6108481884002686, "learning_rate": 0.00014482366325369739, "loss": 5.8989, "step": 7517 }, { "epoch": 2.5658703071672355, "grad_norm": 2.703063488006592, "learning_rate": 0.00014470989761092152, "loss": 5.9069, "step": 7518 }, { "epoch": 2.566211604095563, "grad_norm": 2.910573959350586, "learning_rate": 0.00014459613196814562, "loss": 5.1331, "step": 7519 }, { "epoch": 2.5665529010238908, "grad_norm": 2.6214263439178467, "learning_rate": 0.00014448236632536973, "loss": 6.2886, "step": 7520 }, { "epoch": 2.5668941979522186, "grad_norm": 2.604752779006958, "learning_rate": 0.00014436860068259386, "loss": 6.0879, "step": 7521 }, { "epoch": 2.567235494880546, "grad_norm": 2.679976224899292, "learning_rate": 0.000144254835039818, "loss": 6.2263, "step": 7522 }, { "epoch": 2.567576791808874, "grad_norm": 2.592036485671997, "learning_rate": 0.0001441410693970421, "loss": 6.2702, "step": 7523 }, { "epoch": 2.5679180887372013, "grad_norm": 2.691864490509033, "learning_rate": 0.0001440273037542662, "loss": 6.3147, "step": 7524 }, { "epoch": 2.568259385665529, "grad_norm": 2.749069929122925, "learning_rate": 0.00014391353811149033, "loss": 5.9712, "step": 7525 }, { "epoch": 2.5686006825938565, "grad_norm": 2.7472119331359863, "learning_rate": 0.00014379977246871444, "loss": 6.6555, "step": 7526 }, { "epoch": 2.5689419795221844, "grad_norm": 2.7437517642974854, "learning_rate": 0.00014368600682593857, "loss": 5.8816, "step": 7527 }, { "epoch": 2.569283276450512, "grad_norm": 2.7553818225860596, "learning_rate": 0.0001435722411831627, "loss": 5.6382, "step": 7528 }, { "epoch": 2.5696245733788396, "grad_norm": 2.6735825538635254, "learning_rate": 0.0001434584755403868, "loss": 5.8185, "step": 7529 }, { "epoch": 2.5699658703071675, "grad_norm": 2.8182194232940674, "learning_rate": 0.0001433447098976109, "loss": 5.0823, "step": 7530 }, { "epoch": 2.570307167235495, "grad_norm": 2.7155697345733643, "learning_rate": 0.00014323094425483504, "loss": 5.3627, "step": 7531 }, { "epoch": 2.5706484641638223, "grad_norm": 2.6774308681488037, "learning_rate": 0.00014311717861205918, "loss": 5.8861, "step": 7532 }, { "epoch": 2.57098976109215, "grad_norm": 2.782548189163208, "learning_rate": 0.00014300341296928328, "loss": 5.6668, "step": 7533 }, { "epoch": 2.571331058020478, "grad_norm": 2.7212352752685547, "learning_rate": 0.00014288964732650739, "loss": 6.0719, "step": 7534 }, { "epoch": 2.5716723549488054, "grad_norm": 2.695491313934326, "learning_rate": 0.00014277588168373152, "loss": 6.0384, "step": 7535 }, { "epoch": 2.5720136518771333, "grad_norm": 2.6470556259155273, "learning_rate": 0.00014266211604095562, "loss": 5.7107, "step": 7536 }, { "epoch": 2.5723549488054607, "grad_norm": 2.6767444610595703, "learning_rate": 0.00014254835039817976, "loss": 5.8553, "step": 7537 }, { "epoch": 2.5726962457337885, "grad_norm": 2.643503189086914, "learning_rate": 0.0001424345847554039, "loss": 6.1327, "step": 7538 }, { "epoch": 2.573037542662116, "grad_norm": 2.6985726356506348, "learning_rate": 0.000142320819112628, "loss": 5.551, "step": 7539 }, { "epoch": 2.573378839590444, "grad_norm": 2.657045364379883, "learning_rate": 0.0001422070534698521, "loss": 5.9295, "step": 7540 }, { "epoch": 2.573720136518771, "grad_norm": 2.6590969562530518, "learning_rate": 0.00014209328782707623, "loss": 5.922, "step": 7541 }, { "epoch": 2.574061433447099, "grad_norm": 2.6547012329101562, "learning_rate": 0.00014197952218430036, "loss": 6.1562, "step": 7542 }, { "epoch": 2.574402730375427, "grad_norm": 2.6703603267669678, "learning_rate": 0.00014186575654152444, "loss": 5.9321, "step": 7543 }, { "epoch": 2.5747440273037543, "grad_norm": 2.9982120990753174, "learning_rate": 0.00014175199089874857, "loss": 5.0249, "step": 7544 }, { "epoch": 2.5750853242320817, "grad_norm": 2.565819263458252, "learning_rate": 0.0001416382252559727, "loss": 5.3644, "step": 7545 }, { "epoch": 2.5754266211604095, "grad_norm": 2.6522724628448486, "learning_rate": 0.0001415244596131968, "loss": 6.1545, "step": 7546 }, { "epoch": 2.5757679180887374, "grad_norm": 2.639831781387329, "learning_rate": 0.00014141069397042094, "loss": 5.8925, "step": 7547 }, { "epoch": 2.576109215017065, "grad_norm": 2.7260892391204834, "learning_rate": 0.00014129692832764505, "loss": 6.6641, "step": 7548 }, { "epoch": 2.5764505119453927, "grad_norm": 2.7644131183624268, "learning_rate": 0.00014118316268486918, "loss": 5.8881, "step": 7549 }, { "epoch": 2.57679180887372, "grad_norm": 2.671483039855957, "learning_rate": 0.00014106939704209328, "loss": 6.047, "step": 7550 }, { "epoch": 2.577133105802048, "grad_norm": 2.5989768505096436, "learning_rate": 0.00014095563139931741, "loss": 6.5126, "step": 7551 }, { "epoch": 2.5774744027303753, "grad_norm": 2.6520800590515137, "learning_rate": 0.00014084186575654155, "loss": 5.6839, "step": 7552 }, { "epoch": 2.577815699658703, "grad_norm": 2.7389848232269287, "learning_rate": 0.00014072810011376562, "loss": 6.3082, "step": 7553 }, { "epoch": 2.5781569965870306, "grad_norm": 2.6571834087371826, "learning_rate": 0.00014061433447098976, "loss": 2.0284, "step": 7554 }, { "epoch": 2.5784982935153584, "grad_norm": 2.616753339767456, "learning_rate": 0.0001405005688282139, "loss": 5.1408, "step": 7555 }, { "epoch": 2.5788395904436863, "grad_norm": 2.6577699184417725, "learning_rate": 0.000140386803185438, "loss": 6.3211, "step": 7556 }, { "epoch": 2.5791808873720137, "grad_norm": 2.816687822341919, "learning_rate": 0.00014027303754266213, "loss": 6.3943, "step": 7557 }, { "epoch": 2.579522184300341, "grad_norm": 2.7218778133392334, "learning_rate": 0.00014015927189988623, "loss": 6.1001, "step": 7558 }, { "epoch": 2.579863481228669, "grad_norm": 2.7211270332336426, "learning_rate": 0.00014004550625711036, "loss": 5.817, "step": 7559 }, { "epoch": 2.580204778156997, "grad_norm": 2.6363632678985596, "learning_rate": 0.00013993174061433447, "loss": 6.3371, "step": 7560 }, { "epoch": 2.580546075085324, "grad_norm": 2.642151355743408, "learning_rate": 0.0001398179749715586, "loss": 6.1385, "step": 7561 }, { "epoch": 2.580887372013652, "grad_norm": 3.0516741275787354, "learning_rate": 0.00013970420932878273, "loss": 5.0531, "step": 7562 }, { "epoch": 2.5812286689419794, "grad_norm": 2.72440505027771, "learning_rate": 0.0001395904436860068, "loss": 6.3252, "step": 7563 }, { "epoch": 2.5815699658703073, "grad_norm": 2.5998916625976562, "learning_rate": 0.00013947667804323094, "loss": 5.7731, "step": 7564 }, { "epoch": 2.5819112627986347, "grad_norm": 2.963616371154785, "learning_rate": 0.00013936291240045507, "loss": 4.7545, "step": 7565 }, { "epoch": 2.5822525597269625, "grad_norm": 2.8518779277801514, "learning_rate": 0.00013924914675767918, "loss": 5.6993, "step": 7566 }, { "epoch": 2.58259385665529, "grad_norm": 2.6663856506347656, "learning_rate": 0.0001391353811149033, "loss": 4.3027, "step": 7567 }, { "epoch": 2.582935153583618, "grad_norm": 2.5740647315979004, "learning_rate": 0.00013902161547212741, "loss": 6.0794, "step": 7568 }, { "epoch": 2.5832764505119457, "grad_norm": 2.7939860820770264, "learning_rate": 0.00013890784982935155, "loss": 5.3234, "step": 7569 }, { "epoch": 2.583617747440273, "grad_norm": 2.7503066062927246, "learning_rate": 0.00013879408418657565, "loss": 6.3546, "step": 7570 }, { "epoch": 2.5839590443686005, "grad_norm": 2.6611952781677246, "learning_rate": 0.00013868031854379978, "loss": 5.9687, "step": 7571 }, { "epoch": 2.5843003412969283, "grad_norm": 2.741959810256958, "learning_rate": 0.00013856655290102392, "loss": 5.5272, "step": 7572 }, { "epoch": 2.584641638225256, "grad_norm": 2.7685506343841553, "learning_rate": 0.000138452787258248, "loss": 5.9441, "step": 7573 }, { "epoch": 2.5849829351535836, "grad_norm": 2.6498281955718994, "learning_rate": 0.00013833902161547213, "loss": 5.6183, "step": 7574 }, { "epoch": 2.5853242320819114, "grad_norm": 2.6913111209869385, "learning_rate": 0.00013822525597269626, "loss": 6.2333, "step": 7575 }, { "epoch": 2.585665529010239, "grad_norm": 2.6091015338897705, "learning_rate": 0.00013811149032992036, "loss": 6.4434, "step": 7576 }, { "epoch": 2.5860068259385667, "grad_norm": 2.665632486343384, "learning_rate": 0.00013799772468714447, "loss": 6.0747, "step": 7577 }, { "epoch": 2.586348122866894, "grad_norm": 2.7666921615600586, "learning_rate": 0.0001378839590443686, "loss": 6.2239, "step": 7578 }, { "epoch": 2.586689419795222, "grad_norm": 2.6611504554748535, "learning_rate": 0.00013777019340159273, "loss": 6.6124, "step": 7579 }, { "epoch": 2.5870307167235493, "grad_norm": 2.631537437438965, "learning_rate": 0.00013765642775881684, "loss": 6.2377, "step": 7580 }, { "epoch": 2.587372013651877, "grad_norm": 2.7600057125091553, "learning_rate": 0.00013754266211604097, "loss": 5.9184, "step": 7581 }, { "epoch": 2.587713310580205, "grad_norm": 2.6380791664123535, "learning_rate": 0.00013742889647326507, "loss": 6.3217, "step": 7582 }, { "epoch": 2.5880546075085324, "grad_norm": 2.6468701362609863, "learning_rate": 0.00013731513083048918, "loss": 5.9776, "step": 7583 }, { "epoch": 2.58839590443686, "grad_norm": 2.6581053733825684, "learning_rate": 0.0001372013651877133, "loss": 6.3691, "step": 7584 }, { "epoch": 2.5887372013651877, "grad_norm": 2.5985143184661865, "learning_rate": 0.00013708759954493744, "loss": 6.0739, "step": 7585 }, { "epoch": 2.5890784982935156, "grad_norm": 2.678396701812744, "learning_rate": 0.00013697383390216155, "loss": 6.2776, "step": 7586 }, { "epoch": 2.589419795221843, "grad_norm": 2.7191009521484375, "learning_rate": 0.00013686006825938565, "loss": 6.1219, "step": 7587 }, { "epoch": 2.589761092150171, "grad_norm": 2.738563299179077, "learning_rate": 0.00013674630261660978, "loss": 6.3903, "step": 7588 }, { "epoch": 2.590102389078498, "grad_norm": 2.5974106788635254, "learning_rate": 0.00013663253697383392, "loss": 6.0863, "step": 7589 }, { "epoch": 2.590443686006826, "grad_norm": 2.727854013442993, "learning_rate": 0.00013651877133105802, "loss": 5.466, "step": 7590 }, { "epoch": 2.5907849829351535, "grad_norm": 2.6561155319213867, "learning_rate": 0.00013640500568828215, "loss": 5.8661, "step": 7591 }, { "epoch": 2.5911262798634813, "grad_norm": 2.671494483947754, "learning_rate": 0.00013629124004550626, "loss": 5.6121, "step": 7592 }, { "epoch": 2.5914675767918087, "grad_norm": 2.713571786880493, "learning_rate": 0.00013617747440273036, "loss": 6.0892, "step": 7593 }, { "epoch": 2.5918088737201366, "grad_norm": 2.6231658458709717, "learning_rate": 0.0001360637087599545, "loss": 5.5849, "step": 7594 }, { "epoch": 2.5921501706484644, "grad_norm": 4.061588287353516, "learning_rate": 0.00013594994311717863, "loss": 4.2174, "step": 7595 }, { "epoch": 2.592491467576792, "grad_norm": 2.6725544929504395, "learning_rate": 0.00013583617747440273, "loss": 5.5262, "step": 7596 }, { "epoch": 2.5928327645051192, "grad_norm": 3.150416612625122, "learning_rate": 0.00013572241183162684, "loss": 4.9608, "step": 7597 }, { "epoch": 2.593174061433447, "grad_norm": 2.6914148330688477, "learning_rate": 0.00013560864618885097, "loss": 6.0067, "step": 7598 }, { "epoch": 2.593515358361775, "grad_norm": 2.676713705062866, "learning_rate": 0.0001354948805460751, "loss": 6.4768, "step": 7599 }, { "epoch": 2.5938566552901023, "grad_norm": 2.7440037727355957, "learning_rate": 0.0001353811149032992, "loss": 5.4661, "step": 7600 }, { "epoch": 2.59419795221843, "grad_norm": 2.6399929523468018, "learning_rate": 0.00013526734926052334, "loss": 6.3171, "step": 7601 }, { "epoch": 2.5945392491467576, "grad_norm": 2.746882438659668, "learning_rate": 0.00013515358361774744, "loss": 6.0754, "step": 7602 }, { "epoch": 2.5948805460750854, "grad_norm": 2.638324022293091, "learning_rate": 0.00013503981797497155, "loss": 5.8914, "step": 7603 }, { "epoch": 2.595221843003413, "grad_norm": 2.618677854537964, "learning_rate": 0.00013492605233219568, "loss": 6.0271, "step": 7604 }, { "epoch": 2.5955631399317407, "grad_norm": 2.606271743774414, "learning_rate": 0.0001348122866894198, "loss": 4.9815, "step": 7605 }, { "epoch": 2.595904436860068, "grad_norm": 3.786407470703125, "learning_rate": 0.00013469852104664392, "loss": 4.8562, "step": 7606 }, { "epoch": 2.596245733788396, "grad_norm": 3.0423500537872314, "learning_rate": 0.00013458475540386802, "loss": 4.8359, "step": 7607 }, { "epoch": 2.596587030716724, "grad_norm": 3.009005308151245, "learning_rate": 0.00013447098976109215, "loss": 4.6837, "step": 7608 }, { "epoch": 2.596928327645051, "grad_norm": 2.638024091720581, "learning_rate": 0.0001343572241183163, "loss": 5.9415, "step": 7609 }, { "epoch": 2.5972696245733786, "grad_norm": 2.710456371307373, "learning_rate": 0.0001342434584755404, "loss": 5.729, "step": 7610 }, { "epoch": 2.5976109215017065, "grad_norm": 2.8217124938964844, "learning_rate": 0.00013412969283276452, "loss": 5.2043, "step": 7611 }, { "epoch": 2.5979522184300343, "grad_norm": 2.6994223594665527, "learning_rate": 0.00013401592718998863, "loss": 5.7662, "step": 7612 }, { "epoch": 2.5982935153583617, "grad_norm": 2.637618064880371, "learning_rate": 0.00013390216154721273, "loss": 5.9881, "step": 7613 }, { "epoch": 2.5986348122866896, "grad_norm": 2.62662935256958, "learning_rate": 0.00013378839590443687, "loss": 5.9036, "step": 7614 }, { "epoch": 2.598976109215017, "grad_norm": 2.6811110973358154, "learning_rate": 0.000133674630261661, "loss": 5.7672, "step": 7615 }, { "epoch": 2.599317406143345, "grad_norm": 2.62439227104187, "learning_rate": 0.00013356086461888508, "loss": 6.594, "step": 7616 }, { "epoch": 2.5996587030716722, "grad_norm": 2.640967607498169, "learning_rate": 0.0001334470989761092, "loss": 6.0876, "step": 7617 }, { "epoch": 2.6, "grad_norm": 2.6701509952545166, "learning_rate": 0.00013333333333333334, "loss": 6.2532, "step": 7618 }, { "epoch": 2.6003412969283275, "grad_norm": 2.628978967666626, "learning_rate": 0.00013321956769055747, "loss": 6.0009, "step": 7619 }, { "epoch": 2.6006825938566553, "grad_norm": 3.894216775894165, "learning_rate": 0.00013310580204778158, "loss": 5.3864, "step": 7620 }, { "epoch": 2.601023890784983, "grad_norm": 2.637104034423828, "learning_rate": 0.00013299203640500568, "loss": 6.1776, "step": 7621 }, { "epoch": 2.6013651877133106, "grad_norm": 2.7615833282470703, "learning_rate": 0.00013287827076222981, "loss": 6.4661, "step": 7622 }, { "epoch": 2.601706484641638, "grad_norm": 3.8650755882263184, "learning_rate": 0.00013276450511945392, "loss": 5.7443, "step": 7623 }, { "epoch": 2.602047781569966, "grad_norm": 2.71655011177063, "learning_rate": 0.00013265073947667805, "loss": 6.3148, "step": 7624 }, { "epoch": 2.6023890784982937, "grad_norm": 2.681018114089966, "learning_rate": 0.00013253697383390218, "loss": 6.3714, "step": 7625 }, { "epoch": 2.602730375426621, "grad_norm": 2.730727195739746, "learning_rate": 0.00013242320819112626, "loss": 6.1858, "step": 7626 }, { "epoch": 2.603071672354949, "grad_norm": 2.642637014389038, "learning_rate": 0.0001323094425483504, "loss": 5.5325, "step": 7627 }, { "epoch": 2.6034129692832764, "grad_norm": 2.673652172088623, "learning_rate": 0.00013219567690557452, "loss": 6.3912, "step": 7628 }, { "epoch": 2.603754266211604, "grad_norm": 2.6778078079223633, "learning_rate": 0.00013208191126279863, "loss": 6.2991, "step": 7629 }, { "epoch": 2.6040955631399316, "grad_norm": 2.6532318592071533, "learning_rate": 0.00013196814562002276, "loss": 5.6819, "step": 7630 }, { "epoch": 2.6044368600682595, "grad_norm": 3.36240291595459, "learning_rate": 0.00013185437997724687, "loss": 4.7713, "step": 7631 }, { "epoch": 2.604778156996587, "grad_norm": 2.656712532043457, "learning_rate": 0.000131740614334471, "loss": 5.4265, "step": 7632 }, { "epoch": 2.6051194539249147, "grad_norm": 2.6337647438049316, "learning_rate": 0.0001316268486916951, "loss": 6.0607, "step": 7633 }, { "epoch": 2.6054607508532426, "grad_norm": 2.639120578765869, "learning_rate": 0.00013151308304891924, "loss": 5.8628, "step": 7634 }, { "epoch": 2.60580204778157, "grad_norm": 2.6694211959838867, "learning_rate": 0.00013139931740614337, "loss": 6.3282, "step": 7635 }, { "epoch": 2.6061433447098974, "grad_norm": 2.556114435195923, "learning_rate": 0.00013128555176336745, "loss": 5.6204, "step": 7636 }, { "epoch": 2.6064846416382252, "grad_norm": 2.6191563606262207, "learning_rate": 0.00013117178612059158, "loss": 4.6587, "step": 7637 }, { "epoch": 2.606825938566553, "grad_norm": 2.7384796142578125, "learning_rate": 0.0001310580204778157, "loss": 5.1479, "step": 7638 }, { "epoch": 2.6071672354948805, "grad_norm": 2.6779263019561768, "learning_rate": 0.00013094425483503981, "loss": 5.5593, "step": 7639 }, { "epoch": 2.6075085324232083, "grad_norm": 2.643181324005127, "learning_rate": 0.00013083048919226395, "loss": 6.0678, "step": 7640 }, { "epoch": 2.6078498293515358, "grad_norm": 2.6609034538269043, "learning_rate": 0.00013071672354948805, "loss": 5.7646, "step": 7641 }, { "epoch": 2.6081911262798636, "grad_norm": 2.6710946559906006, "learning_rate": 0.00013060295790671218, "loss": 5.7591, "step": 7642 }, { "epoch": 2.608532423208191, "grad_norm": 2.7074642181396484, "learning_rate": 0.0001304891922639363, "loss": 5.8659, "step": 7643 }, { "epoch": 2.608873720136519, "grad_norm": 2.806108236312866, "learning_rate": 0.00013037542662116042, "loss": 5.3068, "step": 7644 }, { "epoch": 2.6092150170648463, "grad_norm": 2.8876841068267822, "learning_rate": 0.00013026166097838455, "loss": 4.9614, "step": 7645 }, { "epoch": 2.609556313993174, "grad_norm": 2.6452441215515137, "learning_rate": 0.00013014789533560863, "loss": 4.6235, "step": 7646 }, { "epoch": 2.609897610921502, "grad_norm": 2.64217209815979, "learning_rate": 0.00013003412969283276, "loss": 5.6776, "step": 7647 }, { "epoch": 2.6102389078498294, "grad_norm": 2.6999876499176025, "learning_rate": 0.0001299203640500569, "loss": 5.8503, "step": 7648 }, { "epoch": 2.6105802047781568, "grad_norm": 2.6543946266174316, "learning_rate": 0.000129806598407281, "loss": 6.0889, "step": 7649 }, { "epoch": 2.6109215017064846, "grad_norm": 2.6520276069641113, "learning_rate": 0.0001296928327645051, "loss": 5.8006, "step": 7650 }, { "epoch": 2.6112627986348125, "grad_norm": 2.520744562149048, "learning_rate": 0.00012957906712172924, "loss": 6.255, "step": 7651 }, { "epoch": 2.61160409556314, "grad_norm": 2.6043193340301514, "learning_rate": 0.00012946530147895337, "loss": 5.8413, "step": 7652 }, { "epoch": 2.6119453924914677, "grad_norm": 2.659938335418701, "learning_rate": 0.00012935153583617747, "loss": 5.6511, "step": 7653 }, { "epoch": 2.612286689419795, "grad_norm": 2.6185102462768555, "learning_rate": 0.0001292377701934016, "loss": 6.55, "step": 7654 }, { "epoch": 2.612627986348123, "grad_norm": 2.686222553253174, "learning_rate": 0.0001291240045506257, "loss": 5.7808, "step": 7655 }, { "epoch": 2.6129692832764504, "grad_norm": 2.712735414505005, "learning_rate": 0.00012901023890784982, "loss": 5.6224, "step": 7656 }, { "epoch": 2.6133105802047782, "grad_norm": 2.8246679306030273, "learning_rate": 0.00012889647326507395, "loss": 4.6806, "step": 7657 }, { "epoch": 2.6136518771331056, "grad_norm": 2.6839938163757324, "learning_rate": 0.00012878270762229808, "loss": 5.9373, "step": 7658 }, { "epoch": 2.6139931740614335, "grad_norm": 2.625178575515747, "learning_rate": 0.00012866894197952218, "loss": 6.0065, "step": 7659 }, { "epoch": 2.6143344709897613, "grad_norm": 2.6909379959106445, "learning_rate": 0.0001285551763367463, "loss": 6.3687, "step": 7660 }, { "epoch": 2.6146757679180888, "grad_norm": 2.5886874198913574, "learning_rate": 0.00012844141069397042, "loss": 5.5438, "step": 7661 }, { "epoch": 2.615017064846416, "grad_norm": 2.6835293769836426, "learning_rate": 0.00012832764505119455, "loss": 6.2562, "step": 7662 }, { "epoch": 2.615358361774744, "grad_norm": 2.7177085876464844, "learning_rate": 0.00012821387940841866, "loss": 6.1722, "step": 7663 }, { "epoch": 2.615699658703072, "grad_norm": 2.651545763015747, "learning_rate": 0.0001281001137656428, "loss": 6.2457, "step": 7664 }, { "epoch": 2.6160409556313993, "grad_norm": 2.6499643325805664, "learning_rate": 0.0001279863481228669, "loss": 5.9292, "step": 7665 }, { "epoch": 2.616382252559727, "grad_norm": 2.6566162109375, "learning_rate": 0.000127872582480091, "loss": 5.9867, "step": 7666 }, { "epoch": 2.6167235494880545, "grad_norm": 2.6786088943481445, "learning_rate": 0.00012775881683731513, "loss": 6.447, "step": 7667 }, { "epoch": 2.6170648464163824, "grad_norm": 2.659654140472412, "learning_rate": 0.00012764505119453926, "loss": 5.8441, "step": 7668 }, { "epoch": 2.61740614334471, "grad_norm": 2.6184911727905273, "learning_rate": 0.00012753128555176337, "loss": 5.8193, "step": 7669 }, { "epoch": 2.6177474402730376, "grad_norm": 2.677710771560669, "learning_rate": 0.00012741751990898747, "loss": 6.0518, "step": 7670 }, { "epoch": 2.618088737201365, "grad_norm": 2.705430030822754, "learning_rate": 0.0001273037542662116, "loss": 6.224, "step": 7671 }, { "epoch": 2.618430034129693, "grad_norm": 2.7365448474884033, "learning_rate": 0.00012718998862343574, "loss": 6.3756, "step": 7672 }, { "epoch": 2.6187713310580207, "grad_norm": 2.6876437664031982, "learning_rate": 0.00012707622298065984, "loss": 6.07, "step": 7673 }, { "epoch": 2.619112627986348, "grad_norm": 2.6189560890197754, "learning_rate": 0.00012696245733788398, "loss": 6.1245, "step": 7674 }, { "epoch": 2.6194539249146755, "grad_norm": 3.7583062648773193, "learning_rate": 0.00012684869169510808, "loss": 5.6039, "step": 7675 }, { "epoch": 2.6197952218430034, "grad_norm": 2.7220373153686523, "learning_rate": 0.00012673492605233219, "loss": 6.0232, "step": 7676 }, { "epoch": 2.6201365187713312, "grad_norm": 2.691070556640625, "learning_rate": 0.00012662116040955632, "loss": 6.2148, "step": 7677 }, { "epoch": 2.6204778156996587, "grad_norm": 2.6679129600524902, "learning_rate": 0.00012650739476678045, "loss": 6.1872, "step": 7678 }, { "epoch": 2.6208191126279865, "grad_norm": 2.7614474296569824, "learning_rate": 0.00012639362912400455, "loss": 5.669, "step": 7679 }, { "epoch": 2.621160409556314, "grad_norm": 2.7429089546203613, "learning_rate": 0.00012627986348122866, "loss": 5.5727, "step": 7680 }, { "epoch": 2.6215017064846418, "grad_norm": 2.684476613998413, "learning_rate": 0.0001261660978384528, "loss": 5.7475, "step": 7681 }, { "epoch": 2.621843003412969, "grad_norm": 2.627525806427002, "learning_rate": 0.00012605233219567692, "loss": 6.2481, "step": 7682 }, { "epoch": 2.622184300341297, "grad_norm": 2.691311836242676, "learning_rate": 0.00012593856655290103, "loss": 6.0962, "step": 7683 }, { "epoch": 2.6225255972696244, "grad_norm": 2.6626622676849365, "learning_rate": 0.00012582480091012513, "loss": 6.2863, "step": 7684 }, { "epoch": 2.6228668941979523, "grad_norm": 2.6201605796813965, "learning_rate": 0.00012571103526734927, "loss": 5.6584, "step": 7685 }, { "epoch": 2.62320819112628, "grad_norm": 2.704043388366699, "learning_rate": 0.00012559726962457337, "loss": 6.1213, "step": 7686 }, { "epoch": 2.6235494880546075, "grad_norm": 2.5595293045043945, "learning_rate": 0.0001254835039817975, "loss": 6.3741, "step": 7687 }, { "epoch": 2.623890784982935, "grad_norm": 5.192468643188477, "learning_rate": 0.00012536973833902163, "loss": 4.9172, "step": 7688 }, { "epoch": 2.624232081911263, "grad_norm": 2.794823169708252, "learning_rate": 0.0001252559726962457, "loss": 5.9293, "step": 7689 }, { "epoch": 2.6245733788395906, "grad_norm": 2.6778604984283447, "learning_rate": 0.00012514220705346984, "loss": 6.2208, "step": 7690 }, { "epoch": 2.624914675767918, "grad_norm": 2.6402392387390137, "learning_rate": 0.00012502844141069398, "loss": 5.5919, "step": 7691 }, { "epoch": 2.625255972696246, "grad_norm": 2.6135995388031006, "learning_rate": 0.0001249146757679181, "loss": 6.367, "step": 7692 }, { "epoch": 2.6255972696245733, "grad_norm": 2.745082378387451, "learning_rate": 0.0001248009101251422, "loss": 5.8043, "step": 7693 }, { "epoch": 2.625938566552901, "grad_norm": 2.596097946166992, "learning_rate": 0.00012468714448236632, "loss": 5.4991, "step": 7694 }, { "epoch": 2.6262798634812285, "grad_norm": 2.6917331218719482, "learning_rate": 0.00012457337883959045, "loss": 6.1673, "step": 7695 }, { "epoch": 2.6266211604095564, "grad_norm": 2.624852180480957, "learning_rate": 0.00012445961319681456, "loss": 6.381, "step": 7696 }, { "epoch": 2.626962457337884, "grad_norm": 2.734668493270874, "learning_rate": 0.0001243458475540387, "loss": 5.8872, "step": 7697 }, { "epoch": 2.6273037542662117, "grad_norm": 2.7033309936523438, "learning_rate": 0.0001242320819112628, "loss": 5.7082, "step": 7698 }, { "epoch": 2.6276450511945395, "grad_norm": 2.640166759490967, "learning_rate": 0.00012411831626848692, "loss": 5.962, "step": 7699 }, { "epoch": 2.627986348122867, "grad_norm": 2.584916591644287, "learning_rate": 0.00012400455062571103, "loss": 6.1359, "step": 7700 }, { "epoch": 2.6283276450511943, "grad_norm": 2.798306465148926, "learning_rate": 0.00012389078498293516, "loss": 6.2166, "step": 7701 }, { "epoch": 2.628668941979522, "grad_norm": 2.539496660232544, "learning_rate": 0.0001237770193401593, "loss": 5.7527, "step": 7702 }, { "epoch": 2.62901023890785, "grad_norm": 2.606154680252075, "learning_rate": 0.0001236632536973834, "loss": 6.1712, "step": 7703 }, { "epoch": 2.6293515358361774, "grad_norm": 4.360844612121582, "learning_rate": 0.0001235494880546075, "loss": 5.0891, "step": 7704 }, { "epoch": 2.6296928327645053, "grad_norm": 2.640214443206787, "learning_rate": 0.00012343572241183164, "loss": 5.8549, "step": 7705 }, { "epoch": 2.6300341296928327, "grad_norm": 2.8788836002349854, "learning_rate": 0.00012332195676905574, "loss": 5.118, "step": 7706 }, { "epoch": 2.6303754266211605, "grad_norm": 2.637471914291382, "learning_rate": 0.00012320819112627987, "loss": 6.1626, "step": 7707 }, { "epoch": 2.630716723549488, "grad_norm": 2.6709108352661133, "learning_rate": 0.00012309442548350398, "loss": 5.7018, "step": 7708 }, { "epoch": 2.631058020477816, "grad_norm": 2.575793504714966, "learning_rate": 0.0001229806598407281, "loss": 5.8412, "step": 7709 }, { "epoch": 2.631399317406143, "grad_norm": 2.7106573581695557, "learning_rate": 0.00012286689419795221, "loss": 5.151, "step": 7710 }, { "epoch": 2.631740614334471, "grad_norm": 2.6999032497406006, "learning_rate": 0.00012275312855517635, "loss": 6.1057, "step": 7711 }, { "epoch": 2.632081911262799, "grad_norm": 2.700033664703369, "learning_rate": 0.00012263936291240045, "loss": 6.1486, "step": 7712 }, { "epoch": 2.6324232081911263, "grad_norm": 2.705191135406494, "learning_rate": 0.00012252559726962458, "loss": 5.427, "step": 7713 }, { "epoch": 2.6327645051194537, "grad_norm": 2.5965819358825684, "learning_rate": 0.0001224118316268487, "loss": 6.0963, "step": 7714 }, { "epoch": 2.6331058020477816, "grad_norm": 2.7236809730529785, "learning_rate": 0.00012229806598407282, "loss": 5.8642, "step": 7715 }, { "epoch": 2.6334470989761094, "grad_norm": 2.6513259410858154, "learning_rate": 0.00012218430034129693, "loss": 6.547, "step": 7716 }, { "epoch": 2.633788395904437, "grad_norm": 2.6121363639831543, "learning_rate": 0.00012207053469852103, "loss": 5.9717, "step": 7717 }, { "epoch": 2.6341296928327647, "grad_norm": 2.756356716156006, "learning_rate": 0.00012195676905574516, "loss": 6.0155, "step": 7718 }, { "epoch": 2.634470989761092, "grad_norm": 2.6240437030792236, "learning_rate": 0.0001218430034129693, "loss": 5.6416, "step": 7719 }, { "epoch": 2.63481228668942, "grad_norm": 2.6901957988739014, "learning_rate": 0.0001217292377701934, "loss": 6.0908, "step": 7720 }, { "epoch": 2.6351535836177473, "grad_norm": 2.5926854610443115, "learning_rate": 0.00012161547212741753, "loss": 5.8315, "step": 7721 }, { "epoch": 2.635494880546075, "grad_norm": 2.6455578804016113, "learning_rate": 0.00012150170648464164, "loss": 6.1936, "step": 7722 }, { "epoch": 2.6358361774744026, "grad_norm": 2.6685874462127686, "learning_rate": 0.00012138794084186575, "loss": 6.2457, "step": 7723 }, { "epoch": 2.6361774744027304, "grad_norm": 2.5576179027557373, "learning_rate": 0.00012127417519908989, "loss": 6.1212, "step": 7724 }, { "epoch": 2.6365187713310583, "grad_norm": 2.6035420894622803, "learning_rate": 0.00012116040955631399, "loss": 6.173, "step": 7725 }, { "epoch": 2.6368600682593857, "grad_norm": 2.7565255165100098, "learning_rate": 0.00012104664391353812, "loss": 5.909, "step": 7726 }, { "epoch": 2.637201365187713, "grad_norm": 2.3989455699920654, "learning_rate": 0.00012093287827076223, "loss": 5.1077, "step": 7727 }, { "epoch": 2.637542662116041, "grad_norm": 2.7297885417938232, "learning_rate": 0.00012081911262798635, "loss": 5.1734, "step": 7728 }, { "epoch": 2.637883959044369, "grad_norm": 2.634373664855957, "learning_rate": 0.00012070534698521047, "loss": 5.418, "step": 7729 }, { "epoch": 2.638225255972696, "grad_norm": 2.557570457458496, "learning_rate": 0.00012059158134243458, "loss": 6.1528, "step": 7730 }, { "epoch": 2.638566552901024, "grad_norm": 2.647575855255127, "learning_rate": 0.00012047781569965872, "loss": 6.0692, "step": 7731 }, { "epoch": 2.6389078498293514, "grad_norm": 2.5773942470550537, "learning_rate": 0.00012036405005688282, "loss": 5.6637, "step": 7732 }, { "epoch": 2.6392491467576793, "grad_norm": 2.692434549331665, "learning_rate": 0.00012025028441410694, "loss": 6.029, "step": 7733 }, { "epoch": 2.6395904436860067, "grad_norm": 2.6368980407714844, "learning_rate": 0.00012013651877133106, "loss": 6.3336, "step": 7734 }, { "epoch": 2.6399317406143346, "grad_norm": 2.6980204582214355, "learning_rate": 0.00012002275312855518, "loss": 6.1817, "step": 7735 }, { "epoch": 2.640273037542662, "grad_norm": 2.6697869300842285, "learning_rate": 0.00011990898748577931, "loss": 6.2257, "step": 7736 }, { "epoch": 2.64061433447099, "grad_norm": 2.5750112533569336, "learning_rate": 0.00011979522184300341, "loss": 5.8854, "step": 7737 }, { "epoch": 2.6409556313993177, "grad_norm": 2.6087377071380615, "learning_rate": 0.00011968145620022753, "loss": 5.9283, "step": 7738 }, { "epoch": 2.641296928327645, "grad_norm": 2.607609272003174, "learning_rate": 0.00011956769055745165, "loss": 6.1513, "step": 7739 }, { "epoch": 2.6416382252559725, "grad_norm": 2.6486034393310547, "learning_rate": 0.00011945392491467577, "loss": 5.6541, "step": 7740 }, { "epoch": 2.6419795221843003, "grad_norm": 2.6967036724090576, "learning_rate": 0.0001193401592718999, "loss": 6.3013, "step": 7741 }, { "epoch": 2.642320819112628, "grad_norm": 2.7555899620056152, "learning_rate": 0.000119226393629124, "loss": 5.9503, "step": 7742 }, { "epoch": 2.6426621160409556, "grad_norm": 2.6998205184936523, "learning_rate": 0.00011911262798634812, "loss": 5.8985, "step": 7743 }, { "epoch": 2.6430034129692834, "grad_norm": 2.662689208984375, "learning_rate": 0.00011899886234357224, "loss": 6.3067, "step": 7744 }, { "epoch": 2.643344709897611, "grad_norm": 2.587956190109253, "learning_rate": 0.00011888509670079636, "loss": 6.137, "step": 7745 }, { "epoch": 2.6436860068259387, "grad_norm": 2.9233665466308594, "learning_rate": 0.00011877133105802048, "loss": 5.8408, "step": 7746 }, { "epoch": 2.644027303754266, "grad_norm": 2.765820264816284, "learning_rate": 0.0001186575654152446, "loss": 6.4021, "step": 7747 }, { "epoch": 2.644368600682594, "grad_norm": 2.7021546363830566, "learning_rate": 0.00011854379977246872, "loss": 6.3432, "step": 7748 }, { "epoch": 2.6447098976109213, "grad_norm": 2.628324031829834, "learning_rate": 0.00011843003412969284, "loss": 5.5596, "step": 7749 }, { "epoch": 2.645051194539249, "grad_norm": 2.6931304931640625, "learning_rate": 0.00011831626848691695, "loss": 6.4549, "step": 7750 }, { "epoch": 2.645392491467577, "grad_norm": 2.3119070529937744, "learning_rate": 0.00011820250284414107, "loss": 4.2596, "step": 7751 }, { "epoch": 2.6457337883959045, "grad_norm": 2.604924440383911, "learning_rate": 0.00011808873720136519, "loss": 5.8979, "step": 7752 }, { "epoch": 2.646075085324232, "grad_norm": 2.697232723236084, "learning_rate": 0.00011797497155858931, "loss": 5.1108, "step": 7753 }, { "epoch": 2.6464163822525597, "grad_norm": 2.6660239696502686, "learning_rate": 0.00011786120591581343, "loss": 6.2313, "step": 7754 }, { "epoch": 2.6467576791808876, "grad_norm": 2.5658226013183594, "learning_rate": 0.00011774744027303755, "loss": 5.9833, "step": 7755 }, { "epoch": 2.647098976109215, "grad_norm": 2.658269166946411, "learning_rate": 0.00011763367463026167, "loss": 5.9185, "step": 7756 }, { "epoch": 2.647440273037543, "grad_norm": 2.5440711975097656, "learning_rate": 0.00011751990898748578, "loss": 5.47, "step": 7757 }, { "epoch": 2.64778156996587, "grad_norm": 2.6822307109832764, "learning_rate": 0.0001174061433447099, "loss": 6.2749, "step": 7758 }, { "epoch": 2.648122866894198, "grad_norm": 2.758451461791992, "learning_rate": 0.00011729237770193402, "loss": 6.2035, "step": 7759 }, { "epoch": 2.6484641638225255, "grad_norm": 2.591820240020752, "learning_rate": 0.00011717861205915814, "loss": 6.4704, "step": 7760 }, { "epoch": 2.6488054607508533, "grad_norm": 2.8156046867370605, "learning_rate": 0.00011706484641638224, "loss": 6.4334, "step": 7761 }, { "epoch": 2.6491467576791807, "grad_norm": 2.6117329597473145, "learning_rate": 0.00011695108077360638, "loss": 6.3525, "step": 7762 }, { "epoch": 2.6494880546075086, "grad_norm": 2.6518020629882812, "learning_rate": 0.00011683731513083048, "loss": 5.9983, "step": 7763 }, { "epoch": 2.6498293515358364, "grad_norm": 2.606523275375366, "learning_rate": 0.00011672354948805461, "loss": 5.6909, "step": 7764 }, { "epoch": 2.650170648464164, "grad_norm": 2.7865447998046875, "learning_rate": 0.00011660978384527873, "loss": 3.9492, "step": 7765 }, { "epoch": 2.6505119453924912, "grad_norm": 2.6306912899017334, "learning_rate": 0.00011649601820250284, "loss": 5.3691, "step": 7766 }, { "epoch": 2.650853242320819, "grad_norm": 2.5862772464752197, "learning_rate": 0.00011638225255972697, "loss": 6.0451, "step": 7767 }, { "epoch": 2.651194539249147, "grad_norm": 2.6710424423217773, "learning_rate": 0.00011626848691695107, "loss": 6.339, "step": 7768 }, { "epoch": 2.6515358361774743, "grad_norm": 2.731431484222412, "learning_rate": 0.0001161547212741752, "loss": 6.5248, "step": 7769 }, { "epoch": 2.651877133105802, "grad_norm": 2.6861982345581055, "learning_rate": 0.00011604095563139932, "loss": 5.8373, "step": 7770 }, { "epoch": 2.6522184300341296, "grad_norm": 2.622677803039551, "learning_rate": 0.00011592718998862343, "loss": 6.0151, "step": 7771 }, { "epoch": 2.6525597269624575, "grad_norm": 2.653522491455078, "learning_rate": 0.00011581342434584756, "loss": 6.2623, "step": 7772 }, { "epoch": 2.652901023890785, "grad_norm": 2.724036693572998, "learning_rate": 0.00011569965870307167, "loss": 5.8716, "step": 7773 }, { "epoch": 2.6532423208191127, "grad_norm": 2.6914572715759277, "learning_rate": 0.0001155858930602958, "loss": 6.0782, "step": 7774 }, { "epoch": 2.65358361774744, "grad_norm": 2.528007984161377, "learning_rate": 0.00011547212741751992, "loss": 5.6457, "step": 7775 }, { "epoch": 2.653924914675768, "grad_norm": 2.635348081588745, "learning_rate": 0.00011535836177474402, "loss": 6.2753, "step": 7776 }, { "epoch": 2.654266211604096, "grad_norm": 2.6857311725616455, "learning_rate": 0.00011524459613196815, "loss": 5.6202, "step": 7777 }, { "epoch": 2.654607508532423, "grad_norm": 2.612197160720825, "learning_rate": 0.00011513083048919226, "loss": 5.9211, "step": 7778 }, { "epoch": 2.6549488054607506, "grad_norm": 2.6178486347198486, "learning_rate": 0.00011501706484641639, "loss": 4.9022, "step": 7779 }, { "epoch": 2.6552901023890785, "grad_norm": 2.6042556762695312, "learning_rate": 0.0001149032992036405, "loss": 6.2243, "step": 7780 }, { "epoch": 2.6556313993174063, "grad_norm": 2.5869898796081543, "learning_rate": 0.00011478953356086461, "loss": 6.0258, "step": 7781 }, { "epoch": 2.6559726962457337, "grad_norm": 2.7476601600646973, "learning_rate": 0.00011467576791808875, "loss": 5.2693, "step": 7782 }, { "epoch": 2.6563139931740616, "grad_norm": 2.492094039916992, "learning_rate": 0.00011456200227531285, "loss": 6.0099, "step": 7783 }, { "epoch": 2.656655290102389, "grad_norm": 2.6337618827819824, "learning_rate": 0.00011444823663253698, "loss": 6.8608, "step": 7784 }, { "epoch": 2.656996587030717, "grad_norm": 2.559250593185425, "learning_rate": 0.00011433447098976109, "loss": 5.9022, "step": 7785 }, { "epoch": 2.6573378839590442, "grad_norm": 2.6352384090423584, "learning_rate": 0.0001142207053469852, "loss": 6.3508, "step": 7786 }, { "epoch": 2.657679180887372, "grad_norm": 2.701533317565918, "learning_rate": 0.00011410693970420934, "loss": 6.1916, "step": 7787 }, { "epoch": 2.6580204778156995, "grad_norm": 2.6848697662353516, "learning_rate": 0.00011399317406143344, "loss": 6.2393, "step": 7788 }, { "epoch": 2.6583617747440274, "grad_norm": 3.9241063594818115, "learning_rate": 0.00011387940841865758, "loss": 5.0438, "step": 7789 }, { "epoch": 2.658703071672355, "grad_norm": 2.6796340942382812, "learning_rate": 0.00011376564277588168, "loss": 5.3564, "step": 7790 }, { "epoch": 2.6590443686006826, "grad_norm": 2.6478474140167236, "learning_rate": 0.0001136518771331058, "loss": 6.3117, "step": 7791 }, { "epoch": 2.65938566552901, "grad_norm": 2.6212310791015625, "learning_rate": 0.00011353811149032993, "loss": 6.6302, "step": 7792 }, { "epoch": 2.659726962457338, "grad_norm": 2.6432223320007324, "learning_rate": 0.00011342434584755404, "loss": 6.1271, "step": 7793 }, { "epoch": 2.6600682593856657, "grad_norm": 2.6077170372009277, "learning_rate": 0.00011331058020477817, "loss": 6.1778, "step": 7794 }, { "epoch": 2.660409556313993, "grad_norm": 2.6975767612457275, "learning_rate": 0.00011319681456200227, "loss": 6.2841, "step": 7795 }, { "epoch": 2.660750853242321, "grad_norm": 2.6964032649993896, "learning_rate": 0.00011308304891922639, "loss": 5.8834, "step": 7796 }, { "epoch": 2.6610921501706484, "grad_norm": 2.594709873199463, "learning_rate": 0.00011296928327645051, "loss": 5.8787, "step": 7797 }, { "epoch": 2.6614334470989762, "grad_norm": 2.6809041500091553, "learning_rate": 0.00011285551763367463, "loss": 6.3782, "step": 7798 }, { "epoch": 2.6617747440273036, "grad_norm": 2.5971288681030273, "learning_rate": 0.00011274175199089876, "loss": 6.1397, "step": 7799 }, { "epoch": 2.6621160409556315, "grad_norm": 2.648439407348633, "learning_rate": 0.00011262798634812287, "loss": 5.7752, "step": 7800 }, { "epoch": 2.662457337883959, "grad_norm": 2.61542010307312, "learning_rate": 0.00011251422070534698, "loss": 5.6754, "step": 7801 }, { "epoch": 2.6627986348122867, "grad_norm": 2.5414392948150635, "learning_rate": 0.0001124004550625711, "loss": 5.9982, "step": 7802 }, { "epoch": 2.6631399317406146, "grad_norm": 2.6229312419891357, "learning_rate": 0.00011228668941979522, "loss": 6.584, "step": 7803 }, { "epoch": 2.663481228668942, "grad_norm": 2.5685508251190186, "learning_rate": 0.00011217292377701935, "loss": 6.1668, "step": 7804 }, { "epoch": 2.6638225255972694, "grad_norm": 2.811805486679077, "learning_rate": 0.00011205915813424346, "loss": 5.735, "step": 7805 }, { "epoch": 2.6641638225255972, "grad_norm": 2.5972208976745605, "learning_rate": 0.00011194539249146758, "loss": 6.849, "step": 7806 }, { "epoch": 2.664505119453925, "grad_norm": 2.659641742706299, "learning_rate": 0.0001118316268486917, "loss": 6.3136, "step": 7807 }, { "epoch": 2.6648464163822525, "grad_norm": 2.7042298316955566, "learning_rate": 0.00011171786120591581, "loss": 6.0122, "step": 7808 }, { "epoch": 2.6651877133105804, "grad_norm": 2.617111921310425, "learning_rate": 0.00011160409556313995, "loss": 6.1822, "step": 7809 }, { "epoch": 2.6655290102389078, "grad_norm": 2.545041084289551, "learning_rate": 0.00011149032992036405, "loss": 6.1464, "step": 7810 }, { "epoch": 2.6658703071672356, "grad_norm": 3.4677798748016357, "learning_rate": 0.00011137656427758817, "loss": 4.4972, "step": 7811 }, { "epoch": 2.666211604095563, "grad_norm": 2.6456897258758545, "learning_rate": 0.00011126279863481229, "loss": 5.4503, "step": 7812 }, { "epoch": 2.666552901023891, "grad_norm": 2.6556036472320557, "learning_rate": 0.0001111490329920364, "loss": 6.0158, "step": 7813 }, { "epoch": 2.6668941979522183, "grad_norm": 2.6038191318511963, "learning_rate": 0.00011103526734926054, "loss": 6.0356, "step": 7814 }, { "epoch": 2.667235494880546, "grad_norm": 2.673539400100708, "learning_rate": 0.00011092150170648464, "loss": 5.9474, "step": 7815 }, { "epoch": 2.667576791808874, "grad_norm": 2.661170721054077, "learning_rate": 0.00011080773606370876, "loss": 6.2451, "step": 7816 }, { "epoch": 2.6679180887372014, "grad_norm": 2.639185905456543, "learning_rate": 0.00011069397042093288, "loss": 5.3801, "step": 7817 }, { "epoch": 2.668259385665529, "grad_norm": 2.672795057296753, "learning_rate": 0.000110580204778157, "loss": 6.2753, "step": 7818 }, { "epoch": 2.6686006825938566, "grad_norm": 1.828800916671753, "learning_rate": 0.00011046643913538112, "loss": 2.4337, "step": 7819 }, { "epoch": 2.6689419795221845, "grad_norm": 2.646155834197998, "learning_rate": 0.00011035267349260524, "loss": 5.8717, "step": 7820 }, { "epoch": 2.669283276450512, "grad_norm": 2.6443183422088623, "learning_rate": 0.00011023890784982935, "loss": 6.0079, "step": 7821 }, { "epoch": 2.6696245733788397, "grad_norm": 2.595067262649536, "learning_rate": 0.00011012514220705347, "loss": 5.6408, "step": 7822 }, { "epoch": 2.669965870307167, "grad_norm": 2.655027151107788, "learning_rate": 0.00011001137656427759, "loss": 5.8955, "step": 7823 }, { "epoch": 2.670307167235495, "grad_norm": 2.6794850826263428, "learning_rate": 0.00010989761092150171, "loss": 5.8458, "step": 7824 }, { "epoch": 2.6706484641638224, "grad_norm": 2.556561231613159, "learning_rate": 0.00010978384527872583, "loss": 5.9302, "step": 7825 }, { "epoch": 2.6709897610921502, "grad_norm": 2.651296854019165, "learning_rate": 0.00010967007963594995, "loss": 6.2365, "step": 7826 }, { "epoch": 2.6713310580204777, "grad_norm": 2.6557466983795166, "learning_rate": 0.00010955631399317406, "loss": 5.845, "step": 7827 }, { "epoch": 2.6716723549488055, "grad_norm": 2.8386025428771973, "learning_rate": 0.00010944254835039818, "loss": 5.3838, "step": 7828 }, { "epoch": 2.6720136518771334, "grad_norm": 2.6636369228363037, "learning_rate": 0.0001093287827076223, "loss": 6.2458, "step": 7829 }, { "epoch": 2.6723549488054608, "grad_norm": 2.6208786964416504, "learning_rate": 0.00010921501706484642, "loss": 6.4429, "step": 7830 }, { "epoch": 2.672696245733788, "grad_norm": 2.61503005027771, "learning_rate": 0.00010910125142207054, "loss": 6.0356, "step": 7831 }, { "epoch": 2.673037542662116, "grad_norm": 2.5759787559509277, "learning_rate": 0.00010898748577929466, "loss": 5.025, "step": 7832 }, { "epoch": 2.673378839590444, "grad_norm": 2.680743932723999, "learning_rate": 0.00010887372013651878, "loss": 5.651, "step": 7833 }, { "epoch": 2.6737201365187713, "grad_norm": 2.4130074977874756, "learning_rate": 0.0001087599544937429, "loss": 4.161, "step": 7834 }, { "epoch": 2.674061433447099, "grad_norm": 2.479400634765625, "learning_rate": 0.00010864618885096701, "loss": 4.6418, "step": 7835 }, { "epoch": 2.6744027303754265, "grad_norm": 2.6607160568237305, "learning_rate": 0.00010853242320819112, "loss": 5.682, "step": 7836 }, { "epoch": 2.6747440273037544, "grad_norm": 2.6387665271759033, "learning_rate": 0.00010841865756541525, "loss": 6.2866, "step": 7837 }, { "epoch": 2.675085324232082, "grad_norm": 2.6623637676239014, "learning_rate": 0.00010830489192263937, "loss": 5.5277, "step": 7838 }, { "epoch": 2.6754266211604096, "grad_norm": 2.7224626541137695, "learning_rate": 0.00010819112627986349, "loss": 6.5055, "step": 7839 }, { "epoch": 2.675767918088737, "grad_norm": 1.7368601560592651, "learning_rate": 0.0001080773606370876, "loss": 2.5353, "step": 7840 }, { "epoch": 2.676109215017065, "grad_norm": 2.5209169387817383, "learning_rate": 0.00010796359499431171, "loss": 5.4228, "step": 7841 }, { "epoch": 2.6764505119453927, "grad_norm": 2.6286282539367676, "learning_rate": 0.00010784982935153584, "loss": 6.5636, "step": 7842 }, { "epoch": 2.67679180887372, "grad_norm": 2.581543207168579, "learning_rate": 0.00010773606370875996, "loss": 4.6108, "step": 7843 }, { "epoch": 2.6771331058020476, "grad_norm": 2.5444607734680176, "learning_rate": 0.00010762229806598408, "loss": 6.3867, "step": 7844 }, { "epoch": 2.6774744027303754, "grad_norm": 2.6205897331237793, "learning_rate": 0.0001075085324232082, "loss": 6.3425, "step": 7845 }, { "epoch": 2.6778156996587033, "grad_norm": 2.6083576679229736, "learning_rate": 0.0001073947667804323, "loss": 5.435, "step": 7846 }, { "epoch": 2.6781569965870307, "grad_norm": 2.6391236782073975, "learning_rate": 0.00010728100113765643, "loss": 5.6658, "step": 7847 }, { "epoch": 2.6784982935153585, "grad_norm": 2.71447491645813, "learning_rate": 0.00010716723549488055, "loss": 5.6652, "step": 7848 }, { "epoch": 2.678839590443686, "grad_norm": 2.63681697845459, "learning_rate": 0.00010705346985210467, "loss": 5.891, "step": 7849 }, { "epoch": 2.6791808873720138, "grad_norm": 2.910919427871704, "learning_rate": 0.00010693970420932879, "loss": 5.8579, "step": 7850 }, { "epoch": 2.679522184300341, "grad_norm": 2.6132237911224365, "learning_rate": 0.0001068259385665529, "loss": 5.8708, "step": 7851 }, { "epoch": 2.679863481228669, "grad_norm": 2.6288161277770996, "learning_rate": 0.00010671217292377703, "loss": 5.7728, "step": 7852 }, { "epoch": 2.6802047781569964, "grad_norm": 2.5175042152404785, "learning_rate": 0.00010659840728100113, "loss": 5.9085, "step": 7853 }, { "epoch": 2.6805460750853243, "grad_norm": 2.57769775390625, "learning_rate": 0.00010648464163822526, "loss": 6.2867, "step": 7854 }, { "epoch": 2.680887372013652, "grad_norm": 2.5458014011383057, "learning_rate": 0.00010637087599544938, "loss": 6.0821, "step": 7855 }, { "epoch": 2.6812286689419795, "grad_norm": 2.641937732696533, "learning_rate": 0.00010625711035267349, "loss": 6.295, "step": 7856 }, { "epoch": 2.681569965870307, "grad_norm": 2.6056764125823975, "learning_rate": 0.00010614334470989762, "loss": 5.8905, "step": 7857 }, { "epoch": 2.681911262798635, "grad_norm": 2.6439177989959717, "learning_rate": 0.00010602957906712172, "loss": 6.2158, "step": 7858 }, { "epoch": 2.6822525597269626, "grad_norm": 2.680771589279175, "learning_rate": 0.00010591581342434586, "loss": 5.609, "step": 7859 }, { "epoch": 2.68259385665529, "grad_norm": 2.6440012454986572, "learning_rate": 0.00010580204778156998, "loss": 5.7455, "step": 7860 }, { "epoch": 2.682935153583618, "grad_norm": 2.6077284812927246, "learning_rate": 0.00010568828213879408, "loss": 6.4567, "step": 7861 }, { "epoch": 2.6832764505119453, "grad_norm": 2.5259697437286377, "learning_rate": 0.00010557451649601821, "loss": 5.9364, "step": 7862 }, { "epoch": 2.683617747440273, "grad_norm": 2.647272825241089, "learning_rate": 0.00010546075085324232, "loss": 6.4563, "step": 7863 }, { "epoch": 2.6839590443686006, "grad_norm": 2.600893020629883, "learning_rate": 0.00010534698521046644, "loss": 5.9343, "step": 7864 }, { "epoch": 2.6843003412969284, "grad_norm": 2.8401756286621094, "learning_rate": 0.00010523321956769057, "loss": 4.4045, "step": 7865 }, { "epoch": 2.684641638225256, "grad_norm": 2.6061348915100098, "learning_rate": 0.00010511945392491467, "loss": 4.6237, "step": 7866 }, { "epoch": 2.6849829351535837, "grad_norm": 2.599360466003418, "learning_rate": 0.0001050056882821388, "loss": 6.0888, "step": 7867 }, { "epoch": 2.6853242320819115, "grad_norm": 2.5830085277557373, "learning_rate": 0.00010489192263936291, "loss": 5.5227, "step": 7868 }, { "epoch": 2.685665529010239, "grad_norm": 2.7643754482269287, "learning_rate": 0.00010477815699658703, "loss": 5.0325, "step": 7869 }, { "epoch": 2.6860068259385663, "grad_norm": 2.723921537399292, "learning_rate": 0.00010466439135381115, "loss": 5.5498, "step": 7870 }, { "epoch": 2.686348122866894, "grad_norm": 2.556098461151123, "learning_rate": 0.00010455062571103526, "loss": 5.6835, "step": 7871 }, { "epoch": 2.686689419795222, "grad_norm": 2.629362106323242, "learning_rate": 0.0001044368600682594, "loss": 6.4194, "step": 7872 }, { "epoch": 2.6870307167235494, "grad_norm": 2.6125411987304688, "learning_rate": 0.0001043230944254835, "loss": 6.0512, "step": 7873 }, { "epoch": 2.6873720136518773, "grad_norm": 2.6440958976745605, "learning_rate": 0.00010420932878270762, "loss": 6.0304, "step": 7874 }, { "epoch": 2.6877133105802047, "grad_norm": 2.598612070083618, "learning_rate": 0.00010409556313993174, "loss": 5.6832, "step": 7875 }, { "epoch": 2.6880546075085325, "grad_norm": 2.6545569896698, "learning_rate": 0.00010398179749715586, "loss": 6.4576, "step": 7876 }, { "epoch": 2.68839590443686, "grad_norm": 2.641326427459717, "learning_rate": 0.00010386803185437999, "loss": 6.3398, "step": 7877 }, { "epoch": 2.688737201365188, "grad_norm": 2.6488940715789795, "learning_rate": 0.0001037542662116041, "loss": 5.7814, "step": 7878 }, { "epoch": 2.689078498293515, "grad_norm": 2.5873377323150635, "learning_rate": 0.00010364050056882821, "loss": 6.4943, "step": 7879 }, { "epoch": 2.689419795221843, "grad_norm": 2.5985260009765625, "learning_rate": 0.00010352673492605233, "loss": 6.0007, "step": 7880 }, { "epoch": 2.689761092150171, "grad_norm": 2.6462574005126953, "learning_rate": 0.00010341296928327645, "loss": 6.4716, "step": 7881 }, { "epoch": 2.6901023890784983, "grad_norm": 2.649360418319702, "learning_rate": 0.00010329920364050058, "loss": 5.7194, "step": 7882 }, { "epoch": 2.6904436860068257, "grad_norm": 2.5704243183135986, "learning_rate": 0.00010318543799772469, "loss": 5.9094, "step": 7883 }, { "epoch": 2.6907849829351536, "grad_norm": 2.7193174362182617, "learning_rate": 0.0001030716723549488, "loss": 6.4267, "step": 7884 }, { "epoch": 2.6911262798634814, "grad_norm": 2.567355155944824, "learning_rate": 0.00010295790671217292, "loss": 5.5288, "step": 7885 }, { "epoch": 2.691467576791809, "grad_norm": 2.8648149967193604, "learning_rate": 0.00010284414106939704, "loss": 5.4581, "step": 7886 }, { "epoch": 2.6918088737201367, "grad_norm": 2.6088626384735107, "learning_rate": 0.00010273037542662116, "loss": 6.3214, "step": 7887 }, { "epoch": 2.692150170648464, "grad_norm": 2.7508292198181152, "learning_rate": 0.00010261660978384528, "loss": 6.3367, "step": 7888 }, { "epoch": 2.692491467576792, "grad_norm": 2.5980231761932373, "learning_rate": 0.0001025028441410694, "loss": 5.7762, "step": 7889 }, { "epoch": 2.6928327645051193, "grad_norm": 2.668694496154785, "learning_rate": 0.00010238907849829352, "loss": 5.5341, "step": 7890 }, { "epoch": 2.693174061433447, "grad_norm": 2.5616753101348877, "learning_rate": 0.00010227531285551763, "loss": 6.0325, "step": 7891 }, { "epoch": 2.6935153583617746, "grad_norm": 5.180400371551514, "learning_rate": 0.00010216154721274175, "loss": 4.5402, "step": 7892 }, { "epoch": 2.6938566552901024, "grad_norm": 2.6964242458343506, "learning_rate": 0.00010204778156996587, "loss": 6.8569, "step": 7893 }, { "epoch": 2.6941979522184303, "grad_norm": 2.5725672245025635, "learning_rate": 0.00010193401592718999, "loss": 5.7403, "step": 7894 }, { "epoch": 2.6945392491467577, "grad_norm": 2.649010181427002, "learning_rate": 0.00010182025028441411, "loss": 6.1924, "step": 7895 }, { "epoch": 2.694880546075085, "grad_norm": 2.6579694747924805, "learning_rate": 0.00010170648464163823, "loss": 5.7039, "step": 7896 }, { "epoch": 2.695221843003413, "grad_norm": 2.6075141429901123, "learning_rate": 0.00010159271899886235, "loss": 6.356, "step": 7897 }, { "epoch": 2.695563139931741, "grad_norm": 2.6528830528259277, "learning_rate": 0.00010147895335608646, "loss": 5.4291, "step": 7898 }, { "epoch": 2.695904436860068, "grad_norm": 2.625629425048828, "learning_rate": 0.00010136518771331058, "loss": 6.2645, "step": 7899 }, { "epoch": 2.696245733788396, "grad_norm": 2.710127830505371, "learning_rate": 0.0001012514220705347, "loss": 4.9046, "step": 7900 }, { "epoch": 2.6965870307167235, "grad_norm": 2.4944448471069336, "learning_rate": 0.00010113765642775882, "loss": 5.518, "step": 7901 }, { "epoch": 2.6969283276450513, "grad_norm": 2.6292831897735596, "learning_rate": 0.00010102389078498294, "loss": 6.1254, "step": 7902 }, { "epoch": 2.6972696245733787, "grad_norm": 2.5295937061309814, "learning_rate": 0.00010091012514220706, "loss": 6.5135, "step": 7903 }, { "epoch": 2.6976109215017066, "grad_norm": 2.602799415588379, "learning_rate": 0.00010079635949943116, "loss": 5.8567, "step": 7904 }, { "epoch": 2.697952218430034, "grad_norm": 2.6777186393737793, "learning_rate": 0.0001006825938566553, "loss": 6.0356, "step": 7905 }, { "epoch": 2.698293515358362, "grad_norm": 2.5789544582366943, "learning_rate": 0.00010056882821387941, "loss": 5.3017, "step": 7906 }, { "epoch": 2.6986348122866897, "grad_norm": 2.565323829650879, "learning_rate": 0.00010045506257110353, "loss": 6.0801, "step": 7907 }, { "epoch": 2.698976109215017, "grad_norm": 2.089174270629883, "learning_rate": 0.00010034129692832765, "loss": 3.2135, "step": 7908 }, { "epoch": 2.6993174061433445, "grad_norm": 2.6627211570739746, "learning_rate": 0.00010022753128555175, "loss": 5.925, "step": 7909 }, { "epoch": 2.6996587030716723, "grad_norm": 2.5624616146087646, "learning_rate": 0.00010011376564277589, "loss": 5.6055, "step": 7910 }, { "epoch": 2.7, "grad_norm": 3.5882010459899902, "learning_rate": 0.0001, "loss": 4.6856, "step": 7911 }, { "epoch": 2.7003412969283276, "grad_norm": 2.569596290588379, "learning_rate": 9.988623435722412e-05, "loss": 5.86, "step": 7912 }, { "epoch": 2.7006825938566554, "grad_norm": 2.565866708755493, "learning_rate": 9.977246871444824e-05, "loss": 6.1467, "step": 7913 }, { "epoch": 2.701023890784983, "grad_norm": 2.524012327194214, "learning_rate": 9.965870307167235e-05, "loss": 5.9074, "step": 7914 }, { "epoch": 2.7013651877133107, "grad_norm": 1.8367999792099, "learning_rate": 9.954493742889648e-05, "loss": 3.4121, "step": 7915 }, { "epoch": 2.701706484641638, "grad_norm": 2.5629944801330566, "learning_rate": 9.94311717861206e-05, "loss": 6.5814, "step": 7916 }, { "epoch": 2.702047781569966, "grad_norm": 2.7166037559509277, "learning_rate": 9.931740614334472e-05, "loss": 6.285, "step": 7917 }, { "epoch": 2.7023890784982934, "grad_norm": 2.672518730163574, "learning_rate": 9.920364050056883e-05, "loss": 3.7134, "step": 7918 }, { "epoch": 2.702730375426621, "grad_norm": 2.4576282501220703, "learning_rate": 9.908987485779294e-05, "loss": 4.0857, "step": 7919 }, { "epoch": 2.703071672354949, "grad_norm": 2.5885422229766846, "learning_rate": 9.897610921501707e-05, "loss": 5.9866, "step": 7920 }, { "epoch": 2.7034129692832765, "grad_norm": 2.686434030532837, "learning_rate": 9.886234357224118e-05, "loss": 5.7955, "step": 7921 }, { "epoch": 2.703754266211604, "grad_norm": 2.6147704124450684, "learning_rate": 9.874857792946531e-05, "loss": 6.1341, "step": 7922 }, { "epoch": 2.7040955631399317, "grad_norm": 3.224640369415283, "learning_rate": 9.863481228668943e-05, "loss": 4.9066, "step": 7923 }, { "epoch": 2.7044368600682596, "grad_norm": 2.6708364486694336, "learning_rate": 9.852104664391353e-05, "loss": 5.7106, "step": 7924 }, { "epoch": 2.704778156996587, "grad_norm": 2.5874176025390625, "learning_rate": 9.840728100113766e-05, "loss": 4.865, "step": 7925 }, { "epoch": 2.705119453924915, "grad_norm": 2.6783151626586914, "learning_rate": 9.829351535836177e-05, "loss": 4.9307, "step": 7926 }, { "epoch": 2.7054607508532422, "grad_norm": 2.5469775199890137, "learning_rate": 9.81797497155859e-05, "loss": 5.7146, "step": 7927 }, { "epoch": 2.70580204778157, "grad_norm": 2.5792226791381836, "learning_rate": 9.806598407281002e-05, "loss": 5.7424, "step": 7928 }, { "epoch": 2.7061433447098975, "grad_norm": 2.562011241912842, "learning_rate": 9.795221843003412e-05, "loss": 5.7029, "step": 7929 }, { "epoch": 2.7064846416382253, "grad_norm": 3.7608301639556885, "learning_rate": 9.783845278725826e-05, "loss": 4.2989, "step": 7930 }, { "epoch": 2.7068259385665527, "grad_norm": 2.5663902759552, "learning_rate": 9.772468714448236e-05, "loss": 6.4621, "step": 7931 }, { "epoch": 2.7071672354948806, "grad_norm": 2.6280720233917236, "learning_rate": 9.761092150170649e-05, "loss": 6.3645, "step": 7932 }, { "epoch": 2.7075085324232084, "grad_norm": 2.5684332847595215, "learning_rate": 9.749715585893061e-05, "loss": 5.8003, "step": 7933 }, { "epoch": 2.707849829351536, "grad_norm": 2.643038034439087, "learning_rate": 9.738339021615472e-05, "loss": 6.4916, "step": 7934 }, { "epoch": 2.7081911262798632, "grad_norm": 2.559818983078003, "learning_rate": 9.726962457337885e-05, "loss": 6.41, "step": 7935 }, { "epoch": 2.708532423208191, "grad_norm": 2.636819839477539, "learning_rate": 9.715585893060295e-05, "loss": 5.2788, "step": 7936 }, { "epoch": 2.708873720136519, "grad_norm": 2.651435613632202, "learning_rate": 9.704209328782709e-05, "loss": 5.8778, "step": 7937 }, { "epoch": 2.7092150170648464, "grad_norm": 2.6740775108337402, "learning_rate": 9.692832764505119e-05, "loss": 4.9705, "step": 7938 }, { "epoch": 2.709556313993174, "grad_norm": 2.6301076412200928, "learning_rate": 9.681456200227531e-05, "loss": 6.1048, "step": 7939 }, { "epoch": 2.7098976109215016, "grad_norm": 2.613173007965088, "learning_rate": 9.670079635949944e-05, "loss": 5.3901, "step": 7940 }, { "epoch": 2.7102389078498295, "grad_norm": 2.5331554412841797, "learning_rate": 9.658703071672355e-05, "loss": 5.4303, "step": 7941 }, { "epoch": 2.710580204778157, "grad_norm": 2.602365016937256, "learning_rate": 9.647326507394768e-05, "loss": 6.2944, "step": 7942 }, { "epoch": 2.7109215017064847, "grad_norm": 2.6677892208099365, "learning_rate": 9.635949943117178e-05, "loss": 5.7442, "step": 7943 }, { "epoch": 2.711262798634812, "grad_norm": 2.62030291557312, "learning_rate": 9.62457337883959e-05, "loss": 6.2834, "step": 7944 }, { "epoch": 2.71160409556314, "grad_norm": 2.5873615741729736, "learning_rate": 9.613196814562003e-05, "loss": 5.7214, "step": 7945 }, { "epoch": 2.711945392491468, "grad_norm": 2.6523163318634033, "learning_rate": 9.601820250284414e-05, "loss": 5.5457, "step": 7946 }, { "epoch": 2.7122866894197952, "grad_norm": 2.6249499320983887, "learning_rate": 9.590443686006827e-05, "loss": 5.5907, "step": 7947 }, { "epoch": 2.7126279863481226, "grad_norm": 2.6475677490234375, "learning_rate": 9.579067121729238e-05, "loss": 6.2695, "step": 7948 }, { "epoch": 2.7129692832764505, "grad_norm": 2.638388156890869, "learning_rate": 9.56769055745165e-05, "loss": 6.0452, "step": 7949 }, { "epoch": 2.7133105802047783, "grad_norm": 2.5917885303497314, "learning_rate": 9.556313993174063e-05, "loss": 5.4567, "step": 7950 }, { "epoch": 2.7136518771331057, "grad_norm": 2.65228533744812, "learning_rate": 9.544937428896473e-05, "loss": 6.3895, "step": 7951 }, { "epoch": 2.7139931740614336, "grad_norm": 2.614609956741333, "learning_rate": 9.533560864618886e-05, "loss": 6.4066, "step": 7952 }, { "epoch": 2.714334470989761, "grad_norm": 2.717390298843384, "learning_rate": 9.522184300341297e-05, "loss": 4.8762, "step": 7953 }, { "epoch": 2.714675767918089, "grad_norm": 2.660956859588623, "learning_rate": 9.510807736063709e-05, "loss": 5.9635, "step": 7954 }, { "epoch": 2.7150170648464163, "grad_norm": 2.5839521884918213, "learning_rate": 9.49943117178612e-05, "loss": 5.8931, "step": 7955 }, { "epoch": 2.715358361774744, "grad_norm": 2.445621967315674, "learning_rate": 9.488054607508532e-05, "loss": 3.551, "step": 7956 }, { "epoch": 2.7156996587030715, "grad_norm": 2.659536838531494, "learning_rate": 9.476678043230946e-05, "loss": 6.4926, "step": 7957 }, { "epoch": 2.7160409556313994, "grad_norm": 2.6747255325317383, "learning_rate": 9.465301478953356e-05, "loss": 6.5505, "step": 7958 }, { "epoch": 2.716382252559727, "grad_norm": 2.663691520690918, "learning_rate": 9.453924914675768e-05, "loss": 6.1892, "step": 7959 }, { "epoch": 2.7167235494880546, "grad_norm": 2.6698076725006104, "learning_rate": 9.44254835039818e-05, "loss": 5.9412, "step": 7960 }, { "epoch": 2.717064846416382, "grad_norm": 2.622624635696411, "learning_rate": 9.431171786120592e-05, "loss": 5.906, "step": 7961 }, { "epoch": 2.71740614334471, "grad_norm": 2.6831278800964355, "learning_rate": 9.419795221843005e-05, "loss": 5.8821, "step": 7962 }, { "epoch": 2.7177474402730377, "grad_norm": 2.650803565979004, "learning_rate": 9.408418657565415e-05, "loss": 5.7527, "step": 7963 }, { "epoch": 2.718088737201365, "grad_norm": 2.567662477493286, "learning_rate": 9.397042093287827e-05, "loss": 5.8555, "step": 7964 }, { "epoch": 2.718430034129693, "grad_norm": 2.6113736629486084, "learning_rate": 9.385665529010239e-05, "loss": 5.5252, "step": 7965 }, { "epoch": 2.7187713310580204, "grad_norm": 2.6406290531158447, "learning_rate": 9.374288964732651e-05, "loss": 6.5528, "step": 7966 }, { "epoch": 2.7191126279863482, "grad_norm": 2.571793556213379, "learning_rate": 9.362912400455063e-05, "loss": 5.9012, "step": 7967 }, { "epoch": 2.7194539249146756, "grad_norm": 2.543337345123291, "learning_rate": 9.351535836177475e-05, "loss": 5.6792, "step": 7968 }, { "epoch": 2.7197952218430035, "grad_norm": 2.667567014694214, "learning_rate": 9.340159271899886e-05, "loss": 6.1126, "step": 7969 }, { "epoch": 2.720136518771331, "grad_norm": 2.7619335651397705, "learning_rate": 9.328782707622298e-05, "loss": 4.8842, "step": 7970 }, { "epoch": 2.7204778156996587, "grad_norm": 2.702486753463745, "learning_rate": 9.31740614334471e-05, "loss": 5.1622, "step": 7971 }, { "epoch": 2.7208191126279866, "grad_norm": 2.543321132659912, "learning_rate": 9.306029579067122e-05, "loss": 6.5731, "step": 7972 }, { "epoch": 2.721160409556314, "grad_norm": 2.6276233196258545, "learning_rate": 9.294653014789534e-05, "loss": 6.0075, "step": 7973 }, { "epoch": 2.7215017064846414, "grad_norm": 2.569683074951172, "learning_rate": 9.283276450511946e-05, "loss": 5.8531, "step": 7974 }, { "epoch": 2.7218430034129693, "grad_norm": 2.636974334716797, "learning_rate": 9.271899886234357e-05, "loss": 5.6186, "step": 7975 }, { "epoch": 2.722184300341297, "grad_norm": 2.7969841957092285, "learning_rate": 9.26052332195677e-05, "loss": 5.0165, "step": 7976 }, { "epoch": 2.7225255972696245, "grad_norm": 2.6524500846862793, "learning_rate": 9.24914675767918e-05, "loss": 5.6564, "step": 7977 }, { "epoch": 2.7228668941979524, "grad_norm": 2.553088903427124, "learning_rate": 9.237770193401593e-05, "loss": 6.0369, "step": 7978 }, { "epoch": 2.7232081911262798, "grad_norm": 2.4714152812957764, "learning_rate": 9.226393629124005e-05, "loss": 5.8547, "step": 7979 }, { "epoch": 2.7235494880546076, "grad_norm": 2.634051561355591, "learning_rate": 9.215017064846417e-05, "loss": 6.1899, "step": 7980 }, { "epoch": 2.723890784982935, "grad_norm": 2.6114561557769775, "learning_rate": 9.203640500568829e-05, "loss": 6.6533, "step": 7981 }, { "epoch": 2.724232081911263, "grad_norm": 2.653291702270508, "learning_rate": 9.192263936291239e-05, "loss": 6.1926, "step": 7982 }, { "epoch": 2.7245733788395903, "grad_norm": 2.614102363586426, "learning_rate": 9.180887372013652e-05, "loss": 6.1669, "step": 7983 }, { "epoch": 2.724914675767918, "grad_norm": 2.6618130207061768, "learning_rate": 9.169510807736064e-05, "loss": 4.0645, "step": 7984 }, { "epoch": 2.725255972696246, "grad_norm": 2.57953143119812, "learning_rate": 9.158134243458476e-05, "loss": 6.201, "step": 7985 }, { "epoch": 2.7255972696245734, "grad_norm": 2.664710760116577, "learning_rate": 9.146757679180888e-05, "loss": 6.319, "step": 7986 }, { "epoch": 2.725938566552901, "grad_norm": 2.681464672088623, "learning_rate": 9.135381114903298e-05, "loss": 5.8685, "step": 7987 }, { "epoch": 2.7262798634812286, "grad_norm": 2.5687036514282227, "learning_rate": 9.124004550625712e-05, "loss": 6.3869, "step": 7988 }, { "epoch": 2.7266211604095565, "grad_norm": 2.604609489440918, "learning_rate": 9.112627986348123e-05, "loss": 5.8885, "step": 7989 }, { "epoch": 2.726962457337884, "grad_norm": 2.5861759185791016, "learning_rate": 9.101251422070535e-05, "loss": 5.1008, "step": 7990 }, { "epoch": 2.7273037542662117, "grad_norm": 2.619473934173584, "learning_rate": 9.089874857792947e-05, "loss": 6.0467, "step": 7991 }, { "epoch": 2.727645051194539, "grad_norm": 2.6422886848449707, "learning_rate": 9.078498293515358e-05, "loss": 6.595, "step": 7992 }, { "epoch": 2.727986348122867, "grad_norm": 2.61552357673645, "learning_rate": 9.067121729237771e-05, "loss": 5.6713, "step": 7993 }, { "epoch": 2.7283276450511944, "grad_norm": 2.6517817974090576, "learning_rate": 9.055745164960181e-05, "loss": 6.3159, "step": 7994 }, { "epoch": 2.7286689419795223, "grad_norm": 2.8126118183135986, "learning_rate": 9.044368600682594e-05, "loss": 4.8574, "step": 7995 }, { "epoch": 2.7290102389078497, "grad_norm": 2.5647075176239014, "learning_rate": 9.032992036405006e-05, "loss": 5.692, "step": 7996 }, { "epoch": 2.7293515358361775, "grad_norm": 2.641845941543579, "learning_rate": 9.021615472127417e-05, "loss": 6.3246, "step": 7997 }, { "epoch": 2.7296928327645054, "grad_norm": 2.609525203704834, "learning_rate": 9.01023890784983e-05, "loss": 6.4175, "step": 7998 }, { "epoch": 2.7300341296928328, "grad_norm": 2.601841688156128, "learning_rate": 8.99886234357224e-05, "loss": 6.5798, "step": 7999 }, { "epoch": 2.73037542662116, "grad_norm": 2.6355714797973633, "learning_rate": 8.987485779294654e-05, "loss": 6.3919, "step": 8000 }, { "epoch": 2.730716723549488, "grad_norm": 2.583918333053589, "learning_rate": 8.976109215017066e-05, "loss": 6.3685, "step": 8001 }, { "epoch": 2.731058020477816, "grad_norm": 2.569997549057007, "learning_rate": 8.964732650739476e-05, "loss": 6.1898, "step": 8002 }, { "epoch": 2.7313993174061433, "grad_norm": 2.597790002822876, "learning_rate": 8.953356086461889e-05, "loss": 6.2867, "step": 8003 }, { "epoch": 2.731740614334471, "grad_norm": 2.638692855834961, "learning_rate": 8.9419795221843e-05, "loss": 5.6171, "step": 8004 }, { "epoch": 2.7320819112627985, "grad_norm": 2.7340500354766846, "learning_rate": 8.930602957906713e-05, "loss": 4.4383, "step": 8005 }, { "epoch": 2.7324232081911264, "grad_norm": 2.652092933654785, "learning_rate": 8.919226393629125e-05, "loss": 6.168, "step": 8006 }, { "epoch": 2.732764505119454, "grad_norm": 2.593048572540283, "learning_rate": 8.907849829351535e-05, "loss": 6.4116, "step": 8007 }, { "epoch": 2.7331058020477816, "grad_norm": 2.6806387901306152, "learning_rate": 8.896473265073949e-05, "loss": 5.6523, "step": 8008 }, { "epoch": 2.733447098976109, "grad_norm": 2.5880985260009766, "learning_rate": 8.885096700796359e-05, "loss": 6.137, "step": 8009 }, { "epoch": 2.733788395904437, "grad_norm": 2.6142632961273193, "learning_rate": 8.873720136518772e-05, "loss": 6.0497, "step": 8010 }, { "epoch": 2.7341296928327647, "grad_norm": 2.7263357639312744, "learning_rate": 8.862343572241183e-05, "loss": 6.2217, "step": 8011 }, { "epoch": 2.734470989761092, "grad_norm": 2.6077632904052734, "learning_rate": 8.850967007963595e-05, "loss": 5.723, "step": 8012 }, { "epoch": 2.7348122866894196, "grad_norm": 2.4901204109191895, "learning_rate": 8.839590443686008e-05, "loss": 5.1386, "step": 8013 }, { "epoch": 2.7351535836177474, "grad_norm": 2.620048999786377, "learning_rate": 8.828213879408418e-05, "loss": 6.5681, "step": 8014 }, { "epoch": 2.7354948805460753, "grad_norm": 2.5127553939819336, "learning_rate": 8.816837315130831e-05, "loss": 3.7144, "step": 8015 }, { "epoch": 2.7358361774744027, "grad_norm": 2.677717447280884, "learning_rate": 8.805460750853242e-05, "loss": 5.7862, "step": 8016 }, { "epoch": 2.7361774744027305, "grad_norm": 2.5813772678375244, "learning_rate": 8.794084186575654e-05, "loss": 6.38, "step": 8017 }, { "epoch": 2.736518771331058, "grad_norm": 2.593780279159546, "learning_rate": 8.782707622298067e-05, "loss": 6.2665, "step": 8018 }, { "epoch": 2.7368600682593858, "grad_norm": 2.6182265281677246, "learning_rate": 8.771331058020478e-05, "loss": 5.6717, "step": 8019 }, { "epoch": 2.737201365187713, "grad_norm": 2.615816593170166, "learning_rate": 8.759954493742891e-05, "loss": 6.4123, "step": 8020 }, { "epoch": 2.737542662116041, "grad_norm": 2.6214540004730225, "learning_rate": 8.748577929465301e-05, "loss": 6.0376, "step": 8021 }, { "epoch": 2.7378839590443684, "grad_norm": 2.631145715713501, "learning_rate": 8.737201365187713e-05, "loss": 5.6407, "step": 8022 }, { "epoch": 2.7382252559726963, "grad_norm": 2.52872371673584, "learning_rate": 8.725824800910126e-05, "loss": 6.0498, "step": 8023 }, { "epoch": 2.738566552901024, "grad_norm": 2.6615374088287354, "learning_rate": 8.714448236632537e-05, "loss": 5.4418, "step": 8024 }, { "epoch": 2.7389078498293515, "grad_norm": 4.1157917976379395, "learning_rate": 8.70307167235495e-05, "loss": 4.1903, "step": 8025 }, { "epoch": 2.739249146757679, "grad_norm": 2.548863410949707, "learning_rate": 8.69169510807736e-05, "loss": 6.1133, "step": 8026 }, { "epoch": 2.739590443686007, "grad_norm": 2.564734935760498, "learning_rate": 8.680318543799772e-05, "loss": 6.0158, "step": 8027 }, { "epoch": 2.7399317406143346, "grad_norm": 2.6325366497039795, "learning_rate": 8.668941979522184e-05, "loss": 6.132, "step": 8028 }, { "epoch": 2.740273037542662, "grad_norm": 2.528510808944702, "learning_rate": 8.657565415244596e-05, "loss": 6.0174, "step": 8029 }, { "epoch": 2.74061433447099, "grad_norm": 2.521850347518921, "learning_rate": 8.646188850967009e-05, "loss": 5.7658, "step": 8030 }, { "epoch": 2.7409556313993173, "grad_norm": 2.5946974754333496, "learning_rate": 8.63481228668942e-05, "loss": 6.4626, "step": 8031 }, { "epoch": 2.741296928327645, "grad_norm": 2.647458076477051, "learning_rate": 8.623435722411832e-05, "loss": 6.2879, "step": 8032 }, { "epoch": 2.7416382252559726, "grad_norm": 2.7117865085601807, "learning_rate": 8.612059158134243e-05, "loss": 5.5557, "step": 8033 }, { "epoch": 2.7419795221843004, "grad_norm": 2.601431131362915, "learning_rate": 8.600682593856655e-05, "loss": 5.8308, "step": 8034 }, { "epoch": 2.742320819112628, "grad_norm": 2.799042224884033, "learning_rate": 8.589306029579068e-05, "loss": 5.5987, "step": 8035 }, { "epoch": 2.7426621160409557, "grad_norm": 2.671846389770508, "learning_rate": 8.577929465301479e-05, "loss": 6.4048, "step": 8036 }, { "epoch": 2.7430034129692835, "grad_norm": 2.5593509674072266, "learning_rate": 8.566552901023891e-05, "loss": 5.5486, "step": 8037 }, { "epoch": 2.743344709897611, "grad_norm": 2.5567145347595215, "learning_rate": 8.555176336746303e-05, "loss": 6.0707, "step": 8038 }, { "epoch": 2.7436860068259383, "grad_norm": 2.59529185295105, "learning_rate": 8.543799772468715e-05, "loss": 5.8477, "step": 8039 }, { "epoch": 2.744027303754266, "grad_norm": 2.6873974800109863, "learning_rate": 8.532423208191128e-05, "loss": 6.008, "step": 8040 }, { "epoch": 2.744368600682594, "grad_norm": 2.6299967765808105, "learning_rate": 8.521046643913538e-05, "loss": 5.65, "step": 8041 }, { "epoch": 2.7447098976109214, "grad_norm": 2.6703577041625977, "learning_rate": 8.50967007963595e-05, "loss": 6.3595, "step": 8042 }, { "epoch": 2.7450511945392493, "grad_norm": 2.5916976928710938, "learning_rate": 8.498293515358362e-05, "loss": 6.1119, "step": 8043 }, { "epoch": 2.7453924914675767, "grad_norm": 2.577881097793579, "learning_rate": 8.486916951080774e-05, "loss": 5.5663, "step": 8044 }, { "epoch": 2.7457337883959045, "grad_norm": 2.5749382972717285, "learning_rate": 8.475540386803186e-05, "loss": 5.7458, "step": 8045 }, { "epoch": 2.746075085324232, "grad_norm": 2.5341036319732666, "learning_rate": 8.464163822525597e-05, "loss": 5.6241, "step": 8046 }, { "epoch": 2.74641638225256, "grad_norm": 2.6867024898529053, "learning_rate": 8.452787258248009e-05, "loss": 6.3056, "step": 8047 }, { "epoch": 2.746757679180887, "grad_norm": 2.5458154678344727, "learning_rate": 8.441410693970421e-05, "loss": 6.0838, "step": 8048 }, { "epoch": 2.747098976109215, "grad_norm": 2.6471140384674072, "learning_rate": 8.430034129692833e-05, "loss": 5.9484, "step": 8049 }, { "epoch": 2.747440273037543, "grad_norm": 2.6072444915771484, "learning_rate": 8.418657565415245e-05, "loss": 6.4437, "step": 8050 }, { "epoch": 2.7477815699658703, "grad_norm": 2.5881097316741943, "learning_rate": 8.407281001137657e-05, "loss": 5.7884, "step": 8051 }, { "epoch": 2.7481228668941977, "grad_norm": 2.7298855781555176, "learning_rate": 8.395904436860069e-05, "loss": 4.845, "step": 8052 }, { "epoch": 2.7484641638225256, "grad_norm": 2.646090507507324, "learning_rate": 8.38452787258248e-05, "loss": 6.2684, "step": 8053 }, { "epoch": 2.7488054607508534, "grad_norm": 2.5868003368377686, "learning_rate": 8.373151308304892e-05, "loss": 5.8335, "step": 8054 }, { "epoch": 2.749146757679181, "grad_norm": 2.60349440574646, "learning_rate": 8.361774744027303e-05, "loss": 5.6704, "step": 8055 }, { "epoch": 2.7494880546075087, "grad_norm": 2.5340113639831543, "learning_rate": 8.350398179749716e-05, "loss": 5.5741, "step": 8056 }, { "epoch": 2.749829351535836, "grad_norm": 2.6440858840942383, "learning_rate": 8.339021615472128e-05, "loss": 5.4331, "step": 8057 }, { "epoch": 2.750170648464164, "grad_norm": 2.588313579559326, "learning_rate": 8.32764505119454e-05, "loss": 6.2252, "step": 8058 }, { "epoch": 2.7505119453924913, "grad_norm": 2.574767827987671, "learning_rate": 8.316268486916952e-05, "loss": 6.3772, "step": 8059 }, { "epoch": 2.750853242320819, "grad_norm": 2.5740933418273926, "learning_rate": 8.304891922639362e-05, "loss": 6.231, "step": 8060 }, { "epoch": 2.7511945392491466, "grad_norm": 2.542821168899536, "learning_rate": 8.293515358361775e-05, "loss": 5.886, "step": 8061 }, { "epoch": 2.7515358361774744, "grad_norm": 2.542856454849243, "learning_rate": 8.282138794084186e-05, "loss": 5.9924, "step": 8062 }, { "epoch": 2.7518771331058023, "grad_norm": 2.618867874145508, "learning_rate": 8.270762229806599e-05, "loss": 5.3548, "step": 8063 }, { "epoch": 2.7522184300341297, "grad_norm": 2.605292797088623, "learning_rate": 8.259385665529011e-05, "loss": 5.7645, "step": 8064 }, { "epoch": 2.752559726962457, "grad_norm": 2.6475188732147217, "learning_rate": 8.248009101251421e-05, "loss": 6.4186, "step": 8065 }, { "epoch": 2.752901023890785, "grad_norm": 2.5117578506469727, "learning_rate": 8.236632536973834e-05, "loss": 5.997, "step": 8066 }, { "epoch": 2.753242320819113, "grad_norm": 2.5080535411834717, "learning_rate": 8.225255972696245e-05, "loss": 6.1888, "step": 8067 }, { "epoch": 2.75358361774744, "grad_norm": 2.5350992679595947, "learning_rate": 8.213879408418658e-05, "loss": 5.9308, "step": 8068 }, { "epoch": 2.753924914675768, "grad_norm": 2.5473434925079346, "learning_rate": 8.20250284414107e-05, "loss": 6.1776, "step": 8069 }, { "epoch": 2.7542662116040955, "grad_norm": 2.4866843223571777, "learning_rate": 8.19112627986348e-05, "loss": 4.3249, "step": 8070 }, { "epoch": 2.7546075085324233, "grad_norm": 2.6929423809051514, "learning_rate": 8.179749715585894e-05, "loss": 4.4646, "step": 8071 }, { "epoch": 2.7549488054607507, "grad_norm": 2.553462505340576, "learning_rate": 8.168373151308304e-05, "loss": 6.4205, "step": 8072 }, { "epoch": 2.7552901023890786, "grad_norm": 2.5952062606811523, "learning_rate": 8.156996587030717e-05, "loss": 6.2428, "step": 8073 }, { "epoch": 2.755631399317406, "grad_norm": 2.697514057159424, "learning_rate": 8.145620022753129e-05, "loss": 6.3499, "step": 8074 }, { "epoch": 2.755972696245734, "grad_norm": 2.558379888534546, "learning_rate": 8.13424345847554e-05, "loss": 5.889, "step": 8075 }, { "epoch": 2.7563139931740617, "grad_norm": 2.633807420730591, "learning_rate": 8.122866894197953e-05, "loss": 5.544, "step": 8076 }, { "epoch": 2.756655290102389, "grad_norm": 2.470966100692749, "learning_rate": 8.111490329920363e-05, "loss": 4.4105, "step": 8077 }, { "epoch": 2.7569965870307165, "grad_norm": 2.6610970497131348, "learning_rate": 8.100113765642777e-05, "loss": 6.4596, "step": 8078 }, { "epoch": 2.7573378839590443, "grad_norm": 2.5555026531219482, "learning_rate": 8.088737201365187e-05, "loss": 6.3518, "step": 8079 }, { "epoch": 2.757679180887372, "grad_norm": 2.5696256160736084, "learning_rate": 8.077360637087599e-05, "loss": 6.0327, "step": 8080 }, { "epoch": 2.7580204778156996, "grad_norm": 2.542940855026245, "learning_rate": 8.065984072810012e-05, "loss": 5.54, "step": 8081 }, { "epoch": 2.7583617747440274, "grad_norm": 2.5934388637542725, "learning_rate": 8.054607508532423e-05, "loss": 6.237, "step": 8082 }, { "epoch": 2.758703071672355, "grad_norm": 2.5500986576080322, "learning_rate": 8.043230944254836e-05, "loss": 6.0471, "step": 8083 }, { "epoch": 2.7590443686006827, "grad_norm": 2.556598663330078, "learning_rate": 8.031854379977246e-05, "loss": 5.7077, "step": 8084 }, { "epoch": 2.75938566552901, "grad_norm": 2.6231422424316406, "learning_rate": 8.020477815699658e-05, "loss": 5.1179, "step": 8085 }, { "epoch": 2.759726962457338, "grad_norm": 2.5738203525543213, "learning_rate": 8.009101251422071e-05, "loss": 6.3482, "step": 8086 }, { "epoch": 2.7600682593856654, "grad_norm": 2.713271379470825, "learning_rate": 7.997724687144482e-05, "loss": 6.5092, "step": 8087 }, { "epoch": 2.760409556313993, "grad_norm": 2.689073085784912, "learning_rate": 7.986348122866895e-05, "loss": 5.0591, "step": 8088 }, { "epoch": 2.760750853242321, "grad_norm": 2.620436429977417, "learning_rate": 7.974971558589306e-05, "loss": 6.0436, "step": 8089 }, { "epoch": 2.7610921501706485, "grad_norm": 2.5438833236694336, "learning_rate": 7.963594994311717e-05, "loss": 5.7818, "step": 8090 }, { "epoch": 2.761433447098976, "grad_norm": 2.6242706775665283, "learning_rate": 7.952218430034131e-05, "loss": 5.8769, "step": 8091 }, { "epoch": 2.7617747440273037, "grad_norm": 2.4857864379882812, "learning_rate": 7.940841865756541e-05, "loss": 5.7621, "step": 8092 }, { "epoch": 2.7621160409556316, "grad_norm": 2.627063035964966, "learning_rate": 7.929465301478954e-05, "loss": 6.45, "step": 8093 }, { "epoch": 2.762457337883959, "grad_norm": 2.4991109371185303, "learning_rate": 7.918088737201365e-05, "loss": 5.7273, "step": 8094 }, { "epoch": 2.762798634812287, "grad_norm": 2.532963991165161, "learning_rate": 7.906712172923777e-05, "loss": 6.2703, "step": 8095 }, { "epoch": 2.7631399317406142, "grad_norm": 2.5222792625427246, "learning_rate": 7.895335608646189e-05, "loss": 5.554, "step": 8096 }, { "epoch": 2.763481228668942, "grad_norm": 2.6223628520965576, "learning_rate": 7.8839590443686e-05, "loss": 6.0185, "step": 8097 }, { "epoch": 2.7638225255972695, "grad_norm": 2.598527669906616, "learning_rate": 7.872582480091014e-05, "loss": 5.8662, "step": 8098 }, { "epoch": 2.7641638225255973, "grad_norm": 2.4972290992736816, "learning_rate": 7.861205915813424e-05, "loss": 6.2221, "step": 8099 }, { "epoch": 2.7645051194539247, "grad_norm": 2.5884251594543457, "learning_rate": 7.849829351535836e-05, "loss": 6.2226, "step": 8100 }, { "epoch": 2.7648464163822526, "grad_norm": 2.6210410594940186, "learning_rate": 7.838452787258248e-05, "loss": 5.378, "step": 8101 }, { "epoch": 2.7651877133105804, "grad_norm": 2.380128860473633, "learning_rate": 7.82707622298066e-05, "loss": 4.1676, "step": 8102 }, { "epoch": 2.765529010238908, "grad_norm": 2.6382617950439453, "learning_rate": 7.815699658703073e-05, "loss": 6.0014, "step": 8103 }, { "epoch": 2.7658703071672353, "grad_norm": 2.6179182529449463, "learning_rate": 7.804323094425483e-05, "loss": 5.9414, "step": 8104 }, { "epoch": 2.766211604095563, "grad_norm": 2.5637147426605225, "learning_rate": 7.792946530147895e-05, "loss": 5.5923, "step": 8105 }, { "epoch": 2.766552901023891, "grad_norm": 2.6299054622650146, "learning_rate": 7.781569965870307e-05, "loss": 5.6322, "step": 8106 }, { "epoch": 2.7668941979522184, "grad_norm": 2.7012157440185547, "learning_rate": 7.770193401592719e-05, "loss": 5.6976, "step": 8107 }, { "epoch": 2.767235494880546, "grad_norm": 2.60090970993042, "learning_rate": 7.758816837315132e-05, "loss": 6.3271, "step": 8108 }, { "epoch": 2.7675767918088736, "grad_norm": 2.5655224323272705, "learning_rate": 7.747440273037543e-05, "loss": 5.833, "step": 8109 }, { "epoch": 2.7679180887372015, "grad_norm": 2.630000114440918, "learning_rate": 7.736063708759954e-05, "loss": 5.7833, "step": 8110 }, { "epoch": 2.768259385665529, "grad_norm": 2.582270860671997, "learning_rate": 7.724687144482366e-05, "loss": 5.4806, "step": 8111 }, { "epoch": 2.7686006825938567, "grad_norm": 2.481131076812744, "learning_rate": 7.713310580204778e-05, "loss": 4.7699, "step": 8112 }, { "epoch": 2.768941979522184, "grad_norm": 1.8021996021270752, "learning_rate": 7.701934015927191e-05, "loss": 3.0427, "step": 8113 }, { "epoch": 2.769283276450512, "grad_norm": 2.502584457397461, "learning_rate": 7.690557451649602e-05, "loss": 6.344, "step": 8114 }, { "epoch": 2.76962457337884, "grad_norm": 2.3312814235687256, "learning_rate": 7.679180887372014e-05, "loss": 4.1687, "step": 8115 }, { "epoch": 2.7699658703071672, "grad_norm": 2.548377752304077, "learning_rate": 7.667804323094426e-05, "loss": 5.0895, "step": 8116 }, { "epoch": 2.7703071672354946, "grad_norm": 2.6071834564208984, "learning_rate": 7.656427758816837e-05, "loss": 5.0985, "step": 8117 }, { "epoch": 2.7706484641638225, "grad_norm": 2.3951501846313477, "learning_rate": 7.645051194539249e-05, "loss": 4.862, "step": 8118 }, { "epoch": 2.7709897610921503, "grad_norm": 2.610914945602417, "learning_rate": 7.633674630261661e-05, "loss": 6.137, "step": 8119 }, { "epoch": 2.7713310580204777, "grad_norm": 2.4345929622650146, "learning_rate": 7.622298065984073e-05, "loss": 4.7642, "step": 8120 }, { "epoch": 2.7716723549488056, "grad_norm": 2.550601005554199, "learning_rate": 7.610921501706485e-05, "loss": 5.3778, "step": 8121 }, { "epoch": 2.772013651877133, "grad_norm": 2.6055097579956055, "learning_rate": 7.599544937428897e-05, "loss": 5.7184, "step": 8122 }, { "epoch": 2.772354948805461, "grad_norm": 2.512861490249634, "learning_rate": 7.588168373151309e-05, "loss": 5.2337, "step": 8123 }, { "epoch": 2.7726962457337883, "grad_norm": 2.71311354637146, "learning_rate": 7.57679180887372e-05, "loss": 6.3602, "step": 8124 }, { "epoch": 2.773037542662116, "grad_norm": 2.6916327476501465, "learning_rate": 7.565415244596132e-05, "loss": 6.0187, "step": 8125 }, { "epoch": 2.7733788395904435, "grad_norm": 2.6451501846313477, "learning_rate": 7.554038680318544e-05, "loss": 5.8547, "step": 8126 }, { "epoch": 2.7737201365187714, "grad_norm": 2.736882209777832, "learning_rate": 7.542662116040956e-05, "loss": 6.2056, "step": 8127 }, { "epoch": 2.774061433447099, "grad_norm": 2.5844454765319824, "learning_rate": 7.531285551763368e-05, "loss": 5.795, "step": 8128 }, { "epoch": 2.7744027303754266, "grad_norm": 2.4675440788269043, "learning_rate": 7.51990898748578e-05, "loss": 5.4714, "step": 8129 }, { "epoch": 2.774744027303754, "grad_norm": 2.4359679222106934, "learning_rate": 7.508532423208191e-05, "loss": 5.8461, "step": 8130 }, { "epoch": 2.775085324232082, "grad_norm": 2.678617477416992, "learning_rate": 7.497155858930603e-05, "loss": 5.3954, "step": 8131 }, { "epoch": 2.7754266211604097, "grad_norm": 2.575972557067871, "learning_rate": 7.485779294653015e-05, "loss": 6.3859, "step": 8132 }, { "epoch": 2.775767918088737, "grad_norm": 2.5873968601226807, "learning_rate": 7.474402730375427e-05, "loss": 6.2744, "step": 8133 }, { "epoch": 2.776109215017065, "grad_norm": 2.5679585933685303, "learning_rate": 7.463026166097839e-05, "loss": 6.3518, "step": 8134 }, { "epoch": 2.7764505119453924, "grad_norm": 2.5673532485961914, "learning_rate": 7.45164960182025e-05, "loss": 5.3267, "step": 8135 }, { "epoch": 2.7767918088737202, "grad_norm": 2.5870144367218018, "learning_rate": 7.440273037542663e-05, "loss": 5.5847, "step": 8136 }, { "epoch": 2.7771331058020476, "grad_norm": 2.542771100997925, "learning_rate": 7.428896473265074e-05, "loss": 4.9482, "step": 8137 }, { "epoch": 2.7774744027303755, "grad_norm": 2.619039535522461, "learning_rate": 7.417519908987486e-05, "loss": 6.4865, "step": 8138 }, { "epoch": 2.777815699658703, "grad_norm": 2.628445863723755, "learning_rate": 7.406143344709898e-05, "loss": 5.8237, "step": 8139 }, { "epoch": 2.7781569965870307, "grad_norm": 2.5685930252075195, "learning_rate": 7.394766780432309e-05, "loss": 6.1367, "step": 8140 }, { "epoch": 2.7784982935153586, "grad_norm": 2.6697919368743896, "learning_rate": 7.383390216154722e-05, "loss": 5.8577, "step": 8141 }, { "epoch": 2.778839590443686, "grad_norm": 2.6898036003112793, "learning_rate": 7.372013651877134e-05, "loss": 6.3594, "step": 8142 }, { "epoch": 2.7791808873720134, "grad_norm": 2.5270895957946777, "learning_rate": 7.360637087599546e-05, "loss": 6.1053, "step": 8143 }, { "epoch": 2.7795221843003413, "grad_norm": 2.6355865001678467, "learning_rate": 7.349260523321957e-05, "loss": 4.7732, "step": 8144 }, { "epoch": 2.779863481228669, "grad_norm": 2.624152898788452, "learning_rate": 7.337883959044368e-05, "loss": 6.3116, "step": 8145 }, { "epoch": 2.7802047781569965, "grad_norm": 2.588355302810669, "learning_rate": 7.326507394766781e-05, "loss": 5.3328, "step": 8146 }, { "epoch": 2.7805460750853244, "grad_norm": 2.633571147918701, "learning_rate": 7.315130830489193e-05, "loss": 6.1316, "step": 8147 }, { "epoch": 2.7808873720136518, "grad_norm": 2.573197364807129, "learning_rate": 7.303754266211605e-05, "loss": 6.7343, "step": 8148 }, { "epoch": 2.7812286689419796, "grad_norm": 2.558769464492798, "learning_rate": 7.292377701934017e-05, "loss": 5.357, "step": 8149 }, { "epoch": 2.781569965870307, "grad_norm": 2.6176862716674805, "learning_rate": 7.281001137656427e-05, "loss": 5.6288, "step": 8150 }, { "epoch": 2.781911262798635, "grad_norm": 2.510423183441162, "learning_rate": 7.26962457337884e-05, "loss": 6.4119, "step": 8151 }, { "epoch": 2.7822525597269623, "grad_norm": 2.7808709144592285, "learning_rate": 7.258248009101251e-05, "loss": 4.3593, "step": 8152 }, { "epoch": 2.78259385665529, "grad_norm": 2.575263261795044, "learning_rate": 7.246871444823664e-05, "loss": 6.4656, "step": 8153 }, { "epoch": 2.782935153583618, "grad_norm": 2.588742256164551, "learning_rate": 7.235494880546076e-05, "loss": 5.9384, "step": 8154 }, { "epoch": 2.7832764505119454, "grad_norm": 2.6123979091644287, "learning_rate": 7.224118316268486e-05, "loss": 6.0583, "step": 8155 }, { "epoch": 2.783617747440273, "grad_norm": 2.4757981300354004, "learning_rate": 7.2127417519909e-05, "loss": 5.5663, "step": 8156 }, { "epoch": 2.7839590443686006, "grad_norm": 2.5429530143737793, "learning_rate": 7.20136518771331e-05, "loss": 5.9294, "step": 8157 }, { "epoch": 2.7843003412969285, "grad_norm": 2.5413453578948975, "learning_rate": 7.189988623435722e-05, "loss": 5.9371, "step": 8158 }, { "epoch": 2.784641638225256, "grad_norm": 2.5826592445373535, "learning_rate": 7.178612059158135e-05, "loss": 5.9754, "step": 8159 }, { "epoch": 2.7849829351535837, "grad_norm": 2.4613709449768066, "learning_rate": 7.167235494880546e-05, "loss": 5.7554, "step": 8160 }, { "epoch": 2.785324232081911, "grad_norm": 2.59067964553833, "learning_rate": 7.155858930602959e-05, "loss": 6.3009, "step": 8161 }, { "epoch": 2.785665529010239, "grad_norm": 2.543574810028076, "learning_rate": 7.144482366325369e-05, "loss": 5.7074, "step": 8162 }, { "epoch": 2.7860068259385664, "grad_norm": 2.595146656036377, "learning_rate": 7.133105802047781e-05, "loss": 6.4757, "step": 8163 }, { "epoch": 2.7863481228668943, "grad_norm": 5.569501876831055, "learning_rate": 7.121729237770194e-05, "loss": 4.9309, "step": 8164 }, { "epoch": 2.7866894197952217, "grad_norm": 2.49450945854187, "learning_rate": 7.110352673492605e-05, "loss": 5.1546, "step": 8165 }, { "epoch": 2.7870307167235495, "grad_norm": 2.642744541168213, "learning_rate": 7.098976109215018e-05, "loss": 6.0856, "step": 8166 }, { "epoch": 2.7873720136518774, "grad_norm": 2.5893845558166504, "learning_rate": 7.087599544937429e-05, "loss": 6.0408, "step": 8167 }, { "epoch": 2.7877133105802048, "grad_norm": 2.603260040283203, "learning_rate": 7.07622298065984e-05, "loss": 5.7834, "step": 8168 }, { "epoch": 2.788054607508532, "grad_norm": 2.669589042663574, "learning_rate": 7.064846416382252e-05, "loss": 5.6021, "step": 8169 }, { "epoch": 2.78839590443686, "grad_norm": 2.57565975189209, "learning_rate": 7.053469852104664e-05, "loss": 5.8934, "step": 8170 }, { "epoch": 2.788737201365188, "grad_norm": 2.455730438232422, "learning_rate": 7.042093287827077e-05, "loss": 5.5977, "step": 8171 }, { "epoch": 2.7890784982935153, "grad_norm": 2.560124635696411, "learning_rate": 7.030716723549488e-05, "loss": 5.4134, "step": 8172 }, { "epoch": 2.789419795221843, "grad_norm": 2.573716163635254, "learning_rate": 7.0193401592719e-05, "loss": 5.9382, "step": 8173 }, { "epoch": 2.7897610921501705, "grad_norm": 2.4947168827056885, "learning_rate": 7.007963594994311e-05, "loss": 5.3002, "step": 8174 }, { "epoch": 2.7901023890784984, "grad_norm": 2.6110596656799316, "learning_rate": 6.996587030716723e-05, "loss": 6.0533, "step": 8175 }, { "epoch": 2.790443686006826, "grad_norm": 2.5457863807678223, "learning_rate": 6.985210466439137e-05, "loss": 6.1391, "step": 8176 }, { "epoch": 2.7907849829351536, "grad_norm": 2.6228179931640625, "learning_rate": 6.973833902161547e-05, "loss": 6.0603, "step": 8177 }, { "epoch": 2.791126279863481, "grad_norm": 2.5657594203948975, "learning_rate": 6.962457337883959e-05, "loss": 5.8292, "step": 8178 }, { "epoch": 2.791467576791809, "grad_norm": 2.512878894805908, "learning_rate": 6.951080773606371e-05, "loss": 5.5296, "step": 8179 }, { "epoch": 2.7918088737201368, "grad_norm": 2.526179075241089, "learning_rate": 6.939704209328783e-05, "loss": 5.4596, "step": 8180 }, { "epoch": 2.792150170648464, "grad_norm": 2.5494656562805176, "learning_rate": 6.928327645051196e-05, "loss": 5.7413, "step": 8181 }, { "epoch": 2.7924914675767916, "grad_norm": 2.623551368713379, "learning_rate": 6.916951080773606e-05, "loss": 5.9198, "step": 8182 }, { "epoch": 2.7928327645051194, "grad_norm": 2.609656572341919, "learning_rate": 6.905574516496018e-05, "loss": 6.1618, "step": 8183 }, { "epoch": 2.7931740614334473, "grad_norm": 3.3349082469940186, "learning_rate": 6.89419795221843e-05, "loss": 4.9097, "step": 8184 }, { "epoch": 2.7935153583617747, "grad_norm": 2.6144847869873047, "learning_rate": 6.882821387940842e-05, "loss": 6.3871, "step": 8185 }, { "epoch": 2.7938566552901025, "grad_norm": 2.5866599082946777, "learning_rate": 6.871444823663254e-05, "loss": 5.3323, "step": 8186 }, { "epoch": 2.79419795221843, "grad_norm": 2.489753246307373, "learning_rate": 6.860068259385666e-05, "loss": 5.2399, "step": 8187 }, { "epoch": 2.7945392491467578, "grad_norm": 2.5292809009552, "learning_rate": 6.848691695108077e-05, "loss": 4.0588, "step": 8188 }, { "epoch": 2.794880546075085, "grad_norm": 2.519035577774048, "learning_rate": 6.837315130830489e-05, "loss": 5.8272, "step": 8189 }, { "epoch": 2.795221843003413, "grad_norm": 2.5505032539367676, "learning_rate": 6.825938566552901e-05, "loss": 6.0068, "step": 8190 }, { "epoch": 2.7955631399317404, "grad_norm": 2.5838968753814697, "learning_rate": 6.814562002275313e-05, "loss": 6.1091, "step": 8191 }, { "epoch": 2.7959044368600683, "grad_norm": 4.409801006317139, "learning_rate": 6.803185437997725e-05, "loss": 4.6617, "step": 8192 }, { "epoch": 2.796245733788396, "grad_norm": 2.558980703353882, "learning_rate": 6.791808873720137e-05, "loss": 5.8792, "step": 8193 }, { "epoch": 2.7965870307167235, "grad_norm": 2.547933340072632, "learning_rate": 6.780432309442548e-05, "loss": 6.102, "step": 8194 }, { "epoch": 2.796928327645051, "grad_norm": 2.719900608062744, "learning_rate": 6.76905574516496e-05, "loss": 5.5898, "step": 8195 }, { "epoch": 2.797269624573379, "grad_norm": 2.68369722366333, "learning_rate": 6.757679180887372e-05, "loss": 5.1553, "step": 8196 }, { "epoch": 2.7976109215017066, "grad_norm": 2.6060352325439453, "learning_rate": 6.746302616609784e-05, "loss": 6.2941, "step": 8197 }, { "epoch": 2.797952218430034, "grad_norm": 2.6711232662200928, "learning_rate": 6.734926052332196e-05, "loss": 6.4384, "step": 8198 }, { "epoch": 2.798293515358362, "grad_norm": 2.690762996673584, "learning_rate": 6.723549488054608e-05, "loss": 6.0928, "step": 8199 }, { "epoch": 2.7986348122866893, "grad_norm": 2.6682162284851074, "learning_rate": 6.71217292377702e-05, "loss": 6.2376, "step": 8200 }, { "epoch": 2.798976109215017, "grad_norm": 2.683518171310425, "learning_rate": 6.700796359499431e-05, "loss": 6.319, "step": 8201 }, { "epoch": 2.7993174061433446, "grad_norm": 2.5631539821624756, "learning_rate": 6.689419795221843e-05, "loss": 6.1925, "step": 8202 }, { "epoch": 2.7996587030716724, "grad_norm": 2.5291433334350586, "learning_rate": 6.678043230944254e-05, "loss": 6.2034, "step": 8203 }, { "epoch": 2.8, "grad_norm": 2.4929697513580322, "learning_rate": 6.666666666666667e-05, "loss": 5.6572, "step": 8204 }, { "epoch": 2.8003412969283277, "grad_norm": 2.6479387283325195, "learning_rate": 6.655290102389079e-05, "loss": 5.844, "step": 8205 }, { "epoch": 2.8006825938566555, "grad_norm": 3.730424404144287, "learning_rate": 6.643913538111491e-05, "loss": 3.1324, "step": 8206 }, { "epoch": 2.801023890784983, "grad_norm": 2.496727228164673, "learning_rate": 6.632536973833903e-05, "loss": 5.9789, "step": 8207 }, { "epoch": 2.8013651877133103, "grad_norm": 2.9955997467041016, "learning_rate": 6.621160409556313e-05, "loss": 4.9055, "step": 8208 }, { "epoch": 2.801706484641638, "grad_norm": 2.6087541580200195, "learning_rate": 6.609783845278726e-05, "loss": 6.3396, "step": 8209 }, { "epoch": 2.802047781569966, "grad_norm": 2.530484676361084, "learning_rate": 6.598407281001138e-05, "loss": 5.6127, "step": 8210 }, { "epoch": 2.8023890784982934, "grad_norm": 2.2731897830963135, "learning_rate": 6.58703071672355e-05, "loss": 3.3329, "step": 8211 }, { "epoch": 2.8027303754266213, "grad_norm": 2.5415871143341064, "learning_rate": 6.575654152445962e-05, "loss": 5.8304, "step": 8212 }, { "epoch": 2.8030716723549487, "grad_norm": 2.5074963569641113, "learning_rate": 6.564277588168372e-05, "loss": 5.6721, "step": 8213 }, { "epoch": 2.8034129692832765, "grad_norm": 2.5421886444091797, "learning_rate": 6.552901023890785e-05, "loss": 6.2318, "step": 8214 }, { "epoch": 2.803754266211604, "grad_norm": 2.5332021713256836, "learning_rate": 6.541524459613197e-05, "loss": 5.7743, "step": 8215 }, { "epoch": 2.804095563139932, "grad_norm": 2.539001226425171, "learning_rate": 6.530147895335609e-05, "loss": 5.5197, "step": 8216 }, { "epoch": 2.804436860068259, "grad_norm": 2.585606336593628, "learning_rate": 6.518771331058021e-05, "loss": 5.9402, "step": 8217 }, { "epoch": 2.804778156996587, "grad_norm": 2.5669174194335938, "learning_rate": 6.507394766780432e-05, "loss": 5.8436, "step": 8218 }, { "epoch": 2.805119453924915, "grad_norm": 2.6501104831695557, "learning_rate": 6.496018202502845e-05, "loss": 6.0262, "step": 8219 }, { "epoch": 2.8054607508532423, "grad_norm": 2.6469192504882812, "learning_rate": 6.484641638225255e-05, "loss": 6.1936, "step": 8220 }, { "epoch": 2.8058020477815697, "grad_norm": 2.4249207973480225, "learning_rate": 6.473265073947668e-05, "loss": 5.074, "step": 8221 }, { "epoch": 2.8061433447098976, "grad_norm": 2.5924134254455566, "learning_rate": 6.46188850967008e-05, "loss": 6.188, "step": 8222 }, { "epoch": 2.8064846416382254, "grad_norm": 2.559102773666382, "learning_rate": 6.450511945392491e-05, "loss": 5.493, "step": 8223 }, { "epoch": 2.806825938566553, "grad_norm": 2.6225290298461914, "learning_rate": 6.439135381114904e-05, "loss": 5.5104, "step": 8224 }, { "epoch": 2.8071672354948807, "grad_norm": 2.5742197036743164, "learning_rate": 6.427758816837314e-05, "loss": 6.0183, "step": 8225 }, { "epoch": 2.807508532423208, "grad_norm": 2.534911870956421, "learning_rate": 6.416382252559728e-05, "loss": 5.6932, "step": 8226 }, { "epoch": 2.807849829351536, "grad_norm": 2.5558876991271973, "learning_rate": 6.40500568828214e-05, "loss": 5.7026, "step": 8227 }, { "epoch": 2.8081911262798633, "grad_norm": 2.528634786605835, "learning_rate": 6.39362912400455e-05, "loss": 5.7845, "step": 8228 }, { "epoch": 2.808532423208191, "grad_norm": 2.934544324874878, "learning_rate": 6.382252559726963e-05, "loss": 4.4868, "step": 8229 }, { "epoch": 2.8088737201365186, "grad_norm": 2.5007529258728027, "learning_rate": 6.370875995449374e-05, "loss": 4.6475, "step": 8230 }, { "epoch": 2.8092150170648464, "grad_norm": 2.566521644592285, "learning_rate": 6.359499431171787e-05, "loss": 5.7933, "step": 8231 }, { "epoch": 2.8095563139931743, "grad_norm": 2.5641417503356934, "learning_rate": 6.348122866894199e-05, "loss": 5.4627, "step": 8232 }, { "epoch": 2.8098976109215017, "grad_norm": 2.489718437194824, "learning_rate": 6.336746302616609e-05, "loss": 5.6577, "step": 8233 }, { "epoch": 2.810238907849829, "grad_norm": 2.5271124839782715, "learning_rate": 6.325369738339022e-05, "loss": 5.8988, "step": 8234 }, { "epoch": 2.810580204778157, "grad_norm": 2.544497013092041, "learning_rate": 6.313993174061433e-05, "loss": 6.0608, "step": 8235 }, { "epoch": 2.810921501706485, "grad_norm": 3.1858813762664795, "learning_rate": 6.302616609783846e-05, "loss": 4.988, "step": 8236 }, { "epoch": 2.811262798634812, "grad_norm": 2.5615243911743164, "learning_rate": 6.291240045506257e-05, "loss": 5.9591, "step": 8237 }, { "epoch": 2.81160409556314, "grad_norm": 2.4544830322265625, "learning_rate": 6.279863481228669e-05, "loss": 6.0645, "step": 8238 }, { "epoch": 2.8119453924914675, "grad_norm": 2.5661332607269287, "learning_rate": 6.268486916951082e-05, "loss": 4.455, "step": 8239 }, { "epoch": 2.8122866894197953, "grad_norm": 2.561377763748169, "learning_rate": 6.257110352673492e-05, "loss": 6.0717, "step": 8240 }, { "epoch": 2.8126279863481227, "grad_norm": 2.537170648574829, "learning_rate": 6.245733788395905e-05, "loss": 4.6822, "step": 8241 }, { "epoch": 2.8129692832764506, "grad_norm": 2.4561476707458496, "learning_rate": 6.234357224118316e-05, "loss": 5.5428, "step": 8242 }, { "epoch": 2.813310580204778, "grad_norm": 2.530233383178711, "learning_rate": 6.222980659840728e-05, "loss": 6.0164, "step": 8243 }, { "epoch": 2.813651877133106, "grad_norm": 2.61234974861145, "learning_rate": 6.21160409556314e-05, "loss": 5.5028, "step": 8244 }, { "epoch": 2.8139931740614337, "grad_norm": 2.5273263454437256, "learning_rate": 6.200227531285551e-05, "loss": 5.8513, "step": 8245 }, { "epoch": 2.814334470989761, "grad_norm": 2.5896804332733154, "learning_rate": 6.188850967007965e-05, "loss": 5.3815, "step": 8246 }, { "epoch": 2.8146757679180885, "grad_norm": 2.605863332748413, "learning_rate": 6.177474402730375e-05, "loss": 5.9044, "step": 8247 }, { "epoch": 2.8150170648464163, "grad_norm": 2.4373059272766113, "learning_rate": 6.166097838452787e-05, "loss": 5.8908, "step": 8248 }, { "epoch": 2.815358361774744, "grad_norm": 2.578674793243408, "learning_rate": 6.154721274175199e-05, "loss": 6.1842, "step": 8249 }, { "epoch": 2.8156996587030716, "grad_norm": 2.639101028442383, "learning_rate": 6.143344709897611e-05, "loss": 5.4184, "step": 8250 }, { "epoch": 2.8160409556313994, "grad_norm": 2.5843536853790283, "learning_rate": 6.131968145620023e-05, "loss": 6.1219, "step": 8251 }, { "epoch": 2.816382252559727, "grad_norm": 2.562448263168335, "learning_rate": 6.120591581342434e-05, "loss": 5.6921, "step": 8252 }, { "epoch": 2.8167235494880547, "grad_norm": 2.592428207397461, "learning_rate": 6.109215017064846e-05, "loss": 6.1349, "step": 8253 }, { "epoch": 2.817064846416382, "grad_norm": 2.594329357147217, "learning_rate": 6.097838452787258e-05, "loss": 6.4242, "step": 8254 }, { "epoch": 2.81740614334471, "grad_norm": 2.5750083923339844, "learning_rate": 6.08646188850967e-05, "loss": 5.7389, "step": 8255 }, { "epoch": 2.8177474402730374, "grad_norm": 2.5281083583831787, "learning_rate": 6.075085324232082e-05, "loss": 5.9597, "step": 8256 }, { "epoch": 2.818088737201365, "grad_norm": 2.5450093746185303, "learning_rate": 6.063708759954494e-05, "loss": 5.7704, "step": 8257 }, { "epoch": 2.818430034129693, "grad_norm": 2.50821852684021, "learning_rate": 6.052332195676906e-05, "loss": 5.5946, "step": 8258 }, { "epoch": 2.8187713310580205, "grad_norm": 2.6139910221099854, "learning_rate": 6.0409556313993174e-05, "loss": 6.1745, "step": 8259 }, { "epoch": 2.819112627986348, "grad_norm": 2.6088783740997314, "learning_rate": 6.029579067121729e-05, "loss": 5.8251, "step": 8260 }, { "epoch": 2.8194539249146757, "grad_norm": 2.6340761184692383, "learning_rate": 6.018202502844141e-05, "loss": 4.9621, "step": 8261 }, { "epoch": 2.8197952218430036, "grad_norm": 2.5626347064971924, "learning_rate": 6.006825938566553e-05, "loss": 5.829, "step": 8262 }, { "epoch": 2.820136518771331, "grad_norm": 2.573450803756714, "learning_rate": 5.9954493742889654e-05, "loss": 5.9689, "step": 8263 }, { "epoch": 2.820477815699659, "grad_norm": 2.446423053741455, "learning_rate": 5.9840728100113766e-05, "loss": 5.088, "step": 8264 }, { "epoch": 2.8208191126279862, "grad_norm": 2.590911865234375, "learning_rate": 5.9726962457337885e-05, "loss": 5.8315, "step": 8265 }, { "epoch": 2.821160409556314, "grad_norm": 2.581800699234009, "learning_rate": 5.9613196814562e-05, "loss": 6.0672, "step": 8266 }, { "epoch": 2.8215017064846415, "grad_norm": 2.534433126449585, "learning_rate": 5.949943117178612e-05, "loss": 5.7578, "step": 8267 }, { "epoch": 2.8218430034129693, "grad_norm": 2.6216630935668945, "learning_rate": 5.938566552901024e-05, "loss": 5.6905, "step": 8268 }, { "epoch": 2.8221843003412967, "grad_norm": 2.5895118713378906, "learning_rate": 5.927189988623436e-05, "loss": 6.1605, "step": 8269 }, { "epoch": 2.8225255972696246, "grad_norm": 2.5925073623657227, "learning_rate": 5.915813424345848e-05, "loss": 6.0352, "step": 8270 }, { "epoch": 2.8228668941979524, "grad_norm": 2.5645461082458496, "learning_rate": 5.9044368600682596e-05, "loss": 5.7567, "step": 8271 }, { "epoch": 2.82320819112628, "grad_norm": 2.479318857192993, "learning_rate": 5.8930602957906714e-05, "loss": 5.6575, "step": 8272 }, { "epoch": 2.8235494880546073, "grad_norm": 2.6021623611450195, "learning_rate": 5.881683731513083e-05, "loss": 4.8975, "step": 8273 }, { "epoch": 2.823890784982935, "grad_norm": 2.5270769596099854, "learning_rate": 5.870307167235495e-05, "loss": 6.0327, "step": 8274 }, { "epoch": 2.824232081911263, "grad_norm": 2.6603550910949707, "learning_rate": 5.858930602957907e-05, "loss": 6.38, "step": 8275 }, { "epoch": 2.8245733788395904, "grad_norm": 2.523515462875366, "learning_rate": 5.847554038680319e-05, "loss": 5.7957, "step": 8276 }, { "epoch": 2.824914675767918, "grad_norm": 2.602630615234375, "learning_rate": 5.8361774744027307e-05, "loss": 6.3797, "step": 8277 }, { "epoch": 2.8252559726962456, "grad_norm": 3.1169798374176025, "learning_rate": 5.824800910125142e-05, "loss": 4.2113, "step": 8278 }, { "epoch": 2.8255972696245735, "grad_norm": 2.580352306365967, "learning_rate": 5.813424345847554e-05, "loss": 6.75, "step": 8279 }, { "epoch": 2.825938566552901, "grad_norm": 2.5643091201782227, "learning_rate": 5.802047781569966e-05, "loss": 5.7626, "step": 8280 }, { "epoch": 2.8262798634812287, "grad_norm": 2.5562422275543213, "learning_rate": 5.790671217292378e-05, "loss": 6.2436, "step": 8281 }, { "epoch": 2.826621160409556, "grad_norm": 2.547008514404297, "learning_rate": 5.77929465301479e-05, "loss": 6.1454, "step": 8282 }, { "epoch": 2.826962457337884, "grad_norm": 2.544355869293213, "learning_rate": 5.767918088737201e-05, "loss": 6.3044, "step": 8283 }, { "epoch": 2.827303754266212, "grad_norm": 2.6169373989105225, "learning_rate": 5.756541524459613e-05, "loss": 5.7332, "step": 8284 }, { "epoch": 2.8276450511945392, "grad_norm": 2.581423759460449, "learning_rate": 5.745164960182025e-05, "loss": 5.234, "step": 8285 }, { "epoch": 2.8279863481228666, "grad_norm": 2.5888426303863525, "learning_rate": 5.733788395904437e-05, "loss": 6.8817, "step": 8286 }, { "epoch": 2.8283276450511945, "grad_norm": 2.491856098175049, "learning_rate": 5.722411831626849e-05, "loss": 5.7124, "step": 8287 }, { "epoch": 2.8286689419795223, "grad_norm": 2.546119213104248, "learning_rate": 5.71103526734926e-05, "loss": 5.9523, "step": 8288 }, { "epoch": 2.8290102389078498, "grad_norm": 2.61152720451355, "learning_rate": 5.699658703071672e-05, "loss": 5.9196, "step": 8289 }, { "epoch": 2.8293515358361776, "grad_norm": 2.7438294887542725, "learning_rate": 5.688282138794084e-05, "loss": 5.4704, "step": 8290 }, { "epoch": 2.829692832764505, "grad_norm": 2.6315395832061768, "learning_rate": 5.6769055745164965e-05, "loss": 5.323, "step": 8291 }, { "epoch": 2.830034129692833, "grad_norm": 2.675381660461426, "learning_rate": 5.6655290102389084e-05, "loss": 5.9233, "step": 8292 }, { "epoch": 2.8303754266211603, "grad_norm": 2.479240894317627, "learning_rate": 5.6541524459613196e-05, "loss": 5.9549, "step": 8293 }, { "epoch": 2.830716723549488, "grad_norm": 2.5712265968322754, "learning_rate": 5.6427758816837314e-05, "loss": 6.1167, "step": 8294 }, { "epoch": 2.8310580204778155, "grad_norm": 2.5504567623138428, "learning_rate": 5.631399317406143e-05, "loss": 6.1586, "step": 8295 }, { "epoch": 2.8313993174061434, "grad_norm": 2.505173444747925, "learning_rate": 5.620022753128555e-05, "loss": 6.0159, "step": 8296 }, { "epoch": 2.831740614334471, "grad_norm": 2.5844502449035645, "learning_rate": 5.6086461888509676e-05, "loss": 6.1771, "step": 8297 }, { "epoch": 2.8320819112627986, "grad_norm": 2.5144166946411133, "learning_rate": 5.597269624573379e-05, "loss": 6.1626, "step": 8298 }, { "epoch": 2.832423208191126, "grad_norm": 2.5902957916259766, "learning_rate": 5.585893060295791e-05, "loss": 5.6985, "step": 8299 }, { "epoch": 2.832764505119454, "grad_norm": 2.809479236602783, "learning_rate": 5.5745164960182025e-05, "loss": 4.2213, "step": 8300 }, { "epoch": 2.8331058020477817, "grad_norm": 2.563436269760132, "learning_rate": 5.5631399317406144e-05, "loss": 5.617, "step": 8301 }, { "epoch": 2.833447098976109, "grad_norm": 2.593966484069824, "learning_rate": 5.551763367463027e-05, "loss": 5.7472, "step": 8302 }, { "epoch": 2.833788395904437, "grad_norm": 2.590921640396118, "learning_rate": 5.540386803185438e-05, "loss": 5.1288, "step": 8303 }, { "epoch": 2.8341296928327644, "grad_norm": 2.558959722518921, "learning_rate": 5.52901023890785e-05, "loss": 5.8822, "step": 8304 }, { "epoch": 2.8344709897610922, "grad_norm": 2.5313167572021484, "learning_rate": 5.517633674630262e-05, "loss": 6.0652, "step": 8305 }, { "epoch": 2.8348122866894196, "grad_norm": 2.59818172454834, "learning_rate": 5.5062571103526736e-05, "loss": 5.8414, "step": 8306 }, { "epoch": 2.8351535836177475, "grad_norm": 2.5654821395874023, "learning_rate": 5.4948805460750855e-05, "loss": 5.9746, "step": 8307 }, { "epoch": 2.835494880546075, "grad_norm": 2.573014974594116, "learning_rate": 5.483503981797497e-05, "loss": 6.3835, "step": 8308 }, { "epoch": 2.8358361774744028, "grad_norm": 2.426826238632202, "learning_rate": 5.472127417519909e-05, "loss": 5.2187, "step": 8309 }, { "epoch": 2.8361774744027306, "grad_norm": 2.524761199951172, "learning_rate": 5.460750853242321e-05, "loss": 6.1975, "step": 8310 }, { "epoch": 2.836518771331058, "grad_norm": 2.5690839290618896, "learning_rate": 5.449374288964733e-05, "loss": 5.8855, "step": 8311 }, { "epoch": 2.8368600682593854, "grad_norm": 2.8350605964660645, "learning_rate": 5.437997724687145e-05, "loss": 4.5688, "step": 8312 }, { "epoch": 2.8372013651877133, "grad_norm": 2.7381675243377686, "learning_rate": 5.426621160409556e-05, "loss": 5.0834, "step": 8313 }, { "epoch": 2.837542662116041, "grad_norm": 2.534423828125, "learning_rate": 5.4152445961319684e-05, "loss": 6.1567, "step": 8314 }, { "epoch": 2.8378839590443685, "grad_norm": 2.4711785316467285, "learning_rate": 5.40386803185438e-05, "loss": 5.8467, "step": 8315 }, { "epoch": 2.8382252559726964, "grad_norm": 2.48486328125, "learning_rate": 5.392491467576792e-05, "loss": 5.5474, "step": 8316 }, { "epoch": 2.8385665529010238, "grad_norm": 2.6153039932250977, "learning_rate": 5.381114903299204e-05, "loss": 6.1734, "step": 8317 }, { "epoch": 2.8389078498293516, "grad_norm": 2.5097267627716064, "learning_rate": 5.369738339021615e-05, "loss": 5.9648, "step": 8318 }, { "epoch": 2.839249146757679, "grad_norm": 2.603243827819824, "learning_rate": 5.3583617747440277e-05, "loss": 5.7459, "step": 8319 }, { "epoch": 2.839590443686007, "grad_norm": 2.543027877807617, "learning_rate": 5.3469852104664395e-05, "loss": 6.1973, "step": 8320 }, { "epoch": 2.8399317406143343, "grad_norm": 2.5744729042053223, "learning_rate": 5.3356086461888514e-05, "loss": 6.2723, "step": 8321 }, { "epoch": 2.840273037542662, "grad_norm": 2.5325405597686768, "learning_rate": 5.324232081911263e-05, "loss": 6.2439, "step": 8322 }, { "epoch": 2.84061433447099, "grad_norm": 2.521803617477417, "learning_rate": 5.3128555176336744e-05, "loss": 4.9205, "step": 8323 }, { "epoch": 2.8409556313993174, "grad_norm": 2.6305558681488037, "learning_rate": 5.301478953356086e-05, "loss": 6.7376, "step": 8324 }, { "epoch": 2.841296928327645, "grad_norm": 2.589456558227539, "learning_rate": 5.290102389078499e-05, "loss": 5.3846, "step": 8325 }, { "epoch": 2.8416382252559726, "grad_norm": 2.5877840518951416, "learning_rate": 5.2787258248009106e-05, "loss": 5.3142, "step": 8326 }, { "epoch": 2.8419795221843005, "grad_norm": 2.562431573867798, "learning_rate": 5.267349260523322e-05, "loss": 5.6504, "step": 8327 }, { "epoch": 2.842320819112628, "grad_norm": 2.539492607116699, "learning_rate": 5.2559726962457336e-05, "loss": 4.5076, "step": 8328 }, { "epoch": 2.8426621160409558, "grad_norm": 2.5125441551208496, "learning_rate": 5.2445961319681455e-05, "loss": 6.1126, "step": 8329 }, { "epoch": 2.843003412969283, "grad_norm": 2.546273708343506, "learning_rate": 5.233219567690557e-05, "loss": 5.7161, "step": 8330 }, { "epoch": 2.843344709897611, "grad_norm": 2.596364736557007, "learning_rate": 5.22184300341297e-05, "loss": 6.1584, "step": 8331 }, { "epoch": 2.8436860068259384, "grad_norm": 2.556121349334717, "learning_rate": 5.210466439135381e-05, "loss": 5.4851, "step": 8332 }, { "epoch": 2.8440273037542663, "grad_norm": 2.4971086978912354, "learning_rate": 5.199089874857793e-05, "loss": 6.1228, "step": 8333 }, { "epoch": 2.8443686006825937, "grad_norm": 2.5893213748931885, "learning_rate": 5.187713310580205e-05, "loss": 4.826, "step": 8334 }, { "epoch": 2.8447098976109215, "grad_norm": 2.562465190887451, "learning_rate": 5.1763367463026166e-05, "loss": 4.6726, "step": 8335 }, { "epoch": 2.8450511945392494, "grad_norm": 2.575310707092285, "learning_rate": 5.164960182025029e-05, "loss": 6.3681, "step": 8336 }, { "epoch": 2.845392491467577, "grad_norm": 3.0285212993621826, "learning_rate": 5.15358361774744e-05, "loss": 5.0006, "step": 8337 }, { "epoch": 2.845733788395904, "grad_norm": 2.7478995323181152, "learning_rate": 5.142207053469852e-05, "loss": 5.4962, "step": 8338 }, { "epoch": 2.846075085324232, "grad_norm": 2.4442386627197266, "learning_rate": 5.130830489192264e-05, "loss": 5.7498, "step": 8339 }, { "epoch": 2.84641638225256, "grad_norm": 2.5991361141204834, "learning_rate": 5.119453924914676e-05, "loss": 5.419, "step": 8340 }, { "epoch": 2.8467576791808873, "grad_norm": 2.547792434692383, "learning_rate": 5.108077360637088e-05, "loss": 5.9812, "step": 8341 }, { "epoch": 2.847098976109215, "grad_norm": 2.6672022342681885, "learning_rate": 5.0967007963594995e-05, "loss": 4.3484, "step": 8342 }, { "epoch": 2.8474402730375425, "grad_norm": 2.547494649887085, "learning_rate": 5.0853242320819114e-05, "loss": 6.2943, "step": 8343 }, { "epoch": 2.8477815699658704, "grad_norm": 2.4189071655273438, "learning_rate": 5.073947667804323e-05, "loss": 5.0494, "step": 8344 }, { "epoch": 2.848122866894198, "grad_norm": 2.471524477005005, "learning_rate": 5.062571103526735e-05, "loss": 5.5994, "step": 8345 }, { "epoch": 2.8484641638225257, "grad_norm": 2.490009069442749, "learning_rate": 5.051194539249147e-05, "loss": 5.8388, "step": 8346 }, { "epoch": 2.848805460750853, "grad_norm": 2.542736053466797, "learning_rate": 5.039817974971558e-05, "loss": 5.7709, "step": 8347 }, { "epoch": 2.849146757679181, "grad_norm": 2.5808589458465576, "learning_rate": 5.0284414106939706e-05, "loss": 6.7117, "step": 8348 }, { "epoch": 2.8494880546075088, "grad_norm": 2.597972869873047, "learning_rate": 5.0170648464163825e-05, "loss": 6.4791, "step": 8349 }, { "epoch": 2.849829351535836, "grad_norm": 2.577030658721924, "learning_rate": 5.005688282138794e-05, "loss": 6.3102, "step": 8350 }, { "epoch": 2.8501706484641636, "grad_norm": 2.47444486618042, "learning_rate": 4.994311717861206e-05, "loss": 5.5967, "step": 8351 }, { "epoch": 2.8505119453924914, "grad_norm": 2.5281221866607666, "learning_rate": 4.982935153583617e-05, "loss": 5.4821, "step": 8352 }, { "epoch": 2.8508532423208193, "grad_norm": 2.5494225025177, "learning_rate": 4.97155858930603e-05, "loss": 6.4092, "step": 8353 }, { "epoch": 2.8511945392491467, "grad_norm": 2.561234712600708, "learning_rate": 4.960182025028442e-05, "loss": 5.7412, "step": 8354 }, { "epoch": 2.8515358361774745, "grad_norm": 3.5011022090911865, "learning_rate": 4.9488054607508536e-05, "loss": 5.4677, "step": 8355 }, { "epoch": 2.851877133105802, "grad_norm": 2.60447359085083, "learning_rate": 4.9374288964732654e-05, "loss": 6.2699, "step": 8356 }, { "epoch": 2.85221843003413, "grad_norm": 2.556480884552002, "learning_rate": 4.9260523321956766e-05, "loss": 5.9936, "step": 8357 }, { "epoch": 2.852559726962457, "grad_norm": 2.508237600326538, "learning_rate": 4.9146757679180884e-05, "loss": 5.9199, "step": 8358 }, { "epoch": 2.852901023890785, "grad_norm": 2.5938949584960938, "learning_rate": 4.903299203640501e-05, "loss": 6.2548, "step": 8359 }, { "epoch": 2.8532423208191124, "grad_norm": 2.570672035217285, "learning_rate": 4.891922639362913e-05, "loss": 6.0058, "step": 8360 }, { "epoch": 2.8535836177474403, "grad_norm": 2.583148717880249, "learning_rate": 4.8805460750853247e-05, "loss": 6.0509, "step": 8361 }, { "epoch": 2.853924914675768, "grad_norm": 2.4389407634735107, "learning_rate": 4.869169510807736e-05, "loss": 6.2846, "step": 8362 }, { "epoch": 2.8542662116040955, "grad_norm": 2.574002981185913, "learning_rate": 4.857792946530148e-05, "loss": 5.7958, "step": 8363 }, { "epoch": 2.854607508532423, "grad_norm": 2.5079104900360107, "learning_rate": 4.8464163822525595e-05, "loss": 6.4898, "step": 8364 }, { "epoch": 2.854948805460751, "grad_norm": 2.6473233699798584, "learning_rate": 4.835039817974972e-05, "loss": 5.9305, "step": 8365 }, { "epoch": 2.8552901023890787, "grad_norm": 2.551525831222534, "learning_rate": 4.823663253697384e-05, "loss": 5.7632, "step": 8366 }, { "epoch": 2.855631399317406, "grad_norm": 2.5253288745880127, "learning_rate": 4.812286689419795e-05, "loss": 6.1019, "step": 8367 }, { "epoch": 2.855972696245734, "grad_norm": 2.598867654800415, "learning_rate": 4.800910125142207e-05, "loss": 6.0308, "step": 8368 }, { "epoch": 2.8563139931740613, "grad_norm": 2.4677653312683105, "learning_rate": 4.789533560864619e-05, "loss": 5.5467, "step": 8369 }, { "epoch": 2.856655290102389, "grad_norm": 2.6016745567321777, "learning_rate": 4.778156996587031e-05, "loss": 5.6461, "step": 8370 }, { "epoch": 2.8569965870307166, "grad_norm": 2.613708734512329, "learning_rate": 4.766780432309443e-05, "loss": 6.6418, "step": 8371 }, { "epoch": 2.8573378839590444, "grad_norm": 2.5926055908203125, "learning_rate": 4.755403868031854e-05, "loss": 5.8853, "step": 8372 }, { "epoch": 2.857679180887372, "grad_norm": 2.541905403137207, "learning_rate": 4.744027303754266e-05, "loss": 6.4039, "step": 8373 }, { "epoch": 2.8580204778156997, "grad_norm": 2.5121161937713623, "learning_rate": 4.732650739476678e-05, "loss": 5.7596, "step": 8374 }, { "epoch": 2.8583617747440275, "grad_norm": 2.536179780960083, "learning_rate": 4.72127417519909e-05, "loss": 6.1308, "step": 8375 }, { "epoch": 2.858703071672355, "grad_norm": 7.43336296081543, "learning_rate": 4.7098976109215024e-05, "loss": 5.0863, "step": 8376 }, { "epoch": 2.8590443686006823, "grad_norm": 2.5915966033935547, "learning_rate": 4.6985210466439136e-05, "loss": 6.191, "step": 8377 }, { "epoch": 2.85938566552901, "grad_norm": 2.5173799991607666, "learning_rate": 4.6871444823663254e-05, "loss": 6.0567, "step": 8378 }, { "epoch": 2.859726962457338, "grad_norm": 2.5869557857513428, "learning_rate": 4.675767918088737e-05, "loss": 5.7846, "step": 8379 }, { "epoch": 2.8600682593856654, "grad_norm": 2.5171236991882324, "learning_rate": 4.664391353811149e-05, "loss": 6.236, "step": 8380 }, { "epoch": 2.8604095563139933, "grad_norm": 2.6231682300567627, "learning_rate": 4.653014789533561e-05, "loss": 5.6616, "step": 8381 }, { "epoch": 2.8607508532423207, "grad_norm": 2.555541753768921, "learning_rate": 4.641638225255973e-05, "loss": 6.3776, "step": 8382 }, { "epoch": 2.8610921501706486, "grad_norm": 2.5504350662231445, "learning_rate": 4.630261660978385e-05, "loss": 6.1276, "step": 8383 }, { "epoch": 2.861433447098976, "grad_norm": 2.523817300796509, "learning_rate": 4.6188850967007965e-05, "loss": 5.6059, "step": 8384 }, { "epoch": 2.861774744027304, "grad_norm": 2.5135388374328613, "learning_rate": 4.6075085324232084e-05, "loss": 5.924, "step": 8385 }, { "epoch": 2.862116040955631, "grad_norm": 2.5756356716156006, "learning_rate": 4.5961319681456195e-05, "loss": 5.9326, "step": 8386 }, { "epoch": 2.862457337883959, "grad_norm": 2.5897839069366455, "learning_rate": 4.584755403868032e-05, "loss": 6.2464, "step": 8387 }, { "epoch": 2.862798634812287, "grad_norm": 2.522111177444458, "learning_rate": 4.573378839590444e-05, "loss": 5.7213, "step": 8388 }, { "epoch": 2.8631399317406143, "grad_norm": 2.5438270568847656, "learning_rate": 4.562002275312856e-05, "loss": 5.5916, "step": 8389 }, { "epoch": 2.8634812286689417, "grad_norm": 2.5333592891693115, "learning_rate": 4.5506257110352676e-05, "loss": 6.1719, "step": 8390 }, { "epoch": 2.8638225255972696, "grad_norm": 2.5089774131774902, "learning_rate": 4.539249146757679e-05, "loss": 6.1262, "step": 8391 }, { "epoch": 2.8641638225255974, "grad_norm": 2.4316928386688232, "learning_rate": 4.5278725824800906e-05, "loss": 4.518, "step": 8392 }, { "epoch": 2.864505119453925, "grad_norm": 2.5709829330444336, "learning_rate": 4.516496018202503e-05, "loss": 6.1676, "step": 8393 }, { "epoch": 2.8648464163822527, "grad_norm": 2.5119333267211914, "learning_rate": 4.505119453924915e-05, "loss": 4.8229, "step": 8394 }, { "epoch": 2.86518771331058, "grad_norm": 2.5550503730773926, "learning_rate": 4.493742889647327e-05, "loss": 5.6363, "step": 8395 }, { "epoch": 2.865529010238908, "grad_norm": 2.5302844047546387, "learning_rate": 4.482366325369738e-05, "loss": 5.3337, "step": 8396 }, { "epoch": 2.8658703071672353, "grad_norm": 2.5047590732574463, "learning_rate": 4.47098976109215e-05, "loss": 6.5914, "step": 8397 }, { "epoch": 2.866211604095563, "grad_norm": 2.5279815196990967, "learning_rate": 4.4596131968145624e-05, "loss": 6.4273, "step": 8398 }, { "epoch": 2.8665529010238906, "grad_norm": 2.603135347366333, "learning_rate": 4.448236632536974e-05, "loss": 6.5059, "step": 8399 }, { "epoch": 2.8668941979522184, "grad_norm": 2.582097053527832, "learning_rate": 4.436860068259386e-05, "loss": 5.8008, "step": 8400 }, { "epoch": 2.8672354948805463, "grad_norm": 2.5195491313934326, "learning_rate": 4.425483503981797e-05, "loss": 6.622, "step": 8401 }, { "epoch": 2.8675767918088737, "grad_norm": 2.5339550971984863, "learning_rate": 4.414106939704209e-05, "loss": 6.4584, "step": 8402 }, { "epoch": 2.867918088737201, "grad_norm": 2.524705648422241, "learning_rate": 4.402730375426621e-05, "loss": 5.2988, "step": 8403 }, { "epoch": 2.868259385665529, "grad_norm": 2.5974693298339844, "learning_rate": 4.3913538111490335e-05, "loss": 6.255, "step": 8404 }, { "epoch": 2.868600682593857, "grad_norm": 2.66146183013916, "learning_rate": 4.3799772468714454e-05, "loss": 4.7422, "step": 8405 }, { "epoch": 2.868941979522184, "grad_norm": 2.778348922729492, "learning_rate": 4.3686006825938565e-05, "loss": 5.1854, "step": 8406 }, { "epoch": 2.869283276450512, "grad_norm": 2.8919944763183594, "learning_rate": 4.3572241183162684e-05, "loss": 3.8649, "step": 8407 }, { "epoch": 2.8696245733788395, "grad_norm": 2.5984740257263184, "learning_rate": 4.34584755403868e-05, "loss": 5.6027, "step": 8408 }, { "epoch": 2.8699658703071673, "grad_norm": 2.449209690093994, "learning_rate": 4.334470989761092e-05, "loss": 5.0734, "step": 8409 }, { "epoch": 2.8703071672354947, "grad_norm": 2.4918715953826904, "learning_rate": 4.3230944254835046e-05, "loss": 5.4387, "step": 8410 }, { "epoch": 2.8706484641638226, "grad_norm": 2.528923749923706, "learning_rate": 4.311717861205916e-05, "loss": 5.9894, "step": 8411 }, { "epoch": 2.87098976109215, "grad_norm": 2.5853374004364014, "learning_rate": 4.3003412969283276e-05, "loss": 6.5989, "step": 8412 }, { "epoch": 2.871331058020478, "grad_norm": 2.531834363937378, "learning_rate": 4.2889647326507395e-05, "loss": 6.2346, "step": 8413 }, { "epoch": 2.8716723549488057, "grad_norm": 2.5188536643981934, "learning_rate": 4.277588168373151e-05, "loss": 5.9237, "step": 8414 }, { "epoch": 2.872013651877133, "grad_norm": 2.5645134449005127, "learning_rate": 4.266211604095564e-05, "loss": 5.9186, "step": 8415 }, { "epoch": 2.8723549488054605, "grad_norm": 2.4560606479644775, "learning_rate": 4.254835039817975e-05, "loss": 5.6281, "step": 8416 }, { "epoch": 2.8726962457337883, "grad_norm": 2.49910306930542, "learning_rate": 4.243458475540387e-05, "loss": 6.1462, "step": 8417 }, { "epoch": 2.873037542662116, "grad_norm": 2.547170877456665, "learning_rate": 4.232081911262799e-05, "loss": 5.9647, "step": 8418 }, { "epoch": 2.8733788395904436, "grad_norm": 2.5908734798431396, "learning_rate": 4.2207053469852106e-05, "loss": 5.909, "step": 8419 }, { "epoch": 2.8737201365187715, "grad_norm": 2.5554044246673584, "learning_rate": 4.2093287827076224e-05, "loss": 5.9551, "step": 8420 }, { "epoch": 2.874061433447099, "grad_norm": 2.5563385486602783, "learning_rate": 4.197952218430034e-05, "loss": 6.0185, "step": 8421 }, { "epoch": 2.8744027303754267, "grad_norm": 2.525693893432617, "learning_rate": 4.186575654152446e-05, "loss": 5.7964, "step": 8422 }, { "epoch": 2.874744027303754, "grad_norm": 2.5117642879486084, "learning_rate": 4.175199089874858e-05, "loss": 5.5265, "step": 8423 }, { "epoch": 2.875085324232082, "grad_norm": 2.5649948120117188, "learning_rate": 4.16382252559727e-05, "loss": 5.9956, "step": 8424 }, { "epoch": 2.8754266211604094, "grad_norm": 2.43780255317688, "learning_rate": 4.152445961319681e-05, "loss": 6.1702, "step": 8425 }, { "epoch": 2.875767918088737, "grad_norm": 2.5544321537017822, "learning_rate": 4.141069397042093e-05, "loss": 5.5664, "step": 8426 }, { "epoch": 2.876109215017065, "grad_norm": 2.5855114459991455, "learning_rate": 4.1296928327645054e-05, "loss": 6.168, "step": 8427 }, { "epoch": 2.8764505119453925, "grad_norm": 2.5726702213287354, "learning_rate": 4.118316268486917e-05, "loss": 5.963, "step": 8428 }, { "epoch": 2.87679180887372, "grad_norm": 2.6072628498077393, "learning_rate": 4.106939704209329e-05, "loss": 6.4451, "step": 8429 }, { "epoch": 2.8771331058020477, "grad_norm": 2.586865186691284, "learning_rate": 4.09556313993174e-05, "loss": 6.5633, "step": 8430 }, { "epoch": 2.8774744027303756, "grad_norm": 2.524959087371826, "learning_rate": 4.084186575654152e-05, "loss": 6.5356, "step": 8431 }, { "epoch": 2.877815699658703, "grad_norm": 2.5504794120788574, "learning_rate": 4.0728100113765646e-05, "loss": 5.9479, "step": 8432 }, { "epoch": 2.878156996587031, "grad_norm": 2.483062505722046, "learning_rate": 4.0614334470989765e-05, "loss": 6.0917, "step": 8433 }, { "epoch": 2.8784982935153582, "grad_norm": 2.506287097930908, "learning_rate": 4.050056882821388e-05, "loss": 6.0076, "step": 8434 }, { "epoch": 2.878839590443686, "grad_norm": 2.6461734771728516, "learning_rate": 4.0386803185437995e-05, "loss": 5.6557, "step": 8435 }, { "epoch": 2.8791808873720135, "grad_norm": 2.628688335418701, "learning_rate": 4.0273037542662113e-05, "loss": 6.4374, "step": 8436 }, { "epoch": 2.8795221843003413, "grad_norm": 2.541585683822632, "learning_rate": 4.015927189988623e-05, "loss": 6.1847, "step": 8437 }, { "epoch": 2.8798634812286688, "grad_norm": 2.503748655319214, "learning_rate": 4.004550625711036e-05, "loss": 5.7091, "step": 8438 }, { "epoch": 2.8802047781569966, "grad_norm": 2.540815591812134, "learning_rate": 3.9931740614334476e-05, "loss": 5.6816, "step": 8439 }, { "epoch": 2.8805460750853245, "grad_norm": 2.56005597114563, "learning_rate": 3.981797497155859e-05, "loss": 5.871, "step": 8440 }, { "epoch": 2.880887372013652, "grad_norm": 2.4481024742126465, "learning_rate": 3.9704209328782706e-05, "loss": 5.8886, "step": 8441 }, { "epoch": 2.8812286689419793, "grad_norm": 2.5519607067108154, "learning_rate": 3.9590443686006824e-05, "loss": 5.7186, "step": 8442 }, { "epoch": 2.881569965870307, "grad_norm": 2.5279147624969482, "learning_rate": 3.947667804323094e-05, "loss": 5.7121, "step": 8443 }, { "epoch": 2.881911262798635, "grad_norm": 2.5419199466705322, "learning_rate": 3.936291240045507e-05, "loss": 6.1733, "step": 8444 }, { "epoch": 2.8822525597269624, "grad_norm": 2.4785492420196533, "learning_rate": 3.924914675767918e-05, "loss": 5.7262, "step": 8445 }, { "epoch": 2.88259385665529, "grad_norm": 2.5293445587158203, "learning_rate": 3.91353811149033e-05, "loss": 6.0911, "step": 8446 }, { "epoch": 2.8829351535836176, "grad_norm": 2.6208319664001465, "learning_rate": 3.902161547212742e-05, "loss": 6.4624, "step": 8447 }, { "epoch": 2.8832764505119455, "grad_norm": 2.5119760036468506, "learning_rate": 3.8907849829351535e-05, "loss": 5.5541, "step": 8448 }, { "epoch": 2.883617747440273, "grad_norm": 2.55000638961792, "learning_rate": 3.879408418657566e-05, "loss": 6.5788, "step": 8449 }, { "epoch": 2.8839590443686007, "grad_norm": 2.545145273208618, "learning_rate": 3.868031854379977e-05, "loss": 6.2904, "step": 8450 }, { "epoch": 2.884300341296928, "grad_norm": 2.5100696086883545, "learning_rate": 3.856655290102389e-05, "loss": 6.0244, "step": 8451 }, { "epoch": 2.884641638225256, "grad_norm": 2.528092384338379, "learning_rate": 3.845278725824801e-05, "loss": 4.7265, "step": 8452 }, { "epoch": 2.884982935153584, "grad_norm": 2.526163339614868, "learning_rate": 3.833902161547213e-05, "loss": 5.983, "step": 8453 }, { "epoch": 2.8853242320819112, "grad_norm": 2.3863651752471924, "learning_rate": 3.8225255972696246e-05, "loss": 4.602, "step": 8454 }, { "epoch": 2.8856655290102387, "grad_norm": 2.4391610622406006, "learning_rate": 3.8111490329920365e-05, "loss": 5.4737, "step": 8455 }, { "epoch": 2.8860068259385665, "grad_norm": 2.584704875946045, "learning_rate": 3.799772468714448e-05, "loss": 5.8695, "step": 8456 }, { "epoch": 2.8863481228668944, "grad_norm": 2.5321240425109863, "learning_rate": 3.78839590443686e-05, "loss": 5.7091, "step": 8457 }, { "epoch": 2.8866894197952218, "grad_norm": 1.8349437713623047, "learning_rate": 3.777019340159272e-05, "loss": 3.3003, "step": 8458 }, { "epoch": 2.8870307167235496, "grad_norm": 2.5653040409088135, "learning_rate": 3.765642775881684e-05, "loss": 6.2374, "step": 8459 }, { "epoch": 2.887372013651877, "grad_norm": 2.696136236190796, "learning_rate": 3.754266211604096e-05, "loss": 5.7614, "step": 8460 }, { "epoch": 2.887713310580205, "grad_norm": 2.485445499420166, "learning_rate": 3.7428896473265076e-05, "loss": 4.7444, "step": 8461 }, { "epoch": 2.8880546075085323, "grad_norm": 2.547309398651123, "learning_rate": 3.7315130830489194e-05, "loss": 6.003, "step": 8462 }, { "epoch": 2.88839590443686, "grad_norm": 2.4875457286834717, "learning_rate": 3.720136518771331e-05, "loss": 6.3583, "step": 8463 }, { "epoch": 2.8887372013651875, "grad_norm": 2.5078489780426025, "learning_rate": 3.708759954493743e-05, "loss": 5.2605, "step": 8464 }, { "epoch": 2.8890784982935154, "grad_norm": 2.540022373199463, "learning_rate": 3.697383390216154e-05, "loss": 6.3982, "step": 8465 }, { "epoch": 2.8894197952218432, "grad_norm": 2.497128963470459, "learning_rate": 3.686006825938567e-05, "loss": 5.7944, "step": 8466 }, { "epoch": 2.8897610921501706, "grad_norm": 2.550382614135742, "learning_rate": 3.674630261660979e-05, "loss": 6.0483, "step": 8467 }, { "epoch": 2.890102389078498, "grad_norm": 2.4876744747161865, "learning_rate": 3.6632536973833905e-05, "loss": 5.8666, "step": 8468 }, { "epoch": 2.890443686006826, "grad_norm": 2.4927735328674316, "learning_rate": 3.6518771331058024e-05, "loss": 5.7912, "step": 8469 }, { "epoch": 2.8907849829351537, "grad_norm": 2.4995381832122803, "learning_rate": 3.6405005688282136e-05, "loss": 5.6446, "step": 8470 }, { "epoch": 2.891126279863481, "grad_norm": 2.485089063644409, "learning_rate": 3.6291240045506254e-05, "loss": 6.2976, "step": 8471 }, { "epoch": 2.891467576791809, "grad_norm": 2.451723337173462, "learning_rate": 3.617747440273038e-05, "loss": 5.6149, "step": 8472 }, { "epoch": 2.8918088737201364, "grad_norm": 2.487278938293457, "learning_rate": 3.60637087599545e-05, "loss": 5.683, "step": 8473 }, { "epoch": 2.8921501706484642, "grad_norm": 2.5790092945098877, "learning_rate": 3.594994311717861e-05, "loss": 5.1775, "step": 8474 }, { "epoch": 2.8924914675767917, "grad_norm": 2.5560343265533447, "learning_rate": 3.583617747440273e-05, "loss": 5.4929, "step": 8475 }, { "epoch": 2.8928327645051195, "grad_norm": 2.5981833934783936, "learning_rate": 3.5722411831626847e-05, "loss": 5.6103, "step": 8476 }, { "epoch": 2.893174061433447, "grad_norm": 2.4823999404907227, "learning_rate": 3.560864618885097e-05, "loss": 5.6562, "step": 8477 }, { "epoch": 2.8935153583617748, "grad_norm": 2.4643754959106445, "learning_rate": 3.549488054607509e-05, "loss": 6.0838, "step": 8478 }, { "epoch": 2.8938566552901026, "grad_norm": 2.5785889625549316, "learning_rate": 3.53811149032992e-05, "loss": 5.6819, "step": 8479 }, { "epoch": 2.89419795221843, "grad_norm": 2.451211452484131, "learning_rate": 3.526734926052332e-05, "loss": 5.2065, "step": 8480 }, { "epoch": 2.8945392491467574, "grad_norm": 2.5396275520324707, "learning_rate": 3.515358361774744e-05, "loss": 6.2585, "step": 8481 }, { "epoch": 2.8948805460750853, "grad_norm": 2.5079259872436523, "learning_rate": 3.503981797497156e-05, "loss": 5.9257, "step": 8482 }, { "epoch": 2.895221843003413, "grad_norm": 2.489826202392578, "learning_rate": 3.492605233219568e-05, "loss": 5.6171, "step": 8483 }, { "epoch": 2.8955631399317405, "grad_norm": 2.556549310684204, "learning_rate": 3.4812286689419794e-05, "loss": 5.8783, "step": 8484 }, { "epoch": 2.8959044368600684, "grad_norm": 2.518484354019165, "learning_rate": 3.469852104664391e-05, "loss": 5.9915, "step": 8485 }, { "epoch": 2.896245733788396, "grad_norm": 2.582871675491333, "learning_rate": 3.458475540386803e-05, "loss": 6.5008, "step": 8486 }, { "epoch": 2.8965870307167236, "grad_norm": 2.5308432579040527, "learning_rate": 3.447098976109215e-05, "loss": 6.2562, "step": 8487 }, { "epoch": 2.896928327645051, "grad_norm": 2.4115796089172363, "learning_rate": 3.435722411831627e-05, "loss": 4.786, "step": 8488 }, { "epoch": 2.897269624573379, "grad_norm": 2.572611093521118, "learning_rate": 3.424345847554039e-05, "loss": 6.1011, "step": 8489 }, { "epoch": 2.8976109215017063, "grad_norm": 2.5578956604003906, "learning_rate": 3.4129692832764505e-05, "loss": 6.097, "step": 8490 }, { "epoch": 2.897952218430034, "grad_norm": 2.5361123085021973, "learning_rate": 3.4015927189988624e-05, "loss": 5.612, "step": 8491 }, { "epoch": 2.898293515358362, "grad_norm": 2.522542715072632, "learning_rate": 3.390216154721274e-05, "loss": 6.287, "step": 8492 }, { "epoch": 2.8986348122866894, "grad_norm": 2.521287202835083, "learning_rate": 3.378839590443686e-05, "loss": 6.448, "step": 8493 }, { "epoch": 2.898976109215017, "grad_norm": 2.5632736682891846, "learning_rate": 3.367463026166098e-05, "loss": 6.7079, "step": 8494 }, { "epoch": 2.8993174061433447, "grad_norm": 2.497307300567627, "learning_rate": 3.35608646188851e-05, "loss": 5.8383, "step": 8495 }, { "epoch": 2.8996587030716725, "grad_norm": 2.5428879261016846, "learning_rate": 3.3447098976109216e-05, "loss": 6.4592, "step": 8496 }, { "epoch": 2.9, "grad_norm": 2.5488977432250977, "learning_rate": 3.3333333333333335e-05, "loss": 5.6038, "step": 8497 }, { "epoch": 2.9003412969283278, "grad_norm": 2.5850279331207275, "learning_rate": 3.3219567690557453e-05, "loss": 6.1336, "step": 8498 }, { "epoch": 2.900682593856655, "grad_norm": 2.4947924613952637, "learning_rate": 3.3105802047781565e-05, "loss": 5.3009, "step": 8499 }, { "epoch": 2.901023890784983, "grad_norm": 2.5952110290527344, "learning_rate": 3.299203640500569e-05, "loss": 5.9782, "step": 8500 }, { "epoch": 2.9013651877133104, "grad_norm": 2.5056798458099365, "learning_rate": 3.287827076222981e-05, "loss": 5.4483, "step": 8501 }, { "epoch": 2.9017064846416383, "grad_norm": 2.5299181938171387, "learning_rate": 3.276450511945393e-05, "loss": 6.3067, "step": 8502 }, { "epoch": 2.9020477815699657, "grad_norm": 2.47225284576416, "learning_rate": 3.2650739476678046e-05, "loss": 5.9558, "step": 8503 }, { "epoch": 2.9023890784982935, "grad_norm": 2.519516944885254, "learning_rate": 3.253697383390216e-05, "loss": 5.8317, "step": 8504 }, { "epoch": 2.9027303754266214, "grad_norm": 2.538496255874634, "learning_rate": 3.2423208191126276e-05, "loss": 6.2876, "step": 8505 }, { "epoch": 2.903071672354949, "grad_norm": 1.7910349369049072, "learning_rate": 3.23094425483504e-05, "loss": 3.0475, "step": 8506 }, { "epoch": 2.903412969283276, "grad_norm": 2.440401077270508, "learning_rate": 3.219567690557452e-05, "loss": 6.2855, "step": 8507 }, { "epoch": 2.903754266211604, "grad_norm": 2.4906156063079834, "learning_rate": 3.208191126279864e-05, "loss": 5.975, "step": 8508 }, { "epoch": 2.904095563139932, "grad_norm": 2.4778618812561035, "learning_rate": 3.196814562002275e-05, "loss": 5.3293, "step": 8509 }, { "epoch": 2.9044368600682593, "grad_norm": 2.5149154663085938, "learning_rate": 3.185437997724687e-05, "loss": 5.6077, "step": 8510 }, { "epoch": 2.904778156996587, "grad_norm": 2.450698137283325, "learning_rate": 3.1740614334470994e-05, "loss": 5.5491, "step": 8511 }, { "epoch": 2.9051194539249146, "grad_norm": 2.482588768005371, "learning_rate": 3.162684869169511e-05, "loss": 4.8993, "step": 8512 }, { "epoch": 2.9054607508532424, "grad_norm": 2.5095643997192383, "learning_rate": 3.151308304891923e-05, "loss": 5.3198, "step": 8513 }, { "epoch": 2.90580204778157, "grad_norm": 2.603271484375, "learning_rate": 3.139931740614334e-05, "loss": 6.0782, "step": 8514 }, { "epoch": 2.9061433447098977, "grad_norm": 2.445197105407715, "learning_rate": 3.128555176336746e-05, "loss": 6.073, "step": 8515 }, { "epoch": 2.906484641638225, "grad_norm": 2.500511407852173, "learning_rate": 3.117178612059158e-05, "loss": 5.7921, "step": 8516 }, { "epoch": 2.906825938566553, "grad_norm": 2.6357011795043945, "learning_rate": 3.10580204778157e-05, "loss": 5.9283, "step": 8517 }, { "epoch": 2.9071672354948808, "grad_norm": 2.5531325340270996, "learning_rate": 3.094425483503982e-05, "loss": 5.7393, "step": 8518 }, { "epoch": 2.907508532423208, "grad_norm": 2.4458229541778564, "learning_rate": 3.0830489192263935e-05, "loss": 5.7482, "step": 8519 }, { "epoch": 2.9078498293515356, "grad_norm": 2.4953360557556152, "learning_rate": 3.0716723549488054e-05, "loss": 5.7151, "step": 8520 }, { "epoch": 2.9081911262798634, "grad_norm": 2.5121448040008545, "learning_rate": 3.060295790671217e-05, "loss": 5.8653, "step": 8521 }, { "epoch": 2.9085324232081913, "grad_norm": 2.5156524181365967, "learning_rate": 3.048919226393629e-05, "loss": 6.4765, "step": 8522 }, { "epoch": 2.9088737201365187, "grad_norm": 2.5591423511505127, "learning_rate": 3.037542662116041e-05, "loss": 6.2486, "step": 8523 }, { "epoch": 2.9092150170648465, "grad_norm": 2.460395336151123, "learning_rate": 3.026166097838453e-05, "loss": 5.5661, "step": 8524 }, { "epoch": 2.909556313993174, "grad_norm": 2.557854413986206, "learning_rate": 3.0147895335608646e-05, "loss": 6.0055, "step": 8525 }, { "epoch": 2.909897610921502, "grad_norm": 2.463956594467163, "learning_rate": 3.0034129692832765e-05, "loss": 5.6952, "step": 8526 }, { "epoch": 2.910238907849829, "grad_norm": 2.577049493789673, "learning_rate": 2.9920364050056883e-05, "loss": 6.4598, "step": 8527 }, { "epoch": 2.910580204778157, "grad_norm": 2.5568654537200928, "learning_rate": 2.9806598407281e-05, "loss": 5.7478, "step": 8528 }, { "epoch": 2.9109215017064844, "grad_norm": 2.576349973678589, "learning_rate": 2.969283276450512e-05, "loss": 6.455, "step": 8529 }, { "epoch": 2.9112627986348123, "grad_norm": 2.497002601623535, "learning_rate": 2.957906712172924e-05, "loss": 5.4758, "step": 8530 }, { "epoch": 2.91160409556314, "grad_norm": 2.4823288917541504, "learning_rate": 2.9465301478953357e-05, "loss": 6.516, "step": 8531 }, { "epoch": 2.9119453924914676, "grad_norm": 2.561558246612549, "learning_rate": 2.9351535836177476e-05, "loss": 6.5232, "step": 8532 }, { "epoch": 2.912286689419795, "grad_norm": 2.498506546020508, "learning_rate": 2.9237770193401594e-05, "loss": 5.9132, "step": 8533 }, { "epoch": 2.912627986348123, "grad_norm": 2.6166279315948486, "learning_rate": 2.912400455062571e-05, "loss": 6.2046, "step": 8534 }, { "epoch": 2.9129692832764507, "grad_norm": 2.5233633518218994, "learning_rate": 2.901023890784983e-05, "loss": 5.4725, "step": 8535 }, { "epoch": 2.913310580204778, "grad_norm": 2.531095266342163, "learning_rate": 2.889647326507395e-05, "loss": 5.3685, "step": 8536 }, { "epoch": 2.913651877133106, "grad_norm": 2.4622771739959717, "learning_rate": 2.8782707622298065e-05, "loss": 6.094, "step": 8537 }, { "epoch": 2.9139931740614333, "grad_norm": 2.830827474594116, "learning_rate": 2.8668941979522186e-05, "loss": 5.0274, "step": 8538 }, { "epoch": 2.914334470989761, "grad_norm": 2.535754442214966, "learning_rate": 2.85551763367463e-05, "loss": 5.6579, "step": 8539 }, { "epoch": 2.9146757679180886, "grad_norm": 2.4490978717803955, "learning_rate": 2.844141069397042e-05, "loss": 5.3953, "step": 8540 }, { "epoch": 2.9150170648464164, "grad_norm": 2.5437428951263428, "learning_rate": 2.8327645051194542e-05, "loss": 5.3498, "step": 8541 }, { "epoch": 2.915358361774744, "grad_norm": 2.509765863418579, "learning_rate": 2.8213879408418657e-05, "loss": 5.8658, "step": 8542 }, { "epoch": 2.9156996587030717, "grad_norm": 2.643817186355591, "learning_rate": 2.8100113765642776e-05, "loss": 4.4102, "step": 8543 }, { "epoch": 2.9160409556313995, "grad_norm": 2.594874858856201, "learning_rate": 2.7986348122866894e-05, "loss": 5.9731, "step": 8544 }, { "epoch": 2.916382252559727, "grad_norm": 2.5506644248962402, "learning_rate": 2.7872582480091013e-05, "loss": 5.838, "step": 8545 }, { "epoch": 2.9167235494880543, "grad_norm": 2.4979164600372314, "learning_rate": 2.7758816837315134e-05, "loss": 6.2731, "step": 8546 }, { "epoch": 2.917064846416382, "grad_norm": 2.5016138553619385, "learning_rate": 2.764505119453925e-05, "loss": 6.0537, "step": 8547 }, { "epoch": 2.91740614334471, "grad_norm": 2.495339870452881, "learning_rate": 2.7531285551763368e-05, "loss": 6.1774, "step": 8548 }, { "epoch": 2.9177474402730375, "grad_norm": 2.524669647216797, "learning_rate": 2.7417519908987487e-05, "loss": 5.435, "step": 8549 }, { "epoch": 2.9180887372013653, "grad_norm": 2.4896841049194336, "learning_rate": 2.7303754266211605e-05, "loss": 5.6336, "step": 8550 }, { "epoch": 2.9184300341296927, "grad_norm": 2.673529624938965, "learning_rate": 2.7189988623435724e-05, "loss": 5.4314, "step": 8551 }, { "epoch": 2.9187713310580206, "grad_norm": 2.626699447631836, "learning_rate": 2.7076222980659842e-05, "loss": 5.5602, "step": 8552 }, { "epoch": 2.919112627986348, "grad_norm": 2.520334243774414, "learning_rate": 2.696245733788396e-05, "loss": 5.9856, "step": 8553 }, { "epoch": 2.919453924914676, "grad_norm": 2.5466573238372803, "learning_rate": 2.6848691695108076e-05, "loss": 5.8577, "step": 8554 }, { "epoch": 2.919795221843003, "grad_norm": 2.380843162536621, "learning_rate": 2.6734926052332198e-05, "loss": 6.0584, "step": 8555 }, { "epoch": 2.920136518771331, "grad_norm": 2.4532737731933594, "learning_rate": 2.6621160409556316e-05, "loss": 5.8358, "step": 8556 }, { "epoch": 2.920477815699659, "grad_norm": 2.493149518966675, "learning_rate": 2.650739476678043e-05, "loss": 6.2587, "step": 8557 }, { "epoch": 2.9208191126279863, "grad_norm": 2.247823476791382, "learning_rate": 2.6393629124004553e-05, "loss": 4.0832, "step": 8558 }, { "epoch": 2.9211604095563137, "grad_norm": 2.4827280044555664, "learning_rate": 2.6279863481228668e-05, "loss": 5.8317, "step": 8559 }, { "epoch": 2.9215017064846416, "grad_norm": 2.504302978515625, "learning_rate": 2.6166097838452787e-05, "loss": 5.7784, "step": 8560 }, { "epoch": 2.9218430034129694, "grad_norm": 2.515334367752075, "learning_rate": 2.6052332195676905e-05, "loss": 5.533, "step": 8561 }, { "epoch": 2.922184300341297, "grad_norm": 2.5521583557128906, "learning_rate": 2.5938566552901024e-05, "loss": 5.6752, "step": 8562 }, { "epoch": 2.9225255972696247, "grad_norm": 2.538944959640503, "learning_rate": 2.5824800910125145e-05, "loss": 6.2666, "step": 8563 }, { "epoch": 2.922866894197952, "grad_norm": 5.985457897186279, "learning_rate": 2.571103526734926e-05, "loss": 4.8984, "step": 8564 }, { "epoch": 2.92320819112628, "grad_norm": 2.5538158416748047, "learning_rate": 2.559726962457338e-05, "loss": 5.5896, "step": 8565 }, { "epoch": 2.9235494880546073, "grad_norm": 2.5504369735717773, "learning_rate": 2.5483503981797498e-05, "loss": 5.352, "step": 8566 }, { "epoch": 2.923890784982935, "grad_norm": 2.536086082458496, "learning_rate": 2.5369738339021616e-05, "loss": 5.7882, "step": 8567 }, { "epoch": 2.9242320819112626, "grad_norm": 2.487410068511963, "learning_rate": 2.5255972696245735e-05, "loss": 6.1777, "step": 8568 }, { "epoch": 2.9245733788395905, "grad_norm": 2.55271577835083, "learning_rate": 2.5142207053469853e-05, "loss": 6.6132, "step": 8569 }, { "epoch": 2.9249146757679183, "grad_norm": 2.5541188716888428, "learning_rate": 2.502844141069397e-05, "loss": 4.3692, "step": 8570 }, { "epoch": 2.9252559726962457, "grad_norm": 2.487154006958008, "learning_rate": 2.4914675767918087e-05, "loss": 6.1641, "step": 8571 }, { "epoch": 2.925597269624573, "grad_norm": 2.4862101078033447, "learning_rate": 2.480091012514221e-05, "loss": 6.3252, "step": 8572 }, { "epoch": 2.925938566552901, "grad_norm": 2.580897569656372, "learning_rate": 2.4687144482366327e-05, "loss": 5.0885, "step": 8573 }, { "epoch": 2.926279863481229, "grad_norm": 2.546022653579712, "learning_rate": 2.4573378839590442e-05, "loss": 5.7194, "step": 8574 }, { "epoch": 2.926621160409556, "grad_norm": 2.55588436126709, "learning_rate": 2.4459613196814564e-05, "loss": 6.2847, "step": 8575 }, { "epoch": 2.926962457337884, "grad_norm": 2.4706380367279053, "learning_rate": 2.434584755403868e-05, "loss": 5.9013, "step": 8576 }, { "epoch": 2.9273037542662115, "grad_norm": 2.4694294929504395, "learning_rate": 2.4232081911262798e-05, "loss": 5.854, "step": 8577 }, { "epoch": 2.9276450511945393, "grad_norm": 2.516496181488037, "learning_rate": 2.411831626848692e-05, "loss": 5.9697, "step": 8578 }, { "epoch": 2.9279863481228667, "grad_norm": 2.545330762863159, "learning_rate": 2.4004550625711035e-05, "loss": 6.2448, "step": 8579 }, { "epoch": 2.9283276450511946, "grad_norm": 2.4520492553710938, "learning_rate": 2.3890784982935157e-05, "loss": 5.3888, "step": 8580 }, { "epoch": 2.928668941979522, "grad_norm": 2.5857083797454834, "learning_rate": 2.377701934015927e-05, "loss": 6.2331, "step": 8581 }, { "epoch": 2.92901023890785, "grad_norm": 2.9561941623687744, "learning_rate": 2.366325369738339e-05, "loss": 4.9944, "step": 8582 }, { "epoch": 2.9293515358361777, "grad_norm": 2.5174217224121094, "learning_rate": 2.3549488054607512e-05, "loss": 5.897, "step": 8583 }, { "epoch": 2.929692832764505, "grad_norm": 2.585947275161743, "learning_rate": 2.3435722411831627e-05, "loss": 6.0279, "step": 8584 }, { "epoch": 2.9300341296928325, "grad_norm": 2.439552068710327, "learning_rate": 2.3321956769055746e-05, "loss": 5.9078, "step": 8585 }, { "epoch": 2.9303754266211604, "grad_norm": 2.4478914737701416, "learning_rate": 2.3208191126279864e-05, "loss": 5.6497, "step": 8586 }, { "epoch": 2.930716723549488, "grad_norm": 2.537717342376709, "learning_rate": 2.3094425483503983e-05, "loss": 6.0121, "step": 8587 }, { "epoch": 2.9310580204778156, "grad_norm": 2.4616639614105225, "learning_rate": 2.2980659840728098e-05, "loss": 5.9547, "step": 8588 }, { "epoch": 2.9313993174061435, "grad_norm": 2.514845371246338, "learning_rate": 2.286689419795222e-05, "loss": 6.3048, "step": 8589 }, { "epoch": 2.931740614334471, "grad_norm": 2.4787206649780273, "learning_rate": 2.2753128555176338e-05, "loss": 6.2414, "step": 8590 }, { "epoch": 2.9320819112627987, "grad_norm": 2.4855761528015137, "learning_rate": 2.2639362912400453e-05, "loss": 6.3916, "step": 8591 }, { "epoch": 2.932423208191126, "grad_norm": 2.6569809913635254, "learning_rate": 2.2525597269624575e-05, "loss": 6.2394, "step": 8592 }, { "epoch": 2.932764505119454, "grad_norm": 2.4555177688598633, "learning_rate": 2.241183162684869e-05, "loss": 5.8789, "step": 8593 }, { "epoch": 2.9331058020477814, "grad_norm": 2.477505683898926, "learning_rate": 2.2298065984072812e-05, "loss": 5.7146, "step": 8594 }, { "epoch": 2.9334470989761092, "grad_norm": 2.5200958251953125, "learning_rate": 2.218430034129693e-05, "loss": 6.0025, "step": 8595 }, { "epoch": 2.933788395904437, "grad_norm": 2.5263843536376953, "learning_rate": 2.2070534698521046e-05, "loss": 6.2893, "step": 8596 }, { "epoch": 2.9341296928327645, "grad_norm": 2.552253007888794, "learning_rate": 2.1956769055745168e-05, "loss": 6.0527, "step": 8597 }, { "epoch": 2.934470989761092, "grad_norm": 2.494657039642334, "learning_rate": 2.1843003412969283e-05, "loss": 5.55, "step": 8598 }, { "epoch": 2.9348122866894197, "grad_norm": 2.446608543395996, "learning_rate": 2.17292377701934e-05, "loss": 5.6856, "step": 8599 }, { "epoch": 2.9351535836177476, "grad_norm": 2.4764108657836914, "learning_rate": 2.1615472127417523e-05, "loss": 6.2355, "step": 8600 }, { "epoch": 2.935494880546075, "grad_norm": 2.570817232131958, "learning_rate": 2.1501706484641638e-05, "loss": 6.3309, "step": 8601 }, { "epoch": 2.935836177474403, "grad_norm": 2.5633604526519775, "learning_rate": 2.1387940841865757e-05, "loss": 5.7691, "step": 8602 }, { "epoch": 2.9361774744027302, "grad_norm": 2.5183494091033936, "learning_rate": 2.1274175199089875e-05, "loss": 4.9273, "step": 8603 }, { "epoch": 2.936518771331058, "grad_norm": 2.4383068084716797, "learning_rate": 2.1160409556313994e-05, "loss": 5.0008, "step": 8604 }, { "epoch": 2.9368600682593855, "grad_norm": 2.5003600120544434, "learning_rate": 2.1046643913538112e-05, "loss": 5.857, "step": 8605 }, { "epoch": 2.9372013651877134, "grad_norm": 2.480581283569336, "learning_rate": 2.093287827076223e-05, "loss": 5.1716, "step": 8606 }, { "epoch": 2.9375426621160408, "grad_norm": 2.540728807449341, "learning_rate": 2.081911262798635e-05, "loss": 6.2848, "step": 8607 }, { "epoch": 2.9378839590443686, "grad_norm": 2.5028791427612305, "learning_rate": 2.0705346985210464e-05, "loss": 5.7391, "step": 8608 }, { "epoch": 2.9382252559726965, "grad_norm": 2.4789538383483887, "learning_rate": 2.0591581342434586e-05, "loss": 5.223, "step": 8609 }, { "epoch": 2.938566552901024, "grad_norm": 2.4967267513275146, "learning_rate": 2.04778156996587e-05, "loss": 5.668, "step": 8610 }, { "epoch": 2.9389078498293513, "grad_norm": 2.519948959350586, "learning_rate": 2.0364050056882823e-05, "loss": 5.9026, "step": 8611 }, { "epoch": 2.939249146757679, "grad_norm": 2.4573259353637695, "learning_rate": 2.025028441410694e-05, "loss": 5.8025, "step": 8612 }, { "epoch": 2.939590443686007, "grad_norm": 2.5251667499542236, "learning_rate": 2.0136518771331057e-05, "loss": 5.5212, "step": 8613 }, { "epoch": 2.9399317406143344, "grad_norm": 2.6210274696350098, "learning_rate": 2.002275312855518e-05, "loss": 5.3951, "step": 8614 }, { "epoch": 2.9402730375426622, "grad_norm": 2.532759189605713, "learning_rate": 1.9908987485779294e-05, "loss": 5.797, "step": 8615 }, { "epoch": 2.9406143344709896, "grad_norm": 2.4565927982330322, "learning_rate": 1.9795221843003412e-05, "loss": 5.4728, "step": 8616 }, { "epoch": 2.9409556313993175, "grad_norm": 2.4736921787261963, "learning_rate": 1.9681456200227534e-05, "loss": 5.9357, "step": 8617 }, { "epoch": 2.941296928327645, "grad_norm": 2.5852224826812744, "learning_rate": 1.956769055745165e-05, "loss": 4.7119, "step": 8618 }, { "epoch": 2.9416382252559727, "grad_norm": 2.5384979248046875, "learning_rate": 1.9453924914675768e-05, "loss": 5.4497, "step": 8619 }, { "epoch": 2.9419795221843, "grad_norm": 2.4566867351531982, "learning_rate": 1.9340159271899886e-05, "loss": 5.7871, "step": 8620 }, { "epoch": 2.942320819112628, "grad_norm": 2.4984500408172607, "learning_rate": 1.9226393629124005e-05, "loss": 5.9535, "step": 8621 }, { "epoch": 2.942662116040956, "grad_norm": 2.4898228645324707, "learning_rate": 1.9112627986348123e-05, "loss": 5.7868, "step": 8622 }, { "epoch": 2.9430034129692833, "grad_norm": 2.410717248916626, "learning_rate": 1.899886234357224e-05, "loss": 4.692, "step": 8623 }, { "epoch": 2.9433447098976107, "grad_norm": 2.5031991004943848, "learning_rate": 1.888509670079636e-05, "loss": 6.0732, "step": 8624 }, { "epoch": 2.9436860068259385, "grad_norm": 2.5316109657287598, "learning_rate": 1.877133105802048e-05, "loss": 5.5196, "step": 8625 }, { "epoch": 2.9440273037542664, "grad_norm": 2.44270658493042, "learning_rate": 1.8657565415244597e-05, "loss": 5.973, "step": 8626 }, { "epoch": 2.9443686006825938, "grad_norm": 2.500929355621338, "learning_rate": 1.8543799772468716e-05, "loss": 6.2362, "step": 8627 }, { "epoch": 2.9447098976109216, "grad_norm": 2.4967660903930664, "learning_rate": 1.8430034129692834e-05, "loss": 6.0688, "step": 8628 }, { "epoch": 2.945051194539249, "grad_norm": 2.551168441772461, "learning_rate": 1.8316268486916953e-05, "loss": 6.4435, "step": 8629 }, { "epoch": 2.945392491467577, "grad_norm": 2.529707431793213, "learning_rate": 1.8202502844141068e-05, "loss": 6.0493, "step": 8630 }, { "epoch": 2.9457337883959043, "grad_norm": 2.394730567932129, "learning_rate": 1.808873720136519e-05, "loss": 5.1711, "step": 8631 }, { "epoch": 2.946075085324232, "grad_norm": 2.5961592197418213, "learning_rate": 1.7974971558589305e-05, "loss": 6.0267, "step": 8632 }, { "epoch": 2.9464163822525595, "grad_norm": 2.4679746627807617, "learning_rate": 1.7861205915813423e-05, "loss": 5.6721, "step": 8633 }, { "epoch": 2.9467576791808874, "grad_norm": 2.5363996028900146, "learning_rate": 1.7747440273037545e-05, "loss": 5.6486, "step": 8634 }, { "epoch": 2.9470989761092152, "grad_norm": 2.4979100227355957, "learning_rate": 1.763367463026166e-05, "loss": 6.2355, "step": 8635 }, { "epoch": 2.9474402730375426, "grad_norm": 2.5005037784576416, "learning_rate": 1.751990898748578e-05, "loss": 6.0746, "step": 8636 }, { "epoch": 2.94778156996587, "grad_norm": 2.6224281787872314, "learning_rate": 1.7406143344709897e-05, "loss": 6.1915, "step": 8637 }, { "epoch": 2.948122866894198, "grad_norm": 2.4762070178985596, "learning_rate": 1.7292377701934016e-05, "loss": 5.9801, "step": 8638 }, { "epoch": 2.9484641638225257, "grad_norm": 2.5673601627349854, "learning_rate": 1.7178612059158134e-05, "loss": 5.5908, "step": 8639 }, { "epoch": 2.948805460750853, "grad_norm": 2.461416482925415, "learning_rate": 1.7064846416382253e-05, "loss": 5.2207, "step": 8640 }, { "epoch": 2.949146757679181, "grad_norm": 2.4866373538970947, "learning_rate": 1.695108077360637e-05, "loss": 6.422, "step": 8641 }, { "epoch": 2.9494880546075084, "grad_norm": 2.6868081092834473, "learning_rate": 1.683731513083049e-05, "loss": 4.5656, "step": 8642 }, { "epoch": 2.9498293515358363, "grad_norm": 2.542024612426758, "learning_rate": 1.6723549488054608e-05, "loss": 5.2073, "step": 8643 }, { "epoch": 2.9501706484641637, "grad_norm": 2.4783406257629395, "learning_rate": 1.6609783845278727e-05, "loss": 5.8559, "step": 8644 }, { "epoch": 2.9505119453924915, "grad_norm": 2.5044283866882324, "learning_rate": 1.6496018202502845e-05, "loss": 6.2763, "step": 8645 }, { "epoch": 2.950853242320819, "grad_norm": 2.472458600997925, "learning_rate": 1.6382252559726964e-05, "loss": 6.0363, "step": 8646 }, { "epoch": 2.9511945392491468, "grad_norm": 2.4711015224456787, "learning_rate": 1.626848691695108e-05, "loss": 5.4181, "step": 8647 }, { "epoch": 2.9515358361774746, "grad_norm": 2.5318753719329834, "learning_rate": 1.61547212741752e-05, "loss": 6.4278, "step": 8648 }, { "epoch": 2.951877133105802, "grad_norm": 2.4760727882385254, "learning_rate": 1.604095563139932e-05, "loss": 6.1243, "step": 8649 }, { "epoch": 2.9522184300341294, "grad_norm": 2.4930100440979004, "learning_rate": 1.5927189988623434e-05, "loss": 5.0605, "step": 8650 }, { "epoch": 2.9525597269624573, "grad_norm": 2.477997064590454, "learning_rate": 1.5813424345847556e-05, "loss": 6.0705, "step": 8651 }, { "epoch": 2.952901023890785, "grad_norm": 2.1888606548309326, "learning_rate": 1.569965870307167e-05, "loss": 4.6192, "step": 8652 }, { "epoch": 2.9532423208191125, "grad_norm": 2.3284385204315186, "learning_rate": 1.558589306029579e-05, "loss": 4.0104, "step": 8653 }, { "epoch": 2.9535836177474404, "grad_norm": 2.4472768306732178, "learning_rate": 1.547212741751991e-05, "loss": 5.7057, "step": 8654 }, { "epoch": 2.953924914675768, "grad_norm": 2.4223828315734863, "learning_rate": 1.5358361774744027e-05, "loss": 5.7879, "step": 8655 }, { "epoch": 2.9542662116040956, "grad_norm": 2.5395631790161133, "learning_rate": 1.5244596131968145e-05, "loss": 5.7686, "step": 8656 }, { "epoch": 2.954607508532423, "grad_norm": 2.530900239944458, "learning_rate": 1.5130830489192265e-05, "loss": 4.8828, "step": 8657 }, { "epoch": 2.954948805460751, "grad_norm": 2.5554354190826416, "learning_rate": 1.5017064846416382e-05, "loss": 6.6185, "step": 8658 }, { "epoch": 2.9552901023890783, "grad_norm": 2.492499828338623, "learning_rate": 1.49032992036405e-05, "loss": 5.1967, "step": 8659 }, { "epoch": 2.955631399317406, "grad_norm": 2.4863228797912598, "learning_rate": 1.478953356086462e-05, "loss": 5.9918, "step": 8660 }, { "epoch": 2.955972696245734, "grad_norm": 2.4008748531341553, "learning_rate": 1.4675767918088738e-05, "loss": 5.112, "step": 8661 }, { "epoch": 2.9563139931740614, "grad_norm": 2.5058634281158447, "learning_rate": 1.4562002275312855e-05, "loss": 5.9349, "step": 8662 }, { "epoch": 2.956655290102389, "grad_norm": 2.5977463722229004, "learning_rate": 1.4448236632536975e-05, "loss": 5.3876, "step": 8663 }, { "epoch": 2.9569965870307167, "grad_norm": 1.7508560419082642, "learning_rate": 1.4334470989761093e-05, "loss": 2.9253, "step": 8664 }, { "epoch": 2.9573378839590445, "grad_norm": 2.433396816253662, "learning_rate": 1.422070534698521e-05, "loss": 6.2839, "step": 8665 }, { "epoch": 2.957679180887372, "grad_norm": 2.559898614883423, "learning_rate": 1.4106939704209329e-05, "loss": 5.909, "step": 8666 }, { "epoch": 2.9580204778156998, "grad_norm": 2.5637035369873047, "learning_rate": 1.3993174061433447e-05, "loss": 5.9518, "step": 8667 }, { "epoch": 2.958361774744027, "grad_norm": 2.566162586212158, "learning_rate": 1.3879408418657567e-05, "loss": 6.0647, "step": 8668 }, { "epoch": 2.958703071672355, "grad_norm": 2.502931594848633, "learning_rate": 1.3765642775881684e-05, "loss": 4.3806, "step": 8669 }, { "epoch": 2.9590443686006824, "grad_norm": 2.453782320022583, "learning_rate": 1.3651877133105803e-05, "loss": 5.6523, "step": 8670 }, { "epoch": 2.9593856655290103, "grad_norm": 2.585204601287842, "learning_rate": 1.3538111490329921e-05, "loss": 5.9991, "step": 8671 }, { "epoch": 2.9597269624573377, "grad_norm": 2.713543176651001, "learning_rate": 1.3424345847554038e-05, "loss": 4.8614, "step": 8672 }, { "epoch": 2.9600682593856655, "grad_norm": 2.511810541152954, "learning_rate": 1.3310580204778158e-05, "loss": 6.274, "step": 8673 }, { "epoch": 2.9604095563139934, "grad_norm": 2.4611916542053223, "learning_rate": 1.3196814562002277e-05, "loss": 5.9661, "step": 8674 }, { "epoch": 2.960750853242321, "grad_norm": 2.5248091220855713, "learning_rate": 1.3083048919226393e-05, "loss": 6.2989, "step": 8675 }, { "epoch": 2.961092150170648, "grad_norm": 2.4829485416412354, "learning_rate": 1.2969283276450512e-05, "loss": 5.8782, "step": 8676 }, { "epoch": 2.961433447098976, "grad_norm": 2.432086229324341, "learning_rate": 1.285551763367463e-05, "loss": 5.5988, "step": 8677 }, { "epoch": 2.961774744027304, "grad_norm": 2.426537036895752, "learning_rate": 1.2741751990898749e-05, "loss": 4.3375, "step": 8678 }, { "epoch": 2.9621160409556313, "grad_norm": 2.4175045490264893, "learning_rate": 1.2627986348122867e-05, "loss": 5.8644, "step": 8679 }, { "epoch": 2.962457337883959, "grad_norm": 2.5056276321411133, "learning_rate": 1.2514220705346986e-05, "loss": 5.4849, "step": 8680 }, { "epoch": 2.9627986348122866, "grad_norm": 2.4787650108337402, "learning_rate": 1.2400455062571104e-05, "loss": 5.944, "step": 8681 }, { "epoch": 2.9631399317406144, "grad_norm": 2.4695487022399902, "learning_rate": 1.2286689419795221e-05, "loss": 5.8351, "step": 8682 }, { "epoch": 2.963481228668942, "grad_norm": 2.573592185974121, "learning_rate": 1.217292377701934e-05, "loss": 5.9925, "step": 8683 }, { "epoch": 2.9638225255972697, "grad_norm": 2.236001968383789, "learning_rate": 1.205915813424346e-05, "loss": 4.7678, "step": 8684 }, { "epoch": 2.964163822525597, "grad_norm": 2.475437641143799, "learning_rate": 1.1945392491467578e-05, "loss": 6.0264, "step": 8685 }, { "epoch": 2.964505119453925, "grad_norm": 2.467595100402832, "learning_rate": 1.1831626848691695e-05, "loss": 5.2569, "step": 8686 }, { "epoch": 2.9648464163822528, "grad_norm": 2.4960720539093018, "learning_rate": 1.1717861205915814e-05, "loss": 5.4893, "step": 8687 }, { "epoch": 2.96518771331058, "grad_norm": 2.579531192779541, "learning_rate": 1.1604095563139932e-05, "loss": 6.2595, "step": 8688 }, { "epoch": 2.9655290102389076, "grad_norm": 2.4547293186187744, "learning_rate": 1.1490329920364049e-05, "loss": 6.4029, "step": 8689 }, { "epoch": 2.9658703071672354, "grad_norm": 2.6123619079589844, "learning_rate": 1.1376564277588169e-05, "loss": 5.7366, "step": 8690 }, { "epoch": 2.9662116040955633, "grad_norm": 2.4635937213897705, "learning_rate": 1.1262798634812288e-05, "loss": 5.4398, "step": 8691 }, { "epoch": 2.9665529010238907, "grad_norm": 2.44295597076416, "learning_rate": 1.1149032992036406e-05, "loss": 5.6055, "step": 8692 }, { "epoch": 2.9668941979522185, "grad_norm": 2.5429694652557373, "learning_rate": 1.1035267349260523e-05, "loss": 5.9528, "step": 8693 }, { "epoch": 2.967235494880546, "grad_norm": 2.481144428253174, "learning_rate": 1.0921501706484641e-05, "loss": 6.0664, "step": 8694 }, { "epoch": 2.967576791808874, "grad_norm": 2.5129120349884033, "learning_rate": 1.0807736063708762e-05, "loss": 5.1419, "step": 8695 }, { "epoch": 2.967918088737201, "grad_norm": 2.522629737854004, "learning_rate": 1.0693970420932878e-05, "loss": 6.0011, "step": 8696 }, { "epoch": 2.968259385665529, "grad_norm": 2.5712890625, "learning_rate": 1.0580204778156997e-05, "loss": 5.9975, "step": 8697 }, { "epoch": 2.9686006825938565, "grad_norm": 2.445847511291504, "learning_rate": 1.0466439135381115e-05, "loss": 5.2381, "step": 8698 }, { "epoch": 2.9689419795221843, "grad_norm": 2.4890472888946533, "learning_rate": 1.0352673492605232e-05, "loss": 6.3443, "step": 8699 }, { "epoch": 2.969283276450512, "grad_norm": 2.519037961959839, "learning_rate": 1.023890784982935e-05, "loss": 4.9883, "step": 8700 }, { "epoch": 2.9696245733788396, "grad_norm": 2.496644973754883, "learning_rate": 1.012514220705347e-05, "loss": 5.4765, "step": 8701 }, { "epoch": 2.969965870307167, "grad_norm": 2.4772489070892334, "learning_rate": 1.001137656427759e-05, "loss": 6.0936, "step": 8702 }, { "epoch": 2.970307167235495, "grad_norm": 2.4817209243774414, "learning_rate": 9.897610921501706e-06, "loss": 5.833, "step": 8703 }, { "epoch": 2.9706484641638227, "grad_norm": 2.488058567047119, "learning_rate": 9.783845278725825e-06, "loss": 6.3268, "step": 8704 }, { "epoch": 2.97098976109215, "grad_norm": 2.6241326332092285, "learning_rate": 9.670079635949943e-06, "loss": 6.1281, "step": 8705 }, { "epoch": 2.971331058020478, "grad_norm": 2.4947798252105713, "learning_rate": 9.556313993174062e-06, "loss": 6.1815, "step": 8706 }, { "epoch": 2.9716723549488053, "grad_norm": 2.588179349899292, "learning_rate": 9.44254835039818e-06, "loss": 6.1517, "step": 8707 }, { "epoch": 2.972013651877133, "grad_norm": 2.5421693325042725, "learning_rate": 9.328782707622299e-06, "loss": 5.874, "step": 8708 }, { "epoch": 2.972354948805461, "grad_norm": 2.4261326789855957, "learning_rate": 9.215017064846417e-06, "loss": 5.1937, "step": 8709 }, { "epoch": 2.9726962457337884, "grad_norm": 4.1984076499938965, "learning_rate": 9.101251422070534e-06, "loss": 4.4323, "step": 8710 }, { "epoch": 2.973037542662116, "grad_norm": 2.5183022022247314, "learning_rate": 8.987485779294652e-06, "loss": 6.3584, "step": 8711 }, { "epoch": 2.9733788395904437, "grad_norm": 2.593282699584961, "learning_rate": 8.873720136518773e-06, "loss": 6.1431, "step": 8712 }, { "epoch": 2.9737201365187715, "grad_norm": 2.50671124458313, "learning_rate": 8.75995449374289e-06, "loss": 5.9708, "step": 8713 }, { "epoch": 2.974061433447099, "grad_norm": 2.5852482318878174, "learning_rate": 8.646188850967008e-06, "loss": 6.1701, "step": 8714 }, { "epoch": 2.9744027303754264, "grad_norm": 2.4211649894714355, "learning_rate": 8.532423208191126e-06, "loss": 5.8361, "step": 8715 }, { "epoch": 2.974744027303754, "grad_norm": 2.5099081993103027, "learning_rate": 8.418657565415245e-06, "loss": 5.9594, "step": 8716 }, { "epoch": 2.975085324232082, "grad_norm": 2.5803756713867188, "learning_rate": 8.304891922639363e-06, "loss": 6.0941, "step": 8717 }, { "epoch": 2.9754266211604095, "grad_norm": 2.493959665298462, "learning_rate": 8.191126279863482e-06, "loss": 6.2058, "step": 8718 }, { "epoch": 2.9757679180887373, "grad_norm": 2.3790855407714844, "learning_rate": 8.0773606370876e-06, "loss": 4.5639, "step": 8719 }, { "epoch": 2.9761092150170647, "grad_norm": 2.463736057281494, "learning_rate": 7.963594994311717e-06, "loss": 5.2897, "step": 8720 }, { "epoch": 2.9764505119453926, "grad_norm": 2.368633270263672, "learning_rate": 7.849829351535836e-06, "loss": 4.8404, "step": 8721 }, { "epoch": 2.9767918088737204, "grad_norm": 2.4608263969421387, "learning_rate": 7.736063708759956e-06, "loss": 6.1187, "step": 8722 }, { "epoch": 2.977133105802048, "grad_norm": 2.5266082286834717, "learning_rate": 7.622298065984073e-06, "loss": 4.6866, "step": 8723 }, { "epoch": 2.9774744027303752, "grad_norm": 2.4819984436035156, "learning_rate": 7.508532423208191e-06, "loss": 5.8985, "step": 8724 }, { "epoch": 2.977815699658703, "grad_norm": 2.534745931625366, "learning_rate": 7.39476678043231e-06, "loss": 6.0181, "step": 8725 }, { "epoch": 2.978156996587031, "grad_norm": 2.522770643234253, "learning_rate": 7.281001137656427e-06, "loss": 4.879, "step": 8726 }, { "epoch": 2.9784982935153583, "grad_norm": 2.298532009124756, "learning_rate": 7.167235494880547e-06, "loss": 4.4423, "step": 8727 }, { "epoch": 2.9788395904436857, "grad_norm": 2.4776766300201416, "learning_rate": 7.053469852104664e-06, "loss": 5.5068, "step": 8728 }, { "epoch": 2.9791808873720136, "grad_norm": 2.411564588546753, "learning_rate": 6.939704209328784e-06, "loss": 5.7158, "step": 8729 }, { "epoch": 2.9795221843003414, "grad_norm": 2.4252982139587402, "learning_rate": 6.825938566552901e-06, "loss": 4.9397, "step": 8730 }, { "epoch": 2.979863481228669, "grad_norm": 2.472968339920044, "learning_rate": 6.712172923777019e-06, "loss": 4.6723, "step": 8731 }, { "epoch": 2.9802047781569967, "grad_norm": 2.463764190673828, "learning_rate": 6.598407281001138e-06, "loss": 5.8935, "step": 8732 }, { "epoch": 2.980546075085324, "grad_norm": 2.4861762523651123, "learning_rate": 6.484641638225256e-06, "loss": 5.6101, "step": 8733 }, { "epoch": 2.980887372013652, "grad_norm": 2.5116894245147705, "learning_rate": 6.370875995449374e-06, "loss": 6.019, "step": 8734 }, { "epoch": 2.98122866894198, "grad_norm": 2.589373826980591, "learning_rate": 6.257110352673493e-06, "loss": 5.3002, "step": 8735 }, { "epoch": 2.981569965870307, "grad_norm": 2.5636868476867676, "learning_rate": 6.1433447098976105e-06, "loss": 6.0956, "step": 8736 }, { "epoch": 2.9819112627986346, "grad_norm": 2.4157183170318604, "learning_rate": 6.02957906712173e-06, "loss": 6.1327, "step": 8737 }, { "epoch": 2.9822525597269625, "grad_norm": 2.5133705139160156, "learning_rate": 5.9158134243458475e-06, "loss": 6.5907, "step": 8738 }, { "epoch": 2.9825938566552903, "grad_norm": 2.6293768882751465, "learning_rate": 5.802047781569966e-06, "loss": 6.3252, "step": 8739 }, { "epoch": 2.9829351535836177, "grad_norm": 2.530759334564209, "learning_rate": 5.6882821387940845e-06, "loss": 5.8731, "step": 8740 }, { "epoch": 2.983276450511945, "grad_norm": 2.5379390716552734, "learning_rate": 5.574516496018203e-06, "loss": 5.9245, "step": 8741 }, { "epoch": 2.983617747440273, "grad_norm": 2.5322394371032715, "learning_rate": 5.460750853242321e-06, "loss": 4.8229, "step": 8742 }, { "epoch": 2.983959044368601, "grad_norm": 2.562993049621582, "learning_rate": 5.346985210466439e-06, "loss": 6.2476, "step": 8743 }, { "epoch": 2.9843003412969282, "grad_norm": 2.5595932006835938, "learning_rate": 5.233219567690558e-06, "loss": 6.0822, "step": 8744 }, { "epoch": 2.984641638225256, "grad_norm": 2.542302131652832, "learning_rate": 5.119453924914675e-06, "loss": 5.6779, "step": 8745 }, { "epoch": 2.9849829351535835, "grad_norm": 2.3574278354644775, "learning_rate": 5.005688282138795e-06, "loss": 5.5255, "step": 8746 }, { "epoch": 2.9853242320819113, "grad_norm": 2.4841222763061523, "learning_rate": 4.891922639362912e-06, "loss": 5.7245, "step": 8747 }, { "epoch": 2.985665529010239, "grad_norm": 2.4711790084838867, "learning_rate": 4.778156996587031e-06, "loss": 6.1008, "step": 8748 }, { "epoch": 2.9860068259385666, "grad_norm": 2.472411632537842, "learning_rate": 4.664391353811149e-06, "loss": 5.9053, "step": 8749 }, { "epoch": 2.986348122866894, "grad_norm": 2.540469169616699, "learning_rate": 4.550625711035267e-06, "loss": 5.4633, "step": 8750 }, { "epoch": 2.986689419795222, "grad_norm": 2.4350697994232178, "learning_rate": 4.436860068259386e-06, "loss": 5.7402, "step": 8751 }, { "epoch": 2.9870307167235497, "grad_norm": 2.4693779945373535, "learning_rate": 4.323094425483504e-06, "loss": 5.52, "step": 8752 }, { "epoch": 2.987372013651877, "grad_norm": 2.453641414642334, "learning_rate": 4.2093287827076224e-06, "loss": 6.0574, "step": 8753 }, { "epoch": 2.9877133105802045, "grad_norm": 2.4429779052734375, "learning_rate": 4.095563139931741e-06, "loss": 5.5031, "step": 8754 }, { "epoch": 2.9880546075085324, "grad_norm": 2.479944944381714, "learning_rate": 3.9817974971558586e-06, "loss": 5.6135, "step": 8755 }, { "epoch": 2.98839590443686, "grad_norm": 2.4810147285461426, "learning_rate": 3.868031854379978e-06, "loss": 6.4582, "step": 8756 }, { "epoch": 2.9887372013651876, "grad_norm": 2.5106160640716553, "learning_rate": 3.7542662116040956e-06, "loss": 6.1153, "step": 8757 }, { "epoch": 2.9890784982935155, "grad_norm": 2.4401440620422363, "learning_rate": 3.6405005688282136e-06, "loss": 5.8295, "step": 8758 }, { "epoch": 2.989419795221843, "grad_norm": 2.520796298980713, "learning_rate": 3.526734926052332e-06, "loss": 6.0053, "step": 8759 }, { "epoch": 2.9897610921501707, "grad_norm": 2.531611204147339, "learning_rate": 3.4129692832764506e-06, "loss": 6.1409, "step": 8760 }, { "epoch": 2.9901023890784986, "grad_norm": 2.4728920459747314, "learning_rate": 3.299203640500569e-06, "loss": 5.6671, "step": 8761 }, { "epoch": 2.990443686006826, "grad_norm": 2.493472099304199, "learning_rate": 3.185437997724687e-06, "loss": 6.0225, "step": 8762 }, { "epoch": 2.9907849829351534, "grad_norm": 2.4957869052886963, "learning_rate": 3.0716723549488053e-06, "loss": 6.2367, "step": 8763 }, { "epoch": 2.9911262798634812, "grad_norm": 2.530949115753174, "learning_rate": 2.9579067121729238e-06, "loss": 5.9412, "step": 8764 }, { "epoch": 2.991467576791809, "grad_norm": 2.5535237789154053, "learning_rate": 2.8441410693970423e-06, "loss": 5.8223, "step": 8765 }, { "epoch": 2.9918088737201365, "grad_norm": 2.5407912731170654, "learning_rate": 2.7303754266211603e-06, "loss": 5.8673, "step": 8766 }, { "epoch": 2.992150170648464, "grad_norm": 2.4849934577941895, "learning_rate": 2.616609783845279e-06, "loss": 6.2063, "step": 8767 }, { "epoch": 2.9924914675767917, "grad_norm": 2.459456443786621, "learning_rate": 2.5028441410693973e-06, "loss": 5.7936, "step": 8768 }, { "epoch": 2.9928327645051196, "grad_norm": 2.41933012008667, "learning_rate": 2.3890784982935154e-06, "loss": 6.1125, "step": 8769 }, { "epoch": 2.993174061433447, "grad_norm": 2.5088438987731934, "learning_rate": 2.2753128555176335e-06, "loss": 5.7709, "step": 8770 }, { "epoch": 2.993515358361775, "grad_norm": 2.481630563735962, "learning_rate": 2.161547212741752e-06, "loss": 5.1395, "step": 8771 }, { "epoch": 2.9938566552901023, "grad_norm": 2.4694602489471436, "learning_rate": 2.0477815699658705e-06, "loss": 6.408, "step": 8772 }, { "epoch": 2.99419795221843, "grad_norm": 2.609255313873291, "learning_rate": 1.934015927189989e-06, "loss": 5.2451, "step": 8773 }, { "epoch": 2.994539249146758, "grad_norm": 2.562502145767212, "learning_rate": 1.8202502844141068e-06, "loss": 6.0563, "step": 8774 }, { "epoch": 2.9948805460750854, "grad_norm": 2.526449203491211, "learning_rate": 1.7064846416382253e-06, "loss": 5.7829, "step": 8775 }, { "epoch": 2.9952218430034128, "grad_norm": 2.5452816486358643, "learning_rate": 1.5927189988623436e-06, "loss": 6.2753, "step": 8776 }, { "epoch": 2.9955631399317406, "grad_norm": 2.4952499866485596, "learning_rate": 1.4789533560864619e-06, "loss": 6.2114, "step": 8777 }, { "epoch": 2.9959044368600685, "grad_norm": 2.254758358001709, "learning_rate": 1.3651877133105802e-06, "loss": 4.1049, "step": 8778 }, { "epoch": 2.996245733788396, "grad_norm": 2.518864870071411, "learning_rate": 1.2514220705346987e-06, "loss": 6.5108, "step": 8779 }, { "epoch": 2.9965870307167233, "grad_norm": 2.4607439041137695, "learning_rate": 1.1376564277588167e-06, "loss": 5.4417, "step": 8780 }, { "epoch": 2.996928327645051, "grad_norm": 2.581373453140259, "learning_rate": 1.0238907849829352e-06, "loss": 5.4841, "step": 8781 }, { "epoch": 2.997269624573379, "grad_norm": 2.3811047077178955, "learning_rate": 9.101251422070534e-07, "loss": 5.2381, "step": 8782 }, { "epoch": 2.9976109215017064, "grad_norm": 2.4670047760009766, "learning_rate": 7.963594994311718e-07, "loss": 6.3373, "step": 8783 }, { "epoch": 2.9979522184300342, "grad_norm": 2.462789297103882, "learning_rate": 6.825938566552901e-07, "loss": 5.4766, "step": 8784 }, { "epoch": 2.9982935153583616, "grad_norm": 2.467465877532959, "learning_rate": 5.688282138794084e-07, "loss": 6.3852, "step": 8785 }, { "epoch": 2.9986348122866895, "grad_norm": 2.4952261447906494, "learning_rate": 4.550625711035267e-07, "loss": 5.9034, "step": 8786 }, { "epoch": 2.9989761092150173, "grad_norm": 2.497133731842041, "learning_rate": 3.4129692832764504e-07, "loss": 5.4161, "step": 8787 }, { "epoch": 2.9993174061433447, "grad_norm": 2.377657890319824, "learning_rate": 2.2753128555176335e-07, "loss": 5.5288, "step": 8788 }, { "epoch": 2.999658703071672, "grad_norm": 2.2658233642578125, "learning_rate": 1.1376564277588168e-07, "loss": 3.8865, "step": 8789 }, { "epoch": 3.0, "grad_norm": 2.476238965988159, "learning_rate": 0.0, "loss": 5.7151, "step": 8790 } ], "logging_steps": 1, "max_steps": 8790, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 120000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.53227897518424e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }