{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 14637, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.832001093120175e-05, "grad_norm": 36.58007049560547, "learning_rate": 0.0, "loss": 0.8362, "step": 1 }, { "epoch": 0.0001366400218624035, "grad_norm": 36.93478012084961, "learning_rate": 1.5909090909090908e-08, "loss": 0.865, "step": 2 }, { "epoch": 0.00020496003279360525, "grad_norm": 40.00745391845703, "learning_rate": 3.1818181818181816e-08, "loss": 1.096, "step": 3 }, { "epoch": 0.000273280043724807, "grad_norm": 28.330896377563477, "learning_rate": 4.772727272727273e-08, "loss": 1.1825, "step": 4 }, { "epoch": 0.00034160005465600873, "grad_norm": 34.07448196411133, "learning_rate": 6.363636363636363e-08, "loss": 0.9188, "step": 5 }, { "epoch": 0.0004099200655872105, "grad_norm": 30.18312644958496, "learning_rate": 7.954545454545455e-08, "loss": 0.8192, "step": 6 }, { "epoch": 0.00047824007651841227, "grad_norm": 44.98463821411133, "learning_rate": 9.545454545454546e-08, "loss": 0.8966, "step": 7 }, { "epoch": 0.000546560087449614, "grad_norm": 35.56296920776367, "learning_rate": 1.1136363636363635e-07, "loss": 1.0439, "step": 8 }, { "epoch": 0.0006148800983808157, "grad_norm": 28.068071365356445, "learning_rate": 1.2727272727272726e-07, "loss": 0.8223, "step": 9 }, { "epoch": 0.0006832001093120175, "grad_norm": 29.180540084838867, "learning_rate": 1.4318181818181818e-07, "loss": 1.1088, "step": 10 }, { "epoch": 0.0007515201202432192, "grad_norm": 33.89573669433594, "learning_rate": 1.590909090909091e-07, "loss": 0.8412, "step": 11 }, { "epoch": 0.000819840131174421, "grad_norm": 23.96651268005371, "learning_rate": 1.7500000000000002e-07, "loss": 1.1668, "step": 12 }, { "epoch": 0.0008881601421056228, "grad_norm": 31.181114196777344, "learning_rate": 1.909090909090909e-07, "loss": 0.9652, "step": 13 }, { "epoch": 0.0009564801530368245, "grad_norm": 24.59832763671875, "learning_rate": 2.068181818181818e-07, "loss": 0.9777, "step": 14 }, { "epoch": 0.0010248001639680262, "grad_norm": 35.586177825927734, "learning_rate": 2.227272727272727e-07, "loss": 0.9024, "step": 15 }, { "epoch": 0.001093120174899228, "grad_norm": 26.81324577331543, "learning_rate": 2.3863636363636364e-07, "loss": 0.8433, "step": 16 }, { "epoch": 0.0011614401858304297, "grad_norm": 37.967750549316406, "learning_rate": 2.5454545454545453e-07, "loss": 0.7073, "step": 17 }, { "epoch": 0.0012297601967616314, "grad_norm": 31.58544921875, "learning_rate": 2.704545454545454e-07, "loss": 0.9826, "step": 18 }, { "epoch": 0.0012980802076928333, "grad_norm": 22.405118942260742, "learning_rate": 2.8636363636363637e-07, "loss": 1.0106, "step": 19 }, { "epoch": 0.001366400218624035, "grad_norm": 26.9180850982666, "learning_rate": 3.0227272727272726e-07, "loss": 0.9476, "step": 20 }, { "epoch": 0.0014347202295552368, "grad_norm": 21.962631225585938, "learning_rate": 3.181818181818182e-07, "loss": 0.8375, "step": 21 }, { "epoch": 0.0015030402404864385, "grad_norm": 32.034732818603516, "learning_rate": 3.340909090909091e-07, "loss": 0.83, "step": 22 }, { "epoch": 0.0015713602514176401, "grad_norm": 24.20343589782715, "learning_rate": 3.5000000000000004e-07, "loss": 0.8054, "step": 23 }, { "epoch": 0.001639680262348842, "grad_norm": 23.997596740722656, "learning_rate": 3.659090909090909e-07, "loss": 1.0729, "step": 24 }, { "epoch": 0.0017080002732800437, "grad_norm": 17.400592803955078, "learning_rate": 3.818181818181818e-07, "loss": 0.7604, "step": 25 }, { "epoch": 0.0017763202842112455, "grad_norm": 17.517396926879883, "learning_rate": 3.977272727272727e-07, "loss": 0.9263, "step": 26 }, { "epoch": 0.0018446402951424472, "grad_norm": 26.000858306884766, "learning_rate": 4.136363636363636e-07, "loss": 0.8718, "step": 27 }, { "epoch": 0.001912960306073649, "grad_norm": 16.284147262573242, "learning_rate": 4.2954545454545455e-07, "loss": 0.9193, "step": 28 }, { "epoch": 0.0019812803170048505, "grad_norm": 20.71376609802246, "learning_rate": 4.454545454545454e-07, "loss": 0.876, "step": 29 }, { "epoch": 0.0020496003279360524, "grad_norm": 14.203250885009766, "learning_rate": 4.6136363636363633e-07, "loss": 0.9193, "step": 30 }, { "epoch": 0.0021179203388672543, "grad_norm": 15.716347694396973, "learning_rate": 4.772727272727273e-07, "loss": 0.8106, "step": 31 }, { "epoch": 0.002186240349798456, "grad_norm": 12.213006973266602, "learning_rate": 4.931818181818182e-07, "loss": 0.7438, "step": 32 }, { "epoch": 0.0022545603607296576, "grad_norm": 15.103764533996582, "learning_rate": 5.090909090909091e-07, "loss": 0.8208, "step": 33 }, { "epoch": 0.0023228803716608595, "grad_norm": 22.403749465942383, "learning_rate": 5.25e-07, "loss": 0.8922, "step": 34 }, { "epoch": 0.0023912003825920613, "grad_norm": 12.383172988891602, "learning_rate": 5.409090909090908e-07, "loss": 0.8583, "step": 35 }, { "epoch": 0.0024595203935232628, "grad_norm": 16.273120880126953, "learning_rate": 5.568181818181818e-07, "loss": 0.8286, "step": 36 }, { "epoch": 0.0025278404044544647, "grad_norm": 10.227724075317383, "learning_rate": 5.727272727272727e-07, "loss": 0.7545, "step": 37 }, { "epoch": 0.0025961604153856665, "grad_norm": 12.145092010498047, "learning_rate": 5.886363636363636e-07, "loss": 0.8521, "step": 38 }, { "epoch": 0.0026644804263168684, "grad_norm": 11.358644485473633, "learning_rate": 6.045454545454545e-07, "loss": 0.8312, "step": 39 }, { "epoch": 0.00273280043724807, "grad_norm": 11.294305801391602, "learning_rate": 6.204545454545454e-07, "loss": 0.7209, "step": 40 }, { "epoch": 0.0028011204481792717, "grad_norm": 14.755722999572754, "learning_rate": 6.363636363636364e-07, "loss": 0.9238, "step": 41 }, { "epoch": 0.0028694404591104736, "grad_norm": 9.838054656982422, "learning_rate": 6.522727272727273e-07, "loss": 0.6878, "step": 42 }, { "epoch": 0.002937760470041675, "grad_norm": 13.278871536254883, "learning_rate": 6.681818181818182e-07, "loss": 0.839, "step": 43 }, { "epoch": 0.003006080480972877, "grad_norm": 15.418530464172363, "learning_rate": 6.840909090909091e-07, "loss": 0.9537, "step": 44 }, { "epoch": 0.003074400491904079, "grad_norm": 9.19633674621582, "learning_rate": 7.000000000000001e-07, "loss": 0.5763, "step": 45 }, { "epoch": 0.0031427205028352802, "grad_norm": 12.002073287963867, "learning_rate": 7.15909090909091e-07, "loss": 0.8469, "step": 46 }, { "epoch": 0.003211040513766482, "grad_norm": 11.938868522644043, "learning_rate": 7.318181818181818e-07, "loss": 0.7474, "step": 47 }, { "epoch": 0.003279360524697684, "grad_norm": 13.47724437713623, "learning_rate": 7.477272727272726e-07, "loss": 0.856, "step": 48 }, { "epoch": 0.003347680535628886, "grad_norm": 13.090188980102539, "learning_rate": 7.636363636363636e-07, "loss": 0.8851, "step": 49 }, { "epoch": 0.0034160005465600873, "grad_norm": 12.899078369140625, "learning_rate": 7.795454545454545e-07, "loss": 0.6993, "step": 50 }, { "epoch": 0.003484320557491289, "grad_norm": 9.430854797363281, "learning_rate": 7.954545454545454e-07, "loss": 0.6675, "step": 51 }, { "epoch": 0.003552640568422491, "grad_norm": 11.106538772583008, "learning_rate": 8.113636363636363e-07, "loss": 0.7783, "step": 52 }, { "epoch": 0.0036209605793536925, "grad_norm": 11.029132843017578, "learning_rate": 8.272727272727272e-07, "loss": 0.6857, "step": 53 }, { "epoch": 0.0036892805902848944, "grad_norm": 11.463935852050781, "learning_rate": 8.431818181818182e-07, "loss": 0.8074, "step": 54 }, { "epoch": 0.0037576006012160963, "grad_norm": 12.695626258850098, "learning_rate": 8.590909090909091e-07, "loss": 0.8796, "step": 55 }, { "epoch": 0.003825920612147298, "grad_norm": 9.700435638427734, "learning_rate": 8.75e-07, "loss": 0.7001, "step": 56 }, { "epoch": 0.0038942406230784996, "grad_norm": 10.893965721130371, "learning_rate": 8.909090909090908e-07, "loss": 0.7174, "step": 57 }, { "epoch": 0.003962560634009701, "grad_norm": 13.75069808959961, "learning_rate": 9.068181818181818e-07, "loss": 0.8583, "step": 58 }, { "epoch": 0.004030880644940903, "grad_norm": 8.75240707397461, "learning_rate": 9.227272727272727e-07, "loss": 0.7044, "step": 59 }, { "epoch": 0.004099200655872105, "grad_norm": 10.253602027893066, "learning_rate": 9.386363636363637e-07, "loss": 0.6191, "step": 60 }, { "epoch": 0.004167520666803307, "grad_norm": 10.599081039428711, "learning_rate": 9.545454545454546e-07, "loss": 0.7351, "step": 61 }, { "epoch": 0.0042358406777345085, "grad_norm": 10.32392406463623, "learning_rate": 9.704545454545454e-07, "loss": 0.6018, "step": 62 }, { "epoch": 0.00430416068866571, "grad_norm": 8.41724681854248, "learning_rate": 9.863636363636363e-07, "loss": 0.63, "step": 63 }, { "epoch": 0.004372480699596912, "grad_norm": 7.899345397949219, "learning_rate": 1.0022727272727272e-06, "loss": 0.55, "step": 64 }, { "epoch": 0.004440800710528114, "grad_norm": 10.247184753417969, "learning_rate": 1.0181818181818181e-06, "loss": 0.6823, "step": 65 }, { "epoch": 0.004509120721459315, "grad_norm": 9.274835586547852, "learning_rate": 1.0340909090909092e-06, "loss": 0.6885, "step": 66 }, { "epoch": 0.0045774407323905175, "grad_norm": 10.044890403747559, "learning_rate": 1.05e-06, "loss": 0.7762, "step": 67 }, { "epoch": 0.004645760743321719, "grad_norm": 8.833833694458008, "learning_rate": 1.065909090909091e-06, "loss": 0.7173, "step": 68 }, { "epoch": 0.00471408075425292, "grad_norm": 10.072002410888672, "learning_rate": 1.0818181818181817e-06, "loss": 0.6255, "step": 69 }, { "epoch": 0.004782400765184123, "grad_norm": 8.723885536193848, "learning_rate": 1.0977272727272728e-06, "loss": 0.773, "step": 70 }, { "epoch": 0.004850720776115324, "grad_norm": 8.864386558532715, "learning_rate": 1.1136363636363637e-06, "loss": 0.7112, "step": 71 }, { "epoch": 0.0049190407870465256, "grad_norm": 9.213874816894531, "learning_rate": 1.1295454545454546e-06, "loss": 0.7681, "step": 72 }, { "epoch": 0.004987360797977728, "grad_norm": 9.01799201965332, "learning_rate": 1.1454545454545455e-06, "loss": 0.7543, "step": 73 }, { "epoch": 0.005055680808908929, "grad_norm": 8.732536315917969, "learning_rate": 1.1613636363636364e-06, "loss": 0.546, "step": 74 }, { "epoch": 0.005124000819840131, "grad_norm": 9.755199432373047, "learning_rate": 1.1772727272727272e-06, "loss": 0.7861, "step": 75 }, { "epoch": 0.005192320830771333, "grad_norm": 8.743706703186035, "learning_rate": 1.1931818181818181e-06, "loss": 0.7451, "step": 76 }, { "epoch": 0.0052606408417025345, "grad_norm": 13.332551002502441, "learning_rate": 1.209090909090909e-06, "loss": 0.6052, "step": 77 }, { "epoch": 0.005328960852633737, "grad_norm": 10.03634262084961, "learning_rate": 1.225e-06, "loss": 0.5585, "step": 78 }, { "epoch": 0.005397280863564938, "grad_norm": 8.1842622756958, "learning_rate": 1.2409090909090908e-06, "loss": 0.7309, "step": 79 }, { "epoch": 0.00546560087449614, "grad_norm": 11.625762939453125, "learning_rate": 1.2568181818181817e-06, "loss": 0.715, "step": 80 }, { "epoch": 0.005533920885427342, "grad_norm": 8.867816925048828, "learning_rate": 1.2727272727272728e-06, "loss": 0.532, "step": 81 }, { "epoch": 0.0056022408963585435, "grad_norm": 8.077404022216797, "learning_rate": 1.2886363636363635e-06, "loss": 0.7286, "step": 82 }, { "epoch": 0.005670560907289745, "grad_norm": 9.66309928894043, "learning_rate": 1.3045454545454546e-06, "loss": 0.6614, "step": 83 }, { "epoch": 0.005738880918220947, "grad_norm": 8.48843002319336, "learning_rate": 1.3204545454545455e-06, "loss": 0.6381, "step": 84 }, { "epoch": 0.005807200929152149, "grad_norm": 7.770285129547119, "learning_rate": 1.3363636363636364e-06, "loss": 0.5124, "step": 85 }, { "epoch": 0.00587552094008335, "grad_norm": 7.774601936340332, "learning_rate": 1.3522727272727273e-06, "loss": 0.6425, "step": 86 }, { "epoch": 0.005943840951014552, "grad_norm": 7.24495267868042, "learning_rate": 1.3681818181818182e-06, "loss": 0.6521, "step": 87 }, { "epoch": 0.006012160961945754, "grad_norm": 10.000133514404297, "learning_rate": 1.384090909090909e-06, "loss": 0.5476, "step": 88 }, { "epoch": 0.006080480972876955, "grad_norm": 6.464188575744629, "learning_rate": 1.4000000000000001e-06, "loss": 0.688, "step": 89 }, { "epoch": 0.006148800983808158, "grad_norm": 9.1311674118042, "learning_rate": 1.4159090909090908e-06, "loss": 0.5479, "step": 90 }, { "epoch": 0.006217120994739359, "grad_norm": 8.125679969787598, "learning_rate": 1.431818181818182e-06, "loss": 0.6884, "step": 91 }, { "epoch": 0.0062854410056705605, "grad_norm": 7.3780951499938965, "learning_rate": 1.4477272727272726e-06, "loss": 0.5808, "step": 92 }, { "epoch": 0.006353761016601763, "grad_norm": 6.840122699737549, "learning_rate": 1.4636363636363635e-06, "loss": 0.6125, "step": 93 }, { "epoch": 0.006422081027532964, "grad_norm": 5.838550090789795, "learning_rate": 1.4795454545454546e-06, "loss": 0.5201, "step": 94 }, { "epoch": 0.0064904010384641666, "grad_norm": 11.269671440124512, "learning_rate": 1.4954545454545453e-06, "loss": 0.8082, "step": 95 }, { "epoch": 0.006558721049395368, "grad_norm": 6.790184020996094, "learning_rate": 1.5113636363636364e-06, "loss": 0.6535, "step": 96 }, { "epoch": 0.0066270410603265694, "grad_norm": 9.405611038208008, "learning_rate": 1.5272727272727273e-06, "loss": 0.7524, "step": 97 }, { "epoch": 0.006695361071257772, "grad_norm": 8.11466121673584, "learning_rate": 1.5431818181818182e-06, "loss": 0.6267, "step": 98 }, { "epoch": 0.006763681082188973, "grad_norm": 7.093649864196777, "learning_rate": 1.559090909090909e-06, "loss": 0.5565, "step": 99 }, { "epoch": 0.006832001093120175, "grad_norm": 7.089436054229736, "learning_rate": 1.575e-06, "loss": 0.5355, "step": 100 }, { "epoch": 0.006900321104051377, "grad_norm": 8.02022647857666, "learning_rate": 1.5909090909090908e-06, "loss": 0.5753, "step": 101 }, { "epoch": 0.006968641114982578, "grad_norm": 7.949474334716797, "learning_rate": 1.6068181818181817e-06, "loss": 0.647, "step": 102 }, { "epoch": 0.00703696112591378, "grad_norm": 8.906007766723633, "learning_rate": 1.6227272727272726e-06, "loss": 0.7441, "step": 103 }, { "epoch": 0.007105281136844982, "grad_norm": 7.378682613372803, "learning_rate": 1.6386363636363637e-06, "loss": 0.5866, "step": 104 }, { "epoch": 0.007173601147776184, "grad_norm": 6.84084415435791, "learning_rate": 1.6545454545454544e-06, "loss": 0.5784, "step": 105 }, { "epoch": 0.007241921158707385, "grad_norm": 7.051499843597412, "learning_rate": 1.6704545454545455e-06, "loss": 0.6384, "step": 106 }, { "epoch": 0.007310241169638587, "grad_norm": 6.3690643310546875, "learning_rate": 1.6863636363636364e-06, "loss": 0.5628, "step": 107 }, { "epoch": 0.007378561180569789, "grad_norm": 4.996488094329834, "learning_rate": 1.7022727272727273e-06, "loss": 0.4689, "step": 108 }, { "epoch": 0.00744688119150099, "grad_norm": 6.694011211395264, "learning_rate": 1.7181818181818182e-06, "loss": 0.5855, "step": 109 }, { "epoch": 0.0075152012024321925, "grad_norm": 6.764377593994141, "learning_rate": 1.734090909090909e-06, "loss": 0.5025, "step": 110 }, { "epoch": 0.007583521213363394, "grad_norm": 7.904394626617432, "learning_rate": 1.75e-06, "loss": 0.5856, "step": 111 }, { "epoch": 0.007651841224294596, "grad_norm": 8.832837104797363, "learning_rate": 1.7659090909090909e-06, "loss": 0.6813, "step": 112 }, { "epoch": 0.007720161235225798, "grad_norm": 9.258819580078125, "learning_rate": 1.7818181818181815e-06, "loss": 0.5749, "step": 113 }, { "epoch": 0.007788481246156999, "grad_norm": 9.696954727172852, "learning_rate": 1.7977272727272729e-06, "loss": 0.59, "step": 114 }, { "epoch": 0.007856801257088201, "grad_norm": 7.1866912841796875, "learning_rate": 1.8136363636363635e-06, "loss": 0.5677, "step": 115 }, { "epoch": 0.007925121268019402, "grad_norm": 5.40134859085083, "learning_rate": 1.8295454545454544e-06, "loss": 0.5642, "step": 116 }, { "epoch": 0.007993441278950604, "grad_norm": 8.890814781188965, "learning_rate": 1.8454545454545453e-06, "loss": 0.6474, "step": 117 }, { "epoch": 0.008061761289881807, "grad_norm": 7.66494083404541, "learning_rate": 1.8613636363636364e-06, "loss": 0.6218, "step": 118 }, { "epoch": 0.008130081300813009, "grad_norm": 8.882439613342285, "learning_rate": 1.8772727272727273e-06, "loss": 0.6575, "step": 119 }, { "epoch": 0.00819840131174421, "grad_norm": 7.533927917480469, "learning_rate": 1.8931818181818182e-06, "loss": 0.5976, "step": 120 }, { "epoch": 0.008266721322675412, "grad_norm": 6.338353633880615, "learning_rate": 1.909090909090909e-06, "loss": 0.5973, "step": 121 }, { "epoch": 0.008335041333606614, "grad_norm": 5.925466537475586, "learning_rate": 1.925e-06, "loss": 0.541, "step": 122 }, { "epoch": 0.008403361344537815, "grad_norm": 7.382513523101807, "learning_rate": 1.940909090909091e-06, "loss": 0.6297, "step": 123 }, { "epoch": 0.008471681355469017, "grad_norm": 8.834944725036621, "learning_rate": 1.9568181818181816e-06, "loss": 0.6564, "step": 124 }, { "epoch": 0.00854000136640022, "grad_norm": 8.232640266418457, "learning_rate": 1.9727272727272727e-06, "loss": 0.608, "step": 125 }, { "epoch": 0.00860832137733142, "grad_norm": 7.354604244232178, "learning_rate": 1.9886363636363638e-06, "loss": 0.6809, "step": 126 }, { "epoch": 0.008676641388262622, "grad_norm": 7.305105686187744, "learning_rate": 2.0045454545454544e-06, "loss": 0.5604, "step": 127 }, { "epoch": 0.008744961399193825, "grad_norm": 7.434475421905518, "learning_rate": 2.0204545454545456e-06, "loss": 0.6117, "step": 128 }, { "epoch": 0.008813281410125025, "grad_norm": 9.79948902130127, "learning_rate": 2.0363636363636362e-06, "loss": 0.669, "step": 129 }, { "epoch": 0.008881601421056227, "grad_norm": 7.924960613250732, "learning_rate": 2.052272727272727e-06, "loss": 0.5732, "step": 130 }, { "epoch": 0.00894992143198743, "grad_norm": 8.48389720916748, "learning_rate": 2.0681818181818184e-06, "loss": 0.7374, "step": 131 }, { "epoch": 0.00901824144291863, "grad_norm": 5.079216003417969, "learning_rate": 2.084090909090909e-06, "loss": 0.5207, "step": 132 }, { "epoch": 0.009086561453849833, "grad_norm": 8.688608169555664, "learning_rate": 2.1e-06, "loss": 0.6149, "step": 133 }, { "epoch": 0.009154881464781035, "grad_norm": 9.01906967163086, "learning_rate": 2.115909090909091e-06, "loss": 0.5405, "step": 134 }, { "epoch": 0.009223201475712236, "grad_norm": 7.433481216430664, "learning_rate": 2.131818181818182e-06, "loss": 0.6455, "step": 135 }, { "epoch": 0.009291521486643438, "grad_norm": 8.22209358215332, "learning_rate": 2.1477272727272727e-06, "loss": 0.6189, "step": 136 }, { "epoch": 0.00935984149757464, "grad_norm": 10.465771675109863, "learning_rate": 2.1636363636363634e-06, "loss": 0.5756, "step": 137 }, { "epoch": 0.00942816150850584, "grad_norm": 7.3151535987854, "learning_rate": 2.1795454545454545e-06, "loss": 0.6373, "step": 138 }, { "epoch": 0.009496481519437043, "grad_norm": 9.227588653564453, "learning_rate": 2.1954545454545456e-06, "loss": 0.6666, "step": 139 }, { "epoch": 0.009564801530368245, "grad_norm": 8.500102996826172, "learning_rate": 2.2113636363636363e-06, "loss": 0.6647, "step": 140 }, { "epoch": 0.009633121541299446, "grad_norm": 7.618299961090088, "learning_rate": 2.2272727272727274e-06, "loss": 0.5754, "step": 141 }, { "epoch": 0.009701441552230648, "grad_norm": 8.965676307678223, "learning_rate": 2.243181818181818e-06, "loss": 0.6071, "step": 142 }, { "epoch": 0.00976976156316185, "grad_norm": 8.329572677612305, "learning_rate": 2.259090909090909e-06, "loss": 0.5467, "step": 143 }, { "epoch": 0.009838081574093051, "grad_norm": 8.797358512878418, "learning_rate": 2.2750000000000002e-06, "loss": 0.6713, "step": 144 }, { "epoch": 0.009906401585024253, "grad_norm": 7.541782379150391, "learning_rate": 2.290909090909091e-06, "loss": 0.6274, "step": 145 }, { "epoch": 0.009974721595955456, "grad_norm": 7.168302536010742, "learning_rate": 2.3068181818181816e-06, "loss": 0.6404, "step": 146 }, { "epoch": 0.010043041606886656, "grad_norm": 6.507270336151123, "learning_rate": 2.3227272727272727e-06, "loss": 0.5817, "step": 147 }, { "epoch": 0.010111361617817859, "grad_norm": 11.363602638244629, "learning_rate": 2.338636363636364e-06, "loss": 0.5239, "step": 148 }, { "epoch": 0.010179681628749061, "grad_norm": 7.827622413635254, "learning_rate": 2.3545454545454545e-06, "loss": 0.6427, "step": 149 }, { "epoch": 0.010248001639680262, "grad_norm": 5.668514251708984, "learning_rate": 2.370454545454545e-06, "loss": 0.5067, "step": 150 }, { "epoch": 0.010316321650611464, "grad_norm": 8.072701454162598, "learning_rate": 2.3863636363636363e-06, "loss": 0.6921, "step": 151 }, { "epoch": 0.010384641661542666, "grad_norm": 6.909970283508301, "learning_rate": 2.4022727272727274e-06, "loss": 0.662, "step": 152 }, { "epoch": 0.010452961672473868, "grad_norm": 8.209244728088379, "learning_rate": 2.418181818181818e-06, "loss": 0.6879, "step": 153 }, { "epoch": 0.010521281683405069, "grad_norm": 7.2487473487854, "learning_rate": 2.434090909090909e-06, "loss": 0.5651, "step": 154 }, { "epoch": 0.010589601694336271, "grad_norm": 8.918998718261719, "learning_rate": 2.45e-06, "loss": 0.6284, "step": 155 }, { "epoch": 0.010657921705267474, "grad_norm": 6.427109241485596, "learning_rate": 2.465909090909091e-06, "loss": 0.5363, "step": 156 }, { "epoch": 0.010726241716198674, "grad_norm": 5.722228050231934, "learning_rate": 2.4818181818181816e-06, "loss": 0.5665, "step": 157 }, { "epoch": 0.010794561727129877, "grad_norm": 6.41756534576416, "learning_rate": 2.4977272727272727e-06, "loss": 0.5752, "step": 158 }, { "epoch": 0.010862881738061079, "grad_norm": 7.787707328796387, "learning_rate": 2.5136363636363634e-06, "loss": 0.4937, "step": 159 }, { "epoch": 0.01093120174899228, "grad_norm": 7.4017486572265625, "learning_rate": 2.5295454545454545e-06, "loss": 0.49, "step": 160 }, { "epoch": 0.010999521759923482, "grad_norm": 7.0198798179626465, "learning_rate": 2.5454545454545456e-06, "loss": 0.5213, "step": 161 }, { "epoch": 0.011067841770854684, "grad_norm": 9.523085594177246, "learning_rate": 2.5613636363636363e-06, "loss": 0.5812, "step": 162 }, { "epoch": 0.011136161781785885, "grad_norm": 6.750725269317627, "learning_rate": 2.577272727272727e-06, "loss": 0.6561, "step": 163 }, { "epoch": 0.011204481792717087, "grad_norm": 7.737731456756592, "learning_rate": 2.5931818181818185e-06, "loss": 0.4538, "step": 164 }, { "epoch": 0.01127280180364829, "grad_norm": 9.571794509887695, "learning_rate": 2.609090909090909e-06, "loss": 0.549, "step": 165 }, { "epoch": 0.01134112181457949, "grad_norm": 8.7518949508667, "learning_rate": 2.625e-06, "loss": 0.5858, "step": 166 }, { "epoch": 0.011409441825510692, "grad_norm": 8.849173545837402, "learning_rate": 2.640909090909091e-06, "loss": 0.5432, "step": 167 }, { "epoch": 0.011477761836441894, "grad_norm": 9.372940063476562, "learning_rate": 2.6568181818181816e-06, "loss": 0.5465, "step": 168 }, { "epoch": 0.011546081847373095, "grad_norm": 7.32110071182251, "learning_rate": 2.6727272727272727e-06, "loss": 0.6102, "step": 169 }, { "epoch": 0.011614401858304297, "grad_norm": 7.903537273406982, "learning_rate": 2.6886363636363634e-06, "loss": 0.578, "step": 170 }, { "epoch": 0.0116827218692355, "grad_norm": 6.897369384765625, "learning_rate": 2.7045454545454545e-06, "loss": 0.6182, "step": 171 }, { "epoch": 0.0117510418801667, "grad_norm": 7.274548053741455, "learning_rate": 2.720454545454545e-06, "loss": 0.5486, "step": 172 }, { "epoch": 0.011819361891097903, "grad_norm": 6.1495747566223145, "learning_rate": 2.7363636363636363e-06, "loss": 0.5478, "step": 173 }, { "epoch": 0.011887681902029105, "grad_norm": 7.658591270446777, "learning_rate": 2.7522727272727274e-06, "loss": 0.5165, "step": 174 }, { "epoch": 0.011956001912960305, "grad_norm": 6.560210704803467, "learning_rate": 2.768181818181818e-06, "loss": 0.6197, "step": 175 }, { "epoch": 0.012024321923891508, "grad_norm": 9.655133247375488, "learning_rate": 2.7840909090909088e-06, "loss": 0.4733, "step": 176 }, { "epoch": 0.01209264193482271, "grad_norm": 7.138974666595459, "learning_rate": 2.8000000000000003e-06, "loss": 0.4839, "step": 177 }, { "epoch": 0.01216096194575391, "grad_norm": 5.409519195556641, "learning_rate": 2.815909090909091e-06, "loss": 0.4582, "step": 178 }, { "epoch": 0.012229281956685113, "grad_norm": 6.488499641418457, "learning_rate": 2.8318181818181817e-06, "loss": 0.5765, "step": 179 }, { "epoch": 0.012297601967616315, "grad_norm": 7.464823246002197, "learning_rate": 2.8477272727272728e-06, "loss": 0.5505, "step": 180 }, { "epoch": 0.012365921978547516, "grad_norm": 8.91684627532959, "learning_rate": 2.863636363636364e-06, "loss": 0.4451, "step": 181 }, { "epoch": 0.012434241989478718, "grad_norm": 6.261149883270264, "learning_rate": 2.8795454545454545e-06, "loss": 0.5467, "step": 182 }, { "epoch": 0.01250256200040992, "grad_norm": 5.99649715423584, "learning_rate": 2.8954545454545452e-06, "loss": 0.5147, "step": 183 }, { "epoch": 0.012570882011341121, "grad_norm": 10.005566596984863, "learning_rate": 2.9113636363636363e-06, "loss": 0.5656, "step": 184 }, { "epoch": 0.012639202022272323, "grad_norm": 9.63338851928711, "learning_rate": 2.927272727272727e-06, "loss": 0.5735, "step": 185 }, { "epoch": 0.012707522033203526, "grad_norm": 8.47904109954834, "learning_rate": 2.943181818181818e-06, "loss": 0.5935, "step": 186 }, { "epoch": 0.012775842044134728, "grad_norm": 7.638238430023193, "learning_rate": 2.9590909090909092e-06, "loss": 0.5654, "step": 187 }, { "epoch": 0.012844162055065928, "grad_norm": 8.30197811126709, "learning_rate": 2.975e-06, "loss": 0.5934, "step": 188 }, { "epoch": 0.01291248206599713, "grad_norm": 6.633927822113037, "learning_rate": 2.9909090909090906e-06, "loss": 0.4611, "step": 189 }, { "epoch": 0.012980802076928333, "grad_norm": 8.135661125183105, "learning_rate": 3.006818181818182e-06, "loss": 0.5838, "step": 190 }, { "epoch": 0.013049122087859534, "grad_norm": 7.756171703338623, "learning_rate": 3.0227272727272728e-06, "loss": 0.572, "step": 191 }, { "epoch": 0.013117442098790736, "grad_norm": 7.228518486022949, "learning_rate": 3.0386363636363635e-06, "loss": 0.5848, "step": 192 }, { "epoch": 0.013185762109721938, "grad_norm": 7.257087707519531, "learning_rate": 3.0545454545454546e-06, "loss": 0.5745, "step": 193 }, { "epoch": 0.013254082120653139, "grad_norm": 6.890860080718994, "learning_rate": 3.0704545454545457e-06, "loss": 0.5425, "step": 194 }, { "epoch": 0.013322402131584341, "grad_norm": 7.1974897384643555, "learning_rate": 3.0863636363636363e-06, "loss": 0.517, "step": 195 }, { "epoch": 0.013390722142515544, "grad_norm": 6.580018043518066, "learning_rate": 3.102272727272727e-06, "loss": 0.4672, "step": 196 }, { "epoch": 0.013459042153446744, "grad_norm": 6.480654716491699, "learning_rate": 3.118181818181818e-06, "loss": 0.4491, "step": 197 }, { "epoch": 0.013527362164377946, "grad_norm": 8.120529174804688, "learning_rate": 3.1340909090909092e-06, "loss": 0.498, "step": 198 }, { "epoch": 0.013595682175309149, "grad_norm": 8.14588451385498, "learning_rate": 3.15e-06, "loss": 0.5309, "step": 199 }, { "epoch": 0.01366400218624035, "grad_norm": 7.58510684967041, "learning_rate": 3.165909090909091e-06, "loss": 0.4363, "step": 200 }, { "epoch": 0.013732322197171552, "grad_norm": 6.469640731811523, "learning_rate": 3.1818181818181817e-06, "loss": 0.5246, "step": 201 }, { "epoch": 0.013800642208102754, "grad_norm": 9.609189987182617, "learning_rate": 3.197727272727273e-06, "loss": 0.6549, "step": 202 }, { "epoch": 0.013868962219033954, "grad_norm": 6.30001163482666, "learning_rate": 3.2136363636363635e-06, "loss": 0.4726, "step": 203 }, { "epoch": 0.013937282229965157, "grad_norm": 6.232417106628418, "learning_rate": 3.2295454545454546e-06, "loss": 0.5303, "step": 204 }, { "epoch": 0.014005602240896359, "grad_norm": 6.759830951690674, "learning_rate": 3.2454545454545453e-06, "loss": 0.6089, "step": 205 }, { "epoch": 0.01407392225182756, "grad_norm": 6.918146133422852, "learning_rate": 3.261363636363636e-06, "loss": 0.4654, "step": 206 }, { "epoch": 0.014142242262758762, "grad_norm": 7.54303503036499, "learning_rate": 3.2772727272727275e-06, "loss": 0.5547, "step": 207 }, { "epoch": 0.014210562273689964, "grad_norm": 9.646990776062012, "learning_rate": 3.293181818181818e-06, "loss": 0.4405, "step": 208 }, { "epoch": 0.014278882284621165, "grad_norm": 7.51863431930542, "learning_rate": 3.309090909090909e-06, "loss": 0.6304, "step": 209 }, { "epoch": 0.014347202295552367, "grad_norm": 7.673823356628418, "learning_rate": 3.325e-06, "loss": 0.5985, "step": 210 }, { "epoch": 0.01441552230648357, "grad_norm": 6.263634204864502, "learning_rate": 3.340909090909091e-06, "loss": 0.5207, "step": 211 }, { "epoch": 0.01448384231741477, "grad_norm": 7.83951473236084, "learning_rate": 3.3568181818181817e-06, "loss": 0.5488, "step": 212 }, { "epoch": 0.014552162328345972, "grad_norm": 7.823199272155762, "learning_rate": 3.372727272727273e-06, "loss": 0.5973, "step": 213 }, { "epoch": 0.014620482339277175, "grad_norm": 6.921258926391602, "learning_rate": 3.3886363636363635e-06, "loss": 0.4633, "step": 214 }, { "epoch": 0.014688802350208375, "grad_norm": 7.005030632019043, "learning_rate": 3.4045454545454546e-06, "loss": 0.4768, "step": 215 }, { "epoch": 0.014757122361139578, "grad_norm": 7.640456676483154, "learning_rate": 3.4204545454545453e-06, "loss": 0.6123, "step": 216 }, { "epoch": 0.01482544237207078, "grad_norm": 6.770887851715088, "learning_rate": 3.4363636363636364e-06, "loss": 0.4404, "step": 217 }, { "epoch": 0.01489376238300198, "grad_norm": 6.398966312408447, "learning_rate": 3.452272727272727e-06, "loss": 0.4916, "step": 218 }, { "epoch": 0.014962082393933183, "grad_norm": 7.183138847351074, "learning_rate": 3.468181818181818e-06, "loss": 0.5711, "step": 219 }, { "epoch": 0.015030402404864385, "grad_norm": 7.152739524841309, "learning_rate": 3.4840909090909093e-06, "loss": 0.7335, "step": 220 }, { "epoch": 0.015098722415795587, "grad_norm": 7.15789794921875, "learning_rate": 3.5e-06, "loss": 0.5376, "step": 221 }, { "epoch": 0.015167042426726788, "grad_norm": 5.758776664733887, "learning_rate": 3.515909090909091e-06, "loss": 0.4242, "step": 222 }, { "epoch": 0.01523536243765799, "grad_norm": 6.688355922698975, "learning_rate": 3.5318181818181817e-06, "loss": 0.4895, "step": 223 }, { "epoch": 0.015303682448589193, "grad_norm": 6.7115302085876465, "learning_rate": 3.547727272727273e-06, "loss": 0.5954, "step": 224 }, { "epoch": 0.015372002459520393, "grad_norm": 7.735559463500977, "learning_rate": 3.563636363636363e-06, "loss": 0.5505, "step": 225 }, { "epoch": 0.015440322470451595, "grad_norm": 4.957777976989746, "learning_rate": 3.5795454545454546e-06, "loss": 0.4231, "step": 226 }, { "epoch": 0.015508642481382798, "grad_norm": 5.049996852874756, "learning_rate": 3.5954545454545457e-06, "loss": 0.5796, "step": 227 }, { "epoch": 0.015576962492313998, "grad_norm": 7.716152667999268, "learning_rate": 3.611363636363636e-06, "loss": 0.5587, "step": 228 }, { "epoch": 0.0156452825032452, "grad_norm": 7.488137722015381, "learning_rate": 3.627272727272727e-06, "loss": 0.5201, "step": 229 }, { "epoch": 0.015713602514176403, "grad_norm": 8.49866008758545, "learning_rate": 3.6431818181818186e-06, "loss": 0.6749, "step": 230 }, { "epoch": 0.015781922525107604, "grad_norm": 7.4102678298950195, "learning_rate": 3.659090909090909e-06, "loss": 0.527, "step": 231 }, { "epoch": 0.015850242536038804, "grad_norm": 7.628543853759766, "learning_rate": 3.675e-06, "loss": 0.4837, "step": 232 }, { "epoch": 0.015918562546970008, "grad_norm": 5.828675270080566, "learning_rate": 3.6909090909090906e-06, "loss": 0.5166, "step": 233 }, { "epoch": 0.01598688255790121, "grad_norm": 7.7339277267456055, "learning_rate": 3.7068181818181817e-06, "loss": 0.5012, "step": 234 }, { "epoch": 0.01605520256883241, "grad_norm": 8.533296585083008, "learning_rate": 3.722727272727273e-06, "loss": 0.6111, "step": 235 }, { "epoch": 0.016123522579763613, "grad_norm": 7.659719467163086, "learning_rate": 3.7386363636363635e-06, "loss": 0.5212, "step": 236 }, { "epoch": 0.016191842590694814, "grad_norm": 5.3718061447143555, "learning_rate": 3.7545454545454546e-06, "loss": 0.4212, "step": 237 }, { "epoch": 0.016260162601626018, "grad_norm": 7.448660373687744, "learning_rate": 3.7704545454545457e-06, "loss": 0.7442, "step": 238 }, { "epoch": 0.01632848261255722, "grad_norm": 8.796032905578613, "learning_rate": 3.7863636363636364e-06, "loss": 0.5596, "step": 239 }, { "epoch": 0.01639680262348842, "grad_norm": 6.005701065063477, "learning_rate": 3.8022727272727275e-06, "loss": 0.4851, "step": 240 }, { "epoch": 0.016465122634419623, "grad_norm": 6.545197486877441, "learning_rate": 3.818181818181818e-06, "loss": 0.5305, "step": 241 }, { "epoch": 0.016533442645350824, "grad_norm": 5.962306976318359, "learning_rate": 3.834090909090909e-06, "loss": 0.4349, "step": 242 }, { "epoch": 0.016601762656282024, "grad_norm": 7.6940598487854, "learning_rate": 3.85e-06, "loss": 0.5185, "step": 243 }, { "epoch": 0.01667008266721323, "grad_norm": 6.077400207519531, "learning_rate": 3.865909090909091e-06, "loss": 0.4881, "step": 244 }, { "epoch": 0.01673840267814443, "grad_norm": 7.312815189361572, "learning_rate": 3.881818181818182e-06, "loss": 0.5522, "step": 245 }, { "epoch": 0.01680672268907563, "grad_norm": 6.205748558044434, "learning_rate": 3.8977272727272724e-06, "loss": 0.4727, "step": 246 }, { "epoch": 0.016875042700006834, "grad_norm": 7.1866655349731445, "learning_rate": 3.913636363636363e-06, "loss": 0.5356, "step": 247 }, { "epoch": 0.016943362710938034, "grad_norm": 9.025790214538574, "learning_rate": 3.929545454545455e-06, "loss": 0.4761, "step": 248 }, { "epoch": 0.017011682721869235, "grad_norm": 8.576367378234863, "learning_rate": 3.945454545454545e-06, "loss": 0.6165, "step": 249 }, { "epoch": 0.01708000273280044, "grad_norm": 8.201793670654297, "learning_rate": 3.961363636363636e-06, "loss": 0.5695, "step": 250 }, { "epoch": 0.01714832274373164, "grad_norm": 7.257275104522705, "learning_rate": 3.9772727272727275e-06, "loss": 0.4966, "step": 251 }, { "epoch": 0.01721664275466284, "grad_norm": 8.970799446105957, "learning_rate": 3.993181818181818e-06, "loss": 0.4842, "step": 252 }, { "epoch": 0.017284962765594044, "grad_norm": 5.2523112297058105, "learning_rate": 4.009090909090909e-06, "loss": 0.4707, "step": 253 }, { "epoch": 0.017353282776525245, "grad_norm": 6.089587211608887, "learning_rate": 4.025e-06, "loss": 0.4971, "step": 254 }, { "epoch": 0.017421602787456445, "grad_norm": 8.941908836364746, "learning_rate": 4.040909090909091e-06, "loss": 0.6463, "step": 255 }, { "epoch": 0.01748992279838765, "grad_norm": 5.734358787536621, "learning_rate": 4.056818181818182e-06, "loss": 0.5282, "step": 256 }, { "epoch": 0.01755824280931885, "grad_norm": 6.922497749328613, "learning_rate": 4.0727272727272725e-06, "loss": 0.4426, "step": 257 }, { "epoch": 0.01762656282025005, "grad_norm": 8.990668296813965, "learning_rate": 4.088636363636364e-06, "loss": 0.5363, "step": 258 }, { "epoch": 0.017694882831181254, "grad_norm": 6.576870918273926, "learning_rate": 4.104545454545454e-06, "loss": 0.4973, "step": 259 }, { "epoch": 0.017763202842112455, "grad_norm": 6.364839553833008, "learning_rate": 4.120454545454545e-06, "loss": 0.5464, "step": 260 }, { "epoch": 0.017831522853043656, "grad_norm": 8.618924140930176, "learning_rate": 4.136363636363637e-06, "loss": 0.5868, "step": 261 }, { "epoch": 0.01789984286397486, "grad_norm": 7.81609582901001, "learning_rate": 4.152272727272727e-06, "loss": 0.6217, "step": 262 }, { "epoch": 0.01796816287490606, "grad_norm": 8.685025215148926, "learning_rate": 4.168181818181818e-06, "loss": 0.6367, "step": 263 }, { "epoch": 0.01803648288583726, "grad_norm": 9.122568130493164, "learning_rate": 4.18409090909091e-06, "loss": 0.493, "step": 264 }, { "epoch": 0.018104802896768465, "grad_norm": 7.2865190505981445, "learning_rate": 4.2e-06, "loss": 0.4984, "step": 265 }, { "epoch": 0.018173122907699665, "grad_norm": 6.506542682647705, "learning_rate": 4.215909090909091e-06, "loss": 0.5341, "step": 266 }, { "epoch": 0.018241442918630866, "grad_norm": 8.12984848022461, "learning_rate": 4.231818181818182e-06, "loss": 0.5127, "step": 267 }, { "epoch": 0.01830976292956207, "grad_norm": 7.668753147125244, "learning_rate": 4.2477272727272725e-06, "loss": 0.5845, "step": 268 }, { "epoch": 0.01837808294049327, "grad_norm": 7.267555236816406, "learning_rate": 4.263636363636364e-06, "loss": 0.6774, "step": 269 }, { "epoch": 0.01844640295142447, "grad_norm": 7.745560646057129, "learning_rate": 4.279545454545454e-06, "loss": 0.583, "step": 270 }, { "epoch": 0.018514722962355675, "grad_norm": 8.75478744506836, "learning_rate": 4.295454545454545e-06, "loss": 0.5715, "step": 271 }, { "epoch": 0.018583042973286876, "grad_norm": 4.88132905960083, "learning_rate": 4.311363636363637e-06, "loss": 0.4166, "step": 272 }, { "epoch": 0.018651362984218076, "grad_norm": 6.273959636688232, "learning_rate": 4.327272727272727e-06, "loss": 0.5103, "step": 273 }, { "epoch": 0.01871968299514928, "grad_norm": 7.880096912384033, "learning_rate": 4.343181818181818e-06, "loss": 0.522, "step": 274 }, { "epoch": 0.01878800300608048, "grad_norm": 8.485411643981934, "learning_rate": 4.359090909090909e-06, "loss": 0.5005, "step": 275 }, { "epoch": 0.01885632301701168, "grad_norm": 7.04388427734375, "learning_rate": 4.375e-06, "loss": 0.5719, "step": 276 }, { "epoch": 0.018924643027942886, "grad_norm": 10.87972354888916, "learning_rate": 4.390909090909091e-06, "loss": 0.5605, "step": 277 }, { "epoch": 0.018992963038874086, "grad_norm": 5.3912835121154785, "learning_rate": 4.406818181818182e-06, "loss": 0.4785, "step": 278 }, { "epoch": 0.019061283049805287, "grad_norm": 6.702377796173096, "learning_rate": 4.4227272727272725e-06, "loss": 0.5535, "step": 279 }, { "epoch": 0.01912960306073649, "grad_norm": 5.571341514587402, "learning_rate": 4.438636363636363e-06, "loss": 0.4468, "step": 280 }, { "epoch": 0.01919792307166769, "grad_norm": 6.837368011474609, "learning_rate": 4.454545454545455e-06, "loss": 0.4503, "step": 281 }, { "epoch": 0.019266243082598892, "grad_norm": 6.421364784240723, "learning_rate": 4.470454545454545e-06, "loss": 0.4741, "step": 282 }, { "epoch": 0.019334563093530096, "grad_norm": 6.629866123199463, "learning_rate": 4.486363636363636e-06, "loss": 0.512, "step": 283 }, { "epoch": 0.019402883104461296, "grad_norm": 5.0064778327941895, "learning_rate": 4.502272727272728e-06, "loss": 0.4127, "step": 284 }, { "epoch": 0.019471203115392497, "grad_norm": 7.116041660308838, "learning_rate": 4.518181818181818e-06, "loss": 0.6399, "step": 285 }, { "epoch": 0.0195395231263237, "grad_norm": 7.288229942321777, "learning_rate": 4.534090909090909e-06, "loss": 0.5217, "step": 286 }, { "epoch": 0.0196078431372549, "grad_norm": 7.4444122314453125, "learning_rate": 4.5500000000000005e-06, "loss": 0.55, "step": 287 }, { "epoch": 0.019676163148186102, "grad_norm": 7.031043529510498, "learning_rate": 4.56590909090909e-06, "loss": 0.5481, "step": 288 }, { "epoch": 0.019744483159117306, "grad_norm": 7.14063835144043, "learning_rate": 4.581818181818182e-06, "loss": 0.4872, "step": 289 }, { "epoch": 0.019812803170048507, "grad_norm": 6.410430431365967, "learning_rate": 4.5977272727272725e-06, "loss": 0.5217, "step": 290 }, { "epoch": 0.019881123180979707, "grad_norm": 7.435976982116699, "learning_rate": 4.613636363636363e-06, "loss": 0.5414, "step": 291 }, { "epoch": 0.01994944319191091, "grad_norm": 6.475009918212891, "learning_rate": 4.629545454545455e-06, "loss": 0.5033, "step": 292 }, { "epoch": 0.020017763202842112, "grad_norm": 5.000571250915527, "learning_rate": 4.645454545454545e-06, "loss": 0.4403, "step": 293 }, { "epoch": 0.020086083213773313, "grad_norm": 6.365567684173584, "learning_rate": 4.661363636363636e-06, "loss": 0.5276, "step": 294 }, { "epoch": 0.020154403224704517, "grad_norm": 8.599311828613281, "learning_rate": 4.677272727272728e-06, "loss": 0.659, "step": 295 }, { "epoch": 0.020222723235635717, "grad_norm": 11.27370834350586, "learning_rate": 4.6931818181818174e-06, "loss": 0.5174, "step": 296 }, { "epoch": 0.020291043246566918, "grad_norm": 5.419808864593506, "learning_rate": 4.709090909090909e-06, "loss": 0.4378, "step": 297 }, { "epoch": 0.020359363257498122, "grad_norm": 6.503259658813477, "learning_rate": 4.7250000000000005e-06, "loss": 0.472, "step": 298 }, { "epoch": 0.020427683268429322, "grad_norm": 6.312655448913574, "learning_rate": 4.74090909090909e-06, "loss": 0.4201, "step": 299 }, { "epoch": 0.020496003279360523, "grad_norm": 6.592263221740723, "learning_rate": 4.756818181818182e-06, "loss": 0.5324, "step": 300 }, { "epoch": 0.020564323290291727, "grad_norm": 6.196633815765381, "learning_rate": 4.7727272727272725e-06, "loss": 0.5705, "step": 301 }, { "epoch": 0.020632643301222928, "grad_norm": 6.383309841156006, "learning_rate": 4.788636363636363e-06, "loss": 0.469, "step": 302 }, { "epoch": 0.020700963312154128, "grad_norm": 7.32251501083374, "learning_rate": 4.804545454545455e-06, "loss": 0.5995, "step": 303 }, { "epoch": 0.020769283323085332, "grad_norm": 8.024049758911133, "learning_rate": 4.8204545454545454e-06, "loss": 0.4689, "step": 304 }, { "epoch": 0.020837603334016533, "grad_norm": 6.467672348022461, "learning_rate": 4.836363636363636e-06, "loss": 0.3395, "step": 305 }, { "epoch": 0.020905923344947737, "grad_norm": 6.945488929748535, "learning_rate": 4.852272727272728e-06, "loss": 0.4493, "step": 306 }, { "epoch": 0.020974243355878937, "grad_norm": 5.348641872406006, "learning_rate": 4.868181818181818e-06, "loss": 0.5115, "step": 307 }, { "epoch": 0.021042563366810138, "grad_norm": 6.948513507843018, "learning_rate": 4.884090909090909e-06, "loss": 0.537, "step": 308 }, { "epoch": 0.021110883377741342, "grad_norm": 7.66775369644165, "learning_rate": 4.9e-06, "loss": 0.6202, "step": 309 }, { "epoch": 0.021179203388672543, "grad_norm": 8.163763999938965, "learning_rate": 4.915909090909091e-06, "loss": 0.5203, "step": 310 }, { "epoch": 0.021247523399603743, "grad_norm": 6.4034881591796875, "learning_rate": 4.931818181818182e-06, "loss": 0.4679, "step": 311 }, { "epoch": 0.021315843410534947, "grad_norm": 5.995136260986328, "learning_rate": 4.9477272727272726e-06, "loss": 0.4815, "step": 312 }, { "epoch": 0.021384163421466148, "grad_norm": 8.552952766418457, "learning_rate": 4.963636363636363e-06, "loss": 0.6291, "step": 313 }, { "epoch": 0.02145248343239735, "grad_norm": 6.181306838989258, "learning_rate": 4.979545454545454e-06, "loss": 0.509, "step": 314 }, { "epoch": 0.021520803443328552, "grad_norm": 7.286202430725098, "learning_rate": 4.9954545454545454e-06, "loss": 0.4738, "step": 315 }, { "epoch": 0.021589123454259753, "grad_norm": 8.103401184082031, "learning_rate": 5.011363636363636e-06, "loss": 0.494, "step": 316 }, { "epoch": 0.021657443465190954, "grad_norm": 7.421688079833984, "learning_rate": 5.027272727272727e-06, "loss": 0.4213, "step": 317 }, { "epoch": 0.021725763476122158, "grad_norm": 5.859961986541748, "learning_rate": 5.043181818181818e-06, "loss": 0.517, "step": 318 }, { "epoch": 0.02179408348705336, "grad_norm": 6.664839744567871, "learning_rate": 5.059090909090909e-06, "loss": 0.4981, "step": 319 }, { "epoch": 0.02186240349798456, "grad_norm": 6.01913595199585, "learning_rate": 5.075e-06, "loss": 0.5305, "step": 320 }, { "epoch": 0.021930723508915763, "grad_norm": 5.679762840270996, "learning_rate": 5.090909090909091e-06, "loss": 0.4823, "step": 321 }, { "epoch": 0.021999043519846963, "grad_norm": 7.274050712585449, "learning_rate": 5.106818181818181e-06, "loss": 0.4228, "step": 322 }, { "epoch": 0.022067363530778164, "grad_norm": 7.57437801361084, "learning_rate": 5.122727272727273e-06, "loss": 0.5226, "step": 323 }, { "epoch": 0.022135683541709368, "grad_norm": 7.07823371887207, "learning_rate": 5.138636363636364e-06, "loss": 0.4829, "step": 324 }, { "epoch": 0.02220400355264057, "grad_norm": 8.874361038208008, "learning_rate": 5.154545454545454e-06, "loss": 0.5303, "step": 325 }, { "epoch": 0.02227232356357177, "grad_norm": 7.775270938873291, "learning_rate": 5.1704545454545455e-06, "loss": 0.4911, "step": 326 }, { "epoch": 0.022340643574502973, "grad_norm": 7.291893482208252, "learning_rate": 5.186363636363637e-06, "loss": 0.6611, "step": 327 }, { "epoch": 0.022408963585434174, "grad_norm": 7.39361572265625, "learning_rate": 5.202272727272727e-06, "loss": 0.5154, "step": 328 }, { "epoch": 0.022477283596365374, "grad_norm": 6.0636091232299805, "learning_rate": 5.218181818181818e-06, "loss": 0.5901, "step": 329 }, { "epoch": 0.02254560360729658, "grad_norm": 6.276981353759766, "learning_rate": 5.234090909090909e-06, "loss": 0.4594, "step": 330 }, { "epoch": 0.02261392361822778, "grad_norm": 9.383177757263184, "learning_rate": 5.25e-06, "loss": 0.5393, "step": 331 }, { "epoch": 0.02268224362915898, "grad_norm": 7.995043754577637, "learning_rate": 5.265909090909091e-06, "loss": 0.5467, "step": 332 }, { "epoch": 0.022750563640090184, "grad_norm": 9.465750694274902, "learning_rate": 5.281818181818182e-06, "loss": 0.6296, "step": 333 }, { "epoch": 0.022818883651021384, "grad_norm": 5.774755477905273, "learning_rate": 5.297727272727273e-06, "loss": 0.4351, "step": 334 }, { "epoch": 0.022887203661952585, "grad_norm": 6.242987632751465, "learning_rate": 5.313636363636363e-06, "loss": 0.4131, "step": 335 }, { "epoch": 0.02295552367288379, "grad_norm": 6.446985244750977, "learning_rate": 5.329545454545455e-06, "loss": 0.5125, "step": 336 }, { "epoch": 0.02302384368381499, "grad_norm": 4.927233695983887, "learning_rate": 5.3454545454545455e-06, "loss": 0.5274, "step": 337 }, { "epoch": 0.02309216369474619, "grad_norm": 8.495304107666016, "learning_rate": 5.361363636363636e-06, "loss": 0.5686, "step": 338 }, { "epoch": 0.023160483705677394, "grad_norm": 7.7036051750183105, "learning_rate": 5.377272727272727e-06, "loss": 0.4927, "step": 339 }, { "epoch": 0.023228803716608595, "grad_norm": 6.4172210693359375, "learning_rate": 5.393181818181818e-06, "loss": 0.4691, "step": 340 }, { "epoch": 0.023297123727539795, "grad_norm": 7.4051594734191895, "learning_rate": 5.409090909090909e-06, "loss": 0.6002, "step": 341 }, { "epoch": 0.023365443738471, "grad_norm": 7.229957103729248, "learning_rate": 5.425e-06, "loss": 0.5459, "step": 342 }, { "epoch": 0.0234337637494022, "grad_norm": 4.95484733581543, "learning_rate": 5.44090909090909e-06, "loss": 0.4136, "step": 343 }, { "epoch": 0.0235020837603334, "grad_norm": 8.557330131530762, "learning_rate": 5.456818181818182e-06, "loss": 0.5567, "step": 344 }, { "epoch": 0.023570403771264604, "grad_norm": 8.463507652282715, "learning_rate": 5.472727272727273e-06, "loss": 0.4638, "step": 345 }, { "epoch": 0.023638723782195805, "grad_norm": 5.759728908538818, "learning_rate": 5.488636363636363e-06, "loss": 0.4948, "step": 346 }, { "epoch": 0.023707043793127006, "grad_norm": 5.898278713226318, "learning_rate": 5.504545454545455e-06, "loss": 0.4848, "step": 347 }, { "epoch": 0.02377536380405821, "grad_norm": 6.890252590179443, "learning_rate": 5.5204545454545455e-06, "loss": 0.5201, "step": 348 }, { "epoch": 0.02384368381498941, "grad_norm": 5.543643951416016, "learning_rate": 5.536363636363636e-06, "loss": 0.461, "step": 349 }, { "epoch": 0.02391200382592061, "grad_norm": 10.402789115905762, "learning_rate": 5.552272727272728e-06, "loss": 0.6394, "step": 350 }, { "epoch": 0.023980323836851815, "grad_norm": 5.347923755645752, "learning_rate": 5.5681818181818175e-06, "loss": 0.4298, "step": 351 }, { "epoch": 0.024048643847783015, "grad_norm": 10.650976181030273, "learning_rate": 5.584090909090909e-06, "loss": 0.5865, "step": 352 }, { "epoch": 0.024116963858714216, "grad_norm": 5.757544040679932, "learning_rate": 5.600000000000001e-06, "loss": 0.4905, "step": 353 }, { "epoch": 0.02418528386964542, "grad_norm": 7.625546932220459, "learning_rate": 5.6159090909090904e-06, "loss": 0.4893, "step": 354 }, { "epoch": 0.02425360388057662, "grad_norm": 6.813776969909668, "learning_rate": 5.631818181818182e-06, "loss": 0.5315, "step": 355 }, { "epoch": 0.02432192389150782, "grad_norm": 6.4341936111450195, "learning_rate": 5.647727272727273e-06, "loss": 0.461, "step": 356 }, { "epoch": 0.024390243902439025, "grad_norm": 5.5120720863342285, "learning_rate": 5.663636363636363e-06, "loss": 0.4407, "step": 357 }, { "epoch": 0.024458563913370226, "grad_norm": 6.558799743652344, "learning_rate": 5.679545454545455e-06, "loss": 0.4923, "step": 358 }, { "epoch": 0.024526883924301426, "grad_norm": 7.892202854156494, "learning_rate": 5.6954545454545455e-06, "loss": 0.4643, "step": 359 }, { "epoch": 0.02459520393523263, "grad_norm": 8.409863471984863, "learning_rate": 5.711363636363636e-06, "loss": 0.5336, "step": 360 }, { "epoch": 0.02466352394616383, "grad_norm": 7.3052167892456055, "learning_rate": 5.727272727272728e-06, "loss": 0.4959, "step": 361 }, { "epoch": 0.02473184395709503, "grad_norm": 7.240612506866455, "learning_rate": 5.7431818181818176e-06, "loss": 0.4934, "step": 362 }, { "epoch": 0.024800163968026236, "grad_norm": 8.023192405700684, "learning_rate": 5.759090909090909e-06, "loss": 0.6121, "step": 363 }, { "epoch": 0.024868483978957436, "grad_norm": 6.793002128601074, "learning_rate": 5.775e-06, "loss": 0.4973, "step": 364 }, { "epoch": 0.024936803989888637, "grad_norm": 5.915679454803467, "learning_rate": 5.7909090909090904e-06, "loss": 0.4601, "step": 365 }, { "epoch": 0.02500512400081984, "grad_norm": 5.898402214050293, "learning_rate": 5.806818181818182e-06, "loss": 0.5794, "step": 366 }, { "epoch": 0.02507344401175104, "grad_norm": 6.738814830780029, "learning_rate": 5.822727272727273e-06, "loss": 0.4895, "step": 367 }, { "epoch": 0.025141764022682242, "grad_norm": 5.767965316772461, "learning_rate": 5.838636363636363e-06, "loss": 0.4703, "step": 368 }, { "epoch": 0.025210084033613446, "grad_norm": 6.485166072845459, "learning_rate": 5.854545454545454e-06, "loss": 0.554, "step": 369 }, { "epoch": 0.025278404044544647, "grad_norm": 6.8699774742126465, "learning_rate": 5.8704545454545455e-06, "loss": 0.5274, "step": 370 }, { "epoch": 0.025346724055475847, "grad_norm": 6.004184722900391, "learning_rate": 5.886363636363636e-06, "loss": 0.3683, "step": 371 }, { "epoch": 0.02541504406640705, "grad_norm": 6.8236470222473145, "learning_rate": 5.902272727272727e-06, "loss": 0.4893, "step": 372 }, { "epoch": 0.025483364077338252, "grad_norm": 6.84896183013916, "learning_rate": 5.9181818181818184e-06, "loss": 0.4709, "step": 373 }, { "epoch": 0.025551684088269456, "grad_norm": 8.804109573364258, "learning_rate": 5.934090909090909e-06, "loss": 0.6122, "step": 374 }, { "epoch": 0.025620004099200656, "grad_norm": 4.673861026763916, "learning_rate": 5.95e-06, "loss": 0.449, "step": 375 }, { "epoch": 0.025688324110131857, "grad_norm": 5.868533611297607, "learning_rate": 5.965909090909091e-06, "loss": 0.3827, "step": 376 }, { "epoch": 0.02575664412106306, "grad_norm": 5.25044584274292, "learning_rate": 5.981818181818181e-06, "loss": 0.4625, "step": 377 }, { "epoch": 0.02582496413199426, "grad_norm": 5.7774271965026855, "learning_rate": 5.997727272727273e-06, "loss": 0.4763, "step": 378 }, { "epoch": 0.025893284142925462, "grad_norm": 8.158554077148438, "learning_rate": 6.013636363636364e-06, "loss": 0.5571, "step": 379 }, { "epoch": 0.025961604153856666, "grad_norm": 7.069841384887695, "learning_rate": 6.029545454545454e-06, "loss": 0.4709, "step": 380 }, { "epoch": 0.026029924164787867, "grad_norm": 4.372995853424072, "learning_rate": 6.0454545454545456e-06, "loss": 0.4754, "step": 381 }, { "epoch": 0.026098244175719067, "grad_norm": 6.457646369934082, "learning_rate": 6.061363636363636e-06, "loss": 0.5074, "step": 382 }, { "epoch": 0.02616656418665027, "grad_norm": 6.440744876861572, "learning_rate": 6.077272727272727e-06, "loss": 0.574, "step": 383 }, { "epoch": 0.026234884197581472, "grad_norm": 5.089472770690918, "learning_rate": 6.0931818181818184e-06, "loss": 0.5151, "step": 384 }, { "epoch": 0.026303204208512673, "grad_norm": 7.917370319366455, "learning_rate": 6.109090909090909e-06, "loss": 0.6491, "step": 385 }, { "epoch": 0.026371524219443877, "grad_norm": 8.80392074584961, "learning_rate": 6.125e-06, "loss": 0.5848, "step": 386 }, { "epoch": 0.026439844230375077, "grad_norm": 7.147172451019287, "learning_rate": 6.140909090909091e-06, "loss": 0.5465, "step": 387 }, { "epoch": 0.026508164241306278, "grad_norm": 7.225268840789795, "learning_rate": 6.156818181818181e-06, "loss": 0.5413, "step": 388 }, { "epoch": 0.026576484252237482, "grad_norm": 7.696239948272705, "learning_rate": 6.172727272727273e-06, "loss": 0.5356, "step": 389 }, { "epoch": 0.026644804263168682, "grad_norm": 6.366203784942627, "learning_rate": 6.188636363636363e-06, "loss": 0.5145, "step": 390 }, { "epoch": 0.026713124274099883, "grad_norm": 5.0429277420043945, "learning_rate": 6.204545454545454e-06, "loss": 0.4744, "step": 391 }, { "epoch": 0.026781444285031087, "grad_norm": 7.89503812789917, "learning_rate": 6.2204545454545456e-06, "loss": 0.5677, "step": 392 }, { "epoch": 0.026849764295962288, "grad_norm": 6.5971360206604, "learning_rate": 6.236363636363636e-06, "loss": 0.5046, "step": 393 }, { "epoch": 0.026918084306893488, "grad_norm": 6.847208023071289, "learning_rate": 6.252272727272727e-06, "loss": 0.5811, "step": 394 }, { "epoch": 0.026986404317824692, "grad_norm": 10.004498481750488, "learning_rate": 6.2681818181818185e-06, "loss": 0.6325, "step": 395 }, { "epoch": 0.027054724328755893, "grad_norm": 7.04339599609375, "learning_rate": 6.284090909090909e-06, "loss": 0.568, "step": 396 }, { "epoch": 0.027123044339687093, "grad_norm": 4.639193534851074, "learning_rate": 6.3e-06, "loss": 0.3764, "step": 397 }, { "epoch": 0.027191364350618297, "grad_norm": 7.546091556549072, "learning_rate": 6.3159090909090905e-06, "loss": 0.628, "step": 398 }, { "epoch": 0.027259684361549498, "grad_norm": 5.939865589141846, "learning_rate": 6.331818181818182e-06, "loss": 0.5317, "step": 399 }, { "epoch": 0.0273280043724807, "grad_norm": 4.947049617767334, "learning_rate": 6.347727272727273e-06, "loss": 0.4842, "step": 400 }, { "epoch": 0.027396324383411903, "grad_norm": 7.834659576416016, "learning_rate": 6.363636363636363e-06, "loss": 0.5332, "step": 401 }, { "epoch": 0.027464644394343103, "grad_norm": 7.400113105773926, "learning_rate": 6.379545454545455e-06, "loss": 0.4439, "step": 402 }, { "epoch": 0.027532964405274304, "grad_norm": 5.002071380615234, "learning_rate": 6.395454545454546e-06, "loss": 0.4692, "step": 403 }, { "epoch": 0.027601284416205508, "grad_norm": 7.571725368499756, "learning_rate": 6.411363636363636e-06, "loss": 0.5392, "step": 404 }, { "epoch": 0.02766960442713671, "grad_norm": 9.254777908325195, "learning_rate": 6.427272727272727e-06, "loss": 0.517, "step": 405 }, { "epoch": 0.02773792443806791, "grad_norm": 6.720826148986816, "learning_rate": 6.443181818181818e-06, "loss": 0.5207, "step": 406 }, { "epoch": 0.027806244448999113, "grad_norm": 6.8792829513549805, "learning_rate": 6.459090909090909e-06, "loss": 0.4525, "step": 407 }, { "epoch": 0.027874564459930314, "grad_norm": 6.1758599281311035, "learning_rate": 6.475e-06, "loss": 0.4622, "step": 408 }, { "epoch": 0.027942884470861514, "grad_norm": 5.032190322875977, "learning_rate": 6.4909090909090905e-06, "loss": 0.4257, "step": 409 }, { "epoch": 0.028011204481792718, "grad_norm": 7.358471393585205, "learning_rate": 6.506818181818182e-06, "loss": 0.546, "step": 410 }, { "epoch": 0.02807952449272392, "grad_norm": 6.682706832885742, "learning_rate": 6.522727272727272e-06, "loss": 0.5248, "step": 411 }, { "epoch": 0.02814784450365512, "grad_norm": 7.16533088684082, "learning_rate": 6.538636363636363e-06, "loss": 0.4818, "step": 412 }, { "epoch": 0.028216164514586323, "grad_norm": 6.674290657043457, "learning_rate": 6.554545454545455e-06, "loss": 0.5288, "step": 413 }, { "epoch": 0.028284484525517524, "grad_norm": 6.849817752838135, "learning_rate": 6.570454545454545e-06, "loss": 0.4741, "step": 414 }, { "epoch": 0.028352804536448725, "grad_norm": 7.65207576751709, "learning_rate": 6.586363636363636e-06, "loss": 0.4572, "step": 415 }, { "epoch": 0.02842112454737993, "grad_norm": 7.950820446014404, "learning_rate": 6.602272727272728e-06, "loss": 0.4891, "step": 416 }, { "epoch": 0.02848944455831113, "grad_norm": 5.970667362213135, "learning_rate": 6.618181818181818e-06, "loss": 0.4567, "step": 417 }, { "epoch": 0.02855776456924233, "grad_norm": 7.429079532623291, "learning_rate": 6.634090909090909e-06, "loss": 0.5506, "step": 418 }, { "epoch": 0.028626084580173534, "grad_norm": 5.615156173706055, "learning_rate": 6.65e-06, "loss": 0.4298, "step": 419 }, { "epoch": 0.028694404591104734, "grad_norm": 6.122983455657959, "learning_rate": 6.6659090909090905e-06, "loss": 0.5853, "step": 420 }, { "epoch": 0.028762724602035935, "grad_norm": 6.845665454864502, "learning_rate": 6.681818181818182e-06, "loss": 0.5424, "step": 421 }, { "epoch": 0.02883104461296714, "grad_norm": 5.310086250305176, "learning_rate": 6.697727272727273e-06, "loss": 0.456, "step": 422 }, { "epoch": 0.02889936462389834, "grad_norm": 8.126826286315918, "learning_rate": 6.7136363636363634e-06, "loss": 0.5931, "step": 423 }, { "epoch": 0.02896768463482954, "grad_norm": 5.978158950805664, "learning_rate": 6.729545454545454e-06, "loss": 0.5929, "step": 424 }, { "epoch": 0.029036004645760744, "grad_norm": 6.122375011444092, "learning_rate": 6.745454545454546e-06, "loss": 0.4926, "step": 425 }, { "epoch": 0.029104324656691945, "grad_norm": 5.69883918762207, "learning_rate": 6.761363636363636e-06, "loss": 0.4888, "step": 426 }, { "epoch": 0.029172644667623145, "grad_norm": 4.70219087600708, "learning_rate": 6.777272727272727e-06, "loss": 0.4528, "step": 427 }, { "epoch": 0.02924096467855435, "grad_norm": 7.338653564453125, "learning_rate": 6.7931818181818185e-06, "loss": 0.5122, "step": 428 }, { "epoch": 0.02930928468948555, "grad_norm": 6.224643230438232, "learning_rate": 6.809090909090909e-06, "loss": 0.4816, "step": 429 }, { "epoch": 0.02937760470041675, "grad_norm": 6.914153575897217, "learning_rate": 6.825e-06, "loss": 0.5078, "step": 430 }, { "epoch": 0.029445924711347955, "grad_norm": 6.511452674865723, "learning_rate": 6.8409090909090906e-06, "loss": 0.6634, "step": 431 }, { "epoch": 0.029514244722279155, "grad_norm": 5.657638072967529, "learning_rate": 6.856818181818181e-06, "loss": 0.4598, "step": 432 }, { "epoch": 0.029582564733210356, "grad_norm": 5.427535533905029, "learning_rate": 6.872727272727273e-06, "loss": 0.3974, "step": 433 }, { "epoch": 0.02965088474414156, "grad_norm": 6.169464111328125, "learning_rate": 6.8886363636363634e-06, "loss": 0.5451, "step": 434 }, { "epoch": 0.02971920475507276, "grad_norm": 5.214122772216797, "learning_rate": 6.904545454545454e-06, "loss": 0.4228, "step": 435 }, { "epoch": 0.02978752476600396, "grad_norm": 6.315852642059326, "learning_rate": 6.920454545454546e-06, "loss": 0.45, "step": 436 }, { "epoch": 0.029855844776935165, "grad_norm": 4.517842769622803, "learning_rate": 6.936363636363636e-06, "loss": 0.3964, "step": 437 }, { "epoch": 0.029924164787866366, "grad_norm": 5.365926742553711, "learning_rate": 6.952272727272727e-06, "loss": 0.4883, "step": 438 }, { "epoch": 0.029992484798797566, "grad_norm": 8.008356094360352, "learning_rate": 6.9681818181818185e-06, "loss": 0.5264, "step": 439 }, { "epoch": 0.03006080480972877, "grad_norm": 5.81397819519043, "learning_rate": 6.984090909090908e-06, "loss": 0.518, "step": 440 }, { "epoch": 0.03012912482065997, "grad_norm": 6.794382572174072, "learning_rate": 7e-06, "loss": 0.4997, "step": 441 }, { "epoch": 0.030197444831591175, "grad_norm": 8.206464767456055, "learning_rate": 6.999999914307142e-06, "loss": 0.5697, "step": 442 }, { "epoch": 0.030265764842522375, "grad_norm": 6.0030317306518555, "learning_rate": 6.999999657228572e-06, "loss": 0.4264, "step": 443 }, { "epoch": 0.030334084853453576, "grad_norm": 7.5216193199157715, "learning_rate": 6.999999228764303e-06, "loss": 0.4802, "step": 444 }, { "epoch": 0.03040240486438478, "grad_norm": 6.395943641662598, "learning_rate": 6.999998628914355e-06, "loss": 0.4989, "step": 445 }, { "epoch": 0.03047072487531598, "grad_norm": 5.006590366363525, "learning_rate": 6.999997857678759e-06, "loss": 0.4533, "step": 446 }, { "epoch": 0.03053904488624718, "grad_norm": 10.296483039855957, "learning_rate": 6.9999969150575515e-06, "loss": 0.4053, "step": 447 }, { "epoch": 0.030607364897178385, "grad_norm": 6.148649215698242, "learning_rate": 6.999995801050779e-06, "loss": 0.4573, "step": 448 }, { "epoch": 0.030675684908109586, "grad_norm": 5.256613731384277, "learning_rate": 6.9999945156584966e-06, "loss": 0.4519, "step": 449 }, { "epoch": 0.030744004919040786, "grad_norm": 5.525970458984375, "learning_rate": 6.999993058880766e-06, "loss": 0.5123, "step": 450 }, { "epoch": 0.03081232492997199, "grad_norm": 6.535553932189941, "learning_rate": 6.999991430717659e-06, "loss": 0.5659, "step": 451 }, { "epoch": 0.03088064494090319, "grad_norm": 10.393771171569824, "learning_rate": 6.999989631169257e-06, "loss": 0.5209, "step": 452 }, { "epoch": 0.03094896495183439, "grad_norm": 7.058047771453857, "learning_rate": 6.999987660235646e-06, "loss": 0.4771, "step": 453 }, { "epoch": 0.031017284962765596, "grad_norm": 5.598639488220215, "learning_rate": 6.999985517916922e-06, "loss": 0.3981, "step": 454 }, { "epoch": 0.031085604973696796, "grad_norm": 5.554254531860352, "learning_rate": 6.999983204213193e-06, "loss": 0.4204, "step": 455 }, { "epoch": 0.031153924984627997, "grad_norm": 7.910416603088379, "learning_rate": 6.999980719124569e-06, "loss": 0.5867, "step": 456 }, { "epoch": 0.0312222449955592, "grad_norm": 6.586370468139648, "learning_rate": 6.999978062651173e-06, "loss": 0.5631, "step": 457 }, { "epoch": 0.0312905650064904, "grad_norm": 5.855382442474365, "learning_rate": 6.999975234793135e-06, "loss": 0.3777, "step": 458 }, { "epoch": 0.0313588850174216, "grad_norm": 5.3303728103637695, "learning_rate": 6.999972235550595e-06, "loss": 0.6449, "step": 459 }, { "epoch": 0.031427205028352806, "grad_norm": 8.429052352905273, "learning_rate": 6.999969064923698e-06, "loss": 0.7202, "step": 460 }, { "epoch": 0.031495525039284, "grad_norm": 7.241459369659424, "learning_rate": 6.9999657229126e-06, "loss": 0.45, "step": 461 }, { "epoch": 0.03156384505021521, "grad_norm": 6.351389408111572, "learning_rate": 6.999962209517464e-06, "loss": 0.488, "step": 462 }, { "epoch": 0.03163216506114641, "grad_norm": 4.8593597412109375, "learning_rate": 6.999958524738463e-06, "loss": 0.4554, "step": 463 }, { "epoch": 0.03170048507207761, "grad_norm": 4.893687725067139, "learning_rate": 6.999954668575776e-06, "loss": 0.4104, "step": 464 }, { "epoch": 0.03176880508300881, "grad_norm": 5.272672176361084, "learning_rate": 6.999950641029593e-06, "loss": 0.4702, "step": 465 }, { "epoch": 0.031837125093940016, "grad_norm": 6.85011100769043, "learning_rate": 6.999946442100111e-06, "loss": 0.5208, "step": 466 }, { "epoch": 0.03190544510487121, "grad_norm": 5.622641563415527, "learning_rate": 6.999942071787535e-06, "loss": 0.522, "step": 467 }, { "epoch": 0.03197376511580242, "grad_norm": 7.360350608825684, "learning_rate": 6.99993753009208e-06, "loss": 0.52, "step": 468 }, { "epoch": 0.03204208512673362, "grad_norm": 6.269063472747803, "learning_rate": 6.999932817013968e-06, "loss": 0.4123, "step": 469 }, { "epoch": 0.03211040513766482, "grad_norm": 7.248227596282959, "learning_rate": 6.99992793255343e-06, "loss": 0.4784, "step": 470 }, { "epoch": 0.03217872514859602, "grad_norm": 6.0992584228515625, "learning_rate": 6.999922876710705e-06, "loss": 0.5259, "step": 471 }, { "epoch": 0.03224704515952723, "grad_norm": 4.965145111083984, "learning_rate": 6.99991764948604e-06, "loss": 0.3752, "step": 472 }, { "epoch": 0.032315365170458424, "grad_norm": 5.686493873596191, "learning_rate": 6.999912250879691e-06, "loss": 0.4333, "step": 473 }, { "epoch": 0.03238368518138963, "grad_norm": 5.312131404876709, "learning_rate": 6.999906680891924e-06, "loss": 0.4976, "step": 474 }, { "epoch": 0.03245200519232083, "grad_norm": 6.8701887130737305, "learning_rate": 6.99990093952301e-06, "loss": 0.4717, "step": 475 }, { "epoch": 0.032520325203252036, "grad_norm": 9.454743385314941, "learning_rate": 6.99989502677323e-06, "loss": 0.564, "step": 476 }, { "epoch": 0.03258864521418323, "grad_norm": 6.572393894195557, "learning_rate": 6.999888942642875e-06, "loss": 0.448, "step": 477 }, { "epoch": 0.03265696522511444, "grad_norm": 8.992391586303711, "learning_rate": 6.999882687132242e-06, "loss": 0.5113, "step": 478 }, { "epoch": 0.03272528523604564, "grad_norm": 4.532113552093506, "learning_rate": 6.999876260241637e-06, "loss": 0.4061, "step": 479 }, { "epoch": 0.03279360524697684, "grad_norm": 6.065232276916504, "learning_rate": 6.999869661971375e-06, "loss": 0.4773, "step": 480 }, { "epoch": 0.03286192525790804, "grad_norm": 7.154354095458984, "learning_rate": 6.999862892321781e-06, "loss": 0.6061, "step": 481 }, { "epoch": 0.032930245268839246, "grad_norm": 6.833878517150879, "learning_rate": 6.999855951293184e-06, "loss": 0.4385, "step": 482 }, { "epoch": 0.03299856527977044, "grad_norm": 6.245718002319336, "learning_rate": 6.999848838885923e-06, "loss": 0.5923, "step": 483 }, { "epoch": 0.03306688529070165, "grad_norm": 7.27117919921875, "learning_rate": 6.99984155510035e-06, "loss": 0.5898, "step": 484 }, { "epoch": 0.03313520530163285, "grad_norm": 7.536782264709473, "learning_rate": 6.999834099936818e-06, "loss": 0.5382, "step": 485 }, { "epoch": 0.03320352531256405, "grad_norm": 7.052516460418701, "learning_rate": 6.999826473395695e-06, "loss": 0.5163, "step": 486 }, { "epoch": 0.03327184532349525, "grad_norm": 4.695589542388916, "learning_rate": 6.999818675477353e-06, "loss": 0.3576, "step": 487 }, { "epoch": 0.03334016533442646, "grad_norm": 6.106522083282471, "learning_rate": 6.999810706182173e-06, "loss": 0.5243, "step": 488 }, { "epoch": 0.033408485345357654, "grad_norm": 6.399619102478027, "learning_rate": 6.999802565510547e-06, "loss": 0.5598, "step": 489 }, { "epoch": 0.03347680535628886, "grad_norm": 5.878777027130127, "learning_rate": 6.999794253462872e-06, "loss": 0.4392, "step": 490 }, { "epoch": 0.03354512536722006, "grad_norm": 7.014889240264893, "learning_rate": 6.999785770039556e-06, "loss": 0.5522, "step": 491 }, { "epoch": 0.03361344537815126, "grad_norm": 8.326781272888184, "learning_rate": 6.999777115241014e-06, "loss": 0.4717, "step": 492 }, { "epoch": 0.03368176538908246, "grad_norm": 7.462229251861572, "learning_rate": 6.999768289067671e-06, "loss": 0.4638, "step": 493 }, { "epoch": 0.03375008540001367, "grad_norm": 6.016600131988525, "learning_rate": 6.999759291519957e-06, "loss": 0.6363, "step": 494 }, { "epoch": 0.033818405410944864, "grad_norm": 5.627028942108154, "learning_rate": 6.999750122598315e-06, "loss": 0.3713, "step": 495 }, { "epoch": 0.03388672542187607, "grad_norm": 6.941834449768066, "learning_rate": 6.999740782303193e-06, "loss": 0.5114, "step": 496 }, { "epoch": 0.03395504543280727, "grad_norm": 7.607276916503906, "learning_rate": 6.999731270635046e-06, "loss": 0.4418, "step": 497 }, { "epoch": 0.03402336544373847, "grad_norm": 6.4957170486450195, "learning_rate": 6.999721587594344e-06, "loss": 0.7458, "step": 498 }, { "epoch": 0.034091685454669673, "grad_norm": 6.4242353439331055, "learning_rate": 6.999711733181558e-06, "loss": 0.5413, "step": 499 }, { "epoch": 0.03416000546560088, "grad_norm": 5.252993106842041, "learning_rate": 6.9997017073971725e-06, "loss": 0.4469, "step": 500 }, { "epoch": 0.034228325476532075, "grad_norm": 6.306579113006592, "learning_rate": 6.999691510241677e-06, "loss": 0.5301, "step": 501 }, { "epoch": 0.03429664548746328, "grad_norm": 5.344730377197266, "learning_rate": 6.999681141715571e-06, "loss": 0.4988, "step": 502 }, { "epoch": 0.03436496549839448, "grad_norm": 5.036507606506348, "learning_rate": 6.999670601819363e-06, "loss": 0.4518, "step": 503 }, { "epoch": 0.03443328550932568, "grad_norm": 6.467003345489502, "learning_rate": 6.999659890553569e-06, "loss": 0.477, "step": 504 }, { "epoch": 0.034501605520256884, "grad_norm": 4.719061851501465, "learning_rate": 6.999649007918713e-06, "loss": 0.4581, "step": 505 }, { "epoch": 0.03456992553118809, "grad_norm": 8.255884170532227, "learning_rate": 6.999637953915328e-06, "loss": 0.5576, "step": 506 }, { "epoch": 0.034638245542119285, "grad_norm": 7.7945122718811035, "learning_rate": 6.9996267285439555e-06, "loss": 0.4871, "step": 507 }, { "epoch": 0.03470656555305049, "grad_norm": 5.280129909515381, "learning_rate": 6.999615331805145e-06, "loss": 0.4091, "step": 508 }, { "epoch": 0.03477488556398169, "grad_norm": 5.285617351531982, "learning_rate": 6.9996037636994545e-06, "loss": 0.4278, "step": 509 }, { "epoch": 0.03484320557491289, "grad_norm": 4.326678276062012, "learning_rate": 6.999592024227451e-06, "loss": 0.5145, "step": 510 }, { "epoch": 0.034911525585844094, "grad_norm": 6.7775444984436035, "learning_rate": 6.999580113389709e-06, "loss": 0.4882, "step": 511 }, { "epoch": 0.0349798455967753, "grad_norm": 4.537039279937744, "learning_rate": 6.999568031186811e-06, "loss": 0.4243, "step": 512 }, { "epoch": 0.035048165607706495, "grad_norm": 6.504220962524414, "learning_rate": 6.9995557776193495e-06, "loss": 0.5792, "step": 513 }, { "epoch": 0.0351164856186377, "grad_norm": 7.376718044281006, "learning_rate": 6.999543352687926e-06, "loss": 0.458, "step": 514 }, { "epoch": 0.035184805629568904, "grad_norm": 6.076572418212891, "learning_rate": 6.999530756393145e-06, "loss": 0.5525, "step": 515 }, { "epoch": 0.0352531256405001, "grad_norm": 6.7931036949157715, "learning_rate": 6.999517988735626e-06, "loss": 0.6159, "step": 516 }, { "epoch": 0.035321445651431305, "grad_norm": 4.746064186096191, "learning_rate": 6.999505049715994e-06, "loss": 0.4742, "step": 517 }, { "epoch": 0.03538976566236251, "grad_norm": 6.450669765472412, "learning_rate": 6.999491939334882e-06, "loss": 0.543, "step": 518 }, { "epoch": 0.035458085673293706, "grad_norm": 6.642461776733398, "learning_rate": 6.999478657592934e-06, "loss": 0.5355, "step": 519 }, { "epoch": 0.03552640568422491, "grad_norm": 8.131266593933105, "learning_rate": 6.999465204490797e-06, "loss": 0.5017, "step": 520 }, { "epoch": 0.035594725695156114, "grad_norm": 6.717956066131592, "learning_rate": 6.999451580029133e-06, "loss": 0.4485, "step": 521 }, { "epoch": 0.03566304570608731, "grad_norm": 5.382206916809082, "learning_rate": 6.9994377842086065e-06, "loss": 0.5419, "step": 522 }, { "epoch": 0.035731365717018515, "grad_norm": 5.905843734741211, "learning_rate": 6.999423817029893e-06, "loss": 0.6413, "step": 523 }, { "epoch": 0.03579968572794972, "grad_norm": 4.693472385406494, "learning_rate": 6.99940967849368e-06, "loss": 0.3381, "step": 524 }, { "epoch": 0.035868005738880916, "grad_norm": 6.523636817932129, "learning_rate": 6.999395368600657e-06, "loss": 0.5155, "step": 525 }, { "epoch": 0.03593632574981212, "grad_norm": 5.040549278259277, "learning_rate": 6.999380887351524e-06, "loss": 0.5144, "step": 526 }, { "epoch": 0.036004645760743324, "grad_norm": 5.709218978881836, "learning_rate": 6.999366234746991e-06, "loss": 0.4073, "step": 527 }, { "epoch": 0.03607296577167452, "grad_norm": 7.293386459350586, "learning_rate": 6.999351410787777e-06, "loss": 0.4677, "step": 528 }, { "epoch": 0.036141285782605725, "grad_norm": 4.174495697021484, "learning_rate": 6.999336415474606e-06, "loss": 0.3661, "step": 529 }, { "epoch": 0.03620960579353693, "grad_norm": 6.609614849090576, "learning_rate": 6.999321248808214e-06, "loss": 0.5594, "step": 530 }, { "epoch": 0.03627792580446813, "grad_norm": 4.892010688781738, "learning_rate": 6.999305910789341e-06, "loss": 0.4979, "step": 531 }, { "epoch": 0.03634624581539933, "grad_norm": 6.052121162414551, "learning_rate": 6.999290401418741e-06, "loss": 0.5179, "step": 532 }, { "epoch": 0.036414565826330535, "grad_norm": 6.812851428985596, "learning_rate": 6.9992747206971715e-06, "loss": 0.4562, "step": 533 }, { "epoch": 0.03648288583726173, "grad_norm": 4.943441390991211, "learning_rate": 6.9992588686254e-06, "loss": 0.44, "step": 534 }, { "epoch": 0.036551205848192936, "grad_norm": 6.204377174377441, "learning_rate": 6.999242845204205e-06, "loss": 0.4186, "step": 535 }, { "epoch": 0.03661952585912414, "grad_norm": 6.2624831199646, "learning_rate": 6.99922665043437e-06, "loss": 0.4274, "step": 536 }, { "epoch": 0.03668784587005534, "grad_norm": 6.393564224243164, "learning_rate": 6.999210284316687e-06, "loss": 0.3961, "step": 537 }, { "epoch": 0.03675616588098654, "grad_norm": 7.75474214553833, "learning_rate": 6.999193746851959e-06, "loss": 0.4642, "step": 538 }, { "epoch": 0.036824485891917745, "grad_norm": 4.245616912841797, "learning_rate": 6.999177038040994e-06, "loss": 0.4283, "step": 539 }, { "epoch": 0.03689280590284894, "grad_norm": 4.641530990600586, "learning_rate": 6.9991601578846125e-06, "loss": 0.4655, "step": 540 }, { "epoch": 0.036961125913780146, "grad_norm": 6.192604064941406, "learning_rate": 6.999143106383639e-06, "loss": 0.535, "step": 541 }, { "epoch": 0.03702944592471135, "grad_norm": 6.898579120635986, "learning_rate": 6.999125883538909e-06, "loss": 0.5901, "step": 542 }, { "epoch": 0.03709776593564255, "grad_norm": 5.905754089355469, "learning_rate": 6.999108489351266e-06, "loss": 0.515, "step": 543 }, { "epoch": 0.03716608594657375, "grad_norm": 6.641386032104492, "learning_rate": 6.9990909238215625e-06, "loss": 0.4778, "step": 544 }, { "epoch": 0.037234405957504955, "grad_norm": 4.449394702911377, "learning_rate": 6.999073186950658e-06, "loss": 0.469, "step": 545 }, { "epoch": 0.03730272596843615, "grad_norm": 6.620095252990723, "learning_rate": 6.999055278739422e-06, "loss": 0.5123, "step": 546 }, { "epoch": 0.03737104597936736, "grad_norm": 6.819068431854248, "learning_rate": 6.9990371991887284e-06, "loss": 0.5633, "step": 547 }, { "epoch": 0.03743936599029856, "grad_norm": 6.521850109100342, "learning_rate": 6.999018948299466e-06, "loss": 0.5094, "step": 548 }, { "epoch": 0.03750768600122976, "grad_norm": 6.2901434898376465, "learning_rate": 6.999000526072526e-06, "loss": 0.5683, "step": 549 }, { "epoch": 0.03757600601216096, "grad_norm": 4.4043097496032715, "learning_rate": 6.998981932508812e-06, "loss": 0.4252, "step": 550 }, { "epoch": 0.037644326023092166, "grad_norm": 10.389490127563477, "learning_rate": 6.9989631676092345e-06, "loss": 0.6484, "step": 551 }, { "epoch": 0.03771264603402336, "grad_norm": 7.878683567047119, "learning_rate": 6.998944231374711e-06, "loss": 0.6515, "step": 552 }, { "epoch": 0.03778096604495457, "grad_norm": 4.7959489822387695, "learning_rate": 6.99892512380617e-06, "loss": 0.4773, "step": 553 }, { "epoch": 0.03784928605588577, "grad_norm": 6.2549333572387695, "learning_rate": 6.998905844904548e-06, "loss": 0.5301, "step": 554 }, { "epoch": 0.03791760606681697, "grad_norm": 5.311519622802734, "learning_rate": 6.998886394670785e-06, "loss": 0.4213, "step": 555 }, { "epoch": 0.03798592607774817, "grad_norm": 5.8910231590271, "learning_rate": 6.998866773105839e-06, "loss": 0.4407, "step": 556 }, { "epoch": 0.038054246088679376, "grad_norm": 6.410462856292725, "learning_rate": 6.998846980210667e-06, "loss": 0.4512, "step": 557 }, { "epoch": 0.03812256609961057, "grad_norm": 7.027028560638428, "learning_rate": 6.9988270159862395e-06, "loss": 0.6342, "step": 558 }, { "epoch": 0.03819088611054178, "grad_norm": 7.417361736297607, "learning_rate": 6.998806880433534e-06, "loss": 0.4686, "step": 559 }, { "epoch": 0.03825920612147298, "grad_norm": 6.200222492218018, "learning_rate": 6.998786573553535e-06, "loss": 0.4278, "step": 560 }, { "epoch": 0.03832752613240418, "grad_norm": 6.384243965148926, "learning_rate": 6.99876609534724e-06, "loss": 0.483, "step": 561 }, { "epoch": 0.03839584614333538, "grad_norm": 5.103583335876465, "learning_rate": 6.9987454458156495e-06, "loss": 0.4987, "step": 562 }, { "epoch": 0.03846416615426659, "grad_norm": 7.195427894592285, "learning_rate": 6.998724624959775e-06, "loss": 0.5516, "step": 563 }, { "epoch": 0.038532486165197784, "grad_norm": 5.9117207527160645, "learning_rate": 6.998703632780636e-06, "loss": 0.5798, "step": 564 }, { "epoch": 0.03860080617612899, "grad_norm": 6.134669303894043, "learning_rate": 6.99868246927926e-06, "loss": 0.4526, "step": 565 }, { "epoch": 0.03866912618706019, "grad_norm": 7.640054225921631, "learning_rate": 6.998661134456685e-06, "loss": 0.5584, "step": 566 }, { "epoch": 0.03873744619799139, "grad_norm": 4.602800369262695, "learning_rate": 6.998639628313954e-06, "loss": 0.4077, "step": 567 }, { "epoch": 0.03880576620892259, "grad_norm": 4.353930950164795, "learning_rate": 6.998617950852121e-06, "loss": 0.4902, "step": 568 }, { "epoch": 0.0388740862198538, "grad_norm": 7.3568572998046875, "learning_rate": 6.9985961020722475e-06, "loss": 0.5015, "step": 569 }, { "epoch": 0.038942406230784994, "grad_norm": 5.813183307647705, "learning_rate": 6.998574081975403e-06, "loss": 0.4987, "step": 570 }, { "epoch": 0.0390107262417162, "grad_norm": 5.761905670166016, "learning_rate": 6.998551890562666e-06, "loss": 0.4318, "step": 571 }, { "epoch": 0.0390790462526474, "grad_norm": 5.67811918258667, "learning_rate": 6.998529527835123e-06, "loss": 0.4707, "step": 572 }, { "epoch": 0.0391473662635786, "grad_norm": 5.873950481414795, "learning_rate": 6.998506993793868e-06, "loss": 0.5594, "step": 573 }, { "epoch": 0.0392156862745098, "grad_norm": 9.783050537109375, "learning_rate": 6.998484288440007e-06, "loss": 0.4635, "step": 574 }, { "epoch": 0.03928400628544101, "grad_norm": 5.8183369636535645, "learning_rate": 6.9984614117746505e-06, "loss": 0.5393, "step": 575 }, { "epoch": 0.039352326296372205, "grad_norm": 4.414392471313477, "learning_rate": 6.998438363798917e-06, "loss": 0.4775, "step": 576 }, { "epoch": 0.03942064630730341, "grad_norm": 7.7797064781188965, "learning_rate": 6.998415144513938e-06, "loss": 0.4991, "step": 577 }, { "epoch": 0.03948896631823461, "grad_norm": 5.842086315155029, "learning_rate": 6.998391753920848e-06, "loss": 0.541, "step": 578 }, { "epoch": 0.03955728632916581, "grad_norm": 5.139865398406982, "learning_rate": 6.998368192020794e-06, "loss": 0.548, "step": 579 }, { "epoch": 0.039625606340097014, "grad_norm": 5.478124141693115, "learning_rate": 6.998344458814929e-06, "loss": 0.4336, "step": 580 }, { "epoch": 0.03969392635102822, "grad_norm": 5.953588485717773, "learning_rate": 6.998320554304416e-06, "loss": 0.4528, "step": 581 }, { "epoch": 0.039762246361959415, "grad_norm": 6.232389450073242, "learning_rate": 6.998296478490425e-06, "loss": 0.52, "step": 582 }, { "epoch": 0.03983056637289062, "grad_norm": 5.848459243774414, "learning_rate": 6.998272231374135e-06, "loss": 0.5433, "step": 583 }, { "epoch": 0.03989888638382182, "grad_norm": 6.748136520385742, "learning_rate": 6.998247812956732e-06, "loss": 0.4642, "step": 584 }, { "epoch": 0.03996720639475302, "grad_norm": 4.590267181396484, "learning_rate": 6.998223223239414e-06, "loss": 0.3944, "step": 585 }, { "epoch": 0.040035526405684224, "grad_norm": 6.2035040855407715, "learning_rate": 6.998198462223384e-06, "loss": 0.5212, "step": 586 }, { "epoch": 0.04010384641661543, "grad_norm": 5.700768947601318, "learning_rate": 6.998173529909854e-06, "loss": 0.4768, "step": 587 }, { "epoch": 0.040172166427546625, "grad_norm": 4.760676383972168, "learning_rate": 6.9981484263000446e-06, "loss": 0.4474, "step": 588 }, { "epoch": 0.04024048643847783, "grad_norm": 8.435996055603027, "learning_rate": 6.998123151395187e-06, "loss": 0.6087, "step": 589 }, { "epoch": 0.04030880644940903, "grad_norm": 5.1225738525390625, "learning_rate": 6.998097705196517e-06, "loss": 0.4225, "step": 590 }, { "epoch": 0.04037712646034023, "grad_norm": 4.9671630859375, "learning_rate": 6.998072087705282e-06, "loss": 0.3814, "step": 591 }, { "epoch": 0.040445446471271435, "grad_norm": 6.398429870605469, "learning_rate": 6.9980462989227354e-06, "loss": 0.4487, "step": 592 }, { "epoch": 0.04051376648220264, "grad_norm": 6.620534420013428, "learning_rate": 6.99802033885014e-06, "loss": 0.4941, "step": 593 }, { "epoch": 0.040582086493133836, "grad_norm": 5.096595287322998, "learning_rate": 6.997994207488768e-06, "loss": 0.4784, "step": 594 }, { "epoch": 0.04065040650406504, "grad_norm": 7.679523944854736, "learning_rate": 6.997967904839898e-06, "loss": 0.4593, "step": 595 }, { "epoch": 0.040718726514996244, "grad_norm": 4.875520706176758, "learning_rate": 6.997941430904818e-06, "loss": 0.451, "step": 596 }, { "epoch": 0.04078704652592744, "grad_norm": 4.5759758949279785, "learning_rate": 6.9979147856848246e-06, "loss": 0.4663, "step": 597 }, { "epoch": 0.040855366536858645, "grad_norm": 5.155215263366699, "learning_rate": 6.997887969181223e-06, "loss": 0.5343, "step": 598 }, { "epoch": 0.04092368654778985, "grad_norm": 5.626392364501953, "learning_rate": 6.997860981395326e-06, "loss": 0.5305, "step": 599 }, { "epoch": 0.040992006558721046, "grad_norm": 4.434484481811523, "learning_rate": 6.997833822328454e-06, "loss": 0.4088, "step": 600 }, { "epoch": 0.04106032656965225, "grad_norm": 4.931980609893799, "learning_rate": 6.997806491981939e-06, "loss": 0.4483, "step": 601 }, { "epoch": 0.041128646580583454, "grad_norm": 6.3369574546813965, "learning_rate": 6.997778990357118e-06, "loss": 0.6369, "step": 602 }, { "epoch": 0.04119696659151465, "grad_norm": 6.701540946960449, "learning_rate": 6.9977513174553374e-06, "loss": 0.5453, "step": 603 }, { "epoch": 0.041265286602445855, "grad_norm": 5.0623931884765625, "learning_rate": 6.997723473277953e-06, "loss": 0.5073, "step": 604 }, { "epoch": 0.04133360661337706, "grad_norm": 5.22011137008667, "learning_rate": 6.997695457826328e-06, "loss": 0.4113, "step": 605 }, { "epoch": 0.041401926624308256, "grad_norm": 8.233436584472656, "learning_rate": 6.997667271101834e-06, "loss": 0.4368, "step": 606 }, { "epoch": 0.04147024663523946, "grad_norm": 5.3937530517578125, "learning_rate": 6.997638913105852e-06, "loss": 0.5141, "step": 607 }, { "epoch": 0.041538566646170665, "grad_norm": 7.659331798553467, "learning_rate": 6.997610383839771e-06, "loss": 0.5005, "step": 608 }, { "epoch": 0.04160688665710186, "grad_norm": 6.018057823181152, "learning_rate": 6.9975816833049855e-06, "loss": 0.5046, "step": 609 }, { "epoch": 0.041675206668033066, "grad_norm": 5.891765117645264, "learning_rate": 6.9975528115029035e-06, "loss": 0.5359, "step": 610 }, { "epoch": 0.04174352667896427, "grad_norm": 5.3556036949157715, "learning_rate": 6.997523768434937e-06, "loss": 0.4759, "step": 611 }, { "epoch": 0.041811846689895474, "grad_norm": 5.618587493896484, "learning_rate": 6.9974945541025095e-06, "loss": 0.5792, "step": 612 }, { "epoch": 0.04188016670082667, "grad_norm": 8.02511978149414, "learning_rate": 6.997465168507051e-06, "loss": 0.4697, "step": 613 }, { "epoch": 0.041948486711757875, "grad_norm": 5.641201972961426, "learning_rate": 6.997435611649999e-06, "loss": 0.5236, "step": 614 }, { "epoch": 0.04201680672268908, "grad_norm": 5.997542858123779, "learning_rate": 6.9974058835328044e-06, "loss": 0.5769, "step": 615 }, { "epoch": 0.042085126733620276, "grad_norm": 6.0563201904296875, "learning_rate": 6.99737598415692e-06, "loss": 0.4546, "step": 616 }, { "epoch": 0.04215344674455148, "grad_norm": 5.323855876922607, "learning_rate": 6.997345913523811e-06, "loss": 0.5097, "step": 617 }, { "epoch": 0.042221766755482684, "grad_norm": 8.770323753356934, "learning_rate": 6.997315671634949e-06, "loss": 0.5063, "step": 618 }, { "epoch": 0.04229008676641388, "grad_norm": 6.224748134613037, "learning_rate": 6.997285258491815e-06, "loss": 0.4557, "step": 619 }, { "epoch": 0.042358406777345085, "grad_norm": 5.436992168426514, "learning_rate": 6.997254674095899e-06, "loss": 0.5317, "step": 620 }, { "epoch": 0.04242672678827629, "grad_norm": 5.8154730796813965, "learning_rate": 6.997223918448698e-06, "loss": 0.4313, "step": 621 }, { "epoch": 0.042495046799207487, "grad_norm": 6.624174118041992, "learning_rate": 6.997192991551719e-06, "loss": 0.4956, "step": 622 }, { "epoch": 0.04256336681013869, "grad_norm": 6.2583208084106445, "learning_rate": 6.997161893406476e-06, "loss": 0.4826, "step": 623 }, { "epoch": 0.042631686821069895, "grad_norm": 5.0580525398254395, "learning_rate": 6.997130624014491e-06, "loss": 0.5063, "step": 624 }, { "epoch": 0.04270000683200109, "grad_norm": 5.66411828994751, "learning_rate": 6.997099183377295e-06, "loss": 0.4887, "step": 625 }, { "epoch": 0.042768326842932296, "grad_norm": 4.778545379638672, "learning_rate": 6.99706757149643e-06, "loss": 0.3525, "step": 626 }, { "epoch": 0.0428366468538635, "grad_norm": 4.931090831756592, "learning_rate": 6.997035788373441e-06, "loss": 0.4783, "step": 627 }, { "epoch": 0.0429049668647947, "grad_norm": 5.1171159744262695, "learning_rate": 6.997003834009885e-06, "loss": 0.4803, "step": 628 }, { "epoch": 0.0429732868757259, "grad_norm": 5.080070972442627, "learning_rate": 6.9969717084073275e-06, "loss": 0.4476, "step": 629 }, { "epoch": 0.043041606886657105, "grad_norm": 5.633956432342529, "learning_rate": 6.996939411567342e-06, "loss": 0.5413, "step": 630 }, { "epoch": 0.0431099268975883, "grad_norm": 5.344780445098877, "learning_rate": 6.996906943491509e-06, "loss": 0.5094, "step": 631 }, { "epoch": 0.043178246908519506, "grad_norm": 5.738333225250244, "learning_rate": 6.996874304181418e-06, "loss": 0.4611, "step": 632 }, { "epoch": 0.04324656691945071, "grad_norm": 7.798614025115967, "learning_rate": 6.99684149363867e-06, "loss": 0.515, "step": 633 }, { "epoch": 0.04331488693038191, "grad_norm": 5.423054218292236, "learning_rate": 6.996808511864868e-06, "loss": 0.5721, "step": 634 }, { "epoch": 0.04338320694131311, "grad_norm": 6.949940204620361, "learning_rate": 6.996775358861629e-06, "loss": 0.4869, "step": 635 }, { "epoch": 0.043451526952244315, "grad_norm": 6.151837348937988, "learning_rate": 6.9967420346305755e-06, "loss": 0.4992, "step": 636 }, { "epoch": 0.04351984696317551, "grad_norm": 6.838741779327393, "learning_rate": 6.996708539173341e-06, "loss": 0.6474, "step": 637 }, { "epoch": 0.04358816697410672, "grad_norm": 5.498172760009766, "learning_rate": 6.9966748724915635e-06, "loss": 0.4382, "step": 638 }, { "epoch": 0.04365648698503792, "grad_norm": 4.931705951690674, "learning_rate": 6.9966410345868935e-06, "loss": 0.4802, "step": 639 }, { "epoch": 0.04372480699596912, "grad_norm": 4.9084296226501465, "learning_rate": 6.996607025460986e-06, "loss": 0.4635, "step": 640 }, { "epoch": 0.04379312700690032, "grad_norm": 4.430806636810303, "learning_rate": 6.99657284511551e-06, "loss": 0.336, "step": 641 }, { "epoch": 0.043861447017831526, "grad_norm": 5.144189357757568, "learning_rate": 6.996538493552134e-06, "loss": 0.4418, "step": 642 }, { "epoch": 0.04392976702876272, "grad_norm": 5.059962272644043, "learning_rate": 6.996503970772544e-06, "loss": 0.339, "step": 643 }, { "epoch": 0.04399808703969393, "grad_norm": 5.00610876083374, "learning_rate": 6.996469276778428e-06, "loss": 0.4486, "step": 644 }, { "epoch": 0.04406640705062513, "grad_norm": 5.465323448181152, "learning_rate": 6.996434411571487e-06, "loss": 0.4724, "step": 645 }, { "epoch": 0.04413472706155633, "grad_norm": 6.898195743560791, "learning_rate": 6.996399375153427e-06, "loss": 0.5861, "step": 646 }, { "epoch": 0.04420304707248753, "grad_norm": 5.709597110748291, "learning_rate": 6.996364167525965e-06, "loss": 0.4189, "step": 647 }, { "epoch": 0.044271367083418736, "grad_norm": 4.7098517417907715, "learning_rate": 6.996328788690823e-06, "loss": 0.4741, "step": 648 }, { "epoch": 0.04433968709434993, "grad_norm": 6.52232551574707, "learning_rate": 6.996293238649735e-06, "loss": 0.5658, "step": 649 }, { "epoch": 0.04440800710528114, "grad_norm": 5.843962669372559, "learning_rate": 6.996257517404441e-06, "loss": 0.5705, "step": 650 }, { "epoch": 0.04447632711621234, "grad_norm": 5.056528091430664, "learning_rate": 6.99622162495669e-06, "loss": 0.4497, "step": 651 }, { "epoch": 0.04454464712714354, "grad_norm": 5.167799949645996, "learning_rate": 6.996185561308241e-06, "loss": 0.4785, "step": 652 }, { "epoch": 0.04461296713807474, "grad_norm": 5.8921799659729, "learning_rate": 6.996149326460858e-06, "loss": 0.5041, "step": 653 }, { "epoch": 0.04468128714900595, "grad_norm": 5.771914958953857, "learning_rate": 6.996112920416317e-06, "loss": 0.4771, "step": 654 }, { "epoch": 0.044749607159937144, "grad_norm": 6.4158034324646, "learning_rate": 6.996076343176399e-06, "loss": 0.5055, "step": 655 }, { "epoch": 0.04481792717086835, "grad_norm": 7.41198205947876, "learning_rate": 6.996039594742897e-06, "loss": 0.4567, "step": 656 }, { "epoch": 0.04488624718179955, "grad_norm": 8.142203330993652, "learning_rate": 6.996002675117608e-06, "loss": 0.4964, "step": 657 }, { "epoch": 0.04495456719273075, "grad_norm": 5.3604736328125, "learning_rate": 6.995965584302342e-06, "loss": 0.4748, "step": 658 }, { "epoch": 0.04502288720366195, "grad_norm": 5.552134037017822, "learning_rate": 6.995928322298916e-06, "loss": 0.4244, "step": 659 }, { "epoch": 0.04509120721459316, "grad_norm": 5.8264641761779785, "learning_rate": 6.995890889109152e-06, "loss": 0.4855, "step": 660 }, { "epoch": 0.045159527225524354, "grad_norm": 5.567123889923096, "learning_rate": 6.995853284734883e-06, "loss": 0.5042, "step": 661 }, { "epoch": 0.04522784723645556, "grad_norm": 5.86140775680542, "learning_rate": 6.995815509177953e-06, "loss": 0.4794, "step": 662 }, { "epoch": 0.04529616724738676, "grad_norm": 6.395702838897705, "learning_rate": 6.995777562440211e-06, "loss": 0.448, "step": 663 }, { "epoch": 0.04536448725831796, "grad_norm": 6.664085865020752, "learning_rate": 6.995739444523513e-06, "loss": 0.427, "step": 664 }, { "epoch": 0.04543280726924916, "grad_norm": 5.494058609008789, "learning_rate": 6.995701155429728e-06, "loss": 0.3788, "step": 665 }, { "epoch": 0.04550112728018037, "grad_norm": 7.06205940246582, "learning_rate": 6.99566269516073e-06, "loss": 0.5119, "step": 666 }, { "epoch": 0.045569447291111564, "grad_norm": 5.169694900512695, "learning_rate": 6.995624063718402e-06, "loss": 0.4073, "step": 667 }, { "epoch": 0.04563776730204277, "grad_norm": 6.304897308349609, "learning_rate": 6.995585261104637e-06, "loss": 0.4759, "step": 668 }, { "epoch": 0.04570608731297397, "grad_norm": 9.204445838928223, "learning_rate": 6.995546287321331e-06, "loss": 0.5529, "step": 669 }, { "epoch": 0.04577440732390517, "grad_norm": 4.85435152053833, "learning_rate": 6.995507142370399e-06, "loss": 0.4351, "step": 670 }, { "epoch": 0.045842727334836374, "grad_norm": 5.473654270172119, "learning_rate": 6.995467826253753e-06, "loss": 0.537, "step": 671 }, { "epoch": 0.04591104734576758, "grad_norm": 6.599479675292969, "learning_rate": 6.995428338973319e-06, "loss": 0.5648, "step": 672 }, { "epoch": 0.045979367356698775, "grad_norm": 6.939873695373535, "learning_rate": 6.995388680531031e-06, "loss": 0.496, "step": 673 }, { "epoch": 0.04604768736762998, "grad_norm": 5.592713832855225, "learning_rate": 6.995348850928831e-06, "loss": 0.5399, "step": 674 }, { "epoch": 0.04611600737856118, "grad_norm": 5.240994930267334, "learning_rate": 6.99530885016867e-06, "loss": 0.436, "step": 675 }, { "epoch": 0.04618432738949238, "grad_norm": 5.834777355194092, "learning_rate": 6.9952686782525055e-06, "loss": 0.4831, "step": 676 }, { "epoch": 0.046252647400423584, "grad_norm": 4.758626937866211, "learning_rate": 6.9952283351823056e-06, "loss": 0.5265, "step": 677 }, { "epoch": 0.04632096741135479, "grad_norm": 7.530935764312744, "learning_rate": 6.995187820960046e-06, "loss": 0.5185, "step": 678 }, { "epoch": 0.046389287422285985, "grad_norm": 5.330808162689209, "learning_rate": 6.995147135587709e-06, "loss": 0.5382, "step": 679 }, { "epoch": 0.04645760743321719, "grad_norm": 6.1512250900268555, "learning_rate": 6.995106279067289e-06, "loss": 0.5052, "step": 680 }, { "epoch": 0.04652592744414839, "grad_norm": 6.156898498535156, "learning_rate": 6.995065251400784e-06, "loss": 0.5613, "step": 681 }, { "epoch": 0.04659424745507959, "grad_norm": 5.536029815673828, "learning_rate": 6.995024052590205e-06, "loss": 0.4349, "step": 682 }, { "epoch": 0.046662567466010794, "grad_norm": 5.038524150848389, "learning_rate": 6.994982682637569e-06, "loss": 0.4309, "step": 683 }, { "epoch": 0.046730887476942, "grad_norm": 4.104547023773193, "learning_rate": 6.994941141544902e-06, "loss": 0.3907, "step": 684 }, { "epoch": 0.046799207487873196, "grad_norm": 4.786357402801514, "learning_rate": 6.9948994293142375e-06, "loss": 0.3844, "step": 685 }, { "epoch": 0.0468675274988044, "grad_norm": 5.780428886413574, "learning_rate": 6.994857545947619e-06, "loss": 0.4219, "step": 686 }, { "epoch": 0.046935847509735604, "grad_norm": 6.284597396850586, "learning_rate": 6.994815491447095e-06, "loss": 0.4916, "step": 687 }, { "epoch": 0.0470041675206668, "grad_norm": 7.1625657081604, "learning_rate": 6.994773265814729e-06, "loss": 0.4634, "step": 688 }, { "epoch": 0.047072487531598005, "grad_norm": 5.475930690765381, "learning_rate": 6.994730869052585e-06, "loss": 0.4806, "step": 689 }, { "epoch": 0.04714080754252921, "grad_norm": 4.818758487701416, "learning_rate": 6.994688301162741e-06, "loss": 0.4673, "step": 690 }, { "epoch": 0.047209127553460406, "grad_norm": 3.720637798309326, "learning_rate": 6.994645562147279e-06, "loss": 0.3637, "step": 691 }, { "epoch": 0.04727744756439161, "grad_norm": 6.497039318084717, "learning_rate": 6.994602652008296e-06, "loss": 0.5077, "step": 692 }, { "epoch": 0.047345767575322814, "grad_norm": 3.694833278656006, "learning_rate": 6.994559570747889e-06, "loss": 0.3896, "step": 693 }, { "epoch": 0.04741408758625401, "grad_norm": 5.882482051849365, "learning_rate": 6.9945163183681695e-06, "loss": 0.4535, "step": 694 }, { "epoch": 0.047482407597185215, "grad_norm": 5.005761623382568, "learning_rate": 6.994472894871256e-06, "loss": 0.553, "step": 695 }, { "epoch": 0.04755072760811642, "grad_norm": 6.811455249786377, "learning_rate": 6.994429300259274e-06, "loss": 0.5583, "step": 696 }, { "epoch": 0.047619047619047616, "grad_norm": 8.131355285644531, "learning_rate": 6.994385534534359e-06, "loss": 0.5814, "step": 697 }, { "epoch": 0.04768736762997882, "grad_norm": 5.484442710876465, "learning_rate": 6.9943415976986515e-06, "loss": 0.5034, "step": 698 }, { "epoch": 0.047755687640910024, "grad_norm": 4.927276611328125, "learning_rate": 6.994297489754306e-06, "loss": 0.4176, "step": 699 }, { "epoch": 0.04782400765184122, "grad_norm": 6.464971542358398, "learning_rate": 6.9942532107034825e-06, "loss": 0.4332, "step": 700 }, { "epoch": 0.047892327662772426, "grad_norm": 5.103027820587158, "learning_rate": 6.994208760548346e-06, "loss": 0.5204, "step": 701 }, { "epoch": 0.04796064767370363, "grad_norm": 4.92249059677124, "learning_rate": 6.9941641392910764e-06, "loss": 0.4331, "step": 702 }, { "epoch": 0.04802896768463483, "grad_norm": 7.582304000854492, "learning_rate": 6.9941193469338565e-06, "loss": 0.5053, "step": 703 }, { "epoch": 0.04809728769556603, "grad_norm": 4.530175685882568, "learning_rate": 6.994074383478881e-06, "loss": 0.3946, "step": 704 }, { "epoch": 0.048165607706497235, "grad_norm": 4.9610700607299805, "learning_rate": 6.99402924892835e-06, "loss": 0.6032, "step": 705 }, { "epoch": 0.04823392771742843, "grad_norm": 3.8536503314971924, "learning_rate": 6.993983943284476e-06, "loss": 0.3858, "step": 706 }, { "epoch": 0.048302247728359636, "grad_norm": 4.784192085266113, "learning_rate": 6.9939384665494775e-06, "loss": 0.438, "step": 707 }, { "epoch": 0.04837056773929084, "grad_norm": 5.672399044036865, "learning_rate": 6.993892818725579e-06, "loss": 0.4811, "step": 708 }, { "epoch": 0.04843888775022204, "grad_norm": 6.353575229644775, "learning_rate": 6.993846999815016e-06, "loss": 0.4965, "step": 709 }, { "epoch": 0.04850720776115324, "grad_norm": 4.726484775543213, "learning_rate": 6.993801009820033e-06, "loss": 0.4284, "step": 710 }, { "epoch": 0.048575527772084445, "grad_norm": 5.558669090270996, "learning_rate": 6.993754848742883e-06, "loss": 0.6195, "step": 711 }, { "epoch": 0.04864384778301564, "grad_norm": 5.261433124542236, "learning_rate": 6.993708516585825e-06, "loss": 0.4578, "step": 712 }, { "epoch": 0.048712167793946846, "grad_norm": 4.503021717071533, "learning_rate": 6.993662013351129e-06, "loss": 0.4383, "step": 713 }, { "epoch": 0.04878048780487805, "grad_norm": 5.09766960144043, "learning_rate": 6.993615339041071e-06, "loss": 0.4394, "step": 714 }, { "epoch": 0.04884880781580925, "grad_norm": 6.790043830871582, "learning_rate": 6.9935684936579364e-06, "loss": 0.4686, "step": 715 }, { "epoch": 0.04891712782674045, "grad_norm": 5.399369239807129, "learning_rate": 6.9935214772040205e-06, "loss": 0.4396, "step": 716 }, { "epoch": 0.048985447837671656, "grad_norm": 6.762552738189697, "learning_rate": 6.993474289681624e-06, "loss": 0.5876, "step": 717 }, { "epoch": 0.04905376784860285, "grad_norm": 7.288950443267822, "learning_rate": 6.993426931093058e-06, "loss": 0.6516, "step": 718 }, { "epoch": 0.04912208785953406, "grad_norm": 5.603554725646973, "learning_rate": 6.993379401440643e-06, "loss": 0.4534, "step": 719 }, { "epoch": 0.04919040787046526, "grad_norm": 6.843604564666748, "learning_rate": 6.993331700726702e-06, "loss": 0.5845, "step": 720 }, { "epoch": 0.04925872788139646, "grad_norm": 3.42677640914917, "learning_rate": 6.993283828953577e-06, "loss": 0.4567, "step": 721 }, { "epoch": 0.04932704789232766, "grad_norm": 6.3785176277160645, "learning_rate": 6.993235786123607e-06, "loss": 0.4598, "step": 722 }, { "epoch": 0.049395367903258866, "grad_norm": 5.450109004974365, "learning_rate": 6.9931875722391475e-06, "loss": 0.4784, "step": 723 }, { "epoch": 0.04946368791419006, "grad_norm": 5.140190124511719, "learning_rate": 6.9931391873025584e-06, "loss": 0.4739, "step": 724 }, { "epoch": 0.04953200792512127, "grad_norm": 7.451810836791992, "learning_rate": 6.99309063131621e-06, "loss": 0.4385, "step": 725 }, { "epoch": 0.04960032793605247, "grad_norm": 5.555544376373291, "learning_rate": 6.993041904282478e-06, "loss": 0.4419, "step": 726 }, { "epoch": 0.04966864794698367, "grad_norm": 6.045748233795166, "learning_rate": 6.99299300620375e-06, "loss": 0.5575, "step": 727 }, { "epoch": 0.04973696795791487, "grad_norm": 6.659077167510986, "learning_rate": 6.992943937082419e-06, "loss": 0.5368, "step": 728 }, { "epoch": 0.049805287968846076, "grad_norm": 4.466902732849121, "learning_rate": 6.992894696920888e-06, "loss": 0.518, "step": 729 }, { "epoch": 0.049873607979777274, "grad_norm": 7.834240436553955, "learning_rate": 6.9928452857215705e-06, "loss": 0.5062, "step": 730 }, { "epoch": 0.04994192799070848, "grad_norm": 5.88982629776001, "learning_rate": 6.992795703486883e-06, "loss": 0.4817, "step": 731 }, { "epoch": 0.05001024800163968, "grad_norm": 5.054663181304932, "learning_rate": 6.992745950219256e-06, "loss": 0.4717, "step": 732 }, { "epoch": 0.05007856801257088, "grad_norm": 5.462002277374268, "learning_rate": 6.992696025921124e-06, "loss": 0.4375, "step": 733 }, { "epoch": 0.05014688802350208, "grad_norm": 7.893656253814697, "learning_rate": 6.992645930594932e-06, "loss": 0.4433, "step": 734 }, { "epoch": 0.05021520803443329, "grad_norm": 5.941608905792236, "learning_rate": 6.992595664243134e-06, "loss": 0.4936, "step": 735 }, { "epoch": 0.050283528045364484, "grad_norm": 5.450270175933838, "learning_rate": 6.992545226868189e-06, "loss": 0.5425, "step": 736 }, { "epoch": 0.05035184805629569, "grad_norm": 5.000458240509033, "learning_rate": 6.99249461847257e-06, "loss": 0.5012, "step": 737 }, { "epoch": 0.05042016806722689, "grad_norm": 7.388792037963867, "learning_rate": 6.992443839058753e-06, "loss": 0.4378, "step": 738 }, { "epoch": 0.05048848807815809, "grad_norm": 6.871339321136475, "learning_rate": 6.992392888629225e-06, "loss": 0.4141, "step": 739 }, { "epoch": 0.05055680808908929, "grad_norm": 8.260720252990723, "learning_rate": 6.9923417671864815e-06, "loss": 0.5207, "step": 740 }, { "epoch": 0.0506251281000205, "grad_norm": 5.433541774749756, "learning_rate": 6.992290474733025e-06, "loss": 0.4718, "step": 741 }, { "epoch": 0.050693448110951694, "grad_norm": 8.136115074157715, "learning_rate": 6.992239011271367e-06, "loss": 0.4521, "step": 742 }, { "epoch": 0.0507617681218829, "grad_norm": 6.713597297668457, "learning_rate": 6.992187376804028e-06, "loss": 0.4689, "step": 743 }, { "epoch": 0.0508300881328141, "grad_norm": 6.2160420417785645, "learning_rate": 6.992135571333537e-06, "loss": 0.3916, "step": 744 }, { "epoch": 0.0508984081437453, "grad_norm": 4.810196399688721, "learning_rate": 6.99208359486243e-06, "loss": 0.3839, "step": 745 }, { "epoch": 0.050966728154676504, "grad_norm": 4.957190036773682, "learning_rate": 6.992031447393252e-06, "loss": 0.4208, "step": 746 }, { "epoch": 0.05103504816560771, "grad_norm": 5.010754585266113, "learning_rate": 6.991979128928557e-06, "loss": 0.5519, "step": 747 }, { "epoch": 0.05110336817653891, "grad_norm": 4.520418643951416, "learning_rate": 6.991926639470908e-06, "loss": 0.412, "step": 748 }, { "epoch": 0.05117168818747011, "grad_norm": 6.732712268829346, "learning_rate": 6.991873979022873e-06, "loss": 0.566, "step": 749 }, { "epoch": 0.05124000819840131, "grad_norm": 4.1850762367248535, "learning_rate": 6.9918211475870305e-06, "loss": 0.3605, "step": 750 }, { "epoch": 0.05130832820933252, "grad_norm": 5.720725059509277, "learning_rate": 6.99176814516597e-06, "loss": 0.4889, "step": 751 }, { "epoch": 0.051376648220263714, "grad_norm": 6.003523349761963, "learning_rate": 6.991714971762285e-06, "loss": 0.4918, "step": 752 }, { "epoch": 0.05144496823119492, "grad_norm": 5.467438697814941, "learning_rate": 6.99166162737858e-06, "loss": 0.5058, "step": 753 }, { "epoch": 0.05151328824212612, "grad_norm": 5.133394718170166, "learning_rate": 6.991608112017467e-06, "loss": 0.393, "step": 754 }, { "epoch": 0.05158160825305732, "grad_norm": 6.131045341491699, "learning_rate": 6.991554425681567e-06, "loss": 0.4934, "step": 755 }, { "epoch": 0.05164992826398852, "grad_norm": 5.2678937911987305, "learning_rate": 6.991500568373507e-06, "loss": 0.5369, "step": 756 }, { "epoch": 0.05171824827491973, "grad_norm": 5.719157695770264, "learning_rate": 6.991446540095926e-06, "loss": 0.4527, "step": 757 }, { "epoch": 0.051786568285850924, "grad_norm": 7.726016998291016, "learning_rate": 6.99139234085147e-06, "loss": 0.5216, "step": 758 }, { "epoch": 0.05185488829678213, "grad_norm": 5.845545768737793, "learning_rate": 6.991337970642791e-06, "loss": 0.4984, "step": 759 }, { "epoch": 0.05192320830771333, "grad_norm": 5.445602893829346, "learning_rate": 6.991283429472554e-06, "loss": 0.4956, "step": 760 }, { "epoch": 0.05199152831864453, "grad_norm": 5.522208213806152, "learning_rate": 6.991228717343428e-06, "loss": 0.4951, "step": 761 }, { "epoch": 0.052059848329575734, "grad_norm": 4.748927116394043, "learning_rate": 6.991173834258092e-06, "loss": 0.5298, "step": 762 }, { "epoch": 0.05212816834050694, "grad_norm": 4.091080188751221, "learning_rate": 6.991118780219232e-06, "loss": 0.3958, "step": 763 }, { "epoch": 0.052196488351438135, "grad_norm": 5.2195963859558105, "learning_rate": 6.991063555229548e-06, "loss": 0.4694, "step": 764 }, { "epoch": 0.05226480836236934, "grad_norm": 5.733306407928467, "learning_rate": 6.991008159291742e-06, "loss": 0.4742, "step": 765 }, { "epoch": 0.05233312837330054, "grad_norm": 4.441177845001221, "learning_rate": 6.990952592408526e-06, "loss": 0.4508, "step": 766 }, { "epoch": 0.05240144838423174, "grad_norm": 5.96537971496582, "learning_rate": 6.990896854582622e-06, "loss": 0.5358, "step": 767 }, { "epoch": 0.052469768395162944, "grad_norm": 6.62312650680542, "learning_rate": 6.990840945816758e-06, "loss": 0.4623, "step": 768 }, { "epoch": 0.05253808840609415, "grad_norm": 5.60703182220459, "learning_rate": 6.990784866113673e-06, "loss": 0.5093, "step": 769 }, { "epoch": 0.052606408417025345, "grad_norm": 4.755514621734619, "learning_rate": 6.990728615476112e-06, "loss": 0.4288, "step": 770 }, { "epoch": 0.05267472842795655, "grad_norm": 5.519662380218506, "learning_rate": 6.990672193906831e-06, "loss": 0.4746, "step": 771 }, { "epoch": 0.05274304843888775, "grad_norm": 5.13151741027832, "learning_rate": 6.990615601408592e-06, "loss": 0.4408, "step": 772 }, { "epoch": 0.05281136844981895, "grad_norm": 5.947344779968262, "learning_rate": 6.990558837984165e-06, "loss": 0.4392, "step": 773 }, { "epoch": 0.052879688460750154, "grad_norm": 7.978281021118164, "learning_rate": 6.990501903636332e-06, "loss": 0.4172, "step": 774 }, { "epoch": 0.05294800847168136, "grad_norm": 5.194009304046631, "learning_rate": 6.990444798367878e-06, "loss": 0.501, "step": 775 }, { "epoch": 0.053016328482612556, "grad_norm": 5.869799613952637, "learning_rate": 6.990387522181602e-06, "loss": 0.5063, "step": 776 }, { "epoch": 0.05308464849354376, "grad_norm": 6.250855445861816, "learning_rate": 6.990330075080307e-06, "loss": 0.5199, "step": 777 }, { "epoch": 0.053152968504474964, "grad_norm": 6.556947708129883, "learning_rate": 6.9902724570668065e-06, "loss": 0.4946, "step": 778 }, { "epoch": 0.05322128851540616, "grad_norm": 5.274451732635498, "learning_rate": 6.990214668143922e-06, "loss": 0.646, "step": 779 }, { "epoch": 0.053289608526337365, "grad_norm": 4.557438373565674, "learning_rate": 6.990156708314483e-06, "loss": 0.4926, "step": 780 }, { "epoch": 0.05335792853726857, "grad_norm": 4.411280155181885, "learning_rate": 6.990098577581328e-06, "loss": 0.4234, "step": 781 }, { "epoch": 0.053426248548199766, "grad_norm": 6.701441287994385, "learning_rate": 6.990040275947303e-06, "loss": 0.6079, "step": 782 }, { "epoch": 0.05349456855913097, "grad_norm": 4.752560615539551, "learning_rate": 6.989981803415264e-06, "loss": 0.4731, "step": 783 }, { "epoch": 0.053562888570062174, "grad_norm": 7.298306465148926, "learning_rate": 6.989923159988073e-06, "loss": 0.5382, "step": 784 }, { "epoch": 0.05363120858099337, "grad_norm": 5.173901081085205, "learning_rate": 6.989864345668602e-06, "loss": 0.4875, "step": 785 }, { "epoch": 0.053699528591924575, "grad_norm": 6.052708625793457, "learning_rate": 6.989805360459732e-06, "loss": 0.4557, "step": 786 }, { "epoch": 0.05376784860285578, "grad_norm": 5.795623779296875, "learning_rate": 6.989746204364349e-06, "loss": 0.4907, "step": 787 }, { "epoch": 0.053836168613786976, "grad_norm": 6.895591735839844, "learning_rate": 6.9896868773853525e-06, "loss": 0.6047, "step": 788 }, { "epoch": 0.05390448862471818, "grad_norm": 5.170362949371338, "learning_rate": 6.989627379525644e-06, "loss": 0.4749, "step": 789 }, { "epoch": 0.053972808635649384, "grad_norm": 6.215627670288086, "learning_rate": 6.989567710788141e-06, "loss": 0.4594, "step": 790 }, { "epoch": 0.05404112864658058, "grad_norm": 4.811171531677246, "learning_rate": 6.989507871175764e-06, "loss": 0.4906, "step": 791 }, { "epoch": 0.054109448657511786, "grad_norm": 5.69701623916626, "learning_rate": 6.989447860691441e-06, "loss": 0.4804, "step": 792 }, { "epoch": 0.05417776866844299, "grad_norm": 5.480172157287598, "learning_rate": 6.989387679338113e-06, "loss": 0.4271, "step": 793 }, { "epoch": 0.05424608867937419, "grad_norm": 5.110708713531494, "learning_rate": 6.989327327118726e-06, "loss": 0.5277, "step": 794 }, { "epoch": 0.05431440869030539, "grad_norm": 5.666086673736572, "learning_rate": 6.989266804036236e-06, "loss": 0.4494, "step": 795 }, { "epoch": 0.054382728701236595, "grad_norm": 5.303852558135986, "learning_rate": 6.989206110093605e-06, "loss": 0.5562, "step": 796 }, { "epoch": 0.05445104871216779, "grad_norm": 4.515955924987793, "learning_rate": 6.989145245293807e-06, "loss": 0.4286, "step": 797 }, { "epoch": 0.054519368723098996, "grad_norm": 7.794065952301025, "learning_rate": 6.989084209639822e-06, "loss": 0.4995, "step": 798 }, { "epoch": 0.0545876887340302, "grad_norm": 4.36612606048584, "learning_rate": 6.989023003134637e-06, "loss": 0.3845, "step": 799 }, { "epoch": 0.0546560087449614, "grad_norm": 4.1150312423706055, "learning_rate": 6.988961625781251e-06, "loss": 0.4769, "step": 800 }, { "epoch": 0.0547243287558926, "grad_norm": 6.043631076812744, "learning_rate": 6.988900077582669e-06, "loss": 0.4091, "step": 801 }, { "epoch": 0.054792648766823805, "grad_norm": 3.988997220993042, "learning_rate": 6.988838358541904e-06, "loss": 0.3739, "step": 802 }, { "epoch": 0.054860968777755, "grad_norm": 3.257962226867676, "learning_rate": 6.9887764686619805e-06, "loss": 0.3553, "step": 803 }, { "epoch": 0.054929288788686206, "grad_norm": 5.108057975769043, "learning_rate": 6.988714407945926e-06, "loss": 0.5591, "step": 804 }, { "epoch": 0.05499760879961741, "grad_norm": 5.367467880249023, "learning_rate": 6.988652176396781e-06, "loss": 0.4179, "step": 805 }, { "epoch": 0.05506592881054861, "grad_norm": 7.286514759063721, "learning_rate": 6.988589774017593e-06, "loss": 0.4918, "step": 806 }, { "epoch": 0.05513424882147981, "grad_norm": 6.044273376464844, "learning_rate": 6.988527200811418e-06, "loss": 0.5055, "step": 807 }, { "epoch": 0.055202568832411016, "grad_norm": 5.828891277313232, "learning_rate": 6.988464456781319e-06, "loss": 0.5201, "step": 808 }, { "epoch": 0.05527088884334221, "grad_norm": 6.606822490692139, "learning_rate": 6.988401541930369e-06, "loss": 0.5358, "step": 809 }, { "epoch": 0.05533920885427342, "grad_norm": 6.399984836578369, "learning_rate": 6.988338456261649e-06, "loss": 0.5236, "step": 810 }, { "epoch": 0.05540752886520462, "grad_norm": 5.238390922546387, "learning_rate": 6.988275199778247e-06, "loss": 0.389, "step": 811 }, { "epoch": 0.05547584887613582, "grad_norm": 6.33929967880249, "learning_rate": 6.988211772483262e-06, "loss": 0.391, "step": 812 }, { "epoch": 0.05554416888706702, "grad_norm": 7.859992980957031, "learning_rate": 6.9881481743798e-06, "loss": 0.4565, "step": 813 }, { "epoch": 0.055612488897998226, "grad_norm": 4.806094169616699, "learning_rate": 6.9880844054709725e-06, "loss": 0.4493, "step": 814 }, { "epoch": 0.05568080890892942, "grad_norm": 7.284493446350098, "learning_rate": 6.988020465759905e-06, "loss": 0.6745, "step": 815 }, { "epoch": 0.05574912891986063, "grad_norm": 5.331704616546631, "learning_rate": 6.987956355249727e-06, "loss": 0.5612, "step": 816 }, { "epoch": 0.05581744893079183, "grad_norm": 5.754367351531982, "learning_rate": 6.9878920739435794e-06, "loss": 0.4153, "step": 817 }, { "epoch": 0.05588576894172303, "grad_norm": 5.321170330047607, "learning_rate": 6.987827621844608e-06, "loss": 0.4233, "step": 818 }, { "epoch": 0.05595408895265423, "grad_norm": 5.766734600067139, "learning_rate": 6.98776299895597e-06, "loss": 0.4714, "step": 819 }, { "epoch": 0.056022408963585436, "grad_norm": 6.10848331451416, "learning_rate": 6.987698205280828e-06, "loss": 0.4068, "step": 820 }, { "epoch": 0.056090728974516633, "grad_norm": 7.466212272644043, "learning_rate": 6.987633240822358e-06, "loss": 0.5583, "step": 821 }, { "epoch": 0.05615904898544784, "grad_norm": 6.334005355834961, "learning_rate": 6.987568105583739e-06, "loss": 0.5402, "step": 822 }, { "epoch": 0.05622736899637904, "grad_norm": 6.333431243896484, "learning_rate": 6.987502799568161e-06, "loss": 0.492, "step": 823 }, { "epoch": 0.05629568900731024, "grad_norm": 2.873946189880371, "learning_rate": 6.98743732277882e-06, "loss": 0.3773, "step": 824 }, { "epoch": 0.05636400901824144, "grad_norm": 6.52631950378418, "learning_rate": 6.987371675218926e-06, "loss": 0.5092, "step": 825 }, { "epoch": 0.05643232902917265, "grad_norm": 7.010120391845703, "learning_rate": 6.98730585689169e-06, "loss": 0.4431, "step": 826 }, { "epoch": 0.056500649040103844, "grad_norm": 4.8543782234191895, "learning_rate": 6.987239867800337e-06, "loss": 0.4396, "step": 827 }, { "epoch": 0.05656896905103505, "grad_norm": 5.386585712432861, "learning_rate": 6.9871737079480994e-06, "loss": 0.5027, "step": 828 }, { "epoch": 0.05663728906196625, "grad_norm": 4.614814281463623, "learning_rate": 6.987107377338213e-06, "loss": 0.4101, "step": 829 }, { "epoch": 0.05670560907289745, "grad_norm": 5.365667819976807, "learning_rate": 6.98704087597393e-06, "loss": 0.522, "step": 830 }, { "epoch": 0.05677392908382865, "grad_norm": 5.446089744567871, "learning_rate": 6.986974203858503e-06, "loss": 0.4769, "step": 831 }, { "epoch": 0.05684224909475986, "grad_norm": 6.733297348022461, "learning_rate": 6.986907360995199e-06, "loss": 0.5241, "step": 832 }, { "epoch": 0.056910569105691054, "grad_norm": 4.842236518859863, "learning_rate": 6.986840347387291e-06, "loss": 0.5231, "step": 833 }, { "epoch": 0.05697888911662226, "grad_norm": 5.423616886138916, "learning_rate": 6.986773163038061e-06, "loss": 0.5173, "step": 834 }, { "epoch": 0.05704720912755346, "grad_norm": 5.918302536010742, "learning_rate": 6.986705807950798e-06, "loss": 0.5966, "step": 835 }, { "epoch": 0.05711552913848466, "grad_norm": 6.512700080871582, "learning_rate": 6.9866382821288e-06, "loss": 0.5518, "step": 836 }, { "epoch": 0.057183849149415863, "grad_norm": 5.923089504241943, "learning_rate": 6.986570585575373e-06, "loss": 0.5863, "step": 837 }, { "epoch": 0.05725216916034707, "grad_norm": 4.583014488220215, "learning_rate": 6.986502718293834e-06, "loss": 0.4699, "step": 838 }, { "epoch": 0.057320489171278265, "grad_norm": 6.492140769958496, "learning_rate": 6.9864346802875035e-06, "loss": 0.4143, "step": 839 }, { "epoch": 0.05738880918220947, "grad_norm": 6.225160598754883, "learning_rate": 6.986366471559716e-06, "loss": 0.4483, "step": 840 }, { "epoch": 0.05745712919314067, "grad_norm": 4.6378655433654785, "learning_rate": 6.98629809211381e-06, "loss": 0.4773, "step": 841 }, { "epoch": 0.05752544920407187, "grad_norm": 4.273635387420654, "learning_rate": 6.9862295419531344e-06, "loss": 0.3512, "step": 842 }, { "epoch": 0.057593769215003074, "grad_norm": 6.7499918937683105, "learning_rate": 6.986160821081044e-06, "loss": 0.4543, "step": 843 }, { "epoch": 0.05766208922593428, "grad_norm": 5.864699840545654, "learning_rate": 6.986091929500907e-06, "loss": 0.5117, "step": 844 }, { "epoch": 0.057730409236865475, "grad_norm": 5.612395286560059, "learning_rate": 6.986022867216095e-06, "loss": 0.3943, "step": 845 }, { "epoch": 0.05779872924779668, "grad_norm": 4.750389099121094, "learning_rate": 6.98595363422999e-06, "loss": 0.4728, "step": 846 }, { "epoch": 0.05786704925872788, "grad_norm": 6.559749603271484, "learning_rate": 6.985884230545981e-06, "loss": 0.4674, "step": 847 }, { "epoch": 0.05793536926965908, "grad_norm": 4.66409158706665, "learning_rate": 6.9858146561674685e-06, "loss": 0.4817, "step": 848 }, { "epoch": 0.058003689280590284, "grad_norm": 4.610204219818115, "learning_rate": 6.985744911097859e-06, "loss": 0.4047, "step": 849 }, { "epoch": 0.05807200929152149, "grad_norm": 5.64168119430542, "learning_rate": 6.985674995340567e-06, "loss": 0.5215, "step": 850 }, { "epoch": 0.058140329302452685, "grad_norm": 5.072474956512451, "learning_rate": 6.985604908899018e-06, "loss": 0.4216, "step": 851 }, { "epoch": 0.05820864931338389, "grad_norm": 5.536591053009033, "learning_rate": 6.985534651776641e-06, "loss": 0.548, "step": 852 }, { "epoch": 0.058276969324315094, "grad_norm": 5.612533092498779, "learning_rate": 6.985464223976877e-06, "loss": 0.4685, "step": 853 }, { "epoch": 0.05834528933524629, "grad_norm": 5.402032375335693, "learning_rate": 6.985393625503176e-06, "loss": 0.4277, "step": 854 }, { "epoch": 0.058413609346177495, "grad_norm": 5.403318405151367, "learning_rate": 6.985322856358994e-06, "loss": 0.5064, "step": 855 }, { "epoch": 0.0584819293571087, "grad_norm": 5.977784156799316, "learning_rate": 6.985251916547798e-06, "loss": 0.4786, "step": 856 }, { "epoch": 0.058550249368039896, "grad_norm": 4.463479518890381, "learning_rate": 6.985180806073059e-06, "loss": 0.4398, "step": 857 }, { "epoch": 0.0586185693789711, "grad_norm": 4.5741682052612305, "learning_rate": 6.9851095249382614e-06, "loss": 0.522, "step": 858 }, { "epoch": 0.058686889389902304, "grad_norm": 5.973104953765869, "learning_rate": 6.9850380731468945e-06, "loss": 0.5734, "step": 859 }, { "epoch": 0.0587552094008335, "grad_norm": 4.769052028656006, "learning_rate": 6.984966450702458e-06, "loss": 0.4641, "step": 860 }, { "epoch": 0.058823529411764705, "grad_norm": 4.759701251983643, "learning_rate": 6.984894657608458e-06, "loss": 0.3942, "step": 861 }, { "epoch": 0.05889184942269591, "grad_norm": 5.304861545562744, "learning_rate": 6.984822693868411e-06, "loss": 0.3906, "step": 862 }, { "epoch": 0.058960169433627106, "grad_norm": 5.564618110656738, "learning_rate": 6.98475055948584e-06, "loss": 0.4759, "step": 863 }, { "epoch": 0.05902848944455831, "grad_norm": 6.526188850402832, "learning_rate": 6.984678254464277e-06, "loss": 0.5194, "step": 864 }, { "epoch": 0.059096809455489514, "grad_norm": 4.066537857055664, "learning_rate": 6.9846057788072635e-06, "loss": 0.4246, "step": 865 }, { "epoch": 0.05916512946642071, "grad_norm": 5.686439514160156, "learning_rate": 6.984533132518349e-06, "loss": 0.4927, "step": 866 }, { "epoch": 0.059233449477351915, "grad_norm": 5.617433071136475, "learning_rate": 6.984460315601088e-06, "loss": 0.4394, "step": 867 }, { "epoch": 0.05930176948828312, "grad_norm": 4.988180637359619, "learning_rate": 6.98438732805905e-06, "loss": 0.4827, "step": 868 }, { "epoch": 0.05937008949921432, "grad_norm": 5.747071266174316, "learning_rate": 6.984314169895807e-06, "loss": 0.498, "step": 869 }, { "epoch": 0.05943840951014552, "grad_norm": 8.16965103149414, "learning_rate": 6.98424084111494e-06, "loss": 0.507, "step": 870 }, { "epoch": 0.059506729521076725, "grad_norm": 5.424156188964844, "learning_rate": 6.984167341720042e-06, "loss": 0.3829, "step": 871 }, { "epoch": 0.05957504953200792, "grad_norm": 6.083730220794678, "learning_rate": 6.984093671714711e-06, "loss": 0.4724, "step": 872 }, { "epoch": 0.059643369542939126, "grad_norm": 4.484676361083984, "learning_rate": 6.984019831102556e-06, "loss": 0.5037, "step": 873 }, { "epoch": 0.05971168955387033, "grad_norm": 5.398575782775879, "learning_rate": 6.98394581988719e-06, "loss": 0.4829, "step": 874 }, { "epoch": 0.05978000956480153, "grad_norm": 5.141324520111084, "learning_rate": 6.983871638072239e-06, "loss": 0.5284, "step": 875 }, { "epoch": 0.05984832957573273, "grad_norm": 4.479282379150391, "learning_rate": 6.983797285661335e-06, "loss": 0.4719, "step": 876 }, { "epoch": 0.059916649586663935, "grad_norm": 5.84434700012207, "learning_rate": 6.98372276265812e-06, "loss": 0.5072, "step": 877 }, { "epoch": 0.05998496959759513, "grad_norm": 4.499395847320557, "learning_rate": 6.98364806906624e-06, "loss": 0.5068, "step": 878 }, { "epoch": 0.060053289608526336, "grad_norm": 4.360836029052734, "learning_rate": 6.983573204889356e-06, "loss": 0.4761, "step": 879 }, { "epoch": 0.06012160961945754, "grad_norm": 6.049273490905762, "learning_rate": 6.983498170131132e-06, "loss": 0.4709, "step": 880 }, { "epoch": 0.060189929630388744, "grad_norm": 5.283913612365723, "learning_rate": 6.9834229647952434e-06, "loss": 0.4861, "step": 881 }, { "epoch": 0.06025824964131994, "grad_norm": 6.273472309112549, "learning_rate": 6.983347588885371e-06, "loss": 0.5023, "step": 882 }, { "epoch": 0.060326569652251145, "grad_norm": 4.846480846405029, "learning_rate": 6.9832720424052076e-06, "loss": 0.4232, "step": 883 }, { "epoch": 0.06039488966318235, "grad_norm": 5.227074146270752, "learning_rate": 6.983196325358452e-06, "loss": 0.4804, "step": 884 }, { "epoch": 0.06046320967411355, "grad_norm": 6.235658645629883, "learning_rate": 6.983120437748812e-06, "loss": 0.4081, "step": 885 }, { "epoch": 0.06053152968504475, "grad_norm": 4.9463958740234375, "learning_rate": 6.983044379580003e-06, "loss": 0.4211, "step": 886 }, { "epoch": 0.060599849695975955, "grad_norm": 5.408195972442627, "learning_rate": 6.98296815085575e-06, "loss": 0.4149, "step": 887 }, { "epoch": 0.06066816970690715, "grad_norm": 4.926308631896973, "learning_rate": 6.9828917515797855e-06, "loss": 0.4818, "step": 888 }, { "epoch": 0.060736489717838356, "grad_norm": 4.574568271636963, "learning_rate": 6.982815181755851e-06, "loss": 0.4706, "step": 889 }, { "epoch": 0.06080480972876956, "grad_norm": 3.967928409576416, "learning_rate": 6.982738441387694e-06, "loss": 0.4298, "step": 890 }, { "epoch": 0.06087312973970076, "grad_norm": 5.181745529174805, "learning_rate": 6.982661530479074e-06, "loss": 0.3955, "step": 891 }, { "epoch": 0.06094144975063196, "grad_norm": 4.756076335906982, "learning_rate": 6.9825844490337565e-06, "loss": 0.4982, "step": 892 }, { "epoch": 0.061009769761563165, "grad_norm": 5.005542755126953, "learning_rate": 6.982507197055517e-06, "loss": 0.493, "step": 893 }, { "epoch": 0.06107808977249436, "grad_norm": 5.440253257751465, "learning_rate": 6.982429774548139e-06, "loss": 0.4745, "step": 894 }, { "epoch": 0.061146409783425566, "grad_norm": 4.87623405456543, "learning_rate": 6.9823521815154106e-06, "loss": 0.4744, "step": 895 }, { "epoch": 0.06121472979435677, "grad_norm": 5.300076961517334, "learning_rate": 6.982274417961132e-06, "loss": 0.4805, "step": 896 }, { "epoch": 0.06128304980528797, "grad_norm": 4.693041801452637, "learning_rate": 6.982196483889114e-06, "loss": 0.4829, "step": 897 }, { "epoch": 0.06135136981621917, "grad_norm": 5.244170188903809, "learning_rate": 6.98211837930317e-06, "loss": 0.451, "step": 898 }, { "epoch": 0.061419689827150376, "grad_norm": 4.624935150146484, "learning_rate": 6.982040104207126e-06, "loss": 0.5011, "step": 899 }, { "epoch": 0.06148800983808157, "grad_norm": 4.557201385498047, "learning_rate": 6.9819616586048136e-06, "loss": 0.4604, "step": 900 }, { "epoch": 0.06155632984901278, "grad_norm": 6.542797565460205, "learning_rate": 6.981883042500076e-06, "loss": 0.5749, "step": 901 }, { "epoch": 0.06162464985994398, "grad_norm": 6.3928704261779785, "learning_rate": 6.981804255896761e-06, "loss": 0.5383, "step": 902 }, { "epoch": 0.06169296987087518, "grad_norm": 4.196661949157715, "learning_rate": 6.981725298798728e-06, "loss": 0.4304, "step": 903 }, { "epoch": 0.06176128988180638, "grad_norm": 5.691556453704834, "learning_rate": 6.9816461712098415e-06, "loss": 0.4776, "step": 904 }, { "epoch": 0.061829609892737586, "grad_norm": 5.671245574951172, "learning_rate": 6.981566873133978e-06, "loss": 0.4484, "step": 905 }, { "epoch": 0.06189792990366878, "grad_norm": 5.656569957733154, "learning_rate": 6.98148740457502e-06, "loss": 0.5367, "step": 906 }, { "epoch": 0.06196624991459999, "grad_norm": 5.002378463745117, "learning_rate": 6.981407765536858e-06, "loss": 0.4266, "step": 907 }, { "epoch": 0.06203456992553119, "grad_norm": 6.26120662689209, "learning_rate": 6.981327956023391e-06, "loss": 0.4829, "step": 908 }, { "epoch": 0.06210288993646239, "grad_norm": 5.168051242828369, "learning_rate": 6.981247976038531e-06, "loss": 0.452, "step": 909 }, { "epoch": 0.06217120994739359, "grad_norm": 5.430450916290283, "learning_rate": 6.981167825586191e-06, "loss": 0.5064, "step": 910 }, { "epoch": 0.062239529958324796, "grad_norm": 5.506072044372559, "learning_rate": 6.9810875046702965e-06, "loss": 0.5549, "step": 911 }, { "epoch": 0.06230784996925599, "grad_norm": 4.093185901641846, "learning_rate": 6.981007013294781e-06, "loss": 0.3958, "step": 912 }, { "epoch": 0.0623761699801872, "grad_norm": 4.318744659423828, "learning_rate": 6.980926351463585e-06, "loss": 0.3941, "step": 913 }, { "epoch": 0.0624444899911184, "grad_norm": 5.433882236480713, "learning_rate": 6.980845519180659e-06, "loss": 0.4357, "step": 914 }, { "epoch": 0.0625128100020496, "grad_norm": 5.118538856506348, "learning_rate": 6.9807645164499614e-06, "loss": 0.5369, "step": 915 }, { "epoch": 0.0625811300129808, "grad_norm": 5.406454563140869, "learning_rate": 6.980683343275458e-06, "loss": 0.5418, "step": 916 }, { "epoch": 0.062649450023912, "grad_norm": 3.9112250804901123, "learning_rate": 6.980601999661124e-06, "loss": 0.3819, "step": 917 }, { "epoch": 0.0627177700348432, "grad_norm": 4.9855451583862305, "learning_rate": 6.980520485610943e-06, "loss": 0.4689, "step": 918 }, { "epoch": 0.0627860900457744, "grad_norm": 6.347271919250488, "learning_rate": 6.9804388011289054e-06, "loss": 0.5713, "step": 919 }, { "epoch": 0.06285441005670561, "grad_norm": 4.910754203796387, "learning_rate": 6.980356946219012e-06, "loss": 0.5326, "step": 920 }, { "epoch": 0.06292273006763681, "grad_norm": 4.860776424407959, "learning_rate": 6.980274920885272e-06, "loss": 0.5254, "step": 921 }, { "epoch": 0.062991050078568, "grad_norm": 4.532150745391846, "learning_rate": 6.9801927251316995e-06, "loss": 0.4634, "step": 922 }, { "epoch": 0.06305937008949922, "grad_norm": 4.966032028198242, "learning_rate": 6.980110358962321e-06, "loss": 0.434, "step": 923 }, { "epoch": 0.06312769010043041, "grad_norm": 5.3694915771484375, "learning_rate": 6.980027822381169e-06, "loss": 0.3336, "step": 924 }, { "epoch": 0.06319601011136161, "grad_norm": 5.669986724853516, "learning_rate": 6.979945115392286e-06, "loss": 0.4748, "step": 925 }, { "epoch": 0.06326433012229282, "grad_norm": 3.6502201557159424, "learning_rate": 6.979862237999722e-06, "loss": 0.3539, "step": 926 }, { "epoch": 0.06333265013322402, "grad_norm": 6.58615779876709, "learning_rate": 6.979779190207534e-06, "loss": 0.4494, "step": 927 }, { "epoch": 0.06340097014415522, "grad_norm": 5.949869632720947, "learning_rate": 6.979695972019788e-06, "loss": 0.4877, "step": 928 }, { "epoch": 0.06346929015508643, "grad_norm": 5.087482452392578, "learning_rate": 6.979612583440563e-06, "loss": 0.4917, "step": 929 }, { "epoch": 0.06353761016601762, "grad_norm": 4.150244235992432, "learning_rate": 6.979529024473938e-06, "loss": 0.3825, "step": 930 }, { "epoch": 0.06360593017694882, "grad_norm": 4.149338245391846, "learning_rate": 6.979445295124007e-06, "loss": 0.4578, "step": 931 }, { "epoch": 0.06367425018788003, "grad_norm": 4.621589660644531, "learning_rate": 6.979361395394869e-06, "loss": 0.4536, "step": 932 }, { "epoch": 0.06374257019881123, "grad_norm": 4.396474838256836, "learning_rate": 6.979277325290632e-06, "loss": 0.4511, "step": 933 }, { "epoch": 0.06381089020974243, "grad_norm": 5.8095011711120605, "learning_rate": 6.979193084815415e-06, "loss": 0.4996, "step": 934 }, { "epoch": 0.06387921022067364, "grad_norm": 6.109733581542969, "learning_rate": 6.97910867397334e-06, "loss": 0.5936, "step": 935 }, { "epoch": 0.06394753023160483, "grad_norm": 5.031907081604004, "learning_rate": 6.979024092768544e-06, "loss": 0.4367, "step": 936 }, { "epoch": 0.06401585024253603, "grad_norm": 3.8161332607269287, "learning_rate": 6.978939341205164e-06, "loss": 0.4249, "step": 937 }, { "epoch": 0.06408417025346724, "grad_norm": 4.290917873382568, "learning_rate": 6.978854419287353e-06, "loss": 0.4585, "step": 938 }, { "epoch": 0.06415249026439844, "grad_norm": 4.317746639251709, "learning_rate": 6.978769327019269e-06, "loss": 0.4651, "step": 939 }, { "epoch": 0.06422081027532964, "grad_norm": 5.911868572235107, "learning_rate": 6.9786840644050795e-06, "loss": 0.5437, "step": 940 }, { "epoch": 0.06428913028626085, "grad_norm": 4.347911834716797, "learning_rate": 6.978598631448958e-06, "loss": 0.3901, "step": 941 }, { "epoch": 0.06435745029719205, "grad_norm": 3.937730550765991, "learning_rate": 6.978513028155089e-06, "loss": 0.3824, "step": 942 }, { "epoch": 0.06442577030812324, "grad_norm": 5.299707412719727, "learning_rate": 6.978427254527664e-06, "loss": 0.4364, "step": 943 }, { "epoch": 0.06449409031905445, "grad_norm": 5.041306972503662, "learning_rate": 6.978341310570884e-06, "loss": 0.4542, "step": 944 }, { "epoch": 0.06456241032998565, "grad_norm": 6.875980377197266, "learning_rate": 6.9782551962889546e-06, "loss": 0.4427, "step": 945 }, { "epoch": 0.06463073034091685, "grad_norm": 5.544386386871338, "learning_rate": 6.978168911686096e-06, "loss": 0.3757, "step": 946 }, { "epoch": 0.06469905035184806, "grad_norm": 4.9012017250061035, "learning_rate": 6.978082456766531e-06, "loss": 0.4448, "step": 947 }, { "epoch": 0.06476737036277926, "grad_norm": 5.388370990753174, "learning_rate": 6.977995831534495e-06, "loss": 0.5015, "step": 948 }, { "epoch": 0.06483569037371047, "grad_norm": 4.372318267822266, "learning_rate": 6.977909035994228e-06, "loss": 0.4723, "step": 949 }, { "epoch": 0.06490401038464166, "grad_norm": 5.218762397766113, "learning_rate": 6.97782207014998e-06, "loss": 0.4507, "step": 950 }, { "epoch": 0.06497233039557286, "grad_norm": 7.4524736404418945, "learning_rate": 6.977734934006011e-06, "loss": 0.5222, "step": 951 }, { "epoch": 0.06504065040650407, "grad_norm": 4.649774074554443, "learning_rate": 6.977647627566588e-06, "loss": 0.4469, "step": 952 }, { "epoch": 0.06510897041743527, "grad_norm": 4.385945796966553, "learning_rate": 6.977560150835985e-06, "loss": 0.4481, "step": 953 }, { "epoch": 0.06517729042836647, "grad_norm": 4.134610652923584, "learning_rate": 6.977472503818485e-06, "loss": 0.3563, "step": 954 }, { "epoch": 0.06524561043929768, "grad_norm": 4.373342990875244, "learning_rate": 6.977384686518382e-06, "loss": 0.4382, "step": 955 }, { "epoch": 0.06531393045022887, "grad_norm": 5.6946797370910645, "learning_rate": 6.977296698939973e-06, "loss": 0.5498, "step": 956 }, { "epoch": 0.06538225046116007, "grad_norm": 3.881970167160034, "learning_rate": 6.97720854108757e-06, "loss": 0.374, "step": 957 }, { "epoch": 0.06545057047209128, "grad_norm": 4.965295314788818, "learning_rate": 6.9771202129654885e-06, "loss": 0.4957, "step": 958 }, { "epoch": 0.06551889048302248, "grad_norm": 4.226462364196777, "learning_rate": 6.9770317145780525e-06, "loss": 0.4152, "step": 959 }, { "epoch": 0.06558721049395368, "grad_norm": 5.636233806610107, "learning_rate": 6.976943045929595e-06, "loss": 0.3612, "step": 960 }, { "epoch": 0.06565553050488489, "grad_norm": 5.623170852661133, "learning_rate": 6.976854207024463e-06, "loss": 0.5679, "step": 961 }, { "epoch": 0.06572385051581608, "grad_norm": 5.362651824951172, "learning_rate": 6.976765197867001e-06, "loss": 0.583, "step": 962 }, { "epoch": 0.06579217052674728, "grad_norm": 5.219436168670654, "learning_rate": 6.976676018461568e-06, "loss": 0.4431, "step": 963 }, { "epoch": 0.06586049053767849, "grad_norm": 3.795285224914551, "learning_rate": 6.976586668812534e-06, "loss": 0.4396, "step": 964 }, { "epoch": 0.06592881054860969, "grad_norm": 5.328129291534424, "learning_rate": 6.976497148924273e-06, "loss": 0.463, "step": 965 }, { "epoch": 0.06599713055954089, "grad_norm": 4.052913188934326, "learning_rate": 6.976407458801168e-06, "loss": 0.4433, "step": 966 }, { "epoch": 0.0660654505704721, "grad_norm": 5.629720687866211, "learning_rate": 6.976317598447611e-06, "loss": 0.4383, "step": 967 }, { "epoch": 0.0661337705814033, "grad_norm": 4.617870807647705, "learning_rate": 6.976227567868003e-06, "loss": 0.554, "step": 968 }, { "epoch": 0.06620209059233449, "grad_norm": 5.106689453125, "learning_rate": 6.976137367066751e-06, "loss": 0.358, "step": 969 }, { "epoch": 0.0662704106032657, "grad_norm": 4.589452743530273, "learning_rate": 6.976046996048274e-06, "loss": 0.3906, "step": 970 }, { "epoch": 0.0663387306141969, "grad_norm": 5.119375705718994, "learning_rate": 6.975956454816994e-06, "loss": 0.6419, "step": 971 }, { "epoch": 0.0664070506251281, "grad_norm": 4.320983409881592, "learning_rate": 6.9758657433773484e-06, "loss": 0.3608, "step": 972 }, { "epoch": 0.06647537063605931, "grad_norm": 4.763766765594482, "learning_rate": 6.9757748617337765e-06, "loss": 0.546, "step": 973 }, { "epoch": 0.0665436906469905, "grad_norm": 4.278682708740234, "learning_rate": 6.97568380989073e-06, "loss": 0.4146, "step": 974 }, { "epoch": 0.0666120106579217, "grad_norm": 4.547045707702637, "learning_rate": 6.975592587852665e-06, "loss": 0.4193, "step": 975 }, { "epoch": 0.06668033066885291, "grad_norm": 7.809145927429199, "learning_rate": 6.975501195624052e-06, "loss": 0.6475, "step": 976 }, { "epoch": 0.06674865067978411, "grad_norm": 6.158968448638916, "learning_rate": 6.975409633209364e-06, "loss": 0.4968, "step": 977 }, { "epoch": 0.06681697069071531, "grad_norm": 6.328779220581055, "learning_rate": 6.975317900613084e-06, "loss": 0.5229, "step": 978 }, { "epoch": 0.06688529070164652, "grad_norm": 5.684151649475098, "learning_rate": 6.975225997839706e-06, "loss": 0.418, "step": 979 }, { "epoch": 0.06695361071257772, "grad_norm": 4.940898895263672, "learning_rate": 6.975133924893729e-06, "loss": 0.4138, "step": 980 }, { "epoch": 0.06702193072350891, "grad_norm": 5.123105525970459, "learning_rate": 6.975041681779662e-06, "loss": 0.4126, "step": 981 }, { "epoch": 0.06709025073444012, "grad_norm": 5.354041576385498, "learning_rate": 6.974949268502021e-06, "loss": 0.4569, "step": 982 }, { "epoch": 0.06715857074537132, "grad_norm": 5.966973781585693, "learning_rate": 6.974856685065332e-06, "loss": 0.4385, "step": 983 }, { "epoch": 0.06722689075630252, "grad_norm": 5.555461406707764, "learning_rate": 6.974763931474128e-06, "loss": 0.4128, "step": 984 }, { "epoch": 0.06729521076723373, "grad_norm": 5.8496904373168945, "learning_rate": 6.974671007732951e-06, "loss": 0.5607, "step": 985 }, { "epoch": 0.06736353077816493, "grad_norm": 4.248170375823975, "learning_rate": 6.974577913846353e-06, "loss": 0.4437, "step": 986 }, { "epoch": 0.06743185078909612, "grad_norm": 6.146648406982422, "learning_rate": 6.97448464981889e-06, "loss": 0.5744, "step": 987 }, { "epoch": 0.06750017080002733, "grad_norm": 5.479284286499023, "learning_rate": 6.97439121565513e-06, "loss": 0.4253, "step": 988 }, { "epoch": 0.06756849081095853, "grad_norm": 6.048396587371826, "learning_rate": 6.974297611359649e-06, "loss": 0.4949, "step": 989 }, { "epoch": 0.06763681082188973, "grad_norm": 5.183492660522461, "learning_rate": 6.9742038369370285e-06, "loss": 0.4183, "step": 990 }, { "epoch": 0.06770513083282094, "grad_norm": 6.4652018547058105, "learning_rate": 6.974109892391863e-06, "loss": 0.5803, "step": 991 }, { "epoch": 0.06777345084375214, "grad_norm": 4.580708980560303, "learning_rate": 6.97401577772875e-06, "loss": 0.4663, "step": 992 }, { "epoch": 0.06784177085468333, "grad_norm": 6.379363536834717, "learning_rate": 6.9739214929523e-06, "loss": 0.5024, "step": 993 }, { "epoch": 0.06791009086561454, "grad_norm": 6.137452125549316, "learning_rate": 6.973827038067129e-06, "loss": 0.4737, "step": 994 }, { "epoch": 0.06797841087654574, "grad_norm": 5.576153755187988, "learning_rate": 6.973732413077863e-06, "loss": 0.4837, "step": 995 }, { "epoch": 0.06804673088747694, "grad_norm": 4.694103240966797, "learning_rate": 6.973637617989135e-06, "loss": 0.405, "step": 996 }, { "epoch": 0.06811505089840815, "grad_norm": 4.866527080535889, "learning_rate": 6.973542652805586e-06, "loss": 0.4638, "step": 997 }, { "epoch": 0.06818337090933935, "grad_norm": 3.678528308868408, "learning_rate": 6.973447517531868e-06, "loss": 0.3689, "step": 998 }, { "epoch": 0.06825169092027054, "grad_norm": 4.800528526306152, "learning_rate": 6.973352212172638e-06, "loss": 0.5105, "step": 999 }, { "epoch": 0.06832001093120176, "grad_norm": 4.974269390106201, "learning_rate": 6.9732567367325645e-06, "loss": 0.41, "step": 1000 }, { "epoch": 0.06838833094213295, "grad_norm": 5.583682537078857, "learning_rate": 6.973161091216321e-06, "loss": 0.4526, "step": 1001 }, { "epoch": 0.06845665095306415, "grad_norm": 6.187911033630371, "learning_rate": 6.9730652756285905e-06, "loss": 0.4695, "step": 1002 }, { "epoch": 0.06852497096399536, "grad_norm": 5.139835357666016, "learning_rate": 6.972969289974067e-06, "loss": 0.4726, "step": 1003 }, { "epoch": 0.06859329097492656, "grad_norm": 6.467377185821533, "learning_rate": 6.97287313425745e-06, "loss": 0.4134, "step": 1004 }, { "epoch": 0.06866161098585775, "grad_norm": 4.5241546630859375, "learning_rate": 6.972776808483447e-06, "loss": 0.5141, "step": 1005 }, { "epoch": 0.06872993099678897, "grad_norm": 4.507420539855957, "learning_rate": 6.972680312656776e-06, "loss": 0.4143, "step": 1006 }, { "epoch": 0.06879825100772016, "grad_norm": 5.2317399978637695, "learning_rate": 6.972583646782161e-06, "loss": 0.5369, "step": 1007 }, { "epoch": 0.06886657101865136, "grad_norm": 6.116033554077148, "learning_rate": 6.972486810864336e-06, "loss": 0.4215, "step": 1008 }, { "epoch": 0.06893489102958257, "grad_norm": 4.915561676025391, "learning_rate": 6.972389804908043e-06, "loss": 0.5074, "step": 1009 }, { "epoch": 0.06900321104051377, "grad_norm": 6.491398334503174, "learning_rate": 6.972292628918031e-06, "loss": 0.4514, "step": 1010 }, { "epoch": 0.06907153105144496, "grad_norm": 5.508046627044678, "learning_rate": 6.9721952828990615e-06, "loss": 0.4222, "step": 1011 }, { "epoch": 0.06913985106237618, "grad_norm": 6.003370761871338, "learning_rate": 6.972097766855898e-06, "loss": 0.3935, "step": 1012 }, { "epoch": 0.06920817107330737, "grad_norm": 6.596052646636963, "learning_rate": 6.972000080793317e-06, "loss": 0.5239, "step": 1013 }, { "epoch": 0.06927649108423857, "grad_norm": 6.0664448738098145, "learning_rate": 6.971902224716102e-06, "loss": 0.5458, "step": 1014 }, { "epoch": 0.06934481109516978, "grad_norm": 5.178022384643555, "learning_rate": 6.971804198629044e-06, "loss": 0.4352, "step": 1015 }, { "epoch": 0.06941313110610098, "grad_norm": 5.295454978942871, "learning_rate": 6.9717060025369436e-06, "loss": 0.4677, "step": 1016 }, { "epoch": 0.06948145111703218, "grad_norm": 5.225893020629883, "learning_rate": 6.97160763644461e-06, "loss": 0.5402, "step": 1017 }, { "epoch": 0.06954977112796339, "grad_norm": 4.392914295196533, "learning_rate": 6.971509100356858e-06, "loss": 0.4386, "step": 1018 }, { "epoch": 0.06961809113889458, "grad_norm": 4.1377272605896, "learning_rate": 6.971410394278514e-06, "loss": 0.4611, "step": 1019 }, { "epoch": 0.06968641114982578, "grad_norm": 6.031479358673096, "learning_rate": 6.971311518214411e-06, "loss": 0.6168, "step": 1020 }, { "epoch": 0.06975473116075699, "grad_norm": 5.80026388168335, "learning_rate": 6.971212472169391e-06, "loss": 0.4738, "step": 1021 }, { "epoch": 0.06982305117168819, "grad_norm": 6.943430423736572, "learning_rate": 6.971113256148304e-06, "loss": 0.4862, "step": 1022 }, { "epoch": 0.06989137118261939, "grad_norm": 5.271905899047852, "learning_rate": 6.971013870156008e-06, "loss": 0.4502, "step": 1023 }, { "epoch": 0.0699596911935506, "grad_norm": 5.670677661895752, "learning_rate": 6.97091431419737e-06, "loss": 0.521, "step": 1024 }, { "epoch": 0.0700280112044818, "grad_norm": 5.07114315032959, "learning_rate": 6.970814588277265e-06, "loss": 0.6034, "step": 1025 }, { "epoch": 0.07009633121541299, "grad_norm": 4.776607513427734, "learning_rate": 6.970714692400575e-06, "loss": 0.3852, "step": 1026 }, { "epoch": 0.0701646512263442, "grad_norm": 6.144665241241455, "learning_rate": 6.970614626572194e-06, "loss": 0.5077, "step": 1027 }, { "epoch": 0.0702329712372754, "grad_norm": 4.623538494110107, "learning_rate": 6.97051439079702e-06, "loss": 0.4703, "step": 1028 }, { "epoch": 0.0703012912482066, "grad_norm": 4.253912448883057, "learning_rate": 6.970413985079964e-06, "loss": 0.489, "step": 1029 }, { "epoch": 0.07036961125913781, "grad_norm": 4.449839115142822, "learning_rate": 6.970313409425939e-06, "loss": 0.4234, "step": 1030 }, { "epoch": 0.070437931270069, "grad_norm": 5.722070693969727, "learning_rate": 6.97021266383987e-06, "loss": 0.4179, "step": 1031 }, { "epoch": 0.0705062512810002, "grad_norm": 5.63511848449707, "learning_rate": 6.970111748326694e-06, "loss": 0.4723, "step": 1032 }, { "epoch": 0.07057457129193141, "grad_norm": 4.257573127746582, "learning_rate": 6.970010662891351e-06, "loss": 0.5019, "step": 1033 }, { "epoch": 0.07064289130286261, "grad_norm": 5.844900131225586, "learning_rate": 6.969909407538788e-06, "loss": 0.695, "step": 1034 }, { "epoch": 0.0707112113137938, "grad_norm": 5.0263895988464355, "learning_rate": 6.969807982273966e-06, "loss": 0.4855, "step": 1035 }, { "epoch": 0.07077953132472502, "grad_norm": 6.9580535888671875, "learning_rate": 6.969706387101851e-06, "loss": 0.593, "step": 1036 }, { "epoch": 0.07084785133565621, "grad_norm": 6.4573235511779785, "learning_rate": 6.9696046220274195e-06, "loss": 0.4452, "step": 1037 }, { "epoch": 0.07091617134658741, "grad_norm": 7.271811485290527, "learning_rate": 6.969502687055651e-06, "loss": 0.4766, "step": 1038 }, { "epoch": 0.07098449135751862, "grad_norm": 5.250948905944824, "learning_rate": 6.96940058219154e-06, "loss": 0.5051, "step": 1039 }, { "epoch": 0.07105281136844982, "grad_norm": 5.226772308349609, "learning_rate": 6.9692983074400855e-06, "loss": 0.5521, "step": 1040 }, { "epoch": 0.07112113137938102, "grad_norm": 4.874975204467773, "learning_rate": 6.969195862806295e-06, "loss": 0.4149, "step": 1041 }, { "epoch": 0.07118945139031223, "grad_norm": 5.247952938079834, "learning_rate": 6.969093248295184e-06, "loss": 0.3999, "step": 1042 }, { "epoch": 0.07125777140124342, "grad_norm": 5.567117691040039, "learning_rate": 6.9689904639117805e-06, "loss": 0.4595, "step": 1043 }, { "epoch": 0.07132609141217462, "grad_norm": 3.9168922901153564, "learning_rate": 6.968887509661115e-06, "loss": 0.4021, "step": 1044 }, { "epoch": 0.07139441142310583, "grad_norm": 4.515679836273193, "learning_rate": 6.96878438554823e-06, "loss": 0.4295, "step": 1045 }, { "epoch": 0.07146273143403703, "grad_norm": 5.128707408905029, "learning_rate": 6.9686810915781745e-06, "loss": 0.4551, "step": 1046 }, { "epoch": 0.07153105144496823, "grad_norm": 7.2604570388793945, "learning_rate": 6.9685776277560065e-06, "loss": 0.4817, "step": 1047 }, { "epoch": 0.07159937145589944, "grad_norm": 5.3176445960998535, "learning_rate": 6.9684739940867935e-06, "loss": 0.4438, "step": 1048 }, { "epoch": 0.07166769146683064, "grad_norm": 5.067906856536865, "learning_rate": 6.9683701905756086e-06, "loss": 0.4145, "step": 1049 }, { "epoch": 0.07173601147776183, "grad_norm": 4.448107719421387, "learning_rate": 6.968266217227535e-06, "loss": 0.4317, "step": 1050 }, { "epoch": 0.07180433148869304, "grad_norm": 6.091123580932617, "learning_rate": 6.968162074047665e-06, "loss": 0.5858, "step": 1051 }, { "epoch": 0.07187265149962424, "grad_norm": 5.304074764251709, "learning_rate": 6.968057761041099e-06, "loss": 0.4881, "step": 1052 }, { "epoch": 0.07194097151055544, "grad_norm": 5.166040420532227, "learning_rate": 6.967953278212942e-06, "loss": 0.5134, "step": 1053 }, { "epoch": 0.07200929152148665, "grad_norm": 5.831033706665039, "learning_rate": 6.967848625568313e-06, "loss": 0.4799, "step": 1054 }, { "epoch": 0.07207761153241785, "grad_norm": 4.391088008880615, "learning_rate": 6.967743803112334e-06, "loss": 0.4478, "step": 1055 }, { "epoch": 0.07214593154334904, "grad_norm": 6.43676233291626, "learning_rate": 6.967638810850141e-06, "loss": 0.5688, "step": 1056 }, { "epoch": 0.07221425155428025, "grad_norm": 5.920970439910889, "learning_rate": 6.967533648786873e-06, "loss": 0.5293, "step": 1057 }, { "epoch": 0.07228257156521145, "grad_norm": 4.4688401222229, "learning_rate": 6.96742831692768e-06, "loss": 0.4356, "step": 1058 }, { "epoch": 0.07235089157614265, "grad_norm": 6.602320671081543, "learning_rate": 6.96732281527772e-06, "loss": 0.5823, "step": 1059 }, { "epoch": 0.07241921158707386, "grad_norm": 6.659129619598389, "learning_rate": 6.9672171438421574e-06, "loss": 0.4866, "step": 1060 }, { "epoch": 0.07248753159800506, "grad_norm": 5.116439342498779, "learning_rate": 6.96711130262617e-06, "loss": 0.5102, "step": 1061 }, { "epoch": 0.07255585160893625, "grad_norm": 4.912674427032471, "learning_rate": 6.967005291634939e-06, "loss": 0.5287, "step": 1062 }, { "epoch": 0.07262417161986746, "grad_norm": 7.278204441070557, "learning_rate": 6.966899110873654e-06, "loss": 0.4137, "step": 1063 }, { "epoch": 0.07269249163079866, "grad_norm": 7.310492992401123, "learning_rate": 6.966792760347516e-06, "loss": 0.4918, "step": 1064 }, { "epoch": 0.07276081164172986, "grad_norm": 4.872712135314941, "learning_rate": 6.966686240061734e-06, "loss": 0.4382, "step": 1065 }, { "epoch": 0.07282913165266107, "grad_norm": 3.7145795822143555, "learning_rate": 6.9665795500215215e-06, "loss": 0.389, "step": 1066 }, { "epoch": 0.07289745166359227, "grad_norm": 5.154821395874023, "learning_rate": 6.966472690232104e-06, "loss": 0.4047, "step": 1067 }, { "epoch": 0.07296577167452346, "grad_norm": 4.190892219543457, "learning_rate": 6.966365660698712e-06, "loss": 0.3485, "step": 1068 }, { "epoch": 0.07303409168545467, "grad_norm": 4.450643539428711, "learning_rate": 6.966258461426591e-06, "loss": 0.462, "step": 1069 }, { "epoch": 0.07310241169638587, "grad_norm": 4.793623924255371, "learning_rate": 6.966151092420986e-06, "loss": 0.4905, "step": 1070 }, { "epoch": 0.07317073170731707, "grad_norm": 5.178919792175293, "learning_rate": 6.966043553687156e-06, "loss": 0.5013, "step": 1071 }, { "epoch": 0.07323905171824828, "grad_norm": 4.20626163482666, "learning_rate": 6.9659358452303675e-06, "loss": 0.4104, "step": 1072 }, { "epoch": 0.07330737172917948, "grad_norm": 7.403985023498535, "learning_rate": 6.9658279670558955e-06, "loss": 0.7068, "step": 1073 }, { "epoch": 0.07337569174011067, "grad_norm": 5.46896505355835, "learning_rate": 6.965719919169019e-06, "loss": 0.5502, "step": 1074 }, { "epoch": 0.07344401175104188, "grad_norm": 5.289668083190918, "learning_rate": 6.965611701575033e-06, "loss": 0.4897, "step": 1075 }, { "epoch": 0.07351233176197308, "grad_norm": 5.238574028015137, "learning_rate": 6.965503314279234e-06, "loss": 0.4208, "step": 1076 }, { "epoch": 0.07358065177290428, "grad_norm": 5.520150184631348, "learning_rate": 6.96539475728693e-06, "loss": 0.5227, "step": 1077 }, { "epoch": 0.07364897178383549, "grad_norm": 4.895176887512207, "learning_rate": 6.965286030603437e-06, "loss": 0.3641, "step": 1078 }, { "epoch": 0.07371729179476669, "grad_norm": 4.781255722045898, "learning_rate": 6.9651771342340785e-06, "loss": 0.4019, "step": 1079 }, { "epoch": 0.07378561180569788, "grad_norm": 4.565423965454102, "learning_rate": 6.9650680681841876e-06, "loss": 0.4851, "step": 1080 }, { "epoch": 0.0738539318166291, "grad_norm": 3.3624045848846436, "learning_rate": 6.964958832459105e-06, "loss": 0.4514, "step": 1081 }, { "epoch": 0.07392225182756029, "grad_norm": 5.590386390686035, "learning_rate": 6.964849427064179e-06, "loss": 0.4675, "step": 1082 }, { "epoch": 0.07399057183849149, "grad_norm": 5.199221134185791, "learning_rate": 6.964739852004768e-06, "loss": 0.4603, "step": 1083 }, { "epoch": 0.0740588918494227, "grad_norm": 5.295738220214844, "learning_rate": 6.964630107286236e-06, "loss": 0.4299, "step": 1084 }, { "epoch": 0.0741272118603539, "grad_norm": 6.301430702209473, "learning_rate": 6.964520192913959e-06, "loss": 0.6284, "step": 1085 }, { "epoch": 0.0741955318712851, "grad_norm": 6.622217655181885, "learning_rate": 6.964410108893316e-06, "loss": 0.4595, "step": 1086 }, { "epoch": 0.0742638518822163, "grad_norm": 4.83389949798584, "learning_rate": 6.9642998552297005e-06, "loss": 0.4532, "step": 1087 }, { "epoch": 0.0743321718931475, "grad_norm": 7.193907260894775, "learning_rate": 6.964189431928511e-06, "loss": 0.4925, "step": 1088 }, { "epoch": 0.0744004919040787, "grad_norm": 5.180975437164307, "learning_rate": 6.964078838995153e-06, "loss": 0.405, "step": 1089 }, { "epoch": 0.07446881191500991, "grad_norm": 4.5274834632873535, "learning_rate": 6.963968076435044e-06, "loss": 0.3803, "step": 1090 }, { "epoch": 0.07453713192594111, "grad_norm": 6.150032043457031, "learning_rate": 6.963857144253606e-06, "loss": 0.4616, "step": 1091 }, { "epoch": 0.0746054519368723, "grad_norm": 5.069127559661865, "learning_rate": 6.963746042456272e-06, "loss": 0.4128, "step": 1092 }, { "epoch": 0.07467377194780352, "grad_norm": 6.8901519775390625, "learning_rate": 6.963634771048481e-06, "loss": 0.4905, "step": 1093 }, { "epoch": 0.07474209195873471, "grad_norm": 4.511608600616455, "learning_rate": 6.963523330035684e-06, "loss": 0.5137, "step": 1094 }, { "epoch": 0.07481041196966591, "grad_norm": 4.947923183441162, "learning_rate": 6.963411719423337e-06, "loss": 0.4272, "step": 1095 }, { "epoch": 0.07487873198059712, "grad_norm": 4.139679908752441, "learning_rate": 6.963299939216905e-06, "loss": 0.465, "step": 1096 }, { "epoch": 0.07494705199152832, "grad_norm": 6.947859287261963, "learning_rate": 6.96318798942186e-06, "loss": 0.4386, "step": 1097 }, { "epoch": 0.07501537200245952, "grad_norm": 7.879364490509033, "learning_rate": 6.963075870043687e-06, "loss": 0.4231, "step": 1098 }, { "epoch": 0.07508369201339073, "grad_norm": 4.996697425842285, "learning_rate": 6.962963581087873e-06, "loss": 0.458, "step": 1099 }, { "epoch": 0.07515201202432192, "grad_norm": 5.209440231323242, "learning_rate": 6.9628511225599195e-06, "loss": 0.5509, "step": 1100 }, { "epoch": 0.07522033203525312, "grad_norm": 5.896338939666748, "learning_rate": 6.9627384944653315e-06, "loss": 0.445, "step": 1101 }, { "epoch": 0.07528865204618433, "grad_norm": 5.571317672729492, "learning_rate": 6.962625696809625e-06, "loss": 0.4487, "step": 1102 }, { "epoch": 0.07535697205711553, "grad_norm": 6.474515914916992, "learning_rate": 6.962512729598323e-06, "loss": 0.4742, "step": 1103 }, { "epoch": 0.07542529206804673, "grad_norm": 6.413109302520752, "learning_rate": 6.9623995928369565e-06, "loss": 0.4338, "step": 1104 }, { "epoch": 0.07549361207897794, "grad_norm": 4.119969844818115, "learning_rate": 6.962286286531067e-06, "loss": 0.4263, "step": 1105 }, { "epoch": 0.07556193208990913, "grad_norm": 5.579270839691162, "learning_rate": 6.962172810686201e-06, "loss": 0.5005, "step": 1106 }, { "epoch": 0.07563025210084033, "grad_norm": 5.475287437438965, "learning_rate": 6.962059165307916e-06, "loss": 0.4863, "step": 1107 }, { "epoch": 0.07569857211177154, "grad_norm": 6.641391277313232, "learning_rate": 6.961945350401778e-06, "loss": 0.4959, "step": 1108 }, { "epoch": 0.07576689212270274, "grad_norm": 6.084031581878662, "learning_rate": 6.96183136597336e-06, "loss": 0.4937, "step": 1109 }, { "epoch": 0.07583521213363394, "grad_norm": 5.2354817390441895, "learning_rate": 6.961717212028241e-06, "loss": 0.4394, "step": 1110 }, { "epoch": 0.07590353214456515, "grad_norm": 5.339702129364014, "learning_rate": 6.961602888572012e-06, "loss": 0.4018, "step": 1111 }, { "epoch": 0.07597185215549634, "grad_norm": 5.342970371246338, "learning_rate": 6.9614883956102725e-06, "loss": 0.4997, "step": 1112 }, { "epoch": 0.07604017216642754, "grad_norm": 5.023445129394531, "learning_rate": 6.961373733148629e-06, "loss": 0.4543, "step": 1113 }, { "epoch": 0.07610849217735875, "grad_norm": 3.86006760597229, "learning_rate": 6.961258901192694e-06, "loss": 0.414, "step": 1114 }, { "epoch": 0.07617681218828995, "grad_norm": 5.548669815063477, "learning_rate": 6.961143899748092e-06, "loss": 0.4653, "step": 1115 }, { "epoch": 0.07624513219922115, "grad_norm": 5.295998573303223, "learning_rate": 6.961028728820455e-06, "loss": 0.4584, "step": 1116 }, { "epoch": 0.07631345221015236, "grad_norm": 5.366547107696533, "learning_rate": 6.960913388415421e-06, "loss": 0.4411, "step": 1117 }, { "epoch": 0.07638177222108355, "grad_norm": 5.404208660125732, "learning_rate": 6.960797878538639e-06, "loss": 0.4975, "step": 1118 }, { "epoch": 0.07645009223201475, "grad_norm": 4.917410850524902, "learning_rate": 6.960682199195764e-06, "loss": 0.4386, "step": 1119 }, { "epoch": 0.07651841224294596, "grad_norm": 6.291979789733887, "learning_rate": 6.960566350392462e-06, "loss": 0.5387, "step": 1120 }, { "epoch": 0.07658673225387716, "grad_norm": 4.972846508026123, "learning_rate": 6.960450332134406e-06, "loss": 0.4131, "step": 1121 }, { "epoch": 0.07665505226480836, "grad_norm": 4.575351715087891, "learning_rate": 6.960334144427275e-06, "loss": 0.4681, "step": 1122 }, { "epoch": 0.07672337227573957, "grad_norm": 4.945193767547607, "learning_rate": 6.96021778727676e-06, "loss": 0.4184, "step": 1123 }, { "epoch": 0.07679169228667077, "grad_norm": 5.720283031463623, "learning_rate": 6.9601012606885586e-06, "loss": 0.4007, "step": 1124 }, { "epoch": 0.07686001229760196, "grad_norm": 5.307750225067139, "learning_rate": 6.9599845646683765e-06, "loss": 0.4127, "step": 1125 }, { "epoch": 0.07692833230853317, "grad_norm": 6.313582897186279, "learning_rate": 6.9598676992219284e-06, "loss": 0.5017, "step": 1126 }, { "epoch": 0.07699665231946437, "grad_norm": 4.082217216491699, "learning_rate": 6.959750664354935e-06, "loss": 0.4132, "step": 1127 }, { "epoch": 0.07706497233039557, "grad_norm": 4.96771764755249, "learning_rate": 6.95963346007313e-06, "loss": 0.5927, "step": 1128 }, { "epoch": 0.07713329234132678, "grad_norm": 4.743508338928223, "learning_rate": 6.9595160863822526e-06, "loss": 0.4023, "step": 1129 }, { "epoch": 0.07720161235225798, "grad_norm": 5.030421733856201, "learning_rate": 6.959398543288047e-06, "loss": 0.4757, "step": 1130 }, { "epoch": 0.07726993236318917, "grad_norm": 4.966275691986084, "learning_rate": 6.959280830796272e-06, "loss": 0.3537, "step": 1131 }, { "epoch": 0.07733825237412038, "grad_norm": 6.186814785003662, "learning_rate": 6.95916294891269e-06, "loss": 0.4379, "step": 1132 }, { "epoch": 0.07740657238505158, "grad_norm": 5.713267803192139, "learning_rate": 6.9590448976430745e-06, "loss": 0.4183, "step": 1133 }, { "epoch": 0.07747489239598278, "grad_norm": 5.093454837799072, "learning_rate": 6.958926676993206e-06, "loss": 0.4731, "step": 1134 }, { "epoch": 0.07754321240691399, "grad_norm": 4.370391368865967, "learning_rate": 6.958808286968873e-06, "loss": 0.4316, "step": 1135 }, { "epoch": 0.07761153241784519, "grad_norm": 5.165853023529053, "learning_rate": 6.958689727575872e-06, "loss": 0.5251, "step": 1136 }, { "epoch": 0.07767985242877638, "grad_norm": 5.28877592086792, "learning_rate": 6.958570998820011e-06, "loss": 0.4582, "step": 1137 }, { "epoch": 0.0777481724397076, "grad_norm": 3.215847969055176, "learning_rate": 6.9584521007071e-06, "loss": 0.4148, "step": 1138 }, { "epoch": 0.07781649245063879, "grad_norm": 4.165588855743408, "learning_rate": 6.958333033242965e-06, "loss": 0.4141, "step": 1139 }, { "epoch": 0.07788481246156999, "grad_norm": 7.437760353088379, "learning_rate": 6.958213796433435e-06, "loss": 0.5067, "step": 1140 }, { "epoch": 0.0779531324725012, "grad_norm": 4.7814412117004395, "learning_rate": 6.958094390284348e-06, "loss": 0.456, "step": 1141 }, { "epoch": 0.0780214524834324, "grad_norm": 5.351198196411133, "learning_rate": 6.957974814801551e-06, "loss": 0.5034, "step": 1142 }, { "epoch": 0.0780897724943636, "grad_norm": 3.916764259338379, "learning_rate": 6.957855069990901e-06, "loss": 0.4165, "step": 1143 }, { "epoch": 0.0781580925052948, "grad_norm": 6.394623279571533, "learning_rate": 6.9577351558582596e-06, "loss": 0.5483, "step": 1144 }, { "epoch": 0.078226412516226, "grad_norm": 4.731048107147217, "learning_rate": 6.957615072409499e-06, "loss": 0.3369, "step": 1145 }, { "epoch": 0.0782947325271572, "grad_norm": 4.484692096710205, "learning_rate": 6.957494819650501e-06, "loss": 0.5234, "step": 1146 }, { "epoch": 0.07836305253808841, "grad_norm": 4.680265426635742, "learning_rate": 6.957374397587151e-06, "loss": 0.4283, "step": 1147 }, { "epoch": 0.0784313725490196, "grad_norm": 5.284897327423096, "learning_rate": 6.9572538062253495e-06, "loss": 0.4463, "step": 1148 }, { "epoch": 0.0784996925599508, "grad_norm": 4.945843696594238, "learning_rate": 6.9571330455709995e-06, "loss": 0.5251, "step": 1149 }, { "epoch": 0.07856801257088201, "grad_norm": 3.968698263168335, "learning_rate": 6.957012115630013e-06, "loss": 0.452, "step": 1150 }, { "epoch": 0.07863633258181321, "grad_norm": 5.090188503265381, "learning_rate": 6.956891016408315e-06, "loss": 0.4492, "step": 1151 }, { "epoch": 0.07870465259274441, "grad_norm": 4.054122447967529, "learning_rate": 6.956769747911832e-06, "loss": 0.4672, "step": 1152 }, { "epoch": 0.07877297260367562, "grad_norm": 6.663003921508789, "learning_rate": 6.956648310146504e-06, "loss": 0.5109, "step": 1153 }, { "epoch": 0.07884129261460682, "grad_norm": 4.473837852478027, "learning_rate": 6.956526703118279e-06, "loss": 0.4864, "step": 1154 }, { "epoch": 0.07890961262553801, "grad_norm": 5.646516799926758, "learning_rate": 6.956404926833108e-06, "loss": 0.4651, "step": 1155 }, { "epoch": 0.07897793263646923, "grad_norm": 4.172019958496094, "learning_rate": 6.956282981296957e-06, "loss": 0.4616, "step": 1156 }, { "epoch": 0.07904625264740042, "grad_norm": 4.451043605804443, "learning_rate": 6.956160866515795e-06, "loss": 0.3893, "step": 1157 }, { "epoch": 0.07911457265833162, "grad_norm": 4.678030014038086, "learning_rate": 6.9560385824956044e-06, "loss": 0.5152, "step": 1158 }, { "epoch": 0.07918289266926283, "grad_norm": 5.3037519454956055, "learning_rate": 6.955916129242372e-06, "loss": 0.4717, "step": 1159 }, { "epoch": 0.07925121268019403, "grad_norm": 6.448419094085693, "learning_rate": 6.955793506762094e-06, "loss": 0.4936, "step": 1160 }, { "epoch": 0.07931953269112522, "grad_norm": 4.900425910949707, "learning_rate": 6.955670715060773e-06, "loss": 0.4393, "step": 1161 }, { "epoch": 0.07938785270205644, "grad_norm": 4.9146809577941895, "learning_rate": 6.955547754144424e-06, "loss": 0.3881, "step": 1162 }, { "epoch": 0.07945617271298763, "grad_norm": 6.605103015899658, "learning_rate": 6.955424624019068e-06, "loss": 0.5231, "step": 1163 }, { "epoch": 0.07952449272391883, "grad_norm": 4.436881065368652, "learning_rate": 6.9553013246907345e-06, "loss": 0.4922, "step": 1164 }, { "epoch": 0.07959281273485004, "grad_norm": 5.731018543243408, "learning_rate": 6.9551778561654595e-06, "loss": 0.5081, "step": 1165 }, { "epoch": 0.07966113274578124, "grad_norm": 5.069095611572266, "learning_rate": 6.95505421844929e-06, "loss": 0.393, "step": 1166 }, { "epoch": 0.07972945275671244, "grad_norm": 5.451523780822754, "learning_rate": 6.954930411548281e-06, "loss": 0.4783, "step": 1167 }, { "epoch": 0.07979777276764365, "grad_norm": 6.046739101409912, "learning_rate": 6.9548064354684935e-06, "loss": 0.5529, "step": 1168 }, { "epoch": 0.07986609277857484, "grad_norm": 4.138797283172607, "learning_rate": 6.954682290215999e-06, "loss": 0.4281, "step": 1169 }, { "epoch": 0.07993441278950604, "grad_norm": 4.223822593688965, "learning_rate": 6.954557975796876e-06, "loss": 0.4213, "step": 1170 }, { "epoch": 0.08000273280043725, "grad_norm": 4.6773881912231445, "learning_rate": 6.954433492217213e-06, "loss": 0.533, "step": 1171 }, { "epoch": 0.08007105281136845, "grad_norm": 4.577070236206055, "learning_rate": 6.9543088394831044e-06, "loss": 0.379, "step": 1172 }, { "epoch": 0.08013937282229965, "grad_norm": 5.98910665512085, "learning_rate": 6.954184017600656e-06, "loss": 0.3986, "step": 1173 }, { "epoch": 0.08020769283323086, "grad_norm": 4.3077898025512695, "learning_rate": 6.954059026575978e-06, "loss": 0.404, "step": 1174 }, { "epoch": 0.08027601284416205, "grad_norm": 6.850929260253906, "learning_rate": 6.953933866415191e-06, "loss": 0.4227, "step": 1175 }, { "epoch": 0.08034433285509325, "grad_norm": 6.492523670196533, "learning_rate": 6.953808537124424e-06, "loss": 0.6665, "step": 1176 }, { "epoch": 0.08041265286602446, "grad_norm": 6.0046868324279785, "learning_rate": 6.953683038709815e-06, "loss": 0.4221, "step": 1177 }, { "epoch": 0.08048097287695566, "grad_norm": 4.462163925170898, "learning_rate": 6.953557371177509e-06, "loss": 0.5164, "step": 1178 }, { "epoch": 0.08054929288788686, "grad_norm": 3.8745148181915283, "learning_rate": 6.953431534533658e-06, "loss": 0.4282, "step": 1179 }, { "epoch": 0.08061761289881807, "grad_norm": 6.404794692993164, "learning_rate": 6.953305528784427e-06, "loss": 0.4458, "step": 1180 }, { "epoch": 0.08068593290974926, "grad_norm": 5.214139938354492, "learning_rate": 6.953179353935984e-06, "loss": 0.4216, "step": 1181 }, { "epoch": 0.08075425292068046, "grad_norm": 5.636439800262451, "learning_rate": 6.953053009994507e-06, "loss": 0.5107, "step": 1182 }, { "epoch": 0.08082257293161167, "grad_norm": 4.674103260040283, "learning_rate": 6.952926496966184e-06, "loss": 0.3934, "step": 1183 }, { "epoch": 0.08089089294254287, "grad_norm": 4.752730369567871, "learning_rate": 6.9527998148572085e-06, "loss": 0.501, "step": 1184 }, { "epoch": 0.08095921295347407, "grad_norm": 5.149921417236328, "learning_rate": 6.9526729636737866e-06, "loss": 0.4429, "step": 1185 }, { "epoch": 0.08102753296440528, "grad_norm": 4.865383148193359, "learning_rate": 6.952545943422127e-06, "loss": 0.394, "step": 1186 }, { "epoch": 0.08109585297533647, "grad_norm": 7.607017517089844, "learning_rate": 6.952418754108451e-06, "loss": 0.4815, "step": 1187 }, { "epoch": 0.08116417298626767, "grad_norm": 5.068802833557129, "learning_rate": 6.952291395738986e-06, "loss": 0.4772, "step": 1188 }, { "epoch": 0.08123249299719888, "grad_norm": 8.45904541015625, "learning_rate": 6.952163868319969e-06, "loss": 0.4476, "step": 1189 }, { "epoch": 0.08130081300813008, "grad_norm": 6.573885440826416, "learning_rate": 6.952036171857644e-06, "loss": 0.5712, "step": 1190 }, { "epoch": 0.08136913301906128, "grad_norm": 5.435256481170654, "learning_rate": 6.951908306358265e-06, "loss": 0.4995, "step": 1191 }, { "epoch": 0.08143745302999249, "grad_norm": 4.5641279220581055, "learning_rate": 6.951780271828093e-06, "loss": 0.6246, "step": 1192 }, { "epoch": 0.08150577304092368, "grad_norm": 6.277070045471191, "learning_rate": 6.951652068273396e-06, "loss": 0.4073, "step": 1193 }, { "epoch": 0.08157409305185488, "grad_norm": 5.975437164306641, "learning_rate": 6.951523695700453e-06, "loss": 0.4299, "step": 1194 }, { "epoch": 0.08164241306278609, "grad_norm": 5.3249101638793945, "learning_rate": 6.951395154115551e-06, "loss": 0.4192, "step": 1195 }, { "epoch": 0.08171073307371729, "grad_norm": 4.128839492797852, "learning_rate": 6.951266443524983e-06, "loss": 0.3588, "step": 1196 }, { "epoch": 0.08177905308464849, "grad_norm": 5.5909342765808105, "learning_rate": 6.951137563935051e-06, "loss": 0.4792, "step": 1197 }, { "epoch": 0.0818473730955797, "grad_norm": 3.8734748363494873, "learning_rate": 6.951008515352068e-06, "loss": 0.367, "step": 1198 }, { "epoch": 0.0819156931065109, "grad_norm": 5.395120620727539, "learning_rate": 6.950879297782352e-06, "loss": 0.452, "step": 1199 }, { "epoch": 0.08198401311744209, "grad_norm": 4.326878547668457, "learning_rate": 6.950749911232229e-06, "loss": 0.5484, "step": 1200 }, { "epoch": 0.0820523331283733, "grad_norm": 4.91016960144043, "learning_rate": 6.950620355708037e-06, "loss": 0.4966, "step": 1201 }, { "epoch": 0.0821206531393045, "grad_norm": 5.9802117347717285, "learning_rate": 6.95049063121612e-06, "loss": 0.4537, "step": 1202 }, { "epoch": 0.0821889731502357, "grad_norm": 4.6512556076049805, "learning_rate": 6.950360737762829e-06, "loss": 0.452, "step": 1203 }, { "epoch": 0.08225729316116691, "grad_norm": 4.4860429763793945, "learning_rate": 6.950230675354524e-06, "loss": 0.5341, "step": 1204 }, { "epoch": 0.0823256131720981, "grad_norm": 5.364721775054932, "learning_rate": 6.950100443997576e-06, "loss": 0.4549, "step": 1205 }, { "epoch": 0.0823939331830293, "grad_norm": 7.177951812744141, "learning_rate": 6.94997004369836e-06, "loss": 0.4525, "step": 1206 }, { "epoch": 0.08246225319396051, "grad_norm": 6.034006595611572, "learning_rate": 6.949839474463262e-06, "loss": 0.4209, "step": 1207 }, { "epoch": 0.08253057320489171, "grad_norm": 4.636082649230957, "learning_rate": 6.949708736298677e-06, "loss": 0.5386, "step": 1208 }, { "epoch": 0.08259889321582291, "grad_norm": 4.669948577880859, "learning_rate": 6.9495778292110055e-06, "loss": 0.5002, "step": 1209 }, { "epoch": 0.08266721322675412, "grad_norm": 6.683463096618652, "learning_rate": 6.949446753206657e-06, "loss": 0.476, "step": 1210 }, { "epoch": 0.08273553323768532, "grad_norm": 4.515961647033691, "learning_rate": 6.9493155082920515e-06, "loss": 0.4279, "step": 1211 }, { "epoch": 0.08280385324861651, "grad_norm": 4.952330589294434, "learning_rate": 6.949184094473615e-06, "loss": 0.4351, "step": 1212 }, { "epoch": 0.08287217325954772, "grad_norm": 5.700851917266846, "learning_rate": 6.949052511757782e-06, "loss": 0.4819, "step": 1213 }, { "epoch": 0.08294049327047892, "grad_norm": 5.282586097717285, "learning_rate": 6.948920760150998e-06, "loss": 0.5076, "step": 1214 }, { "epoch": 0.08300881328141012, "grad_norm": 5.621854305267334, "learning_rate": 6.948788839659711e-06, "loss": 0.565, "step": 1215 }, { "epoch": 0.08307713329234133, "grad_norm": 5.233310222625732, "learning_rate": 6.948656750290383e-06, "loss": 0.4167, "step": 1216 }, { "epoch": 0.08314545330327253, "grad_norm": 5.322066307067871, "learning_rate": 6.948524492049482e-06, "loss": 0.4316, "step": 1217 }, { "epoch": 0.08321377331420372, "grad_norm": 4.6681718826293945, "learning_rate": 6.948392064943484e-06, "loss": 0.4159, "step": 1218 }, { "epoch": 0.08328209332513493, "grad_norm": 3.993257999420166, "learning_rate": 6.948259468978873e-06, "loss": 0.393, "step": 1219 }, { "epoch": 0.08335041333606613, "grad_norm": 5.0748138427734375, "learning_rate": 6.948126704162143e-06, "loss": 0.5785, "step": 1220 }, { "epoch": 0.08341873334699734, "grad_norm": 5.529793739318848, "learning_rate": 6.9479937704997955e-06, "loss": 0.4038, "step": 1221 }, { "epoch": 0.08348705335792854, "grad_norm": 3.9206392765045166, "learning_rate": 6.947860667998338e-06, "loss": 0.4438, "step": 1222 }, { "epoch": 0.08355537336885974, "grad_norm": 5.86068058013916, "learning_rate": 6.94772739666429e-06, "loss": 0.4831, "step": 1223 }, { "epoch": 0.08362369337979095, "grad_norm": 4.3717546463012695, "learning_rate": 6.947593956504177e-06, "loss": 0.438, "step": 1224 }, { "epoch": 0.08369201339072214, "grad_norm": 4.673659801483154, "learning_rate": 6.947460347524532e-06, "loss": 0.4166, "step": 1225 }, { "epoch": 0.08376033340165334, "grad_norm": 5.563604354858398, "learning_rate": 6.947326569731899e-06, "loss": 0.4529, "step": 1226 }, { "epoch": 0.08382865341258455, "grad_norm": 4.876186370849609, "learning_rate": 6.947192623132827e-06, "loss": 0.4494, "step": 1227 }, { "epoch": 0.08389697342351575, "grad_norm": 4.718761920928955, "learning_rate": 6.9470585077338764e-06, "loss": 0.4962, "step": 1228 }, { "epoch": 0.08396529343444695, "grad_norm": 5.130120277404785, "learning_rate": 6.946924223541615e-06, "loss": 0.4761, "step": 1229 }, { "epoch": 0.08403361344537816, "grad_norm": 4.575566291809082, "learning_rate": 6.946789770562616e-06, "loss": 0.4248, "step": 1230 }, { "epoch": 0.08410193345630936, "grad_norm": 5.284677028656006, "learning_rate": 6.946655148803467e-06, "loss": 0.4723, "step": 1231 }, { "epoch": 0.08417025346724055, "grad_norm": 6.366842746734619, "learning_rate": 6.9465203582707565e-06, "loss": 0.4277, "step": 1232 }, { "epoch": 0.08423857347817176, "grad_norm": 4.452773571014404, "learning_rate": 6.946385398971087e-06, "loss": 0.3666, "step": 1233 }, { "epoch": 0.08430689348910296, "grad_norm": 4.457316875457764, "learning_rate": 6.946250270911065e-06, "loss": 0.5002, "step": 1234 }, { "epoch": 0.08437521350003416, "grad_norm": 5.1504316329956055, "learning_rate": 6.94611497409731e-06, "loss": 0.4752, "step": 1235 }, { "epoch": 0.08444353351096537, "grad_norm": 4.506463527679443, "learning_rate": 6.9459795085364445e-06, "loss": 0.4947, "step": 1236 }, { "epoch": 0.08451185352189657, "grad_norm": 3.8578813076019287, "learning_rate": 6.9458438742351035e-06, "loss": 0.3926, "step": 1237 }, { "epoch": 0.08458017353282776, "grad_norm": 5.806201457977295, "learning_rate": 6.945708071199929e-06, "loss": 0.5874, "step": 1238 }, { "epoch": 0.08464849354375897, "grad_norm": 3.8063180446624756, "learning_rate": 6.945572099437569e-06, "loss": 0.3356, "step": 1239 }, { "epoch": 0.08471681355469017, "grad_norm": 6.1980977058410645, "learning_rate": 6.945435958954684e-06, "loss": 0.4746, "step": 1240 }, { "epoch": 0.08478513356562137, "grad_norm": 3.736914873123169, "learning_rate": 6.945299649757939e-06, "loss": 0.3592, "step": 1241 }, { "epoch": 0.08485345357655258, "grad_norm": 5.026304244995117, "learning_rate": 6.945163171854008e-06, "loss": 0.4526, "step": 1242 }, { "epoch": 0.08492177358748378, "grad_norm": 5.71868896484375, "learning_rate": 6.945026525249576e-06, "loss": 0.5672, "step": 1243 }, { "epoch": 0.08499009359841497, "grad_norm": 7.684164047241211, "learning_rate": 6.944889709951332e-06, "loss": 0.5304, "step": 1244 }, { "epoch": 0.08505841360934618, "grad_norm": 5.521027565002441, "learning_rate": 6.944752725965979e-06, "loss": 0.5061, "step": 1245 }, { "epoch": 0.08512673362027738, "grad_norm": 4.836416244506836, "learning_rate": 6.944615573300221e-06, "loss": 0.4418, "step": 1246 }, { "epoch": 0.08519505363120858, "grad_norm": 5.4114603996276855, "learning_rate": 6.944478251960774e-06, "loss": 0.4569, "step": 1247 }, { "epoch": 0.08526337364213979, "grad_norm": 4.317294120788574, "learning_rate": 6.944340761954367e-06, "loss": 0.4702, "step": 1248 }, { "epoch": 0.08533169365307099, "grad_norm": 4.490108013153076, "learning_rate": 6.944203103287726e-06, "loss": 0.4534, "step": 1249 }, { "epoch": 0.08540001366400218, "grad_norm": 5.355266571044922, "learning_rate": 6.944065275967596e-06, "loss": 0.5117, "step": 1250 }, { "epoch": 0.0854683336749334, "grad_norm": 5.4742350578308105, "learning_rate": 6.943927280000726e-06, "loss": 0.5173, "step": 1251 }, { "epoch": 0.08553665368586459, "grad_norm": 4.608235836029053, "learning_rate": 6.943789115393872e-06, "loss": 0.4964, "step": 1252 }, { "epoch": 0.08560497369679579, "grad_norm": 4.94736385345459, "learning_rate": 6.943650782153799e-06, "loss": 0.4251, "step": 1253 }, { "epoch": 0.085673293707727, "grad_norm": 4.333778381347656, "learning_rate": 6.943512280287282e-06, "loss": 0.4993, "step": 1254 }, { "epoch": 0.0857416137186582, "grad_norm": 4.621829986572266, "learning_rate": 6.943373609801103e-06, "loss": 0.5524, "step": 1255 }, { "epoch": 0.0858099337295894, "grad_norm": 4.622247219085693, "learning_rate": 6.943234770702052e-06, "loss": 0.3926, "step": 1256 }, { "epoch": 0.0858782537405206, "grad_norm": 5.484346389770508, "learning_rate": 6.943095762996928e-06, "loss": 0.6022, "step": 1257 }, { "epoch": 0.0859465737514518, "grad_norm": 5.2154107093811035, "learning_rate": 6.942956586692537e-06, "loss": 0.4622, "step": 1258 }, { "epoch": 0.086014893762383, "grad_norm": 4.7599382400512695, "learning_rate": 6.942817241795694e-06, "loss": 0.5376, "step": 1259 }, { "epoch": 0.08608321377331421, "grad_norm": 5.989846229553223, "learning_rate": 6.9426777283132245e-06, "loss": 0.4484, "step": 1260 }, { "epoch": 0.08615153378424541, "grad_norm": 6.104957103729248, "learning_rate": 6.942538046251959e-06, "loss": 0.4398, "step": 1261 }, { "epoch": 0.0862198537951766, "grad_norm": 5.818051815032959, "learning_rate": 6.942398195618735e-06, "loss": 0.4507, "step": 1262 }, { "epoch": 0.08628817380610782, "grad_norm": 4.032196998596191, "learning_rate": 6.942258176420403e-06, "loss": 0.4037, "step": 1263 }, { "epoch": 0.08635649381703901, "grad_norm": 4.297347068786621, "learning_rate": 6.942117988663819e-06, "loss": 0.4948, "step": 1264 }, { "epoch": 0.08642481382797021, "grad_norm": 4.770655155181885, "learning_rate": 6.941977632355848e-06, "loss": 0.4599, "step": 1265 }, { "epoch": 0.08649313383890142, "grad_norm": 5.387250900268555, "learning_rate": 6.941837107503361e-06, "loss": 0.6269, "step": 1266 }, { "epoch": 0.08656145384983262, "grad_norm": 5.266448497772217, "learning_rate": 6.941696414113242e-06, "loss": 0.507, "step": 1267 }, { "epoch": 0.08662977386076381, "grad_norm": 4.879828453063965, "learning_rate": 6.941555552192378e-06, "loss": 0.4596, "step": 1268 }, { "epoch": 0.08669809387169503, "grad_norm": 3.907616376876831, "learning_rate": 6.941414521747669e-06, "loss": 0.5709, "step": 1269 }, { "epoch": 0.08676641388262622, "grad_norm": 5.385461330413818, "learning_rate": 6.941273322786018e-06, "loss": 0.3902, "step": 1270 }, { "epoch": 0.08683473389355742, "grad_norm": 4.910499572753906, "learning_rate": 6.941131955314341e-06, "loss": 0.4571, "step": 1271 }, { "epoch": 0.08690305390448863, "grad_norm": 5.715214252471924, "learning_rate": 6.94099041933956e-06, "loss": 0.4984, "step": 1272 }, { "epoch": 0.08697137391541983, "grad_norm": 5.340714931488037, "learning_rate": 6.940848714868605e-06, "loss": 0.4822, "step": 1273 }, { "epoch": 0.08703969392635102, "grad_norm": 5.414804458618164, "learning_rate": 6.940706841908417e-06, "loss": 0.383, "step": 1274 }, { "epoch": 0.08710801393728224, "grad_norm": 4.521617889404297, "learning_rate": 6.94056480046594e-06, "loss": 0.5532, "step": 1275 }, { "epoch": 0.08717633394821343, "grad_norm": 4.400304317474365, "learning_rate": 6.940422590548132e-06, "loss": 0.4196, "step": 1276 }, { "epoch": 0.08724465395914463, "grad_norm": 5.517279624938965, "learning_rate": 6.9402802121619545e-06, "loss": 0.4838, "step": 1277 }, { "epoch": 0.08731297397007584, "grad_norm": 5.83180046081543, "learning_rate": 6.940137665314382e-06, "loss": 0.4176, "step": 1278 }, { "epoch": 0.08738129398100704, "grad_norm": 4.6245880126953125, "learning_rate": 6.939994950012392e-06, "loss": 0.452, "step": 1279 }, { "epoch": 0.08744961399193824, "grad_norm": 5.488564968109131, "learning_rate": 6.939852066262976e-06, "loss": 0.5061, "step": 1280 }, { "epoch": 0.08751793400286945, "grad_norm": 4.74057674407959, "learning_rate": 6.939709014073126e-06, "loss": 0.4674, "step": 1281 }, { "epoch": 0.08758625401380064, "grad_norm": 5.456006050109863, "learning_rate": 6.939565793449851e-06, "loss": 0.4827, "step": 1282 }, { "epoch": 0.08765457402473184, "grad_norm": 4.659256935119629, "learning_rate": 6.939422404400163e-06, "loss": 0.3865, "step": 1283 }, { "epoch": 0.08772289403566305, "grad_norm": 3.7926762104034424, "learning_rate": 6.939278846931083e-06, "loss": 0.3966, "step": 1284 }, { "epoch": 0.08779121404659425, "grad_norm": 5.033846378326416, "learning_rate": 6.93913512104964e-06, "loss": 0.4692, "step": 1285 }, { "epoch": 0.08785953405752545, "grad_norm": 5.081603527069092, "learning_rate": 6.938991226762874e-06, "loss": 0.5404, "step": 1286 }, { "epoch": 0.08792785406845666, "grad_norm": 5.8332905769348145, "learning_rate": 6.938847164077828e-06, "loss": 0.4675, "step": 1287 }, { "epoch": 0.08799617407938785, "grad_norm": 5.506415843963623, "learning_rate": 6.93870293300156e-06, "loss": 0.4684, "step": 1288 }, { "epoch": 0.08806449409031905, "grad_norm": 6.301297664642334, "learning_rate": 6.93855853354113e-06, "loss": 0.4398, "step": 1289 }, { "epoch": 0.08813281410125026, "grad_norm": 5.006857872009277, "learning_rate": 6.938413965703608e-06, "loss": 0.4758, "step": 1290 }, { "epoch": 0.08820113411218146, "grad_norm": 5.38419771194458, "learning_rate": 6.938269229496077e-06, "loss": 0.4379, "step": 1291 }, { "epoch": 0.08826945412311266, "grad_norm": 4.4583940505981445, "learning_rate": 6.938124324925621e-06, "loss": 0.4502, "step": 1292 }, { "epoch": 0.08833777413404387, "grad_norm": 4.701419830322266, "learning_rate": 6.937979251999337e-06, "loss": 0.4699, "step": 1293 }, { "epoch": 0.08840609414497506, "grad_norm": 4.213678359985352, "learning_rate": 6.937834010724329e-06, "loss": 0.4569, "step": 1294 }, { "epoch": 0.08847441415590626, "grad_norm": 5.4524993896484375, "learning_rate": 6.937688601107707e-06, "loss": 0.3864, "step": 1295 }, { "epoch": 0.08854273416683747, "grad_norm": 4.355647087097168, "learning_rate": 6.9375430231565935e-06, "loss": 0.446, "step": 1296 }, { "epoch": 0.08861105417776867, "grad_norm": 5.434544086456299, "learning_rate": 6.937397276878117e-06, "loss": 0.5511, "step": 1297 }, { "epoch": 0.08867937418869987, "grad_norm": 4.14598274230957, "learning_rate": 6.937251362279413e-06, "loss": 0.4524, "step": 1298 }, { "epoch": 0.08874769419963108, "grad_norm": 5.208337306976318, "learning_rate": 6.937105279367628e-06, "loss": 0.5404, "step": 1299 }, { "epoch": 0.08881601421056227, "grad_norm": 5.778244972229004, "learning_rate": 6.9369590281499145e-06, "loss": 0.4848, "step": 1300 }, { "epoch": 0.08888433422149347, "grad_norm": 4.00595760345459, "learning_rate": 6.9368126086334335e-06, "loss": 0.4709, "step": 1301 }, { "epoch": 0.08895265423242468, "grad_norm": 6.233242511749268, "learning_rate": 6.936666020825356e-06, "loss": 0.411, "step": 1302 }, { "epoch": 0.08902097424335588, "grad_norm": 6.144073486328125, "learning_rate": 6.936519264732859e-06, "loss": 0.4016, "step": 1303 }, { "epoch": 0.08908929425428708, "grad_norm": 6.056375980377197, "learning_rate": 6.936372340363129e-06, "loss": 0.5163, "step": 1304 }, { "epoch": 0.08915761426521829, "grad_norm": 3.1819987297058105, "learning_rate": 6.9362252477233614e-06, "loss": 0.3434, "step": 1305 }, { "epoch": 0.08922593427614949, "grad_norm": 4.991354942321777, "learning_rate": 6.936077986820757e-06, "loss": 0.4076, "step": 1306 }, { "epoch": 0.08929425428708068, "grad_norm": 5.078733921051025, "learning_rate": 6.935930557662529e-06, "loss": 0.3919, "step": 1307 }, { "epoch": 0.0893625742980119, "grad_norm": 5.694471836090088, "learning_rate": 6.935782960255897e-06, "loss": 0.4275, "step": 1308 }, { "epoch": 0.08943089430894309, "grad_norm": 3.2941348552703857, "learning_rate": 6.935635194608084e-06, "loss": 0.3579, "step": 1309 }, { "epoch": 0.08949921431987429, "grad_norm": 4.219005584716797, "learning_rate": 6.935487260726331e-06, "loss": 0.5052, "step": 1310 }, { "epoch": 0.0895675343308055, "grad_norm": 7.149528980255127, "learning_rate": 6.935339158617879e-06, "loss": 0.53, "step": 1311 }, { "epoch": 0.0896358543417367, "grad_norm": 3.4517784118652344, "learning_rate": 6.93519088828998e-06, "loss": 0.4031, "step": 1312 }, { "epoch": 0.08970417435266789, "grad_norm": 4.43985652923584, "learning_rate": 6.935042449749897e-06, "loss": 0.3798, "step": 1313 }, { "epoch": 0.0897724943635991, "grad_norm": 5.018682956695557, "learning_rate": 6.934893843004895e-06, "loss": 0.5131, "step": 1314 }, { "epoch": 0.0898408143745303, "grad_norm": 4.399933338165283, "learning_rate": 6.934745068062254e-06, "loss": 0.3722, "step": 1315 }, { "epoch": 0.0899091343854615, "grad_norm": 5.473968982696533, "learning_rate": 6.934596124929258e-06, "loss": 0.4614, "step": 1316 }, { "epoch": 0.08997745439639271, "grad_norm": 3.2636241912841797, "learning_rate": 6.9344470136131996e-06, "loss": 0.3816, "step": 1317 }, { "epoch": 0.0900457744073239, "grad_norm": 4.598232746124268, "learning_rate": 6.93429773412138e-06, "loss": 0.4842, "step": 1318 }, { "epoch": 0.0901140944182551, "grad_norm": 4.199049949645996, "learning_rate": 6.9341482864611115e-06, "loss": 0.4749, "step": 1319 }, { "epoch": 0.09018241442918631, "grad_norm": 4.536502361297607, "learning_rate": 6.93399867063971e-06, "loss": 0.5068, "step": 1320 }, { "epoch": 0.09025073444011751, "grad_norm": 5.013396739959717, "learning_rate": 6.933848886664504e-06, "loss": 0.5, "step": 1321 }, { "epoch": 0.09031905445104871, "grad_norm": 5.635636329650879, "learning_rate": 6.933698934542825e-06, "loss": 0.5173, "step": 1322 }, { "epoch": 0.09038737446197992, "grad_norm": 5.343161106109619, "learning_rate": 6.933548814282017e-06, "loss": 0.4674, "step": 1323 }, { "epoch": 0.09045569447291112, "grad_norm": 5.004236221313477, "learning_rate": 6.933398525889432e-06, "loss": 0.5335, "step": 1324 }, { "epoch": 0.09052401448384231, "grad_norm": 5.058034420013428, "learning_rate": 6.933248069372429e-06, "loss": 0.4472, "step": 1325 }, { "epoch": 0.09059233449477352, "grad_norm": 4.249085426330566, "learning_rate": 6.933097444738373e-06, "loss": 0.4202, "step": 1326 }, { "epoch": 0.09066065450570472, "grad_norm": 4.67153263092041, "learning_rate": 6.932946651994645e-06, "loss": 0.5019, "step": 1327 }, { "epoch": 0.09072897451663592, "grad_norm": 5.352621555328369, "learning_rate": 6.932795691148623e-06, "loss": 0.3759, "step": 1328 }, { "epoch": 0.09079729452756713, "grad_norm": 4.06931734085083, "learning_rate": 6.932644562207702e-06, "loss": 0.3786, "step": 1329 }, { "epoch": 0.09086561453849833, "grad_norm": 5.439969539642334, "learning_rate": 6.932493265179283e-06, "loss": 0.4544, "step": 1330 }, { "epoch": 0.09093393454942952, "grad_norm": 4.233795166015625, "learning_rate": 6.932341800070773e-06, "loss": 0.4711, "step": 1331 }, { "epoch": 0.09100225456036073, "grad_norm": 5.726016044616699, "learning_rate": 6.932190166889589e-06, "loss": 0.4511, "step": 1332 }, { "epoch": 0.09107057457129193, "grad_norm": 3.9876322746276855, "learning_rate": 6.932038365643157e-06, "loss": 0.4674, "step": 1333 }, { "epoch": 0.09113889458222313, "grad_norm": 4.9533162117004395, "learning_rate": 6.93188639633891e-06, "loss": 0.419, "step": 1334 }, { "epoch": 0.09120721459315434, "grad_norm": 4.208242893218994, "learning_rate": 6.931734258984289e-06, "loss": 0.4154, "step": 1335 }, { "epoch": 0.09127553460408554, "grad_norm": 5.067508220672607, "learning_rate": 6.931581953586745e-06, "loss": 0.5006, "step": 1336 }, { "epoch": 0.09134385461501673, "grad_norm": 4.3879008293151855, "learning_rate": 6.931429480153734e-06, "loss": 0.4124, "step": 1337 }, { "epoch": 0.09141217462594795, "grad_norm": 3.457304000854492, "learning_rate": 6.931276838692725e-06, "loss": 0.3901, "step": 1338 }, { "epoch": 0.09148049463687914, "grad_norm": 5.508024215698242, "learning_rate": 6.931124029211189e-06, "loss": 0.5193, "step": 1339 }, { "epoch": 0.09154881464781034, "grad_norm": 4.89241361618042, "learning_rate": 6.930971051716612e-06, "loss": 0.4044, "step": 1340 }, { "epoch": 0.09161713465874155, "grad_norm": 5.962045669555664, "learning_rate": 6.930817906216483e-06, "loss": 0.4981, "step": 1341 }, { "epoch": 0.09168545466967275, "grad_norm": 4.726837635040283, "learning_rate": 6.9306645927183024e-06, "loss": 0.4297, "step": 1342 }, { "epoch": 0.09175377468060394, "grad_norm": 4.926170349121094, "learning_rate": 6.930511111229576e-06, "loss": 0.5191, "step": 1343 }, { "epoch": 0.09182209469153516, "grad_norm": 4.8142313957214355, "learning_rate": 6.93035746175782e-06, "loss": 0.3653, "step": 1344 }, { "epoch": 0.09189041470246635, "grad_norm": 5.237508773803711, "learning_rate": 6.930203644310558e-06, "loss": 0.5277, "step": 1345 }, { "epoch": 0.09195873471339755, "grad_norm": 4.301128387451172, "learning_rate": 6.930049658895324e-06, "loss": 0.4033, "step": 1346 }, { "epoch": 0.09202705472432876, "grad_norm": 4.363320350646973, "learning_rate": 6.929895505519655e-06, "loss": 0.4372, "step": 1347 }, { "epoch": 0.09209537473525996, "grad_norm": 7.585724830627441, "learning_rate": 6.929741184191102e-06, "loss": 0.6022, "step": 1348 }, { "epoch": 0.09216369474619115, "grad_norm": 4.675911903381348, "learning_rate": 6.9295866949172206e-06, "loss": 0.5227, "step": 1349 }, { "epoch": 0.09223201475712237, "grad_norm": 5.73250150680542, "learning_rate": 6.929432037705575e-06, "loss": 0.6491, "step": 1350 }, { "epoch": 0.09230033476805356, "grad_norm": 4.723649978637695, "learning_rate": 6.92927721256374e-06, "loss": 0.4284, "step": 1351 }, { "epoch": 0.09236865477898476, "grad_norm": 4.379177093505859, "learning_rate": 6.929122219499297e-06, "loss": 0.4549, "step": 1352 }, { "epoch": 0.09243697478991597, "grad_norm": 5.112595558166504, "learning_rate": 6.928967058519833e-06, "loss": 0.4344, "step": 1353 }, { "epoch": 0.09250529480084717, "grad_norm": 4.675661087036133, "learning_rate": 6.928811729632949e-06, "loss": 0.3998, "step": 1354 }, { "epoch": 0.09257361481177837, "grad_norm": 5.437354564666748, "learning_rate": 6.92865623284625e-06, "loss": 0.408, "step": 1355 }, { "epoch": 0.09264193482270958, "grad_norm": 6.188250541687012, "learning_rate": 6.928500568167349e-06, "loss": 0.5256, "step": 1356 }, { "epoch": 0.09271025483364077, "grad_norm": 4.285423278808594, "learning_rate": 6.9283447356038705e-06, "loss": 0.5113, "step": 1357 }, { "epoch": 0.09277857484457197, "grad_norm": 4.311448574066162, "learning_rate": 6.928188735163443e-06, "loss": 0.4069, "step": 1358 }, { "epoch": 0.09284689485550318, "grad_norm": 6.02994441986084, "learning_rate": 6.928032566853708e-06, "loss": 0.4764, "step": 1359 }, { "epoch": 0.09291521486643438, "grad_norm": 4.707503795623779, "learning_rate": 6.92787623068231e-06, "loss": 0.5294, "step": 1360 }, { "epoch": 0.09298353487736558, "grad_norm": 4.683815956115723, "learning_rate": 6.927719726656907e-06, "loss": 0.3672, "step": 1361 }, { "epoch": 0.09305185488829679, "grad_norm": 4.174949645996094, "learning_rate": 6.92756305478516e-06, "loss": 0.4514, "step": 1362 }, { "epoch": 0.09312017489922798, "grad_norm": 4.551355361938477, "learning_rate": 6.9274062150747415e-06, "loss": 0.5546, "step": 1363 }, { "epoch": 0.09318849491015918, "grad_norm": 4.419095993041992, "learning_rate": 6.927249207533334e-06, "loss": 0.4346, "step": 1364 }, { "epoch": 0.09325681492109039, "grad_norm": 4.4535675048828125, "learning_rate": 6.927092032168622e-06, "loss": 0.4219, "step": 1365 }, { "epoch": 0.09332513493202159, "grad_norm": 5.157585620880127, "learning_rate": 6.926934688988305e-06, "loss": 0.4141, "step": 1366 }, { "epoch": 0.09339345494295279, "grad_norm": 5.223043918609619, "learning_rate": 6.926777178000086e-06, "loss": 0.4334, "step": 1367 }, { "epoch": 0.093461774953884, "grad_norm": 5.021360874176025, "learning_rate": 6.926619499211678e-06, "loss": 0.3919, "step": 1368 }, { "epoch": 0.0935300949648152, "grad_norm": 3.9901318550109863, "learning_rate": 6.926461652630802e-06, "loss": 0.4324, "step": 1369 }, { "epoch": 0.09359841497574639, "grad_norm": 5.2498884201049805, "learning_rate": 6.926303638265189e-06, "loss": 0.4462, "step": 1370 }, { "epoch": 0.0936667349866776, "grad_norm": 5.561053276062012, "learning_rate": 6.926145456122575e-06, "loss": 0.4649, "step": 1371 }, { "epoch": 0.0937350549976088, "grad_norm": 5.089725494384766, "learning_rate": 6.925987106210705e-06, "loss": 0.4714, "step": 1372 }, { "epoch": 0.09380337500854, "grad_norm": 5.837823867797852, "learning_rate": 6.9258285885373354e-06, "loss": 0.4161, "step": 1373 }, { "epoch": 0.09387169501947121, "grad_norm": 5.929015636444092, "learning_rate": 6.9256699031102255e-06, "loss": 0.4289, "step": 1374 }, { "epoch": 0.0939400150304024, "grad_norm": 3.6987085342407227, "learning_rate": 6.9255110499371485e-06, "loss": 0.4014, "step": 1375 }, { "epoch": 0.0940083350413336, "grad_norm": 4.620209693908691, "learning_rate": 6.925352029025881e-06, "loss": 0.3548, "step": 1376 }, { "epoch": 0.09407665505226481, "grad_norm": 4.244948387145996, "learning_rate": 6.925192840384212e-06, "loss": 0.4846, "step": 1377 }, { "epoch": 0.09414497506319601, "grad_norm": 5.002005577087402, "learning_rate": 6.925033484019934e-06, "loss": 0.4988, "step": 1378 }, { "epoch": 0.0942132950741272, "grad_norm": 4.726781368255615, "learning_rate": 6.924873959940851e-06, "loss": 0.393, "step": 1379 }, { "epoch": 0.09428161508505842, "grad_norm": 5.940813064575195, "learning_rate": 6.924714268154776e-06, "loss": 0.5348, "step": 1380 }, { "epoch": 0.09434993509598961, "grad_norm": 5.628772735595703, "learning_rate": 6.9245544086695266e-06, "loss": 0.4589, "step": 1381 }, { "epoch": 0.09441825510692081, "grad_norm": 6.8276686668396, "learning_rate": 6.924394381492932e-06, "loss": 0.5342, "step": 1382 }, { "epoch": 0.09448657511785202, "grad_norm": 5.0304460525512695, "learning_rate": 6.9242341866328275e-06, "loss": 0.5443, "step": 1383 }, { "epoch": 0.09455489512878322, "grad_norm": 5.508142471313477, "learning_rate": 6.924073824097058e-06, "loss": 0.5243, "step": 1384 }, { "epoch": 0.09462321513971442, "grad_norm": 5.216789722442627, "learning_rate": 6.923913293893476e-06, "loss": 0.5213, "step": 1385 }, { "epoch": 0.09469153515064563, "grad_norm": 5.636913299560547, "learning_rate": 6.923752596029943e-06, "loss": 0.6084, "step": 1386 }, { "epoch": 0.09475985516157683, "grad_norm": 4.690301418304443, "learning_rate": 6.923591730514326e-06, "loss": 0.35, "step": 1387 }, { "epoch": 0.09482817517250802, "grad_norm": 4.824398994445801, "learning_rate": 6.923430697354504e-06, "loss": 0.3446, "step": 1388 }, { "epoch": 0.09489649518343923, "grad_norm": 5.167459011077881, "learning_rate": 6.923269496558361e-06, "loss": 0.4647, "step": 1389 }, { "epoch": 0.09496481519437043, "grad_norm": 4.398243427276611, "learning_rate": 6.923108128133792e-06, "loss": 0.5399, "step": 1390 }, { "epoch": 0.09503313520530163, "grad_norm": 5.946700096130371, "learning_rate": 6.922946592088697e-06, "loss": 0.4525, "step": 1391 }, { "epoch": 0.09510145521623284, "grad_norm": 4.2550249099731445, "learning_rate": 6.922784888430987e-06, "loss": 0.4008, "step": 1392 }, { "epoch": 0.09516977522716404, "grad_norm": 3.6069231033325195, "learning_rate": 6.92262301716858e-06, "loss": 0.3858, "step": 1393 }, { "epoch": 0.09523809523809523, "grad_norm": 4.82012414932251, "learning_rate": 6.922460978309403e-06, "loss": 0.4785, "step": 1394 }, { "epoch": 0.09530641524902644, "grad_norm": 7.362630844116211, "learning_rate": 6.92229877186139e-06, "loss": 0.5209, "step": 1395 }, { "epoch": 0.09537473525995764, "grad_norm": 6.904207706451416, "learning_rate": 6.922136397832485e-06, "loss": 0.5075, "step": 1396 }, { "epoch": 0.09544305527088884, "grad_norm": 4.51824951171875, "learning_rate": 6.921973856230636e-06, "loss": 0.4416, "step": 1397 }, { "epoch": 0.09551137528182005, "grad_norm": 5.366234302520752, "learning_rate": 6.9218111470638056e-06, "loss": 0.4994, "step": 1398 }, { "epoch": 0.09557969529275125, "grad_norm": 7.740026950836182, "learning_rate": 6.921648270339961e-06, "loss": 0.4508, "step": 1399 }, { "epoch": 0.09564801530368244, "grad_norm": 6.052661418914795, "learning_rate": 6.921485226067075e-06, "loss": 0.4858, "step": 1400 }, { "epoch": 0.09571633531461365, "grad_norm": 6.580941200256348, "learning_rate": 6.921322014253134e-06, "loss": 0.5122, "step": 1401 }, { "epoch": 0.09578465532554485, "grad_norm": 5.378298759460449, "learning_rate": 6.921158634906128e-06, "loss": 0.503, "step": 1402 }, { "epoch": 0.09585297533647605, "grad_norm": 4.0415496826171875, "learning_rate": 6.9209950880340595e-06, "loss": 0.5442, "step": 1403 }, { "epoch": 0.09592129534740726, "grad_norm": 5.492336273193359, "learning_rate": 6.920831373644935e-06, "loss": 0.4186, "step": 1404 }, { "epoch": 0.09598961535833846, "grad_norm": 4.071633815765381, "learning_rate": 6.9206674917467735e-06, "loss": 0.409, "step": 1405 }, { "epoch": 0.09605793536926965, "grad_norm": 4.041861057281494, "learning_rate": 6.920503442347597e-06, "loss": 0.3911, "step": 1406 }, { "epoch": 0.09612625538020086, "grad_norm": 6.904483795166016, "learning_rate": 6.920339225455442e-06, "loss": 0.4987, "step": 1407 }, { "epoch": 0.09619457539113206, "grad_norm": 5.656258583068848, "learning_rate": 6.920174841078346e-06, "loss": 0.4835, "step": 1408 }, { "epoch": 0.09626289540206326, "grad_norm": 5.801638603210449, "learning_rate": 6.9200102892243614e-06, "loss": 0.4995, "step": 1409 }, { "epoch": 0.09633121541299447, "grad_norm": 4.129219055175781, "learning_rate": 6.9198455699015436e-06, "loss": 0.3952, "step": 1410 }, { "epoch": 0.09639953542392567, "grad_norm": 4.355808734893799, "learning_rate": 6.919680683117961e-06, "loss": 0.4074, "step": 1411 }, { "epoch": 0.09646785543485686, "grad_norm": 5.165668964385986, "learning_rate": 6.919515628881684e-06, "loss": 0.5405, "step": 1412 }, { "epoch": 0.09653617544578807, "grad_norm": 7.804919242858887, "learning_rate": 6.919350407200799e-06, "loss": 0.5445, "step": 1413 }, { "epoch": 0.09660449545671927, "grad_norm": 4.8407206535339355, "learning_rate": 6.919185018083393e-06, "loss": 0.4269, "step": 1414 }, { "epoch": 0.09667281546765047, "grad_norm": 4.827986240386963, "learning_rate": 6.919019461537567e-06, "loss": 0.4887, "step": 1415 }, { "epoch": 0.09674113547858168, "grad_norm": 4.497382164001465, "learning_rate": 6.918853737571428e-06, "loss": 0.3635, "step": 1416 }, { "epoch": 0.09680945548951288, "grad_norm": 4.706750869750977, "learning_rate": 6.91868784619309e-06, "loss": 0.4446, "step": 1417 }, { "epoch": 0.09687777550044407, "grad_norm": 4.295995712280273, "learning_rate": 6.918521787410676e-06, "loss": 0.3868, "step": 1418 }, { "epoch": 0.09694609551137529, "grad_norm": 6.605663776397705, "learning_rate": 6.918355561232318e-06, "loss": 0.5304, "step": 1419 }, { "epoch": 0.09701441552230648, "grad_norm": 4.956119060516357, "learning_rate": 6.918189167666155e-06, "loss": 0.3329, "step": 1420 }, { "epoch": 0.09708273553323768, "grad_norm": 4.512330532073975, "learning_rate": 6.918022606720336e-06, "loss": 0.3706, "step": 1421 }, { "epoch": 0.09715105554416889, "grad_norm": 5.634589195251465, "learning_rate": 6.917855878403016e-06, "loss": 0.483, "step": 1422 }, { "epoch": 0.09721937555510009, "grad_norm": 6.474319934844971, "learning_rate": 6.917688982722361e-06, "loss": 0.5132, "step": 1423 }, { "epoch": 0.09728769556603128, "grad_norm": 4.696680068969727, "learning_rate": 6.917521919686541e-06, "loss": 0.3853, "step": 1424 }, { "epoch": 0.0973560155769625, "grad_norm": 3.71309232711792, "learning_rate": 6.917354689303738e-06, "loss": 0.3003, "step": 1425 }, { "epoch": 0.09742433558789369, "grad_norm": 6.760096073150635, "learning_rate": 6.9171872915821405e-06, "loss": 0.4309, "step": 1426 }, { "epoch": 0.09749265559882489, "grad_norm": 4.670010566711426, "learning_rate": 6.917019726529946e-06, "loss": 0.4621, "step": 1427 }, { "epoch": 0.0975609756097561, "grad_norm": 5.479829788208008, "learning_rate": 6.9168519941553585e-06, "loss": 0.5103, "step": 1428 }, { "epoch": 0.0976292956206873, "grad_norm": 5.72834587097168, "learning_rate": 6.916684094466592e-06, "loss": 0.5145, "step": 1429 }, { "epoch": 0.0976976156316185, "grad_norm": 4.3349432945251465, "learning_rate": 6.91651602747187e-06, "loss": 0.3912, "step": 1430 }, { "epoch": 0.0977659356425497, "grad_norm": 6.665806293487549, "learning_rate": 6.91634779317942e-06, "loss": 0.4679, "step": 1431 }, { "epoch": 0.0978342556534809, "grad_norm": 5.656777381896973, "learning_rate": 6.91617939159748e-06, "loss": 0.4244, "step": 1432 }, { "epoch": 0.0979025756644121, "grad_norm": 6.850466251373291, "learning_rate": 6.916010822734297e-06, "loss": 0.4892, "step": 1433 }, { "epoch": 0.09797089567534331, "grad_norm": 5.33514928817749, "learning_rate": 6.915842086598125e-06, "loss": 0.4809, "step": 1434 }, { "epoch": 0.09803921568627451, "grad_norm": 5.205555438995361, "learning_rate": 6.915673183197228e-06, "loss": 0.3624, "step": 1435 }, { "epoch": 0.0981075356972057, "grad_norm": 4.434805870056152, "learning_rate": 6.915504112539875e-06, "loss": 0.4356, "step": 1436 }, { "epoch": 0.09817585570813692, "grad_norm": 5.664936065673828, "learning_rate": 6.9153348746343444e-06, "loss": 0.4456, "step": 1437 }, { "epoch": 0.09824417571906811, "grad_norm": 4.257216930389404, "learning_rate": 6.915165469488925e-06, "loss": 0.3975, "step": 1438 }, { "epoch": 0.09831249572999931, "grad_norm": 3.8234548568725586, "learning_rate": 6.914995897111911e-06, "loss": 0.4857, "step": 1439 }, { "epoch": 0.09838081574093052, "grad_norm": 5.482242107391357, "learning_rate": 6.914826157511607e-06, "loss": 0.4085, "step": 1440 }, { "epoch": 0.09844913575186172, "grad_norm": 5.077967643737793, "learning_rate": 6.914656250696324e-06, "loss": 0.6209, "step": 1441 }, { "epoch": 0.09851745576279292, "grad_norm": 4.358163356781006, "learning_rate": 6.914486176674382e-06, "loss": 0.4882, "step": 1442 }, { "epoch": 0.09858577577372413, "grad_norm": 4.014760971069336, "learning_rate": 6.914315935454109e-06, "loss": 0.4005, "step": 1443 }, { "epoch": 0.09865409578465532, "grad_norm": 5.361038684844971, "learning_rate": 6.914145527043842e-06, "loss": 0.4677, "step": 1444 }, { "epoch": 0.09872241579558652, "grad_norm": 4.773965358734131, "learning_rate": 6.913974951451922e-06, "loss": 0.3353, "step": 1445 }, { "epoch": 0.09879073580651773, "grad_norm": 4.853516101837158, "learning_rate": 6.913804208686707e-06, "loss": 0.3963, "step": 1446 }, { "epoch": 0.09885905581744893, "grad_norm": 5.640417575836182, "learning_rate": 6.913633298756555e-06, "loss": 0.4133, "step": 1447 }, { "epoch": 0.09892737582838013, "grad_norm": 4.865151882171631, "learning_rate": 6.9134622216698345e-06, "loss": 0.506, "step": 1448 }, { "epoch": 0.09899569583931134, "grad_norm": 4.223058223724365, "learning_rate": 6.9132909774349245e-06, "loss": 0.3855, "step": 1449 }, { "epoch": 0.09906401585024253, "grad_norm": 6.6711344718933105, "learning_rate": 6.913119566060208e-06, "loss": 0.5057, "step": 1450 }, { "epoch": 0.09913233586117373, "grad_norm": 6.175708293914795, "learning_rate": 6.912947987554081e-06, "loss": 0.5388, "step": 1451 }, { "epoch": 0.09920065587210494, "grad_norm": 5.071897983551025, "learning_rate": 6.912776241924944e-06, "loss": 0.386, "step": 1452 }, { "epoch": 0.09926897588303614, "grad_norm": 4.9293317794799805, "learning_rate": 6.912604329181205e-06, "loss": 0.4349, "step": 1453 }, { "epoch": 0.09933729589396734, "grad_norm": 6.469242572784424, "learning_rate": 6.912432249331287e-06, "loss": 0.5129, "step": 1454 }, { "epoch": 0.09940561590489855, "grad_norm": 3.938974380493164, "learning_rate": 6.912260002383611e-06, "loss": 0.4942, "step": 1455 }, { "epoch": 0.09947393591582974, "grad_norm": 6.727536201477051, "learning_rate": 6.912087588346616e-06, "loss": 0.551, "step": 1456 }, { "epoch": 0.09954225592676094, "grad_norm": 5.254971504211426, "learning_rate": 6.911915007228741e-06, "loss": 0.4311, "step": 1457 }, { "epoch": 0.09961057593769215, "grad_norm": 3.320363759994507, "learning_rate": 6.911742259038439e-06, "loss": 0.4288, "step": 1458 }, { "epoch": 0.09967889594862335, "grad_norm": 7.56352424621582, "learning_rate": 6.911569343784169e-06, "loss": 0.3957, "step": 1459 }, { "epoch": 0.09974721595955455, "grad_norm": 5.395484924316406, "learning_rate": 6.911396261474396e-06, "loss": 0.4235, "step": 1460 }, { "epoch": 0.09981553597048576, "grad_norm": 5.332953929901123, "learning_rate": 6.9112230121175996e-06, "loss": 0.4846, "step": 1461 }, { "epoch": 0.09988385598141696, "grad_norm": 3.7938942909240723, "learning_rate": 6.911049595722258e-06, "loss": 0.4171, "step": 1462 }, { "epoch": 0.09995217599234815, "grad_norm": 5.68131685256958, "learning_rate": 6.910876012296866e-06, "loss": 0.544, "step": 1463 }, { "epoch": 0.10002049600327936, "grad_norm": 5.4892354011535645, "learning_rate": 6.910702261849926e-06, "loss": 0.4273, "step": 1464 }, { "epoch": 0.10008881601421056, "grad_norm": 3.656273365020752, "learning_rate": 6.9105283443899406e-06, "loss": 0.4314, "step": 1465 }, { "epoch": 0.10015713602514176, "grad_norm": 4.1816911697387695, "learning_rate": 6.91035425992543e-06, "loss": 0.4978, "step": 1466 }, { "epoch": 0.10022545603607297, "grad_norm": 4.512140274047852, "learning_rate": 6.910180008464917e-06, "loss": 0.4059, "step": 1467 }, { "epoch": 0.10029377604700417, "grad_norm": 4.168886661529541, "learning_rate": 6.910005590016935e-06, "loss": 0.479, "step": 1468 }, { "epoch": 0.10036209605793536, "grad_norm": 4.693017959594727, "learning_rate": 6.909831004590024e-06, "loss": 0.4843, "step": 1469 }, { "epoch": 0.10043041606886657, "grad_norm": 4.712803363800049, "learning_rate": 6.909656252192734e-06, "loss": 0.5728, "step": 1470 }, { "epoch": 0.10049873607979777, "grad_norm": 4.142809867858887, "learning_rate": 6.909481332833621e-06, "loss": 0.3715, "step": 1471 }, { "epoch": 0.10056705609072897, "grad_norm": 3.9913978576660156, "learning_rate": 6.9093062465212514e-06, "loss": 0.4713, "step": 1472 }, { "epoch": 0.10063537610166018, "grad_norm": 4.924096584320068, "learning_rate": 6.909130993264199e-06, "loss": 0.594, "step": 1473 }, { "epoch": 0.10070369611259138, "grad_norm": 4.510251522064209, "learning_rate": 6.908955573071044e-06, "loss": 0.4683, "step": 1474 }, { "epoch": 0.10077201612352257, "grad_norm": 4.376286029815674, "learning_rate": 6.908779985950377e-06, "loss": 0.407, "step": 1475 }, { "epoch": 0.10084033613445378, "grad_norm": 4.971362113952637, "learning_rate": 6.908604231910796e-06, "loss": 0.4577, "step": 1476 }, { "epoch": 0.10090865614538498, "grad_norm": 4.909231662750244, "learning_rate": 6.908428310960907e-06, "loss": 0.4226, "step": 1477 }, { "epoch": 0.10097697615631618, "grad_norm": 4.539865493774414, "learning_rate": 6.908252223109325e-06, "loss": 0.4777, "step": 1478 }, { "epoch": 0.10104529616724739, "grad_norm": 4.335827350616455, "learning_rate": 6.908075968364673e-06, "loss": 0.4393, "step": 1479 }, { "epoch": 0.10111361617817859, "grad_norm": 3.7398011684417725, "learning_rate": 6.9078995467355806e-06, "loss": 0.3976, "step": 1480 }, { "epoch": 0.10118193618910978, "grad_norm": 5.036581039428711, "learning_rate": 6.907722958230686e-06, "loss": 0.3753, "step": 1481 }, { "epoch": 0.101250256200041, "grad_norm": 4.407632350921631, "learning_rate": 6.907546202858638e-06, "loss": 0.4297, "step": 1482 }, { "epoch": 0.10131857621097219, "grad_norm": 3.6623008251190186, "learning_rate": 6.9073692806280915e-06, "loss": 0.4335, "step": 1483 }, { "epoch": 0.10138689622190339, "grad_norm": 4.853094100952148, "learning_rate": 6.907192191547709e-06, "loss": 0.494, "step": 1484 }, { "epoch": 0.1014552162328346, "grad_norm": 4.77580451965332, "learning_rate": 6.907014935626163e-06, "loss": 0.4343, "step": 1485 }, { "epoch": 0.1015235362437658, "grad_norm": 5.259939670562744, "learning_rate": 6.906837512872133e-06, "loss": 0.4107, "step": 1486 }, { "epoch": 0.101591856254697, "grad_norm": 4.272934436798096, "learning_rate": 6.906659923294307e-06, "loss": 0.4825, "step": 1487 }, { "epoch": 0.1016601762656282, "grad_norm": 5.934340000152588, "learning_rate": 6.906482166901381e-06, "loss": 0.5757, "step": 1488 }, { "epoch": 0.1017284962765594, "grad_norm": 5.31971549987793, "learning_rate": 6.906304243702058e-06, "loss": 0.4478, "step": 1489 }, { "epoch": 0.1017968162874906, "grad_norm": 5.3900146484375, "learning_rate": 6.906126153705053e-06, "loss": 0.5164, "step": 1490 }, { "epoch": 0.10186513629842181, "grad_norm": 4.901994705200195, "learning_rate": 6.9059478969190844e-06, "loss": 0.5328, "step": 1491 }, { "epoch": 0.10193345630935301, "grad_norm": 5.246541976928711, "learning_rate": 6.905769473352882e-06, "loss": 0.3545, "step": 1492 }, { "epoch": 0.10200177632028422, "grad_norm": 4.9273762702941895, "learning_rate": 6.905590883015182e-06, "loss": 0.4534, "step": 1493 }, { "epoch": 0.10207009633121542, "grad_norm": 4.674345970153809, "learning_rate": 6.905412125914731e-06, "loss": 0.4892, "step": 1494 }, { "epoch": 0.10213841634214661, "grad_norm": 5.441760063171387, "learning_rate": 6.90523320206028e-06, "loss": 0.4492, "step": 1495 }, { "epoch": 0.10220673635307782, "grad_norm": 4.074987888336182, "learning_rate": 6.905054111460592e-06, "loss": 0.3962, "step": 1496 }, { "epoch": 0.10227505636400902, "grad_norm": 4.00773286819458, "learning_rate": 6.904874854124438e-06, "loss": 0.4142, "step": 1497 }, { "epoch": 0.10234337637494022, "grad_norm": 4.044902324676514, "learning_rate": 6.904695430060592e-06, "loss": 0.5246, "step": 1498 }, { "epoch": 0.10241169638587143, "grad_norm": 3.351255416870117, "learning_rate": 6.904515839277843e-06, "loss": 0.3655, "step": 1499 }, { "epoch": 0.10248001639680263, "grad_norm": 7.537123203277588, "learning_rate": 6.904336081784984e-06, "loss": 0.4632, "step": 1500 }, { "epoch": 0.10254833640773382, "grad_norm": 4.9327473640441895, "learning_rate": 6.9041561575908165e-06, "loss": 0.4378, "step": 1501 }, { "epoch": 0.10261665641866503, "grad_norm": 4.341201305389404, "learning_rate": 6.903976066704152e-06, "loss": 0.4707, "step": 1502 }, { "epoch": 0.10268497642959623, "grad_norm": 4.246030330657959, "learning_rate": 6.9037958091338086e-06, "loss": 0.3874, "step": 1503 }, { "epoch": 0.10275329644052743, "grad_norm": 4.275722026824951, "learning_rate": 6.903615384888614e-06, "loss": 0.4931, "step": 1504 }, { "epoch": 0.10282161645145864, "grad_norm": 3.445460557937622, "learning_rate": 6.903434793977402e-06, "loss": 0.397, "step": 1505 }, { "epoch": 0.10288993646238984, "grad_norm": 4.518238544464111, "learning_rate": 6.9032540364090154e-06, "loss": 0.4126, "step": 1506 }, { "epoch": 0.10295825647332103, "grad_norm": 5.909249782562256, "learning_rate": 6.903073112192307e-06, "loss": 0.4738, "step": 1507 }, { "epoch": 0.10302657648425224, "grad_norm": 4.363755702972412, "learning_rate": 6.902892021336134e-06, "loss": 0.5087, "step": 1508 }, { "epoch": 0.10309489649518344, "grad_norm": 5.789895057678223, "learning_rate": 6.902710763849365e-06, "loss": 0.4688, "step": 1509 }, { "epoch": 0.10316321650611464, "grad_norm": 4.980250358581543, "learning_rate": 6.902529339740876e-06, "loss": 0.5348, "step": 1510 }, { "epoch": 0.10323153651704585, "grad_norm": 4.497342109680176, "learning_rate": 6.9023477490195515e-06, "loss": 0.3961, "step": 1511 }, { "epoch": 0.10329985652797705, "grad_norm": 5.406769752502441, "learning_rate": 6.902165991694282e-06, "loss": 0.4133, "step": 1512 }, { "epoch": 0.10336817653890824, "grad_norm": 5.0202131271362305, "learning_rate": 6.901984067773969e-06, "loss": 0.4015, "step": 1513 }, { "epoch": 0.10343649654983945, "grad_norm": 5.444234371185303, "learning_rate": 6.901801977267518e-06, "loss": 0.4704, "step": 1514 }, { "epoch": 0.10350481656077065, "grad_norm": 4.719535827636719, "learning_rate": 6.90161972018385e-06, "loss": 0.3767, "step": 1515 }, { "epoch": 0.10357313657170185, "grad_norm": 5.5794196128845215, "learning_rate": 6.901437296531886e-06, "loss": 0.3792, "step": 1516 }, { "epoch": 0.10364145658263306, "grad_norm": 5.489114761352539, "learning_rate": 6.901254706320561e-06, "loss": 0.6275, "step": 1517 }, { "epoch": 0.10370977659356426, "grad_norm": 5.053063869476318, "learning_rate": 6.901071949558814e-06, "loss": 0.4427, "step": 1518 }, { "epoch": 0.10377809660449545, "grad_norm": 5.095762252807617, "learning_rate": 6.9008890262555965e-06, "loss": 0.4552, "step": 1519 }, { "epoch": 0.10384641661542666, "grad_norm": 5.491945266723633, "learning_rate": 6.9007059364198634e-06, "loss": 0.4906, "step": 1520 }, { "epoch": 0.10391473662635786, "grad_norm": 4.89140510559082, "learning_rate": 6.900522680060582e-06, "loss": 0.4412, "step": 1521 }, { "epoch": 0.10398305663728906, "grad_norm": 6.979658603668213, "learning_rate": 6.900339257186724e-06, "loss": 0.3974, "step": 1522 }, { "epoch": 0.10405137664822027, "grad_norm": 4.795107841491699, "learning_rate": 6.900155667807273e-06, "loss": 0.4237, "step": 1523 }, { "epoch": 0.10411969665915147, "grad_norm": 4.1038665771484375, "learning_rate": 6.899971911931218e-06, "loss": 0.4582, "step": 1524 }, { "epoch": 0.10418801667008266, "grad_norm": 3.733682870864868, "learning_rate": 6.899787989567557e-06, "loss": 0.3091, "step": 1525 }, { "epoch": 0.10425633668101388, "grad_norm": 5.351358413696289, "learning_rate": 6.899603900725296e-06, "loss": 0.4108, "step": 1526 }, { "epoch": 0.10432465669194507, "grad_norm": 5.464968204498291, "learning_rate": 6.89941964541345e-06, "loss": 0.3994, "step": 1527 }, { "epoch": 0.10439297670287627, "grad_norm": 4.7507758140563965, "learning_rate": 6.899235223641041e-06, "loss": 0.4073, "step": 1528 }, { "epoch": 0.10446129671380748, "grad_norm": 5.61574649810791, "learning_rate": 6.8990506354171e-06, "loss": 0.4369, "step": 1529 }, { "epoch": 0.10452961672473868, "grad_norm": 5.066982746124268, "learning_rate": 6.898865880750664e-06, "loss": 0.47, "step": 1530 }, { "epoch": 0.10459793673566987, "grad_norm": 4.197581768035889, "learning_rate": 6.898680959650784e-06, "loss": 0.4342, "step": 1531 }, { "epoch": 0.10466625674660109, "grad_norm": 6.357797145843506, "learning_rate": 6.898495872126511e-06, "loss": 0.5322, "step": 1532 }, { "epoch": 0.10473457675753228, "grad_norm": 4.835041046142578, "learning_rate": 6.89831061818691e-06, "loss": 0.4302, "step": 1533 }, { "epoch": 0.10480289676846348, "grad_norm": 4.404298782348633, "learning_rate": 6.898125197841054e-06, "loss": 0.3717, "step": 1534 }, { "epoch": 0.10487121677939469, "grad_norm": 3.850142478942871, "learning_rate": 6.897939611098018e-06, "loss": 0.3833, "step": 1535 }, { "epoch": 0.10493953679032589, "grad_norm": 5.2773118019104, "learning_rate": 6.897753857966895e-06, "loss": 0.5143, "step": 1536 }, { "epoch": 0.10500785680125709, "grad_norm": 4.609970569610596, "learning_rate": 6.897567938456776e-06, "loss": 0.3346, "step": 1537 }, { "epoch": 0.1050761768121883, "grad_norm": 3.4807491302490234, "learning_rate": 6.8973818525767685e-06, "loss": 0.3792, "step": 1538 }, { "epoch": 0.1051444968231195, "grad_norm": 4.171069622039795, "learning_rate": 6.897195600335984e-06, "loss": 0.498, "step": 1539 }, { "epoch": 0.10521281683405069, "grad_norm": 5.833878517150879, "learning_rate": 6.897009181743541e-06, "loss": 0.4855, "step": 1540 }, { "epoch": 0.1052811368449819, "grad_norm": 6.310886859893799, "learning_rate": 6.89682259680857e-06, "loss": 0.4705, "step": 1541 }, { "epoch": 0.1053494568559131, "grad_norm": 4.259115219116211, "learning_rate": 6.896635845540207e-06, "loss": 0.4678, "step": 1542 }, { "epoch": 0.1054177768668443, "grad_norm": 4.45581579208374, "learning_rate": 6.896448927947595e-06, "loss": 0.4628, "step": 1543 }, { "epoch": 0.1054860968777755, "grad_norm": 4.800694465637207, "learning_rate": 6.896261844039889e-06, "loss": 0.4473, "step": 1544 }, { "epoch": 0.1055544168887067, "grad_norm": 6.565711498260498, "learning_rate": 6.896074593826249e-06, "loss": 0.4983, "step": 1545 }, { "epoch": 0.1056227368996379, "grad_norm": 5.114604473114014, "learning_rate": 6.895887177315845e-06, "loss": 0.4606, "step": 1546 }, { "epoch": 0.10569105691056911, "grad_norm": 5.906209945678711, "learning_rate": 6.895699594517854e-06, "loss": 0.5388, "step": 1547 }, { "epoch": 0.10575937692150031, "grad_norm": 4.544822692871094, "learning_rate": 6.895511845441461e-06, "loss": 0.478, "step": 1548 }, { "epoch": 0.1058276969324315, "grad_norm": 4.496562480926514, "learning_rate": 6.895323930095859e-06, "loss": 0.5031, "step": 1549 }, { "epoch": 0.10589601694336272, "grad_norm": 3.36051869392395, "learning_rate": 6.895135848490251e-06, "loss": 0.4307, "step": 1550 }, { "epoch": 0.10596433695429391, "grad_norm": 5.980930328369141, "learning_rate": 6.8949476006338455e-06, "loss": 0.3142, "step": 1551 }, { "epoch": 0.10603265696522511, "grad_norm": 4.9192280769348145, "learning_rate": 6.894759186535862e-06, "loss": 0.4828, "step": 1552 }, { "epoch": 0.10610097697615632, "grad_norm": 5.4362874031066895, "learning_rate": 6.894570606205526e-06, "loss": 0.529, "step": 1553 }, { "epoch": 0.10616929698708752, "grad_norm": 5.825161933898926, "learning_rate": 6.894381859652071e-06, "loss": 0.4642, "step": 1554 }, { "epoch": 0.10623761699801872, "grad_norm": 6.963778018951416, "learning_rate": 6.894192946884741e-06, "loss": 0.5266, "step": 1555 }, { "epoch": 0.10630593700894993, "grad_norm": 5.824758529663086, "learning_rate": 6.894003867912786e-06, "loss": 0.4883, "step": 1556 }, { "epoch": 0.10637425701988112, "grad_norm": 4.251987457275391, "learning_rate": 6.893814622745463e-06, "loss": 0.4577, "step": 1557 }, { "epoch": 0.10644257703081232, "grad_norm": 5.846950054168701, "learning_rate": 6.893625211392041e-06, "loss": 0.4673, "step": 1558 }, { "epoch": 0.10651089704174353, "grad_norm": 4.828357219696045, "learning_rate": 6.893435633861794e-06, "loss": 0.5141, "step": 1559 }, { "epoch": 0.10657921705267473, "grad_norm": 5.539738178253174, "learning_rate": 6.893245890164006e-06, "loss": 0.5028, "step": 1560 }, { "epoch": 0.10664753706360593, "grad_norm": 4.3930583000183105, "learning_rate": 6.893055980307967e-06, "loss": 0.5259, "step": 1561 }, { "epoch": 0.10671585707453714, "grad_norm": 4.372837066650391, "learning_rate": 6.892865904302977e-06, "loss": 0.4905, "step": 1562 }, { "epoch": 0.10678417708546833, "grad_norm": 6.607868194580078, "learning_rate": 6.892675662158343e-06, "loss": 0.5095, "step": 1563 }, { "epoch": 0.10685249709639953, "grad_norm": 6.064873695373535, "learning_rate": 6.892485253883381e-06, "loss": 0.4044, "step": 1564 }, { "epoch": 0.10692081710733074, "grad_norm": 4.266542434692383, "learning_rate": 6.892294679487416e-06, "loss": 0.4219, "step": 1565 }, { "epoch": 0.10698913711826194, "grad_norm": 4.213958740234375, "learning_rate": 6.892103938979778e-06, "loss": 0.3969, "step": 1566 }, { "epoch": 0.10705745712919314, "grad_norm": 4.977325439453125, "learning_rate": 6.891913032369808e-06, "loss": 0.3331, "step": 1567 }, { "epoch": 0.10712577714012435, "grad_norm": 5.549046516418457, "learning_rate": 6.891721959666854e-06, "loss": 0.592, "step": 1568 }, { "epoch": 0.10719409715105555, "grad_norm": 5.943163871765137, "learning_rate": 6.891530720880273e-06, "loss": 0.4153, "step": 1569 }, { "epoch": 0.10726241716198674, "grad_norm": 4.1797404289245605, "learning_rate": 6.891339316019428e-06, "loss": 0.4256, "step": 1570 }, { "epoch": 0.10733073717291795, "grad_norm": 4.546952247619629, "learning_rate": 6.891147745093693e-06, "loss": 0.4394, "step": 1571 }, { "epoch": 0.10739905718384915, "grad_norm": 4.305552959442139, "learning_rate": 6.890956008112447e-06, "loss": 0.3839, "step": 1572 }, { "epoch": 0.10746737719478035, "grad_norm": 4.667248249053955, "learning_rate": 6.890764105085081e-06, "loss": 0.509, "step": 1573 }, { "epoch": 0.10753569720571156, "grad_norm": 4.356881141662598, "learning_rate": 6.890572036020991e-06, "loss": 0.3985, "step": 1574 }, { "epoch": 0.10760401721664276, "grad_norm": 4.803138732910156, "learning_rate": 6.890379800929581e-06, "loss": 0.4113, "step": 1575 }, { "epoch": 0.10767233722757395, "grad_norm": 6.033741474151611, "learning_rate": 6.890187399820267e-06, "loss": 0.3985, "step": 1576 }, { "epoch": 0.10774065723850516, "grad_norm": 5.097756385803223, "learning_rate": 6.889994832702467e-06, "loss": 0.5198, "step": 1577 }, { "epoch": 0.10780897724943636, "grad_norm": 4.55687952041626, "learning_rate": 6.889802099585615e-06, "loss": 0.4861, "step": 1578 }, { "epoch": 0.10787729726036756, "grad_norm": 4.151483058929443, "learning_rate": 6.889609200479143e-06, "loss": 0.4257, "step": 1579 }, { "epoch": 0.10794561727129877, "grad_norm": 4.535404205322266, "learning_rate": 6.889416135392501e-06, "loss": 0.4355, "step": 1580 }, { "epoch": 0.10801393728222997, "grad_norm": 3.9688491821289062, "learning_rate": 6.8892229043351406e-06, "loss": 0.419, "step": 1581 }, { "epoch": 0.10808225729316116, "grad_norm": 5.4328765869140625, "learning_rate": 6.889029507316525e-06, "loss": 0.4172, "step": 1582 }, { "epoch": 0.10815057730409237, "grad_norm": 5.739156246185303, "learning_rate": 6.888835944346123e-06, "loss": 0.5186, "step": 1583 }, { "epoch": 0.10821889731502357, "grad_norm": 3.7919905185699463, "learning_rate": 6.888642215433416e-06, "loss": 0.4232, "step": 1584 }, { "epoch": 0.10828721732595477, "grad_norm": 3.734800338745117, "learning_rate": 6.888448320587887e-06, "loss": 0.3697, "step": 1585 }, { "epoch": 0.10835553733688598, "grad_norm": 3.502906322479248, "learning_rate": 6.888254259819032e-06, "loss": 0.4946, "step": 1586 }, { "epoch": 0.10842385734781718, "grad_norm": 5.556142807006836, "learning_rate": 6.888060033136354e-06, "loss": 0.3884, "step": 1587 }, { "epoch": 0.10849217735874837, "grad_norm": 4.653379440307617, "learning_rate": 6.887865640549363e-06, "loss": 0.4033, "step": 1588 }, { "epoch": 0.10856049736967958, "grad_norm": 5.288240432739258, "learning_rate": 6.887671082067577e-06, "loss": 0.4594, "step": 1589 }, { "epoch": 0.10862881738061078, "grad_norm": 4.0893964767456055, "learning_rate": 6.887476357700525e-06, "loss": 0.4776, "step": 1590 }, { "epoch": 0.10869713739154198, "grad_norm": 4.378586292266846, "learning_rate": 6.887281467457742e-06, "loss": 0.4631, "step": 1591 }, { "epoch": 0.10876545740247319, "grad_norm": 5.239903450012207, "learning_rate": 6.887086411348769e-06, "loss": 0.374, "step": 1592 }, { "epoch": 0.10883377741340439, "grad_norm": 3.563424587249756, "learning_rate": 6.886891189383159e-06, "loss": 0.4486, "step": 1593 }, { "epoch": 0.10890209742433558, "grad_norm": 5.904158115386963, "learning_rate": 6.8866958015704724e-06, "loss": 0.5583, "step": 1594 }, { "epoch": 0.1089704174352668, "grad_norm": 6.912940979003906, "learning_rate": 6.886500247920276e-06, "loss": 0.4864, "step": 1595 }, { "epoch": 0.10903873744619799, "grad_norm": 4.94856071472168, "learning_rate": 6.886304528442144e-06, "loss": 0.4739, "step": 1596 }, { "epoch": 0.10910705745712919, "grad_norm": 3.301316976547241, "learning_rate": 6.886108643145663e-06, "loss": 0.4634, "step": 1597 }, { "epoch": 0.1091753774680604, "grad_norm": 4.380321025848389, "learning_rate": 6.8859125920404225e-06, "loss": 0.5397, "step": 1598 }, { "epoch": 0.1092436974789916, "grad_norm": 5.4363484382629395, "learning_rate": 6.885716375136023e-06, "loss": 0.4029, "step": 1599 }, { "epoch": 0.1093120174899228, "grad_norm": 3.997394561767578, "learning_rate": 6.885519992442076e-06, "loss": 0.4784, "step": 1600 }, { "epoch": 0.109380337500854, "grad_norm": 4.503654479980469, "learning_rate": 6.885323443968193e-06, "loss": 0.4637, "step": 1601 }, { "epoch": 0.1094486575117852, "grad_norm": 6.768045902252197, "learning_rate": 6.885126729724001e-06, "loss": 0.4153, "step": 1602 }, { "epoch": 0.1095169775227164, "grad_norm": 4.380611419677734, "learning_rate": 6.884929849719133e-06, "loss": 0.4217, "step": 1603 }, { "epoch": 0.10958529753364761, "grad_norm": 3.9967191219329834, "learning_rate": 6.884732803963229e-06, "loss": 0.4424, "step": 1604 }, { "epoch": 0.10965361754457881, "grad_norm": 5.593958377838135, "learning_rate": 6.884535592465938e-06, "loss": 0.4787, "step": 1605 }, { "epoch": 0.10972193755551, "grad_norm": 5.051266670227051, "learning_rate": 6.8843382152369165e-06, "loss": 0.46, "step": 1606 }, { "epoch": 0.10979025756644122, "grad_norm": 4.123932838439941, "learning_rate": 6.884140672285829e-06, "loss": 0.4279, "step": 1607 }, { "epoch": 0.10985857757737241, "grad_norm": 4.371647357940674, "learning_rate": 6.88394296362235e-06, "loss": 0.4818, "step": 1608 }, { "epoch": 0.10992689758830361, "grad_norm": 4.891842365264893, "learning_rate": 6.883745089256161e-06, "loss": 0.4389, "step": 1609 }, { "epoch": 0.10999521759923482, "grad_norm": 5.820152282714844, "learning_rate": 6.883547049196949e-06, "loss": 0.521, "step": 1610 }, { "epoch": 0.11006353761016602, "grad_norm": 5.047607898712158, "learning_rate": 6.883348843454414e-06, "loss": 0.4866, "step": 1611 }, { "epoch": 0.11013185762109721, "grad_norm": 5.464288711547852, "learning_rate": 6.88315047203826e-06, "loss": 0.3661, "step": 1612 }, { "epoch": 0.11020017763202843, "grad_norm": 4.5057597160339355, "learning_rate": 6.882951934958201e-06, "loss": 0.4523, "step": 1613 }, { "epoch": 0.11026849764295962, "grad_norm": 4.393147945404053, "learning_rate": 6.88275323222396e-06, "loss": 0.3882, "step": 1614 }, { "epoch": 0.11033681765389082, "grad_norm": 4.632749080657959, "learning_rate": 6.882554363845267e-06, "loss": 0.4714, "step": 1615 }, { "epoch": 0.11040513766482203, "grad_norm": 4.124814510345459, "learning_rate": 6.882355329831859e-06, "loss": 0.3834, "step": 1616 }, { "epoch": 0.11047345767575323, "grad_norm": 4.127981662750244, "learning_rate": 6.882156130193481e-06, "loss": 0.4659, "step": 1617 }, { "epoch": 0.11054177768668443, "grad_norm": 3.855597734451294, "learning_rate": 6.8819567649398896e-06, "loss": 0.4671, "step": 1618 }, { "epoch": 0.11061009769761564, "grad_norm": 5.287643909454346, "learning_rate": 6.881757234080846e-06, "loss": 0.5309, "step": 1619 }, { "epoch": 0.11067841770854683, "grad_norm": 6.193994522094727, "learning_rate": 6.881557537626121e-06, "loss": 0.5039, "step": 1620 }, { "epoch": 0.11074673771947803, "grad_norm": 4.325383186340332, "learning_rate": 6.8813576755854935e-06, "loss": 0.4714, "step": 1621 }, { "epoch": 0.11081505773040924, "grad_norm": 5.454934597015381, "learning_rate": 6.881157647968749e-06, "loss": 0.5104, "step": 1622 }, { "epoch": 0.11088337774134044, "grad_norm": 4.453930377960205, "learning_rate": 6.880957454785684e-06, "loss": 0.5042, "step": 1623 }, { "epoch": 0.11095169775227164, "grad_norm": 4.785533428192139, "learning_rate": 6.8807570960461e-06, "loss": 0.4826, "step": 1624 }, { "epoch": 0.11102001776320285, "grad_norm": 5.247739791870117, "learning_rate": 6.88055657175981e-06, "loss": 0.3977, "step": 1625 }, { "epoch": 0.11108833777413404, "grad_norm": 5.639232635498047, "learning_rate": 6.88035588193663e-06, "loss": 0.6005, "step": 1626 }, { "epoch": 0.11115665778506524, "grad_norm": 5.766960144042969, "learning_rate": 6.880155026586391e-06, "loss": 0.4736, "step": 1627 }, { "epoch": 0.11122497779599645, "grad_norm": 4.725334167480469, "learning_rate": 6.879954005718925e-06, "loss": 0.4847, "step": 1628 }, { "epoch": 0.11129329780692765, "grad_norm": 5.063401222229004, "learning_rate": 6.879752819344077e-06, "loss": 0.438, "step": 1629 }, { "epoch": 0.11136161781785885, "grad_norm": 4.618640422821045, "learning_rate": 6.8795514674716995e-06, "loss": 0.5016, "step": 1630 }, { "epoch": 0.11142993782879006, "grad_norm": 5.425630569458008, "learning_rate": 6.87934995011165e-06, "loss": 0.4956, "step": 1631 }, { "epoch": 0.11149825783972125, "grad_norm": 5.0140485763549805, "learning_rate": 6.879148267273797e-06, "loss": 0.4831, "step": 1632 }, { "epoch": 0.11156657785065245, "grad_norm": 5.200329303741455, "learning_rate": 6.878946418968018e-06, "loss": 0.4742, "step": 1633 }, { "epoch": 0.11163489786158366, "grad_norm": 5.564296722412109, "learning_rate": 6.878744405204195e-06, "loss": 0.5078, "step": 1634 }, { "epoch": 0.11170321787251486, "grad_norm": 5.128294944763184, "learning_rate": 6.878542225992221e-06, "loss": 0.4715, "step": 1635 }, { "epoch": 0.11177153788344606, "grad_norm": 3.6643245220184326, "learning_rate": 6.878339881341995e-06, "loss": 0.3579, "step": 1636 }, { "epoch": 0.11183985789437727, "grad_norm": 6.296716690063477, "learning_rate": 6.878137371263428e-06, "loss": 0.4418, "step": 1637 }, { "epoch": 0.11190817790530846, "grad_norm": 3.987868547439575, "learning_rate": 6.8779346957664335e-06, "loss": 0.453, "step": 1638 }, { "epoch": 0.11197649791623966, "grad_norm": 5.229997634887695, "learning_rate": 6.877731854860937e-06, "loss": 0.5521, "step": 1639 }, { "epoch": 0.11204481792717087, "grad_norm": 4.072739601135254, "learning_rate": 6.877528848556872e-06, "loss": 0.5306, "step": 1640 }, { "epoch": 0.11211313793810207, "grad_norm": 4.209604263305664, "learning_rate": 6.8773256768641775e-06, "loss": 0.4415, "step": 1641 }, { "epoch": 0.11218145794903327, "grad_norm": 4.120842933654785, "learning_rate": 6.877122339792804e-06, "loss": 0.4106, "step": 1642 }, { "epoch": 0.11224977795996448, "grad_norm": 3.7910280227661133, "learning_rate": 6.876918837352707e-06, "loss": 0.4422, "step": 1643 }, { "epoch": 0.11231809797089568, "grad_norm": 4.842207908630371, "learning_rate": 6.876715169553852e-06, "loss": 0.4829, "step": 1644 }, { "epoch": 0.11238641798182687, "grad_norm": 4.0703277587890625, "learning_rate": 6.876511336406212e-06, "loss": 0.4706, "step": 1645 }, { "epoch": 0.11245473799275808, "grad_norm": 7.602311134338379, "learning_rate": 6.876307337919769e-06, "loss": 0.5734, "step": 1646 }, { "epoch": 0.11252305800368928, "grad_norm": 4.236806869506836, "learning_rate": 6.876103174104511e-06, "loss": 0.4495, "step": 1647 }, { "epoch": 0.11259137801462048, "grad_norm": 3.9661290645599365, "learning_rate": 6.875898844970436e-06, "loss": 0.3673, "step": 1648 }, { "epoch": 0.11265969802555169, "grad_norm": 4.353143215179443, "learning_rate": 6.875694350527549e-06, "loss": 0.446, "step": 1649 }, { "epoch": 0.11272801803648289, "grad_norm": 5.179260730743408, "learning_rate": 6.875489690785865e-06, "loss": 0.3995, "step": 1650 }, { "epoch": 0.11279633804741408, "grad_norm": 5.515905380249023, "learning_rate": 6.875284865755402e-06, "loss": 0.36, "step": 1651 }, { "epoch": 0.1128646580583453, "grad_norm": 5.168540000915527, "learning_rate": 6.875079875446195e-06, "loss": 0.5824, "step": 1652 }, { "epoch": 0.11293297806927649, "grad_norm": 6.031967639923096, "learning_rate": 6.874874719868278e-06, "loss": 0.4741, "step": 1653 }, { "epoch": 0.11300129808020769, "grad_norm": 3.7792141437530518, "learning_rate": 6.874669399031697e-06, "loss": 0.4599, "step": 1654 }, { "epoch": 0.1130696180911389, "grad_norm": 4.391394138336182, "learning_rate": 6.874463912946508e-06, "loss": 0.5014, "step": 1655 }, { "epoch": 0.1131379381020701, "grad_norm": 5.709780693054199, "learning_rate": 6.874258261622772e-06, "loss": 0.4692, "step": 1656 }, { "epoch": 0.11320625811300129, "grad_norm": 6.6905083656311035, "learning_rate": 6.87405244507056e-06, "loss": 0.541, "step": 1657 }, { "epoch": 0.1132745781239325, "grad_norm": 5.200557708740234, "learning_rate": 6.8738464632999494e-06, "loss": 0.4437, "step": 1658 }, { "epoch": 0.1133428981348637, "grad_norm": 3.8782689571380615, "learning_rate": 6.873640316321026e-06, "loss": 0.3658, "step": 1659 }, { "epoch": 0.1134112181457949, "grad_norm": 4.8588666915893555, "learning_rate": 6.873434004143886e-06, "loss": 0.4251, "step": 1660 }, { "epoch": 0.11347953815672611, "grad_norm": 4.515092372894287, "learning_rate": 6.873227526778631e-06, "loss": 0.5293, "step": 1661 }, { "epoch": 0.1135478581676573, "grad_norm": 4.216739654541016, "learning_rate": 6.873020884235371e-06, "loss": 0.4339, "step": 1662 }, { "epoch": 0.1136161781785885, "grad_norm": 5.133565902709961, "learning_rate": 6.872814076524226e-06, "loss": 0.4263, "step": 1663 }, { "epoch": 0.11368449818951971, "grad_norm": 4.5430426597595215, "learning_rate": 6.872607103655322e-06, "loss": 0.4867, "step": 1664 }, { "epoch": 0.11375281820045091, "grad_norm": 5.911352634429932, "learning_rate": 6.872399965638794e-06, "loss": 0.4732, "step": 1665 }, { "epoch": 0.11382113821138211, "grad_norm": 3.9669549465179443, "learning_rate": 6.872192662484786e-06, "loss": 0.4165, "step": 1666 }, { "epoch": 0.11388945822231332, "grad_norm": 3.611751079559326, "learning_rate": 6.871985194203448e-06, "loss": 0.4765, "step": 1667 }, { "epoch": 0.11395777823324452, "grad_norm": 4.494018077850342, "learning_rate": 6.871777560804939e-06, "loss": 0.4898, "step": 1668 }, { "epoch": 0.11402609824417571, "grad_norm": 4.941554546356201, "learning_rate": 6.871569762299426e-06, "loss": 0.4204, "step": 1669 }, { "epoch": 0.11409441825510692, "grad_norm": 4.562984466552734, "learning_rate": 6.871361798697086e-06, "loss": 0.4066, "step": 1670 }, { "epoch": 0.11416273826603812, "grad_norm": 5.912262916564941, "learning_rate": 6.871153670008102e-06, "loss": 0.4284, "step": 1671 }, { "epoch": 0.11423105827696932, "grad_norm": 4.164581775665283, "learning_rate": 6.870945376242664e-06, "loss": 0.4471, "step": 1672 }, { "epoch": 0.11429937828790053, "grad_norm": 3.8949413299560547, "learning_rate": 6.870736917410973e-06, "loss": 0.4146, "step": 1673 }, { "epoch": 0.11436769829883173, "grad_norm": 5.278111457824707, "learning_rate": 6.870528293523236e-06, "loss": 0.5374, "step": 1674 }, { "epoch": 0.11443601830976292, "grad_norm": 5.057509899139404, "learning_rate": 6.8703195045896685e-06, "loss": 0.5046, "step": 1675 }, { "epoch": 0.11450433832069414, "grad_norm": 4.485426425933838, "learning_rate": 6.870110550620495e-06, "loss": 0.5035, "step": 1676 }, { "epoch": 0.11457265833162533, "grad_norm": 4.094045639038086, "learning_rate": 6.869901431625948e-06, "loss": 0.4156, "step": 1677 }, { "epoch": 0.11464097834255653, "grad_norm": 4.632134437561035, "learning_rate": 6.8696921476162665e-06, "loss": 0.4572, "step": 1678 }, { "epoch": 0.11470929835348774, "grad_norm": 4.846744060516357, "learning_rate": 6.869482698601699e-06, "loss": 0.539, "step": 1679 }, { "epoch": 0.11477761836441894, "grad_norm": 4.093472480773926, "learning_rate": 6.869273084592501e-06, "loss": 0.341, "step": 1680 }, { "epoch": 0.11484593837535013, "grad_norm": 4.874910354614258, "learning_rate": 6.869063305598937e-06, "loss": 0.5353, "step": 1681 }, { "epoch": 0.11491425838628135, "grad_norm": 4.62385892868042, "learning_rate": 6.86885336163128e-06, "loss": 0.3909, "step": 1682 }, { "epoch": 0.11498257839721254, "grad_norm": 4.532124042510986, "learning_rate": 6.86864325269981e-06, "loss": 0.3069, "step": 1683 }, { "epoch": 0.11505089840814374, "grad_norm": 5.0451202392578125, "learning_rate": 6.868432978814815e-06, "loss": 0.5607, "step": 1684 }, { "epoch": 0.11511921841907495, "grad_norm": 4.673802852630615, "learning_rate": 6.868222539986592e-06, "loss": 0.4689, "step": 1685 }, { "epoch": 0.11518753843000615, "grad_norm": 5.193235874176025, "learning_rate": 6.8680119362254465e-06, "loss": 0.4784, "step": 1686 }, { "epoch": 0.11525585844093734, "grad_norm": 3.8314530849456787, "learning_rate": 6.8678011675416905e-06, "loss": 0.3401, "step": 1687 }, { "epoch": 0.11532417845186856, "grad_norm": 5.377197742462158, "learning_rate": 6.867590233945643e-06, "loss": 0.4975, "step": 1688 }, { "epoch": 0.11539249846279975, "grad_norm": 6.318967819213867, "learning_rate": 6.8673791354476365e-06, "loss": 0.5181, "step": 1689 }, { "epoch": 0.11546081847373095, "grad_norm": 5.04278564453125, "learning_rate": 6.867167872058004e-06, "loss": 0.4598, "step": 1690 }, { "epoch": 0.11552913848466216, "grad_norm": 4.885934829711914, "learning_rate": 6.866956443787094e-06, "loss": 0.4376, "step": 1691 }, { "epoch": 0.11559745849559336, "grad_norm": 4.64083194732666, "learning_rate": 6.8667448506452575e-06, "loss": 0.4551, "step": 1692 }, { "epoch": 0.11566577850652456, "grad_norm": 5.313033580780029, "learning_rate": 6.866533092642855e-06, "loss": 0.4957, "step": 1693 }, { "epoch": 0.11573409851745577, "grad_norm": 4.641627311706543, "learning_rate": 6.866321169790258e-06, "loss": 0.4705, "step": 1694 }, { "epoch": 0.11580241852838696, "grad_norm": 4.6883158683776855, "learning_rate": 6.866109082097843e-06, "loss": 0.3962, "step": 1695 }, { "epoch": 0.11587073853931816, "grad_norm": 3.159397840499878, "learning_rate": 6.865896829575994e-06, "loss": 0.4098, "step": 1696 }, { "epoch": 0.11593905855024937, "grad_norm": 7.096440315246582, "learning_rate": 6.865684412235106e-06, "loss": 0.6251, "step": 1697 }, { "epoch": 0.11600737856118057, "grad_norm": 4.558440208435059, "learning_rate": 6.86547183008558e-06, "loss": 0.3907, "step": 1698 }, { "epoch": 0.11607569857211177, "grad_norm": 5.390957832336426, "learning_rate": 6.865259083137825e-06, "loss": 0.4941, "step": 1699 }, { "epoch": 0.11614401858304298, "grad_norm": 5.833737373352051, "learning_rate": 6.86504617140226e-06, "loss": 0.602, "step": 1700 }, { "epoch": 0.11621233859397417, "grad_norm": 4.810632705688477, "learning_rate": 6.864833094889311e-06, "loss": 0.4455, "step": 1701 }, { "epoch": 0.11628065860490537, "grad_norm": 5.741375923156738, "learning_rate": 6.864619853609408e-06, "loss": 0.4568, "step": 1702 }, { "epoch": 0.11634897861583658, "grad_norm": 4.670109272003174, "learning_rate": 6.864406447572998e-06, "loss": 0.4759, "step": 1703 }, { "epoch": 0.11641729862676778, "grad_norm": 4.951505661010742, "learning_rate": 6.864192876790528e-06, "loss": 0.5216, "step": 1704 }, { "epoch": 0.11648561863769898, "grad_norm": 4.590980529785156, "learning_rate": 6.863979141272456e-06, "loss": 0.4929, "step": 1705 }, { "epoch": 0.11655393864863019, "grad_norm": 7.971776008605957, "learning_rate": 6.863765241029249e-06, "loss": 0.4886, "step": 1706 }, { "epoch": 0.11662225865956138, "grad_norm": 4.275908946990967, "learning_rate": 6.863551176071381e-06, "loss": 0.4365, "step": 1707 }, { "epoch": 0.11669057867049258, "grad_norm": 4.3525238037109375, "learning_rate": 6.863336946409333e-06, "loss": 0.5229, "step": 1708 }, { "epoch": 0.11675889868142379, "grad_norm": 4.836097240447998, "learning_rate": 6.863122552053596e-06, "loss": 0.4019, "step": 1709 }, { "epoch": 0.11682721869235499, "grad_norm": 3.4484829902648926, "learning_rate": 6.86290799301467e-06, "loss": 0.3862, "step": 1710 }, { "epoch": 0.11689553870328619, "grad_norm": 4.820058345794678, "learning_rate": 6.862693269303059e-06, "loss": 0.5754, "step": 1711 }, { "epoch": 0.1169638587142174, "grad_norm": 5.185234546661377, "learning_rate": 6.862478380929278e-06, "loss": 0.5756, "step": 1712 }, { "epoch": 0.1170321787251486, "grad_norm": 5.154475212097168, "learning_rate": 6.862263327903851e-06, "loss": 0.4208, "step": 1713 }, { "epoch": 0.11710049873607979, "grad_norm": 5.186435699462891, "learning_rate": 6.862048110237306e-06, "loss": 0.4825, "step": 1714 }, { "epoch": 0.117168818747011, "grad_norm": 4.099624156951904, "learning_rate": 6.8618327279401845e-06, "loss": 0.4209, "step": 1715 }, { "epoch": 0.1172371387579422, "grad_norm": 4.838374137878418, "learning_rate": 6.861617181023031e-06, "loss": 0.5666, "step": 1716 }, { "epoch": 0.1173054587688734, "grad_norm": 3.1433815956115723, "learning_rate": 6.861401469496401e-06, "loss": 0.4025, "step": 1717 }, { "epoch": 0.11737377877980461, "grad_norm": 5.947017192840576, "learning_rate": 6.8611855933708575e-06, "loss": 0.5264, "step": 1718 }, { "epoch": 0.1174420987907358, "grad_norm": 5.160083293914795, "learning_rate": 6.860969552656971e-06, "loss": 0.4234, "step": 1719 }, { "epoch": 0.117510418801667, "grad_norm": 4.427141189575195, "learning_rate": 6.860753347365321e-06, "loss": 0.4178, "step": 1720 }, { "epoch": 0.11757873881259821, "grad_norm": 3.6935906410217285, "learning_rate": 6.860536977506494e-06, "loss": 0.356, "step": 1721 }, { "epoch": 0.11764705882352941, "grad_norm": 4.611929416656494, "learning_rate": 6.860320443091086e-06, "loss": 0.4929, "step": 1722 }, { "epoch": 0.11771537883446061, "grad_norm": 4.979726791381836, "learning_rate": 6.8601037441296975e-06, "loss": 0.5015, "step": 1723 }, { "epoch": 0.11778369884539182, "grad_norm": 5.167472839355469, "learning_rate": 6.859886880632944e-06, "loss": 0.4807, "step": 1724 }, { "epoch": 0.11785201885632302, "grad_norm": 3.845442295074463, "learning_rate": 6.85966985261144e-06, "loss": 0.3676, "step": 1725 }, { "epoch": 0.11792033886725421, "grad_norm": 3.3450543880462646, "learning_rate": 6.859452660075816e-06, "loss": 0.4636, "step": 1726 }, { "epoch": 0.11798865887818542, "grad_norm": 4.611940860748291, "learning_rate": 6.859235303036707e-06, "loss": 0.4815, "step": 1727 }, { "epoch": 0.11805697888911662, "grad_norm": 4.427138805389404, "learning_rate": 6.8590177815047545e-06, "loss": 0.4416, "step": 1728 }, { "epoch": 0.11812529890004782, "grad_norm": 3.5902812480926514, "learning_rate": 6.858800095490612e-06, "loss": 0.4798, "step": 1729 }, { "epoch": 0.11819361891097903, "grad_norm": 4.735126972198486, "learning_rate": 6.858582245004938e-06, "loss": 0.6419, "step": 1730 }, { "epoch": 0.11826193892191023, "grad_norm": 4.5069403648376465, "learning_rate": 6.8583642300584e-06, "loss": 0.4079, "step": 1731 }, { "epoch": 0.11833025893284142, "grad_norm": 6.784451007843018, "learning_rate": 6.858146050661675e-06, "loss": 0.4611, "step": 1732 }, { "epoch": 0.11839857894377263, "grad_norm": 4.457805633544922, "learning_rate": 6.857927706825444e-06, "loss": 0.4741, "step": 1733 }, { "epoch": 0.11846689895470383, "grad_norm": 3.7668747901916504, "learning_rate": 6.857709198560401e-06, "loss": 0.4948, "step": 1734 }, { "epoch": 0.11853521896563503, "grad_norm": 4.813012599945068, "learning_rate": 6.857490525877246e-06, "loss": 0.6094, "step": 1735 }, { "epoch": 0.11860353897656624, "grad_norm": 2.975414991378784, "learning_rate": 6.8572716887866854e-06, "loss": 0.3441, "step": 1736 }, { "epoch": 0.11867185898749744, "grad_norm": 4.949438095092773, "learning_rate": 6.857052687299435e-06, "loss": 0.3883, "step": 1737 }, { "epoch": 0.11874017899842863, "grad_norm": 4.911142826080322, "learning_rate": 6.8568335214262194e-06, "loss": 0.3774, "step": 1738 }, { "epoch": 0.11880849900935984, "grad_norm": 4.846449375152588, "learning_rate": 6.856614191177771e-06, "loss": 0.4818, "step": 1739 }, { "epoch": 0.11887681902029104, "grad_norm": 4.856111526489258, "learning_rate": 6.856394696564829e-06, "loss": 0.5353, "step": 1740 }, { "epoch": 0.11894513903122224, "grad_norm": 5.32947301864624, "learning_rate": 6.856175037598142e-06, "loss": 0.5018, "step": 1741 }, { "epoch": 0.11901345904215345, "grad_norm": 3.431021213531494, "learning_rate": 6.855955214288464e-06, "loss": 0.3395, "step": 1742 }, { "epoch": 0.11908177905308465, "grad_norm": 4.929079532623291, "learning_rate": 6.855735226646563e-06, "loss": 0.5153, "step": 1743 }, { "epoch": 0.11915009906401584, "grad_norm": 5.325693607330322, "learning_rate": 6.855515074683208e-06, "loss": 0.5254, "step": 1744 }, { "epoch": 0.11921841907494705, "grad_norm": 4.387486934661865, "learning_rate": 6.855294758409182e-06, "loss": 0.482, "step": 1745 }, { "epoch": 0.11928673908587825, "grad_norm": 3.517493963241577, "learning_rate": 6.8550742778352715e-06, "loss": 0.3429, "step": 1746 }, { "epoch": 0.11935505909680945, "grad_norm": 5.047107219696045, "learning_rate": 6.8548536329722724e-06, "loss": 0.5578, "step": 1747 }, { "epoch": 0.11942337910774066, "grad_norm": 4.089483737945557, "learning_rate": 6.85463282383099e-06, "loss": 0.4506, "step": 1748 }, { "epoch": 0.11949169911867186, "grad_norm": 5.963806629180908, "learning_rate": 6.854411850422237e-06, "loss": 0.5069, "step": 1749 }, { "epoch": 0.11956001912960305, "grad_norm": 4.114312171936035, "learning_rate": 6.854190712756833e-06, "loss": 0.374, "step": 1750 }, { "epoch": 0.11962833914053426, "grad_norm": 4.0956878662109375, "learning_rate": 6.853969410845607e-06, "loss": 0.4284, "step": 1751 }, { "epoch": 0.11969665915146546, "grad_norm": 3.726505756378174, "learning_rate": 6.8537479446993955e-06, "loss": 0.4024, "step": 1752 }, { "epoch": 0.11976497916239666, "grad_norm": 3.941403388977051, "learning_rate": 6.8535263143290436e-06, "loss": 0.5006, "step": 1753 }, { "epoch": 0.11983329917332787, "grad_norm": 4.421465873718262, "learning_rate": 6.8533045197454035e-06, "loss": 0.4497, "step": 1754 }, { "epoch": 0.11990161918425907, "grad_norm": 3.5456807613372803, "learning_rate": 6.853082560959335e-06, "loss": 0.4086, "step": 1755 }, { "epoch": 0.11996993919519026, "grad_norm": 4.856777667999268, "learning_rate": 6.852860437981708e-06, "loss": 0.5662, "step": 1756 }, { "epoch": 0.12003825920612148, "grad_norm": 4.09605073928833, "learning_rate": 6.8526381508234e-06, "loss": 0.4394, "step": 1757 }, { "epoch": 0.12010657921705267, "grad_norm": 4.590183258056641, "learning_rate": 6.852415699495294e-06, "loss": 0.3871, "step": 1758 }, { "epoch": 0.12017489922798387, "grad_norm": 5.452573299407959, "learning_rate": 6.852193084008285e-06, "loss": 0.4489, "step": 1759 }, { "epoch": 0.12024321923891508, "grad_norm": 4.085848331451416, "learning_rate": 6.851970304373271e-06, "loss": 0.4262, "step": 1760 }, { "epoch": 0.12031153924984628, "grad_norm": 4.421599388122559, "learning_rate": 6.851747360601162e-06, "loss": 0.3249, "step": 1761 }, { "epoch": 0.12037985926077749, "grad_norm": 4.663567543029785, "learning_rate": 6.851524252702877e-06, "loss": 0.375, "step": 1762 }, { "epoch": 0.12044817927170869, "grad_norm": 4.681369304656982, "learning_rate": 6.85130098068934e-06, "loss": 0.5058, "step": 1763 }, { "epoch": 0.12051649928263988, "grad_norm": 4.041294097900391, "learning_rate": 6.8510775445714815e-06, "loss": 0.4985, "step": 1764 }, { "epoch": 0.1205848192935711, "grad_norm": 4.882079601287842, "learning_rate": 6.850853944360245e-06, "loss": 0.4404, "step": 1765 }, { "epoch": 0.12065313930450229, "grad_norm": 4.438896656036377, "learning_rate": 6.850630180066579e-06, "loss": 0.5458, "step": 1766 }, { "epoch": 0.12072145931543349, "grad_norm": 4.123732566833496, "learning_rate": 6.850406251701443e-06, "loss": 0.449, "step": 1767 }, { "epoch": 0.1207897793263647, "grad_norm": 6.411483287811279, "learning_rate": 6.850182159275798e-06, "loss": 0.442, "step": 1768 }, { "epoch": 0.1208580993372959, "grad_norm": 4.707142353057861, "learning_rate": 6.849957902800619e-06, "loss": 0.4988, "step": 1769 }, { "epoch": 0.1209264193482271, "grad_norm": 3.609114646911621, "learning_rate": 6.849733482286888e-06, "loss": 0.3988, "step": 1770 }, { "epoch": 0.1209947393591583, "grad_norm": 4.881253242492676, "learning_rate": 6.849508897745595e-06, "loss": 0.4844, "step": 1771 }, { "epoch": 0.1210630593700895, "grad_norm": 5.258716583251953, "learning_rate": 6.8492841491877345e-06, "loss": 0.493, "step": 1772 }, { "epoch": 0.1211313793810207, "grad_norm": 5.372257709503174, "learning_rate": 6.849059236624313e-06, "loss": 0.4523, "step": 1773 }, { "epoch": 0.12119969939195191, "grad_norm": 4.350595474243164, "learning_rate": 6.848834160066346e-06, "loss": 0.4912, "step": 1774 }, { "epoch": 0.1212680194028831, "grad_norm": 4.721629619598389, "learning_rate": 6.848608919524852e-06, "loss": 0.4155, "step": 1775 }, { "epoch": 0.1213363394138143, "grad_norm": 4.747425079345703, "learning_rate": 6.848383515010862e-06, "loss": 0.4746, "step": 1776 }, { "epoch": 0.12140465942474551, "grad_norm": 4.884566307067871, "learning_rate": 6.8481579465354125e-06, "loss": 0.437, "step": 1777 }, { "epoch": 0.12147297943567671, "grad_norm": 4.817719459533691, "learning_rate": 6.84793221410955e-06, "loss": 0.3988, "step": 1778 }, { "epoch": 0.12154129944660791, "grad_norm": 3.9806270599365234, "learning_rate": 6.847706317744328e-06, "loss": 0.3367, "step": 1779 }, { "epoch": 0.12160961945753912, "grad_norm": 4.527724742889404, "learning_rate": 6.847480257450807e-06, "loss": 0.4111, "step": 1780 }, { "epoch": 0.12167793946847032, "grad_norm": 4.058917045593262, "learning_rate": 6.847254033240057e-06, "loss": 0.4531, "step": 1781 }, { "epoch": 0.12174625947940151, "grad_norm": 5.318237781524658, "learning_rate": 6.847027645123156e-06, "loss": 0.361, "step": 1782 }, { "epoch": 0.12181457949033273, "grad_norm": 4.51426362991333, "learning_rate": 6.846801093111189e-06, "loss": 0.5859, "step": 1783 }, { "epoch": 0.12188289950126392, "grad_norm": 4.635857105255127, "learning_rate": 6.84657437721525e-06, "loss": 0.4703, "step": 1784 }, { "epoch": 0.12195121951219512, "grad_norm": 4.265644550323486, "learning_rate": 6.846347497446442e-06, "loss": 0.4793, "step": 1785 }, { "epoch": 0.12201953952312633, "grad_norm": 5.454245567321777, "learning_rate": 6.846120453815873e-06, "loss": 0.4959, "step": 1786 }, { "epoch": 0.12208785953405753, "grad_norm": 4.079835414886475, "learning_rate": 6.84589324633466e-06, "loss": 0.4134, "step": 1787 }, { "epoch": 0.12215617954498872, "grad_norm": 5.170139789581299, "learning_rate": 6.8456658750139315e-06, "loss": 0.4078, "step": 1788 }, { "epoch": 0.12222449955591994, "grad_norm": 3.8005595207214355, "learning_rate": 6.845438339864819e-06, "loss": 0.3966, "step": 1789 }, { "epoch": 0.12229281956685113, "grad_norm": 5.20958137512207, "learning_rate": 6.8452106408984645e-06, "loss": 0.5305, "step": 1790 }, { "epoch": 0.12236113957778233, "grad_norm": 3.610365867614746, "learning_rate": 6.844982778126019e-06, "loss": 0.3995, "step": 1791 }, { "epoch": 0.12242945958871354, "grad_norm": 5.050682067871094, "learning_rate": 6.8447547515586385e-06, "loss": 0.4873, "step": 1792 }, { "epoch": 0.12249777959964474, "grad_norm": 4.964012145996094, "learning_rate": 6.844526561207491e-06, "loss": 0.5165, "step": 1793 }, { "epoch": 0.12256609961057593, "grad_norm": 4.906362533569336, "learning_rate": 6.8442982070837485e-06, "loss": 0.4637, "step": 1794 }, { "epoch": 0.12263441962150715, "grad_norm": 5.174752712249756, "learning_rate": 6.844069689198594e-06, "loss": 0.5225, "step": 1795 }, { "epoch": 0.12270273963243834, "grad_norm": 4.513548851013184, "learning_rate": 6.843841007563218e-06, "loss": 0.4779, "step": 1796 }, { "epoch": 0.12277105964336954, "grad_norm": 5.221304416656494, "learning_rate": 6.843612162188817e-06, "loss": 0.3769, "step": 1797 }, { "epoch": 0.12283937965430075, "grad_norm": 4.787476539611816, "learning_rate": 6.843383153086598e-06, "loss": 0.596, "step": 1798 }, { "epoch": 0.12290769966523195, "grad_norm": 5.0949811935424805, "learning_rate": 6.843153980267774e-06, "loss": 0.5913, "step": 1799 }, { "epoch": 0.12297601967616315, "grad_norm": 4.581943511962891, "learning_rate": 6.842924643743568e-06, "loss": 0.4432, "step": 1800 }, { "epoch": 0.12304433968709436, "grad_norm": 5.493159294128418, "learning_rate": 6.842695143525209e-06, "loss": 0.5822, "step": 1801 }, { "epoch": 0.12311265969802555, "grad_norm": 5.493580341339111, "learning_rate": 6.8424654796239354e-06, "loss": 0.5291, "step": 1802 }, { "epoch": 0.12318097970895675, "grad_norm": 5.111479759216309, "learning_rate": 6.842235652050994e-06, "loss": 0.3905, "step": 1803 }, { "epoch": 0.12324929971988796, "grad_norm": 4.3129377365112305, "learning_rate": 6.842005660817638e-06, "loss": 0.4359, "step": 1804 }, { "epoch": 0.12331761973081916, "grad_norm": 4.743738174438477, "learning_rate": 6.8417755059351285e-06, "loss": 0.4234, "step": 1805 }, { "epoch": 0.12338593974175036, "grad_norm": 5.351105690002441, "learning_rate": 6.841545187414738e-06, "loss": 0.4511, "step": 1806 }, { "epoch": 0.12345425975268157, "grad_norm": 4.293558597564697, "learning_rate": 6.841314705267743e-06, "loss": 0.3977, "step": 1807 }, { "epoch": 0.12352257976361276, "grad_norm": 4.251938819885254, "learning_rate": 6.841084059505431e-06, "loss": 0.4049, "step": 1808 }, { "epoch": 0.12359089977454396, "grad_norm": 3.1333978176116943, "learning_rate": 6.8408532501390934e-06, "loss": 0.3542, "step": 1809 }, { "epoch": 0.12365921978547517, "grad_norm": 5.543499946594238, "learning_rate": 6.840622277180035e-06, "loss": 0.4948, "step": 1810 }, { "epoch": 0.12372753979640637, "grad_norm": 4.184708595275879, "learning_rate": 6.8403911406395635e-06, "loss": 0.5529, "step": 1811 }, { "epoch": 0.12379585980733757, "grad_norm": 5.013919353485107, "learning_rate": 6.840159840528999e-06, "loss": 0.4975, "step": 1812 }, { "epoch": 0.12386417981826878, "grad_norm": 4.231507301330566, "learning_rate": 6.839928376859668e-06, "loss": 0.3658, "step": 1813 }, { "epoch": 0.12393249982919997, "grad_norm": 4.542490005493164, "learning_rate": 6.839696749642903e-06, "loss": 0.3628, "step": 1814 }, { "epoch": 0.12400081984013117, "grad_norm": 5.631608486175537, "learning_rate": 6.839464958890047e-06, "loss": 0.4001, "step": 1815 }, { "epoch": 0.12406913985106238, "grad_norm": 4.5371479988098145, "learning_rate": 6.83923300461245e-06, "loss": 0.5503, "step": 1816 }, { "epoch": 0.12413745986199358, "grad_norm": 4.619955539703369, "learning_rate": 6.8390008868214705e-06, "loss": 0.5029, "step": 1817 }, { "epoch": 0.12420577987292478, "grad_norm": 3.502044439315796, "learning_rate": 6.838768605528474e-06, "loss": 0.4191, "step": 1818 }, { "epoch": 0.12427409988385599, "grad_norm": 4.89454460144043, "learning_rate": 6.8385361607448364e-06, "loss": 0.4175, "step": 1819 }, { "epoch": 0.12434241989478718, "grad_norm": 5.52889347076416, "learning_rate": 6.838303552481938e-06, "loss": 0.3613, "step": 1820 }, { "epoch": 0.12441073990571838, "grad_norm": 4.172506809234619, "learning_rate": 6.838070780751168e-06, "loss": 0.434, "step": 1821 }, { "epoch": 0.12447905991664959, "grad_norm": 5.474318504333496, "learning_rate": 6.837837845563928e-06, "loss": 0.4588, "step": 1822 }, { "epoch": 0.12454737992758079, "grad_norm": 4.612488269805908, "learning_rate": 6.837604746931622e-06, "loss": 0.458, "step": 1823 }, { "epoch": 0.12461569993851199, "grad_norm": 3.808480978012085, "learning_rate": 6.837371484865665e-06, "loss": 0.4192, "step": 1824 }, { "epoch": 0.1246840199494432, "grad_norm": 3.6560871601104736, "learning_rate": 6.83713805937748e-06, "loss": 0.3952, "step": 1825 }, { "epoch": 0.1247523399603744, "grad_norm": 4.498500347137451, "learning_rate": 6.836904470478495e-06, "loss": 0.4241, "step": 1826 }, { "epoch": 0.12482065997130559, "grad_norm": 4.6794962882995605, "learning_rate": 6.8366707181801495e-06, "loss": 0.4226, "step": 1827 }, { "epoch": 0.1248889799822368, "grad_norm": 5.242727279663086, "learning_rate": 6.83643680249389e-06, "loss": 0.4086, "step": 1828 }, { "epoch": 0.124957299993168, "grad_norm": 5.380890846252441, "learning_rate": 6.83620272343117e-06, "loss": 0.3905, "step": 1829 }, { "epoch": 0.1250256200040992, "grad_norm": 3.5526440143585205, "learning_rate": 6.8359684810034514e-06, "loss": 0.3859, "step": 1830 }, { "epoch": 0.1250939400150304, "grad_norm": 4.3533935546875, "learning_rate": 6.835734075222206e-06, "loss": 0.4191, "step": 1831 }, { "epoch": 0.1251622600259616, "grad_norm": 4.871601581573486, "learning_rate": 6.835499506098912e-06, "loss": 0.4164, "step": 1832 }, { "epoch": 0.1252305800368928, "grad_norm": 5.096592426300049, "learning_rate": 6.835264773645053e-06, "loss": 0.4816, "step": 1833 }, { "epoch": 0.125298900047824, "grad_norm": 4.250738620758057, "learning_rate": 6.835029877872126e-06, "loss": 0.4154, "step": 1834 }, { "epoch": 0.1253672200587552, "grad_norm": 4.516358852386475, "learning_rate": 6.8347948187916326e-06, "loss": 0.4458, "step": 1835 }, { "epoch": 0.1254355400696864, "grad_norm": 5.217809677124023, "learning_rate": 6.834559596415082e-06, "loss": 0.3985, "step": 1836 }, { "epoch": 0.12550386008061762, "grad_norm": 5.868136882781982, "learning_rate": 6.8343242107539935e-06, "loss": 0.4531, "step": 1837 }, { "epoch": 0.1255721800915488, "grad_norm": 5.0810651779174805, "learning_rate": 6.834088661819893e-06, "loss": 0.4439, "step": 1838 }, { "epoch": 0.12564050010248, "grad_norm": 5.095423698425293, "learning_rate": 6.8338529496243135e-06, "loss": 0.4579, "step": 1839 }, { "epoch": 0.12570882011341122, "grad_norm": 4.752091407775879, "learning_rate": 6.833617074178799e-06, "loss": 0.4235, "step": 1840 }, { "epoch": 0.1257771401243424, "grad_norm": 5.9014997482299805, "learning_rate": 6.833381035494899e-06, "loss": 0.5081, "step": 1841 }, { "epoch": 0.12584546013527362, "grad_norm": 6.159440517425537, "learning_rate": 6.8331448335841725e-06, "loss": 0.3822, "step": 1842 }, { "epoch": 0.12591378014620483, "grad_norm": 4.898373603820801, "learning_rate": 6.832908468458185e-06, "loss": 0.4486, "step": 1843 }, { "epoch": 0.125982100157136, "grad_norm": 4.626499176025391, "learning_rate": 6.8326719401285086e-06, "loss": 0.4764, "step": 1844 }, { "epoch": 0.12605042016806722, "grad_norm": 3.3825724124908447, "learning_rate": 6.832435248606728e-06, "loss": 0.4297, "step": 1845 }, { "epoch": 0.12611874017899843, "grad_norm": 4.521305084228516, "learning_rate": 6.832198393904434e-06, "loss": 0.4083, "step": 1846 }, { "epoch": 0.12618706018992962, "grad_norm": 4.6439948081970215, "learning_rate": 6.831961376033223e-06, "loss": 0.4128, "step": 1847 }, { "epoch": 0.12625538020086083, "grad_norm": 5.703760623931885, "learning_rate": 6.8317241950047e-06, "loss": 0.5381, "step": 1848 }, { "epoch": 0.12632370021179204, "grad_norm": 4.140481472015381, "learning_rate": 6.831486850830483e-06, "loss": 0.4467, "step": 1849 }, { "epoch": 0.12639202022272322, "grad_norm": 5.5061540603637695, "learning_rate": 6.831249343522191e-06, "loss": 0.3793, "step": 1850 }, { "epoch": 0.12646034023365443, "grad_norm": 6.751096725463867, "learning_rate": 6.831011673091455e-06, "loss": 0.3273, "step": 1851 }, { "epoch": 0.12652866024458564, "grad_norm": 3.987990617752075, "learning_rate": 6.830773839549914e-06, "loss": 0.4144, "step": 1852 }, { "epoch": 0.12659698025551683, "grad_norm": 4.117154121398926, "learning_rate": 6.830535842909211e-06, "loss": 0.4596, "step": 1853 }, { "epoch": 0.12666530026644804, "grad_norm": 4.4376220703125, "learning_rate": 6.830297683181003e-06, "loss": 0.3975, "step": 1854 }, { "epoch": 0.12673362027737925, "grad_norm": 4.850616455078125, "learning_rate": 6.830059360376952e-06, "loss": 0.3608, "step": 1855 }, { "epoch": 0.12680194028831043, "grad_norm": 3.6652982234954834, "learning_rate": 6.8298208745087275e-06, "loss": 0.4649, "step": 1856 }, { "epoch": 0.12687026029924164, "grad_norm": 4.701138973236084, "learning_rate": 6.829582225588006e-06, "loss": 0.4002, "step": 1857 }, { "epoch": 0.12693858031017285, "grad_norm": 4.747945308685303, "learning_rate": 6.8293434136264755e-06, "loss": 0.4367, "step": 1858 }, { "epoch": 0.12700690032110404, "grad_norm": 4.709779739379883, "learning_rate": 6.82910443863583e-06, "loss": 0.4091, "step": 1859 }, { "epoch": 0.12707522033203525, "grad_norm": 5.03069543838501, "learning_rate": 6.82886530062777e-06, "loss": 0.4759, "step": 1860 }, { "epoch": 0.12714354034296646, "grad_norm": 4.909942626953125, "learning_rate": 6.828625999614007e-06, "loss": 0.446, "step": 1861 }, { "epoch": 0.12721186035389764, "grad_norm": 4.733447074890137, "learning_rate": 6.828386535606257e-06, "loss": 0.4807, "step": 1862 }, { "epoch": 0.12728018036482885, "grad_norm": 4.3184590339660645, "learning_rate": 6.828146908616248e-06, "loss": 0.4356, "step": 1863 }, { "epoch": 0.12734850037576007, "grad_norm": 4.570539951324463, "learning_rate": 6.827907118655713e-06, "loss": 0.4483, "step": 1864 }, { "epoch": 0.12741682038669125, "grad_norm": 4.581055164337158, "learning_rate": 6.827667165736393e-06, "loss": 0.3746, "step": 1865 }, { "epoch": 0.12748514039762246, "grad_norm": 5.102787017822266, "learning_rate": 6.8274270498700394e-06, "loss": 0.4393, "step": 1866 }, { "epoch": 0.12755346040855367, "grad_norm": 4.563209533691406, "learning_rate": 6.82718677106841e-06, "loss": 0.4163, "step": 1867 }, { "epoch": 0.12762178041948485, "grad_norm": 4.994136810302734, "learning_rate": 6.826946329343268e-06, "loss": 0.4781, "step": 1868 }, { "epoch": 0.12769010043041606, "grad_norm": 4.745979309082031, "learning_rate": 6.826705724706391e-06, "loss": 0.4722, "step": 1869 }, { "epoch": 0.12775842044134728, "grad_norm": 5.022345542907715, "learning_rate": 6.826464957169558e-06, "loss": 0.4798, "step": 1870 }, { "epoch": 0.12782674045227846, "grad_norm": 6.087778568267822, "learning_rate": 6.826224026744561e-06, "loss": 0.6391, "step": 1871 }, { "epoch": 0.12789506046320967, "grad_norm": 5.147538185119629, "learning_rate": 6.825982933443194e-06, "loss": 0.4247, "step": 1872 }, { "epoch": 0.12796338047414088, "grad_norm": 3.1998162269592285, "learning_rate": 6.825741677277267e-06, "loss": 0.3442, "step": 1873 }, { "epoch": 0.12803170048507206, "grad_norm": 3.711519241333008, "learning_rate": 6.8255002582585915e-06, "loss": 0.4306, "step": 1874 }, { "epoch": 0.12810002049600328, "grad_norm": 5.856657028198242, "learning_rate": 6.825258676398989e-06, "loss": 0.5067, "step": 1875 }, { "epoch": 0.12816834050693449, "grad_norm": 4.406485080718994, "learning_rate": 6.82501693171029e-06, "loss": 0.5106, "step": 1876 }, { "epoch": 0.12823666051786567, "grad_norm": 5.639950275421143, "learning_rate": 6.824775024204333e-06, "loss": 0.4861, "step": 1877 }, { "epoch": 0.12830498052879688, "grad_norm": 4.158560752868652, "learning_rate": 6.82453295389296e-06, "loss": 0.3702, "step": 1878 }, { "epoch": 0.1283733005397281, "grad_norm": 7.563809394836426, "learning_rate": 6.824290720788027e-06, "loss": 0.5257, "step": 1879 }, { "epoch": 0.12844162055065927, "grad_norm": 4.0283918380737305, "learning_rate": 6.824048324901395e-06, "loss": 0.4942, "step": 1880 }, { "epoch": 0.12850994056159049, "grad_norm": 4.552830219268799, "learning_rate": 6.823805766244935e-06, "loss": 0.4365, "step": 1881 }, { "epoch": 0.1285782605725217, "grad_norm": 4.754653453826904, "learning_rate": 6.823563044830523e-06, "loss": 0.4169, "step": 1882 }, { "epoch": 0.12864658058345288, "grad_norm": 6.1363935470581055, "learning_rate": 6.823320160670044e-06, "loss": 0.5107, "step": 1883 }, { "epoch": 0.1287149005943841, "grad_norm": 2.824596405029297, "learning_rate": 6.823077113775393e-06, "loss": 0.4108, "step": 1884 }, { "epoch": 0.1287832206053153, "grad_norm": 3.588808298110962, "learning_rate": 6.82283390415847e-06, "loss": 0.4794, "step": 1885 }, { "epoch": 0.12885154061624648, "grad_norm": 4.989952087402344, "learning_rate": 6.822590531831184e-06, "loss": 0.5205, "step": 1886 }, { "epoch": 0.1289198606271777, "grad_norm": 4.702836990356445, "learning_rate": 6.822346996805454e-06, "loss": 0.4895, "step": 1887 }, { "epoch": 0.1289881806381089, "grad_norm": 4.934508323669434, "learning_rate": 6.822103299093204e-06, "loss": 0.5392, "step": 1888 }, { "epoch": 0.1290565006490401, "grad_norm": 4.890828609466553, "learning_rate": 6.821859438706367e-06, "loss": 0.5137, "step": 1889 }, { "epoch": 0.1291248206599713, "grad_norm": 5.8660359382629395, "learning_rate": 6.821615415656885e-06, "loss": 0.3323, "step": 1890 }, { "epoch": 0.1291931406709025, "grad_norm": 4.433281421661377, "learning_rate": 6.821371229956707e-06, "loss": 0.4653, "step": 1891 }, { "epoch": 0.1292614606818337, "grad_norm": 3.574950933456421, "learning_rate": 6.8211268816177905e-06, "loss": 0.4344, "step": 1892 }, { "epoch": 0.1293297806927649, "grad_norm": 4.818638801574707, "learning_rate": 6.820882370652099e-06, "loss": 0.5493, "step": 1893 }, { "epoch": 0.12939810070369612, "grad_norm": 4.192071437835693, "learning_rate": 6.8206376970716076e-06, "loss": 0.403, "step": 1894 }, { "epoch": 0.1294664207146273, "grad_norm": 3.6111385822296143, "learning_rate": 6.820392860888296e-06, "loss": 0.3082, "step": 1895 }, { "epoch": 0.1295347407255585, "grad_norm": 6.036190509796143, "learning_rate": 6.820147862114153e-06, "loss": 0.5421, "step": 1896 }, { "epoch": 0.12960306073648972, "grad_norm": 4.841505527496338, "learning_rate": 6.819902700761177e-06, "loss": 0.4605, "step": 1897 }, { "epoch": 0.12967138074742093, "grad_norm": 4.942228317260742, "learning_rate": 6.819657376841371e-06, "loss": 0.4028, "step": 1898 }, { "epoch": 0.12973970075835212, "grad_norm": 4.8213725090026855, "learning_rate": 6.819411890366749e-06, "loss": 0.4739, "step": 1899 }, { "epoch": 0.12980802076928333, "grad_norm": 4.2729692459106445, "learning_rate": 6.819166241349332e-06, "loss": 0.4147, "step": 1900 }, { "epoch": 0.12987634078021454, "grad_norm": 5.7713446617126465, "learning_rate": 6.818920429801149e-06, "loss": 0.4684, "step": 1901 }, { "epoch": 0.12994466079114572, "grad_norm": 4.474407196044922, "learning_rate": 6.818674455734236e-06, "loss": 0.3792, "step": 1902 }, { "epoch": 0.13001298080207693, "grad_norm": 5.278648376464844, "learning_rate": 6.818428319160637e-06, "loss": 0.4471, "step": 1903 }, { "epoch": 0.13008130081300814, "grad_norm": 4.361280918121338, "learning_rate": 6.818182020092407e-06, "loss": 0.4464, "step": 1904 }, { "epoch": 0.13014962082393933, "grad_norm": 3.7285115718841553, "learning_rate": 6.817935558541604e-06, "loss": 0.3871, "step": 1905 }, { "epoch": 0.13021794083487054, "grad_norm": 4.938891410827637, "learning_rate": 6.817688934520298e-06, "loss": 0.4643, "step": 1906 }, { "epoch": 0.13028626084580175, "grad_norm": 4.2727274894714355, "learning_rate": 6.8174421480405646e-06, "loss": 0.4664, "step": 1907 }, { "epoch": 0.13035458085673293, "grad_norm": 5.087023735046387, "learning_rate": 6.81719519911449e-06, "loss": 0.4277, "step": 1908 }, { "epoch": 0.13042290086766414, "grad_norm": 5.884597301483154, "learning_rate": 6.816948087754165e-06, "loss": 0.4199, "step": 1909 }, { "epoch": 0.13049122087859535, "grad_norm": 3.834164619445801, "learning_rate": 6.8167008139716906e-06, "loss": 0.3245, "step": 1910 }, { "epoch": 0.13055954088952654, "grad_norm": 3.5468366146087646, "learning_rate": 6.816453377779175e-06, "loss": 0.5143, "step": 1911 }, { "epoch": 0.13062786090045775, "grad_norm": 3.8972036838531494, "learning_rate": 6.816205779188734e-06, "loss": 0.413, "step": 1912 }, { "epoch": 0.13069618091138896, "grad_norm": 5.300376892089844, "learning_rate": 6.815958018212494e-06, "loss": 0.4597, "step": 1913 }, { "epoch": 0.13076450092232014, "grad_norm": 4.620910167694092, "learning_rate": 6.815710094862585e-06, "loss": 0.4997, "step": 1914 }, { "epoch": 0.13083282093325135, "grad_norm": 4.365530967712402, "learning_rate": 6.815462009151147e-06, "loss": 0.5142, "step": 1915 }, { "epoch": 0.13090114094418256, "grad_norm": 4.491677761077881, "learning_rate": 6.815213761090329e-06, "loss": 0.4194, "step": 1916 }, { "epoch": 0.13096946095511375, "grad_norm": 4.024829387664795, "learning_rate": 6.814965350692288e-06, "loss": 0.3727, "step": 1917 }, { "epoch": 0.13103778096604496, "grad_norm": 4.81095552444458, "learning_rate": 6.814716777969186e-06, "loss": 0.4539, "step": 1918 }, { "epoch": 0.13110610097697617, "grad_norm": 4.417872905731201, "learning_rate": 6.814468042933195e-06, "loss": 0.3951, "step": 1919 }, { "epoch": 0.13117442098790735, "grad_norm": 3.8975508213043213, "learning_rate": 6.814219145596498e-06, "loss": 0.3757, "step": 1920 }, { "epoch": 0.13124274099883856, "grad_norm": 5.061924457550049, "learning_rate": 6.813970085971279e-06, "loss": 0.4493, "step": 1921 }, { "epoch": 0.13131106100976978, "grad_norm": 3.9213101863861084, "learning_rate": 6.813720864069735e-06, "loss": 0.3454, "step": 1922 }, { "epoch": 0.13137938102070096, "grad_norm": 5.5182881355285645, "learning_rate": 6.813471479904072e-06, "loss": 0.3732, "step": 1923 }, { "epoch": 0.13144770103163217, "grad_norm": 5.822735786437988, "learning_rate": 6.8132219334864986e-06, "loss": 0.5502, "step": 1924 }, { "epoch": 0.13151602104256338, "grad_norm": 5.100861072540283, "learning_rate": 6.812972224829236e-06, "loss": 0.3453, "step": 1925 }, { "epoch": 0.13158434105349456, "grad_norm": 4.218437194824219, "learning_rate": 6.8127223539445104e-06, "loss": 0.4416, "step": 1926 }, { "epoch": 0.13165266106442577, "grad_norm": 4.906314373016357, "learning_rate": 6.8124723208445596e-06, "loss": 0.558, "step": 1927 }, { "epoch": 0.13172098107535699, "grad_norm": 5.550416946411133, "learning_rate": 6.812222125541625e-06, "loss": 0.5603, "step": 1928 }, { "epoch": 0.13178930108628817, "grad_norm": 4.2667341232299805, "learning_rate": 6.811971768047958e-06, "loss": 0.4825, "step": 1929 }, { "epoch": 0.13185762109721938, "grad_norm": 5.023087024688721, "learning_rate": 6.81172124837582e-06, "loss": 0.2969, "step": 1930 }, { "epoch": 0.1319259411081506, "grad_norm": 4.158977508544922, "learning_rate": 6.811470566537477e-06, "loss": 0.4273, "step": 1931 }, { "epoch": 0.13199426111908177, "grad_norm": 4.477953910827637, "learning_rate": 6.811219722545205e-06, "loss": 0.4294, "step": 1932 }, { "epoch": 0.13206258113001298, "grad_norm": 4.153700351715088, "learning_rate": 6.810968716411285e-06, "loss": 0.3874, "step": 1933 }, { "epoch": 0.1321309011409442, "grad_norm": 4.557558059692383, "learning_rate": 6.81071754814801e-06, "loss": 0.4264, "step": 1934 }, { "epoch": 0.13219922115187538, "grad_norm": 4.718655586242676, "learning_rate": 6.810466217767678e-06, "loss": 0.4733, "step": 1935 }, { "epoch": 0.1322675411628066, "grad_norm": 4.1297712326049805, "learning_rate": 6.810214725282597e-06, "loss": 0.3436, "step": 1936 }, { "epoch": 0.1323358611737378, "grad_norm": 5.14635705947876, "learning_rate": 6.809963070705081e-06, "loss": 0.4435, "step": 1937 }, { "epoch": 0.13240418118466898, "grad_norm": 7.2854814529418945, "learning_rate": 6.809711254047454e-06, "loss": 0.4797, "step": 1938 }, { "epoch": 0.1324725011956002, "grad_norm": 4.966057777404785, "learning_rate": 6.809459275322046e-06, "loss": 0.4354, "step": 1939 }, { "epoch": 0.1325408212065314, "grad_norm": 3.8156843185424805, "learning_rate": 6.809207134541195e-06, "loss": 0.5115, "step": 1940 }, { "epoch": 0.1326091412174626, "grad_norm": 4.081060886383057, "learning_rate": 6.808954831717249e-06, "loss": 0.4165, "step": 1941 }, { "epoch": 0.1326774612283938, "grad_norm": 4.546128273010254, "learning_rate": 6.8087023668625625e-06, "loss": 0.4052, "step": 1942 }, { "epoch": 0.132745781239325, "grad_norm": 5.297682762145996, "learning_rate": 6.808449739989497e-06, "loss": 0.5221, "step": 1943 }, { "epoch": 0.1328141012502562, "grad_norm": 4.448784351348877, "learning_rate": 6.808196951110424e-06, "loss": 0.4664, "step": 1944 }, { "epoch": 0.1328824212611874, "grad_norm": 4.5303850173950195, "learning_rate": 6.8079440002377215e-06, "loss": 0.4408, "step": 1945 }, { "epoch": 0.13295074127211862, "grad_norm": 7.515509128570557, "learning_rate": 6.807690887383776e-06, "loss": 0.6081, "step": 1946 }, { "epoch": 0.1330190612830498, "grad_norm": 4.981361389160156, "learning_rate": 6.807437612560981e-06, "loss": 0.4362, "step": 1947 }, { "epoch": 0.133087381293981, "grad_norm": 5.315288543701172, "learning_rate": 6.80718417578174e-06, "loss": 0.4524, "step": 1948 }, { "epoch": 0.13315570130491222, "grad_norm": 4.539191246032715, "learning_rate": 6.806930577058462e-06, "loss": 0.4427, "step": 1949 }, { "epoch": 0.1332240213158434, "grad_norm": 5.017223834991455, "learning_rate": 6.806676816403566e-06, "loss": 0.3384, "step": 1950 }, { "epoch": 0.13329234132677462, "grad_norm": 4.715338706970215, "learning_rate": 6.806422893829476e-06, "loss": 0.3433, "step": 1951 }, { "epoch": 0.13336066133770583, "grad_norm": 3.573136568069458, "learning_rate": 6.806168809348628e-06, "loss": 0.3932, "step": 1952 }, { "epoch": 0.133428981348637, "grad_norm": 4.624008655548096, "learning_rate": 6.805914562973464e-06, "loss": 0.5684, "step": 1953 }, { "epoch": 0.13349730135956822, "grad_norm": 3.9227774143218994, "learning_rate": 6.805660154716432e-06, "loss": 0.4032, "step": 1954 }, { "epoch": 0.13356562137049943, "grad_norm": 4.3774542808532715, "learning_rate": 6.8054055845899915e-06, "loss": 0.4472, "step": 1955 }, { "epoch": 0.13363394138143062, "grad_norm": 5.5424323081970215, "learning_rate": 6.805150852606607e-06, "loss": 0.4535, "step": 1956 }, { "epoch": 0.13370226139236183, "grad_norm": 5.360537052154541, "learning_rate": 6.804895958778752e-06, "loss": 0.4789, "step": 1957 }, { "epoch": 0.13377058140329304, "grad_norm": 4.5036702156066895, "learning_rate": 6.804640903118908e-06, "loss": 0.4092, "step": 1958 }, { "epoch": 0.13383890141422422, "grad_norm": 4.4238505363464355, "learning_rate": 6.804385685639566e-06, "loss": 0.4295, "step": 1959 }, { "epoch": 0.13390722142515543, "grad_norm": 4.455329895019531, "learning_rate": 6.8041303063532215e-06, "loss": 0.4379, "step": 1960 }, { "epoch": 0.13397554143608664, "grad_norm": 4.979348659515381, "learning_rate": 6.8038747652723796e-06, "loss": 0.4119, "step": 1961 }, { "epoch": 0.13404386144701783, "grad_norm": 3.4345357418060303, "learning_rate": 6.803619062409555e-06, "loss": 0.4287, "step": 1962 }, { "epoch": 0.13411218145794904, "grad_norm": 5.5179057121276855, "learning_rate": 6.803363197777267e-06, "loss": 0.4505, "step": 1963 }, { "epoch": 0.13418050146888025, "grad_norm": 4.538340091705322, "learning_rate": 6.803107171388047e-06, "loss": 0.3906, "step": 1964 }, { "epoch": 0.13424882147981143, "grad_norm": 3.5808048248291016, "learning_rate": 6.8028509832544295e-06, "loss": 0.4107, "step": 1965 }, { "epoch": 0.13431714149074264, "grad_norm": 5.084666728973389, "learning_rate": 6.802594633388961e-06, "loss": 0.4422, "step": 1966 }, { "epoch": 0.13438546150167385, "grad_norm": 4.769865036010742, "learning_rate": 6.802338121804194e-06, "loss": 0.465, "step": 1967 }, { "epoch": 0.13445378151260504, "grad_norm": 4.462982654571533, "learning_rate": 6.802081448512688e-06, "loss": 0.4516, "step": 1968 }, { "epoch": 0.13452210152353625, "grad_norm": 4.6410441398620605, "learning_rate": 6.801824613527012e-06, "loss": 0.4972, "step": 1969 }, { "epoch": 0.13459042153446746, "grad_norm": 4.601633071899414, "learning_rate": 6.801567616859744e-06, "loss": 0.4139, "step": 1970 }, { "epoch": 0.13465874154539864, "grad_norm": 5.93872594833374, "learning_rate": 6.8013104585234655e-06, "loss": 0.3915, "step": 1971 }, { "epoch": 0.13472706155632985, "grad_norm": 5.076761245727539, "learning_rate": 6.801053138530773e-06, "loss": 0.4498, "step": 1972 }, { "epoch": 0.13479538156726106, "grad_norm": 3.841987133026123, "learning_rate": 6.800795656894263e-06, "loss": 0.3554, "step": 1973 }, { "epoch": 0.13486370157819225, "grad_norm": 6.525400638580322, "learning_rate": 6.800538013626547e-06, "loss": 0.539, "step": 1974 }, { "epoch": 0.13493202158912346, "grad_norm": 3.915055274963379, "learning_rate": 6.8002802087402375e-06, "loss": 0.4742, "step": 1975 }, { "epoch": 0.13500034160005467, "grad_norm": 5.630869388580322, "learning_rate": 6.800022242247961e-06, "loss": 0.5357, "step": 1976 }, { "epoch": 0.13506866161098585, "grad_norm": 3.897430896759033, "learning_rate": 6.799764114162349e-06, "loss": 0.3107, "step": 1977 }, { "epoch": 0.13513698162191706, "grad_norm": 4.294949531555176, "learning_rate": 6.799505824496041e-06, "loss": 0.4392, "step": 1978 }, { "epoch": 0.13520530163284827, "grad_norm": 4.766075134277344, "learning_rate": 6.799247373261686e-06, "loss": 0.3942, "step": 1979 }, { "epoch": 0.13527362164377946, "grad_norm": 4.8877763748168945, "learning_rate": 6.7989887604719375e-06, "loss": 0.4304, "step": 1980 }, { "epoch": 0.13534194165471067, "grad_norm": 4.448221206665039, "learning_rate": 6.79872998613946e-06, "loss": 0.3881, "step": 1981 }, { "epoch": 0.13541026166564188, "grad_norm": 4.417725563049316, "learning_rate": 6.798471050276925e-06, "loss": 0.4861, "step": 1982 }, { "epoch": 0.13547858167657306, "grad_norm": 5.380626678466797, "learning_rate": 6.798211952897012e-06, "loss": 0.5157, "step": 1983 }, { "epoch": 0.13554690168750427, "grad_norm": 4.236386299133301, "learning_rate": 6.797952694012409e-06, "loss": 0.483, "step": 1984 }, { "epoch": 0.13561522169843548, "grad_norm": 3.459226369857788, "learning_rate": 6.797693273635811e-06, "loss": 0.3953, "step": 1985 }, { "epoch": 0.13568354170936667, "grad_norm": 5.2188239097595215, "learning_rate": 6.79743369177992e-06, "loss": 0.4083, "step": 1986 }, { "epoch": 0.13575186172029788, "grad_norm": 4.977123260498047, "learning_rate": 6.797173948457448e-06, "loss": 0.3902, "step": 1987 }, { "epoch": 0.1358201817312291, "grad_norm": 4.792862892150879, "learning_rate": 6.796914043681112e-06, "loss": 0.4066, "step": 1988 }, { "epoch": 0.13588850174216027, "grad_norm": 3.985074520111084, "learning_rate": 6.796653977463643e-06, "loss": 0.3747, "step": 1989 }, { "epoch": 0.13595682175309148, "grad_norm": 3.59224534034729, "learning_rate": 6.796393749817772e-06, "loss": 0.3693, "step": 1990 }, { "epoch": 0.1360251417640227, "grad_norm": 3.589686393737793, "learning_rate": 6.7961333607562445e-06, "loss": 0.3527, "step": 1991 }, { "epoch": 0.13609346177495388, "grad_norm": 4.531583309173584, "learning_rate": 6.795872810291807e-06, "loss": 0.4502, "step": 1992 }, { "epoch": 0.1361617817858851, "grad_norm": 4.6601457595825195, "learning_rate": 6.7956120984372226e-06, "loss": 0.382, "step": 1993 }, { "epoch": 0.1362301017968163, "grad_norm": 5.468544960021973, "learning_rate": 6.7953512252052545e-06, "loss": 0.5044, "step": 1994 }, { "epoch": 0.13629842180774748, "grad_norm": 4.5765461921691895, "learning_rate": 6.795090190608678e-06, "loss": 0.42, "step": 1995 }, { "epoch": 0.1363667418186787, "grad_norm": 4.707749366760254, "learning_rate": 6.794828994660276e-06, "loss": 0.481, "step": 1996 }, { "epoch": 0.1364350618296099, "grad_norm": 4.409252643585205, "learning_rate": 6.794567637372837e-06, "loss": 0.415, "step": 1997 }, { "epoch": 0.1365033818405411, "grad_norm": 5.316361427307129, "learning_rate": 6.794306118759161e-06, "loss": 0.5164, "step": 1998 }, { "epoch": 0.1365717018514723, "grad_norm": 4.252584934234619, "learning_rate": 6.794044438832052e-06, "loss": 0.4397, "step": 1999 }, { "epoch": 0.1366400218624035, "grad_norm": 5.33632230758667, "learning_rate": 6.793782597604324e-06, "loss": 0.5043, "step": 2000 }, { "epoch": 0.1367083418733347, "grad_norm": 3.802696466445923, "learning_rate": 6.793520595088798e-06, "loss": 0.4982, "step": 2001 }, { "epoch": 0.1367766618842659, "grad_norm": 3.8360133171081543, "learning_rate": 6.7932584312983076e-06, "loss": 0.4547, "step": 2002 }, { "epoch": 0.13684498189519712, "grad_norm": 3.805124521255493, "learning_rate": 6.792996106245686e-06, "loss": 0.4251, "step": 2003 }, { "epoch": 0.1369133019061283, "grad_norm": 5.113128662109375, "learning_rate": 6.79273361994378e-06, "loss": 0.4119, "step": 2004 }, { "epoch": 0.1369816219170595, "grad_norm": 5.219719409942627, "learning_rate": 6.792470972405442e-06, "loss": 0.4331, "step": 2005 }, { "epoch": 0.13704994192799072, "grad_norm": 4.393361568450928, "learning_rate": 6.792208163643535e-06, "loss": 0.4691, "step": 2006 }, { "epoch": 0.1371182619389219, "grad_norm": 3.8483266830444336, "learning_rate": 6.791945193670927e-06, "loss": 0.384, "step": 2007 }, { "epoch": 0.13718658194985311, "grad_norm": 4.514275074005127, "learning_rate": 6.791682062500495e-06, "loss": 0.4998, "step": 2008 }, { "epoch": 0.13725490196078433, "grad_norm": 6.062517166137695, "learning_rate": 6.791418770145123e-06, "loss": 0.5306, "step": 2009 }, { "epoch": 0.1373232219717155, "grad_norm": 5.488317966461182, "learning_rate": 6.791155316617706e-06, "loss": 0.4643, "step": 2010 }, { "epoch": 0.13739154198264672, "grad_norm": 4.729602813720703, "learning_rate": 6.7908917019311416e-06, "loss": 0.5731, "step": 2011 }, { "epoch": 0.13745986199357793, "grad_norm": 3.925447702407837, "learning_rate": 6.790627926098339e-06, "loss": 0.4895, "step": 2012 }, { "epoch": 0.13752818200450911, "grad_norm": 4.205440998077393, "learning_rate": 6.7903639891322176e-06, "loss": 0.5578, "step": 2013 }, { "epoch": 0.13759650201544033, "grad_norm": 4.64024019241333, "learning_rate": 6.790099891045699e-06, "loss": 0.4679, "step": 2014 }, { "epoch": 0.13766482202637154, "grad_norm": 4.213573932647705, "learning_rate": 6.789835631851715e-06, "loss": 0.4227, "step": 2015 }, { "epoch": 0.13773314203730272, "grad_norm": 5.258309841156006, "learning_rate": 6.789571211563208e-06, "loss": 0.4477, "step": 2016 }, { "epoch": 0.13780146204823393, "grad_norm": 3.9132657051086426, "learning_rate": 6.789306630193124e-06, "loss": 0.4258, "step": 2017 }, { "epoch": 0.13786978205916514, "grad_norm": 3.2070372104644775, "learning_rate": 6.789041887754419e-06, "loss": 0.4043, "step": 2018 }, { "epoch": 0.13793810207009632, "grad_norm": 4.057154655456543, "learning_rate": 6.7887769842600565e-06, "loss": 0.343, "step": 2019 }, { "epoch": 0.13800642208102754, "grad_norm": 3.117525577545166, "learning_rate": 6.78851191972301e-06, "loss": 0.4306, "step": 2020 }, { "epoch": 0.13807474209195875, "grad_norm": 3.9113614559173584, "learning_rate": 6.788246694156257e-06, "loss": 0.3197, "step": 2021 }, { "epoch": 0.13814306210288993, "grad_norm": 4.671478748321533, "learning_rate": 6.787981307572787e-06, "loss": 0.4415, "step": 2022 }, { "epoch": 0.13821138211382114, "grad_norm": 4.182068347930908, "learning_rate": 6.787715759985592e-06, "loss": 0.366, "step": 2023 }, { "epoch": 0.13827970212475235, "grad_norm": 4.723968029022217, "learning_rate": 6.787450051407678e-06, "loss": 0.3237, "step": 2024 }, { "epoch": 0.13834802213568353, "grad_norm": 4.278010368347168, "learning_rate": 6.787184181852055e-06, "loss": 0.4812, "step": 2025 }, { "epoch": 0.13841634214661475, "grad_norm": 4.754894733428955, "learning_rate": 6.786918151331743e-06, "loss": 0.4466, "step": 2026 }, { "epoch": 0.13848466215754596, "grad_norm": 4.566272258758545, "learning_rate": 6.786651959859766e-06, "loss": 0.4493, "step": 2027 }, { "epoch": 0.13855298216847714, "grad_norm": 4.852118492126465, "learning_rate": 6.786385607449162e-06, "loss": 0.4393, "step": 2028 }, { "epoch": 0.13862130217940835, "grad_norm": 4.163819789886475, "learning_rate": 6.7861190941129715e-06, "loss": 0.4149, "step": 2029 }, { "epoch": 0.13868962219033956, "grad_norm": 3.7842764854431152, "learning_rate": 6.785852419864246e-06, "loss": 0.4463, "step": 2030 }, { "epoch": 0.13875794220127075, "grad_norm": 4.452808380126953, "learning_rate": 6.785585584716044e-06, "loss": 0.3698, "step": 2031 }, { "epoch": 0.13882626221220196, "grad_norm": 4.82826566696167, "learning_rate": 6.7853185886814305e-06, "loss": 0.477, "step": 2032 }, { "epoch": 0.13889458222313317, "grad_norm": 3.3683433532714844, "learning_rate": 6.785051431773479e-06, "loss": 0.3057, "step": 2033 }, { "epoch": 0.13896290223406435, "grad_norm": 5.297665119171143, "learning_rate": 6.784784114005274e-06, "loss": 0.4697, "step": 2034 }, { "epoch": 0.13903122224499556, "grad_norm": 6.0170207023620605, "learning_rate": 6.784516635389904e-06, "loss": 0.4968, "step": 2035 }, { "epoch": 0.13909954225592677, "grad_norm": 4.694550514221191, "learning_rate": 6.784248995940467e-06, "loss": 0.4679, "step": 2036 }, { "epoch": 0.13916786226685796, "grad_norm": 5.397669315338135, "learning_rate": 6.783981195670067e-06, "loss": 0.4392, "step": 2037 }, { "epoch": 0.13923618227778917, "grad_norm": 3.9605538845062256, "learning_rate": 6.78371323459182e-06, "loss": 0.3405, "step": 2038 }, { "epoch": 0.13930450228872038, "grad_norm": 3.8018858432769775, "learning_rate": 6.783445112718846e-06, "loss": 0.3903, "step": 2039 }, { "epoch": 0.13937282229965156, "grad_norm": 4.590705871582031, "learning_rate": 6.783176830064274e-06, "loss": 0.3329, "step": 2040 }, { "epoch": 0.13944114231058277, "grad_norm": 4.104523658752441, "learning_rate": 6.782908386641242e-06, "loss": 0.4067, "step": 2041 }, { "epoch": 0.13950946232151398, "grad_norm": 4.721646785736084, "learning_rate": 6.782639782462894e-06, "loss": 0.5421, "step": 2042 }, { "epoch": 0.13957778233244517, "grad_norm": 5.0287370681762695, "learning_rate": 6.782371017542383e-06, "loss": 0.421, "step": 2043 }, { "epoch": 0.13964610234337638, "grad_norm": 5.404580593109131, "learning_rate": 6.78210209189287e-06, "loss": 0.4828, "step": 2044 }, { "epoch": 0.1397144223543076, "grad_norm": 3.8602454662323, "learning_rate": 6.781833005527524e-06, "loss": 0.4752, "step": 2045 }, { "epoch": 0.13978274236523877, "grad_norm": 4.394689083099365, "learning_rate": 6.781563758459521e-06, "loss": 0.4127, "step": 2046 }, { "epoch": 0.13985106237616998, "grad_norm": 5.316488742828369, "learning_rate": 6.781294350702045e-06, "loss": 0.4454, "step": 2047 }, { "epoch": 0.1399193823871012, "grad_norm": 5.300230503082275, "learning_rate": 6.781024782268288e-06, "loss": 0.495, "step": 2048 }, { "epoch": 0.13998770239803238, "grad_norm": 4.208281517028809, "learning_rate": 6.780755053171451e-06, "loss": 0.4547, "step": 2049 }, { "epoch": 0.1400560224089636, "grad_norm": 3.703256607055664, "learning_rate": 6.780485163424742e-06, "loss": 0.4267, "step": 2050 }, { "epoch": 0.1401243424198948, "grad_norm": 5.132388591766357, "learning_rate": 6.780215113041376e-06, "loss": 0.4971, "step": 2051 }, { "epoch": 0.14019266243082598, "grad_norm": 4.076319217681885, "learning_rate": 6.779944902034576e-06, "loss": 0.5051, "step": 2052 }, { "epoch": 0.1402609824417572, "grad_norm": 3.440929889678955, "learning_rate": 6.779674530417575e-06, "loss": 0.3657, "step": 2053 }, { "epoch": 0.1403293024526884, "grad_norm": 5.101208209991455, "learning_rate": 6.779403998203612e-06, "loss": 0.4674, "step": 2054 }, { "epoch": 0.1403976224636196, "grad_norm": 4.720597267150879, "learning_rate": 6.779133305405933e-06, "loss": 0.4842, "step": 2055 }, { "epoch": 0.1404659424745508, "grad_norm": 4.550718784332275, "learning_rate": 6.778862452037795e-06, "loss": 0.4852, "step": 2056 }, { "epoch": 0.140534262485482, "grad_norm": 7.817002296447754, "learning_rate": 6.77859143811246e-06, "loss": 0.4601, "step": 2057 }, { "epoch": 0.1406025824964132, "grad_norm": 3.9327564239501953, "learning_rate": 6.778320263643198e-06, "loss": 0.401, "step": 2058 }, { "epoch": 0.1406709025073444, "grad_norm": 4.584049701690674, "learning_rate": 6.778048928643291e-06, "loss": 0.5044, "step": 2059 }, { "epoch": 0.14073922251827561, "grad_norm": 4.886867523193359, "learning_rate": 6.777777433126021e-06, "loss": 0.4856, "step": 2060 }, { "epoch": 0.1408075425292068, "grad_norm": 4.68217134475708, "learning_rate": 6.777505777104686e-06, "loss": 0.5131, "step": 2061 }, { "epoch": 0.140875862540138, "grad_norm": 5.414697647094727, "learning_rate": 6.777233960592586e-06, "loss": 0.3847, "step": 2062 }, { "epoch": 0.14094418255106922, "grad_norm": 5.714630603790283, "learning_rate": 6.776961983603032e-06, "loss": 0.4679, "step": 2063 }, { "epoch": 0.1410125025620004, "grad_norm": 5.282344341278076, "learning_rate": 6.776689846149342e-06, "loss": 0.5225, "step": 2064 }, { "epoch": 0.1410808225729316, "grad_norm": 5.549428939819336, "learning_rate": 6.776417548244842e-06, "loss": 0.5657, "step": 2065 }, { "epoch": 0.14114914258386282, "grad_norm": 4.028607368469238, "learning_rate": 6.776145089902866e-06, "loss": 0.3499, "step": 2066 }, { "epoch": 0.141217462594794, "grad_norm": 4.740364074707031, "learning_rate": 6.775872471136754e-06, "loss": 0.4995, "step": 2067 }, { "epoch": 0.14128578260572522, "grad_norm": 4.855681896209717, "learning_rate": 6.7755996919598575e-06, "loss": 0.5628, "step": 2068 }, { "epoch": 0.14135410261665643, "grad_norm": 4.7176833152771, "learning_rate": 6.775326752385533e-06, "loss": 0.4099, "step": 2069 }, { "epoch": 0.1414224226275876, "grad_norm": 4.358755588531494, "learning_rate": 6.775053652427144e-06, "loss": 0.4595, "step": 2070 }, { "epoch": 0.14149074263851882, "grad_norm": 4.453169822692871, "learning_rate": 6.774780392098066e-06, "loss": 0.4209, "step": 2071 }, { "epoch": 0.14155906264945003, "grad_norm": 5.959786891937256, "learning_rate": 6.7745069714116784e-06, "loss": 0.5582, "step": 2072 }, { "epoch": 0.14162738266038122, "grad_norm": 4.3877339363098145, "learning_rate": 6.77423339038137e-06, "loss": 0.5401, "step": 2073 }, { "epoch": 0.14169570267131243, "grad_norm": 3.119442939758301, "learning_rate": 6.773959649020539e-06, "loss": 0.4079, "step": 2074 }, { "epoch": 0.14176402268224364, "grad_norm": 4.131023406982422, "learning_rate": 6.773685747342587e-06, "loss": 0.4833, "step": 2075 }, { "epoch": 0.14183234269317482, "grad_norm": 3.2657034397125244, "learning_rate": 6.773411685360928e-06, "loss": 0.2825, "step": 2076 }, { "epoch": 0.14190066270410603, "grad_norm": 5.741904258728027, "learning_rate": 6.773137463088982e-06, "loss": 0.5483, "step": 2077 }, { "epoch": 0.14196898271503725, "grad_norm": 4.835196495056152, "learning_rate": 6.7728630805401765e-06, "loss": 0.4945, "step": 2078 }, { "epoch": 0.14203730272596843, "grad_norm": 3.8868751525878906, "learning_rate": 6.7725885377279465e-06, "loss": 0.3983, "step": 2079 }, { "epoch": 0.14210562273689964, "grad_norm": 4.771336555480957, "learning_rate": 6.772313834665738e-06, "loss": 0.3777, "step": 2080 }, { "epoch": 0.14217394274783085, "grad_norm": 4.179172515869141, "learning_rate": 6.772038971367e-06, "loss": 0.4295, "step": 2081 }, { "epoch": 0.14224226275876203, "grad_norm": 3.7167892456054688, "learning_rate": 6.771763947845192e-06, "loss": 0.4154, "step": 2082 }, { "epoch": 0.14231058276969324, "grad_norm": 3.217020034790039, "learning_rate": 6.771488764113783e-06, "loss": 0.4517, "step": 2083 }, { "epoch": 0.14237890278062446, "grad_norm": 5.563082695007324, "learning_rate": 6.771213420186248e-06, "loss": 0.4404, "step": 2084 }, { "epoch": 0.14244722279155564, "grad_norm": 4.186436653137207, "learning_rate": 6.7709379160760675e-06, "loss": 0.373, "step": 2085 }, { "epoch": 0.14251554280248685, "grad_norm": 3.1179065704345703, "learning_rate": 6.770662251796735e-06, "loss": 0.4143, "step": 2086 }, { "epoch": 0.14258386281341806, "grad_norm": 3.655484199523926, "learning_rate": 6.770386427361746e-06, "loss": 0.4026, "step": 2087 }, { "epoch": 0.14265218282434924, "grad_norm": 4.676255702972412, "learning_rate": 6.77011044278461e-06, "loss": 0.4971, "step": 2088 }, { "epoch": 0.14272050283528046, "grad_norm": 5.10113000869751, "learning_rate": 6.769834298078839e-06, "loss": 0.4442, "step": 2089 }, { "epoch": 0.14278882284621167, "grad_norm": 5.614161014556885, "learning_rate": 6.769557993257955e-06, "loss": 0.4767, "step": 2090 }, { "epoch": 0.14285714285714285, "grad_norm": 7.38115119934082, "learning_rate": 6.769281528335491e-06, "loss": 0.6454, "step": 2091 }, { "epoch": 0.14292546286807406, "grad_norm": 5.226982593536377, "learning_rate": 6.76900490332498e-06, "loss": 0.3868, "step": 2092 }, { "epoch": 0.14299378287900527, "grad_norm": 5.571341514587402, "learning_rate": 6.768728118239972e-06, "loss": 0.45, "step": 2093 }, { "epoch": 0.14306210288993645, "grad_norm": 3.7390127182006836, "learning_rate": 6.7684511730940176e-06, "loss": 0.3178, "step": 2094 }, { "epoch": 0.14313042290086767, "grad_norm": 5.567758560180664, "learning_rate": 6.76817406790068e-06, "loss": 0.4877, "step": 2095 }, { "epoch": 0.14319874291179888, "grad_norm": 5.599666118621826, "learning_rate": 6.767896802673526e-06, "loss": 0.5124, "step": 2096 }, { "epoch": 0.14326706292273006, "grad_norm": 5.649111270904541, "learning_rate": 6.767619377426134e-06, "loss": 0.4634, "step": 2097 }, { "epoch": 0.14333538293366127, "grad_norm": 3.8873343467712402, "learning_rate": 6.76734179217209e-06, "loss": 0.3687, "step": 2098 }, { "epoch": 0.14340370294459248, "grad_norm": 3.716566801071167, "learning_rate": 6.767064046924984e-06, "loss": 0.3933, "step": 2099 }, { "epoch": 0.14347202295552366, "grad_norm": 4.144264221191406, "learning_rate": 6.766786141698418e-06, "loss": 0.3434, "step": 2100 }, { "epoch": 0.14354034296645488, "grad_norm": 4.121523857116699, "learning_rate": 6.766508076505999e-06, "loss": 0.4082, "step": 2101 }, { "epoch": 0.1436086629773861, "grad_norm": 4.194265842437744, "learning_rate": 6.7662298513613465e-06, "loss": 0.3693, "step": 2102 }, { "epoch": 0.14367698298831727, "grad_norm": 5.634931564331055, "learning_rate": 6.76595146627808e-06, "loss": 0.4914, "step": 2103 }, { "epoch": 0.14374530299924848, "grad_norm": 4.362638473510742, "learning_rate": 6.765672921269834e-06, "loss": 0.3967, "step": 2104 }, { "epoch": 0.1438136230101797, "grad_norm": 5.084686756134033, "learning_rate": 6.765394216350248e-06, "loss": 0.345, "step": 2105 }, { "epoch": 0.14388194302111088, "grad_norm": 5.309311866760254, "learning_rate": 6.765115351532968e-06, "loss": 0.5589, "step": 2106 }, { "epoch": 0.1439502630320421, "grad_norm": 5.417909622192383, "learning_rate": 6.764836326831651e-06, "loss": 0.4404, "step": 2107 }, { "epoch": 0.1440185830429733, "grad_norm": 5.307255744934082, "learning_rate": 6.764557142259958e-06, "loss": 0.4787, "step": 2108 }, { "epoch": 0.14408690305390448, "grad_norm": 4.723912715911865, "learning_rate": 6.764277797831562e-06, "loss": 0.4343, "step": 2109 }, { "epoch": 0.1441552230648357, "grad_norm": 5.036258220672607, "learning_rate": 6.763998293560141e-06, "loss": 0.4291, "step": 2110 }, { "epoch": 0.1442235430757669, "grad_norm": 2.564710855484009, "learning_rate": 6.763718629459381e-06, "loss": 0.3109, "step": 2111 }, { "epoch": 0.14429186308669809, "grad_norm": 5.384091377258301, "learning_rate": 6.763438805542978e-06, "loss": 0.4373, "step": 2112 }, { "epoch": 0.1443601830976293, "grad_norm": 4.728410720825195, "learning_rate": 6.763158821824632e-06, "loss": 0.4772, "step": 2113 }, { "epoch": 0.1444285031085605, "grad_norm": 5.184577941894531, "learning_rate": 6.762878678318054e-06, "loss": 0.4858, "step": 2114 }, { "epoch": 0.1444968231194917, "grad_norm": 5.237513542175293, "learning_rate": 6.762598375036964e-06, "loss": 0.4633, "step": 2115 }, { "epoch": 0.1445651431304229, "grad_norm": 5.6020588874816895, "learning_rate": 6.762317911995084e-06, "loss": 0.4638, "step": 2116 }, { "epoch": 0.1446334631413541, "grad_norm": 4.822768688201904, "learning_rate": 6.76203728920615e-06, "loss": 0.4911, "step": 2117 }, { "epoch": 0.1447017831522853, "grad_norm": 4.062719345092773, "learning_rate": 6.761756506683903e-06, "loss": 0.4446, "step": 2118 }, { "epoch": 0.1447701031632165, "grad_norm": 3.5793092250823975, "learning_rate": 6.761475564442091e-06, "loss": 0.4053, "step": 2119 }, { "epoch": 0.14483842317414772, "grad_norm": 3.825533866882324, "learning_rate": 6.761194462494473e-06, "loss": 0.4389, "step": 2120 }, { "epoch": 0.1449067431850789, "grad_norm": 6.715073108673096, "learning_rate": 6.760913200854811e-06, "loss": 0.566, "step": 2121 }, { "epoch": 0.1449750631960101, "grad_norm": 4.383380889892578, "learning_rate": 6.7606317795368794e-06, "loss": 0.5082, "step": 2122 }, { "epoch": 0.14504338320694132, "grad_norm": 4.300173759460449, "learning_rate": 6.760350198554461e-06, "loss": 0.4111, "step": 2123 }, { "epoch": 0.1451117032178725, "grad_norm": 4.245336055755615, "learning_rate": 6.76006845792134e-06, "loss": 0.5055, "step": 2124 }, { "epoch": 0.14518002322880372, "grad_norm": 4.323505401611328, "learning_rate": 6.759786557651314e-06, "loss": 0.4227, "step": 2125 }, { "epoch": 0.14524834323973493, "grad_norm": 4.520618438720703, "learning_rate": 6.759504497758187e-06, "loss": 0.5439, "step": 2126 }, { "epoch": 0.1453166632506661, "grad_norm": 5.233403205871582, "learning_rate": 6.759222278255771e-06, "loss": 0.5952, "step": 2127 }, { "epoch": 0.14538498326159732, "grad_norm": 4.353309631347656, "learning_rate": 6.758939899157885e-06, "loss": 0.4243, "step": 2128 }, { "epoch": 0.14545330327252853, "grad_norm": 4.437610149383545, "learning_rate": 6.758657360478358e-06, "loss": 0.4872, "step": 2129 }, { "epoch": 0.14552162328345972, "grad_norm": 4.770645618438721, "learning_rate": 6.7583746622310225e-06, "loss": 0.426, "step": 2130 }, { "epoch": 0.14558994329439093, "grad_norm": 5.662159442901611, "learning_rate": 6.758091804429724e-06, "loss": 0.4935, "step": 2131 }, { "epoch": 0.14565826330532214, "grad_norm": 4.366505146026611, "learning_rate": 6.75780878708831e-06, "loss": 0.3891, "step": 2132 }, { "epoch": 0.14572658331625332, "grad_norm": 3.990299701690674, "learning_rate": 6.757525610220643e-06, "loss": 0.4002, "step": 2133 }, { "epoch": 0.14579490332718453, "grad_norm": 4.763392448425293, "learning_rate": 6.757242273840587e-06, "loss": 0.359, "step": 2134 }, { "epoch": 0.14586322333811574, "grad_norm": 4.663992881774902, "learning_rate": 6.7569587779620165e-06, "loss": 0.458, "step": 2135 }, { "epoch": 0.14593154334904693, "grad_norm": 3.615980863571167, "learning_rate": 6.756675122598814e-06, "loss": 0.4106, "step": 2136 }, { "epoch": 0.14599986335997814, "grad_norm": 4.039426326751709, "learning_rate": 6.756391307764871e-06, "loss": 0.4202, "step": 2137 }, { "epoch": 0.14606818337090935, "grad_norm": 5.189492225646973, "learning_rate": 6.756107333474082e-06, "loss": 0.5039, "step": 2138 }, { "epoch": 0.14613650338184053, "grad_norm": 4.302270412445068, "learning_rate": 6.755823199740354e-06, "loss": 0.3702, "step": 2139 }, { "epoch": 0.14620482339277174, "grad_norm": 4.015743255615234, "learning_rate": 6.7555389065776e-06, "loss": 0.3964, "step": 2140 }, { "epoch": 0.14627314340370295, "grad_norm": 3.285468578338623, "learning_rate": 6.755254453999742e-06, "loss": 0.4022, "step": 2141 }, { "epoch": 0.14634146341463414, "grad_norm": 4.71211051940918, "learning_rate": 6.754969842020707e-06, "loss": 0.4607, "step": 2142 }, { "epoch": 0.14640978342556535, "grad_norm": 4.619289875030518, "learning_rate": 6.754685070654433e-06, "loss": 0.4054, "step": 2143 }, { "epoch": 0.14647810343649656, "grad_norm": 4.931777000427246, "learning_rate": 6.754400139914866e-06, "loss": 0.4557, "step": 2144 }, { "epoch": 0.14654642344742774, "grad_norm": 5.4853339195251465, "learning_rate": 6.754115049815955e-06, "loss": 0.4558, "step": 2145 }, { "epoch": 0.14661474345835895, "grad_norm": 4.138556480407715, "learning_rate": 6.753829800371662e-06, "loss": 0.4333, "step": 2146 }, { "epoch": 0.14668306346929016, "grad_norm": 4.4449238777160645, "learning_rate": 6.753544391595954e-06, "loss": 0.5239, "step": 2147 }, { "epoch": 0.14675138348022135, "grad_norm": 3.472412347793579, "learning_rate": 6.7532588235028085e-06, "loss": 0.3578, "step": 2148 }, { "epoch": 0.14681970349115256, "grad_norm": 4.042326927185059, "learning_rate": 6.752973096106208e-06, "loss": 0.4243, "step": 2149 }, { "epoch": 0.14688802350208377, "grad_norm": 4.1476945877075195, "learning_rate": 6.752687209420143e-06, "loss": 0.4839, "step": 2150 }, { "epoch": 0.14695634351301495, "grad_norm": 3.226411819458008, "learning_rate": 6.752401163458614e-06, "loss": 0.4867, "step": 2151 }, { "epoch": 0.14702466352394616, "grad_norm": 5.216317653656006, "learning_rate": 6.752114958235626e-06, "loss": 0.4739, "step": 2152 }, { "epoch": 0.14709298353487738, "grad_norm": 5.070872783660889, "learning_rate": 6.751828593765195e-06, "loss": 0.3951, "step": 2153 }, { "epoch": 0.14716130354580856, "grad_norm": 3.58902907371521, "learning_rate": 6.7515420700613455e-06, "loss": 0.3921, "step": 2154 }, { "epoch": 0.14722962355673977, "grad_norm": 4.204796314239502, "learning_rate": 6.751255387138104e-06, "loss": 0.4595, "step": 2155 }, { "epoch": 0.14729794356767098, "grad_norm": 4.061190128326416, "learning_rate": 6.750968545009511e-06, "loss": 0.3396, "step": 2156 }, { "epoch": 0.14736626357860216, "grad_norm": 3.3854594230651855, "learning_rate": 6.750681543689611e-06, "loss": 0.348, "step": 2157 }, { "epoch": 0.14743458358953337, "grad_norm": 4.150287628173828, "learning_rate": 6.750394383192459e-06, "loss": 0.4284, "step": 2158 }, { "epoch": 0.14750290360046459, "grad_norm": 3.9013588428497314, "learning_rate": 6.750107063532115e-06, "loss": 0.3541, "step": 2159 }, { "epoch": 0.14757122361139577, "grad_norm": 3.6439754962921143, "learning_rate": 6.7498195847226504e-06, "loss": 0.4277, "step": 2160 }, { "epoch": 0.14763954362232698, "grad_norm": 4.541321754455566, "learning_rate": 6.74953194677814e-06, "loss": 0.4164, "step": 2161 }, { "epoch": 0.1477078636332582, "grad_norm": 5.1401872634887695, "learning_rate": 6.749244149712671e-06, "loss": 0.4071, "step": 2162 }, { "epoch": 0.14777618364418937, "grad_norm": 5.425471782684326, "learning_rate": 6.748956193540334e-06, "loss": 0.539, "step": 2163 }, { "epoch": 0.14784450365512058, "grad_norm": 3.608485460281372, "learning_rate": 6.7486680782752305e-06, "loss": 0.4698, "step": 2164 }, { "epoch": 0.1479128236660518, "grad_norm": 4.592977523803711, "learning_rate": 6.748379803931468e-06, "loss": 0.4669, "step": 2165 }, { "epoch": 0.14798114367698298, "grad_norm": 4.573486328125, "learning_rate": 6.748091370523164e-06, "loss": 0.4976, "step": 2166 }, { "epoch": 0.1480494636879142, "grad_norm": 4.075727939605713, "learning_rate": 6.74780277806444e-06, "loss": 0.3974, "step": 2167 }, { "epoch": 0.1481177836988454, "grad_norm": 4.771549224853516, "learning_rate": 6.747514026569431e-06, "loss": 0.527, "step": 2168 }, { "epoch": 0.14818610370977658, "grad_norm": 4.884782791137695, "learning_rate": 6.7472251160522725e-06, "loss": 0.4631, "step": 2169 }, { "epoch": 0.1482544237207078, "grad_norm": 4.894711017608643, "learning_rate": 6.746936046527114e-06, "loss": 0.4428, "step": 2170 }, { "epoch": 0.148322743731639, "grad_norm": 4.675962924957275, "learning_rate": 6.74664681800811e-06, "loss": 0.4081, "step": 2171 }, { "epoch": 0.1483910637425702, "grad_norm": 3.9056143760681152, "learning_rate": 6.746357430509423e-06, "loss": 0.4084, "step": 2172 }, { "epoch": 0.1484593837535014, "grad_norm": 4.224451065063477, "learning_rate": 6.7460678840452236e-06, "loss": 0.4878, "step": 2173 }, { "epoch": 0.1485277037644326, "grad_norm": 4.391056060791016, "learning_rate": 6.745778178629691e-06, "loss": 0.4376, "step": 2174 }, { "epoch": 0.1485960237753638, "grad_norm": 4.103618144989014, "learning_rate": 6.74548831427701e-06, "loss": 0.3748, "step": 2175 }, { "epoch": 0.148664343786295, "grad_norm": 4.409485816955566, "learning_rate": 6.7451982910013765e-06, "loss": 0.4147, "step": 2176 }, { "epoch": 0.14873266379722622, "grad_norm": 4.117482662200928, "learning_rate": 6.744908108816989e-06, "loss": 0.4384, "step": 2177 }, { "epoch": 0.1488009838081574, "grad_norm": 3.6525394916534424, "learning_rate": 6.74461776773806e-06, "loss": 0.3728, "step": 2178 }, { "epoch": 0.1488693038190886, "grad_norm": 6.482429027557373, "learning_rate": 6.744327267778804e-06, "loss": 0.4457, "step": 2179 }, { "epoch": 0.14893762383001982, "grad_norm": 5.0948486328125, "learning_rate": 6.744036608953449e-06, "loss": 0.4296, "step": 2180 }, { "epoch": 0.149005943840951, "grad_norm": 3.8935530185699463, "learning_rate": 6.743745791276226e-06, "loss": 0.3865, "step": 2181 }, { "epoch": 0.14907426385188222, "grad_norm": 4.769009113311768, "learning_rate": 6.743454814761375e-06, "loss": 0.475, "step": 2182 }, { "epoch": 0.14914258386281343, "grad_norm": 4.927493572235107, "learning_rate": 6.743163679423146e-06, "loss": 0.5488, "step": 2183 }, { "epoch": 0.1492109038737446, "grad_norm": 4.605827808380127, "learning_rate": 6.7428723852757934e-06, "loss": 0.3438, "step": 2184 }, { "epoch": 0.14927922388467582, "grad_norm": 4.13966703414917, "learning_rate": 6.742580932333582e-06, "loss": 0.4404, "step": 2185 }, { "epoch": 0.14934754389560703, "grad_norm": 5.004623889923096, "learning_rate": 6.742289320610785e-06, "loss": 0.4693, "step": 2186 }, { "epoch": 0.14941586390653822, "grad_norm": 4.044467926025391, "learning_rate": 6.7419975501216795e-06, "loss": 0.4313, "step": 2187 }, { "epoch": 0.14948418391746943, "grad_norm": 4.949291229248047, "learning_rate": 6.741705620880553e-06, "loss": 0.356, "step": 2188 }, { "epoch": 0.14955250392840064, "grad_norm": 4.348527908325195, "learning_rate": 6.741413532901701e-06, "loss": 0.4666, "step": 2189 }, { "epoch": 0.14962082393933182, "grad_norm": 4.0230712890625, "learning_rate": 6.741121286199427e-06, "loss": 0.3709, "step": 2190 }, { "epoch": 0.14968914395026303, "grad_norm": 5.227057456970215, "learning_rate": 6.740828880788041e-06, "loss": 0.4901, "step": 2191 }, { "epoch": 0.14975746396119424, "grad_norm": 5.0393195152282715, "learning_rate": 6.740536316681861e-06, "loss": 0.3811, "step": 2192 }, { "epoch": 0.14982578397212543, "grad_norm": 3.981963872909546, "learning_rate": 6.7402435938952125e-06, "loss": 0.3966, "step": 2193 }, { "epoch": 0.14989410398305664, "grad_norm": 3.8227667808532715, "learning_rate": 6.739950712442431e-06, "loss": 0.3962, "step": 2194 }, { "epoch": 0.14996242399398785, "grad_norm": 4.771519660949707, "learning_rate": 6.7396576723378575e-06, "loss": 0.3957, "step": 2195 }, { "epoch": 0.15003074400491903, "grad_norm": 5.52778434753418, "learning_rate": 6.739364473595841e-06, "loss": 0.5347, "step": 2196 }, { "epoch": 0.15009906401585024, "grad_norm": 4.281780242919922, "learning_rate": 6.739071116230738e-06, "loss": 0.4388, "step": 2197 }, { "epoch": 0.15016738402678145, "grad_norm": 5.0262627601623535, "learning_rate": 6.738777600256915e-06, "loss": 0.4742, "step": 2198 }, { "epoch": 0.15023570403771264, "grad_norm": 5.670086860656738, "learning_rate": 6.738483925688744e-06, "loss": 0.5176, "step": 2199 }, { "epoch": 0.15030402404864385, "grad_norm": 4.172206878662109, "learning_rate": 6.7381900925406046e-06, "loss": 0.4129, "step": 2200 }, { "epoch": 0.15037234405957506, "grad_norm": 4.081755638122559, "learning_rate": 6.737896100826886e-06, "loss": 0.2789, "step": 2201 }, { "epoch": 0.15044066407050624, "grad_norm": 5.267216205596924, "learning_rate": 6.737601950561985e-06, "loss": 0.4476, "step": 2202 }, { "epoch": 0.15050898408143745, "grad_norm": 3.6522457599639893, "learning_rate": 6.737307641760304e-06, "loss": 0.276, "step": 2203 }, { "epoch": 0.15057730409236866, "grad_norm": 3.749250888824463, "learning_rate": 6.737013174436256e-06, "loss": 0.431, "step": 2204 }, { "epoch": 0.15064562410329985, "grad_norm": 6.056210517883301, "learning_rate": 6.736718548604257e-06, "loss": 0.3856, "step": 2205 }, { "epoch": 0.15071394411423106, "grad_norm": 4.7086615562438965, "learning_rate": 6.736423764278737e-06, "loss": 0.4137, "step": 2206 }, { "epoch": 0.15078226412516227, "grad_norm": 5.244603157043457, "learning_rate": 6.73612882147413e-06, "loss": 0.463, "step": 2207 }, { "epoch": 0.15085058413609345, "grad_norm": 5.3410234451293945, "learning_rate": 6.73583372020488e-06, "loss": 0.5021, "step": 2208 }, { "epoch": 0.15091890414702466, "grad_norm": 4.166780948638916, "learning_rate": 6.735538460485435e-06, "loss": 0.3996, "step": 2209 }, { "epoch": 0.15098722415795587, "grad_norm": 3.434927463531494, "learning_rate": 6.735243042330254e-06, "loss": 0.3801, "step": 2210 }, { "epoch": 0.15105554416888706, "grad_norm": 5.660940170288086, "learning_rate": 6.734947465753804e-06, "loss": 0.7818, "step": 2211 }, { "epoch": 0.15112386417981827, "grad_norm": 3.96620512008667, "learning_rate": 6.734651730770555e-06, "loss": 0.409, "step": 2212 }, { "epoch": 0.15119218419074948, "grad_norm": 3.513395309448242, "learning_rate": 6.734355837394993e-06, "loss": 0.4023, "step": 2213 }, { "epoch": 0.15126050420168066, "grad_norm": 3.8779006004333496, "learning_rate": 6.7340597856416035e-06, "loss": 0.4153, "step": 2214 }, { "epoch": 0.15132882421261187, "grad_norm": 3.59033203125, "learning_rate": 6.7337635755248855e-06, "loss": 0.4268, "step": 2215 }, { "epoch": 0.15139714422354308, "grad_norm": 5.218724250793457, "learning_rate": 6.733467207059343e-06, "loss": 0.6023, "step": 2216 }, { "epoch": 0.15146546423447427, "grad_norm": 4.119206428527832, "learning_rate": 6.733170680259489e-06, "loss": 0.4712, "step": 2217 }, { "epoch": 0.15153378424540548, "grad_norm": 4.157417297363281, "learning_rate": 6.732873995139841e-06, "loss": 0.4571, "step": 2218 }, { "epoch": 0.1516021042563367, "grad_norm": 4.949321746826172, "learning_rate": 6.732577151714931e-06, "loss": 0.447, "step": 2219 }, { "epoch": 0.15167042426726787, "grad_norm": 3.3492047786712646, "learning_rate": 6.732280149999291e-06, "loss": 0.3936, "step": 2220 }, { "epoch": 0.15173874427819908, "grad_norm": 5.096732139587402, "learning_rate": 6.731982990007467e-06, "loss": 0.4042, "step": 2221 }, { "epoch": 0.1518070642891303, "grad_norm": 4.520752906799316, "learning_rate": 6.731685671754009e-06, "loss": 0.4526, "step": 2222 }, { "epoch": 0.15187538430006148, "grad_norm": 4.039391040802002, "learning_rate": 6.731388195253475e-06, "loss": 0.5303, "step": 2223 }, { "epoch": 0.1519437043109927, "grad_norm": 4.710207462310791, "learning_rate": 6.731090560520433e-06, "loss": 0.4191, "step": 2224 }, { "epoch": 0.1520120243219239, "grad_norm": 3.088317632675171, "learning_rate": 6.7307927675694566e-06, "loss": 0.3421, "step": 2225 }, { "epoch": 0.15208034433285508, "grad_norm": 3.4966299533843994, "learning_rate": 6.730494816415128e-06, "loss": 0.4447, "step": 2226 }, { "epoch": 0.1521486643437863, "grad_norm": 4.794188499450684, "learning_rate": 6.730196707072039e-06, "loss": 0.484, "step": 2227 }, { "epoch": 0.1522169843547175, "grad_norm": 4.21079683303833, "learning_rate": 6.729898439554784e-06, "loss": 0.471, "step": 2228 }, { "epoch": 0.1522853043656487, "grad_norm": 5.596526622772217, "learning_rate": 6.729600013877971e-06, "loss": 0.4346, "step": 2229 }, { "epoch": 0.1523536243765799, "grad_norm": 3.0938796997070312, "learning_rate": 6.7293014300562095e-06, "loss": 0.3669, "step": 2230 }, { "epoch": 0.1524219443875111, "grad_norm": 3.878591537475586, "learning_rate": 6.729002688104125e-06, "loss": 0.4086, "step": 2231 }, { "epoch": 0.1524902643984423, "grad_norm": 5.0074462890625, "learning_rate": 6.728703788036344e-06, "loss": 0.3986, "step": 2232 }, { "epoch": 0.1525585844093735, "grad_norm": 3.831432819366455, "learning_rate": 6.728404729867503e-06, "loss": 0.3444, "step": 2233 }, { "epoch": 0.15262690442030472, "grad_norm": 4.21114444732666, "learning_rate": 6.728105513612245e-06, "loss": 0.3867, "step": 2234 }, { "epoch": 0.1526952244312359, "grad_norm": 5.415219783782959, "learning_rate": 6.727806139285223e-06, "loss": 0.4994, "step": 2235 }, { "epoch": 0.1527635444421671, "grad_norm": 3.784975051879883, "learning_rate": 6.7275066069010965e-06, "loss": 0.4508, "step": 2236 }, { "epoch": 0.15283186445309832, "grad_norm": 4.334141254425049, "learning_rate": 6.727206916474532e-06, "loss": 0.4377, "step": 2237 }, { "epoch": 0.1529001844640295, "grad_norm": 2.925283908843994, "learning_rate": 6.726907068020206e-06, "loss": 0.2583, "step": 2238 }, { "epoch": 0.15296850447496071, "grad_norm": 3.239333152770996, "learning_rate": 6.726607061552799e-06, "loss": 0.4112, "step": 2239 }, { "epoch": 0.15303682448589193, "grad_norm": 4.820125579833984, "learning_rate": 6.726306897087005e-06, "loss": 0.5368, "step": 2240 }, { "epoch": 0.1531051444968231, "grad_norm": 3.757256507873535, "learning_rate": 6.726006574637519e-06, "loss": 0.4182, "step": 2241 }, { "epoch": 0.15317346450775432, "grad_norm": 3.9240469932556152, "learning_rate": 6.725706094219049e-06, "loss": 0.453, "step": 2242 }, { "epoch": 0.15324178451868553, "grad_norm": 4.179774761199951, "learning_rate": 6.725405455846307e-06, "loss": 0.3859, "step": 2243 }, { "epoch": 0.15331010452961671, "grad_norm": 4.084915637969971, "learning_rate": 6.725104659534016e-06, "loss": 0.3936, "step": 2244 }, { "epoch": 0.15337842454054793, "grad_norm": 6.673885345458984, "learning_rate": 6.724803705296904e-06, "loss": 0.4445, "step": 2245 }, { "epoch": 0.15344674455147914, "grad_norm": 3.389615774154663, "learning_rate": 6.724502593149709e-06, "loss": 0.3224, "step": 2246 }, { "epoch": 0.15351506456241032, "grad_norm": 5.20891809463501, "learning_rate": 6.724201323107176e-06, "loss": 0.3873, "step": 2247 }, { "epoch": 0.15358338457334153, "grad_norm": 4.315654754638672, "learning_rate": 6.723899895184055e-06, "loss": 0.2821, "step": 2248 }, { "epoch": 0.15365170458427274, "grad_norm": 4.805233478546143, "learning_rate": 6.723598309395109e-06, "loss": 0.474, "step": 2249 }, { "epoch": 0.15372002459520392, "grad_norm": 6.656600475311279, "learning_rate": 6.723296565755103e-06, "loss": 0.4422, "step": 2250 }, { "epoch": 0.15378834460613514, "grad_norm": 6.390875816345215, "learning_rate": 6.722994664278816e-06, "loss": 0.4821, "step": 2251 }, { "epoch": 0.15385666461706635, "grad_norm": 5.218954563140869, "learning_rate": 6.722692604981028e-06, "loss": 0.3449, "step": 2252 }, { "epoch": 0.15392498462799753, "grad_norm": 5.046809673309326, "learning_rate": 6.722390387876533e-06, "loss": 0.4558, "step": 2253 }, { "epoch": 0.15399330463892874, "grad_norm": 4.69354248046875, "learning_rate": 6.722088012980127e-06, "loss": 0.4192, "step": 2254 }, { "epoch": 0.15406162464985995, "grad_norm": 4.59827184677124, "learning_rate": 6.721785480306618e-06, "loss": 0.3691, "step": 2255 }, { "epoch": 0.15412994466079113, "grad_norm": 4.727415084838867, "learning_rate": 6.72148278987082e-06, "loss": 0.3964, "step": 2256 }, { "epoch": 0.15419826467172235, "grad_norm": 3.881910562515259, "learning_rate": 6.721179941687556e-06, "loss": 0.3535, "step": 2257 }, { "epoch": 0.15426658468265356, "grad_norm": 3.642441511154175, "learning_rate": 6.720876935771654e-06, "loss": 0.4574, "step": 2258 }, { "epoch": 0.15433490469358474, "grad_norm": 4.796630382537842, "learning_rate": 6.720573772137951e-06, "loss": 0.3453, "step": 2259 }, { "epoch": 0.15440322470451595, "grad_norm": 4.495761394500732, "learning_rate": 6.720270450801295e-06, "loss": 0.5161, "step": 2260 }, { "epoch": 0.15447154471544716, "grad_norm": 3.803838014602661, "learning_rate": 6.719966971776536e-06, "loss": 0.366, "step": 2261 }, { "epoch": 0.15453986472637835, "grad_norm": 4.866438388824463, "learning_rate": 6.719663335078535e-06, "loss": 0.4053, "step": 2262 }, { "epoch": 0.15460818473730956, "grad_norm": 4.262722492218018, "learning_rate": 6.719359540722161e-06, "loss": 0.4297, "step": 2263 }, { "epoch": 0.15467650474824077, "grad_norm": 3.4463305473327637, "learning_rate": 6.71905558872229e-06, "loss": 0.3956, "step": 2264 }, { "epoch": 0.15474482475917195, "grad_norm": 3.6737406253814697, "learning_rate": 6.718751479093806e-06, "loss": 0.3937, "step": 2265 }, { "epoch": 0.15481314477010316, "grad_norm": 3.659099578857422, "learning_rate": 6.7184472118515995e-06, "loss": 0.4154, "step": 2266 }, { "epoch": 0.15488146478103437, "grad_norm": 3.679903030395508, "learning_rate": 6.7181427870105705e-06, "loss": 0.3579, "step": 2267 }, { "epoch": 0.15494978479196556, "grad_norm": 5.934732437133789, "learning_rate": 6.7178382045856246e-06, "loss": 0.4789, "step": 2268 }, { "epoch": 0.15501810480289677, "grad_norm": 4.981320381164551, "learning_rate": 6.717533464591679e-06, "loss": 0.4321, "step": 2269 }, { "epoch": 0.15508642481382798, "grad_norm": 4.749303340911865, "learning_rate": 6.717228567043654e-06, "loss": 0.4343, "step": 2270 }, { "epoch": 0.15515474482475916, "grad_norm": 3.9362170696258545, "learning_rate": 6.716923511956478e-06, "loss": 0.3662, "step": 2271 }, { "epoch": 0.15522306483569037, "grad_norm": 3.9902806282043457, "learning_rate": 6.716618299345093e-06, "loss": 0.49, "step": 2272 }, { "epoch": 0.15529138484662158, "grad_norm": 4.231635093688965, "learning_rate": 6.716312929224441e-06, "loss": 0.3847, "step": 2273 }, { "epoch": 0.15535970485755277, "grad_norm": 6.581840515136719, "learning_rate": 6.716007401609476e-06, "loss": 0.533, "step": 2274 }, { "epoch": 0.15542802486848398, "grad_norm": 4.267332077026367, "learning_rate": 6.71570171651516e-06, "loss": 0.5004, "step": 2275 }, { "epoch": 0.1554963448794152, "grad_norm": 4.498322010040283, "learning_rate": 6.715395873956461e-06, "loss": 0.4355, "step": 2276 }, { "epoch": 0.15556466489034637, "grad_norm": 4.014673709869385, "learning_rate": 6.7150898739483545e-06, "loss": 0.4342, "step": 2277 }, { "epoch": 0.15563298490127758, "grad_norm": 5.716678619384766, "learning_rate": 6.7147837165058255e-06, "loss": 0.4648, "step": 2278 }, { "epoch": 0.1557013049122088, "grad_norm": 4.313531398773193, "learning_rate": 6.714477401643865e-06, "loss": 0.4362, "step": 2279 }, { "epoch": 0.15576962492313998, "grad_norm": 3.50669264793396, "learning_rate": 6.714170929377473e-06, "loss": 0.3934, "step": 2280 }, { "epoch": 0.1558379449340712, "grad_norm": 6.408971786499023, "learning_rate": 6.7138642997216555e-06, "loss": 0.4005, "step": 2281 }, { "epoch": 0.1559062649450024, "grad_norm": 5.057468891143799, "learning_rate": 6.713557512691428e-06, "loss": 0.4883, "step": 2282 }, { "epoch": 0.15597458495593358, "grad_norm": 5.295645713806152, "learning_rate": 6.713250568301816e-06, "loss": 0.5277, "step": 2283 }, { "epoch": 0.1560429049668648, "grad_norm": 3.5563743114471436, "learning_rate": 6.712943466567844e-06, "loss": 0.3968, "step": 2284 }, { "epoch": 0.156111224977796, "grad_norm": 3.57848858833313, "learning_rate": 6.712636207504554e-06, "loss": 0.3675, "step": 2285 }, { "epoch": 0.1561795449887272, "grad_norm": 3.9648725986480713, "learning_rate": 6.712328791126991e-06, "loss": 0.4128, "step": 2286 }, { "epoch": 0.1562478649996584, "grad_norm": 3.9059627056121826, "learning_rate": 6.712021217450207e-06, "loss": 0.4519, "step": 2287 }, { "epoch": 0.1563161850105896, "grad_norm": 4.70875883102417, "learning_rate": 6.711713486489265e-06, "loss": 0.4364, "step": 2288 }, { "epoch": 0.1563845050215208, "grad_norm": 4.418084144592285, "learning_rate": 6.711405598259233e-06, "loss": 0.427, "step": 2289 }, { "epoch": 0.156452825032452, "grad_norm": 3.9894680976867676, "learning_rate": 6.711097552775186e-06, "loss": 0.4246, "step": 2290 }, { "epoch": 0.15652114504338321, "grad_norm": 3.8288609981536865, "learning_rate": 6.71078935005221e-06, "loss": 0.3915, "step": 2291 }, { "epoch": 0.1565894650543144, "grad_norm": 5.130881309509277, "learning_rate": 6.710480990105397e-06, "loss": 0.5499, "step": 2292 }, { "epoch": 0.1566577850652456, "grad_norm": 4.011767387390137, "learning_rate": 6.7101724729498465e-06, "loss": 0.4672, "step": 2293 }, { "epoch": 0.15672610507617682, "grad_norm": 3.9690442085266113, "learning_rate": 6.709863798600664e-06, "loss": 0.3846, "step": 2294 }, { "epoch": 0.156794425087108, "grad_norm": 4.071434020996094, "learning_rate": 6.709554967072965e-06, "loss": 0.4285, "step": 2295 }, { "epoch": 0.1568627450980392, "grad_norm": 3.710702896118164, "learning_rate": 6.7092459783818736e-06, "loss": 0.5002, "step": 2296 }, { "epoch": 0.15693106510897042, "grad_norm": 4.7243852615356445, "learning_rate": 6.7089368325425196e-06, "loss": 0.3591, "step": 2297 }, { "epoch": 0.1569993851199016, "grad_norm": 4.255397796630859, "learning_rate": 6.70862752957004e-06, "loss": 0.4227, "step": 2298 }, { "epoch": 0.15706770513083282, "grad_norm": 3.6063578128814697, "learning_rate": 6.708318069479583e-06, "loss": 0.3816, "step": 2299 }, { "epoch": 0.15713602514176403, "grad_norm": 4.544340133666992, "learning_rate": 6.708008452286299e-06, "loss": 0.5268, "step": 2300 }, { "epoch": 0.1572043451526952, "grad_norm": 3.3944239616394043, "learning_rate": 6.7076986780053505e-06, "loss": 0.4253, "step": 2301 }, { "epoch": 0.15727266516362642, "grad_norm": 4.317480564117432, "learning_rate": 6.707388746651907e-06, "loss": 0.4366, "step": 2302 }, { "epoch": 0.15734098517455763, "grad_norm": 5.3862104415893555, "learning_rate": 6.707078658241144e-06, "loss": 0.3091, "step": 2303 }, { "epoch": 0.15740930518548882, "grad_norm": 4.144115447998047, "learning_rate": 6.706768412788246e-06, "loss": 0.3985, "step": 2304 }, { "epoch": 0.15747762519642003, "grad_norm": 5.056548595428467, "learning_rate": 6.706458010308405e-06, "loss": 0.4844, "step": 2305 }, { "epoch": 0.15754594520735124, "grad_norm": 4.952244758605957, "learning_rate": 6.706147450816821e-06, "loss": 0.3477, "step": 2306 }, { "epoch": 0.15761426521828242, "grad_norm": 3.1742804050445557, "learning_rate": 6.7058367343287e-06, "loss": 0.3653, "step": 2307 }, { "epoch": 0.15768258522921363, "grad_norm": 4.527181148529053, "learning_rate": 6.705525860859259e-06, "loss": 0.4671, "step": 2308 }, { "epoch": 0.15775090524014485, "grad_norm": 4.095280647277832, "learning_rate": 6.7052148304237185e-06, "loss": 0.3659, "step": 2309 }, { "epoch": 0.15781922525107603, "grad_norm": 4.481977939605713, "learning_rate": 6.70490364303731e-06, "loss": 0.4487, "step": 2310 }, { "epoch": 0.15788754526200724, "grad_norm": 6.17425012588501, "learning_rate": 6.704592298715271e-06, "loss": 0.3996, "step": 2311 }, { "epoch": 0.15795586527293845, "grad_norm": 4.091660976409912, "learning_rate": 6.704280797472848e-06, "loss": 0.4008, "step": 2312 }, { "epoch": 0.15802418528386963, "grad_norm": 3.7590715885162354, "learning_rate": 6.703969139325293e-06, "loss": 0.4343, "step": 2313 }, { "epoch": 0.15809250529480084, "grad_norm": 5.581441402435303, "learning_rate": 6.703657324287869e-06, "loss": 0.6702, "step": 2314 }, { "epoch": 0.15816082530573206, "grad_norm": 5.220716953277588, "learning_rate": 6.703345352375843e-06, "loss": 0.4063, "step": 2315 }, { "epoch": 0.15822914531666324, "grad_norm": 4.902714729309082, "learning_rate": 6.703033223604493e-06, "loss": 0.3852, "step": 2316 }, { "epoch": 0.15829746532759445, "grad_norm": 5.591001510620117, "learning_rate": 6.702720937989102e-06, "loss": 0.5376, "step": 2317 }, { "epoch": 0.15836578533852566, "grad_norm": 4.087155818939209, "learning_rate": 6.702408495544961e-06, "loss": 0.4011, "step": 2318 }, { "epoch": 0.15843410534945684, "grad_norm": 4.70833683013916, "learning_rate": 6.702095896287372e-06, "loss": 0.3954, "step": 2319 }, { "epoch": 0.15850242536038806, "grad_norm": 4.113614082336426, "learning_rate": 6.70178314023164e-06, "loss": 0.3837, "step": 2320 }, { "epoch": 0.15857074537131927, "grad_norm": 4.104172706604004, "learning_rate": 6.701470227393081e-06, "loss": 0.4136, "step": 2321 }, { "epoch": 0.15863906538225045, "grad_norm": 4.030162334442139, "learning_rate": 6.701157157787016e-06, "loss": 0.3922, "step": 2322 }, { "epoch": 0.15870738539318166, "grad_norm": 4.821390628814697, "learning_rate": 6.700843931428777e-06, "loss": 0.4063, "step": 2323 }, { "epoch": 0.15877570540411287, "grad_norm": 4.071599006652832, "learning_rate": 6.7005305483337e-06, "loss": 0.3888, "step": 2324 }, { "epoch": 0.15884402541504405, "grad_norm": 4.716682434082031, "learning_rate": 6.700217008517134e-06, "loss": 0.4034, "step": 2325 }, { "epoch": 0.15891234542597527, "grad_norm": 3.787566900253296, "learning_rate": 6.699903311994428e-06, "loss": 0.4245, "step": 2326 }, { "epoch": 0.15898066543690648, "grad_norm": 4.525181770324707, "learning_rate": 6.699589458780946e-06, "loss": 0.461, "step": 2327 }, { "epoch": 0.15904898544783766, "grad_norm": 4.1313323974609375, "learning_rate": 6.699275448892054e-06, "loss": 0.4308, "step": 2328 }, { "epoch": 0.15911730545876887, "grad_norm": 4.545898914337158, "learning_rate": 6.69896128234313e-06, "loss": 0.3415, "step": 2329 }, { "epoch": 0.15918562546970008, "grad_norm": 4.647426128387451, "learning_rate": 6.6986469591495575e-06, "loss": 0.346, "step": 2330 }, { "epoch": 0.15925394548063126, "grad_norm": 3.7121505737304688, "learning_rate": 6.698332479326728e-06, "loss": 0.3899, "step": 2331 }, { "epoch": 0.15932226549156248, "grad_norm": 4.717318534851074, "learning_rate": 6.698017842890041e-06, "loss": 0.4311, "step": 2332 }, { "epoch": 0.1593905855024937, "grad_norm": 4.188830852508545, "learning_rate": 6.697703049854903e-06, "loss": 0.435, "step": 2333 }, { "epoch": 0.15945890551342487, "grad_norm": 3.9492170810699463, "learning_rate": 6.697388100236729e-06, "loss": 0.3787, "step": 2334 }, { "epoch": 0.15952722552435608, "grad_norm": 4.544697284698486, "learning_rate": 6.69707299405094e-06, "loss": 0.3761, "step": 2335 }, { "epoch": 0.1595955455352873, "grad_norm": 3.9151089191436768, "learning_rate": 6.696757731312968e-06, "loss": 0.3606, "step": 2336 }, { "epoch": 0.15966386554621848, "grad_norm": 4.223330020904541, "learning_rate": 6.696442312038249e-06, "loss": 0.4491, "step": 2337 }, { "epoch": 0.1597321855571497, "grad_norm": 6.0803542137146, "learning_rate": 6.696126736242229e-06, "loss": 0.5685, "step": 2338 }, { "epoch": 0.1598005055680809, "grad_norm": 4.740524768829346, "learning_rate": 6.69581100394036e-06, "loss": 0.4238, "step": 2339 }, { "epoch": 0.15986882557901208, "grad_norm": 5.6169586181640625, "learning_rate": 6.6954951151481036e-06, "loss": 0.4811, "step": 2340 }, { "epoch": 0.1599371455899433, "grad_norm": 3.919337511062622, "learning_rate": 6.695179069880927e-06, "loss": 0.4789, "step": 2341 }, { "epoch": 0.1600054656008745, "grad_norm": 5.144383907318115, "learning_rate": 6.694862868154308e-06, "loss": 0.4478, "step": 2342 }, { "epoch": 0.16007378561180569, "grad_norm": 4.46295690536499, "learning_rate": 6.694546509983728e-06, "loss": 0.5036, "step": 2343 }, { "epoch": 0.1601421056227369, "grad_norm": 4.423378944396973, "learning_rate": 6.69422999538468e-06, "loss": 0.386, "step": 2344 }, { "epoch": 0.1602104256336681, "grad_norm": 4.269779205322266, "learning_rate": 6.693913324372661e-06, "loss": 0.4614, "step": 2345 }, { "epoch": 0.1602787456445993, "grad_norm": 6.118886947631836, "learning_rate": 6.6935964969631795e-06, "loss": 0.4588, "step": 2346 }, { "epoch": 0.1603470656555305, "grad_norm": 4.103940010070801, "learning_rate": 6.693279513171748e-06, "loss": 0.5018, "step": 2347 }, { "epoch": 0.1604153856664617, "grad_norm": 4.2117018699646, "learning_rate": 6.6929623730138895e-06, "loss": 0.4815, "step": 2348 }, { "epoch": 0.1604837056773929, "grad_norm": 4.55515718460083, "learning_rate": 6.692645076505133e-06, "loss": 0.4285, "step": 2349 }, { "epoch": 0.1605520256883241, "grad_norm": 3.7189385890960693, "learning_rate": 6.692327623661016e-06, "loss": 0.3499, "step": 2350 }, { "epoch": 0.16062034569925532, "grad_norm": 4.998322486877441, "learning_rate": 6.6920100144970836e-06, "loss": 0.4074, "step": 2351 }, { "epoch": 0.1606886657101865, "grad_norm": 4.667483806610107, "learning_rate": 6.691692249028886e-06, "loss": 0.4824, "step": 2352 }, { "epoch": 0.1607569857211177, "grad_norm": 4.098176002502441, "learning_rate": 6.691374327271986e-06, "loss": 0.4461, "step": 2353 }, { "epoch": 0.16082530573204892, "grad_norm": 4.614086151123047, "learning_rate": 6.69105624924195e-06, "loss": 0.5044, "step": 2354 }, { "epoch": 0.1608936257429801, "grad_norm": 5.039193630218506, "learning_rate": 6.690738014954356e-06, "loss": 0.4046, "step": 2355 }, { "epoch": 0.16096194575391132, "grad_norm": 4.326196670532227, "learning_rate": 6.690419624424783e-06, "loss": 0.4693, "step": 2356 }, { "epoch": 0.16103026576484253, "grad_norm": 4.0950822830200195, "learning_rate": 6.690101077668825e-06, "loss": 0.4272, "step": 2357 }, { "epoch": 0.1610985857757737, "grad_norm": 4.411546230316162, "learning_rate": 6.689782374702078e-06, "loss": 0.4727, "step": 2358 }, { "epoch": 0.16116690578670492, "grad_norm": 6.97610330581665, "learning_rate": 6.68946351554015e-06, "loss": 0.3926, "step": 2359 }, { "epoch": 0.16123522579763613, "grad_norm": 4.768793106079102, "learning_rate": 6.689144500198654e-06, "loss": 0.4009, "step": 2360 }, { "epoch": 0.16130354580856732, "grad_norm": 4.7920708656311035, "learning_rate": 6.688825328693211e-06, "loss": 0.4764, "step": 2361 }, { "epoch": 0.16137186581949853, "grad_norm": 4.006071090698242, "learning_rate": 6.688506001039449e-06, "loss": 0.4093, "step": 2362 }, { "epoch": 0.16144018583042974, "grad_norm": 4.491388320922852, "learning_rate": 6.688186517253008e-06, "loss": 0.4133, "step": 2363 }, { "epoch": 0.16150850584136092, "grad_norm": 5.123956203460693, "learning_rate": 6.687866877349528e-06, "loss": 0.4468, "step": 2364 }, { "epoch": 0.16157682585229213, "grad_norm": 3.0583786964416504, "learning_rate": 6.687547081344664e-06, "loss": 0.357, "step": 2365 }, { "epoch": 0.16164514586322334, "grad_norm": 5.270841121673584, "learning_rate": 6.687227129254075e-06, "loss": 0.5521, "step": 2366 }, { "epoch": 0.16171346587415453, "grad_norm": 4.536800384521484, "learning_rate": 6.686907021093427e-06, "loss": 0.4122, "step": 2367 }, { "epoch": 0.16178178588508574, "grad_norm": 3.001235008239746, "learning_rate": 6.686586756878397e-06, "loss": 0.3408, "step": 2368 }, { "epoch": 0.16185010589601695, "grad_norm": 3.9564383029937744, "learning_rate": 6.6862663366246645e-06, "loss": 0.4209, "step": 2369 }, { "epoch": 0.16191842590694813, "grad_norm": 4.28416633605957, "learning_rate": 6.685945760347922e-06, "loss": 0.4381, "step": 2370 }, { "epoch": 0.16198674591787934, "grad_norm": 4.732412815093994, "learning_rate": 6.685625028063867e-06, "loss": 0.565, "step": 2371 }, { "epoch": 0.16205506592881055, "grad_norm": 5.05093240737915, "learning_rate": 6.685304139788203e-06, "loss": 0.3954, "step": 2372 }, { "epoch": 0.16212338593974174, "grad_norm": 3.9455132484436035, "learning_rate": 6.684983095536646e-06, "loss": 0.3861, "step": 2373 }, { "epoch": 0.16219170595067295, "grad_norm": 3.617528200149536, "learning_rate": 6.6846618953249135e-06, "loss": 0.4765, "step": 2374 }, { "epoch": 0.16226002596160416, "grad_norm": 3.9469075202941895, "learning_rate": 6.684340539168737e-06, "loss": 0.4118, "step": 2375 }, { "epoch": 0.16232834597253534, "grad_norm": 3.2004404067993164, "learning_rate": 6.68401902708385e-06, "loss": 0.3916, "step": 2376 }, { "epoch": 0.16239666598346655, "grad_norm": 4.909274101257324, "learning_rate": 6.683697359085998e-06, "loss": 0.4146, "step": 2377 }, { "epoch": 0.16246498599439776, "grad_norm": 7.303028583526611, "learning_rate": 6.683375535190931e-06, "loss": 0.4957, "step": 2378 }, { "epoch": 0.16253330600532895, "grad_norm": 3.215393304824829, "learning_rate": 6.683053555414407e-06, "loss": 0.3509, "step": 2379 }, { "epoch": 0.16260162601626016, "grad_norm": 3.8823280334472656, "learning_rate": 6.682731419772195e-06, "loss": 0.4076, "step": 2380 }, { "epoch": 0.16266994602719137, "grad_norm": 4.370840072631836, "learning_rate": 6.682409128280067e-06, "loss": 0.4682, "step": 2381 }, { "epoch": 0.16273826603812255, "grad_norm": 5.68874454498291, "learning_rate": 6.682086680953806e-06, "loss": 0.4025, "step": 2382 }, { "epoch": 0.16280658604905376, "grad_norm": 4.493097305297852, "learning_rate": 6.6817640778092e-06, "loss": 0.3352, "step": 2383 }, { "epoch": 0.16287490605998498, "grad_norm": 5.180200576782227, "learning_rate": 6.6814413188620486e-06, "loss": 0.3674, "step": 2384 }, { "epoch": 0.16294322607091616, "grad_norm": 4.350634574890137, "learning_rate": 6.681118404128153e-06, "loss": 0.4076, "step": 2385 }, { "epoch": 0.16301154608184737, "grad_norm": 4.684810161590576, "learning_rate": 6.680795333623328e-06, "loss": 0.3925, "step": 2386 }, { "epoch": 0.16307986609277858, "grad_norm": 5.494087219238281, "learning_rate": 6.680472107363393e-06, "loss": 0.404, "step": 2387 }, { "epoch": 0.16314818610370976, "grad_norm": 4.8392791748046875, "learning_rate": 6.680148725364175e-06, "loss": 0.4355, "step": 2388 }, { "epoch": 0.16321650611464097, "grad_norm": 4.581396579742432, "learning_rate": 6.67982518764151e-06, "loss": 0.4727, "step": 2389 }, { "epoch": 0.16328482612557219, "grad_norm": 5.527618408203125, "learning_rate": 6.679501494211239e-06, "loss": 0.4992, "step": 2390 }, { "epoch": 0.16335314613650337, "grad_norm": 5.107563018798828, "learning_rate": 6.679177645089214e-06, "loss": 0.4104, "step": 2391 }, { "epoch": 0.16342146614743458, "grad_norm": 4.293732643127441, "learning_rate": 6.678853640291294e-06, "loss": 0.3842, "step": 2392 }, { "epoch": 0.1634897861583658, "grad_norm": 5.179251670837402, "learning_rate": 6.678529479833343e-06, "loss": 0.4131, "step": 2393 }, { "epoch": 0.16355810616929697, "grad_norm": 4.230368614196777, "learning_rate": 6.6782051637312334e-06, "loss": 0.446, "step": 2394 }, { "epoch": 0.16362642618022818, "grad_norm": 4.404331684112549, "learning_rate": 6.677880692000848e-06, "loss": 0.4042, "step": 2395 }, { "epoch": 0.1636947461911594, "grad_norm": 4.405868053436279, "learning_rate": 6.677556064658074e-06, "loss": 0.4974, "step": 2396 }, { "epoch": 0.16376306620209058, "grad_norm": 4.669291019439697, "learning_rate": 6.677231281718809e-06, "loss": 0.4381, "step": 2397 }, { "epoch": 0.1638313862130218, "grad_norm": 4.652437210083008, "learning_rate": 6.6769063431989556e-06, "loss": 0.3247, "step": 2398 }, { "epoch": 0.163899706223953, "grad_norm": 4.354762554168701, "learning_rate": 6.6765812491144255e-06, "loss": 0.4839, "step": 2399 }, { "epoch": 0.16396802623488418, "grad_norm": 5.070544242858887, "learning_rate": 6.676255999481138e-06, "loss": 0.4343, "step": 2400 }, { "epoch": 0.1640363462458154, "grad_norm": 3.804333448410034, "learning_rate": 6.675930594315018e-06, "loss": 0.3698, "step": 2401 }, { "epoch": 0.1641046662567466, "grad_norm": 4.104040145874023, "learning_rate": 6.675605033632003e-06, "loss": 0.3869, "step": 2402 }, { "epoch": 0.1641729862676778, "grad_norm": 3.3271026611328125, "learning_rate": 6.675279317448033e-06, "loss": 0.3284, "step": 2403 }, { "epoch": 0.164241306278609, "grad_norm": 5.557889461517334, "learning_rate": 6.674953445779056e-06, "loss": 0.3628, "step": 2404 }, { "epoch": 0.1643096262895402, "grad_norm": 5.030124187469482, "learning_rate": 6.674627418641031e-06, "loss": 0.4923, "step": 2405 }, { "epoch": 0.1643779463004714, "grad_norm": 4.709857940673828, "learning_rate": 6.674301236049923e-06, "loss": 0.4017, "step": 2406 }, { "epoch": 0.1644462663114026, "grad_norm": 4.1910834312438965, "learning_rate": 6.673974898021703e-06, "loss": 0.4445, "step": 2407 }, { "epoch": 0.16451458632233382, "grad_norm": 3.56377911567688, "learning_rate": 6.67364840457235e-06, "loss": 0.4604, "step": 2408 }, { "epoch": 0.164582906333265, "grad_norm": 3.796361207962036, "learning_rate": 6.673321755717855e-06, "loss": 0.3643, "step": 2409 }, { "epoch": 0.1646512263441962, "grad_norm": 3.6598565578460693, "learning_rate": 6.67299495147421e-06, "loss": 0.342, "step": 2410 }, { "epoch": 0.16471954635512742, "grad_norm": 3.3594701290130615, "learning_rate": 6.6726679918574185e-06, "loss": 0.4212, "step": 2411 }, { "epoch": 0.1647878663660586, "grad_norm": 4.170466423034668, "learning_rate": 6.672340876883493e-06, "loss": 0.4975, "step": 2412 }, { "epoch": 0.16485618637698982, "grad_norm": 4.233780384063721, "learning_rate": 6.6720136065684475e-06, "loss": 0.436, "step": 2413 }, { "epoch": 0.16492450638792103, "grad_norm": 4.915952682495117, "learning_rate": 6.67168618092831e-06, "loss": 0.3844, "step": 2414 }, { "epoch": 0.1649928263988522, "grad_norm": 5.494889736175537, "learning_rate": 6.671358599979115e-06, "loss": 0.5514, "step": 2415 }, { "epoch": 0.16506114640978342, "grad_norm": 5.732289791107178, "learning_rate": 6.6710308637369e-06, "loss": 0.5047, "step": 2416 }, { "epoch": 0.16512946642071463, "grad_norm": 3.210278272628784, "learning_rate": 6.670702972217714e-06, "loss": 0.4371, "step": 2417 }, { "epoch": 0.16519778643164582, "grad_norm": 5.224283695220947, "learning_rate": 6.670374925437616e-06, "loss": 0.4593, "step": 2418 }, { "epoch": 0.16526610644257703, "grad_norm": 5.05266809463501, "learning_rate": 6.670046723412667e-06, "loss": 0.3767, "step": 2419 }, { "epoch": 0.16533442645350824, "grad_norm": 3.706631898880005, "learning_rate": 6.669718366158938e-06, "loss": 0.4483, "step": 2420 }, { "epoch": 0.16540274646443942, "grad_norm": 4.683236122131348, "learning_rate": 6.669389853692508e-06, "loss": 0.4698, "step": 2421 }, { "epoch": 0.16547106647537063, "grad_norm": 4.2366180419921875, "learning_rate": 6.6690611860294656e-06, "loss": 0.4908, "step": 2422 }, { "epoch": 0.16553938648630184, "grad_norm": 3.4559593200683594, "learning_rate": 6.668732363185902e-06, "loss": 0.386, "step": 2423 }, { "epoch": 0.16560770649723303, "grad_norm": 3.643314838409424, "learning_rate": 6.6684033851779196e-06, "loss": 0.3817, "step": 2424 }, { "epoch": 0.16567602650816424, "grad_norm": 4.358884334564209, "learning_rate": 6.668074252021627e-06, "loss": 0.4023, "step": 2425 }, { "epoch": 0.16574434651909545, "grad_norm": 4.956054210662842, "learning_rate": 6.667744963733144e-06, "loss": 0.4737, "step": 2426 }, { "epoch": 0.16581266653002663, "grad_norm": 5.733674049377441, "learning_rate": 6.6674155203285915e-06, "loss": 0.4492, "step": 2427 }, { "epoch": 0.16588098654095784, "grad_norm": 5.093634128570557, "learning_rate": 6.667085921824103e-06, "loss": 0.5157, "step": 2428 }, { "epoch": 0.16594930655188905, "grad_norm": 4.1490864753723145, "learning_rate": 6.666756168235818e-06, "loss": 0.4371, "step": 2429 }, { "epoch": 0.16601762656282024, "grad_norm": 5.139257907867432, "learning_rate": 6.666426259579883e-06, "loss": 0.4322, "step": 2430 }, { "epoch": 0.16608594657375145, "grad_norm": 5.761337757110596, "learning_rate": 6.666096195872453e-06, "loss": 0.4729, "step": 2431 }, { "epoch": 0.16615426658468266, "grad_norm": 4.024005889892578, "learning_rate": 6.66576597712969e-06, "loss": 0.4774, "step": 2432 }, { "epoch": 0.16622258659561384, "grad_norm": 4.283023834228516, "learning_rate": 6.665435603367767e-06, "loss": 0.3761, "step": 2433 }, { "epoch": 0.16629090660654505, "grad_norm": 5.176533222198486, "learning_rate": 6.6651050746028566e-06, "loss": 0.5206, "step": 2434 }, { "epoch": 0.16635922661747626, "grad_norm": 6.089422702789307, "learning_rate": 6.664774390851147e-06, "loss": 0.3756, "step": 2435 }, { "epoch": 0.16642754662840745, "grad_norm": 6.049905300140381, "learning_rate": 6.66444355212883e-06, "loss": 0.4341, "step": 2436 }, { "epoch": 0.16649586663933866, "grad_norm": 4.615180015563965, "learning_rate": 6.664112558452107e-06, "loss": 0.4194, "step": 2437 }, { "epoch": 0.16656418665026987, "grad_norm": 3.5927815437316895, "learning_rate": 6.663781409837184e-06, "loss": 0.4127, "step": 2438 }, { "epoch": 0.16663250666120105, "grad_norm": 4.347229480743408, "learning_rate": 6.663450106300278e-06, "loss": 0.4451, "step": 2439 }, { "epoch": 0.16670082667213226, "grad_norm": 4.517730236053467, "learning_rate": 6.663118647857612e-06, "loss": 0.5323, "step": 2440 }, { "epoch": 0.16676914668306347, "grad_norm": 3.908547878265381, "learning_rate": 6.662787034525415e-06, "loss": 0.3931, "step": 2441 }, { "epoch": 0.16683746669399468, "grad_norm": 3.564234733581543, "learning_rate": 6.662455266319927e-06, "loss": 0.3616, "step": 2442 }, { "epoch": 0.16690578670492587, "grad_norm": 4.534394264221191, "learning_rate": 6.662123343257394e-06, "loss": 0.3204, "step": 2443 }, { "epoch": 0.16697410671585708, "grad_norm": 4.8081560134887695, "learning_rate": 6.661791265354067e-06, "loss": 0.4528, "step": 2444 }, { "epoch": 0.1670424267267883, "grad_norm": 4.1334052085876465, "learning_rate": 6.661459032626209e-06, "loss": 0.3978, "step": 2445 }, { "epoch": 0.16711074673771947, "grad_norm": 4.058101177215576, "learning_rate": 6.661126645090089e-06, "loss": 0.3928, "step": 2446 }, { "epoch": 0.16717906674865068, "grad_norm": 5.343186855316162, "learning_rate": 6.660794102761981e-06, "loss": 0.4972, "step": 2447 }, { "epoch": 0.1672473867595819, "grad_norm": 5.088109016418457, "learning_rate": 6.660461405658171e-06, "loss": 0.5046, "step": 2448 }, { "epoch": 0.16731570677051308, "grad_norm": 4.805006980895996, "learning_rate": 6.660128553794949e-06, "loss": 0.4497, "step": 2449 }, { "epoch": 0.1673840267814443, "grad_norm": 5.521426677703857, "learning_rate": 6.659795547188615e-06, "loss": 0.4445, "step": 2450 }, { "epoch": 0.1674523467923755, "grad_norm": 4.151753902435303, "learning_rate": 6.659462385855473e-06, "loss": 0.3926, "step": 2451 }, { "epoch": 0.16752066680330668, "grad_norm": 4.573609352111816, "learning_rate": 6.659129069811839e-06, "loss": 0.5995, "step": 2452 }, { "epoch": 0.1675889868142379, "grad_norm": 3.0600016117095947, "learning_rate": 6.658795599074035e-06, "loss": 0.2919, "step": 2453 }, { "epoch": 0.1676573068251691, "grad_norm": 5.238872051239014, "learning_rate": 6.658461973658389e-06, "loss": 0.43, "step": 2454 }, { "epoch": 0.1677256268361003, "grad_norm": 4.268948078155518, "learning_rate": 6.658128193581238e-06, "loss": 0.3859, "step": 2455 }, { "epoch": 0.1677939468470315, "grad_norm": 4.63042688369751, "learning_rate": 6.657794258858927e-06, "loss": 0.4182, "step": 2456 }, { "epoch": 0.1678622668579627, "grad_norm": 4.465023517608643, "learning_rate": 6.657460169507807e-06, "loss": 0.4037, "step": 2457 }, { "epoch": 0.1679305868688939, "grad_norm": 3.9159998893737793, "learning_rate": 6.657125925544237e-06, "loss": 0.3673, "step": 2458 }, { "epoch": 0.1679989068798251, "grad_norm": 3.1941118240356445, "learning_rate": 6.656791526984586e-06, "loss": 0.3085, "step": 2459 }, { "epoch": 0.16806722689075632, "grad_norm": 4.545957088470459, "learning_rate": 6.656456973845227e-06, "loss": 0.4409, "step": 2460 }, { "epoch": 0.1681355469016875, "grad_norm": 5.177128791809082, "learning_rate": 6.656122266142544e-06, "loss": 0.4852, "step": 2461 }, { "epoch": 0.1682038669126187, "grad_norm": 3.964056968688965, "learning_rate": 6.655787403892924e-06, "loss": 0.4012, "step": 2462 }, { "epoch": 0.16827218692354992, "grad_norm": 5.558220863342285, "learning_rate": 6.655452387112767e-06, "loss": 0.5093, "step": 2463 }, { "epoch": 0.1683405069344811, "grad_norm": 5.305512428283691, "learning_rate": 6.655117215818475e-06, "loss": 0.4895, "step": 2464 }, { "epoch": 0.16840882694541232, "grad_norm": 4.2579145431518555, "learning_rate": 6.654781890026463e-06, "loss": 0.4036, "step": 2465 }, { "epoch": 0.16847714695634353, "grad_norm": 4.191915988922119, "learning_rate": 6.654446409753149e-06, "loss": 0.425, "step": 2466 }, { "epoch": 0.1685454669672747, "grad_norm": 4.920033931732178, "learning_rate": 6.654110775014963e-06, "loss": 0.3736, "step": 2467 }, { "epoch": 0.16861378697820592, "grad_norm": 5.55234432220459, "learning_rate": 6.653774985828338e-06, "loss": 0.3462, "step": 2468 }, { "epoch": 0.16868210698913713, "grad_norm": 4.1511006355285645, "learning_rate": 6.653439042209718e-06, "loss": 0.5034, "step": 2469 }, { "epoch": 0.16875042700006831, "grad_norm": 4.287595272064209, "learning_rate": 6.653102944175553e-06, "loss": 0.4137, "step": 2470 }, { "epoch": 0.16881874701099953, "grad_norm": 4.482803821563721, "learning_rate": 6.6527666917423e-06, "loss": 0.4321, "step": 2471 }, { "epoch": 0.16888706702193074, "grad_norm": 3.742201805114746, "learning_rate": 6.6524302849264255e-06, "loss": 0.4794, "step": 2472 }, { "epoch": 0.16895538703286192, "grad_norm": 3.6639721393585205, "learning_rate": 6.652093723744402e-06, "loss": 0.3647, "step": 2473 }, { "epoch": 0.16902370704379313, "grad_norm": 5.081488609313965, "learning_rate": 6.65175700821271e-06, "loss": 0.4226, "step": 2474 }, { "epoch": 0.16909202705472434, "grad_norm": 4.21819543838501, "learning_rate": 6.651420138347836e-06, "loss": 0.3847, "step": 2475 }, { "epoch": 0.16916034706565553, "grad_norm": 5.316274166107178, "learning_rate": 6.6510831141662794e-06, "loss": 0.461, "step": 2476 }, { "epoch": 0.16922866707658674, "grad_norm": 4.284478664398193, "learning_rate": 6.65074593568454e-06, "loss": 0.4728, "step": 2477 }, { "epoch": 0.16929698708751795, "grad_norm": 4.536814212799072, "learning_rate": 6.650408602919129e-06, "loss": 0.4546, "step": 2478 }, { "epoch": 0.16936530709844913, "grad_norm": 5.011091709136963, "learning_rate": 6.650071115886567e-06, "loss": 0.3872, "step": 2479 }, { "epoch": 0.16943362710938034, "grad_norm": 3.184142589569092, "learning_rate": 6.6497334746033764e-06, "loss": 0.3799, "step": 2480 }, { "epoch": 0.16950194712031155, "grad_norm": 3.998129367828369, "learning_rate": 6.6493956790860945e-06, "loss": 0.5034, "step": 2481 }, { "epoch": 0.16957026713124274, "grad_norm": 3.8952114582061768, "learning_rate": 6.6490577293512585e-06, "loss": 0.4021, "step": 2482 }, { "epoch": 0.16963858714217395, "grad_norm": 3.5954959392547607, "learning_rate": 6.64871962541542e-06, "loss": 0.4668, "step": 2483 }, { "epoch": 0.16970690715310516, "grad_norm": 3.768584728240967, "learning_rate": 6.648381367295131e-06, "loss": 0.4706, "step": 2484 }, { "epoch": 0.16977522716403634, "grad_norm": 4.671468257904053, "learning_rate": 6.64804295500696e-06, "loss": 0.4019, "step": 2485 }, { "epoch": 0.16984354717496755, "grad_norm": 4.379472255706787, "learning_rate": 6.647704388567476e-06, "loss": 0.4377, "step": 2486 }, { "epoch": 0.16991186718589876, "grad_norm": 3.2436413764953613, "learning_rate": 6.647365667993256e-06, "loss": 0.3419, "step": 2487 }, { "epoch": 0.16998018719682995, "grad_norm": 3.519176721572876, "learning_rate": 6.647026793300889e-06, "loss": 0.4321, "step": 2488 }, { "epoch": 0.17004850720776116, "grad_norm": 5.5271687507629395, "learning_rate": 6.6466877645069665e-06, "loss": 0.4257, "step": 2489 }, { "epoch": 0.17011682721869237, "grad_norm": 5.45037317276001, "learning_rate": 6.646348581628092e-06, "loss": 0.5077, "step": 2490 }, { "epoch": 0.17018514722962355, "grad_norm": 4.205333709716797, "learning_rate": 6.646009244680873e-06, "loss": 0.5049, "step": 2491 }, { "epoch": 0.17025346724055476, "grad_norm": 5.953914642333984, "learning_rate": 6.645669753681926e-06, "loss": 0.5868, "step": 2492 }, { "epoch": 0.17032178725148597, "grad_norm": 5.271664142608643, "learning_rate": 6.645330108647875e-06, "loss": 0.3771, "step": 2493 }, { "epoch": 0.17039010726241716, "grad_norm": 3.2129456996917725, "learning_rate": 6.6449903095953526e-06, "loss": 0.3562, "step": 2494 }, { "epoch": 0.17045842727334837, "grad_norm": 3.7663393020629883, "learning_rate": 6.6446503565409966e-06, "loss": 0.2806, "step": 2495 }, { "epoch": 0.17052674728427958, "grad_norm": 4.57636833190918, "learning_rate": 6.644310249501454e-06, "loss": 0.4533, "step": 2496 }, { "epoch": 0.17059506729521076, "grad_norm": 4.502344131469727, "learning_rate": 6.643969988493379e-06, "loss": 0.4996, "step": 2497 }, { "epoch": 0.17066338730614197, "grad_norm": 4.1625566482543945, "learning_rate": 6.643629573533434e-06, "loss": 0.3897, "step": 2498 }, { "epoch": 0.17073170731707318, "grad_norm": 3.709472179412842, "learning_rate": 6.643289004638286e-06, "loss": 0.3843, "step": 2499 }, { "epoch": 0.17080002732800437, "grad_norm": 4.372208118438721, "learning_rate": 6.642948281824614e-06, "loss": 0.4282, "step": 2500 }, { "epoch": 0.17086834733893558, "grad_norm": 3.6177453994750977, "learning_rate": 6.642607405109101e-06, "loss": 0.3595, "step": 2501 }, { "epoch": 0.1709366673498668, "grad_norm": 4.101351261138916, "learning_rate": 6.642266374508439e-06, "loss": 0.409, "step": 2502 }, { "epoch": 0.17100498736079797, "grad_norm": 4.376796245574951, "learning_rate": 6.6419251900393294e-06, "loss": 0.3907, "step": 2503 }, { "epoch": 0.17107330737172918, "grad_norm": 4.388703346252441, "learning_rate": 6.641583851718476e-06, "loss": 0.3054, "step": 2504 }, { "epoch": 0.1711416273826604, "grad_norm": 3.94098162651062, "learning_rate": 6.641242359562595e-06, "loss": 0.4058, "step": 2505 }, { "epoch": 0.17120994739359158, "grad_norm": 5.522054672241211, "learning_rate": 6.640900713588408e-06, "loss": 0.4723, "step": 2506 }, { "epoch": 0.1712782674045228, "grad_norm": 3.9037911891937256, "learning_rate": 6.640558913812645e-06, "loss": 0.4317, "step": 2507 }, { "epoch": 0.171346587415454, "grad_norm": 3.663811206817627, "learning_rate": 6.6402169602520415e-06, "loss": 0.4843, "step": 2508 }, { "epoch": 0.17141490742638518, "grad_norm": 4.297123908996582, "learning_rate": 6.639874852923344e-06, "loss": 0.4789, "step": 2509 }, { "epoch": 0.1714832274373164, "grad_norm": 5.278457164764404, "learning_rate": 6.6395325918433025e-06, "loss": 0.4509, "step": 2510 }, { "epoch": 0.1715515474482476, "grad_norm": 4.439509868621826, "learning_rate": 6.639190177028679e-06, "loss": 0.4268, "step": 2511 }, { "epoch": 0.1716198674591788, "grad_norm": 4.200689792633057, "learning_rate": 6.638847608496238e-06, "loss": 0.3952, "step": 2512 }, { "epoch": 0.17168818747011, "grad_norm": 6.434699535369873, "learning_rate": 6.638504886262757e-06, "loss": 0.4679, "step": 2513 }, { "epoch": 0.1717565074810412, "grad_norm": 4.4997334480285645, "learning_rate": 6.6381620103450154e-06, "loss": 0.4108, "step": 2514 }, { "epoch": 0.1718248274919724, "grad_norm": 3.9258265495300293, "learning_rate": 6.6378189807598055e-06, "loss": 0.5099, "step": 2515 }, { "epoch": 0.1718931475029036, "grad_norm": 4.959238529205322, "learning_rate": 6.637475797523923e-06, "loss": 0.4195, "step": 2516 }, { "epoch": 0.17196146751383481, "grad_norm": 4.528360843658447, "learning_rate": 6.637132460654173e-06, "loss": 0.541, "step": 2517 }, { "epoch": 0.172029787524766, "grad_norm": 5.411151885986328, "learning_rate": 6.636788970167368e-06, "loss": 0.4207, "step": 2518 }, { "epoch": 0.1720981075356972, "grad_norm": 4.115804195404053, "learning_rate": 6.6364453260803275e-06, "loss": 0.3317, "step": 2519 }, { "epoch": 0.17216642754662842, "grad_norm": 6.790795803070068, "learning_rate": 6.636101528409879e-06, "loss": 0.4456, "step": 2520 }, { "epoch": 0.1722347475575596, "grad_norm": 3.372323751449585, "learning_rate": 6.635757577172857e-06, "loss": 0.346, "step": 2521 }, { "epoch": 0.17230306756849081, "grad_norm": 4.3565263748168945, "learning_rate": 6.6354134723861045e-06, "loss": 0.4534, "step": 2522 }, { "epoch": 0.17237138757942203, "grad_norm": 4.883514404296875, "learning_rate": 6.6350692140664715e-06, "loss": 0.5212, "step": 2523 }, { "epoch": 0.1724397075903532, "grad_norm": 4.78321647644043, "learning_rate": 6.634724802230815e-06, "loss": 0.4465, "step": 2524 }, { "epoch": 0.17250802760128442, "grad_norm": 3.3884904384613037, "learning_rate": 6.634380236896e-06, "loss": 0.3779, "step": 2525 }, { "epoch": 0.17257634761221563, "grad_norm": 3.851435899734497, "learning_rate": 6.634035518078898e-06, "loss": 0.445, "step": 2526 }, { "epoch": 0.1726446676231468, "grad_norm": 3.3991825580596924, "learning_rate": 6.633690645796391e-06, "loss": 0.3422, "step": 2527 }, { "epoch": 0.17271298763407802, "grad_norm": 4.997655868530273, "learning_rate": 6.633345620065364e-06, "loss": 0.4537, "step": 2528 }, { "epoch": 0.17278130764500924, "grad_norm": 3.445827007293701, "learning_rate": 6.633000440902714e-06, "loss": 0.3892, "step": 2529 }, { "epoch": 0.17284962765594042, "grad_norm": 3.8251988887786865, "learning_rate": 6.632655108325344e-06, "loss": 0.3025, "step": 2530 }, { "epoch": 0.17291794766687163, "grad_norm": 4.944834232330322, "learning_rate": 6.632309622350162e-06, "loss": 0.4384, "step": 2531 }, { "epoch": 0.17298626767780284, "grad_norm": 3.548785924911499, "learning_rate": 6.6319639829940865e-06, "loss": 0.3267, "step": 2532 }, { "epoch": 0.17305458768873402, "grad_norm": 4.940762042999268, "learning_rate": 6.631618190274042e-06, "loss": 0.5195, "step": 2533 }, { "epoch": 0.17312290769966523, "grad_norm": 2.9329657554626465, "learning_rate": 6.6312722442069615e-06, "loss": 0.3758, "step": 2534 }, { "epoch": 0.17319122771059645, "grad_norm": 3.9970951080322266, "learning_rate": 6.630926144809786e-06, "loss": 0.4514, "step": 2535 }, { "epoch": 0.17325954772152763, "grad_norm": 4.680131435394287, "learning_rate": 6.630579892099461e-06, "loss": 0.3982, "step": 2536 }, { "epoch": 0.17332786773245884, "grad_norm": 11.038859367370605, "learning_rate": 6.6302334860929434e-06, "loss": 0.4969, "step": 2537 }, { "epoch": 0.17339618774339005, "grad_norm": 4.344335079193115, "learning_rate": 6.629886926807195e-06, "loss": 0.4109, "step": 2538 }, { "epoch": 0.17346450775432123, "grad_norm": 5.327185153961182, "learning_rate": 6.629540214259187e-06, "loss": 0.436, "step": 2539 }, { "epoch": 0.17353282776525245, "grad_norm": 4.448189735412598, "learning_rate": 6.629193348465895e-06, "loss": 0.3956, "step": 2540 }, { "epoch": 0.17360114777618366, "grad_norm": 4.584094524383545, "learning_rate": 6.628846329444306e-06, "loss": 0.3786, "step": 2541 }, { "epoch": 0.17366946778711484, "grad_norm": 3.9002859592437744, "learning_rate": 6.62849915721141e-06, "loss": 0.4669, "step": 2542 }, { "epoch": 0.17373778779804605, "grad_norm": 3.889859199523926, "learning_rate": 6.62815183178421e-06, "loss": 0.3503, "step": 2543 }, { "epoch": 0.17380610780897726, "grad_norm": 4.1980204582214355, "learning_rate": 6.627804353179712e-06, "loss": 0.387, "step": 2544 }, { "epoch": 0.17387442781990844, "grad_norm": 4.587588787078857, "learning_rate": 6.627456721414932e-06, "loss": 0.4489, "step": 2545 }, { "epoch": 0.17394274783083966, "grad_norm": 4.771138668060303, "learning_rate": 6.6271089365068915e-06, "loss": 0.4208, "step": 2546 }, { "epoch": 0.17401106784177087, "grad_norm": 5.035216808319092, "learning_rate": 6.6267609984726215e-06, "loss": 0.4732, "step": 2547 }, { "epoch": 0.17407938785270205, "grad_norm": 4.6579742431640625, "learning_rate": 6.626412907329159e-06, "loss": 0.3785, "step": 2548 }, { "epoch": 0.17414770786363326, "grad_norm": 3.7918715476989746, "learning_rate": 6.62606466309355e-06, "loss": 0.4021, "step": 2549 }, { "epoch": 0.17421602787456447, "grad_norm": 3.4213340282440186, "learning_rate": 6.6257162657828464e-06, "loss": 0.3731, "step": 2550 }, { "epoch": 0.17428434788549566, "grad_norm": 6.483099937438965, "learning_rate": 6.625367715414108e-06, "loss": 0.5065, "step": 2551 }, { "epoch": 0.17435266789642687, "grad_norm": 4.793565273284912, "learning_rate": 6.625019012004403e-06, "loss": 0.4514, "step": 2552 }, { "epoch": 0.17442098790735808, "grad_norm": 4.975826740264893, "learning_rate": 6.624670155570806e-06, "loss": 0.4448, "step": 2553 }, { "epoch": 0.17448930791828926, "grad_norm": 4.491964817047119, "learning_rate": 6.6243211461304e-06, "loss": 0.4645, "step": 2554 }, { "epoch": 0.17455762792922047, "grad_norm": 4.365842342376709, "learning_rate": 6.623971983700275e-06, "loss": 0.4304, "step": 2555 }, { "epoch": 0.17462594794015168, "grad_norm": 3.771221160888672, "learning_rate": 6.623622668297529e-06, "loss": 0.3894, "step": 2556 }, { "epoch": 0.17469426795108287, "grad_norm": 4.135219573974609, "learning_rate": 6.623273199939266e-06, "loss": 0.389, "step": 2557 }, { "epoch": 0.17476258796201408, "grad_norm": 4.712584018707275, "learning_rate": 6.6229235786425985e-06, "loss": 0.4623, "step": 2558 }, { "epoch": 0.1748309079729453, "grad_norm": 4.423340797424316, "learning_rate": 6.622573804424649e-06, "loss": 0.4753, "step": 2559 }, { "epoch": 0.17489922798387647, "grad_norm": 4.023922920227051, "learning_rate": 6.622223877302541e-06, "loss": 0.333, "step": 2560 }, { "epoch": 0.17496754799480768, "grad_norm": 4.14876127243042, "learning_rate": 6.621873797293413e-06, "loss": 0.4056, "step": 2561 }, { "epoch": 0.1750358680057389, "grad_norm": 5.68959903717041, "learning_rate": 6.621523564414405e-06, "loss": 0.4585, "step": 2562 }, { "epoch": 0.17510418801667008, "grad_norm": 4.086818695068359, "learning_rate": 6.621173178682669e-06, "loss": 0.4595, "step": 2563 }, { "epoch": 0.1751725080276013, "grad_norm": 4.818070888519287, "learning_rate": 6.620822640115361e-06, "loss": 0.4207, "step": 2564 }, { "epoch": 0.1752408280385325, "grad_norm": 3.597355604171753, "learning_rate": 6.620471948729647e-06, "loss": 0.3843, "step": 2565 }, { "epoch": 0.17530914804946368, "grad_norm": 4.337073802947998, "learning_rate": 6.620121104542698e-06, "loss": 0.467, "step": 2566 }, { "epoch": 0.1753774680603949, "grad_norm": 4.487431526184082, "learning_rate": 6.619770107571696e-06, "loss": 0.5301, "step": 2567 }, { "epoch": 0.1754457880713261, "grad_norm": 4.477319717407227, "learning_rate": 6.619418957833827e-06, "loss": 0.3845, "step": 2568 }, { "epoch": 0.1755141080822573, "grad_norm": 3.008584976196289, "learning_rate": 6.619067655346285e-06, "loss": 0.3099, "step": 2569 }, { "epoch": 0.1755824280931885, "grad_norm": 3.7502989768981934, "learning_rate": 6.618716200126273e-06, "loss": 0.4575, "step": 2570 }, { "epoch": 0.1756507481041197, "grad_norm": 3.012040376663208, "learning_rate": 6.618364592191003e-06, "loss": 0.3743, "step": 2571 }, { "epoch": 0.1757190681150509, "grad_norm": 4.859961032867432, "learning_rate": 6.6180128315576895e-06, "loss": 0.4224, "step": 2572 }, { "epoch": 0.1757873881259821, "grad_norm": 4.402594089508057, "learning_rate": 6.617660918243559e-06, "loss": 0.4298, "step": 2573 }, { "epoch": 0.1758557081369133, "grad_norm": 4.474635124206543, "learning_rate": 6.6173088522658435e-06, "loss": 0.4496, "step": 2574 }, { "epoch": 0.1759240281478445, "grad_norm": 4.9873738288879395, "learning_rate": 6.616956633641781e-06, "loss": 0.4601, "step": 2575 }, { "epoch": 0.1759923481587757, "grad_norm": 3.8691213130950928, "learning_rate": 6.616604262388621e-06, "loss": 0.3728, "step": 2576 }, { "epoch": 0.17606066816970692, "grad_norm": 4.692743301391602, "learning_rate": 6.616251738523617e-06, "loss": 0.4641, "step": 2577 }, { "epoch": 0.1761289881806381, "grad_norm": 4.542240619659424, "learning_rate": 6.6158990620640304e-06, "loss": 0.343, "step": 2578 }, { "epoch": 0.1761973081915693, "grad_norm": 3.04042911529541, "learning_rate": 6.615546233027134e-06, "loss": 0.4114, "step": 2579 }, { "epoch": 0.17626562820250052, "grad_norm": 4.381035327911377, "learning_rate": 6.615193251430201e-06, "loss": 0.4066, "step": 2580 }, { "epoch": 0.1763339482134317, "grad_norm": 5.443952560424805, "learning_rate": 6.614840117290519e-06, "loss": 0.4569, "step": 2581 }, { "epoch": 0.17640226822436292, "grad_norm": 3.5905442237854004, "learning_rate": 6.614486830625377e-06, "loss": 0.4229, "step": 2582 }, { "epoch": 0.17647058823529413, "grad_norm": 4.119831562042236, "learning_rate": 6.614133391452078e-06, "loss": 0.3539, "step": 2583 }, { "epoch": 0.1765389082462253, "grad_norm": 4.542084693908691, "learning_rate": 6.613779799787926e-06, "loss": 0.3972, "step": 2584 }, { "epoch": 0.17660722825715652, "grad_norm": 3.8133292198181152, "learning_rate": 6.613426055650237e-06, "loss": 0.4166, "step": 2585 }, { "epoch": 0.17667554826808773, "grad_norm": 4.681187629699707, "learning_rate": 6.613072159056333e-06, "loss": 0.4119, "step": 2586 }, { "epoch": 0.17674386827901892, "grad_norm": 6.315896987915039, "learning_rate": 6.6127181100235425e-06, "loss": 0.3522, "step": 2587 }, { "epoch": 0.17681218828995013, "grad_norm": 5.221776962280273, "learning_rate": 6.612363908569204e-06, "loss": 0.5367, "step": 2588 }, { "epoch": 0.17688050830088134, "grad_norm": 5.101037502288818, "learning_rate": 6.61200955471066e-06, "loss": 0.5134, "step": 2589 }, { "epoch": 0.17694882831181252, "grad_norm": 4.940505504608154, "learning_rate": 6.611655048465262e-06, "loss": 0.4068, "step": 2590 }, { "epoch": 0.17701714832274373, "grad_norm": 4.523764610290527, "learning_rate": 6.61130038985037e-06, "loss": 0.3836, "step": 2591 }, { "epoch": 0.17708546833367494, "grad_norm": 4.348531723022461, "learning_rate": 6.6109455788833525e-06, "loss": 0.4226, "step": 2592 }, { "epoch": 0.17715378834460613, "grad_norm": 4.382110595703125, "learning_rate": 6.6105906155815815e-06, "loss": 0.4361, "step": 2593 }, { "epoch": 0.17722210835553734, "grad_norm": 3.589313268661499, "learning_rate": 6.610235499962438e-06, "loss": 0.4282, "step": 2594 }, { "epoch": 0.17729042836646855, "grad_norm": 4.743160247802734, "learning_rate": 6.6098802320433125e-06, "loss": 0.6028, "step": 2595 }, { "epoch": 0.17735874837739973, "grad_norm": 4.858645439147949, "learning_rate": 6.609524811841601e-06, "loss": 0.4376, "step": 2596 }, { "epoch": 0.17742706838833094, "grad_norm": 4.823980331420898, "learning_rate": 6.609169239374709e-06, "loss": 0.3572, "step": 2597 }, { "epoch": 0.17749538839926216, "grad_norm": 6.078403472900391, "learning_rate": 6.608813514660046e-06, "loss": 0.5678, "step": 2598 }, { "epoch": 0.17756370841019334, "grad_norm": 4.059502124786377, "learning_rate": 6.608457637715031e-06, "loss": 0.4569, "step": 2599 }, { "epoch": 0.17763202842112455, "grad_norm": 4.890416622161865, "learning_rate": 6.608101608557091e-06, "loss": 0.4567, "step": 2600 }, { "epoch": 0.17770034843205576, "grad_norm": 5.544472694396973, "learning_rate": 6.60774542720366e-06, "loss": 0.5829, "step": 2601 }, { "epoch": 0.17776866844298694, "grad_norm": 3.2122585773468018, "learning_rate": 6.607389093672178e-06, "loss": 0.3812, "step": 2602 }, { "epoch": 0.17783698845391815, "grad_norm": 4.42949914932251, "learning_rate": 6.607032607980095e-06, "loss": 0.3492, "step": 2603 }, { "epoch": 0.17790530846484937, "grad_norm": 3.973539352416992, "learning_rate": 6.606675970144867e-06, "loss": 0.4508, "step": 2604 }, { "epoch": 0.17797362847578055, "grad_norm": 4.742079734802246, "learning_rate": 6.606319180183958e-06, "loss": 0.3898, "step": 2605 }, { "epoch": 0.17804194848671176, "grad_norm": 4.848693370819092, "learning_rate": 6.605962238114838e-06, "loss": 0.4512, "step": 2606 }, { "epoch": 0.17811026849764297, "grad_norm": 4.059149265289307, "learning_rate": 6.605605143954985e-06, "loss": 0.3485, "step": 2607 }, { "epoch": 0.17817858850857415, "grad_norm": 4.774200916290283, "learning_rate": 6.605247897721887e-06, "loss": 0.5217, "step": 2608 }, { "epoch": 0.17824690851950536, "grad_norm": 4.1397504806518555, "learning_rate": 6.604890499433035e-06, "loss": 0.4105, "step": 2609 }, { "epoch": 0.17831522853043658, "grad_norm": 5.12399435043335, "learning_rate": 6.6045329491059325e-06, "loss": 0.3922, "step": 2610 }, { "epoch": 0.17838354854136776, "grad_norm": 3.625511884689331, "learning_rate": 6.6041752467580864e-06, "loss": 0.4796, "step": 2611 }, { "epoch": 0.17845186855229897, "grad_norm": 4.0740203857421875, "learning_rate": 6.603817392407013e-06, "loss": 0.5525, "step": 2612 }, { "epoch": 0.17852018856323018, "grad_norm": 4.917315483093262, "learning_rate": 6.603459386070234e-06, "loss": 0.4606, "step": 2613 }, { "epoch": 0.17858850857416136, "grad_norm": 4.60610818862915, "learning_rate": 6.6031012277652805e-06, "loss": 0.4397, "step": 2614 }, { "epoch": 0.17865682858509258, "grad_norm": 4.893176078796387, "learning_rate": 6.602742917509692e-06, "loss": 0.5284, "step": 2615 }, { "epoch": 0.1787251485960238, "grad_norm": 4.613275051116943, "learning_rate": 6.602384455321012e-06, "loss": 0.4599, "step": 2616 }, { "epoch": 0.17879346860695497, "grad_norm": 3.335696220397949, "learning_rate": 6.602025841216794e-06, "loss": 0.352, "step": 2617 }, { "epoch": 0.17886178861788618, "grad_norm": 4.802335739135742, "learning_rate": 6.601667075214601e-06, "loss": 0.4311, "step": 2618 }, { "epoch": 0.1789301086288174, "grad_norm": 3.6945486068725586, "learning_rate": 6.601308157331998e-06, "loss": 0.4549, "step": 2619 }, { "epoch": 0.17899842863974857, "grad_norm": 4.325118541717529, "learning_rate": 6.60094908758656e-06, "loss": 0.4078, "step": 2620 }, { "epoch": 0.17906674865067979, "grad_norm": 4.293048858642578, "learning_rate": 6.600589865995871e-06, "loss": 0.4289, "step": 2621 }, { "epoch": 0.179135068661611, "grad_norm": 5.0676655769348145, "learning_rate": 6.600230492577521e-06, "loss": 0.4759, "step": 2622 }, { "epoch": 0.17920338867254218, "grad_norm": 5.259476661682129, "learning_rate": 6.5998709673491075e-06, "loss": 0.423, "step": 2623 }, { "epoch": 0.1792717086834734, "grad_norm": 3.8277788162231445, "learning_rate": 6.599511290328235e-06, "loss": 0.3707, "step": 2624 }, { "epoch": 0.1793400286944046, "grad_norm": 5.289161205291748, "learning_rate": 6.599151461532516e-06, "loss": 0.3898, "step": 2625 }, { "epoch": 0.17940834870533579, "grad_norm": 3.556579351425171, "learning_rate": 6.598791480979571e-06, "loss": 0.4267, "step": 2626 }, { "epoch": 0.179476668716267, "grad_norm": 4.736988544464111, "learning_rate": 6.598431348687027e-06, "loss": 0.4316, "step": 2627 }, { "epoch": 0.1795449887271982, "grad_norm": 4.0690507888793945, "learning_rate": 6.598071064672518e-06, "loss": 0.497, "step": 2628 }, { "epoch": 0.1796133087381294, "grad_norm": 4.360960483551025, "learning_rate": 6.597710628953688e-06, "loss": 0.4572, "step": 2629 }, { "epoch": 0.1796816287490606, "grad_norm": 4.646924018859863, "learning_rate": 6.5973500415481846e-06, "loss": 0.3561, "step": 2630 }, { "epoch": 0.1797499487599918, "grad_norm": 3.417459726333618, "learning_rate": 6.596989302473666e-06, "loss": 0.4187, "step": 2631 }, { "epoch": 0.179818268770923, "grad_norm": 4.364264011383057, "learning_rate": 6.596628411747796e-06, "loss": 0.3764, "step": 2632 }, { "epoch": 0.1798865887818542, "grad_norm": 4.591729164123535, "learning_rate": 6.596267369388246e-06, "loss": 0.4437, "step": 2633 }, { "epoch": 0.17995490879278542, "grad_norm": 4.673453330993652, "learning_rate": 6.5959061754126955e-06, "loss": 0.3718, "step": 2634 }, { "epoch": 0.1800232288037166, "grad_norm": 4.137871742248535, "learning_rate": 6.595544829838833e-06, "loss": 0.3971, "step": 2635 }, { "epoch": 0.1800915488146478, "grad_norm": 3.9677350521087646, "learning_rate": 6.595183332684351e-06, "loss": 0.4599, "step": 2636 }, { "epoch": 0.18015986882557902, "grad_norm": 3.8634345531463623, "learning_rate": 6.5948216839669516e-06, "loss": 0.4154, "step": 2637 }, { "epoch": 0.1802281888365102, "grad_norm": 5.190941333770752, "learning_rate": 6.594459883704342e-06, "loss": 0.4366, "step": 2638 }, { "epoch": 0.18029650884744142, "grad_norm": 3.4838876724243164, "learning_rate": 6.594097931914243e-06, "loss": 0.3507, "step": 2639 }, { "epoch": 0.18036482885837263, "grad_norm": 3.975132703781128, "learning_rate": 6.5937358286143725e-06, "loss": 0.3981, "step": 2640 }, { "epoch": 0.1804331488693038, "grad_norm": 5.0724005699157715, "learning_rate": 6.5933735738224665e-06, "loss": 0.4347, "step": 2641 }, { "epoch": 0.18050146888023502, "grad_norm": 3.847036600112915, "learning_rate": 6.5930111675562616e-06, "loss": 0.3536, "step": 2642 }, { "epoch": 0.18056978889116623, "grad_norm": 4.449566841125488, "learning_rate": 6.592648609833505e-06, "loss": 0.427, "step": 2643 }, { "epoch": 0.18063810890209742, "grad_norm": 5.533968448638916, "learning_rate": 6.5922859006719475e-06, "loss": 0.4409, "step": 2644 }, { "epoch": 0.18070642891302863, "grad_norm": 4.850465774536133, "learning_rate": 6.591923040089353e-06, "loss": 0.4948, "step": 2645 }, { "epoch": 0.18077474892395984, "grad_norm": 4.189269065856934, "learning_rate": 6.591560028103489e-06, "loss": 0.3431, "step": 2646 }, { "epoch": 0.18084306893489102, "grad_norm": 2.908632755279541, "learning_rate": 6.5911968647321305e-06, "loss": 0.3067, "step": 2647 }, { "epoch": 0.18091138894582223, "grad_norm": 5.658529758453369, "learning_rate": 6.5908335499930615e-06, "loss": 0.402, "step": 2648 }, { "epoch": 0.18097970895675344, "grad_norm": 3.1141719818115234, "learning_rate": 6.590470083904072e-06, "loss": 0.3695, "step": 2649 }, { "epoch": 0.18104802896768463, "grad_norm": 4.283707618713379, "learning_rate": 6.59010646648296e-06, "loss": 0.4832, "step": 2650 }, { "epoch": 0.18111634897861584, "grad_norm": 6.228906154632568, "learning_rate": 6.589742697747531e-06, "loss": 0.4687, "step": 2651 }, { "epoch": 0.18118466898954705, "grad_norm": 4.499719619750977, "learning_rate": 6.589378777715598e-06, "loss": 0.4329, "step": 2652 }, { "epoch": 0.18125298900047823, "grad_norm": 5.575033187866211, "learning_rate": 6.589014706404981e-06, "loss": 0.465, "step": 2653 }, { "epoch": 0.18132130901140944, "grad_norm": 4.033123016357422, "learning_rate": 6.588650483833508e-06, "loss": 0.4807, "step": 2654 }, { "epoch": 0.18138962902234065, "grad_norm": 4.779489994049072, "learning_rate": 6.588286110019013e-06, "loss": 0.4097, "step": 2655 }, { "epoch": 0.18145794903327184, "grad_norm": 4.053021430969238, "learning_rate": 6.58792158497934e-06, "loss": 0.533, "step": 2656 }, { "epoch": 0.18152626904420305, "grad_norm": 4.386050224304199, "learning_rate": 6.5875569087323364e-06, "loss": 0.4164, "step": 2657 }, { "epoch": 0.18159458905513426, "grad_norm": 4.324334144592285, "learning_rate": 6.587192081295862e-06, "loss": 0.3789, "step": 2658 }, { "epoch": 0.18166290906606544, "grad_norm": 3.947814464569092, "learning_rate": 6.586827102687779e-06, "loss": 0.4182, "step": 2659 }, { "epoch": 0.18173122907699665, "grad_norm": 4.296578884124756, "learning_rate": 6.586461972925963e-06, "loss": 0.4993, "step": 2660 }, { "epoch": 0.18179954908792786, "grad_norm": 4.388278961181641, "learning_rate": 6.586096692028289e-06, "loss": 0.4096, "step": 2661 }, { "epoch": 0.18186786909885905, "grad_norm": 4.248262882232666, "learning_rate": 6.585731260012647e-06, "loss": 0.354, "step": 2662 }, { "epoch": 0.18193618910979026, "grad_norm": 3.427483320236206, "learning_rate": 6.585365676896931e-06, "loss": 0.384, "step": 2663 }, { "epoch": 0.18200450912072147, "grad_norm": 3.704075813293457, "learning_rate": 6.5849999426990415e-06, "loss": 0.4934, "step": 2664 }, { "epoch": 0.18207282913165265, "grad_norm": 3.2340550422668457, "learning_rate": 6.584634057436887e-06, "loss": 0.4106, "step": 2665 }, { "epoch": 0.18214114914258386, "grad_norm": 4.12659215927124, "learning_rate": 6.584268021128385e-06, "loss": 0.4237, "step": 2666 }, { "epoch": 0.18220946915351507, "grad_norm": 5.187088489532471, "learning_rate": 6.583901833791461e-06, "loss": 0.3211, "step": 2667 }, { "epoch": 0.18227778916444626, "grad_norm": 4.293549060821533, "learning_rate": 6.583535495444043e-06, "loss": 0.4621, "step": 2668 }, { "epoch": 0.18234610917537747, "grad_norm": 4.252345085144043, "learning_rate": 6.583169006104071e-06, "loss": 0.3942, "step": 2669 }, { "epoch": 0.18241442918630868, "grad_norm": 4.980461597442627, "learning_rate": 6.5828023657894905e-06, "loss": 0.5071, "step": 2670 }, { "epoch": 0.18248274919723986, "grad_norm": 4.620758056640625, "learning_rate": 6.582435574518257e-06, "loss": 0.471, "step": 2671 }, { "epoch": 0.18255106920817107, "grad_norm": 5.542603969573975, "learning_rate": 6.582068632308329e-06, "loss": 0.431, "step": 2672 }, { "epoch": 0.18261938921910228, "grad_norm": 3.630967855453491, "learning_rate": 6.581701539177676e-06, "loss": 0.4169, "step": 2673 }, { "epoch": 0.18268770923003347, "grad_norm": 3.641482353210449, "learning_rate": 6.581334295144271e-06, "loss": 0.2962, "step": 2674 }, { "epoch": 0.18275602924096468, "grad_norm": 4.533750057220459, "learning_rate": 6.580966900226101e-06, "loss": 0.3877, "step": 2675 }, { "epoch": 0.1828243492518959, "grad_norm": 4.254948616027832, "learning_rate": 6.580599354441154e-06, "loss": 0.3896, "step": 2676 }, { "epoch": 0.18289266926282707, "grad_norm": 5.048367500305176, "learning_rate": 6.5802316578074275e-06, "loss": 0.5403, "step": 2677 }, { "epoch": 0.18296098927375828, "grad_norm": 4.631496429443359, "learning_rate": 6.579863810342928e-06, "loss": 0.5108, "step": 2678 }, { "epoch": 0.1830293092846895, "grad_norm": 3.8251686096191406, "learning_rate": 6.579495812065666e-06, "loss": 0.3299, "step": 2679 }, { "epoch": 0.18309762929562068, "grad_norm": 4.166234016418457, "learning_rate": 6.579127662993664e-06, "loss": 0.3738, "step": 2680 }, { "epoch": 0.1831659493065519, "grad_norm": 3.8376710414886475, "learning_rate": 6.578759363144947e-06, "loss": 0.4023, "step": 2681 }, { "epoch": 0.1832342693174831, "grad_norm": 4.2013750076293945, "learning_rate": 6.578390912537551e-06, "loss": 0.4585, "step": 2682 }, { "epoch": 0.18330258932841428, "grad_norm": 4.128582000732422, "learning_rate": 6.578022311189517e-06, "loss": 0.3748, "step": 2683 }, { "epoch": 0.1833709093393455, "grad_norm": 4.658531188964844, "learning_rate": 6.577653559118897e-06, "loss": 0.323, "step": 2684 }, { "epoch": 0.1834392293502767, "grad_norm": 5.868928909301758, "learning_rate": 6.5772846563437445e-06, "loss": 0.5445, "step": 2685 }, { "epoch": 0.1835075493612079, "grad_norm": 3.2526159286499023, "learning_rate": 6.576915602882124e-06, "loss": 0.4194, "step": 2686 }, { "epoch": 0.1835758693721391, "grad_norm": 3.3080825805664062, "learning_rate": 6.57654639875211e-06, "loss": 0.2905, "step": 2687 }, { "epoch": 0.1836441893830703, "grad_norm": 3.945026159286499, "learning_rate": 6.576177043971778e-06, "loss": 0.4858, "step": 2688 }, { "epoch": 0.1837125093940015, "grad_norm": 5.139094829559326, "learning_rate": 6.575807538559217e-06, "loss": 0.3664, "step": 2689 }, { "epoch": 0.1837808294049327, "grad_norm": 3.705681800842285, "learning_rate": 6.575437882532519e-06, "loss": 0.4064, "step": 2690 }, { "epoch": 0.18384914941586392, "grad_norm": 5.116023540496826, "learning_rate": 6.575068075909786e-06, "loss": 0.518, "step": 2691 }, { "epoch": 0.1839174694267951, "grad_norm": 3.131948232650757, "learning_rate": 6.574698118709126e-06, "loss": 0.3367, "step": 2692 }, { "epoch": 0.1839857894377263, "grad_norm": 3.9049901962280273, "learning_rate": 6.574328010948655e-06, "loss": 0.4125, "step": 2693 }, { "epoch": 0.18405410944865752, "grad_norm": 3.662843704223633, "learning_rate": 6.5739577526464955e-06, "loss": 0.4287, "step": 2694 }, { "epoch": 0.1841224294595887, "grad_norm": 4.398468017578125, "learning_rate": 6.5735873438207796e-06, "loss": 0.5517, "step": 2695 }, { "epoch": 0.18419074947051992, "grad_norm": 4.617735862731934, "learning_rate": 6.573216784489643e-06, "loss": 0.3893, "step": 2696 }, { "epoch": 0.18425906948145113, "grad_norm": 4.203571796417236, "learning_rate": 6.572846074671235e-06, "loss": 0.5927, "step": 2697 }, { "epoch": 0.1843273894923823, "grad_norm": 4.024961471557617, "learning_rate": 6.572475214383703e-06, "loss": 0.4059, "step": 2698 }, { "epoch": 0.18439570950331352, "grad_norm": 4.972995281219482, "learning_rate": 6.5721042036452115e-06, "loss": 0.5293, "step": 2699 }, { "epoch": 0.18446402951424473, "grad_norm": 4.603569984436035, "learning_rate": 6.571733042473925e-06, "loss": 0.4073, "step": 2700 }, { "epoch": 0.18453234952517591, "grad_norm": 3.966970443725586, "learning_rate": 6.571361730888019e-06, "loss": 0.411, "step": 2701 }, { "epoch": 0.18460066953610713, "grad_norm": 4.862159729003906, "learning_rate": 6.570990268905677e-06, "loss": 0.4722, "step": 2702 }, { "epoch": 0.18466898954703834, "grad_norm": 3.9175803661346436, "learning_rate": 6.570618656545087e-06, "loss": 0.4088, "step": 2703 }, { "epoch": 0.18473730955796952, "grad_norm": 4.619751930236816, "learning_rate": 6.570246893824447e-06, "loss": 0.4437, "step": 2704 }, { "epoch": 0.18480562956890073, "grad_norm": 3.7569777965545654, "learning_rate": 6.56987498076196e-06, "loss": 0.4257, "step": 2705 }, { "epoch": 0.18487394957983194, "grad_norm": 3.2491977214813232, "learning_rate": 6.5695029173758385e-06, "loss": 0.3957, "step": 2706 }, { "epoch": 0.18494226959076313, "grad_norm": 4.776217460632324, "learning_rate": 6.569130703684301e-06, "loss": 0.4885, "step": 2707 }, { "epoch": 0.18501058960169434, "grad_norm": 3.994298219680786, "learning_rate": 6.5687583397055745e-06, "loss": 0.4597, "step": 2708 }, { "epoch": 0.18507890961262555, "grad_norm": 2.894392490386963, "learning_rate": 6.568385825457892e-06, "loss": 0.2912, "step": 2709 }, { "epoch": 0.18514722962355673, "grad_norm": 3.85111141204834, "learning_rate": 6.568013160959494e-06, "loss": 0.4727, "step": 2710 }, { "epoch": 0.18521554963448794, "grad_norm": 3.313122510910034, "learning_rate": 6.5676403462286295e-06, "loss": 0.3503, "step": 2711 }, { "epoch": 0.18528386964541915, "grad_norm": 4.2526068687438965, "learning_rate": 6.567267381283555e-06, "loss": 0.4035, "step": 2712 }, { "epoch": 0.18535218965635034, "grad_norm": 4.309880256652832, "learning_rate": 6.566894266142533e-06, "loss": 0.338, "step": 2713 }, { "epoch": 0.18542050966728155, "grad_norm": 5.301568984985352, "learning_rate": 6.566521000823833e-06, "loss": 0.4913, "step": 2714 }, { "epoch": 0.18548882967821276, "grad_norm": 4.438916206359863, "learning_rate": 6.566147585345735e-06, "loss": 0.4375, "step": 2715 }, { "epoch": 0.18555714968914394, "grad_norm": 3.9606430530548096, "learning_rate": 6.565774019726522e-06, "loss": 0.3413, "step": 2716 }, { "epoch": 0.18562546970007515, "grad_norm": 3.9267215728759766, "learning_rate": 6.5654003039844875e-06, "loss": 0.4676, "step": 2717 }, { "epoch": 0.18569378971100636, "grad_norm": 4.024475574493408, "learning_rate": 6.565026438137931e-06, "loss": 0.5067, "step": 2718 }, { "epoch": 0.18576210972193755, "grad_norm": 4.067423343658447, "learning_rate": 6.564652422205161e-06, "loss": 0.4397, "step": 2719 }, { "epoch": 0.18583042973286876, "grad_norm": 3.8815526962280273, "learning_rate": 6.5642782562044906e-06, "loss": 0.4388, "step": 2720 }, { "epoch": 0.18589874974379997, "grad_norm": 3.1737990379333496, "learning_rate": 6.5639039401542424e-06, "loss": 0.4533, "step": 2721 }, { "epoch": 0.18596706975473115, "grad_norm": 4.103595733642578, "learning_rate": 6.563529474072744e-06, "loss": 0.3945, "step": 2722 }, { "epoch": 0.18603538976566236, "grad_norm": 5.358869552612305, "learning_rate": 6.563154857978335e-06, "loss": 0.6062, "step": 2723 }, { "epoch": 0.18610370977659357, "grad_norm": 5.4805498123168945, "learning_rate": 6.5627800918893575e-06, "loss": 0.6278, "step": 2724 }, { "epoch": 0.18617202978752476, "grad_norm": 4.181786060333252, "learning_rate": 6.562405175824162e-06, "loss": 0.3838, "step": 2725 }, { "epoch": 0.18624034979845597, "grad_norm": 3.8941495418548584, "learning_rate": 6.56203010980111e-06, "loss": 0.3629, "step": 2726 }, { "epoch": 0.18630866980938718, "grad_norm": 4.579390525817871, "learning_rate": 6.561654893838565e-06, "loss": 0.453, "step": 2727 }, { "epoch": 0.18637698982031836, "grad_norm": 4.260838985443115, "learning_rate": 6.561279527954901e-06, "loss": 0.4479, "step": 2728 }, { "epoch": 0.18644530983124957, "grad_norm": 3.6100168228149414, "learning_rate": 6.5609040121684985e-06, "loss": 0.3408, "step": 2729 }, { "epoch": 0.18651362984218078, "grad_norm": 4.50144100189209, "learning_rate": 6.5605283464977465e-06, "loss": 0.3789, "step": 2730 }, { "epoch": 0.18658194985311197, "grad_norm": 6.11000919342041, "learning_rate": 6.560152530961039e-06, "loss": 0.3841, "step": 2731 }, { "epoch": 0.18665026986404318, "grad_norm": 5.539147853851318, "learning_rate": 6.559776565576779e-06, "loss": 0.4877, "step": 2732 }, { "epoch": 0.1867185898749744, "grad_norm": 5.144613742828369, "learning_rate": 6.559400450363378e-06, "loss": 0.4222, "step": 2733 }, { "epoch": 0.18678690988590557, "grad_norm": 3.943765640258789, "learning_rate": 6.5590241853392504e-06, "loss": 0.4801, "step": 2734 }, { "epoch": 0.18685522989683678, "grad_norm": 3.9755187034606934, "learning_rate": 6.558647770522824e-06, "loss": 0.3581, "step": 2735 }, { "epoch": 0.186923549907768, "grad_norm": 4.427952289581299, "learning_rate": 6.558271205932529e-06, "loss": 0.6156, "step": 2736 }, { "epoch": 0.18699186991869918, "grad_norm": 4.296370983123779, "learning_rate": 6.557894491586806e-06, "loss": 0.4697, "step": 2737 }, { "epoch": 0.1870601899296304, "grad_norm": 3.7069320678710938, "learning_rate": 6.5575176275041e-06, "loss": 0.4266, "step": 2738 }, { "epoch": 0.1871285099405616, "grad_norm": 4.668558120727539, "learning_rate": 6.557140613702866e-06, "loss": 0.3184, "step": 2739 }, { "epoch": 0.18719682995149278, "grad_norm": 3.885068893432617, "learning_rate": 6.556763450201566e-06, "loss": 0.3173, "step": 2740 }, { "epoch": 0.187265149962424, "grad_norm": 3.909510612487793, "learning_rate": 6.556386137018669e-06, "loss": 0.3605, "step": 2741 }, { "epoch": 0.1873334699733552, "grad_norm": 4.736051082611084, "learning_rate": 6.5560086741726485e-06, "loss": 0.3466, "step": 2742 }, { "epoch": 0.1874017899842864, "grad_norm": 4.7361741065979, "learning_rate": 6.5556310616819906e-06, "loss": 0.4134, "step": 2743 }, { "epoch": 0.1874701099952176, "grad_norm": 5.064148902893066, "learning_rate": 6.555253299565183e-06, "loss": 0.4549, "step": 2744 }, { "epoch": 0.1875384300061488, "grad_norm": 3.9583182334899902, "learning_rate": 6.554875387840727e-06, "loss": 0.5045, "step": 2745 }, { "epoch": 0.18760675001708, "grad_norm": 3.084627628326416, "learning_rate": 6.554497326527127e-06, "loss": 0.4377, "step": 2746 }, { "epoch": 0.1876750700280112, "grad_norm": 4.154781818389893, "learning_rate": 6.554119115642894e-06, "loss": 0.408, "step": 2747 }, { "epoch": 0.18774339003894241, "grad_norm": 3.633692979812622, "learning_rate": 6.553740755206549e-06, "loss": 0.4345, "step": 2748 }, { "epoch": 0.1878117100498736, "grad_norm": 4.621810436248779, "learning_rate": 6.553362245236621e-06, "loss": 0.4452, "step": 2749 }, { "epoch": 0.1878800300608048, "grad_norm": 3.5209293365478516, "learning_rate": 6.5529835857516425e-06, "loss": 0.3481, "step": 2750 }, { "epoch": 0.18794835007173602, "grad_norm": 4.114699840545654, "learning_rate": 6.5526047767701555e-06, "loss": 0.402, "step": 2751 }, { "epoch": 0.1880166700826672, "grad_norm": 4.203819751739502, "learning_rate": 6.5522258183107095e-06, "loss": 0.4629, "step": 2752 }, { "epoch": 0.18808499009359841, "grad_norm": 4.762743949890137, "learning_rate": 6.551846710391862e-06, "loss": 0.5253, "step": 2753 }, { "epoch": 0.18815331010452963, "grad_norm": 4.071295261383057, "learning_rate": 6.551467453032176e-06, "loss": 0.3438, "step": 2754 }, { "epoch": 0.1882216301154608, "grad_norm": 3.4863483905792236, "learning_rate": 6.551088046250223e-06, "loss": 0.3595, "step": 2755 }, { "epoch": 0.18828995012639202, "grad_norm": 4.344355583190918, "learning_rate": 6.550708490064582e-06, "loss": 0.304, "step": 2756 }, { "epoch": 0.18835827013732323, "grad_norm": 4.495382308959961, "learning_rate": 6.550328784493839e-06, "loss": 0.4405, "step": 2757 }, { "epoch": 0.1884265901482544, "grad_norm": 3.8028676509857178, "learning_rate": 6.549948929556585e-06, "loss": 0.4219, "step": 2758 }, { "epoch": 0.18849491015918562, "grad_norm": 3.6651906967163086, "learning_rate": 6.549568925271424e-06, "loss": 0.4685, "step": 2759 }, { "epoch": 0.18856323017011684, "grad_norm": 2.988629102706909, "learning_rate": 6.5491887716569595e-06, "loss": 0.329, "step": 2760 }, { "epoch": 0.18863155018104802, "grad_norm": 4.053235054016113, "learning_rate": 6.548808468731811e-06, "loss": 0.3583, "step": 2761 }, { "epoch": 0.18869987019197923, "grad_norm": 4.348961353302002, "learning_rate": 6.548428016514598e-06, "loss": 0.3945, "step": 2762 }, { "epoch": 0.18876819020291044, "grad_norm": 4.4354777336120605, "learning_rate": 6.548047415023951e-06, "loss": 0.3374, "step": 2763 }, { "epoch": 0.18883651021384162, "grad_norm": 4.032161235809326, "learning_rate": 6.547666664278507e-06, "loss": 0.4116, "step": 2764 }, { "epoch": 0.18890483022477284, "grad_norm": 3.301698684692383, "learning_rate": 6.547285764296912e-06, "loss": 0.3686, "step": 2765 }, { "epoch": 0.18897315023570405, "grad_norm": 4.998493194580078, "learning_rate": 6.546904715097815e-06, "loss": 0.4222, "step": 2766 }, { "epoch": 0.18904147024663523, "grad_norm": 4.2878522872924805, "learning_rate": 6.546523516699877e-06, "loss": 0.5018, "step": 2767 }, { "epoch": 0.18910979025756644, "grad_norm": 4.769786834716797, "learning_rate": 6.546142169121762e-06, "loss": 0.3956, "step": 2768 }, { "epoch": 0.18917811026849765, "grad_norm": 4.86293888092041, "learning_rate": 6.545760672382146e-06, "loss": 0.4667, "step": 2769 }, { "epoch": 0.18924643027942883, "grad_norm": 4.320261478424072, "learning_rate": 6.545379026499709e-06, "loss": 0.4452, "step": 2770 }, { "epoch": 0.18931475029036005, "grad_norm": 5.05289888381958, "learning_rate": 6.544997231493139e-06, "loss": 0.553, "step": 2771 }, { "epoch": 0.18938307030129126, "grad_norm": 4.290295600891113, "learning_rate": 6.544615287381133e-06, "loss": 0.5132, "step": 2772 }, { "epoch": 0.18945139031222244, "grad_norm": 4.6434431076049805, "learning_rate": 6.54423319418239e-06, "loss": 0.4304, "step": 2773 }, { "epoch": 0.18951971032315365, "grad_norm": 3.328115463256836, "learning_rate": 6.543850951915624e-06, "loss": 0.3258, "step": 2774 }, { "epoch": 0.18958803033408486, "grad_norm": 4.915628433227539, "learning_rate": 6.543468560599551e-06, "loss": 0.5404, "step": 2775 }, { "epoch": 0.18965635034501604, "grad_norm": 5.0624308586120605, "learning_rate": 6.543086020252894e-06, "loss": 0.5293, "step": 2776 }, { "epoch": 0.18972467035594726, "grad_norm": 4.326827526092529, "learning_rate": 6.542703330894388e-06, "loss": 0.3675, "step": 2777 }, { "epoch": 0.18979299036687847, "grad_norm": 4.960775852203369, "learning_rate": 6.5423204925427705e-06, "loss": 0.5767, "step": 2778 }, { "epoch": 0.18986131037780965, "grad_norm": 2.544048547744751, "learning_rate": 6.541937505216788e-06, "loss": 0.3899, "step": 2779 }, { "epoch": 0.18992963038874086, "grad_norm": 3.6156699657440186, "learning_rate": 6.541554368935195e-06, "loss": 0.3755, "step": 2780 }, { "epoch": 0.18999795039967207, "grad_norm": 3.9287588596343994, "learning_rate": 6.541171083716753e-06, "loss": 0.3729, "step": 2781 }, { "epoch": 0.19006627041060326, "grad_norm": 4.752269268035889, "learning_rate": 6.540787649580229e-06, "loss": 0.4251, "step": 2782 }, { "epoch": 0.19013459042153447, "grad_norm": 5.010992527008057, "learning_rate": 6.540404066544401e-06, "loss": 0.4387, "step": 2783 }, { "epoch": 0.19020291043246568, "grad_norm": 4.281731128692627, "learning_rate": 6.54002033462805e-06, "loss": 0.42, "step": 2784 }, { "epoch": 0.19027123044339686, "grad_norm": 4.559709548950195, "learning_rate": 6.539636453849965e-06, "loss": 0.43, "step": 2785 }, { "epoch": 0.19033955045432807, "grad_norm": 4.152073383331299, "learning_rate": 6.539252424228948e-06, "loss": 0.5035, "step": 2786 }, { "epoch": 0.19040787046525928, "grad_norm": 5.85416316986084, "learning_rate": 6.538868245783802e-06, "loss": 0.4202, "step": 2787 }, { "epoch": 0.19047619047619047, "grad_norm": 3.7589457035064697, "learning_rate": 6.538483918533338e-06, "loss": 0.4024, "step": 2788 }, { "epoch": 0.19054451048712168, "grad_norm": 5.092323303222656, "learning_rate": 6.538099442496376e-06, "loss": 0.4837, "step": 2789 }, { "epoch": 0.1906128304980529, "grad_norm": 4.404135227203369, "learning_rate": 6.537714817691743e-06, "loss": 0.3905, "step": 2790 }, { "epoch": 0.19068115050898407, "grad_norm": 3.575157403945923, "learning_rate": 6.537330044138273e-06, "loss": 0.3636, "step": 2791 }, { "epoch": 0.19074947051991528, "grad_norm": 4.110052585601807, "learning_rate": 6.536945121854808e-06, "loss": 0.3773, "step": 2792 }, { "epoch": 0.1908177905308465, "grad_norm": 4.335690975189209, "learning_rate": 6.536560050860196e-06, "loss": 0.4683, "step": 2793 }, { "epoch": 0.19088611054177768, "grad_norm": 4.5635085105896, "learning_rate": 6.536174831173294e-06, "loss": 0.3684, "step": 2794 }, { "epoch": 0.1909544305527089, "grad_norm": 4.056288719177246, "learning_rate": 6.535789462812962e-06, "loss": 0.4336, "step": 2795 }, { "epoch": 0.1910227505636401, "grad_norm": 4.619571685791016, "learning_rate": 6.535403945798073e-06, "loss": 0.2933, "step": 2796 }, { "epoch": 0.19109107057457128, "grad_norm": 4.847533702850342, "learning_rate": 6.535018280147505e-06, "loss": 0.5031, "step": 2797 }, { "epoch": 0.1911593905855025, "grad_norm": 4.552142143249512, "learning_rate": 6.534632465880143e-06, "loss": 0.4808, "step": 2798 }, { "epoch": 0.1912277105964337, "grad_norm": 3.983996629714966, "learning_rate": 6.534246503014878e-06, "loss": 0.44, "step": 2799 }, { "epoch": 0.1912960306073649, "grad_norm": 5.154252052307129, "learning_rate": 6.53386039157061e-06, "loss": 0.3947, "step": 2800 }, { "epoch": 0.1913643506182961, "grad_norm": 5.056489944458008, "learning_rate": 6.533474131566245e-06, "loss": 0.5152, "step": 2801 }, { "epoch": 0.1914326706292273, "grad_norm": 2.5572969913482666, "learning_rate": 6.5330877230207004e-06, "loss": 0.3642, "step": 2802 }, { "epoch": 0.1915009906401585, "grad_norm": 4.820781707763672, "learning_rate": 6.532701165952895e-06, "loss": 0.4107, "step": 2803 }, { "epoch": 0.1915693106510897, "grad_norm": 4.538373947143555, "learning_rate": 6.532314460381757e-06, "loss": 0.4753, "step": 2804 }, { "epoch": 0.1916376306620209, "grad_norm": 4.619412899017334, "learning_rate": 6.531927606326225e-06, "loss": 0.3702, "step": 2805 }, { "epoch": 0.1917059506729521, "grad_norm": 3.43820858001709, "learning_rate": 6.531540603805239e-06, "loss": 0.3901, "step": 2806 }, { "epoch": 0.1917742706838833, "grad_norm": 3.8458449840545654, "learning_rate": 6.531153452837751e-06, "loss": 0.4555, "step": 2807 }, { "epoch": 0.19184259069481452, "grad_norm": 4.598122596740723, "learning_rate": 6.530766153442719e-06, "loss": 0.4863, "step": 2808 }, { "epoch": 0.1919109107057457, "grad_norm": 3.849832534790039, "learning_rate": 6.530378705639108e-06, "loss": 0.4598, "step": 2809 }, { "epoch": 0.1919792307166769, "grad_norm": 3.495769500732422, "learning_rate": 6.529991109445889e-06, "loss": 0.3657, "step": 2810 }, { "epoch": 0.19204755072760812, "grad_norm": 4.133504390716553, "learning_rate": 6.529603364882043e-06, "loss": 0.4032, "step": 2811 }, { "epoch": 0.1921158707385393, "grad_norm": 3.574467658996582, "learning_rate": 6.529215471966558e-06, "loss": 0.3703, "step": 2812 }, { "epoch": 0.19218419074947052, "grad_norm": 4.170619010925293, "learning_rate": 6.528827430718425e-06, "loss": 0.3942, "step": 2813 }, { "epoch": 0.19225251076040173, "grad_norm": 3.3323380947113037, "learning_rate": 6.528439241156647e-06, "loss": 0.5049, "step": 2814 }, { "epoch": 0.1923208307713329, "grad_norm": 5.042751312255859, "learning_rate": 6.528050903300233e-06, "loss": 0.519, "step": 2815 }, { "epoch": 0.19238915078226412, "grad_norm": 3.177321672439575, "learning_rate": 6.527662417168198e-06, "loss": 0.3311, "step": 2816 }, { "epoch": 0.19245747079319533, "grad_norm": 4.257692337036133, "learning_rate": 6.527273782779565e-06, "loss": 0.4902, "step": 2817 }, { "epoch": 0.19252579080412652, "grad_norm": 4.055874347686768, "learning_rate": 6.526885000153365e-06, "loss": 0.3523, "step": 2818 }, { "epoch": 0.19259411081505773, "grad_norm": 4.072390556335449, "learning_rate": 6.526496069308637e-06, "loss": 0.3891, "step": 2819 }, { "epoch": 0.19266243082598894, "grad_norm": 4.4546685218811035, "learning_rate": 6.526106990264423e-06, "loss": 0.3847, "step": 2820 }, { "epoch": 0.19273075083692012, "grad_norm": 4.602292537689209, "learning_rate": 6.525717763039777e-06, "loss": 0.3625, "step": 2821 }, { "epoch": 0.19279907084785133, "grad_norm": 3.3042778968811035, "learning_rate": 6.525328387653758e-06, "loss": 0.4278, "step": 2822 }, { "epoch": 0.19286739085878254, "grad_norm": 3.4399473667144775, "learning_rate": 6.524938864125432e-06, "loss": 0.4157, "step": 2823 }, { "epoch": 0.19293571086971373, "grad_norm": 4.015476703643799, "learning_rate": 6.524549192473875e-06, "loss": 0.4316, "step": 2824 }, { "epoch": 0.19300403088064494, "grad_norm": 3.359976291656494, "learning_rate": 6.524159372718167e-06, "loss": 0.4464, "step": 2825 }, { "epoch": 0.19307235089157615, "grad_norm": 4.636595726013184, "learning_rate": 6.5237694048773945e-06, "loss": 0.4677, "step": 2826 }, { "epoch": 0.19314067090250733, "grad_norm": 2.9900143146514893, "learning_rate": 6.523379288970656e-06, "loss": 0.3792, "step": 2827 }, { "epoch": 0.19320899091343854, "grad_norm": 6.313147068023682, "learning_rate": 6.522989025017054e-06, "loss": 0.4441, "step": 2828 }, { "epoch": 0.19327731092436976, "grad_norm": 3.8347890377044678, "learning_rate": 6.5225986130356965e-06, "loss": 0.4182, "step": 2829 }, { "epoch": 0.19334563093530094, "grad_norm": 5.249545097351074, "learning_rate": 6.522208053045704e-06, "loss": 0.3761, "step": 2830 }, { "epoch": 0.19341395094623215, "grad_norm": 4.432191371917725, "learning_rate": 6.521817345066199e-06, "loss": 0.5204, "step": 2831 }, { "epoch": 0.19348227095716336, "grad_norm": 5.171835422515869, "learning_rate": 6.521426489116315e-06, "loss": 0.4642, "step": 2832 }, { "epoch": 0.19355059096809454, "grad_norm": 5.218504905700684, "learning_rate": 6.521035485215189e-06, "loss": 0.4734, "step": 2833 }, { "epoch": 0.19361891097902575, "grad_norm": 4.629220008850098, "learning_rate": 6.520644333381969e-06, "loss": 0.4819, "step": 2834 }, { "epoch": 0.19368723098995697, "grad_norm": 3.7738280296325684, "learning_rate": 6.52025303363581e-06, "loss": 0.4162, "step": 2835 }, { "epoch": 0.19375555100088815, "grad_norm": 3.5535457134246826, "learning_rate": 6.51986158599587e-06, "loss": 0.4599, "step": 2836 }, { "epoch": 0.19382387101181936, "grad_norm": 4.248062610626221, "learning_rate": 6.519469990481319e-06, "loss": 0.3893, "step": 2837 }, { "epoch": 0.19389219102275057, "grad_norm": 5.89317512512207, "learning_rate": 6.519078247111332e-06, "loss": 0.4472, "step": 2838 }, { "epoch": 0.19396051103368175, "grad_norm": 4.800119400024414, "learning_rate": 6.51868635590509e-06, "loss": 0.4123, "step": 2839 }, { "epoch": 0.19402883104461296, "grad_norm": 4.522480010986328, "learning_rate": 6.518294316881787e-06, "loss": 0.5119, "step": 2840 }, { "epoch": 0.19409715105554418, "grad_norm": 3.8201146125793457, "learning_rate": 6.517902130060615e-06, "loss": 0.4379, "step": 2841 }, { "epoch": 0.19416547106647536, "grad_norm": 4.137192249298096, "learning_rate": 6.5175097954607825e-06, "loss": 0.4431, "step": 2842 }, { "epoch": 0.19423379107740657, "grad_norm": 3.343766450881958, "learning_rate": 6.5171173131014985e-06, "loss": 0.4258, "step": 2843 }, { "epoch": 0.19430211108833778, "grad_norm": 4.62389612197876, "learning_rate": 6.5167246830019826e-06, "loss": 0.3975, "step": 2844 }, { "epoch": 0.19437043109926896, "grad_norm": 4.028156757354736, "learning_rate": 6.516331905181462e-06, "loss": 0.3208, "step": 2845 }, { "epoch": 0.19443875111020018, "grad_norm": 3.110025405883789, "learning_rate": 6.5159389796591684e-06, "loss": 0.3217, "step": 2846 }, { "epoch": 0.1945070711211314, "grad_norm": 4.060014724731445, "learning_rate": 6.515545906454343e-06, "loss": 0.4961, "step": 2847 }, { "epoch": 0.19457539113206257, "grad_norm": 3.3157050609588623, "learning_rate": 6.515152685586233e-06, "loss": 0.41, "step": 2848 }, { "epoch": 0.19464371114299378, "grad_norm": 4.029358386993408, "learning_rate": 6.514759317074095e-06, "loss": 0.3824, "step": 2849 }, { "epoch": 0.194712031153925, "grad_norm": 4.207115650177002, "learning_rate": 6.51436580093719e-06, "loss": 0.4347, "step": 2850 }, { "epoch": 0.19478035116485617, "grad_norm": 4.010578632354736, "learning_rate": 6.513972137194787e-06, "loss": 0.358, "step": 2851 }, { "epoch": 0.19484867117578739, "grad_norm": 3.566727638244629, "learning_rate": 6.513578325866162e-06, "loss": 0.3659, "step": 2852 }, { "epoch": 0.1949169911867186, "grad_norm": 4.998861789703369, "learning_rate": 6.5131843669706015e-06, "loss": 0.463, "step": 2853 }, { "epoch": 0.19498531119764978, "grad_norm": 3.6415581703186035, "learning_rate": 6.512790260527395e-06, "loss": 0.4282, "step": 2854 }, { "epoch": 0.195053631208581, "grad_norm": 4.538722515106201, "learning_rate": 6.512396006555841e-06, "loss": 0.4661, "step": 2855 }, { "epoch": 0.1951219512195122, "grad_norm": 3.8897829055786133, "learning_rate": 6.512001605075245e-06, "loss": 0.3922, "step": 2856 }, { "epoch": 0.19519027123044339, "grad_norm": 4.122582912445068, "learning_rate": 6.51160705610492e-06, "loss": 0.4642, "step": 2857 }, { "epoch": 0.1952585912413746, "grad_norm": 4.037227630615234, "learning_rate": 6.5112123596641865e-06, "loss": 0.398, "step": 2858 }, { "epoch": 0.1953269112523058, "grad_norm": 4.20319128036499, "learning_rate": 6.51081751577237e-06, "loss": 0.3738, "step": 2859 }, { "epoch": 0.195395231263237, "grad_norm": 4.9169840812683105, "learning_rate": 6.510422524448806e-06, "loss": 0.4956, "step": 2860 }, { "epoch": 0.1954635512741682, "grad_norm": 4.508000373840332, "learning_rate": 6.510027385712837e-06, "loss": 0.2972, "step": 2861 }, { "epoch": 0.1955318712850994, "grad_norm": 4.338135242462158, "learning_rate": 6.509632099583812e-06, "loss": 0.5244, "step": 2862 }, { "epoch": 0.1956001912960306, "grad_norm": 4.359055995941162, "learning_rate": 6.509236666081085e-06, "loss": 0.3488, "step": 2863 }, { "epoch": 0.1956685113069618, "grad_norm": 4.062676906585693, "learning_rate": 6.508841085224022e-06, "loss": 0.4168, "step": 2864 }, { "epoch": 0.19573683131789302, "grad_norm": 5.285229206085205, "learning_rate": 6.508445357031991e-06, "loss": 0.33, "step": 2865 }, { "epoch": 0.1958051513288242, "grad_norm": 3.5668749809265137, "learning_rate": 6.5080494815243714e-06, "loss": 0.3511, "step": 2866 }, { "epoch": 0.1958734713397554, "grad_norm": 3.3809945583343506, "learning_rate": 6.507653458720547e-06, "loss": 0.321, "step": 2867 }, { "epoch": 0.19594179135068662, "grad_norm": 4.707839488983154, "learning_rate": 6.507257288639911e-06, "loss": 0.408, "step": 2868 }, { "epoch": 0.1960101113616178, "grad_norm": 4.8280463218688965, "learning_rate": 6.506860971301863e-06, "loss": 0.3655, "step": 2869 }, { "epoch": 0.19607843137254902, "grad_norm": 4.0985612869262695, "learning_rate": 6.50646450672581e-06, "loss": 0.3961, "step": 2870 }, { "epoch": 0.19614675138348023, "grad_norm": 4.678066253662109, "learning_rate": 6.506067894931163e-06, "loss": 0.2831, "step": 2871 }, { "epoch": 0.1962150713944114, "grad_norm": 3.1560819149017334, "learning_rate": 6.505671135937345e-06, "loss": 0.3164, "step": 2872 }, { "epoch": 0.19628339140534262, "grad_norm": 5.251000881195068, "learning_rate": 6.505274229763786e-06, "loss": 0.4928, "step": 2873 }, { "epoch": 0.19635171141627383, "grad_norm": 5.073249340057373, "learning_rate": 6.504877176429918e-06, "loss": 0.3677, "step": 2874 }, { "epoch": 0.19642003142720502, "grad_norm": 5.131999492645264, "learning_rate": 6.504479975955186e-06, "loss": 0.4617, "step": 2875 }, { "epoch": 0.19648835143813623, "grad_norm": 4.394196033477783, "learning_rate": 6.504082628359038e-06, "loss": 0.3252, "step": 2876 }, { "epoch": 0.19655667144906744, "grad_norm": 4.73817777633667, "learning_rate": 6.503685133660934e-06, "loss": 0.3979, "step": 2877 }, { "epoch": 0.19662499145999862, "grad_norm": 3.4247078895568848, "learning_rate": 6.5032874918803354e-06, "loss": 0.3654, "step": 2878 }, { "epoch": 0.19669331147092983, "grad_norm": 4.945194244384766, "learning_rate": 6.502889703036716e-06, "loss": 0.4147, "step": 2879 }, { "epoch": 0.19676163148186104, "grad_norm": 4.547695159912109, "learning_rate": 6.502491767149552e-06, "loss": 0.4253, "step": 2880 }, { "epoch": 0.19682995149279223, "grad_norm": 5.516889572143555, "learning_rate": 6.50209368423833e-06, "loss": 0.5079, "step": 2881 }, { "epoch": 0.19689827150372344, "grad_norm": 3.9974777698516846, "learning_rate": 6.501695454322544e-06, "loss": 0.4394, "step": 2882 }, { "epoch": 0.19696659151465465, "grad_norm": 3.6710429191589355, "learning_rate": 6.501297077421694e-06, "loss": 0.3683, "step": 2883 }, { "epoch": 0.19703491152558583, "grad_norm": 4.253237247467041, "learning_rate": 6.500898553555287e-06, "loss": 0.3811, "step": 2884 }, { "epoch": 0.19710323153651704, "grad_norm": 3.6305572986602783, "learning_rate": 6.500499882742839e-06, "loss": 0.3616, "step": 2885 }, { "epoch": 0.19717155154744825, "grad_norm": 3.201519250869751, "learning_rate": 6.500101065003869e-06, "loss": 0.3471, "step": 2886 }, { "epoch": 0.19723987155837944, "grad_norm": 3.6360981464385986, "learning_rate": 6.499702100357908e-06, "loss": 0.3751, "step": 2887 }, { "epoch": 0.19730819156931065, "grad_norm": 4.447009086608887, "learning_rate": 6.499302988824493e-06, "loss": 0.3755, "step": 2888 }, { "epoch": 0.19737651158024186, "grad_norm": 3.792759895324707, "learning_rate": 6.498903730423165e-06, "loss": 0.3718, "step": 2889 }, { "epoch": 0.19744483159117304, "grad_norm": 4.26724910736084, "learning_rate": 6.4985043251734775e-06, "loss": 0.55, "step": 2890 }, { "epoch": 0.19751315160210425, "grad_norm": 4.273008346557617, "learning_rate": 6.498104773094987e-06, "loss": 0.3782, "step": 2891 }, { "epoch": 0.19758147161303546, "grad_norm": 4.4669342041015625, "learning_rate": 6.497705074207258e-06, "loss": 0.508, "step": 2892 }, { "epoch": 0.19764979162396665, "grad_norm": 4.711877346038818, "learning_rate": 6.4973052285298635e-06, "loss": 0.5102, "step": 2893 }, { "epoch": 0.19771811163489786, "grad_norm": 3.914252281188965, "learning_rate": 6.496905236082382e-06, "loss": 0.3797, "step": 2894 }, { "epoch": 0.19778643164582907, "grad_norm": 3.697336196899414, "learning_rate": 6.4965050968844e-06, "loss": 0.3523, "step": 2895 }, { "epoch": 0.19785475165676025, "grad_norm": 4.335900783538818, "learning_rate": 6.496104810955512e-06, "loss": 0.4883, "step": 2896 }, { "epoch": 0.19792307166769146, "grad_norm": 4.019652843475342, "learning_rate": 6.495704378315321e-06, "loss": 0.4239, "step": 2897 }, { "epoch": 0.19799139167862267, "grad_norm": 3.9622445106506348, "learning_rate": 6.49530379898343e-06, "loss": 0.4217, "step": 2898 }, { "epoch": 0.19805971168955386, "grad_norm": 3.2766075134277344, "learning_rate": 6.494903072979458e-06, "loss": 0.436, "step": 2899 }, { "epoch": 0.19812803170048507, "grad_norm": 4.5165276527404785, "learning_rate": 6.494502200323027e-06, "loss": 0.5478, "step": 2900 }, { "epoch": 0.19819635171141628, "grad_norm": 4.310739040374756, "learning_rate": 6.494101181033766e-06, "loss": 0.4706, "step": 2901 }, { "epoch": 0.19826467172234746, "grad_norm": 4.373380184173584, "learning_rate": 6.493700015131313e-06, "loss": 0.3651, "step": 2902 }, { "epoch": 0.19833299173327867, "grad_norm": 4.977863788604736, "learning_rate": 6.49329870263531e-06, "loss": 0.4617, "step": 2903 }, { "epoch": 0.19840131174420989, "grad_norm": 5.222811222076416, "learning_rate": 6.49289724356541e-06, "loss": 0.4359, "step": 2904 }, { "epoch": 0.19846963175514107, "grad_norm": 3.7168025970458984, "learning_rate": 6.4924956379412715e-06, "loss": 0.3873, "step": 2905 }, { "epoch": 0.19853795176607228, "grad_norm": 4.87352180480957, "learning_rate": 6.492093885782558e-06, "loss": 0.4778, "step": 2906 }, { "epoch": 0.1986062717770035, "grad_norm": 4.088132381439209, "learning_rate": 6.491691987108944e-06, "loss": 0.4012, "step": 2907 }, { "epoch": 0.19867459178793467, "grad_norm": 4.371504783630371, "learning_rate": 6.49128994194011e-06, "loss": 0.4171, "step": 2908 }, { "epoch": 0.19874291179886588, "grad_norm": 3.6336405277252197, "learning_rate": 6.490887750295741e-06, "loss": 0.3491, "step": 2909 }, { "epoch": 0.1988112318097971, "grad_norm": 3.7703475952148438, "learning_rate": 6.490485412195533e-06, "loss": 0.3446, "step": 2910 }, { "epoch": 0.19887955182072828, "grad_norm": 3.8445518016815186, "learning_rate": 6.490082927659188e-06, "loss": 0.3172, "step": 2911 }, { "epoch": 0.1989478718316595, "grad_norm": 4.617082595825195, "learning_rate": 6.4896802967064115e-06, "loss": 0.3946, "step": 2912 }, { "epoch": 0.1990161918425907, "grad_norm": 4.335381031036377, "learning_rate": 6.489277519356922e-06, "loss": 0.4662, "step": 2913 }, { "epoch": 0.19908451185352188, "grad_norm": 4.256378650665283, "learning_rate": 6.488874595630442e-06, "loss": 0.3492, "step": 2914 }, { "epoch": 0.1991528318644531, "grad_norm": 5.357242107391357, "learning_rate": 6.4884715255467e-06, "loss": 0.4381, "step": 2915 }, { "epoch": 0.1992211518753843, "grad_norm": 3.991042137145996, "learning_rate": 6.488068309125435e-06, "loss": 0.4586, "step": 2916 }, { "epoch": 0.1992894718863155, "grad_norm": 8.27412223815918, "learning_rate": 6.487664946386391e-06, "loss": 0.4081, "step": 2917 }, { "epoch": 0.1993577918972467, "grad_norm": 50.82715606689453, "learning_rate": 6.487261437349319e-06, "loss": 0.5168, "step": 2918 }, { "epoch": 0.1994261119081779, "grad_norm": 7.511081695556641, "learning_rate": 6.48685778203398e-06, "loss": 0.498, "step": 2919 }, { "epoch": 0.1994944319191091, "grad_norm": 5.284734725952148, "learning_rate": 6.486453980460137e-06, "loss": 0.4196, "step": 2920 }, { "epoch": 0.1995627519300403, "grad_norm": 5.059754371643066, "learning_rate": 6.486050032647564e-06, "loss": 0.4212, "step": 2921 }, { "epoch": 0.19963107194097152, "grad_norm": 4.710303783416748, "learning_rate": 6.485645938616042e-06, "loss": 0.4053, "step": 2922 }, { "epoch": 0.1996993919519027, "grad_norm": 4.472461223602295, "learning_rate": 6.485241698385358e-06, "loss": 0.4047, "step": 2923 }, { "epoch": 0.1997677119628339, "grad_norm": 5.302112579345703, "learning_rate": 6.4848373119753064e-06, "loss": 0.5721, "step": 2924 }, { "epoch": 0.19983603197376512, "grad_norm": 10.412224769592285, "learning_rate": 6.484432779405689e-06, "loss": 0.4851, "step": 2925 }, { "epoch": 0.1999043519846963, "grad_norm": 6.245748043060303, "learning_rate": 6.484028100696315e-06, "loss": 0.5586, "step": 2926 }, { "epoch": 0.19997267199562752, "grad_norm": 7.7943315505981445, "learning_rate": 6.483623275867001e-06, "loss": 0.5014, "step": 2927 }, { "epoch": 0.20004099200655873, "grad_norm": 5.423660755157471, "learning_rate": 6.483218304937568e-06, "loss": 0.4855, "step": 2928 }, { "epoch": 0.2001093120174899, "grad_norm": 4.204268932342529, "learning_rate": 6.482813187927849e-06, "loss": 0.4134, "step": 2929 }, { "epoch": 0.20017763202842112, "grad_norm": 4.480395317077637, "learning_rate": 6.482407924857679e-06, "loss": 0.4856, "step": 2930 }, { "epoch": 0.20024595203935233, "grad_norm": 4.4528093338012695, "learning_rate": 6.482002515746905e-06, "loss": 0.4496, "step": 2931 }, { "epoch": 0.20031427205028352, "grad_norm": 4.386806964874268, "learning_rate": 6.481596960615377e-06, "loss": 0.3489, "step": 2932 }, { "epoch": 0.20038259206121473, "grad_norm": 4.188319683074951, "learning_rate": 6.481191259482955e-06, "loss": 0.3328, "step": 2933 }, { "epoch": 0.20045091207214594, "grad_norm": 6.652598857879639, "learning_rate": 6.480785412369505e-06, "loss": 0.3646, "step": 2934 }, { "epoch": 0.20051923208307712, "grad_norm": 3.791053295135498, "learning_rate": 6.4803794192949e-06, "loss": 0.417, "step": 2935 }, { "epoch": 0.20058755209400833, "grad_norm": 4.697190284729004, "learning_rate": 6.47997328027902e-06, "loss": 0.5206, "step": 2936 }, { "epoch": 0.20065587210493954, "grad_norm": 4.617077350616455, "learning_rate": 6.479566995341754e-06, "loss": 0.4393, "step": 2937 }, { "epoch": 0.20072419211587073, "grad_norm": 3.7817325592041016, "learning_rate": 6.479160564502995e-06, "loss": 0.2762, "step": 2938 }, { "epoch": 0.20079251212680194, "grad_norm": 5.866036415100098, "learning_rate": 6.4787539877826455e-06, "loss": 0.5963, "step": 2939 }, { "epoch": 0.20086083213773315, "grad_norm": 4.496635913848877, "learning_rate": 6.478347265200614e-06, "loss": 0.3522, "step": 2940 }, { "epoch": 0.20092915214866433, "grad_norm": 4.701753616333008, "learning_rate": 6.477940396776819e-06, "loss": 0.4848, "step": 2941 }, { "epoch": 0.20099747215959554, "grad_norm": 3.9057188034057617, "learning_rate": 6.47753338253118e-06, "loss": 0.3909, "step": 2942 }, { "epoch": 0.20106579217052675, "grad_norm": 4.725513458251953, "learning_rate": 6.47712622248363e-06, "loss": 0.4475, "step": 2943 }, { "epoch": 0.20113411218145794, "grad_norm": 5.007720470428467, "learning_rate": 6.476718916654106e-06, "loss": 0.451, "step": 2944 }, { "epoch": 0.20120243219238915, "grad_norm": 5.508209228515625, "learning_rate": 6.476311465062553e-06, "loss": 0.4341, "step": 2945 }, { "epoch": 0.20127075220332036, "grad_norm": 4.137303352355957, "learning_rate": 6.475903867728922e-06, "loss": 0.3994, "step": 2946 }, { "epoch": 0.20133907221425154, "grad_norm": 4.655981063842773, "learning_rate": 6.475496124673172e-06, "loss": 0.4312, "step": 2947 }, { "epoch": 0.20140739222518275, "grad_norm": 4.1125168800354, "learning_rate": 6.47508823591527e-06, "loss": 0.4027, "step": 2948 }, { "epoch": 0.20147571223611396, "grad_norm": 4.005348205566406, "learning_rate": 6.4746802014751885e-06, "loss": 0.4704, "step": 2949 }, { "epoch": 0.20154403224704515, "grad_norm": 3.4038515090942383, "learning_rate": 6.474272021372907e-06, "loss": 0.3476, "step": 2950 }, { "epoch": 0.20161235225797636, "grad_norm": 4.164205551147461, "learning_rate": 6.4738636956284155e-06, "loss": 0.4354, "step": 2951 }, { "epoch": 0.20168067226890757, "grad_norm": 6.320474624633789, "learning_rate": 6.473455224261707e-06, "loss": 0.4869, "step": 2952 }, { "epoch": 0.20174899227983875, "grad_norm": 3.968693971633911, "learning_rate": 6.473046607292783e-06, "loss": 0.4302, "step": 2953 }, { "epoch": 0.20181731229076996, "grad_norm": 3.1476547718048096, "learning_rate": 6.472637844741654e-06, "loss": 0.3812, "step": 2954 }, { "epoch": 0.20188563230170117, "grad_norm": 5.052080154418945, "learning_rate": 6.472228936628333e-06, "loss": 0.422, "step": 2955 }, { "epoch": 0.20195395231263236, "grad_norm": 4.1615824699401855, "learning_rate": 6.471819882972846e-06, "loss": 0.4544, "step": 2956 }, { "epoch": 0.20202227232356357, "grad_norm": 5.763993263244629, "learning_rate": 6.471410683795222e-06, "loss": 0.4505, "step": 2957 }, { "epoch": 0.20209059233449478, "grad_norm": 3.6522538661956787, "learning_rate": 6.471001339115498e-06, "loss": 0.3757, "step": 2958 }, { "epoch": 0.20215891234542596, "grad_norm": 4.8794426918029785, "learning_rate": 6.4705918489537206e-06, "loss": 0.4583, "step": 2959 }, { "epoch": 0.20222723235635717, "grad_norm": 5.236362934112549, "learning_rate": 6.470182213329939e-06, "loss": 0.4294, "step": 2960 }, { "epoch": 0.20229555236728838, "grad_norm": 4.500513553619385, "learning_rate": 6.4697724322642135e-06, "loss": 0.4068, "step": 2961 }, { "epoch": 0.20236387237821957, "grad_norm": 5.198691368103027, "learning_rate": 6.469362505776609e-06, "loss": 0.4911, "step": 2962 }, { "epoch": 0.20243219238915078, "grad_norm": 4.453947067260742, "learning_rate": 6.468952433887199e-06, "loss": 0.5683, "step": 2963 }, { "epoch": 0.202500512400082, "grad_norm": 5.975510120391846, "learning_rate": 6.468542216616064e-06, "loss": 0.4996, "step": 2964 }, { "epoch": 0.20256883241101317, "grad_norm": 3.057849407196045, "learning_rate": 6.468131853983291e-06, "loss": 0.3095, "step": 2965 }, { "epoch": 0.20263715242194438, "grad_norm": 5.9467997550964355, "learning_rate": 6.467721346008974e-06, "loss": 0.4334, "step": 2966 }, { "epoch": 0.2027054724328756, "grad_norm": 4.917690277099609, "learning_rate": 6.4673106927132154e-06, "loss": 0.4947, "step": 2967 }, { "epoch": 0.20277379244380678, "grad_norm": 3.675987720489502, "learning_rate": 6.466899894116123e-06, "loss": 0.3787, "step": 2968 }, { "epoch": 0.202842112454738, "grad_norm": 5.481696605682373, "learning_rate": 6.466488950237811e-06, "loss": 0.488, "step": 2969 }, { "epoch": 0.2029104324656692, "grad_norm": 4.056066513061523, "learning_rate": 6.4660778610984065e-06, "loss": 0.3086, "step": 2970 }, { "epoch": 0.20297875247660038, "grad_norm": 4.600196361541748, "learning_rate": 6.4656666267180345e-06, "loss": 0.537, "step": 2971 }, { "epoch": 0.2030470724875316, "grad_norm": 4.607511520385742, "learning_rate": 6.465255247116835e-06, "loss": 0.5255, "step": 2972 }, { "epoch": 0.2031153924984628, "grad_norm": 4.436085224151611, "learning_rate": 6.464843722314953e-06, "loss": 0.4063, "step": 2973 }, { "epoch": 0.203183712509394, "grad_norm": 5.2205986976623535, "learning_rate": 6.464432052332537e-06, "loss": 0.4405, "step": 2974 }, { "epoch": 0.2032520325203252, "grad_norm": 4.029919147491455, "learning_rate": 6.464020237189746e-06, "loss": 0.2969, "step": 2975 }, { "epoch": 0.2033203525312564, "grad_norm": 4.689297676086426, "learning_rate": 6.463608276906747e-06, "loss": 0.4208, "step": 2976 }, { "epoch": 0.2033886725421876, "grad_norm": 3.120568037033081, "learning_rate": 6.463196171503713e-06, "loss": 0.3316, "step": 2977 }, { "epoch": 0.2034569925531188, "grad_norm": 4.635097026824951, "learning_rate": 6.462783921000821e-06, "loss": 0.4818, "step": 2978 }, { "epoch": 0.20352531256405001, "grad_norm": 4.7907867431640625, "learning_rate": 6.462371525418261e-06, "loss": 0.4159, "step": 2979 }, { "epoch": 0.2035936325749812, "grad_norm": 4.9839348793029785, "learning_rate": 6.461958984776223e-06, "loss": 0.3743, "step": 2980 }, { "epoch": 0.2036619525859124, "grad_norm": 3.8378100395202637, "learning_rate": 6.461546299094912e-06, "loss": 0.3855, "step": 2981 }, { "epoch": 0.20373027259684362, "grad_norm": 4.471879482269287, "learning_rate": 6.461133468394533e-06, "loss": 0.4319, "step": 2982 }, { "epoch": 0.20379859260777483, "grad_norm": 4.386713981628418, "learning_rate": 6.460720492695304e-06, "loss": 0.5015, "step": 2983 }, { "epoch": 0.20386691261870601, "grad_norm": 4.305727481842041, "learning_rate": 6.460307372017446e-06, "loss": 0.3036, "step": 2984 }, { "epoch": 0.20393523262963723, "grad_norm": 5.011876106262207, "learning_rate": 6.459894106381187e-06, "loss": 0.4785, "step": 2985 }, { "epoch": 0.20400355264056844, "grad_norm": 3.9574599266052246, "learning_rate": 6.4594806958067655e-06, "loss": 0.3981, "step": 2986 }, { "epoch": 0.20407187265149962, "grad_norm": 4.020018100738525, "learning_rate": 6.459067140314425e-06, "loss": 0.3711, "step": 2987 }, { "epoch": 0.20414019266243083, "grad_norm": 4.338308334350586, "learning_rate": 6.458653439924416e-06, "loss": 0.5084, "step": 2988 }, { "epoch": 0.20420851267336204, "grad_norm": 5.938326358795166, "learning_rate": 6.458239594656996e-06, "loss": 0.4648, "step": 2989 }, { "epoch": 0.20427683268429322, "grad_norm": 4.438140392303467, "learning_rate": 6.457825604532429e-06, "loss": 0.4898, "step": 2990 }, { "epoch": 0.20434515269522444, "grad_norm": 4.454181671142578, "learning_rate": 6.4574114695709885e-06, "loss": 0.4147, "step": 2991 }, { "epoch": 0.20441347270615565, "grad_norm": 4.454991817474365, "learning_rate": 6.456997189792953e-06, "loss": 0.4514, "step": 2992 }, { "epoch": 0.20448179271708683, "grad_norm": 3.6470470428466797, "learning_rate": 6.456582765218608e-06, "loss": 0.3115, "step": 2993 }, { "epoch": 0.20455011272801804, "grad_norm": 4.051044940948486, "learning_rate": 6.456168195868249e-06, "loss": 0.381, "step": 2994 }, { "epoch": 0.20461843273894925, "grad_norm": 4.891106605529785, "learning_rate": 6.455753481762174e-06, "loss": 0.3837, "step": 2995 }, { "epoch": 0.20468675274988044, "grad_norm": 4.5653815269470215, "learning_rate": 6.4553386229206905e-06, "loss": 0.4298, "step": 2996 }, { "epoch": 0.20475507276081165, "grad_norm": 4.583090305328369, "learning_rate": 6.454923619364116e-06, "loss": 0.4421, "step": 2997 }, { "epoch": 0.20482339277174286, "grad_norm": 4.166491508483887, "learning_rate": 6.454508471112769e-06, "loss": 0.3696, "step": 2998 }, { "epoch": 0.20489171278267404, "grad_norm": 4.540464878082275, "learning_rate": 6.454093178186978e-06, "loss": 0.3957, "step": 2999 }, { "epoch": 0.20496003279360525, "grad_norm": 5.06810998916626, "learning_rate": 6.45367774060708e-06, "loss": 0.4027, "step": 3000 }, { "epoch": 0.20502835280453646, "grad_norm": 4.404561996459961, "learning_rate": 6.4532621583934195e-06, "loss": 0.42, "step": 3001 }, { "epoch": 0.20509667281546765, "grad_norm": 3.9362080097198486, "learning_rate": 6.452846431566344e-06, "loss": 0.4879, "step": 3002 }, { "epoch": 0.20516499282639886, "grad_norm": 5.0087890625, "learning_rate": 6.452430560146212e-06, "loss": 0.4724, "step": 3003 }, { "epoch": 0.20523331283733007, "grad_norm": 4.540518760681152, "learning_rate": 6.4520145441533854e-06, "loss": 0.3601, "step": 3004 }, { "epoch": 0.20530163284826125, "grad_norm": 4.35433292388916, "learning_rate": 6.451598383608238e-06, "loss": 0.3906, "step": 3005 }, { "epoch": 0.20536995285919246, "grad_norm": 6.393198490142822, "learning_rate": 6.451182078531147e-06, "loss": 0.562, "step": 3006 }, { "epoch": 0.20543827287012367, "grad_norm": 4.316501617431641, "learning_rate": 6.450765628942497e-06, "loss": 0.3705, "step": 3007 }, { "epoch": 0.20550659288105486, "grad_norm": 4.903007984161377, "learning_rate": 6.450349034862682e-06, "loss": 0.456, "step": 3008 }, { "epoch": 0.20557491289198607, "grad_norm": 4.621607303619385, "learning_rate": 6.449932296312101e-06, "loss": 0.4161, "step": 3009 }, { "epoch": 0.20564323290291728, "grad_norm": 5.843926906585693, "learning_rate": 6.44951541331116e-06, "loss": 0.4441, "step": 3010 }, { "epoch": 0.20571155291384846, "grad_norm": 3.8454384803771973, "learning_rate": 6.449098385880272e-06, "loss": 0.4008, "step": 3011 }, { "epoch": 0.20577987292477967, "grad_norm": 4.396502494812012, "learning_rate": 6.44868121403986e-06, "loss": 0.4232, "step": 3012 }, { "epoch": 0.20584819293571088, "grad_norm": 3.2202494144439697, "learning_rate": 6.44826389781035e-06, "loss": 0.3617, "step": 3013 }, { "epoch": 0.20591651294664207, "grad_norm": 4.718555450439453, "learning_rate": 6.447846437212178e-06, "loss": 0.4994, "step": 3014 }, { "epoch": 0.20598483295757328, "grad_norm": 4.10580587387085, "learning_rate": 6.447428832265785e-06, "loss": 0.4513, "step": 3015 }, { "epoch": 0.2060531529685045, "grad_norm": 3.4912970066070557, "learning_rate": 6.447011082991619e-06, "loss": 0.3973, "step": 3016 }, { "epoch": 0.20612147297943567, "grad_norm": 4.646996021270752, "learning_rate": 6.446593189410139e-06, "loss": 0.4765, "step": 3017 }, { "epoch": 0.20618979299036688, "grad_norm": 4.436005115509033, "learning_rate": 6.446175151541807e-06, "loss": 0.4532, "step": 3018 }, { "epoch": 0.2062581130012981, "grad_norm": 4.649001121520996, "learning_rate": 6.445756969407091e-06, "loss": 0.418, "step": 3019 }, { "epoch": 0.20632643301222928, "grad_norm": 3.9266366958618164, "learning_rate": 6.445338643026471e-06, "loss": 0.392, "step": 3020 }, { "epoch": 0.2063947530231605, "grad_norm": 4.45050048828125, "learning_rate": 6.4449201724204305e-06, "loss": 0.6108, "step": 3021 }, { "epoch": 0.2064630730340917, "grad_norm": 4.948290824890137, "learning_rate": 6.44450155760946e-06, "loss": 0.4142, "step": 3022 }, { "epoch": 0.20653139304502288, "grad_norm": 3.361391067504883, "learning_rate": 6.4440827986140586e-06, "loss": 0.401, "step": 3023 }, { "epoch": 0.2065997130559541, "grad_norm": 4.566828727722168, "learning_rate": 6.443663895454733e-06, "loss": 0.4705, "step": 3024 }, { "epoch": 0.2066680330668853, "grad_norm": 3.9856410026550293, "learning_rate": 6.443244848151994e-06, "loss": 0.4339, "step": 3025 }, { "epoch": 0.2067363530778165, "grad_norm": 4.857908725738525, "learning_rate": 6.4428256567263615e-06, "loss": 0.4539, "step": 3026 }, { "epoch": 0.2068046730887477, "grad_norm": 4.6605939865112305, "learning_rate": 6.442406321198363e-06, "loss": 0.3835, "step": 3027 }, { "epoch": 0.2068729930996789, "grad_norm": 4.554666042327881, "learning_rate": 6.4419868415885315e-06, "loss": 0.3828, "step": 3028 }, { "epoch": 0.2069413131106101, "grad_norm": 3.4433999061584473, "learning_rate": 6.441567217917409e-06, "loss": 0.4262, "step": 3029 }, { "epoch": 0.2070096331215413, "grad_norm": 2.745793342590332, "learning_rate": 6.441147450205542e-06, "loss": 0.4258, "step": 3030 }, { "epoch": 0.20707795313247251, "grad_norm": 3.6345362663269043, "learning_rate": 6.440727538473486e-06, "loss": 0.4127, "step": 3031 }, { "epoch": 0.2071462731434037, "grad_norm": 3.121579647064209, "learning_rate": 6.4403074827418025e-06, "loss": 0.3854, "step": 3032 }, { "epoch": 0.2072145931543349, "grad_norm": 4.719638347625732, "learning_rate": 6.439887283031061e-06, "loss": 0.5022, "step": 3033 }, { "epoch": 0.20728291316526612, "grad_norm": 4.087364196777344, "learning_rate": 6.4394669393618385e-06, "loss": 0.3962, "step": 3034 }, { "epoch": 0.2073512331761973, "grad_norm": 3.6348416805267334, "learning_rate": 6.439046451754716e-06, "loss": 0.3219, "step": 3035 }, { "epoch": 0.2074195531871285, "grad_norm": 3.7181570529937744, "learning_rate": 6.438625820230285e-06, "loss": 0.3665, "step": 3036 }, { "epoch": 0.20748787319805972, "grad_norm": 4.647576332092285, "learning_rate": 6.438205044809142e-06, "loss": 0.3867, "step": 3037 }, { "epoch": 0.2075561932089909, "grad_norm": 5.309108734130859, "learning_rate": 6.437784125511893e-06, "loss": 0.5781, "step": 3038 }, { "epoch": 0.20762451321992212, "grad_norm": 5.620753765106201, "learning_rate": 6.4373630623591475e-06, "loss": 0.3686, "step": 3039 }, { "epoch": 0.20769283323085333, "grad_norm": 4.186039924621582, "learning_rate": 6.436941855371524e-06, "loss": 0.4708, "step": 3040 }, { "epoch": 0.2077611532417845, "grad_norm": 3.1229264736175537, "learning_rate": 6.436520504569648e-06, "loss": 0.338, "step": 3041 }, { "epoch": 0.20782947325271572, "grad_norm": 5.355889320373535, "learning_rate": 6.436099009974153e-06, "loss": 0.4731, "step": 3042 }, { "epoch": 0.20789779326364694, "grad_norm": 3.463697910308838, "learning_rate": 6.435677371605677e-06, "loss": 0.3548, "step": 3043 }, { "epoch": 0.20796611327457812, "grad_norm": 3.968454360961914, "learning_rate": 6.435255589484868e-06, "loss": 0.4206, "step": 3044 }, { "epoch": 0.20803443328550933, "grad_norm": 4.113187789916992, "learning_rate": 6.4348336636323775e-06, "loss": 0.4638, "step": 3045 }, { "epoch": 0.20810275329644054, "grad_norm": 4.134106159210205, "learning_rate": 6.434411594068868e-06, "loss": 0.3982, "step": 3046 }, { "epoch": 0.20817107330737172, "grad_norm": 3.667329788208008, "learning_rate": 6.433989380815006e-06, "loss": 0.3596, "step": 3047 }, { "epoch": 0.20823939331830293, "grad_norm": 5.150569438934326, "learning_rate": 6.4335670238914675e-06, "loss": 0.4896, "step": 3048 }, { "epoch": 0.20830771332923415, "grad_norm": 4.828310012817383, "learning_rate": 6.433144523318933e-06, "loss": 0.3883, "step": 3049 }, { "epoch": 0.20837603334016533, "grad_norm": 4.615864276885986, "learning_rate": 6.432721879118091e-06, "loss": 0.4545, "step": 3050 }, { "epoch": 0.20844435335109654, "grad_norm": 4.929495811462402, "learning_rate": 6.432299091309638e-06, "loss": 0.5036, "step": 3051 }, { "epoch": 0.20851267336202775, "grad_norm": 3.2639896869659424, "learning_rate": 6.431876159914277e-06, "loss": 0.3524, "step": 3052 }, { "epoch": 0.20858099337295893, "grad_norm": 4.968373775482178, "learning_rate": 6.431453084952717e-06, "loss": 0.43, "step": 3053 }, { "epoch": 0.20864931338389014, "grad_norm": 3.9610681533813477, "learning_rate": 6.431029866445675e-06, "loss": 0.3332, "step": 3054 }, { "epoch": 0.20871763339482136, "grad_norm": 4.754159927368164, "learning_rate": 6.430606504413875e-06, "loss": 0.4747, "step": 3055 }, { "epoch": 0.20878595340575254, "grad_norm": 4.373126029968262, "learning_rate": 6.430182998878049e-06, "loss": 0.4584, "step": 3056 }, { "epoch": 0.20885427341668375, "grad_norm": 3.8413572311401367, "learning_rate": 6.429759349858934e-06, "loss": 0.4452, "step": 3057 }, { "epoch": 0.20892259342761496, "grad_norm": 5.039614200592041, "learning_rate": 6.429335557377275e-06, "loss": 0.4333, "step": 3058 }, { "epoch": 0.20899091343854614, "grad_norm": 4.693214416503906, "learning_rate": 6.428911621453823e-06, "loss": 0.4663, "step": 3059 }, { "epoch": 0.20905923344947736, "grad_norm": 5.484711647033691, "learning_rate": 6.4284875421093395e-06, "loss": 0.435, "step": 3060 }, { "epoch": 0.20912755346040857, "grad_norm": 3.944084644317627, "learning_rate": 6.4280633193645875e-06, "loss": 0.4149, "step": 3061 }, { "epoch": 0.20919587347133975, "grad_norm": 4.339480876922607, "learning_rate": 6.4276389532403425e-06, "loss": 0.4588, "step": 3062 }, { "epoch": 0.20926419348227096, "grad_norm": 4.543679237365723, "learning_rate": 6.427214443757384e-06, "loss": 0.56, "step": 3063 }, { "epoch": 0.20933251349320217, "grad_norm": 4.753041744232178, "learning_rate": 6.426789790936498e-06, "loss": 0.4819, "step": 3064 }, { "epoch": 0.20940083350413335, "grad_norm": 3.7613298892974854, "learning_rate": 6.42636499479848e-06, "loss": 0.4499, "step": 3065 }, { "epoch": 0.20946915351506457, "grad_norm": 4.029979705810547, "learning_rate": 6.42594005536413e-06, "loss": 0.3854, "step": 3066 }, { "epoch": 0.20953747352599578, "grad_norm": 3.610944986343384, "learning_rate": 6.425514972654257e-06, "loss": 0.3379, "step": 3067 }, { "epoch": 0.20960579353692696, "grad_norm": 4.96393346786499, "learning_rate": 6.425089746689677e-06, "loss": 0.447, "step": 3068 }, { "epoch": 0.20967411354785817, "grad_norm": 5.315513610839844, "learning_rate": 6.424664377491208e-06, "loss": 0.3672, "step": 3069 }, { "epoch": 0.20974243355878938, "grad_norm": 5.211189270019531, "learning_rate": 6.424238865079685e-06, "loss": 0.5155, "step": 3070 }, { "epoch": 0.20981075356972057, "grad_norm": 5.277944087982178, "learning_rate": 6.423813209475941e-06, "loss": 0.3895, "step": 3071 }, { "epoch": 0.20987907358065178, "grad_norm": 3.6454153060913086, "learning_rate": 6.423387410700819e-06, "loss": 0.3659, "step": 3072 }, { "epoch": 0.209947393591583, "grad_norm": 3.2429494857788086, "learning_rate": 6.422961468775171e-06, "loss": 0.2753, "step": 3073 }, { "epoch": 0.21001571360251417, "grad_norm": 4.846439838409424, "learning_rate": 6.422535383719852e-06, "loss": 0.4102, "step": 3074 }, { "epoch": 0.21008403361344538, "grad_norm": 4.672868251800537, "learning_rate": 6.422109155555729e-06, "loss": 0.392, "step": 3075 }, { "epoch": 0.2101523536243766, "grad_norm": 3.191472291946411, "learning_rate": 6.42168278430367e-06, "loss": 0.3125, "step": 3076 }, { "epoch": 0.21022067363530778, "grad_norm": 3.452150583267212, "learning_rate": 6.421256269984556e-06, "loss": 0.3151, "step": 3077 }, { "epoch": 0.210288993646239, "grad_norm": 3.654245138168335, "learning_rate": 6.420829612619271e-06, "loss": 0.4618, "step": 3078 }, { "epoch": 0.2103573136571702, "grad_norm": 3.721095561981201, "learning_rate": 6.420402812228707e-06, "loss": 0.339, "step": 3079 }, { "epoch": 0.21042563366810138, "grad_norm": 5.199950218200684, "learning_rate": 6.419975868833765e-06, "loss": 0.5057, "step": 3080 }, { "epoch": 0.2104939536790326, "grad_norm": 4.314368724822998, "learning_rate": 6.41954878245535e-06, "loss": 0.3825, "step": 3081 }, { "epoch": 0.2105622736899638, "grad_norm": 4.376937389373779, "learning_rate": 6.419121553114374e-06, "loss": 0.4131, "step": 3082 }, { "epoch": 0.21063059370089499, "grad_norm": 4.968967914581299, "learning_rate": 6.41869418083176e-06, "loss": 0.3913, "step": 3083 }, { "epoch": 0.2106989137118262, "grad_norm": 4.175090789794922, "learning_rate": 6.418266665628434e-06, "loss": 0.4279, "step": 3084 }, { "epoch": 0.2107672337227574, "grad_norm": 3.9492015838623047, "learning_rate": 6.417839007525329e-06, "loss": 0.4337, "step": 3085 }, { "epoch": 0.2108355537336886, "grad_norm": 3.960688829421997, "learning_rate": 6.4174112065433885e-06, "loss": 0.3957, "step": 3086 }, { "epoch": 0.2109038737446198, "grad_norm": 4.282503604888916, "learning_rate": 6.416983262703559e-06, "loss": 0.3965, "step": 3087 }, { "epoch": 0.210972193755551, "grad_norm": 3.765491485595703, "learning_rate": 6.416555176026798e-06, "loss": 0.3895, "step": 3088 }, { "epoch": 0.2110405137664822, "grad_norm": 3.5210819244384766, "learning_rate": 6.416126946534065e-06, "loss": 0.2777, "step": 3089 }, { "epoch": 0.2111088337774134, "grad_norm": 4.0995707511901855, "learning_rate": 6.415698574246332e-06, "loss": 0.3704, "step": 3090 }, { "epoch": 0.21117715378834462, "grad_norm": 4.833585739135742, "learning_rate": 6.415270059184573e-06, "loss": 0.5397, "step": 3091 }, { "epoch": 0.2112454737992758, "grad_norm": 4.211092472076416, "learning_rate": 6.414841401369773e-06, "loss": 0.4094, "step": 3092 }, { "epoch": 0.211313793810207, "grad_norm": 5.122683525085449, "learning_rate": 6.4144126008229195e-06, "loss": 0.4924, "step": 3093 }, { "epoch": 0.21138211382113822, "grad_norm": 3.755284309387207, "learning_rate": 6.4139836575650136e-06, "loss": 0.3605, "step": 3094 }, { "epoch": 0.2114504338320694, "grad_norm": 4.212170124053955, "learning_rate": 6.413554571617055e-06, "loss": 0.3355, "step": 3095 }, { "epoch": 0.21151875384300062, "grad_norm": 4.288959980010986, "learning_rate": 6.413125343000061e-06, "loss": 0.4225, "step": 3096 }, { "epoch": 0.21158707385393183, "grad_norm": 4.253032207489014, "learning_rate": 6.412695971735044e-06, "loss": 0.4293, "step": 3097 }, { "epoch": 0.211655393864863, "grad_norm": 4.092264175415039, "learning_rate": 6.412266457843031e-06, "loss": 0.4319, "step": 3098 }, { "epoch": 0.21172371387579422, "grad_norm": 4.125974655151367, "learning_rate": 6.411836801345055e-06, "loss": 0.3836, "step": 3099 }, { "epoch": 0.21179203388672543, "grad_norm": 4.193979740142822, "learning_rate": 6.411407002262154e-06, "loss": 0.3995, "step": 3100 }, { "epoch": 0.21186035389765662, "grad_norm": 3.894838333129883, "learning_rate": 6.4109770606153765e-06, "loss": 0.4756, "step": 3101 }, { "epoch": 0.21192867390858783, "grad_norm": 4.8356828689575195, "learning_rate": 6.410546976425772e-06, "loss": 0.4742, "step": 3102 }, { "epoch": 0.21199699391951904, "grad_norm": 5.085597515106201, "learning_rate": 6.410116749714404e-06, "loss": 0.4564, "step": 3103 }, { "epoch": 0.21206531393045022, "grad_norm": 4.909700870513916, "learning_rate": 6.409686380502338e-06, "loss": 0.4815, "step": 3104 }, { "epoch": 0.21213363394138143, "grad_norm": 4.586737155914307, "learning_rate": 6.409255868810646e-06, "loss": 0.4097, "step": 3105 }, { "epoch": 0.21220195395231264, "grad_norm": 3.3413991928100586, "learning_rate": 6.4088252146604135e-06, "loss": 0.4273, "step": 3106 }, { "epoch": 0.21227027396324383, "grad_norm": 4.731027603149414, "learning_rate": 6.408394418072725e-06, "loss": 0.3354, "step": 3107 }, { "epoch": 0.21233859397417504, "grad_norm": 3.693286180496216, "learning_rate": 6.407963479068676e-06, "loss": 0.363, "step": 3108 }, { "epoch": 0.21240691398510625, "grad_norm": 4.1711859703063965, "learning_rate": 6.40753239766937e-06, "loss": 0.5163, "step": 3109 }, { "epoch": 0.21247523399603743, "grad_norm": 4.360662460327148, "learning_rate": 6.407101173895914e-06, "loss": 0.4284, "step": 3110 }, { "epoch": 0.21254355400696864, "grad_norm": 3.737342357635498, "learning_rate": 6.4066698077694255e-06, "loss": 0.3843, "step": 3111 }, { "epoch": 0.21261187401789985, "grad_norm": 3.0950894355773926, "learning_rate": 6.406238299311027e-06, "loss": 0.4363, "step": 3112 }, { "epoch": 0.21268019402883104, "grad_norm": 3.735063076019287, "learning_rate": 6.405806648541847e-06, "loss": 0.3807, "step": 3113 }, { "epoch": 0.21274851403976225, "grad_norm": 4.793746471405029, "learning_rate": 6.4053748554830236e-06, "loss": 0.4306, "step": 3114 }, { "epoch": 0.21281683405069346, "grad_norm": 4.53726863861084, "learning_rate": 6.4049429201557e-06, "loss": 0.4146, "step": 3115 }, { "epoch": 0.21288515406162464, "grad_norm": 4.774743556976318, "learning_rate": 6.404510842581028e-06, "loss": 0.5075, "step": 3116 }, { "epoch": 0.21295347407255585, "grad_norm": 4.789685249328613, "learning_rate": 6.404078622780164e-06, "loss": 0.4505, "step": 3117 }, { "epoch": 0.21302179408348706, "grad_norm": 4.458197116851807, "learning_rate": 6.403646260774272e-06, "loss": 0.4379, "step": 3118 }, { "epoch": 0.21309011409441825, "grad_norm": 4.561275482177734, "learning_rate": 6.403213756584526e-06, "loss": 0.3749, "step": 3119 }, { "epoch": 0.21315843410534946, "grad_norm": 4.5645928382873535, "learning_rate": 6.402781110232102e-06, "loss": 0.4993, "step": 3120 }, { "epoch": 0.21322675411628067, "grad_norm": 4.474173545837402, "learning_rate": 6.4023483217381875e-06, "loss": 0.3482, "step": 3121 }, { "epoch": 0.21329507412721185, "grad_norm": 3.892472267150879, "learning_rate": 6.401915391123975e-06, "loss": 0.4028, "step": 3122 }, { "epoch": 0.21336339413814306, "grad_norm": 4.430928707122803, "learning_rate": 6.401482318410662e-06, "loss": 0.4691, "step": 3123 }, { "epoch": 0.21343171414907428, "grad_norm": 4.307372570037842, "learning_rate": 6.401049103619458e-06, "loss": 0.3913, "step": 3124 }, { "epoch": 0.21350003416000546, "grad_norm": 4.659814834594727, "learning_rate": 6.400615746771573e-06, "loss": 0.471, "step": 3125 }, { "epoch": 0.21356835417093667, "grad_norm": 3.5795373916625977, "learning_rate": 6.400182247888229e-06, "loss": 0.3702, "step": 3126 }, { "epoch": 0.21363667418186788, "grad_norm": 4.556371688842773, "learning_rate": 6.3997486069906536e-06, "loss": 0.351, "step": 3127 }, { "epoch": 0.21370499419279906, "grad_norm": 3.3681938648223877, "learning_rate": 6.3993148241000815e-06, "loss": 0.3575, "step": 3128 }, { "epoch": 0.21377331420373027, "grad_norm": 3.256626605987549, "learning_rate": 6.398880899237752e-06, "loss": 0.2405, "step": 3129 }, { "epoch": 0.21384163421466149, "grad_norm": 3.9466264247894287, "learning_rate": 6.398446832424915e-06, "loss": 0.4331, "step": 3130 }, { "epoch": 0.21390995422559267, "grad_norm": 4.587176322937012, "learning_rate": 6.398012623682825e-06, "loss": 0.4975, "step": 3131 }, { "epoch": 0.21397827423652388, "grad_norm": 3.6779260635375977, "learning_rate": 6.3975782730327436e-06, "loss": 0.429, "step": 3132 }, { "epoch": 0.2140465942474551, "grad_norm": 4.024048328399658, "learning_rate": 6.397143780495939e-06, "loss": 0.3854, "step": 3133 }, { "epoch": 0.21411491425838627, "grad_norm": 4.588553428649902, "learning_rate": 6.39670914609369e-06, "loss": 0.4961, "step": 3134 }, { "epoch": 0.21418323426931749, "grad_norm": 3.4791481494903564, "learning_rate": 6.396274369847277e-06, "loss": 0.2915, "step": 3135 }, { "epoch": 0.2142515542802487, "grad_norm": 3.646488904953003, "learning_rate": 6.395839451777991e-06, "loss": 0.3562, "step": 3136 }, { "epoch": 0.21431987429117988, "grad_norm": 5.0251264572143555, "learning_rate": 6.395404391907128e-06, "loss": 0.479, "step": 3137 }, { "epoch": 0.2143881943021111, "grad_norm": 5.062012195587158, "learning_rate": 6.394969190255993e-06, "loss": 0.3995, "step": 3138 }, { "epoch": 0.2144565143130423, "grad_norm": 3.815187692642212, "learning_rate": 6.394533846845895e-06, "loss": 0.4361, "step": 3139 }, { "epoch": 0.21452483432397348, "grad_norm": 4.120763778686523, "learning_rate": 6.394098361698153e-06, "loss": 0.4474, "step": 3140 }, { "epoch": 0.2145931543349047, "grad_norm": 3.8514821529388428, "learning_rate": 6.393662734834091e-06, "loss": 0.4164, "step": 3141 }, { "epoch": 0.2146614743458359, "grad_norm": 4.361042022705078, "learning_rate": 6.39322696627504e-06, "loss": 0.3942, "step": 3142 }, { "epoch": 0.2147297943567671, "grad_norm": 5.334843635559082, "learning_rate": 6.392791056042339e-06, "loss": 0.4347, "step": 3143 }, { "epoch": 0.2147981143676983, "grad_norm": 3.2390966415405273, "learning_rate": 6.392355004157334e-06, "loss": 0.3539, "step": 3144 }, { "epoch": 0.2148664343786295, "grad_norm": 4.557446002960205, "learning_rate": 6.391918810641375e-06, "loss": 0.4944, "step": 3145 }, { "epoch": 0.2149347543895607, "grad_norm": 4.374122619628906, "learning_rate": 6.391482475515824e-06, "loss": 0.4428, "step": 3146 }, { "epoch": 0.2150030744004919, "grad_norm": 5.217205047607422, "learning_rate": 6.391045998802046e-06, "loss": 0.4933, "step": 3147 }, { "epoch": 0.21507139441142312, "grad_norm": 3.815187931060791, "learning_rate": 6.390609380521414e-06, "loss": 0.3035, "step": 3148 }, { "epoch": 0.2151397144223543, "grad_norm": 4.48129940032959, "learning_rate": 6.390172620695307e-06, "loss": 0.4799, "step": 3149 }, { "epoch": 0.2152080344332855, "grad_norm": 4.002451419830322, "learning_rate": 6.389735719345115e-06, "loss": 0.42, "step": 3150 }, { "epoch": 0.21527635444421672, "grad_norm": 3.929567575454712, "learning_rate": 6.3892986764922285e-06, "loss": 0.4182, "step": 3151 }, { "epoch": 0.2153446744551479, "grad_norm": 4.114485740661621, "learning_rate": 6.388861492158052e-06, "loss": 0.4177, "step": 3152 }, { "epoch": 0.21541299446607912, "grad_norm": 4.445856094360352, "learning_rate": 6.388424166363989e-06, "loss": 0.4497, "step": 3153 }, { "epoch": 0.21548131447701033, "grad_norm": 5.496957302093506, "learning_rate": 6.387986699131457e-06, "loss": 0.4784, "step": 3154 }, { "epoch": 0.2155496344879415, "grad_norm": 5.318446159362793, "learning_rate": 6.3875490904818764e-06, "loss": 0.4103, "step": 3155 }, { "epoch": 0.21561795449887272, "grad_norm": 4.494206428527832, "learning_rate": 6.387111340436677e-06, "loss": 0.5291, "step": 3156 }, { "epoch": 0.21568627450980393, "grad_norm": 4.372988224029541, "learning_rate": 6.386673449017293e-06, "loss": 0.4305, "step": 3157 }, { "epoch": 0.21575459452073512, "grad_norm": 5.664793014526367, "learning_rate": 6.3862354162451674e-06, "loss": 0.4925, "step": 3158 }, { "epoch": 0.21582291453166633, "grad_norm": 4.385673522949219, "learning_rate": 6.38579724214175e-06, "loss": 0.383, "step": 3159 }, { "epoch": 0.21589123454259754, "grad_norm": 3.8397469520568848, "learning_rate": 6.385358926728495e-06, "loss": 0.4303, "step": 3160 }, { "epoch": 0.21595955455352872, "grad_norm": 4.984703540802002, "learning_rate": 6.384920470026868e-06, "loss": 0.5349, "step": 3161 }, { "epoch": 0.21602787456445993, "grad_norm": 3.4798240661621094, "learning_rate": 6.384481872058338e-06, "loss": 0.4098, "step": 3162 }, { "epoch": 0.21609619457539114, "grad_norm": 4.573818683624268, "learning_rate": 6.384043132844381e-06, "loss": 0.4603, "step": 3163 }, { "epoch": 0.21616451458632233, "grad_norm": 3.9837911128997803, "learning_rate": 6.383604252406483e-06, "loss": 0.4724, "step": 3164 }, { "epoch": 0.21623283459725354, "grad_norm": 4.3353071212768555, "learning_rate": 6.383165230766133e-06, "loss": 0.4335, "step": 3165 }, { "epoch": 0.21630115460818475, "grad_norm": 3.178664207458496, "learning_rate": 6.38272606794483e-06, "loss": 0.3564, "step": 3166 }, { "epoch": 0.21636947461911593, "grad_norm": 4.5875563621521, "learning_rate": 6.382286763964078e-06, "loss": 0.4536, "step": 3167 }, { "epoch": 0.21643779463004714, "grad_norm": 4.365592002868652, "learning_rate": 6.3818473188453875e-06, "loss": 0.3802, "step": 3168 }, { "epoch": 0.21650611464097835, "grad_norm": 3.245680570602417, "learning_rate": 6.38140773261028e-06, "loss": 0.29, "step": 3169 }, { "epoch": 0.21657443465190954, "grad_norm": 4.243463516235352, "learning_rate": 6.3809680052802775e-06, "loss": 0.4593, "step": 3170 }, { "epoch": 0.21664275466284075, "grad_norm": 4.761685371398926, "learning_rate": 6.380528136876914e-06, "loss": 0.4842, "step": 3171 }, { "epoch": 0.21671107467377196, "grad_norm": 3.6572635173797607, "learning_rate": 6.380088127421728e-06, "loss": 0.3405, "step": 3172 }, { "epoch": 0.21677939468470314, "grad_norm": 5.248632431030273, "learning_rate": 6.379647976936267e-06, "loss": 0.3873, "step": 3173 }, { "epoch": 0.21684771469563435, "grad_norm": 3.314363956451416, "learning_rate": 6.379207685442083e-06, "loss": 0.3414, "step": 3174 }, { "epoch": 0.21691603470656556, "grad_norm": 4.284717082977295, "learning_rate": 6.3787672529607344e-06, "loss": 0.3538, "step": 3175 }, { "epoch": 0.21698435471749675, "grad_norm": 5.215823173522949, "learning_rate": 6.378326679513791e-06, "loss": 0.4209, "step": 3176 }, { "epoch": 0.21705267472842796, "grad_norm": 4.6303558349609375, "learning_rate": 6.377885965122825e-06, "loss": 0.3651, "step": 3177 }, { "epoch": 0.21712099473935917, "grad_norm": 4.644762992858887, "learning_rate": 6.377445109809416e-06, "loss": 0.4151, "step": 3178 }, { "epoch": 0.21718931475029035, "grad_norm": 4.539631366729736, "learning_rate": 6.3770041135951525e-06, "loss": 0.4123, "step": 3179 }, { "epoch": 0.21725763476122156, "grad_norm": 4.252823829650879, "learning_rate": 6.376562976501631e-06, "loss": 0.4044, "step": 3180 }, { "epoch": 0.21732595477215277, "grad_norm": 5.252802848815918, "learning_rate": 6.376121698550449e-06, "loss": 0.4624, "step": 3181 }, { "epoch": 0.21739427478308396, "grad_norm": 4.372532844543457, "learning_rate": 6.375680279763217e-06, "loss": 0.4264, "step": 3182 }, { "epoch": 0.21746259479401517, "grad_norm": 2.9504554271698, "learning_rate": 6.37523872016155e-06, "loss": 0.3022, "step": 3183 }, { "epoch": 0.21753091480494638, "grad_norm": 3.7402169704437256, "learning_rate": 6.374797019767069e-06, "loss": 0.4133, "step": 3184 }, { "epoch": 0.21759923481587756, "grad_norm": 4.486865520477295, "learning_rate": 6.374355178601404e-06, "loss": 0.4094, "step": 3185 }, { "epoch": 0.21766755482680877, "grad_norm": 4.143078804016113, "learning_rate": 6.37391319668619e-06, "loss": 0.4374, "step": 3186 }, { "epoch": 0.21773587483773998, "grad_norm": 4.675766944885254, "learning_rate": 6.373471074043069e-06, "loss": 0.3844, "step": 3187 }, { "epoch": 0.21780419484867117, "grad_norm": 3.9790518283843994, "learning_rate": 6.373028810693693e-06, "loss": 0.3919, "step": 3188 }, { "epoch": 0.21787251485960238, "grad_norm": 5.819563865661621, "learning_rate": 6.3725864066597165e-06, "loss": 0.5199, "step": 3189 }, { "epoch": 0.2179408348705336, "grad_norm": 3.2050235271453857, "learning_rate": 6.372143861962804e-06, "loss": 0.3511, "step": 3190 }, { "epoch": 0.21800915488146477, "grad_norm": 4.260687828063965, "learning_rate": 6.3717011766246245e-06, "loss": 0.4668, "step": 3191 }, { "epoch": 0.21807747489239598, "grad_norm": 3.1784615516662598, "learning_rate": 6.371258350666855e-06, "loss": 0.3587, "step": 3192 }, { "epoch": 0.2181457949033272, "grad_norm": 3.584846019744873, "learning_rate": 6.370815384111181e-06, "loss": 0.3365, "step": 3193 }, { "epoch": 0.21821411491425838, "grad_norm": 3.28235125541687, "learning_rate": 6.370372276979293e-06, "loss": 0.4005, "step": 3194 }, { "epoch": 0.2182824349251896, "grad_norm": 3.739851951599121, "learning_rate": 6.369929029292888e-06, "loss": 0.3776, "step": 3195 }, { "epoch": 0.2183507549361208, "grad_norm": 3.67669939994812, "learning_rate": 6.369485641073671e-06, "loss": 0.4202, "step": 3196 }, { "epoch": 0.21841907494705198, "grad_norm": 4.920645236968994, "learning_rate": 6.369042112343354e-06, "loss": 0.5424, "step": 3197 }, { "epoch": 0.2184873949579832, "grad_norm": 4.367238998413086, "learning_rate": 6.368598443123655e-06, "loss": 0.4584, "step": 3198 }, { "epoch": 0.2185557149689144, "grad_norm": 3.8089499473571777, "learning_rate": 6.3681546334362986e-06, "loss": 0.3873, "step": 3199 }, { "epoch": 0.2186240349798456, "grad_norm": 3.0227880477905273, "learning_rate": 6.367710683303019e-06, "loss": 0.3051, "step": 3200 }, { "epoch": 0.2186923549907768, "grad_norm": 4.165598392486572, "learning_rate": 6.367266592745552e-06, "loss": 0.4327, "step": 3201 }, { "epoch": 0.218760675001708, "grad_norm": 3.2131152153015137, "learning_rate": 6.3668223617856475e-06, "loss": 0.3561, "step": 3202 }, { "epoch": 0.2188289950126392, "grad_norm": 2.8333170413970947, "learning_rate": 6.366377990445056e-06, "loss": 0.3471, "step": 3203 }, { "epoch": 0.2188973150235704, "grad_norm": 4.0609893798828125, "learning_rate": 6.365933478745537e-06, "loss": 0.3874, "step": 3204 }, { "epoch": 0.21896563503450162, "grad_norm": 5.875830173492432, "learning_rate": 6.365488826708859e-06, "loss": 0.4053, "step": 3205 }, { "epoch": 0.2190339550454328, "grad_norm": 3.8862061500549316, "learning_rate": 6.365044034356793e-06, "loss": 0.3899, "step": 3206 }, { "epoch": 0.219102275056364, "grad_norm": 3.7596583366394043, "learning_rate": 6.364599101711119e-06, "loss": 0.4012, "step": 3207 }, { "epoch": 0.21917059506729522, "grad_norm": 4.909789085388184, "learning_rate": 6.364154028793627e-06, "loss": 0.4224, "step": 3208 }, { "epoch": 0.2192389150782264, "grad_norm": 4.667140483856201, "learning_rate": 6.36370881562611e-06, "loss": 0.442, "step": 3209 }, { "epoch": 0.21930723508915761, "grad_norm": 4.669923305511475, "learning_rate": 6.363263462230367e-06, "loss": 0.4422, "step": 3210 }, { "epoch": 0.21937555510008883, "grad_norm": 3.9857287406921387, "learning_rate": 6.362817968628209e-06, "loss": 0.3698, "step": 3211 }, { "epoch": 0.21944387511102, "grad_norm": 4.112052917480469, "learning_rate": 6.362372334841448e-06, "loss": 0.4241, "step": 3212 }, { "epoch": 0.21951219512195122, "grad_norm": 3.9845168590545654, "learning_rate": 6.361926560891906e-06, "loss": 0.3698, "step": 3213 }, { "epoch": 0.21958051513288243, "grad_norm": 3.204169750213623, "learning_rate": 6.361480646801412e-06, "loss": 0.3569, "step": 3214 }, { "epoch": 0.21964883514381361, "grad_norm": 4.322516441345215, "learning_rate": 6.361034592591801e-06, "loss": 0.4742, "step": 3215 }, { "epoch": 0.21971715515474483, "grad_norm": 3.485752820968628, "learning_rate": 6.360588398284915e-06, "loss": 0.4227, "step": 3216 }, { "epoch": 0.21978547516567604, "grad_norm": 4.974384307861328, "learning_rate": 6.360142063902603e-06, "loss": 0.4907, "step": 3217 }, { "epoch": 0.21985379517660722, "grad_norm": 2.9150986671447754, "learning_rate": 6.3596955894667205e-06, "loss": 0.2947, "step": 3218 }, { "epoch": 0.21992211518753843, "grad_norm": 3.899526357650757, "learning_rate": 6.3592489749991304e-06, "loss": 0.3731, "step": 3219 }, { "epoch": 0.21999043519846964, "grad_norm": 4.746647834777832, "learning_rate": 6.358802220521704e-06, "loss": 0.407, "step": 3220 }, { "epoch": 0.22005875520940082, "grad_norm": 5.043062686920166, "learning_rate": 6.358355326056314e-06, "loss": 0.419, "step": 3221 }, { "epoch": 0.22012707522033204, "grad_norm": 3.2209970951080322, "learning_rate": 6.357908291624846e-06, "loss": 0.3614, "step": 3222 }, { "epoch": 0.22019539523126325, "grad_norm": 3.411087989807129, "learning_rate": 6.357461117249191e-06, "loss": 0.3818, "step": 3223 }, { "epoch": 0.22026371524219443, "grad_norm": 4.133845806121826, "learning_rate": 6.357013802951244e-06, "loss": 0.5013, "step": 3224 }, { "epoch": 0.22033203525312564, "grad_norm": 4.356076240539551, "learning_rate": 6.3565663487529095e-06, "loss": 0.4285, "step": 3225 }, { "epoch": 0.22040035526405685, "grad_norm": 2.7455129623413086, "learning_rate": 6.3561187546760975e-06, "loss": 0.3378, "step": 3226 }, { "epoch": 0.22046867527498804, "grad_norm": 3.732041358947754, "learning_rate": 6.355671020742725e-06, "loss": 0.3688, "step": 3227 }, { "epoch": 0.22053699528591925, "grad_norm": 4.221344470977783, "learning_rate": 6.35522314697472e-06, "loss": 0.422, "step": 3228 }, { "epoch": 0.22060531529685046, "grad_norm": 4.157024383544922, "learning_rate": 6.35477513339401e-06, "loss": 0.4228, "step": 3229 }, { "epoch": 0.22067363530778164, "grad_norm": 3.4334232807159424, "learning_rate": 6.354326980022536e-06, "loss": 0.3317, "step": 3230 }, { "epoch": 0.22074195531871285, "grad_norm": 3.3487555980682373, "learning_rate": 6.353878686882238e-06, "loss": 0.3427, "step": 3231 }, { "epoch": 0.22081027532964406, "grad_norm": 5.751039028167725, "learning_rate": 6.353430253995072e-06, "loss": 0.4329, "step": 3232 }, { "epoch": 0.22087859534057525, "grad_norm": 3.6502840518951416, "learning_rate": 6.352981681382995e-06, "loss": 0.3659, "step": 3233 }, { "epoch": 0.22094691535150646, "grad_norm": 3.9237735271453857, "learning_rate": 6.352532969067974e-06, "loss": 0.3467, "step": 3234 }, { "epoch": 0.22101523536243767, "grad_norm": 4.947150707244873, "learning_rate": 6.352084117071979e-06, "loss": 0.4319, "step": 3235 }, { "epoch": 0.22108355537336885, "grad_norm": 4.873137950897217, "learning_rate": 6.351635125416991e-06, "loss": 0.3543, "step": 3236 }, { "epoch": 0.22115187538430006, "grad_norm": 4.523248195648193, "learning_rate": 6.351185994124994e-06, "loss": 0.4196, "step": 3237 }, { "epoch": 0.22122019539523127, "grad_norm": 4.364364147186279, "learning_rate": 6.350736723217983e-06, "loss": 0.3384, "step": 3238 }, { "epoch": 0.22128851540616246, "grad_norm": 3.838498592376709, "learning_rate": 6.350287312717954e-06, "loss": 0.4102, "step": 3239 }, { "epoch": 0.22135683541709367, "grad_norm": 5.99211311340332, "learning_rate": 6.349837762646918e-06, "loss": 0.4118, "step": 3240 }, { "epoch": 0.22142515542802488, "grad_norm": 5.025266647338867, "learning_rate": 6.349388073026886e-06, "loss": 0.424, "step": 3241 }, { "epoch": 0.22149347543895606, "grad_norm": 3.8157153129577637, "learning_rate": 6.348938243879877e-06, "loss": 0.2919, "step": 3242 }, { "epoch": 0.22156179544988727, "grad_norm": 3.7554378509521484, "learning_rate": 6.34848827522792e-06, "loss": 0.4778, "step": 3243 }, { "epoch": 0.22163011546081848, "grad_norm": 5.045438766479492, "learning_rate": 6.348038167093048e-06, "loss": 0.3368, "step": 3244 }, { "epoch": 0.22169843547174967, "grad_norm": 3.745042324066162, "learning_rate": 6.3475879194973014e-06, "loss": 0.4296, "step": 3245 }, { "epoch": 0.22176675548268088, "grad_norm": 4.212330341339111, "learning_rate": 6.347137532462728e-06, "loss": 0.3323, "step": 3246 }, { "epoch": 0.2218350754936121, "grad_norm": 5.01473331451416, "learning_rate": 6.346687006011383e-06, "loss": 0.406, "step": 3247 }, { "epoch": 0.22190339550454327, "grad_norm": 4.951469421386719, "learning_rate": 6.346236340165324e-06, "loss": 0.5295, "step": 3248 }, { "epoch": 0.22197171551547448, "grad_norm": 5.637778282165527, "learning_rate": 6.345785534946622e-06, "loss": 0.311, "step": 3249 }, { "epoch": 0.2220400355264057, "grad_norm": 3.8361458778381348, "learning_rate": 6.345334590377351e-06, "loss": 0.4451, "step": 3250 }, { "epoch": 0.22210835553733688, "grad_norm": 4.47965145111084, "learning_rate": 6.344883506479593e-06, "loss": 0.4616, "step": 3251 }, { "epoch": 0.2221766755482681, "grad_norm": 3.224093437194824, "learning_rate": 6.3444322832754366e-06, "loss": 0.4237, "step": 3252 }, { "epoch": 0.2222449955591993, "grad_norm": 3.9195921421051025, "learning_rate": 6.343980920786974e-06, "loss": 0.4452, "step": 3253 }, { "epoch": 0.22231331557013048, "grad_norm": 3.2488629817962646, "learning_rate": 6.343529419036311e-06, "loss": 0.3901, "step": 3254 }, { "epoch": 0.2223816355810617, "grad_norm": 4.442494869232178, "learning_rate": 6.343077778045555e-06, "loss": 0.3341, "step": 3255 }, { "epoch": 0.2224499555919929, "grad_norm": 4.702830791473389, "learning_rate": 6.342625997836822e-06, "loss": 0.3937, "step": 3256 }, { "epoch": 0.2225182756029241, "grad_norm": 4.612091541290283, "learning_rate": 6.342174078432233e-06, "loss": 0.4896, "step": 3257 }, { "epoch": 0.2225865956138553, "grad_norm": 4.680546760559082, "learning_rate": 6.3417220198539195e-06, "loss": 0.4421, "step": 3258 }, { "epoch": 0.2226549156247865, "grad_norm": 4.463124752044678, "learning_rate": 6.341269822124016e-06, "loss": 0.3659, "step": 3259 }, { "epoch": 0.2227232356357177, "grad_norm": 4.101335048675537, "learning_rate": 6.340817485264666e-06, "loss": 0.4121, "step": 3260 }, { "epoch": 0.2227915556466489, "grad_norm": 4.605353832244873, "learning_rate": 6.340365009298019e-06, "loss": 0.4844, "step": 3261 }, { "epoch": 0.22285987565758011, "grad_norm": 3.194064140319824, "learning_rate": 6.339912394246232e-06, "loss": 0.2902, "step": 3262 }, { "epoch": 0.2229281956685113, "grad_norm": 4.575514316558838, "learning_rate": 6.339459640131468e-06, "loss": 0.4314, "step": 3263 }, { "epoch": 0.2229965156794425, "grad_norm": 4.9347004890441895, "learning_rate": 6.3390067469758975e-06, "loss": 0.4544, "step": 3264 }, { "epoch": 0.22306483569037372, "grad_norm": 4.935187816619873, "learning_rate": 6.3385537148016966e-06, "loss": 0.4324, "step": 3265 }, { "epoch": 0.2231331557013049, "grad_norm": 5.006368160247803, "learning_rate": 6.33810054363105e-06, "loss": 0.4235, "step": 3266 }, { "epoch": 0.2232014757122361, "grad_norm": 3.689795970916748, "learning_rate": 6.3376472334861484e-06, "loss": 0.3623, "step": 3267 }, { "epoch": 0.22326979572316732, "grad_norm": 4.426003456115723, "learning_rate": 6.337193784389188e-06, "loss": 0.3676, "step": 3268 }, { "epoch": 0.2233381157340985, "grad_norm": 3.8425705432891846, "learning_rate": 6.336740196362375e-06, "loss": 0.371, "step": 3269 }, { "epoch": 0.22340643574502972, "grad_norm": 4.636287212371826, "learning_rate": 6.336286469427918e-06, "loss": 0.408, "step": 3270 }, { "epoch": 0.22347475575596093, "grad_norm": 4.987454891204834, "learning_rate": 6.335832603608037e-06, "loss": 0.4337, "step": 3271 }, { "epoch": 0.2235430757668921, "grad_norm": 3.9928035736083984, "learning_rate": 6.335378598924955e-06, "loss": 0.3428, "step": 3272 }, { "epoch": 0.22361139577782332, "grad_norm": 4.240739822387695, "learning_rate": 6.334924455400904e-06, "loss": 0.4723, "step": 3273 }, { "epoch": 0.22367971578875454, "grad_norm": 3.8082845211029053, "learning_rate": 6.334470173058123e-06, "loss": 0.3472, "step": 3274 }, { "epoch": 0.22374803579968572, "grad_norm": 3.4322128295898438, "learning_rate": 6.3340157519188545e-06, "loss": 0.3596, "step": 3275 }, { "epoch": 0.22381635581061693, "grad_norm": 3.685605049133301, "learning_rate": 6.333561192005354e-06, "loss": 0.3983, "step": 3276 }, { "epoch": 0.22388467582154814, "grad_norm": 4.191880702972412, "learning_rate": 6.333106493339876e-06, "loss": 0.3263, "step": 3277 }, { "epoch": 0.22395299583247932, "grad_norm": 4.200597286224365, "learning_rate": 6.33265165594469e-06, "loss": 0.4718, "step": 3278 }, { "epoch": 0.22402131584341053, "grad_norm": 4.98560905456543, "learning_rate": 6.3321966798420656e-06, "loss": 0.4934, "step": 3279 }, { "epoch": 0.22408963585434175, "grad_norm": 4.117783546447754, "learning_rate": 6.331741565054282e-06, "loss": 0.3563, "step": 3280 }, { "epoch": 0.22415795586527293, "grad_norm": 4.23923397064209, "learning_rate": 6.331286311603625e-06, "loss": 0.4132, "step": 3281 }, { "epoch": 0.22422627587620414, "grad_norm": 4.491927623748779, "learning_rate": 6.3308309195123885e-06, "loss": 0.3766, "step": 3282 }, { "epoch": 0.22429459588713535, "grad_norm": 4.854093074798584, "learning_rate": 6.33037538880287e-06, "loss": 0.4111, "step": 3283 }, { "epoch": 0.22436291589806653, "grad_norm": 4.610790729522705, "learning_rate": 6.329919719497376e-06, "loss": 0.4787, "step": 3284 }, { "epoch": 0.22443123590899774, "grad_norm": 3.76741099357605, "learning_rate": 6.329463911618222e-06, "loss": 0.3867, "step": 3285 }, { "epoch": 0.22449955591992896, "grad_norm": 5.145551681518555, "learning_rate": 6.329007965187724e-06, "loss": 0.3698, "step": 3286 }, { "epoch": 0.22456787593086014, "grad_norm": 5.041630744934082, "learning_rate": 6.32855188022821e-06, "loss": 0.5632, "step": 3287 }, { "epoch": 0.22463619594179135, "grad_norm": 5.109694004058838, "learning_rate": 6.328095656762013e-06, "loss": 0.3902, "step": 3288 }, { "epoch": 0.22470451595272256, "grad_norm": 3.5079903602600098, "learning_rate": 6.327639294811474e-06, "loss": 0.4015, "step": 3289 }, { "epoch": 0.22477283596365374, "grad_norm": 4.890403747558594, "learning_rate": 6.327182794398939e-06, "loss": 0.4376, "step": 3290 }, { "epoch": 0.22484115597458496, "grad_norm": 3.93333101272583, "learning_rate": 6.326726155546762e-06, "loss": 0.4655, "step": 3291 }, { "epoch": 0.22490947598551617, "grad_norm": 7.119459629058838, "learning_rate": 6.326269378277303e-06, "loss": 0.4334, "step": 3292 }, { "epoch": 0.22497779599644735, "grad_norm": 3.60398530960083, "learning_rate": 6.32581246261293e-06, "loss": 0.3767, "step": 3293 }, { "epoch": 0.22504611600737856, "grad_norm": 3.1255035400390625, "learning_rate": 6.325355408576016e-06, "loss": 0.3432, "step": 3294 }, { "epoch": 0.22511443601830977, "grad_norm": 4.788181304931641, "learning_rate": 6.324898216188942e-06, "loss": 0.371, "step": 3295 }, { "epoch": 0.22518275602924095, "grad_norm": 4.297891616821289, "learning_rate": 6.324440885474095e-06, "loss": 0.43, "step": 3296 }, { "epoch": 0.22525107604017217, "grad_norm": 3.8696327209472656, "learning_rate": 6.3239834164538715e-06, "loss": 0.3465, "step": 3297 }, { "epoch": 0.22531939605110338, "grad_norm": 4.961650371551514, "learning_rate": 6.323525809150669e-06, "loss": 0.4563, "step": 3298 }, { "epoch": 0.22538771606203456, "grad_norm": 4.62445068359375, "learning_rate": 6.323068063586898e-06, "loss": 0.3427, "step": 3299 }, { "epoch": 0.22545603607296577, "grad_norm": 4.014305114746094, "learning_rate": 6.3226101797849725e-06, "loss": 0.3773, "step": 3300 }, { "epoch": 0.22552435608389698, "grad_norm": 3.8785769939422607, "learning_rate": 6.322152157767313e-06, "loss": 0.3322, "step": 3301 }, { "epoch": 0.22559267609482817, "grad_norm": 4.719727039337158, "learning_rate": 6.321693997556348e-06, "loss": 0.4905, "step": 3302 }, { "epoch": 0.22566099610575938, "grad_norm": 4.170672416687012, "learning_rate": 6.321235699174514e-06, "loss": 0.5274, "step": 3303 }, { "epoch": 0.2257293161166906, "grad_norm": 5.199453353881836, "learning_rate": 6.32077726264425e-06, "loss": 0.5315, "step": 3304 }, { "epoch": 0.22579763612762177, "grad_norm": 4.987823486328125, "learning_rate": 6.320318687988006e-06, "loss": 0.4813, "step": 3305 }, { "epoch": 0.22586595613855298, "grad_norm": 4.884703636169434, "learning_rate": 6.319859975228238e-06, "loss": 0.3596, "step": 3306 }, { "epoch": 0.2259342761494842, "grad_norm": 4.600988388061523, "learning_rate": 6.319401124387406e-06, "loss": 0.5305, "step": 3307 }, { "epoch": 0.22600259616041538, "grad_norm": 4.826005458831787, "learning_rate": 6.318942135487979e-06, "loss": 0.3188, "step": 3308 }, { "epoch": 0.2260709161713466, "grad_norm": 5.260870456695557, "learning_rate": 6.318483008552434e-06, "loss": 0.3332, "step": 3309 }, { "epoch": 0.2261392361822778, "grad_norm": 3.910788059234619, "learning_rate": 6.318023743603252e-06, "loss": 0.382, "step": 3310 }, { "epoch": 0.22620755619320898, "grad_norm": 6.566200256347656, "learning_rate": 6.317564340662922e-06, "loss": 0.339, "step": 3311 }, { "epoch": 0.2262758762041402, "grad_norm": 4.961874008178711, "learning_rate": 6.31710479975394e-06, "loss": 0.3019, "step": 3312 }, { "epoch": 0.2263441962150714, "grad_norm": 5.030183792114258, "learning_rate": 6.316645120898809e-06, "loss": 0.4034, "step": 3313 }, { "epoch": 0.22641251622600259, "grad_norm": 3.9841556549072266, "learning_rate": 6.316185304120037e-06, "loss": 0.3385, "step": 3314 }, { "epoch": 0.2264808362369338, "grad_norm": 3.720634698867798, "learning_rate": 6.315725349440141e-06, "loss": 0.5273, "step": 3315 }, { "epoch": 0.226549156247865, "grad_norm": 4.238167762756348, "learning_rate": 6.315265256881643e-06, "loss": 0.4063, "step": 3316 }, { "epoch": 0.2266174762587962, "grad_norm": 4.106662273406982, "learning_rate": 6.314805026467075e-06, "loss": 0.427, "step": 3317 }, { "epoch": 0.2266857962697274, "grad_norm": 3.543501853942871, "learning_rate": 6.31434465821897e-06, "loss": 0.466, "step": 3318 }, { "epoch": 0.2267541162806586, "grad_norm": 3.9074954986572266, "learning_rate": 6.3138841521598715e-06, "loss": 0.3732, "step": 3319 }, { "epoch": 0.2268224362915898, "grad_norm": 3.830449104309082, "learning_rate": 6.313423508312331e-06, "loss": 0.4573, "step": 3320 }, { "epoch": 0.226890756302521, "grad_norm": 3.7304487228393555, "learning_rate": 6.312962726698904e-06, "loss": 0.3665, "step": 3321 }, { "epoch": 0.22695907631345222, "grad_norm": 4.55789852142334, "learning_rate": 6.312501807342154e-06, "loss": 0.4237, "step": 3322 }, { "epoch": 0.2270273963243834, "grad_norm": 5.650750160217285, "learning_rate": 6.31204075026465e-06, "loss": 0.4591, "step": 3323 }, { "epoch": 0.2270957163353146, "grad_norm": 4.180910110473633, "learning_rate": 6.3115795554889704e-06, "loss": 0.4297, "step": 3324 }, { "epoch": 0.22716403634624582, "grad_norm": 5.103305339813232, "learning_rate": 6.311118223037698e-06, "loss": 0.4714, "step": 3325 }, { "epoch": 0.227232356357177, "grad_norm": 5.318322658538818, "learning_rate": 6.310656752933421e-06, "loss": 0.348, "step": 3326 }, { "epoch": 0.22730067636810822, "grad_norm": 3.613396406173706, "learning_rate": 6.31019514519874e-06, "loss": 0.2795, "step": 3327 }, { "epoch": 0.22736899637903943, "grad_norm": 4.717040538787842, "learning_rate": 6.309733399856257e-06, "loss": 0.4085, "step": 3328 }, { "epoch": 0.2274373163899706, "grad_norm": 4.209524154663086, "learning_rate": 6.309271516928582e-06, "loss": 0.4601, "step": 3329 }, { "epoch": 0.22750563640090182, "grad_norm": 4.811774253845215, "learning_rate": 6.308809496438333e-06, "loss": 0.4401, "step": 3330 }, { "epoch": 0.22757395641183303, "grad_norm": 4.24464225769043, "learning_rate": 6.308347338408133e-06, "loss": 0.282, "step": 3331 }, { "epoch": 0.22764227642276422, "grad_norm": 4.792961120605469, "learning_rate": 6.3078850428606125e-06, "loss": 0.3678, "step": 3332 }, { "epoch": 0.22771059643369543, "grad_norm": 3.8974716663360596, "learning_rate": 6.307422609818411e-06, "loss": 0.3578, "step": 3333 }, { "epoch": 0.22777891644462664, "grad_norm": 4.341977119445801, "learning_rate": 6.30696003930417e-06, "loss": 0.4699, "step": 3334 }, { "epoch": 0.22784723645555782, "grad_norm": 3.620496988296509, "learning_rate": 6.306497331340542e-06, "loss": 0.4217, "step": 3335 }, { "epoch": 0.22791555646648903, "grad_norm": 5.192434787750244, "learning_rate": 6.306034485950184e-06, "loss": 0.4308, "step": 3336 }, { "epoch": 0.22798387647742024, "grad_norm": 4.3644328117370605, "learning_rate": 6.30557150315576e-06, "loss": 0.4584, "step": 3337 }, { "epoch": 0.22805219648835143, "grad_norm": 4.012783050537109, "learning_rate": 6.305108382979942e-06, "loss": 0.3938, "step": 3338 }, { "epoch": 0.22812051649928264, "grad_norm": 4.648184776306152, "learning_rate": 6.304645125445406e-06, "loss": 0.4336, "step": 3339 }, { "epoch": 0.22818883651021385, "grad_norm": 4.660030841827393, "learning_rate": 6.30418173057484e-06, "loss": 0.3413, "step": 3340 }, { "epoch": 0.22825715652114503, "grad_norm": 4.7625508308410645, "learning_rate": 6.303718198390931e-06, "loss": 0.4395, "step": 3341 }, { "epoch": 0.22832547653207624, "grad_norm": 3.146226167678833, "learning_rate": 6.30325452891638e-06, "loss": 0.3082, "step": 3342 }, { "epoch": 0.22839379654300745, "grad_norm": 6.46570348739624, "learning_rate": 6.30279072217389e-06, "loss": 0.3119, "step": 3343 }, { "epoch": 0.22846211655393864, "grad_norm": 5.7950968742370605, "learning_rate": 6.302326778186172e-06, "loss": 0.5009, "step": 3344 }, { "epoch": 0.22853043656486985, "grad_norm": 3.856719732284546, "learning_rate": 6.301862696975946e-06, "loss": 0.3664, "step": 3345 }, { "epoch": 0.22859875657580106, "grad_norm": 4.032095909118652, "learning_rate": 6.301398478565936e-06, "loss": 0.408, "step": 3346 }, { "epoch": 0.22866707658673224, "grad_norm": 5.122629642486572, "learning_rate": 6.300934122978873e-06, "loss": 0.4287, "step": 3347 }, { "epoch": 0.22873539659766345, "grad_norm": 4.344850540161133, "learning_rate": 6.3004696302374956e-06, "loss": 0.4502, "step": 3348 }, { "epoch": 0.22880371660859466, "grad_norm": 6.096971035003662, "learning_rate": 6.300005000364549e-06, "loss": 0.3839, "step": 3349 }, { "epoch": 0.22887203661952585, "grad_norm": 5.212522506713867, "learning_rate": 6.2995402333827845e-06, "loss": 0.3662, "step": 3350 }, { "epoch": 0.22894035663045706, "grad_norm": 3.739339590072632, "learning_rate": 6.299075329314961e-06, "loss": 0.4103, "step": 3351 }, { "epoch": 0.22900867664138827, "grad_norm": 3.318789482116699, "learning_rate": 6.298610288183843e-06, "loss": 0.4256, "step": 3352 }, { "epoch": 0.22907699665231945, "grad_norm": 3.860743284225464, "learning_rate": 6.298145110012203e-06, "loss": 0.338, "step": 3353 }, { "epoch": 0.22914531666325066, "grad_norm": 3.487136125564575, "learning_rate": 6.297679794822819e-06, "loss": 0.3043, "step": 3354 }, { "epoch": 0.22921363667418188, "grad_norm": 4.215843677520752, "learning_rate": 6.297214342638477e-06, "loss": 0.4505, "step": 3355 }, { "epoch": 0.22928195668511306, "grad_norm": 4.1380462646484375, "learning_rate": 6.296748753481968e-06, "loss": 0.4399, "step": 3356 }, { "epoch": 0.22935027669604427, "grad_norm": 3.5394468307495117, "learning_rate": 6.296283027376091e-06, "loss": 0.332, "step": 3357 }, { "epoch": 0.22941859670697548, "grad_norm": 3.6264023780822754, "learning_rate": 6.2958171643436505e-06, "loss": 0.3387, "step": 3358 }, { "epoch": 0.22948691671790666, "grad_norm": 5.777950763702393, "learning_rate": 6.29535116440746e-06, "loss": 0.5092, "step": 3359 }, { "epoch": 0.22955523672883787, "grad_norm": 3.5798416137695312, "learning_rate": 6.294885027590339e-06, "loss": 0.4288, "step": 3360 }, { "epoch": 0.22962355673976909, "grad_norm": 14.018732070922852, "learning_rate": 6.294418753915111e-06, "loss": 0.4981, "step": 3361 }, { "epoch": 0.22969187675070027, "grad_norm": 3.5709218978881836, "learning_rate": 6.293952343404609e-06, "loss": 0.2916, "step": 3362 }, { "epoch": 0.22976019676163148, "grad_norm": 3.6861751079559326, "learning_rate": 6.293485796081672e-06, "loss": 0.33, "step": 3363 }, { "epoch": 0.2298285167725627, "grad_norm": 4.381486892700195, "learning_rate": 6.2930191119691456e-06, "loss": 0.3383, "step": 3364 }, { "epoch": 0.22989683678349387, "grad_norm": 4.4346771240234375, "learning_rate": 6.2925522910898824e-06, "loss": 0.3868, "step": 3365 }, { "epoch": 0.22996515679442509, "grad_norm": 5.942524433135986, "learning_rate": 6.292085333466739e-06, "loss": 0.4639, "step": 3366 }, { "epoch": 0.2300334768053563, "grad_norm": 4.72446870803833, "learning_rate": 6.291618239122584e-06, "loss": 0.5229, "step": 3367 }, { "epoch": 0.23010179681628748, "grad_norm": 4.231206893920898, "learning_rate": 6.2911510080802896e-06, "loss": 0.3783, "step": 3368 }, { "epoch": 0.2301701168272187, "grad_norm": 3.7629199028015137, "learning_rate": 6.290683640362734e-06, "loss": 0.4347, "step": 3369 }, { "epoch": 0.2302384368381499, "grad_norm": 3.8796517848968506, "learning_rate": 6.2902161359928015e-06, "loss": 0.4288, "step": 3370 }, { "epoch": 0.23030675684908108, "grad_norm": 3.1466152667999268, "learning_rate": 6.289748494993388e-06, "loss": 0.4571, "step": 3371 }, { "epoch": 0.2303750768600123, "grad_norm": 4.270725727081299, "learning_rate": 6.289280717387389e-06, "loss": 0.3811, "step": 3372 }, { "epoch": 0.2304433968709435, "grad_norm": 4.155753135681152, "learning_rate": 6.288812803197713e-06, "loss": 0.4347, "step": 3373 }, { "epoch": 0.2305117168818747, "grad_norm": 4.0712571144104, "learning_rate": 6.288344752447271e-06, "loss": 0.3569, "step": 3374 }, { "epoch": 0.2305800368928059, "grad_norm": 4.208462238311768, "learning_rate": 6.287876565158983e-06, "loss": 0.4086, "step": 3375 }, { "epoch": 0.2306483569037371, "grad_norm": 4.125338077545166, "learning_rate": 6.287408241355775e-06, "loss": 0.3687, "step": 3376 }, { "epoch": 0.2307166769146683, "grad_norm": 4.491386413574219, "learning_rate": 6.286939781060577e-06, "loss": 0.3871, "step": 3377 }, { "epoch": 0.2307849969255995, "grad_norm": 5.162965774536133, "learning_rate": 6.286471184296332e-06, "loss": 0.4099, "step": 3378 }, { "epoch": 0.23085331693653072, "grad_norm": 5.14743709564209, "learning_rate": 6.286002451085986e-06, "loss": 0.4911, "step": 3379 }, { "epoch": 0.2309216369474619, "grad_norm": 3.932976245880127, "learning_rate": 6.2855335814524875e-06, "loss": 0.4304, "step": 3380 }, { "epoch": 0.2309899569583931, "grad_norm": 3.9273743629455566, "learning_rate": 6.2850645754187995e-06, "loss": 0.3951, "step": 3381 }, { "epoch": 0.23105827696932432, "grad_norm": 3.259467840194702, "learning_rate": 6.2845954330078865e-06, "loss": 0.3224, "step": 3382 }, { "epoch": 0.2311265969802555, "grad_norm": 4.080451488494873, "learning_rate": 6.284126154242722e-06, "loss": 0.4214, "step": 3383 }, { "epoch": 0.23119491699118672, "grad_norm": 4.45844030380249, "learning_rate": 6.283656739146284e-06, "loss": 0.3459, "step": 3384 }, { "epoch": 0.23126323700211793, "grad_norm": 4.108824253082275, "learning_rate": 6.283187187741559e-06, "loss": 0.499, "step": 3385 }, { "epoch": 0.2313315570130491, "grad_norm": 4.8757781982421875, "learning_rate": 6.2827175000515415e-06, "loss": 0.4684, "step": 3386 }, { "epoch": 0.23139987702398032, "grad_norm": 3.277392625808716, "learning_rate": 6.2822476760992285e-06, "loss": 0.3383, "step": 3387 }, { "epoch": 0.23146819703491153, "grad_norm": 5.290621280670166, "learning_rate": 6.281777715907627e-06, "loss": 0.3883, "step": 3388 }, { "epoch": 0.23153651704584272, "grad_norm": 3.7184062004089355, "learning_rate": 6.281307619499749e-06, "loss": 0.405, "step": 3389 }, { "epoch": 0.23160483705677393, "grad_norm": 3.982328414916992, "learning_rate": 6.280837386898615e-06, "loss": 0.2984, "step": 3390 }, { "epoch": 0.23167315706770514, "grad_norm": 6.426968574523926, "learning_rate": 6.280367018127249e-06, "loss": 0.4424, "step": 3391 }, { "epoch": 0.23174147707863632, "grad_norm": 5.492124557495117, "learning_rate": 6.279896513208688e-06, "loss": 0.4436, "step": 3392 }, { "epoch": 0.23180979708956753, "grad_norm": 4.493229866027832, "learning_rate": 6.279425872165966e-06, "loss": 0.426, "step": 3393 }, { "epoch": 0.23187811710049874, "grad_norm": 3.8152449131011963, "learning_rate": 6.278955095022133e-06, "loss": 0.2958, "step": 3394 }, { "epoch": 0.23194643711142993, "grad_norm": 5.310492515563965, "learning_rate": 6.2784841818002405e-06, "loss": 0.4696, "step": 3395 }, { "epoch": 0.23201475712236114, "grad_norm": 4.881303787231445, "learning_rate": 6.278013132523347e-06, "loss": 0.3944, "step": 3396 }, { "epoch": 0.23208307713329235, "grad_norm": 5.325389385223389, "learning_rate": 6.27754194721452e-06, "loss": 0.5512, "step": 3397 }, { "epoch": 0.23215139714422353, "grad_norm": 4.5807108879089355, "learning_rate": 6.27707062589683e-06, "loss": 0.4151, "step": 3398 }, { "epoch": 0.23221971715515474, "grad_norm": 4.275853157043457, "learning_rate": 6.27659916859336e-06, "loss": 0.382, "step": 3399 }, { "epoch": 0.23228803716608595, "grad_norm": 4.071019649505615, "learning_rate": 6.2761275753271924e-06, "loss": 0.3919, "step": 3400 }, { "epoch": 0.23235635717701714, "grad_norm": 6.515214443206787, "learning_rate": 6.2756558461214205e-06, "loss": 0.403, "step": 3401 }, { "epoch": 0.23242467718794835, "grad_norm": 4.264764785766602, "learning_rate": 6.275183980999146e-06, "loss": 0.4152, "step": 3402 }, { "epoch": 0.23249299719887956, "grad_norm": 5.422216415405273, "learning_rate": 6.274711979983473e-06, "loss": 0.4949, "step": 3403 }, { "epoch": 0.23256131720981074, "grad_norm": 3.984680652618408, "learning_rate": 6.274239843097514e-06, "loss": 0.3917, "step": 3404 }, { "epoch": 0.23262963722074195, "grad_norm": 3.713796615600586, "learning_rate": 6.273767570364388e-06, "loss": 0.2909, "step": 3405 }, { "epoch": 0.23269795723167316, "grad_norm": 4.009910583496094, "learning_rate": 6.273295161807223e-06, "loss": 0.4151, "step": 3406 }, { "epoch": 0.23276627724260435, "grad_norm": 3.476820945739746, "learning_rate": 6.272822617449149e-06, "loss": 0.3863, "step": 3407 }, { "epoch": 0.23283459725353556, "grad_norm": 5.0555524826049805, "learning_rate": 6.272349937313308e-06, "loss": 0.4528, "step": 3408 }, { "epoch": 0.23290291726446677, "grad_norm": 4.17169713973999, "learning_rate": 6.271877121422843e-06, "loss": 0.3527, "step": 3409 }, { "epoch": 0.23297123727539795, "grad_norm": 4.22653865814209, "learning_rate": 6.271404169800909e-06, "loss": 0.3796, "step": 3410 }, { "epoch": 0.23303955728632916, "grad_norm": 3.702958106994629, "learning_rate": 6.2709310824706634e-06, "loss": 0.358, "step": 3411 }, { "epoch": 0.23310787729726037, "grad_norm": 4.805278301239014, "learning_rate": 6.270457859455273e-06, "loss": 0.3915, "step": 3412 }, { "epoch": 0.23317619730819156, "grad_norm": 3.4123637676239014, "learning_rate": 6.26998450077791e-06, "loss": 0.3438, "step": 3413 }, { "epoch": 0.23324451731912277, "grad_norm": 4.6784162521362305, "learning_rate": 6.269511006461753e-06, "loss": 0.3908, "step": 3414 }, { "epoch": 0.23331283733005398, "grad_norm": 5.3779520988464355, "learning_rate": 6.269037376529989e-06, "loss": 0.3824, "step": 3415 }, { "epoch": 0.23338115734098516, "grad_norm": 4.547228813171387, "learning_rate": 6.2685636110058105e-06, "loss": 0.4278, "step": 3416 }, { "epoch": 0.23344947735191637, "grad_norm": 3.82902193069458, "learning_rate": 6.268089709912414e-06, "loss": 0.4477, "step": 3417 }, { "epoch": 0.23351779736284758, "grad_norm": 5.272950649261475, "learning_rate": 6.2676156732730075e-06, "loss": 0.5645, "step": 3418 }, { "epoch": 0.23358611737377877, "grad_norm": 4.907961845397949, "learning_rate": 6.267141501110804e-06, "loss": 0.3363, "step": 3419 }, { "epoch": 0.23365443738470998, "grad_norm": 4.177010536193848, "learning_rate": 6.266667193449019e-06, "loss": 0.4616, "step": 3420 }, { "epoch": 0.2337227573956412, "grad_norm": 4.824283599853516, "learning_rate": 6.266192750310883e-06, "loss": 0.4618, "step": 3421 }, { "epoch": 0.23379107740657237, "grad_norm": 4.406660079956055, "learning_rate": 6.265718171719623e-06, "loss": 0.4856, "step": 3422 }, { "epoch": 0.23385939741750358, "grad_norm": 3.2390236854553223, "learning_rate": 6.265243457698482e-06, "loss": 0.3347, "step": 3423 }, { "epoch": 0.2339277174284348, "grad_norm": 3.2527894973754883, "learning_rate": 6.264768608270703e-06, "loss": 0.3313, "step": 3424 }, { "epoch": 0.23399603743936598, "grad_norm": 3.8004753589630127, "learning_rate": 6.264293623459538e-06, "loss": 0.5244, "step": 3425 }, { "epoch": 0.2340643574502972, "grad_norm": 5.316768646240234, "learning_rate": 6.263818503288248e-06, "loss": 0.5278, "step": 3426 }, { "epoch": 0.2341326774612284, "grad_norm": 3.5989248752593994, "learning_rate": 6.2633432477800975e-06, "loss": 0.4076, "step": 3427 }, { "epoch": 0.23420099747215958, "grad_norm": 4.151168346405029, "learning_rate": 6.262867856958357e-06, "loss": 0.4336, "step": 3428 }, { "epoch": 0.2342693174830908, "grad_norm": 4.1713972091674805, "learning_rate": 6.2623923308463065e-06, "loss": 0.3642, "step": 3429 }, { "epoch": 0.234337637494022, "grad_norm": 3.8447060585021973, "learning_rate": 6.261916669467231e-06, "loss": 0.3303, "step": 3430 }, { "epoch": 0.2344059575049532, "grad_norm": 3.3985040187835693, "learning_rate": 6.261440872844423e-06, "loss": 0.3814, "step": 3431 }, { "epoch": 0.2344742775158844, "grad_norm": 3.628612518310547, "learning_rate": 6.2609649410011795e-06, "loss": 0.3651, "step": 3432 }, { "epoch": 0.2345425975268156, "grad_norm": 3.271899700164795, "learning_rate": 6.260488873960808e-06, "loss": 0.3624, "step": 3433 }, { "epoch": 0.2346109175377468, "grad_norm": 3.863971710205078, "learning_rate": 6.2600126717466165e-06, "loss": 0.4395, "step": 3434 }, { "epoch": 0.234679237548678, "grad_norm": 3.7688419818878174, "learning_rate": 6.259536334381927e-06, "loss": 0.3752, "step": 3435 }, { "epoch": 0.23474755755960922, "grad_norm": 3.8876662254333496, "learning_rate": 6.259059861890062e-06, "loss": 0.3458, "step": 3436 }, { "epoch": 0.2348158775705404, "grad_norm": 4.613287448883057, "learning_rate": 6.258583254294355e-06, "loss": 0.4312, "step": 3437 }, { "epoch": 0.2348841975814716, "grad_norm": 3.615429162979126, "learning_rate": 6.258106511618143e-06, "loss": 0.3586, "step": 3438 }, { "epoch": 0.23495251759240282, "grad_norm": 3.23836612701416, "learning_rate": 6.257629633884771e-06, "loss": 0.3073, "step": 3439 }, { "epoch": 0.235020837603334, "grad_norm": 2.9027154445648193, "learning_rate": 6.257152621117591e-06, "loss": 0.3572, "step": 3440 }, { "epoch": 0.23508915761426522, "grad_norm": 3.4998667240142822, "learning_rate": 6.256675473339961e-06, "loss": 0.4141, "step": 3441 }, { "epoch": 0.23515747762519643, "grad_norm": 4.552066326141357, "learning_rate": 6.2561981905752446e-06, "loss": 0.2874, "step": 3442 }, { "epoch": 0.2352257976361276, "grad_norm": 4.744157791137695, "learning_rate": 6.255720772846814e-06, "loss": 0.4718, "step": 3443 }, { "epoch": 0.23529411764705882, "grad_norm": 3.874419689178467, "learning_rate": 6.2552432201780475e-06, "loss": 0.4705, "step": 3444 }, { "epoch": 0.23536243765799003, "grad_norm": 3.9087960720062256, "learning_rate": 6.254765532592328e-06, "loss": 0.4658, "step": 3445 }, { "epoch": 0.23543075766892121, "grad_norm": 3.8066203594207764, "learning_rate": 6.2542877101130485e-06, "loss": 0.4862, "step": 3446 }, { "epoch": 0.23549907767985243, "grad_norm": 4.101160049438477, "learning_rate": 6.253809752763605e-06, "loss": 0.3783, "step": 3447 }, { "epoch": 0.23556739769078364, "grad_norm": 5.844799041748047, "learning_rate": 6.253331660567402e-06, "loss": 0.443, "step": 3448 }, { "epoch": 0.23563571770171482, "grad_norm": 3.3665013313293457, "learning_rate": 6.2528534335478514e-06, "loss": 0.2996, "step": 3449 }, { "epoch": 0.23570403771264603, "grad_norm": 4.233473300933838, "learning_rate": 6.252375071728371e-06, "loss": 0.3347, "step": 3450 }, { "epoch": 0.23577235772357724, "grad_norm": 4.957714557647705, "learning_rate": 6.251896575132384e-06, "loss": 0.4098, "step": 3451 }, { "epoch": 0.23584067773450842, "grad_norm": 4.140905380249023, "learning_rate": 6.25141794378332e-06, "loss": 0.5078, "step": 3452 }, { "epoch": 0.23590899774543964, "grad_norm": 5.100541114807129, "learning_rate": 6.250939177704619e-06, "loss": 0.4821, "step": 3453 }, { "epoch": 0.23597731775637085, "grad_norm": 4.034644603729248, "learning_rate": 6.250460276919723e-06, "loss": 0.4203, "step": 3454 }, { "epoch": 0.23604563776730203, "grad_norm": 3.0493874549865723, "learning_rate": 6.249981241452083e-06, "loss": 0.2715, "step": 3455 }, { "epoch": 0.23611395777823324, "grad_norm": 5.41282844543457, "learning_rate": 6.249502071325156e-06, "loss": 0.4028, "step": 3456 }, { "epoch": 0.23618227778916445, "grad_norm": 4.1049699783325195, "learning_rate": 6.249022766562406e-06, "loss": 0.3272, "step": 3457 }, { "epoch": 0.23625059780009564, "grad_norm": 4.735520362854004, "learning_rate": 6.248543327187303e-06, "loss": 0.3983, "step": 3458 }, { "epoch": 0.23631891781102685, "grad_norm": 3.8110857009887695, "learning_rate": 6.248063753223323e-06, "loss": 0.4049, "step": 3459 }, { "epoch": 0.23638723782195806, "grad_norm": 4.196627140045166, "learning_rate": 6.247584044693952e-06, "loss": 0.385, "step": 3460 }, { "epoch": 0.23645555783288924, "grad_norm": 4.422773361206055, "learning_rate": 6.247104201622678e-06, "loss": 0.3469, "step": 3461 }, { "epoch": 0.23652387784382045, "grad_norm": 4.095890045166016, "learning_rate": 6.246624224032998e-06, "loss": 0.3917, "step": 3462 }, { "epoch": 0.23659219785475166, "grad_norm": 3.371480941772461, "learning_rate": 6.246144111948415e-06, "loss": 0.3543, "step": 3463 }, { "epoch": 0.23666051786568285, "grad_norm": 4.215140342712402, "learning_rate": 6.2456638653924396e-06, "loss": 0.3907, "step": 3464 }, { "epoch": 0.23672883787661406, "grad_norm": 4.281844615936279, "learning_rate": 6.245183484388587e-06, "loss": 0.3692, "step": 3465 }, { "epoch": 0.23679715788754527, "grad_norm": 5.911148548126221, "learning_rate": 6.2447029689603815e-06, "loss": 0.3185, "step": 3466 }, { "epoch": 0.23686547789847645, "grad_norm": 4.323533058166504, "learning_rate": 6.2442223191313516e-06, "loss": 0.477, "step": 3467 }, { "epoch": 0.23693379790940766, "grad_norm": 3.9359962940216064, "learning_rate": 6.243741534925035e-06, "loss": 0.4855, "step": 3468 }, { "epoch": 0.23700211792033887, "grad_norm": 5.022217273712158, "learning_rate": 6.243260616364972e-06, "loss": 0.5083, "step": 3469 }, { "epoch": 0.23707043793127006, "grad_norm": 4.481346607208252, "learning_rate": 6.242779563474715e-06, "loss": 0.357, "step": 3470 }, { "epoch": 0.23713875794220127, "grad_norm": 3.700493097305298, "learning_rate": 6.242298376277816e-06, "loss": 0.3944, "step": 3471 }, { "epoch": 0.23720707795313248, "grad_norm": 5.099846363067627, "learning_rate": 6.241817054797842e-06, "loss": 0.4529, "step": 3472 }, { "epoch": 0.23727539796406366, "grad_norm": 4.036827087402344, "learning_rate": 6.241335599058358e-06, "loss": 0.3948, "step": 3473 }, { "epoch": 0.23734371797499487, "grad_norm": 4.464264869689941, "learning_rate": 6.240854009082942e-06, "loss": 0.3873, "step": 3474 }, { "epoch": 0.23741203798592608, "grad_norm": 4.190308570861816, "learning_rate": 6.240372284895174e-06, "loss": 0.3207, "step": 3475 }, { "epoch": 0.23748035799685727, "grad_norm": 3.570730686187744, "learning_rate": 6.239890426518646e-06, "loss": 0.3884, "step": 3476 }, { "epoch": 0.23754867800778848, "grad_norm": 6.4983229637146, "learning_rate": 6.23940843397695e-06, "loss": 0.3426, "step": 3477 }, { "epoch": 0.2376169980187197, "grad_norm": 4.231215953826904, "learning_rate": 6.238926307293691e-06, "loss": 0.4465, "step": 3478 }, { "epoch": 0.23768531802965087, "grad_norm": 3.754484176635742, "learning_rate": 6.238444046492475e-06, "loss": 0.3717, "step": 3479 }, { "epoch": 0.23775363804058208, "grad_norm": 3.6128249168395996, "learning_rate": 6.237961651596919e-06, "loss": 0.3759, "step": 3480 }, { "epoch": 0.2378219580515133, "grad_norm": 3.677448272705078, "learning_rate": 6.237479122630642e-06, "loss": 0.378, "step": 3481 }, { "epoch": 0.23789027806244448, "grad_norm": 4.033154010772705, "learning_rate": 6.236996459617275e-06, "loss": 0.4709, "step": 3482 }, { "epoch": 0.2379585980733757, "grad_norm": 3.1668498516082764, "learning_rate": 6.23651366258045e-06, "loss": 0.3428, "step": 3483 }, { "epoch": 0.2380269180843069, "grad_norm": 4.373801231384277, "learning_rate": 6.23603073154381e-06, "loss": 0.5374, "step": 3484 }, { "epoch": 0.23809523809523808, "grad_norm": 4.195518493652344, "learning_rate": 6.235547666531004e-06, "loss": 0.5171, "step": 3485 }, { "epoch": 0.2381635581061693, "grad_norm": 3.7408013343811035, "learning_rate": 6.235064467565684e-06, "loss": 0.5031, "step": 3486 }, { "epoch": 0.2382318781171005, "grad_norm": 4.1258039474487305, "learning_rate": 6.234581134671512e-06, "loss": 0.4204, "step": 3487 }, { "epoch": 0.2383001981280317, "grad_norm": 4.583456039428711, "learning_rate": 6.234097667872155e-06, "loss": 0.3781, "step": 3488 }, { "epoch": 0.2383685181389629, "grad_norm": 4.60692834854126, "learning_rate": 6.233614067191288e-06, "loss": 0.5247, "step": 3489 }, { "epoch": 0.2384368381498941, "grad_norm": 3.924229621887207, "learning_rate": 6.2331303326525915e-06, "loss": 0.3395, "step": 3490 }, { "epoch": 0.2385051581608253, "grad_norm": 3.1233415603637695, "learning_rate": 6.2326464642797515e-06, "loss": 0.3248, "step": 3491 }, { "epoch": 0.2385734781717565, "grad_norm": 4.273736953735352, "learning_rate": 6.232162462096463e-06, "loss": 0.4778, "step": 3492 }, { "epoch": 0.23864179818268771, "grad_norm": 3.5961761474609375, "learning_rate": 6.231678326126427e-06, "loss": 0.3587, "step": 3493 }, { "epoch": 0.2387101181936189, "grad_norm": 4.252706050872803, "learning_rate": 6.231194056393349e-06, "loss": 0.3603, "step": 3494 }, { "epoch": 0.2387784382045501, "grad_norm": 3.5260708332061768, "learning_rate": 6.230709652920942e-06, "loss": 0.3242, "step": 3495 }, { "epoch": 0.23884675821548132, "grad_norm": 3.7523210048675537, "learning_rate": 6.230225115732926e-06, "loss": 0.3798, "step": 3496 }, { "epoch": 0.2389150782264125, "grad_norm": 5.472437381744385, "learning_rate": 6.2297404448530295e-06, "loss": 0.3355, "step": 3497 }, { "epoch": 0.2389833982373437, "grad_norm": 3.2301859855651855, "learning_rate": 6.229255640304983e-06, "loss": 0.3747, "step": 3498 }, { "epoch": 0.23905171824827492, "grad_norm": 4.018654823303223, "learning_rate": 6.2287707021125285e-06, "loss": 0.3929, "step": 3499 }, { "epoch": 0.2391200382592061, "grad_norm": 3.1074042320251465, "learning_rate": 6.22828563029941e-06, "loss": 0.3248, "step": 3500 }, { "epoch": 0.23918835827013732, "grad_norm": 3.8129537105560303, "learning_rate": 6.227800424889382e-06, "loss": 0.3064, "step": 3501 }, { "epoch": 0.23925667828106853, "grad_norm": 3.573237180709839, "learning_rate": 6.2273150859062025e-06, "loss": 0.4036, "step": 3502 }, { "epoch": 0.2393249982919997, "grad_norm": 3.930968761444092, "learning_rate": 6.226829613373637e-06, "loss": 0.4147, "step": 3503 }, { "epoch": 0.23939331830293092, "grad_norm": 4.888017654418945, "learning_rate": 6.22634400731546e-06, "loss": 0.3951, "step": 3504 }, { "epoch": 0.23946163831386214, "grad_norm": 4.519711017608643, "learning_rate": 6.225858267755447e-06, "loss": 0.3131, "step": 3505 }, { "epoch": 0.23952995832479332, "grad_norm": 4.283595561981201, "learning_rate": 6.225372394717386e-06, "loss": 0.4519, "step": 3506 }, { "epoch": 0.23959827833572453, "grad_norm": 3.6108763217926025, "learning_rate": 6.224886388225068e-06, "loss": 0.3337, "step": 3507 }, { "epoch": 0.23966659834665574, "grad_norm": 4.885725498199463, "learning_rate": 6.224400248302292e-06, "loss": 0.414, "step": 3508 }, { "epoch": 0.23973491835758692, "grad_norm": 3.73702073097229, "learning_rate": 6.223913974972861e-06, "loss": 0.3866, "step": 3509 }, { "epoch": 0.23980323836851813, "grad_norm": 4.196281433105469, "learning_rate": 6.2234275682605885e-06, "loss": 0.4965, "step": 3510 }, { "epoch": 0.23987155837944935, "grad_norm": 5.209987163543701, "learning_rate": 6.222941028189293e-06, "loss": 0.5081, "step": 3511 }, { "epoch": 0.23993987839038053, "grad_norm": 3.1458590030670166, "learning_rate": 6.222454354782797e-06, "loss": 0.3754, "step": 3512 }, { "epoch": 0.24000819840131174, "grad_norm": 3.853074073791504, "learning_rate": 6.221967548064933e-06, "loss": 0.4355, "step": 3513 }, { "epoch": 0.24007651841224295, "grad_norm": 4.382321357727051, "learning_rate": 6.221480608059538e-06, "loss": 0.3311, "step": 3514 }, { "epoch": 0.24014483842317413, "grad_norm": 4.4778642654418945, "learning_rate": 6.220993534790459e-06, "loss": 0.4535, "step": 3515 }, { "epoch": 0.24021315843410534, "grad_norm": 4.675379753112793, "learning_rate": 6.220506328281541e-06, "loss": 0.4064, "step": 3516 }, { "epoch": 0.24028147844503656, "grad_norm": 4.448306560516357, "learning_rate": 6.220018988556646e-06, "loss": 0.4193, "step": 3517 }, { "epoch": 0.24034979845596774, "grad_norm": 3.7586607933044434, "learning_rate": 6.219531515639636e-06, "loss": 0.413, "step": 3518 }, { "epoch": 0.24041811846689895, "grad_norm": 4.836580753326416, "learning_rate": 6.2190439095543805e-06, "loss": 0.3962, "step": 3519 }, { "epoch": 0.24048643847783016, "grad_norm": 3.802640914916992, "learning_rate": 6.2185561703247585e-06, "loss": 0.285, "step": 3520 }, { "epoch": 0.24055475848876134, "grad_norm": 4.139723777770996, "learning_rate": 6.2180682979746515e-06, "loss": 0.4257, "step": 3521 }, { "epoch": 0.24062307849969256, "grad_norm": 5.202380657196045, "learning_rate": 6.217580292527949e-06, "loss": 0.5203, "step": 3522 }, { "epoch": 0.24069139851062377, "grad_norm": 4.946014404296875, "learning_rate": 6.2170921540085485e-06, "loss": 0.4371, "step": 3523 }, { "epoch": 0.24075971852155498, "grad_norm": 5.119202613830566, "learning_rate": 6.216603882440352e-06, "loss": 0.4294, "step": 3524 }, { "epoch": 0.24082803853248616, "grad_norm": 3.9790642261505127, "learning_rate": 6.216115477847271e-06, "loss": 0.4226, "step": 3525 }, { "epoch": 0.24089635854341737, "grad_norm": 3.701141357421875, "learning_rate": 6.215626940253218e-06, "loss": 0.4405, "step": 3526 }, { "epoch": 0.24096467855434858, "grad_norm": 3.2872891426086426, "learning_rate": 6.215138269682118e-06, "loss": 0.4157, "step": 3527 }, { "epoch": 0.24103299856527977, "grad_norm": 3.724809408187866, "learning_rate": 6.2146494661579e-06, "loss": 0.3798, "step": 3528 }, { "epoch": 0.24110131857621098, "grad_norm": 4.728972911834717, "learning_rate": 6.214160529704497e-06, "loss": 0.4532, "step": 3529 }, { "epoch": 0.2411696385871422, "grad_norm": 4.132839202880859, "learning_rate": 6.213671460345853e-06, "loss": 0.4517, "step": 3530 }, { "epoch": 0.24123795859807337, "grad_norm": 4.269538402557373, "learning_rate": 6.213182258105914e-06, "loss": 0.4282, "step": 3531 }, { "epoch": 0.24130627860900458, "grad_norm": 3.3645100593566895, "learning_rate": 6.212692923008638e-06, "loss": 0.2645, "step": 3532 }, { "epoch": 0.2413745986199358, "grad_norm": 3.9973082542419434, "learning_rate": 6.212203455077985e-06, "loss": 0.3834, "step": 3533 }, { "epoch": 0.24144291863086698, "grad_norm": 4.835588455200195, "learning_rate": 6.211713854337924e-06, "loss": 0.3197, "step": 3534 }, { "epoch": 0.2415112386417982, "grad_norm": 3.322627544403076, "learning_rate": 6.211224120812428e-06, "loss": 0.3178, "step": 3535 }, { "epoch": 0.2415795586527294, "grad_norm": 3.167004346847534, "learning_rate": 6.210734254525478e-06, "loss": 0.3935, "step": 3536 }, { "epoch": 0.24164787866366058, "grad_norm": 3.6907243728637695, "learning_rate": 6.210244255501062e-06, "loss": 0.4237, "step": 3537 }, { "epoch": 0.2417161986745918, "grad_norm": 4.19243860244751, "learning_rate": 6.209754123763174e-06, "loss": 0.4399, "step": 3538 }, { "epoch": 0.241784518685523, "grad_norm": 4.5345988273620605, "learning_rate": 6.209263859335815e-06, "loss": 0.3994, "step": 3539 }, { "epoch": 0.2418528386964542, "grad_norm": 3.5295257568359375, "learning_rate": 6.2087734622429894e-06, "loss": 0.3323, "step": 3540 }, { "epoch": 0.2419211587073854, "grad_norm": 4.306577682495117, "learning_rate": 6.208282932508714e-06, "loss": 0.4198, "step": 3541 }, { "epoch": 0.2419894787183166, "grad_norm": 3.870309352874756, "learning_rate": 6.207792270157007e-06, "loss": 0.3249, "step": 3542 }, { "epoch": 0.2420577987292478, "grad_norm": 3.127749443054199, "learning_rate": 6.207301475211895e-06, "loss": 0.3953, "step": 3543 }, { "epoch": 0.242126118740179, "grad_norm": 4.5946149826049805, "learning_rate": 6.206810547697411e-06, "loss": 0.3387, "step": 3544 }, { "epoch": 0.2421944387511102, "grad_norm": 3.8651342391967773, "learning_rate": 6.206319487637595e-06, "loss": 0.4629, "step": 3545 }, { "epoch": 0.2422627587620414, "grad_norm": 4.151945114135742, "learning_rate": 6.205828295056492e-06, "loss": 0.4233, "step": 3546 }, { "epoch": 0.2423310787729726, "grad_norm": 4.350295543670654, "learning_rate": 6.205336969978156e-06, "loss": 0.4434, "step": 3547 }, { "epoch": 0.24239939878390382, "grad_norm": 4.995729923248291, "learning_rate": 6.204845512426643e-06, "loss": 0.4009, "step": 3548 }, { "epoch": 0.242467718794835, "grad_norm": 4.112821578979492, "learning_rate": 6.20435392242602e-06, "loss": 0.4965, "step": 3549 }, { "epoch": 0.2425360388057662, "grad_norm": 4.625650405883789, "learning_rate": 6.203862200000359e-06, "loss": 0.3021, "step": 3550 }, { "epoch": 0.24260435881669742, "grad_norm": 4.068665981292725, "learning_rate": 6.203370345173738e-06, "loss": 0.4531, "step": 3551 }, { "epoch": 0.2426726788276286, "grad_norm": 4.753964900970459, "learning_rate": 6.202878357970244e-06, "loss": 0.4389, "step": 3552 }, { "epoch": 0.24274099883855982, "grad_norm": 4.126916408538818, "learning_rate": 6.202386238413965e-06, "loss": 0.3703, "step": 3553 }, { "epoch": 0.24280931884949103, "grad_norm": 5.284003734588623, "learning_rate": 6.201893986529e-06, "loss": 0.4555, "step": 3554 }, { "epoch": 0.2428776388604222, "grad_norm": 4.187948703765869, "learning_rate": 6.201401602339453e-06, "loss": 0.4473, "step": 3555 }, { "epoch": 0.24294595887135342, "grad_norm": 2.5471763610839844, "learning_rate": 6.200909085869435e-06, "loss": 0.2834, "step": 3556 }, { "epoch": 0.24301427888228463, "grad_norm": 4.122276782989502, "learning_rate": 6.200416437143065e-06, "loss": 0.4382, "step": 3557 }, { "epoch": 0.24308259889321582, "grad_norm": 3.792492151260376, "learning_rate": 6.199923656184463e-06, "loss": 0.4606, "step": 3558 }, { "epoch": 0.24315091890414703, "grad_norm": 4.411911964416504, "learning_rate": 6.199430743017764e-06, "loss": 0.4571, "step": 3559 }, { "epoch": 0.24321923891507824, "grad_norm": 5.7829084396362305, "learning_rate": 6.1989376976671e-06, "loss": 0.4531, "step": 3560 }, { "epoch": 0.24328755892600942, "grad_norm": 4.632686614990234, "learning_rate": 6.198444520156617e-06, "loss": 0.4888, "step": 3561 }, { "epoch": 0.24335587893694063, "grad_norm": 3.7176647186279297, "learning_rate": 6.197951210510462e-06, "loss": 0.3832, "step": 3562 }, { "epoch": 0.24342419894787184, "grad_norm": 4.649359226226807, "learning_rate": 6.197457768752793e-06, "loss": 0.4172, "step": 3563 }, { "epoch": 0.24349251895880303, "grad_norm": 3.396345853805542, "learning_rate": 6.196964194907773e-06, "loss": 0.3378, "step": 3564 }, { "epoch": 0.24356083896973424, "grad_norm": 3.909330368041992, "learning_rate": 6.1964704889995704e-06, "loss": 0.4494, "step": 3565 }, { "epoch": 0.24362915898066545, "grad_norm": 3.3271589279174805, "learning_rate": 6.19597665105236e-06, "loss": 0.406, "step": 3566 }, { "epoch": 0.24369747899159663, "grad_norm": 3.715888738632202, "learning_rate": 6.195482681090325e-06, "loss": 0.3096, "step": 3567 }, { "epoch": 0.24376579900252784, "grad_norm": 3.715947389602661, "learning_rate": 6.194988579137652e-06, "loss": 0.4005, "step": 3568 }, { "epoch": 0.24383411901345906, "grad_norm": 4.809877872467041, "learning_rate": 6.194494345218538e-06, "loss": 0.4253, "step": 3569 }, { "epoch": 0.24390243902439024, "grad_norm": 4.596024990081787, "learning_rate": 6.193999979357182e-06, "loss": 0.294, "step": 3570 }, { "epoch": 0.24397075903532145, "grad_norm": 4.067285060882568, "learning_rate": 6.193505481577794e-06, "loss": 0.4717, "step": 3571 }, { "epoch": 0.24403907904625266, "grad_norm": 3.8665072917938232, "learning_rate": 6.1930108519045875e-06, "loss": 0.3652, "step": 3572 }, { "epoch": 0.24410739905718384, "grad_norm": 4.448660373687744, "learning_rate": 6.192516090361782e-06, "loss": 0.3695, "step": 3573 }, { "epoch": 0.24417571906811505, "grad_norm": 4.04165506362915, "learning_rate": 6.192021196973606e-06, "loss": 0.3752, "step": 3574 }, { "epoch": 0.24424403907904627, "grad_norm": 3.9784250259399414, "learning_rate": 6.191526171764293e-06, "loss": 0.4601, "step": 3575 }, { "epoch": 0.24431235908997745, "grad_norm": 4.20519495010376, "learning_rate": 6.1910310147580815e-06, "loss": 0.371, "step": 3576 }, { "epoch": 0.24438067910090866, "grad_norm": 3.745805263519287, "learning_rate": 6.19053572597922e-06, "loss": 0.3652, "step": 3577 }, { "epoch": 0.24444899911183987, "grad_norm": 4.137881278991699, "learning_rate": 6.1900403054519615e-06, "loss": 0.402, "step": 3578 }, { "epoch": 0.24451731912277105, "grad_norm": 4.100247383117676, "learning_rate": 6.1895447532005645e-06, "loss": 0.4868, "step": 3579 }, { "epoch": 0.24458563913370227, "grad_norm": 3.2703943252563477, "learning_rate": 6.189049069249295e-06, "loss": 0.4109, "step": 3580 }, { "epoch": 0.24465395914463348, "grad_norm": 5.191071033477783, "learning_rate": 6.188553253622425e-06, "loss": 0.4936, "step": 3581 }, { "epoch": 0.24472227915556466, "grad_norm": 4.080719470977783, "learning_rate": 6.188057306344234e-06, "loss": 0.3973, "step": 3582 }, { "epoch": 0.24479059916649587, "grad_norm": 5.0252180099487305, "learning_rate": 6.187561227439008e-06, "loss": 0.4207, "step": 3583 }, { "epoch": 0.24485891917742708, "grad_norm": 4.047994613647461, "learning_rate": 6.187065016931036e-06, "loss": 0.3631, "step": 3584 }, { "epoch": 0.24492723918835826, "grad_norm": 4.7157816886901855, "learning_rate": 6.186568674844619e-06, "loss": 0.5771, "step": 3585 }, { "epoch": 0.24499555919928948, "grad_norm": 4.447230339050293, "learning_rate": 6.186072201204059e-06, "loss": 0.5445, "step": 3586 }, { "epoch": 0.2450638792102207, "grad_norm": 3.6173994541168213, "learning_rate": 6.185575596033668e-06, "loss": 0.2566, "step": 3587 }, { "epoch": 0.24513219922115187, "grad_norm": 3.733318567276001, "learning_rate": 6.1850788593577655e-06, "loss": 0.4579, "step": 3588 }, { "epoch": 0.24520051923208308, "grad_norm": 3.728710651397705, "learning_rate": 6.184581991200673e-06, "loss": 0.3417, "step": 3589 }, { "epoch": 0.2452688392430143, "grad_norm": 4.44773530960083, "learning_rate": 6.18408499158672e-06, "loss": 0.4026, "step": 3590 }, { "epoch": 0.24533715925394547, "grad_norm": 3.012282133102417, "learning_rate": 6.183587860540246e-06, "loss": 0.4048, "step": 3591 }, { "epoch": 0.24540547926487669, "grad_norm": 4.870153903961182, "learning_rate": 6.183090598085593e-06, "loss": 0.5167, "step": 3592 }, { "epoch": 0.2454737992758079, "grad_norm": 3.629577159881592, "learning_rate": 6.18259320424711e-06, "loss": 0.3327, "step": 3593 }, { "epoch": 0.24554211928673908, "grad_norm": 4.416171550750732, "learning_rate": 6.182095679049154e-06, "loss": 0.4276, "step": 3594 }, { "epoch": 0.2456104392976703, "grad_norm": 4.8898844718933105, "learning_rate": 6.181598022516086e-06, "loss": 0.3743, "step": 3595 }, { "epoch": 0.2456787593086015, "grad_norm": 3.9017157554626465, "learning_rate": 6.181100234672276e-06, "loss": 0.3691, "step": 3596 }, { "epoch": 0.24574707931953269, "grad_norm": 4.0050950050354, "learning_rate": 6.1806023155421e-06, "loss": 0.4005, "step": 3597 }, { "epoch": 0.2458153993304639, "grad_norm": 4.476140975952148, "learning_rate": 6.18010426514994e-06, "loss": 0.4097, "step": 3598 }, { "epoch": 0.2458837193413951, "grad_norm": 4.144796848297119, "learning_rate": 6.179606083520182e-06, "loss": 0.3966, "step": 3599 }, { "epoch": 0.2459520393523263, "grad_norm": 4.192280292510986, "learning_rate": 6.179107770677221e-06, "loss": 0.4179, "step": 3600 }, { "epoch": 0.2460203593632575, "grad_norm": 4.589825630187988, "learning_rate": 6.178609326645461e-06, "loss": 0.4768, "step": 3601 }, { "epoch": 0.2460886793741887, "grad_norm": 3.149574041366577, "learning_rate": 6.178110751449306e-06, "loss": 0.3884, "step": 3602 }, { "epoch": 0.2461569993851199, "grad_norm": 3.758741617202759, "learning_rate": 6.1776120451131716e-06, "loss": 0.3018, "step": 3603 }, { "epoch": 0.2462253193960511, "grad_norm": 3.5278351306915283, "learning_rate": 6.177113207661478e-06, "loss": 0.352, "step": 3604 }, { "epoch": 0.24629363940698232, "grad_norm": 3.387519359588623, "learning_rate": 6.176614239118652e-06, "loss": 0.3522, "step": 3605 }, { "epoch": 0.2463619594179135, "grad_norm": 5.152687072753906, "learning_rate": 6.176115139509126e-06, "loss": 0.3174, "step": 3606 }, { "epoch": 0.2464302794288447, "grad_norm": 4.21945858001709, "learning_rate": 6.175615908857341e-06, "loss": 0.5279, "step": 3607 }, { "epoch": 0.24649859943977592, "grad_norm": 4.8324480056762695, "learning_rate": 6.175116547187742e-06, "loss": 0.3764, "step": 3608 }, { "epoch": 0.2465669194507071, "grad_norm": 4.840660095214844, "learning_rate": 6.174617054524781e-06, "loss": 0.326, "step": 3609 }, { "epoch": 0.24663523946163832, "grad_norm": 4.34480619430542, "learning_rate": 6.174117430892917e-06, "loss": 0.3203, "step": 3610 }, { "epoch": 0.24670355947256953, "grad_norm": 3.5167927742004395, "learning_rate": 6.173617676316617e-06, "loss": 0.4467, "step": 3611 }, { "epoch": 0.2467718794835007, "grad_norm": 3.461552381515503, "learning_rate": 6.17311779082035e-06, "loss": 0.3836, "step": 3612 }, { "epoch": 0.24684019949443192, "grad_norm": 4.754734516143799, "learning_rate": 6.172617774428597e-06, "loss": 0.3117, "step": 3613 }, { "epoch": 0.24690851950536313, "grad_norm": 4.588643550872803, "learning_rate": 6.17211762716584e-06, "loss": 0.4769, "step": 3614 }, { "epoch": 0.24697683951629432, "grad_norm": 5.859267234802246, "learning_rate": 6.1716173490565704e-06, "loss": 0.5394, "step": 3615 }, { "epoch": 0.24704515952722553, "grad_norm": 4.8078532218933105, "learning_rate": 6.171116940125287e-06, "loss": 0.4167, "step": 3616 }, { "epoch": 0.24711347953815674, "grad_norm": 3.733924627304077, "learning_rate": 6.170616400396492e-06, "loss": 0.3761, "step": 3617 }, { "epoch": 0.24718179954908792, "grad_norm": 3.7548580169677734, "learning_rate": 6.170115729894695e-06, "loss": 0.4555, "step": 3618 }, { "epoch": 0.24725011956001913, "grad_norm": 6.519580841064453, "learning_rate": 6.1696149286444135e-06, "loss": 0.436, "step": 3619 }, { "epoch": 0.24731843957095034, "grad_norm": 3.1648104190826416, "learning_rate": 6.16911399667017e-06, "loss": 0.3559, "step": 3620 }, { "epoch": 0.24738675958188153, "grad_norm": 3.4726998805999756, "learning_rate": 6.168612933996495e-06, "loss": 0.3794, "step": 3621 }, { "epoch": 0.24745507959281274, "grad_norm": 4.400402069091797, "learning_rate": 6.168111740647923e-06, "loss": 0.4487, "step": 3622 }, { "epoch": 0.24752339960374395, "grad_norm": 3.20186710357666, "learning_rate": 6.1676104166489955e-06, "loss": 0.3394, "step": 3623 }, { "epoch": 0.24759171961467513, "grad_norm": 3.829467296600342, "learning_rate": 6.167108962024263e-06, "loss": 0.3908, "step": 3624 }, { "epoch": 0.24766003962560634, "grad_norm": 3.6825971603393555, "learning_rate": 6.1666073767982795e-06, "loss": 0.417, "step": 3625 }, { "epoch": 0.24772835963653755, "grad_norm": 3.0245721340179443, "learning_rate": 6.166105660995605e-06, "loss": 0.3007, "step": 3626 }, { "epoch": 0.24779667964746874, "grad_norm": 3.6264142990112305, "learning_rate": 6.165603814640809e-06, "loss": 0.4696, "step": 3627 }, { "epoch": 0.24786499965839995, "grad_norm": 3.6305439472198486, "learning_rate": 6.165101837758464e-06, "loss": 0.3768, "step": 3628 }, { "epoch": 0.24793331966933116, "grad_norm": 3.9296464920043945, "learning_rate": 6.164599730373153e-06, "loss": 0.3241, "step": 3629 }, { "epoch": 0.24800163968026234, "grad_norm": 3.9393727779388428, "learning_rate": 6.164097492509459e-06, "loss": 0.279, "step": 3630 }, { "epoch": 0.24806995969119355, "grad_norm": 3.8839449882507324, "learning_rate": 6.1635951241919794e-06, "loss": 0.3464, "step": 3631 }, { "epoch": 0.24813827970212476, "grad_norm": 3.969799280166626, "learning_rate": 6.163092625445311e-06, "loss": 0.3532, "step": 3632 }, { "epoch": 0.24820659971305595, "grad_norm": 3.9213221073150635, "learning_rate": 6.16258999629406e-06, "loss": 0.3325, "step": 3633 }, { "epoch": 0.24827491972398716, "grad_norm": 3.8744428157806396, "learning_rate": 6.162087236762841e-06, "loss": 0.369, "step": 3634 }, { "epoch": 0.24834323973491837, "grad_norm": 4.12895393371582, "learning_rate": 6.161584346876271e-06, "loss": 0.4414, "step": 3635 }, { "epoch": 0.24841155974584955, "grad_norm": 5.092297077178955, "learning_rate": 6.161081326658976e-06, "loss": 0.4307, "step": 3636 }, { "epoch": 0.24847987975678076, "grad_norm": 3.568612575531006, "learning_rate": 6.160578176135586e-06, "loss": 0.3594, "step": 3637 }, { "epoch": 0.24854819976771197, "grad_norm": 4.257532119750977, "learning_rate": 6.16007489533074e-06, "loss": 0.3836, "step": 3638 }, { "epoch": 0.24861651977864316, "grad_norm": 2.7109837532043457, "learning_rate": 6.1595714842690835e-06, "loss": 0.3428, "step": 3639 }, { "epoch": 0.24868483978957437, "grad_norm": 4.184452533721924, "learning_rate": 6.159067942975265e-06, "loss": 0.3663, "step": 3640 }, { "epoch": 0.24875315980050558, "grad_norm": 4.277373313903809, "learning_rate": 6.158564271473944e-06, "loss": 0.4867, "step": 3641 }, { "epoch": 0.24882147981143676, "grad_norm": 4.7015604972839355, "learning_rate": 6.158060469789781e-06, "loss": 0.4053, "step": 3642 }, { "epoch": 0.24888979982236797, "grad_norm": 3.5337109565734863, "learning_rate": 6.157556537947448e-06, "loss": 0.3237, "step": 3643 }, { "epoch": 0.24895811983329919, "grad_norm": 3.683734893798828, "learning_rate": 6.157052475971622e-06, "loss": 0.3419, "step": 3644 }, { "epoch": 0.24902643984423037, "grad_norm": 4.161667346954346, "learning_rate": 6.156548283886983e-06, "loss": 0.4569, "step": 3645 }, { "epoch": 0.24909475985516158, "grad_norm": 3.596564531326294, "learning_rate": 6.156043961718221e-06, "loss": 0.3857, "step": 3646 }, { "epoch": 0.2491630798660928, "grad_norm": 5.4815449714660645, "learning_rate": 6.155539509490031e-06, "loss": 0.5005, "step": 3647 }, { "epoch": 0.24923139987702397, "grad_norm": 4.649372100830078, "learning_rate": 6.155034927227116e-06, "loss": 0.3477, "step": 3648 }, { "epoch": 0.24929971988795518, "grad_norm": 3.7196054458618164, "learning_rate": 6.154530214954184e-06, "loss": 0.3715, "step": 3649 }, { "epoch": 0.2493680398988864, "grad_norm": 4.083428859710693, "learning_rate": 6.154025372695948e-06, "loss": 0.4664, "step": 3650 }, { "epoch": 0.24943635990981758, "grad_norm": 5.017843246459961, "learning_rate": 6.1535204004771285e-06, "loss": 0.4413, "step": 3651 }, { "epoch": 0.2495046799207488, "grad_norm": 3.0025062561035156, "learning_rate": 6.153015298322455e-06, "loss": 0.3288, "step": 3652 }, { "epoch": 0.24957299993168, "grad_norm": 3.0769705772399902, "learning_rate": 6.152510066256659e-06, "loss": 0.384, "step": 3653 }, { "epoch": 0.24964131994261118, "grad_norm": 4.570688247680664, "learning_rate": 6.152004704304481e-06, "loss": 0.3531, "step": 3654 }, { "epoch": 0.2497096399535424, "grad_norm": 4.792782783508301, "learning_rate": 6.151499212490668e-06, "loss": 0.4146, "step": 3655 }, { "epoch": 0.2497779599644736, "grad_norm": 4.039405345916748, "learning_rate": 6.150993590839971e-06, "loss": 0.3455, "step": 3656 }, { "epoch": 0.2498462799754048, "grad_norm": 3.85941743850708, "learning_rate": 6.150487839377149e-06, "loss": 0.4348, "step": 3657 }, { "epoch": 0.249914599986336, "grad_norm": 4.458270072937012, "learning_rate": 6.14998195812697e-06, "loss": 0.4655, "step": 3658 }, { "epoch": 0.2499829199972672, "grad_norm": 3.4254136085510254, "learning_rate": 6.149475947114202e-06, "loss": 0.3852, "step": 3659 }, { "epoch": 0.2500512400081984, "grad_norm": 4.003978252410889, "learning_rate": 6.148969806363626e-06, "loss": 0.4775, "step": 3660 }, { "epoch": 0.2501195600191296, "grad_norm": 4.073651313781738, "learning_rate": 6.148463535900024e-06, "loss": 0.4854, "step": 3661 }, { "epoch": 0.2501878800300608, "grad_norm": 3.7948782444000244, "learning_rate": 6.14795713574819e-06, "loss": 0.3299, "step": 3662 }, { "epoch": 0.250256200040992, "grad_norm": 3.9336764812469482, "learning_rate": 6.147450605932916e-06, "loss": 0.4332, "step": 3663 }, { "epoch": 0.2503245200519232, "grad_norm": 4.382108211517334, "learning_rate": 6.14694394647901e-06, "loss": 0.4836, "step": 3664 }, { "epoch": 0.2503928400628544, "grad_norm": 4.272336006164551, "learning_rate": 6.1464371574112795e-06, "loss": 0.4057, "step": 3665 }, { "epoch": 0.2504611600737856, "grad_norm": 3.866185426712036, "learning_rate": 6.1459302387545426e-06, "loss": 0.3005, "step": 3666 }, { "epoch": 0.2505294800847168, "grad_norm": 3.4446446895599365, "learning_rate": 6.14542319053362e-06, "loss": 0.3994, "step": 3667 }, { "epoch": 0.250597800095648, "grad_norm": 4.557162284851074, "learning_rate": 6.144916012773339e-06, "loss": 0.378, "step": 3668 }, { "epoch": 0.25066612010657924, "grad_norm": 3.6901535987854004, "learning_rate": 6.144408705498538e-06, "loss": 0.3562, "step": 3669 }, { "epoch": 0.2507344401175104, "grad_norm": 4.465907096862793, "learning_rate": 6.143901268734058e-06, "loss": 0.3347, "step": 3670 }, { "epoch": 0.2508027601284416, "grad_norm": 3.3584375381469727, "learning_rate": 6.143393702504744e-06, "loss": 0.3468, "step": 3671 }, { "epoch": 0.2508710801393728, "grad_norm": 5.048662185668945, "learning_rate": 6.1428860068354535e-06, "loss": 0.4075, "step": 3672 }, { "epoch": 0.250939400150304, "grad_norm": 3.997586488723755, "learning_rate": 6.142378181751046e-06, "loss": 0.3923, "step": 3673 }, { "epoch": 0.25100772016123524, "grad_norm": 3.8774373531341553, "learning_rate": 6.141870227276387e-06, "loss": 0.4378, "step": 3674 }, { "epoch": 0.25107604017216645, "grad_norm": 4.549546718597412, "learning_rate": 6.1413621434363505e-06, "loss": 0.4706, "step": 3675 }, { "epoch": 0.2511443601830976, "grad_norm": 5.429966926574707, "learning_rate": 6.140853930255817e-06, "loss": 0.4195, "step": 3676 }, { "epoch": 0.2512126801940288, "grad_norm": 4.196766376495361, "learning_rate": 6.1403455877596714e-06, "loss": 0.404, "step": 3677 }, { "epoch": 0.25128100020496, "grad_norm": 3.745760440826416, "learning_rate": 6.139837115972806e-06, "loss": 0.3523, "step": 3678 }, { "epoch": 0.25134932021589124, "grad_norm": 4.194832801818848, "learning_rate": 6.139328514920119e-06, "loss": 0.4626, "step": 3679 }, { "epoch": 0.25141764022682245, "grad_norm": 3.280531167984009, "learning_rate": 6.138819784626516e-06, "loss": 0.3576, "step": 3680 }, { "epoch": 0.25148596023775366, "grad_norm": 3.53482985496521, "learning_rate": 6.138310925116907e-06, "loss": 0.3422, "step": 3681 }, { "epoch": 0.2515542802486848, "grad_norm": 5.059259414672852, "learning_rate": 6.137801936416212e-06, "loss": 0.4692, "step": 3682 }, { "epoch": 0.251622600259616, "grad_norm": 4.471395015716553, "learning_rate": 6.137292818549352e-06, "loss": 0.4777, "step": 3683 }, { "epoch": 0.25169092027054724, "grad_norm": 4.243409633636475, "learning_rate": 6.136783571541258e-06, "loss": 0.4292, "step": 3684 }, { "epoch": 0.25175924028147845, "grad_norm": 3.9896867275238037, "learning_rate": 6.136274195416866e-06, "loss": 0.4364, "step": 3685 }, { "epoch": 0.25182756029240966, "grad_norm": 5.100282192230225, "learning_rate": 6.135764690201121e-06, "loss": 0.4839, "step": 3686 }, { "epoch": 0.25189588030334087, "grad_norm": 4.661340236663818, "learning_rate": 6.135255055918971e-06, "loss": 0.4128, "step": 3687 }, { "epoch": 0.251964200314272, "grad_norm": 4.9447407722473145, "learning_rate": 6.13474529259537e-06, "loss": 0.2759, "step": 3688 }, { "epoch": 0.25203252032520324, "grad_norm": 4.442732810974121, "learning_rate": 6.134235400255281e-06, "loss": 0.4372, "step": 3689 }, { "epoch": 0.25210084033613445, "grad_norm": 3.5880050659179688, "learning_rate": 6.133725378923671e-06, "loss": 0.3271, "step": 3690 }, { "epoch": 0.25216916034706566, "grad_norm": 3.5355749130249023, "learning_rate": 6.133215228625517e-06, "loss": 0.3706, "step": 3691 }, { "epoch": 0.25223748035799687, "grad_norm": 5.1844282150268555, "learning_rate": 6.132704949385797e-06, "loss": 0.4421, "step": 3692 }, { "epoch": 0.2523058003689281, "grad_norm": 4.242308139801025, "learning_rate": 6.1321945412295e-06, "loss": 0.4065, "step": 3693 }, { "epoch": 0.25237412037985923, "grad_norm": 6.038686275482178, "learning_rate": 6.1316840041816175e-06, "loss": 0.5391, "step": 3694 }, { "epoch": 0.25244244039079045, "grad_norm": 2.911287546157837, "learning_rate": 6.13117333826715e-06, "loss": 0.3143, "step": 3695 }, { "epoch": 0.25251076040172166, "grad_norm": 4.971306800842285, "learning_rate": 6.130662543511104e-06, "loss": 0.4241, "step": 3696 }, { "epoch": 0.25257908041265287, "grad_norm": 3.2065482139587402, "learning_rate": 6.130151619938491e-06, "loss": 0.2532, "step": 3697 }, { "epoch": 0.2526474004235841, "grad_norm": 3.0261101722717285, "learning_rate": 6.129640567574329e-06, "loss": 0.3886, "step": 3698 }, { "epoch": 0.2527157204345153, "grad_norm": 3.8016715049743652, "learning_rate": 6.1291293864436445e-06, "loss": 0.4034, "step": 3699 }, { "epoch": 0.25278404044544645, "grad_norm": 4.052627086639404, "learning_rate": 6.128618076571468e-06, "loss": 0.4737, "step": 3700 }, { "epoch": 0.25285236045637766, "grad_norm": 3.7888989448547363, "learning_rate": 6.128106637982837e-06, "loss": 0.3212, "step": 3701 }, { "epoch": 0.25292068046730887, "grad_norm": 3.8706257343292236, "learning_rate": 6.127595070702795e-06, "loss": 0.4281, "step": 3702 }, { "epoch": 0.2529890004782401, "grad_norm": 3.674571990966797, "learning_rate": 6.127083374756391e-06, "loss": 0.5018, "step": 3703 }, { "epoch": 0.2530573204891713, "grad_norm": 4.5387864112854, "learning_rate": 6.126571550168684e-06, "loss": 0.4396, "step": 3704 }, { "epoch": 0.2531256405001025, "grad_norm": 4.3374199867248535, "learning_rate": 6.126059596964735e-06, "loss": 0.3574, "step": 3705 }, { "epoch": 0.25319396051103366, "grad_norm": 3.5761306285858154, "learning_rate": 6.125547515169613e-06, "loss": 0.4208, "step": 3706 }, { "epoch": 0.25326228052196487, "grad_norm": 4.863802909851074, "learning_rate": 6.125035304808395e-06, "loss": 0.3493, "step": 3707 }, { "epoch": 0.2533306005328961, "grad_norm": 3.2514278888702393, "learning_rate": 6.124522965906159e-06, "loss": 0.4049, "step": 3708 }, { "epoch": 0.2533989205438273, "grad_norm": 4.557576656341553, "learning_rate": 6.124010498487996e-06, "loss": 0.4173, "step": 3709 }, { "epoch": 0.2534672405547585, "grad_norm": 3.6403980255126953, "learning_rate": 6.123497902579e-06, "loss": 0.4126, "step": 3710 }, { "epoch": 0.2535355605656897, "grad_norm": 5.887677192687988, "learning_rate": 6.12298517820427e-06, "loss": 0.5051, "step": 3711 }, { "epoch": 0.25360388057662087, "grad_norm": 4.260605335235596, "learning_rate": 6.122472325388914e-06, "loss": 0.3978, "step": 3712 }, { "epoch": 0.2536722005875521, "grad_norm": 3.065586805343628, "learning_rate": 6.1219593441580435e-06, "loss": 0.3403, "step": 3713 }, { "epoch": 0.2537405205984833, "grad_norm": 5.046430587768555, "learning_rate": 6.1214462345367785e-06, "loss": 0.4939, "step": 3714 }, { "epoch": 0.2538088406094145, "grad_norm": 3.126404047012329, "learning_rate": 6.120932996550246e-06, "loss": 0.3116, "step": 3715 }, { "epoch": 0.2538771606203457, "grad_norm": 3.771898031234741, "learning_rate": 6.120419630223577e-06, "loss": 0.3626, "step": 3716 }, { "epoch": 0.2539454806312769, "grad_norm": 3.827378988265991, "learning_rate": 6.119906135581909e-06, "loss": 0.3804, "step": 3717 }, { "epoch": 0.2540138006422081, "grad_norm": 4.214380741119385, "learning_rate": 6.119392512650387e-06, "loss": 0.4574, "step": 3718 }, { "epoch": 0.2540821206531393, "grad_norm": 3.676006555557251, "learning_rate": 6.118878761454162e-06, "loss": 0.4092, "step": 3719 }, { "epoch": 0.2541504406640705, "grad_norm": 3.4461588859558105, "learning_rate": 6.118364882018391e-06, "loss": 0.3254, "step": 3720 }, { "epoch": 0.2542187606750017, "grad_norm": 4.005209922790527, "learning_rate": 6.1178508743682355e-06, "loss": 0.4468, "step": 3721 }, { "epoch": 0.2542870806859329, "grad_norm": 3.6751418113708496, "learning_rate": 6.117336738528869e-06, "loss": 0.312, "step": 3722 }, { "epoch": 0.25435540069686413, "grad_norm": 4.110629558563232, "learning_rate": 6.116822474525464e-06, "loss": 0.4152, "step": 3723 }, { "epoch": 0.2544237207077953, "grad_norm": 3.752007007598877, "learning_rate": 6.1163080823832025e-06, "loss": 0.3592, "step": 3724 }, { "epoch": 0.2544920407187265, "grad_norm": 3.9020042419433594, "learning_rate": 6.115793562127276e-06, "loss": 0.4686, "step": 3725 }, { "epoch": 0.2545603607296577, "grad_norm": 5.166430950164795, "learning_rate": 6.115278913782877e-06, "loss": 0.3829, "step": 3726 }, { "epoch": 0.2546286807405889, "grad_norm": 4.872012615203857, "learning_rate": 6.114764137375206e-06, "loss": 0.5243, "step": 3727 }, { "epoch": 0.25469700075152013, "grad_norm": 4.07771110534668, "learning_rate": 6.114249232929471e-06, "loss": 0.4511, "step": 3728 }, { "epoch": 0.25476532076245134, "grad_norm": 3.3817014694213867, "learning_rate": 6.113734200470886e-06, "loss": 0.367, "step": 3729 }, { "epoch": 0.2548336407733825, "grad_norm": 3.4299404621124268, "learning_rate": 6.11321904002467e-06, "loss": 0.4005, "step": 3730 }, { "epoch": 0.2549019607843137, "grad_norm": 4.323414325714111, "learning_rate": 6.112703751616049e-06, "loss": 0.3247, "step": 3731 }, { "epoch": 0.2549702807952449, "grad_norm": 5.174563884735107, "learning_rate": 6.112188335270256e-06, "loss": 0.5073, "step": 3732 }, { "epoch": 0.25503860080617613, "grad_norm": 3.686347484588623, "learning_rate": 6.11167279101253e-06, "loss": 0.4239, "step": 3733 }, { "epoch": 0.25510692081710734, "grad_norm": 3.478999376296997, "learning_rate": 6.111157118868114e-06, "loss": 0.461, "step": 3734 }, { "epoch": 0.25517524082803855, "grad_norm": 3.8496148586273193, "learning_rate": 6.1106413188622615e-06, "loss": 0.449, "step": 3735 }, { "epoch": 0.2552435608389697, "grad_norm": 4.547183036804199, "learning_rate": 6.110125391020227e-06, "loss": 0.2919, "step": 3736 }, { "epoch": 0.2553118808499009, "grad_norm": 3.6866824626922607, "learning_rate": 6.109609335367275e-06, "loss": 0.3479, "step": 3737 }, { "epoch": 0.25538020086083213, "grad_norm": 4.263284683227539, "learning_rate": 6.109093151928678e-06, "loss": 0.4407, "step": 3738 }, { "epoch": 0.25544852087176334, "grad_norm": 4.357040882110596, "learning_rate": 6.108576840729709e-06, "loss": 0.3782, "step": 3739 }, { "epoch": 0.25551684088269455, "grad_norm": 4.1164231300354, "learning_rate": 6.108060401795651e-06, "loss": 0.4285, "step": 3740 }, { "epoch": 0.25558516089362576, "grad_norm": 4.721220016479492, "learning_rate": 6.1075438351517945e-06, "loss": 0.4705, "step": 3741 }, { "epoch": 0.2556534809045569, "grad_norm": 3.3608367443084717, "learning_rate": 6.107027140823432e-06, "loss": 0.3797, "step": 3742 }, { "epoch": 0.25572180091548813, "grad_norm": 4.062227249145508, "learning_rate": 6.106510318835866e-06, "loss": 0.3947, "step": 3743 }, { "epoch": 0.25579012092641934, "grad_norm": 4.875380516052246, "learning_rate": 6.105993369214403e-06, "loss": 0.4041, "step": 3744 }, { "epoch": 0.25585844093735055, "grad_norm": 4.490452289581299, "learning_rate": 6.105476291984359e-06, "loss": 0.3971, "step": 3745 }, { "epoch": 0.25592676094828176, "grad_norm": 4.413267612457275, "learning_rate": 6.10495908717105e-06, "loss": 0.3751, "step": 3746 }, { "epoch": 0.255995080959213, "grad_norm": 3.5800282955169678, "learning_rate": 6.104441754799807e-06, "loss": 0.39, "step": 3747 }, { "epoch": 0.25606340097014413, "grad_norm": 4.711578369140625, "learning_rate": 6.103924294895958e-06, "loss": 0.4269, "step": 3748 }, { "epoch": 0.25613172098107534, "grad_norm": 4.872072219848633, "learning_rate": 6.103406707484843e-06, "loss": 0.4032, "step": 3749 }, { "epoch": 0.25620004099200655, "grad_norm": 4.39914083480835, "learning_rate": 6.102888992591809e-06, "loss": 0.4474, "step": 3750 }, { "epoch": 0.25626836100293776, "grad_norm": 3.849653482437134, "learning_rate": 6.102371150242203e-06, "loss": 0.3727, "step": 3751 }, { "epoch": 0.25633668101386897, "grad_norm": 5.1915082931518555, "learning_rate": 6.101853180461386e-06, "loss": 0.4084, "step": 3752 }, { "epoch": 0.2564050010248002, "grad_norm": 3.9981918334960938, "learning_rate": 6.10133508327472e-06, "loss": 0.3519, "step": 3753 }, { "epoch": 0.25647332103573134, "grad_norm": 3.6595284938812256, "learning_rate": 6.100816858707575e-06, "loss": 0.4461, "step": 3754 }, { "epoch": 0.25654164104666255, "grad_norm": 5.332067966461182, "learning_rate": 6.100298506785328e-06, "loss": 0.562, "step": 3755 }, { "epoch": 0.25660996105759376, "grad_norm": 4.815200328826904, "learning_rate": 6.09978002753336e-06, "loss": 0.4217, "step": 3756 }, { "epoch": 0.25667828106852497, "grad_norm": 4.5171003341674805, "learning_rate": 6.0992614209770606e-06, "loss": 0.5397, "step": 3757 }, { "epoch": 0.2567466010794562, "grad_norm": 3.373591899871826, "learning_rate": 6.098742687141823e-06, "loss": 0.4344, "step": 3758 }, { "epoch": 0.2568149210903874, "grad_norm": 3.6821610927581787, "learning_rate": 6.09822382605305e-06, "loss": 0.3748, "step": 3759 }, { "epoch": 0.25688324110131855, "grad_norm": 4.273515224456787, "learning_rate": 6.0977048377361484e-06, "loss": 0.4076, "step": 3760 }, { "epoch": 0.25695156111224976, "grad_norm": 4.288153648376465, "learning_rate": 6.097185722216532e-06, "loss": 0.385, "step": 3761 }, { "epoch": 0.25701988112318097, "grad_norm": 4.337771892547607, "learning_rate": 6.096666479519618e-06, "loss": 0.4397, "step": 3762 }, { "epoch": 0.2570882011341122, "grad_norm": 3.37742280960083, "learning_rate": 6.096147109670836e-06, "loss": 0.405, "step": 3763 }, { "epoch": 0.2571565211450434, "grad_norm": 2.8558735847473145, "learning_rate": 6.095627612695616e-06, "loss": 0.3552, "step": 3764 }, { "epoch": 0.2572248411559746, "grad_norm": 4.480291366577148, "learning_rate": 6.095107988619397e-06, "loss": 0.4711, "step": 3765 }, { "epoch": 0.25729316116690576, "grad_norm": 3.9123289585113525, "learning_rate": 6.094588237467623e-06, "loss": 0.448, "step": 3766 }, { "epoch": 0.25736148117783697, "grad_norm": 4.981053829193115, "learning_rate": 6.0940683592657455e-06, "loss": 0.4239, "step": 3767 }, { "epoch": 0.2574298011887682, "grad_norm": 4.951015472412109, "learning_rate": 6.093548354039222e-06, "loss": 0.43, "step": 3768 }, { "epoch": 0.2574981211996994, "grad_norm": 4.471009731292725, "learning_rate": 6.093028221813515e-06, "loss": 0.2636, "step": 3769 }, { "epoch": 0.2575664412106306, "grad_norm": 5.003203868865967, "learning_rate": 6.092507962614093e-06, "loss": 0.4266, "step": 3770 }, { "epoch": 0.2576347612215618, "grad_norm": 5.67179012298584, "learning_rate": 6.091987576466434e-06, "loss": 0.4896, "step": 3771 }, { "epoch": 0.25770308123249297, "grad_norm": 3.7354753017425537, "learning_rate": 6.091467063396019e-06, "loss": 0.3365, "step": 3772 }, { "epoch": 0.2577714012434242, "grad_norm": 3.6373789310455322, "learning_rate": 6.090946423428334e-06, "loss": 0.403, "step": 3773 }, { "epoch": 0.2578397212543554, "grad_norm": 2.487766981124878, "learning_rate": 6.090425656588877e-06, "loss": 0.2961, "step": 3774 }, { "epoch": 0.2579080412652866, "grad_norm": 4.996341705322266, "learning_rate": 6.089904762903147e-06, "loss": 0.3585, "step": 3775 }, { "epoch": 0.2579763612762178, "grad_norm": 4.527476787567139, "learning_rate": 6.0893837423966505e-06, "loss": 0.445, "step": 3776 }, { "epoch": 0.258044681287149, "grad_norm": 3.450111150741577, "learning_rate": 6.088862595094899e-06, "loss": 0.4045, "step": 3777 }, { "epoch": 0.2581130012980802, "grad_norm": 5.712319374084473, "learning_rate": 6.0883413210234155e-06, "loss": 0.4905, "step": 3778 }, { "epoch": 0.2581813213090114, "grad_norm": 4.117053031921387, "learning_rate": 6.087819920207723e-06, "loss": 0.4541, "step": 3779 }, { "epoch": 0.2582496413199426, "grad_norm": 3.315615177154541, "learning_rate": 6.087298392673352e-06, "loss": 0.309, "step": 3780 }, { "epoch": 0.2583179613308738, "grad_norm": 5.447295665740967, "learning_rate": 6.0867767384458426e-06, "loss": 0.4931, "step": 3781 }, { "epoch": 0.258386281341805, "grad_norm": 4.2230305671691895, "learning_rate": 6.086254957550738e-06, "loss": 0.3747, "step": 3782 }, { "epoch": 0.25845460135273624, "grad_norm": 4.559149742126465, "learning_rate": 6.0857330500135875e-06, "loss": 0.3982, "step": 3783 }, { "epoch": 0.2585229213636674, "grad_norm": 4.243009090423584, "learning_rate": 6.085211015859949e-06, "loss": 0.3929, "step": 3784 }, { "epoch": 0.2585912413745986, "grad_norm": 3.740483045578003, "learning_rate": 6.084688855115383e-06, "loss": 0.3596, "step": 3785 }, { "epoch": 0.2586595613855298, "grad_norm": 4.318760395050049, "learning_rate": 6.084166567805462e-06, "loss": 0.4468, "step": 3786 }, { "epoch": 0.258727881396461, "grad_norm": 3.5768158435821533, "learning_rate": 6.083644153955758e-06, "loss": 0.3271, "step": 3787 }, { "epoch": 0.25879620140739223, "grad_norm": 3.756139039993286, "learning_rate": 6.083121613591852e-06, "loss": 0.3745, "step": 3788 }, { "epoch": 0.25886452141832345, "grad_norm": 4.663967132568359, "learning_rate": 6.082598946739335e-06, "loss": 0.4234, "step": 3789 }, { "epoch": 0.2589328414292546, "grad_norm": 4.977180480957031, "learning_rate": 6.082076153423795e-06, "loss": 0.3881, "step": 3790 }, { "epoch": 0.2590011614401858, "grad_norm": 4.17164945602417, "learning_rate": 6.081553233670837e-06, "loss": 0.2594, "step": 3791 }, { "epoch": 0.259069481451117, "grad_norm": 3.893585443496704, "learning_rate": 6.0810301875060634e-06, "loss": 0.3474, "step": 3792 }, { "epoch": 0.25913780146204823, "grad_norm": 3.526655912399292, "learning_rate": 6.080507014955089e-06, "loss": 0.3459, "step": 3793 }, { "epoch": 0.25920612147297944, "grad_norm": 3.688194751739502, "learning_rate": 6.079983716043532e-06, "loss": 0.3293, "step": 3794 }, { "epoch": 0.25927444148391066, "grad_norm": 4.797875881195068, "learning_rate": 6.079460290797014e-06, "loss": 0.5792, "step": 3795 }, { "epoch": 0.25934276149484187, "grad_norm": 3.796499013900757, "learning_rate": 6.078936739241168e-06, "loss": 0.3533, "step": 3796 }, { "epoch": 0.259411081505773, "grad_norm": 2.937217950820923, "learning_rate": 6.078413061401632e-06, "loss": 0.3203, "step": 3797 }, { "epoch": 0.25947940151670423, "grad_norm": 4.710093021392822, "learning_rate": 6.077889257304046e-06, "loss": 0.4281, "step": 3798 }, { "epoch": 0.25954772152763544, "grad_norm": 4.412635326385498, "learning_rate": 6.077365326974062e-06, "loss": 0.4371, "step": 3799 }, { "epoch": 0.25961604153856666, "grad_norm": 4.176217555999756, "learning_rate": 6.076841270437334e-06, "loss": 0.3359, "step": 3800 }, { "epoch": 0.25968436154949787, "grad_norm": 4.152630805969238, "learning_rate": 6.076317087719526e-06, "loss": 0.3966, "step": 3801 }, { "epoch": 0.2597526815604291, "grad_norm": 5.122087001800537, "learning_rate": 6.075792778846302e-06, "loss": 0.3504, "step": 3802 }, { "epoch": 0.25982100157136023, "grad_norm": 3.738537311553955, "learning_rate": 6.075268343843339e-06, "loss": 0.4027, "step": 3803 }, { "epoch": 0.25988932158229144, "grad_norm": 4.04227876663208, "learning_rate": 6.074743782736316e-06, "loss": 0.3992, "step": 3804 }, { "epoch": 0.25995764159322265, "grad_norm": 4.192661285400391, "learning_rate": 6.074219095550919e-06, "loss": 0.4116, "step": 3805 }, { "epoch": 0.26002596160415387, "grad_norm": 4.391695499420166, "learning_rate": 6.0736942823128425e-06, "loss": 0.5389, "step": 3806 }, { "epoch": 0.2600942816150851, "grad_norm": 2.808803081512451, "learning_rate": 6.073169343047783e-06, "loss": 0.3004, "step": 3807 }, { "epoch": 0.2601626016260163, "grad_norm": 4.721212863922119, "learning_rate": 6.072644277781446e-06, "loss": 0.4663, "step": 3808 }, { "epoch": 0.26023092163694744, "grad_norm": 5.709514617919922, "learning_rate": 6.072119086539544e-06, "loss": 0.3669, "step": 3809 }, { "epoch": 0.26029924164787865, "grad_norm": 4.167557716369629, "learning_rate": 6.071593769347792e-06, "loss": 0.4575, "step": 3810 }, { "epoch": 0.26036756165880987, "grad_norm": 4.435286045074463, "learning_rate": 6.071068326231915e-06, "loss": 0.4241, "step": 3811 }, { "epoch": 0.2604358816697411, "grad_norm": 5.935349941253662, "learning_rate": 6.0705427572176414e-06, "loss": 0.3782, "step": 3812 }, { "epoch": 0.2605042016806723, "grad_norm": 3.842529773712158, "learning_rate": 6.070017062330708e-06, "loss": 0.3494, "step": 3813 }, { "epoch": 0.2605725216916035, "grad_norm": 3.2543914318084717, "learning_rate": 6.069491241596856e-06, "loss": 0.3058, "step": 3814 }, { "epoch": 0.26064084170253465, "grad_norm": 3.988694429397583, "learning_rate": 6.068965295041833e-06, "loss": 0.4369, "step": 3815 }, { "epoch": 0.26070916171346586, "grad_norm": 4.364629745483398, "learning_rate": 6.0684392226913944e-06, "loss": 0.4549, "step": 3816 }, { "epoch": 0.2607774817243971, "grad_norm": 4.1725616455078125, "learning_rate": 6.0679130245713e-06, "loss": 0.4278, "step": 3817 }, { "epoch": 0.2608458017353283, "grad_norm": 4.0214104652404785, "learning_rate": 6.067386700707316e-06, "loss": 0.4116, "step": 3818 }, { "epoch": 0.2609141217462595, "grad_norm": 3.715014696121216, "learning_rate": 6.066860251125216e-06, "loss": 0.3501, "step": 3819 }, { "epoch": 0.2609824417571907, "grad_norm": 3.3824832439422607, "learning_rate": 6.066333675850778e-06, "loss": 0.4306, "step": 3820 }, { "epoch": 0.26105076176812186, "grad_norm": 5.347842693328857, "learning_rate": 6.065806974909787e-06, "loss": 0.393, "step": 3821 }, { "epoch": 0.2611190817790531, "grad_norm": 4.237652778625488, "learning_rate": 6.0652801483280344e-06, "loss": 0.397, "step": 3822 }, { "epoch": 0.2611874017899843, "grad_norm": 2.80214786529541, "learning_rate": 6.064753196131319e-06, "loss": 0.3271, "step": 3823 }, { "epoch": 0.2612557218009155, "grad_norm": 4.436269760131836, "learning_rate": 6.064226118345441e-06, "loss": 0.3894, "step": 3824 }, { "epoch": 0.2613240418118467, "grad_norm": 3.902672529220581, "learning_rate": 6.063698914996212e-06, "loss": 0.38, "step": 3825 }, { "epoch": 0.2613923618227779, "grad_norm": 4.170753479003906, "learning_rate": 6.063171586109448e-06, "loss": 0.4898, "step": 3826 }, { "epoch": 0.2614606818337091, "grad_norm": 4.454129695892334, "learning_rate": 6.06264413171097e-06, "loss": 0.4091, "step": 3827 }, { "epoch": 0.2615290018446403, "grad_norm": 4.171334266662598, "learning_rate": 6.062116551826606e-06, "loss": 0.4297, "step": 3828 }, { "epoch": 0.2615973218555715, "grad_norm": 3.8910741806030273, "learning_rate": 6.061588846482191e-06, "loss": 0.3594, "step": 3829 }, { "epoch": 0.2616656418665027, "grad_norm": 3.1550257205963135, "learning_rate": 6.061061015703565e-06, "loss": 0.371, "step": 3830 }, { "epoch": 0.2617339618774339, "grad_norm": 3.1498219966888428, "learning_rate": 6.060533059516575e-06, "loss": 0.3474, "step": 3831 }, { "epoch": 0.26180228188836513, "grad_norm": 4.154820919036865, "learning_rate": 6.060004977947073e-06, "loss": 0.4261, "step": 3832 }, { "epoch": 0.2618706018992963, "grad_norm": 4.7036638259887695, "learning_rate": 6.0594767710209165e-06, "loss": 0.351, "step": 3833 }, { "epoch": 0.2619389219102275, "grad_norm": 4.17974328994751, "learning_rate": 6.058948438763973e-06, "loss": 0.3255, "step": 3834 }, { "epoch": 0.2620072419211587, "grad_norm": 3.832940101623535, "learning_rate": 6.058419981202112e-06, "loss": 0.3359, "step": 3835 }, { "epoch": 0.2620755619320899, "grad_norm": 3.111022710800171, "learning_rate": 6.057891398361211e-06, "loss": 0.3505, "step": 3836 }, { "epoch": 0.26214388194302113, "grad_norm": 3.1218197345733643, "learning_rate": 6.057362690267152e-06, "loss": 0.4087, "step": 3837 }, { "epoch": 0.26221220195395234, "grad_norm": 4.812538146972656, "learning_rate": 6.056833856945827e-06, "loss": 0.6081, "step": 3838 }, { "epoch": 0.2622805219648835, "grad_norm": 3.9999396800994873, "learning_rate": 6.056304898423129e-06, "loss": 0.3637, "step": 3839 }, { "epoch": 0.2623488419758147, "grad_norm": 3.93843412399292, "learning_rate": 6.055775814724962e-06, "loss": 0.3578, "step": 3840 }, { "epoch": 0.2624171619867459, "grad_norm": 4.418386936187744, "learning_rate": 6.055246605877231e-06, "loss": 0.4096, "step": 3841 }, { "epoch": 0.26248548199767713, "grad_norm": 3.2774808406829834, "learning_rate": 6.054717271905853e-06, "loss": 0.377, "step": 3842 }, { "epoch": 0.26255380200860834, "grad_norm": 3.7646002769470215, "learning_rate": 6.054187812836747e-06, "loss": 0.4898, "step": 3843 }, { "epoch": 0.26262212201953955, "grad_norm": 4.370978355407715, "learning_rate": 6.053658228695839e-06, "loss": 0.4034, "step": 3844 }, { "epoch": 0.2626904420304707, "grad_norm": 3.768303632736206, "learning_rate": 6.05312851950906e-06, "loss": 0.2671, "step": 3845 }, { "epoch": 0.2627587620414019, "grad_norm": 3.3252580165863037, "learning_rate": 6.0525986853023514e-06, "loss": 0.3939, "step": 3846 }, { "epoch": 0.2628270820523331, "grad_norm": 3.7001090049743652, "learning_rate": 6.0520687261016544e-06, "loss": 0.3942, "step": 3847 }, { "epoch": 0.26289540206326434, "grad_norm": 4.791535377502441, "learning_rate": 6.051538641932922e-06, "loss": 0.5044, "step": 3848 }, { "epoch": 0.26296372207419555, "grad_norm": 3.4072959423065186, "learning_rate": 6.051008432822111e-06, "loss": 0.3546, "step": 3849 }, { "epoch": 0.26303204208512676, "grad_norm": 5.118863582611084, "learning_rate": 6.050478098795183e-06, "loss": 0.3874, "step": 3850 }, { "epoch": 0.2631003620960579, "grad_norm": 5.392466068267822, "learning_rate": 6.049947639878108e-06, "loss": 0.4209, "step": 3851 }, { "epoch": 0.2631686821069891, "grad_norm": 3.8551409244537354, "learning_rate": 6.049417056096862e-06, "loss": 0.3056, "step": 3852 }, { "epoch": 0.26323700211792034, "grad_norm": 3.296102523803711, "learning_rate": 6.0488863474774245e-06, "loss": 0.2992, "step": 3853 }, { "epoch": 0.26330532212885155, "grad_norm": 5.648981094360352, "learning_rate": 6.048355514045783e-06, "loss": 0.3461, "step": 3854 }, { "epoch": 0.26337364213978276, "grad_norm": 4.47861385345459, "learning_rate": 6.047824555827932e-06, "loss": 0.3578, "step": 3855 }, { "epoch": 0.26344196215071397, "grad_norm": 4.203469753265381, "learning_rate": 6.047293472849871e-06, "loss": 0.4797, "step": 3856 }, { "epoch": 0.2635102821616451, "grad_norm": 3.9146037101745605, "learning_rate": 6.046762265137605e-06, "loss": 0.4076, "step": 3857 }, { "epoch": 0.26357860217257634, "grad_norm": 3.3353939056396484, "learning_rate": 6.046230932717147e-06, "loss": 0.3402, "step": 3858 }, { "epoch": 0.26364692218350755, "grad_norm": 3.9672951698303223, "learning_rate": 6.045699475614513e-06, "loss": 0.3729, "step": 3859 }, { "epoch": 0.26371524219443876, "grad_norm": 5.8354902267456055, "learning_rate": 6.04516789385573e-06, "loss": 0.3998, "step": 3860 }, { "epoch": 0.26378356220536997, "grad_norm": 4.424616813659668, "learning_rate": 6.044636187466826e-06, "loss": 0.5143, "step": 3861 }, { "epoch": 0.2638518822163012, "grad_norm": 4.309009552001953, "learning_rate": 6.044104356473837e-06, "loss": 0.3566, "step": 3862 }, { "epoch": 0.26392020222723234, "grad_norm": 4.109398365020752, "learning_rate": 6.0435724009028065e-06, "loss": 0.3235, "step": 3863 }, { "epoch": 0.26398852223816355, "grad_norm": 3.594698667526245, "learning_rate": 6.043040320779782e-06, "loss": 0.3821, "step": 3864 }, { "epoch": 0.26405684224909476, "grad_norm": 3.948800563812256, "learning_rate": 6.042508116130819e-06, "loss": 0.3763, "step": 3865 }, { "epoch": 0.26412516226002597, "grad_norm": 3.256270408630371, "learning_rate": 6.041975786981977e-06, "loss": 0.296, "step": 3866 }, { "epoch": 0.2641934822709572, "grad_norm": 4.174013137817383, "learning_rate": 6.041443333359324e-06, "loss": 0.4352, "step": 3867 }, { "epoch": 0.2642618022818884, "grad_norm": 3.4871909618377686, "learning_rate": 6.0409107552889325e-06, "loss": 0.3658, "step": 3868 }, { "epoch": 0.26433012229281955, "grad_norm": 3.9280197620391846, "learning_rate": 6.040378052796882e-06, "loss": 0.3483, "step": 3869 }, { "epoch": 0.26439844230375076, "grad_norm": 2.9134323596954346, "learning_rate": 6.039845225909255e-06, "loss": 0.3158, "step": 3870 }, { "epoch": 0.26446676231468197, "grad_norm": 5.112494945526123, "learning_rate": 6.039312274652145e-06, "loss": 0.4474, "step": 3871 }, { "epoch": 0.2645350823256132, "grad_norm": 4.471314430236816, "learning_rate": 6.03877919905165e-06, "loss": 0.4462, "step": 3872 }, { "epoch": 0.2646034023365444, "grad_norm": 3.8414900302886963, "learning_rate": 6.038245999133871e-06, "loss": 0.3462, "step": 3873 }, { "epoch": 0.2646717223474756, "grad_norm": 5.299713134765625, "learning_rate": 6.0377126749249175e-06, "loss": 0.4484, "step": 3874 }, { "epoch": 0.26474004235840676, "grad_norm": 3.0749411582946777, "learning_rate": 6.037179226450906e-06, "loss": 0.439, "step": 3875 }, { "epoch": 0.26480836236933797, "grad_norm": 5.4087419509887695, "learning_rate": 6.036645653737957e-06, "loss": 0.4134, "step": 3876 }, { "epoch": 0.2648766823802692, "grad_norm": 4.199869155883789, "learning_rate": 6.036111956812201e-06, "loss": 0.461, "step": 3877 }, { "epoch": 0.2649450023912004, "grad_norm": 4.310484886169434, "learning_rate": 6.035578135699768e-06, "loss": 0.4165, "step": 3878 }, { "epoch": 0.2650133224021316, "grad_norm": 4.199382781982422, "learning_rate": 6.0350441904268e-06, "loss": 0.3746, "step": 3879 }, { "epoch": 0.2650816424130628, "grad_norm": 4.3885884284973145, "learning_rate": 6.034510121019443e-06, "loss": 0.4233, "step": 3880 }, { "epoch": 0.26514996242399397, "grad_norm": 3.1152234077453613, "learning_rate": 6.033975927503847e-06, "loss": 0.3731, "step": 3881 }, { "epoch": 0.2652182824349252, "grad_norm": 3.8346850872039795, "learning_rate": 6.033441609906173e-06, "loss": 0.3719, "step": 3882 }, { "epoch": 0.2652866024458564, "grad_norm": 3.0872433185577393, "learning_rate": 6.032907168252583e-06, "loss": 0.371, "step": 3883 }, { "epoch": 0.2653549224567876, "grad_norm": 4.333824634552002, "learning_rate": 6.032372602569246e-06, "loss": 0.4177, "step": 3884 }, { "epoch": 0.2654232424677188, "grad_norm": 4.419057846069336, "learning_rate": 6.031837912882342e-06, "loss": 0.348, "step": 3885 }, { "epoch": 0.26549156247865, "grad_norm": 3.5992019176483154, "learning_rate": 6.03130309921805e-06, "loss": 0.3066, "step": 3886 }, { "epoch": 0.2655598824895812, "grad_norm": 3.485541343688965, "learning_rate": 6.03076816160256e-06, "loss": 0.3422, "step": 3887 }, { "epoch": 0.2656282025005124, "grad_norm": 3.7792117595672607, "learning_rate": 6.030233100062066e-06, "loss": 0.3827, "step": 3888 }, { "epoch": 0.2656965225114436, "grad_norm": 4.0607404708862305, "learning_rate": 6.029697914622769e-06, "loss": 0.3447, "step": 3889 }, { "epoch": 0.2657648425223748, "grad_norm": 4.49020528793335, "learning_rate": 6.029162605310875e-06, "loss": 0.4068, "step": 3890 }, { "epoch": 0.265833162533306, "grad_norm": 3.260456085205078, "learning_rate": 6.0286271721525964e-06, "loss": 0.3193, "step": 3891 }, { "epoch": 0.26590148254423723, "grad_norm": 4.187283992767334, "learning_rate": 6.028091615174154e-06, "loss": 0.4564, "step": 3892 }, { "epoch": 0.2659698025551684, "grad_norm": 6.241669654846191, "learning_rate": 6.02755593440177e-06, "loss": 0.4284, "step": 3893 }, { "epoch": 0.2660381225660996, "grad_norm": 3.622020959854126, "learning_rate": 6.027020129861677e-06, "loss": 0.3951, "step": 3894 }, { "epoch": 0.2661064425770308, "grad_norm": 4.385955333709717, "learning_rate": 6.026484201580111e-06, "loss": 0.3123, "step": 3895 }, { "epoch": 0.266174762587962, "grad_norm": 3.2269070148468018, "learning_rate": 6.0259481495833154e-06, "loss": 0.3405, "step": 3896 }, { "epoch": 0.26624308259889323, "grad_norm": 5.15159273147583, "learning_rate": 6.025411973897539e-06, "loss": 0.4179, "step": 3897 }, { "epoch": 0.26631140260982444, "grad_norm": 4.864407062530518, "learning_rate": 6.024875674549037e-06, "loss": 0.4809, "step": 3898 }, { "epoch": 0.2663797226207556, "grad_norm": 5.29070520401001, "learning_rate": 6.024339251564071e-06, "loss": 0.4835, "step": 3899 }, { "epoch": 0.2664480426316868, "grad_norm": 4.03548526763916, "learning_rate": 6.023802704968908e-06, "loss": 0.5061, "step": 3900 }, { "epoch": 0.266516362642618, "grad_norm": 3.9541618824005127, "learning_rate": 6.02326603478982e-06, "loss": 0.4019, "step": 3901 }, { "epoch": 0.26658468265354923, "grad_norm": 3.1730947494506836, "learning_rate": 6.022729241053088e-06, "loss": 0.3767, "step": 3902 }, { "epoch": 0.26665300266448044, "grad_norm": 4.614483833312988, "learning_rate": 6.022192323784997e-06, "loss": 0.4323, "step": 3903 }, { "epoch": 0.26672132267541165, "grad_norm": 4.982420921325684, "learning_rate": 6.021655283011837e-06, "loss": 0.399, "step": 3904 }, { "epoch": 0.2667896426863428, "grad_norm": 4.151454448699951, "learning_rate": 6.0211181187599085e-06, "loss": 0.4932, "step": 3905 }, { "epoch": 0.266857962697274, "grad_norm": 3.492971658706665, "learning_rate": 6.020580831055511e-06, "loss": 0.3586, "step": 3906 }, { "epoch": 0.26692628270820523, "grad_norm": 4.299581527709961, "learning_rate": 6.020043419924958e-06, "loss": 0.4412, "step": 3907 }, { "epoch": 0.26699460271913644, "grad_norm": 4.101737022399902, "learning_rate": 6.019505885394563e-06, "loss": 0.3401, "step": 3908 }, { "epoch": 0.26706292273006765, "grad_norm": 3.631079912185669, "learning_rate": 6.018968227490648e-06, "loss": 0.3757, "step": 3909 }, { "epoch": 0.26713124274099886, "grad_norm": 4.693797588348389, "learning_rate": 6.018430446239539e-06, "loss": 0.3926, "step": 3910 }, { "epoch": 0.26719956275193, "grad_norm": 3.149754047393799, "learning_rate": 6.0178925416675725e-06, "loss": 0.2532, "step": 3911 }, { "epoch": 0.26726788276286123, "grad_norm": 2.867558717727661, "learning_rate": 6.017354513801088e-06, "loss": 0.3071, "step": 3912 }, { "epoch": 0.26733620277379244, "grad_norm": 5.620395183563232, "learning_rate": 6.01681636266643e-06, "loss": 0.5212, "step": 3913 }, { "epoch": 0.26740452278472365, "grad_norm": 4.014864444732666, "learning_rate": 6.016278088289949e-06, "loss": 0.4195, "step": 3914 }, { "epoch": 0.26747284279565486, "grad_norm": 3.012127637863159, "learning_rate": 6.0157396906980064e-06, "loss": 0.3534, "step": 3915 }, { "epoch": 0.2675411628065861, "grad_norm": 4.195074081420898, "learning_rate": 6.015201169916964e-06, "loss": 0.4823, "step": 3916 }, { "epoch": 0.26760948281751723, "grad_norm": 4.238467693328857, "learning_rate": 6.0146625259731906e-06, "loss": 0.4234, "step": 3917 }, { "epoch": 0.26767780282844844, "grad_norm": 3.895524740219116, "learning_rate": 6.0141237588930655e-06, "loss": 0.3979, "step": 3918 }, { "epoch": 0.26774612283937965, "grad_norm": 3.5859487056732178, "learning_rate": 6.013584868702968e-06, "loss": 0.3476, "step": 3919 }, { "epoch": 0.26781444285031086, "grad_norm": 3.8402326107025146, "learning_rate": 6.013045855429286e-06, "loss": 0.4866, "step": 3920 }, { "epoch": 0.2678827628612421, "grad_norm": 4.252267837524414, "learning_rate": 6.012506719098417e-06, "loss": 0.3926, "step": 3921 }, { "epoch": 0.2679510828721733, "grad_norm": 4.379078388214111, "learning_rate": 6.011967459736755e-06, "loss": 0.2948, "step": 3922 }, { "epoch": 0.26801940288310444, "grad_norm": 3.8912181854248047, "learning_rate": 6.011428077370712e-06, "loss": 0.4048, "step": 3923 }, { "epoch": 0.26808772289403565, "grad_norm": 4.282443523406982, "learning_rate": 6.010888572026698e-06, "loss": 0.436, "step": 3924 }, { "epoch": 0.26815604290496686, "grad_norm": 3.798764705657959, "learning_rate": 6.01034894373113e-06, "loss": 0.4213, "step": 3925 }, { "epoch": 0.2682243629158981, "grad_norm": 4.057993412017822, "learning_rate": 6.009809192510434e-06, "loss": 0.4214, "step": 3926 }, { "epoch": 0.2682926829268293, "grad_norm": 3.8071820735931396, "learning_rate": 6.009269318391038e-06, "loss": 0.3147, "step": 3927 }, { "epoch": 0.2683610029377605, "grad_norm": 4.521738052368164, "learning_rate": 6.00872932139938e-06, "loss": 0.3961, "step": 3928 }, { "epoch": 0.26842932294869165, "grad_norm": 4.525043964385986, "learning_rate": 6.008189201561902e-06, "loss": 0.3905, "step": 3929 }, { "epoch": 0.26849764295962286, "grad_norm": 3.4338200092315674, "learning_rate": 6.007648958905051e-06, "loss": 0.371, "step": 3930 }, { "epoch": 0.2685659629705541, "grad_norm": 4.65214729309082, "learning_rate": 6.007108593455283e-06, "loss": 0.391, "step": 3931 }, { "epoch": 0.2686342829814853, "grad_norm": 4.656839370727539, "learning_rate": 6.006568105239057e-06, "loss": 0.4402, "step": 3932 }, { "epoch": 0.2687026029924165, "grad_norm": 3.7001450061798096, "learning_rate": 6.00602749428284e-06, "loss": 0.3999, "step": 3933 }, { "epoch": 0.2687709230033477, "grad_norm": 4.101143836975098, "learning_rate": 6.005486760613103e-06, "loss": 0.4391, "step": 3934 }, { "epoch": 0.26883924301427886, "grad_norm": 3.521440029144287, "learning_rate": 6.004945904256327e-06, "loss": 0.4298, "step": 3935 }, { "epoch": 0.2689075630252101, "grad_norm": 3.1421468257904053, "learning_rate": 6.004404925238993e-06, "loss": 0.3594, "step": 3936 }, { "epoch": 0.2689758830361413, "grad_norm": 3.464620351791382, "learning_rate": 6.003863823587594e-06, "loss": 0.358, "step": 3937 }, { "epoch": 0.2690442030470725, "grad_norm": 4.593307971954346, "learning_rate": 6.003322599328626e-06, "loss": 0.3964, "step": 3938 }, { "epoch": 0.2691125230580037, "grad_norm": 4.1917924880981445, "learning_rate": 6.002781252488589e-06, "loss": 0.4587, "step": 3939 }, { "epoch": 0.2691808430689349, "grad_norm": 3.7296736240386963, "learning_rate": 6.002239783093993e-06, "loss": 0.3647, "step": 3940 }, { "epoch": 0.26924916307986607, "grad_norm": 3.1230857372283936, "learning_rate": 6.001698191171352e-06, "loss": 0.3527, "step": 3941 }, { "epoch": 0.2693174830907973, "grad_norm": 3.248260498046875, "learning_rate": 6.001156476747187e-06, "loss": 0.3882, "step": 3942 }, { "epoch": 0.2693858031017285, "grad_norm": 4.631838798522949, "learning_rate": 6.000614639848024e-06, "loss": 0.3806, "step": 3943 }, { "epoch": 0.2694541231126597, "grad_norm": 4.646344184875488, "learning_rate": 6.000072680500394e-06, "loss": 0.5072, "step": 3944 }, { "epoch": 0.2695224431235909, "grad_norm": 4.73347806930542, "learning_rate": 5.999530598730837e-06, "loss": 0.3993, "step": 3945 }, { "epoch": 0.2695907631345221, "grad_norm": 4.559440612792969, "learning_rate": 5.998988394565897e-06, "loss": 0.5024, "step": 3946 }, { "epoch": 0.2696590831454533, "grad_norm": 4.455270767211914, "learning_rate": 5.9984460680321244e-06, "loss": 0.4105, "step": 3947 }, { "epoch": 0.2697274031563845, "grad_norm": 4.81535530090332, "learning_rate": 5.997903619156075e-06, "loss": 0.4154, "step": 3948 }, { "epoch": 0.2697957231673157, "grad_norm": 3.1323323249816895, "learning_rate": 5.997361047964312e-06, "loss": 0.3347, "step": 3949 }, { "epoch": 0.2698640431782469, "grad_norm": 4.973820209503174, "learning_rate": 5.9968183544834e-06, "loss": 0.4254, "step": 3950 }, { "epoch": 0.2699323631891781, "grad_norm": 4.505687236785889, "learning_rate": 5.9962755387399185e-06, "loss": 0.3442, "step": 3951 }, { "epoch": 0.27000068320010934, "grad_norm": 4.981281280517578, "learning_rate": 5.995732600760445e-06, "loss": 0.5047, "step": 3952 }, { "epoch": 0.2700690032110405, "grad_norm": 3.8025949001312256, "learning_rate": 5.9951895405715655e-06, "loss": 0.3801, "step": 3953 }, { "epoch": 0.2701373232219717, "grad_norm": 4.2140092849731445, "learning_rate": 5.9946463581998735e-06, "loss": 0.4045, "step": 3954 }, { "epoch": 0.2702056432329029, "grad_norm": 4.435161590576172, "learning_rate": 5.994103053671966e-06, "loss": 0.4467, "step": 3955 }, { "epoch": 0.2702739632438341, "grad_norm": 5.711728096008301, "learning_rate": 5.9935596270144485e-06, "loss": 0.4059, "step": 3956 }, { "epoch": 0.27034228325476534, "grad_norm": 3.591245651245117, "learning_rate": 5.99301607825393e-06, "loss": 0.3668, "step": 3957 }, { "epoch": 0.27041060326569655, "grad_norm": 3.740145444869995, "learning_rate": 5.992472407417026e-06, "loss": 0.4518, "step": 3958 }, { "epoch": 0.2704789232766277, "grad_norm": 3.345092535018921, "learning_rate": 5.991928614530361e-06, "loss": 0.4656, "step": 3959 }, { "epoch": 0.2705472432875589, "grad_norm": 3.4404900074005127, "learning_rate": 5.9913846996205605e-06, "loss": 0.4127, "step": 3960 }, { "epoch": 0.2706155632984901, "grad_norm": 4.636463165283203, "learning_rate": 5.990840662714261e-06, "loss": 0.4286, "step": 3961 }, { "epoch": 0.27068388330942134, "grad_norm": 4.071545600891113, "learning_rate": 5.990296503838101e-06, "loss": 0.5073, "step": 3962 }, { "epoch": 0.27075220332035255, "grad_norm": 3.664869546890259, "learning_rate": 5.989752223018727e-06, "loss": 0.3954, "step": 3963 }, { "epoch": 0.27082052333128376, "grad_norm": 3.978952646255493, "learning_rate": 5.989207820282791e-06, "loss": 0.4121, "step": 3964 }, { "epoch": 0.2708888433422149, "grad_norm": 3.954744577407837, "learning_rate": 5.98866329565695e-06, "loss": 0.3974, "step": 3965 }, { "epoch": 0.2709571633531461, "grad_norm": 3.2332208156585693, "learning_rate": 5.98811864916787e-06, "loss": 0.3448, "step": 3966 }, { "epoch": 0.27102548336407734, "grad_norm": 3.9479196071624756, "learning_rate": 5.987573880842219e-06, "loss": 0.4082, "step": 3967 }, { "epoch": 0.27109380337500855, "grad_norm": 2.8715693950653076, "learning_rate": 5.987028990706672e-06, "loss": 0.2987, "step": 3968 }, { "epoch": 0.27116212338593976, "grad_norm": 4.17053747177124, "learning_rate": 5.986483978787915e-06, "loss": 0.5156, "step": 3969 }, { "epoch": 0.27123044339687097, "grad_norm": 3.50920033454895, "learning_rate": 5.98593884511263e-06, "loss": 0.3869, "step": 3970 }, { "epoch": 0.2712987634078021, "grad_norm": 5.979689121246338, "learning_rate": 5.985393589707516e-06, "loss": 0.391, "step": 3971 }, { "epoch": 0.27136708341873333, "grad_norm": 2.4963412284851074, "learning_rate": 5.9848482125992706e-06, "loss": 0.2626, "step": 3972 }, { "epoch": 0.27143540342966455, "grad_norm": 5.222360610961914, "learning_rate": 5.984302713814599e-06, "loss": 0.4671, "step": 3973 }, { "epoch": 0.27150372344059576, "grad_norm": 4.208815097808838, "learning_rate": 5.983757093380212e-06, "loss": 0.3902, "step": 3974 }, { "epoch": 0.27157204345152697, "grad_norm": 2.9504706859588623, "learning_rate": 5.9832113513228305e-06, "loss": 0.3271, "step": 3975 }, { "epoch": 0.2716403634624582, "grad_norm": 4.051161289215088, "learning_rate": 5.982665487669176e-06, "loss": 0.4247, "step": 3976 }, { "epoch": 0.27170868347338933, "grad_norm": 3.8801076412200928, "learning_rate": 5.982119502445976e-06, "loss": 0.4977, "step": 3977 }, { "epoch": 0.27177700348432055, "grad_norm": 4.338613510131836, "learning_rate": 5.981573395679969e-06, "loss": 0.4151, "step": 3978 }, { "epoch": 0.27184532349525176, "grad_norm": 3.7249631881713867, "learning_rate": 5.981027167397895e-06, "loss": 0.3847, "step": 3979 }, { "epoch": 0.27191364350618297, "grad_norm": 4.366867542266846, "learning_rate": 5.980480817626503e-06, "loss": 0.3557, "step": 3980 }, { "epoch": 0.2719819635171142, "grad_norm": 4.118197441101074, "learning_rate": 5.979934346392544e-06, "loss": 0.4737, "step": 3981 }, { "epoch": 0.2720502835280454, "grad_norm": 3.81136417388916, "learning_rate": 5.979387753722779e-06, "loss": 0.4523, "step": 3982 }, { "epoch": 0.27211860353897654, "grad_norm": 3.9353830814361572, "learning_rate": 5.978841039643971e-06, "loss": 0.4083, "step": 3983 }, { "epoch": 0.27218692354990776, "grad_norm": 4.292784690856934, "learning_rate": 5.978294204182893e-06, "loss": 0.3444, "step": 3984 }, { "epoch": 0.27225524356083897, "grad_norm": 3.587226152420044, "learning_rate": 5.977747247366323e-06, "loss": 0.372, "step": 3985 }, { "epoch": 0.2723235635717702, "grad_norm": 3.9813737869262695, "learning_rate": 5.977200169221041e-06, "loss": 0.3592, "step": 3986 }, { "epoch": 0.2723918835827014, "grad_norm": 4.984121322631836, "learning_rate": 5.976652969773838e-06, "loss": 0.4186, "step": 3987 }, { "epoch": 0.2724602035936326, "grad_norm": 3.8172125816345215, "learning_rate": 5.976105649051509e-06, "loss": 0.3131, "step": 3988 }, { "epoch": 0.27252852360456375, "grad_norm": 4.419304847717285, "learning_rate": 5.975558207080854e-06, "loss": 0.4111, "step": 3989 }, { "epoch": 0.27259684361549497, "grad_norm": 4.462096214294434, "learning_rate": 5.975010643888681e-06, "loss": 0.3866, "step": 3990 }, { "epoch": 0.2726651636264262, "grad_norm": 3.2255284786224365, "learning_rate": 5.974462959501799e-06, "loss": 0.3003, "step": 3991 }, { "epoch": 0.2727334836373574, "grad_norm": 3.9739387035369873, "learning_rate": 5.9739151539470336e-06, "loss": 0.3831, "step": 3992 }, { "epoch": 0.2728018036482886, "grad_norm": 2.6868112087249756, "learning_rate": 5.973367227251203e-06, "loss": 0.2811, "step": 3993 }, { "epoch": 0.2728701236592198, "grad_norm": 3.2263166904449463, "learning_rate": 5.972819179441141e-06, "loss": 0.3785, "step": 3994 }, { "epoch": 0.27293844367015097, "grad_norm": 3.538971185684204, "learning_rate": 5.972271010543682e-06, "loss": 0.3148, "step": 3995 }, { "epoch": 0.2730067636810822, "grad_norm": 3.860974073410034, "learning_rate": 5.97172272058567e-06, "loss": 0.3684, "step": 3996 }, { "epoch": 0.2730750836920134, "grad_norm": 3.2389132976531982, "learning_rate": 5.971174309593953e-06, "loss": 0.2988, "step": 3997 }, { "epoch": 0.2731434037029446, "grad_norm": 4.375298500061035, "learning_rate": 5.970625777595384e-06, "loss": 0.3903, "step": 3998 }, { "epoch": 0.2732117237138758, "grad_norm": 3.9762816429138184, "learning_rate": 5.970077124616826e-06, "loss": 0.3913, "step": 3999 }, { "epoch": 0.273280043724807, "grad_norm": 4.401137828826904, "learning_rate": 5.969528350685141e-06, "loss": 0.4007, "step": 4000 }, { "epoch": 0.2733483637357382, "grad_norm": 5.666378021240234, "learning_rate": 5.968979455827204e-06, "loss": 0.3379, "step": 4001 }, { "epoch": 0.2734166837466694, "grad_norm": 3.301807403564453, "learning_rate": 5.968430440069893e-06, "loss": 0.454, "step": 4002 }, { "epoch": 0.2734850037576006, "grad_norm": 4.057171821594238, "learning_rate": 5.96788130344009e-06, "loss": 0.3687, "step": 4003 }, { "epoch": 0.2735533237685318, "grad_norm": 4.411857604980469, "learning_rate": 5.967332045964687e-06, "loss": 0.3859, "step": 4004 }, { "epoch": 0.273621643779463, "grad_norm": 4.1988725662231445, "learning_rate": 5.9667826676705775e-06, "loss": 0.4732, "step": 4005 }, { "epoch": 0.27368996379039423, "grad_norm": 5.294785022735596, "learning_rate": 5.966233168584664e-06, "loss": 0.3325, "step": 4006 }, { "epoch": 0.2737582838013254, "grad_norm": 4.037930488586426, "learning_rate": 5.965683548733855e-06, "loss": 0.4096, "step": 4007 }, { "epoch": 0.2738266038122566, "grad_norm": 3.0900495052337646, "learning_rate": 5.9651338081450625e-06, "loss": 0.3394, "step": 4008 }, { "epoch": 0.2738949238231878, "grad_norm": 6.28211784362793, "learning_rate": 5.964583946845206e-06, "loss": 0.4715, "step": 4009 }, { "epoch": 0.273963243834119, "grad_norm": 4.0605316162109375, "learning_rate": 5.964033964861211e-06, "loss": 0.3568, "step": 4010 }, { "epoch": 0.27403156384505023, "grad_norm": 4.407546520233154, "learning_rate": 5.96348386222001e-06, "loss": 0.3293, "step": 4011 }, { "epoch": 0.27409988385598144, "grad_norm": 4.304399013519287, "learning_rate": 5.9629336389485375e-06, "loss": 0.2723, "step": 4012 }, { "epoch": 0.2741682038669126, "grad_norm": 4.546482563018799, "learning_rate": 5.962383295073737e-06, "loss": 0.4405, "step": 4013 }, { "epoch": 0.2742365238778438, "grad_norm": 3.7906062602996826, "learning_rate": 5.961832830622559e-06, "loss": 0.2953, "step": 4014 }, { "epoch": 0.274304843888775, "grad_norm": 5.827200412750244, "learning_rate": 5.961282245621958e-06, "loss": 0.5494, "step": 4015 }, { "epoch": 0.27437316389970623, "grad_norm": 5.069851875305176, "learning_rate": 5.9607315400988945e-06, "loss": 0.4794, "step": 4016 }, { "epoch": 0.27444148391063744, "grad_norm": 4.842767715454102, "learning_rate": 5.960180714080334e-06, "loss": 0.4588, "step": 4017 }, { "epoch": 0.27450980392156865, "grad_norm": 4.30730676651001, "learning_rate": 5.95962976759325e-06, "loss": 0.5217, "step": 4018 }, { "epoch": 0.2745781239324998, "grad_norm": 4.086268424987793, "learning_rate": 5.95907870066462e-06, "loss": 0.4464, "step": 4019 }, { "epoch": 0.274646443943431, "grad_norm": 3.27728533744812, "learning_rate": 5.958527513321429e-06, "loss": 0.3518, "step": 4020 }, { "epoch": 0.27471476395436223, "grad_norm": 4.6117048263549805, "learning_rate": 5.957976205590667e-06, "loss": 0.3781, "step": 4021 }, { "epoch": 0.27478308396529344, "grad_norm": 4.115276336669922, "learning_rate": 5.9574247774993305e-06, "loss": 0.4065, "step": 4022 }, { "epoch": 0.27485140397622465, "grad_norm": 3.9994256496429443, "learning_rate": 5.956873229074421e-06, "loss": 0.4204, "step": 4023 }, { "epoch": 0.27491972398715586, "grad_norm": 4.365489959716797, "learning_rate": 5.956321560342947e-06, "loss": 0.474, "step": 4024 }, { "epoch": 0.274988043998087, "grad_norm": 3.075955390930176, "learning_rate": 5.95576977133192e-06, "loss": 0.3989, "step": 4025 }, { "epoch": 0.27505636400901823, "grad_norm": 4.446264743804932, "learning_rate": 5.955217862068363e-06, "loss": 0.3862, "step": 4026 }, { "epoch": 0.27512468401994944, "grad_norm": 4.215246200561523, "learning_rate": 5.9546658325793e-06, "loss": 0.435, "step": 4027 }, { "epoch": 0.27519300403088065, "grad_norm": 3.9906184673309326, "learning_rate": 5.954113682891762e-06, "loss": 0.3745, "step": 4028 }, { "epoch": 0.27526132404181186, "grad_norm": 4.435620307922363, "learning_rate": 5.953561413032786e-06, "loss": 0.389, "step": 4029 }, { "epoch": 0.27532964405274307, "grad_norm": 4.004550457000732, "learning_rate": 5.953009023029416e-06, "loss": 0.412, "step": 4030 }, { "epoch": 0.2753979640636742, "grad_norm": 3.6426687240600586, "learning_rate": 5.952456512908701e-06, "loss": 0.371, "step": 4031 }, { "epoch": 0.27546628407460544, "grad_norm": 4.03297233581543, "learning_rate": 5.951903882697696e-06, "loss": 0.3814, "step": 4032 }, { "epoch": 0.27553460408553665, "grad_norm": 4.798764228820801, "learning_rate": 5.951351132423462e-06, "loss": 0.4687, "step": 4033 }, { "epoch": 0.27560292409646786, "grad_norm": 4.0650129318237305, "learning_rate": 5.950798262113065e-06, "loss": 0.3086, "step": 4034 }, { "epoch": 0.27567124410739907, "grad_norm": 4.025753498077393, "learning_rate": 5.950245271793578e-06, "loss": 0.3581, "step": 4035 }, { "epoch": 0.2757395641183303, "grad_norm": 5.112277507781982, "learning_rate": 5.94969216149208e-06, "loss": 0.3328, "step": 4036 }, { "epoch": 0.27580788412926144, "grad_norm": 3.9666056632995605, "learning_rate": 5.949138931235653e-06, "loss": 0.3311, "step": 4037 }, { "epoch": 0.27587620414019265, "grad_norm": 4.809014320373535, "learning_rate": 5.9485855810513905e-06, "loss": 0.4105, "step": 4038 }, { "epoch": 0.27594452415112386, "grad_norm": 4.088703155517578, "learning_rate": 5.948032110966388e-06, "loss": 0.4753, "step": 4039 }, { "epoch": 0.27601284416205507, "grad_norm": 3.0994040966033936, "learning_rate": 5.947478521007745e-06, "loss": 0.3273, "step": 4040 }, { "epoch": 0.2760811641729863, "grad_norm": 3.8492801189422607, "learning_rate": 5.946924811202571e-06, "loss": 0.365, "step": 4041 }, { "epoch": 0.2761494841839175, "grad_norm": 3.312016487121582, "learning_rate": 5.9463709815779806e-06, "loss": 0.2601, "step": 4042 }, { "epoch": 0.27621780419484865, "grad_norm": 5.236012935638428, "learning_rate": 5.945817032161092e-06, "loss": 0.4803, "step": 4043 }, { "epoch": 0.27628612420577986, "grad_norm": 3.308568239212036, "learning_rate": 5.945262962979031e-06, "loss": 0.3568, "step": 4044 }, { "epoch": 0.27635444421671107, "grad_norm": 3.7556772232055664, "learning_rate": 5.944708774058929e-06, "loss": 0.3689, "step": 4045 }, { "epoch": 0.2764227642276423, "grad_norm": 3.451296806335449, "learning_rate": 5.944154465427922e-06, "loss": 0.3857, "step": 4046 }, { "epoch": 0.2764910842385735, "grad_norm": 4.0406494140625, "learning_rate": 5.943600037113155e-06, "loss": 0.4349, "step": 4047 }, { "epoch": 0.2765594042495047, "grad_norm": 2.9092438220977783, "learning_rate": 5.943045489141775e-06, "loss": 0.3418, "step": 4048 }, { "epoch": 0.27662772426043586, "grad_norm": 3.8136703968048096, "learning_rate": 5.94249082154094e-06, "loss": 0.4058, "step": 4049 }, { "epoch": 0.27669604427136707, "grad_norm": 3.565182685852051, "learning_rate": 5.941936034337806e-06, "loss": 0.395, "step": 4050 }, { "epoch": 0.2767643642822983, "grad_norm": 4.863437652587891, "learning_rate": 5.941381127559543e-06, "loss": 0.4517, "step": 4051 }, { "epoch": 0.2768326842932295, "grad_norm": 4.1873273849487305, "learning_rate": 5.940826101233321e-06, "loss": 0.3935, "step": 4052 }, { "epoch": 0.2769010043041607, "grad_norm": 5.151226043701172, "learning_rate": 5.940270955386321e-06, "loss": 0.4878, "step": 4053 }, { "epoch": 0.2769693243150919, "grad_norm": 3.4618871212005615, "learning_rate": 5.939715690045724e-06, "loss": 0.3929, "step": 4054 }, { "epoch": 0.27703764432602307, "grad_norm": 5.457845211029053, "learning_rate": 5.939160305238722e-06, "loss": 0.3836, "step": 4055 }, { "epoch": 0.2771059643369543, "grad_norm": 3.3931236267089844, "learning_rate": 5.938604800992508e-06, "loss": 0.4003, "step": 4056 }, { "epoch": 0.2771742843478855, "grad_norm": 4.548960208892822, "learning_rate": 5.938049177334287e-06, "loss": 0.3494, "step": 4057 }, { "epoch": 0.2772426043588167, "grad_norm": 3.878913402557373, "learning_rate": 5.937493434291265e-06, "loss": 0.432, "step": 4058 }, { "epoch": 0.2773109243697479, "grad_norm": 3.8147294521331787, "learning_rate": 5.936937571890655e-06, "loss": 0.48, "step": 4059 }, { "epoch": 0.2773792443806791, "grad_norm": 4.344320774078369, "learning_rate": 5.936381590159676e-06, "loss": 0.3408, "step": 4060 }, { "epoch": 0.2774475643916103, "grad_norm": 3.3566012382507324, "learning_rate": 5.935825489125553e-06, "loss": 0.3197, "step": 4061 }, { "epoch": 0.2775158844025415, "grad_norm": 3.52130389213562, "learning_rate": 5.935269268815517e-06, "loss": 0.3146, "step": 4062 }, { "epoch": 0.2775842044134727, "grad_norm": 3.727890729904175, "learning_rate": 5.934712929256805e-06, "loss": 0.3665, "step": 4063 }, { "epoch": 0.2776525244244039, "grad_norm": 4.036701679229736, "learning_rate": 5.934156470476658e-06, "loss": 0.345, "step": 4064 }, { "epoch": 0.2777208444353351, "grad_norm": 4.492201805114746, "learning_rate": 5.933599892502326e-06, "loss": 0.3707, "step": 4065 }, { "epoch": 0.27778916444626633, "grad_norm": 4.8561835289001465, "learning_rate": 5.933043195361063e-06, "loss": 0.3381, "step": 4066 }, { "epoch": 0.2778574844571975, "grad_norm": 3.6175174713134766, "learning_rate": 5.932486379080128e-06, "loss": 0.3555, "step": 4067 }, { "epoch": 0.2779258044681287, "grad_norm": 3.382176399230957, "learning_rate": 5.931929443686788e-06, "loss": 0.4236, "step": 4068 }, { "epoch": 0.2779941244790599, "grad_norm": 4.059696674346924, "learning_rate": 5.931372389208313e-06, "loss": 0.3591, "step": 4069 }, { "epoch": 0.2780624444899911, "grad_norm": 3.009996175765991, "learning_rate": 5.9308152156719824e-06, "loss": 0.2935, "step": 4070 }, { "epoch": 0.27813076450092233, "grad_norm": 7.007876873016357, "learning_rate": 5.9302579231050775e-06, "loss": 0.3701, "step": 4071 }, { "epoch": 0.27819908451185354, "grad_norm": 4.161542892456055, "learning_rate": 5.9297005115348895e-06, "loss": 0.4062, "step": 4072 }, { "epoch": 0.2782674045227847, "grad_norm": 4.082624912261963, "learning_rate": 5.929142980988712e-06, "loss": 0.3621, "step": 4073 }, { "epoch": 0.2783357245337159, "grad_norm": 3.37312388420105, "learning_rate": 5.928585331493845e-06, "loss": 0.3284, "step": 4074 }, { "epoch": 0.2784040445446471, "grad_norm": 4.77215051651001, "learning_rate": 5.928027563077599e-06, "loss": 0.3988, "step": 4075 }, { "epoch": 0.27847236455557833, "grad_norm": 4.88916540145874, "learning_rate": 5.927469675767281e-06, "loss": 0.4662, "step": 4076 }, { "epoch": 0.27854068456650954, "grad_norm": 4.254429817199707, "learning_rate": 5.926911669590214e-06, "loss": 0.4847, "step": 4077 }, { "epoch": 0.27860900457744076, "grad_norm": 3.4992520809173584, "learning_rate": 5.926353544573719e-06, "loss": 0.3726, "step": 4078 }, { "epoch": 0.2786773245883719, "grad_norm": 4.329829216003418, "learning_rate": 5.925795300745128e-06, "loss": 0.3163, "step": 4079 }, { "epoch": 0.2787456445993031, "grad_norm": 3.859572649002075, "learning_rate": 5.925236938131773e-06, "loss": 0.3881, "step": 4080 }, { "epoch": 0.27881396461023433, "grad_norm": 5.581212043762207, "learning_rate": 5.924678456761001e-06, "loss": 0.5128, "step": 4081 }, { "epoch": 0.27888228462116554, "grad_norm": 4.079759120941162, "learning_rate": 5.924119856660156e-06, "loss": 0.3953, "step": 4082 }, { "epoch": 0.27895060463209675, "grad_norm": 4.09417200088501, "learning_rate": 5.923561137856591e-06, "loss": 0.383, "step": 4083 }, { "epoch": 0.27901892464302797, "grad_norm": 4.203803539276123, "learning_rate": 5.923002300377666e-06, "loss": 0.418, "step": 4084 }, { "epoch": 0.2790872446539591, "grad_norm": 4.082980632781982, "learning_rate": 5.922443344250746e-06, "loss": 0.4236, "step": 4085 }, { "epoch": 0.27915556466489033, "grad_norm": 4.160694122314453, "learning_rate": 5.921884269503201e-06, "loss": 0.41, "step": 4086 }, { "epoch": 0.27922388467582154, "grad_norm": 3.598210573196411, "learning_rate": 5.921325076162407e-06, "loss": 0.3956, "step": 4087 }, { "epoch": 0.27929220468675275, "grad_norm": 4.3116021156311035, "learning_rate": 5.920765764255746e-06, "loss": 0.4947, "step": 4088 }, { "epoch": 0.27936052469768397, "grad_norm": 3.5066802501678467, "learning_rate": 5.9202063338106085e-06, "loss": 0.3496, "step": 4089 }, { "epoch": 0.2794288447086152, "grad_norm": 4.727373123168945, "learning_rate": 5.9196467848543855e-06, "loss": 0.4036, "step": 4090 }, { "epoch": 0.27949716471954633, "grad_norm": 3.904362440109253, "learning_rate": 5.919087117414478e-06, "loss": 0.3503, "step": 4091 }, { "epoch": 0.27956548473047754, "grad_norm": 3.1624224185943604, "learning_rate": 5.9185273315182905e-06, "loss": 0.3402, "step": 4092 }, { "epoch": 0.27963380474140875, "grad_norm": 3.673276901245117, "learning_rate": 5.917967427193237e-06, "loss": 0.318, "step": 4093 }, { "epoch": 0.27970212475233996, "grad_norm": 4.244298458099365, "learning_rate": 5.91740740446673e-06, "loss": 0.455, "step": 4094 }, { "epoch": 0.2797704447632712, "grad_norm": 3.880255937576294, "learning_rate": 5.916847263366195e-06, "loss": 0.4585, "step": 4095 }, { "epoch": 0.2798387647742024, "grad_norm": 3.3450660705566406, "learning_rate": 5.916287003919061e-06, "loss": 0.3793, "step": 4096 }, { "epoch": 0.27990708478513354, "grad_norm": 4.166553974151611, "learning_rate": 5.915726626152762e-06, "loss": 0.4269, "step": 4097 }, { "epoch": 0.27997540479606475, "grad_norm": 3.5903494358062744, "learning_rate": 5.915166130094738e-06, "loss": 0.4035, "step": 4098 }, { "epoch": 0.28004372480699596, "grad_norm": 3.1957125663757324, "learning_rate": 5.914605515772435e-06, "loss": 0.2892, "step": 4099 }, { "epoch": 0.2801120448179272, "grad_norm": 4.01315975189209, "learning_rate": 5.914044783213304e-06, "loss": 0.5144, "step": 4100 }, { "epoch": 0.2801803648288584, "grad_norm": 4.695545673370361, "learning_rate": 5.913483932444803e-06, "loss": 0.4083, "step": 4101 }, { "epoch": 0.2802486848397896, "grad_norm": 3.583845376968384, "learning_rate": 5.912922963494397e-06, "loss": 0.3035, "step": 4102 }, { "epoch": 0.28031700485072075, "grad_norm": 4.874661922454834, "learning_rate": 5.9123618763895535e-06, "loss": 0.38, "step": 4103 }, { "epoch": 0.28038532486165196, "grad_norm": 4.74495792388916, "learning_rate": 5.911800671157748e-06, "loss": 0.4005, "step": 4104 }, { "epoch": 0.2804536448725832, "grad_norm": 4.07474422454834, "learning_rate": 5.911239347826461e-06, "loss": 0.3664, "step": 4105 }, { "epoch": 0.2805219648835144, "grad_norm": 3.355548143386841, "learning_rate": 5.910677906423178e-06, "loss": 0.3537, "step": 4106 }, { "epoch": 0.2805902848944456, "grad_norm": 3.3912317752838135, "learning_rate": 5.9101163469753925e-06, "loss": 0.4292, "step": 4107 }, { "epoch": 0.2806586049053768, "grad_norm": 8.310935974121094, "learning_rate": 5.909554669510604e-06, "loss": 0.4209, "step": 4108 }, { "epoch": 0.28072692491630796, "grad_norm": 3.5823731422424316, "learning_rate": 5.908992874056314e-06, "loss": 0.3853, "step": 4109 }, { "epoch": 0.2807952449272392, "grad_norm": 4.4874267578125, "learning_rate": 5.908430960640034e-06, "loss": 0.4853, "step": 4110 }, { "epoch": 0.2808635649381704, "grad_norm": 3.6821279525756836, "learning_rate": 5.907868929289277e-06, "loss": 0.2652, "step": 4111 }, { "epoch": 0.2809318849491016, "grad_norm": 4.45242977142334, "learning_rate": 5.907306780031566e-06, "loss": 0.4681, "step": 4112 }, { "epoch": 0.2810002049600328, "grad_norm": 3.619967460632324, "learning_rate": 5.906744512894426e-06, "loss": 0.4694, "step": 4113 }, { "epoch": 0.281068524970964, "grad_norm": 4.1831374168396, "learning_rate": 5.906182127905394e-06, "loss": 0.4428, "step": 4114 }, { "epoch": 0.2811368449818952, "grad_norm": 5.436455249786377, "learning_rate": 5.905619625092004e-06, "loss": 0.4317, "step": 4115 }, { "epoch": 0.2812051649928264, "grad_norm": 4.421023368835449, "learning_rate": 5.905057004481803e-06, "loss": 0.3031, "step": 4116 }, { "epoch": 0.2812734850037576, "grad_norm": 3.9394142627716064, "learning_rate": 5.904494266102339e-06, "loss": 0.5171, "step": 4117 }, { "epoch": 0.2813418050146888, "grad_norm": 4.20835542678833, "learning_rate": 5.90393140998117e-06, "loss": 0.3678, "step": 4118 }, { "epoch": 0.28141012502562, "grad_norm": 3.9881036281585693, "learning_rate": 5.903368436145854e-06, "loss": 0.3253, "step": 4119 }, { "epoch": 0.28147844503655123, "grad_norm": 4.6313958168029785, "learning_rate": 5.902805344623964e-06, "loss": 0.4552, "step": 4120 }, { "epoch": 0.2815467650474824, "grad_norm": 3.230910539627075, "learning_rate": 5.902242135443068e-06, "loss": 0.3787, "step": 4121 }, { "epoch": 0.2816150850584136, "grad_norm": 7.164751052856445, "learning_rate": 5.901678808630747e-06, "loss": 0.3565, "step": 4122 }, { "epoch": 0.2816834050693448, "grad_norm": 4.096643924713135, "learning_rate": 5.901115364214586e-06, "loss": 0.436, "step": 4123 }, { "epoch": 0.281751725080276, "grad_norm": 4.176559925079346, "learning_rate": 5.9005518022221735e-06, "loss": 0.381, "step": 4124 }, { "epoch": 0.2818200450912072, "grad_norm": 4.339180946350098, "learning_rate": 5.899988122681107e-06, "loss": 0.4135, "step": 4125 }, { "epoch": 0.28188836510213844, "grad_norm": 4.654956340789795, "learning_rate": 5.8994243256189895e-06, "loss": 0.4215, "step": 4126 }, { "epoch": 0.2819566851130696, "grad_norm": 4.388355255126953, "learning_rate": 5.898860411063426e-06, "loss": 0.4503, "step": 4127 }, { "epoch": 0.2820250051240008, "grad_norm": 5.614105701446533, "learning_rate": 5.898296379042032e-06, "loss": 0.3869, "step": 4128 }, { "epoch": 0.282093325134932, "grad_norm": 4.155330657958984, "learning_rate": 5.897732229582426e-06, "loss": 0.414, "step": 4129 }, { "epoch": 0.2821616451458632, "grad_norm": 4.411938190460205, "learning_rate": 5.897167962712232e-06, "loss": 0.3437, "step": 4130 }, { "epoch": 0.28222996515679444, "grad_norm": 4.134944915771484, "learning_rate": 5.896603578459083e-06, "loss": 0.5194, "step": 4131 }, { "epoch": 0.28229828516772565, "grad_norm": 4.119423866271973, "learning_rate": 5.896039076850613e-06, "loss": 0.5178, "step": 4132 }, { "epoch": 0.2823666051786568, "grad_norm": 3.7462756633758545, "learning_rate": 5.895474457914465e-06, "loss": 0.3861, "step": 4133 }, { "epoch": 0.282434925189588, "grad_norm": 4.387585639953613, "learning_rate": 5.894909721678287e-06, "loss": 0.4193, "step": 4134 }, { "epoch": 0.2825032452005192, "grad_norm": 4.229567050933838, "learning_rate": 5.894344868169733e-06, "loss": 0.481, "step": 4135 }, { "epoch": 0.28257156521145044, "grad_norm": 4.38010835647583, "learning_rate": 5.893779897416462e-06, "loss": 0.7051, "step": 4136 }, { "epoch": 0.28263988522238165, "grad_norm": 3.0806803703308105, "learning_rate": 5.893214809446137e-06, "loss": 0.3988, "step": 4137 }, { "epoch": 0.28270820523331286, "grad_norm": 4.480249404907227, "learning_rate": 5.892649604286434e-06, "loss": 0.3396, "step": 4138 }, { "epoch": 0.282776525244244, "grad_norm": 4.304830551147461, "learning_rate": 5.892084281965024e-06, "loss": 0.4163, "step": 4139 }, { "epoch": 0.2828448452551752, "grad_norm": 3.155257225036621, "learning_rate": 5.8915188425095934e-06, "loss": 0.4234, "step": 4140 }, { "epoch": 0.28291316526610644, "grad_norm": 3.4109840393066406, "learning_rate": 5.890953285947828e-06, "loss": 0.3048, "step": 4141 }, { "epoch": 0.28298148527703765, "grad_norm": 2.8869242668151855, "learning_rate": 5.890387612307423e-06, "loss": 0.3628, "step": 4142 }, { "epoch": 0.28304980528796886, "grad_norm": 4.058587074279785, "learning_rate": 5.889821821616076e-06, "loss": 0.4841, "step": 4143 }, { "epoch": 0.28311812529890007, "grad_norm": 3.9690732955932617, "learning_rate": 5.889255913901494e-06, "loss": 0.3947, "step": 4144 }, { "epoch": 0.2831864453098312, "grad_norm": 4.751044273376465, "learning_rate": 5.888689889191388e-06, "loss": 0.4491, "step": 4145 }, { "epoch": 0.28325476532076244, "grad_norm": 3.6536011695861816, "learning_rate": 5.888123747513475e-06, "loss": 0.4282, "step": 4146 }, { "epoch": 0.28332308533169365, "grad_norm": 4.790407657623291, "learning_rate": 5.887557488895475e-06, "loss": 0.4733, "step": 4147 }, { "epoch": 0.28339140534262486, "grad_norm": 3.7172799110412598, "learning_rate": 5.8869911133651195e-06, "loss": 0.3982, "step": 4148 }, { "epoch": 0.28345972535355607, "grad_norm": 4.181422710418701, "learning_rate": 5.88642462095014e-06, "loss": 0.5058, "step": 4149 }, { "epoch": 0.2835280453644873, "grad_norm": 4.060210227966309, "learning_rate": 5.885858011678277e-06, "loss": 0.4392, "step": 4150 }, { "epoch": 0.28359636537541844, "grad_norm": 3.9166455268859863, "learning_rate": 5.885291285577276e-06, "loss": 0.4316, "step": 4151 }, { "epoch": 0.28366468538634965, "grad_norm": 3.7107722759246826, "learning_rate": 5.884724442674887e-06, "loss": 0.4942, "step": 4152 }, { "epoch": 0.28373300539728086, "grad_norm": 2.8987317085266113, "learning_rate": 5.884157482998869e-06, "loss": 0.2465, "step": 4153 }, { "epoch": 0.28380132540821207, "grad_norm": 2.9696524143218994, "learning_rate": 5.883590406576983e-06, "loss": 0.3518, "step": 4154 }, { "epoch": 0.2838696454191433, "grad_norm": 3.32281756401062, "learning_rate": 5.883023213436996e-06, "loss": 0.3108, "step": 4155 }, { "epoch": 0.2839379654300745, "grad_norm": 4.079299449920654, "learning_rate": 5.882455903606685e-06, "loss": 0.383, "step": 4156 }, { "epoch": 0.28400628544100565, "grad_norm": 3.898259401321411, "learning_rate": 5.881888477113827e-06, "loss": 0.3277, "step": 4157 }, { "epoch": 0.28407460545193686, "grad_norm": 4.352470874786377, "learning_rate": 5.8813209339862084e-06, "loss": 0.3767, "step": 4158 }, { "epoch": 0.28414292546286807, "grad_norm": 4.791748046875, "learning_rate": 5.880753274251621e-06, "loss": 0.5498, "step": 4159 }, { "epoch": 0.2842112454737993, "grad_norm": 4.0800089836120605, "learning_rate": 5.88018549793786e-06, "loss": 0.4176, "step": 4160 }, { "epoch": 0.2842795654847305, "grad_norm": 3.3662829399108887, "learning_rate": 5.8796176050727294e-06, "loss": 0.3163, "step": 4161 }, { "epoch": 0.2843478854956617, "grad_norm": 4.105982780456543, "learning_rate": 5.879049595684035e-06, "loss": 0.4438, "step": 4162 }, { "epoch": 0.28441620550659286, "grad_norm": 3.5258655548095703, "learning_rate": 5.878481469799595e-06, "loss": 0.3928, "step": 4163 }, { "epoch": 0.28448452551752407, "grad_norm": 4.5570173263549805, "learning_rate": 5.877913227447225e-06, "loss": 0.3904, "step": 4164 }, { "epoch": 0.2845528455284553, "grad_norm": 3.8273723125457764, "learning_rate": 5.877344868654752e-06, "loss": 0.3288, "step": 4165 }, { "epoch": 0.2846211655393865, "grad_norm": 4.763164520263672, "learning_rate": 5.876776393450008e-06, "loss": 0.4547, "step": 4166 }, { "epoch": 0.2846894855503177, "grad_norm": 3.707573175430298, "learning_rate": 5.876207801860827e-06, "loss": 0.3137, "step": 4167 }, { "epoch": 0.2847578055612489, "grad_norm": 5.234945774078369, "learning_rate": 5.8756390939150535e-06, "loss": 0.4322, "step": 4168 }, { "epoch": 0.28482612557218007, "grad_norm": 3.462317705154419, "learning_rate": 5.875070269640536e-06, "loss": 0.3675, "step": 4169 }, { "epoch": 0.2848944455831113, "grad_norm": 2.9132893085479736, "learning_rate": 5.874501329065126e-06, "loss": 0.3075, "step": 4170 }, { "epoch": 0.2849627655940425, "grad_norm": 5.364025592803955, "learning_rate": 5.873932272216685e-06, "loss": 0.438, "step": 4171 }, { "epoch": 0.2850310856049737, "grad_norm": 4.172837734222412, "learning_rate": 5.873363099123078e-06, "loss": 0.4059, "step": 4172 }, { "epoch": 0.2850994056159049, "grad_norm": 3.1678593158721924, "learning_rate": 5.872793809812175e-06, "loss": 0.3237, "step": 4173 }, { "epoch": 0.2851677256268361, "grad_norm": 3.2247049808502197, "learning_rate": 5.872224404311852e-06, "loss": 0.342, "step": 4174 }, { "epoch": 0.2852360456377673, "grad_norm": 4.509390354156494, "learning_rate": 5.871654882649995e-06, "loss": 0.5703, "step": 4175 }, { "epoch": 0.2853043656486985, "grad_norm": 4.26711893081665, "learning_rate": 5.8710852448544875e-06, "loss": 0.3791, "step": 4176 }, { "epoch": 0.2853726856596297, "grad_norm": 5.583741188049316, "learning_rate": 5.870515490953226e-06, "loss": 0.5357, "step": 4177 }, { "epoch": 0.2854410056705609, "grad_norm": 4.253556728363037, "learning_rate": 5.869945620974108e-06, "loss": 0.4636, "step": 4178 }, { "epoch": 0.2855093256814921, "grad_norm": 3.5600693225860596, "learning_rate": 5.869375634945039e-06, "loss": 0.3427, "step": 4179 }, { "epoch": 0.28557764569242333, "grad_norm": 3.877490997314453, "learning_rate": 5.868805532893931e-06, "loss": 0.3625, "step": 4180 }, { "epoch": 0.2856459657033545, "grad_norm": 4.069847106933594, "learning_rate": 5.8682353148487e-06, "loss": 0.4122, "step": 4181 }, { "epoch": 0.2857142857142857, "grad_norm": 3.36189866065979, "learning_rate": 5.867664980837266e-06, "loss": 0.4316, "step": 4182 }, { "epoch": 0.2857826057252169, "grad_norm": 4.067906379699707, "learning_rate": 5.86709453088756e-06, "loss": 0.3375, "step": 4183 }, { "epoch": 0.2858509257361481, "grad_norm": 3.5665764808654785, "learning_rate": 5.866523965027512e-06, "loss": 0.336, "step": 4184 }, { "epoch": 0.28591924574707933, "grad_norm": 3.808854579925537, "learning_rate": 5.865953283285064e-06, "loss": 0.4411, "step": 4185 }, { "epoch": 0.28598756575801054, "grad_norm": 3.5181496143341064, "learning_rate": 5.865382485688159e-06, "loss": 0.3285, "step": 4186 }, { "epoch": 0.2860558857689417, "grad_norm": 3.1880855560302734, "learning_rate": 5.864811572264748e-06, "loss": 0.3301, "step": 4187 }, { "epoch": 0.2861242057798729, "grad_norm": 4.022672176361084, "learning_rate": 5.864240543042786e-06, "loss": 0.4518, "step": 4188 }, { "epoch": 0.2861925257908041, "grad_norm": 4.120712757110596, "learning_rate": 5.863669398050237e-06, "loss": 0.432, "step": 4189 }, { "epoch": 0.28626084580173533, "grad_norm": 3.0320956707000732, "learning_rate": 5.863098137315068e-06, "loss": 0.3327, "step": 4190 }, { "epoch": 0.28632916581266654, "grad_norm": 3.3596231937408447, "learning_rate": 5.862526760865251e-06, "loss": 0.3463, "step": 4191 }, { "epoch": 0.28639748582359775, "grad_norm": 3.1733038425445557, "learning_rate": 5.861955268728765e-06, "loss": 0.2676, "step": 4192 }, { "epoch": 0.2864658058345289, "grad_norm": 3.3687756061553955, "learning_rate": 5.8613836609335946e-06, "loss": 0.327, "step": 4193 }, { "epoch": 0.2865341258454601, "grad_norm": 4.570694446563721, "learning_rate": 5.86081193750773e-06, "loss": 0.4546, "step": 4194 }, { "epoch": 0.28660244585639133, "grad_norm": 3.56022047996521, "learning_rate": 5.860240098479167e-06, "loss": 0.3404, "step": 4195 }, { "epoch": 0.28667076586732254, "grad_norm": 3.772698163986206, "learning_rate": 5.859668143875908e-06, "loss": 0.3853, "step": 4196 }, { "epoch": 0.28673908587825375, "grad_norm": 3.762181282043457, "learning_rate": 5.859096073725958e-06, "loss": 0.3481, "step": 4197 }, { "epoch": 0.28680740588918496, "grad_norm": 4.0736775398254395, "learning_rate": 5.858523888057331e-06, "loss": 0.3713, "step": 4198 }, { "epoch": 0.2868757259001161, "grad_norm": 4.925954341888428, "learning_rate": 5.857951586898046e-06, "loss": 0.4838, "step": 4199 }, { "epoch": 0.28694404591104733, "grad_norm": 4.780118465423584, "learning_rate": 5.857379170276126e-06, "loss": 0.3692, "step": 4200 }, { "epoch": 0.28701236592197854, "grad_norm": 5.135120391845703, "learning_rate": 5.856806638219602e-06, "loss": 0.3708, "step": 4201 }, { "epoch": 0.28708068593290975, "grad_norm": 4.683544158935547, "learning_rate": 5.856233990756507e-06, "loss": 0.325, "step": 4202 }, { "epoch": 0.28714900594384096, "grad_norm": 4.819493770599365, "learning_rate": 5.855661227914884e-06, "loss": 0.4347, "step": 4203 }, { "epoch": 0.2872173259547722, "grad_norm": 4.3189616203308105, "learning_rate": 5.855088349722779e-06, "loss": 0.3861, "step": 4204 }, { "epoch": 0.28728564596570333, "grad_norm": 4.401467800140381, "learning_rate": 5.854515356208245e-06, "loss": 0.3586, "step": 4205 }, { "epoch": 0.28735396597663454, "grad_norm": 4.007730007171631, "learning_rate": 5.853942247399339e-06, "loss": 0.3792, "step": 4206 }, { "epoch": 0.28742228598756575, "grad_norm": 3.088336229324341, "learning_rate": 5.853369023324126e-06, "loss": 0.3556, "step": 4207 }, { "epoch": 0.28749060599849696, "grad_norm": 4.5559163093566895, "learning_rate": 5.852795684010674e-06, "loss": 0.3209, "step": 4208 }, { "epoch": 0.2875589260094282, "grad_norm": 4.164216041564941, "learning_rate": 5.852222229487058e-06, "loss": 0.3207, "step": 4209 }, { "epoch": 0.2876272460203594, "grad_norm": 4.633236885070801, "learning_rate": 5.8516486597813595e-06, "loss": 0.4397, "step": 4210 }, { "epoch": 0.28769556603129054, "grad_norm": 4.501002788543701, "learning_rate": 5.851074974921663e-06, "loss": 0.5369, "step": 4211 }, { "epoch": 0.28776388604222175, "grad_norm": 5.181503772735596, "learning_rate": 5.850501174936062e-06, "loss": 0.4811, "step": 4212 }, { "epoch": 0.28783220605315296, "grad_norm": 4.195122241973877, "learning_rate": 5.849927259852653e-06, "loss": 0.3805, "step": 4213 }, { "epoch": 0.2879005260640842, "grad_norm": 3.689119577407837, "learning_rate": 5.84935322969954e-06, "loss": 0.3004, "step": 4214 }, { "epoch": 0.2879688460750154, "grad_norm": 3.9637255668640137, "learning_rate": 5.8487790845048305e-06, "loss": 0.3673, "step": 4215 }, { "epoch": 0.2880371660859466, "grad_norm": 3.632675886154175, "learning_rate": 5.848204824296639e-06, "loss": 0.3242, "step": 4216 }, { "epoch": 0.28810548609687775, "grad_norm": 4.313445568084717, "learning_rate": 5.847630449103088e-06, "loss": 0.4603, "step": 4217 }, { "epoch": 0.28817380610780896, "grad_norm": 4.391641616821289, "learning_rate": 5.847055958952299e-06, "loss": 0.3673, "step": 4218 }, { "epoch": 0.28824212611874017, "grad_norm": 3.9395840167999268, "learning_rate": 5.846481353872406e-06, "loss": 0.4147, "step": 4219 }, { "epoch": 0.2883104461296714, "grad_norm": 4.8522467613220215, "learning_rate": 5.845906633891546e-06, "loss": 0.3887, "step": 4220 }, { "epoch": 0.2883787661406026, "grad_norm": 5.004664421081543, "learning_rate": 5.84533179903786e-06, "loss": 0.5596, "step": 4221 }, { "epoch": 0.2884470861515338, "grad_norm": 4.028304100036621, "learning_rate": 5.8447568493394976e-06, "loss": 0.4739, "step": 4222 }, { "epoch": 0.28851540616246496, "grad_norm": 4.717967510223389, "learning_rate": 5.844181784824611e-06, "loss": 0.4205, "step": 4223 }, { "epoch": 0.28858372617339617, "grad_norm": 3.3026862144470215, "learning_rate": 5.843606605521361e-06, "loss": 0.345, "step": 4224 }, { "epoch": 0.2886520461843274, "grad_norm": 3.4881958961486816, "learning_rate": 5.843031311457912e-06, "loss": 0.4623, "step": 4225 }, { "epoch": 0.2887203661952586, "grad_norm": 4.077934741973877, "learning_rate": 5.8424559026624355e-06, "loss": 0.3935, "step": 4226 }, { "epoch": 0.2887886862061898, "grad_norm": 5.0634446144104, "learning_rate": 5.841880379163106e-06, "loss": 0.3868, "step": 4227 }, { "epoch": 0.288857006217121, "grad_norm": 7.3377580642700195, "learning_rate": 5.841304740988106e-06, "loss": 0.4348, "step": 4228 }, { "epoch": 0.28892532622805217, "grad_norm": 4.6566901206970215, "learning_rate": 5.840728988165625e-06, "loss": 0.3832, "step": 4229 }, { "epoch": 0.2889936462389834, "grad_norm": 3.8757200241088867, "learning_rate": 5.840153120723853e-06, "loss": 0.4334, "step": 4230 }, { "epoch": 0.2890619662499146, "grad_norm": 4.105249881744385, "learning_rate": 5.839577138690991e-06, "loss": 0.3488, "step": 4231 }, { "epoch": 0.2891302862608458, "grad_norm": 4.112967491149902, "learning_rate": 5.839001042095241e-06, "loss": 0.4071, "step": 4232 }, { "epoch": 0.289198606271777, "grad_norm": 4.810210227966309, "learning_rate": 5.838424830964815e-06, "loss": 0.3602, "step": 4233 }, { "epoch": 0.2892669262827082, "grad_norm": 2.7659013271331787, "learning_rate": 5.8378485053279274e-06, "loss": 0.3582, "step": 4234 }, { "epoch": 0.2893352462936394, "grad_norm": 3.4620718955993652, "learning_rate": 5.8372720652128e-06, "loss": 0.4325, "step": 4235 }, { "epoch": 0.2894035663045706, "grad_norm": 4.1592583656311035, "learning_rate": 5.8366955106476596e-06, "loss": 0.4317, "step": 4236 }, { "epoch": 0.2894718863155018, "grad_norm": 5.128536701202393, "learning_rate": 5.836118841660738e-06, "loss": 0.3786, "step": 4237 }, { "epoch": 0.289540206326433, "grad_norm": 4.038158893585205, "learning_rate": 5.835542058280273e-06, "loss": 0.3945, "step": 4238 }, { "epoch": 0.2896085263373642, "grad_norm": 3.7982187271118164, "learning_rate": 5.83496516053451e-06, "loss": 0.3452, "step": 4239 }, { "epoch": 0.28967684634829544, "grad_norm": 4.209968090057373, "learning_rate": 5.834388148451696e-06, "loss": 0.3854, "step": 4240 }, { "epoch": 0.2897451663592266, "grad_norm": 2.2599573135375977, "learning_rate": 5.833811022060086e-06, "loss": 0.278, "step": 4241 }, { "epoch": 0.2898134863701578, "grad_norm": 3.724766492843628, "learning_rate": 5.833233781387942e-06, "loss": 0.358, "step": 4242 }, { "epoch": 0.289881806381089, "grad_norm": 3.8336493968963623, "learning_rate": 5.832656426463527e-06, "loss": 0.3818, "step": 4243 }, { "epoch": 0.2899501263920202, "grad_norm": 5.118603229522705, "learning_rate": 5.832078957315115e-06, "loss": 0.4709, "step": 4244 }, { "epoch": 0.29001844640295144, "grad_norm": 4.581751823425293, "learning_rate": 5.8315013739709824e-06, "loss": 0.4216, "step": 4245 }, { "epoch": 0.29008676641388265, "grad_norm": 3.7920217514038086, "learning_rate": 5.830923676459413e-06, "loss": 0.4022, "step": 4246 }, { "epoch": 0.2901550864248138, "grad_norm": 3.7854113578796387, "learning_rate": 5.830345864808693e-06, "loss": 0.4059, "step": 4247 }, { "epoch": 0.290223406435745, "grad_norm": 3.437737464904785, "learning_rate": 5.829767939047117e-06, "loss": 0.3928, "step": 4248 }, { "epoch": 0.2902917264466762, "grad_norm": 3.2155303955078125, "learning_rate": 5.829189899202986e-06, "loss": 0.3879, "step": 4249 }, { "epoch": 0.29036004645760743, "grad_norm": 6.109994411468506, "learning_rate": 5.828611745304603e-06, "loss": 0.546, "step": 4250 }, { "epoch": 0.29042836646853865, "grad_norm": 3.4705188274383545, "learning_rate": 5.828033477380281e-06, "loss": 0.3267, "step": 4251 }, { "epoch": 0.29049668647946986, "grad_norm": 4.202494144439697, "learning_rate": 5.827455095458334e-06, "loss": 0.543, "step": 4252 }, { "epoch": 0.290565006490401, "grad_norm": 3.129199266433716, "learning_rate": 5.826876599567084e-06, "loss": 0.2625, "step": 4253 }, { "epoch": 0.2906333265013322, "grad_norm": 4.011744022369385, "learning_rate": 5.82629798973486e-06, "loss": 0.3838, "step": 4254 }, { "epoch": 0.29070164651226343, "grad_norm": 4.574763774871826, "learning_rate": 5.825719265989994e-06, "loss": 0.3642, "step": 4255 }, { "epoch": 0.29076996652319465, "grad_norm": 4.248493671417236, "learning_rate": 5.825140428360824e-06, "loss": 0.477, "step": 4256 }, { "epoch": 0.29083828653412586, "grad_norm": 3.4179415702819824, "learning_rate": 5.824561476875696e-06, "loss": 0.3599, "step": 4257 }, { "epoch": 0.29090660654505707, "grad_norm": 4.89133882522583, "learning_rate": 5.823982411562958e-06, "loss": 0.3977, "step": 4258 }, { "epoch": 0.2909749265559882, "grad_norm": 2.820909023284912, "learning_rate": 5.8234032324509645e-06, "loss": 0.3529, "step": 4259 }, { "epoch": 0.29104324656691943, "grad_norm": 4.092672824859619, "learning_rate": 5.82282393956808e-06, "loss": 0.3822, "step": 4260 }, { "epoch": 0.29111156657785064, "grad_norm": 3.827497959136963, "learning_rate": 5.822244532942666e-06, "loss": 0.4036, "step": 4261 }, { "epoch": 0.29117988658878186, "grad_norm": 2.9585838317871094, "learning_rate": 5.821665012603099e-06, "loss": 0.3382, "step": 4262 }, { "epoch": 0.29124820659971307, "grad_norm": 4.34893798828125, "learning_rate": 5.8210853785777546e-06, "loss": 0.4409, "step": 4263 }, { "epoch": 0.2913165266106443, "grad_norm": 4.4889750480651855, "learning_rate": 5.8205056308950156e-06, "loss": 0.4794, "step": 4264 }, { "epoch": 0.29138484662157543, "grad_norm": 4.851016044616699, "learning_rate": 5.819925769583271e-06, "loss": 0.4145, "step": 4265 }, { "epoch": 0.29145316663250664, "grad_norm": 4.310144901275635, "learning_rate": 5.8193457946709155e-06, "loss": 0.4441, "step": 4266 }, { "epoch": 0.29152148664343785, "grad_norm": 5.496801853179932, "learning_rate": 5.818765706186348e-06, "loss": 0.5007, "step": 4267 }, { "epoch": 0.29158980665436907, "grad_norm": 3.6788341999053955, "learning_rate": 5.8181855041579754e-06, "loss": 0.3229, "step": 4268 }, { "epoch": 0.2916581266653003, "grad_norm": 3.6274707317352295, "learning_rate": 5.817605188614207e-06, "loss": 0.4616, "step": 4269 }, { "epoch": 0.2917264466762315, "grad_norm": 4.783492565155029, "learning_rate": 5.81702475958346e-06, "loss": 0.3609, "step": 4270 }, { "epoch": 0.29179476668716264, "grad_norm": 3.512457847595215, "learning_rate": 5.8164442170941576e-06, "loss": 0.3502, "step": 4271 }, { "epoch": 0.29186308669809385, "grad_norm": 3.216153144836426, "learning_rate": 5.815863561174725e-06, "loss": 0.3821, "step": 4272 }, { "epoch": 0.29193140670902507, "grad_norm": 3.6571757793426514, "learning_rate": 5.815282791853598e-06, "loss": 0.3236, "step": 4273 }, { "epoch": 0.2919997267199563, "grad_norm": 3.6607611179351807, "learning_rate": 5.814701909159215e-06, "loss": 0.3147, "step": 4274 }, { "epoch": 0.2920680467308875, "grad_norm": 4.112756729125977, "learning_rate": 5.814120913120017e-06, "loss": 0.3526, "step": 4275 }, { "epoch": 0.2921363667418187, "grad_norm": 4.547914981842041, "learning_rate": 5.813539803764458e-06, "loss": 0.3699, "step": 4276 }, { "epoch": 0.29220468675274985, "grad_norm": 4.099687099456787, "learning_rate": 5.81295858112099e-06, "loss": 0.4775, "step": 4277 }, { "epoch": 0.29227300676368106, "grad_norm": 3.6889588832855225, "learning_rate": 5.812377245218077e-06, "loss": 0.369, "step": 4278 }, { "epoch": 0.2923413267746123, "grad_norm": 4.928436279296875, "learning_rate": 5.811795796084183e-06, "loss": 0.3996, "step": 4279 }, { "epoch": 0.2924096467855435, "grad_norm": 3.8902699947357178, "learning_rate": 5.8112142337477825e-06, "loss": 0.3531, "step": 4280 }, { "epoch": 0.2924779667964747, "grad_norm": 4.198592662811279, "learning_rate": 5.810632558237349e-06, "loss": 0.4777, "step": 4281 }, { "epoch": 0.2925462868074059, "grad_norm": 4.903616428375244, "learning_rate": 5.810050769581371e-06, "loss": 0.3988, "step": 4282 }, { "epoch": 0.29261460681833706, "grad_norm": 3.819169521331787, "learning_rate": 5.809468867808333e-06, "loss": 0.4594, "step": 4283 }, { "epoch": 0.2926829268292683, "grad_norm": 4.185140609741211, "learning_rate": 5.808886852946731e-06, "loss": 0.4067, "step": 4284 }, { "epoch": 0.2927512468401995, "grad_norm": 5.032297611236572, "learning_rate": 5.808304725025064e-06, "loss": 0.4133, "step": 4285 }, { "epoch": 0.2928195668511307, "grad_norm": 4.732351779937744, "learning_rate": 5.807722484071837e-06, "loss": 0.4243, "step": 4286 }, { "epoch": 0.2928878868620619, "grad_norm": 4.18360710144043, "learning_rate": 5.807140130115562e-06, "loss": 0.3443, "step": 4287 }, { "epoch": 0.2929562068729931, "grad_norm": 4.434434413909912, "learning_rate": 5.806557663184755e-06, "loss": 0.4155, "step": 4288 }, { "epoch": 0.2930245268839243, "grad_norm": 3.8071465492248535, "learning_rate": 5.805975083307937e-06, "loss": 0.2582, "step": 4289 }, { "epoch": 0.2930928468948555, "grad_norm": 3.816967248916626, "learning_rate": 5.805392390513636e-06, "loss": 0.3065, "step": 4290 }, { "epoch": 0.2931611669057867, "grad_norm": 3.555793046951294, "learning_rate": 5.8048095848303844e-06, "loss": 0.4137, "step": 4291 }, { "epoch": 0.2932294869167179, "grad_norm": 4.514466762542725, "learning_rate": 5.804226666286722e-06, "loss": 0.4381, "step": 4292 }, { "epoch": 0.2932978069276491, "grad_norm": 3.322490930557251, "learning_rate": 5.803643634911191e-06, "loss": 0.3314, "step": 4293 }, { "epoch": 0.29336612693858033, "grad_norm": 3.939749240875244, "learning_rate": 5.803060490732341e-06, "loss": 0.3377, "step": 4294 }, { "epoch": 0.2934344469495115, "grad_norm": 3.649315357208252, "learning_rate": 5.80247723377873e-06, "loss": 0.3834, "step": 4295 }, { "epoch": 0.2935027669604427, "grad_norm": 3.8619542121887207, "learning_rate": 5.801893864078915e-06, "loss": 0.389, "step": 4296 }, { "epoch": 0.2935710869713739, "grad_norm": 3.4614851474761963, "learning_rate": 5.8013103816614625e-06, "loss": 0.4298, "step": 4297 }, { "epoch": 0.2936394069823051, "grad_norm": 4.214547634124756, "learning_rate": 5.800726786554947e-06, "loss": 0.3383, "step": 4298 }, { "epoch": 0.29370772699323633, "grad_norm": 3.388902187347412, "learning_rate": 5.800143078787942e-06, "loss": 0.3544, "step": 4299 }, { "epoch": 0.29377604700416754, "grad_norm": 4.556663990020752, "learning_rate": 5.799559258389032e-06, "loss": 0.3531, "step": 4300 }, { "epoch": 0.2938443670150987, "grad_norm": 4.449833869934082, "learning_rate": 5.798975325386806e-06, "loss": 0.511, "step": 4301 }, { "epoch": 0.2939126870260299, "grad_norm": 3.4021215438842773, "learning_rate": 5.7983912798098555e-06, "loss": 0.3471, "step": 4302 }, { "epoch": 0.2939810070369611, "grad_norm": 4.120904445648193, "learning_rate": 5.797807121686781e-06, "loss": 0.3856, "step": 4303 }, { "epoch": 0.29404932704789233, "grad_norm": 3.8623926639556885, "learning_rate": 5.797222851046186e-06, "loss": 0.5167, "step": 4304 }, { "epoch": 0.29411764705882354, "grad_norm": 3.8951642513275146, "learning_rate": 5.796638467916682e-06, "loss": 0.3311, "step": 4305 }, { "epoch": 0.29418596706975475, "grad_norm": 4.0831499099731445, "learning_rate": 5.796053972326884e-06, "loss": 0.397, "step": 4306 }, { "epoch": 0.2942542870806859, "grad_norm": 3.360954523086548, "learning_rate": 5.795469364305414e-06, "loss": 0.3602, "step": 4307 }, { "epoch": 0.2943226070916171, "grad_norm": 4.010778903961182, "learning_rate": 5.794884643880897e-06, "loss": 0.4303, "step": 4308 }, { "epoch": 0.2943909271025483, "grad_norm": 2.9815433025360107, "learning_rate": 5.794299811081967e-06, "loss": 0.452, "step": 4309 }, { "epoch": 0.29445924711347954, "grad_norm": 4.155124664306641, "learning_rate": 5.793714865937261e-06, "loss": 0.3665, "step": 4310 }, { "epoch": 0.29452756712441075, "grad_norm": 4.939413070678711, "learning_rate": 5.793129808475423e-06, "loss": 0.3969, "step": 4311 }, { "epoch": 0.29459588713534196, "grad_norm": 3.4560470581054688, "learning_rate": 5.7925446387250995e-06, "loss": 0.356, "step": 4312 }, { "epoch": 0.2946642071462731, "grad_norm": 4.373769283294678, "learning_rate": 5.791959356714947e-06, "loss": 0.335, "step": 4313 }, { "epoch": 0.2947325271572043, "grad_norm": 4.750433921813965, "learning_rate": 5.791373962473624e-06, "loss": 0.4024, "step": 4314 }, { "epoch": 0.29480084716813554, "grad_norm": 4.272796154022217, "learning_rate": 5.790788456029796e-06, "loss": 0.4546, "step": 4315 }, { "epoch": 0.29486916717906675, "grad_norm": 3.4589483737945557, "learning_rate": 5.790202837412134e-06, "loss": 0.3085, "step": 4316 }, { "epoch": 0.29493748718999796, "grad_norm": 3.940004825592041, "learning_rate": 5.789617106649314e-06, "loss": 0.4298, "step": 4317 }, { "epoch": 0.29500580720092917, "grad_norm": 4.247891426086426, "learning_rate": 5.789031263770018e-06, "loss": 0.3802, "step": 4318 }, { "epoch": 0.2950741272118603, "grad_norm": 4.2356486320495605, "learning_rate": 5.788445308802933e-06, "loss": 0.3777, "step": 4319 }, { "epoch": 0.29514244722279154, "grad_norm": 3.7554357051849365, "learning_rate": 5.78785924177675e-06, "loss": 0.3997, "step": 4320 }, { "epoch": 0.29521076723372275, "grad_norm": 3.4017529487609863, "learning_rate": 5.78727306272017e-06, "loss": 0.3368, "step": 4321 }, { "epoch": 0.29527908724465396, "grad_norm": 3.384204864501953, "learning_rate": 5.786686771661895e-06, "loss": 0.4096, "step": 4322 }, { "epoch": 0.29534740725558517, "grad_norm": 4.255509853363037, "learning_rate": 5.786100368630634e-06, "loss": 0.3984, "step": 4323 }, { "epoch": 0.2954157272665164, "grad_norm": 3.587489604949951, "learning_rate": 5.785513853655101e-06, "loss": 0.3746, "step": 4324 }, { "epoch": 0.29548404727744754, "grad_norm": 3.5817408561706543, "learning_rate": 5.784927226764018e-06, "loss": 0.2453, "step": 4325 }, { "epoch": 0.29555236728837875, "grad_norm": 3.818765640258789, "learning_rate": 5.7843404879861105e-06, "loss": 0.3415, "step": 4326 }, { "epoch": 0.29562068729930996, "grad_norm": 4.023281574249268, "learning_rate": 5.783753637350108e-06, "loss": 0.3509, "step": 4327 }, { "epoch": 0.29568900731024117, "grad_norm": 4.104785442352295, "learning_rate": 5.783166674884747e-06, "loss": 0.2991, "step": 4328 }, { "epoch": 0.2957573273211724, "grad_norm": 4.326423168182373, "learning_rate": 5.782579600618771e-06, "loss": 0.3976, "step": 4329 }, { "epoch": 0.2958256473321036, "grad_norm": 4.1342034339904785, "learning_rate": 5.781992414580926e-06, "loss": 0.4132, "step": 4330 }, { "epoch": 0.29589396734303475, "grad_norm": 3.416667938232422, "learning_rate": 5.781405116799966e-06, "loss": 0.3628, "step": 4331 }, { "epoch": 0.29596228735396596, "grad_norm": 3.750826835632324, "learning_rate": 5.780817707304649e-06, "loss": 0.4416, "step": 4332 }, { "epoch": 0.29603060736489717, "grad_norm": 5.1567158699035645, "learning_rate": 5.780230186123738e-06, "loss": 0.3456, "step": 4333 }, { "epoch": 0.2960989273758284, "grad_norm": 3.5379302501678467, "learning_rate": 5.779642553286004e-06, "loss": 0.3449, "step": 4334 }, { "epoch": 0.2961672473867596, "grad_norm": 4.221546173095703, "learning_rate": 5.779054808820222e-06, "loss": 0.466, "step": 4335 }, { "epoch": 0.2962355673976908, "grad_norm": 3.284644603729248, "learning_rate": 5.77846695275517e-06, "loss": 0.3846, "step": 4336 }, { "epoch": 0.296303887408622, "grad_norm": 4.2106404304504395, "learning_rate": 5.777878985119636e-06, "loss": 0.4055, "step": 4337 }, { "epoch": 0.29637220741955317, "grad_norm": 5.677632808685303, "learning_rate": 5.777290905942409e-06, "loss": 0.5366, "step": 4338 }, { "epoch": 0.2964405274304844, "grad_norm": 4.942630290985107, "learning_rate": 5.776702715252288e-06, "loss": 0.3739, "step": 4339 }, { "epoch": 0.2965088474414156, "grad_norm": 3.4560205936431885, "learning_rate": 5.776114413078074e-06, "loss": 0.2743, "step": 4340 }, { "epoch": 0.2965771674523468, "grad_norm": 4.652191638946533, "learning_rate": 5.775525999448574e-06, "loss": 0.375, "step": 4341 }, { "epoch": 0.296645487463278, "grad_norm": 4.128881931304932, "learning_rate": 5.774937474392602e-06, "loss": 0.3403, "step": 4342 }, { "epoch": 0.2967138074742092, "grad_norm": 4.314795970916748, "learning_rate": 5.7743488379389775e-06, "loss": 0.3066, "step": 4343 }, { "epoch": 0.2967821274851404, "grad_norm": 4.085243225097656, "learning_rate": 5.773760090116522e-06, "loss": 0.3525, "step": 4344 }, { "epoch": 0.2968504474960716, "grad_norm": 4.384589195251465, "learning_rate": 5.7731712309540675e-06, "loss": 0.3874, "step": 4345 }, { "epoch": 0.2969187675070028, "grad_norm": 2.800579309463501, "learning_rate": 5.772582260480446e-06, "loss": 0.3516, "step": 4346 }, { "epoch": 0.296987087517934, "grad_norm": 3.8375167846679688, "learning_rate": 5.7719931787245e-06, "loss": 0.324, "step": 4347 }, { "epoch": 0.2970554075288652, "grad_norm": 4.643314361572266, "learning_rate": 5.771403985715074e-06, "loss": 0.4976, "step": 4348 }, { "epoch": 0.29712372753979643, "grad_norm": 5.091596603393555, "learning_rate": 5.770814681481022e-06, "loss": 0.4253, "step": 4349 }, { "epoch": 0.2971920475507276, "grad_norm": 4.448541164398193, "learning_rate": 5.770225266051196e-06, "loss": 0.3867, "step": 4350 }, { "epoch": 0.2972603675616588, "grad_norm": 2.922766923904419, "learning_rate": 5.769635739454462e-06, "loss": 0.2879, "step": 4351 }, { "epoch": 0.29732868757259, "grad_norm": 4.015557289123535, "learning_rate": 5.769046101719685e-06, "loss": 0.4276, "step": 4352 }, { "epoch": 0.2973970075835212, "grad_norm": 4.494125843048096, "learning_rate": 5.768456352875741e-06, "loss": 0.3894, "step": 4353 }, { "epoch": 0.29746532759445243, "grad_norm": 5.349596977233887, "learning_rate": 5.767866492951505e-06, "loss": 0.3588, "step": 4354 }, { "epoch": 0.29753364760538364, "grad_norm": 3.7421088218688965, "learning_rate": 5.767276521975864e-06, "loss": 0.4167, "step": 4355 }, { "epoch": 0.2976019676163148, "grad_norm": 4.106849670410156, "learning_rate": 5.766686439977704e-06, "loss": 0.3675, "step": 4356 }, { "epoch": 0.297670287627246, "grad_norm": 4.234707832336426, "learning_rate": 5.766096246985923e-06, "loss": 0.512, "step": 4357 }, { "epoch": 0.2977386076381772, "grad_norm": 3.6063883304595947, "learning_rate": 5.7655059430294185e-06, "loss": 0.2958, "step": 4358 }, { "epoch": 0.29780692764910843, "grad_norm": 3.736769914627075, "learning_rate": 5.7649155281370974e-06, "loss": 0.366, "step": 4359 }, { "epoch": 0.29787524766003964, "grad_norm": 3.7708559036254883, "learning_rate": 5.764325002337872e-06, "loss": 0.4612, "step": 4360 }, { "epoch": 0.29794356767097085, "grad_norm": 3.6514828205108643, "learning_rate": 5.763734365660657e-06, "loss": 0.3619, "step": 4361 }, { "epoch": 0.298011887681902, "grad_norm": 3.9830732345581055, "learning_rate": 5.7631436181343745e-06, "loss": 0.4289, "step": 4362 }, { "epoch": 0.2980802076928332, "grad_norm": 3.8287484645843506, "learning_rate": 5.762552759787953e-06, "loss": 0.3249, "step": 4363 }, { "epoch": 0.29814852770376443, "grad_norm": 3.931682825088501, "learning_rate": 5.761961790650322e-06, "loss": 0.4258, "step": 4364 }, { "epoch": 0.29821684771469564, "grad_norm": 3.662053108215332, "learning_rate": 5.761370710750425e-06, "loss": 0.3474, "step": 4365 }, { "epoch": 0.29828516772562685, "grad_norm": 4.159935474395752, "learning_rate": 5.760779520117201e-06, "loss": 0.4617, "step": 4366 }, { "epoch": 0.29835348773655807, "grad_norm": 4.493617057800293, "learning_rate": 5.760188218779601e-06, "loss": 0.3533, "step": 4367 }, { "epoch": 0.2984218077474892, "grad_norm": 5.003146648406982, "learning_rate": 5.75959680676658e-06, "loss": 0.3831, "step": 4368 }, { "epoch": 0.29849012775842043, "grad_norm": 3.8640341758728027, "learning_rate": 5.759005284107097e-06, "loss": 0.4536, "step": 4369 }, { "epoch": 0.29855844776935164, "grad_norm": 4.92168664932251, "learning_rate": 5.758413650830116e-06, "loss": 0.5076, "step": 4370 }, { "epoch": 0.29862676778028285, "grad_norm": 3.9623069763183594, "learning_rate": 5.757821906964609e-06, "loss": 0.3685, "step": 4371 }, { "epoch": 0.29869508779121406, "grad_norm": 5.059058666229248, "learning_rate": 5.757230052539555e-06, "loss": 0.5086, "step": 4372 }, { "epoch": 0.2987634078021453, "grad_norm": 3.7552783489227295, "learning_rate": 5.756638087583931e-06, "loss": 0.4156, "step": 4373 }, { "epoch": 0.29883172781307643, "grad_norm": 4.043987274169922, "learning_rate": 5.756046012126725e-06, "loss": 0.4979, "step": 4374 }, { "epoch": 0.29890004782400764, "grad_norm": 3.114617109298706, "learning_rate": 5.75545382619693e-06, "loss": 0.2845, "step": 4375 }, { "epoch": 0.29896836783493885, "grad_norm": 2.6504273414611816, "learning_rate": 5.754861529823545e-06, "loss": 0.3138, "step": 4376 }, { "epoch": 0.29903668784587006, "grad_norm": 3.6074016094207764, "learning_rate": 5.754269123035572e-06, "loss": 0.3934, "step": 4377 }, { "epoch": 0.2991050078568013, "grad_norm": 4.5729522705078125, "learning_rate": 5.753676605862019e-06, "loss": 0.4329, "step": 4378 }, { "epoch": 0.2991733278677325, "grad_norm": 3.906121015548706, "learning_rate": 5.7530839783319015e-06, "loss": 0.4307, "step": 4379 }, { "epoch": 0.29924164787866364, "grad_norm": 4.4562811851501465, "learning_rate": 5.752491240474237e-06, "loss": 0.4975, "step": 4380 }, { "epoch": 0.29930996788959485, "grad_norm": 4.2995686531066895, "learning_rate": 5.751898392318051e-06, "loss": 0.398, "step": 4381 }, { "epoch": 0.29937828790052606, "grad_norm": 3.362516164779663, "learning_rate": 5.7513054338923755e-06, "loss": 0.3548, "step": 4382 }, { "epoch": 0.2994466079114573, "grad_norm": 4.055441856384277, "learning_rate": 5.750712365226244e-06, "loss": 0.3739, "step": 4383 }, { "epoch": 0.2995149279223885, "grad_norm": 3.5382492542266846, "learning_rate": 5.750119186348697e-06, "loss": 0.3943, "step": 4384 }, { "epoch": 0.2995832479333197, "grad_norm": 3.5567784309387207, "learning_rate": 5.749525897288783e-06, "loss": 0.3929, "step": 4385 }, { "epoch": 0.29965156794425085, "grad_norm": 3.8509137630462646, "learning_rate": 5.748932498075553e-06, "loss": 0.3722, "step": 4386 }, { "epoch": 0.29971988795518206, "grad_norm": 4.156147480010986, "learning_rate": 5.748338988738064e-06, "loss": 0.328, "step": 4387 }, { "epoch": 0.2997882079661133, "grad_norm": 3.5135793685913086, "learning_rate": 5.747745369305379e-06, "loss": 0.2929, "step": 4388 }, { "epoch": 0.2998565279770445, "grad_norm": 3.4281017780303955, "learning_rate": 5.7471516398065645e-06, "loss": 0.269, "step": 4389 }, { "epoch": 0.2999248479879757, "grad_norm": 3.01019549369812, "learning_rate": 5.746557800270695e-06, "loss": 0.4333, "step": 4390 }, { "epoch": 0.2999931679989069, "grad_norm": 3.9796102046966553, "learning_rate": 5.745963850726849e-06, "loss": 0.4045, "step": 4391 }, { "epoch": 0.30006148800983806, "grad_norm": 3.1848912239074707, "learning_rate": 5.745369791204112e-06, "loss": 0.3555, "step": 4392 }, { "epoch": 0.3001298080207693, "grad_norm": 3.7011611461639404, "learning_rate": 5.744775621731571e-06, "loss": 0.3754, "step": 4393 }, { "epoch": 0.3001981280317005, "grad_norm": 3.3067524433135986, "learning_rate": 5.744181342338323e-06, "loss": 0.364, "step": 4394 }, { "epoch": 0.3002664480426317, "grad_norm": 4.509027481079102, "learning_rate": 5.743586953053467e-06, "loss": 0.4987, "step": 4395 }, { "epoch": 0.3003347680535629, "grad_norm": 5.214199542999268, "learning_rate": 5.74299245390611e-06, "loss": 0.3757, "step": 4396 }, { "epoch": 0.3004030880644941, "grad_norm": 3.7197296619415283, "learning_rate": 5.742397844925362e-06, "loss": 0.4001, "step": 4397 }, { "epoch": 0.3004714080754253, "grad_norm": 2.752117156982422, "learning_rate": 5.741803126140339e-06, "loss": 0.3096, "step": 4398 }, { "epoch": 0.3005397280863565, "grad_norm": 3.894012451171875, "learning_rate": 5.7412082975801635e-06, "loss": 0.4878, "step": 4399 }, { "epoch": 0.3006080480972877, "grad_norm": 3.631568431854248, "learning_rate": 5.740613359273963e-06, "loss": 0.3962, "step": 4400 }, { "epoch": 0.3006763681082189, "grad_norm": 3.246565341949463, "learning_rate": 5.740018311250869e-06, "loss": 0.4121, "step": 4401 }, { "epoch": 0.3007446881191501, "grad_norm": 4.215761661529541, "learning_rate": 5.739423153540021e-06, "loss": 0.4115, "step": 4402 }, { "epoch": 0.3008130081300813, "grad_norm": 4.082180976867676, "learning_rate": 5.738827886170561e-06, "loss": 0.4589, "step": 4403 }, { "epoch": 0.3008813281410125, "grad_norm": 4.755364418029785, "learning_rate": 5.738232509171638e-06, "loss": 0.3778, "step": 4404 }, { "epoch": 0.3009496481519437, "grad_norm": 3.9566822052001953, "learning_rate": 5.737637022572405e-06, "loss": 0.3617, "step": 4405 }, { "epoch": 0.3010179681628749, "grad_norm": 4.747067451477051, "learning_rate": 5.7370414264020234e-06, "loss": 0.4606, "step": 4406 }, { "epoch": 0.3010862881738061, "grad_norm": 3.6171951293945312, "learning_rate": 5.736445720689657e-06, "loss": 0.3159, "step": 4407 }, { "epoch": 0.3011546081847373, "grad_norm": 3.8546221256256104, "learning_rate": 5.735849905464477e-06, "loss": 0.3121, "step": 4408 }, { "epoch": 0.30122292819566854, "grad_norm": 4.02046012878418, "learning_rate": 5.735253980755656e-06, "loss": 0.4371, "step": 4409 }, { "epoch": 0.3012912482065997, "grad_norm": 3.297183036804199, "learning_rate": 5.7346579465923785e-06, "loss": 0.4064, "step": 4410 }, { "epoch": 0.3013595682175309, "grad_norm": 4.945972919464111, "learning_rate": 5.734061803003826e-06, "loss": 0.4585, "step": 4411 }, { "epoch": 0.3014278882284621, "grad_norm": 5.071324825286865, "learning_rate": 5.733465550019196e-06, "loss": 0.4349, "step": 4412 }, { "epoch": 0.3014962082393933, "grad_norm": 4.042301654815674, "learning_rate": 5.732869187667682e-06, "loss": 0.3792, "step": 4413 }, { "epoch": 0.30156452825032454, "grad_norm": 3.078387498855591, "learning_rate": 5.732272715978486e-06, "loss": 0.3426, "step": 4414 }, { "epoch": 0.30163284826125575, "grad_norm": 4.504617214202881, "learning_rate": 5.731676134980816e-06, "loss": 0.4456, "step": 4415 }, { "epoch": 0.3017011682721869, "grad_norm": 3.908609628677368, "learning_rate": 5.7310794447038865e-06, "loss": 0.3208, "step": 4416 }, { "epoch": 0.3017694882831181, "grad_norm": 5.334878444671631, "learning_rate": 5.730482645176914e-06, "loss": 0.4181, "step": 4417 }, { "epoch": 0.3018378082940493, "grad_norm": 3.27827787399292, "learning_rate": 5.729885736429123e-06, "loss": 0.3201, "step": 4418 }, { "epoch": 0.30190612830498054, "grad_norm": 3.592555046081543, "learning_rate": 5.729288718489741e-06, "loss": 0.4236, "step": 4419 }, { "epoch": 0.30197444831591175, "grad_norm": 3.180570602416992, "learning_rate": 5.728691591388006e-06, "loss": 0.4262, "step": 4420 }, { "epoch": 0.30204276832684296, "grad_norm": 6.3228607177734375, "learning_rate": 5.728094355153154e-06, "loss": 0.397, "step": 4421 }, { "epoch": 0.3021110883377741, "grad_norm": 5.82435417175293, "learning_rate": 5.727497009814432e-06, "loss": 0.4462, "step": 4422 }, { "epoch": 0.3021794083487053, "grad_norm": 3.7071709632873535, "learning_rate": 5.726899555401089e-06, "loss": 0.4141, "step": 4423 }, { "epoch": 0.30224772835963654, "grad_norm": 4.341253280639648, "learning_rate": 5.726301991942383e-06, "loss": 0.3652, "step": 4424 }, { "epoch": 0.30231604837056775, "grad_norm": 3.515899419784546, "learning_rate": 5.725704319467574e-06, "loss": 0.3608, "step": 4425 }, { "epoch": 0.30238436838149896, "grad_norm": 3.8917431831359863, "learning_rate": 5.725106538005928e-06, "loss": 0.3962, "step": 4426 }, { "epoch": 0.30245268839243017, "grad_norm": 4.5692667961120605, "learning_rate": 5.7245086475867166e-06, "loss": 0.5063, "step": 4427 }, { "epoch": 0.3025210084033613, "grad_norm": 4.274187088012695, "learning_rate": 5.723910648239218e-06, "loss": 0.4631, "step": 4428 }, { "epoch": 0.30258932841429254, "grad_norm": 3.899791717529297, "learning_rate": 5.723312539992713e-06, "loss": 0.4003, "step": 4429 }, { "epoch": 0.30265764842522375, "grad_norm": 3.4467391967773438, "learning_rate": 5.722714322876492e-06, "loss": 0.3392, "step": 4430 }, { "epoch": 0.30272596843615496, "grad_norm": 3.7420074939727783, "learning_rate": 5.722115996919845e-06, "loss": 0.3712, "step": 4431 }, { "epoch": 0.30279428844708617, "grad_norm": 4.109622955322266, "learning_rate": 5.721517562152073e-06, "loss": 0.4197, "step": 4432 }, { "epoch": 0.3028626084580174, "grad_norm": 3.282780408859253, "learning_rate": 5.720919018602478e-06, "loss": 0.4372, "step": 4433 }, { "epoch": 0.30293092846894853, "grad_norm": 3.387192964553833, "learning_rate": 5.720320366300369e-06, "loss": 0.3347, "step": 4434 }, { "epoch": 0.30299924847987975, "grad_norm": 2.5846383571624756, "learning_rate": 5.719721605275064e-06, "loss": 0.2819, "step": 4435 }, { "epoch": 0.30306756849081096, "grad_norm": 3.9143519401550293, "learning_rate": 5.719122735555877e-06, "loss": 0.3983, "step": 4436 }, { "epoch": 0.30313588850174217, "grad_norm": 3.6911158561706543, "learning_rate": 5.7185237571721375e-06, "loss": 0.4373, "step": 4437 }, { "epoch": 0.3032042085126734, "grad_norm": 3.152740240097046, "learning_rate": 5.717924670153174e-06, "loss": 0.3627, "step": 4438 }, { "epoch": 0.3032725285236046, "grad_norm": 4.608799934387207, "learning_rate": 5.7173254745283226e-06, "loss": 0.4435, "step": 4439 }, { "epoch": 0.30334084853453575, "grad_norm": 5.07755184173584, "learning_rate": 5.716726170326925e-06, "loss": 0.4248, "step": 4440 }, { "epoch": 0.30340916854546696, "grad_norm": 3.553987741470337, "learning_rate": 5.716126757578325e-06, "loss": 0.3862, "step": 4441 }, { "epoch": 0.30347748855639817, "grad_norm": 4.0121002197265625, "learning_rate": 5.715527236311877e-06, "loss": 0.3765, "step": 4442 }, { "epoch": 0.3035458085673294, "grad_norm": 3.791727304458618, "learning_rate": 5.714927606556937e-06, "loss": 0.3467, "step": 4443 }, { "epoch": 0.3036141285782606, "grad_norm": 3.0610365867614746, "learning_rate": 5.714327868342868e-06, "loss": 0.2669, "step": 4444 }, { "epoch": 0.3036824485891918, "grad_norm": 4.373810291290283, "learning_rate": 5.713728021699037e-06, "loss": 0.4755, "step": 4445 }, { "epoch": 0.30375076860012296, "grad_norm": 3.9127514362335205, "learning_rate": 5.713128066654815e-06, "loss": 0.3931, "step": 4446 }, { "epoch": 0.30381908861105417, "grad_norm": 4.083183765411377, "learning_rate": 5.712528003239584e-06, "loss": 0.4192, "step": 4447 }, { "epoch": 0.3038874086219854, "grad_norm": 5.263698577880859, "learning_rate": 5.711927831482725e-06, "loss": 0.4695, "step": 4448 }, { "epoch": 0.3039557286329166, "grad_norm": 3.5802528858184814, "learning_rate": 5.711327551413628e-06, "loss": 0.3276, "step": 4449 }, { "epoch": 0.3040240486438478, "grad_norm": 3.8170218467712402, "learning_rate": 5.710727163061685e-06, "loss": 0.3788, "step": 4450 }, { "epoch": 0.304092368654779, "grad_norm": 3.7997984886169434, "learning_rate": 5.710126666456298e-06, "loss": 0.3471, "step": 4451 }, { "epoch": 0.30416068866571017, "grad_norm": 3.437328815460205, "learning_rate": 5.70952606162687e-06, "loss": 0.3175, "step": 4452 }, { "epoch": 0.3042290086766414, "grad_norm": 4.990029811859131, "learning_rate": 5.708925348602812e-06, "loss": 0.3641, "step": 4453 }, { "epoch": 0.3042973286875726, "grad_norm": 4.070879936218262, "learning_rate": 5.708324527413538e-06, "loss": 0.4761, "step": 4454 }, { "epoch": 0.3043656486985038, "grad_norm": 5.574244022369385, "learning_rate": 5.707723598088472e-06, "loss": 0.3724, "step": 4455 }, { "epoch": 0.304433968709435, "grad_norm": 4.479315280914307, "learning_rate": 5.7071225606570355e-06, "loss": 0.4182, "step": 4456 }, { "epoch": 0.3045022887203662, "grad_norm": 5.2023138999938965, "learning_rate": 5.706521415148662e-06, "loss": 0.4, "step": 4457 }, { "epoch": 0.3045706087312974, "grad_norm": 4.839859485626221, "learning_rate": 5.705920161592788e-06, "loss": 0.3314, "step": 4458 }, { "epoch": 0.3046389287422286, "grad_norm": 3.6743457317352295, "learning_rate": 5.705318800018855e-06, "loss": 0.3386, "step": 4459 }, { "epoch": 0.3047072487531598, "grad_norm": 3.8234195709228516, "learning_rate": 5.70471733045631e-06, "loss": 0.4129, "step": 4460 }, { "epoch": 0.304775568764091, "grad_norm": 5.218071460723877, "learning_rate": 5.704115752934606e-06, "loss": 0.3491, "step": 4461 }, { "epoch": 0.3048438887750222, "grad_norm": 3.9645473957061768, "learning_rate": 5.703514067483198e-06, "loss": 0.4764, "step": 4462 }, { "epoch": 0.30491220878595343, "grad_norm": 4.1603102684021, "learning_rate": 5.702912274131554e-06, "loss": 0.394, "step": 4463 }, { "epoch": 0.3049805287968846, "grad_norm": 4.362984657287598, "learning_rate": 5.702310372909138e-06, "loss": 0.3975, "step": 4464 }, { "epoch": 0.3050488488078158, "grad_norm": 4.552549839019775, "learning_rate": 5.701708363845425e-06, "loss": 0.3711, "step": 4465 }, { "epoch": 0.305117168818747, "grad_norm": 5.161533832550049, "learning_rate": 5.701106246969893e-06, "loss": 0.3926, "step": 4466 }, { "epoch": 0.3051854888296782, "grad_norm": 3.809251070022583, "learning_rate": 5.700504022312028e-06, "loss": 0.3907, "step": 4467 }, { "epoch": 0.30525380884060943, "grad_norm": 3.7333903312683105, "learning_rate": 5.699901689901316e-06, "loss": 0.4815, "step": 4468 }, { "epoch": 0.30532212885154064, "grad_norm": 4.447298049926758, "learning_rate": 5.699299249767255e-06, "loss": 0.4896, "step": 4469 }, { "epoch": 0.3053904488624718, "grad_norm": 4.378053665161133, "learning_rate": 5.698696701939343e-06, "loss": 0.3552, "step": 4470 }, { "epoch": 0.305458768873403, "grad_norm": 4.904073715209961, "learning_rate": 5.698094046447086e-06, "loss": 0.4607, "step": 4471 }, { "epoch": 0.3055270888843342, "grad_norm": 3.517162561416626, "learning_rate": 5.697491283319994e-06, "loss": 0.3918, "step": 4472 }, { "epoch": 0.30559540889526543, "grad_norm": 4.508244037628174, "learning_rate": 5.696888412587583e-06, "loss": 0.3366, "step": 4473 }, { "epoch": 0.30566372890619664, "grad_norm": 3.568936586380005, "learning_rate": 5.696285434279374e-06, "loss": 0.3943, "step": 4474 }, { "epoch": 0.30573204891712785, "grad_norm": 3.756983995437622, "learning_rate": 5.695682348424892e-06, "loss": 0.3504, "step": 4475 }, { "epoch": 0.305800368928059, "grad_norm": 3.2067441940307617, "learning_rate": 5.69507915505367e-06, "loss": 0.3375, "step": 4476 }, { "epoch": 0.3058686889389902, "grad_norm": 3.154799699783325, "learning_rate": 5.6944758541952455e-06, "loss": 0.4048, "step": 4477 }, { "epoch": 0.30593700894992143, "grad_norm": 4.088085174560547, "learning_rate": 5.693872445879158e-06, "loss": 0.373, "step": 4478 }, { "epoch": 0.30600532896085264, "grad_norm": 4.939962863922119, "learning_rate": 5.693268930134957e-06, "loss": 0.3423, "step": 4479 }, { "epoch": 0.30607364897178385, "grad_norm": 3.5306243896484375, "learning_rate": 5.692665306992194e-06, "loss": 0.3697, "step": 4480 }, { "epoch": 0.30614196898271506, "grad_norm": 3.803725004196167, "learning_rate": 5.692061576480427e-06, "loss": 0.3505, "step": 4481 }, { "epoch": 0.3062102889936462, "grad_norm": 4.2204155921936035, "learning_rate": 5.691457738629219e-06, "loss": 0.3967, "step": 4482 }, { "epoch": 0.30627860900457743, "grad_norm": 3.8260905742645264, "learning_rate": 5.690853793468139e-06, "loss": 0.3899, "step": 4483 }, { "epoch": 0.30634692901550864, "grad_norm": 4.004978656768799, "learning_rate": 5.690249741026759e-06, "loss": 0.3724, "step": 4484 }, { "epoch": 0.30641524902643985, "grad_norm": 3.233266592025757, "learning_rate": 5.689645581334661e-06, "loss": 0.3163, "step": 4485 }, { "epoch": 0.30648356903737106, "grad_norm": 3.946027994155884, "learning_rate": 5.689041314421425e-06, "loss": 0.3999, "step": 4486 }, { "epoch": 0.3065518890483023, "grad_norm": 3.662619113922119, "learning_rate": 5.688436940316643e-06, "loss": 0.3566, "step": 4487 }, { "epoch": 0.30662020905923343, "grad_norm": 3.367109537124634, "learning_rate": 5.687832459049909e-06, "loss": 0.3771, "step": 4488 }, { "epoch": 0.30668852907016464, "grad_norm": 4.528521537780762, "learning_rate": 5.687227870650824e-06, "loss": 0.533, "step": 4489 }, { "epoch": 0.30675684908109585, "grad_norm": 3.579993724822998, "learning_rate": 5.68662317514899e-06, "loss": 0.4182, "step": 4490 }, { "epoch": 0.30682516909202706, "grad_norm": 3.70944881439209, "learning_rate": 5.686018372574021e-06, "loss": 0.3499, "step": 4491 }, { "epoch": 0.3068934891029583, "grad_norm": 4.244306564331055, "learning_rate": 5.685413462955529e-06, "loss": 0.3825, "step": 4492 }, { "epoch": 0.3069618091138895, "grad_norm": 4.136760711669922, "learning_rate": 5.6848084463231385e-06, "loss": 0.3156, "step": 4493 }, { "epoch": 0.30703012912482064, "grad_norm": 2.95930814743042, "learning_rate": 5.6842033227064735e-06, "loss": 0.4044, "step": 4494 }, { "epoch": 0.30709844913575185, "grad_norm": 4.608483791351318, "learning_rate": 5.683598092135164e-06, "loss": 0.4038, "step": 4495 }, { "epoch": 0.30716676914668306, "grad_norm": 3.1771488189697266, "learning_rate": 5.6829927546388494e-06, "loss": 0.2987, "step": 4496 }, { "epoch": 0.30723508915761427, "grad_norm": 5.840109825134277, "learning_rate": 5.68238731024717e-06, "loss": 0.3919, "step": 4497 }, { "epoch": 0.3073034091685455, "grad_norm": 3.7156548500061035, "learning_rate": 5.681781758989772e-06, "loss": 0.3781, "step": 4498 }, { "epoch": 0.3073717291794767, "grad_norm": 3.785344123840332, "learning_rate": 5.68117610089631e-06, "loss": 0.3695, "step": 4499 }, { "epoch": 0.30744004919040785, "grad_norm": 4.894802093505859, "learning_rate": 5.6805703359964385e-06, "loss": 0.4389, "step": 4500 }, { "epoch": 0.30750836920133906, "grad_norm": 3.8266124725341797, "learning_rate": 5.679964464319822e-06, "loss": 0.2935, "step": 4501 }, { "epoch": 0.30757668921227027, "grad_norm": 3.4946980476379395, "learning_rate": 5.67935848589613e-06, "loss": 0.3696, "step": 4502 }, { "epoch": 0.3076450092232015, "grad_norm": 3.1577792167663574, "learning_rate": 5.678752400755032e-06, "loss": 0.3439, "step": 4503 }, { "epoch": 0.3077133292341327, "grad_norm": 4.34098482131958, "learning_rate": 5.678146208926208e-06, "loss": 0.4326, "step": 4504 }, { "epoch": 0.3077816492450639, "grad_norm": 4.791823387145996, "learning_rate": 5.677539910439343e-06, "loss": 0.3651, "step": 4505 }, { "epoch": 0.30784996925599506, "grad_norm": 3.799266815185547, "learning_rate": 5.6769335053241245e-06, "loss": 0.3335, "step": 4506 }, { "epoch": 0.30791828926692627, "grad_norm": 3.930875062942505, "learning_rate": 5.676326993610245e-06, "loss": 0.3648, "step": 4507 }, { "epoch": 0.3079866092778575, "grad_norm": 3.536282539367676, "learning_rate": 5.6757203753274075e-06, "loss": 0.3708, "step": 4508 }, { "epoch": 0.3080549292887887, "grad_norm": 3.1037495136260986, "learning_rate": 5.675113650505314e-06, "loss": 0.3022, "step": 4509 }, { "epoch": 0.3081232492997199, "grad_norm": 3.5794191360473633, "learning_rate": 5.674506819173674e-06, "loss": 0.4261, "step": 4510 }, { "epoch": 0.3081915693106511, "grad_norm": 3.7061896324157715, "learning_rate": 5.673899881362203e-06, "loss": 0.3551, "step": 4511 }, { "epoch": 0.30825988932158227, "grad_norm": 3.0869784355163574, "learning_rate": 5.673292837100622e-06, "loss": 0.2623, "step": 4512 }, { "epoch": 0.3083282093325135, "grad_norm": 3.9247467517852783, "learning_rate": 5.672685686418655e-06, "loss": 0.3479, "step": 4513 }, { "epoch": 0.3083965293434447, "grad_norm": 4.197567462921143, "learning_rate": 5.672078429346034e-06, "loss": 0.3909, "step": 4514 }, { "epoch": 0.3084648493543759, "grad_norm": 4.211453914642334, "learning_rate": 5.671471065912493e-06, "loss": 0.2913, "step": 4515 }, { "epoch": 0.3085331693653071, "grad_norm": 3.6570072174072266, "learning_rate": 5.670863596147773e-06, "loss": 0.2779, "step": 4516 }, { "epoch": 0.3086014893762383, "grad_norm": 4.202027797698975, "learning_rate": 5.670256020081623e-06, "loss": 0.4093, "step": 4517 }, { "epoch": 0.3086698093871695, "grad_norm": 2.9887173175811768, "learning_rate": 5.669648337743791e-06, "loss": 0.3155, "step": 4518 }, { "epoch": 0.3087381293981007, "grad_norm": 3.9314515590667725, "learning_rate": 5.669040549164036e-06, "loss": 0.4375, "step": 4519 }, { "epoch": 0.3088064494090319, "grad_norm": 3.428053855895996, "learning_rate": 5.668432654372119e-06, "loss": 0.3239, "step": 4520 }, { "epoch": 0.3088747694199631, "grad_norm": 3.8657119274139404, "learning_rate": 5.667824653397806e-06, "loss": 0.321, "step": 4521 }, { "epoch": 0.3089430894308943, "grad_norm": 4.04781436920166, "learning_rate": 5.66721654627087e-06, "loss": 0.4742, "step": 4522 }, { "epoch": 0.30901140944182554, "grad_norm": 2.9664182662963867, "learning_rate": 5.666608333021088e-06, "loss": 0.2869, "step": 4523 }, { "epoch": 0.3090797294527567, "grad_norm": 4.6416826248168945, "learning_rate": 5.666000013678244e-06, "loss": 0.3696, "step": 4524 }, { "epoch": 0.3091480494636879, "grad_norm": 3.8330981731414795, "learning_rate": 5.665391588272124e-06, "loss": 0.5046, "step": 4525 }, { "epoch": 0.3092163694746191, "grad_norm": 3.3554389476776123, "learning_rate": 5.664783056832522e-06, "loss": 0.3378, "step": 4526 }, { "epoch": 0.3092846894855503, "grad_norm": 3.878161668777466, "learning_rate": 5.664174419389236e-06, "loss": 0.3442, "step": 4527 }, { "epoch": 0.30935300949648153, "grad_norm": 4.142190933227539, "learning_rate": 5.66356567597207e-06, "loss": 0.4929, "step": 4528 }, { "epoch": 0.30942132950741275, "grad_norm": 3.117568016052246, "learning_rate": 5.66295682661083e-06, "loss": 0.2593, "step": 4529 }, { "epoch": 0.3094896495183439, "grad_norm": 4.468307018280029, "learning_rate": 5.662347871335333e-06, "loss": 0.3431, "step": 4530 }, { "epoch": 0.3095579695292751, "grad_norm": 4.751859664916992, "learning_rate": 5.661738810175396e-06, "loss": 0.4184, "step": 4531 }, { "epoch": 0.3096262895402063, "grad_norm": 4.516450881958008, "learning_rate": 5.661129643160845e-06, "loss": 0.3452, "step": 4532 }, { "epoch": 0.30969460955113753, "grad_norm": 3.6341006755828857, "learning_rate": 5.660520370321506e-06, "loss": 0.3223, "step": 4533 }, { "epoch": 0.30976292956206875, "grad_norm": 4.388654708862305, "learning_rate": 5.659910991687216e-06, "loss": 0.4795, "step": 4534 }, { "epoch": 0.30983124957299996, "grad_norm": 4.394974231719971, "learning_rate": 5.659301507287814e-06, "loss": 0.4753, "step": 4535 }, { "epoch": 0.3098995695839311, "grad_norm": 5.373911380767822, "learning_rate": 5.658691917153144e-06, "loss": 0.5043, "step": 4536 }, { "epoch": 0.3099678895948623, "grad_norm": 3.8355624675750732, "learning_rate": 5.658082221313059e-06, "loss": 0.4278, "step": 4537 }, { "epoch": 0.31003620960579353, "grad_norm": 4.618902683258057, "learning_rate": 5.657472419797411e-06, "loss": 0.3732, "step": 4538 }, { "epoch": 0.31010452961672474, "grad_norm": 5.086912155151367, "learning_rate": 5.65686251263606e-06, "loss": 0.4266, "step": 4539 }, { "epoch": 0.31017284962765596, "grad_norm": 3.4522604942321777, "learning_rate": 5.656252499858875e-06, "loss": 0.2577, "step": 4540 }, { "epoch": 0.31024116963858717, "grad_norm": 4.268441677093506, "learning_rate": 5.6556423814957235e-06, "loss": 0.3575, "step": 4541 }, { "epoch": 0.3103094896495183, "grad_norm": 3.4375176429748535, "learning_rate": 5.6550321575764836e-06, "loss": 0.331, "step": 4542 }, { "epoch": 0.31037780966044953, "grad_norm": 3.5702476501464844, "learning_rate": 5.654421828131034e-06, "loss": 0.3655, "step": 4543 }, { "epoch": 0.31044612967138074, "grad_norm": 3.590825080871582, "learning_rate": 5.653811393189263e-06, "loss": 0.3059, "step": 4544 }, { "epoch": 0.31051444968231195, "grad_norm": 4.562346458435059, "learning_rate": 5.653200852781061e-06, "loss": 0.4423, "step": 4545 }, { "epoch": 0.31058276969324317, "grad_norm": 3.464806079864502, "learning_rate": 5.652590206936325e-06, "loss": 0.3832, "step": 4546 }, { "epoch": 0.3106510897041744, "grad_norm": 4.270017147064209, "learning_rate": 5.651979455684956e-06, "loss": 0.4474, "step": 4547 }, { "epoch": 0.31071940971510553, "grad_norm": 4.015577793121338, "learning_rate": 5.651368599056861e-06, "loss": 0.3273, "step": 4548 }, { "epoch": 0.31078772972603674, "grad_norm": 3.8954989910125732, "learning_rate": 5.6507576370819535e-06, "loss": 0.3494, "step": 4549 }, { "epoch": 0.31085604973696795, "grad_norm": 3.902944564819336, "learning_rate": 5.650146569790149e-06, "loss": 0.4138, "step": 4550 }, { "epoch": 0.31092436974789917, "grad_norm": 3.91813588142395, "learning_rate": 5.64953539721137e-06, "loss": 0.3072, "step": 4551 }, { "epoch": 0.3109926897588304, "grad_norm": 4.124589920043945, "learning_rate": 5.648924119375545e-06, "loss": 0.4529, "step": 4552 }, { "epoch": 0.3110610097697616, "grad_norm": 3.7904348373413086, "learning_rate": 5.648312736312605e-06, "loss": 0.3654, "step": 4553 }, { "epoch": 0.31112932978069274, "grad_norm": 5.0381669998168945, "learning_rate": 5.64770124805249e-06, "loss": 0.437, "step": 4554 }, { "epoch": 0.31119764979162395, "grad_norm": 3.9528987407684326, "learning_rate": 5.647089654625141e-06, "loss": 0.3912, "step": 4555 }, { "epoch": 0.31126596980255516, "grad_norm": 3.452374219894409, "learning_rate": 5.646477956060507e-06, "loss": 0.3743, "step": 4556 }, { "epoch": 0.3113342898134864, "grad_norm": 4.328404426574707, "learning_rate": 5.645866152388541e-06, "loss": 0.3498, "step": 4557 }, { "epoch": 0.3114026098244176, "grad_norm": 4.043459892272949, "learning_rate": 5.645254243639203e-06, "loss": 0.3818, "step": 4558 }, { "epoch": 0.3114709298353488, "grad_norm": 3.711153745651245, "learning_rate": 5.644642229842454e-06, "loss": 0.2624, "step": 4559 }, { "epoch": 0.31153924984627995, "grad_norm": 4.461136817932129, "learning_rate": 5.644030111028264e-06, "loss": 0.418, "step": 4560 }, { "epoch": 0.31160756985721116, "grad_norm": 3.3484947681427, "learning_rate": 5.643417887226605e-06, "loss": 0.3551, "step": 4561 }, { "epoch": 0.3116758898681424, "grad_norm": 3.3023271560668945, "learning_rate": 5.642805558467459e-06, "loss": 0.3217, "step": 4562 }, { "epoch": 0.3117442098790736, "grad_norm": 3.6147968769073486, "learning_rate": 5.642193124780809e-06, "loss": 0.3141, "step": 4563 }, { "epoch": 0.3118125298900048, "grad_norm": 4.632759094238281, "learning_rate": 5.641580586196644e-06, "loss": 0.3588, "step": 4564 }, { "epoch": 0.311880849900936, "grad_norm": 3.39554762840271, "learning_rate": 5.640967942744958e-06, "loss": 0.4555, "step": 4565 }, { "epoch": 0.31194916991186716, "grad_norm": 4.658933162689209, "learning_rate": 5.6403551944557504e-06, "loss": 0.369, "step": 4566 }, { "epoch": 0.3120174899227984, "grad_norm": 3.670469045639038, "learning_rate": 5.639742341359027e-06, "loss": 0.3447, "step": 4567 }, { "epoch": 0.3120858099337296, "grad_norm": 4.498167514801025, "learning_rate": 5.639129383484797e-06, "loss": 0.4079, "step": 4568 }, { "epoch": 0.3121541299446608, "grad_norm": 4.6247100830078125, "learning_rate": 5.638516320863075e-06, "loss": 0.4855, "step": 4569 }, { "epoch": 0.312222449955592, "grad_norm": 4.287580490112305, "learning_rate": 5.637903153523881e-06, "loss": 0.3924, "step": 4570 }, { "epoch": 0.3122907699665232, "grad_norm": 3.9366867542266846, "learning_rate": 5.637289881497242e-06, "loss": 0.4683, "step": 4571 }, { "epoch": 0.3123590899774544, "grad_norm": 4.230350017547607, "learning_rate": 5.636676504813185e-06, "loss": 0.4571, "step": 4572 }, { "epoch": 0.3124274099883856, "grad_norm": 4.5687689781188965, "learning_rate": 5.636063023501748e-06, "loss": 0.445, "step": 4573 }, { "epoch": 0.3124957299993168, "grad_norm": 4.043628215789795, "learning_rate": 5.63544943759297e-06, "loss": 0.3861, "step": 4574 }, { "epoch": 0.312564050010248, "grad_norm": 2.568232774734497, "learning_rate": 5.634835747116898e-06, "loss": 0.2676, "step": 4575 }, { "epoch": 0.3126323700211792, "grad_norm": 3.6741244792938232, "learning_rate": 5.634221952103583e-06, "loss": 0.3202, "step": 4576 }, { "epoch": 0.31270069003211043, "grad_norm": 3.512040376663208, "learning_rate": 5.633608052583079e-06, "loss": 0.4112, "step": 4577 }, { "epoch": 0.3127690100430416, "grad_norm": 4.65917444229126, "learning_rate": 5.6329940485854485e-06, "loss": 0.4234, "step": 4578 }, { "epoch": 0.3128373300539728, "grad_norm": 2.9784364700317383, "learning_rate": 5.6323799401407575e-06, "loss": 0.3055, "step": 4579 }, { "epoch": 0.312905650064904, "grad_norm": 4.2171478271484375, "learning_rate": 5.631765727279077e-06, "loss": 0.3704, "step": 4580 }, { "epoch": 0.3129739700758352, "grad_norm": 4.597839832305908, "learning_rate": 5.631151410030483e-06, "loss": 0.4502, "step": 4581 }, { "epoch": 0.31304229008676643, "grad_norm": 4.294281005859375, "learning_rate": 5.6305369884250585e-06, "loss": 0.4695, "step": 4582 }, { "epoch": 0.31311061009769764, "grad_norm": 2.8737874031066895, "learning_rate": 5.629922462492889e-06, "loss": 0.3064, "step": 4583 }, { "epoch": 0.3131789301086288, "grad_norm": 3.9044272899627686, "learning_rate": 5.629307832264066e-06, "loss": 0.3103, "step": 4584 }, { "epoch": 0.31324725011956, "grad_norm": 3.369645595550537, "learning_rate": 5.628693097768686e-06, "loss": 0.4547, "step": 4585 }, { "epoch": 0.3133155701304912, "grad_norm": 3.9710090160369873, "learning_rate": 5.628078259036851e-06, "loss": 0.4845, "step": 4586 }, { "epoch": 0.3133838901414224, "grad_norm": 4.863073825836182, "learning_rate": 5.62746331609867e-06, "loss": 0.4263, "step": 4587 }, { "epoch": 0.31345221015235364, "grad_norm": 5.120116710662842, "learning_rate": 5.626848268984253e-06, "loss": 0.4324, "step": 4588 }, { "epoch": 0.31352053016328485, "grad_norm": 4.802399158477783, "learning_rate": 5.626233117723719e-06, "loss": 0.2987, "step": 4589 }, { "epoch": 0.313588850174216, "grad_norm": 3.6628496646881104, "learning_rate": 5.625617862347187e-06, "loss": 0.3413, "step": 4590 }, { "epoch": 0.3136571701851472, "grad_norm": 4.111997604370117, "learning_rate": 5.625002502884787e-06, "loss": 0.3176, "step": 4591 }, { "epoch": 0.3137254901960784, "grad_norm": 3.7201292514801025, "learning_rate": 5.624387039366653e-06, "loss": 0.4391, "step": 4592 }, { "epoch": 0.31379381020700964, "grad_norm": 3.861877679824829, "learning_rate": 5.6237714718229186e-06, "loss": 0.3541, "step": 4593 }, { "epoch": 0.31386213021794085, "grad_norm": 4.0282697677612305, "learning_rate": 5.62315580028373e-06, "loss": 0.3323, "step": 4594 }, { "epoch": 0.31393045022887206, "grad_norm": 4.562498092651367, "learning_rate": 5.6225400247792325e-06, "loss": 0.3511, "step": 4595 }, { "epoch": 0.3139987702398032, "grad_norm": 3.8190670013427734, "learning_rate": 5.621924145339582e-06, "loss": 0.3477, "step": 4596 }, { "epoch": 0.3140670902507344, "grad_norm": 3.804943799972534, "learning_rate": 5.621308161994933e-06, "loss": 0.3171, "step": 4597 }, { "epoch": 0.31413541026166564, "grad_norm": 3.9283034801483154, "learning_rate": 5.6206920747754494e-06, "loss": 0.3984, "step": 4598 }, { "epoch": 0.31420373027259685, "grad_norm": 5.279965877532959, "learning_rate": 5.620075883711302e-06, "loss": 0.472, "step": 4599 }, { "epoch": 0.31427205028352806, "grad_norm": 3.840646982192993, "learning_rate": 5.619459588832662e-06, "loss": 0.369, "step": 4600 }, { "epoch": 0.31434037029445927, "grad_norm": 3.2758243083953857, "learning_rate": 5.618843190169707e-06, "loss": 0.3168, "step": 4601 }, { "epoch": 0.3144086903053904, "grad_norm": 4.983372688293457, "learning_rate": 5.618226687752623e-06, "loss": 0.4575, "step": 4602 }, { "epoch": 0.31447701031632164, "grad_norm": 3.839294910430908, "learning_rate": 5.617610081611596e-06, "loss": 0.361, "step": 4603 }, { "epoch": 0.31454533032725285, "grad_norm": 3.664889097213745, "learning_rate": 5.616993371776819e-06, "loss": 0.4232, "step": 4604 }, { "epoch": 0.31461365033818406, "grad_norm": 3.8299567699432373, "learning_rate": 5.616376558278494e-06, "loss": 0.4586, "step": 4605 }, { "epoch": 0.31468197034911527, "grad_norm": 3.2376456260681152, "learning_rate": 5.615759641146821e-06, "loss": 0.2899, "step": 4606 }, { "epoch": 0.3147502903600465, "grad_norm": 2.747255802154541, "learning_rate": 5.615142620412012e-06, "loss": 0.3745, "step": 4607 }, { "epoch": 0.31481861037097764, "grad_norm": 4.467461109161377, "learning_rate": 5.6145254961042794e-06, "loss": 0.3584, "step": 4608 }, { "epoch": 0.31488693038190885, "grad_norm": 4.283105373382568, "learning_rate": 5.61390826825384e-06, "loss": 0.4138, "step": 4609 }, { "epoch": 0.31495525039284006, "grad_norm": 3.6793265342712402, "learning_rate": 5.613290936890923e-06, "loss": 0.3956, "step": 4610 }, { "epoch": 0.31502357040377127, "grad_norm": 4.182212829589844, "learning_rate": 5.612673502045753e-06, "loss": 0.4534, "step": 4611 }, { "epoch": 0.3150918904147025, "grad_norm": 3.9784560203552246, "learning_rate": 5.612055963748565e-06, "loss": 0.3154, "step": 4612 }, { "epoch": 0.3151602104256337, "grad_norm": 3.2900636196136475, "learning_rate": 5.6114383220296005e-06, "loss": 0.337, "step": 4613 }, { "epoch": 0.31522853043656485, "grad_norm": 3.206592082977295, "learning_rate": 5.6108205769191e-06, "loss": 0.292, "step": 4614 }, { "epoch": 0.31529685044749606, "grad_norm": 4.25394344329834, "learning_rate": 5.610202728447315e-06, "loss": 0.4762, "step": 4615 }, { "epoch": 0.31536517045842727, "grad_norm": 3.940880537033081, "learning_rate": 5.609584776644503e-06, "loss": 0.3501, "step": 4616 }, { "epoch": 0.3154334904693585, "grad_norm": 2.8091375827789307, "learning_rate": 5.608966721540917e-06, "loss": 0.3337, "step": 4617 }, { "epoch": 0.3155018104802897, "grad_norm": 5.024214744567871, "learning_rate": 5.608348563166826e-06, "loss": 0.3631, "step": 4618 }, { "epoch": 0.3155701304912209, "grad_norm": 5.228930950164795, "learning_rate": 5.607730301552498e-06, "loss": 0.3249, "step": 4619 }, { "epoch": 0.31563845050215206, "grad_norm": 4.06660270690918, "learning_rate": 5.607111936728209e-06, "loss": 0.387, "step": 4620 }, { "epoch": 0.31570677051308327, "grad_norm": 3.8521337509155273, "learning_rate": 5.606493468724236e-06, "loss": 0.4085, "step": 4621 }, { "epoch": 0.3157750905240145, "grad_norm": 3.138509750366211, "learning_rate": 5.605874897570867e-06, "loss": 0.3353, "step": 4622 }, { "epoch": 0.3158434105349457, "grad_norm": 2.8706743717193604, "learning_rate": 5.60525622329839e-06, "loss": 0.3418, "step": 4623 }, { "epoch": 0.3159117305458769, "grad_norm": 3.794548273086548, "learning_rate": 5.6046374459370995e-06, "loss": 0.3243, "step": 4624 }, { "epoch": 0.3159800505568081, "grad_norm": 3.9607388973236084, "learning_rate": 5.604018565517296e-06, "loss": 0.3732, "step": 4625 }, { "epoch": 0.31604837056773927, "grad_norm": 4.250669479370117, "learning_rate": 5.603399582069285e-06, "loss": 0.381, "step": 4626 }, { "epoch": 0.3161166905786705, "grad_norm": 4.2352399826049805, "learning_rate": 5.602780495623375e-06, "loss": 0.4162, "step": 4627 }, { "epoch": 0.3161850105896017, "grad_norm": 4.027412414550781, "learning_rate": 5.602161306209881e-06, "loss": 0.5368, "step": 4628 }, { "epoch": 0.3162533306005329, "grad_norm": 3.7378125190734863, "learning_rate": 5.601542013859126e-06, "loss": 0.3059, "step": 4629 }, { "epoch": 0.3163216506114641, "grad_norm": 3.9444544315338135, "learning_rate": 5.600922618601432e-06, "loss": 0.406, "step": 4630 }, { "epoch": 0.3163899706223953, "grad_norm": 4.276487350463867, "learning_rate": 5.600303120467131e-06, "loss": 0.3959, "step": 4631 }, { "epoch": 0.3164582906333265, "grad_norm": 4.356963634490967, "learning_rate": 5.599683519486556e-06, "loss": 0.341, "step": 4632 }, { "epoch": 0.3165266106442577, "grad_norm": 4.128361701965332, "learning_rate": 5.59906381569005e-06, "loss": 0.3844, "step": 4633 }, { "epoch": 0.3165949306551889, "grad_norm": 3.51539945602417, "learning_rate": 5.5984440091079555e-06, "loss": 0.3247, "step": 4634 }, { "epoch": 0.3166632506661201, "grad_norm": 6.2843122482299805, "learning_rate": 5.597824099770625e-06, "loss": 0.4904, "step": 4635 }, { "epoch": 0.3167315706770513, "grad_norm": 4.026482105255127, "learning_rate": 5.597204087708412e-06, "loss": 0.4412, "step": 4636 }, { "epoch": 0.31679989068798253, "grad_norm": 4.725183010101318, "learning_rate": 5.5965839729516794e-06, "loss": 0.538, "step": 4637 }, { "epoch": 0.3168682106989137, "grad_norm": 4.307034492492676, "learning_rate": 5.59596375553079e-06, "loss": 0.3603, "step": 4638 }, { "epoch": 0.3169365307098449, "grad_norm": 3.771789073944092, "learning_rate": 5.595343435476115e-06, "loss": 0.3966, "step": 4639 }, { "epoch": 0.3170048507207761, "grad_norm": 4.991438388824463, "learning_rate": 5.59472301281803e-06, "loss": 0.4095, "step": 4640 }, { "epoch": 0.3170731707317073, "grad_norm": 4.345936298370361, "learning_rate": 5.594102487586916e-06, "loss": 0.4313, "step": 4641 }, { "epoch": 0.31714149074263853, "grad_norm": 4.179461479187012, "learning_rate": 5.593481859813158e-06, "loss": 0.4517, "step": 4642 }, { "epoch": 0.31720981075356974, "grad_norm": 3.5576956272125244, "learning_rate": 5.592861129527145e-06, "loss": 0.3188, "step": 4643 }, { "epoch": 0.3172781307645009, "grad_norm": 3.4364748001098633, "learning_rate": 5.592240296759275e-06, "loss": 0.2977, "step": 4644 }, { "epoch": 0.3173464507754321, "grad_norm": 3.242753267288208, "learning_rate": 5.591619361539947e-06, "loss": 0.3531, "step": 4645 }, { "epoch": 0.3174147707863633, "grad_norm": 3.6383285522460938, "learning_rate": 5.590998323899568e-06, "loss": 0.4875, "step": 4646 }, { "epoch": 0.31748309079729453, "grad_norm": 3.476025342941284, "learning_rate": 5.5903771838685465e-06, "loss": 0.3422, "step": 4647 }, { "epoch": 0.31755141080822574, "grad_norm": 4.540338039398193, "learning_rate": 5.5897559414773e-06, "loss": 0.4221, "step": 4648 }, { "epoch": 0.31761973081915695, "grad_norm": 4.407923221588135, "learning_rate": 5.589134596756248e-06, "loss": 0.2968, "step": 4649 }, { "epoch": 0.3176880508300881, "grad_norm": 4.080932140350342, "learning_rate": 5.588513149735817e-06, "loss": 0.362, "step": 4650 }, { "epoch": 0.3177563708410193, "grad_norm": 3.500028133392334, "learning_rate": 5.587891600446435e-06, "loss": 0.3393, "step": 4651 }, { "epoch": 0.31782469085195053, "grad_norm": 3.579857349395752, "learning_rate": 5.587269948918541e-06, "loss": 0.3528, "step": 4652 }, { "epoch": 0.31789301086288174, "grad_norm": 3.147763967514038, "learning_rate": 5.586648195182573e-06, "loss": 0.2896, "step": 4653 }, { "epoch": 0.31796133087381295, "grad_norm": 5.571372032165527, "learning_rate": 5.58602633926898e-06, "loss": 0.4772, "step": 4654 }, { "epoch": 0.31802965088474416, "grad_norm": 3.733825206756592, "learning_rate": 5.585404381208209e-06, "loss": 0.3633, "step": 4655 }, { "epoch": 0.3180979708956753, "grad_norm": 5.653640270233154, "learning_rate": 5.584782321030718e-06, "loss": 0.3422, "step": 4656 }, { "epoch": 0.31816629090660653, "grad_norm": 4.525973796844482, "learning_rate": 5.584160158766965e-06, "loss": 0.4753, "step": 4657 }, { "epoch": 0.31823461091753774, "grad_norm": 5.168333530426025, "learning_rate": 5.583537894447419e-06, "loss": 0.5912, "step": 4658 }, { "epoch": 0.31830293092846895, "grad_norm": 4.227919578552246, "learning_rate": 5.5829155281025485e-06, "loss": 0.3448, "step": 4659 }, { "epoch": 0.31837125093940016, "grad_norm": 3.923765182495117, "learning_rate": 5.58229305976283e-06, "loss": 0.3192, "step": 4660 }, { "epoch": 0.3184395709503314, "grad_norm": 4.111042022705078, "learning_rate": 5.581670489458742e-06, "loss": 0.3628, "step": 4661 }, { "epoch": 0.31850789096126253, "grad_norm": 4.203314304351807, "learning_rate": 5.581047817220774e-06, "loss": 0.3983, "step": 4662 }, { "epoch": 0.31857621097219374, "grad_norm": 5.1643900871276855, "learning_rate": 5.580425043079415e-06, "loss": 0.3331, "step": 4663 }, { "epoch": 0.31864453098312495, "grad_norm": 3.4863414764404297, "learning_rate": 5.579802167065158e-06, "loss": 0.3722, "step": 4664 }, { "epoch": 0.31871285099405616, "grad_norm": 4.388134479522705, "learning_rate": 5.579179189208506e-06, "loss": 0.3456, "step": 4665 }, { "epoch": 0.3187811710049874, "grad_norm": 4.069612503051758, "learning_rate": 5.578556109539965e-06, "loss": 0.457, "step": 4666 }, { "epoch": 0.3188494910159186, "grad_norm": 4.268705368041992, "learning_rate": 5.577932928090044e-06, "loss": 0.4404, "step": 4667 }, { "epoch": 0.31891781102684974, "grad_norm": 3.6676456928253174, "learning_rate": 5.577309644889261e-06, "loss": 0.2452, "step": 4668 }, { "epoch": 0.31898613103778095, "grad_norm": 3.736691474914551, "learning_rate": 5.576686259968134e-06, "loss": 0.4173, "step": 4669 }, { "epoch": 0.31905445104871216, "grad_norm": 6.189737319946289, "learning_rate": 5.57606277335719e-06, "loss": 0.3632, "step": 4670 }, { "epoch": 0.3191227710596434, "grad_norm": 3.500274181365967, "learning_rate": 5.575439185086957e-06, "loss": 0.3013, "step": 4671 }, { "epoch": 0.3191910910705746, "grad_norm": 3.3684685230255127, "learning_rate": 5.574815495187974e-06, "loss": 0.3997, "step": 4672 }, { "epoch": 0.3192594110815058, "grad_norm": 4.3159708976745605, "learning_rate": 5.574191703690779e-06, "loss": 0.4404, "step": 4673 }, { "epoch": 0.31932773109243695, "grad_norm": 3.869265079498291, "learning_rate": 5.573567810625919e-06, "loss": 0.4643, "step": 4674 }, { "epoch": 0.31939605110336816, "grad_norm": 3.186927556991577, "learning_rate": 5.5729438160239415e-06, "loss": 0.3503, "step": 4675 }, { "epoch": 0.3194643711142994, "grad_norm": 3.559732437133789, "learning_rate": 5.5723197199154055e-06, "loss": 0.4206, "step": 4676 }, { "epoch": 0.3195326911252306, "grad_norm": 3.919222116470337, "learning_rate": 5.5716955223308706e-06, "loss": 0.3837, "step": 4677 }, { "epoch": 0.3196010111361618, "grad_norm": 3.0995655059814453, "learning_rate": 5.571071223300899e-06, "loss": 0.3662, "step": 4678 }, { "epoch": 0.319669331147093, "grad_norm": 4.014984130859375, "learning_rate": 5.5704468228560656e-06, "loss": 0.4163, "step": 4679 }, { "epoch": 0.31973765115802416, "grad_norm": 3.0573198795318604, "learning_rate": 5.569822321026942e-06, "loss": 0.3515, "step": 4680 }, { "epoch": 0.31980597116895537, "grad_norm": 3.684142827987671, "learning_rate": 5.56919771784411e-06, "loss": 0.2996, "step": 4681 }, { "epoch": 0.3198742911798866, "grad_norm": 3.5126664638519287, "learning_rate": 5.568573013338154e-06, "loss": 0.3394, "step": 4682 }, { "epoch": 0.3199426111908178, "grad_norm": 2.310678005218506, "learning_rate": 5.567948207539665e-06, "loss": 0.3004, "step": 4683 }, { "epoch": 0.320010931201749, "grad_norm": 4.016465187072754, "learning_rate": 5.567323300479237e-06, "loss": 0.3503, "step": 4684 }, { "epoch": 0.3200792512126802, "grad_norm": 3.056459426879883, "learning_rate": 5.566698292187471e-06, "loss": 0.3663, "step": 4685 }, { "epoch": 0.32014757122361137, "grad_norm": 4.128964900970459, "learning_rate": 5.566073182694972e-06, "loss": 0.3179, "step": 4686 }, { "epoch": 0.3202158912345426, "grad_norm": 3.4099645614624023, "learning_rate": 5.565447972032349e-06, "loss": 0.4368, "step": 4687 }, { "epoch": 0.3202842112454738, "grad_norm": 3.6373350620269775, "learning_rate": 5.5648226602302186e-06, "loss": 0.3437, "step": 4688 }, { "epoch": 0.320352531256405, "grad_norm": 4.179126739501953, "learning_rate": 5.564197247319199e-06, "loss": 0.3309, "step": 4689 }, { "epoch": 0.3204208512673362, "grad_norm": 5.157217025756836, "learning_rate": 5.563571733329916e-06, "loss": 0.3508, "step": 4690 }, { "epoch": 0.3204891712782674, "grad_norm": 3.111572027206421, "learning_rate": 5.562946118292998e-06, "loss": 0.3634, "step": 4691 }, { "epoch": 0.3205574912891986, "grad_norm": 3.5012309551239014, "learning_rate": 5.562320402239082e-06, "loss": 0.3471, "step": 4692 }, { "epoch": 0.3206258113001298, "grad_norm": 4.951180458068848, "learning_rate": 5.561694585198804e-06, "loss": 0.3543, "step": 4693 }, { "epoch": 0.320694131311061, "grad_norm": 3.931046485900879, "learning_rate": 5.5610686672028135e-06, "loss": 0.4077, "step": 4694 }, { "epoch": 0.3207624513219922, "grad_norm": 4.6581220626831055, "learning_rate": 5.560442648281756e-06, "loss": 0.3846, "step": 4695 }, { "epoch": 0.3208307713329234, "grad_norm": 3.765184164047241, "learning_rate": 5.559816528466289e-06, "loss": 0.5001, "step": 4696 }, { "epoch": 0.32089909134385464, "grad_norm": 4.172109603881836, "learning_rate": 5.559190307787068e-06, "loss": 0.5143, "step": 4697 }, { "epoch": 0.3209674113547858, "grad_norm": 4.375961780548096, "learning_rate": 5.5585639862747614e-06, "loss": 0.3777, "step": 4698 }, { "epoch": 0.321035731365717, "grad_norm": 3.593421459197998, "learning_rate": 5.557937563960037e-06, "loss": 0.38, "step": 4699 }, { "epoch": 0.3211040513766482, "grad_norm": 4.434642314910889, "learning_rate": 5.5573110408735675e-06, "loss": 0.4637, "step": 4700 }, { "epoch": 0.3211723713875794, "grad_norm": 3.376716136932373, "learning_rate": 5.556684417046035e-06, "loss": 0.3585, "step": 4701 }, { "epoch": 0.32124069139851064, "grad_norm": 3.4844717979431152, "learning_rate": 5.556057692508121e-06, "loss": 0.4038, "step": 4702 }, { "epoch": 0.32130901140944185, "grad_norm": 2.4614479541778564, "learning_rate": 5.555430867290516e-06, "loss": 0.2615, "step": 4703 }, { "epoch": 0.321377331420373, "grad_norm": 3.977527618408203, "learning_rate": 5.554803941423913e-06, "loss": 0.3964, "step": 4704 }, { "epoch": 0.3214456514313042, "grad_norm": 4.604752540588379, "learning_rate": 5.554176914939013e-06, "loss": 0.3685, "step": 4705 }, { "epoch": 0.3215139714422354, "grad_norm": 5.369147777557373, "learning_rate": 5.553549787866517e-06, "loss": 0.4203, "step": 4706 }, { "epoch": 0.32158229145316664, "grad_norm": 4.447107791900635, "learning_rate": 5.552922560237136e-06, "loss": 0.3475, "step": 4707 }, { "epoch": 0.32165061146409785, "grad_norm": 3.954519271850586, "learning_rate": 5.552295232081581e-06, "loss": 0.3337, "step": 4708 }, { "epoch": 0.32171893147502906, "grad_norm": 4.420767784118652, "learning_rate": 5.551667803430575e-06, "loss": 0.4396, "step": 4709 }, { "epoch": 0.3217872514859602, "grad_norm": 3.811734676361084, "learning_rate": 5.5510402743148354e-06, "loss": 0.3367, "step": 4710 }, { "epoch": 0.3218555714968914, "grad_norm": 3.1572041511535645, "learning_rate": 5.550412644765096e-06, "loss": 0.3536, "step": 4711 }, { "epoch": 0.32192389150782263, "grad_norm": 4.080296039581299, "learning_rate": 5.5497849148120885e-06, "loss": 0.3691, "step": 4712 }, { "epoch": 0.32199221151875385, "grad_norm": 4.166895866394043, "learning_rate": 5.549157084486552e-06, "loss": 0.3041, "step": 4713 }, { "epoch": 0.32206053152968506, "grad_norm": 2.9232470989227295, "learning_rate": 5.548529153819226e-06, "loss": 0.3659, "step": 4714 }, { "epoch": 0.32212885154061627, "grad_norm": 2.9534976482391357, "learning_rate": 5.5479011228408635e-06, "loss": 0.2733, "step": 4715 }, { "epoch": 0.3221971715515474, "grad_norm": 3.8695569038391113, "learning_rate": 5.5472729915822125e-06, "loss": 0.3926, "step": 4716 }, { "epoch": 0.32226549156247863, "grad_norm": 3.730797052383423, "learning_rate": 5.546644760074036e-06, "loss": 0.431, "step": 4717 }, { "epoch": 0.32233381157340985, "grad_norm": 4.4547438621521, "learning_rate": 5.546016428347093e-06, "loss": 0.3725, "step": 4718 }, { "epoch": 0.32240213158434106, "grad_norm": 4.197160243988037, "learning_rate": 5.545387996432154e-06, "loss": 0.3742, "step": 4719 }, { "epoch": 0.32247045159527227, "grad_norm": 3.243361711502075, "learning_rate": 5.544759464359989e-06, "loss": 0.2634, "step": 4720 }, { "epoch": 0.3225387716062035, "grad_norm": 4.160927772521973, "learning_rate": 5.544130832161378e-06, "loss": 0.4027, "step": 4721 }, { "epoch": 0.32260709161713463, "grad_norm": 3.4836466312408447, "learning_rate": 5.543502099867101e-06, "loss": 0.3569, "step": 4722 }, { "epoch": 0.32267541162806584, "grad_norm": 4.618745803833008, "learning_rate": 5.542873267507948e-06, "loss": 0.4396, "step": 4723 }, { "epoch": 0.32274373163899706, "grad_norm": 3.9520113468170166, "learning_rate": 5.54224433511471e-06, "loss": 0.3457, "step": 4724 }, { "epoch": 0.32281205164992827, "grad_norm": 3.377486228942871, "learning_rate": 5.541615302718183e-06, "loss": 0.3434, "step": 4725 }, { "epoch": 0.3228803716608595, "grad_norm": 3.5050926208496094, "learning_rate": 5.54098617034917e-06, "loss": 0.3766, "step": 4726 }, { "epoch": 0.3229486916717907, "grad_norm": 3.733097791671753, "learning_rate": 5.54035693803848e-06, "loss": 0.3895, "step": 4727 }, { "epoch": 0.32301701168272184, "grad_norm": 4.256439208984375, "learning_rate": 5.53972760581692e-06, "loss": 0.3724, "step": 4728 }, { "epoch": 0.32308533169365306, "grad_norm": 2.5609166622161865, "learning_rate": 5.539098173715312e-06, "loss": 0.2525, "step": 4729 }, { "epoch": 0.32315365170458427, "grad_norm": 4.159631252288818, "learning_rate": 5.538468641764474e-06, "loss": 0.4868, "step": 4730 }, { "epoch": 0.3232219717155155, "grad_norm": 3.7808547019958496, "learning_rate": 5.537839009995235e-06, "loss": 0.4045, "step": 4731 }, { "epoch": 0.3232902917264467, "grad_norm": 3.5881896018981934, "learning_rate": 5.537209278438424e-06, "loss": 0.3159, "step": 4732 }, { "epoch": 0.3233586117373779, "grad_norm": 4.046442031860352, "learning_rate": 5.536579447124879e-06, "loss": 0.3759, "step": 4733 }, { "epoch": 0.32342693174830905, "grad_norm": 3.841362237930298, "learning_rate": 5.5359495160854405e-06, "loss": 0.3826, "step": 4734 }, { "epoch": 0.32349525175924027, "grad_norm": 3.027047872543335, "learning_rate": 5.535319485350955e-06, "loss": 0.2607, "step": 4735 }, { "epoch": 0.3235635717701715, "grad_norm": 4.066406726837158, "learning_rate": 5.534689354952272e-06, "loss": 0.4255, "step": 4736 }, { "epoch": 0.3236318917811027, "grad_norm": 4.081809997558594, "learning_rate": 5.53405912492025e-06, "loss": 0.4035, "step": 4737 }, { "epoch": 0.3237002117920339, "grad_norm": 3.1461613178253174, "learning_rate": 5.533428795285747e-06, "loss": 0.274, "step": 4738 }, { "epoch": 0.3237685318029651, "grad_norm": 3.708994150161743, "learning_rate": 5.53279836607963e-06, "loss": 0.3952, "step": 4739 }, { "epoch": 0.32383685181389626, "grad_norm": 3.385744094848633, "learning_rate": 5.532167837332769e-06, "loss": 0.3771, "step": 4740 }, { "epoch": 0.3239051718248275, "grad_norm": 4.094042778015137, "learning_rate": 5.53153720907604e-06, "loss": 0.4007, "step": 4741 }, { "epoch": 0.3239734918357587, "grad_norm": 3.7684836387634277, "learning_rate": 5.5309064813403226e-06, "loss": 0.4017, "step": 4742 }, { "epoch": 0.3240418118466899, "grad_norm": 4.338698863983154, "learning_rate": 5.530275654156501e-06, "loss": 0.3409, "step": 4743 }, { "epoch": 0.3241101318576211, "grad_norm": 3.4899508953094482, "learning_rate": 5.529644727555467e-06, "loss": 0.2934, "step": 4744 }, { "epoch": 0.3241784518685523, "grad_norm": 3.252845048904419, "learning_rate": 5.529013701568114e-06, "loss": 0.2721, "step": 4745 }, { "epoch": 0.3242467718794835, "grad_norm": 3.6072440147399902, "learning_rate": 5.528382576225342e-06, "loss": 0.4501, "step": 4746 }, { "epoch": 0.3243150918904147, "grad_norm": 4.008630275726318, "learning_rate": 5.527751351558056e-06, "loss": 0.3899, "step": 4747 }, { "epoch": 0.3243834119013459, "grad_norm": 3.9399452209472656, "learning_rate": 5.527120027597164e-06, "loss": 0.3455, "step": 4748 }, { "epoch": 0.3244517319122771, "grad_norm": 3.2644386291503906, "learning_rate": 5.526488604373583e-06, "loss": 0.4879, "step": 4749 }, { "epoch": 0.3245200519232083, "grad_norm": 3.60610032081604, "learning_rate": 5.525857081918229e-06, "loss": 0.3391, "step": 4750 }, { "epoch": 0.32458837193413953, "grad_norm": 4.316967487335205, "learning_rate": 5.5252254602620285e-06, "loss": 0.3786, "step": 4751 }, { "epoch": 0.3246566919450707, "grad_norm": 3.486048460006714, "learning_rate": 5.524593739435908e-06, "loss": 0.3166, "step": 4752 }, { "epoch": 0.3247250119560019, "grad_norm": 3.574230670928955, "learning_rate": 5.523961919470803e-06, "loss": 0.3121, "step": 4753 }, { "epoch": 0.3247933319669331, "grad_norm": 5.007359981536865, "learning_rate": 5.523330000397652e-06, "loss": 0.3494, "step": 4754 }, { "epoch": 0.3248616519778643, "grad_norm": 4.210724830627441, "learning_rate": 5.522697982247398e-06, "loss": 0.4355, "step": 4755 }, { "epoch": 0.32492997198879553, "grad_norm": 3.843082904815674, "learning_rate": 5.522065865050988e-06, "loss": 0.3565, "step": 4756 }, { "epoch": 0.32499829199972674, "grad_norm": 3.676971197128296, "learning_rate": 5.521433648839377e-06, "loss": 0.3403, "step": 4757 }, { "epoch": 0.3250666120106579, "grad_norm": 4.582594871520996, "learning_rate": 5.520801333643522e-06, "loss": 0.3286, "step": 4758 }, { "epoch": 0.3251349320215891, "grad_norm": 4.056361675262451, "learning_rate": 5.520168919494386e-06, "loss": 0.4045, "step": 4759 }, { "epoch": 0.3252032520325203, "grad_norm": 4.556021213531494, "learning_rate": 5.519536406422937e-06, "loss": 0.3896, "step": 4760 }, { "epoch": 0.32527157204345153, "grad_norm": 4.149113655090332, "learning_rate": 5.518903794460146e-06, "loss": 0.294, "step": 4761 }, { "epoch": 0.32533989205438274, "grad_norm": 4.880176067352295, "learning_rate": 5.518271083636993e-06, "loss": 0.3112, "step": 4762 }, { "epoch": 0.32540821206531395, "grad_norm": 5.209297180175781, "learning_rate": 5.517638273984458e-06, "loss": 0.3797, "step": 4763 }, { "epoch": 0.3254765320762451, "grad_norm": 4.060407638549805, "learning_rate": 5.517005365533528e-06, "loss": 0.3927, "step": 4764 }, { "epoch": 0.3255448520871763, "grad_norm": 4.042975425720215, "learning_rate": 5.5163723583151955e-06, "loss": 0.4124, "step": 4765 }, { "epoch": 0.32561317209810753, "grad_norm": 4.124492168426514, "learning_rate": 5.515739252360458e-06, "loss": 0.5099, "step": 4766 }, { "epoch": 0.32568149210903874, "grad_norm": 3.398557662963867, "learning_rate": 5.515106047700316e-06, "loss": 0.3896, "step": 4767 }, { "epoch": 0.32574981211996995, "grad_norm": 3.894164800643921, "learning_rate": 5.514472744365775e-06, "loss": 0.4271, "step": 4768 }, { "epoch": 0.32581813213090116, "grad_norm": 3.4732306003570557, "learning_rate": 5.513839342387848e-06, "loss": 0.3432, "step": 4769 }, { "epoch": 0.3258864521418323, "grad_norm": 3.5017969608306885, "learning_rate": 5.513205841797551e-06, "loss": 0.3187, "step": 4770 }, { "epoch": 0.32595477215276353, "grad_norm": 3.7018754482269287, "learning_rate": 5.512572242625903e-06, "loss": 0.4267, "step": 4771 }, { "epoch": 0.32602309216369474, "grad_norm": 4.779425144195557, "learning_rate": 5.511938544903931e-06, "loss": 0.3871, "step": 4772 }, { "epoch": 0.32609141217462595, "grad_norm": 3.0373799800872803, "learning_rate": 5.511304748662667e-06, "loss": 0.2722, "step": 4773 }, { "epoch": 0.32615973218555716, "grad_norm": 4.5296173095703125, "learning_rate": 5.510670853933141e-06, "loss": 0.3601, "step": 4774 }, { "epoch": 0.32622805219648837, "grad_norm": 3.2690155506134033, "learning_rate": 5.5100368607463996e-06, "loss": 0.3137, "step": 4775 }, { "epoch": 0.3262963722074195, "grad_norm": 3.9279637336730957, "learning_rate": 5.509402769133483e-06, "loss": 0.3571, "step": 4776 }, { "epoch": 0.32636469221835074, "grad_norm": 4.709490776062012, "learning_rate": 5.508768579125444e-06, "loss": 0.4392, "step": 4777 }, { "epoch": 0.32643301222928195, "grad_norm": 3.832151412963867, "learning_rate": 5.508134290753336e-06, "loss": 0.325, "step": 4778 }, { "epoch": 0.32650133224021316, "grad_norm": 4.6020684242248535, "learning_rate": 5.507499904048217e-06, "loss": 0.3101, "step": 4779 }, { "epoch": 0.32656965225114437, "grad_norm": 3.813438892364502, "learning_rate": 5.506865419041155e-06, "loss": 0.4244, "step": 4780 }, { "epoch": 0.3266379722620756, "grad_norm": 6.551901817321777, "learning_rate": 5.506230835763216e-06, "loss": 0.4109, "step": 4781 }, { "epoch": 0.32670629227300674, "grad_norm": 3.6064460277557373, "learning_rate": 5.505596154245473e-06, "loss": 0.37, "step": 4782 }, { "epoch": 0.32677461228393795, "grad_norm": 3.4917752742767334, "learning_rate": 5.504961374519008e-06, "loss": 0.332, "step": 4783 }, { "epoch": 0.32684293229486916, "grad_norm": 4.287868976593018, "learning_rate": 5.504326496614903e-06, "loss": 0.3905, "step": 4784 }, { "epoch": 0.32691125230580037, "grad_norm": 3.966151714324951, "learning_rate": 5.503691520564245e-06, "loss": 0.353, "step": 4785 }, { "epoch": 0.3269795723167316, "grad_norm": 5.104113578796387, "learning_rate": 5.50305644639813e-06, "loss": 0.4706, "step": 4786 }, { "epoch": 0.3270478923276628, "grad_norm": 4.739766597747803, "learning_rate": 5.502421274147651e-06, "loss": 0.3936, "step": 4787 }, { "epoch": 0.32711621233859395, "grad_norm": 4.976346969604492, "learning_rate": 5.5017860038439164e-06, "loss": 0.3905, "step": 4788 }, { "epoch": 0.32718453234952516, "grad_norm": 4.484990119934082, "learning_rate": 5.501150635518029e-06, "loss": 0.4199, "step": 4789 }, { "epoch": 0.32725285236045637, "grad_norm": 3.5169007778167725, "learning_rate": 5.500515169201105e-06, "loss": 0.4121, "step": 4790 }, { "epoch": 0.3273211723713876, "grad_norm": 3.594235420227051, "learning_rate": 5.499879604924258e-06, "loss": 0.3723, "step": 4791 }, { "epoch": 0.3273894923823188, "grad_norm": 3.189253568649292, "learning_rate": 5.499243942718614e-06, "loss": 0.3367, "step": 4792 }, { "epoch": 0.32745781239325, "grad_norm": 3.2057390213012695, "learning_rate": 5.498608182615294e-06, "loss": 0.337, "step": 4793 }, { "epoch": 0.32752613240418116, "grad_norm": 3.1258339881896973, "learning_rate": 5.497972324645435e-06, "loss": 0.3892, "step": 4794 }, { "epoch": 0.32759445241511237, "grad_norm": 4.079960346221924, "learning_rate": 5.49733636884017e-06, "loss": 0.3373, "step": 4795 }, { "epoch": 0.3276627724260436, "grad_norm": 3.613506317138672, "learning_rate": 5.496700315230642e-06, "loss": 0.3314, "step": 4796 }, { "epoch": 0.3277310924369748, "grad_norm": 2.708456039428711, "learning_rate": 5.496064163847994e-06, "loss": 0.3285, "step": 4797 }, { "epoch": 0.327799412447906, "grad_norm": 3.2502593994140625, "learning_rate": 5.495427914723381e-06, "loss": 0.4115, "step": 4798 }, { "epoch": 0.3278677324588372, "grad_norm": 3.6400625705718994, "learning_rate": 5.494791567887955e-06, "loss": 0.3196, "step": 4799 }, { "epoch": 0.32793605246976837, "grad_norm": 4.716844081878662, "learning_rate": 5.494155123372876e-06, "loss": 0.4497, "step": 4800 }, { "epoch": 0.3280043724806996, "grad_norm": 3.7690653800964355, "learning_rate": 5.493518581209312e-06, "loss": 0.3917, "step": 4801 }, { "epoch": 0.3280726924916308, "grad_norm": 4.758843421936035, "learning_rate": 5.492881941428431e-06, "loss": 0.404, "step": 4802 }, { "epoch": 0.328141012502562, "grad_norm": 3.770966053009033, "learning_rate": 5.492245204061406e-06, "loss": 0.324, "step": 4803 }, { "epoch": 0.3282093325134932, "grad_norm": 3.422281503677368, "learning_rate": 5.491608369139419e-06, "loss": 0.2931, "step": 4804 }, { "epoch": 0.3282776525244244, "grad_norm": 3.426297426223755, "learning_rate": 5.490971436693653e-06, "loss": 0.4363, "step": 4805 }, { "epoch": 0.3283459725353556, "grad_norm": 3.9543702602386475, "learning_rate": 5.490334406755297e-06, "loss": 0.4218, "step": 4806 }, { "epoch": 0.3284142925462868, "grad_norm": 3.020367383956909, "learning_rate": 5.489697279355544e-06, "loss": 0.3336, "step": 4807 }, { "epoch": 0.328482612557218, "grad_norm": 3.583660364151001, "learning_rate": 5.489060054525594e-06, "loss": 0.372, "step": 4808 }, { "epoch": 0.3285509325681492, "grad_norm": 3.999213695526123, "learning_rate": 5.488422732296648e-06, "loss": 0.3913, "step": 4809 }, { "epoch": 0.3286192525790804, "grad_norm": 4.5897746086120605, "learning_rate": 5.4877853126999155e-06, "loss": 0.3372, "step": 4810 }, { "epoch": 0.32868757259001163, "grad_norm": 3.9117863178253174, "learning_rate": 5.487147795766609e-06, "loss": 0.4162, "step": 4811 }, { "epoch": 0.3287558926009428, "grad_norm": 5.082238674163818, "learning_rate": 5.486510181527947e-06, "loss": 0.3713, "step": 4812 }, { "epoch": 0.328824212611874, "grad_norm": 4.175920486450195, "learning_rate": 5.485872470015149e-06, "loss": 0.3701, "step": 4813 }, { "epoch": 0.3288925326228052, "grad_norm": 3.0963191986083984, "learning_rate": 5.485234661259445e-06, "loss": 0.2945, "step": 4814 }, { "epoch": 0.3289608526337364, "grad_norm": 3.893080234527588, "learning_rate": 5.484596755292066e-06, "loss": 0.3902, "step": 4815 }, { "epoch": 0.32902917264466763, "grad_norm": 3.5591390132904053, "learning_rate": 5.483958752144247e-06, "loss": 0.2953, "step": 4816 }, { "epoch": 0.32909749265559884, "grad_norm": 3.0937728881835938, "learning_rate": 5.4833206518472304e-06, "loss": 0.4051, "step": 4817 }, { "epoch": 0.32916581266653, "grad_norm": 3.4787888526916504, "learning_rate": 5.482682454432263e-06, "loss": 0.3989, "step": 4818 }, { "epoch": 0.3292341326774612, "grad_norm": 3.210167407989502, "learning_rate": 5.482044159930595e-06, "loss": 0.2602, "step": 4819 }, { "epoch": 0.3293024526883924, "grad_norm": 3.74532151222229, "learning_rate": 5.481405768373482e-06, "loss": 0.3634, "step": 4820 }, { "epoch": 0.32937077269932363, "grad_norm": 5.220985412597656, "learning_rate": 5.480767279792185e-06, "loss": 0.4264, "step": 4821 }, { "epoch": 0.32943909271025484, "grad_norm": 4.14556360244751, "learning_rate": 5.4801286942179665e-06, "loss": 0.3892, "step": 4822 }, { "epoch": 0.32950741272118605, "grad_norm": 4.586601257324219, "learning_rate": 5.4794900116821e-06, "loss": 0.5205, "step": 4823 }, { "epoch": 0.3295757327321172, "grad_norm": 5.678202152252197, "learning_rate": 5.478851232215857e-06, "loss": 0.4644, "step": 4824 }, { "epoch": 0.3296440527430484, "grad_norm": 4.0914154052734375, "learning_rate": 5.478212355850519e-06, "loss": 0.431, "step": 4825 }, { "epoch": 0.32971237275397963, "grad_norm": 4.459441184997559, "learning_rate": 5.477573382617369e-06, "loss": 0.3787, "step": 4826 }, { "epoch": 0.32978069276491084, "grad_norm": 3.8693764209747314, "learning_rate": 5.4769343125476966e-06, "loss": 0.4017, "step": 4827 }, { "epoch": 0.32984901277584205, "grad_norm": 3.5812416076660156, "learning_rate": 5.476295145672794e-06, "loss": 0.3721, "step": 4828 }, { "epoch": 0.32991733278677327, "grad_norm": 4.998559474945068, "learning_rate": 5.475655882023961e-06, "loss": 0.4957, "step": 4829 }, { "epoch": 0.3299856527977044, "grad_norm": 4.015589714050293, "learning_rate": 5.4750165216325e-06, "loss": 0.4817, "step": 4830 }, { "epoch": 0.33005397280863563, "grad_norm": 3.5972838401794434, "learning_rate": 5.474377064529718e-06, "loss": 0.3165, "step": 4831 }, { "epoch": 0.33012229281956684, "grad_norm": 4.240512847900391, "learning_rate": 5.473737510746929e-06, "loss": 0.3835, "step": 4832 }, { "epoch": 0.33019061283049805, "grad_norm": 2.945678949356079, "learning_rate": 5.473097860315449e-06, "loss": 0.366, "step": 4833 }, { "epoch": 0.33025893284142926, "grad_norm": 4.417860507965088, "learning_rate": 5.472458113266602e-06, "loss": 0.4015, "step": 4834 }, { "epoch": 0.3303272528523605, "grad_norm": 3.2557907104492188, "learning_rate": 5.471818269631711e-06, "loss": 0.3139, "step": 4835 }, { "epoch": 0.33039557286329163, "grad_norm": 4.827265739440918, "learning_rate": 5.471178329442112e-06, "loss": 0.363, "step": 4836 }, { "epoch": 0.33046389287422284, "grad_norm": 3.3804547786712646, "learning_rate": 5.470538292729137e-06, "loss": 0.3634, "step": 4837 }, { "epoch": 0.33053221288515405, "grad_norm": 2.608887195587158, "learning_rate": 5.46989815952413e-06, "loss": 0.3151, "step": 4838 }, { "epoch": 0.33060053289608526, "grad_norm": 3.9026949405670166, "learning_rate": 5.469257929858435e-06, "loss": 0.4182, "step": 4839 }, { "epoch": 0.3306688529070165, "grad_norm": 3.3427281379699707, "learning_rate": 5.468617603763403e-06, "loss": 0.3572, "step": 4840 }, { "epoch": 0.3307371729179477, "grad_norm": 3.3309519290924072, "learning_rate": 5.4679771812703874e-06, "loss": 0.3321, "step": 4841 }, { "epoch": 0.33080549292887884, "grad_norm": 3.501356363296509, "learning_rate": 5.467336662410751e-06, "loss": 0.4634, "step": 4842 }, { "epoch": 0.33087381293981005, "grad_norm": 2.785092830657959, "learning_rate": 5.4666960472158565e-06, "loss": 0.27, "step": 4843 }, { "epoch": 0.33094213295074126, "grad_norm": 3.7076597213745117, "learning_rate": 5.466055335717072e-06, "loss": 0.4633, "step": 4844 }, { "epoch": 0.3310104529616725, "grad_norm": 3.3101110458374023, "learning_rate": 5.465414527945775e-06, "loss": 0.3309, "step": 4845 }, { "epoch": 0.3310787729726037, "grad_norm": 3.7886455059051514, "learning_rate": 5.464773623933341e-06, "loss": 0.4578, "step": 4846 }, { "epoch": 0.3311470929835349, "grad_norm": 3.9460744857788086, "learning_rate": 5.464132623711154e-06, "loss": 0.3882, "step": 4847 }, { "epoch": 0.33121541299446605, "grad_norm": 4.524980068206787, "learning_rate": 5.4634915273106025e-06, "loss": 0.3172, "step": 4848 }, { "epoch": 0.33128373300539726, "grad_norm": 4.271259307861328, "learning_rate": 5.4628503347630796e-06, "loss": 0.4568, "step": 4849 }, { "epoch": 0.3313520530163285, "grad_norm": 3.7229726314544678, "learning_rate": 5.46220904609998e-06, "loss": 0.379, "step": 4850 }, { "epoch": 0.3314203730272597, "grad_norm": 3.524909257888794, "learning_rate": 5.461567661352712e-06, "loss": 0.3569, "step": 4851 }, { "epoch": 0.3314886930381909, "grad_norm": 4.830631732940674, "learning_rate": 5.4609261805526765e-06, "loss": 0.4366, "step": 4852 }, { "epoch": 0.3315570130491221, "grad_norm": 3.9379987716674805, "learning_rate": 5.460284603731289e-06, "loss": 0.4322, "step": 4853 }, { "epoch": 0.33162533306005326, "grad_norm": 5.356272220611572, "learning_rate": 5.459642930919962e-06, "loss": 0.4383, "step": 4854 }, { "epoch": 0.3316936530709845, "grad_norm": 3.3693065643310547, "learning_rate": 5.459001162150121e-06, "loss": 0.307, "step": 4855 }, { "epoch": 0.3317619730819157, "grad_norm": 5.113974571228027, "learning_rate": 5.458359297453188e-06, "loss": 0.3598, "step": 4856 }, { "epoch": 0.3318302930928469, "grad_norm": 3.3184666633605957, "learning_rate": 5.4577173368605944e-06, "loss": 0.3216, "step": 4857 }, { "epoch": 0.3318986131037781, "grad_norm": 3.9126508235931396, "learning_rate": 5.457075280403777e-06, "loss": 0.3719, "step": 4858 }, { "epoch": 0.3319669331147093, "grad_norm": 3.8443193435668945, "learning_rate": 5.456433128114175e-06, "loss": 0.3817, "step": 4859 }, { "epoch": 0.3320352531256405, "grad_norm": 3.7723770141601562, "learning_rate": 5.455790880023231e-06, "loss": 0.3614, "step": 4860 }, { "epoch": 0.3321035731365717, "grad_norm": 5.008831024169922, "learning_rate": 5.455148536162396e-06, "loss": 0.291, "step": 4861 }, { "epoch": 0.3321718931475029, "grad_norm": 3.9954731464385986, "learning_rate": 5.454506096563122e-06, "loss": 0.3987, "step": 4862 }, { "epoch": 0.3322402131584341, "grad_norm": 3.82246994972229, "learning_rate": 5.453863561256871e-06, "loss": 0.3868, "step": 4863 }, { "epoch": 0.3323085331693653, "grad_norm": 4.068338394165039, "learning_rate": 5.453220930275103e-06, "loss": 0.4098, "step": 4864 }, { "epoch": 0.3323768531802965, "grad_norm": 4.345888137817383, "learning_rate": 5.452578203649287e-06, "loss": 0.4631, "step": 4865 }, { "epoch": 0.3324451731912277, "grad_norm": 3.7771213054656982, "learning_rate": 5.4519353814108954e-06, "loss": 0.376, "step": 4866 }, { "epoch": 0.3325134932021589, "grad_norm": 4.153929233551025, "learning_rate": 5.451292463591407e-06, "loss": 0.4128, "step": 4867 }, { "epoch": 0.3325818132130901, "grad_norm": 3.893014669418335, "learning_rate": 5.450649450222302e-06, "loss": 0.3704, "step": 4868 }, { "epoch": 0.3326501332240213, "grad_norm": 3.8064844608306885, "learning_rate": 5.450006341335067e-06, "loss": 0.4674, "step": 4869 }, { "epoch": 0.3327184532349525, "grad_norm": 3.607698678970337, "learning_rate": 5.449363136961194e-06, "loss": 0.378, "step": 4870 }, { "epoch": 0.33278677324588374, "grad_norm": 3.729180097579956, "learning_rate": 5.448719837132179e-06, "loss": 0.4379, "step": 4871 }, { "epoch": 0.3328550932568149, "grad_norm": 2.9890286922454834, "learning_rate": 5.448076441879524e-06, "loss": 0.3352, "step": 4872 }, { "epoch": 0.3329234132677461, "grad_norm": 3.642070770263672, "learning_rate": 5.447432951234733e-06, "loss": 0.2864, "step": 4873 }, { "epoch": 0.3329917332786773, "grad_norm": 3.8255057334899902, "learning_rate": 5.446789365229313e-06, "loss": 0.4035, "step": 4874 }, { "epoch": 0.3330600532896085, "grad_norm": 4.419129371643066, "learning_rate": 5.446145683894786e-06, "loss": 0.4543, "step": 4875 }, { "epoch": 0.33312837330053974, "grad_norm": 4.5977020263671875, "learning_rate": 5.445501907262664e-06, "loss": 0.5153, "step": 4876 }, { "epoch": 0.33319669331147095, "grad_norm": 4.111778259277344, "learning_rate": 5.444858035364476e-06, "loss": 0.5624, "step": 4877 }, { "epoch": 0.3332650133224021, "grad_norm": 3.792712688446045, "learning_rate": 5.444214068231747e-06, "loss": 0.4846, "step": 4878 }, { "epoch": 0.3333333333333333, "grad_norm": 3.482806921005249, "learning_rate": 5.4435700058960145e-06, "loss": 0.3495, "step": 4879 }, { "epoch": 0.3334016533442645, "grad_norm": 3.6975250244140625, "learning_rate": 5.442925848388813e-06, "loss": 0.4041, "step": 4880 }, { "epoch": 0.33346997335519574, "grad_norm": 4.25905179977417, "learning_rate": 5.442281595741686e-06, "loss": 0.4677, "step": 4881 }, { "epoch": 0.33353829336612695, "grad_norm": 4.736522197723389, "learning_rate": 5.441637247986182e-06, "loss": 0.3195, "step": 4882 }, { "epoch": 0.33360661337705816, "grad_norm": 3.5629019737243652, "learning_rate": 5.440992805153852e-06, "loss": 0.3004, "step": 4883 }, { "epoch": 0.33367493338798937, "grad_norm": 5.051530838012695, "learning_rate": 5.440348267276252e-06, "loss": 0.4024, "step": 4884 }, { "epoch": 0.3337432533989205, "grad_norm": 3.8428258895874023, "learning_rate": 5.439703634384947e-06, "loss": 0.4175, "step": 4885 }, { "epoch": 0.33381157340985174, "grad_norm": 4.009377956390381, "learning_rate": 5.439058906511498e-06, "loss": 0.4843, "step": 4886 }, { "epoch": 0.33387989342078295, "grad_norm": 3.9187541007995605, "learning_rate": 5.438414083687478e-06, "loss": 0.4485, "step": 4887 }, { "epoch": 0.33394821343171416, "grad_norm": 5.289397716522217, "learning_rate": 5.437769165944462e-06, "loss": 0.4221, "step": 4888 }, { "epoch": 0.33401653344264537, "grad_norm": 4.426632881164551, "learning_rate": 5.4371241533140305e-06, "loss": 0.363, "step": 4889 }, { "epoch": 0.3340848534535766, "grad_norm": 2.333010673522949, "learning_rate": 5.436479045827767e-06, "loss": 0.2582, "step": 4890 }, { "epoch": 0.33415317346450774, "grad_norm": 4.096120834350586, "learning_rate": 5.4358338435172624e-06, "loss": 0.5238, "step": 4891 }, { "epoch": 0.33422149347543895, "grad_norm": 4.267861843109131, "learning_rate": 5.4351885464141074e-06, "loss": 0.4936, "step": 4892 }, { "epoch": 0.33428981348637016, "grad_norm": 3.4787986278533936, "learning_rate": 5.434543154549904e-06, "loss": 0.3678, "step": 4893 }, { "epoch": 0.33435813349730137, "grad_norm": 3.7234530448913574, "learning_rate": 5.4338976679562535e-06, "loss": 0.3345, "step": 4894 }, { "epoch": 0.3344264535082326, "grad_norm": 2.990006446838379, "learning_rate": 5.433252086664764e-06, "loss": 0.4035, "step": 4895 }, { "epoch": 0.3344947735191638, "grad_norm": 4.584222316741943, "learning_rate": 5.432606410707047e-06, "loss": 0.4426, "step": 4896 }, { "epoch": 0.33456309353009495, "grad_norm": 3.414771318435669, "learning_rate": 5.431960640114723e-06, "loss": 0.3245, "step": 4897 }, { "epoch": 0.33463141354102616, "grad_norm": 3.5800862312316895, "learning_rate": 5.431314774919408e-06, "loss": 0.3088, "step": 4898 }, { "epoch": 0.33469973355195737, "grad_norm": 4.203995704650879, "learning_rate": 5.430668815152733e-06, "loss": 0.4455, "step": 4899 }, { "epoch": 0.3347680535628886, "grad_norm": 3.1944830417633057, "learning_rate": 5.4300227608463274e-06, "loss": 0.3733, "step": 4900 }, { "epoch": 0.3348363735738198, "grad_norm": 5.283998012542725, "learning_rate": 5.429376612031826e-06, "loss": 0.3925, "step": 4901 }, { "epoch": 0.334904693584751, "grad_norm": 3.3379557132720947, "learning_rate": 5.42873036874087e-06, "loss": 0.4164, "step": 4902 }, { "epoch": 0.33497301359568216, "grad_norm": 3.864744186401367, "learning_rate": 5.428084031005104e-06, "loss": 0.3994, "step": 4903 }, { "epoch": 0.33504133360661337, "grad_norm": 3.763514995574951, "learning_rate": 5.427437598856177e-06, "loss": 0.4661, "step": 4904 }, { "epoch": 0.3351096536175446, "grad_norm": 3.6496548652648926, "learning_rate": 5.426791072325745e-06, "loss": 0.3055, "step": 4905 }, { "epoch": 0.3351779736284758, "grad_norm": 4.147003650665283, "learning_rate": 5.426144451445462e-06, "loss": 0.3215, "step": 4906 }, { "epoch": 0.335246293639407, "grad_norm": 3.4945974349975586, "learning_rate": 5.425497736246998e-06, "loss": 0.3256, "step": 4907 }, { "epoch": 0.3353146136503382, "grad_norm": 3.463407039642334, "learning_rate": 5.424850926762015e-06, "loss": 0.384, "step": 4908 }, { "epoch": 0.33538293366126937, "grad_norm": 2.6465072631835938, "learning_rate": 5.424204023022188e-06, "loss": 0.2694, "step": 4909 }, { "epoch": 0.3354512536722006, "grad_norm": 3.1368699073791504, "learning_rate": 5.423557025059196e-06, "loss": 0.3706, "step": 4910 }, { "epoch": 0.3355195736831318, "grad_norm": 3.3105037212371826, "learning_rate": 5.422909932904717e-06, "loss": 0.4752, "step": 4911 }, { "epoch": 0.335587893694063, "grad_norm": 4.147960186004639, "learning_rate": 5.4222627465904395e-06, "loss": 0.3761, "step": 4912 }, { "epoch": 0.3356562137049942, "grad_norm": 2.929290294647217, "learning_rate": 5.4216154661480545e-06, "loss": 0.2449, "step": 4913 }, { "epoch": 0.3357245337159254, "grad_norm": 4.248023986816406, "learning_rate": 5.420968091609257e-06, "loss": 0.4151, "step": 4914 }, { "epoch": 0.3357928537268566, "grad_norm": 3.920015811920166, "learning_rate": 5.4203206230057475e-06, "loss": 0.4048, "step": 4915 }, { "epoch": 0.3358611737377878, "grad_norm": 6.660116195678711, "learning_rate": 5.419673060369232e-06, "loss": 0.422, "step": 4916 }, { "epoch": 0.335929493748719, "grad_norm": 4.393025875091553, "learning_rate": 5.419025403731417e-06, "loss": 0.4577, "step": 4917 }, { "epoch": 0.3359978137596502, "grad_norm": 3.676056385040283, "learning_rate": 5.418377653124019e-06, "loss": 0.4667, "step": 4918 }, { "epoch": 0.3360661337705814, "grad_norm": 3.2129313945770264, "learning_rate": 5.417729808578756e-06, "loss": 0.3942, "step": 4919 }, { "epoch": 0.33613445378151263, "grad_norm": 3.473762273788452, "learning_rate": 5.417081870127352e-06, "loss": 0.347, "step": 4920 }, { "epoch": 0.3362027737924438, "grad_norm": 3.945247173309326, "learning_rate": 5.416433837801532e-06, "loss": 0.4859, "step": 4921 }, { "epoch": 0.336271093803375, "grad_norm": 4.770306587219238, "learning_rate": 5.415785711633033e-06, "loss": 0.368, "step": 4922 }, { "epoch": 0.3363394138143062, "grad_norm": 3.4229073524475098, "learning_rate": 5.415137491653587e-06, "loss": 0.3259, "step": 4923 }, { "epoch": 0.3364077338252374, "grad_norm": 3.2465105056762695, "learning_rate": 5.41448917789494e-06, "loss": 0.3569, "step": 4924 }, { "epoch": 0.33647605383616863, "grad_norm": 4.38114595413208, "learning_rate": 5.413840770388835e-06, "loss": 0.4, "step": 4925 }, { "epoch": 0.33654437384709984, "grad_norm": 3.545154094696045, "learning_rate": 5.413192269167025e-06, "loss": 0.3001, "step": 4926 }, { "epoch": 0.336612693858031, "grad_norm": 3.956451654434204, "learning_rate": 5.412543674261263e-06, "loss": 0.34, "step": 4927 }, { "epoch": 0.3366810138689622, "grad_norm": 3.374932289123535, "learning_rate": 5.4118949857033115e-06, "loss": 0.3493, "step": 4928 }, { "epoch": 0.3367493338798934, "grad_norm": 4.0651116371154785, "learning_rate": 5.411246203524933e-06, "loss": 0.3798, "step": 4929 }, { "epoch": 0.33681765389082463, "grad_norm": 4.411234378814697, "learning_rate": 5.410597327757899e-06, "loss": 0.405, "step": 4930 }, { "epoch": 0.33688597390175584, "grad_norm": 3.6744191646575928, "learning_rate": 5.409948358433981e-06, "loss": 0.3223, "step": 4931 }, { "epoch": 0.33695429391268705, "grad_norm": 3.3931281566619873, "learning_rate": 5.409299295584959e-06, "loss": 0.3391, "step": 4932 }, { "epoch": 0.3370226139236182, "grad_norm": 4.893033027648926, "learning_rate": 5.408650139242614e-06, "loss": 0.2989, "step": 4933 }, { "epoch": 0.3370909339345494, "grad_norm": 3.5510478019714355, "learning_rate": 5.408000889438735e-06, "loss": 0.3189, "step": 4934 }, { "epoch": 0.33715925394548063, "grad_norm": 4.727475643157959, "learning_rate": 5.407351546205113e-06, "loss": 0.3601, "step": 4935 }, { "epoch": 0.33722757395641184, "grad_norm": 4.819359302520752, "learning_rate": 5.406702109573546e-06, "loss": 0.3252, "step": 4936 }, { "epoch": 0.33729589396734305, "grad_norm": 3.404240369796753, "learning_rate": 5.406052579575833e-06, "loss": 0.3292, "step": 4937 }, { "epoch": 0.33736421397827426, "grad_norm": 5.415605545043945, "learning_rate": 5.405402956243782e-06, "loss": 0.4556, "step": 4938 }, { "epoch": 0.3374325339892054, "grad_norm": 5.785000324249268, "learning_rate": 5.404753239609202e-06, "loss": 0.4499, "step": 4939 }, { "epoch": 0.33750085400013663, "grad_norm": 4.648220539093018, "learning_rate": 5.404103429703909e-06, "loss": 0.3392, "step": 4940 }, { "epoch": 0.33756917401106784, "grad_norm": 3.7587358951568604, "learning_rate": 5.403453526559721e-06, "loss": 0.351, "step": 4941 }, { "epoch": 0.33763749402199905, "grad_norm": 3.7819206714630127, "learning_rate": 5.402803530208464e-06, "loss": 0.3648, "step": 4942 }, { "epoch": 0.33770581403293026, "grad_norm": 3.7859344482421875, "learning_rate": 5.402153440681964e-06, "loss": 0.3409, "step": 4943 }, { "epoch": 0.3377741340438615, "grad_norm": 4.676014423370361, "learning_rate": 5.401503258012057e-06, "loss": 0.4995, "step": 4944 }, { "epoch": 0.33784245405479263, "grad_norm": 4.252037525177002, "learning_rate": 5.4008529822305795e-06, "loss": 0.4755, "step": 4945 }, { "epoch": 0.33791077406572384, "grad_norm": 3.7663419246673584, "learning_rate": 5.400202613369372e-06, "loss": 0.3952, "step": 4946 }, { "epoch": 0.33797909407665505, "grad_norm": 4.374579906463623, "learning_rate": 5.399552151460284e-06, "loss": 0.4367, "step": 4947 }, { "epoch": 0.33804741408758626, "grad_norm": 3.0355920791625977, "learning_rate": 5.398901596535165e-06, "loss": 0.4038, "step": 4948 }, { "epoch": 0.3381157340985175, "grad_norm": 4.1904215812683105, "learning_rate": 5.398250948625873e-06, "loss": 0.4326, "step": 4949 }, { "epoch": 0.3381840541094487, "grad_norm": 3.856661796569824, "learning_rate": 5.397600207764265e-06, "loss": 0.377, "step": 4950 }, { "epoch": 0.33825237412037984, "grad_norm": 4.690081596374512, "learning_rate": 5.39694937398221e-06, "loss": 0.378, "step": 4951 }, { "epoch": 0.33832069413131105, "grad_norm": 3.8136425018310547, "learning_rate": 5.396298447311576e-06, "loss": 0.3684, "step": 4952 }, { "epoch": 0.33838901414224226, "grad_norm": 2.863297700881958, "learning_rate": 5.395647427784236e-06, "loss": 0.3487, "step": 4953 }, { "epoch": 0.3384573341531735, "grad_norm": 3.737546682357788, "learning_rate": 5.394996315432073e-06, "loss": 0.4007, "step": 4954 }, { "epoch": 0.3385256541641047, "grad_norm": 3.282273054122925, "learning_rate": 5.3943451102869634e-06, "loss": 0.362, "step": 4955 }, { "epoch": 0.3385939741750359, "grad_norm": 4.167119026184082, "learning_rate": 5.3936938123807995e-06, "loss": 0.5086, "step": 4956 }, { "epoch": 0.33866229418596705, "grad_norm": 3.5182418823242188, "learning_rate": 5.393042421745473e-06, "loss": 0.5076, "step": 4957 }, { "epoch": 0.33873061419689826, "grad_norm": 4.5746283531188965, "learning_rate": 5.3923909384128805e-06, "loss": 0.4761, "step": 4958 }, { "epoch": 0.33879893420782947, "grad_norm": 4.545299053192139, "learning_rate": 5.391739362414924e-06, "loss": 0.4022, "step": 4959 }, { "epoch": 0.3388672542187607, "grad_norm": 3.1629137992858887, "learning_rate": 5.391087693783508e-06, "loss": 0.4229, "step": 4960 }, { "epoch": 0.3389355742296919, "grad_norm": 2.962002992630005, "learning_rate": 5.390435932550544e-06, "loss": 0.3363, "step": 4961 }, { "epoch": 0.3390038942406231, "grad_norm": 3.5521204471588135, "learning_rate": 5.3897840787479476e-06, "loss": 0.3451, "step": 4962 }, { "epoch": 0.33907221425155426, "grad_norm": 4.163255214691162, "learning_rate": 5.389132132407636e-06, "loss": 0.3097, "step": 4963 }, { "epoch": 0.33914053426248547, "grad_norm": 3.910280704498291, "learning_rate": 5.388480093561537e-06, "loss": 0.3448, "step": 4964 }, { "epoch": 0.3392088542734167, "grad_norm": 3.1892282962799072, "learning_rate": 5.387827962241575e-06, "loss": 0.3267, "step": 4965 }, { "epoch": 0.3392771742843479, "grad_norm": 3.4454400539398193, "learning_rate": 5.387175738479687e-06, "loss": 0.3785, "step": 4966 }, { "epoch": 0.3393454942952791, "grad_norm": 2.9874556064605713, "learning_rate": 5.386523422307808e-06, "loss": 0.295, "step": 4967 }, { "epoch": 0.3394138143062103, "grad_norm": 3.188616991043091, "learning_rate": 5.385871013757883e-06, "loss": 0.3778, "step": 4968 }, { "epoch": 0.33948213431714147, "grad_norm": 4.189243793487549, "learning_rate": 5.385218512861855e-06, "loss": 0.4506, "step": 4969 }, { "epoch": 0.3395504543280727, "grad_norm": 3.8098175525665283, "learning_rate": 5.384565919651677e-06, "loss": 0.4056, "step": 4970 }, { "epoch": 0.3396187743390039, "grad_norm": 3.4008429050445557, "learning_rate": 5.3839132341593055e-06, "loss": 0.4945, "step": 4971 }, { "epoch": 0.3396870943499351, "grad_norm": 3.314499855041504, "learning_rate": 5.3832604564167e-06, "loss": 0.2768, "step": 4972 }, { "epoch": 0.3397554143608663, "grad_norm": 3.9772603511810303, "learning_rate": 5.382607586455825e-06, "loss": 0.4391, "step": 4973 }, { "epoch": 0.3398237343717975, "grad_norm": 5.149554252624512, "learning_rate": 5.3819546243086495e-06, "loss": 0.3343, "step": 4974 }, { "epoch": 0.3398920543827287, "grad_norm": 5.6788787841796875, "learning_rate": 5.38130157000715e-06, "loss": 0.395, "step": 4975 }, { "epoch": 0.3399603743936599, "grad_norm": 3.3605077266693115, "learning_rate": 5.380648423583301e-06, "loss": 0.3256, "step": 4976 }, { "epoch": 0.3400286944045911, "grad_norm": 4.146890163421631, "learning_rate": 5.379995185069089e-06, "loss": 0.4186, "step": 4977 }, { "epoch": 0.3400970144155223, "grad_norm": 4.466804504394531, "learning_rate": 5.3793418544964976e-06, "loss": 0.4513, "step": 4978 }, { "epoch": 0.3401653344264535, "grad_norm": 3.889619827270508, "learning_rate": 5.378688431897523e-06, "loss": 0.3547, "step": 4979 }, { "epoch": 0.34023365443738474, "grad_norm": 3.3688673973083496, "learning_rate": 5.378034917304157e-06, "loss": 0.3481, "step": 4980 }, { "epoch": 0.3403019744483159, "grad_norm": 2.6070899963378906, "learning_rate": 5.377381310748404e-06, "loss": 0.2864, "step": 4981 }, { "epoch": 0.3403702944592471, "grad_norm": 4.531043529510498, "learning_rate": 5.376727612262267e-06, "loss": 0.3911, "step": 4982 }, { "epoch": 0.3404386144701783, "grad_norm": 4.087549686431885, "learning_rate": 5.376073821877757e-06, "loss": 0.2918, "step": 4983 }, { "epoch": 0.3405069344811095, "grad_norm": 6.105728626251221, "learning_rate": 5.375419939626891e-06, "loss": 0.3435, "step": 4984 }, { "epoch": 0.34057525449204074, "grad_norm": 3.302234172821045, "learning_rate": 5.3747659655416825e-06, "loss": 0.3565, "step": 4985 }, { "epoch": 0.34064357450297195, "grad_norm": 3.489654064178467, "learning_rate": 5.374111899654158e-06, "loss": 0.3857, "step": 4986 }, { "epoch": 0.3407118945139031, "grad_norm": 4.246906280517578, "learning_rate": 5.373457741996346e-06, "loss": 0.4759, "step": 4987 }, { "epoch": 0.3407802145248343, "grad_norm": 5.473368167877197, "learning_rate": 5.372803492600277e-06, "loss": 0.4827, "step": 4988 }, { "epoch": 0.3408485345357655, "grad_norm": 3.490481376647949, "learning_rate": 5.372149151497988e-06, "loss": 0.3947, "step": 4989 }, { "epoch": 0.34091685454669673, "grad_norm": 4.346726894378662, "learning_rate": 5.371494718721522e-06, "loss": 0.4314, "step": 4990 }, { "epoch": 0.34098517455762795, "grad_norm": 3.4647488594055176, "learning_rate": 5.370840194302924e-06, "loss": 0.3663, "step": 4991 }, { "epoch": 0.34105349456855916, "grad_norm": 3.4335782527923584, "learning_rate": 5.370185578274244e-06, "loss": 0.3491, "step": 4992 }, { "epoch": 0.3411218145794903, "grad_norm": 2.58992600440979, "learning_rate": 5.369530870667538e-06, "loss": 0.2706, "step": 4993 }, { "epoch": 0.3411901345904215, "grad_norm": 4.545749187469482, "learning_rate": 5.368876071514863e-06, "loss": 0.4731, "step": 4994 }, { "epoch": 0.34125845460135273, "grad_norm": 4.473833084106445, "learning_rate": 5.368221180848284e-06, "loss": 0.327, "step": 4995 }, { "epoch": 0.34132677461228395, "grad_norm": 5.21007776260376, "learning_rate": 5.367566198699869e-06, "loss": 0.3556, "step": 4996 }, { "epoch": 0.34139509462321516, "grad_norm": 3.7628583908081055, "learning_rate": 5.366911125101692e-06, "loss": 0.4421, "step": 4997 }, { "epoch": 0.34146341463414637, "grad_norm": 4.717937469482422, "learning_rate": 5.366255960085829e-06, "loss": 0.369, "step": 4998 }, { "epoch": 0.3415317346450775, "grad_norm": 3.360058069229126, "learning_rate": 5.3656007036843624e-06, "loss": 0.4256, "step": 4999 }, { "epoch": 0.34160005465600873, "grad_norm": 3.8197875022888184, "learning_rate": 5.364945355929377e-06, "loss": 0.4385, "step": 5000 }, { "epoch": 0.34166837466693994, "grad_norm": 4.776058197021484, "learning_rate": 5.3642899168529654e-06, "loss": 0.3805, "step": 5001 }, { "epoch": 0.34173669467787116, "grad_norm": 4.404972553253174, "learning_rate": 5.363634386487221e-06, "loss": 0.3443, "step": 5002 }, { "epoch": 0.34180501468880237, "grad_norm": 3.091763973236084, "learning_rate": 5.3629787648642444e-06, "loss": 0.3151, "step": 5003 }, { "epoch": 0.3418733346997336, "grad_norm": 4.26239013671875, "learning_rate": 5.362323052016139e-06, "loss": 0.4248, "step": 5004 }, { "epoch": 0.34194165471066473, "grad_norm": 3.762138843536377, "learning_rate": 5.361667247975015e-06, "loss": 0.3994, "step": 5005 }, { "epoch": 0.34200997472159594, "grad_norm": 3.055800199508667, "learning_rate": 5.361011352772982e-06, "loss": 0.2943, "step": 5006 }, { "epoch": 0.34207829473252716, "grad_norm": 3.3037056922912598, "learning_rate": 5.360355366442161e-06, "loss": 0.4176, "step": 5007 }, { "epoch": 0.34214661474345837, "grad_norm": 3.9449431896209717, "learning_rate": 5.3596992890146714e-06, "loss": 0.4037, "step": 5008 }, { "epoch": 0.3422149347543896, "grad_norm": 4.340317249298096, "learning_rate": 5.359043120522641e-06, "loss": 0.3774, "step": 5009 }, { "epoch": 0.3422832547653208, "grad_norm": 6.0866265296936035, "learning_rate": 5.358386860998201e-06, "loss": 0.4658, "step": 5010 }, { "epoch": 0.34235157477625194, "grad_norm": 3.2246956825256348, "learning_rate": 5.357730510473485e-06, "loss": 0.3511, "step": 5011 }, { "epoch": 0.34241989478718315, "grad_norm": 4.120926856994629, "learning_rate": 5.357074068980634e-06, "loss": 0.3193, "step": 5012 }, { "epoch": 0.34248821479811437, "grad_norm": 3.8618762493133545, "learning_rate": 5.356417536551791e-06, "loss": 0.4537, "step": 5013 }, { "epoch": 0.3425565348090456, "grad_norm": 3.9247183799743652, "learning_rate": 5.355760913219107e-06, "loss": 0.4425, "step": 5014 }, { "epoch": 0.3426248548199768, "grad_norm": 4.395878314971924, "learning_rate": 5.355104199014733e-06, "loss": 0.4407, "step": 5015 }, { "epoch": 0.342693174830908, "grad_norm": 4.149184703826904, "learning_rate": 5.354447393970826e-06, "loss": 0.3306, "step": 5016 }, { "epoch": 0.34276149484183915, "grad_norm": 4.4617414474487305, "learning_rate": 5.353790498119551e-06, "loss": 0.3362, "step": 5017 }, { "epoch": 0.34282981485277036, "grad_norm": 3.039506673812866, "learning_rate": 5.353133511493071e-06, "loss": 0.3892, "step": 5018 }, { "epoch": 0.3428981348637016, "grad_norm": 4.532219886779785, "learning_rate": 5.352476434123559e-06, "loss": 0.3861, "step": 5019 }, { "epoch": 0.3429664548746328, "grad_norm": 3.46203875541687, "learning_rate": 5.35181926604319e-06, "loss": 0.336, "step": 5020 }, { "epoch": 0.343034774885564, "grad_norm": 4.434244632720947, "learning_rate": 5.351162007284144e-06, "loss": 0.3545, "step": 5021 }, { "epoch": 0.3431030948964952, "grad_norm": 4.199279308319092, "learning_rate": 5.3505046578786045e-06, "loss": 0.327, "step": 5022 }, { "epoch": 0.34317141490742636, "grad_norm": 4.331969261169434, "learning_rate": 5.34984721785876e-06, "loss": 0.3996, "step": 5023 }, { "epoch": 0.3432397349183576, "grad_norm": 4.658703804016113, "learning_rate": 5.349189687256804e-06, "loss": 0.3963, "step": 5024 }, { "epoch": 0.3433080549292888, "grad_norm": 5.086531162261963, "learning_rate": 5.348532066104935e-06, "loss": 0.3688, "step": 5025 }, { "epoch": 0.34337637494022, "grad_norm": 3.5839288234710693, "learning_rate": 5.347874354435353e-06, "loss": 0.3669, "step": 5026 }, { "epoch": 0.3434446949511512, "grad_norm": 3.898754119873047, "learning_rate": 5.347216552280267e-06, "loss": 0.4538, "step": 5027 }, { "epoch": 0.3435130149620824, "grad_norm": 3.204958438873291, "learning_rate": 5.346558659671886e-06, "loss": 0.322, "step": 5028 }, { "epoch": 0.3435813349730136, "grad_norm": 3.1384146213531494, "learning_rate": 5.345900676642424e-06, "loss": 0.3736, "step": 5029 }, { "epoch": 0.3436496549839448, "grad_norm": 5.07961893081665, "learning_rate": 5.3452426032241035e-06, "loss": 0.4179, "step": 5030 }, { "epoch": 0.343717974994876, "grad_norm": 3.9212400913238525, "learning_rate": 5.3445844394491464e-06, "loss": 0.3759, "step": 5031 }, { "epoch": 0.3437862950058072, "grad_norm": 3.352660894393921, "learning_rate": 5.3439261853497825e-06, "loss": 0.3535, "step": 5032 }, { "epoch": 0.3438546150167384, "grad_norm": 3.2211666107177734, "learning_rate": 5.343267840958245e-06, "loss": 0.2542, "step": 5033 }, { "epoch": 0.34392293502766963, "grad_norm": 3.2440121173858643, "learning_rate": 5.342609406306771e-06, "loss": 0.362, "step": 5034 }, { "epoch": 0.3439912550386008, "grad_norm": 3.4643430709838867, "learning_rate": 5.341950881427601e-06, "loss": 0.2658, "step": 5035 }, { "epoch": 0.344059575049532, "grad_norm": 4.043481826782227, "learning_rate": 5.341292266352982e-06, "loss": 0.3746, "step": 5036 }, { "epoch": 0.3441278950604632, "grad_norm": 3.321826219558716, "learning_rate": 5.340633561115165e-06, "loss": 0.2917, "step": 5037 }, { "epoch": 0.3441962150713944, "grad_norm": 4.432241439819336, "learning_rate": 5.339974765746406e-06, "loss": 0.4177, "step": 5038 }, { "epoch": 0.34426453508232563, "grad_norm": 4.344860553741455, "learning_rate": 5.339315880278963e-06, "loss": 0.4896, "step": 5039 }, { "epoch": 0.34433285509325684, "grad_norm": 4.1861114501953125, "learning_rate": 5.3386569047451e-06, "loss": 0.3395, "step": 5040 }, { "epoch": 0.344401175104188, "grad_norm": 4.303407669067383, "learning_rate": 5.337997839177085e-06, "loss": 0.339, "step": 5041 }, { "epoch": 0.3444694951151192, "grad_norm": 3.861146926879883, "learning_rate": 5.337338683607191e-06, "loss": 0.4037, "step": 5042 }, { "epoch": 0.3445378151260504, "grad_norm": 4.051241397857666, "learning_rate": 5.336679438067696e-06, "loss": 0.4342, "step": 5043 }, { "epoch": 0.34460613513698163, "grad_norm": 4.220722675323486, "learning_rate": 5.336020102590881e-06, "loss": 0.3834, "step": 5044 }, { "epoch": 0.34467445514791284, "grad_norm": 3.701032876968384, "learning_rate": 5.335360677209032e-06, "loss": 0.3407, "step": 5045 }, { "epoch": 0.34474277515884405, "grad_norm": 3.86613392829895, "learning_rate": 5.334701161954438e-06, "loss": 0.3583, "step": 5046 }, { "epoch": 0.3448110951697752, "grad_norm": 3.6302144527435303, "learning_rate": 5.334041556859395e-06, "loss": 0.494, "step": 5047 }, { "epoch": 0.3448794151807064, "grad_norm": 3.6124966144561768, "learning_rate": 5.3333818619562026e-06, "loss": 0.3594, "step": 5048 }, { "epoch": 0.34494773519163763, "grad_norm": 3.00589919090271, "learning_rate": 5.332722077277163e-06, "loss": 0.3452, "step": 5049 }, { "epoch": 0.34501605520256884, "grad_norm": 3.9192357063293457, "learning_rate": 5.332062202854585e-06, "loss": 0.4708, "step": 5050 }, { "epoch": 0.34508437521350005, "grad_norm": 4.553040504455566, "learning_rate": 5.33140223872078e-06, "loss": 0.3397, "step": 5051 }, { "epoch": 0.34515269522443126, "grad_norm": 4.1088361740112305, "learning_rate": 5.330742184908065e-06, "loss": 0.3925, "step": 5052 }, { "epoch": 0.3452210152353624, "grad_norm": 4.004312038421631, "learning_rate": 5.3300820414487635e-06, "loss": 0.3791, "step": 5053 }, { "epoch": 0.3452893352462936, "grad_norm": 2.734189033508301, "learning_rate": 5.329421808375197e-06, "loss": 0.3109, "step": 5054 }, { "epoch": 0.34535765525722484, "grad_norm": 4.543745994567871, "learning_rate": 5.328761485719698e-06, "loss": 0.4824, "step": 5055 }, { "epoch": 0.34542597526815605, "grad_norm": 3.7589213848114014, "learning_rate": 5.328101073514599e-06, "loss": 0.3018, "step": 5056 }, { "epoch": 0.34549429527908726, "grad_norm": 3.4207170009613037, "learning_rate": 5.327440571792239e-06, "loss": 0.4538, "step": 5057 }, { "epoch": 0.34556261529001847, "grad_norm": 3.912727117538452, "learning_rate": 5.326779980584964e-06, "loss": 0.4116, "step": 5058 }, { "epoch": 0.3456309353009496, "grad_norm": 3.8389453887939453, "learning_rate": 5.326119299925117e-06, "loss": 0.3641, "step": 5059 }, { "epoch": 0.34569925531188084, "grad_norm": 3.7982523441314697, "learning_rate": 5.3254585298450525e-06, "loss": 0.4419, "step": 5060 }, { "epoch": 0.34576757532281205, "grad_norm": 2.914214849472046, "learning_rate": 5.324797670377126e-06, "loss": 0.3058, "step": 5061 }, { "epoch": 0.34583589533374326, "grad_norm": 4.597625732421875, "learning_rate": 5.324136721553698e-06, "loss": 0.3913, "step": 5062 }, { "epoch": 0.34590421534467447, "grad_norm": 3.4399845600128174, "learning_rate": 5.323475683407133e-06, "loss": 0.3795, "step": 5063 }, { "epoch": 0.3459725353556057, "grad_norm": 4.255248069763184, "learning_rate": 5.322814555969801e-06, "loss": 0.4408, "step": 5064 }, { "epoch": 0.34604085536653684, "grad_norm": 3.790536880493164, "learning_rate": 5.322153339274074e-06, "loss": 0.3801, "step": 5065 }, { "epoch": 0.34610917537746805, "grad_norm": 4.6885504722595215, "learning_rate": 5.3214920333523335e-06, "loss": 0.3831, "step": 5066 }, { "epoch": 0.34617749538839926, "grad_norm": 3.946258544921875, "learning_rate": 5.320830638236959e-06, "loss": 0.3848, "step": 5067 }, { "epoch": 0.34624581539933047, "grad_norm": 3.3927254676818848, "learning_rate": 5.320169153960338e-06, "loss": 0.4079, "step": 5068 }, { "epoch": 0.3463141354102617, "grad_norm": 3.5510971546173096, "learning_rate": 5.31950758055486e-06, "loss": 0.352, "step": 5069 }, { "epoch": 0.3463824554211929, "grad_norm": 3.789609670639038, "learning_rate": 5.318845918052924e-06, "loss": 0.3634, "step": 5070 }, { "epoch": 0.34645077543212405, "grad_norm": 4.19282865524292, "learning_rate": 5.3181841664869284e-06, "loss": 0.4119, "step": 5071 }, { "epoch": 0.34651909544305526, "grad_norm": 3.5870273113250732, "learning_rate": 5.317522325889275e-06, "loss": 0.3714, "step": 5072 }, { "epoch": 0.34658741545398647, "grad_norm": 3.7939345836639404, "learning_rate": 5.316860396292376e-06, "loss": 0.3211, "step": 5073 }, { "epoch": 0.3466557354649177, "grad_norm": 3.8531434535980225, "learning_rate": 5.316198377728643e-06, "loss": 0.4226, "step": 5074 }, { "epoch": 0.3467240554758489, "grad_norm": 3.6292531490325928, "learning_rate": 5.315536270230491e-06, "loss": 0.4699, "step": 5075 }, { "epoch": 0.3467923754867801, "grad_norm": 4.575174808502197, "learning_rate": 5.3148740738303456e-06, "loss": 0.3899, "step": 5076 }, { "epoch": 0.34686069549771126, "grad_norm": 4.6002678871154785, "learning_rate": 5.31421178856063e-06, "loss": 0.4181, "step": 5077 }, { "epoch": 0.34692901550864247, "grad_norm": 3.219768762588501, "learning_rate": 5.313549414453776e-06, "loss": 0.3886, "step": 5078 }, { "epoch": 0.3469973355195737, "grad_norm": 3.611896276473999, "learning_rate": 5.312886951542216e-06, "loss": 0.352, "step": 5079 }, { "epoch": 0.3470656555305049, "grad_norm": 3.353322744369507, "learning_rate": 5.312224399858393e-06, "loss": 0.38, "step": 5080 }, { "epoch": 0.3471339755414361, "grad_norm": 4.181166648864746, "learning_rate": 5.311561759434747e-06, "loss": 0.4365, "step": 5081 }, { "epoch": 0.3472022955523673, "grad_norm": 3.5829765796661377, "learning_rate": 5.310899030303727e-06, "loss": 0.3928, "step": 5082 }, { "epoch": 0.34727061556329847, "grad_norm": 2.736239194869995, "learning_rate": 5.310236212497785e-06, "loss": 0.3087, "step": 5083 }, { "epoch": 0.3473389355742297, "grad_norm": 3.9842793941497803, "learning_rate": 5.309573306049377e-06, "loss": 0.3904, "step": 5084 }, { "epoch": 0.3474072555851609, "grad_norm": 4.012902736663818, "learning_rate": 5.308910310990965e-06, "loss": 0.377, "step": 5085 }, { "epoch": 0.3474755755960921, "grad_norm": 4.2161865234375, "learning_rate": 5.308247227355014e-06, "loss": 0.3371, "step": 5086 }, { "epoch": 0.3475438956070233, "grad_norm": 3.8586556911468506, "learning_rate": 5.307584055173991e-06, "loss": 0.3733, "step": 5087 }, { "epoch": 0.3476122156179545, "grad_norm": 3.54167103767395, "learning_rate": 5.3069207944803725e-06, "loss": 0.3769, "step": 5088 }, { "epoch": 0.3476805356288857, "grad_norm": 4.149322032928467, "learning_rate": 5.306257445306635e-06, "loss": 0.4007, "step": 5089 }, { "epoch": 0.3477488556398169, "grad_norm": 3.5629777908325195, "learning_rate": 5.305594007685264e-06, "loss": 0.3585, "step": 5090 }, { "epoch": 0.3478171756507481, "grad_norm": 2.7734763622283936, "learning_rate": 5.3049304816487414e-06, "loss": 0.2669, "step": 5091 }, { "epoch": 0.3478854956616793, "grad_norm": 3.628063440322876, "learning_rate": 5.304266867229561e-06, "loss": 0.3141, "step": 5092 }, { "epoch": 0.3479538156726105, "grad_norm": 3.7475521564483643, "learning_rate": 5.303603164460218e-06, "loss": 0.3831, "step": 5093 }, { "epoch": 0.34802213568354173, "grad_norm": 2.990079641342163, "learning_rate": 5.302939373373214e-06, "loss": 0.3425, "step": 5094 }, { "epoch": 0.3480904556944729, "grad_norm": 3.468061685562134, "learning_rate": 5.302275494001049e-06, "loss": 0.3949, "step": 5095 }, { "epoch": 0.3481587757054041, "grad_norm": 4.614128589630127, "learning_rate": 5.301611526376235e-06, "loss": 0.3901, "step": 5096 }, { "epoch": 0.3482270957163353, "grad_norm": 3.6302125453948975, "learning_rate": 5.300947470531283e-06, "loss": 0.3441, "step": 5097 }, { "epoch": 0.3482954157272665, "grad_norm": 4.6662445068359375, "learning_rate": 5.30028332649871e-06, "loss": 0.3809, "step": 5098 }, { "epoch": 0.34836373573819773, "grad_norm": 4.671960353851318, "learning_rate": 5.299619094311039e-06, "loss": 0.4979, "step": 5099 }, { "epoch": 0.34843205574912894, "grad_norm": 3.9878792762756348, "learning_rate": 5.2989547740007946e-06, "loss": 0.2979, "step": 5100 }, { "epoch": 0.3485003757600601, "grad_norm": 3.908047676086426, "learning_rate": 5.298290365600505e-06, "loss": 0.408, "step": 5101 }, { "epoch": 0.3485686957709913, "grad_norm": 4.205578327178955, "learning_rate": 5.297625869142708e-06, "loss": 0.3548, "step": 5102 }, { "epoch": 0.3486370157819225, "grad_norm": 3.567298412322998, "learning_rate": 5.2969612846599395e-06, "loss": 0.3198, "step": 5103 }, { "epoch": 0.34870533579285373, "grad_norm": 3.7079834938049316, "learning_rate": 5.296296612184745e-06, "loss": 0.3539, "step": 5104 }, { "epoch": 0.34877365580378494, "grad_norm": 4.18052864074707, "learning_rate": 5.295631851749668e-06, "loss": 0.3856, "step": 5105 }, { "epoch": 0.34884197581471615, "grad_norm": 3.2014877796173096, "learning_rate": 5.2949670033872634e-06, "loss": 0.37, "step": 5106 }, { "epoch": 0.3489102958256473, "grad_norm": 3.9218597412109375, "learning_rate": 5.294302067130086e-06, "loss": 0.3521, "step": 5107 }, { "epoch": 0.3489786158365785, "grad_norm": 2.958510637283325, "learning_rate": 5.293637043010695e-06, "loss": 0.425, "step": 5108 }, { "epoch": 0.34904693584750973, "grad_norm": 3.458181619644165, "learning_rate": 5.292971931061656e-06, "loss": 0.385, "step": 5109 }, { "epoch": 0.34911525585844094, "grad_norm": 3.5353565216064453, "learning_rate": 5.292306731315538e-06, "loss": 0.337, "step": 5110 }, { "epoch": 0.34918357586937215, "grad_norm": 3.486969470977783, "learning_rate": 5.291641443804913e-06, "loss": 0.3376, "step": 5111 }, { "epoch": 0.34925189588030336, "grad_norm": 4.298409938812256, "learning_rate": 5.29097606856236e-06, "loss": 0.4501, "step": 5112 }, { "epoch": 0.3493202158912345, "grad_norm": 3.8046350479125977, "learning_rate": 5.2903106056204595e-06, "loss": 0.3673, "step": 5113 }, { "epoch": 0.34938853590216573, "grad_norm": 3.4224886894226074, "learning_rate": 5.289645055011796e-06, "loss": 0.3555, "step": 5114 }, { "epoch": 0.34945685591309694, "grad_norm": 2.9523367881774902, "learning_rate": 5.288979416768962e-06, "loss": 0.3201, "step": 5115 }, { "epoch": 0.34952517592402815, "grad_norm": 4.659306526184082, "learning_rate": 5.288313690924553e-06, "loss": 0.4559, "step": 5116 }, { "epoch": 0.34959349593495936, "grad_norm": 3.6838884353637695, "learning_rate": 5.287647877511164e-06, "loss": 0.3074, "step": 5117 }, { "epoch": 0.3496618159458906, "grad_norm": 2.9221267700195312, "learning_rate": 5.286981976561401e-06, "loss": 0.3106, "step": 5118 }, { "epoch": 0.34973013595682173, "grad_norm": 3.6624879837036133, "learning_rate": 5.286315988107871e-06, "loss": 0.4257, "step": 5119 }, { "epoch": 0.34979845596775294, "grad_norm": 3.855393648147583, "learning_rate": 5.285649912183185e-06, "loss": 0.3844, "step": 5120 }, { "epoch": 0.34986677597868415, "grad_norm": 4.315733432769775, "learning_rate": 5.284983748819959e-06, "loss": 0.463, "step": 5121 }, { "epoch": 0.34993509598961536, "grad_norm": 4.134825706481934, "learning_rate": 5.2843174980508145e-06, "loss": 0.4373, "step": 5122 }, { "epoch": 0.3500034160005466, "grad_norm": 3.229290246963501, "learning_rate": 5.283651159908375e-06, "loss": 0.3586, "step": 5123 }, { "epoch": 0.3500717360114778, "grad_norm": 3.1909306049346924, "learning_rate": 5.282984734425269e-06, "loss": 0.2863, "step": 5124 }, { "epoch": 0.35014005602240894, "grad_norm": 4.587948799133301, "learning_rate": 5.28231822163413e-06, "loss": 0.4708, "step": 5125 }, { "epoch": 0.35020837603334015, "grad_norm": 3.4640257358551025, "learning_rate": 5.2816516215675954e-06, "loss": 0.3428, "step": 5126 }, { "epoch": 0.35027669604427136, "grad_norm": 3.7717700004577637, "learning_rate": 5.2809849342583075e-06, "loss": 0.2832, "step": 5127 }, { "epoch": 0.3503450160552026, "grad_norm": 3.117952823638916, "learning_rate": 5.28031815973891e-06, "loss": 0.2984, "step": 5128 }, { "epoch": 0.3504133360661338, "grad_norm": 3.517399787902832, "learning_rate": 5.279651298042056e-06, "loss": 0.4152, "step": 5129 }, { "epoch": 0.350481656077065, "grad_norm": 3.9750733375549316, "learning_rate": 5.2789843492003975e-06, "loss": 0.443, "step": 5130 }, { "epoch": 0.35054997608799615, "grad_norm": 3.4798386096954346, "learning_rate": 5.2783173132465945e-06, "loss": 0.3722, "step": 5131 }, { "epoch": 0.35061829609892736, "grad_norm": 3.5445547103881836, "learning_rate": 5.277650190213311e-06, "loss": 0.3631, "step": 5132 }, { "epoch": 0.3506866161098586, "grad_norm": 5.139763355255127, "learning_rate": 5.276982980133212e-06, "loss": 0.4822, "step": 5133 }, { "epoch": 0.3507549361207898, "grad_norm": 3.4834179878234863, "learning_rate": 5.27631568303897e-06, "loss": 0.45, "step": 5134 }, { "epoch": 0.350823256131721, "grad_norm": 3.6264734268188477, "learning_rate": 5.275648298963261e-06, "loss": 0.3945, "step": 5135 }, { "epoch": 0.3508915761426522, "grad_norm": 4.2756524085998535, "learning_rate": 5.274980827938764e-06, "loss": 0.4567, "step": 5136 }, { "epoch": 0.35095989615358336, "grad_norm": 3.363877534866333, "learning_rate": 5.274313269998166e-06, "loss": 0.335, "step": 5137 }, { "epoch": 0.3510282161645146, "grad_norm": 3.9634885787963867, "learning_rate": 5.273645625174152e-06, "loss": 0.3652, "step": 5138 }, { "epoch": 0.3510965361754458, "grad_norm": 2.8761086463928223, "learning_rate": 5.272977893499417e-06, "loss": 0.3385, "step": 5139 }, { "epoch": 0.351164856186377, "grad_norm": 4.624061107635498, "learning_rate": 5.272310075006658e-06, "loss": 0.4506, "step": 5140 }, { "epoch": 0.3512331761973082, "grad_norm": 4.6640119552612305, "learning_rate": 5.271642169728576e-06, "loss": 0.4614, "step": 5141 }, { "epoch": 0.3513014962082394, "grad_norm": 3.826524496078491, "learning_rate": 5.270974177697875e-06, "loss": 0.3681, "step": 5142 }, { "epoch": 0.35136981621917057, "grad_norm": 3.999652862548828, "learning_rate": 5.2703060989472675e-06, "loss": 0.3627, "step": 5143 }, { "epoch": 0.3514381362301018, "grad_norm": 4.5198140144348145, "learning_rate": 5.269637933509465e-06, "loss": 0.4129, "step": 5144 }, { "epoch": 0.351506456241033, "grad_norm": 4.479886054992676, "learning_rate": 5.268969681417187e-06, "loss": 0.4108, "step": 5145 }, { "epoch": 0.3515747762519642, "grad_norm": 5.44114875793457, "learning_rate": 5.268301342703156e-06, "loss": 0.3484, "step": 5146 }, { "epoch": 0.3516430962628954, "grad_norm": 4.108806610107422, "learning_rate": 5.2676329174001e-06, "loss": 0.4168, "step": 5147 }, { "epoch": 0.3517114162738266, "grad_norm": 3.1220648288726807, "learning_rate": 5.266964405540747e-06, "loss": 0.3208, "step": 5148 }, { "epoch": 0.3517797362847578, "grad_norm": 4.363036632537842, "learning_rate": 5.266295807157836e-06, "loss": 0.4122, "step": 5149 }, { "epoch": 0.351848056295689, "grad_norm": 3.8791868686676025, "learning_rate": 5.2656271222841024e-06, "loss": 0.3081, "step": 5150 }, { "epoch": 0.3519163763066202, "grad_norm": 3.092881202697754, "learning_rate": 5.264958350952293e-06, "loss": 0.3846, "step": 5151 }, { "epoch": 0.3519846963175514, "grad_norm": 2.8719146251678467, "learning_rate": 5.264289493195155e-06, "loss": 0.3258, "step": 5152 }, { "epoch": 0.3520530163284826, "grad_norm": 3.41400408744812, "learning_rate": 5.263620549045441e-06, "loss": 0.3041, "step": 5153 }, { "epoch": 0.35212133633941384, "grad_norm": 3.56181001663208, "learning_rate": 5.262951518535907e-06, "loss": 0.4003, "step": 5154 }, { "epoch": 0.352189656350345, "grad_norm": 3.25498366355896, "learning_rate": 5.262282401699312e-06, "loss": 0.2987, "step": 5155 }, { "epoch": 0.3522579763612762, "grad_norm": 3.1104493141174316, "learning_rate": 5.261613198568423e-06, "loss": 0.3692, "step": 5156 }, { "epoch": 0.3523262963722074, "grad_norm": 3.6109585762023926, "learning_rate": 5.260943909176009e-06, "loss": 0.3305, "step": 5157 }, { "epoch": 0.3523946163831386, "grad_norm": 3.5769481658935547, "learning_rate": 5.2602745335548406e-06, "loss": 0.3608, "step": 5158 }, { "epoch": 0.35246293639406984, "grad_norm": 4.767906665802002, "learning_rate": 5.2596050717376995e-06, "loss": 0.349, "step": 5159 }, { "epoch": 0.35253125640500105, "grad_norm": 4.101192474365234, "learning_rate": 5.258935523757365e-06, "loss": 0.3368, "step": 5160 }, { "epoch": 0.3525995764159322, "grad_norm": 4.386245250701904, "learning_rate": 5.258265889646624e-06, "loss": 0.4381, "step": 5161 }, { "epoch": 0.3526678964268634, "grad_norm": 3.52664852142334, "learning_rate": 5.257596169438265e-06, "loss": 0.3135, "step": 5162 }, { "epoch": 0.3527362164377946, "grad_norm": 3.8705544471740723, "learning_rate": 5.2569263631650855e-06, "loss": 0.2908, "step": 5163 }, { "epoch": 0.35280453644872584, "grad_norm": 3.9466898441314697, "learning_rate": 5.256256470859881e-06, "loss": 0.3348, "step": 5164 }, { "epoch": 0.35287285645965705, "grad_norm": 3.894968271255493, "learning_rate": 5.255586492555456e-06, "loss": 0.3351, "step": 5165 }, { "epoch": 0.35294117647058826, "grad_norm": 4.608673572540283, "learning_rate": 5.254916428284618e-06, "loss": 0.4141, "step": 5166 }, { "epoch": 0.3530094964815194, "grad_norm": 3.507589340209961, "learning_rate": 5.2542462780801776e-06, "loss": 0.4221, "step": 5167 }, { "epoch": 0.3530778164924506, "grad_norm": 4.377367973327637, "learning_rate": 5.253576041974949e-06, "loss": 0.4459, "step": 5168 }, { "epoch": 0.35314613650338184, "grad_norm": 3.7684245109558105, "learning_rate": 5.252905720001754e-06, "loss": 0.5277, "step": 5169 }, { "epoch": 0.35321445651431305, "grad_norm": 3.974674701690674, "learning_rate": 5.252235312193415e-06, "loss": 0.324, "step": 5170 }, { "epoch": 0.35328277652524426, "grad_norm": 3.7071545124053955, "learning_rate": 5.25156481858276e-06, "loss": 0.3989, "step": 5171 }, { "epoch": 0.35335109653617547, "grad_norm": 5.3183465003967285, "learning_rate": 5.250894239202624e-06, "loss": 0.3877, "step": 5172 }, { "epoch": 0.3534194165471066, "grad_norm": 3.3178975582122803, "learning_rate": 5.25022357408584e-06, "loss": 0.2589, "step": 5173 }, { "epoch": 0.35348773655803783, "grad_norm": 3.9983253479003906, "learning_rate": 5.24955282326525e-06, "loss": 0.3315, "step": 5174 }, { "epoch": 0.35355605656896905, "grad_norm": 5.327145576477051, "learning_rate": 5.2488819867737e-06, "loss": 0.4257, "step": 5175 }, { "epoch": 0.35362437657990026, "grad_norm": 4.362156391143799, "learning_rate": 5.2482110646440384e-06, "loss": 0.4281, "step": 5176 }, { "epoch": 0.35369269659083147, "grad_norm": 3.4348692893981934, "learning_rate": 5.247540056909117e-06, "loss": 0.4335, "step": 5177 }, { "epoch": 0.3537610166017627, "grad_norm": 3.4971675872802734, "learning_rate": 5.246868963601794e-06, "loss": 0.2153, "step": 5178 }, { "epoch": 0.35382933661269383, "grad_norm": 4.60748815536499, "learning_rate": 5.246197784754933e-06, "loss": 0.3846, "step": 5179 }, { "epoch": 0.35389765662362505, "grad_norm": 3.982449769973755, "learning_rate": 5.2455265204013975e-06, "loss": 0.3909, "step": 5180 }, { "epoch": 0.35396597663455626, "grad_norm": 4.019913673400879, "learning_rate": 5.244855170574059e-06, "loss": 0.4347, "step": 5181 }, { "epoch": 0.35403429664548747, "grad_norm": 4.24358606338501, "learning_rate": 5.24418373530579e-06, "loss": 0.321, "step": 5182 }, { "epoch": 0.3541026166564187, "grad_norm": 4.0902628898620605, "learning_rate": 5.243512214629471e-06, "loss": 0.4466, "step": 5183 }, { "epoch": 0.3541709366673499, "grad_norm": 3.750302791595459, "learning_rate": 5.242840608577984e-06, "loss": 0.3705, "step": 5184 }, { "epoch": 0.35423925667828104, "grad_norm": 3.963818311691284, "learning_rate": 5.242168917184215e-06, "loss": 0.3501, "step": 5185 }, { "epoch": 0.35430757668921226, "grad_norm": 5.257729530334473, "learning_rate": 5.241497140481055e-06, "loss": 0.4321, "step": 5186 }, { "epoch": 0.35437589670014347, "grad_norm": 3.4770240783691406, "learning_rate": 5.240825278501401e-06, "loss": 0.2965, "step": 5187 }, { "epoch": 0.3544442167110747, "grad_norm": 3.579909563064575, "learning_rate": 5.240153331278148e-06, "loss": 0.4329, "step": 5188 }, { "epoch": 0.3545125367220059, "grad_norm": 3.6691038608551025, "learning_rate": 5.239481298844203e-06, "loss": 0.2825, "step": 5189 }, { "epoch": 0.3545808567329371, "grad_norm": 4.083907127380371, "learning_rate": 5.238809181232475e-06, "loss": 0.4825, "step": 5190 }, { "epoch": 0.35464917674386826, "grad_norm": 4.202073574066162, "learning_rate": 5.2381369784758715e-06, "loss": 0.3628, "step": 5191 }, { "epoch": 0.35471749675479947, "grad_norm": 4.578487873077393, "learning_rate": 5.237464690607312e-06, "loss": 0.3932, "step": 5192 }, { "epoch": 0.3547858167657307, "grad_norm": 3.840181350708008, "learning_rate": 5.2367923176597145e-06, "loss": 0.3834, "step": 5193 }, { "epoch": 0.3548541367766619, "grad_norm": 4.211114883422852, "learning_rate": 5.236119859666004e-06, "loss": 0.4245, "step": 5194 }, { "epoch": 0.3549224567875931, "grad_norm": 4.129767894744873, "learning_rate": 5.235447316659109e-06, "loss": 0.3445, "step": 5195 }, { "epoch": 0.3549907767985243, "grad_norm": 2.9256997108459473, "learning_rate": 5.234774688671963e-06, "loss": 0.3633, "step": 5196 }, { "epoch": 0.35505909680945547, "grad_norm": 2.9957194328308105, "learning_rate": 5.234101975737503e-06, "loss": 0.382, "step": 5197 }, { "epoch": 0.3551274168203867, "grad_norm": 3.3443188667297363, "learning_rate": 5.233429177888667e-06, "loss": 0.4007, "step": 5198 }, { "epoch": 0.3551957368313179, "grad_norm": 3.5460126399993896, "learning_rate": 5.2327562951584024e-06, "loss": 0.3203, "step": 5199 }, { "epoch": 0.3552640568422491, "grad_norm": 2.8042922019958496, "learning_rate": 5.23208332757966e-06, "loss": 0.2671, "step": 5200 }, { "epoch": 0.3553323768531803, "grad_norm": 4.254795074462891, "learning_rate": 5.23141027518539e-06, "loss": 0.4179, "step": 5201 }, { "epoch": 0.3554006968641115, "grad_norm": 3.771359443664551, "learning_rate": 5.230737138008552e-06, "loss": 0.3754, "step": 5202 }, { "epoch": 0.3554690168750427, "grad_norm": 3.8010475635528564, "learning_rate": 5.230063916082107e-06, "loss": 0.4455, "step": 5203 }, { "epoch": 0.3555373368859739, "grad_norm": 3.301424264907837, "learning_rate": 5.229390609439021e-06, "loss": 0.3849, "step": 5204 }, { "epoch": 0.3556056568969051, "grad_norm": 3.6879377365112305, "learning_rate": 5.228717218112265e-06, "loss": 0.322, "step": 5205 }, { "epoch": 0.3556739769078363, "grad_norm": 3.4512197971343994, "learning_rate": 5.2280437421348115e-06, "loss": 0.3678, "step": 5206 }, { "epoch": 0.3557422969187675, "grad_norm": 4.223723411560059, "learning_rate": 5.227370181539641e-06, "loss": 0.3623, "step": 5207 }, { "epoch": 0.35581061692969873, "grad_norm": 3.39384126663208, "learning_rate": 5.226696536359733e-06, "loss": 0.3963, "step": 5208 }, { "epoch": 0.3558789369406299, "grad_norm": 3.709139108657837, "learning_rate": 5.226022806628076e-06, "loss": 0.359, "step": 5209 }, { "epoch": 0.3559472569515611, "grad_norm": 4.3645758628845215, "learning_rate": 5.225348992377662e-06, "loss": 0.3969, "step": 5210 }, { "epoch": 0.3560155769624923, "grad_norm": 3.344099283218384, "learning_rate": 5.224675093641482e-06, "loss": 0.2845, "step": 5211 }, { "epoch": 0.3560838969734235, "grad_norm": 3.8728432655334473, "learning_rate": 5.2240011104525395e-06, "loss": 0.3469, "step": 5212 }, { "epoch": 0.35615221698435473, "grad_norm": 4.493924617767334, "learning_rate": 5.223327042843835e-06, "loss": 0.444, "step": 5213 }, { "epoch": 0.35622053699528594, "grad_norm": 3.8340704441070557, "learning_rate": 5.222652890848376e-06, "loss": 0.3289, "step": 5214 }, { "epoch": 0.3562888570062171, "grad_norm": 2.6522817611694336, "learning_rate": 5.221978654499175e-06, "loss": 0.3368, "step": 5215 }, { "epoch": 0.3563571770171483, "grad_norm": 4.95530366897583, "learning_rate": 5.221304333829248e-06, "loss": 0.4044, "step": 5216 }, { "epoch": 0.3564254970280795, "grad_norm": 3.2848262786865234, "learning_rate": 5.220629928871611e-06, "loss": 0.2807, "step": 5217 }, { "epoch": 0.35649381703901073, "grad_norm": 3.8484675884246826, "learning_rate": 5.219955439659292e-06, "loss": 0.4085, "step": 5218 }, { "epoch": 0.35656213704994194, "grad_norm": 4.322426795959473, "learning_rate": 5.2192808662253175e-06, "loss": 0.4121, "step": 5219 }, { "epoch": 0.35663045706087315, "grad_norm": 4.281240940093994, "learning_rate": 5.218606208602719e-06, "loss": 0.4135, "step": 5220 }, { "epoch": 0.3566987770718043, "grad_norm": 3.960156202316284, "learning_rate": 5.217931466824533e-06, "loss": 0.4181, "step": 5221 }, { "epoch": 0.3567670970827355, "grad_norm": 5.12708854675293, "learning_rate": 5.2172566409238015e-06, "loss": 0.48, "step": 5222 }, { "epoch": 0.35683541709366673, "grad_norm": 4.171304702758789, "learning_rate": 5.216581730933566e-06, "loss": 0.4204, "step": 5223 }, { "epoch": 0.35690373710459794, "grad_norm": 3.659839630126953, "learning_rate": 5.215906736886877e-06, "loss": 0.3907, "step": 5224 }, { "epoch": 0.35697205711552915, "grad_norm": 4.441311836242676, "learning_rate": 5.215231658816786e-06, "loss": 0.4018, "step": 5225 }, { "epoch": 0.35704037712646036, "grad_norm": 3.1889874935150146, "learning_rate": 5.214556496756351e-06, "loss": 0.2779, "step": 5226 }, { "epoch": 0.3571086971373915, "grad_norm": 3.036015510559082, "learning_rate": 5.213881250738632e-06, "loss": 0.3245, "step": 5227 }, { "epoch": 0.35717701714832273, "grad_norm": 3.8908517360687256, "learning_rate": 5.213205920796694e-06, "loss": 0.3303, "step": 5228 }, { "epoch": 0.35724533715925394, "grad_norm": 3.6802659034729004, "learning_rate": 5.2125305069636075e-06, "loss": 0.4255, "step": 5229 }, { "epoch": 0.35731365717018515, "grad_norm": 4.493654251098633, "learning_rate": 5.211855009272444e-06, "loss": 0.4953, "step": 5230 }, { "epoch": 0.35738197718111636, "grad_norm": 3.85347843170166, "learning_rate": 5.211179427756281e-06, "loss": 0.3258, "step": 5231 }, { "epoch": 0.3574502971920476, "grad_norm": 3.0399341583251953, "learning_rate": 5.210503762448202e-06, "loss": 0.3772, "step": 5232 }, { "epoch": 0.35751861720297873, "grad_norm": 3.1981658935546875, "learning_rate": 5.209828013381289e-06, "loss": 0.4771, "step": 5233 }, { "epoch": 0.35758693721390994, "grad_norm": 4.079440593719482, "learning_rate": 5.209152180588635e-06, "loss": 0.447, "step": 5234 }, { "epoch": 0.35765525722484115, "grad_norm": 3.309617757797241, "learning_rate": 5.208476264103332e-06, "loss": 0.3355, "step": 5235 }, { "epoch": 0.35772357723577236, "grad_norm": 5.035168170928955, "learning_rate": 5.207800263958478e-06, "loss": 0.4008, "step": 5236 }, { "epoch": 0.35779189724670357, "grad_norm": 3.2987077236175537, "learning_rate": 5.2071241801871745e-06, "loss": 0.3105, "step": 5237 }, { "epoch": 0.3578602172576348, "grad_norm": 3.629801034927368, "learning_rate": 5.20644801282253e-06, "loss": 0.3523, "step": 5238 }, { "epoch": 0.35792853726856594, "grad_norm": 4.012219429016113, "learning_rate": 5.2057717618976505e-06, "loss": 0.4682, "step": 5239 }, { "epoch": 0.35799685727949715, "grad_norm": 5.218185901641846, "learning_rate": 5.205095427445654e-06, "loss": 0.4003, "step": 5240 }, { "epoch": 0.35806517729042836, "grad_norm": 4.084267616271973, "learning_rate": 5.204419009499657e-06, "loss": 0.4174, "step": 5241 }, { "epoch": 0.35813349730135957, "grad_norm": 3.963361978530884, "learning_rate": 5.203742508092781e-06, "loss": 0.3841, "step": 5242 }, { "epoch": 0.3582018173122908, "grad_norm": 5.794641494750977, "learning_rate": 5.203065923258155e-06, "loss": 0.3896, "step": 5243 }, { "epoch": 0.358270137323222, "grad_norm": 4.519818305969238, "learning_rate": 5.202389255028908e-06, "loss": 0.497, "step": 5244 }, { "epoch": 0.35833845733415315, "grad_norm": 4.118075847625732, "learning_rate": 5.201712503438175e-06, "loss": 0.3964, "step": 5245 }, { "epoch": 0.35840677734508436, "grad_norm": 4.819569110870361, "learning_rate": 5.201035668519094e-06, "loss": 0.3947, "step": 5246 }, { "epoch": 0.35847509735601557, "grad_norm": 4.5450286865234375, "learning_rate": 5.200358750304809e-06, "loss": 0.378, "step": 5247 }, { "epoch": 0.3585434173669468, "grad_norm": 3.713820219039917, "learning_rate": 5.199681748828466e-06, "loss": 0.4507, "step": 5248 }, { "epoch": 0.358611737377878, "grad_norm": 3.930776596069336, "learning_rate": 5.199004664123217e-06, "loss": 0.3882, "step": 5249 }, { "epoch": 0.3586800573888092, "grad_norm": 3.917757272720337, "learning_rate": 5.198327496222215e-06, "loss": 0.401, "step": 5250 }, { "epoch": 0.35874837739974036, "grad_norm": 3.7703750133514404, "learning_rate": 5.1976502451586205e-06, "loss": 0.4377, "step": 5251 }, { "epoch": 0.35881669741067157, "grad_norm": 3.977855920791626, "learning_rate": 5.196972910965598e-06, "loss": 0.4578, "step": 5252 }, { "epoch": 0.3588850174216028, "grad_norm": 4.95200777053833, "learning_rate": 5.196295493676313e-06, "loss": 0.3334, "step": 5253 }, { "epoch": 0.358953337432534, "grad_norm": 3.4005343914031982, "learning_rate": 5.195617993323935e-06, "loss": 0.3499, "step": 5254 }, { "epoch": 0.3590216574434652, "grad_norm": 4.000039100646973, "learning_rate": 5.194940409941644e-06, "loss": 0.4281, "step": 5255 }, { "epoch": 0.3590899774543964, "grad_norm": 4.289655685424805, "learning_rate": 5.194262743562617e-06, "loss": 0.3773, "step": 5256 }, { "epoch": 0.35915829746532757, "grad_norm": 3.7099714279174805, "learning_rate": 5.193584994220036e-06, "loss": 0.3829, "step": 5257 }, { "epoch": 0.3592266174762588, "grad_norm": 3.4668960571289062, "learning_rate": 5.192907161947092e-06, "loss": 0.3848, "step": 5258 }, { "epoch": 0.35929493748719, "grad_norm": 3.478032350540161, "learning_rate": 5.192229246776974e-06, "loss": 0.3721, "step": 5259 }, { "epoch": 0.3593632574981212, "grad_norm": 5.6062912940979, "learning_rate": 5.191551248742877e-06, "loss": 0.4487, "step": 5260 }, { "epoch": 0.3594315775090524, "grad_norm": 3.03109073638916, "learning_rate": 5.190873167878005e-06, "loss": 0.3756, "step": 5261 }, { "epoch": 0.3594998975199836, "grad_norm": 4.715311050415039, "learning_rate": 5.190195004215557e-06, "loss": 0.3146, "step": 5262 }, { "epoch": 0.3595682175309148, "grad_norm": 3.358950614929199, "learning_rate": 5.189516757788744e-06, "loss": 0.2829, "step": 5263 }, { "epoch": 0.359636537541846, "grad_norm": 3.65936279296875, "learning_rate": 5.188838428630777e-06, "loss": 0.3238, "step": 5264 }, { "epoch": 0.3597048575527772, "grad_norm": 4.345449447631836, "learning_rate": 5.188160016774872e-06, "loss": 0.3863, "step": 5265 }, { "epoch": 0.3597731775637084, "grad_norm": 4.038930892944336, "learning_rate": 5.187481522254248e-06, "loss": 0.4087, "step": 5266 }, { "epoch": 0.3598414975746396, "grad_norm": 4.350560188293457, "learning_rate": 5.186802945102131e-06, "loss": 0.3612, "step": 5267 }, { "epoch": 0.35990981758557083, "grad_norm": 4.401815414428711, "learning_rate": 5.1861242853517475e-06, "loss": 0.3927, "step": 5268 }, { "epoch": 0.359978137596502, "grad_norm": 3.948585271835327, "learning_rate": 5.185445543036331e-06, "loss": 0.3968, "step": 5269 }, { "epoch": 0.3600464576074332, "grad_norm": 5.511415481567383, "learning_rate": 5.184766718189115e-06, "loss": 0.4493, "step": 5270 }, { "epoch": 0.3601147776183644, "grad_norm": 4.740651607513428, "learning_rate": 5.184087810843345e-06, "loss": 0.3271, "step": 5271 }, { "epoch": 0.3601830976292956, "grad_norm": 4.9231343269348145, "learning_rate": 5.18340882103226e-06, "loss": 0.4518, "step": 5272 }, { "epoch": 0.36025141764022683, "grad_norm": 3.3075544834136963, "learning_rate": 5.182729748789111e-06, "loss": 0.4245, "step": 5273 }, { "epoch": 0.36031973765115805, "grad_norm": 4.3816046714782715, "learning_rate": 5.182050594147148e-06, "loss": 0.4213, "step": 5274 }, { "epoch": 0.3603880576620892, "grad_norm": 3.4740099906921387, "learning_rate": 5.181371357139632e-06, "loss": 0.3996, "step": 5275 }, { "epoch": 0.3604563776730204, "grad_norm": 3.3463752269744873, "learning_rate": 5.180692037799819e-06, "loss": 0.3349, "step": 5276 }, { "epoch": 0.3605246976839516, "grad_norm": 3.289846420288086, "learning_rate": 5.180012636160975e-06, "loss": 0.4173, "step": 5277 }, { "epoch": 0.36059301769488283, "grad_norm": 3.3552238941192627, "learning_rate": 5.179333152256368e-06, "loss": 0.3105, "step": 5278 }, { "epoch": 0.36066133770581404, "grad_norm": 3.2331504821777344, "learning_rate": 5.1786535861192725e-06, "loss": 0.3451, "step": 5279 }, { "epoch": 0.36072965771674526, "grad_norm": 3.694519519805908, "learning_rate": 5.1779739377829626e-06, "loss": 0.4895, "step": 5280 }, { "epoch": 0.3607979777276764, "grad_norm": 3.7764453887939453, "learning_rate": 5.1772942072807205e-06, "loss": 0.3338, "step": 5281 }, { "epoch": 0.3608662977386076, "grad_norm": 3.7498836517333984, "learning_rate": 5.17661439464583e-06, "loss": 0.3852, "step": 5282 }, { "epoch": 0.36093461774953883, "grad_norm": 3.3254764080047607, "learning_rate": 5.175934499911579e-06, "loss": 0.3568, "step": 5283 }, { "epoch": 0.36100293776047004, "grad_norm": 3.3646061420440674, "learning_rate": 5.175254523111262e-06, "loss": 0.3643, "step": 5284 }, { "epoch": 0.36107125777140125, "grad_norm": 3.150794506072998, "learning_rate": 5.174574464278175e-06, "loss": 0.3436, "step": 5285 }, { "epoch": 0.36113957778233247, "grad_norm": 3.743746757507324, "learning_rate": 5.173894323445618e-06, "loss": 0.37, "step": 5286 }, { "epoch": 0.3612078977932636, "grad_norm": 3.819103240966797, "learning_rate": 5.173214100646895e-06, "loss": 0.3886, "step": 5287 }, { "epoch": 0.36127621780419483, "grad_norm": 2.892791509628296, "learning_rate": 5.172533795915317e-06, "loss": 0.3082, "step": 5288 }, { "epoch": 0.36134453781512604, "grad_norm": 3.826159715652466, "learning_rate": 5.171853409284196e-06, "loss": 0.2961, "step": 5289 }, { "epoch": 0.36141285782605725, "grad_norm": 3.9549570083618164, "learning_rate": 5.171172940786846e-06, "loss": 0.4654, "step": 5290 }, { "epoch": 0.36148117783698847, "grad_norm": 5.33860969543457, "learning_rate": 5.170492390456593e-06, "loss": 0.4485, "step": 5291 }, { "epoch": 0.3615494978479197, "grad_norm": 3.219376564025879, "learning_rate": 5.169811758326756e-06, "loss": 0.3719, "step": 5292 }, { "epoch": 0.36161781785885083, "grad_norm": 4.327666759490967, "learning_rate": 5.1691310444306685e-06, "loss": 0.469, "step": 5293 }, { "epoch": 0.36168613786978204, "grad_norm": 3.431365966796875, "learning_rate": 5.1684502488016605e-06, "loss": 0.3371, "step": 5294 }, { "epoch": 0.36175445788071325, "grad_norm": 3.898411273956299, "learning_rate": 5.1677693714730694e-06, "loss": 0.3537, "step": 5295 }, { "epoch": 0.36182277789164446, "grad_norm": 4.208511829376221, "learning_rate": 5.167088412478236e-06, "loss": 0.4403, "step": 5296 }, { "epoch": 0.3618910979025757, "grad_norm": 3.5385305881500244, "learning_rate": 5.166407371850506e-06, "loss": 0.3304, "step": 5297 }, { "epoch": 0.3619594179135069, "grad_norm": 5.009298801422119, "learning_rate": 5.165726249623225e-06, "loss": 0.4224, "step": 5298 }, { "epoch": 0.36202773792443804, "grad_norm": 4.846315860748291, "learning_rate": 5.1650450458297504e-06, "loss": 0.491, "step": 5299 }, { "epoch": 0.36209605793536925, "grad_norm": 3.277261734008789, "learning_rate": 5.164363760503435e-06, "loss": 0.2762, "step": 5300 }, { "epoch": 0.36216437794630046, "grad_norm": 3.8746118545532227, "learning_rate": 5.163682393677642e-06, "loss": 0.3422, "step": 5301 }, { "epoch": 0.3622326979572317, "grad_norm": 4.574134826660156, "learning_rate": 5.1630009453857345e-06, "loss": 0.3993, "step": 5302 }, { "epoch": 0.3623010179681629, "grad_norm": 2.710449457168579, "learning_rate": 5.162319415661082e-06, "loss": 0.3249, "step": 5303 }, { "epoch": 0.3623693379790941, "grad_norm": 5.280049800872803, "learning_rate": 5.161637804537057e-06, "loss": 0.4084, "step": 5304 }, { "epoch": 0.36243765799002525, "grad_norm": 4.082230091094971, "learning_rate": 5.160956112047038e-06, "loss": 0.3993, "step": 5305 }, { "epoch": 0.36250597800095646, "grad_norm": 4.775373935699463, "learning_rate": 5.1602743382244005e-06, "loss": 0.3998, "step": 5306 }, { "epoch": 0.3625742980118877, "grad_norm": 5.391085147857666, "learning_rate": 5.159592483102535e-06, "loss": 0.3878, "step": 5307 }, { "epoch": 0.3626426180228189, "grad_norm": 3.780834436416626, "learning_rate": 5.158910546714828e-06, "loss": 0.3174, "step": 5308 }, { "epoch": 0.3627109380337501, "grad_norm": 3.9939045906066895, "learning_rate": 5.158228529094672e-06, "loss": 0.4602, "step": 5309 }, { "epoch": 0.3627792580446813, "grad_norm": 2.935452938079834, "learning_rate": 5.157546430275462e-06, "loss": 0.2859, "step": 5310 }, { "epoch": 0.36284757805561246, "grad_norm": 3.104001760482788, "learning_rate": 5.156864250290601e-06, "loss": 0.2439, "step": 5311 }, { "epoch": 0.3629158980665437, "grad_norm": 5.074400901794434, "learning_rate": 5.156181989173493e-06, "loss": 0.3889, "step": 5312 }, { "epoch": 0.3629842180774749, "grad_norm": 4.874338626861572, "learning_rate": 5.155499646957544e-06, "loss": 0.4255, "step": 5313 }, { "epoch": 0.3630525380884061, "grad_norm": 4.519630432128906, "learning_rate": 5.154817223676172e-06, "loss": 0.3124, "step": 5314 }, { "epoch": 0.3631208580993373, "grad_norm": 3.412776231765747, "learning_rate": 5.154134719362787e-06, "loss": 0.3732, "step": 5315 }, { "epoch": 0.3631891781102685, "grad_norm": 4.295465469360352, "learning_rate": 5.1534521340508144e-06, "loss": 0.3316, "step": 5316 }, { "epoch": 0.3632574981211997, "grad_norm": 3.567981004714966, "learning_rate": 5.152769467773675e-06, "loss": 0.3422, "step": 5317 }, { "epoch": 0.3633258181321309, "grad_norm": 4.906406402587891, "learning_rate": 5.1520867205648e-06, "loss": 0.4145, "step": 5318 }, { "epoch": 0.3633941381430621, "grad_norm": 2.882234811782837, "learning_rate": 5.151403892457621e-06, "loss": 0.2781, "step": 5319 }, { "epoch": 0.3634624581539933, "grad_norm": 3.7007853984832764, "learning_rate": 5.1507209834855736e-06, "loss": 0.3873, "step": 5320 }, { "epoch": 0.3635307781649245, "grad_norm": 3.793365001678467, "learning_rate": 5.150037993682098e-06, "loss": 0.3157, "step": 5321 }, { "epoch": 0.36359909817585573, "grad_norm": 3.224931478500366, "learning_rate": 5.149354923080639e-06, "loss": 0.3444, "step": 5322 }, { "epoch": 0.3636674181867869, "grad_norm": 3.6150591373443604, "learning_rate": 5.148671771714643e-06, "loss": 0.3324, "step": 5323 }, { "epoch": 0.3637357381977181, "grad_norm": 4.476932525634766, "learning_rate": 5.147988539617565e-06, "loss": 0.4812, "step": 5324 }, { "epoch": 0.3638040582086493, "grad_norm": 3.2066104412078857, "learning_rate": 5.147305226822859e-06, "loss": 0.3136, "step": 5325 }, { "epoch": 0.3638723782195805, "grad_norm": 3.387418270111084, "learning_rate": 5.146621833363985e-06, "loss": 0.3347, "step": 5326 }, { "epoch": 0.3639406982305117, "grad_norm": 4.83083963394165, "learning_rate": 5.145938359274408e-06, "loss": 0.3699, "step": 5327 }, { "epoch": 0.36400901824144294, "grad_norm": 4.079063415527344, "learning_rate": 5.145254804587597e-06, "loss": 0.37, "step": 5328 }, { "epoch": 0.3640773382523741, "grad_norm": 3.2332911491394043, "learning_rate": 5.144571169337021e-06, "loss": 0.3705, "step": 5329 }, { "epoch": 0.3641456582633053, "grad_norm": 3.5293498039245605, "learning_rate": 5.143887453556157e-06, "loss": 0.2673, "step": 5330 }, { "epoch": 0.3642139782742365, "grad_norm": 4.488199234008789, "learning_rate": 5.143203657278485e-06, "loss": 0.4082, "step": 5331 }, { "epoch": 0.3642822982851677, "grad_norm": 4.166097164154053, "learning_rate": 5.1425197805374895e-06, "loss": 0.4065, "step": 5332 }, { "epoch": 0.36435061829609894, "grad_norm": 4.067748546600342, "learning_rate": 5.141835823366656e-06, "loss": 0.3546, "step": 5333 }, { "epoch": 0.36441893830703015, "grad_norm": 4.2860236167907715, "learning_rate": 5.141151785799478e-06, "loss": 0.3375, "step": 5334 }, { "epoch": 0.3644872583179613, "grad_norm": 4.420882225036621, "learning_rate": 5.1404676678694495e-06, "loss": 0.508, "step": 5335 }, { "epoch": 0.3645555783288925, "grad_norm": 4.306102275848389, "learning_rate": 5.1397834696100725e-06, "loss": 0.3706, "step": 5336 }, { "epoch": 0.3646238983398237, "grad_norm": 3.5598299503326416, "learning_rate": 5.139099191054847e-06, "loss": 0.3719, "step": 5337 }, { "epoch": 0.36469221835075494, "grad_norm": 4.0432329177856445, "learning_rate": 5.138414832237283e-06, "loss": 0.3215, "step": 5338 }, { "epoch": 0.36476053836168615, "grad_norm": 3.9809927940368652, "learning_rate": 5.1377303931908894e-06, "loss": 0.3988, "step": 5339 }, { "epoch": 0.36482885837261736, "grad_norm": 3.3947513103485107, "learning_rate": 5.137045873949183e-06, "loss": 0.3149, "step": 5340 }, { "epoch": 0.3648971783835485, "grad_norm": 4.416839122772217, "learning_rate": 5.136361274545684e-06, "loss": 0.4963, "step": 5341 }, { "epoch": 0.3649654983944797, "grad_norm": 2.980464220046997, "learning_rate": 5.135676595013912e-06, "loss": 0.3521, "step": 5342 }, { "epoch": 0.36503381840541094, "grad_norm": 3.5825562477111816, "learning_rate": 5.134991835387396e-06, "loss": 0.3476, "step": 5343 }, { "epoch": 0.36510213841634215, "grad_norm": 3.9974095821380615, "learning_rate": 5.134306995699668e-06, "loss": 0.3921, "step": 5344 }, { "epoch": 0.36517045842727336, "grad_norm": 3.282656669616699, "learning_rate": 5.133622075984261e-06, "loss": 0.3965, "step": 5345 }, { "epoch": 0.36523877843820457, "grad_norm": 3.834256410598755, "learning_rate": 5.132937076274714e-06, "loss": 0.4715, "step": 5346 }, { "epoch": 0.3653070984491357, "grad_norm": 3.880157709121704, "learning_rate": 5.13225199660457e-06, "loss": 0.3475, "step": 5347 }, { "epoch": 0.36537541846006694, "grad_norm": 4.166618347167969, "learning_rate": 5.131566837007375e-06, "loss": 0.3983, "step": 5348 }, { "epoch": 0.36544373847099815, "grad_norm": 3.7218050956726074, "learning_rate": 5.13088159751668e-06, "loss": 0.4373, "step": 5349 }, { "epoch": 0.36551205848192936, "grad_norm": 3.3193397521972656, "learning_rate": 5.1301962781660395e-06, "loss": 0.2605, "step": 5350 }, { "epoch": 0.36558037849286057, "grad_norm": 4.280540943145752, "learning_rate": 5.129510878989011e-06, "loss": 0.396, "step": 5351 }, { "epoch": 0.3656486985037918, "grad_norm": 3.790633201599121, "learning_rate": 5.128825400019157e-06, "loss": 0.3959, "step": 5352 }, { "epoch": 0.36571701851472294, "grad_norm": 3.5447795391082764, "learning_rate": 5.1281398412900445e-06, "loss": 0.3984, "step": 5353 }, { "epoch": 0.36578533852565415, "grad_norm": 4.221926689147949, "learning_rate": 5.127454202835243e-06, "loss": 0.4127, "step": 5354 }, { "epoch": 0.36585365853658536, "grad_norm": 3.6385366916656494, "learning_rate": 5.126768484688325e-06, "loss": 0.3626, "step": 5355 }, { "epoch": 0.36592197854751657, "grad_norm": 4.158903121948242, "learning_rate": 5.126082686882869e-06, "loss": 0.3002, "step": 5356 }, { "epoch": 0.3659902985584478, "grad_norm": 4.222854137420654, "learning_rate": 5.125396809452458e-06, "loss": 0.4273, "step": 5357 }, { "epoch": 0.366058618569379, "grad_norm": 5.111474990844727, "learning_rate": 5.1247108524306775e-06, "loss": 0.3431, "step": 5358 }, { "epoch": 0.36612693858031015, "grad_norm": 3.368853807449341, "learning_rate": 5.124024815851115e-06, "loss": 0.3046, "step": 5359 }, { "epoch": 0.36619525859124136, "grad_norm": 4.037813663482666, "learning_rate": 5.1233386997473656e-06, "loss": 0.405, "step": 5360 }, { "epoch": 0.36626357860217257, "grad_norm": 4.4257025718688965, "learning_rate": 5.1226525041530256e-06, "loss": 0.3497, "step": 5361 }, { "epoch": 0.3663318986131038, "grad_norm": 3.5842642784118652, "learning_rate": 5.121966229101697e-06, "loss": 0.3721, "step": 5362 }, { "epoch": 0.366400218624035, "grad_norm": 3.0782711505889893, "learning_rate": 5.121279874626983e-06, "loss": 0.4421, "step": 5363 }, { "epoch": 0.3664685386349662, "grad_norm": 3.4717376232147217, "learning_rate": 5.120593440762497e-06, "loss": 0.3382, "step": 5364 }, { "epoch": 0.36653685864589736, "grad_norm": 4.079007148742676, "learning_rate": 5.119906927541848e-06, "loss": 0.3137, "step": 5365 }, { "epoch": 0.36660517865682857, "grad_norm": 2.7520334720611572, "learning_rate": 5.119220334998652e-06, "loss": 0.2583, "step": 5366 }, { "epoch": 0.3666734986677598, "grad_norm": 3.1689538955688477, "learning_rate": 5.118533663166533e-06, "loss": 0.3756, "step": 5367 }, { "epoch": 0.366741818678691, "grad_norm": 4.536454200744629, "learning_rate": 5.117846912079113e-06, "loss": 0.3588, "step": 5368 }, { "epoch": 0.3668101386896222, "grad_norm": 4.040726661682129, "learning_rate": 5.117160081770021e-06, "loss": 0.4891, "step": 5369 }, { "epoch": 0.3668784587005534, "grad_norm": 3.5048394203186035, "learning_rate": 5.116473172272889e-06, "loss": 0.3635, "step": 5370 }, { "epoch": 0.36694677871148457, "grad_norm": 4.239763259887695, "learning_rate": 5.115786183621354e-06, "loss": 0.4426, "step": 5371 }, { "epoch": 0.3670150987224158, "grad_norm": 5.174818515777588, "learning_rate": 5.115099115849057e-06, "loss": 0.3215, "step": 5372 }, { "epoch": 0.367083418733347, "grad_norm": 3.6728358268737793, "learning_rate": 5.1144119689896385e-06, "loss": 0.3241, "step": 5373 }, { "epoch": 0.3671517387442782, "grad_norm": 4.49991512298584, "learning_rate": 5.113724743076748e-06, "loss": 0.3955, "step": 5374 }, { "epoch": 0.3672200587552094, "grad_norm": 3.004453420639038, "learning_rate": 5.113037438144039e-06, "loss": 0.3326, "step": 5375 }, { "epoch": 0.3672883787661406, "grad_norm": 5.193267345428467, "learning_rate": 5.112350054225164e-06, "loss": 0.3643, "step": 5376 }, { "epoch": 0.3673566987770718, "grad_norm": 3.668071746826172, "learning_rate": 5.111662591353785e-06, "loss": 0.419, "step": 5377 }, { "epoch": 0.367425018788003, "grad_norm": 3.2242109775543213, "learning_rate": 5.110975049563563e-06, "loss": 0.3571, "step": 5378 }, { "epoch": 0.3674933387989342, "grad_norm": 4.089392185211182, "learning_rate": 5.110287428888166e-06, "loss": 0.4546, "step": 5379 }, { "epoch": 0.3675616588098654, "grad_norm": 4.061347484588623, "learning_rate": 5.109599729361266e-06, "loss": 0.3375, "step": 5380 }, { "epoch": 0.3676299788207966, "grad_norm": 3.176727771759033, "learning_rate": 5.108911951016536e-06, "loss": 0.3156, "step": 5381 }, { "epoch": 0.36769829883172783, "grad_norm": 3.4171149730682373, "learning_rate": 5.1082240938876555e-06, "loss": 0.3195, "step": 5382 }, { "epoch": 0.367766618842659, "grad_norm": 3.7302985191345215, "learning_rate": 5.107536158008307e-06, "loss": 0.3504, "step": 5383 }, { "epoch": 0.3678349388535902, "grad_norm": 4.476597785949707, "learning_rate": 5.106848143412176e-06, "loss": 0.4315, "step": 5384 }, { "epoch": 0.3679032588645214, "grad_norm": 4.0770368576049805, "learning_rate": 5.106160050132956e-06, "loss": 0.4023, "step": 5385 }, { "epoch": 0.3679715788754526, "grad_norm": 4.416018962860107, "learning_rate": 5.105471878204337e-06, "loss": 0.4998, "step": 5386 }, { "epoch": 0.36803989888638383, "grad_norm": 3.5297114849090576, "learning_rate": 5.10478362766002e-06, "loss": 0.2994, "step": 5387 }, { "epoch": 0.36810821889731504, "grad_norm": 4.5681023597717285, "learning_rate": 5.104095298533704e-06, "loss": 0.3884, "step": 5388 }, { "epoch": 0.3681765389082462, "grad_norm": 4.750865459442139, "learning_rate": 5.103406890859097e-06, "loss": 0.4773, "step": 5389 }, { "epoch": 0.3682448589191774, "grad_norm": 4.490571022033691, "learning_rate": 5.102718404669907e-06, "loss": 0.3887, "step": 5390 }, { "epoch": 0.3683131789301086, "grad_norm": 3.6511573791503906, "learning_rate": 5.1020298399998495e-06, "loss": 0.3114, "step": 5391 }, { "epoch": 0.36838149894103983, "grad_norm": 3.219432830810547, "learning_rate": 5.1013411968826385e-06, "loss": 0.3488, "step": 5392 }, { "epoch": 0.36844981895197104, "grad_norm": 4.207674980163574, "learning_rate": 5.100652475351999e-06, "loss": 0.4892, "step": 5393 }, { "epoch": 0.36851813896290225, "grad_norm": 3.1359522342681885, "learning_rate": 5.0999636754416505e-06, "loss": 0.2671, "step": 5394 }, { "epoch": 0.3685864589738334, "grad_norm": 2.6450767517089844, "learning_rate": 5.099274797185327e-06, "loss": 0.2819, "step": 5395 }, { "epoch": 0.3686547789847646, "grad_norm": 4.111347675323486, "learning_rate": 5.098585840616759e-06, "loss": 0.3825, "step": 5396 }, { "epoch": 0.36872309899569583, "grad_norm": 4.209456920623779, "learning_rate": 5.097896805769682e-06, "loss": 0.3633, "step": 5397 }, { "epoch": 0.36879141900662704, "grad_norm": 3.7964377403259277, "learning_rate": 5.097207692677837e-06, "loss": 0.4044, "step": 5398 }, { "epoch": 0.36885973901755825, "grad_norm": 2.773142099380493, "learning_rate": 5.0965185013749674e-06, "loss": 0.3635, "step": 5399 }, { "epoch": 0.36892805902848946, "grad_norm": 4.444189071655273, "learning_rate": 5.095829231894822e-06, "loss": 0.3748, "step": 5400 }, { "epoch": 0.3689963790394206, "grad_norm": 4.06489372253418, "learning_rate": 5.095139884271153e-06, "loss": 0.3602, "step": 5401 }, { "epoch": 0.36906469905035183, "grad_norm": 3.545349359512329, "learning_rate": 5.094450458537714e-06, "loss": 0.3724, "step": 5402 }, { "epoch": 0.36913301906128304, "grad_norm": 4.6050333976745605, "learning_rate": 5.093760954728266e-06, "loss": 0.3623, "step": 5403 }, { "epoch": 0.36920133907221425, "grad_norm": 3.2594692707061768, "learning_rate": 5.093071372876572e-06, "loss": 0.353, "step": 5404 }, { "epoch": 0.36926965908314546, "grad_norm": 3.215667963027954, "learning_rate": 5.0923817130163975e-06, "loss": 0.3549, "step": 5405 }, { "epoch": 0.3693379790940767, "grad_norm": 4.264608860015869, "learning_rate": 5.091691975181514e-06, "loss": 0.388, "step": 5406 }, { "epoch": 0.36940629910500783, "grad_norm": 4.529977798461914, "learning_rate": 5.091002159405698e-06, "loss": 0.3275, "step": 5407 }, { "epoch": 0.36947461911593904, "grad_norm": 2.491130828857422, "learning_rate": 5.090312265722727e-06, "loss": 0.3198, "step": 5408 }, { "epoch": 0.36954293912687025, "grad_norm": 3.517123222351074, "learning_rate": 5.089622294166381e-06, "loss": 0.3136, "step": 5409 }, { "epoch": 0.36961125913780146, "grad_norm": 4.3548264503479, "learning_rate": 5.088932244770449e-06, "loss": 0.3198, "step": 5410 }, { "epoch": 0.3696795791487327, "grad_norm": 4.023149490356445, "learning_rate": 5.088242117568719e-06, "loss": 0.3155, "step": 5411 }, { "epoch": 0.3697478991596639, "grad_norm": 3.098341464996338, "learning_rate": 5.087551912594985e-06, "loss": 0.3504, "step": 5412 }, { "epoch": 0.36981621917059504, "grad_norm": 3.248478651046753, "learning_rate": 5.086861629883046e-06, "loss": 0.3192, "step": 5413 }, { "epoch": 0.36988453918152625, "grad_norm": 3.666837215423584, "learning_rate": 5.086171269466701e-06, "loss": 0.2866, "step": 5414 }, { "epoch": 0.36995285919245746, "grad_norm": 4.828179359436035, "learning_rate": 5.085480831379758e-06, "loss": 0.4491, "step": 5415 }, { "epoch": 0.3700211792033887, "grad_norm": 3.9439139366149902, "learning_rate": 5.0847903156560225e-06, "loss": 0.4796, "step": 5416 }, { "epoch": 0.3700894992143199, "grad_norm": 3.4957215785980225, "learning_rate": 5.084099722329311e-06, "loss": 0.3003, "step": 5417 }, { "epoch": 0.3701578192252511, "grad_norm": 3.4885456562042236, "learning_rate": 5.083409051433436e-06, "loss": 0.3545, "step": 5418 }, { "epoch": 0.37022613923618225, "grad_norm": 3.845259666442871, "learning_rate": 5.082718303002221e-06, "loss": 0.3549, "step": 5419 }, { "epoch": 0.37029445924711346, "grad_norm": 3.9636154174804688, "learning_rate": 5.082027477069489e-06, "loss": 0.3908, "step": 5420 }, { "epoch": 0.37036277925804467, "grad_norm": 3.251845598220825, "learning_rate": 5.081336573669068e-06, "loss": 0.2175, "step": 5421 }, { "epoch": 0.3704310992689759, "grad_norm": 4.029804229736328, "learning_rate": 5.0806455928347886e-06, "loss": 0.3615, "step": 5422 }, { "epoch": 0.3704994192799071, "grad_norm": 3.7983295917510986, "learning_rate": 5.079954534600488e-06, "loss": 0.4077, "step": 5423 }, { "epoch": 0.3705677392908383, "grad_norm": 3.4746477603912354, "learning_rate": 5.079263399000005e-06, "loss": 0.2826, "step": 5424 }, { "epoch": 0.3706360593017695, "grad_norm": 4.771389484405518, "learning_rate": 5.078572186067181e-06, "loss": 0.3745, "step": 5425 }, { "epoch": 0.37070437931270067, "grad_norm": 4.7252278327941895, "learning_rate": 5.077880895835866e-06, "loss": 0.4147, "step": 5426 }, { "epoch": 0.3707726993236319, "grad_norm": 5.153135776519775, "learning_rate": 5.077189528339909e-06, "loss": 0.564, "step": 5427 }, { "epoch": 0.3708410193345631, "grad_norm": 4.276693820953369, "learning_rate": 5.0764980836131624e-06, "loss": 0.3857, "step": 5428 }, { "epoch": 0.3709093393454943, "grad_norm": 3.8894567489624023, "learning_rate": 5.075806561689488e-06, "loss": 0.4421, "step": 5429 }, { "epoch": 0.3709776593564255, "grad_norm": 3.8089563846588135, "learning_rate": 5.075114962602746e-06, "loss": 0.2946, "step": 5430 }, { "epoch": 0.3710459793673567, "grad_norm": 4.668061256408691, "learning_rate": 5.074423286386802e-06, "loss": 0.4302, "step": 5431 }, { "epoch": 0.3711142993782879, "grad_norm": 4.865716934204102, "learning_rate": 5.073731533075526e-06, "loss": 0.3457, "step": 5432 }, { "epoch": 0.3711826193892191, "grad_norm": 2.541691780090332, "learning_rate": 5.0730397027027915e-06, "loss": 0.2433, "step": 5433 }, { "epoch": 0.3712509394001503, "grad_norm": 4.284637451171875, "learning_rate": 5.072347795302475e-06, "loss": 0.4152, "step": 5434 }, { "epoch": 0.3713192594110815, "grad_norm": 4.250579357147217, "learning_rate": 5.071655810908458e-06, "loss": 0.3283, "step": 5435 }, { "epoch": 0.3713875794220127, "grad_norm": 3.8114025592803955, "learning_rate": 5.070963749554624e-06, "loss": 0.2811, "step": 5436 }, { "epoch": 0.37145589943294394, "grad_norm": 4.232720375061035, "learning_rate": 5.070271611274864e-06, "loss": 0.4305, "step": 5437 }, { "epoch": 0.3715242194438751, "grad_norm": 2.9006264209747314, "learning_rate": 5.069579396103067e-06, "loss": 0.3444, "step": 5438 }, { "epoch": 0.3715925394548063, "grad_norm": 4.020294189453125, "learning_rate": 5.068887104073131e-06, "loss": 0.3628, "step": 5439 }, { "epoch": 0.3716608594657375, "grad_norm": 3.9450948238372803, "learning_rate": 5.068194735218955e-06, "loss": 0.3941, "step": 5440 }, { "epoch": 0.3717291794766687, "grad_norm": 3.9073057174682617, "learning_rate": 5.067502289574442e-06, "loss": 0.3447, "step": 5441 }, { "epoch": 0.37179749948759994, "grad_norm": 3.861602306365967, "learning_rate": 5.0668097671735e-06, "loss": 0.3348, "step": 5442 }, { "epoch": 0.37186581949853115, "grad_norm": 3.5448763370513916, "learning_rate": 5.06611716805004e-06, "loss": 0.3491, "step": 5443 }, { "epoch": 0.3719341395094623, "grad_norm": 4.547931671142578, "learning_rate": 5.065424492237977e-06, "loss": 0.4178, "step": 5444 }, { "epoch": 0.3720024595203935, "grad_norm": 4.4211530685424805, "learning_rate": 5.064731739771227e-06, "loss": 0.4353, "step": 5445 }, { "epoch": 0.3720707795313247, "grad_norm": 3.5880489349365234, "learning_rate": 5.064038910683717e-06, "loss": 0.3659, "step": 5446 }, { "epoch": 0.37213909954225594, "grad_norm": 4.0723443031311035, "learning_rate": 5.063346005009368e-06, "loss": 0.4037, "step": 5447 }, { "epoch": 0.37220741955318715, "grad_norm": 4.249641418457031, "learning_rate": 5.062653022782114e-06, "loss": 0.3684, "step": 5448 }, { "epoch": 0.37227573956411836, "grad_norm": 3.4813084602355957, "learning_rate": 5.0619599640358846e-06, "loss": 0.3201, "step": 5449 }, { "epoch": 0.3723440595750495, "grad_norm": 3.821489095687866, "learning_rate": 5.06126682880462e-06, "loss": 0.4857, "step": 5450 }, { "epoch": 0.3724123795859807, "grad_norm": 3.5856740474700928, "learning_rate": 5.0605736171222605e-06, "loss": 0.2656, "step": 5451 }, { "epoch": 0.37248069959691193, "grad_norm": 4.226949214935303, "learning_rate": 5.059880329022749e-06, "loss": 0.4323, "step": 5452 }, { "epoch": 0.37254901960784315, "grad_norm": 4.354571342468262, "learning_rate": 5.059186964540037e-06, "loss": 0.3611, "step": 5453 }, { "epoch": 0.37261733961877436, "grad_norm": 4.434874057769775, "learning_rate": 5.058493523708075e-06, "loss": 0.329, "step": 5454 }, { "epoch": 0.37268565962970557, "grad_norm": 3.8085579872131348, "learning_rate": 5.057800006560818e-06, "loss": 0.3834, "step": 5455 }, { "epoch": 0.3727539796406367, "grad_norm": 5.426718711853027, "learning_rate": 5.0571064131322275e-06, "loss": 0.3933, "step": 5456 }, { "epoch": 0.37282229965156793, "grad_norm": 4.2447004318237305, "learning_rate": 5.056412743456267e-06, "loss": 0.554, "step": 5457 }, { "epoch": 0.37289061966249915, "grad_norm": 3.4049978256225586, "learning_rate": 5.055718997566902e-06, "loss": 0.3813, "step": 5458 }, { "epoch": 0.37295893967343036, "grad_norm": 4.8950934410095215, "learning_rate": 5.055025175498104e-06, "loss": 0.3891, "step": 5459 }, { "epoch": 0.37302725968436157, "grad_norm": 3.7736380100250244, "learning_rate": 5.05433127728385e-06, "loss": 0.3834, "step": 5460 }, { "epoch": 0.3730955796952928, "grad_norm": 4.614487171173096, "learning_rate": 5.053637302958114e-06, "loss": 0.4483, "step": 5461 }, { "epoch": 0.37316389970622393, "grad_norm": 3.7111902236938477, "learning_rate": 5.052943252554881e-06, "loss": 0.2909, "step": 5462 }, { "epoch": 0.37323221971715514, "grad_norm": 2.7688369750976562, "learning_rate": 5.052249126108136e-06, "loss": 0.2801, "step": 5463 }, { "epoch": 0.37330053972808636, "grad_norm": 3.912848949432373, "learning_rate": 5.051554923651871e-06, "loss": 0.473, "step": 5464 }, { "epoch": 0.37336885973901757, "grad_norm": 3.6079630851745605, "learning_rate": 5.050860645220074e-06, "loss": 0.3055, "step": 5465 }, { "epoch": 0.3734371797499488, "grad_norm": 3.1896209716796875, "learning_rate": 5.0501662908467465e-06, "loss": 0.2514, "step": 5466 }, { "epoch": 0.37350549976088, "grad_norm": 3.798780679702759, "learning_rate": 5.049471860565886e-06, "loss": 0.3841, "step": 5467 }, { "epoch": 0.37357381977181114, "grad_norm": 4.096311092376709, "learning_rate": 5.0487773544115e-06, "loss": 0.42, "step": 5468 }, { "epoch": 0.37364213978274236, "grad_norm": 3.4541380405426025, "learning_rate": 5.048082772417595e-06, "loss": 0.3125, "step": 5469 }, { "epoch": 0.37371045979367357, "grad_norm": 4.806546211242676, "learning_rate": 5.047388114618183e-06, "loss": 0.3957, "step": 5470 }, { "epoch": 0.3737787798046048, "grad_norm": 2.967902660369873, "learning_rate": 5.04669338104728e-06, "loss": 0.3252, "step": 5471 }, { "epoch": 0.373847099815536, "grad_norm": 3.6223905086517334, "learning_rate": 5.045998571738904e-06, "loss": 0.3512, "step": 5472 }, { "epoch": 0.3739154198264672, "grad_norm": 6.828437805175781, "learning_rate": 5.0453036867270785e-06, "loss": 0.5941, "step": 5473 }, { "epoch": 0.37398373983739835, "grad_norm": 4.433794021606445, "learning_rate": 5.044608726045832e-06, "loss": 0.3583, "step": 5474 }, { "epoch": 0.37405205984832957, "grad_norm": 3.261338233947754, "learning_rate": 5.043913689729192e-06, "loss": 0.3208, "step": 5475 }, { "epoch": 0.3741203798592608, "grad_norm": 4.124645233154297, "learning_rate": 5.043218577811195e-06, "loss": 0.4062, "step": 5476 }, { "epoch": 0.374188699870192, "grad_norm": 5.467041969299316, "learning_rate": 5.042523390325876e-06, "loss": 0.4796, "step": 5477 }, { "epoch": 0.3742570198811232, "grad_norm": 4.832248687744141, "learning_rate": 5.04182812730728e-06, "loss": 0.308, "step": 5478 }, { "epoch": 0.3743253398920544, "grad_norm": 4.343997478485107, "learning_rate": 5.04113278878945e-06, "loss": 0.3711, "step": 5479 }, { "epoch": 0.37439365990298556, "grad_norm": 3.8453238010406494, "learning_rate": 5.040437374806436e-06, "loss": 0.3567, "step": 5480 }, { "epoch": 0.3744619799139168, "grad_norm": 4.2572832107543945, "learning_rate": 5.039741885392287e-06, "loss": 0.3164, "step": 5481 }, { "epoch": 0.374530299924848, "grad_norm": 5.069650173187256, "learning_rate": 5.0390463205810644e-06, "loss": 0.4722, "step": 5482 }, { "epoch": 0.3745986199357792, "grad_norm": 3.1116132736206055, "learning_rate": 5.038350680406825e-06, "loss": 0.2685, "step": 5483 }, { "epoch": 0.3746669399467104, "grad_norm": 3.0741159915924072, "learning_rate": 5.037654964903635e-06, "loss": 0.309, "step": 5484 }, { "epoch": 0.3747352599576416, "grad_norm": 3.1846561431884766, "learning_rate": 5.036959174105558e-06, "loss": 0.3437, "step": 5485 }, { "epoch": 0.3748035799685728, "grad_norm": 3.4953339099884033, "learning_rate": 5.036263308046669e-06, "loss": 0.3112, "step": 5486 }, { "epoch": 0.374871899979504, "grad_norm": 3.675161600112915, "learning_rate": 5.035567366761039e-06, "loss": 0.3525, "step": 5487 }, { "epoch": 0.3749402199904352, "grad_norm": 5.2367963790893555, "learning_rate": 5.034871350282749e-06, "loss": 0.3325, "step": 5488 }, { "epoch": 0.3750085400013664, "grad_norm": 4.245115280151367, "learning_rate": 5.03417525864588e-06, "loss": 0.4283, "step": 5489 }, { "epoch": 0.3750768600122976, "grad_norm": 3.923243522644043, "learning_rate": 5.033479091884519e-06, "loss": 0.3568, "step": 5490 }, { "epoch": 0.37514518002322883, "grad_norm": 3.71071720123291, "learning_rate": 5.032782850032754e-06, "loss": 0.3252, "step": 5491 }, { "epoch": 0.37521350003416, "grad_norm": 4.221269607543945, "learning_rate": 5.032086533124679e-06, "loss": 0.4823, "step": 5492 }, { "epoch": 0.3752818200450912, "grad_norm": 4.256802082061768, "learning_rate": 5.031390141194389e-06, "loss": 0.3906, "step": 5493 }, { "epoch": 0.3753501400560224, "grad_norm": 3.966148614883423, "learning_rate": 5.0306936742759876e-06, "loss": 0.3942, "step": 5494 }, { "epoch": 0.3754184600669536, "grad_norm": 5.108987808227539, "learning_rate": 5.029997132403576e-06, "loss": 0.3237, "step": 5495 }, { "epoch": 0.37548678007788483, "grad_norm": 3.596810817718506, "learning_rate": 5.0293005156112644e-06, "loss": 0.3007, "step": 5496 }, { "epoch": 0.37555510008881604, "grad_norm": 3.99729323387146, "learning_rate": 5.028603823933162e-06, "loss": 0.41, "step": 5497 }, { "epoch": 0.3756234200997472, "grad_norm": 2.7609879970550537, "learning_rate": 5.027907057403386e-06, "loss": 0.2641, "step": 5498 }, { "epoch": 0.3756917401106784, "grad_norm": 3.8003509044647217, "learning_rate": 5.027210216056053e-06, "loss": 0.3755, "step": 5499 }, { "epoch": 0.3757600601216096, "grad_norm": 2.9230563640594482, "learning_rate": 5.026513299925289e-06, "loss": 0.3106, "step": 5500 }, { "epoch": 0.37582838013254083, "grad_norm": 3.363205671310425, "learning_rate": 5.0258163090452155e-06, "loss": 0.369, "step": 5501 }, { "epoch": 0.37589670014347204, "grad_norm": 4.602808952331543, "learning_rate": 5.025119243449966e-06, "loss": 0.3059, "step": 5502 }, { "epoch": 0.37596502015440325, "grad_norm": 4.559813499450684, "learning_rate": 5.024422103173672e-06, "loss": 0.2889, "step": 5503 }, { "epoch": 0.3760333401653344, "grad_norm": 4.886681079864502, "learning_rate": 5.023724888250471e-06, "loss": 0.4607, "step": 5504 }, { "epoch": 0.3761016601762656, "grad_norm": 3.4851744174957275, "learning_rate": 5.023027598714504e-06, "loss": 0.405, "step": 5505 }, { "epoch": 0.37616998018719683, "grad_norm": 3.1541028022766113, "learning_rate": 5.0223302345999165e-06, "loss": 0.2817, "step": 5506 }, { "epoch": 0.37623830019812804, "grad_norm": 3.9744439125061035, "learning_rate": 5.0216327959408545e-06, "loss": 0.3971, "step": 5507 }, { "epoch": 0.37630662020905925, "grad_norm": 3.7140440940856934, "learning_rate": 5.02093528277147e-06, "loss": 0.3914, "step": 5508 }, { "epoch": 0.37637494021999046, "grad_norm": 4.170051574707031, "learning_rate": 5.0202376951259215e-06, "loss": 0.4268, "step": 5509 }, { "epoch": 0.3764432602309216, "grad_norm": 3.3266232013702393, "learning_rate": 5.019540033038364e-06, "loss": 0.3771, "step": 5510 }, { "epoch": 0.37651158024185283, "grad_norm": 3.3336758613586426, "learning_rate": 5.018842296542961e-06, "loss": 0.3435, "step": 5511 }, { "epoch": 0.37657990025278404, "grad_norm": 4.9662346839904785, "learning_rate": 5.0181444856738805e-06, "loss": 0.4762, "step": 5512 }, { "epoch": 0.37664822026371525, "grad_norm": 4.531665325164795, "learning_rate": 5.017446600465292e-06, "loss": 0.2893, "step": 5513 }, { "epoch": 0.37671654027464646, "grad_norm": 3.4685726165771484, "learning_rate": 5.016748640951369e-06, "loss": 0.3641, "step": 5514 }, { "epoch": 0.37678486028557767, "grad_norm": 4.084151744842529, "learning_rate": 5.016050607166286e-06, "loss": 0.3828, "step": 5515 }, { "epoch": 0.3768531802965088, "grad_norm": 3.713003158569336, "learning_rate": 5.0153524991442286e-06, "loss": 0.4046, "step": 5516 }, { "epoch": 0.37692150030744004, "grad_norm": 3.805567741394043, "learning_rate": 5.014654316919378e-06, "loss": 0.2756, "step": 5517 }, { "epoch": 0.37698982031837125, "grad_norm": 3.6257383823394775, "learning_rate": 5.013956060525923e-06, "loss": 0.2922, "step": 5518 }, { "epoch": 0.37705814032930246, "grad_norm": 4.103796482086182, "learning_rate": 5.013257729998056e-06, "loss": 0.4403, "step": 5519 }, { "epoch": 0.37712646034023367, "grad_norm": 4.383237361907959, "learning_rate": 5.01255932536997e-06, "loss": 0.4502, "step": 5520 }, { "epoch": 0.3771947803511649, "grad_norm": 3.415526866912842, "learning_rate": 5.011860846675868e-06, "loss": 0.3545, "step": 5521 }, { "epoch": 0.37726310036209604, "grad_norm": 3.7757627964019775, "learning_rate": 5.01116229394995e-06, "loss": 0.4878, "step": 5522 }, { "epoch": 0.37733142037302725, "grad_norm": 4.625922679901123, "learning_rate": 5.010463667226425e-06, "loss": 0.4497, "step": 5523 }, { "epoch": 0.37739974038395846, "grad_norm": 3.1362972259521484, "learning_rate": 5.009764966539498e-06, "loss": 0.3749, "step": 5524 }, { "epoch": 0.37746806039488967, "grad_norm": 3.8697264194488525, "learning_rate": 5.009066191923386e-06, "loss": 0.3437, "step": 5525 }, { "epoch": 0.3775363804058209, "grad_norm": 4.306570529937744, "learning_rate": 5.008367343412305e-06, "loss": 0.4089, "step": 5526 }, { "epoch": 0.3776047004167521, "grad_norm": 3.8673760890960693, "learning_rate": 5.007668421040478e-06, "loss": 0.3926, "step": 5527 }, { "epoch": 0.37767302042768325, "grad_norm": 3.848139524459839, "learning_rate": 5.006969424842126e-06, "loss": 0.355, "step": 5528 }, { "epoch": 0.37774134043861446, "grad_norm": 3.471000909805298, "learning_rate": 5.0062703548514795e-06, "loss": 0.3684, "step": 5529 }, { "epoch": 0.37780966044954567, "grad_norm": 3.805586099624634, "learning_rate": 5.0055712111027695e-06, "loss": 0.3739, "step": 5530 }, { "epoch": 0.3778779804604769, "grad_norm": 4.968194484710693, "learning_rate": 5.004871993630229e-06, "loss": 0.3447, "step": 5531 }, { "epoch": 0.3779463004714081, "grad_norm": 5.334326267242432, "learning_rate": 5.0041727024681e-06, "loss": 0.4367, "step": 5532 }, { "epoch": 0.3780146204823393, "grad_norm": 4.4241943359375, "learning_rate": 5.003473337650624e-06, "loss": 0.3571, "step": 5533 }, { "epoch": 0.37808294049327046, "grad_norm": 5.197521209716797, "learning_rate": 5.0027738992120454e-06, "loss": 0.4232, "step": 5534 }, { "epoch": 0.37815126050420167, "grad_norm": 3.615813732147217, "learning_rate": 5.002074387186617e-06, "loss": 0.425, "step": 5535 }, { "epoch": 0.3782195805151329, "grad_norm": 3.909734010696411, "learning_rate": 5.001374801608589e-06, "loss": 0.3016, "step": 5536 }, { "epoch": 0.3782879005260641, "grad_norm": 3.921044111251831, "learning_rate": 5.0006751425122195e-06, "loss": 0.377, "step": 5537 }, { "epoch": 0.3783562205369953, "grad_norm": 3.3915445804595947, "learning_rate": 4.99997540993177e-06, "loss": 0.3048, "step": 5538 }, { "epoch": 0.3784245405479265, "grad_norm": 3.303211212158203, "learning_rate": 4.9992756039015025e-06, "loss": 0.4168, "step": 5539 }, { "epoch": 0.37849286055885767, "grad_norm": 3.41265869140625, "learning_rate": 4.998575724455686e-06, "loss": 0.4058, "step": 5540 }, { "epoch": 0.3785611805697889, "grad_norm": 3.812305212020874, "learning_rate": 4.99787577162859e-06, "loss": 0.3472, "step": 5541 }, { "epoch": 0.3786295005807201, "grad_norm": 3.9301154613494873, "learning_rate": 4.997175745454492e-06, "loss": 0.2978, "step": 5542 }, { "epoch": 0.3786978205916513, "grad_norm": 3.7022533416748047, "learning_rate": 4.99647564596767e-06, "loss": 0.302, "step": 5543 }, { "epoch": 0.3787661406025825, "grad_norm": 4.535841941833496, "learning_rate": 4.995775473202402e-06, "loss": 0.4015, "step": 5544 }, { "epoch": 0.3788344606135137, "grad_norm": 3.576721429824829, "learning_rate": 4.995075227192979e-06, "loss": 0.3449, "step": 5545 }, { "epoch": 0.3789027806244449, "grad_norm": 4.1746721267700195, "learning_rate": 4.994374907973689e-06, "loss": 0.3675, "step": 5546 }, { "epoch": 0.3789711006353761, "grad_norm": 4.490838050842285, "learning_rate": 4.993674515578821e-06, "loss": 0.3626, "step": 5547 }, { "epoch": 0.3790394206463073, "grad_norm": 3.795316219329834, "learning_rate": 4.992974050042675e-06, "loss": 0.3656, "step": 5548 }, { "epoch": 0.3791077406572385, "grad_norm": 3.1023848056793213, "learning_rate": 4.99227351139955e-06, "loss": 0.2607, "step": 5549 }, { "epoch": 0.3791760606681697, "grad_norm": 3.8972439765930176, "learning_rate": 4.99157289968375e-06, "loss": 0.2857, "step": 5550 }, { "epoch": 0.37924438067910093, "grad_norm": 4.687768459320068, "learning_rate": 4.9908722149295815e-06, "loss": 0.3573, "step": 5551 }, { "epoch": 0.3793127006900321, "grad_norm": 4.369008541107178, "learning_rate": 4.990171457171355e-06, "loss": 0.4243, "step": 5552 }, { "epoch": 0.3793810207009633, "grad_norm": 4.026924133300781, "learning_rate": 4.9894706264433855e-06, "loss": 0.3778, "step": 5553 }, { "epoch": 0.3794493407118945, "grad_norm": 4.2706427574157715, "learning_rate": 4.988769722779989e-06, "loss": 0.4054, "step": 5554 }, { "epoch": 0.3795176607228257, "grad_norm": 4.748632907867432, "learning_rate": 4.98806874621549e-06, "loss": 0.3645, "step": 5555 }, { "epoch": 0.37958598073375693, "grad_norm": 2.6275265216827393, "learning_rate": 4.98736769678421e-06, "loss": 0.2096, "step": 5556 }, { "epoch": 0.37965430074468814, "grad_norm": 3.8253676891326904, "learning_rate": 4.98666657452048e-06, "loss": 0.3616, "step": 5557 }, { "epoch": 0.3797226207556193, "grad_norm": 4.591419219970703, "learning_rate": 4.985965379458631e-06, "loss": 0.3085, "step": 5558 }, { "epoch": 0.3797909407665505, "grad_norm": 4.5111985206604, "learning_rate": 4.985264111633e-06, "loss": 0.4121, "step": 5559 }, { "epoch": 0.3798592607774817, "grad_norm": 4.522039413452148, "learning_rate": 4.984562771077924e-06, "loss": 0.4243, "step": 5560 }, { "epoch": 0.37992758078841293, "grad_norm": 4.868644714355469, "learning_rate": 4.983861357827748e-06, "loss": 0.4121, "step": 5561 }, { "epoch": 0.37999590079934414, "grad_norm": 4.2927985191345215, "learning_rate": 4.983159871916816e-06, "loss": 0.4191, "step": 5562 }, { "epoch": 0.38006422081027535, "grad_norm": 4.101760387420654, "learning_rate": 4.982458313379482e-06, "loss": 0.3933, "step": 5563 }, { "epoch": 0.3801325408212065, "grad_norm": 3.534614324569702, "learning_rate": 4.981756682250093e-06, "loss": 0.376, "step": 5564 }, { "epoch": 0.3802008608321377, "grad_norm": 3.2317168712615967, "learning_rate": 4.981054978563012e-06, "loss": 0.305, "step": 5565 }, { "epoch": 0.38026918084306893, "grad_norm": 4.4267120361328125, "learning_rate": 4.980353202352596e-06, "loss": 0.412, "step": 5566 }, { "epoch": 0.38033750085400014, "grad_norm": 4.100800514221191, "learning_rate": 4.9796513536532115e-06, "loss": 0.3464, "step": 5567 }, { "epoch": 0.38040582086493135, "grad_norm": 3.936444044113159, "learning_rate": 4.978949432499225e-06, "loss": 0.3023, "step": 5568 }, { "epoch": 0.38047414087586257, "grad_norm": 3.6772241592407227, "learning_rate": 4.978247438925008e-06, "loss": 0.3034, "step": 5569 }, { "epoch": 0.3805424608867937, "grad_norm": 3.844546318054199, "learning_rate": 4.977545372964934e-06, "loss": 0.372, "step": 5570 }, { "epoch": 0.38061078089772493, "grad_norm": 3.4876558780670166, "learning_rate": 4.976843234653384e-06, "loss": 0.3883, "step": 5571 }, { "epoch": 0.38067910090865614, "grad_norm": 3.8869824409484863, "learning_rate": 4.976141024024735e-06, "loss": 0.3791, "step": 5572 }, { "epoch": 0.38074742091958735, "grad_norm": 4.810473442077637, "learning_rate": 4.975438741113379e-06, "loss": 0.5294, "step": 5573 }, { "epoch": 0.38081574093051856, "grad_norm": 4.298820972442627, "learning_rate": 4.9747363859537e-06, "loss": 0.4553, "step": 5574 }, { "epoch": 0.3808840609414498, "grad_norm": 4.348569869995117, "learning_rate": 4.974033958580092e-06, "loss": 0.4406, "step": 5575 }, { "epoch": 0.38095238095238093, "grad_norm": 3.623971939086914, "learning_rate": 4.973331459026952e-06, "loss": 0.2709, "step": 5576 }, { "epoch": 0.38102070096331214, "grad_norm": 3.6957807540893555, "learning_rate": 4.9726288873286775e-06, "loss": 0.3466, "step": 5577 }, { "epoch": 0.38108902097424335, "grad_norm": 4.273348808288574, "learning_rate": 4.971926243519673e-06, "loss": 0.2942, "step": 5578 }, { "epoch": 0.38115734098517456, "grad_norm": 5.104150295257568, "learning_rate": 4.971223527634344e-06, "loss": 0.3697, "step": 5579 }, { "epoch": 0.3812256609961058, "grad_norm": 3.8918492794036865, "learning_rate": 4.970520739707102e-06, "loss": 0.3802, "step": 5580 }, { "epoch": 0.381293981007037, "grad_norm": 2.8761117458343506, "learning_rate": 4.96981787977236e-06, "loss": 0.3159, "step": 5581 }, { "epoch": 0.38136230101796814, "grad_norm": 4.175942897796631, "learning_rate": 4.9691149478645366e-06, "loss": 0.4395, "step": 5582 }, { "epoch": 0.38143062102889935, "grad_norm": 2.5900352001190186, "learning_rate": 4.968411944018049e-06, "loss": 0.2817, "step": 5583 }, { "epoch": 0.38149894103983056, "grad_norm": 3.4378395080566406, "learning_rate": 4.967708868267324e-06, "loss": 0.4348, "step": 5584 }, { "epoch": 0.3815672610507618, "grad_norm": 3.6144659519195557, "learning_rate": 4.96700572064679e-06, "loss": 0.3063, "step": 5585 }, { "epoch": 0.381635581061693, "grad_norm": 4.0403242111206055, "learning_rate": 4.966302501190876e-06, "loss": 0.3831, "step": 5586 }, { "epoch": 0.3817039010726242, "grad_norm": 4.88682222366333, "learning_rate": 4.9655992099340185e-06, "loss": 0.4864, "step": 5587 }, { "epoch": 0.38177222108355535, "grad_norm": 4.647435665130615, "learning_rate": 4.964895846910655e-06, "loss": 0.4023, "step": 5588 }, { "epoch": 0.38184054109448656, "grad_norm": 3.969416379928589, "learning_rate": 4.964192412155228e-06, "loss": 0.3862, "step": 5589 }, { "epoch": 0.3819088611054178, "grad_norm": 4.897650241851807, "learning_rate": 4.963488905702182e-06, "loss": 0.4173, "step": 5590 }, { "epoch": 0.381977181116349, "grad_norm": 3.8174691200256348, "learning_rate": 4.962785327585966e-06, "loss": 0.3547, "step": 5591 }, { "epoch": 0.3820455011272802, "grad_norm": 3.4931201934814453, "learning_rate": 4.962081677841034e-06, "loss": 0.3501, "step": 5592 }, { "epoch": 0.3821138211382114, "grad_norm": 4.401615142822266, "learning_rate": 4.96137795650184e-06, "loss": 0.2586, "step": 5593 }, { "epoch": 0.38218214114914256, "grad_norm": 5.413939476013184, "learning_rate": 4.960674163602843e-06, "loss": 0.2564, "step": 5594 }, { "epoch": 0.3822504611600738, "grad_norm": 3.341115713119507, "learning_rate": 4.959970299178507e-06, "loss": 0.3862, "step": 5595 }, { "epoch": 0.382318781171005, "grad_norm": 3.573108434677124, "learning_rate": 4.959266363263299e-06, "loss": 0.3613, "step": 5596 }, { "epoch": 0.3823871011819362, "grad_norm": 4.329796314239502, "learning_rate": 4.958562355891686e-06, "loss": 0.329, "step": 5597 }, { "epoch": 0.3824554211928674, "grad_norm": 3.8489348888397217, "learning_rate": 4.957858277098145e-06, "loss": 0.3354, "step": 5598 }, { "epoch": 0.3825237412037986, "grad_norm": 3.5359790325164795, "learning_rate": 4.957154126917151e-06, "loss": 0.3141, "step": 5599 }, { "epoch": 0.3825920612147298, "grad_norm": 4.208176136016846, "learning_rate": 4.956449905383185e-06, "loss": 0.402, "step": 5600 }, { "epoch": 0.382660381225661, "grad_norm": 4.091917037963867, "learning_rate": 4.955745612530729e-06, "loss": 0.3475, "step": 5601 }, { "epoch": 0.3827287012365922, "grad_norm": 3.388715982437134, "learning_rate": 4.955041248394274e-06, "loss": 0.3153, "step": 5602 }, { "epoch": 0.3827970212475234, "grad_norm": 3.453758478164673, "learning_rate": 4.954336813008307e-06, "loss": 0.2967, "step": 5603 }, { "epoch": 0.3828653412584546, "grad_norm": 4.686208248138428, "learning_rate": 4.953632306407323e-06, "loss": 0.3154, "step": 5604 }, { "epoch": 0.3829336612693858, "grad_norm": 4.208861827850342, "learning_rate": 4.952927728625823e-06, "loss": 0.3138, "step": 5605 }, { "epoch": 0.383001981280317, "grad_norm": 3.8168892860412598, "learning_rate": 4.952223079698306e-06, "loss": 0.238, "step": 5606 }, { "epoch": 0.3830703012912482, "grad_norm": 3.619307518005371, "learning_rate": 4.9515183596592755e-06, "loss": 0.3199, "step": 5607 }, { "epoch": 0.3831386213021794, "grad_norm": 4.026501655578613, "learning_rate": 4.950813568543241e-06, "loss": 0.3176, "step": 5608 }, { "epoch": 0.3832069413131106, "grad_norm": 4.245466709136963, "learning_rate": 4.950108706384716e-06, "loss": 0.4048, "step": 5609 }, { "epoch": 0.3832752613240418, "grad_norm": 5.4862470626831055, "learning_rate": 4.949403773218213e-06, "loss": 0.3448, "step": 5610 }, { "epoch": 0.38334358133497304, "grad_norm": 5.480976104736328, "learning_rate": 4.948698769078252e-06, "loss": 0.4524, "step": 5611 }, { "epoch": 0.3834119013459042, "grad_norm": 2.6124074459075928, "learning_rate": 4.947993693999356e-06, "loss": 0.2579, "step": 5612 }, { "epoch": 0.3834802213568354, "grad_norm": 3.4734854698181152, "learning_rate": 4.947288548016049e-06, "loss": 0.348, "step": 5613 }, { "epoch": 0.3835485413677666, "grad_norm": 4.28365421295166, "learning_rate": 4.94658333116286e-06, "loss": 0.3428, "step": 5614 }, { "epoch": 0.3836168613786978, "grad_norm": 4.0353569984436035, "learning_rate": 4.945878043474324e-06, "loss": 0.3101, "step": 5615 }, { "epoch": 0.38368518138962904, "grad_norm": 4.514856815338135, "learning_rate": 4.945172684984975e-06, "loss": 0.3889, "step": 5616 }, { "epoch": 0.38375350140056025, "grad_norm": 4.55754280090332, "learning_rate": 4.944467255729352e-06, "loss": 0.48, "step": 5617 }, { "epoch": 0.3838218214114914, "grad_norm": 3.435978889465332, "learning_rate": 4.943761755742e-06, "loss": 0.2704, "step": 5618 }, { "epoch": 0.3838901414224226, "grad_norm": 3.754690170288086, "learning_rate": 4.943056185057464e-06, "loss": 0.3126, "step": 5619 }, { "epoch": 0.3839584614333538, "grad_norm": 5.113699913024902, "learning_rate": 4.942350543710294e-06, "loss": 0.3398, "step": 5620 }, { "epoch": 0.38402678144428504, "grad_norm": 4.293787002563477, "learning_rate": 4.941644831735044e-06, "loss": 0.3842, "step": 5621 }, { "epoch": 0.38409510145521625, "grad_norm": 3.4330179691314697, "learning_rate": 4.940939049166272e-06, "loss": 0.258, "step": 5622 }, { "epoch": 0.38416342146614746, "grad_norm": 3.832106113433838, "learning_rate": 4.940233196038535e-06, "loss": 0.3474, "step": 5623 }, { "epoch": 0.3842317414770786, "grad_norm": 3.1060426235198975, "learning_rate": 4.939527272386401e-06, "loss": 0.3383, "step": 5624 }, { "epoch": 0.3843000614880098, "grad_norm": 3.8902900218963623, "learning_rate": 4.938821278244434e-06, "loss": 0.4923, "step": 5625 }, { "epoch": 0.38436838149894104, "grad_norm": 4.379792213439941, "learning_rate": 4.938115213647204e-06, "loss": 0.4662, "step": 5626 }, { "epoch": 0.38443670150987225, "grad_norm": 4.777813911437988, "learning_rate": 4.9374090786292875e-06, "loss": 0.4295, "step": 5627 }, { "epoch": 0.38450502152080346, "grad_norm": 3.3249258995056152, "learning_rate": 4.936702873225262e-06, "loss": 0.2729, "step": 5628 }, { "epoch": 0.38457334153173467, "grad_norm": 4.060474872589111, "learning_rate": 4.935996597469708e-06, "loss": 0.3144, "step": 5629 }, { "epoch": 0.3846416615426658, "grad_norm": 2.9664297103881836, "learning_rate": 4.935290251397208e-06, "loss": 0.353, "step": 5630 }, { "epoch": 0.38470998155359704, "grad_norm": 5.718914985656738, "learning_rate": 4.9345838350423526e-06, "loss": 0.3713, "step": 5631 }, { "epoch": 0.38477830156452825, "grad_norm": 4.682794094085693, "learning_rate": 4.933877348439733e-06, "loss": 0.4119, "step": 5632 }, { "epoch": 0.38484662157545946, "grad_norm": 4.791052341461182, "learning_rate": 4.9331707916239415e-06, "loss": 0.4328, "step": 5633 }, { "epoch": 0.38491494158639067, "grad_norm": 3.997450590133667, "learning_rate": 4.932464164629579e-06, "loss": 0.3811, "step": 5634 }, { "epoch": 0.3849832615973219, "grad_norm": 4.386481761932373, "learning_rate": 4.931757467491245e-06, "loss": 0.3188, "step": 5635 }, { "epoch": 0.38505158160825304, "grad_norm": 3.139702558517456, "learning_rate": 4.931050700243546e-06, "loss": 0.3461, "step": 5636 }, { "epoch": 0.38511990161918425, "grad_norm": 2.8768723011016846, "learning_rate": 4.930343862921091e-06, "loss": 0.2788, "step": 5637 }, { "epoch": 0.38518822163011546, "grad_norm": 4.336355686187744, "learning_rate": 4.929636955558491e-06, "loss": 0.3892, "step": 5638 }, { "epoch": 0.38525654164104667, "grad_norm": 3.313427686691284, "learning_rate": 4.928929978190361e-06, "loss": 0.3527, "step": 5639 }, { "epoch": 0.3853248616519779, "grad_norm": 3.685075283050537, "learning_rate": 4.9282229308513186e-06, "loss": 0.4709, "step": 5640 }, { "epoch": 0.3853931816629091, "grad_norm": 3.8022048473358154, "learning_rate": 4.92751581357599e-06, "loss": 0.4009, "step": 5641 }, { "epoch": 0.38546150167384025, "grad_norm": 4.167503356933594, "learning_rate": 4.926808626398998e-06, "loss": 0.476, "step": 5642 }, { "epoch": 0.38552982168477146, "grad_norm": 4.782937049865723, "learning_rate": 4.926101369354972e-06, "loss": 0.4312, "step": 5643 }, { "epoch": 0.38559814169570267, "grad_norm": 4.5706682205200195, "learning_rate": 4.9253940424785445e-06, "loss": 0.3965, "step": 5644 }, { "epoch": 0.3856664617066339, "grad_norm": 3.6641950607299805, "learning_rate": 4.924686645804354e-06, "loss": 0.4, "step": 5645 }, { "epoch": 0.3857347817175651, "grad_norm": 3.859206438064575, "learning_rate": 4.923979179367035e-06, "loss": 0.4731, "step": 5646 }, { "epoch": 0.3858031017284963, "grad_norm": 2.9702868461608887, "learning_rate": 4.9232716432012335e-06, "loss": 0.3119, "step": 5647 }, { "epoch": 0.38587142173942746, "grad_norm": 4.096439361572266, "learning_rate": 4.922564037341595e-06, "loss": 0.374, "step": 5648 }, { "epoch": 0.38593974175035867, "grad_norm": 2.8460052013397217, "learning_rate": 4.921856361822769e-06, "loss": 0.2435, "step": 5649 }, { "epoch": 0.3860080617612899, "grad_norm": 4.534144878387451, "learning_rate": 4.921148616679408e-06, "loss": 0.4931, "step": 5650 }, { "epoch": 0.3860763817722211, "grad_norm": 3.79508900642395, "learning_rate": 4.920440801946169e-06, "loss": 0.3322, "step": 5651 }, { "epoch": 0.3861447017831523, "grad_norm": 4.2144060134887695, "learning_rate": 4.919732917657713e-06, "loss": 0.3624, "step": 5652 }, { "epoch": 0.3862130217940835, "grad_norm": 4.265219211578369, "learning_rate": 4.919024963848701e-06, "loss": 0.5489, "step": 5653 }, { "epoch": 0.38628134180501467, "grad_norm": 3.749866247177124, "learning_rate": 4.9183169405537995e-06, "loss": 0.3081, "step": 5654 }, { "epoch": 0.3863496618159459, "grad_norm": 4.748652458190918, "learning_rate": 4.917608847807681e-06, "loss": 0.418, "step": 5655 }, { "epoch": 0.3864179818268771, "grad_norm": 5.001226425170898, "learning_rate": 4.916900685645018e-06, "loss": 0.4487, "step": 5656 }, { "epoch": 0.3864863018378083, "grad_norm": 3.68410325050354, "learning_rate": 4.916192454100486e-06, "loss": 0.3699, "step": 5657 }, { "epoch": 0.3865546218487395, "grad_norm": 4.075516700744629, "learning_rate": 4.915484153208766e-06, "loss": 0.4081, "step": 5658 }, { "epoch": 0.3866229418596707, "grad_norm": 4.889416217803955, "learning_rate": 4.9147757830045425e-06, "loss": 0.3874, "step": 5659 }, { "epoch": 0.3866912618706019, "grad_norm": 3.181549310684204, "learning_rate": 4.9140673435225e-06, "loss": 0.3789, "step": 5660 }, { "epoch": 0.3867595818815331, "grad_norm": 3.6656861305236816, "learning_rate": 4.913358834797331e-06, "loss": 0.3906, "step": 5661 }, { "epoch": 0.3868279018924643, "grad_norm": 3.862711191177368, "learning_rate": 4.912650256863729e-06, "loss": 0.3562, "step": 5662 }, { "epoch": 0.3868962219033955, "grad_norm": 3.406724452972412, "learning_rate": 4.911941609756393e-06, "loss": 0.3225, "step": 5663 }, { "epoch": 0.3869645419143267, "grad_norm": 3.615893602371216, "learning_rate": 4.911232893510019e-06, "loss": 0.3797, "step": 5664 }, { "epoch": 0.38703286192525793, "grad_norm": 3.925525188446045, "learning_rate": 4.910524108159316e-06, "loss": 0.3786, "step": 5665 }, { "epoch": 0.3871011819361891, "grad_norm": 4.023923873901367, "learning_rate": 4.909815253738987e-06, "loss": 0.3044, "step": 5666 }, { "epoch": 0.3871695019471203, "grad_norm": 3.8434290885925293, "learning_rate": 4.909106330283745e-06, "loss": 0.2734, "step": 5667 }, { "epoch": 0.3872378219580515, "grad_norm": 3.064014196395874, "learning_rate": 4.9083973378283045e-06, "loss": 0.3986, "step": 5668 }, { "epoch": 0.3873061419689827, "grad_norm": 3.357746124267578, "learning_rate": 4.907688276407381e-06, "loss": 0.3911, "step": 5669 }, { "epoch": 0.38737446197991393, "grad_norm": 3.697157621383667, "learning_rate": 4.906979146055698e-06, "loss": 0.2402, "step": 5670 }, { "epoch": 0.38744278199084514, "grad_norm": 4.126026153564453, "learning_rate": 4.906269946807978e-06, "loss": 0.4448, "step": 5671 }, { "epoch": 0.3875111020017763, "grad_norm": 5.536336898803711, "learning_rate": 4.905560678698948e-06, "loss": 0.3265, "step": 5672 }, { "epoch": 0.3875794220127075, "grad_norm": 4.7823710441589355, "learning_rate": 4.90485134176334e-06, "loss": 0.4043, "step": 5673 }, { "epoch": 0.3876477420236387, "grad_norm": 3.1668620109558105, "learning_rate": 4.904141936035889e-06, "loss": 0.2727, "step": 5674 }, { "epoch": 0.38771606203456993, "grad_norm": 4.776108741760254, "learning_rate": 4.9034324615513315e-06, "loss": 0.3797, "step": 5675 }, { "epoch": 0.38778438204550114, "grad_norm": 4.034736633300781, "learning_rate": 4.902722918344408e-06, "loss": 0.3856, "step": 5676 }, { "epoch": 0.38785270205643235, "grad_norm": 3.9172747135162354, "learning_rate": 4.902013306449866e-06, "loss": 0.3186, "step": 5677 }, { "epoch": 0.3879210220673635, "grad_norm": 3.7382333278656006, "learning_rate": 4.901303625902451e-06, "loss": 0.4002, "step": 5678 }, { "epoch": 0.3879893420782947, "grad_norm": 3.9274840354919434, "learning_rate": 4.900593876736914e-06, "loss": 0.4101, "step": 5679 }, { "epoch": 0.38805766208922593, "grad_norm": 3.8290445804595947, "learning_rate": 4.89988405898801e-06, "loss": 0.249, "step": 5680 }, { "epoch": 0.38812598210015714, "grad_norm": 4.677832126617432, "learning_rate": 4.8991741726904965e-06, "loss": 0.4282, "step": 5681 }, { "epoch": 0.38819430211108835, "grad_norm": 3.625821352005005, "learning_rate": 4.898464217879136e-06, "loss": 0.2601, "step": 5682 }, { "epoch": 0.38826262212201956, "grad_norm": 4.951064586639404, "learning_rate": 4.897754194588691e-06, "loss": 0.4396, "step": 5683 }, { "epoch": 0.3883309421329507, "grad_norm": 4.212857723236084, "learning_rate": 4.897044102853932e-06, "loss": 0.4324, "step": 5684 }, { "epoch": 0.38839926214388193, "grad_norm": 4.70280647277832, "learning_rate": 4.896333942709629e-06, "loss": 0.4053, "step": 5685 }, { "epoch": 0.38846758215481314, "grad_norm": 4.965171813964844, "learning_rate": 4.895623714190556e-06, "loss": 0.4258, "step": 5686 }, { "epoch": 0.38853590216574435, "grad_norm": 3.6575348377227783, "learning_rate": 4.894913417331492e-06, "loss": 0.3963, "step": 5687 }, { "epoch": 0.38860422217667556, "grad_norm": 4.635013103485107, "learning_rate": 4.894203052167217e-06, "loss": 0.4374, "step": 5688 }, { "epoch": 0.3886725421876068, "grad_norm": 4.779440402984619, "learning_rate": 4.893492618732518e-06, "loss": 0.4077, "step": 5689 }, { "epoch": 0.38874086219853793, "grad_norm": 2.7787773609161377, "learning_rate": 4.892782117062181e-06, "loss": 0.2985, "step": 5690 }, { "epoch": 0.38880918220946914, "grad_norm": 4.350784778594971, "learning_rate": 4.8920715471909995e-06, "loss": 0.3893, "step": 5691 }, { "epoch": 0.38887750222040035, "grad_norm": 2.898452043533325, "learning_rate": 4.891360909153766e-06, "loss": 0.3534, "step": 5692 }, { "epoch": 0.38894582223133156, "grad_norm": 3.344496488571167, "learning_rate": 4.890650202985279e-06, "loss": 0.3041, "step": 5693 }, { "epoch": 0.3890141422422628, "grad_norm": 3.7986900806427, "learning_rate": 4.889939428720341e-06, "loss": 0.3337, "step": 5694 }, { "epoch": 0.389082462253194, "grad_norm": 2.659838914871216, "learning_rate": 4.889228586393757e-06, "loss": 0.3442, "step": 5695 }, { "epoch": 0.38915078226412514, "grad_norm": 3.8196604251861572, "learning_rate": 4.8885176760403315e-06, "loss": 0.451, "step": 5696 }, { "epoch": 0.38921910227505635, "grad_norm": 3.8014793395996094, "learning_rate": 4.887806697694881e-06, "loss": 0.2956, "step": 5697 }, { "epoch": 0.38928742228598756, "grad_norm": 3.47531795501709, "learning_rate": 4.887095651392216e-06, "loss": 0.3031, "step": 5698 }, { "epoch": 0.38935574229691877, "grad_norm": 4.509604454040527, "learning_rate": 4.886384537167157e-06, "loss": 0.3816, "step": 5699 }, { "epoch": 0.38942406230785, "grad_norm": 3.5403621196746826, "learning_rate": 4.885673355054525e-06, "loss": 0.3507, "step": 5700 }, { "epoch": 0.3894923823187812, "grad_norm": 3.6797890663146973, "learning_rate": 4.884962105089144e-06, "loss": 0.4107, "step": 5701 }, { "epoch": 0.38956070232971235, "grad_norm": 2.6321423053741455, "learning_rate": 4.8842507873058436e-06, "loss": 0.3915, "step": 5702 }, { "epoch": 0.38962902234064356, "grad_norm": 2.1747868061065674, "learning_rate": 4.883539401739453e-06, "loss": 0.301, "step": 5703 }, { "epoch": 0.38969734235157477, "grad_norm": 3.2915186882019043, "learning_rate": 4.882827948424807e-06, "loss": 0.414, "step": 5704 }, { "epoch": 0.389765662362506, "grad_norm": 4.916965007781982, "learning_rate": 4.882116427396746e-06, "loss": 0.3937, "step": 5705 }, { "epoch": 0.3898339823734372, "grad_norm": 3.744410753250122, "learning_rate": 4.881404838690108e-06, "loss": 0.3835, "step": 5706 }, { "epoch": 0.3899023023843684, "grad_norm": 4.334160804748535, "learning_rate": 4.88069318233974e-06, "loss": 0.5066, "step": 5707 }, { "epoch": 0.38997062239529956, "grad_norm": 3.589916706085205, "learning_rate": 4.87998145838049e-06, "loss": 0.3855, "step": 5708 }, { "epoch": 0.39003894240623077, "grad_norm": 4.3981523513793945, "learning_rate": 4.879269666847207e-06, "loss": 0.3892, "step": 5709 }, { "epoch": 0.390107262417162, "grad_norm": 2.771691083908081, "learning_rate": 4.878557807774747e-06, "loss": 0.352, "step": 5710 }, { "epoch": 0.3901755824280932, "grad_norm": 4.414819717407227, "learning_rate": 4.877845881197969e-06, "loss": 0.3232, "step": 5711 }, { "epoch": 0.3902439024390244, "grad_norm": 4.331627368927002, "learning_rate": 4.877133887151733e-06, "loss": 0.3702, "step": 5712 }, { "epoch": 0.3903122224499556, "grad_norm": 4.314086437225342, "learning_rate": 4.876421825670903e-06, "loss": 0.4532, "step": 5713 }, { "epoch": 0.39038054246088677, "grad_norm": 3.1325900554656982, "learning_rate": 4.875709696790348e-06, "loss": 0.2861, "step": 5714 }, { "epoch": 0.390448862471818, "grad_norm": 3.746713638305664, "learning_rate": 4.8749975005449375e-06, "loss": 0.2851, "step": 5715 }, { "epoch": 0.3905171824827492, "grad_norm": 4.586487293243408, "learning_rate": 4.874285236969546e-06, "loss": 0.3146, "step": 5716 }, { "epoch": 0.3905855024936804, "grad_norm": 3.863356113433838, "learning_rate": 4.873572906099054e-06, "loss": 0.3869, "step": 5717 }, { "epoch": 0.3906538225046116, "grad_norm": 4.29858922958374, "learning_rate": 4.872860507968339e-06, "loss": 0.3891, "step": 5718 }, { "epoch": 0.3907221425155428, "grad_norm": 3.9155640602111816, "learning_rate": 4.872148042612285e-06, "loss": 0.4024, "step": 5719 }, { "epoch": 0.390790462526474, "grad_norm": 3.7795050144195557, "learning_rate": 4.871435510065783e-06, "loss": 0.3618, "step": 5720 }, { "epoch": 0.3908587825374052, "grad_norm": 4.335015296936035, "learning_rate": 4.870722910363721e-06, "loss": 0.4197, "step": 5721 }, { "epoch": 0.3909271025483364, "grad_norm": 4.073315143585205, "learning_rate": 4.870010243540996e-06, "loss": 0.3626, "step": 5722 }, { "epoch": 0.3909954225592676, "grad_norm": 4.1855549812316895, "learning_rate": 4.869297509632501e-06, "loss": 0.3939, "step": 5723 }, { "epoch": 0.3910637425701988, "grad_norm": 3.5498900413513184, "learning_rate": 4.868584708673141e-06, "loss": 0.316, "step": 5724 }, { "epoch": 0.39113206258113004, "grad_norm": 3.6777775287628174, "learning_rate": 4.867871840697816e-06, "loss": 0.3525, "step": 5725 }, { "epoch": 0.3912003825920612, "grad_norm": 2.5319344997406006, "learning_rate": 4.867158905741436e-06, "loss": 0.3064, "step": 5726 }, { "epoch": 0.3912687026029924, "grad_norm": 3.764664649963379, "learning_rate": 4.8664459038389115e-06, "loss": 0.3557, "step": 5727 }, { "epoch": 0.3913370226139236, "grad_norm": 4.599119663238525, "learning_rate": 4.865732835025156e-06, "loss": 0.403, "step": 5728 }, { "epoch": 0.3914053426248548, "grad_norm": 3.2690632343292236, "learning_rate": 4.865019699335085e-06, "loss": 0.2549, "step": 5729 }, { "epoch": 0.39147366263578603, "grad_norm": 3.234666347503662, "learning_rate": 4.864306496803621e-06, "loss": 0.334, "step": 5730 }, { "epoch": 0.39154198264671725, "grad_norm": 4.207328796386719, "learning_rate": 4.863593227465686e-06, "loss": 0.3812, "step": 5731 }, { "epoch": 0.3916103026576484, "grad_norm": 3.5472240447998047, "learning_rate": 4.862879891356208e-06, "loss": 0.4207, "step": 5732 }, { "epoch": 0.3916786226685796, "grad_norm": 3.069300889968872, "learning_rate": 4.862166488510117e-06, "loss": 0.2971, "step": 5733 }, { "epoch": 0.3917469426795108, "grad_norm": 4.8857855796813965, "learning_rate": 4.861453018962348e-06, "loss": 0.5126, "step": 5734 }, { "epoch": 0.39181526269044203, "grad_norm": 4.080052375793457, "learning_rate": 4.860739482747833e-06, "loss": 0.3499, "step": 5735 }, { "epoch": 0.39188358270137325, "grad_norm": 2.558030128479004, "learning_rate": 4.860025879901516e-06, "loss": 0.271, "step": 5736 }, { "epoch": 0.39195190271230446, "grad_norm": 3.715494155883789, "learning_rate": 4.85931221045834e-06, "loss": 0.4207, "step": 5737 }, { "epoch": 0.3920202227232356, "grad_norm": 5.339147567749023, "learning_rate": 4.85859847445325e-06, "loss": 0.3516, "step": 5738 }, { "epoch": 0.3920885427341668, "grad_norm": 3.326939821243286, "learning_rate": 4.857884671921196e-06, "loss": 0.3253, "step": 5739 }, { "epoch": 0.39215686274509803, "grad_norm": 3.7156386375427246, "learning_rate": 4.857170802897132e-06, "loss": 0.3149, "step": 5740 }, { "epoch": 0.39222518275602924, "grad_norm": 4.663715362548828, "learning_rate": 4.856456867416014e-06, "loss": 0.3917, "step": 5741 }, { "epoch": 0.39229350276696046, "grad_norm": 3.762650966644287, "learning_rate": 4.855742865512801e-06, "loss": 0.4388, "step": 5742 }, { "epoch": 0.39236182277789167, "grad_norm": 3.951612949371338, "learning_rate": 4.8550287972224565e-06, "loss": 0.3061, "step": 5743 }, { "epoch": 0.3924301427888228, "grad_norm": 3.6725518703460693, "learning_rate": 4.854314662579946e-06, "loss": 0.349, "step": 5744 }, { "epoch": 0.39249846279975403, "grad_norm": 3.1469128131866455, "learning_rate": 4.8536004616202385e-06, "loss": 0.4246, "step": 5745 }, { "epoch": 0.39256678281068524, "grad_norm": 4.091678142547607, "learning_rate": 4.852886194378306e-06, "loss": 0.4199, "step": 5746 }, { "epoch": 0.39263510282161646, "grad_norm": 3.8193490505218506, "learning_rate": 4.852171860889127e-06, "loss": 0.2964, "step": 5747 }, { "epoch": 0.39270342283254767, "grad_norm": 2.8578553199768066, "learning_rate": 4.851457461187679e-06, "loss": 0.2893, "step": 5748 }, { "epoch": 0.3927717428434789, "grad_norm": 3.572598457336426, "learning_rate": 4.850742995308942e-06, "loss": 0.2821, "step": 5749 }, { "epoch": 0.39284006285441003, "grad_norm": 3.566821336746216, "learning_rate": 4.8500284632879045e-06, "loss": 0.3352, "step": 5750 }, { "epoch": 0.39290838286534124, "grad_norm": 4.2642741203308105, "learning_rate": 4.849313865159554e-06, "loss": 0.3921, "step": 5751 }, { "epoch": 0.39297670287627245, "grad_norm": 4.469193458557129, "learning_rate": 4.848599200958882e-06, "loss": 0.3607, "step": 5752 }, { "epoch": 0.39304502288720367, "grad_norm": 3.4370803833007812, "learning_rate": 4.847884470720886e-06, "loss": 0.3939, "step": 5753 }, { "epoch": 0.3931133428981349, "grad_norm": 3.8411827087402344, "learning_rate": 4.8471696744805614e-06, "loss": 0.3687, "step": 5754 }, { "epoch": 0.3931816629090661, "grad_norm": 3.307778835296631, "learning_rate": 4.8464548122729116e-06, "loss": 0.2423, "step": 5755 }, { "epoch": 0.39324998291999724, "grad_norm": 3.510533094406128, "learning_rate": 4.845739884132941e-06, "loss": 0.3534, "step": 5756 }, { "epoch": 0.39331830293092845, "grad_norm": 4.1402459144592285, "learning_rate": 4.8450248900956585e-06, "loss": 0.4097, "step": 5757 }, { "epoch": 0.39338662294185966, "grad_norm": 3.8767428398132324, "learning_rate": 4.844309830196074e-06, "loss": 0.3459, "step": 5758 }, { "epoch": 0.3934549429527909, "grad_norm": 3.063565254211426, "learning_rate": 4.843594704469204e-06, "loss": 0.3038, "step": 5759 }, { "epoch": 0.3935232629637221, "grad_norm": 4.278714179992676, "learning_rate": 4.842879512950065e-06, "loss": 0.4249, "step": 5760 }, { "epoch": 0.3935915829746533, "grad_norm": 2.828308582305908, "learning_rate": 4.842164255673678e-06, "loss": 0.3189, "step": 5761 }, { "epoch": 0.39365990298558445, "grad_norm": 3.6541619300842285, "learning_rate": 4.8414489326750674e-06, "loss": 0.4692, "step": 5762 }, { "epoch": 0.39372822299651566, "grad_norm": 4.199499607086182, "learning_rate": 4.840733543989261e-06, "loss": 0.4139, "step": 5763 }, { "epoch": 0.3937965430074469, "grad_norm": 3.794532537460327, "learning_rate": 4.8400180896512905e-06, "loss": 0.3882, "step": 5764 }, { "epoch": 0.3938648630183781, "grad_norm": 4.3950324058532715, "learning_rate": 4.839302569696186e-06, "loss": 0.4893, "step": 5765 }, { "epoch": 0.3939331830293093, "grad_norm": 3.000317096710205, "learning_rate": 4.8385869841589906e-06, "loss": 0.3056, "step": 5766 }, { "epoch": 0.3940015030402405, "grad_norm": 3.4931230545043945, "learning_rate": 4.837871333074739e-06, "loss": 0.3036, "step": 5767 }, { "epoch": 0.39406982305117166, "grad_norm": 2.6954994201660156, "learning_rate": 4.837155616478477e-06, "loss": 0.245, "step": 5768 }, { "epoch": 0.3941381430621029, "grad_norm": 3.4523684978485107, "learning_rate": 4.836439834405252e-06, "loss": 0.3679, "step": 5769 }, { "epoch": 0.3942064630730341, "grad_norm": 2.8400604724884033, "learning_rate": 4.835723986890114e-06, "loss": 0.3365, "step": 5770 }, { "epoch": 0.3942747830839653, "grad_norm": 3.1416096687316895, "learning_rate": 4.835008073968115e-06, "loss": 0.3378, "step": 5771 }, { "epoch": 0.3943431030948965, "grad_norm": 4.557987689971924, "learning_rate": 4.8342920956743105e-06, "loss": 0.4708, "step": 5772 }, { "epoch": 0.3944114231058277, "grad_norm": 3.3857533931732178, "learning_rate": 4.833576052043762e-06, "loss": 0.2528, "step": 5773 }, { "epoch": 0.3944797431167589, "grad_norm": 3.74888277053833, "learning_rate": 4.832859943111532e-06, "loss": 0.3311, "step": 5774 }, { "epoch": 0.3945480631276901, "grad_norm": 4.3538713455200195, "learning_rate": 4.832143768912687e-06, "loss": 0.3126, "step": 5775 }, { "epoch": 0.3946163831386213, "grad_norm": 3.906379222869873, "learning_rate": 4.831427529482294e-06, "loss": 0.3476, "step": 5776 }, { "epoch": 0.3946847031495525, "grad_norm": 4.811717987060547, "learning_rate": 4.8307112248554264e-06, "loss": 0.4619, "step": 5777 }, { "epoch": 0.3947530231604837, "grad_norm": 3.7107162475585938, "learning_rate": 4.82999485506716e-06, "loss": 0.331, "step": 5778 }, { "epoch": 0.39482134317141493, "grad_norm": 3.5716025829315186, "learning_rate": 4.829278420152574e-06, "loss": 0.3106, "step": 5779 }, { "epoch": 0.3948896631823461, "grad_norm": 4.773961544036865, "learning_rate": 4.828561920146749e-06, "loss": 0.3283, "step": 5780 }, { "epoch": 0.3949579831932773, "grad_norm": 4.998812675476074, "learning_rate": 4.827845355084773e-06, "loss": 0.4065, "step": 5781 }, { "epoch": 0.3950263032042085, "grad_norm": 3.546442747116089, "learning_rate": 4.827128725001729e-06, "loss": 0.3015, "step": 5782 }, { "epoch": 0.3950946232151397, "grad_norm": 5.067137718200684, "learning_rate": 4.826412029932714e-06, "loss": 0.4106, "step": 5783 }, { "epoch": 0.39516294322607093, "grad_norm": 2.920971632003784, "learning_rate": 4.825695269912819e-06, "loss": 0.3124, "step": 5784 }, { "epoch": 0.39523126323700214, "grad_norm": 4.032855987548828, "learning_rate": 4.824978444977143e-06, "loss": 0.3585, "step": 5785 }, { "epoch": 0.3952995832479333, "grad_norm": 4.346636772155762, "learning_rate": 4.824261555160788e-06, "loss": 0.4168, "step": 5786 }, { "epoch": 0.3953679032588645, "grad_norm": 5.813634395599365, "learning_rate": 4.823544600498858e-06, "loss": 0.4223, "step": 5787 }, { "epoch": 0.3954362232697957, "grad_norm": 3.8673574924468994, "learning_rate": 4.822827581026458e-06, "loss": 0.3597, "step": 5788 }, { "epoch": 0.39550454328072693, "grad_norm": 3.5913033485412598, "learning_rate": 4.8221104967787e-06, "loss": 0.2802, "step": 5789 }, { "epoch": 0.39557286329165814, "grad_norm": 3.2163097858428955, "learning_rate": 4.821393347790699e-06, "loss": 0.2989, "step": 5790 }, { "epoch": 0.39564118330258935, "grad_norm": 4.82466459274292, "learning_rate": 4.820676134097572e-06, "loss": 0.3697, "step": 5791 }, { "epoch": 0.3957095033135205, "grad_norm": 4.2116594314575195, "learning_rate": 4.819958855734435e-06, "loss": 0.349, "step": 5792 }, { "epoch": 0.3957778233244517, "grad_norm": 3.6796555519104004, "learning_rate": 4.8192415127364166e-06, "loss": 0.2644, "step": 5793 }, { "epoch": 0.3958461433353829, "grad_norm": 4.710118293762207, "learning_rate": 4.818524105138639e-06, "loss": 0.4117, "step": 5794 }, { "epoch": 0.39591446334631414, "grad_norm": 3.5503547191619873, "learning_rate": 4.8178066329762345e-06, "loss": 0.2748, "step": 5795 }, { "epoch": 0.39598278335724535, "grad_norm": 3.8382880687713623, "learning_rate": 4.817089096284335e-06, "loss": 0.4561, "step": 5796 }, { "epoch": 0.39605110336817656, "grad_norm": 3.5357487201690674, "learning_rate": 4.816371495098076e-06, "loss": 0.3215, "step": 5797 }, { "epoch": 0.3961194233791077, "grad_norm": 4.07682466506958, "learning_rate": 4.8156538294525956e-06, "loss": 0.4389, "step": 5798 }, { "epoch": 0.3961877433900389, "grad_norm": 9.186925888061523, "learning_rate": 4.814936099383038e-06, "loss": 0.4207, "step": 5799 }, { "epoch": 0.39625606340097014, "grad_norm": 3.104360818862915, "learning_rate": 4.814218304924547e-06, "loss": 0.2505, "step": 5800 }, { "epoch": 0.39632438341190135, "grad_norm": 4.060421466827393, "learning_rate": 4.8135004461122715e-06, "loss": 0.3484, "step": 5801 }, { "epoch": 0.39639270342283256, "grad_norm": 3.7467901706695557, "learning_rate": 4.812782522981363e-06, "loss": 0.3282, "step": 5802 }, { "epoch": 0.39646102343376377, "grad_norm": 3.9046826362609863, "learning_rate": 4.8120645355669774e-06, "loss": 0.4223, "step": 5803 }, { "epoch": 0.3965293434446949, "grad_norm": 4.105795860290527, "learning_rate": 4.811346483904271e-06, "loss": 0.3577, "step": 5804 }, { "epoch": 0.39659766345562614, "grad_norm": 3.8393189907073975, "learning_rate": 4.810628368028407e-06, "loss": 0.3694, "step": 5805 }, { "epoch": 0.39666598346655735, "grad_norm": 4.092180252075195, "learning_rate": 4.809910187974547e-06, "loss": 0.3593, "step": 5806 }, { "epoch": 0.39673430347748856, "grad_norm": 4.6415629386901855, "learning_rate": 4.809191943777862e-06, "loss": 0.4127, "step": 5807 }, { "epoch": 0.39680262348841977, "grad_norm": 4.165136337280273, "learning_rate": 4.808473635473517e-06, "loss": 0.4349, "step": 5808 }, { "epoch": 0.396870943499351, "grad_norm": 4.197314739227295, "learning_rate": 4.807755263096691e-06, "loss": 0.3332, "step": 5809 }, { "epoch": 0.39693926351028214, "grad_norm": 4.022213459014893, "learning_rate": 4.807036826682558e-06, "loss": 0.3611, "step": 5810 }, { "epoch": 0.39700758352121335, "grad_norm": 3.4408085346221924, "learning_rate": 4.8063183262662985e-06, "loss": 0.3125, "step": 5811 }, { "epoch": 0.39707590353214456, "grad_norm": 5.135282039642334, "learning_rate": 4.805599761883096e-06, "loss": 0.4092, "step": 5812 }, { "epoch": 0.39714422354307577, "grad_norm": 4.731851577758789, "learning_rate": 4.804881133568137e-06, "loss": 0.4192, "step": 5813 }, { "epoch": 0.397212543554007, "grad_norm": 4.260194301605225, "learning_rate": 4.804162441356609e-06, "loss": 0.379, "step": 5814 }, { "epoch": 0.3972808635649382, "grad_norm": 3.225376605987549, "learning_rate": 4.8034436852837065e-06, "loss": 0.2498, "step": 5815 }, { "epoch": 0.39734918357586935, "grad_norm": 3.0622589588165283, "learning_rate": 4.802724865384623e-06, "loss": 0.3711, "step": 5816 }, { "epoch": 0.39741750358680056, "grad_norm": 3.9728589057922363, "learning_rate": 4.80200598169456e-06, "loss": 0.3558, "step": 5817 }, { "epoch": 0.39748582359773177, "grad_norm": 3.878202199935913, "learning_rate": 4.801287034248715e-06, "loss": 0.4788, "step": 5818 }, { "epoch": 0.397554143608663, "grad_norm": 3.4482436180114746, "learning_rate": 4.8005680230822985e-06, "loss": 0.2848, "step": 5819 }, { "epoch": 0.3976224636195942, "grad_norm": 3.4446465969085693, "learning_rate": 4.799848948230515e-06, "loss": 0.3508, "step": 5820 }, { "epoch": 0.3976907836305254, "grad_norm": 4.070770263671875, "learning_rate": 4.799129809728576e-06, "loss": 0.3766, "step": 5821 }, { "epoch": 0.39775910364145656, "grad_norm": 3.4298455715179443, "learning_rate": 4.7984106076116956e-06, "loss": 0.3827, "step": 5822 }, { "epoch": 0.39782742365238777, "grad_norm": 3.4714267253875732, "learning_rate": 4.797691341915093e-06, "loss": 0.2403, "step": 5823 }, { "epoch": 0.397895743663319, "grad_norm": 6.595193386077881, "learning_rate": 4.796972012673988e-06, "loss": 0.4, "step": 5824 }, { "epoch": 0.3979640636742502, "grad_norm": 3.4710278511047363, "learning_rate": 4.7962526199236025e-06, "loss": 0.4002, "step": 5825 }, { "epoch": 0.3980323836851814, "grad_norm": 3.343848943710327, "learning_rate": 4.795533163699165e-06, "loss": 0.2902, "step": 5826 }, { "epoch": 0.3981007036961126, "grad_norm": 3.504700183868408, "learning_rate": 4.794813644035906e-06, "loss": 0.3044, "step": 5827 }, { "epoch": 0.39816902370704377, "grad_norm": 3.2626705169677734, "learning_rate": 4.794094060969055e-06, "loss": 0.3596, "step": 5828 }, { "epoch": 0.398237343717975, "grad_norm": 3.993818998336792, "learning_rate": 4.793374414533852e-06, "loss": 0.4729, "step": 5829 }, { "epoch": 0.3983056637289062, "grad_norm": 3.926631212234497, "learning_rate": 4.7926547047655356e-06, "loss": 0.3634, "step": 5830 }, { "epoch": 0.3983739837398374, "grad_norm": 3.2005860805511475, "learning_rate": 4.7919349316993455e-06, "loss": 0.2844, "step": 5831 }, { "epoch": 0.3984423037507686, "grad_norm": 3.740610361099243, "learning_rate": 4.79121509537053e-06, "loss": 0.4264, "step": 5832 }, { "epoch": 0.3985106237616998, "grad_norm": 4.042945861816406, "learning_rate": 4.790495195814336e-06, "loss": 0.3781, "step": 5833 }, { "epoch": 0.398578943772631, "grad_norm": 4.0624847412109375, "learning_rate": 4.789775233066015e-06, "loss": 0.2947, "step": 5834 }, { "epoch": 0.3986472637835622, "grad_norm": 3.7534356117248535, "learning_rate": 4.789055207160822e-06, "loss": 0.3818, "step": 5835 }, { "epoch": 0.3987155837944934, "grad_norm": 3.8492190837860107, "learning_rate": 4.788335118134015e-06, "loss": 0.4502, "step": 5836 }, { "epoch": 0.3987839038054246, "grad_norm": 4.629018306732178, "learning_rate": 4.787614966020856e-06, "loss": 0.3972, "step": 5837 }, { "epoch": 0.3988522238163558, "grad_norm": 4.5245513916015625, "learning_rate": 4.786894750856604e-06, "loss": 0.417, "step": 5838 }, { "epoch": 0.39892054382728703, "grad_norm": 4.466022491455078, "learning_rate": 4.786174472676533e-06, "loss": 0.3565, "step": 5839 }, { "epoch": 0.3989888638382182, "grad_norm": 5.692446708679199, "learning_rate": 4.785454131515908e-06, "loss": 0.4499, "step": 5840 }, { "epoch": 0.3990571838491494, "grad_norm": 3.7930383682250977, "learning_rate": 4.784733727410005e-06, "loss": 0.4109, "step": 5841 }, { "epoch": 0.3991255038600806, "grad_norm": 4.211543560028076, "learning_rate": 4.784013260394098e-06, "loss": 0.3756, "step": 5842 }, { "epoch": 0.3991938238710118, "grad_norm": 3.26070237159729, "learning_rate": 4.783292730503468e-06, "loss": 0.2611, "step": 5843 }, { "epoch": 0.39926214388194303, "grad_norm": 3.197411298751831, "learning_rate": 4.782572137773398e-06, "loss": 0.2674, "step": 5844 }, { "epoch": 0.39933046389287424, "grad_norm": 3.500429630279541, "learning_rate": 4.781851482239171e-06, "loss": 0.4086, "step": 5845 }, { "epoch": 0.3993987839038054, "grad_norm": 3.4063799381256104, "learning_rate": 4.781130763936078e-06, "loss": 0.3568, "step": 5846 }, { "epoch": 0.3994671039147366, "grad_norm": 4.494941234588623, "learning_rate": 4.780409982899409e-06, "loss": 0.3428, "step": 5847 }, { "epoch": 0.3995354239256678, "grad_norm": 3.270728349685669, "learning_rate": 4.779689139164461e-06, "loss": 0.359, "step": 5848 }, { "epoch": 0.39960374393659903, "grad_norm": 4.127630233764648, "learning_rate": 4.778968232766529e-06, "loss": 0.3083, "step": 5849 }, { "epoch": 0.39967206394753024, "grad_norm": 3.736529588699341, "learning_rate": 4.778247263740916e-06, "loss": 0.3168, "step": 5850 }, { "epoch": 0.39974038395846145, "grad_norm": 3.345750570297241, "learning_rate": 4.777526232122925e-06, "loss": 0.4341, "step": 5851 }, { "epoch": 0.3998087039693926, "grad_norm": 4.303170204162598, "learning_rate": 4.776805137947862e-06, "loss": 0.3981, "step": 5852 }, { "epoch": 0.3998770239803238, "grad_norm": 4.728977203369141, "learning_rate": 4.776083981251039e-06, "loss": 0.4191, "step": 5853 }, { "epoch": 0.39994534399125503, "grad_norm": 4.225487232208252, "learning_rate": 4.775362762067769e-06, "loss": 0.496, "step": 5854 }, { "epoch": 0.40001366400218624, "grad_norm": 3.835700511932373, "learning_rate": 4.774641480433366e-06, "loss": 0.4169, "step": 5855 }, { "epoch": 0.40008198401311745, "grad_norm": 3.525095224380493, "learning_rate": 4.773920136383152e-06, "loss": 0.4031, "step": 5856 }, { "epoch": 0.40015030402404866, "grad_norm": 4.1205735206604, "learning_rate": 4.773198729952448e-06, "loss": 0.4257, "step": 5857 }, { "epoch": 0.4002186240349798, "grad_norm": 4.241528034210205, "learning_rate": 4.7724772611765785e-06, "loss": 0.3643, "step": 5858 }, { "epoch": 0.40028694404591103, "grad_norm": 4.296458721160889, "learning_rate": 4.771755730090874e-06, "loss": 0.4258, "step": 5859 }, { "epoch": 0.40035526405684224, "grad_norm": 3.311530351638794, "learning_rate": 4.771034136730664e-06, "loss": 0.3397, "step": 5860 }, { "epoch": 0.40042358406777345, "grad_norm": 3.1314542293548584, "learning_rate": 4.770312481131284e-06, "loss": 0.2601, "step": 5861 }, { "epoch": 0.40049190407870466, "grad_norm": 3.57599139213562, "learning_rate": 4.769590763328072e-06, "loss": 0.391, "step": 5862 }, { "epoch": 0.4005602240896359, "grad_norm": 3.43538236618042, "learning_rate": 4.768868983356367e-06, "loss": 0.3193, "step": 5863 }, { "epoch": 0.40062854410056703, "grad_norm": 3.015279531478882, "learning_rate": 4.768147141251514e-06, "loss": 0.4252, "step": 5864 }, { "epoch": 0.40069686411149824, "grad_norm": 3.9162683486938477, "learning_rate": 4.76742523704886e-06, "loss": 0.2901, "step": 5865 }, { "epoch": 0.40076518412242945, "grad_norm": 4.064541816711426, "learning_rate": 4.7667032707837535e-06, "loss": 0.4049, "step": 5866 }, { "epoch": 0.40083350413336066, "grad_norm": 3.9257774353027344, "learning_rate": 4.765981242491547e-06, "loss": 0.4407, "step": 5867 }, { "epoch": 0.4009018241442919, "grad_norm": 3.523357391357422, "learning_rate": 4.7652591522075984e-06, "loss": 0.3564, "step": 5868 }, { "epoch": 0.4009701441552231, "grad_norm": 4.425994396209717, "learning_rate": 4.764536999967265e-06, "loss": 0.3992, "step": 5869 }, { "epoch": 0.40103846416615424, "grad_norm": 3.805255651473999, "learning_rate": 4.763814785805908e-06, "loss": 0.3266, "step": 5870 }, { "epoch": 0.40110678417708545, "grad_norm": 3.5986711978912354, "learning_rate": 4.763092509758894e-06, "loss": 0.4071, "step": 5871 }, { "epoch": 0.40117510418801666, "grad_norm": 3.5199999809265137, "learning_rate": 4.762370171861591e-06, "loss": 0.412, "step": 5872 }, { "epoch": 0.4012434241989479, "grad_norm": 4.441145896911621, "learning_rate": 4.761647772149368e-06, "loss": 0.4559, "step": 5873 }, { "epoch": 0.4013117442098791, "grad_norm": 2.9639127254486084, "learning_rate": 4.760925310657601e-06, "loss": 0.3089, "step": 5874 }, { "epoch": 0.4013800642208103, "grad_norm": 4.249920845031738, "learning_rate": 4.760202787421666e-06, "loss": 0.2746, "step": 5875 }, { "epoch": 0.40144838423174145, "grad_norm": 3.2570488452911377, "learning_rate": 4.759480202476944e-06, "loss": 0.3616, "step": 5876 }, { "epoch": 0.40151670424267266, "grad_norm": 3.290843963623047, "learning_rate": 4.7587575558588155e-06, "loss": 0.3325, "step": 5877 }, { "epoch": 0.4015850242536039, "grad_norm": 4.04375696182251, "learning_rate": 4.75803484760267e-06, "loss": 0.287, "step": 5878 }, { "epoch": 0.4016533442645351, "grad_norm": 3.5225632190704346, "learning_rate": 4.757312077743894e-06, "loss": 0.4232, "step": 5879 }, { "epoch": 0.4017216642754663, "grad_norm": 3.690606117248535, "learning_rate": 4.756589246317882e-06, "loss": 0.3469, "step": 5880 }, { "epoch": 0.4017899842863975, "grad_norm": 4.357700824737549, "learning_rate": 4.755866353360025e-06, "loss": 0.3491, "step": 5881 }, { "epoch": 0.40185830429732866, "grad_norm": 3.8540382385253906, "learning_rate": 4.755143398905728e-06, "loss": 0.3355, "step": 5882 }, { "epoch": 0.40192662430825987, "grad_norm": 4.32461404800415, "learning_rate": 4.754420382990385e-06, "loss": 0.4098, "step": 5883 }, { "epoch": 0.4019949443191911, "grad_norm": 3.849884271621704, "learning_rate": 4.753697305649404e-06, "loss": 0.3421, "step": 5884 }, { "epoch": 0.4020632643301223, "grad_norm": 3.7294933795928955, "learning_rate": 4.752974166918192e-06, "loss": 0.2983, "step": 5885 }, { "epoch": 0.4021315843410535, "grad_norm": 4.272853374481201, "learning_rate": 4.752250966832158e-06, "loss": 0.3524, "step": 5886 }, { "epoch": 0.4021999043519847, "grad_norm": 4.3971991539001465, "learning_rate": 4.751527705426716e-06, "loss": 0.3437, "step": 5887 }, { "epoch": 0.40226822436291587, "grad_norm": 3.675088405609131, "learning_rate": 4.750804382737282e-06, "loss": 0.3885, "step": 5888 }, { "epoch": 0.4023365443738471, "grad_norm": 3.6139416694641113, "learning_rate": 4.750080998799275e-06, "loss": 0.3445, "step": 5889 }, { "epoch": 0.4024048643847783, "grad_norm": 4.384253978729248, "learning_rate": 4.7493575536481186e-06, "loss": 0.337, "step": 5890 }, { "epoch": 0.4024731843957095, "grad_norm": 3.2598376274108887, "learning_rate": 4.748634047319235e-06, "loss": 0.2746, "step": 5891 }, { "epoch": 0.4025415044066407, "grad_norm": 4.251307010650635, "learning_rate": 4.747910479848056e-06, "loss": 0.4191, "step": 5892 }, { "epoch": 0.4026098244175719, "grad_norm": 3.4894356727600098, "learning_rate": 4.74718685127001e-06, "loss": 0.3715, "step": 5893 }, { "epoch": 0.4026781444285031, "grad_norm": 3.5060923099517822, "learning_rate": 4.746463161620532e-06, "loss": 0.3606, "step": 5894 }, { "epoch": 0.4027464644394343, "grad_norm": 3.344844102859497, "learning_rate": 4.745739410935059e-06, "loss": 0.3633, "step": 5895 }, { "epoch": 0.4028147844503655, "grad_norm": 3.7889952659606934, "learning_rate": 4.745015599249033e-06, "loss": 0.3545, "step": 5896 }, { "epoch": 0.4028831044612967, "grad_norm": 3.930429220199585, "learning_rate": 4.744291726597893e-06, "loss": 0.3546, "step": 5897 }, { "epoch": 0.4029514244722279, "grad_norm": 3.892683744430542, "learning_rate": 4.74356779301709e-06, "loss": 0.3341, "step": 5898 }, { "epoch": 0.40301974448315914, "grad_norm": 4.339266300201416, "learning_rate": 4.74284379854207e-06, "loss": 0.4376, "step": 5899 }, { "epoch": 0.4030880644940903, "grad_norm": 2.998758316040039, "learning_rate": 4.7421197432082854e-06, "loss": 0.2789, "step": 5900 }, { "epoch": 0.4031563845050215, "grad_norm": 3.4482572078704834, "learning_rate": 4.7413956270511925e-06, "loss": 0.4359, "step": 5901 }, { "epoch": 0.4032247045159527, "grad_norm": 3.287966728210449, "learning_rate": 4.740671450106249e-06, "loss": 0.2617, "step": 5902 }, { "epoch": 0.4032930245268839, "grad_norm": 4.376923084259033, "learning_rate": 4.739947212408914e-06, "loss": 0.4884, "step": 5903 }, { "epoch": 0.40336134453781514, "grad_norm": 3.499990701675415, "learning_rate": 4.7392229139946546e-06, "loss": 0.3059, "step": 5904 }, { "epoch": 0.40342966454874635, "grad_norm": 4.941557884216309, "learning_rate": 4.738498554898935e-06, "loss": 0.4135, "step": 5905 }, { "epoch": 0.4034979845596775, "grad_norm": 2.9024713039398193, "learning_rate": 4.737774135157228e-06, "loss": 0.2573, "step": 5906 }, { "epoch": 0.4035663045706087, "grad_norm": 4.404479503631592, "learning_rate": 4.737049654805002e-06, "loss": 0.3441, "step": 5907 }, { "epoch": 0.4036346245815399, "grad_norm": 4.042867183685303, "learning_rate": 4.7363251138777385e-06, "loss": 0.3579, "step": 5908 }, { "epoch": 0.40370294459247114, "grad_norm": 4.378161430358887, "learning_rate": 4.735600512410913e-06, "loss": 0.3506, "step": 5909 }, { "epoch": 0.40377126460340235, "grad_norm": 5.994889259338379, "learning_rate": 4.734875850440007e-06, "loss": 0.4091, "step": 5910 }, { "epoch": 0.40383958461433356, "grad_norm": 3.9772017002105713, "learning_rate": 4.734151128000507e-06, "loss": 0.4793, "step": 5911 }, { "epoch": 0.4039079046252647, "grad_norm": 5.242705821990967, "learning_rate": 4.733426345127899e-06, "loss": 0.3913, "step": 5912 }, { "epoch": 0.4039762246361959, "grad_norm": 3.8954944610595703, "learning_rate": 4.732701501857676e-06, "loss": 0.4644, "step": 5913 }, { "epoch": 0.40404454464712714, "grad_norm": 3.9370217323303223, "learning_rate": 4.731976598225329e-06, "loss": 0.2861, "step": 5914 }, { "epoch": 0.40411286465805835, "grad_norm": 3.803652048110962, "learning_rate": 4.731251634266357e-06, "loss": 0.2851, "step": 5915 }, { "epoch": 0.40418118466898956, "grad_norm": 4.335806846618652, "learning_rate": 4.7305266100162576e-06, "loss": 0.4655, "step": 5916 }, { "epoch": 0.40424950467992077, "grad_norm": 4.209072589874268, "learning_rate": 4.729801525510534e-06, "loss": 0.34, "step": 5917 }, { "epoch": 0.4043178246908519, "grad_norm": 4.04979944229126, "learning_rate": 4.729076380784693e-06, "loss": 0.479, "step": 5918 }, { "epoch": 0.40438614470178313, "grad_norm": 3.601222038269043, "learning_rate": 4.728351175874241e-06, "loss": 0.3523, "step": 5919 }, { "epoch": 0.40445446471271435, "grad_norm": 4.402819633483887, "learning_rate": 4.72762591081469e-06, "loss": 0.3846, "step": 5920 }, { "epoch": 0.40452278472364556, "grad_norm": 3.789686918258667, "learning_rate": 4.726900585641555e-06, "loss": 0.3985, "step": 5921 }, { "epoch": 0.40459110473457677, "grad_norm": 3.41945219039917, "learning_rate": 4.726175200390352e-06, "loss": 0.2877, "step": 5922 }, { "epoch": 0.404659424745508, "grad_norm": 4.0265302658081055, "learning_rate": 4.725449755096603e-06, "loss": 0.41, "step": 5923 }, { "epoch": 0.40472774475643913, "grad_norm": 3.049778461456299, "learning_rate": 4.724724249795828e-06, "loss": 0.3055, "step": 5924 }, { "epoch": 0.40479606476737034, "grad_norm": 4.368663311004639, "learning_rate": 4.723998684523558e-06, "loss": 0.4464, "step": 5925 }, { "epoch": 0.40486438477830156, "grad_norm": 4.224126815795898, "learning_rate": 4.723273059315317e-06, "loss": 0.4222, "step": 5926 }, { "epoch": 0.40493270478923277, "grad_norm": 3.426464080810547, "learning_rate": 4.722547374206641e-06, "loss": 0.262, "step": 5927 }, { "epoch": 0.405001024800164, "grad_norm": 5.452763557434082, "learning_rate": 4.721821629233061e-06, "loss": 0.4608, "step": 5928 }, { "epoch": 0.4050693448110952, "grad_norm": 4.517038345336914, "learning_rate": 4.721095824430118e-06, "loss": 0.3987, "step": 5929 }, { "epoch": 0.40513766482202634, "grad_norm": 4.451761722564697, "learning_rate": 4.720369959833351e-06, "loss": 0.4064, "step": 5930 }, { "epoch": 0.40520598483295756, "grad_norm": 2.986917018890381, "learning_rate": 4.7196440354783045e-06, "loss": 0.2765, "step": 5931 }, { "epoch": 0.40527430484388877, "grad_norm": 4.336385726928711, "learning_rate": 4.718918051400524e-06, "loss": 0.3516, "step": 5932 }, { "epoch": 0.40534262485482, "grad_norm": 3.938995122909546, "learning_rate": 4.718192007635561e-06, "loss": 0.3298, "step": 5933 }, { "epoch": 0.4054109448657512, "grad_norm": 3.206413507461548, "learning_rate": 4.717465904218965e-06, "loss": 0.3281, "step": 5934 }, { "epoch": 0.4054792648766824, "grad_norm": 3.81534743309021, "learning_rate": 4.716739741186295e-06, "loss": 0.3634, "step": 5935 }, { "epoch": 0.40554758488761355, "grad_norm": 5.558172702789307, "learning_rate": 4.716013518573106e-06, "loss": 0.5003, "step": 5936 }, { "epoch": 0.40561590489854477, "grad_norm": 3.241415500640869, "learning_rate": 4.715287236414962e-06, "loss": 0.3292, "step": 5937 }, { "epoch": 0.405684224909476, "grad_norm": 3.7067813873291016, "learning_rate": 4.714560894747424e-06, "loss": 0.343, "step": 5938 }, { "epoch": 0.4057525449204072, "grad_norm": 4.188735008239746, "learning_rate": 4.7138344936060616e-06, "loss": 0.3762, "step": 5939 }, { "epoch": 0.4058208649313384, "grad_norm": 3.352271318435669, "learning_rate": 4.713108033026443e-06, "loss": 0.3468, "step": 5940 }, { "epoch": 0.4058891849422696, "grad_norm": 3.9708688259124756, "learning_rate": 4.7123815130441424e-06, "loss": 0.2995, "step": 5941 }, { "epoch": 0.40595750495320077, "grad_norm": 5.234139919281006, "learning_rate": 4.711654933694735e-06, "loss": 0.4689, "step": 5942 }, { "epoch": 0.406025824964132, "grad_norm": 4.132580280303955, "learning_rate": 4.7109282950138e-06, "loss": 0.3388, "step": 5943 }, { "epoch": 0.4060941449750632, "grad_norm": 4.444972515106201, "learning_rate": 4.710201597036917e-06, "loss": 0.3655, "step": 5944 }, { "epoch": 0.4061624649859944, "grad_norm": 4.048312664031982, "learning_rate": 4.709474839799673e-06, "loss": 0.3883, "step": 5945 }, { "epoch": 0.4062307849969256, "grad_norm": 3.8359830379486084, "learning_rate": 4.708748023337655e-06, "loss": 0.2453, "step": 5946 }, { "epoch": 0.4062991050078568, "grad_norm": 2.9746384620666504, "learning_rate": 4.708021147686452e-06, "loss": 0.2718, "step": 5947 }, { "epoch": 0.406367425018788, "grad_norm": 3.677004814147949, "learning_rate": 4.707294212881657e-06, "loss": 0.2413, "step": 5948 }, { "epoch": 0.4064357450297192, "grad_norm": 4.012383937835693, "learning_rate": 4.706567218958868e-06, "loss": 0.3184, "step": 5949 }, { "epoch": 0.4065040650406504, "grad_norm": 3.6133456230163574, "learning_rate": 4.705840165953681e-06, "loss": 0.4011, "step": 5950 }, { "epoch": 0.4065723850515816, "grad_norm": 3.6007049083709717, "learning_rate": 4.705113053901701e-06, "loss": 0.3523, "step": 5951 }, { "epoch": 0.4066407050625128, "grad_norm": 4.074924945831299, "learning_rate": 4.704385882838532e-06, "loss": 0.3079, "step": 5952 }, { "epoch": 0.40670902507344403, "grad_norm": 2.8882393836975098, "learning_rate": 4.7036586527997786e-06, "loss": 0.2997, "step": 5953 }, { "epoch": 0.4067773450843752, "grad_norm": 3.919163942337036, "learning_rate": 4.702931363821056e-06, "loss": 0.2846, "step": 5954 }, { "epoch": 0.4068456650953064, "grad_norm": 3.5829429626464844, "learning_rate": 4.702204015937975e-06, "loss": 0.3658, "step": 5955 }, { "epoch": 0.4069139851062376, "grad_norm": 3.4727721214294434, "learning_rate": 4.7014766091861515e-06, "loss": 0.2723, "step": 5956 }, { "epoch": 0.4069823051171688, "grad_norm": 4.702902317047119, "learning_rate": 4.700749143601206e-06, "loss": 0.386, "step": 5957 }, { "epoch": 0.40705062512810003, "grad_norm": 3.2482588291168213, "learning_rate": 4.7000216192187595e-06, "loss": 0.3259, "step": 5958 }, { "epoch": 0.40711894513903124, "grad_norm": 3.2480201721191406, "learning_rate": 4.699294036074439e-06, "loss": 0.3279, "step": 5959 }, { "epoch": 0.4071872651499624, "grad_norm": 3.9853246212005615, "learning_rate": 4.698566394203868e-06, "loss": 0.3684, "step": 5960 }, { "epoch": 0.4072555851608936, "grad_norm": 3.708937644958496, "learning_rate": 4.697838693642682e-06, "loss": 0.3542, "step": 5961 }, { "epoch": 0.4073239051718248, "grad_norm": 3.208400011062622, "learning_rate": 4.697110934426512e-06, "loss": 0.3239, "step": 5962 }, { "epoch": 0.40739222518275603, "grad_norm": 2.891519546508789, "learning_rate": 4.696383116590995e-06, "loss": 0.2465, "step": 5963 }, { "epoch": 0.40746054519368724, "grad_norm": 3.8828086853027344, "learning_rate": 4.695655240171769e-06, "loss": 0.2814, "step": 5964 }, { "epoch": 0.40752886520461845, "grad_norm": 5.105139255523682, "learning_rate": 4.6949273052044805e-06, "loss": 0.4078, "step": 5965 }, { "epoch": 0.40759718521554966, "grad_norm": 4.570760250091553, "learning_rate": 4.6941993117247684e-06, "loss": 0.4089, "step": 5966 }, { "epoch": 0.4076655052264808, "grad_norm": 4.234431266784668, "learning_rate": 4.693471259768285e-06, "loss": 0.3323, "step": 5967 }, { "epoch": 0.40773382523741203, "grad_norm": 4.187037944793701, "learning_rate": 4.69274314937068e-06, "loss": 0.3903, "step": 5968 }, { "epoch": 0.40780214524834324, "grad_norm": 3.827087163925171, "learning_rate": 4.692014980567607e-06, "loss": 0.3757, "step": 5969 }, { "epoch": 0.40787046525927445, "grad_norm": 4.511204242706299, "learning_rate": 4.69128675339472e-06, "loss": 0.3643, "step": 5970 }, { "epoch": 0.40793878527020566, "grad_norm": 3.719325542449951, "learning_rate": 4.690558467887683e-06, "loss": 0.3406, "step": 5971 }, { "epoch": 0.4080071052811369, "grad_norm": 4.0163655281066895, "learning_rate": 4.689830124082155e-06, "loss": 0.3383, "step": 5972 }, { "epoch": 0.40807542529206803, "grad_norm": 4.064739227294922, "learning_rate": 4.689101722013802e-06, "loss": 0.3706, "step": 5973 }, { "epoch": 0.40814374530299924, "grad_norm": 3.736839771270752, "learning_rate": 4.688373261718293e-06, "loss": 0.3511, "step": 5974 }, { "epoch": 0.40821206531393045, "grad_norm": 4.678846836090088, "learning_rate": 4.687644743231296e-06, "loss": 0.4672, "step": 5975 }, { "epoch": 0.40828038532486166, "grad_norm": 4.5894317626953125, "learning_rate": 4.686916166588487e-06, "loss": 0.3845, "step": 5976 }, { "epoch": 0.40834870533579287, "grad_norm": 3.295994520187378, "learning_rate": 4.68618753182554e-06, "loss": 0.3303, "step": 5977 }, { "epoch": 0.4084170253467241, "grad_norm": 2.733462333679199, "learning_rate": 4.685458838978139e-06, "loss": 0.3474, "step": 5978 }, { "epoch": 0.40848534535765524, "grad_norm": 3.6211442947387695, "learning_rate": 4.684730088081961e-06, "loss": 0.3079, "step": 5979 }, { "epoch": 0.40855366536858645, "grad_norm": 4.046278953552246, "learning_rate": 4.684001279172694e-06, "loss": 0.2912, "step": 5980 }, { "epoch": 0.40862198537951766, "grad_norm": 3.0814552307128906, "learning_rate": 4.683272412286024e-06, "loss": 0.371, "step": 5981 }, { "epoch": 0.40869030539044887, "grad_norm": 4.653820991516113, "learning_rate": 4.682543487457644e-06, "loss": 0.4606, "step": 5982 }, { "epoch": 0.4087586254013801, "grad_norm": 3.6698358058929443, "learning_rate": 4.681814504723246e-06, "loss": 0.3627, "step": 5983 }, { "epoch": 0.4088269454123113, "grad_norm": 3.9731123447418213, "learning_rate": 4.681085464118527e-06, "loss": 0.3415, "step": 5984 }, { "epoch": 0.40889526542324245, "grad_norm": 3.4984188079833984, "learning_rate": 4.680356365679184e-06, "loss": 0.3477, "step": 5985 }, { "epoch": 0.40896358543417366, "grad_norm": 3.947681427001953, "learning_rate": 4.679627209440922e-06, "loss": 0.3206, "step": 5986 }, { "epoch": 0.40903190544510487, "grad_norm": 4.0336761474609375, "learning_rate": 4.678897995439443e-06, "loss": 0.3163, "step": 5987 }, { "epoch": 0.4091002254560361, "grad_norm": 3.1792006492614746, "learning_rate": 4.678168723710459e-06, "loss": 0.3011, "step": 5988 }, { "epoch": 0.4091685454669673, "grad_norm": 4.129857063293457, "learning_rate": 4.677439394289676e-06, "loss": 0.4664, "step": 5989 }, { "epoch": 0.4092368654778985, "grad_norm": 3.938215970993042, "learning_rate": 4.676710007212809e-06, "loss": 0.3613, "step": 5990 }, { "epoch": 0.40930518548882966, "grad_norm": 4.166388988494873, "learning_rate": 4.675980562515574e-06, "loss": 0.2923, "step": 5991 }, { "epoch": 0.40937350549976087, "grad_norm": 4.080498695373535, "learning_rate": 4.675251060233691e-06, "loss": 0.2951, "step": 5992 }, { "epoch": 0.4094418255106921, "grad_norm": 3.8278305530548096, "learning_rate": 4.674521500402881e-06, "loss": 0.2805, "step": 5993 }, { "epoch": 0.4095101455216233, "grad_norm": 5.17371129989624, "learning_rate": 4.673791883058868e-06, "loss": 0.4203, "step": 5994 }, { "epoch": 0.4095784655325545, "grad_norm": 4.0151286125183105, "learning_rate": 4.67306220823738e-06, "loss": 0.3103, "step": 5995 }, { "epoch": 0.4096467855434857, "grad_norm": 3.052342653274536, "learning_rate": 4.672332475974148e-06, "loss": 0.2476, "step": 5996 }, { "epoch": 0.40971510555441687, "grad_norm": 3.4689486026763916, "learning_rate": 4.671602686304904e-06, "loss": 0.2447, "step": 5997 }, { "epoch": 0.4097834255653481, "grad_norm": 3.866666078567505, "learning_rate": 4.670872839265385e-06, "loss": 0.4278, "step": 5998 }, { "epoch": 0.4098517455762793, "grad_norm": 3.639936685562134, "learning_rate": 4.670142934891328e-06, "loss": 0.3287, "step": 5999 }, { "epoch": 0.4099200655872105, "grad_norm": 4.3433685302734375, "learning_rate": 4.6694129732184744e-06, "loss": 0.2934, "step": 6000 }, { "epoch": 0.4099883855981417, "grad_norm": 4.242071628570557, "learning_rate": 4.668682954282571e-06, "loss": 0.3326, "step": 6001 }, { "epoch": 0.4100567056090729, "grad_norm": 4.162583351135254, "learning_rate": 4.6679528781193634e-06, "loss": 0.3647, "step": 6002 }, { "epoch": 0.4101250256200041, "grad_norm": 5.445889472961426, "learning_rate": 4.667222744764599e-06, "loss": 0.4427, "step": 6003 }, { "epoch": 0.4101933456309353, "grad_norm": 5.263439178466797, "learning_rate": 4.666492554254036e-06, "loss": 0.3646, "step": 6004 }, { "epoch": 0.4102616656418665, "grad_norm": 6.036040306091309, "learning_rate": 4.665762306623426e-06, "loss": 0.3991, "step": 6005 }, { "epoch": 0.4103299856527977, "grad_norm": 4.120932579040527, "learning_rate": 4.665032001908527e-06, "loss": 0.3279, "step": 6006 }, { "epoch": 0.4103983056637289, "grad_norm": 3.847893714904785, "learning_rate": 4.664301640145101e-06, "loss": 0.3419, "step": 6007 }, { "epoch": 0.41046662567466013, "grad_norm": 4.500528335571289, "learning_rate": 4.6635712213689136e-06, "loss": 0.4289, "step": 6008 }, { "epoch": 0.4105349456855913, "grad_norm": 3.646514654159546, "learning_rate": 4.662840745615729e-06, "loss": 0.3141, "step": 6009 }, { "epoch": 0.4106032656965225, "grad_norm": 3.295252561569214, "learning_rate": 4.662110212921319e-06, "loss": 0.3593, "step": 6010 }, { "epoch": 0.4106715857074537, "grad_norm": 4.992164134979248, "learning_rate": 4.661379623321452e-06, "loss": 0.286, "step": 6011 }, { "epoch": 0.4107399057183849, "grad_norm": 3.1918678283691406, "learning_rate": 4.660648976851908e-06, "loss": 0.3551, "step": 6012 }, { "epoch": 0.41080822572931613, "grad_norm": 4.2113213539123535, "learning_rate": 4.6599182735484595e-06, "loss": 0.4784, "step": 6013 }, { "epoch": 0.41087654574024735, "grad_norm": 3.8929715156555176, "learning_rate": 4.659187513446892e-06, "loss": 0.3486, "step": 6014 }, { "epoch": 0.4109448657511785, "grad_norm": 4.139848232269287, "learning_rate": 4.658456696582986e-06, "loss": 0.352, "step": 6015 }, { "epoch": 0.4110131857621097, "grad_norm": 3.4802918434143066, "learning_rate": 4.657725822992529e-06, "loss": 0.316, "step": 6016 }, { "epoch": 0.4110815057730409, "grad_norm": 3.500507116317749, "learning_rate": 4.656994892711309e-06, "loss": 0.2903, "step": 6017 }, { "epoch": 0.41114982578397213, "grad_norm": 3.010223388671875, "learning_rate": 4.656263905775119e-06, "loss": 0.3711, "step": 6018 }, { "epoch": 0.41121814579490334, "grad_norm": 3.902355432510376, "learning_rate": 4.6555328622197506e-06, "loss": 0.3471, "step": 6019 }, { "epoch": 0.41128646580583456, "grad_norm": 2.9865269660949707, "learning_rate": 4.6548017620810044e-06, "loss": 0.3101, "step": 6020 }, { "epoch": 0.4113547858167657, "grad_norm": 3.8514997959136963, "learning_rate": 4.6540706053946784e-06, "loss": 0.3869, "step": 6021 }, { "epoch": 0.4114231058276969, "grad_norm": 3.584298849105835, "learning_rate": 4.6533393921965774e-06, "loss": 0.3964, "step": 6022 }, { "epoch": 0.41149142583862813, "grad_norm": 5.174529075622559, "learning_rate": 4.652608122522505e-06, "loss": 0.3789, "step": 6023 }, { "epoch": 0.41155974584955934, "grad_norm": 3.6402957439422607, "learning_rate": 4.65187679640827e-06, "loss": 0.4804, "step": 6024 }, { "epoch": 0.41162806586049056, "grad_norm": 3.243532419204712, "learning_rate": 4.651145413889683e-06, "loss": 0.383, "step": 6025 }, { "epoch": 0.41169638587142177, "grad_norm": 3.2190258502960205, "learning_rate": 4.650413975002559e-06, "loss": 0.3418, "step": 6026 }, { "epoch": 0.4117647058823529, "grad_norm": 4.483279705047607, "learning_rate": 4.6496824797827146e-06, "loss": 0.3473, "step": 6027 }, { "epoch": 0.41183302589328413, "grad_norm": 3.294302463531494, "learning_rate": 4.64895092826597e-06, "loss": 0.3498, "step": 6028 }, { "epoch": 0.41190134590421534, "grad_norm": 3.3742213249206543, "learning_rate": 4.648219320488144e-06, "loss": 0.3007, "step": 6029 }, { "epoch": 0.41196966591514655, "grad_norm": 3.666607141494751, "learning_rate": 4.647487656485064e-06, "loss": 0.3696, "step": 6030 }, { "epoch": 0.41203798592607777, "grad_norm": 2.8094918727874756, "learning_rate": 4.646755936292557e-06, "loss": 0.2958, "step": 6031 }, { "epoch": 0.412106305937009, "grad_norm": 4.4682464599609375, "learning_rate": 4.6460241599464544e-06, "loss": 0.4297, "step": 6032 }, { "epoch": 0.41217462594794013, "grad_norm": 4.254701614379883, "learning_rate": 4.645292327482589e-06, "loss": 0.3209, "step": 6033 }, { "epoch": 0.41224294595887134, "grad_norm": 4.092820644378662, "learning_rate": 4.644560438936796e-06, "loss": 0.3911, "step": 6034 }, { "epoch": 0.41231126596980255, "grad_norm": 4.298447608947754, "learning_rate": 4.643828494344913e-06, "loss": 0.3657, "step": 6035 }, { "epoch": 0.41237958598073376, "grad_norm": 3.8131346702575684, "learning_rate": 4.643096493742785e-06, "loss": 0.3487, "step": 6036 }, { "epoch": 0.412447905991665, "grad_norm": 3.9046061038970947, "learning_rate": 4.642364437166251e-06, "loss": 0.4994, "step": 6037 }, { "epoch": 0.4125162260025962, "grad_norm": 4.847949981689453, "learning_rate": 4.641632324651163e-06, "loss": 0.5347, "step": 6038 }, { "epoch": 0.41258454601352734, "grad_norm": 3.75618839263916, "learning_rate": 4.640900156233367e-06, "loss": 0.3244, "step": 6039 }, { "epoch": 0.41265286602445855, "grad_norm": 6.338820934295654, "learning_rate": 4.640167931948717e-06, "loss": 0.4899, "step": 6040 }, { "epoch": 0.41272118603538976, "grad_norm": 3.8424088954925537, "learning_rate": 4.639435651833069e-06, "loss": 0.336, "step": 6041 }, { "epoch": 0.412789506046321, "grad_norm": 6.1377668380737305, "learning_rate": 4.638703315922278e-06, "loss": 0.4362, "step": 6042 }, { "epoch": 0.4128578260572522, "grad_norm": 3.7472479343414307, "learning_rate": 4.637970924252206e-06, "loss": 0.3264, "step": 6043 }, { "epoch": 0.4129261460681834, "grad_norm": 4.353744029998779, "learning_rate": 4.6372384768587166e-06, "loss": 0.3247, "step": 6044 }, { "epoch": 0.41299446607911455, "grad_norm": 4.571052551269531, "learning_rate": 4.636505973777677e-06, "loss": 0.3658, "step": 6045 }, { "epoch": 0.41306278609004576, "grad_norm": 3.7349464893341064, "learning_rate": 4.635773415044952e-06, "loss": 0.3671, "step": 6046 }, { "epoch": 0.413131106100977, "grad_norm": 3.887192726135254, "learning_rate": 4.635040800696417e-06, "loss": 0.3835, "step": 6047 }, { "epoch": 0.4131994261119082, "grad_norm": 3.223095655441284, "learning_rate": 4.634308130767945e-06, "loss": 0.3675, "step": 6048 }, { "epoch": 0.4132677461228394, "grad_norm": 4.233901023864746, "learning_rate": 4.6335754052954125e-06, "loss": 0.283, "step": 6049 }, { "epoch": 0.4133360661337706, "grad_norm": 3.290243625640869, "learning_rate": 4.632842624314699e-06, "loss": 0.2893, "step": 6050 }, { "epoch": 0.41340438614470176, "grad_norm": 3.9008188247680664, "learning_rate": 4.6321097878616884e-06, "loss": 0.3419, "step": 6051 }, { "epoch": 0.413472706155633, "grad_norm": 2.777719736099243, "learning_rate": 4.631376895972263e-06, "loss": 0.3469, "step": 6052 }, { "epoch": 0.4135410261665642, "grad_norm": 4.595632553100586, "learning_rate": 4.630643948682312e-06, "loss": 0.3073, "step": 6053 }, { "epoch": 0.4136093461774954, "grad_norm": 4.317234039306641, "learning_rate": 4.629910946027727e-06, "loss": 0.4262, "step": 6054 }, { "epoch": 0.4136776661884266, "grad_norm": 3.687556266784668, "learning_rate": 4.6291778880444e-06, "loss": 0.2692, "step": 6055 }, { "epoch": 0.4137459861993578, "grad_norm": 3.5501062870025635, "learning_rate": 4.6284447747682254e-06, "loss": 0.3605, "step": 6056 }, { "epoch": 0.413814306210289, "grad_norm": 3.0103418827056885, "learning_rate": 4.627711606235105e-06, "loss": 0.2686, "step": 6057 }, { "epoch": 0.4138826262212202, "grad_norm": 4.0334601402282715, "learning_rate": 4.626978382480938e-06, "loss": 0.3105, "step": 6058 }, { "epoch": 0.4139509462321514, "grad_norm": 3.2678020000457764, "learning_rate": 4.626245103541629e-06, "loss": 0.3412, "step": 6059 }, { "epoch": 0.4140192662430826, "grad_norm": 3.4782865047454834, "learning_rate": 4.6255117694530845e-06, "loss": 0.3187, "step": 6060 }, { "epoch": 0.4140875862540138, "grad_norm": 4.3224077224731445, "learning_rate": 4.624778380251216e-06, "loss": 0.3796, "step": 6061 }, { "epoch": 0.41415590626494503, "grad_norm": 3.2502195835113525, "learning_rate": 4.6240449359719315e-06, "loss": 0.3186, "step": 6062 }, { "epoch": 0.4142242262758762, "grad_norm": 5.341643333435059, "learning_rate": 4.623311436651148e-06, "loss": 0.3405, "step": 6063 }, { "epoch": 0.4142925462868074, "grad_norm": 3.6458017826080322, "learning_rate": 4.622577882324784e-06, "loss": 0.3731, "step": 6064 }, { "epoch": 0.4143608662977386, "grad_norm": 4.1393585205078125, "learning_rate": 4.62184427302876e-06, "loss": 0.4095, "step": 6065 }, { "epoch": 0.4144291863086698, "grad_norm": 3.287863254547119, "learning_rate": 4.621110608798995e-06, "loss": 0.3375, "step": 6066 }, { "epoch": 0.41449750631960103, "grad_norm": 6.3578715324401855, "learning_rate": 4.6203768896714195e-06, "loss": 0.3627, "step": 6067 }, { "epoch": 0.41456582633053224, "grad_norm": 4.097894668579102, "learning_rate": 4.6196431156819585e-06, "loss": 0.3222, "step": 6068 }, { "epoch": 0.4146341463414634, "grad_norm": 3.85912823677063, "learning_rate": 4.618909286866544e-06, "loss": 0.3551, "step": 6069 }, { "epoch": 0.4147024663523946, "grad_norm": 3.0154387950897217, "learning_rate": 4.61817540326111e-06, "loss": 0.3308, "step": 6070 }, { "epoch": 0.4147707863633258, "grad_norm": 3.3829751014709473, "learning_rate": 4.6174414649015926e-06, "loss": 0.3702, "step": 6071 }, { "epoch": 0.414839106374257, "grad_norm": 3.5802974700927734, "learning_rate": 4.616707471823931e-06, "loss": 0.3388, "step": 6072 }, { "epoch": 0.41490742638518824, "grad_norm": 3.402352809906006, "learning_rate": 4.615973424064066e-06, "loss": 0.4081, "step": 6073 }, { "epoch": 0.41497574639611945, "grad_norm": 4.09106969833374, "learning_rate": 4.615239321657943e-06, "loss": 0.3111, "step": 6074 }, { "epoch": 0.4150440664070506, "grad_norm": 2.990743398666382, "learning_rate": 4.61450516464151e-06, "loss": 0.2762, "step": 6075 }, { "epoch": 0.4151123864179818, "grad_norm": 4.315993785858154, "learning_rate": 4.613770953050712e-06, "loss": 0.3228, "step": 6076 }, { "epoch": 0.415180706428913, "grad_norm": 4.62515115737915, "learning_rate": 4.6130366869215075e-06, "loss": 0.3399, "step": 6077 }, { "epoch": 0.41524902643984424, "grad_norm": 2.9179656505584717, "learning_rate": 4.612302366289848e-06, "loss": 0.3888, "step": 6078 }, { "epoch": 0.41531734645077545, "grad_norm": 3.9338390827178955, "learning_rate": 4.6115679911916925e-06, "loss": 0.3947, "step": 6079 }, { "epoch": 0.41538566646170666, "grad_norm": 2.534668207168579, "learning_rate": 4.610833561663e-06, "loss": 0.2729, "step": 6080 }, { "epoch": 0.4154539864726378, "grad_norm": 4.354250907897949, "learning_rate": 4.610099077739735e-06, "loss": 0.2731, "step": 6081 }, { "epoch": 0.415522306483569, "grad_norm": 4.918849945068359, "learning_rate": 4.609364539457863e-06, "loss": 0.3698, "step": 6082 }, { "epoch": 0.41559062649450024, "grad_norm": 3.8142824172973633, "learning_rate": 4.608629946853353e-06, "loss": 0.3444, "step": 6083 }, { "epoch": 0.41565894650543145, "grad_norm": 4.044057369232178, "learning_rate": 4.607895299962174e-06, "loss": 0.3123, "step": 6084 }, { "epoch": 0.41572726651636266, "grad_norm": 4.325571537017822, "learning_rate": 4.607160598820301e-06, "loss": 0.3905, "step": 6085 }, { "epoch": 0.41579558652729387, "grad_norm": 3.8137857913970947, "learning_rate": 4.60642584346371e-06, "loss": 0.2716, "step": 6086 }, { "epoch": 0.415863906538225, "grad_norm": 4.651513576507568, "learning_rate": 4.605691033928381e-06, "loss": 0.2978, "step": 6087 }, { "epoch": 0.41593222654915624, "grad_norm": 3.352400064468384, "learning_rate": 4.604956170250296e-06, "loss": 0.3707, "step": 6088 }, { "epoch": 0.41600054656008745, "grad_norm": 4.039019584655762, "learning_rate": 4.6042212524654365e-06, "loss": 0.4211, "step": 6089 }, { "epoch": 0.41606886657101866, "grad_norm": 4.355178356170654, "learning_rate": 4.603486280609791e-06, "loss": 0.3505, "step": 6090 }, { "epoch": 0.41613718658194987, "grad_norm": 4.584346771240234, "learning_rate": 4.602751254719351e-06, "loss": 0.2914, "step": 6091 }, { "epoch": 0.4162055065928811, "grad_norm": 3.5630197525024414, "learning_rate": 4.602016174830106e-06, "loss": 0.3258, "step": 6092 }, { "epoch": 0.41627382660381224, "grad_norm": 4.4203200340271, "learning_rate": 4.601281040978053e-06, "loss": 0.2999, "step": 6093 }, { "epoch": 0.41634214661474345, "grad_norm": 5.321430206298828, "learning_rate": 4.600545853199188e-06, "loss": 0.3789, "step": 6094 }, { "epoch": 0.41641046662567466, "grad_norm": 3.3138251304626465, "learning_rate": 4.599810611529512e-06, "loss": 0.2608, "step": 6095 }, { "epoch": 0.41647878663660587, "grad_norm": 4.689943313598633, "learning_rate": 4.599075316005027e-06, "loss": 0.3831, "step": 6096 }, { "epoch": 0.4165471066475371, "grad_norm": 3.781381845474243, "learning_rate": 4.59833996666174e-06, "loss": 0.2838, "step": 6097 }, { "epoch": 0.4166154266584683, "grad_norm": 4.641912937164307, "learning_rate": 4.597604563535658e-06, "loss": 0.339, "step": 6098 }, { "epoch": 0.41668374666939945, "grad_norm": 4.015976428985596, "learning_rate": 4.596869106662792e-06, "loss": 0.341, "step": 6099 }, { "epoch": 0.41675206668033066, "grad_norm": 5.5183186531066895, "learning_rate": 4.5961335960791555e-06, "loss": 0.3985, "step": 6100 }, { "epoch": 0.41682038669126187, "grad_norm": 5.473280906677246, "learning_rate": 4.595398031820764e-06, "loss": 0.3513, "step": 6101 }, { "epoch": 0.4168887067021931, "grad_norm": 3.2185111045837402, "learning_rate": 4.594662413923636e-06, "loss": 0.3755, "step": 6102 }, { "epoch": 0.4169570267131243, "grad_norm": 4.352929592132568, "learning_rate": 4.593926742423794e-06, "loss": 0.2914, "step": 6103 }, { "epoch": 0.4170253467240555, "grad_norm": 3.8196959495544434, "learning_rate": 4.593191017357262e-06, "loss": 0.3499, "step": 6104 }, { "epoch": 0.41709366673498666, "grad_norm": 3.9974522590637207, "learning_rate": 4.592455238760064e-06, "loss": 0.2982, "step": 6105 }, { "epoch": 0.41716198674591787, "grad_norm": 3.888777256011963, "learning_rate": 4.591719406668232e-06, "loss": 0.3301, "step": 6106 }, { "epoch": 0.4172303067568491, "grad_norm": 4.156316757202148, "learning_rate": 4.590983521117796e-06, "loss": 0.3517, "step": 6107 }, { "epoch": 0.4172986267677803, "grad_norm": 3.734482526779175, "learning_rate": 4.590247582144791e-06, "loss": 0.4099, "step": 6108 }, { "epoch": 0.4173669467787115, "grad_norm": 5.294587135314941, "learning_rate": 4.589511589785253e-06, "loss": 0.4473, "step": 6109 }, { "epoch": 0.4174352667896427, "grad_norm": 3.9081192016601562, "learning_rate": 4.5887755440752235e-06, "loss": 0.4922, "step": 6110 }, { "epoch": 0.41750358680057387, "grad_norm": 4.243196964263916, "learning_rate": 4.588039445050744e-06, "loss": 0.3636, "step": 6111 }, { "epoch": 0.4175719068115051, "grad_norm": 2.3298473358154297, "learning_rate": 4.587303292747858e-06, "loss": 0.3211, "step": 6112 }, { "epoch": 0.4176402268224363, "grad_norm": 3.4122231006622314, "learning_rate": 4.586567087202614e-06, "loss": 0.3047, "step": 6113 }, { "epoch": 0.4177085468333675, "grad_norm": 4.610877513885498, "learning_rate": 4.585830828451062e-06, "loss": 0.3455, "step": 6114 }, { "epoch": 0.4177768668442987, "grad_norm": 3.806678533554077, "learning_rate": 4.5850945165292545e-06, "loss": 0.3741, "step": 6115 }, { "epoch": 0.4178451868552299, "grad_norm": 2.788660764694214, "learning_rate": 4.584358151473247e-06, "loss": 0.2981, "step": 6116 }, { "epoch": 0.4179135068661611, "grad_norm": 2.9007225036621094, "learning_rate": 4.583621733319096e-06, "loss": 0.2665, "step": 6117 }, { "epoch": 0.4179818268770923, "grad_norm": 3.084224224090576, "learning_rate": 4.582885262102865e-06, "loss": 0.278, "step": 6118 }, { "epoch": 0.4180501468880235, "grad_norm": 4.694036960601807, "learning_rate": 4.582148737860614e-06, "loss": 0.3473, "step": 6119 }, { "epoch": 0.4181184668989547, "grad_norm": 4.432071685791016, "learning_rate": 4.58141216062841e-06, "loss": 0.2859, "step": 6120 }, { "epoch": 0.4181867869098859, "grad_norm": 4.424864768981934, "learning_rate": 4.5806755304423214e-06, "loss": 0.3218, "step": 6121 }, { "epoch": 0.41825510692081713, "grad_norm": 3.8154659271240234, "learning_rate": 4.579938847338418e-06, "loss": 0.4469, "step": 6122 }, { "epoch": 0.4183234269317483, "grad_norm": 4.394642353057861, "learning_rate": 4.579202111352775e-06, "loss": 0.4196, "step": 6123 }, { "epoch": 0.4183917469426795, "grad_norm": 5.182959079742432, "learning_rate": 4.578465322521466e-06, "loss": 0.4495, "step": 6124 }, { "epoch": 0.4184600669536107, "grad_norm": 3.8354909420013428, "learning_rate": 4.5777284808805714e-06, "loss": 0.3664, "step": 6125 }, { "epoch": 0.4185283869645419, "grad_norm": 4.638780117034912, "learning_rate": 4.5769915864661715e-06, "loss": 0.4093, "step": 6126 }, { "epoch": 0.41859670697547313, "grad_norm": 2.8993136882781982, "learning_rate": 4.57625463931435e-06, "loss": 0.2935, "step": 6127 }, { "epoch": 0.41866502698640434, "grad_norm": 3.491058588027954, "learning_rate": 4.575517639461194e-06, "loss": 0.3406, "step": 6128 }, { "epoch": 0.4187333469973355, "grad_norm": 4.139122486114502, "learning_rate": 4.574780586942792e-06, "loss": 0.4858, "step": 6129 }, { "epoch": 0.4188016670082667, "grad_norm": 4.4255194664001465, "learning_rate": 4.574043481795237e-06, "loss": 0.3291, "step": 6130 }, { "epoch": 0.4188699870191979, "grad_norm": 3.0836338996887207, "learning_rate": 4.5733063240546205e-06, "loss": 0.3272, "step": 6131 }, { "epoch": 0.41893830703012913, "grad_norm": 4.220668315887451, "learning_rate": 4.57256911375704e-06, "loss": 0.4188, "step": 6132 }, { "epoch": 0.41900662704106034, "grad_norm": 6.0311479568481445, "learning_rate": 4.571831850938595e-06, "loss": 0.37, "step": 6133 }, { "epoch": 0.41907494705199155, "grad_norm": 3.629519462585449, "learning_rate": 4.571094535635388e-06, "loss": 0.2972, "step": 6134 }, { "epoch": 0.4191432670629227, "grad_norm": 3.384425163269043, "learning_rate": 4.570357167883522e-06, "loss": 0.3381, "step": 6135 }, { "epoch": 0.4192115870738539, "grad_norm": 4.096510410308838, "learning_rate": 4.5696197477191056e-06, "loss": 0.3376, "step": 6136 }, { "epoch": 0.41927990708478513, "grad_norm": 3.2335197925567627, "learning_rate": 4.568882275178247e-06, "loss": 0.3194, "step": 6137 }, { "epoch": 0.41934822709571634, "grad_norm": 4.923239707946777, "learning_rate": 4.568144750297058e-06, "loss": 0.3891, "step": 6138 }, { "epoch": 0.41941654710664755, "grad_norm": 4.092127323150635, "learning_rate": 4.567407173111654e-06, "loss": 0.4035, "step": 6139 }, { "epoch": 0.41948486711757876, "grad_norm": 3.291619300842285, "learning_rate": 4.566669543658154e-06, "loss": 0.3698, "step": 6140 }, { "epoch": 0.4195531871285099, "grad_norm": 3.14884877204895, "learning_rate": 4.565931861972673e-06, "loss": 0.3343, "step": 6141 }, { "epoch": 0.41962150713944113, "grad_norm": 3.071791410446167, "learning_rate": 4.565194128091339e-06, "loss": 0.2845, "step": 6142 }, { "epoch": 0.41968982715037234, "grad_norm": 2.8842673301696777, "learning_rate": 4.564456342050273e-06, "loss": 0.3029, "step": 6143 }, { "epoch": 0.41975814716130355, "grad_norm": 4.087090492248535, "learning_rate": 4.5637185038856025e-06, "loss": 0.3761, "step": 6144 }, { "epoch": 0.41982646717223476, "grad_norm": 4.811781883239746, "learning_rate": 4.562980613633458e-06, "loss": 0.3526, "step": 6145 }, { "epoch": 0.419894787183166, "grad_norm": 3.900906801223755, "learning_rate": 4.562242671329974e-06, "loss": 0.3924, "step": 6146 }, { "epoch": 0.41996310719409713, "grad_norm": 4.5689873695373535, "learning_rate": 4.5615046770112835e-06, "loss": 0.3675, "step": 6147 }, { "epoch": 0.42003142720502834, "grad_norm": 4.4257917404174805, "learning_rate": 4.560766630713524e-06, "loss": 0.4236, "step": 6148 }, { "epoch": 0.42009974721595955, "grad_norm": 3.075932025909424, "learning_rate": 4.560028532472838e-06, "loss": 0.2963, "step": 6149 }, { "epoch": 0.42016806722689076, "grad_norm": 4.606650352478027, "learning_rate": 4.559290382325366e-06, "loss": 0.4206, "step": 6150 }, { "epoch": 0.420236387237822, "grad_norm": 3.728989839553833, "learning_rate": 4.558552180307253e-06, "loss": 0.3509, "step": 6151 }, { "epoch": 0.4203047072487532, "grad_norm": 4.610903263092041, "learning_rate": 4.557813926454647e-06, "loss": 0.2802, "step": 6152 }, { "epoch": 0.42037302725968434, "grad_norm": 3.5911850929260254, "learning_rate": 4.5570756208037005e-06, "loss": 0.3581, "step": 6153 }, { "epoch": 0.42044134727061555, "grad_norm": 3.487294912338257, "learning_rate": 4.556337263390565e-06, "loss": 0.3324, "step": 6154 }, { "epoch": 0.42050966728154676, "grad_norm": 4.615320682525635, "learning_rate": 4.555598854251394e-06, "loss": 0.3406, "step": 6155 }, { "epoch": 0.420577987292478, "grad_norm": 3.057684898376465, "learning_rate": 4.554860393422349e-06, "loss": 0.2742, "step": 6156 }, { "epoch": 0.4206463073034092, "grad_norm": 3.115147590637207, "learning_rate": 4.554121880939587e-06, "loss": 0.3522, "step": 6157 }, { "epoch": 0.4207146273143404, "grad_norm": 3.699528932571411, "learning_rate": 4.553383316839273e-06, "loss": 0.3366, "step": 6158 }, { "epoch": 0.42078294732527155, "grad_norm": 3.5188956260681152, "learning_rate": 4.552644701157573e-06, "loss": 0.3718, "step": 6159 }, { "epoch": 0.42085126733620276, "grad_norm": 3.144881010055542, "learning_rate": 4.551906033930654e-06, "loss": 0.3547, "step": 6160 }, { "epoch": 0.42091958734713397, "grad_norm": 4.156063556671143, "learning_rate": 4.551167315194686e-06, "loss": 0.474, "step": 6161 }, { "epoch": 0.4209879073580652, "grad_norm": 4.9320068359375, "learning_rate": 4.550428544985844e-06, "loss": 0.4054, "step": 6162 }, { "epoch": 0.4210562273689964, "grad_norm": 3.3242592811584473, "learning_rate": 4.549689723340301e-06, "loss": 0.3552, "step": 6163 }, { "epoch": 0.4211245473799276, "grad_norm": 3.3507471084594727, "learning_rate": 4.5489508502942374e-06, "loss": 0.3712, "step": 6164 }, { "epoch": 0.42119286739085876, "grad_norm": 3.429124116897583, "learning_rate": 4.548211925883833e-06, "loss": 0.2232, "step": 6165 }, { "epoch": 0.42126118740178997, "grad_norm": 2.84952974319458, "learning_rate": 4.547472950145272e-06, "loss": 0.2406, "step": 6166 }, { "epoch": 0.4213295074127212, "grad_norm": 4.181464672088623, "learning_rate": 4.546733923114737e-06, "loss": 0.4051, "step": 6167 }, { "epoch": 0.4213978274236524, "grad_norm": 5.04991340637207, "learning_rate": 4.545994844828419e-06, "loss": 0.4, "step": 6168 }, { "epoch": 0.4214661474345836, "grad_norm": 4.63212776184082, "learning_rate": 4.545255715322509e-06, "loss": 0.2916, "step": 6169 }, { "epoch": 0.4215344674455148, "grad_norm": 3.7615373134613037, "learning_rate": 4.544516534633199e-06, "loss": 0.3263, "step": 6170 }, { "epoch": 0.42160278745644597, "grad_norm": 4.822882652282715, "learning_rate": 4.543777302796684e-06, "loss": 0.3293, "step": 6171 }, { "epoch": 0.4216711074673772, "grad_norm": 3.1964645385742188, "learning_rate": 4.543038019849164e-06, "loss": 0.2739, "step": 6172 }, { "epoch": 0.4217394274783084, "grad_norm": 5.816633701324463, "learning_rate": 4.54229868582684e-06, "loss": 0.3646, "step": 6173 }, { "epoch": 0.4218077474892396, "grad_norm": 3.7341227531433105, "learning_rate": 4.541559300765911e-06, "loss": 0.3011, "step": 6174 }, { "epoch": 0.4218760675001708, "grad_norm": 4.171807765960693, "learning_rate": 4.540819864702588e-06, "loss": 0.3749, "step": 6175 }, { "epoch": 0.421944387511102, "grad_norm": 5.133558750152588, "learning_rate": 4.540080377673077e-06, "loss": 0.477, "step": 6176 }, { "epoch": 0.4220127075220332, "grad_norm": 4.245529651641846, "learning_rate": 4.539340839713589e-06, "loss": 0.386, "step": 6177 }, { "epoch": 0.4220810275329644, "grad_norm": 3.443188190460205, "learning_rate": 4.538601250860335e-06, "loss": 0.2628, "step": 6178 }, { "epoch": 0.4221493475438956, "grad_norm": 3.273416519165039, "learning_rate": 4.537861611149535e-06, "loss": 0.3312, "step": 6179 }, { "epoch": 0.4222176675548268, "grad_norm": 5.161772727966309, "learning_rate": 4.537121920617404e-06, "loss": 0.3962, "step": 6180 }, { "epoch": 0.422285987565758, "grad_norm": 4.2114057540893555, "learning_rate": 4.536382179300163e-06, "loss": 0.4425, "step": 6181 }, { "epoch": 0.42235430757668924, "grad_norm": 3.930752754211426, "learning_rate": 4.535642387234037e-06, "loss": 0.3503, "step": 6182 }, { "epoch": 0.4224226275876204, "grad_norm": 4.473230838775635, "learning_rate": 4.534902544455251e-06, "loss": 0.3108, "step": 6183 }, { "epoch": 0.4224909475985516, "grad_norm": 4.449361801147461, "learning_rate": 4.53416265100003e-06, "loss": 0.4233, "step": 6184 }, { "epoch": 0.4225592676094828, "grad_norm": 2.738440990447998, "learning_rate": 4.533422706904609e-06, "loss": 0.2139, "step": 6185 }, { "epoch": 0.422627587620414, "grad_norm": 3.6636428833007812, "learning_rate": 4.532682712205218e-06, "loss": 0.3726, "step": 6186 }, { "epoch": 0.42269590763134524, "grad_norm": 4.53972864151001, "learning_rate": 4.531942666938095e-06, "loss": 0.4745, "step": 6187 }, { "epoch": 0.42276422764227645, "grad_norm": 3.5307955741882324, "learning_rate": 4.531202571139476e-06, "loss": 0.3217, "step": 6188 }, { "epoch": 0.4228325476532076, "grad_norm": 4.015519618988037, "learning_rate": 4.530462424845603e-06, "loss": 0.3725, "step": 6189 }, { "epoch": 0.4229008676641388, "grad_norm": 2.8872432708740234, "learning_rate": 4.5297222280927185e-06, "loss": 0.3069, "step": 6190 }, { "epoch": 0.42296918767507, "grad_norm": 3.803206443786621, "learning_rate": 4.528981980917068e-06, "loss": 0.3486, "step": 6191 }, { "epoch": 0.42303750768600124, "grad_norm": 3.51564884185791, "learning_rate": 4.528241683354899e-06, "loss": 0.3487, "step": 6192 }, { "epoch": 0.42310582769693245, "grad_norm": 2.7894363403320312, "learning_rate": 4.527501335442462e-06, "loss": 0.3675, "step": 6193 }, { "epoch": 0.42317414770786366, "grad_norm": 3.65130352973938, "learning_rate": 4.52676093721601e-06, "loss": 0.372, "step": 6194 }, { "epoch": 0.4232424677187948, "grad_norm": 3.3544681072235107, "learning_rate": 4.526020488711799e-06, "loss": 0.3405, "step": 6195 }, { "epoch": 0.423310787729726, "grad_norm": 4.324575901031494, "learning_rate": 4.525279989966086e-06, "loss": 0.4082, "step": 6196 }, { "epoch": 0.42337910774065723, "grad_norm": 3.4469237327575684, "learning_rate": 4.524539441015132e-06, "loss": 0.3201, "step": 6197 }, { "epoch": 0.42344742775158845, "grad_norm": 4.237051486968994, "learning_rate": 4.523798841895199e-06, "loss": 0.4217, "step": 6198 }, { "epoch": 0.42351574776251966, "grad_norm": 3.4772109985351562, "learning_rate": 4.5230581926425515e-06, "loss": 0.4043, "step": 6199 }, { "epoch": 0.42358406777345087, "grad_norm": 4.677580833435059, "learning_rate": 4.522317493293459e-06, "loss": 0.3471, "step": 6200 }, { "epoch": 0.423652387784382, "grad_norm": 5.254188537597656, "learning_rate": 4.521576743884191e-06, "loss": 0.4712, "step": 6201 }, { "epoch": 0.42372070779531323, "grad_norm": 3.6909868717193604, "learning_rate": 4.520835944451018e-06, "loss": 0.3132, "step": 6202 }, { "epoch": 0.42378902780624444, "grad_norm": 3.9463300704956055, "learning_rate": 4.5200950950302186e-06, "loss": 0.3633, "step": 6203 }, { "epoch": 0.42385734781717566, "grad_norm": 3.125269651412964, "learning_rate": 4.519354195658067e-06, "loss": 0.3155, "step": 6204 }, { "epoch": 0.42392566782810687, "grad_norm": 3.662342071533203, "learning_rate": 4.518613246370844e-06, "loss": 0.3506, "step": 6205 }, { "epoch": 0.4239939878390381, "grad_norm": 4.058374404907227, "learning_rate": 4.517872247204833e-06, "loss": 0.3129, "step": 6206 }, { "epoch": 0.42406230784996923, "grad_norm": 4.142981052398682, "learning_rate": 4.517131198196318e-06, "loss": 0.3837, "step": 6207 }, { "epoch": 0.42413062786090044, "grad_norm": 3.562633752822876, "learning_rate": 4.516390099381585e-06, "loss": 0.2518, "step": 6208 }, { "epoch": 0.42419894787183166, "grad_norm": 3.2984707355499268, "learning_rate": 4.515648950796927e-06, "loss": 0.3168, "step": 6209 }, { "epoch": 0.42426726788276287, "grad_norm": 3.258262872695923, "learning_rate": 4.514907752478632e-06, "loss": 0.2787, "step": 6210 }, { "epoch": 0.4243355878936941, "grad_norm": 5.1809916496276855, "learning_rate": 4.514166504462997e-06, "loss": 0.2773, "step": 6211 }, { "epoch": 0.4244039079046253, "grad_norm": 4.508866310119629, "learning_rate": 4.513425206786319e-06, "loss": 0.3538, "step": 6212 }, { "epoch": 0.42447222791555644, "grad_norm": 4.327610492706299, "learning_rate": 4.5126838594848976e-06, "loss": 0.3772, "step": 6213 }, { "epoch": 0.42454054792648765, "grad_norm": 4.795812129974365, "learning_rate": 4.511942462595033e-06, "loss": 0.3991, "step": 6214 }, { "epoch": 0.42460886793741887, "grad_norm": 3.7273199558258057, "learning_rate": 4.511201016153029e-06, "loss": 0.2998, "step": 6215 }, { "epoch": 0.4246771879483501, "grad_norm": 4.712002277374268, "learning_rate": 4.5104595201951945e-06, "loss": 0.4209, "step": 6216 }, { "epoch": 0.4247455079592813, "grad_norm": 3.8418257236480713, "learning_rate": 4.509717974757838e-06, "loss": 0.3604, "step": 6217 }, { "epoch": 0.4248138279702125, "grad_norm": 3.957170009613037, "learning_rate": 4.50897637987727e-06, "loss": 0.4056, "step": 6218 }, { "epoch": 0.42488214798114365, "grad_norm": 4.620532035827637, "learning_rate": 4.508234735589806e-06, "loss": 0.3517, "step": 6219 }, { "epoch": 0.42495046799207487, "grad_norm": 4.264073848724365, "learning_rate": 4.50749304193176e-06, "loss": 0.3353, "step": 6220 }, { "epoch": 0.4250187880030061, "grad_norm": 4.174532413482666, "learning_rate": 4.506751298939453e-06, "loss": 0.3808, "step": 6221 }, { "epoch": 0.4250871080139373, "grad_norm": 4.128157615661621, "learning_rate": 4.506009506649205e-06, "loss": 0.4264, "step": 6222 }, { "epoch": 0.4251554280248685, "grad_norm": 3.944690227508545, "learning_rate": 4.505267665097341e-06, "loss": 0.3175, "step": 6223 }, { "epoch": 0.4252237480357997, "grad_norm": 4.34605598449707, "learning_rate": 4.504525774320184e-06, "loss": 0.3173, "step": 6224 }, { "epoch": 0.42529206804673086, "grad_norm": 4.312792778015137, "learning_rate": 4.503783834354066e-06, "loss": 0.4306, "step": 6225 }, { "epoch": 0.4253603880576621, "grad_norm": 3.4291296005249023, "learning_rate": 4.503041845235316e-06, "loss": 0.2533, "step": 6226 }, { "epoch": 0.4254287080685933, "grad_norm": 4.596522808074951, "learning_rate": 4.502299807000267e-06, "loss": 0.3721, "step": 6227 }, { "epoch": 0.4254970280795245, "grad_norm": 4.061675071716309, "learning_rate": 4.5015577196852554e-06, "loss": 0.382, "step": 6228 }, { "epoch": 0.4255653480904557, "grad_norm": 4.830687522888184, "learning_rate": 4.500815583326619e-06, "loss": 0.3333, "step": 6229 }, { "epoch": 0.4256336681013869, "grad_norm": 5.157603740692139, "learning_rate": 4.500073397960698e-06, "loss": 0.413, "step": 6230 }, { "epoch": 0.4257019881123181, "grad_norm": 4.080501079559326, "learning_rate": 4.4993311636238355e-06, "loss": 0.2847, "step": 6231 }, { "epoch": 0.4257703081232493, "grad_norm": 4.026567459106445, "learning_rate": 4.498588880352377e-06, "loss": 0.3283, "step": 6232 }, { "epoch": 0.4258386281341805, "grad_norm": 4.700159549713135, "learning_rate": 4.49784654818267e-06, "loss": 0.3987, "step": 6233 }, { "epoch": 0.4259069481451117, "grad_norm": 3.494776487350464, "learning_rate": 4.497104167151063e-06, "loss": 0.3887, "step": 6234 }, { "epoch": 0.4259752681560429, "grad_norm": 3.432251214981079, "learning_rate": 4.496361737293911e-06, "loss": 0.3242, "step": 6235 }, { "epoch": 0.42604358816697413, "grad_norm": 3.499603271484375, "learning_rate": 4.495619258647568e-06, "loss": 0.2877, "step": 6236 }, { "epoch": 0.4261119081779053, "grad_norm": 4.477941513061523, "learning_rate": 4.49487673124839e-06, "loss": 0.4525, "step": 6237 }, { "epoch": 0.4261802281888365, "grad_norm": 3.1039209365844727, "learning_rate": 4.494134155132737e-06, "loss": 0.2881, "step": 6238 }, { "epoch": 0.4262485481997677, "grad_norm": 3.4759111404418945, "learning_rate": 4.493391530336973e-06, "loss": 0.3101, "step": 6239 }, { "epoch": 0.4263168682106989, "grad_norm": 4.674259662628174, "learning_rate": 4.49264885689746e-06, "loss": 0.3997, "step": 6240 }, { "epoch": 0.42638518822163013, "grad_norm": 3.6946909427642822, "learning_rate": 4.491906134850565e-06, "loss": 0.253, "step": 6241 }, { "epoch": 0.42645350823256134, "grad_norm": 3.5080277919769287, "learning_rate": 4.491163364232658e-06, "loss": 0.2445, "step": 6242 }, { "epoch": 0.4265218282434925, "grad_norm": 3.802396059036255, "learning_rate": 4.49042054508011e-06, "loss": 0.3128, "step": 6243 }, { "epoch": 0.4265901482544237, "grad_norm": 3.959841012954712, "learning_rate": 4.4896776774292956e-06, "loss": 0.3318, "step": 6244 }, { "epoch": 0.4266584682653549, "grad_norm": 4.11704683303833, "learning_rate": 4.48893476131659e-06, "loss": 0.3693, "step": 6245 }, { "epoch": 0.42672678827628613, "grad_norm": 4.943748474121094, "learning_rate": 4.488191796778373e-06, "loss": 0.4417, "step": 6246 }, { "epoch": 0.42679510828721734, "grad_norm": 5.805783748626709, "learning_rate": 4.487448783851025e-06, "loss": 0.3747, "step": 6247 }, { "epoch": 0.42686342829814855, "grad_norm": 3.207944869995117, "learning_rate": 4.486705722570928e-06, "loss": 0.2495, "step": 6248 }, { "epoch": 0.4269317483090797, "grad_norm": 3.4975638389587402, "learning_rate": 4.48596261297447e-06, "loss": 0.281, "step": 6249 }, { "epoch": 0.4270000683200109, "grad_norm": 4.493081092834473, "learning_rate": 4.485219455098039e-06, "loss": 0.3395, "step": 6250 }, { "epoch": 0.42706838833094213, "grad_norm": 5.0575408935546875, "learning_rate": 4.484476248978023e-06, "loss": 0.395, "step": 6251 }, { "epoch": 0.42713670834187334, "grad_norm": 4.6967668533325195, "learning_rate": 4.483732994650818e-06, "loss": 0.3518, "step": 6252 }, { "epoch": 0.42720502835280455, "grad_norm": 4.028919219970703, "learning_rate": 4.482989692152816e-06, "loss": 0.4174, "step": 6253 }, { "epoch": 0.42727334836373576, "grad_norm": 6.282420635223389, "learning_rate": 4.482246341520417e-06, "loss": 0.4721, "step": 6254 }, { "epoch": 0.4273416683746669, "grad_norm": 4.148308753967285, "learning_rate": 4.48150294279002e-06, "loss": 0.3106, "step": 6255 }, { "epoch": 0.4274099883855981, "grad_norm": 3.8716557025909424, "learning_rate": 4.480759495998028e-06, "loss": 0.3823, "step": 6256 }, { "epoch": 0.42747830839652934, "grad_norm": 4.717145919799805, "learning_rate": 4.480016001180844e-06, "loss": 0.2976, "step": 6257 }, { "epoch": 0.42754662840746055, "grad_norm": 3.546483278274536, "learning_rate": 4.479272458374877e-06, "loss": 0.3328, "step": 6258 }, { "epoch": 0.42761494841839176, "grad_norm": 3.6984806060791016, "learning_rate": 4.478528867616535e-06, "loss": 0.3247, "step": 6259 }, { "epoch": 0.42768326842932297, "grad_norm": 3.6118674278259277, "learning_rate": 4.47778522894223e-06, "loss": 0.3253, "step": 6260 }, { "epoch": 0.4277515884402541, "grad_norm": 5.03029727935791, "learning_rate": 4.477041542388375e-06, "loss": 0.3815, "step": 6261 }, { "epoch": 0.42781990845118534, "grad_norm": 3.942239761352539, "learning_rate": 4.4762978079913886e-06, "loss": 0.3419, "step": 6262 }, { "epoch": 0.42788822846211655, "grad_norm": 4.047852039337158, "learning_rate": 4.475554025787688e-06, "loss": 0.474, "step": 6263 }, { "epoch": 0.42795654847304776, "grad_norm": 3.55047607421875, "learning_rate": 4.474810195813694e-06, "loss": 0.4174, "step": 6264 }, { "epoch": 0.42802486848397897, "grad_norm": 3.599376916885376, "learning_rate": 4.47406631810583e-06, "loss": 0.3401, "step": 6265 }, { "epoch": 0.4280931884949102, "grad_norm": 5.925895690917969, "learning_rate": 4.473322392700523e-06, "loss": 0.5076, "step": 6266 }, { "epoch": 0.42816150850584134, "grad_norm": 4.949873924255371, "learning_rate": 4.4725784196342e-06, "loss": 0.4975, "step": 6267 }, { "epoch": 0.42822982851677255, "grad_norm": 3.254075765609741, "learning_rate": 4.471834398943293e-06, "loss": 0.315, "step": 6268 }, { "epoch": 0.42829814852770376, "grad_norm": 3.0930871963500977, "learning_rate": 4.471090330664231e-06, "loss": 0.3237, "step": 6269 }, { "epoch": 0.42836646853863497, "grad_norm": 5.150807857513428, "learning_rate": 4.470346214833453e-06, "loss": 0.4027, "step": 6270 }, { "epoch": 0.4284347885495662, "grad_norm": 3.2735002040863037, "learning_rate": 4.469602051487393e-06, "loss": 0.3205, "step": 6271 }, { "epoch": 0.4285031085604974, "grad_norm": 4.253270626068115, "learning_rate": 4.468857840662495e-06, "loss": 0.3788, "step": 6272 }, { "epoch": 0.42857142857142855, "grad_norm": 2.77630877494812, "learning_rate": 4.468113582395197e-06, "loss": 0.253, "step": 6273 }, { "epoch": 0.42863974858235976, "grad_norm": 3.902312994003296, "learning_rate": 4.4673692767219445e-06, "loss": 0.2935, "step": 6274 }, { "epoch": 0.42870806859329097, "grad_norm": 5.703765392303467, "learning_rate": 4.4666249236791845e-06, "loss": 0.594, "step": 6275 }, { "epoch": 0.4287763886042222, "grad_norm": 4.105781078338623, "learning_rate": 4.465880523303367e-06, "loss": 0.374, "step": 6276 }, { "epoch": 0.4288447086151534, "grad_norm": 3.169947862625122, "learning_rate": 4.465136075630941e-06, "loss": 0.2833, "step": 6277 }, { "epoch": 0.4289130286260846, "grad_norm": 3.8141896724700928, "learning_rate": 4.464391580698363e-06, "loss": 0.3394, "step": 6278 }, { "epoch": 0.42898134863701576, "grad_norm": 3.2168731689453125, "learning_rate": 4.463647038542086e-06, "loss": 0.3194, "step": 6279 }, { "epoch": 0.42904966864794697, "grad_norm": 3.7831127643585205, "learning_rate": 4.462902449198572e-06, "loss": 0.439, "step": 6280 }, { "epoch": 0.4291179886588782, "grad_norm": 4.467616081237793, "learning_rate": 4.462157812704276e-06, "loss": 0.3351, "step": 6281 }, { "epoch": 0.4291863086698094, "grad_norm": 4.0609846115112305, "learning_rate": 4.461413129095666e-06, "loss": 0.3429, "step": 6282 }, { "epoch": 0.4292546286807406, "grad_norm": 2.7034595012664795, "learning_rate": 4.460668398409205e-06, "loss": 0.2423, "step": 6283 }, { "epoch": 0.4293229486916718, "grad_norm": 2.928086996078491, "learning_rate": 4.45992362068136e-06, "loss": 0.2407, "step": 6284 }, { "epoch": 0.42939126870260297, "grad_norm": 3.7764475345611572, "learning_rate": 4.459178795948603e-06, "loss": 0.3524, "step": 6285 }, { "epoch": 0.4294595887135342, "grad_norm": 3.3183224201202393, "learning_rate": 4.458433924247404e-06, "loss": 0.2571, "step": 6286 }, { "epoch": 0.4295279087244654, "grad_norm": 4.398226261138916, "learning_rate": 4.457689005614237e-06, "loss": 0.3713, "step": 6287 }, { "epoch": 0.4295962287353966, "grad_norm": 4.255239486694336, "learning_rate": 4.456944040085582e-06, "loss": 0.2996, "step": 6288 }, { "epoch": 0.4296645487463278, "grad_norm": 4.3415961265563965, "learning_rate": 4.456199027697913e-06, "loss": 0.3002, "step": 6289 }, { "epoch": 0.429732868757259, "grad_norm": 3.606062650680542, "learning_rate": 4.4554539684877155e-06, "loss": 0.41, "step": 6290 }, { "epoch": 0.4298011887681902, "grad_norm": 4.6488871574401855, "learning_rate": 4.4547088624914714e-06, "loss": 0.3946, "step": 6291 }, { "epoch": 0.4298695087791214, "grad_norm": 3.7095232009887695, "learning_rate": 4.453963709745667e-06, "loss": 0.3465, "step": 6292 }, { "epoch": 0.4299378287900526, "grad_norm": 3.0371487140655518, "learning_rate": 4.4532185102867905e-06, "loss": 0.3262, "step": 6293 }, { "epoch": 0.4300061488009838, "grad_norm": 4.369150161743164, "learning_rate": 4.4524732641513315e-06, "loss": 0.3362, "step": 6294 }, { "epoch": 0.430074468811915, "grad_norm": 3.1133813858032227, "learning_rate": 4.451727971375782e-06, "loss": 0.2861, "step": 6295 }, { "epoch": 0.43014278882284623, "grad_norm": 3.429049491882324, "learning_rate": 4.450982631996641e-06, "loss": 0.2748, "step": 6296 }, { "epoch": 0.4302111088337774, "grad_norm": 4.539577007293701, "learning_rate": 4.450237246050399e-06, "loss": 0.3933, "step": 6297 }, { "epoch": 0.4302794288447086, "grad_norm": 4.362136363983154, "learning_rate": 4.4494918135735635e-06, "loss": 0.3762, "step": 6298 }, { "epoch": 0.4303477488556398, "grad_norm": 4.941337585449219, "learning_rate": 4.448746334602631e-06, "loss": 0.3597, "step": 6299 }, { "epoch": 0.430416068866571, "grad_norm": 5.19799280166626, "learning_rate": 4.448000809174107e-06, "loss": 0.4198, "step": 6300 }, { "epoch": 0.43048438887750223, "grad_norm": 4.28462028503418, "learning_rate": 4.447255237324497e-06, "loss": 0.2798, "step": 6301 }, { "epoch": 0.43055270888843344, "grad_norm": 3.4017858505249023, "learning_rate": 4.446509619090313e-06, "loss": 0.3217, "step": 6302 }, { "epoch": 0.4306210288993646, "grad_norm": 3.1410908699035645, "learning_rate": 4.445763954508062e-06, "loss": 0.4224, "step": 6303 }, { "epoch": 0.4306893489102958, "grad_norm": 4.574371814727783, "learning_rate": 4.44501824361426e-06, "loss": 0.3408, "step": 6304 }, { "epoch": 0.430757668921227, "grad_norm": 3.6957356929779053, "learning_rate": 4.44427248644542e-06, "loss": 0.2817, "step": 6305 }, { "epoch": 0.43082598893215823, "grad_norm": 3.66221022605896, "learning_rate": 4.443526683038062e-06, "loss": 0.2994, "step": 6306 }, { "epoch": 0.43089430894308944, "grad_norm": 3.6297688484191895, "learning_rate": 4.442780833428704e-06, "loss": 0.3946, "step": 6307 }, { "epoch": 0.43096262895402065, "grad_norm": 3.8651928901672363, "learning_rate": 4.4420349376538705e-06, "loss": 0.34, "step": 6308 }, { "epoch": 0.4310309489649518, "grad_norm": 5.3123908042907715, "learning_rate": 4.441288995750085e-06, "loss": 0.382, "step": 6309 }, { "epoch": 0.431099268975883, "grad_norm": 3.400104522705078, "learning_rate": 4.440543007753873e-06, "loss": 0.3279, "step": 6310 }, { "epoch": 0.43116758898681423, "grad_norm": 2.621371269226074, "learning_rate": 4.439796973701766e-06, "loss": 0.305, "step": 6311 }, { "epoch": 0.43123590899774544, "grad_norm": 3.7220585346221924, "learning_rate": 4.439050893630293e-06, "loss": 0.3408, "step": 6312 }, { "epoch": 0.43130422900867665, "grad_norm": 3.690514087677002, "learning_rate": 4.4383047675759896e-06, "loss": 0.2529, "step": 6313 }, { "epoch": 0.43137254901960786, "grad_norm": 3.8826076984405518, "learning_rate": 4.43755859557539e-06, "loss": 0.2883, "step": 6314 }, { "epoch": 0.431440869030539, "grad_norm": 5.191251277923584, "learning_rate": 4.4368123776650334e-06, "loss": 0.4119, "step": 6315 }, { "epoch": 0.43150918904147023, "grad_norm": 4.394298076629639, "learning_rate": 4.43606611388146e-06, "loss": 0.4971, "step": 6316 }, { "epoch": 0.43157750905240144, "grad_norm": 3.367286205291748, "learning_rate": 4.4353198042612105e-06, "loss": 0.3888, "step": 6317 }, { "epoch": 0.43164582906333265, "grad_norm": 3.991060972213745, "learning_rate": 4.434573448840831e-06, "loss": 0.3585, "step": 6318 }, { "epoch": 0.43171414907426386, "grad_norm": 3.932443857192993, "learning_rate": 4.433827047656871e-06, "loss": 0.4727, "step": 6319 }, { "epoch": 0.4317824690851951, "grad_norm": 4.218954563140869, "learning_rate": 4.433080600745875e-06, "loss": 0.302, "step": 6320 }, { "epoch": 0.43185078909612623, "grad_norm": 4.562566757202148, "learning_rate": 4.4323341081444e-06, "loss": 0.4428, "step": 6321 }, { "epoch": 0.43191910910705744, "grad_norm": 4.4192705154418945, "learning_rate": 4.431587569888994e-06, "loss": 0.312, "step": 6322 }, { "epoch": 0.43198742911798865, "grad_norm": 3.6828513145446777, "learning_rate": 4.430840986016217e-06, "loss": 0.4314, "step": 6323 }, { "epoch": 0.43205574912891986, "grad_norm": 3.4926908016204834, "learning_rate": 4.430094356562626e-06, "loss": 0.3027, "step": 6324 }, { "epoch": 0.4321240691398511, "grad_norm": 4.226521968841553, "learning_rate": 4.429347681564781e-06, "loss": 0.3366, "step": 6325 }, { "epoch": 0.4321923891507823, "grad_norm": 5.003610610961914, "learning_rate": 4.4286009610592455e-06, "loss": 0.3582, "step": 6326 }, { "epoch": 0.43226070916171344, "grad_norm": 2.7428343296051025, "learning_rate": 4.427854195082584e-06, "loss": 0.3008, "step": 6327 }, { "epoch": 0.43232902917264465, "grad_norm": 4.479712009429932, "learning_rate": 4.427107383671364e-06, "loss": 0.4317, "step": 6328 }, { "epoch": 0.43239734918357586, "grad_norm": 4.052872657775879, "learning_rate": 4.426360526862155e-06, "loss": 0.3448, "step": 6329 }, { "epoch": 0.4324656691945071, "grad_norm": 4.100121021270752, "learning_rate": 4.425613624691528e-06, "loss": 0.4311, "step": 6330 }, { "epoch": 0.4325339892054383, "grad_norm": 3.7537310123443604, "learning_rate": 4.424866677196057e-06, "loss": 0.3067, "step": 6331 }, { "epoch": 0.4326023092163695, "grad_norm": 4.520216464996338, "learning_rate": 4.424119684412317e-06, "loss": 0.3728, "step": 6332 }, { "epoch": 0.43267062922730065, "grad_norm": 2.8104445934295654, "learning_rate": 4.423372646376888e-06, "loss": 0.3008, "step": 6333 }, { "epoch": 0.43273894923823186, "grad_norm": 4.038681507110596, "learning_rate": 4.4226255631263505e-06, "loss": 0.3992, "step": 6334 }, { "epoch": 0.4328072692491631, "grad_norm": 3.5217204093933105, "learning_rate": 4.421878434697286e-06, "loss": 0.283, "step": 6335 }, { "epoch": 0.4328755892600943, "grad_norm": 3.933466672897339, "learning_rate": 4.4211312611262804e-06, "loss": 0.359, "step": 6336 }, { "epoch": 0.4329439092710255, "grad_norm": 4.445262432098389, "learning_rate": 4.42038404244992e-06, "loss": 0.3551, "step": 6337 }, { "epoch": 0.4330122292819567, "grad_norm": 3.8528025150299072, "learning_rate": 4.419636778704794e-06, "loss": 0.3513, "step": 6338 }, { "epoch": 0.43308054929288786, "grad_norm": 3.7273242473602295, "learning_rate": 4.418889469927495e-06, "loss": 0.3046, "step": 6339 }, { "epoch": 0.4331488693038191, "grad_norm": 3.9004459381103516, "learning_rate": 4.418142116154615e-06, "loss": 0.3583, "step": 6340 }, { "epoch": 0.4332171893147503, "grad_norm": 4.256470680236816, "learning_rate": 4.417394717422752e-06, "loss": 0.4041, "step": 6341 }, { "epoch": 0.4332855093256815, "grad_norm": 3.5624735355377197, "learning_rate": 4.416647273768503e-06, "loss": 0.3332, "step": 6342 }, { "epoch": 0.4333538293366127, "grad_norm": 3.894394636154175, "learning_rate": 4.415899785228467e-06, "loss": 0.3912, "step": 6343 }, { "epoch": 0.4334221493475439, "grad_norm": 3.119154691696167, "learning_rate": 4.41515225183925e-06, "loss": 0.3063, "step": 6344 }, { "epoch": 0.4334904693584751, "grad_norm": 3.704878568649292, "learning_rate": 4.414404673637454e-06, "loss": 0.4123, "step": 6345 }, { "epoch": 0.4335587893694063, "grad_norm": 3.045889377593994, "learning_rate": 4.413657050659687e-06, "loss": 0.2923, "step": 6346 }, { "epoch": 0.4336271093803375, "grad_norm": 4.096713542938232, "learning_rate": 4.412909382942556e-06, "loss": 0.4707, "step": 6347 }, { "epoch": 0.4336954293912687, "grad_norm": 3.0033700466156006, "learning_rate": 4.412161670522676e-06, "loss": 0.2637, "step": 6348 }, { "epoch": 0.4337637494021999, "grad_norm": 3.9592576026916504, "learning_rate": 4.411413913436658e-06, "loss": 0.3412, "step": 6349 }, { "epoch": 0.4338320694131311, "grad_norm": 3.327359199523926, "learning_rate": 4.410666111721118e-06, "loss": 0.344, "step": 6350 }, { "epoch": 0.4339003894240623, "grad_norm": 3.1176843643188477, "learning_rate": 4.409918265412674e-06, "loss": 0.2811, "step": 6351 }, { "epoch": 0.4339687094349935, "grad_norm": 4.359914779663086, "learning_rate": 4.409170374547947e-06, "loss": 0.4046, "step": 6352 }, { "epoch": 0.4340370294459247, "grad_norm": 4.526219367980957, "learning_rate": 4.408422439163558e-06, "loss": 0.4129, "step": 6353 }, { "epoch": 0.4341053494568559, "grad_norm": 4.216360569000244, "learning_rate": 4.407674459296131e-06, "loss": 0.4112, "step": 6354 }, { "epoch": 0.4341736694677871, "grad_norm": 3.910053253173828, "learning_rate": 4.406926434982294e-06, "loss": 0.3645, "step": 6355 }, { "epoch": 0.43424198947871834, "grad_norm": 3.717975378036499, "learning_rate": 4.4061783662586745e-06, "loss": 0.3078, "step": 6356 }, { "epoch": 0.4343103094896495, "grad_norm": 4.294404983520508, "learning_rate": 4.405430253161906e-06, "loss": 0.2668, "step": 6357 }, { "epoch": 0.4343786295005807, "grad_norm": 5.95176887512207, "learning_rate": 4.404682095728617e-06, "loss": 0.3698, "step": 6358 }, { "epoch": 0.4344469495115119, "grad_norm": 4.415122985839844, "learning_rate": 4.403933893995448e-06, "loss": 0.2936, "step": 6359 }, { "epoch": 0.4345152695224431, "grad_norm": 4.041932106018066, "learning_rate": 4.403185647999033e-06, "loss": 0.3473, "step": 6360 }, { "epoch": 0.43458358953337434, "grad_norm": 3.296855926513672, "learning_rate": 4.402437357776013e-06, "loss": 0.3235, "step": 6361 }, { "epoch": 0.43465190954430555, "grad_norm": 3.643307685852051, "learning_rate": 4.401689023363028e-06, "loss": 0.4106, "step": 6362 }, { "epoch": 0.4347202295552367, "grad_norm": 4.1153645515441895, "learning_rate": 4.400940644796724e-06, "loss": 0.419, "step": 6363 }, { "epoch": 0.4347885495661679, "grad_norm": 3.7682926654815674, "learning_rate": 4.400192222113747e-06, "loss": 0.3852, "step": 6364 }, { "epoch": 0.4348568695770991, "grad_norm": 4.30622673034668, "learning_rate": 4.399443755350745e-06, "loss": 0.366, "step": 6365 }, { "epoch": 0.43492518958803034, "grad_norm": 3.39154314994812, "learning_rate": 4.398695244544366e-06, "loss": 0.3112, "step": 6366 }, { "epoch": 0.43499350959896155, "grad_norm": 2.830622434616089, "learning_rate": 4.397946689731267e-06, "loss": 0.3383, "step": 6367 }, { "epoch": 0.43506182960989276, "grad_norm": 3.807711362838745, "learning_rate": 4.3971980909481e-06, "loss": 0.33, "step": 6368 }, { "epoch": 0.4351301496208239, "grad_norm": 4.000731468200684, "learning_rate": 4.396449448231522e-06, "loss": 0.4126, "step": 6369 }, { "epoch": 0.4351984696317551, "grad_norm": 4.291138648986816, "learning_rate": 4.395700761618191e-06, "loss": 0.4326, "step": 6370 }, { "epoch": 0.43526678964268634, "grad_norm": 3.6678714752197266, "learning_rate": 4.394952031144772e-06, "loss": 0.3262, "step": 6371 }, { "epoch": 0.43533510965361755, "grad_norm": 2.5441458225250244, "learning_rate": 4.394203256847924e-06, "loss": 0.3262, "step": 6372 }, { "epoch": 0.43540342966454876, "grad_norm": 4.064444065093994, "learning_rate": 4.393454438764316e-06, "loss": 0.3756, "step": 6373 }, { "epoch": 0.43547174967547997, "grad_norm": 2.9340767860412598, "learning_rate": 4.392705576930613e-06, "loss": 0.3521, "step": 6374 }, { "epoch": 0.4355400696864111, "grad_norm": 3.6956427097320557, "learning_rate": 4.391956671383486e-06, "loss": 0.2702, "step": 6375 }, { "epoch": 0.43560838969734234, "grad_norm": 4.677330493927002, "learning_rate": 4.391207722159607e-06, "loss": 0.2798, "step": 6376 }, { "epoch": 0.43567670970827355, "grad_norm": 4.488933086395264, "learning_rate": 4.3904587292956495e-06, "loss": 0.4495, "step": 6377 }, { "epoch": 0.43574502971920476, "grad_norm": 3.005025863647461, "learning_rate": 4.389709692828292e-06, "loss": 0.2795, "step": 6378 }, { "epoch": 0.43581334973013597, "grad_norm": 4.114795207977295, "learning_rate": 4.388960612794208e-06, "loss": 0.248, "step": 6379 }, { "epoch": 0.4358816697410672, "grad_norm": 3.691802740097046, "learning_rate": 4.388211489230082e-06, "loss": 0.3291, "step": 6380 }, { "epoch": 0.43594998975199833, "grad_norm": 3.601647138595581, "learning_rate": 4.387462322172595e-06, "loss": 0.3705, "step": 6381 }, { "epoch": 0.43601830976292955, "grad_norm": 3.5533697605133057, "learning_rate": 4.386713111658433e-06, "loss": 0.3601, "step": 6382 }, { "epoch": 0.43608662977386076, "grad_norm": 3.9266998767852783, "learning_rate": 4.3859638577242815e-06, "loss": 0.3587, "step": 6383 }, { "epoch": 0.43615494978479197, "grad_norm": 3.935410499572754, "learning_rate": 4.38521456040683e-06, "loss": 0.3948, "step": 6384 }, { "epoch": 0.4362232697957232, "grad_norm": 3.7638306617736816, "learning_rate": 4.38446521974277e-06, "loss": 0.3678, "step": 6385 }, { "epoch": 0.4362915898066544, "grad_norm": 4.609556674957275, "learning_rate": 4.383715835768793e-06, "loss": 0.4283, "step": 6386 }, { "epoch": 0.43635990981758555, "grad_norm": 4.213557243347168, "learning_rate": 4.3829664085215964e-06, "loss": 0.3022, "step": 6387 }, { "epoch": 0.43642822982851676, "grad_norm": 4.45639705657959, "learning_rate": 4.382216938037878e-06, "loss": 0.2889, "step": 6388 }, { "epoch": 0.43649654983944797, "grad_norm": 3.611260414123535, "learning_rate": 4.381467424354335e-06, "loss": 0.3208, "step": 6389 }, { "epoch": 0.4365648698503792, "grad_norm": 3.887040138244629, "learning_rate": 4.38071786750767e-06, "loss": 0.4028, "step": 6390 }, { "epoch": 0.4366331898613104, "grad_norm": 4.251737594604492, "learning_rate": 4.3799682675345885e-06, "loss": 0.2942, "step": 6391 }, { "epoch": 0.4367015098722416, "grad_norm": 4.000924587249756, "learning_rate": 4.379218624471795e-06, "loss": 0.2654, "step": 6392 }, { "epoch": 0.43676982988317276, "grad_norm": 3.2073540687561035, "learning_rate": 4.3784689383559955e-06, "loss": 0.3385, "step": 6393 }, { "epoch": 0.43683814989410397, "grad_norm": 4.088994979858398, "learning_rate": 4.377719209223905e-06, "loss": 0.4847, "step": 6394 }, { "epoch": 0.4369064699050352, "grad_norm": 5.1173787117004395, "learning_rate": 4.376969437112232e-06, "loss": 0.3664, "step": 6395 }, { "epoch": 0.4369747899159664, "grad_norm": 5.060575008392334, "learning_rate": 4.3762196220576905e-06, "loss": 0.3661, "step": 6396 }, { "epoch": 0.4370431099268976, "grad_norm": 3.461535692214966, "learning_rate": 4.375469764097e-06, "loss": 0.3208, "step": 6397 }, { "epoch": 0.4371114299378288, "grad_norm": 5.571994304656982, "learning_rate": 4.374719863266878e-06, "loss": 0.3735, "step": 6398 }, { "epoch": 0.43717974994875997, "grad_norm": 4.4378275871276855, "learning_rate": 4.3739699196040436e-06, "loss": 0.4204, "step": 6399 }, { "epoch": 0.4372480699596912, "grad_norm": 4.037461280822754, "learning_rate": 4.373219933145219e-06, "loss": 0.3109, "step": 6400 }, { "epoch": 0.4373163899706224, "grad_norm": 4.285742282867432, "learning_rate": 4.3724699039271325e-06, "loss": 0.3662, "step": 6401 }, { "epoch": 0.4373847099815536, "grad_norm": 3.8884594440460205, "learning_rate": 4.371719831986508e-06, "loss": 0.4238, "step": 6402 }, { "epoch": 0.4374530299924848, "grad_norm": 4.7665486335754395, "learning_rate": 4.3709697173600766e-06, "loss": 0.3957, "step": 6403 }, { "epoch": 0.437521350003416, "grad_norm": 3.9964711666107178, "learning_rate": 4.3702195600845685e-06, "loss": 0.297, "step": 6404 }, { "epoch": 0.4375896700143472, "grad_norm": 4.097436428070068, "learning_rate": 4.369469360196716e-06, "loss": 0.3989, "step": 6405 }, { "epoch": 0.4376579900252784, "grad_norm": 3.547297477722168, "learning_rate": 4.368719117733255e-06, "loss": 0.3221, "step": 6406 }, { "epoch": 0.4377263100362096, "grad_norm": 3.768794536590576, "learning_rate": 4.367968832730924e-06, "loss": 0.4007, "step": 6407 }, { "epoch": 0.4377946300471408, "grad_norm": 3.406428098678589, "learning_rate": 4.367218505226461e-06, "loss": 0.3635, "step": 6408 }, { "epoch": 0.437862950058072, "grad_norm": 3.7752718925476074, "learning_rate": 4.366468135256608e-06, "loss": 0.3889, "step": 6409 }, { "epoch": 0.43793127006900323, "grad_norm": 4.630050182342529, "learning_rate": 4.3657177228581105e-06, "loss": 0.4282, "step": 6410 }, { "epoch": 0.4379995900799344, "grad_norm": 4.494342803955078, "learning_rate": 4.36496726806771e-06, "loss": 0.5071, "step": 6411 }, { "epoch": 0.4380679100908656, "grad_norm": 3.8767476081848145, "learning_rate": 4.364216770922158e-06, "loss": 0.3727, "step": 6412 }, { "epoch": 0.4381362301017968, "grad_norm": 4.478272914886475, "learning_rate": 4.363466231458204e-06, "loss": 0.3978, "step": 6413 }, { "epoch": 0.438204550112728, "grad_norm": 4.506673812866211, "learning_rate": 4.362715649712598e-06, "loss": 0.363, "step": 6414 }, { "epoch": 0.43827287012365923, "grad_norm": 3.895941972732544, "learning_rate": 4.361965025722095e-06, "loss": 0.359, "step": 6415 }, { "epoch": 0.43834119013459044, "grad_norm": 3.702021837234497, "learning_rate": 4.361214359523452e-06, "loss": 0.3495, "step": 6416 }, { "epoch": 0.4384095101455216, "grad_norm": 4.977489471435547, "learning_rate": 4.360463651153424e-06, "loss": 0.3446, "step": 6417 }, { "epoch": 0.4384778301564528, "grad_norm": 3.4388797283172607, "learning_rate": 4.359712900648776e-06, "loss": 0.249, "step": 6418 }, { "epoch": 0.438546150167384, "grad_norm": 5.372361183166504, "learning_rate": 4.358962108046266e-06, "loss": 0.3314, "step": 6419 }, { "epoch": 0.43861447017831523, "grad_norm": 4.168542861938477, "learning_rate": 4.3582112733826604e-06, "loss": 0.3985, "step": 6420 }, { "epoch": 0.43868279018924644, "grad_norm": 4.372204780578613, "learning_rate": 4.3574603966947255e-06, "loss": 0.4019, "step": 6421 }, { "epoch": 0.43875111020017765, "grad_norm": 3.837446451187134, "learning_rate": 4.356709478019229e-06, "loss": 0.3658, "step": 6422 }, { "epoch": 0.4388194302111088, "grad_norm": 3.9010062217712402, "learning_rate": 4.3559585173929425e-06, "loss": 0.3121, "step": 6423 }, { "epoch": 0.43888775022204, "grad_norm": 5.26325798034668, "learning_rate": 4.355207514852638e-06, "loss": 0.4298, "step": 6424 }, { "epoch": 0.43895607023297123, "grad_norm": 4.105519771575928, "learning_rate": 4.35445647043509e-06, "loss": 0.4196, "step": 6425 }, { "epoch": 0.43902439024390244, "grad_norm": 3.588106393814087, "learning_rate": 4.353705384177074e-06, "loss": 0.3722, "step": 6426 }, { "epoch": 0.43909271025483365, "grad_norm": 3.8090500831604004, "learning_rate": 4.352954256115371e-06, "loss": 0.3596, "step": 6427 }, { "epoch": 0.43916103026576486, "grad_norm": 4.1800761222839355, "learning_rate": 4.3522030862867605e-06, "loss": 0.466, "step": 6428 }, { "epoch": 0.439229350276696, "grad_norm": 3.638894557952881, "learning_rate": 4.3514518747280234e-06, "loss": 0.2996, "step": 6429 }, { "epoch": 0.43929767028762723, "grad_norm": 4.257111072540283, "learning_rate": 4.35070062147595e-06, "loss": 0.3726, "step": 6430 }, { "epoch": 0.43936599029855844, "grad_norm": 4.611887454986572, "learning_rate": 4.349949326567321e-06, "loss": 0.2768, "step": 6431 }, { "epoch": 0.43943431030948965, "grad_norm": 3.6871445178985596, "learning_rate": 4.349197990038928e-06, "loss": 0.3489, "step": 6432 }, { "epoch": 0.43950263032042086, "grad_norm": 4.599042892456055, "learning_rate": 4.348446611927564e-06, "loss": 0.2882, "step": 6433 }, { "epoch": 0.4395709503313521, "grad_norm": 3.772366523742676, "learning_rate": 4.347695192270019e-06, "loss": 0.3374, "step": 6434 }, { "epoch": 0.43963927034228323, "grad_norm": 4.154968738555908, "learning_rate": 4.346943731103088e-06, "loss": 0.3782, "step": 6435 }, { "epoch": 0.43970759035321444, "grad_norm": 4.467822551727295, "learning_rate": 4.34619222846357e-06, "loss": 0.4655, "step": 6436 }, { "epoch": 0.43977591036414565, "grad_norm": 4.4472856521606445, "learning_rate": 4.3454406843882635e-06, "loss": 0.2791, "step": 6437 }, { "epoch": 0.43984423037507686, "grad_norm": 4.2163496017456055, "learning_rate": 4.344689098913969e-06, "loss": 0.3235, "step": 6438 }, { "epoch": 0.43991255038600807, "grad_norm": 4.521373271942139, "learning_rate": 4.343937472077489e-06, "loss": 0.3451, "step": 6439 }, { "epoch": 0.4399808703969393, "grad_norm": 5.202215671539307, "learning_rate": 4.3431858039156295e-06, "loss": 0.3185, "step": 6440 }, { "epoch": 0.44004919040787044, "grad_norm": 4.378134250640869, "learning_rate": 4.342434094465199e-06, "loss": 0.4058, "step": 6441 }, { "epoch": 0.44011751041880165, "grad_norm": 3.5392470359802246, "learning_rate": 4.341682343763005e-06, "loss": 0.2996, "step": 6442 }, { "epoch": 0.44018583042973286, "grad_norm": 3.795510768890381, "learning_rate": 4.34093055184586e-06, "loss": 0.3431, "step": 6443 }, { "epoch": 0.44025415044066407, "grad_norm": 4.953372001647949, "learning_rate": 4.340178718750575e-06, "loss": 0.3888, "step": 6444 }, { "epoch": 0.4403224704515953, "grad_norm": 4.101383209228516, "learning_rate": 4.339426844513968e-06, "loss": 0.4194, "step": 6445 }, { "epoch": 0.4403907904625265, "grad_norm": 3.7783870697021484, "learning_rate": 4.338674929172854e-06, "loss": 0.4076, "step": 6446 }, { "epoch": 0.44045911047345765, "grad_norm": 3.0655596256256104, "learning_rate": 4.3379229727640545e-06, "loss": 0.3821, "step": 6447 }, { "epoch": 0.44052743048438886, "grad_norm": 4.650524616241455, "learning_rate": 4.337170975324388e-06, "loss": 0.4437, "step": 6448 }, { "epoch": 0.44059575049532007, "grad_norm": 3.650909423828125, "learning_rate": 4.336418936890682e-06, "loss": 0.3835, "step": 6449 }, { "epoch": 0.4406640705062513, "grad_norm": 2.8844645023345947, "learning_rate": 4.335666857499757e-06, "loss": 0.2293, "step": 6450 }, { "epoch": 0.4407323905171825, "grad_norm": 3.6118550300598145, "learning_rate": 4.334914737188444e-06, "loss": 0.3021, "step": 6451 }, { "epoch": 0.4408007105281137, "grad_norm": 4.425468444824219, "learning_rate": 4.33416257599357e-06, "loss": 0.3381, "step": 6452 }, { "epoch": 0.44086903053904486, "grad_norm": 4.610678672790527, "learning_rate": 4.333410373951969e-06, "loss": 0.3875, "step": 6453 }, { "epoch": 0.44093735054997607, "grad_norm": 4.021933078765869, "learning_rate": 4.332658131100471e-06, "loss": 0.3733, "step": 6454 }, { "epoch": 0.4410056705609073, "grad_norm": 4.244983196258545, "learning_rate": 4.331905847475913e-06, "loss": 0.3358, "step": 6455 }, { "epoch": 0.4410739905718385, "grad_norm": 4.396039009094238, "learning_rate": 4.331153523115132e-06, "loss": 0.3137, "step": 6456 }, { "epoch": 0.4411423105827697, "grad_norm": 3.1405646800994873, "learning_rate": 4.330401158054969e-06, "loss": 0.3043, "step": 6457 }, { "epoch": 0.4412106305937009, "grad_norm": 4.976347923278809, "learning_rate": 4.329648752332265e-06, "loss": 0.4755, "step": 6458 }, { "epoch": 0.44127895060463207, "grad_norm": 3.187197685241699, "learning_rate": 4.32889630598386e-06, "loss": 0.3421, "step": 6459 }, { "epoch": 0.4413472706155633, "grad_norm": 3.0682480335235596, "learning_rate": 4.328143819046602e-06, "loss": 0.2877, "step": 6460 }, { "epoch": 0.4414155906264945, "grad_norm": 4.14896821975708, "learning_rate": 4.3273912915573385e-06, "loss": 0.3661, "step": 6461 }, { "epoch": 0.4414839106374257, "grad_norm": 4.445626735687256, "learning_rate": 4.326638723552917e-06, "loss": 0.3935, "step": 6462 }, { "epoch": 0.4415522306483569, "grad_norm": 4.3069748878479, "learning_rate": 4.325886115070192e-06, "loss": 0.4247, "step": 6463 }, { "epoch": 0.4416205506592881, "grad_norm": 3.6247847080230713, "learning_rate": 4.325133466146013e-06, "loss": 0.3437, "step": 6464 }, { "epoch": 0.4416888706702193, "grad_norm": 2.957047939300537, "learning_rate": 4.324380776817237e-06, "loss": 0.309, "step": 6465 }, { "epoch": 0.4417571906811505, "grad_norm": 3.542323589324951, "learning_rate": 4.323628047120721e-06, "loss": 0.3895, "step": 6466 }, { "epoch": 0.4418255106920817, "grad_norm": 4.458845615386963, "learning_rate": 4.322875277093325e-06, "loss": 0.4953, "step": 6467 }, { "epoch": 0.4418938307030129, "grad_norm": 3.6154568195343018, "learning_rate": 4.322122466771909e-06, "loss": 0.2623, "step": 6468 }, { "epoch": 0.4419621507139441, "grad_norm": 3.607797622680664, "learning_rate": 4.321369616193337e-06, "loss": 0.4152, "step": 6469 }, { "epoch": 0.44203047072487534, "grad_norm": 2.922891139984131, "learning_rate": 4.320616725394473e-06, "loss": 0.2813, "step": 6470 }, { "epoch": 0.4420987907358065, "grad_norm": 4.363068103790283, "learning_rate": 4.319863794412185e-06, "loss": 0.3652, "step": 6471 }, { "epoch": 0.4421671107467377, "grad_norm": 3.316740036010742, "learning_rate": 4.319110823283342e-06, "loss": 0.2743, "step": 6472 }, { "epoch": 0.4422354307576689, "grad_norm": 4.323827266693115, "learning_rate": 4.318357812044814e-06, "loss": 0.3137, "step": 6473 }, { "epoch": 0.4423037507686001, "grad_norm": 3.827674150466919, "learning_rate": 4.317604760733475e-06, "loss": 0.3867, "step": 6474 }, { "epoch": 0.44237207077953133, "grad_norm": 3.859956741333008, "learning_rate": 4.3168516693862e-06, "loss": 0.3526, "step": 6475 }, { "epoch": 0.44244039079046255, "grad_norm": 4.286500453948975, "learning_rate": 4.316098538039864e-06, "loss": 0.4288, "step": 6476 }, { "epoch": 0.4425087108013937, "grad_norm": 4.201509475708008, "learning_rate": 4.315345366731349e-06, "loss": 0.3459, "step": 6477 }, { "epoch": 0.4425770308123249, "grad_norm": 3.9974451065063477, "learning_rate": 4.314592155497534e-06, "loss": 0.304, "step": 6478 }, { "epoch": 0.4426453508232561, "grad_norm": 4.743733882904053, "learning_rate": 4.3138389043753014e-06, "loss": 0.4133, "step": 6479 }, { "epoch": 0.44271367083418733, "grad_norm": 3.7320306301116943, "learning_rate": 4.3130856134015365e-06, "loss": 0.3865, "step": 6480 }, { "epoch": 0.44278199084511854, "grad_norm": 5.420217514038086, "learning_rate": 4.312332282613127e-06, "loss": 0.3539, "step": 6481 }, { "epoch": 0.44285031085604976, "grad_norm": 4.3353705406188965, "learning_rate": 4.311578912046958e-06, "loss": 0.4053, "step": 6482 }, { "epoch": 0.4429186308669809, "grad_norm": 3.880706548690796, "learning_rate": 4.310825501739924e-06, "loss": 0.4225, "step": 6483 }, { "epoch": 0.4429869508779121, "grad_norm": 4.053952693939209, "learning_rate": 4.310072051728916e-06, "loss": 0.284, "step": 6484 }, { "epoch": 0.44305527088884333, "grad_norm": 3.5429487228393555, "learning_rate": 4.309318562050828e-06, "loss": 0.244, "step": 6485 }, { "epoch": 0.44312359089977454, "grad_norm": 4.876114368438721, "learning_rate": 4.3085650327425564e-06, "loss": 0.5174, "step": 6486 }, { "epoch": 0.44319191091070576, "grad_norm": 6.745744705200195, "learning_rate": 4.3078114638410015e-06, "loss": 0.4036, "step": 6487 }, { "epoch": 0.44326023092163697, "grad_norm": 3.4499566555023193, "learning_rate": 4.30705785538306e-06, "loss": 0.3595, "step": 6488 }, { "epoch": 0.4433285509325681, "grad_norm": 6.014729976654053, "learning_rate": 4.306304207405637e-06, "loss": 0.3372, "step": 6489 }, { "epoch": 0.44339687094349933, "grad_norm": 4.203282356262207, "learning_rate": 4.305550519945636e-06, "loss": 0.4316, "step": 6490 }, { "epoch": 0.44346519095443054, "grad_norm": 3.417508125305176, "learning_rate": 4.304796793039962e-06, "loss": 0.3131, "step": 6491 }, { "epoch": 0.44353351096536175, "grad_norm": 3.7888565063476562, "learning_rate": 4.304043026725524e-06, "loss": 0.3497, "step": 6492 }, { "epoch": 0.44360183097629297, "grad_norm": 6.564039707183838, "learning_rate": 4.3032892210392326e-06, "loss": 0.3367, "step": 6493 }, { "epoch": 0.4436701509872242, "grad_norm": 3.6185245513916016, "learning_rate": 4.302535376017998e-06, "loss": 0.2679, "step": 6494 }, { "epoch": 0.44373847099815533, "grad_norm": 3.655118942260742, "learning_rate": 4.301781491698735e-06, "loss": 0.2905, "step": 6495 }, { "epoch": 0.44380679100908654, "grad_norm": 3.542513608932495, "learning_rate": 4.3010275681183585e-06, "loss": 0.3482, "step": 6496 }, { "epoch": 0.44387511102001775, "grad_norm": 4.741878509521484, "learning_rate": 4.3002736053137884e-06, "loss": 0.4442, "step": 6497 }, { "epoch": 0.44394343103094897, "grad_norm": 4.723883628845215, "learning_rate": 4.299519603321941e-06, "loss": 0.3667, "step": 6498 }, { "epoch": 0.4440117510418802, "grad_norm": 3.440650701522827, "learning_rate": 4.298765562179741e-06, "loss": 0.3573, "step": 6499 }, { "epoch": 0.4440800710528114, "grad_norm": 4.088853359222412, "learning_rate": 4.2980114819241085e-06, "loss": 0.3533, "step": 6500 }, { "epoch": 0.44414839106374254, "grad_norm": 4.146888732910156, "learning_rate": 4.297257362591971e-06, "loss": 0.3374, "step": 6501 }, { "epoch": 0.44421671107467375, "grad_norm": 5.260348320007324, "learning_rate": 4.296503204220256e-06, "loss": 0.4883, "step": 6502 }, { "epoch": 0.44428503108560496, "grad_norm": 3.616572618484497, "learning_rate": 4.2957490068458915e-06, "loss": 0.3414, "step": 6503 }, { "epoch": 0.4443533510965362, "grad_norm": 4.240824222564697, "learning_rate": 4.294994770505809e-06, "loss": 0.4027, "step": 6504 }, { "epoch": 0.4444216711074674, "grad_norm": 3.7916512489318848, "learning_rate": 4.294240495236941e-06, "loss": 0.2855, "step": 6505 }, { "epoch": 0.4444899911183986, "grad_norm": 3.3834002017974854, "learning_rate": 4.293486181076222e-06, "loss": 0.3484, "step": 6506 }, { "epoch": 0.4445583111293298, "grad_norm": 4.171750068664551, "learning_rate": 4.29273182806059e-06, "loss": 0.3477, "step": 6507 }, { "epoch": 0.44462663114026096, "grad_norm": 3.1737258434295654, "learning_rate": 4.291977436226984e-06, "loss": 0.2728, "step": 6508 }, { "epoch": 0.4446949511511922, "grad_norm": 3.670051097869873, "learning_rate": 4.291223005612343e-06, "loss": 0.3683, "step": 6509 }, { "epoch": 0.4447632711621234, "grad_norm": 3.986412525177002, "learning_rate": 4.290468536253611e-06, "loss": 0.3016, "step": 6510 }, { "epoch": 0.4448315911730546, "grad_norm": 3.6173253059387207, "learning_rate": 4.289714028187731e-06, "loss": 0.3587, "step": 6511 }, { "epoch": 0.4448999111839858, "grad_norm": 3.184568405151367, "learning_rate": 4.288959481451649e-06, "loss": 0.3516, "step": 6512 }, { "epoch": 0.444968231194917, "grad_norm": 3.6766316890716553, "learning_rate": 4.2882048960823145e-06, "loss": 0.3569, "step": 6513 }, { "epoch": 0.4450365512058482, "grad_norm": 3.0080223083496094, "learning_rate": 4.287450272116677e-06, "loss": 0.2804, "step": 6514 }, { "epoch": 0.4451048712167794, "grad_norm": 3.4124910831451416, "learning_rate": 4.286695609591688e-06, "loss": 0.4123, "step": 6515 }, { "epoch": 0.4451731912277106, "grad_norm": 3.2380216121673584, "learning_rate": 4.285940908544303e-06, "loss": 0.2682, "step": 6516 }, { "epoch": 0.4452415112386418, "grad_norm": 3.190359592437744, "learning_rate": 4.285186169011476e-06, "loss": 0.3082, "step": 6517 }, { "epoch": 0.445309831249573, "grad_norm": 3.7680227756500244, "learning_rate": 4.284431391030165e-06, "loss": 0.4016, "step": 6518 }, { "epoch": 0.44537815126050423, "grad_norm": 3.8204457759857178, "learning_rate": 4.283676574637329e-06, "loss": 0.446, "step": 6519 }, { "epoch": 0.4454464712714354, "grad_norm": 3.563973903656006, "learning_rate": 4.282921719869931e-06, "loss": 0.3053, "step": 6520 }, { "epoch": 0.4455147912823666, "grad_norm": 5.661425590515137, "learning_rate": 4.282166826764932e-06, "loss": 0.402, "step": 6521 }, { "epoch": 0.4455831112932978, "grad_norm": 3.1017746925354004, "learning_rate": 4.2814118953592985e-06, "loss": 0.2736, "step": 6522 }, { "epoch": 0.445651431304229, "grad_norm": 4.070095539093018, "learning_rate": 4.280656925689997e-06, "loss": 0.3915, "step": 6523 }, { "epoch": 0.44571975131516023, "grad_norm": 3.4527578353881836, "learning_rate": 4.279901917793997e-06, "loss": 0.4188, "step": 6524 }, { "epoch": 0.44578807132609144, "grad_norm": 4.633389472961426, "learning_rate": 4.279146871708268e-06, "loss": 0.4225, "step": 6525 }, { "epoch": 0.4458563913370226, "grad_norm": 3.385448694229126, "learning_rate": 4.278391787469785e-06, "loss": 0.3957, "step": 6526 }, { "epoch": 0.4459247113479538, "grad_norm": 3.2419090270996094, "learning_rate": 4.27763666511552e-06, "loss": 0.4188, "step": 6527 }, { "epoch": 0.445993031358885, "grad_norm": 3.9341890811920166, "learning_rate": 4.2768815046824505e-06, "loss": 0.337, "step": 6528 }, { "epoch": 0.44606135136981623, "grad_norm": 3.9660444259643555, "learning_rate": 4.276126306207554e-06, "loss": 0.3688, "step": 6529 }, { "epoch": 0.44612967138074744, "grad_norm": 4.323117733001709, "learning_rate": 4.275371069727813e-06, "loss": 0.4106, "step": 6530 }, { "epoch": 0.44619799139167865, "grad_norm": 3.5179357528686523, "learning_rate": 4.274615795280205e-06, "loss": 0.3002, "step": 6531 }, { "epoch": 0.4462663114026098, "grad_norm": 3.163602590560913, "learning_rate": 4.273860482901716e-06, "loss": 0.3193, "step": 6532 }, { "epoch": 0.446334631413541, "grad_norm": 6.584564208984375, "learning_rate": 4.273105132629333e-06, "loss": 0.3445, "step": 6533 }, { "epoch": 0.4464029514244722, "grad_norm": 3.7852723598480225, "learning_rate": 4.272349744500043e-06, "loss": 0.4069, "step": 6534 }, { "epoch": 0.44647127143540344, "grad_norm": 4.091159820556641, "learning_rate": 4.271594318550833e-06, "loss": 0.3268, "step": 6535 }, { "epoch": 0.44653959144633465, "grad_norm": 4.215181827545166, "learning_rate": 4.270838854818697e-06, "loss": 0.339, "step": 6536 }, { "epoch": 0.44660791145726586, "grad_norm": 3.761608839035034, "learning_rate": 4.270083353340628e-06, "loss": 0.3888, "step": 6537 }, { "epoch": 0.446676231468197, "grad_norm": 3.0094807147979736, "learning_rate": 4.269327814153617e-06, "loss": 0.2873, "step": 6538 }, { "epoch": 0.4467445514791282, "grad_norm": 4.043521404266357, "learning_rate": 4.268572237294665e-06, "loss": 0.445, "step": 6539 }, { "epoch": 0.44681287149005944, "grad_norm": 3.407036542892456, "learning_rate": 4.267816622800771e-06, "loss": 0.3841, "step": 6540 }, { "epoch": 0.44688119150099065, "grad_norm": 3.4490623474121094, "learning_rate": 4.267060970708931e-06, "loss": 0.2971, "step": 6541 }, { "epoch": 0.44694951151192186, "grad_norm": 4.541342735290527, "learning_rate": 4.266305281056152e-06, "loss": 0.3544, "step": 6542 }, { "epoch": 0.44701783152285307, "grad_norm": 3.230290174484253, "learning_rate": 4.265549553879435e-06, "loss": 0.2585, "step": 6543 }, { "epoch": 0.4470861515337842, "grad_norm": 3.3250176906585693, "learning_rate": 4.264793789215786e-06, "loss": 0.3316, "step": 6544 }, { "epoch": 0.44715447154471544, "grad_norm": 3.6637749671936035, "learning_rate": 4.264037987102215e-06, "loss": 0.2813, "step": 6545 }, { "epoch": 0.44722279155564665, "grad_norm": 4.08583402633667, "learning_rate": 4.263282147575732e-06, "loss": 0.3645, "step": 6546 }, { "epoch": 0.44729111156657786, "grad_norm": 3.387946605682373, "learning_rate": 4.262526270673345e-06, "loss": 0.2519, "step": 6547 }, { "epoch": 0.44735943157750907, "grad_norm": 5.218303203582764, "learning_rate": 4.261770356432069e-06, "loss": 0.4067, "step": 6548 }, { "epoch": 0.4474277515884403, "grad_norm": 4.5273871421813965, "learning_rate": 4.261014404888919e-06, "loss": 0.4194, "step": 6549 }, { "epoch": 0.44749607159937144, "grad_norm": 4.256922245025635, "learning_rate": 4.260258416080913e-06, "loss": 0.4768, "step": 6550 }, { "epoch": 0.44756439161030265, "grad_norm": 4.188406944274902, "learning_rate": 4.259502390045069e-06, "loss": 0.3042, "step": 6551 }, { "epoch": 0.44763271162123386, "grad_norm": 4.774356365203857, "learning_rate": 4.258746326818408e-06, "loss": 0.3877, "step": 6552 }, { "epoch": 0.44770103163216507, "grad_norm": 3.326552391052246, "learning_rate": 4.257990226437951e-06, "loss": 0.246, "step": 6553 }, { "epoch": 0.4477693516430963, "grad_norm": 4.198992729187012, "learning_rate": 4.257234088940724e-06, "loss": 0.3265, "step": 6554 }, { "epoch": 0.4478376716540275, "grad_norm": 3.743421792984009, "learning_rate": 4.2564779143637515e-06, "loss": 0.2641, "step": 6555 }, { "epoch": 0.44790599166495865, "grad_norm": 3.385519027709961, "learning_rate": 4.255721702744064e-06, "loss": 0.2826, "step": 6556 }, { "epoch": 0.44797431167588986, "grad_norm": 3.496432304382324, "learning_rate": 4.254965454118687e-06, "loss": 0.303, "step": 6557 }, { "epoch": 0.44804263168682107, "grad_norm": 3.082612991333008, "learning_rate": 4.254209168524656e-06, "loss": 0.2946, "step": 6558 }, { "epoch": 0.4481109516977523, "grad_norm": 4.080922603607178, "learning_rate": 4.253452845999001e-06, "loss": 0.3391, "step": 6559 }, { "epoch": 0.4481792717086835, "grad_norm": 4.1824798583984375, "learning_rate": 4.25269648657876e-06, "loss": 0.3799, "step": 6560 }, { "epoch": 0.4482475917196147, "grad_norm": 4.117686748504639, "learning_rate": 4.251940090300968e-06, "loss": 0.3393, "step": 6561 }, { "epoch": 0.44831591173054586, "grad_norm": 3.3126211166381836, "learning_rate": 4.251183657202665e-06, "loss": 0.3121, "step": 6562 }, { "epoch": 0.44838423174147707, "grad_norm": 3.429716110229492, "learning_rate": 4.2504271873208904e-06, "loss": 0.354, "step": 6563 }, { "epoch": 0.4484525517524083, "grad_norm": 5.022970676422119, "learning_rate": 4.249670680692686e-06, "loss": 0.331, "step": 6564 }, { "epoch": 0.4485208717633395, "grad_norm": 4.617959976196289, "learning_rate": 4.248914137355098e-06, "loss": 0.3404, "step": 6565 }, { "epoch": 0.4485891917742707, "grad_norm": 4.067296981811523, "learning_rate": 4.248157557345172e-06, "loss": 0.375, "step": 6566 }, { "epoch": 0.4486575117852019, "grad_norm": 4.714181900024414, "learning_rate": 4.247400940699954e-06, "loss": 0.4313, "step": 6567 }, { "epoch": 0.44872583179613307, "grad_norm": 3.735755681991577, "learning_rate": 4.246644287456494e-06, "loss": 0.4133, "step": 6568 }, { "epoch": 0.4487941518070643, "grad_norm": 4.930593490600586, "learning_rate": 4.2458875976518445e-06, "loss": 0.3542, "step": 6569 }, { "epoch": 0.4488624718179955, "grad_norm": 3.30332088470459, "learning_rate": 4.2451308713230585e-06, "loss": 0.3494, "step": 6570 }, { "epoch": 0.4489307918289267, "grad_norm": 3.4010913372039795, "learning_rate": 4.244374108507188e-06, "loss": 0.2555, "step": 6571 }, { "epoch": 0.4489991118398579, "grad_norm": 4.009014129638672, "learning_rate": 4.243617309241295e-06, "loss": 0.2595, "step": 6572 }, { "epoch": 0.4490674318507891, "grad_norm": 3.0309600830078125, "learning_rate": 4.242860473562432e-06, "loss": 0.2665, "step": 6573 }, { "epoch": 0.4491357518617203, "grad_norm": 3.484574317932129, "learning_rate": 4.2421036015076635e-06, "loss": 0.317, "step": 6574 }, { "epoch": 0.4492040718726515, "grad_norm": 4.297475337982178, "learning_rate": 4.24134669311405e-06, "loss": 0.4044, "step": 6575 }, { "epoch": 0.4492723918835827, "grad_norm": 4.020126819610596, "learning_rate": 4.240589748418655e-06, "loss": 0.3468, "step": 6576 }, { "epoch": 0.4493407118945139, "grad_norm": 3.3940978050231934, "learning_rate": 4.239832767458545e-06, "loss": 0.3409, "step": 6577 }, { "epoch": 0.4494090319054451, "grad_norm": 3.6521499156951904, "learning_rate": 4.2390757502707855e-06, "loss": 0.3714, "step": 6578 }, { "epoch": 0.44947735191637633, "grad_norm": 4.291301250457764, "learning_rate": 4.238318696892449e-06, "loss": 0.3829, "step": 6579 }, { "epoch": 0.4495456719273075, "grad_norm": 3.7886645793914795, "learning_rate": 4.237561607360603e-06, "loss": 0.3623, "step": 6580 }, { "epoch": 0.4496139919382387, "grad_norm": 3.439788579940796, "learning_rate": 4.236804481712322e-06, "loss": 0.3017, "step": 6581 }, { "epoch": 0.4496823119491699, "grad_norm": 3.7699944972991943, "learning_rate": 4.236047319984679e-06, "loss": 0.3342, "step": 6582 }, { "epoch": 0.4497506319601011, "grad_norm": 2.736360788345337, "learning_rate": 4.235290122214753e-06, "loss": 0.3614, "step": 6583 }, { "epoch": 0.44981895197103233, "grad_norm": 4.651830196380615, "learning_rate": 4.234532888439618e-06, "loss": 0.3337, "step": 6584 }, { "epoch": 0.44988727198196354, "grad_norm": 3.388756275177002, "learning_rate": 4.233775618696358e-06, "loss": 0.2895, "step": 6585 }, { "epoch": 0.4499555919928947, "grad_norm": 6.604517459869385, "learning_rate": 4.233018313022051e-06, "loss": 0.4827, "step": 6586 }, { "epoch": 0.4500239120038259, "grad_norm": 3.8590140342712402, "learning_rate": 4.232260971453781e-06, "loss": 0.4129, "step": 6587 }, { "epoch": 0.4500922320147571, "grad_norm": 3.6808621883392334, "learning_rate": 4.231503594028635e-06, "loss": 0.4095, "step": 6588 }, { "epoch": 0.45016055202568833, "grad_norm": 3.572364568710327, "learning_rate": 4.230746180783698e-06, "loss": 0.3931, "step": 6589 }, { "epoch": 0.45022887203661954, "grad_norm": 3.5604395866394043, "learning_rate": 4.229988731756058e-06, "loss": 0.3046, "step": 6590 }, { "epoch": 0.45029719204755075, "grad_norm": 4.6786651611328125, "learning_rate": 4.229231246982807e-06, "loss": 0.3364, "step": 6591 }, { "epoch": 0.4503655120584819, "grad_norm": 4.159191608428955, "learning_rate": 4.228473726501036e-06, "loss": 0.3351, "step": 6592 }, { "epoch": 0.4504338320694131, "grad_norm": 4.372110366821289, "learning_rate": 4.22771617034784e-06, "loss": 0.4204, "step": 6593 }, { "epoch": 0.45050215208034433, "grad_norm": 3.025022506713867, "learning_rate": 4.226958578560311e-06, "loss": 0.2975, "step": 6594 }, { "epoch": 0.45057047209127554, "grad_norm": 4.277886390686035, "learning_rate": 4.226200951175551e-06, "loss": 0.4078, "step": 6595 }, { "epoch": 0.45063879210220675, "grad_norm": 3.8955657482147217, "learning_rate": 4.225443288230655e-06, "loss": 0.327, "step": 6596 }, { "epoch": 0.45070711211313796, "grad_norm": 4.611642360687256, "learning_rate": 4.224685589762727e-06, "loss": 0.4465, "step": 6597 }, { "epoch": 0.4507754321240691, "grad_norm": 4.590481281280518, "learning_rate": 4.223927855808867e-06, "loss": 0.3624, "step": 6598 }, { "epoch": 0.45084375213500033, "grad_norm": 3.208557367324829, "learning_rate": 4.22317008640618e-06, "loss": 0.3148, "step": 6599 }, { "epoch": 0.45091207214593154, "grad_norm": 3.495772361755371, "learning_rate": 4.222412281591773e-06, "loss": 0.3049, "step": 6600 }, { "epoch": 0.45098039215686275, "grad_norm": 3.965817451477051, "learning_rate": 4.2216544414027514e-06, "loss": 0.2966, "step": 6601 }, { "epoch": 0.45104871216779396, "grad_norm": 3.7244861125946045, "learning_rate": 4.2208965658762275e-06, "loss": 0.333, "step": 6602 }, { "epoch": 0.4511170321787252, "grad_norm": 3.6188275814056396, "learning_rate": 4.220138655049311e-06, "loss": 0.2691, "step": 6603 }, { "epoch": 0.45118535218965633, "grad_norm": 4.023850440979004, "learning_rate": 4.219380708959113e-06, "loss": 0.4283, "step": 6604 }, { "epoch": 0.45125367220058754, "grad_norm": 3.9573400020599365, "learning_rate": 4.218622727642751e-06, "loss": 0.2964, "step": 6605 }, { "epoch": 0.45132199221151875, "grad_norm": 4.215176582336426, "learning_rate": 4.217864711137341e-06, "loss": 0.4443, "step": 6606 }, { "epoch": 0.45139031222244996, "grad_norm": 3.0222833156585693, "learning_rate": 4.2171066594799995e-06, "loss": 0.2964, "step": 6607 }, { "epoch": 0.4514586322333812, "grad_norm": 3.564523220062256, "learning_rate": 4.216348572707846e-06, "loss": 0.3813, "step": 6608 }, { "epoch": 0.4515269522443124, "grad_norm": 3.9773874282836914, "learning_rate": 4.215590450858005e-06, "loss": 0.3234, "step": 6609 }, { "epoch": 0.45159527225524354, "grad_norm": 3.6736538410186768, "learning_rate": 4.214832293967597e-06, "loss": 0.4709, "step": 6610 }, { "epoch": 0.45166359226617475, "grad_norm": 3.6263294219970703, "learning_rate": 4.214074102073747e-06, "loss": 0.3903, "step": 6611 }, { "epoch": 0.45173191227710596, "grad_norm": 2.910613775253296, "learning_rate": 4.213315875213584e-06, "loss": 0.2437, "step": 6612 }, { "epoch": 0.4518002322880372, "grad_norm": 3.6530544757843018, "learning_rate": 4.2125576134242335e-06, "loss": 0.2762, "step": 6613 }, { "epoch": 0.4518685522989684, "grad_norm": 3.96394681930542, "learning_rate": 4.211799316742826e-06, "loss": 0.3517, "step": 6614 }, { "epoch": 0.4519368723098996, "grad_norm": 3.327470064163208, "learning_rate": 4.2110409852064955e-06, "loss": 0.3697, "step": 6615 }, { "epoch": 0.45200519232083075, "grad_norm": 2.980717897415161, "learning_rate": 4.210282618852373e-06, "loss": 0.3204, "step": 6616 }, { "epoch": 0.45207351233176196, "grad_norm": 3.9006669521331787, "learning_rate": 4.209524217717596e-06, "loss": 0.4015, "step": 6617 }, { "epoch": 0.4521418323426932, "grad_norm": 4.953658103942871, "learning_rate": 4.208765781839299e-06, "loss": 0.3243, "step": 6618 }, { "epoch": 0.4522101523536244, "grad_norm": 3.794544219970703, "learning_rate": 4.2080073112546225e-06, "loss": 0.2883, "step": 6619 }, { "epoch": 0.4522784723645556, "grad_norm": 3.817683696746826, "learning_rate": 4.207248806000705e-06, "loss": 0.3899, "step": 6620 }, { "epoch": 0.4523467923754868, "grad_norm": 4.820249557495117, "learning_rate": 4.20649026611469e-06, "loss": 0.3672, "step": 6621 }, { "epoch": 0.45241511238641796, "grad_norm": 3.979811906814575, "learning_rate": 4.2057316916337205e-06, "loss": 0.3041, "step": 6622 }, { "epoch": 0.4524834323973492, "grad_norm": 3.821488380432129, "learning_rate": 4.204973082594943e-06, "loss": 0.3419, "step": 6623 }, { "epoch": 0.4525517524082804, "grad_norm": 3.9754903316497803, "learning_rate": 4.204214439035502e-06, "loss": 0.3645, "step": 6624 }, { "epoch": 0.4526200724192116, "grad_norm": 3.183650493621826, "learning_rate": 4.20345576099255e-06, "loss": 0.3516, "step": 6625 }, { "epoch": 0.4526883924301428, "grad_norm": 3.1332638263702393, "learning_rate": 4.202697048503233e-06, "loss": 0.2646, "step": 6626 }, { "epoch": 0.452756712441074, "grad_norm": 4.169201374053955, "learning_rate": 4.201938301604707e-06, "loss": 0.2837, "step": 6627 }, { "epoch": 0.45282503245200517, "grad_norm": 3.450417995452881, "learning_rate": 4.201179520334123e-06, "loss": 0.3901, "step": 6628 }, { "epoch": 0.4528933524629364, "grad_norm": 3.50329327583313, "learning_rate": 4.20042070472864e-06, "loss": 0.3307, "step": 6629 }, { "epoch": 0.4529616724738676, "grad_norm": 4.007997512817383, "learning_rate": 4.1996618548254104e-06, "loss": 0.3174, "step": 6630 }, { "epoch": 0.4530299924847988, "grad_norm": 2.9925758838653564, "learning_rate": 4.198902970661598e-06, "loss": 0.3432, "step": 6631 }, { "epoch": 0.45309831249573, "grad_norm": 4.212700843811035, "learning_rate": 4.19814405227436e-06, "loss": 0.2847, "step": 6632 }, { "epoch": 0.4531666325066612, "grad_norm": 3.9845588207244873, "learning_rate": 4.197385099700859e-06, "loss": 0.3237, "step": 6633 }, { "epoch": 0.4532349525175924, "grad_norm": 3.4063665866851807, "learning_rate": 4.196626112978261e-06, "loss": 0.3624, "step": 6634 }, { "epoch": 0.4533032725285236, "grad_norm": 3.475684881210327, "learning_rate": 4.195867092143729e-06, "loss": 0.3703, "step": 6635 }, { "epoch": 0.4533715925394548, "grad_norm": 3.5919487476348877, "learning_rate": 4.195108037234432e-06, "loss": 0.3228, "step": 6636 }, { "epoch": 0.453439912550386, "grad_norm": 3.8910748958587646, "learning_rate": 4.194348948287537e-06, "loss": 0.2506, "step": 6637 }, { "epoch": 0.4535082325613172, "grad_norm": 3.796001672744751, "learning_rate": 4.1935898253402165e-06, "loss": 0.2773, "step": 6638 }, { "epoch": 0.45357655257224844, "grad_norm": 4.746208190917969, "learning_rate": 4.192830668429643e-06, "loss": 0.2461, "step": 6639 }, { "epoch": 0.4536448725831796, "grad_norm": 3.9850926399230957, "learning_rate": 4.192071477592988e-06, "loss": 0.4241, "step": 6640 }, { "epoch": 0.4537131925941108, "grad_norm": 4.763545513153076, "learning_rate": 4.191312252867428e-06, "loss": 0.3946, "step": 6641 }, { "epoch": 0.453781512605042, "grad_norm": 4.344710826873779, "learning_rate": 4.190552994290143e-06, "loss": 0.3589, "step": 6642 }, { "epoch": 0.4538498326159732, "grad_norm": 3.3099210262298584, "learning_rate": 4.189793701898308e-06, "loss": 0.3163, "step": 6643 }, { "epoch": 0.45391815262690444, "grad_norm": 2.9555037021636963, "learning_rate": 4.1890343757291055e-06, "loss": 0.198, "step": 6644 }, { "epoch": 0.45398647263783565, "grad_norm": 3.250422477722168, "learning_rate": 4.188275015819717e-06, "loss": 0.2505, "step": 6645 }, { "epoch": 0.4540547926487668, "grad_norm": 6.806921482086182, "learning_rate": 4.187515622207328e-06, "loss": 0.4552, "step": 6646 }, { "epoch": 0.454123112659698, "grad_norm": 3.961360216140747, "learning_rate": 4.18675619492912e-06, "loss": 0.4119, "step": 6647 }, { "epoch": 0.4541914326706292, "grad_norm": 4.363368034362793, "learning_rate": 4.185996734022285e-06, "loss": 0.4018, "step": 6648 }, { "epoch": 0.45425975268156044, "grad_norm": 4.450761795043945, "learning_rate": 4.18523723952401e-06, "loss": 0.3513, "step": 6649 }, { "epoch": 0.45432807269249165, "grad_norm": 3.0851821899414062, "learning_rate": 4.184477711471483e-06, "loss": 0.2467, "step": 6650 }, { "epoch": 0.45439639270342286, "grad_norm": 3.3991358280181885, "learning_rate": 4.1837181499018994e-06, "loss": 0.2829, "step": 6651 }, { "epoch": 0.454464712714354, "grad_norm": 3.337646961212158, "learning_rate": 4.182958554852452e-06, "loss": 0.3428, "step": 6652 }, { "epoch": 0.4545330327252852, "grad_norm": 4.731215476989746, "learning_rate": 4.1821989263603355e-06, "loss": 0.4237, "step": 6653 }, { "epoch": 0.45460135273621644, "grad_norm": 4.409718990325928, "learning_rate": 4.1814392644627456e-06, "loss": 0.4055, "step": 6654 }, { "epoch": 0.45466967274714765, "grad_norm": 3.7012486457824707, "learning_rate": 4.180679569196884e-06, "loss": 0.3717, "step": 6655 }, { "epoch": 0.45473799275807886, "grad_norm": 3.427241563796997, "learning_rate": 4.17991984059995e-06, "loss": 0.382, "step": 6656 }, { "epoch": 0.45480631276901007, "grad_norm": 3.4626107215881348, "learning_rate": 4.179160078709143e-06, "loss": 0.3065, "step": 6657 }, { "epoch": 0.4548746327799412, "grad_norm": 4.031935214996338, "learning_rate": 4.17840028356167e-06, "loss": 0.2736, "step": 6658 }, { "epoch": 0.45494295279087243, "grad_norm": 3.5121631622314453, "learning_rate": 4.177640455194734e-06, "loss": 0.3452, "step": 6659 }, { "epoch": 0.45501127280180365, "grad_norm": 3.10569167137146, "learning_rate": 4.176880593645542e-06, "loss": 0.3508, "step": 6660 }, { "epoch": 0.45507959281273486, "grad_norm": 3.7000200748443604, "learning_rate": 4.176120698951303e-06, "loss": 0.4295, "step": 6661 }, { "epoch": 0.45514791282366607, "grad_norm": 3.493340253829956, "learning_rate": 4.1753607711492285e-06, "loss": 0.3465, "step": 6662 }, { "epoch": 0.4552162328345973, "grad_norm": 3.530820608139038, "learning_rate": 4.174600810276526e-06, "loss": 0.318, "step": 6663 }, { "epoch": 0.45528455284552843, "grad_norm": 3.342339515686035, "learning_rate": 4.173840816370412e-06, "loss": 0.2794, "step": 6664 }, { "epoch": 0.45535287285645965, "grad_norm": 4.588933944702148, "learning_rate": 4.173080789468101e-06, "loss": 0.3889, "step": 6665 }, { "epoch": 0.45542119286739086, "grad_norm": 4.021820068359375, "learning_rate": 4.172320729606809e-06, "loss": 0.3322, "step": 6666 }, { "epoch": 0.45548951287832207, "grad_norm": 3.6828112602233887, "learning_rate": 4.171560636823754e-06, "loss": 0.3965, "step": 6667 }, { "epoch": 0.4555578328892533, "grad_norm": 4.230199337005615, "learning_rate": 4.170800511156157e-06, "loss": 0.3163, "step": 6668 }, { "epoch": 0.4556261529001845, "grad_norm": 3.6266114711761475, "learning_rate": 4.1700403526412375e-06, "loss": 0.3556, "step": 6669 }, { "epoch": 0.45569447291111564, "grad_norm": 3.4343607425689697, "learning_rate": 4.169280161316219e-06, "loss": 0.3456, "step": 6670 }, { "epoch": 0.45576279292204686, "grad_norm": 4.686539173126221, "learning_rate": 4.168519937218327e-06, "loss": 0.3922, "step": 6671 }, { "epoch": 0.45583111293297807, "grad_norm": 4.18088436126709, "learning_rate": 4.167759680384787e-06, "loss": 0.3989, "step": 6672 }, { "epoch": 0.4558994329439093, "grad_norm": 5.8440375328063965, "learning_rate": 4.166999390852825e-06, "loss": 0.4245, "step": 6673 }, { "epoch": 0.4559677529548405, "grad_norm": 3.9850237369537354, "learning_rate": 4.166239068659674e-06, "loss": 0.3262, "step": 6674 }, { "epoch": 0.4560360729657717, "grad_norm": 4.39368200302124, "learning_rate": 4.165478713842562e-06, "loss": 0.3631, "step": 6675 }, { "epoch": 0.45610439297670285, "grad_norm": 3.252545118331909, "learning_rate": 4.164718326438723e-06, "loss": 0.2865, "step": 6676 }, { "epoch": 0.45617271298763407, "grad_norm": 3.1339964866638184, "learning_rate": 4.16395790648539e-06, "loss": 0.3334, "step": 6677 }, { "epoch": 0.4562410329985653, "grad_norm": 4.8384013175964355, "learning_rate": 4.163197454019802e-06, "loss": 0.2827, "step": 6678 }, { "epoch": 0.4563093530094965, "grad_norm": 4.713578224182129, "learning_rate": 4.1624369690791916e-06, "loss": 0.5846, "step": 6679 }, { "epoch": 0.4563776730204277, "grad_norm": 3.974700689315796, "learning_rate": 4.161676451700801e-06, "loss": 0.3436, "step": 6680 }, { "epoch": 0.4564459930313589, "grad_norm": 4.344177722930908, "learning_rate": 4.1609159019218685e-06, "loss": 0.4137, "step": 6681 }, { "epoch": 0.45651431304229007, "grad_norm": 4.09475564956665, "learning_rate": 4.1601553197796386e-06, "loss": 0.3426, "step": 6682 }, { "epoch": 0.4565826330532213, "grad_norm": 4.1042094230651855, "learning_rate": 4.1593947053113525e-06, "loss": 0.2673, "step": 6683 }, { "epoch": 0.4566509530641525, "grad_norm": 3.9569013118743896, "learning_rate": 4.1586340585542585e-06, "loss": 0.399, "step": 6684 }, { "epoch": 0.4567192730750837, "grad_norm": 4.36024808883667, "learning_rate": 4.1578733795456e-06, "loss": 0.4409, "step": 6685 }, { "epoch": 0.4567875930860149, "grad_norm": 4.212599277496338, "learning_rate": 4.157112668322628e-06, "loss": 0.3304, "step": 6686 }, { "epoch": 0.4568559130969461, "grad_norm": 4.463316917419434, "learning_rate": 4.156351924922591e-06, "loss": 0.3284, "step": 6687 }, { "epoch": 0.4569242331078773, "grad_norm": 5.126779556274414, "learning_rate": 4.155591149382743e-06, "loss": 0.4063, "step": 6688 }, { "epoch": 0.4569925531188085, "grad_norm": 4.080478191375732, "learning_rate": 4.154830341740334e-06, "loss": 0.3718, "step": 6689 }, { "epoch": 0.4570608731297397, "grad_norm": 3.766625165939331, "learning_rate": 4.15406950203262e-06, "loss": 0.3427, "step": 6690 }, { "epoch": 0.4571291931406709, "grad_norm": 3.8273098468780518, "learning_rate": 4.153308630296858e-06, "loss": 0.359, "step": 6691 }, { "epoch": 0.4571975131516021, "grad_norm": 3.335603713989258, "learning_rate": 4.152547726570306e-06, "loss": 0.2761, "step": 6692 }, { "epoch": 0.45726583316253333, "grad_norm": 3.9060637950897217, "learning_rate": 4.151786790890221e-06, "loss": 0.3179, "step": 6693 }, { "epoch": 0.4573341531734645, "grad_norm": 4.167548656463623, "learning_rate": 4.151025823293867e-06, "loss": 0.4467, "step": 6694 }, { "epoch": 0.4574024731843957, "grad_norm": 3.780832052230835, "learning_rate": 4.150264823818505e-06, "loss": 0.3957, "step": 6695 }, { "epoch": 0.4574707931953269, "grad_norm": 3.4531586170196533, "learning_rate": 4.1495037925013995e-06, "loss": 0.2715, "step": 6696 }, { "epoch": 0.4575391132062581, "grad_norm": 3.8116817474365234, "learning_rate": 4.148742729379816e-06, "loss": 0.2745, "step": 6697 }, { "epoch": 0.45760743321718933, "grad_norm": 4.807314395904541, "learning_rate": 4.1479816344910225e-06, "loss": 0.3024, "step": 6698 }, { "epoch": 0.45767575322812054, "grad_norm": 3.8377203941345215, "learning_rate": 4.147220507872286e-06, "loss": 0.3946, "step": 6699 }, { "epoch": 0.4577440732390517, "grad_norm": 4.590827941894531, "learning_rate": 4.146459349560879e-06, "loss": 0.3991, "step": 6700 }, { "epoch": 0.4578123932499829, "grad_norm": 3.6026809215545654, "learning_rate": 4.145698159594071e-06, "loss": 0.3199, "step": 6701 }, { "epoch": 0.4578807132609141, "grad_norm": 4.629756450653076, "learning_rate": 4.144936938009139e-06, "loss": 0.3464, "step": 6702 }, { "epoch": 0.45794903327184533, "grad_norm": 3.9253389835357666, "learning_rate": 4.144175684843354e-06, "loss": 0.3639, "step": 6703 }, { "epoch": 0.45801735328277654, "grad_norm": 2.82446026802063, "learning_rate": 4.143414400133996e-06, "loss": 0.2561, "step": 6704 }, { "epoch": 0.45808567329370775, "grad_norm": 3.2535934448242188, "learning_rate": 4.14265308391834e-06, "loss": 0.2733, "step": 6705 }, { "epoch": 0.4581539933046389, "grad_norm": 3.3729054927825928, "learning_rate": 4.141891736233668e-06, "loss": 0.3021, "step": 6706 }, { "epoch": 0.4582223133155701, "grad_norm": 3.995507001876831, "learning_rate": 4.14113035711726e-06, "loss": 0.3197, "step": 6707 }, { "epoch": 0.45829063332650133, "grad_norm": 4.514494895935059, "learning_rate": 4.1403689466063985e-06, "loss": 0.3478, "step": 6708 }, { "epoch": 0.45835895333743254, "grad_norm": 3.157034397125244, "learning_rate": 4.13960750473837e-06, "loss": 0.2429, "step": 6709 }, { "epoch": 0.45842727334836375, "grad_norm": 4.53297758102417, "learning_rate": 4.138846031550457e-06, "loss": 0.4143, "step": 6710 }, { "epoch": 0.45849559335929496, "grad_norm": 4.418297290802002, "learning_rate": 4.1380845270799494e-06, "loss": 0.339, "step": 6711 }, { "epoch": 0.4585639133702261, "grad_norm": 3.5701112747192383, "learning_rate": 4.137322991364135e-06, "loss": 0.2897, "step": 6712 }, { "epoch": 0.45863223338115733, "grad_norm": 4.830141544342041, "learning_rate": 4.136561424440304e-06, "loss": 0.5755, "step": 6713 }, { "epoch": 0.45870055339208854, "grad_norm": 3.599109411239624, "learning_rate": 4.135799826345748e-06, "loss": 0.2834, "step": 6714 }, { "epoch": 0.45876887340301975, "grad_norm": 4.304433822631836, "learning_rate": 4.135038197117762e-06, "loss": 0.2987, "step": 6715 }, { "epoch": 0.45883719341395096, "grad_norm": 3.45436692237854, "learning_rate": 4.134276536793639e-06, "loss": 0.2644, "step": 6716 }, { "epoch": 0.45890551342488217, "grad_norm": 4.356471061706543, "learning_rate": 4.133514845410678e-06, "loss": 0.3544, "step": 6717 }, { "epoch": 0.4589738334358133, "grad_norm": 4.812446117401123, "learning_rate": 4.132753123006173e-06, "loss": 0.3742, "step": 6718 }, { "epoch": 0.45904215344674454, "grad_norm": 3.169823408126831, "learning_rate": 4.1319913696174285e-06, "loss": 0.2443, "step": 6719 }, { "epoch": 0.45911047345767575, "grad_norm": 3.75014066696167, "learning_rate": 4.1312295852817405e-06, "loss": 0.3108, "step": 6720 }, { "epoch": 0.45917879346860696, "grad_norm": 3.338667631149292, "learning_rate": 4.130467770036417e-06, "loss": 0.3098, "step": 6721 }, { "epoch": 0.45924711347953817, "grad_norm": 4.659575462341309, "learning_rate": 4.129705923918757e-06, "loss": 0.426, "step": 6722 }, { "epoch": 0.4593154334904694, "grad_norm": 3.8260278701782227, "learning_rate": 4.128944046966069e-06, "loss": 0.3427, "step": 6723 }, { "epoch": 0.45938375350140054, "grad_norm": 4.5188889503479, "learning_rate": 4.128182139215659e-06, "loss": 0.384, "step": 6724 }, { "epoch": 0.45945207351233175, "grad_norm": 4.244543552398682, "learning_rate": 4.127420200704837e-06, "loss": 0.4733, "step": 6725 }, { "epoch": 0.45952039352326296, "grad_norm": 4.652833461761475, "learning_rate": 4.126658231470911e-06, "loss": 0.4466, "step": 6726 }, { "epoch": 0.45958871353419417, "grad_norm": 4.528104305267334, "learning_rate": 4.125896231551194e-06, "loss": 0.4556, "step": 6727 }, { "epoch": 0.4596570335451254, "grad_norm": 3.8331456184387207, "learning_rate": 4.125134200982999e-06, "loss": 0.3465, "step": 6728 }, { "epoch": 0.4597253535560566, "grad_norm": 4.527678489685059, "learning_rate": 4.124372139803639e-06, "loss": 0.485, "step": 6729 }, { "epoch": 0.45979367356698775, "grad_norm": 3.5004281997680664, "learning_rate": 4.123610048050432e-06, "loss": 0.3045, "step": 6730 }, { "epoch": 0.45986199357791896, "grad_norm": 3.362604856491089, "learning_rate": 4.122847925760696e-06, "loss": 0.3735, "step": 6731 }, { "epoch": 0.45993031358885017, "grad_norm": 4.158377647399902, "learning_rate": 4.12208577297175e-06, "loss": 0.27, "step": 6732 }, { "epoch": 0.4599986335997814, "grad_norm": 4.365997314453125, "learning_rate": 4.121323589720912e-06, "loss": 0.3808, "step": 6733 }, { "epoch": 0.4600669536107126, "grad_norm": 3.9965622425079346, "learning_rate": 4.120561376045506e-06, "loss": 0.3353, "step": 6734 }, { "epoch": 0.4601352736216438, "grad_norm": 3.1855812072753906, "learning_rate": 4.119799131982857e-06, "loss": 0.2905, "step": 6735 }, { "epoch": 0.46020359363257496, "grad_norm": 3.369128942489624, "learning_rate": 4.119036857570286e-06, "loss": 0.3419, "step": 6736 }, { "epoch": 0.46027191364350617, "grad_norm": 4.262966632843018, "learning_rate": 4.118274552845125e-06, "loss": 0.3581, "step": 6737 }, { "epoch": 0.4603402336544374, "grad_norm": 4.116734027862549, "learning_rate": 4.117512217844697e-06, "loss": 0.3621, "step": 6738 }, { "epoch": 0.4604085536653686, "grad_norm": 3.5594418048858643, "learning_rate": 4.116749852606335e-06, "loss": 0.2779, "step": 6739 }, { "epoch": 0.4604768736762998, "grad_norm": 3.6146206855773926, "learning_rate": 4.1159874571673676e-06, "loss": 0.298, "step": 6740 }, { "epoch": 0.460545193687231, "grad_norm": 4.615256309509277, "learning_rate": 4.11522503156513e-06, "loss": 0.3358, "step": 6741 }, { "epoch": 0.46061351369816217, "grad_norm": 3.577056884765625, "learning_rate": 4.114462575836954e-06, "loss": 0.3184, "step": 6742 }, { "epoch": 0.4606818337090934, "grad_norm": 3.584458112716675, "learning_rate": 4.113700090020174e-06, "loss": 0.2896, "step": 6743 }, { "epoch": 0.4607501537200246, "grad_norm": 2.9469447135925293, "learning_rate": 4.11293757415213e-06, "loss": 0.261, "step": 6744 }, { "epoch": 0.4608184737309558, "grad_norm": 3.320901870727539, "learning_rate": 4.1121750282701584e-06, "loss": 0.2959, "step": 6745 }, { "epoch": 0.460886793741887, "grad_norm": 4.032721996307373, "learning_rate": 4.111412452411599e-06, "loss": 0.3849, "step": 6746 }, { "epoch": 0.4609551137528182, "grad_norm": 3.865248680114746, "learning_rate": 4.110649846613795e-06, "loss": 0.4138, "step": 6747 }, { "epoch": 0.4610234337637494, "grad_norm": 3.0370116233825684, "learning_rate": 4.109887210914087e-06, "loss": 0.2766, "step": 6748 }, { "epoch": 0.4610917537746806, "grad_norm": 4.50259256362915, "learning_rate": 4.109124545349819e-06, "loss": 0.3497, "step": 6749 }, { "epoch": 0.4611600737856118, "grad_norm": 4.030839920043945, "learning_rate": 4.1083618499583385e-06, "loss": 0.4549, "step": 6750 }, { "epoch": 0.461228393796543, "grad_norm": 3.3771774768829346, "learning_rate": 4.107599124776993e-06, "loss": 0.2178, "step": 6751 }, { "epoch": 0.4612967138074742, "grad_norm": 3.8708276748657227, "learning_rate": 4.106836369843129e-06, "loss": 0.3211, "step": 6752 }, { "epoch": 0.46136503381840543, "grad_norm": 4.009035110473633, "learning_rate": 4.106073585194097e-06, "loss": 0.4006, "step": 6753 }, { "epoch": 0.4614333538293366, "grad_norm": 3.564354181289673, "learning_rate": 4.10531077086725e-06, "loss": 0.3482, "step": 6754 }, { "epoch": 0.4615016738402678, "grad_norm": 3.6521828174591064, "learning_rate": 4.104547926899941e-06, "loss": 0.3502, "step": 6755 }, { "epoch": 0.461569993851199, "grad_norm": 3.9579756259918213, "learning_rate": 4.1037850533295215e-06, "loss": 0.3571, "step": 6756 }, { "epoch": 0.4616383138621302, "grad_norm": 4.3782172203063965, "learning_rate": 4.103022150193352e-06, "loss": 0.3767, "step": 6757 }, { "epoch": 0.46170663387306143, "grad_norm": 3.8807992935180664, "learning_rate": 4.102259217528786e-06, "loss": 0.3111, "step": 6758 }, { "epoch": 0.46177495388399264, "grad_norm": 5.182429313659668, "learning_rate": 4.101496255373184e-06, "loss": 0.3504, "step": 6759 }, { "epoch": 0.4618432738949238, "grad_norm": 5.133973598480225, "learning_rate": 4.100733263763905e-06, "loss": 0.4099, "step": 6760 }, { "epoch": 0.461911593905855, "grad_norm": 4.339750289916992, "learning_rate": 4.099970242738313e-06, "loss": 0.4067, "step": 6761 }, { "epoch": 0.4619799139167862, "grad_norm": 3.0975687503814697, "learning_rate": 4.099207192333769e-06, "loss": 0.2018, "step": 6762 }, { "epoch": 0.46204823392771743, "grad_norm": 3.2136757373809814, "learning_rate": 4.098444112587638e-06, "loss": 0.3189, "step": 6763 }, { "epoch": 0.46211655393864864, "grad_norm": 4.287211894989014, "learning_rate": 4.097681003537286e-06, "loss": 0.2977, "step": 6764 }, { "epoch": 0.46218487394957986, "grad_norm": 4.088738441467285, "learning_rate": 4.096917865220081e-06, "loss": 0.2955, "step": 6765 }, { "epoch": 0.462253193960511, "grad_norm": 3.4699854850769043, "learning_rate": 4.0961546976733905e-06, "loss": 0.3051, "step": 6766 }, { "epoch": 0.4623215139714422, "grad_norm": 4.347038745880127, "learning_rate": 4.095391500934588e-06, "loss": 0.4272, "step": 6767 }, { "epoch": 0.46238983398237343, "grad_norm": 3.660649538040161, "learning_rate": 4.094628275041042e-06, "loss": 0.4059, "step": 6768 }, { "epoch": 0.46245815399330464, "grad_norm": 7.7328643798828125, "learning_rate": 4.093865020030126e-06, "loss": 0.2855, "step": 6769 }, { "epoch": 0.46252647400423585, "grad_norm": 3.4450457096099854, "learning_rate": 4.0931017359392146e-06, "loss": 0.3596, "step": 6770 }, { "epoch": 0.46259479401516707, "grad_norm": 3.6881656646728516, "learning_rate": 4.092338422805687e-06, "loss": 0.3156, "step": 6771 }, { "epoch": 0.4626631140260982, "grad_norm": 3.2949059009552, "learning_rate": 4.091575080666915e-06, "loss": 0.2453, "step": 6772 }, { "epoch": 0.46273143403702943, "grad_norm": 4.041884422302246, "learning_rate": 4.0908117095602825e-06, "loss": 0.3579, "step": 6773 }, { "epoch": 0.46279975404796064, "grad_norm": 4.2647929191589355, "learning_rate": 4.090048309523167e-06, "loss": 0.325, "step": 6774 }, { "epoch": 0.46286807405889185, "grad_norm": 3.601439952850342, "learning_rate": 4.089284880592952e-06, "loss": 0.4055, "step": 6775 }, { "epoch": 0.46293639406982307, "grad_norm": 3.7180335521698, "learning_rate": 4.088521422807018e-06, "loss": 0.3161, "step": 6776 }, { "epoch": 0.4630047140807543, "grad_norm": 4.509265422821045, "learning_rate": 4.087757936202752e-06, "loss": 0.3901, "step": 6777 }, { "epoch": 0.46307303409168543, "grad_norm": 4.392324924468994, "learning_rate": 4.086994420817539e-06, "loss": 0.3612, "step": 6778 }, { "epoch": 0.46314135410261664, "grad_norm": 3.3654942512512207, "learning_rate": 4.086230876688765e-06, "loss": 0.3248, "step": 6779 }, { "epoch": 0.46320967411354785, "grad_norm": 3.857398748397827, "learning_rate": 4.085467303853822e-06, "loss": 0.2895, "step": 6780 }, { "epoch": 0.46327799412447906, "grad_norm": 3.8373794555664062, "learning_rate": 4.084703702350096e-06, "loss": 0.2968, "step": 6781 }, { "epoch": 0.4633463141354103, "grad_norm": 4.640571594238281, "learning_rate": 4.083940072214982e-06, "loss": 0.4259, "step": 6782 }, { "epoch": 0.4634146341463415, "grad_norm": 4.199073314666748, "learning_rate": 4.083176413485871e-06, "loss": 0.3662, "step": 6783 }, { "epoch": 0.46348295415727264, "grad_norm": 2.6802048683166504, "learning_rate": 4.082412726200158e-06, "loss": 0.3406, "step": 6784 }, { "epoch": 0.46355127416820385, "grad_norm": 4.005687236785889, "learning_rate": 4.08164901039524e-06, "loss": 0.3902, "step": 6785 }, { "epoch": 0.46361959417913506, "grad_norm": 3.7458560466766357, "learning_rate": 4.080885266108511e-06, "loss": 0.3106, "step": 6786 }, { "epoch": 0.4636879141900663, "grad_norm": 3.215420722961426, "learning_rate": 4.080121493377372e-06, "loss": 0.3168, "step": 6787 }, { "epoch": 0.4637562342009975, "grad_norm": 4.634816646575928, "learning_rate": 4.079357692239223e-06, "loss": 0.4201, "step": 6788 }, { "epoch": 0.4638245542119287, "grad_norm": 3.8977420330047607, "learning_rate": 4.078593862731463e-06, "loss": 0.3539, "step": 6789 }, { "epoch": 0.46389287422285985, "grad_norm": 3.330096960067749, "learning_rate": 4.077830004891497e-06, "loss": 0.3467, "step": 6790 }, { "epoch": 0.46396119423379106, "grad_norm": 3.808000087738037, "learning_rate": 4.077066118756729e-06, "loss": 0.3923, "step": 6791 }, { "epoch": 0.4640295142447223, "grad_norm": 3.5450148582458496, "learning_rate": 4.076302204364563e-06, "loss": 0.3228, "step": 6792 }, { "epoch": 0.4640978342556535, "grad_norm": 3.632847785949707, "learning_rate": 4.0755382617524075e-06, "loss": 0.2595, "step": 6793 }, { "epoch": 0.4641661542665847, "grad_norm": 3.507702350616455, "learning_rate": 4.07477429095767e-06, "loss": 0.3148, "step": 6794 }, { "epoch": 0.4642344742775159, "grad_norm": 3.3785061836242676, "learning_rate": 4.07401029201776e-06, "loss": 0.3839, "step": 6795 }, { "epoch": 0.46430279428844706, "grad_norm": 5.557077884674072, "learning_rate": 4.073246264970088e-06, "loss": 0.345, "step": 6796 }, { "epoch": 0.4643711142993783, "grad_norm": 3.537322759628296, "learning_rate": 4.0724822098520666e-06, "loss": 0.3136, "step": 6797 }, { "epoch": 0.4644394343103095, "grad_norm": 4.769954204559326, "learning_rate": 4.0717181267011114e-06, "loss": 0.4637, "step": 6798 }, { "epoch": 0.4645077543212407, "grad_norm": 5.121145725250244, "learning_rate": 4.070954015554635e-06, "loss": 0.4785, "step": 6799 }, { "epoch": 0.4645760743321719, "grad_norm": 3.590036392211914, "learning_rate": 4.070189876450055e-06, "loss": 0.286, "step": 6800 }, { "epoch": 0.4646443943431031, "grad_norm": 4.132439136505127, "learning_rate": 4.06942570942479e-06, "loss": 0.3331, "step": 6801 }, { "epoch": 0.4647127143540343, "grad_norm": 4.677175998687744, "learning_rate": 4.068661514516258e-06, "loss": 0.5052, "step": 6802 }, { "epoch": 0.4647810343649655, "grad_norm": 3.885303258895874, "learning_rate": 4.067897291761879e-06, "loss": 0.2972, "step": 6803 }, { "epoch": 0.4648493543758967, "grad_norm": 3.625845432281494, "learning_rate": 4.067133041199078e-06, "loss": 0.3773, "step": 6804 }, { "epoch": 0.4649176743868279, "grad_norm": 3.1702635288238525, "learning_rate": 4.066368762865275e-06, "loss": 0.3401, "step": 6805 }, { "epoch": 0.4649859943977591, "grad_norm": 2.815871000289917, "learning_rate": 4.065604456797896e-06, "loss": 0.3207, "step": 6806 }, { "epoch": 0.46505431440869033, "grad_norm": 2.957028388977051, "learning_rate": 4.064840123034368e-06, "loss": 0.3228, "step": 6807 }, { "epoch": 0.4651226344196215, "grad_norm": 4.242199420928955, "learning_rate": 4.064075761612116e-06, "loss": 0.3364, "step": 6808 }, { "epoch": 0.4651909544305527, "grad_norm": 3.6468610763549805, "learning_rate": 4.063311372568571e-06, "loss": 0.3334, "step": 6809 }, { "epoch": 0.4652592744414839, "grad_norm": 3.5325734615325928, "learning_rate": 4.062546955941163e-06, "loss": 0.3165, "step": 6810 }, { "epoch": 0.4653275944524151, "grad_norm": 3.2158620357513428, "learning_rate": 4.061782511767322e-06, "loss": 0.3029, "step": 6811 }, { "epoch": 0.4653959144633463, "grad_norm": 3.297410249710083, "learning_rate": 4.0610180400844805e-06, "loss": 0.302, "step": 6812 }, { "epoch": 0.46546423447427754, "grad_norm": 3.6951563358306885, "learning_rate": 4.060253540930075e-06, "loss": 0.2387, "step": 6813 }, { "epoch": 0.4655325544852087, "grad_norm": 3.7405846118927, "learning_rate": 4.05948901434154e-06, "loss": 0.2851, "step": 6814 }, { "epoch": 0.4656008744961399, "grad_norm": 4.016996383666992, "learning_rate": 4.058724460356311e-06, "loss": 0.3269, "step": 6815 }, { "epoch": 0.4656691945070711, "grad_norm": 4.085407733917236, "learning_rate": 4.057959879011829e-06, "loss": 0.3489, "step": 6816 }, { "epoch": 0.4657375145180023, "grad_norm": 3.647552251815796, "learning_rate": 4.057195270345529e-06, "loss": 0.3333, "step": 6817 }, { "epoch": 0.46580583452893354, "grad_norm": 3.73292875289917, "learning_rate": 4.056430634394856e-06, "loss": 0.3172, "step": 6818 }, { "epoch": 0.46587415453986475, "grad_norm": 3.90075421333313, "learning_rate": 4.055665971197251e-06, "loss": 0.4193, "step": 6819 }, { "epoch": 0.4659424745507959, "grad_norm": 3.2998909950256348, "learning_rate": 4.0549012807901576e-06, "loss": 0.2356, "step": 6820 }, { "epoch": 0.4660107945617271, "grad_norm": 3.990917205810547, "learning_rate": 4.05413656321102e-06, "loss": 0.4262, "step": 6821 }, { "epoch": 0.4660791145726583, "grad_norm": 4.555388927459717, "learning_rate": 4.0533718184972855e-06, "loss": 0.3178, "step": 6822 }, { "epoch": 0.46614743458358954, "grad_norm": 4.989123821258545, "learning_rate": 4.052607046686399e-06, "loss": 0.3719, "step": 6823 }, { "epoch": 0.46621575459452075, "grad_norm": 4.32425594329834, "learning_rate": 4.051842247815813e-06, "loss": 0.3703, "step": 6824 }, { "epoch": 0.46628407460545196, "grad_norm": 3.882049798965454, "learning_rate": 4.051077421922976e-06, "loss": 0.3436, "step": 6825 }, { "epoch": 0.4663523946163831, "grad_norm": 4.582773208618164, "learning_rate": 4.0503125690453385e-06, "loss": 0.4083, "step": 6826 }, { "epoch": 0.4664207146273143, "grad_norm": 3.961885929107666, "learning_rate": 4.049547689220354e-06, "loss": 0.3502, "step": 6827 }, { "epoch": 0.46648903463824554, "grad_norm": 4.1477274894714355, "learning_rate": 4.0487827824854775e-06, "loss": 0.324, "step": 6828 }, { "epoch": 0.46655735464917675, "grad_norm": 3.573943853378296, "learning_rate": 4.0480178488781635e-06, "loss": 0.304, "step": 6829 }, { "epoch": 0.46662567466010796, "grad_norm": 4.423116683959961, "learning_rate": 4.04725288843587e-06, "loss": 0.374, "step": 6830 }, { "epoch": 0.46669399467103917, "grad_norm": 4.972095489501953, "learning_rate": 4.046487901196054e-06, "loss": 0.2457, "step": 6831 }, { "epoch": 0.4667623146819703, "grad_norm": 4.0632100105285645, "learning_rate": 4.0457228871961734e-06, "loss": 0.421, "step": 6832 }, { "epoch": 0.46683063469290154, "grad_norm": 3.8051342964172363, "learning_rate": 4.044957846473692e-06, "loss": 0.3095, "step": 6833 }, { "epoch": 0.46689895470383275, "grad_norm": 5.32199764251709, "learning_rate": 4.044192779066071e-06, "loss": 0.4464, "step": 6834 }, { "epoch": 0.46696727471476396, "grad_norm": 4.142938613891602, "learning_rate": 4.0434276850107705e-06, "loss": 0.3668, "step": 6835 }, { "epoch": 0.46703559472569517, "grad_norm": 3.665492296218872, "learning_rate": 4.042662564345261e-06, "loss": 0.272, "step": 6836 }, { "epoch": 0.4671039147366264, "grad_norm": 4.4392619132995605, "learning_rate": 4.041897417107003e-06, "loss": 0.3208, "step": 6837 }, { "epoch": 0.46717223474755754, "grad_norm": 4.076345443725586, "learning_rate": 4.041132243333466e-06, "loss": 0.2992, "step": 6838 }, { "epoch": 0.46724055475848875, "grad_norm": 3.1104161739349365, "learning_rate": 4.040367043062119e-06, "loss": 0.2557, "step": 6839 }, { "epoch": 0.46730887476941996, "grad_norm": 3.6238839626312256, "learning_rate": 4.039601816330431e-06, "loss": 0.3196, "step": 6840 }, { "epoch": 0.46737719478035117, "grad_norm": 4.157689094543457, "learning_rate": 4.0388365631758745e-06, "loss": 0.3596, "step": 6841 }, { "epoch": 0.4674455147912824, "grad_norm": 3.2612359523773193, "learning_rate": 4.038071283635919e-06, "loss": 0.2036, "step": 6842 }, { "epoch": 0.4675138348022136, "grad_norm": 2.6752054691314697, "learning_rate": 4.037305977748041e-06, "loss": 0.2562, "step": 6843 }, { "epoch": 0.46758215481314475, "grad_norm": 3.5423951148986816, "learning_rate": 4.036540645549714e-06, "loss": 0.3448, "step": 6844 }, { "epoch": 0.46765047482407596, "grad_norm": 4.560976028442383, "learning_rate": 4.035775287078415e-06, "loss": 0.318, "step": 6845 }, { "epoch": 0.46771879483500717, "grad_norm": 4.5475754737854, "learning_rate": 4.035009902371621e-06, "loss": 0.3356, "step": 6846 }, { "epoch": 0.4677871148459384, "grad_norm": 4.674088478088379, "learning_rate": 4.034244491466813e-06, "loss": 0.3149, "step": 6847 }, { "epoch": 0.4678554348568696, "grad_norm": 3.742961883544922, "learning_rate": 4.033479054401468e-06, "loss": 0.3338, "step": 6848 }, { "epoch": 0.4679237548678008, "grad_norm": 3.3222134113311768, "learning_rate": 4.032713591213069e-06, "loss": 0.2922, "step": 6849 }, { "epoch": 0.46799207487873196, "grad_norm": 4.02695369720459, "learning_rate": 4.031948101939098e-06, "loss": 0.3138, "step": 6850 }, { "epoch": 0.46806039488966317, "grad_norm": 3.4317476749420166, "learning_rate": 4.03118258661704e-06, "loss": 0.3247, "step": 6851 }, { "epoch": 0.4681287149005944, "grad_norm": 4.492177486419678, "learning_rate": 4.03041704528438e-06, "loss": 0.3065, "step": 6852 }, { "epoch": 0.4681970349115256, "grad_norm": 4.504474639892578, "learning_rate": 4.029651477978604e-06, "loss": 0.4358, "step": 6853 }, { "epoch": 0.4682653549224568, "grad_norm": 3.7998616695404053, "learning_rate": 4.0288858847372e-06, "loss": 0.3509, "step": 6854 }, { "epoch": 0.468333674933388, "grad_norm": 3.4888038635253906, "learning_rate": 4.028120265597657e-06, "loss": 0.3543, "step": 6855 }, { "epoch": 0.46840199494431917, "grad_norm": 4.318785667419434, "learning_rate": 4.027354620597465e-06, "loss": 0.2918, "step": 6856 }, { "epoch": 0.4684703149552504, "grad_norm": 3.576672315597534, "learning_rate": 4.026588949774117e-06, "loss": 0.31, "step": 6857 }, { "epoch": 0.4685386349661816, "grad_norm": 4.719231605529785, "learning_rate": 4.025823253165104e-06, "loss": 0.2837, "step": 6858 }, { "epoch": 0.4686069549771128, "grad_norm": 3.609027862548828, "learning_rate": 4.025057530807923e-06, "loss": 0.4016, "step": 6859 }, { "epoch": 0.468675274988044, "grad_norm": 4.871450424194336, "learning_rate": 4.024291782740065e-06, "loss": 0.4017, "step": 6860 }, { "epoch": 0.4687435949989752, "grad_norm": 4.006382942199707, "learning_rate": 4.0235260089990305e-06, "loss": 0.3058, "step": 6861 }, { "epoch": 0.4688119150099064, "grad_norm": 4.018517017364502, "learning_rate": 4.022760209622315e-06, "loss": 0.2462, "step": 6862 }, { "epoch": 0.4688802350208376, "grad_norm": 4.006113052368164, "learning_rate": 4.02199438464742e-06, "loss": 0.356, "step": 6863 }, { "epoch": 0.4689485550317688, "grad_norm": 3.656473398208618, "learning_rate": 4.021228534111844e-06, "loss": 0.335, "step": 6864 }, { "epoch": 0.4690168750427, "grad_norm": 3.671313762664795, "learning_rate": 4.020462658053089e-06, "loss": 0.3368, "step": 6865 }, { "epoch": 0.4690851950536312, "grad_norm": 4.3537797927856445, "learning_rate": 4.019696756508658e-06, "loss": 0.3102, "step": 6866 }, { "epoch": 0.46915351506456243, "grad_norm": 3.3467061519622803, "learning_rate": 4.018930829516057e-06, "loss": 0.3098, "step": 6867 }, { "epoch": 0.4692218350754936, "grad_norm": 4.267374515533447, "learning_rate": 4.018164877112788e-06, "loss": 0.3771, "step": 6868 }, { "epoch": 0.4692901550864248, "grad_norm": 4.074016094207764, "learning_rate": 4.01739889933636e-06, "loss": 0.4287, "step": 6869 }, { "epoch": 0.469358475097356, "grad_norm": 3.264981746673584, "learning_rate": 4.0166328962242805e-06, "loss": 0.2988, "step": 6870 }, { "epoch": 0.4694267951082872, "grad_norm": 3.98559832572937, "learning_rate": 4.015866867814059e-06, "loss": 0.2859, "step": 6871 }, { "epoch": 0.46949511511921843, "grad_norm": 3.614593267440796, "learning_rate": 4.015100814143204e-06, "loss": 0.2849, "step": 6872 }, { "epoch": 0.46956343513014964, "grad_norm": 4.310020923614502, "learning_rate": 4.0143347352492294e-06, "loss": 0.3172, "step": 6873 }, { "epoch": 0.4696317551410808, "grad_norm": 3.846663475036621, "learning_rate": 4.0135686311696465e-06, "loss": 0.3296, "step": 6874 }, { "epoch": 0.469700075152012, "grad_norm": 5.333407878875732, "learning_rate": 4.0128025019419705e-06, "loss": 0.3077, "step": 6875 }, { "epoch": 0.4697683951629432, "grad_norm": 3.7665088176727295, "learning_rate": 4.0120363476037165e-06, "loss": 0.3582, "step": 6876 }, { "epoch": 0.46983671517387443, "grad_norm": 4.940201282501221, "learning_rate": 4.0112701681924e-06, "loss": 0.3002, "step": 6877 }, { "epoch": 0.46990503518480564, "grad_norm": 3.0482935905456543, "learning_rate": 4.010503963745539e-06, "loss": 0.243, "step": 6878 }, { "epoch": 0.46997335519573685, "grad_norm": 4.890322208404541, "learning_rate": 4.009737734300655e-06, "loss": 0.4612, "step": 6879 }, { "epoch": 0.470041675206668, "grad_norm": 3.476278305053711, "learning_rate": 4.008971479895265e-06, "loss": 0.3253, "step": 6880 }, { "epoch": 0.4701099952175992, "grad_norm": 4.292727947235107, "learning_rate": 4.008205200566892e-06, "loss": 0.3589, "step": 6881 }, { "epoch": 0.47017831522853043, "grad_norm": 4.661533832550049, "learning_rate": 4.007438896353057e-06, "loss": 0.5176, "step": 6882 }, { "epoch": 0.47024663523946164, "grad_norm": 4.751856803894043, "learning_rate": 4.006672567291288e-06, "loss": 0.4242, "step": 6883 }, { "epoch": 0.47031495525039285, "grad_norm": 3.227170467376709, "learning_rate": 4.0059062134191064e-06, "loss": 0.3665, "step": 6884 }, { "epoch": 0.47038327526132406, "grad_norm": 3.702296495437622, "learning_rate": 4.0051398347740385e-06, "loss": 0.3351, "step": 6885 }, { "epoch": 0.4704515952722552, "grad_norm": 3.6301651000976562, "learning_rate": 4.004373431393613e-06, "loss": 0.3096, "step": 6886 }, { "epoch": 0.47051991528318643, "grad_norm": 3.4249205589294434, "learning_rate": 4.003607003315359e-06, "loss": 0.3341, "step": 6887 }, { "epoch": 0.47058823529411764, "grad_norm": 5.759091854095459, "learning_rate": 4.002840550576805e-06, "loss": 0.5262, "step": 6888 }, { "epoch": 0.47065655530504885, "grad_norm": 5.334762096405029, "learning_rate": 4.002074073215484e-06, "loss": 0.2608, "step": 6889 }, { "epoch": 0.47072487531598006, "grad_norm": 3.5775716304779053, "learning_rate": 4.001307571268927e-06, "loss": 0.2534, "step": 6890 }, { "epoch": 0.4707931953269113, "grad_norm": 2.824631452560425, "learning_rate": 4.000541044774668e-06, "loss": 0.329, "step": 6891 }, { "epoch": 0.47086151533784243, "grad_norm": 3.9996702671051025, "learning_rate": 3.999774493770242e-06, "loss": 0.3551, "step": 6892 }, { "epoch": 0.47092983534877364, "grad_norm": 4.764784812927246, "learning_rate": 3.999007918293185e-06, "loss": 0.4815, "step": 6893 }, { "epoch": 0.47099815535970485, "grad_norm": 3.897045850753784, "learning_rate": 3.998241318381033e-06, "loss": 0.3533, "step": 6894 }, { "epoch": 0.47106647537063606, "grad_norm": 4.155614376068115, "learning_rate": 3.997474694071325e-06, "loss": 0.4095, "step": 6895 }, { "epoch": 0.4711347953815673, "grad_norm": 3.620919942855835, "learning_rate": 3.996708045401601e-06, "loss": 0.278, "step": 6896 }, { "epoch": 0.4712031153924985, "grad_norm": 4.017951488494873, "learning_rate": 3.995941372409403e-06, "loss": 0.5059, "step": 6897 }, { "epoch": 0.47127143540342964, "grad_norm": 3.496400833129883, "learning_rate": 3.99517467513227e-06, "loss": 0.3468, "step": 6898 }, { "epoch": 0.47133975541436085, "grad_norm": 3.512815475463867, "learning_rate": 3.994407953607748e-06, "loss": 0.2976, "step": 6899 }, { "epoch": 0.47140807542529206, "grad_norm": 3.524501085281372, "learning_rate": 3.9936412078733785e-06, "loss": 0.4081, "step": 6900 }, { "epoch": 0.4714763954362233, "grad_norm": 3.456601858139038, "learning_rate": 3.992874437966709e-06, "loss": 0.3147, "step": 6901 }, { "epoch": 0.4715447154471545, "grad_norm": 3.6348702907562256, "learning_rate": 3.992107643925285e-06, "loss": 0.3077, "step": 6902 }, { "epoch": 0.4716130354580857, "grad_norm": 3.8248023986816406, "learning_rate": 3.991340825786658e-06, "loss": 0.3524, "step": 6903 }, { "epoch": 0.47168135546901685, "grad_norm": 3.9421634674072266, "learning_rate": 3.99057398358837e-06, "loss": 0.3486, "step": 6904 }, { "epoch": 0.47174967547994806, "grad_norm": 4.457542419433594, "learning_rate": 3.989807117367977e-06, "loss": 0.3199, "step": 6905 }, { "epoch": 0.47181799549087927, "grad_norm": 3.741060733795166, "learning_rate": 3.98904022716303e-06, "loss": 0.258, "step": 6906 }, { "epoch": 0.4718863155018105, "grad_norm": 4.961792469024658, "learning_rate": 3.988273313011079e-06, "loss": 0.4118, "step": 6907 }, { "epoch": 0.4719546355127417, "grad_norm": 4.090061187744141, "learning_rate": 3.987506374949678e-06, "loss": 0.3894, "step": 6908 }, { "epoch": 0.4720229555236729, "grad_norm": 4.228132247924805, "learning_rate": 3.986739413016384e-06, "loss": 0.4353, "step": 6909 }, { "epoch": 0.47209127553460406, "grad_norm": 4.811740875244141, "learning_rate": 3.985972427248753e-06, "loss": 0.4409, "step": 6910 }, { "epoch": 0.47215959554553527, "grad_norm": 4.221566200256348, "learning_rate": 3.98520541768434e-06, "loss": 0.3704, "step": 6911 }, { "epoch": 0.4722279155564665, "grad_norm": 3.430016040802002, "learning_rate": 3.984438384360705e-06, "loss": 0.3425, "step": 6912 }, { "epoch": 0.4722962355673977, "grad_norm": 4.005910396575928, "learning_rate": 3.9836713273154075e-06, "loss": 0.3666, "step": 6913 }, { "epoch": 0.4723645555783289, "grad_norm": 3.7781922817230225, "learning_rate": 3.982904246586008e-06, "loss": 0.3035, "step": 6914 }, { "epoch": 0.4724328755892601, "grad_norm": 4.0481791496276855, "learning_rate": 3.982137142210069e-06, "loss": 0.4106, "step": 6915 }, { "epoch": 0.47250119560019127, "grad_norm": 5.182481288909912, "learning_rate": 3.981370014225153e-06, "loss": 0.3732, "step": 6916 }, { "epoch": 0.4725695156111225, "grad_norm": 3.4757449626922607, "learning_rate": 3.980602862668824e-06, "loss": 0.3911, "step": 6917 }, { "epoch": 0.4726378356220537, "grad_norm": 3.908087730407715, "learning_rate": 3.979835687578647e-06, "loss": 0.2767, "step": 6918 }, { "epoch": 0.4727061556329849, "grad_norm": 3.408573627471924, "learning_rate": 3.97906848899219e-06, "loss": 0.3956, "step": 6919 }, { "epoch": 0.4727744756439161, "grad_norm": 3.164788246154785, "learning_rate": 3.97830126694702e-06, "loss": 0.3386, "step": 6920 }, { "epoch": 0.4728427956548473, "grad_norm": 3.1389756202697754, "learning_rate": 3.977534021480704e-06, "loss": 0.2435, "step": 6921 }, { "epoch": 0.4729111156657785, "grad_norm": 6.294010639190674, "learning_rate": 3.976766752630815e-06, "loss": 0.317, "step": 6922 }, { "epoch": 0.4729794356767097, "grad_norm": 3.001195192337036, "learning_rate": 3.9759994604349225e-06, "loss": 0.2389, "step": 6923 }, { "epoch": 0.4730477556876409, "grad_norm": 3.491915464401245, "learning_rate": 3.9752321449306e-06, "loss": 0.3228, "step": 6924 }, { "epoch": 0.4731160756985721, "grad_norm": 4.265435695648193, "learning_rate": 3.974464806155419e-06, "loss": 0.3513, "step": 6925 }, { "epoch": 0.4731843957095033, "grad_norm": 3.876253843307495, "learning_rate": 3.973697444146955e-06, "loss": 0.2974, "step": 6926 }, { "epoch": 0.47325271572043454, "grad_norm": 3.9046356678009033, "learning_rate": 3.972930058942784e-06, "loss": 0.3175, "step": 6927 }, { "epoch": 0.4733210357313657, "grad_norm": 3.741114377975464, "learning_rate": 3.972162650580482e-06, "loss": 0.3437, "step": 6928 }, { "epoch": 0.4733893557422969, "grad_norm": 3.7855329513549805, "learning_rate": 3.971395219097628e-06, "loss": 0.3955, "step": 6929 }, { "epoch": 0.4734576757532281, "grad_norm": 4.157637119293213, "learning_rate": 3.970627764531801e-06, "loss": 0.3618, "step": 6930 }, { "epoch": 0.4735259957641593, "grad_norm": 4.841651916503906, "learning_rate": 3.969860286920579e-06, "loss": 0.3811, "step": 6931 }, { "epoch": 0.47359431577509054, "grad_norm": 5.22726583480835, "learning_rate": 3.969092786301547e-06, "loss": 0.4094, "step": 6932 }, { "epoch": 0.47366263578602175, "grad_norm": 3.9842734336853027, "learning_rate": 3.968325262712284e-06, "loss": 0.3472, "step": 6933 }, { "epoch": 0.4737309557969529, "grad_norm": 3.4022772312164307, "learning_rate": 3.967557716190375e-06, "loss": 0.3561, "step": 6934 }, { "epoch": 0.4737992758078841, "grad_norm": 3.210289478302002, "learning_rate": 3.966790146773406e-06, "loss": 0.3313, "step": 6935 }, { "epoch": 0.4738675958188153, "grad_norm": 3.3920931816101074, "learning_rate": 3.966022554498962e-06, "loss": 0.3606, "step": 6936 }, { "epoch": 0.47393591582974653, "grad_norm": 4.329751491546631, "learning_rate": 3.965254939404629e-06, "loss": 0.3079, "step": 6937 }, { "epoch": 0.47400423584067775, "grad_norm": 3.4984829425811768, "learning_rate": 3.964487301527995e-06, "loss": 0.2634, "step": 6938 }, { "epoch": 0.47407255585160896, "grad_norm": 3.9667463302612305, "learning_rate": 3.963719640906651e-06, "loss": 0.2686, "step": 6939 }, { "epoch": 0.4741408758625401, "grad_norm": 3.8356621265411377, "learning_rate": 3.9629519575781865e-06, "loss": 0.3828, "step": 6940 }, { "epoch": 0.4742091958734713, "grad_norm": 2.7983365058898926, "learning_rate": 3.962184251580191e-06, "loss": 0.3005, "step": 6941 }, { "epoch": 0.47427751588440253, "grad_norm": 3.303952932357788, "learning_rate": 3.96141652295026e-06, "loss": 0.2198, "step": 6942 }, { "epoch": 0.47434583589533375, "grad_norm": 3.771099805831909, "learning_rate": 3.960648771725985e-06, "loss": 0.3319, "step": 6943 }, { "epoch": 0.47441415590626496, "grad_norm": 3.628779888153076, "learning_rate": 3.959880997944963e-06, "loss": 0.3269, "step": 6944 }, { "epoch": 0.47448247591719617, "grad_norm": 3.6936774253845215, "learning_rate": 3.959113201644787e-06, "loss": 0.2403, "step": 6945 }, { "epoch": 0.4745507959281273, "grad_norm": 5.329373359680176, "learning_rate": 3.958345382863056e-06, "loss": 0.3, "step": 6946 }, { "epoch": 0.47461911593905853, "grad_norm": 3.786637783050537, "learning_rate": 3.957577541637367e-06, "loss": 0.2776, "step": 6947 }, { "epoch": 0.47468743594998974, "grad_norm": 3.975013494491577, "learning_rate": 3.95680967800532e-06, "loss": 0.3878, "step": 6948 }, { "epoch": 0.47475575596092096, "grad_norm": 3.447657823562622, "learning_rate": 3.9560417920045146e-06, "loss": 0.3152, "step": 6949 }, { "epoch": 0.47482407597185217, "grad_norm": 4.647482395172119, "learning_rate": 3.955273883672551e-06, "loss": 0.3758, "step": 6950 }, { "epoch": 0.4748923959827834, "grad_norm": 4.683440208435059, "learning_rate": 3.954505953047035e-06, "loss": 0.296, "step": 6951 }, { "epoch": 0.47496071599371453, "grad_norm": 4.20034122467041, "learning_rate": 3.953738000165567e-06, "loss": 0.289, "step": 6952 }, { "epoch": 0.47502903600464574, "grad_norm": 4.0478715896606445, "learning_rate": 3.952970025065754e-06, "loss": 0.3269, "step": 6953 }, { "epoch": 0.47509735601557695, "grad_norm": 3.694119930267334, "learning_rate": 3.9522020277851986e-06, "loss": 0.2093, "step": 6954 }, { "epoch": 0.47516567602650817, "grad_norm": 4.318047046661377, "learning_rate": 3.951434008361509e-06, "loss": 0.3116, "step": 6955 }, { "epoch": 0.4752339960374394, "grad_norm": 3.9502170085906982, "learning_rate": 3.950665966832296e-06, "loss": 0.4255, "step": 6956 }, { "epoch": 0.4753023160483706, "grad_norm": 4.23166036605835, "learning_rate": 3.949897903235163e-06, "loss": 0.411, "step": 6957 }, { "epoch": 0.47537063605930174, "grad_norm": 4.254009246826172, "learning_rate": 3.949129817607726e-06, "loss": 0.3354, "step": 6958 }, { "epoch": 0.47543895607023295, "grad_norm": 6.0050458908081055, "learning_rate": 3.948361709987592e-06, "loss": 0.3234, "step": 6959 }, { "epoch": 0.47550727608116417, "grad_norm": 6.20290470123291, "learning_rate": 3.947593580412374e-06, "loss": 0.3517, "step": 6960 }, { "epoch": 0.4755755960920954, "grad_norm": 4.5411810874938965, "learning_rate": 3.946825428919686e-06, "loss": 0.2923, "step": 6961 }, { "epoch": 0.4756439161030266, "grad_norm": 4.863342761993408, "learning_rate": 3.946057255547142e-06, "loss": 0.4092, "step": 6962 }, { "epoch": 0.4757122361139578, "grad_norm": 5.215646266937256, "learning_rate": 3.945289060332357e-06, "loss": 0.3424, "step": 6963 }, { "epoch": 0.47578055612488895, "grad_norm": 6.862398624420166, "learning_rate": 3.944520843312948e-06, "loss": 0.3451, "step": 6964 }, { "epoch": 0.47584887613582016, "grad_norm": 3.605363607406616, "learning_rate": 3.9437526045265325e-06, "loss": 0.3102, "step": 6965 }, { "epoch": 0.4759171961467514, "grad_norm": 3.648573875427246, "learning_rate": 3.9429843440107295e-06, "loss": 0.3373, "step": 6966 }, { "epoch": 0.4759855161576826, "grad_norm": 5.044005870819092, "learning_rate": 3.9422160618031565e-06, "loss": 0.383, "step": 6967 }, { "epoch": 0.4760538361686138, "grad_norm": 3.9293766021728516, "learning_rate": 3.941447757941438e-06, "loss": 0.4324, "step": 6968 }, { "epoch": 0.476122156179545, "grad_norm": 3.812666416168213, "learning_rate": 3.940679432463193e-06, "loss": 0.3663, "step": 6969 }, { "epoch": 0.47619047619047616, "grad_norm": 3.7349860668182373, "learning_rate": 3.939911085406045e-06, "loss": 0.3332, "step": 6970 }, { "epoch": 0.4762587962014074, "grad_norm": 5.23126220703125, "learning_rate": 3.939142716807618e-06, "loss": 0.3238, "step": 6971 }, { "epoch": 0.4763271162123386, "grad_norm": 3.9594781398773193, "learning_rate": 3.9383743267055375e-06, "loss": 0.3239, "step": 6972 }, { "epoch": 0.4763954362232698, "grad_norm": 3.8271265029907227, "learning_rate": 3.937605915137429e-06, "loss": 0.352, "step": 6973 }, { "epoch": 0.476463756234201, "grad_norm": 3.230903148651123, "learning_rate": 3.936837482140918e-06, "loss": 0.2645, "step": 6974 }, { "epoch": 0.4765320762451322, "grad_norm": 2.7279655933380127, "learning_rate": 3.9360690277536364e-06, "loss": 0.2853, "step": 6975 }, { "epoch": 0.4766003962560634, "grad_norm": 3.4773049354553223, "learning_rate": 3.93530055201321e-06, "loss": 0.3094, "step": 6976 }, { "epoch": 0.4766687162669946, "grad_norm": 4.363474369049072, "learning_rate": 3.93453205495727e-06, "loss": 0.3347, "step": 6977 }, { "epoch": 0.4767370362779258, "grad_norm": 3.94762921333313, "learning_rate": 3.933763536623449e-06, "loss": 0.3136, "step": 6978 }, { "epoch": 0.476805356288857, "grad_norm": 3.955155372619629, "learning_rate": 3.9329949970493785e-06, "loss": 0.2742, "step": 6979 }, { "epoch": 0.4768736762997882, "grad_norm": 3.735823631286621, "learning_rate": 3.93222643627269e-06, "loss": 0.3505, "step": 6980 }, { "epoch": 0.47694199631071943, "grad_norm": 4.148001670837402, "learning_rate": 3.93145785433102e-06, "loss": 0.2665, "step": 6981 }, { "epoch": 0.4770103163216506, "grad_norm": 3.38466739654541, "learning_rate": 3.930689251262004e-06, "loss": 0.3386, "step": 6982 }, { "epoch": 0.4770786363325818, "grad_norm": 4.188096523284912, "learning_rate": 3.929920627103277e-06, "loss": 0.2972, "step": 6983 }, { "epoch": 0.477146956343513, "grad_norm": 4.021984577178955, "learning_rate": 3.929151981892477e-06, "loss": 0.3658, "step": 6984 }, { "epoch": 0.4772152763544442, "grad_norm": 4.518863201141357, "learning_rate": 3.9283833156672435e-06, "loss": 0.4021, "step": 6985 }, { "epoch": 0.47728359636537543, "grad_norm": 4.000339984893799, "learning_rate": 3.9276146284652155e-06, "loss": 0.3515, "step": 6986 }, { "epoch": 0.47735191637630664, "grad_norm": 4.480921268463135, "learning_rate": 3.926845920324032e-06, "loss": 0.3828, "step": 6987 }, { "epoch": 0.4774202363872378, "grad_norm": 4.167462348937988, "learning_rate": 3.926077191281337e-06, "loss": 0.3278, "step": 6988 }, { "epoch": 0.477488556398169, "grad_norm": 4.737244129180908, "learning_rate": 3.925308441374774e-06, "loss": 0.4206, "step": 6989 }, { "epoch": 0.4775568764091002, "grad_norm": 3.5380890369415283, "learning_rate": 3.924539670641983e-06, "loss": 0.4, "step": 6990 }, { "epoch": 0.47762519642003143, "grad_norm": 4.479226589202881, "learning_rate": 3.92377087912061e-06, "loss": 0.2767, "step": 6991 }, { "epoch": 0.47769351643096264, "grad_norm": 4.122621536254883, "learning_rate": 3.923002066848301e-06, "loss": 0.3494, "step": 6992 }, { "epoch": 0.47776183644189385, "grad_norm": 3.914994478225708, "learning_rate": 3.922233233862703e-06, "loss": 0.3487, "step": 6993 }, { "epoch": 0.477830156452825, "grad_norm": 3.2989885807037354, "learning_rate": 3.921464380201464e-06, "loss": 0.274, "step": 6994 }, { "epoch": 0.4778984764637562, "grad_norm": 3.1218421459198, "learning_rate": 3.920695505902234e-06, "loss": 0.3318, "step": 6995 }, { "epoch": 0.4779667964746874, "grad_norm": 4.129315376281738, "learning_rate": 3.9199266110026584e-06, "loss": 0.4008, "step": 6996 }, { "epoch": 0.47803511648561864, "grad_norm": 4.146008491516113, "learning_rate": 3.919157695540391e-06, "loss": 0.3458, "step": 6997 }, { "epoch": 0.47810343649654985, "grad_norm": 4.217962741851807, "learning_rate": 3.918388759553084e-06, "loss": 0.3311, "step": 6998 }, { "epoch": 0.47817175650748106, "grad_norm": 4.9951372146606445, "learning_rate": 3.917619803078389e-06, "loss": 0.2729, "step": 6999 }, { "epoch": 0.4782400765184122, "grad_norm": 3.8530306816101074, "learning_rate": 3.916850826153958e-06, "loss": 0.3617, "step": 7000 }, { "epoch": 0.4783083965293434, "grad_norm": 2.8806424140930176, "learning_rate": 3.9160818288174506e-06, "loss": 0.2677, "step": 7001 }, { "epoch": 0.47837671654027464, "grad_norm": 4.288058280944824, "learning_rate": 3.915312811106518e-06, "loss": 0.2917, "step": 7002 }, { "epoch": 0.47844503655120585, "grad_norm": 3.877239942550659, "learning_rate": 3.9145437730588195e-06, "loss": 0.2497, "step": 7003 }, { "epoch": 0.47851335656213706, "grad_norm": 3.8610029220581055, "learning_rate": 3.913774714712012e-06, "loss": 0.2948, "step": 7004 }, { "epoch": 0.47858167657306827, "grad_norm": 4.3912787437438965, "learning_rate": 3.913005636103755e-06, "loss": 0.3459, "step": 7005 }, { "epoch": 0.4786499965839994, "grad_norm": 5.366875171661377, "learning_rate": 3.912236537271706e-06, "loss": 0.3382, "step": 7006 }, { "epoch": 0.47871831659493064, "grad_norm": 3.5463244915008545, "learning_rate": 3.911467418253528e-06, "loss": 0.2652, "step": 7007 }, { "epoch": 0.47878663660586185, "grad_norm": 3.179558038711548, "learning_rate": 3.910698279086882e-06, "loss": 0.3084, "step": 7008 }, { "epoch": 0.47885495661679306, "grad_norm": 3.5884220600128174, "learning_rate": 3.9099291198094305e-06, "loss": 0.3264, "step": 7009 }, { "epoch": 0.47892327662772427, "grad_norm": 4.773807048797607, "learning_rate": 3.909159940458837e-06, "loss": 0.2512, "step": 7010 }, { "epoch": 0.4789915966386555, "grad_norm": 4.461183071136475, "learning_rate": 3.908390741072767e-06, "loss": 0.3183, "step": 7011 }, { "epoch": 0.47905991664958664, "grad_norm": 3.3932435512542725, "learning_rate": 3.907621521688885e-06, "loss": 0.3159, "step": 7012 }, { "epoch": 0.47912823666051785, "grad_norm": 3.8999645709991455, "learning_rate": 3.906852282344859e-06, "loss": 0.4153, "step": 7013 }, { "epoch": 0.47919655667144906, "grad_norm": 3.9607505798339844, "learning_rate": 3.906083023078356e-06, "loss": 0.4097, "step": 7014 }, { "epoch": 0.47926487668238027, "grad_norm": 4.141533851623535, "learning_rate": 3.905313743927045e-06, "loss": 0.2664, "step": 7015 }, { "epoch": 0.4793331966933115, "grad_norm": 3.7059102058410645, "learning_rate": 3.904544444928594e-06, "loss": 0.2785, "step": 7016 }, { "epoch": 0.4794015167042427, "grad_norm": 3.9101409912109375, "learning_rate": 3.903775126120675e-06, "loss": 0.3962, "step": 7017 }, { "epoch": 0.47946983671517385, "grad_norm": 4.436603546142578, "learning_rate": 3.903005787540959e-06, "loss": 0.2841, "step": 7018 }, { "epoch": 0.47953815672610506, "grad_norm": 4.193756580352783, "learning_rate": 3.902236429227119e-06, "loss": 0.3393, "step": 7019 }, { "epoch": 0.47960647673703627, "grad_norm": 3.079110860824585, "learning_rate": 3.901467051216827e-06, "loss": 0.2761, "step": 7020 }, { "epoch": 0.4796747967479675, "grad_norm": 5.772738933563232, "learning_rate": 3.90069765354776e-06, "loss": 0.4066, "step": 7021 }, { "epoch": 0.4797431167588987, "grad_norm": 5.0271759033203125, "learning_rate": 3.89992823625759e-06, "loss": 0.333, "step": 7022 }, { "epoch": 0.4798114367698299, "grad_norm": 4.373832702636719, "learning_rate": 3.8991587993839965e-06, "loss": 0.2566, "step": 7023 }, { "epoch": 0.47987975678076106, "grad_norm": 4.263852119445801, "learning_rate": 3.898389342964655e-06, "loss": 0.2923, "step": 7024 }, { "epoch": 0.47994807679169227, "grad_norm": 3.744772434234619, "learning_rate": 3.897619867037244e-06, "loss": 0.332, "step": 7025 }, { "epoch": 0.4800163968026235, "grad_norm": 4.185589790344238, "learning_rate": 3.896850371639443e-06, "loss": 0.3195, "step": 7026 }, { "epoch": 0.4800847168135547, "grad_norm": 4.071225166320801, "learning_rate": 3.896080856808932e-06, "loss": 0.3526, "step": 7027 }, { "epoch": 0.4801530368244859, "grad_norm": 3.1594736576080322, "learning_rate": 3.895311322583391e-06, "loss": 0.2973, "step": 7028 }, { "epoch": 0.4802213568354171, "grad_norm": 4.3305206298828125, "learning_rate": 3.894541769000506e-06, "loss": 0.4138, "step": 7029 }, { "epoch": 0.48028967684634827, "grad_norm": 4.3644328117370605, "learning_rate": 3.893772196097954e-06, "loss": 0.3923, "step": 7030 }, { "epoch": 0.4803579968572795, "grad_norm": 3.3367574214935303, "learning_rate": 3.8930026039134234e-06, "loss": 0.3747, "step": 7031 }, { "epoch": 0.4804263168682107, "grad_norm": 3.3576536178588867, "learning_rate": 3.892232992484598e-06, "loss": 0.3302, "step": 7032 }, { "epoch": 0.4804946368791419, "grad_norm": 3.309905767440796, "learning_rate": 3.891463361849162e-06, "loss": 0.3795, "step": 7033 }, { "epoch": 0.4805629568900731, "grad_norm": 3.5694804191589355, "learning_rate": 3.890693712044805e-06, "loss": 0.2358, "step": 7034 }, { "epoch": 0.4806312769010043, "grad_norm": 4.332345485687256, "learning_rate": 3.889924043109213e-06, "loss": 0.2635, "step": 7035 }, { "epoch": 0.4806995969119355, "grad_norm": 3.815312385559082, "learning_rate": 3.889154355080075e-06, "loss": 0.3321, "step": 7036 }, { "epoch": 0.4807679169228667, "grad_norm": 3.22956919670105, "learning_rate": 3.88838464799508e-06, "loss": 0.2913, "step": 7037 }, { "epoch": 0.4808362369337979, "grad_norm": 3.4804797172546387, "learning_rate": 3.887614921891918e-06, "loss": 0.3194, "step": 7038 }, { "epoch": 0.4809045569447291, "grad_norm": 4.219523906707764, "learning_rate": 3.886845176808283e-06, "loss": 0.3845, "step": 7039 }, { "epoch": 0.4809728769556603, "grad_norm": 3.852289915084839, "learning_rate": 3.886075412781866e-06, "loss": 0.3425, "step": 7040 }, { "epoch": 0.48104119696659153, "grad_norm": 4.324203014373779, "learning_rate": 3.885305629850358e-06, "loss": 0.2991, "step": 7041 }, { "epoch": 0.4811095169775227, "grad_norm": 4.775223255157471, "learning_rate": 3.884535828051459e-06, "loss": 0.3702, "step": 7042 }, { "epoch": 0.4811778369884539, "grad_norm": 3.4641318321228027, "learning_rate": 3.883766007422857e-06, "loss": 0.3462, "step": 7043 }, { "epoch": 0.4812461569993851, "grad_norm": 3.583223342895508, "learning_rate": 3.8829961680022535e-06, "loss": 0.2582, "step": 7044 }, { "epoch": 0.4813144770103163, "grad_norm": 3.361222505569458, "learning_rate": 3.882226309827342e-06, "loss": 0.297, "step": 7045 }, { "epoch": 0.48138279702124753, "grad_norm": 5.434213638305664, "learning_rate": 3.8814564329358236e-06, "loss": 0.4811, "step": 7046 }, { "epoch": 0.48145111703217874, "grad_norm": 3.9662671089172363, "learning_rate": 3.8806865373653945e-06, "loss": 0.3096, "step": 7047 }, { "epoch": 0.48151943704310995, "grad_norm": 4.143031597137451, "learning_rate": 3.8799166231537566e-06, "loss": 0.452, "step": 7048 }, { "epoch": 0.4815877570540411, "grad_norm": 2.793959617614746, "learning_rate": 3.879146690338609e-06, "loss": 0.2856, "step": 7049 }, { "epoch": 0.4816560770649723, "grad_norm": 3.3899309635162354, "learning_rate": 3.878376738957653e-06, "loss": 0.29, "step": 7050 }, { "epoch": 0.48172439707590353, "grad_norm": 4.316115379333496, "learning_rate": 3.877606769048593e-06, "loss": 0.4146, "step": 7051 }, { "epoch": 0.48179271708683474, "grad_norm": 4.215080261230469, "learning_rate": 3.8768367806491295e-06, "loss": 0.4309, "step": 7052 }, { "epoch": 0.48186103709776595, "grad_norm": 3.357339859008789, "learning_rate": 3.876066773796969e-06, "loss": 0.2995, "step": 7053 }, { "epoch": 0.48192935710869717, "grad_norm": 3.887237071990967, "learning_rate": 3.875296748529817e-06, "loss": 0.2674, "step": 7054 }, { "epoch": 0.4819976771196283, "grad_norm": 4.008896827697754, "learning_rate": 3.874526704885378e-06, "loss": 0.2656, "step": 7055 }, { "epoch": 0.48206599713055953, "grad_norm": 4.267147541046143, "learning_rate": 3.873756642901359e-06, "loss": 0.3505, "step": 7056 }, { "epoch": 0.48213431714149074, "grad_norm": 3.4942128658294678, "learning_rate": 3.8729865626154696e-06, "loss": 0.3231, "step": 7057 }, { "epoch": 0.48220263715242195, "grad_norm": 3.446108102798462, "learning_rate": 3.8722164640654184e-06, "loss": 0.2635, "step": 7058 }, { "epoch": 0.48227095716335316, "grad_norm": 3.1456298828125, "learning_rate": 3.871446347288913e-06, "loss": 0.2854, "step": 7059 }, { "epoch": 0.4823392771742844, "grad_norm": 4.229757785797119, "learning_rate": 3.8706762123236654e-06, "loss": 0.3037, "step": 7060 }, { "epoch": 0.48240759718521553, "grad_norm": 3.0800795555114746, "learning_rate": 3.869906059207388e-06, "loss": 0.3452, "step": 7061 }, { "epoch": 0.48247591719614674, "grad_norm": 4.439645290374756, "learning_rate": 3.8691358879777905e-06, "loss": 0.4408, "step": 7062 }, { "epoch": 0.48254423720707795, "grad_norm": 5.113327980041504, "learning_rate": 3.868365698672587e-06, "loss": 0.3698, "step": 7063 }, { "epoch": 0.48261255721800916, "grad_norm": 5.133747100830078, "learning_rate": 3.867595491329495e-06, "loss": 0.4912, "step": 7064 }, { "epoch": 0.4826808772289404, "grad_norm": 3.8232085704803467, "learning_rate": 3.8668252659862255e-06, "loss": 0.31, "step": 7065 }, { "epoch": 0.4827491972398716, "grad_norm": 3.7018258571624756, "learning_rate": 3.866055022680496e-06, "loss": 0.3627, "step": 7066 }, { "epoch": 0.48281751725080274, "grad_norm": 4.168606281280518, "learning_rate": 3.865284761450022e-06, "loss": 0.4865, "step": 7067 }, { "epoch": 0.48288583726173395, "grad_norm": 4.817471504211426, "learning_rate": 3.864514482332524e-06, "loss": 0.4581, "step": 7068 }, { "epoch": 0.48295415727266516, "grad_norm": 5.144006252288818, "learning_rate": 3.863744185365717e-06, "loss": 0.3522, "step": 7069 }, { "epoch": 0.4830224772835964, "grad_norm": 4.005857944488525, "learning_rate": 3.862973870587323e-06, "loss": 0.3278, "step": 7070 }, { "epoch": 0.4830907972945276, "grad_norm": 3.0162081718444824, "learning_rate": 3.862203538035061e-06, "loss": 0.3157, "step": 7071 }, { "epoch": 0.4831591173054588, "grad_norm": 4.169532299041748, "learning_rate": 3.861433187746653e-06, "loss": 0.3789, "step": 7072 }, { "epoch": 0.48322743731638995, "grad_norm": 3.1678519248962402, "learning_rate": 3.86066281975982e-06, "loss": 0.2735, "step": 7073 }, { "epoch": 0.48329575732732116, "grad_norm": 3.827497720718384, "learning_rate": 3.859892434112286e-06, "loss": 0.3412, "step": 7074 }, { "epoch": 0.4833640773382524, "grad_norm": 3.540898084640503, "learning_rate": 3.859122030841773e-06, "loss": 0.3164, "step": 7075 }, { "epoch": 0.4834323973491836, "grad_norm": 3.2900278568267822, "learning_rate": 3.858351609986008e-06, "loss": 0.3105, "step": 7076 }, { "epoch": 0.4835007173601148, "grad_norm": 3.753253936767578, "learning_rate": 3.857581171582714e-06, "loss": 0.2477, "step": 7077 }, { "epoch": 0.483569037371046, "grad_norm": 4.548667907714844, "learning_rate": 3.85681071566962e-06, "loss": 0.2975, "step": 7078 }, { "epoch": 0.48363735738197716, "grad_norm": 3.0140929222106934, "learning_rate": 3.856040242284451e-06, "loss": 0.2461, "step": 7079 }, { "epoch": 0.4837056773929084, "grad_norm": 5.502109527587891, "learning_rate": 3.855269751464935e-06, "loss": 0.4646, "step": 7080 }, { "epoch": 0.4837739974038396, "grad_norm": 3.6710329055786133, "learning_rate": 3.854499243248802e-06, "loss": 0.3219, "step": 7081 }, { "epoch": 0.4838423174147708, "grad_norm": 4.365018367767334, "learning_rate": 3.8537287176737825e-06, "loss": 0.4428, "step": 7082 }, { "epoch": 0.483910637425702, "grad_norm": 2.888331413269043, "learning_rate": 3.8529581747776045e-06, "loss": 0.2732, "step": 7083 }, { "epoch": 0.4839789574366332, "grad_norm": 3.4715425968170166, "learning_rate": 3.852187614598002e-06, "loss": 0.2323, "step": 7084 }, { "epoch": 0.4840472774475644, "grad_norm": 4.53761100769043, "learning_rate": 3.851417037172707e-06, "loss": 0.3507, "step": 7085 }, { "epoch": 0.4841155974584956, "grad_norm": 5.264657020568848, "learning_rate": 3.85064644253945e-06, "loss": 0.4066, "step": 7086 }, { "epoch": 0.4841839174694268, "grad_norm": 3.1526036262512207, "learning_rate": 3.849875830735967e-06, "loss": 0.3085, "step": 7087 }, { "epoch": 0.484252237480358, "grad_norm": 4.773902893066406, "learning_rate": 3.8491052017999945e-06, "loss": 0.3661, "step": 7088 }, { "epoch": 0.4843205574912892, "grad_norm": 3.2612464427948, "learning_rate": 3.848334555769264e-06, "loss": 0.3103, "step": 7089 }, { "epoch": 0.4843888775022204, "grad_norm": 3.8594067096710205, "learning_rate": 3.847563892681515e-06, "loss": 0.472, "step": 7090 }, { "epoch": 0.4844571975131516, "grad_norm": 3.775304079055786, "learning_rate": 3.846793212574484e-06, "loss": 0.2741, "step": 7091 }, { "epoch": 0.4845255175240828, "grad_norm": 3.471442222595215, "learning_rate": 3.8460225154859095e-06, "loss": 0.3324, "step": 7092 }, { "epoch": 0.484593837535014, "grad_norm": 3.567821502685547, "learning_rate": 3.8452518014535305e-06, "loss": 0.3811, "step": 7093 }, { "epoch": 0.4846621575459452, "grad_norm": 3.4967639446258545, "learning_rate": 3.844481070515087e-06, "loss": 0.2021, "step": 7094 }, { "epoch": 0.4847304775568764, "grad_norm": 4.065464973449707, "learning_rate": 3.843710322708319e-06, "loss": 0.3622, "step": 7095 }, { "epoch": 0.48479879756780764, "grad_norm": 3.327219009399414, "learning_rate": 3.842939558070968e-06, "loss": 0.2972, "step": 7096 }, { "epoch": 0.4848671175787388, "grad_norm": 4.1333231925964355, "learning_rate": 3.842168776640776e-06, "loss": 0.3328, "step": 7097 }, { "epoch": 0.48493543758967, "grad_norm": 3.5377001762390137, "learning_rate": 3.841397978455487e-06, "loss": 0.2691, "step": 7098 }, { "epoch": 0.4850037576006012, "grad_norm": 3.8054287433624268, "learning_rate": 3.8406271635528445e-06, "loss": 0.2896, "step": 7099 }, { "epoch": 0.4850720776115324, "grad_norm": 3.081455707550049, "learning_rate": 3.839856331970593e-06, "loss": 0.2397, "step": 7100 }, { "epoch": 0.48514039762246364, "grad_norm": 3.3481719493865967, "learning_rate": 3.839085483746479e-06, "loss": 0.3264, "step": 7101 }, { "epoch": 0.48520871763339485, "grad_norm": 4.930756568908691, "learning_rate": 3.8383146189182486e-06, "loss": 0.3159, "step": 7102 }, { "epoch": 0.485277037644326, "grad_norm": 4.184255599975586, "learning_rate": 3.8375437375236475e-06, "loss": 0.3196, "step": 7103 }, { "epoch": 0.4853453576552572, "grad_norm": 3.1834867000579834, "learning_rate": 3.836772839600427e-06, "loss": 0.2619, "step": 7104 }, { "epoch": 0.4854136776661884, "grad_norm": 3.510185956954956, "learning_rate": 3.836001925186332e-06, "loss": 0.2801, "step": 7105 }, { "epoch": 0.48548199767711964, "grad_norm": 3.8725507259368896, "learning_rate": 3.835230994319114e-06, "loss": 0.2845, "step": 7106 }, { "epoch": 0.48555031768805085, "grad_norm": 4.624349117279053, "learning_rate": 3.834460047036524e-06, "loss": 0.3228, "step": 7107 }, { "epoch": 0.48561863769898206, "grad_norm": 4.089527606964111, "learning_rate": 3.833689083376313e-06, "loss": 0.2837, "step": 7108 }, { "epoch": 0.4856869577099132, "grad_norm": 4.944221496582031, "learning_rate": 3.832918103376232e-06, "loss": 0.2133, "step": 7109 }, { "epoch": 0.4857552777208444, "grad_norm": 3.940993070602417, "learning_rate": 3.832147107074035e-06, "loss": 0.4508, "step": 7110 }, { "epoch": 0.48582359773177564, "grad_norm": 3.5574791431427, "learning_rate": 3.8313760945074746e-06, "loss": 0.2739, "step": 7111 }, { "epoch": 0.48589191774270685, "grad_norm": 3.923382520675659, "learning_rate": 3.830605065714307e-06, "loss": 0.3714, "step": 7112 }, { "epoch": 0.48596023775363806, "grad_norm": 3.8664791584014893, "learning_rate": 3.829834020732285e-06, "loss": 0.3803, "step": 7113 }, { "epoch": 0.48602855776456927, "grad_norm": 3.5307371616363525, "learning_rate": 3.829062959599167e-06, "loss": 0.2316, "step": 7114 }, { "epoch": 0.4860968777755004, "grad_norm": 4.382948875427246, "learning_rate": 3.828291882352709e-06, "loss": 0.325, "step": 7115 }, { "epoch": 0.48616519778643164, "grad_norm": 3.814425468444824, "learning_rate": 3.827520789030666e-06, "loss": 0.2645, "step": 7116 }, { "epoch": 0.48623351779736285, "grad_norm": 4.535834312438965, "learning_rate": 3.826749679670801e-06, "loss": 0.4041, "step": 7117 }, { "epoch": 0.48630183780829406, "grad_norm": 3.5453052520751953, "learning_rate": 3.825978554310871e-06, "loss": 0.3144, "step": 7118 }, { "epoch": 0.48637015781922527, "grad_norm": 4.023793697357178, "learning_rate": 3.8252074129886344e-06, "loss": 0.3376, "step": 7119 }, { "epoch": 0.4864384778301565, "grad_norm": 3.6280670166015625, "learning_rate": 3.8244362557418536e-06, "loss": 0.347, "step": 7120 }, { "epoch": 0.48650679784108763, "grad_norm": 4.793787956237793, "learning_rate": 3.823665082608291e-06, "loss": 0.4123, "step": 7121 }, { "epoch": 0.48657511785201885, "grad_norm": 3.781731605529785, "learning_rate": 3.8228938936257075e-06, "loss": 0.331, "step": 7122 }, { "epoch": 0.48664343786295006, "grad_norm": 3.423569679260254, "learning_rate": 3.8221226888318655e-06, "loss": 0.3492, "step": 7123 }, { "epoch": 0.48671175787388127, "grad_norm": 4.694912433624268, "learning_rate": 3.821351468264531e-06, "loss": 0.3831, "step": 7124 }, { "epoch": 0.4867800778848125, "grad_norm": 4.556612491607666, "learning_rate": 3.820580231961467e-06, "loss": 0.3529, "step": 7125 }, { "epoch": 0.4868483978957437, "grad_norm": 3.6658716201782227, "learning_rate": 3.8198089799604395e-06, "loss": 0.3057, "step": 7126 }, { "epoch": 0.48691671790667485, "grad_norm": 4.090088844299316, "learning_rate": 3.819037712299216e-06, "loss": 0.3654, "step": 7127 }, { "epoch": 0.48698503791760606, "grad_norm": 4.2390618324279785, "learning_rate": 3.8182664290155604e-06, "loss": 0.376, "step": 7128 }, { "epoch": 0.48705335792853727, "grad_norm": 3.6654469966888428, "learning_rate": 3.817495130147242e-06, "loss": 0.2896, "step": 7129 }, { "epoch": 0.4871216779394685, "grad_norm": 3.673086166381836, "learning_rate": 3.8167238157320295e-06, "loss": 0.3174, "step": 7130 }, { "epoch": 0.4871899979503997, "grad_norm": 3.774296283721924, "learning_rate": 3.815952485807692e-06, "loss": 0.3327, "step": 7131 }, { "epoch": 0.4872583179613309, "grad_norm": 4.964496612548828, "learning_rate": 3.815181140411998e-06, "loss": 0.457, "step": 7132 }, { "epoch": 0.48732663797226206, "grad_norm": 4.083314895629883, "learning_rate": 3.8144097795827217e-06, "loss": 0.3169, "step": 7133 }, { "epoch": 0.48739495798319327, "grad_norm": 2.864881753921509, "learning_rate": 3.813638403357632e-06, "loss": 0.274, "step": 7134 }, { "epoch": 0.4874632779941245, "grad_norm": 3.6838879585266113, "learning_rate": 3.812867011774501e-06, "loss": 0.2404, "step": 7135 }, { "epoch": 0.4875315980050557, "grad_norm": 4.134002685546875, "learning_rate": 3.8120956048711023e-06, "loss": 0.3855, "step": 7136 }, { "epoch": 0.4875999180159869, "grad_norm": 3.092910051345825, "learning_rate": 3.8113241826852104e-06, "loss": 0.2283, "step": 7137 }, { "epoch": 0.4876682380269181, "grad_norm": 4.6168975830078125, "learning_rate": 3.810552745254598e-06, "loss": 0.3483, "step": 7138 }, { "epoch": 0.48773655803784927, "grad_norm": 4.838432312011719, "learning_rate": 3.8097812926170426e-06, "loss": 0.4638, "step": 7139 }, { "epoch": 0.4878048780487805, "grad_norm": 3.7232682704925537, "learning_rate": 3.8090098248103182e-06, "loss": 0.4246, "step": 7140 }, { "epoch": 0.4878731980597117, "grad_norm": 3.9473154544830322, "learning_rate": 3.8082383418722032e-06, "loss": 0.3744, "step": 7141 }, { "epoch": 0.4879415180706429, "grad_norm": 3.5871362686157227, "learning_rate": 3.8074668438404728e-06, "loss": 0.4325, "step": 7142 }, { "epoch": 0.4880098380815741, "grad_norm": 4.7999587059021, "learning_rate": 3.8066953307529082e-06, "loss": 0.3442, "step": 7143 }, { "epoch": 0.4880781580925053, "grad_norm": 3.5793914794921875, "learning_rate": 3.8059238026472856e-06, "loss": 0.3222, "step": 7144 }, { "epoch": 0.4881464781034365, "grad_norm": 4.20220422744751, "learning_rate": 3.8051522595613865e-06, "loss": 0.3338, "step": 7145 }, { "epoch": 0.4882147981143677, "grad_norm": 4.0998311042785645, "learning_rate": 3.8043807015329905e-06, "loss": 0.3408, "step": 7146 }, { "epoch": 0.4882831181252989, "grad_norm": 3.480548143386841, "learning_rate": 3.8036091285998793e-06, "loss": 0.3338, "step": 7147 }, { "epoch": 0.4883514381362301, "grad_norm": 3.541207790374756, "learning_rate": 3.802837540799834e-06, "loss": 0.306, "step": 7148 }, { "epoch": 0.4884197581471613, "grad_norm": 4.1023125648498535, "learning_rate": 3.8020659381706376e-06, "loss": 0.3134, "step": 7149 }, { "epoch": 0.48848807815809253, "grad_norm": 5.582080841064453, "learning_rate": 3.801294320750073e-06, "loss": 0.332, "step": 7150 }, { "epoch": 0.4885563981690237, "grad_norm": 3.935478687286377, "learning_rate": 3.800522688575926e-06, "loss": 0.3088, "step": 7151 }, { "epoch": 0.4886247181799549, "grad_norm": 3.611546754837036, "learning_rate": 3.7997510416859787e-06, "loss": 0.2729, "step": 7152 }, { "epoch": 0.4886930381908861, "grad_norm": 3.7124762535095215, "learning_rate": 3.798979380118019e-06, "loss": 0.4287, "step": 7153 }, { "epoch": 0.4887613582018173, "grad_norm": 2.9717917442321777, "learning_rate": 3.7982077039098315e-06, "loss": 0.2119, "step": 7154 }, { "epoch": 0.48882967821274853, "grad_norm": 4.522755146026611, "learning_rate": 3.7974360130992044e-06, "loss": 0.4836, "step": 7155 }, { "epoch": 0.48889799822367974, "grad_norm": 4.875766277313232, "learning_rate": 3.7966643077239237e-06, "loss": 0.3038, "step": 7156 }, { "epoch": 0.4889663182346109, "grad_norm": 5.668376445770264, "learning_rate": 3.7958925878217798e-06, "loss": 0.3244, "step": 7157 }, { "epoch": 0.4890346382455421, "grad_norm": 3.112265110015869, "learning_rate": 3.7951208534305604e-06, "loss": 0.2456, "step": 7158 }, { "epoch": 0.4891029582564733, "grad_norm": 5.07783842086792, "learning_rate": 3.7943491045880552e-06, "loss": 0.3396, "step": 7159 }, { "epoch": 0.48917127826740453, "grad_norm": 3.941751003265381, "learning_rate": 3.7935773413320554e-06, "loss": 0.3227, "step": 7160 }, { "epoch": 0.48923959827833574, "grad_norm": 6.285951137542725, "learning_rate": 3.792805563700353e-06, "loss": 0.5658, "step": 7161 }, { "epoch": 0.48930791828926695, "grad_norm": 6.082517147064209, "learning_rate": 3.792033771730737e-06, "loss": 0.3884, "step": 7162 }, { "epoch": 0.4893762383001981, "grad_norm": 3.382843255996704, "learning_rate": 3.7912619654610028e-06, "loss": 0.2878, "step": 7163 }, { "epoch": 0.4894445583111293, "grad_norm": 3.3046793937683105, "learning_rate": 3.790490144928943e-06, "loss": 0.2646, "step": 7164 }, { "epoch": 0.48951287832206053, "grad_norm": 5.332719802856445, "learning_rate": 3.7897183101723505e-06, "loss": 0.2858, "step": 7165 }, { "epoch": 0.48958119833299174, "grad_norm": 4.204648494720459, "learning_rate": 3.788946461229021e-06, "loss": 0.2898, "step": 7166 }, { "epoch": 0.48964951834392295, "grad_norm": 3.888538360595703, "learning_rate": 3.788174598136751e-06, "loss": 0.2878, "step": 7167 }, { "epoch": 0.48971783835485416, "grad_norm": 4.816192626953125, "learning_rate": 3.787402720933334e-06, "loss": 0.4561, "step": 7168 }, { "epoch": 0.4897861583657853, "grad_norm": 4.618217945098877, "learning_rate": 3.7866308296565685e-06, "loss": 0.3479, "step": 7169 }, { "epoch": 0.48985447837671653, "grad_norm": 4.34010124206543, "learning_rate": 3.7858589243442517e-06, "loss": 0.3606, "step": 7170 }, { "epoch": 0.48992279838764774, "grad_norm": 4.165502548217773, "learning_rate": 3.7850870050341818e-06, "loss": 0.3755, "step": 7171 }, { "epoch": 0.48999111839857895, "grad_norm": 5.213016986846924, "learning_rate": 3.7843150717641563e-06, "loss": 0.3807, "step": 7172 }, { "epoch": 0.49005943840951016, "grad_norm": 3.19521164894104, "learning_rate": 3.7835431245719764e-06, "loss": 0.1784, "step": 7173 }, { "epoch": 0.4901277584204414, "grad_norm": 3.276276111602783, "learning_rate": 3.7827711634954424e-06, "loss": 0.3414, "step": 7174 }, { "epoch": 0.49019607843137253, "grad_norm": 4.294459819793701, "learning_rate": 3.7819991885723545e-06, "loss": 0.3853, "step": 7175 }, { "epoch": 0.49026439844230374, "grad_norm": 3.5165083408355713, "learning_rate": 3.7812271998405146e-06, "loss": 0.3552, "step": 7176 }, { "epoch": 0.49033271845323495, "grad_norm": 2.8390390872955322, "learning_rate": 3.7804551973377244e-06, "loss": 0.2214, "step": 7177 }, { "epoch": 0.49040103846416616, "grad_norm": 3.798358917236328, "learning_rate": 3.7796831811017865e-06, "loss": 0.3817, "step": 7178 }, { "epoch": 0.4904693584750974, "grad_norm": 3.4850518703460693, "learning_rate": 3.7789111511705055e-06, "loss": 0.3133, "step": 7179 }, { "epoch": 0.4905376784860286, "grad_norm": 3.770591974258423, "learning_rate": 3.7781391075816863e-06, "loss": 0.3077, "step": 7180 }, { "epoch": 0.49060599849695974, "grad_norm": 2.6333560943603516, "learning_rate": 3.7773670503731313e-06, "loss": 0.252, "step": 7181 }, { "epoch": 0.49067431850789095, "grad_norm": 3.978381633758545, "learning_rate": 3.776594979582648e-06, "loss": 0.2837, "step": 7182 }, { "epoch": 0.49074263851882216, "grad_norm": 3.918804407119751, "learning_rate": 3.775822895248042e-06, "loss": 0.3964, "step": 7183 }, { "epoch": 0.49081095852975337, "grad_norm": 7.99697208404541, "learning_rate": 3.7750507974071215e-06, "loss": 0.4146, "step": 7184 }, { "epoch": 0.4908792785406846, "grad_norm": 4.708348751068115, "learning_rate": 3.7742786860976913e-06, "loss": 0.397, "step": 7185 }, { "epoch": 0.4909475985516158, "grad_norm": 3.8152573108673096, "learning_rate": 3.773506561357563e-06, "loss": 0.2579, "step": 7186 }, { "epoch": 0.49101591856254695, "grad_norm": 4.309960842132568, "learning_rate": 3.7727344232245437e-06, "loss": 0.3021, "step": 7187 }, { "epoch": 0.49108423857347816, "grad_norm": 5.336458683013916, "learning_rate": 3.7719622717364423e-06, "loss": 0.3947, "step": 7188 }, { "epoch": 0.49115255858440937, "grad_norm": 4.2719316482543945, "learning_rate": 3.77119010693107e-06, "loss": 0.2831, "step": 7189 }, { "epoch": 0.4912208785953406, "grad_norm": 4.219692707061768, "learning_rate": 3.770417928846238e-06, "loss": 0.3736, "step": 7190 }, { "epoch": 0.4912891986062718, "grad_norm": 3.895470142364502, "learning_rate": 3.7696457375197577e-06, "loss": 0.3753, "step": 7191 }, { "epoch": 0.491357518617203, "grad_norm": 4.875890254974365, "learning_rate": 3.76887353298944e-06, "loss": 0.3813, "step": 7192 }, { "epoch": 0.49142583862813416, "grad_norm": 5.26134729385376, "learning_rate": 3.768101315293099e-06, "loss": 0.3508, "step": 7193 }, { "epoch": 0.49149415863906537, "grad_norm": 4.258451461791992, "learning_rate": 3.767329084468548e-06, "loss": 0.2509, "step": 7194 }, { "epoch": 0.4915624786499966, "grad_norm": 4.516200542449951, "learning_rate": 3.7665568405535993e-06, "loss": 0.3015, "step": 7195 }, { "epoch": 0.4916307986609278, "grad_norm": 4.8940348625183105, "learning_rate": 3.765784583586071e-06, "loss": 0.3796, "step": 7196 }, { "epoch": 0.491699118671859, "grad_norm": 3.660975933074951, "learning_rate": 3.765012313603776e-06, "loss": 0.3447, "step": 7197 }, { "epoch": 0.4917674386827902, "grad_norm": 3.874229907989502, "learning_rate": 3.764240030644531e-06, "loss": 0.3299, "step": 7198 }, { "epoch": 0.49183575869372137, "grad_norm": 3.5876944065093994, "learning_rate": 3.7634677347461527e-06, "loss": 0.3772, "step": 7199 }, { "epoch": 0.4919040787046526, "grad_norm": 4.138949871063232, "learning_rate": 3.7626954259464583e-06, "loss": 0.3378, "step": 7200 }, { "epoch": 0.4919723987155838, "grad_norm": 3.9432754516601562, "learning_rate": 3.761923104283266e-06, "loss": 0.4007, "step": 7201 }, { "epoch": 0.492040718726515, "grad_norm": 3.85178804397583, "learning_rate": 3.7611507697943934e-06, "loss": 0.3966, "step": 7202 }, { "epoch": 0.4921090387374462, "grad_norm": 3.1261038780212402, "learning_rate": 3.7603784225176607e-06, "loss": 0.246, "step": 7203 }, { "epoch": 0.4921773587483774, "grad_norm": 4.613424301147461, "learning_rate": 3.759606062490888e-06, "loss": 0.4576, "step": 7204 }, { "epoch": 0.4922456787593086, "grad_norm": 2.7687084674835205, "learning_rate": 3.7588336897518934e-06, "loss": 0.2524, "step": 7205 }, { "epoch": 0.4923139987702398, "grad_norm": 3.1960039138793945, "learning_rate": 3.7580613043385005e-06, "loss": 0.3655, "step": 7206 }, { "epoch": 0.492382318781171, "grad_norm": 3.723912239074707, "learning_rate": 3.7572889062885297e-06, "loss": 0.3254, "step": 7207 }, { "epoch": 0.4924506387921022, "grad_norm": 4.582752704620361, "learning_rate": 3.756516495639804e-06, "loss": 0.5078, "step": 7208 }, { "epoch": 0.4925189588030334, "grad_norm": 3.997938394546509, "learning_rate": 3.7557440724301454e-06, "loss": 0.38, "step": 7209 }, { "epoch": 0.49258727881396464, "grad_norm": 3.15899920463562, "learning_rate": 3.7549716366973792e-06, "loss": 0.2627, "step": 7210 }, { "epoch": 0.4926555988248958, "grad_norm": 5.231146812438965, "learning_rate": 3.754199188479327e-06, "loss": 0.3997, "step": 7211 }, { "epoch": 0.492723918835827, "grad_norm": 4.048276901245117, "learning_rate": 3.7534267278138157e-06, "loss": 0.2813, "step": 7212 }, { "epoch": 0.4927922388467582, "grad_norm": 4.469110012054443, "learning_rate": 3.7526542547386686e-06, "loss": 0.3107, "step": 7213 }, { "epoch": 0.4928605588576894, "grad_norm": 5.077155590057373, "learning_rate": 3.7518817692917146e-06, "loss": 0.4343, "step": 7214 }, { "epoch": 0.49292887886862063, "grad_norm": 4.236578941345215, "learning_rate": 3.7511092715107768e-06, "loss": 0.4325, "step": 7215 }, { "epoch": 0.49299719887955185, "grad_norm": 3.9430103302001953, "learning_rate": 3.750336761433685e-06, "loss": 0.3417, "step": 7216 }, { "epoch": 0.493065518890483, "grad_norm": 5.845988750457764, "learning_rate": 3.7495642390982656e-06, "loss": 0.3829, "step": 7217 }, { "epoch": 0.4931338389014142, "grad_norm": 4.094759941101074, "learning_rate": 3.748791704542348e-06, "loss": 0.3417, "step": 7218 }, { "epoch": 0.4932021589123454, "grad_norm": 3.758601427078247, "learning_rate": 3.7480191578037596e-06, "loss": 0.3129, "step": 7219 }, { "epoch": 0.49327047892327663, "grad_norm": 4.295594215393066, "learning_rate": 3.7472465989203315e-06, "loss": 0.3361, "step": 7220 }, { "epoch": 0.49333879893420785, "grad_norm": 3.72046160697937, "learning_rate": 3.7464740279298926e-06, "loss": 0.3681, "step": 7221 }, { "epoch": 0.49340711894513906, "grad_norm": 3.3420796394348145, "learning_rate": 3.7457014448702757e-06, "loss": 0.3012, "step": 7222 }, { "epoch": 0.4934754389560702, "grad_norm": 5.078312397003174, "learning_rate": 3.74492884977931e-06, "loss": 0.3792, "step": 7223 }, { "epoch": 0.4935437589670014, "grad_norm": 3.0712881088256836, "learning_rate": 3.7441562426948283e-06, "loss": 0.3278, "step": 7224 }, { "epoch": 0.49361207897793263, "grad_norm": 4.022790908813477, "learning_rate": 3.743383623654663e-06, "loss": 0.2407, "step": 7225 }, { "epoch": 0.49368039898886384, "grad_norm": 3.5167429447174072, "learning_rate": 3.7426109926966478e-06, "loss": 0.3017, "step": 7226 }, { "epoch": 0.49374871899979506, "grad_norm": 4.041968822479248, "learning_rate": 3.7418383498586144e-06, "loss": 0.2721, "step": 7227 }, { "epoch": 0.49381703901072627, "grad_norm": 3.6213738918304443, "learning_rate": 3.7410656951783998e-06, "loss": 0.333, "step": 7228 }, { "epoch": 0.4938853590216574, "grad_norm": 5.242080211639404, "learning_rate": 3.7402930286938366e-06, "loss": 0.3221, "step": 7229 }, { "epoch": 0.49395367903258863, "grad_norm": 4.131837844848633, "learning_rate": 3.739520350442762e-06, "loss": 0.3967, "step": 7230 }, { "epoch": 0.49402199904351984, "grad_norm": 4.065510272979736, "learning_rate": 3.73874766046301e-06, "loss": 0.3865, "step": 7231 }, { "epoch": 0.49409031905445105, "grad_norm": 3.3018484115600586, "learning_rate": 3.73797495879242e-06, "loss": 0.2149, "step": 7232 }, { "epoch": 0.49415863906538227, "grad_norm": 4.235354900360107, "learning_rate": 3.737202245468827e-06, "loss": 0.397, "step": 7233 }, { "epoch": 0.4942269590763135, "grad_norm": 3.3725497722625732, "learning_rate": 3.7364295205300684e-06, "loss": 0.2236, "step": 7234 }, { "epoch": 0.49429527908724463, "grad_norm": 3.5808298587799072, "learning_rate": 3.7356567840139837e-06, "loss": 0.2235, "step": 7235 }, { "epoch": 0.49436359909817584, "grad_norm": 3.473362684249878, "learning_rate": 3.7348840359584125e-06, "loss": 0.2031, "step": 7236 }, { "epoch": 0.49443191910910705, "grad_norm": 3.886004686355591, "learning_rate": 3.734111276401192e-06, "loss": 0.2201, "step": 7237 }, { "epoch": 0.49450023912003827, "grad_norm": 3.2077229022979736, "learning_rate": 3.7333385053801633e-06, "loss": 0.3597, "step": 7238 }, { "epoch": 0.4945685591309695, "grad_norm": 4.090630054473877, "learning_rate": 3.7325657229331665e-06, "loss": 0.383, "step": 7239 }, { "epoch": 0.4946368791419007, "grad_norm": 4.305929183959961, "learning_rate": 3.731792929098044e-06, "loss": 0.3634, "step": 7240 }, { "epoch": 0.49470519915283184, "grad_norm": 3.6942238807678223, "learning_rate": 3.7310201239126363e-06, "loss": 0.2974, "step": 7241 }, { "epoch": 0.49477351916376305, "grad_norm": 3.9975080490112305, "learning_rate": 3.7302473074147857e-06, "loss": 0.3884, "step": 7242 }, { "epoch": 0.49484183917469426, "grad_norm": 3.7694292068481445, "learning_rate": 3.729474479642336e-06, "loss": 0.3191, "step": 7243 }, { "epoch": 0.4949101591856255, "grad_norm": 4.692880153656006, "learning_rate": 3.7287016406331287e-06, "loss": 0.2735, "step": 7244 }, { "epoch": 0.4949784791965567, "grad_norm": 3.736234426498413, "learning_rate": 3.7279287904250093e-06, "loss": 0.3041, "step": 7245 }, { "epoch": 0.4950467992074879, "grad_norm": 4.311388969421387, "learning_rate": 3.7271559290558214e-06, "loss": 0.2775, "step": 7246 }, { "epoch": 0.49511511921841905, "grad_norm": 4.370894908905029, "learning_rate": 3.726383056563411e-06, "loss": 0.2894, "step": 7247 }, { "epoch": 0.49518343922935026, "grad_norm": 4.549378395080566, "learning_rate": 3.725610172985621e-06, "loss": 0.2731, "step": 7248 }, { "epoch": 0.4952517592402815, "grad_norm": 4.195481777191162, "learning_rate": 3.724837278360301e-06, "loss": 0.4276, "step": 7249 }, { "epoch": 0.4953200792512127, "grad_norm": 3.9368879795074463, "learning_rate": 3.7240643727252952e-06, "loss": 0.2954, "step": 7250 }, { "epoch": 0.4953883992621439, "grad_norm": 3.9704952239990234, "learning_rate": 3.723291456118452e-06, "loss": 0.4099, "step": 7251 }, { "epoch": 0.4954567192730751, "grad_norm": 3.9415829181671143, "learning_rate": 3.722518528577617e-06, "loss": 0.3363, "step": 7252 }, { "epoch": 0.49552503928400626, "grad_norm": 3.7277331352233887, "learning_rate": 3.7217455901406416e-06, "loss": 0.257, "step": 7253 }, { "epoch": 0.4955933592949375, "grad_norm": 3.9492650032043457, "learning_rate": 3.7209726408453722e-06, "loss": 0.3442, "step": 7254 }, { "epoch": 0.4956616793058687, "grad_norm": 4.1408514976501465, "learning_rate": 3.720199680729659e-06, "loss": 0.3891, "step": 7255 }, { "epoch": 0.4957299993167999, "grad_norm": 3.094782590866089, "learning_rate": 3.7194267098313503e-06, "loss": 0.279, "step": 7256 }, { "epoch": 0.4957983193277311, "grad_norm": 4.012547492980957, "learning_rate": 3.7186537281882996e-06, "loss": 0.3777, "step": 7257 }, { "epoch": 0.4958666393386623, "grad_norm": 3.956012487411499, "learning_rate": 3.7178807358383535e-06, "loss": 0.2516, "step": 7258 }, { "epoch": 0.4959349593495935, "grad_norm": 2.6968584060668945, "learning_rate": 3.7171077328193676e-06, "loss": 0.231, "step": 7259 }, { "epoch": 0.4960032793605247, "grad_norm": 3.7391510009765625, "learning_rate": 3.716334719169191e-06, "loss": 0.4525, "step": 7260 }, { "epoch": 0.4960715993714559, "grad_norm": 3.678385019302368, "learning_rate": 3.7155616949256776e-06, "loss": 0.3426, "step": 7261 }, { "epoch": 0.4961399193823871, "grad_norm": 3.3468284606933594, "learning_rate": 3.7147886601266796e-06, "loss": 0.336, "step": 7262 }, { "epoch": 0.4962082393933183, "grad_norm": 4.535091876983643, "learning_rate": 3.7140156148100507e-06, "loss": 0.3251, "step": 7263 }, { "epoch": 0.49627655940424953, "grad_norm": 3.866250514984131, "learning_rate": 3.713242559013644e-06, "loss": 0.3235, "step": 7264 }, { "epoch": 0.4963448794151807, "grad_norm": 2.795736789703369, "learning_rate": 3.7124694927753163e-06, "loss": 0.2574, "step": 7265 }, { "epoch": 0.4964131994261119, "grad_norm": 4.127404689788818, "learning_rate": 3.7116964161329195e-06, "loss": 0.4246, "step": 7266 }, { "epoch": 0.4964815194370431, "grad_norm": 3.2995166778564453, "learning_rate": 3.7109233291243116e-06, "loss": 0.2444, "step": 7267 }, { "epoch": 0.4965498394479743, "grad_norm": 4.970466136932373, "learning_rate": 3.7101502317873477e-06, "loss": 0.3481, "step": 7268 }, { "epoch": 0.49661815945890553, "grad_norm": 5.266897678375244, "learning_rate": 3.709377124159885e-06, "loss": 0.4277, "step": 7269 }, { "epoch": 0.49668647946983674, "grad_norm": 2.977663040161133, "learning_rate": 3.7086040062797795e-06, "loss": 0.2042, "step": 7270 }, { "epoch": 0.4967547994807679, "grad_norm": 3.3478474617004395, "learning_rate": 3.7078308781848885e-06, "loss": 0.2294, "step": 7271 }, { "epoch": 0.4968231194916991, "grad_norm": 3.9298646450042725, "learning_rate": 3.707057739913071e-06, "loss": 0.2509, "step": 7272 }, { "epoch": 0.4968914395026303, "grad_norm": 3.347325086593628, "learning_rate": 3.706284591502186e-06, "loss": 0.2818, "step": 7273 }, { "epoch": 0.4969597595135615, "grad_norm": 4.9161272048950195, "learning_rate": 3.70551143299009e-06, "loss": 0.4024, "step": 7274 }, { "epoch": 0.49702807952449274, "grad_norm": 4.254612922668457, "learning_rate": 3.704738264414646e-06, "loss": 0.3216, "step": 7275 }, { "epoch": 0.49709639953542395, "grad_norm": 3.5533649921417236, "learning_rate": 3.703965085813712e-06, "loss": 0.449, "step": 7276 }, { "epoch": 0.4971647195463551, "grad_norm": 3.580200433731079, "learning_rate": 3.703191897225148e-06, "loss": 0.2663, "step": 7277 }, { "epoch": 0.4972330395572863, "grad_norm": 4.162956714630127, "learning_rate": 3.702418698686816e-06, "loss": 0.3394, "step": 7278 }, { "epoch": 0.4973013595682175, "grad_norm": 3.782144784927368, "learning_rate": 3.701645490236578e-06, "loss": 0.3042, "step": 7279 }, { "epoch": 0.49736967957914874, "grad_norm": 4.454018592834473, "learning_rate": 3.700872271912295e-06, "loss": 0.3591, "step": 7280 }, { "epoch": 0.49743799959007995, "grad_norm": 2.9244585037231445, "learning_rate": 3.700099043751829e-06, "loss": 0.1711, "step": 7281 }, { "epoch": 0.49750631960101116, "grad_norm": 4.33762788772583, "learning_rate": 3.6993258057930433e-06, "loss": 0.3601, "step": 7282 }, { "epoch": 0.4975746396119423, "grad_norm": 4.88734245300293, "learning_rate": 3.6985525580738034e-06, "loss": 0.3146, "step": 7283 }, { "epoch": 0.4976429596228735, "grad_norm": 4.355593681335449, "learning_rate": 3.6977793006319694e-06, "loss": 0.2546, "step": 7284 }, { "epoch": 0.49771127963380474, "grad_norm": 3.861177444458008, "learning_rate": 3.6970060335054086e-06, "loss": 0.346, "step": 7285 }, { "epoch": 0.49777959964473595, "grad_norm": 7.485238552093506, "learning_rate": 3.6962327567319852e-06, "loss": 0.4258, "step": 7286 }, { "epoch": 0.49784791965566716, "grad_norm": 4.399423599243164, "learning_rate": 3.695459470349564e-06, "loss": 0.5524, "step": 7287 }, { "epoch": 0.49791623966659837, "grad_norm": 3.049363374710083, "learning_rate": 3.6946861743960103e-06, "loss": 0.2587, "step": 7288 }, { "epoch": 0.4979845596775295, "grad_norm": 4.035248279571533, "learning_rate": 3.6939128689091925e-06, "loss": 0.3233, "step": 7289 }, { "epoch": 0.49805287968846074, "grad_norm": 4.55733060836792, "learning_rate": 3.6931395539269747e-06, "loss": 0.2552, "step": 7290 }, { "epoch": 0.49812119969939195, "grad_norm": 6.67389440536499, "learning_rate": 3.692366229487225e-06, "loss": 0.3719, "step": 7291 }, { "epoch": 0.49818951971032316, "grad_norm": 3.90195369720459, "learning_rate": 3.691592895627812e-06, "loss": 0.3062, "step": 7292 }, { "epoch": 0.49825783972125437, "grad_norm": 3.564424514770508, "learning_rate": 3.690819552386604e-06, "loss": 0.2479, "step": 7293 }, { "epoch": 0.4983261597321856, "grad_norm": 4.1870436668396, "learning_rate": 3.6900461998014666e-06, "loss": 0.3682, "step": 7294 }, { "epoch": 0.49839447974311674, "grad_norm": 3.852649450302124, "learning_rate": 3.6892728379102724e-06, "loss": 0.3125, "step": 7295 }, { "epoch": 0.49846279975404795, "grad_norm": 4.474947929382324, "learning_rate": 3.6884994667508887e-06, "loss": 0.3959, "step": 7296 }, { "epoch": 0.49853111976497916, "grad_norm": 3.190624237060547, "learning_rate": 3.687726086361186e-06, "loss": 0.2818, "step": 7297 }, { "epoch": 0.49859943977591037, "grad_norm": 5.331765174865723, "learning_rate": 3.686952696779035e-06, "loss": 0.3007, "step": 7298 }, { "epoch": 0.4986677597868416, "grad_norm": 4.315773963928223, "learning_rate": 3.6861792980423076e-06, "loss": 0.2575, "step": 7299 }, { "epoch": 0.4987360797977728, "grad_norm": 5.131656646728516, "learning_rate": 3.685405890188872e-06, "loss": 0.4537, "step": 7300 }, { "epoch": 0.49880439980870395, "grad_norm": 4.416945457458496, "learning_rate": 3.6846324732566025e-06, "loss": 0.3455, "step": 7301 }, { "epoch": 0.49887271981963516, "grad_norm": 3.945291042327881, "learning_rate": 3.6838590472833704e-06, "loss": 0.3397, "step": 7302 }, { "epoch": 0.49894103983056637, "grad_norm": 4.2183661460876465, "learning_rate": 3.6830856123070497e-06, "loss": 0.3162, "step": 7303 }, { "epoch": 0.4990093598414976, "grad_norm": 4.366352558135986, "learning_rate": 3.6823121683655095e-06, "loss": 0.3714, "step": 7304 }, { "epoch": 0.4990776798524288, "grad_norm": 3.973344564437866, "learning_rate": 3.6815387154966287e-06, "loss": 0.2547, "step": 7305 }, { "epoch": 0.49914599986336, "grad_norm": 4.00820779800415, "learning_rate": 3.680765253738277e-06, "loss": 0.3461, "step": 7306 }, { "epoch": 0.49921431987429116, "grad_norm": 2.9035933017730713, "learning_rate": 3.6799917831283304e-06, "loss": 0.3774, "step": 7307 }, { "epoch": 0.49928263988522237, "grad_norm": 3.1863720417022705, "learning_rate": 3.6792183037046635e-06, "loss": 0.3614, "step": 7308 }, { "epoch": 0.4993509598961536, "grad_norm": 3.355358123779297, "learning_rate": 3.678444815505152e-06, "loss": 0.301, "step": 7309 }, { "epoch": 0.4994192799070848, "grad_norm": 4.2217116355896, "learning_rate": 3.6776713185676725e-06, "loss": 0.37, "step": 7310 }, { "epoch": 0.499487599918016, "grad_norm": 4.57002067565918, "learning_rate": 3.676897812930098e-06, "loss": 0.3072, "step": 7311 }, { "epoch": 0.4995559199289472, "grad_norm": 4.259086608886719, "learning_rate": 3.676124298630308e-06, "loss": 0.3561, "step": 7312 }, { "epoch": 0.49962423993987837, "grad_norm": 2.9164645671844482, "learning_rate": 3.675350775706177e-06, "loss": 0.232, "step": 7313 }, { "epoch": 0.4996925599508096, "grad_norm": 4.7341694831848145, "learning_rate": 3.674577244195585e-06, "loss": 0.325, "step": 7314 }, { "epoch": 0.4997608799617408, "grad_norm": 3.913897752761841, "learning_rate": 3.6738037041364076e-06, "loss": 0.4264, "step": 7315 }, { "epoch": 0.499829199972672, "grad_norm": 4.1456403732299805, "learning_rate": 3.6730301555665245e-06, "loss": 0.3293, "step": 7316 }, { "epoch": 0.4998975199836032, "grad_norm": 3.1588993072509766, "learning_rate": 3.6722565985238122e-06, "loss": 0.3141, "step": 7317 }, { "epoch": 0.4999658399945344, "grad_norm": 3.187713384628296, "learning_rate": 3.6714830330461524e-06, "loss": 0.2774, "step": 7318 }, { "epoch": 0.5000341600054656, "grad_norm": 3.6634650230407715, "learning_rate": 3.6707094591714233e-06, "loss": 0.3097, "step": 7319 }, { "epoch": 0.5001024800163968, "grad_norm": 4.208949565887451, "learning_rate": 3.6699358769375045e-06, "loss": 0.3498, "step": 7320 }, { "epoch": 0.500170800027328, "grad_norm": 4.134578704833984, "learning_rate": 3.6691622863822763e-06, "loss": 0.2928, "step": 7321 }, { "epoch": 0.5002391200382592, "grad_norm": 3.998915195465088, "learning_rate": 3.668388687543621e-06, "loss": 0.3458, "step": 7322 }, { "epoch": 0.5003074400491904, "grad_norm": 4.1969313621521, "learning_rate": 3.6676150804594174e-06, "loss": 0.3248, "step": 7323 }, { "epoch": 0.5003757600601216, "grad_norm": 3.334197998046875, "learning_rate": 3.666841465167548e-06, "loss": 0.388, "step": 7324 }, { "epoch": 0.5004440800710528, "grad_norm": 4.296741962432861, "learning_rate": 3.666067841705894e-06, "loss": 0.3882, "step": 7325 }, { "epoch": 0.500512400081984, "grad_norm": 3.580674409866333, "learning_rate": 3.6652942101123397e-06, "loss": 0.3358, "step": 7326 }, { "epoch": 0.5005807200929152, "grad_norm": 3.991551399230957, "learning_rate": 3.6645205704247643e-06, "loss": 0.3443, "step": 7327 }, { "epoch": 0.5006490401038464, "grad_norm": 2.821430206298828, "learning_rate": 3.6637469226810546e-06, "loss": 0.2576, "step": 7328 }, { "epoch": 0.5007173601147776, "grad_norm": 3.4001307487487793, "learning_rate": 3.6629732669190923e-06, "loss": 0.2409, "step": 7329 }, { "epoch": 0.5007856801257088, "grad_norm": 3.7605652809143066, "learning_rate": 3.6621996031767607e-06, "loss": 0.3657, "step": 7330 }, { "epoch": 0.50085400013664, "grad_norm": 3.1834943294525146, "learning_rate": 3.6614259314919457e-06, "loss": 0.2501, "step": 7331 }, { "epoch": 0.5009223201475712, "grad_norm": 3.3690297603607178, "learning_rate": 3.66065225190253e-06, "loss": 0.2405, "step": 7332 }, { "epoch": 0.5009906401585025, "grad_norm": 3.363142251968384, "learning_rate": 3.6598785644464004e-06, "loss": 0.2892, "step": 7333 }, { "epoch": 0.5010589601694336, "grad_norm": 5.0329508781433105, "learning_rate": 3.6591048691614415e-06, "loss": 0.2297, "step": 7334 }, { "epoch": 0.5011272801803648, "grad_norm": 3.2282185554504395, "learning_rate": 3.6583311660855393e-06, "loss": 0.2322, "step": 7335 }, { "epoch": 0.501195600191296, "grad_norm": 3.6108760833740234, "learning_rate": 3.65755745525658e-06, "loss": 0.3489, "step": 7336 }, { "epoch": 0.5012639202022272, "grad_norm": 4.15950870513916, "learning_rate": 3.6567837367124493e-06, "loss": 0.3383, "step": 7337 }, { "epoch": 0.5013322402131585, "grad_norm": 4.1598896980285645, "learning_rate": 3.6560100104910355e-06, "loss": 0.288, "step": 7338 }, { "epoch": 0.5014005602240896, "grad_norm": 4.421590805053711, "learning_rate": 3.655236276630225e-06, "loss": 0.3114, "step": 7339 }, { "epoch": 0.5014688802350208, "grad_norm": 3.8221275806427, "learning_rate": 3.6544625351679057e-06, "loss": 0.3002, "step": 7340 }, { "epoch": 0.501537200245952, "grad_norm": 3.584524154663086, "learning_rate": 3.6536887861419657e-06, "loss": 0.2598, "step": 7341 }, { "epoch": 0.5016055202568832, "grad_norm": 4.609580039978027, "learning_rate": 3.6529150295902947e-06, "loss": 0.3439, "step": 7342 }, { "epoch": 0.5016738402678145, "grad_norm": 4.802963733673096, "learning_rate": 3.652141265550779e-06, "loss": 0.4542, "step": 7343 }, { "epoch": 0.5017421602787456, "grad_norm": 3.8320419788360596, "learning_rate": 3.6513674940613094e-06, "loss": 0.2144, "step": 7344 }, { "epoch": 0.5018104802896769, "grad_norm": 3.871302843093872, "learning_rate": 3.6505937151597754e-06, "loss": 0.3112, "step": 7345 }, { "epoch": 0.501878800300608, "grad_norm": 3.442066192626953, "learning_rate": 3.6498199288840666e-06, "loss": 0.3484, "step": 7346 }, { "epoch": 0.5019471203115392, "grad_norm": 3.9856324195861816, "learning_rate": 3.6490461352720724e-06, "loss": 0.3537, "step": 7347 }, { "epoch": 0.5020154403224705, "grad_norm": 4.680604457855225, "learning_rate": 3.648272334361686e-06, "loss": 0.3492, "step": 7348 }, { "epoch": 0.5020837603334016, "grad_norm": 3.054386615753174, "learning_rate": 3.6474985261907956e-06, "loss": 0.2585, "step": 7349 }, { "epoch": 0.5021520803443329, "grad_norm": 4.250927448272705, "learning_rate": 3.646724710797293e-06, "loss": 0.3043, "step": 7350 }, { "epoch": 0.502220400355264, "grad_norm": 3.3035929203033447, "learning_rate": 3.6459508882190716e-06, "loss": 0.2017, "step": 7351 }, { "epoch": 0.5022887203661952, "grad_norm": 4.124586582183838, "learning_rate": 3.645177058494023e-06, "loss": 0.3214, "step": 7352 }, { "epoch": 0.5023570403771265, "grad_norm": 4.203832149505615, "learning_rate": 3.6444032216600372e-06, "loss": 0.3698, "step": 7353 }, { "epoch": 0.5024253603880576, "grad_norm": 3.6862330436706543, "learning_rate": 3.6436293777550095e-06, "loss": 0.324, "step": 7354 }, { "epoch": 0.5024936803989889, "grad_norm": 3.9230785369873047, "learning_rate": 3.642855526816832e-06, "loss": 0.4086, "step": 7355 }, { "epoch": 0.50256200040992, "grad_norm": 4.885694980621338, "learning_rate": 3.642081668883398e-06, "loss": 0.3721, "step": 7356 }, { "epoch": 0.5026303204208513, "grad_norm": 3.754467010498047, "learning_rate": 3.6413078039926017e-06, "loss": 0.2822, "step": 7357 }, { "epoch": 0.5026986404317825, "grad_norm": 4.216911315917969, "learning_rate": 3.6405339321823382e-06, "loss": 0.2972, "step": 7358 }, { "epoch": 0.5027669604427136, "grad_norm": 3.707059144973755, "learning_rate": 3.6397600534904995e-06, "loss": 0.4146, "step": 7359 }, { "epoch": 0.5028352804536449, "grad_norm": 4.554109573364258, "learning_rate": 3.6389861679549823e-06, "loss": 0.3449, "step": 7360 }, { "epoch": 0.502903600464576, "grad_norm": 4.666843891143799, "learning_rate": 3.6382122756136804e-06, "loss": 0.2598, "step": 7361 }, { "epoch": 0.5029719204755073, "grad_norm": 3.9572830200195312, "learning_rate": 3.6374383765044915e-06, "loss": 0.2659, "step": 7362 }, { "epoch": 0.5030402404864385, "grad_norm": 3.952173948287964, "learning_rate": 3.6366644706653075e-06, "loss": 0.4424, "step": 7363 }, { "epoch": 0.5031085604973696, "grad_norm": 3.3523638248443604, "learning_rate": 3.635890558134029e-06, "loss": 0.2296, "step": 7364 }, { "epoch": 0.5031768805083009, "grad_norm": 4.355494022369385, "learning_rate": 3.6351166389485496e-06, "loss": 0.4076, "step": 7365 }, { "epoch": 0.503245200519232, "grad_norm": 3.036257743835449, "learning_rate": 3.6343427131467666e-06, "loss": 0.2713, "step": 7366 }, { "epoch": 0.5033135205301633, "grad_norm": 3.207556962966919, "learning_rate": 3.633568780766577e-06, "loss": 0.2773, "step": 7367 }, { "epoch": 0.5033818405410945, "grad_norm": 3.4573874473571777, "learning_rate": 3.63279484184588e-06, "loss": 0.329, "step": 7368 }, { "epoch": 0.5034501605520257, "grad_norm": 4.443116188049316, "learning_rate": 3.632020896422571e-06, "loss": 0.3219, "step": 7369 }, { "epoch": 0.5035184805629569, "grad_norm": 4.5010085105896, "learning_rate": 3.6312469445345486e-06, "loss": 0.2841, "step": 7370 }, { "epoch": 0.503586800573888, "grad_norm": 3.7521259784698486, "learning_rate": 3.6304729862197105e-06, "loss": 0.417, "step": 7371 }, { "epoch": 0.5036551205848193, "grad_norm": 3.481886148452759, "learning_rate": 3.6296990215159586e-06, "loss": 0.3544, "step": 7372 }, { "epoch": 0.5037234405957505, "grad_norm": 4.017792701721191, "learning_rate": 3.6289250504611875e-06, "loss": 0.3227, "step": 7373 }, { "epoch": 0.5037917606066817, "grad_norm": 3.8527028560638428, "learning_rate": 3.6281510730932995e-06, "loss": 0.3902, "step": 7374 }, { "epoch": 0.5038600806176129, "grad_norm": 6.584684371948242, "learning_rate": 3.6273770894501938e-06, "loss": 0.5012, "step": 7375 }, { "epoch": 0.503928400628544, "grad_norm": 6.413451194763184, "learning_rate": 3.62660309956977e-06, "loss": 0.4077, "step": 7376 }, { "epoch": 0.5039967206394753, "grad_norm": 5.6045026779174805, "learning_rate": 3.6258291034899275e-06, "loss": 0.4493, "step": 7377 }, { "epoch": 0.5040650406504065, "grad_norm": 3.6300954818725586, "learning_rate": 3.625055101248568e-06, "loss": 0.2802, "step": 7378 }, { "epoch": 0.5041333606613377, "grad_norm": 6.175231456756592, "learning_rate": 3.6242810928835924e-06, "loss": 0.3698, "step": 7379 }, { "epoch": 0.5042016806722689, "grad_norm": 3.317955255508423, "learning_rate": 3.6235070784329e-06, "loss": 0.2923, "step": 7380 }, { "epoch": 0.5042700006832002, "grad_norm": 3.023806571960449, "learning_rate": 3.622733057934395e-06, "loss": 0.2471, "step": 7381 }, { "epoch": 0.5043383206941313, "grad_norm": 5.040317535400391, "learning_rate": 3.621959031425977e-06, "loss": 0.4198, "step": 7382 }, { "epoch": 0.5044066407050625, "grad_norm": 2.441349983215332, "learning_rate": 3.6211849989455485e-06, "loss": 0.2588, "step": 7383 }, { "epoch": 0.5044749607159937, "grad_norm": 3.03049898147583, "learning_rate": 3.6204109605310126e-06, "loss": 0.2726, "step": 7384 }, { "epoch": 0.5045432807269249, "grad_norm": 4.134446144104004, "learning_rate": 3.6196369162202715e-06, "loss": 0.4128, "step": 7385 }, { "epoch": 0.5046116007378562, "grad_norm": 4.397978782653809, "learning_rate": 3.6188628660512272e-06, "loss": 0.2953, "step": 7386 }, { "epoch": 0.5046799207487873, "grad_norm": 5.2949676513671875, "learning_rate": 3.618088810061784e-06, "loss": 0.3809, "step": 7387 }, { "epoch": 0.5047482407597185, "grad_norm": 5.090333938598633, "learning_rate": 3.6173147482898447e-06, "loss": 0.4384, "step": 7388 }, { "epoch": 0.5048165607706497, "grad_norm": 3.421694755554199, "learning_rate": 3.6165406807733146e-06, "loss": 0.3935, "step": 7389 }, { "epoch": 0.5048848807815809, "grad_norm": 3.76298189163208, "learning_rate": 3.6157666075500944e-06, "loss": 0.3259, "step": 7390 }, { "epoch": 0.5049532007925122, "grad_norm": 2.9901561737060547, "learning_rate": 3.614992528658092e-06, "loss": 0.3385, "step": 7391 }, { "epoch": 0.5050215208034433, "grad_norm": 4.0540900230407715, "learning_rate": 3.6142184441352102e-06, "loss": 0.323, "step": 7392 }, { "epoch": 0.5050898408143746, "grad_norm": 4.147463798522949, "learning_rate": 3.6134443540193536e-06, "loss": 0.3782, "step": 7393 }, { "epoch": 0.5051581608253057, "grad_norm": 3.0511977672576904, "learning_rate": 3.612670258348429e-06, "loss": 0.2875, "step": 7394 }, { "epoch": 0.5052264808362369, "grad_norm": 4.337051868438721, "learning_rate": 3.6118961571603404e-06, "loss": 0.3604, "step": 7395 }, { "epoch": 0.5052948008471682, "grad_norm": 5.682448387145996, "learning_rate": 3.6111220504929937e-06, "loss": 0.3526, "step": 7396 }, { "epoch": 0.5053631208580993, "grad_norm": 3.0068485736846924, "learning_rate": 3.6103479383842946e-06, "loss": 0.3101, "step": 7397 }, { "epoch": 0.5054314408690306, "grad_norm": 3.6785452365875244, "learning_rate": 3.6095738208721496e-06, "loss": 0.3978, "step": 7398 }, { "epoch": 0.5054997608799617, "grad_norm": 3.1480743885040283, "learning_rate": 3.608799697994466e-06, "loss": 0.3392, "step": 7399 }, { "epoch": 0.5055680808908929, "grad_norm": 4.018892288208008, "learning_rate": 3.6080255697891483e-06, "loss": 0.346, "step": 7400 }, { "epoch": 0.5056364009018242, "grad_norm": 4.304881572723389, "learning_rate": 3.607251436294107e-06, "loss": 0.3217, "step": 7401 }, { "epoch": 0.5057047209127553, "grad_norm": 4.684370517730713, "learning_rate": 3.6064772975472458e-06, "loss": 0.3365, "step": 7402 }, { "epoch": 0.5057730409236866, "grad_norm": 3.5040268898010254, "learning_rate": 3.6057031535864746e-06, "loss": 0.3463, "step": 7403 }, { "epoch": 0.5058413609346177, "grad_norm": 4.099161148071289, "learning_rate": 3.6049290044497e-06, "loss": 0.3054, "step": 7404 }, { "epoch": 0.505909680945549, "grad_norm": 2.8148043155670166, "learning_rate": 3.6041548501748306e-06, "loss": 0.2495, "step": 7405 }, { "epoch": 0.5059780009564802, "grad_norm": 2.8859000205993652, "learning_rate": 3.6033806907997737e-06, "loss": 0.258, "step": 7406 }, { "epoch": 0.5060463209674113, "grad_norm": 4.431487083435059, "learning_rate": 3.6026065263624397e-06, "loss": 0.3541, "step": 7407 }, { "epoch": 0.5061146409783426, "grad_norm": 5.009481430053711, "learning_rate": 3.601832356900736e-06, "loss": 0.3159, "step": 7408 }, { "epoch": 0.5061829609892737, "grad_norm": 5.113243579864502, "learning_rate": 3.601058182452571e-06, "loss": 0.306, "step": 7409 }, { "epoch": 0.506251281000205, "grad_norm": 6.014674186706543, "learning_rate": 3.6002840030558556e-06, "loss": 0.3512, "step": 7410 }, { "epoch": 0.5063196010111362, "grad_norm": 4.009071350097656, "learning_rate": 3.599509818748499e-06, "loss": 0.32, "step": 7411 }, { "epoch": 0.5063879210220673, "grad_norm": 6.124129772186279, "learning_rate": 3.59873562956841e-06, "loss": 0.4305, "step": 7412 }, { "epoch": 0.5064562410329986, "grad_norm": 5.443064212799072, "learning_rate": 3.5979614355534993e-06, "loss": 0.2652, "step": 7413 }, { "epoch": 0.5065245610439297, "grad_norm": 3.59333872795105, "learning_rate": 3.5971872367416764e-06, "loss": 0.2209, "step": 7414 }, { "epoch": 0.506592881054861, "grad_norm": 4.0265421867370605, "learning_rate": 3.596413033170853e-06, "loss": 0.2248, "step": 7415 }, { "epoch": 0.5066612010657922, "grad_norm": 5.03273868560791, "learning_rate": 3.5956388248789383e-06, "loss": 0.443, "step": 7416 }, { "epoch": 0.5067295210767234, "grad_norm": 4.446054458618164, "learning_rate": 3.5948646119038453e-06, "loss": 0.3257, "step": 7417 }, { "epoch": 0.5067978410876546, "grad_norm": 4.993832111358643, "learning_rate": 3.594090394283483e-06, "loss": 0.3541, "step": 7418 }, { "epoch": 0.5068661610985857, "grad_norm": 4.046298980712891, "learning_rate": 3.5933161720557633e-06, "loss": 0.2891, "step": 7419 }, { "epoch": 0.506934481109517, "grad_norm": 3.417189598083496, "learning_rate": 3.5925419452585987e-06, "loss": 0.3177, "step": 7420 }, { "epoch": 0.5070028011204482, "grad_norm": 3.4429128170013428, "learning_rate": 3.5917677139299017e-06, "loss": 0.3434, "step": 7421 }, { "epoch": 0.5070711211313794, "grad_norm": 6.784792900085449, "learning_rate": 3.5909934781075815e-06, "loss": 0.4017, "step": 7422 }, { "epoch": 0.5071394411423106, "grad_norm": 3.300177574157715, "learning_rate": 3.590219237829553e-06, "loss": 0.3438, "step": 7423 }, { "epoch": 0.5072077611532417, "grad_norm": 2.772054672241211, "learning_rate": 3.5894449931337265e-06, "loss": 0.3281, "step": 7424 }, { "epoch": 0.507276081164173, "grad_norm": 3.917818069458008, "learning_rate": 3.5886707440580173e-06, "loss": 0.2277, "step": 7425 }, { "epoch": 0.5073444011751042, "grad_norm": 4.189470291137695, "learning_rate": 3.587896490640336e-06, "loss": 0.2604, "step": 7426 }, { "epoch": 0.5074127211860354, "grad_norm": 4.02224063873291, "learning_rate": 3.587122232918597e-06, "loss": 0.3557, "step": 7427 }, { "epoch": 0.5074810411969666, "grad_norm": 3.166290044784546, "learning_rate": 3.586347970930714e-06, "loss": 0.3225, "step": 7428 }, { "epoch": 0.5075493612078978, "grad_norm": 4.511301040649414, "learning_rate": 3.5855737047145993e-06, "loss": 0.3592, "step": 7429 }, { "epoch": 0.507617681218829, "grad_norm": 4.032824516296387, "learning_rate": 3.5847994343081665e-06, "loss": 0.3211, "step": 7430 }, { "epoch": 0.5076860012297602, "grad_norm": 4.459217071533203, "learning_rate": 3.584025159749332e-06, "loss": 0.2882, "step": 7431 }, { "epoch": 0.5077543212406914, "grad_norm": 4.083069801330566, "learning_rate": 3.5832508810760073e-06, "loss": 0.3587, "step": 7432 }, { "epoch": 0.5078226412516226, "grad_norm": 3.4202938079833984, "learning_rate": 3.582476598326108e-06, "loss": 0.2896, "step": 7433 }, { "epoch": 0.5078909612625538, "grad_norm": 3.54178524017334, "learning_rate": 3.581702311537548e-06, "loss": 0.3226, "step": 7434 }, { "epoch": 0.507959281273485, "grad_norm": 3.461782217025757, "learning_rate": 3.5809280207482442e-06, "loss": 0.2768, "step": 7435 }, { "epoch": 0.5080276012844162, "grad_norm": 3.02002215385437, "learning_rate": 3.5801537259961067e-06, "loss": 0.2454, "step": 7436 }, { "epoch": 0.5080959212953474, "grad_norm": 3.763333559036255, "learning_rate": 3.579379427319057e-06, "loss": 0.2934, "step": 7437 }, { "epoch": 0.5081642413062786, "grad_norm": 3.695247173309326, "learning_rate": 3.5786051247550057e-06, "loss": 0.3285, "step": 7438 }, { "epoch": 0.5082325613172098, "grad_norm": 3.0284621715545654, "learning_rate": 3.57783081834187e-06, "loss": 0.2636, "step": 7439 }, { "epoch": 0.508300881328141, "grad_norm": 3.7696831226348877, "learning_rate": 3.5770565081175663e-06, "loss": 0.2868, "step": 7440 }, { "epoch": 0.5083692013390723, "grad_norm": 3.8003220558166504, "learning_rate": 3.576282194120009e-06, "loss": 0.3324, "step": 7441 }, { "epoch": 0.5084375213500034, "grad_norm": 4.59470272064209, "learning_rate": 3.5755078763871158e-06, "loss": 0.3337, "step": 7442 }, { "epoch": 0.5085058413609346, "grad_norm": 4.530360698699951, "learning_rate": 3.574733554956801e-06, "loss": 0.3052, "step": 7443 }, { "epoch": 0.5085741613718658, "grad_norm": 4.071902275085449, "learning_rate": 3.5739592298669833e-06, "loss": 0.2963, "step": 7444 }, { "epoch": 0.508642481382797, "grad_norm": 3.9853529930114746, "learning_rate": 3.573184901155578e-06, "loss": 0.3152, "step": 7445 }, { "epoch": 0.5087108013937283, "grad_norm": 3.9933865070343018, "learning_rate": 3.5724105688605026e-06, "loss": 0.3493, "step": 7446 }, { "epoch": 0.5087791214046594, "grad_norm": 3.031524896621704, "learning_rate": 3.571636233019673e-06, "loss": 0.2517, "step": 7447 }, { "epoch": 0.5088474414155906, "grad_norm": 4.5505146980285645, "learning_rate": 3.5708618936710078e-06, "loss": 0.2344, "step": 7448 }, { "epoch": 0.5089157614265218, "grad_norm": 4.230645179748535, "learning_rate": 3.570087550852423e-06, "loss": 0.2883, "step": 7449 }, { "epoch": 0.508984081437453, "grad_norm": 4.472741603851318, "learning_rate": 3.5693132046018376e-06, "loss": 0.3321, "step": 7450 }, { "epoch": 0.5090524014483843, "grad_norm": 4.148345947265625, "learning_rate": 3.5685388549571675e-06, "loss": 0.2141, "step": 7451 }, { "epoch": 0.5091207214593154, "grad_norm": 3.6730756759643555, "learning_rate": 3.5677645019563314e-06, "loss": 0.3156, "step": 7452 }, { "epoch": 0.5091890414702467, "grad_norm": 4.193722724914551, "learning_rate": 3.5669901456372475e-06, "loss": 0.2626, "step": 7453 }, { "epoch": 0.5092573614811778, "grad_norm": 3.8264358043670654, "learning_rate": 3.5662157860378346e-06, "loss": 0.3208, "step": 7454 }, { "epoch": 0.509325681492109, "grad_norm": 4.234748840332031, "learning_rate": 3.56544142319601e-06, "loss": 0.3407, "step": 7455 }, { "epoch": 0.5093940015030403, "grad_norm": 4.679222583770752, "learning_rate": 3.564667057149692e-06, "loss": 0.3918, "step": 7456 }, { "epoch": 0.5094623215139714, "grad_norm": 4.973359107971191, "learning_rate": 3.5638926879368004e-06, "loss": 0.4543, "step": 7457 }, { "epoch": 0.5095306415249027, "grad_norm": 4.741574764251709, "learning_rate": 3.563118315595254e-06, "loss": 0.4572, "step": 7458 }, { "epoch": 0.5095989615358338, "grad_norm": 2.8384649753570557, "learning_rate": 3.562343940162969e-06, "loss": 0.2522, "step": 7459 }, { "epoch": 0.509667281546765, "grad_norm": 5.705024719238281, "learning_rate": 3.561569561677868e-06, "loss": 0.314, "step": 7460 }, { "epoch": 0.5097356015576963, "grad_norm": 3.667579174041748, "learning_rate": 3.5607951801778686e-06, "loss": 0.3232, "step": 7461 }, { "epoch": 0.5098039215686274, "grad_norm": 3.0356435775756836, "learning_rate": 3.5600207957008905e-06, "loss": 0.1946, "step": 7462 }, { "epoch": 0.5098722415795587, "grad_norm": 3.8626163005828857, "learning_rate": 3.5592464082848537e-06, "loss": 0.3673, "step": 7463 }, { "epoch": 0.5099405615904898, "grad_norm": 4.174970626831055, "learning_rate": 3.5584720179676775e-06, "loss": 0.3143, "step": 7464 }, { "epoch": 0.5100088816014211, "grad_norm": 4.5079450607299805, "learning_rate": 3.5576976247872812e-06, "loss": 0.3448, "step": 7465 }, { "epoch": 0.5100772016123523, "grad_norm": 3.2893869876861572, "learning_rate": 3.556923228781585e-06, "loss": 0.2896, "step": 7466 }, { "epoch": 0.5101455216232834, "grad_norm": 3.9318113327026367, "learning_rate": 3.5561488299885096e-06, "loss": 0.2314, "step": 7467 }, { "epoch": 0.5102138416342147, "grad_norm": 5.830905914306641, "learning_rate": 3.5553744284459754e-06, "loss": 0.4459, "step": 7468 }, { "epoch": 0.5102821616451458, "grad_norm": 3.643963575363159, "learning_rate": 3.554600024191901e-06, "loss": 0.2594, "step": 7469 }, { "epoch": 0.5103504816560771, "grad_norm": 3.780755043029785, "learning_rate": 3.553825617264209e-06, "loss": 0.3807, "step": 7470 }, { "epoch": 0.5104188016670083, "grad_norm": 2.841749668121338, "learning_rate": 3.55305120770082e-06, "loss": 0.3054, "step": 7471 }, { "epoch": 0.5104871216779394, "grad_norm": 4.4875383377075195, "learning_rate": 3.552276795539653e-06, "loss": 0.341, "step": 7472 }, { "epoch": 0.5105554416888707, "grad_norm": 3.488750457763672, "learning_rate": 3.5515023808186298e-06, "loss": 0.3541, "step": 7473 }, { "epoch": 0.5106237616998018, "grad_norm": 4.120579242706299, "learning_rate": 3.5507279635756733e-06, "loss": 0.4624, "step": 7474 }, { "epoch": 0.5106920817107331, "grad_norm": 5.376229286193848, "learning_rate": 3.5499535438487016e-06, "loss": 0.351, "step": 7475 }, { "epoch": 0.5107604017216643, "grad_norm": 3.001821517944336, "learning_rate": 3.5491791216756377e-06, "loss": 0.2432, "step": 7476 }, { "epoch": 0.5108287217325955, "grad_norm": 3.297210216522217, "learning_rate": 3.5484046970944024e-06, "loss": 0.2947, "step": 7477 }, { "epoch": 0.5108970417435267, "grad_norm": 4.450343132019043, "learning_rate": 3.5476302701429182e-06, "loss": 0.3771, "step": 7478 }, { "epoch": 0.5109653617544578, "grad_norm": 3.9850122928619385, "learning_rate": 3.546855840859105e-06, "loss": 0.3524, "step": 7479 }, { "epoch": 0.5110336817653891, "grad_norm": 3.7382500171661377, "learning_rate": 3.5460814092808867e-06, "loss": 0.3106, "step": 7480 }, { "epoch": 0.5111020017763203, "grad_norm": 4.153324127197266, "learning_rate": 3.5453069754461832e-06, "loss": 0.3844, "step": 7481 }, { "epoch": 0.5111703217872515, "grad_norm": 3.907651662826538, "learning_rate": 3.5445325393929175e-06, "loss": 0.383, "step": 7482 }, { "epoch": 0.5112386417981827, "grad_norm": 3.5437448024749756, "learning_rate": 3.543758101159011e-06, "loss": 0.3315, "step": 7483 }, { "epoch": 0.5113069618091138, "grad_norm": 4.52564001083374, "learning_rate": 3.5429836607823876e-06, "loss": 0.3179, "step": 7484 }, { "epoch": 0.5113752818200451, "grad_norm": 4.708589553833008, "learning_rate": 3.5422092183009677e-06, "loss": 0.3384, "step": 7485 }, { "epoch": 0.5114436018309763, "grad_norm": 3.9970133304595947, "learning_rate": 3.5414347737526742e-06, "loss": 0.281, "step": 7486 }, { "epoch": 0.5115119218419075, "grad_norm": 4.594146251678467, "learning_rate": 3.540660327175429e-06, "loss": 0.379, "step": 7487 }, { "epoch": 0.5115802418528387, "grad_norm": 4.682394504547119, "learning_rate": 3.5398858786071575e-06, "loss": 0.3699, "step": 7488 }, { "epoch": 0.51164856186377, "grad_norm": 3.787919521331787, "learning_rate": 3.5391114280857783e-06, "loss": 0.2963, "step": 7489 }, { "epoch": 0.5117168818747011, "grad_norm": 3.7135326862335205, "learning_rate": 3.5383369756492173e-06, "loss": 0.3044, "step": 7490 }, { "epoch": 0.5117852018856323, "grad_norm": 4.0029988288879395, "learning_rate": 3.5375625213353955e-06, "loss": 0.3561, "step": 7491 }, { "epoch": 0.5118535218965635, "grad_norm": 4.71183967590332, "learning_rate": 3.536788065182237e-06, "loss": 0.3466, "step": 7492 }, { "epoch": 0.5119218419074947, "grad_norm": 4.854861259460449, "learning_rate": 3.536013607227665e-06, "loss": 0.3905, "step": 7493 }, { "epoch": 0.511990161918426, "grad_norm": 4.501122951507568, "learning_rate": 3.535239147509602e-06, "loss": 0.4577, "step": 7494 }, { "epoch": 0.5120584819293571, "grad_norm": 4.035604953765869, "learning_rate": 3.5344646860659705e-06, "loss": 0.3496, "step": 7495 }, { "epoch": 0.5121268019402883, "grad_norm": 3.277137041091919, "learning_rate": 3.5336902229346962e-06, "loss": 0.2971, "step": 7496 }, { "epoch": 0.5121951219512195, "grad_norm": 5.937412261962891, "learning_rate": 3.5329157581537e-06, "loss": 0.3891, "step": 7497 }, { "epoch": 0.5122634419621507, "grad_norm": 5.972489356994629, "learning_rate": 3.5321412917609066e-06, "loss": 0.3265, "step": 7498 }, { "epoch": 0.512331761973082, "grad_norm": 4.314980506896973, "learning_rate": 3.5313668237942397e-06, "loss": 0.374, "step": 7499 }, { "epoch": 0.5124000819840131, "grad_norm": 3.713505506515503, "learning_rate": 3.530592354291623e-06, "loss": 0.2392, "step": 7500 }, { "epoch": 0.5124684019949444, "grad_norm": 3.3384976387023926, "learning_rate": 3.5298178832909794e-06, "loss": 0.2741, "step": 7501 }, { "epoch": 0.5125367220058755, "grad_norm": 3.307063102722168, "learning_rate": 3.5290434108302326e-06, "loss": 0.2203, "step": 7502 }, { "epoch": 0.5126050420168067, "grad_norm": 3.4234611988067627, "learning_rate": 3.5282689369473074e-06, "loss": 0.407, "step": 7503 }, { "epoch": 0.5126733620277379, "grad_norm": 3.9463632106781006, "learning_rate": 3.527494461680128e-06, "loss": 0.425, "step": 7504 }, { "epoch": 0.5127416820386691, "grad_norm": 7.685656547546387, "learning_rate": 3.526719985066616e-06, "loss": 0.4213, "step": 7505 }, { "epoch": 0.5128100020496004, "grad_norm": 3.3412387371063232, "learning_rate": 3.5259455071446995e-06, "loss": 0.3317, "step": 7506 }, { "epoch": 0.5128783220605315, "grad_norm": 3.581861972808838, "learning_rate": 3.525171027952299e-06, "loss": 0.2489, "step": 7507 }, { "epoch": 0.5129466420714627, "grad_norm": 3.5699305534362793, "learning_rate": 3.5243965475273402e-06, "loss": 0.3646, "step": 7508 }, { "epoch": 0.5130149620823939, "grad_norm": 4.161043643951416, "learning_rate": 3.5236220659077473e-06, "loss": 0.3941, "step": 7509 }, { "epoch": 0.5130832820933251, "grad_norm": 4.114192962646484, "learning_rate": 3.5228475831314446e-06, "loss": 0.3385, "step": 7510 }, { "epoch": 0.5131516021042564, "grad_norm": 4.593708038330078, "learning_rate": 3.5220730992363566e-06, "loss": 0.4324, "step": 7511 }, { "epoch": 0.5132199221151875, "grad_norm": 3.3546559810638428, "learning_rate": 3.5212986142604066e-06, "loss": 0.2725, "step": 7512 }, { "epoch": 0.5132882421261188, "grad_norm": 3.452183246612549, "learning_rate": 3.5205241282415207e-06, "loss": 0.2905, "step": 7513 }, { "epoch": 0.5133565621370499, "grad_norm": 4.3225250244140625, "learning_rate": 3.5197496412176226e-06, "loss": 0.2589, "step": 7514 }, { "epoch": 0.5134248821479811, "grad_norm": 3.974146842956543, "learning_rate": 3.5189751532266364e-06, "loss": 0.3191, "step": 7515 }, { "epoch": 0.5134932021589124, "grad_norm": 3.2685863971710205, "learning_rate": 3.5182006643064878e-06, "loss": 0.2892, "step": 7516 }, { "epoch": 0.5135615221698435, "grad_norm": 3.1998119354248047, "learning_rate": 3.517426174495101e-06, "loss": 0.306, "step": 7517 }, { "epoch": 0.5136298421807748, "grad_norm": 3.803234577178955, "learning_rate": 3.5166516838304005e-06, "loss": 0.2911, "step": 7518 }, { "epoch": 0.5136981621917059, "grad_norm": 3.9603471755981445, "learning_rate": 3.515877192350311e-06, "loss": 0.2583, "step": 7519 }, { "epoch": 0.5137664822026371, "grad_norm": 3.818927764892578, "learning_rate": 3.5151027000927574e-06, "loss": 0.2985, "step": 7520 }, { "epoch": 0.5138348022135684, "grad_norm": 4.364125728607178, "learning_rate": 3.514328207095666e-06, "loss": 0.416, "step": 7521 }, { "epoch": 0.5139031222244995, "grad_norm": 4.763454914093018, "learning_rate": 3.5135537133969576e-06, "loss": 0.4058, "step": 7522 }, { "epoch": 0.5139714422354308, "grad_norm": 3.8887205123901367, "learning_rate": 3.512779219034562e-06, "loss": 0.2538, "step": 7523 }, { "epoch": 0.5140397622463619, "grad_norm": 4.308028697967529, "learning_rate": 3.512004724046402e-06, "loss": 0.3151, "step": 7524 }, { "epoch": 0.5141080822572932, "grad_norm": 4.412790775299072, "learning_rate": 3.5112302284704018e-06, "loss": 0.2582, "step": 7525 }, { "epoch": 0.5141764022682244, "grad_norm": 3.5714111328125, "learning_rate": 3.5104557323444866e-06, "loss": 0.2866, "step": 7526 }, { "epoch": 0.5142447222791555, "grad_norm": 3.913959264755249, "learning_rate": 3.5096812357065833e-06, "loss": 0.4356, "step": 7527 }, { "epoch": 0.5143130422900868, "grad_norm": 2.2749106884002686, "learning_rate": 3.5089067385946147e-06, "loss": 0.1901, "step": 7528 }, { "epoch": 0.5143813623010179, "grad_norm": 4.040849685668945, "learning_rate": 3.5081322410465066e-06, "loss": 0.3094, "step": 7529 }, { "epoch": 0.5144496823119492, "grad_norm": 4.107551097869873, "learning_rate": 3.5073577431001843e-06, "loss": 0.3476, "step": 7530 }, { "epoch": 0.5145180023228804, "grad_norm": 3.5167076587677, "learning_rate": 3.5065832447935735e-06, "loss": 0.3654, "step": 7531 }, { "epoch": 0.5145863223338115, "grad_norm": 3.8406362533569336, "learning_rate": 3.5058087461645973e-06, "loss": 0.2489, "step": 7532 }, { "epoch": 0.5146546423447428, "grad_norm": 3.5384416580200195, "learning_rate": 3.505034247251184e-06, "loss": 0.294, "step": 7533 }, { "epoch": 0.5147229623556739, "grad_norm": 3.2500011920928955, "learning_rate": 3.504259748091256e-06, "loss": 0.3055, "step": 7534 }, { "epoch": 0.5147912823666052, "grad_norm": 4.873980522155762, "learning_rate": 3.503485248722739e-06, "loss": 0.304, "step": 7535 }, { "epoch": 0.5148596023775364, "grad_norm": 4.211033821105957, "learning_rate": 3.502710749183559e-06, "loss": 0.3738, "step": 7536 }, { "epoch": 0.5149279223884676, "grad_norm": 4.388284683227539, "learning_rate": 3.5019362495116413e-06, "loss": 0.3796, "step": 7537 }, { "epoch": 0.5149962423993988, "grad_norm": 4.458773136138916, "learning_rate": 3.5011617497449097e-06, "loss": 0.4019, "step": 7538 }, { "epoch": 0.5150645624103299, "grad_norm": 3.251344919204712, "learning_rate": 3.5003872499212913e-06, "loss": 0.2469, "step": 7539 }, { "epoch": 0.5151328824212612, "grad_norm": 5.45503044128418, "learning_rate": 3.4996127500787095e-06, "loss": 0.3862, "step": 7540 }, { "epoch": 0.5152012024321924, "grad_norm": 3.584122657775879, "learning_rate": 3.4988382502550902e-06, "loss": 0.2989, "step": 7541 }, { "epoch": 0.5152695224431236, "grad_norm": 4.555100917816162, "learning_rate": 3.49806375048836e-06, "loss": 0.4608, "step": 7542 }, { "epoch": 0.5153378424540548, "grad_norm": 4.279459476470947, "learning_rate": 3.4972892508164416e-06, "loss": 0.3209, "step": 7543 }, { "epoch": 0.5154061624649859, "grad_norm": 3.5345113277435303, "learning_rate": 3.4965147512772616e-06, "loss": 0.2233, "step": 7544 }, { "epoch": 0.5154744824759172, "grad_norm": 5.096740245819092, "learning_rate": 3.495740251908744e-06, "loss": 0.333, "step": 7545 }, { "epoch": 0.5155428024868484, "grad_norm": 3.9700701236724854, "learning_rate": 3.4949657527488176e-06, "loss": 0.2668, "step": 7546 }, { "epoch": 0.5156111224977796, "grad_norm": 4.9341654777526855, "learning_rate": 3.494191253835403e-06, "loss": 0.3208, "step": 7547 }, { "epoch": 0.5156794425087108, "grad_norm": 4.77533483505249, "learning_rate": 3.4934167552064268e-06, "loss": 0.41, "step": 7548 }, { "epoch": 0.515747762519642, "grad_norm": 5.088878154754639, "learning_rate": 3.492642256899815e-06, "loss": 0.4204, "step": 7549 }, { "epoch": 0.5158160825305732, "grad_norm": 5.241314888000488, "learning_rate": 3.4918677589534945e-06, "loss": 0.3258, "step": 7550 }, { "epoch": 0.5158844025415044, "grad_norm": 3.8754656314849854, "learning_rate": 3.4910932614053857e-06, "loss": 0.2598, "step": 7551 }, { "epoch": 0.5159527225524356, "grad_norm": 3.5445926189422607, "learning_rate": 3.490318764293417e-06, "loss": 0.2994, "step": 7552 }, { "epoch": 0.5160210425633668, "grad_norm": 5.831856727600098, "learning_rate": 3.489544267655513e-06, "loss": 0.3556, "step": 7553 }, { "epoch": 0.516089362574298, "grad_norm": 5.376394748687744, "learning_rate": 3.488769771529599e-06, "loss": 0.2914, "step": 7554 }, { "epoch": 0.5161576825852292, "grad_norm": 3.8408267498016357, "learning_rate": 3.487995275953599e-06, "loss": 0.2574, "step": 7555 }, { "epoch": 0.5162260025961604, "grad_norm": 3.136448383331299, "learning_rate": 3.487220780965438e-06, "loss": 0.2424, "step": 7556 }, { "epoch": 0.5162943226070916, "grad_norm": 4.503582000732422, "learning_rate": 3.4864462866030415e-06, "loss": 0.2605, "step": 7557 }, { "epoch": 0.5163626426180228, "grad_norm": 4.340565204620361, "learning_rate": 3.4856717929043357e-06, "loss": 0.321, "step": 7558 }, { "epoch": 0.516430962628954, "grad_norm": 4.26249885559082, "learning_rate": 3.484897299907243e-06, "loss": 0.3492, "step": 7559 }, { "epoch": 0.5164992826398852, "grad_norm": 2.9652581214904785, "learning_rate": 3.484122807649689e-06, "loss": 0.2767, "step": 7560 }, { "epoch": 0.5165676026508165, "grad_norm": 2.9953253269195557, "learning_rate": 3.4833483161696e-06, "loss": 0.2648, "step": 7561 }, { "epoch": 0.5166359226617476, "grad_norm": 3.2357177734375, "learning_rate": 3.4825738255049004e-06, "loss": 0.2885, "step": 7562 }, { "epoch": 0.5167042426726788, "grad_norm": 4.512239456176758, "learning_rate": 3.481799335693513e-06, "loss": 0.3129, "step": 7563 }, { "epoch": 0.51677256268361, "grad_norm": 6.525304794311523, "learning_rate": 3.481024846773364e-06, "loss": 0.3913, "step": 7564 }, { "epoch": 0.5168408826945412, "grad_norm": 4.294619083404541, "learning_rate": 3.4802503587823773e-06, "loss": 0.3692, "step": 7565 }, { "epoch": 0.5169092027054725, "grad_norm": 3.7547144889831543, "learning_rate": 3.4794758717584804e-06, "loss": 0.3288, "step": 7566 }, { "epoch": 0.5169775227164036, "grad_norm": 4.163125038146973, "learning_rate": 3.478701385739594e-06, "loss": 0.3762, "step": 7567 }, { "epoch": 0.5170458427273348, "grad_norm": 3.312455415725708, "learning_rate": 3.4779269007636438e-06, "loss": 0.3773, "step": 7568 }, { "epoch": 0.517114162738266, "grad_norm": 3.187312364578247, "learning_rate": 3.4771524168685553e-06, "loss": 0.2833, "step": 7569 }, { "epoch": 0.5171824827491972, "grad_norm": 2.899120569229126, "learning_rate": 3.476377934092254e-06, "loss": 0.2485, "step": 7570 }, { "epoch": 0.5172508027601285, "grad_norm": 4.530650615692139, "learning_rate": 3.47560345247266e-06, "loss": 0.4153, "step": 7571 }, { "epoch": 0.5173191227710596, "grad_norm": 3.370697498321533, "learning_rate": 3.474828972047701e-06, "loss": 0.3494, "step": 7572 }, { "epoch": 0.5173874427819909, "grad_norm": 4.207926273345947, "learning_rate": 3.474054492855301e-06, "loss": 0.3621, "step": 7573 }, { "epoch": 0.517455762792922, "grad_norm": 4.08348274230957, "learning_rate": 3.4732800149333833e-06, "loss": 0.337, "step": 7574 }, { "epoch": 0.5175240828038532, "grad_norm": 3.5550878047943115, "learning_rate": 3.472505538319873e-06, "loss": 0.2966, "step": 7575 }, { "epoch": 0.5175924028147845, "grad_norm": 3.8463470935821533, "learning_rate": 3.4717310630526925e-06, "loss": 0.4336, "step": 7576 }, { "epoch": 0.5176607228257156, "grad_norm": 3.288961410522461, "learning_rate": 3.4709565891697677e-06, "loss": 0.2776, "step": 7577 }, { "epoch": 0.5177290428366469, "grad_norm": 5.32250452041626, "learning_rate": 3.470182116709021e-06, "loss": 0.4523, "step": 7578 }, { "epoch": 0.517797362847578, "grad_norm": 3.7330520153045654, "learning_rate": 3.469407645708378e-06, "loss": 0.3853, "step": 7579 }, { "epoch": 0.5178656828585092, "grad_norm": 6.348146438598633, "learning_rate": 3.4686331762057606e-06, "loss": 0.2552, "step": 7580 }, { "epoch": 0.5179340028694405, "grad_norm": 4.526138782501221, "learning_rate": 3.4678587082390937e-06, "loss": 0.296, "step": 7581 }, { "epoch": 0.5180023228803716, "grad_norm": 2.8098275661468506, "learning_rate": 3.467084241846299e-06, "loss": 0.2515, "step": 7582 }, { "epoch": 0.5180706428913029, "grad_norm": 3.5836572647094727, "learning_rate": 3.466309777065305e-06, "loss": 0.3772, "step": 7583 }, { "epoch": 0.518138962902234, "grad_norm": 3.077202796936035, "learning_rate": 3.4655353139340298e-06, "loss": 0.3042, "step": 7584 }, { "epoch": 0.5182072829131653, "grad_norm": 3.2208831310272217, "learning_rate": 3.4647608524903983e-06, "loss": 0.2626, "step": 7585 }, { "epoch": 0.5182756029240965, "grad_norm": 3.195894479751587, "learning_rate": 3.4639863927723348e-06, "loss": 0.3159, "step": 7586 }, { "epoch": 0.5183439229350276, "grad_norm": 3.0272364616394043, "learning_rate": 3.4632119348177633e-06, "loss": 0.3142, "step": 7587 }, { "epoch": 0.5184122429459589, "grad_norm": 3.928555727005005, "learning_rate": 3.4624374786646044e-06, "loss": 0.4236, "step": 7588 }, { "epoch": 0.51848056295689, "grad_norm": 4.448713302612305, "learning_rate": 3.461663024350783e-06, "loss": 0.3594, "step": 7589 }, { "epoch": 0.5185488829678213, "grad_norm": 4.558788299560547, "learning_rate": 3.4608885719142216e-06, "loss": 0.3322, "step": 7590 }, { "epoch": 0.5186172029787525, "grad_norm": 4.448854446411133, "learning_rate": 3.4601141213928437e-06, "loss": 0.2824, "step": 7591 }, { "epoch": 0.5186855229896837, "grad_norm": 3.3829078674316406, "learning_rate": 3.459339672824571e-06, "loss": 0.3068, "step": 7592 }, { "epoch": 0.5187538430006149, "grad_norm": 3.777362823486328, "learning_rate": 3.4585652262473265e-06, "loss": 0.3021, "step": 7593 }, { "epoch": 0.518822163011546, "grad_norm": 4.220573902130127, "learning_rate": 3.4577907816990326e-06, "loss": 0.2634, "step": 7594 }, { "epoch": 0.5188904830224773, "grad_norm": 4.362198829650879, "learning_rate": 3.457016339217613e-06, "loss": 0.283, "step": 7595 }, { "epoch": 0.5189588030334085, "grad_norm": 3.5533766746520996, "learning_rate": 3.456241898840989e-06, "loss": 0.3305, "step": 7596 }, { "epoch": 0.5190271230443397, "grad_norm": 3.7217092514038086, "learning_rate": 3.455467460607083e-06, "loss": 0.2499, "step": 7597 }, { "epoch": 0.5190954430552709, "grad_norm": 5.4116973876953125, "learning_rate": 3.4546930245538162e-06, "loss": 0.3228, "step": 7598 }, { "epoch": 0.519163763066202, "grad_norm": 3.664916515350342, "learning_rate": 3.4539185907191144e-06, "loss": 0.3225, "step": 7599 }, { "epoch": 0.5192320830771333, "grad_norm": 3.8658595085144043, "learning_rate": 3.4531441591408957e-06, "loss": 0.4292, "step": 7600 }, { "epoch": 0.5193004030880645, "grad_norm": 5.062362194061279, "learning_rate": 3.452369729857082e-06, "loss": 0.2678, "step": 7601 }, { "epoch": 0.5193687230989957, "grad_norm": 3.592019557952881, "learning_rate": 3.451595302905597e-06, "loss": 0.2693, "step": 7602 }, { "epoch": 0.5194370431099269, "grad_norm": 4.2978196144104, "learning_rate": 3.4508208783243634e-06, "loss": 0.3672, "step": 7603 }, { "epoch": 0.5195053631208582, "grad_norm": 3.9519052505493164, "learning_rate": 3.4500464561512983e-06, "loss": 0.2936, "step": 7604 }, { "epoch": 0.5195736831317893, "grad_norm": 4.032670974731445, "learning_rate": 3.449272036424327e-06, "loss": 0.322, "step": 7605 }, { "epoch": 0.5196420031427205, "grad_norm": 3.7342653274536133, "learning_rate": 3.4484976191813697e-06, "loss": 0.2816, "step": 7606 }, { "epoch": 0.5197103231536517, "grad_norm": 2.796999216079712, "learning_rate": 3.4477232044603475e-06, "loss": 0.2746, "step": 7607 }, { "epoch": 0.5197786431645829, "grad_norm": 3.6524462699890137, "learning_rate": 3.446948792299181e-06, "loss": 0.2902, "step": 7608 }, { "epoch": 0.5198469631755142, "grad_norm": 4.567037105560303, "learning_rate": 3.446174382735791e-06, "loss": 0.397, "step": 7609 }, { "epoch": 0.5199152831864453, "grad_norm": 6.159543991088867, "learning_rate": 3.4453999758080988e-06, "loss": 0.3234, "step": 7610 }, { "epoch": 0.5199836031973765, "grad_norm": 3.937385320663452, "learning_rate": 3.444625571554026e-06, "loss": 0.232, "step": 7611 }, { "epoch": 0.5200519232083077, "grad_norm": 6.015509128570557, "learning_rate": 3.443851170011491e-06, "loss": 0.3062, "step": 7612 }, { "epoch": 0.5201202432192389, "grad_norm": 4.700982093811035, "learning_rate": 3.4430767712184153e-06, "loss": 0.4068, "step": 7613 }, { "epoch": 0.5201885632301702, "grad_norm": 3.766265392303467, "learning_rate": 3.4423023752127195e-06, "loss": 0.3345, "step": 7614 }, { "epoch": 0.5202568832411013, "grad_norm": 3.900470733642578, "learning_rate": 3.441527982032324e-06, "loss": 0.313, "step": 7615 }, { "epoch": 0.5203252032520326, "grad_norm": 3.917332410812378, "learning_rate": 3.440753591715147e-06, "loss": 0.3249, "step": 7616 }, { "epoch": 0.5203935232629637, "grad_norm": 5.090302467346191, "learning_rate": 3.4399792042991094e-06, "loss": 0.366, "step": 7617 }, { "epoch": 0.5204618432738949, "grad_norm": 5.133086681365967, "learning_rate": 3.439204819822131e-06, "loss": 0.3118, "step": 7618 }, { "epoch": 0.5205301632848262, "grad_norm": 4.117516994476318, "learning_rate": 3.4384304383221326e-06, "loss": 0.3086, "step": 7619 }, { "epoch": 0.5205984832957573, "grad_norm": 3.7696444988250732, "learning_rate": 3.4376560598370317e-06, "loss": 0.2483, "step": 7620 }, { "epoch": 0.5206668033066886, "grad_norm": 3.5968096256256104, "learning_rate": 3.4368816844047468e-06, "loss": 0.2932, "step": 7621 }, { "epoch": 0.5207351233176197, "grad_norm": 6.328405380249023, "learning_rate": 3.436107312063199e-06, "loss": 0.3513, "step": 7622 }, { "epoch": 0.5208034433285509, "grad_norm": 4.038694381713867, "learning_rate": 3.4353329428503087e-06, "loss": 0.3529, "step": 7623 }, { "epoch": 0.5208717633394822, "grad_norm": 3.7346272468566895, "learning_rate": 3.43455857680399e-06, "loss": 0.3037, "step": 7624 }, { "epoch": 0.5209400833504133, "grad_norm": 4.627223968505859, "learning_rate": 3.4337842139621653e-06, "loss": 0.3319, "step": 7625 }, { "epoch": 0.5210084033613446, "grad_norm": 4.064317226409912, "learning_rate": 3.433009854362752e-06, "loss": 0.3308, "step": 7626 }, { "epoch": 0.5210767233722757, "grad_norm": 3.1982483863830566, "learning_rate": 3.432235498043669e-06, "loss": 0.2446, "step": 7627 }, { "epoch": 0.521145043383207, "grad_norm": 2.968263626098633, "learning_rate": 3.431461145042833e-06, "loss": 0.2292, "step": 7628 }, { "epoch": 0.5212133633941382, "grad_norm": 3.0072779655456543, "learning_rate": 3.430686795398163e-06, "loss": 0.2795, "step": 7629 }, { "epoch": 0.5212816834050693, "grad_norm": 4.533499717712402, "learning_rate": 3.429912449147577e-06, "loss": 0.4405, "step": 7630 }, { "epoch": 0.5213500034160006, "grad_norm": 4.54101037979126, "learning_rate": 3.4291381063289934e-06, "loss": 0.2791, "step": 7631 }, { "epoch": 0.5214183234269317, "grad_norm": 4.073424339294434, "learning_rate": 3.4283637669803273e-06, "loss": 0.4334, "step": 7632 }, { "epoch": 0.521486643437863, "grad_norm": 4.428133487701416, "learning_rate": 3.4275894311394978e-06, "loss": 0.4297, "step": 7633 }, { "epoch": 0.5215549634487942, "grad_norm": 3.584733009338379, "learning_rate": 3.4268150988444214e-06, "loss": 0.2711, "step": 7634 }, { "epoch": 0.5216232834597253, "grad_norm": 3.3127548694610596, "learning_rate": 3.4260407701330174e-06, "loss": 0.302, "step": 7635 }, { "epoch": 0.5216916034706566, "grad_norm": 3.5159337520599365, "learning_rate": 3.425266445043199e-06, "loss": 0.2424, "step": 7636 }, { "epoch": 0.5217599234815877, "grad_norm": 3.9479763507843018, "learning_rate": 3.4244921236128846e-06, "loss": 0.273, "step": 7637 }, { "epoch": 0.521828243492519, "grad_norm": 3.683772563934326, "learning_rate": 3.4237178058799906e-06, "loss": 0.3173, "step": 7638 }, { "epoch": 0.5218965635034502, "grad_norm": 4.225527763366699, "learning_rate": 3.422943491882435e-06, "loss": 0.3257, "step": 7639 }, { "epoch": 0.5219648835143814, "grad_norm": 4.701714515686035, "learning_rate": 3.4221691816581297e-06, "loss": 0.324, "step": 7640 }, { "epoch": 0.5220332035253126, "grad_norm": 4.337890625, "learning_rate": 3.4213948752449942e-06, "loss": 0.3524, "step": 7641 }, { "epoch": 0.5221015235362437, "grad_norm": 4.472990989685059, "learning_rate": 3.4206205726809433e-06, "loss": 0.449, "step": 7642 }, { "epoch": 0.522169843547175, "grad_norm": 4.039473533630371, "learning_rate": 3.4198462740038923e-06, "loss": 0.4174, "step": 7643 }, { "epoch": 0.5222381635581061, "grad_norm": 5.157229423522949, "learning_rate": 3.4190719792517574e-06, "loss": 0.3202, "step": 7644 }, { "epoch": 0.5223064835690374, "grad_norm": 4.394739627838135, "learning_rate": 3.4182976884624525e-06, "loss": 0.2996, "step": 7645 }, { "epoch": 0.5223748035799686, "grad_norm": 5.6830668449401855, "learning_rate": 3.4175234016738926e-06, "loss": 0.2981, "step": 7646 }, { "epoch": 0.5224431235908997, "grad_norm": 3.9748518466949463, "learning_rate": 3.4167491189239926e-06, "loss": 0.353, "step": 7647 }, { "epoch": 0.522511443601831, "grad_norm": 4.512106895446777, "learning_rate": 3.415974840250669e-06, "loss": 0.3373, "step": 7648 }, { "epoch": 0.5225797636127621, "grad_norm": 4.079988479614258, "learning_rate": 3.415200565691834e-06, "loss": 0.4191, "step": 7649 }, { "epoch": 0.5226480836236934, "grad_norm": 4.609827041625977, "learning_rate": 3.4144262952854015e-06, "loss": 0.2505, "step": 7650 }, { "epoch": 0.5227164036346246, "grad_norm": 3.40628719329834, "learning_rate": 3.4136520290692856e-06, "loss": 0.3007, "step": 7651 }, { "epoch": 0.5227847236455558, "grad_norm": 4.031205177307129, "learning_rate": 3.412877767081404e-06, "loss": 0.3661, "step": 7652 }, { "epoch": 0.522853043656487, "grad_norm": 4.5096869468688965, "learning_rate": 3.412103509359665e-06, "loss": 0.3993, "step": 7653 }, { "epoch": 0.5229213636674181, "grad_norm": 5.598145008087158, "learning_rate": 3.4113292559419826e-06, "loss": 0.4972, "step": 7654 }, { "epoch": 0.5229896836783494, "grad_norm": 4.094780445098877, "learning_rate": 3.410555006866273e-06, "loss": 0.3424, "step": 7655 }, { "epoch": 0.5230580036892806, "grad_norm": 3.5670695304870605, "learning_rate": 3.4097807621704487e-06, "loss": 0.3427, "step": 7656 }, { "epoch": 0.5231263237002118, "grad_norm": 3.9913387298583984, "learning_rate": 3.409006521892419e-06, "loss": 0.2702, "step": 7657 }, { "epoch": 0.523194643711143, "grad_norm": 3.8097474575042725, "learning_rate": 3.408232286070099e-06, "loss": 0.3262, "step": 7658 }, { "epoch": 0.5232629637220741, "grad_norm": 4.160706996917725, "learning_rate": 3.4074580547414004e-06, "loss": 0.336, "step": 7659 }, { "epoch": 0.5233312837330054, "grad_norm": 3.3060684204101562, "learning_rate": 3.4066838279442366e-06, "loss": 0.2565, "step": 7660 }, { "epoch": 0.5233996037439366, "grad_norm": 4.427307605743408, "learning_rate": 3.4059096057165173e-06, "loss": 0.3951, "step": 7661 }, { "epoch": 0.5234679237548678, "grad_norm": 3.5565543174743652, "learning_rate": 3.405135388096155e-06, "loss": 0.2464, "step": 7662 }, { "epoch": 0.523536243765799, "grad_norm": 3.8617844581604004, "learning_rate": 3.404361175121061e-06, "loss": 0.3232, "step": 7663 }, { "epoch": 0.5236045637767303, "grad_norm": 3.94582200050354, "learning_rate": 3.4035869668291473e-06, "loss": 0.3681, "step": 7664 }, { "epoch": 0.5236728837876614, "grad_norm": 3.275838613510132, "learning_rate": 3.4028127632583235e-06, "loss": 0.2963, "step": 7665 }, { "epoch": 0.5237412037985926, "grad_norm": 3.9131431579589844, "learning_rate": 3.402038564446501e-06, "loss": 0.4094, "step": 7666 }, { "epoch": 0.5238095238095238, "grad_norm": 3.2411985397338867, "learning_rate": 3.40126437043159e-06, "loss": 0.3162, "step": 7667 }, { "epoch": 0.523877843820455, "grad_norm": 3.6222636699676514, "learning_rate": 3.400490181251502e-06, "loss": 0.3998, "step": 7668 }, { "epoch": 0.5239461638313863, "grad_norm": 3.2850520610809326, "learning_rate": 3.3997159969441447e-06, "loss": 0.3085, "step": 7669 }, { "epoch": 0.5240144838423174, "grad_norm": 3.462646007537842, "learning_rate": 3.398941817547429e-06, "loss": 0.2788, "step": 7670 }, { "epoch": 0.5240828038532486, "grad_norm": 2.9104819297790527, "learning_rate": 3.3981676430992638e-06, "loss": 0.2847, "step": 7671 }, { "epoch": 0.5241511238641798, "grad_norm": 3.05737042427063, "learning_rate": 3.3973934736375615e-06, "loss": 0.3438, "step": 7672 }, { "epoch": 0.524219443875111, "grad_norm": 2.973798990249634, "learning_rate": 3.3966193092002266e-06, "loss": 0.3332, "step": 7673 }, { "epoch": 0.5242877638860423, "grad_norm": 3.3624460697174072, "learning_rate": 3.3958451498251693e-06, "loss": 0.2774, "step": 7674 }, { "epoch": 0.5243560838969734, "grad_norm": 3.9577877521514893, "learning_rate": 3.3950709955502998e-06, "loss": 0.2865, "step": 7675 }, { "epoch": 0.5244244039079047, "grad_norm": 3.733457088470459, "learning_rate": 3.3942968464135258e-06, "loss": 0.2873, "step": 7676 }, { "epoch": 0.5244927239188358, "grad_norm": 3.4256606101989746, "learning_rate": 3.3935227024527545e-06, "loss": 0.2544, "step": 7677 }, { "epoch": 0.524561043929767, "grad_norm": 6.951685905456543, "learning_rate": 3.3927485637058938e-06, "loss": 0.3316, "step": 7678 }, { "epoch": 0.5246293639406983, "grad_norm": 4.340455532073975, "learning_rate": 3.391974430210851e-06, "loss": 0.4051, "step": 7679 }, { "epoch": 0.5246976839516294, "grad_norm": 4.194436073303223, "learning_rate": 3.391200302005535e-06, "loss": 0.2713, "step": 7680 }, { "epoch": 0.5247660039625607, "grad_norm": 4.658147811889648, "learning_rate": 3.3904261791278507e-06, "loss": 0.3485, "step": 7681 }, { "epoch": 0.5248343239734918, "grad_norm": 4.58062744140625, "learning_rate": 3.389652061615706e-06, "loss": 0.3486, "step": 7682 }, { "epoch": 0.524902643984423, "grad_norm": 3.5051608085632324, "learning_rate": 3.388877949507007e-06, "loss": 0.2651, "step": 7683 }, { "epoch": 0.5249709639953543, "grad_norm": 5.329373359680176, "learning_rate": 3.3881038428396603e-06, "loss": 0.3806, "step": 7684 }, { "epoch": 0.5250392840062854, "grad_norm": 4.191403388977051, "learning_rate": 3.3873297416515716e-06, "loss": 0.259, "step": 7685 }, { "epoch": 0.5251076040172167, "grad_norm": 4.558887004852295, "learning_rate": 3.3865556459806463e-06, "loss": 0.3606, "step": 7686 }, { "epoch": 0.5251759240281478, "grad_norm": 5.7215681076049805, "learning_rate": 3.3857815558647892e-06, "loss": 0.3363, "step": 7687 }, { "epoch": 0.5252442440390791, "grad_norm": 5.2906036376953125, "learning_rate": 3.385007471341909e-06, "loss": 0.3457, "step": 7688 }, { "epoch": 0.5253125640500103, "grad_norm": 4.653945446014404, "learning_rate": 3.384233392449906e-06, "loss": 0.3252, "step": 7689 }, { "epoch": 0.5253808840609414, "grad_norm": 3.9680018424987793, "learning_rate": 3.3834593192266857e-06, "loss": 0.315, "step": 7690 }, { "epoch": 0.5254492040718727, "grad_norm": 4.088746070861816, "learning_rate": 3.3826852517101547e-06, "loss": 0.3998, "step": 7691 }, { "epoch": 0.5255175240828038, "grad_norm": 3.1196255683898926, "learning_rate": 3.3819111899382173e-06, "loss": 0.2826, "step": 7692 }, { "epoch": 0.5255858440937351, "grad_norm": 3.7226498126983643, "learning_rate": 3.3811371339487735e-06, "loss": 0.2932, "step": 7693 }, { "epoch": 0.5256541641046663, "grad_norm": 3.77872633934021, "learning_rate": 3.3803630837797293e-06, "loss": 0.2389, "step": 7694 }, { "epoch": 0.5257224841155974, "grad_norm": 3.199897527694702, "learning_rate": 3.3795890394689873e-06, "loss": 0.3446, "step": 7695 }, { "epoch": 0.5257908041265287, "grad_norm": 4.522586345672607, "learning_rate": 3.3788150010544518e-06, "loss": 0.4042, "step": 7696 }, { "epoch": 0.5258591241374598, "grad_norm": 3.4910390377044678, "learning_rate": 3.3780409685740237e-06, "loss": 0.4047, "step": 7697 }, { "epoch": 0.5259274441483911, "grad_norm": 4.357405185699463, "learning_rate": 3.3772669420656057e-06, "loss": 0.3176, "step": 7698 }, { "epoch": 0.5259957641593223, "grad_norm": 3.9410200119018555, "learning_rate": 3.3764929215671e-06, "loss": 0.3286, "step": 7699 }, { "epoch": 0.5260640841702535, "grad_norm": 4.039185047149658, "learning_rate": 3.375718907116409e-06, "loss": 0.396, "step": 7700 }, { "epoch": 0.5261324041811847, "grad_norm": 3.8615596294403076, "learning_rate": 3.3749448987514327e-06, "loss": 0.3411, "step": 7701 }, { "epoch": 0.5262007241921158, "grad_norm": 3.4370152950286865, "learning_rate": 3.3741708965100728e-06, "loss": 0.3229, "step": 7702 }, { "epoch": 0.5262690442030471, "grad_norm": 3.93513560295105, "learning_rate": 3.3733969004302304e-06, "loss": 0.3366, "step": 7703 }, { "epoch": 0.5263373642139783, "grad_norm": 3.483323335647583, "learning_rate": 3.372622910549807e-06, "loss": 0.3094, "step": 7704 }, { "epoch": 0.5264056842249095, "grad_norm": 3.9120688438415527, "learning_rate": 3.3718489269067008e-06, "loss": 0.3061, "step": 7705 }, { "epoch": 0.5264740042358407, "grad_norm": 3.327737808227539, "learning_rate": 3.3710749495388128e-06, "loss": 0.2633, "step": 7706 }, { "epoch": 0.5265423242467718, "grad_norm": 4.1049346923828125, "learning_rate": 3.3703009784840417e-06, "loss": 0.4482, "step": 7707 }, { "epoch": 0.5266106442577031, "grad_norm": 5.433395862579346, "learning_rate": 3.3695270137802886e-06, "loss": 0.4578, "step": 7708 }, { "epoch": 0.5266789642686343, "grad_norm": 3.772043466567993, "learning_rate": 3.368753055465453e-06, "loss": 0.3664, "step": 7709 }, { "epoch": 0.5267472842795655, "grad_norm": 4.746032238006592, "learning_rate": 3.3679791035774294e-06, "loss": 0.3704, "step": 7710 }, { "epoch": 0.5268156042904967, "grad_norm": 3.758317708969116, "learning_rate": 3.3672051581541206e-06, "loss": 0.3472, "step": 7711 }, { "epoch": 0.5268839243014279, "grad_norm": 3.201124668121338, "learning_rate": 3.3664312192334223e-06, "loss": 0.3792, "step": 7712 }, { "epoch": 0.5269522443123591, "grad_norm": 3.580940008163452, "learning_rate": 3.3656572868532337e-06, "loss": 0.2452, "step": 7713 }, { "epoch": 0.5270205643232903, "grad_norm": 4.346185207366943, "learning_rate": 3.3648833610514503e-06, "loss": 0.331, "step": 7714 }, { "epoch": 0.5270888843342215, "grad_norm": 3.7151503562927246, "learning_rate": 3.3641094418659713e-06, "loss": 0.2912, "step": 7715 }, { "epoch": 0.5271572043451527, "grad_norm": 3.0632550716400146, "learning_rate": 3.363335529334692e-06, "loss": 0.2768, "step": 7716 }, { "epoch": 0.5272255243560839, "grad_norm": 3.329859495162964, "learning_rate": 3.3625616234955096e-06, "loss": 0.2414, "step": 7717 }, { "epoch": 0.5272938443670151, "grad_norm": 4.08127498626709, "learning_rate": 3.3617877243863195e-06, "loss": 0.2915, "step": 7718 }, { "epoch": 0.5273621643779463, "grad_norm": 3.750396251678467, "learning_rate": 3.3610138320450184e-06, "loss": 0.3499, "step": 7719 }, { "epoch": 0.5274304843888775, "grad_norm": 3.9694206714630127, "learning_rate": 3.3602399465095008e-06, "loss": 0.2445, "step": 7720 }, { "epoch": 0.5274988043998087, "grad_norm": 3.4193055629730225, "learning_rate": 3.3594660678176634e-06, "loss": 0.3071, "step": 7721 }, { "epoch": 0.5275671244107399, "grad_norm": 3.461744785308838, "learning_rate": 3.3586921960073986e-06, "loss": 0.2082, "step": 7722 }, { "epoch": 0.5276354444216711, "grad_norm": 3.869365692138672, "learning_rate": 3.357918331116602e-06, "loss": 0.2768, "step": 7723 }, { "epoch": 0.5277037644326024, "grad_norm": 4.765226364135742, "learning_rate": 3.3571444731831674e-06, "loss": 0.2815, "step": 7724 }, { "epoch": 0.5277720844435335, "grad_norm": 5.2109198570251465, "learning_rate": 3.3563706222449917e-06, "loss": 0.3168, "step": 7725 }, { "epoch": 0.5278404044544647, "grad_norm": 3.8817861080169678, "learning_rate": 3.355596778339963e-06, "loss": 0.3199, "step": 7726 }, { "epoch": 0.5279087244653959, "grad_norm": 3.9721691608428955, "learning_rate": 3.354822941505978e-06, "loss": 0.3822, "step": 7727 }, { "epoch": 0.5279770444763271, "grad_norm": 4.005753993988037, "learning_rate": 3.354049111780928e-06, "loss": 0.2717, "step": 7728 }, { "epoch": 0.5280453644872584, "grad_norm": 4.168554306030273, "learning_rate": 3.353275289202707e-06, "loss": 0.3535, "step": 7729 }, { "epoch": 0.5281136844981895, "grad_norm": 5.125295639038086, "learning_rate": 3.352501473809205e-06, "loss": 0.4416, "step": 7730 }, { "epoch": 0.5281820045091207, "grad_norm": 4.147287845611572, "learning_rate": 3.3517276656383146e-06, "loss": 0.3411, "step": 7731 }, { "epoch": 0.5282503245200519, "grad_norm": 4.646646976470947, "learning_rate": 3.350953864727927e-06, "loss": 0.3477, "step": 7732 }, { "epoch": 0.5283186445309831, "grad_norm": 3.9235305786132812, "learning_rate": 3.350180071115934e-06, "loss": 0.3222, "step": 7733 }, { "epoch": 0.5283869645419144, "grad_norm": 5.074267864227295, "learning_rate": 3.3494062848402253e-06, "loss": 0.3424, "step": 7734 }, { "epoch": 0.5284552845528455, "grad_norm": 4.214656352996826, "learning_rate": 3.348632505938691e-06, "loss": 0.285, "step": 7735 }, { "epoch": 0.5285236045637768, "grad_norm": 4.392101764678955, "learning_rate": 3.3478587344492216e-06, "loss": 0.2981, "step": 7736 }, { "epoch": 0.5285919245747079, "grad_norm": 3.8379533290863037, "learning_rate": 3.3470849704097065e-06, "loss": 0.3008, "step": 7737 }, { "epoch": 0.5286602445856391, "grad_norm": 4.94608736038208, "learning_rate": 3.3463112138580346e-06, "loss": 0.2733, "step": 7738 }, { "epoch": 0.5287285645965704, "grad_norm": 4.148621559143066, "learning_rate": 3.345537464832095e-06, "loss": 0.322, "step": 7739 }, { "epoch": 0.5287968846075015, "grad_norm": 3.5463526248931885, "learning_rate": 3.3447637233697747e-06, "loss": 0.3082, "step": 7740 }, { "epoch": 0.5288652046184328, "grad_norm": 3.4038848876953125, "learning_rate": 3.3439899895089657e-06, "loss": 0.334, "step": 7741 }, { "epoch": 0.5289335246293639, "grad_norm": 3.346660852432251, "learning_rate": 3.3432162632875514e-06, "loss": 0.3651, "step": 7742 }, { "epoch": 0.5290018446402951, "grad_norm": 3.937448501586914, "learning_rate": 3.34244254474342e-06, "loss": 0.3525, "step": 7743 }, { "epoch": 0.5290701646512264, "grad_norm": 3.453808546066284, "learning_rate": 3.3416688339144606e-06, "loss": 0.2956, "step": 7744 }, { "epoch": 0.5291384846621575, "grad_norm": 3.874234437942505, "learning_rate": 3.340895130838559e-06, "loss": 0.3496, "step": 7745 }, { "epoch": 0.5292068046730888, "grad_norm": 4.091048240661621, "learning_rate": 3.3401214355535995e-06, "loss": 0.261, "step": 7746 }, { "epoch": 0.5292751246840199, "grad_norm": 4.765542030334473, "learning_rate": 3.3393477480974697e-06, "loss": 0.2867, "step": 7747 }, { "epoch": 0.5293434446949512, "grad_norm": 3.001889705657959, "learning_rate": 3.3385740685080542e-06, "loss": 0.3092, "step": 7748 }, { "epoch": 0.5294117647058824, "grad_norm": 4.743386745452881, "learning_rate": 3.337800396823239e-06, "loss": 0.2982, "step": 7749 }, { "epoch": 0.5294800847168135, "grad_norm": 4.179986476898193, "learning_rate": 3.337026733080908e-06, "loss": 0.2476, "step": 7750 }, { "epoch": 0.5295484047277448, "grad_norm": 3.2852377891540527, "learning_rate": 3.3362530773189453e-06, "loss": 0.3013, "step": 7751 }, { "epoch": 0.5296167247386759, "grad_norm": 3.9536240100860596, "learning_rate": 3.335479429575235e-06, "loss": 0.2304, "step": 7752 }, { "epoch": 0.5296850447496072, "grad_norm": 4.56248140335083, "learning_rate": 3.3347057898876615e-06, "loss": 0.4495, "step": 7753 }, { "epoch": 0.5297533647605384, "grad_norm": 4.357619285583496, "learning_rate": 3.3339321582941063e-06, "loss": 0.4719, "step": 7754 }, { "epoch": 0.5298216847714695, "grad_norm": 4.764216423034668, "learning_rate": 3.333158534832453e-06, "loss": 0.2534, "step": 7755 }, { "epoch": 0.5298900047824008, "grad_norm": 3.670509099960327, "learning_rate": 3.332384919540583e-06, "loss": 0.307, "step": 7756 }, { "epoch": 0.5299583247933319, "grad_norm": 3.5262863636016846, "learning_rate": 3.33161131245638e-06, "loss": 0.2795, "step": 7757 }, { "epoch": 0.5300266448042632, "grad_norm": 4.04163122177124, "learning_rate": 3.3308377136177236e-06, "loss": 0.3131, "step": 7758 }, { "epoch": 0.5300949648151944, "grad_norm": 3.1497573852539062, "learning_rate": 3.3300641230624954e-06, "loss": 0.3513, "step": 7759 }, { "epoch": 0.5301632848261256, "grad_norm": 4.512763977050781, "learning_rate": 3.329290540828576e-06, "loss": 0.3504, "step": 7760 }, { "epoch": 0.5302316048370568, "grad_norm": 3.392765522003174, "learning_rate": 3.3285169669538484e-06, "loss": 0.3281, "step": 7761 }, { "epoch": 0.5302999248479879, "grad_norm": 3.0457444190979004, "learning_rate": 3.327743401476188e-06, "loss": 0.3328, "step": 7762 }, { "epoch": 0.5303682448589192, "grad_norm": 4.468921661376953, "learning_rate": 3.326969844433476e-06, "loss": 0.3613, "step": 7763 }, { "epoch": 0.5304365648698504, "grad_norm": 4.448946475982666, "learning_rate": 3.3261962958635923e-06, "loss": 0.4158, "step": 7764 }, { "epoch": 0.5305048848807816, "grad_norm": 2.9301819801330566, "learning_rate": 3.3254227558044158e-06, "loss": 0.2019, "step": 7765 }, { "epoch": 0.5305732048917128, "grad_norm": 4.2121076583862305, "learning_rate": 3.3246492242938228e-06, "loss": 0.3542, "step": 7766 }, { "epoch": 0.5306415249026439, "grad_norm": 3.171948194503784, "learning_rate": 3.323875701369693e-06, "loss": 0.2757, "step": 7767 }, { "epoch": 0.5307098449135752, "grad_norm": 3.1945865154266357, "learning_rate": 3.323102187069902e-06, "loss": 0.2182, "step": 7768 }, { "epoch": 0.5307781649245064, "grad_norm": 3.289670705795288, "learning_rate": 3.3223286814323287e-06, "loss": 0.2669, "step": 7769 }, { "epoch": 0.5308464849354376, "grad_norm": 3.7123615741729736, "learning_rate": 3.321555184494848e-06, "loss": 0.2986, "step": 7770 }, { "epoch": 0.5309148049463688, "grad_norm": 3.1559293270111084, "learning_rate": 3.3207816962953364e-06, "loss": 0.3012, "step": 7771 }, { "epoch": 0.5309831249573, "grad_norm": 3.5853278636932373, "learning_rate": 3.3200082168716695e-06, "loss": 0.3233, "step": 7772 }, { "epoch": 0.5310514449682312, "grad_norm": 3.5058062076568604, "learning_rate": 3.3192347462617226e-06, "loss": 0.2815, "step": 7773 }, { "epoch": 0.5311197649791624, "grad_norm": 3.0401928424835205, "learning_rate": 3.318461284503373e-06, "loss": 0.2089, "step": 7774 }, { "epoch": 0.5311880849900936, "grad_norm": 4.514279365539551, "learning_rate": 3.317687831634491e-06, "loss": 0.3786, "step": 7775 }, { "epoch": 0.5312564050010248, "grad_norm": 3.773648738861084, "learning_rate": 3.316914387692951e-06, "loss": 0.2846, "step": 7776 }, { "epoch": 0.531324725011956, "grad_norm": 3.8634912967681885, "learning_rate": 3.316140952716629e-06, "loss": 0.2916, "step": 7777 }, { "epoch": 0.5313930450228872, "grad_norm": 6.1987128257751465, "learning_rate": 3.3153675267433987e-06, "loss": 0.4055, "step": 7778 }, { "epoch": 0.5314613650338184, "grad_norm": 3.8572235107421875, "learning_rate": 3.314594109811128e-06, "loss": 0.3013, "step": 7779 }, { "epoch": 0.5315296850447496, "grad_norm": 4.503528118133545, "learning_rate": 3.313820701957693e-06, "loss": 0.2647, "step": 7780 }, { "epoch": 0.5315980050556808, "grad_norm": 5.315959930419922, "learning_rate": 3.3130473032209643e-06, "loss": 0.4254, "step": 7781 }, { "epoch": 0.531666325066612, "grad_norm": 4.036839962005615, "learning_rate": 3.312273913638814e-06, "loss": 0.2739, "step": 7782 }, { "epoch": 0.5317346450775432, "grad_norm": 3.523087978363037, "learning_rate": 3.3115005332491116e-06, "loss": 0.3761, "step": 7783 }, { "epoch": 0.5318029650884745, "grad_norm": 3.9088048934936523, "learning_rate": 3.3107271620897283e-06, "loss": 0.3605, "step": 7784 }, { "epoch": 0.5318712850994056, "grad_norm": 3.963099241256714, "learning_rate": 3.3099538001985333e-06, "loss": 0.4421, "step": 7785 }, { "epoch": 0.5319396051103368, "grad_norm": 3.869957208633423, "learning_rate": 3.3091804476133975e-06, "loss": 0.36, "step": 7786 }, { "epoch": 0.532007925121268, "grad_norm": 4.614005088806152, "learning_rate": 3.3084071043721887e-06, "loss": 0.3746, "step": 7787 }, { "epoch": 0.5320762451321992, "grad_norm": 4.629911422729492, "learning_rate": 3.307633770512775e-06, "loss": 0.409, "step": 7788 }, { "epoch": 0.5321445651431305, "grad_norm": 3.547745943069458, "learning_rate": 3.3068604460730256e-06, "loss": 0.3748, "step": 7789 }, { "epoch": 0.5322128851540616, "grad_norm": 3.369039297103882, "learning_rate": 3.306087131090809e-06, "loss": 0.2971, "step": 7790 }, { "epoch": 0.5322812051649928, "grad_norm": 4.3983941078186035, "learning_rate": 3.30531382560399e-06, "loss": 0.3913, "step": 7791 }, { "epoch": 0.532349525175924, "grad_norm": 3.7758569717407227, "learning_rate": 3.304540529650437e-06, "loss": 0.3334, "step": 7792 }, { "epoch": 0.5324178451868552, "grad_norm": 3.213183641433716, "learning_rate": 3.3037672432680142e-06, "loss": 0.2841, "step": 7793 }, { "epoch": 0.5324861651977865, "grad_norm": 3.948366403579712, "learning_rate": 3.3029939664945917e-06, "loss": 0.2264, "step": 7794 }, { "epoch": 0.5325544852087176, "grad_norm": 4.357192516326904, "learning_rate": 3.3022206993680313e-06, "loss": 0.3495, "step": 7795 }, { "epoch": 0.5326228052196489, "grad_norm": 3.0197181701660156, "learning_rate": 3.301447441926197e-06, "loss": 0.241, "step": 7796 }, { "epoch": 0.53269112523058, "grad_norm": 3.479405641555786, "learning_rate": 3.3006741942069558e-06, "loss": 0.2531, "step": 7797 }, { "epoch": 0.5327594452415112, "grad_norm": 4.131915092468262, "learning_rate": 3.2999009562481722e-06, "loss": 0.374, "step": 7798 }, { "epoch": 0.5328277652524425, "grad_norm": 4.2053399085998535, "learning_rate": 3.2991277280877057e-06, "loss": 0.4169, "step": 7799 }, { "epoch": 0.5328960852633736, "grad_norm": 4.791285991668701, "learning_rate": 3.298354509763422e-06, "loss": 0.4872, "step": 7800 }, { "epoch": 0.5329644052743049, "grad_norm": 4.181192398071289, "learning_rate": 3.297581301313183e-06, "loss": 0.3299, "step": 7801 }, { "epoch": 0.533032725285236, "grad_norm": 4.8290863037109375, "learning_rate": 3.296808102774852e-06, "loss": 0.3608, "step": 7802 }, { "epoch": 0.5331010452961672, "grad_norm": 3.096436023712158, "learning_rate": 3.2960349141862885e-06, "loss": 0.3194, "step": 7803 }, { "epoch": 0.5331693653070985, "grad_norm": 4.084928512573242, "learning_rate": 3.295261735585354e-06, "loss": 0.367, "step": 7804 }, { "epoch": 0.5332376853180296, "grad_norm": 2.752218008041382, "learning_rate": 3.294488567009909e-06, "loss": 0.2935, "step": 7805 }, { "epoch": 0.5333060053289609, "grad_norm": 4.231423854827881, "learning_rate": 3.293715408497815e-06, "loss": 0.3234, "step": 7806 }, { "epoch": 0.533374325339892, "grad_norm": 3.3743271827697754, "learning_rate": 3.292942260086929e-06, "loss": 0.2595, "step": 7807 }, { "epoch": 0.5334426453508233, "grad_norm": 4.1240458488464355, "learning_rate": 3.2921691218151114e-06, "loss": 0.4189, "step": 7808 }, { "epoch": 0.5335109653617545, "grad_norm": 4.541742324829102, "learning_rate": 3.291395993720221e-06, "loss": 0.4163, "step": 7809 }, { "epoch": 0.5335792853726856, "grad_norm": 3.3146262168884277, "learning_rate": 3.290622875840116e-06, "loss": 0.2887, "step": 7810 }, { "epoch": 0.5336476053836169, "grad_norm": 4.179045677185059, "learning_rate": 3.2898497682126522e-06, "loss": 0.1893, "step": 7811 }, { "epoch": 0.533715925394548, "grad_norm": 3.8991193771362305, "learning_rate": 3.2890766708756888e-06, "loss": 0.3726, "step": 7812 }, { "epoch": 0.5337842454054793, "grad_norm": 5.064934730529785, "learning_rate": 3.2883035838670796e-06, "loss": 0.3881, "step": 7813 }, { "epoch": 0.5338525654164105, "grad_norm": 3.2540283203125, "learning_rate": 3.2875305072246853e-06, "loss": 0.3689, "step": 7814 }, { "epoch": 0.5339208854273416, "grad_norm": 4.104471206665039, "learning_rate": 3.2867574409863557e-06, "loss": 0.3337, "step": 7815 }, { "epoch": 0.5339892054382729, "grad_norm": 3.8978683948516846, "learning_rate": 3.2859843851899496e-06, "loss": 0.377, "step": 7816 }, { "epoch": 0.534057525449204, "grad_norm": 4.026658058166504, "learning_rate": 3.2852113398733203e-06, "loss": 0.319, "step": 7817 }, { "epoch": 0.5341258454601353, "grad_norm": 4.878927230834961, "learning_rate": 3.2844383050743227e-06, "loss": 0.382, "step": 7818 }, { "epoch": 0.5341941654710665, "grad_norm": 2.802551031112671, "learning_rate": 3.283665280830809e-06, "loss": 0.29, "step": 7819 }, { "epoch": 0.5342624854819977, "grad_norm": 4.127768039703369, "learning_rate": 3.2828922671806327e-06, "loss": 0.3639, "step": 7820 }, { "epoch": 0.5343308054929289, "grad_norm": 4.260578155517578, "learning_rate": 3.282119264161646e-06, "loss": 0.3206, "step": 7821 }, { "epoch": 0.53439912550386, "grad_norm": 3.477389097213745, "learning_rate": 3.281346271811702e-06, "loss": 0.39, "step": 7822 }, { "epoch": 0.5344674455147913, "grad_norm": 3.785829544067383, "learning_rate": 3.2805732901686496e-06, "loss": 0.4917, "step": 7823 }, { "epoch": 0.5345357655257225, "grad_norm": 3.627668857574463, "learning_rate": 3.279800319270342e-06, "loss": 0.3227, "step": 7824 }, { "epoch": 0.5346040855366537, "grad_norm": 4.123161315917969, "learning_rate": 3.279027359154628e-06, "loss": 0.3565, "step": 7825 }, { "epoch": 0.5346724055475849, "grad_norm": 4.134273529052734, "learning_rate": 3.278254409859359e-06, "loss": 0.38, "step": 7826 }, { "epoch": 0.534740725558516, "grad_norm": 4.668335437774658, "learning_rate": 3.277481471422383e-06, "loss": 0.3428, "step": 7827 }, { "epoch": 0.5348090455694473, "grad_norm": 4.268277645111084, "learning_rate": 3.2767085438815488e-06, "loss": 0.312, "step": 7828 }, { "epoch": 0.5348773655803785, "grad_norm": 3.786025285720825, "learning_rate": 3.2759356272747042e-06, "loss": 0.3825, "step": 7829 }, { "epoch": 0.5349456855913097, "grad_norm": 4.0645599365234375, "learning_rate": 3.2751627216397e-06, "loss": 0.3241, "step": 7830 }, { "epoch": 0.5350140056022409, "grad_norm": 3.4742319583892822, "learning_rate": 3.274389827014379e-06, "loss": 0.295, "step": 7831 }, { "epoch": 0.5350823256131721, "grad_norm": 3.9804933071136475, "learning_rate": 3.273616943436589e-06, "loss": 0.2564, "step": 7832 }, { "epoch": 0.5351506456241033, "grad_norm": 4.171632289886475, "learning_rate": 3.272844070944178e-06, "loss": 0.3724, "step": 7833 }, { "epoch": 0.5352189656350345, "grad_norm": 4.853506565093994, "learning_rate": 3.272071209574992e-06, "loss": 0.3731, "step": 7834 }, { "epoch": 0.5352872856459657, "grad_norm": 4.459691524505615, "learning_rate": 3.271298359366871e-06, "loss": 0.3142, "step": 7835 }, { "epoch": 0.5353556056568969, "grad_norm": 4.51122522354126, "learning_rate": 3.2705255203576644e-06, "loss": 0.3304, "step": 7836 }, { "epoch": 0.5354239256678281, "grad_norm": 3.6533565521240234, "learning_rate": 3.269752692585214e-06, "loss": 0.259, "step": 7837 }, { "epoch": 0.5354922456787593, "grad_norm": 4.199973106384277, "learning_rate": 3.268979876087364e-06, "loss": 0.3018, "step": 7838 }, { "epoch": 0.5355605656896905, "grad_norm": 3.467709541320801, "learning_rate": 3.268207070901956e-06, "loss": 0.3329, "step": 7839 }, { "epoch": 0.5356288857006217, "grad_norm": 4.89674186706543, "learning_rate": 3.2674342770668334e-06, "loss": 0.35, "step": 7840 }, { "epoch": 0.5356972057115529, "grad_norm": 3.40598201751709, "learning_rate": 3.266661494619837e-06, "loss": 0.2434, "step": 7841 }, { "epoch": 0.5357655257224841, "grad_norm": 3.4955530166625977, "learning_rate": 3.2658887235988085e-06, "loss": 0.2555, "step": 7842 }, { "epoch": 0.5358338457334153, "grad_norm": 4.47878360748291, "learning_rate": 3.265115964041589e-06, "loss": 0.353, "step": 7843 }, { "epoch": 0.5359021657443466, "grad_norm": 4.01294469833374, "learning_rate": 3.2643432159860162e-06, "loss": 0.2624, "step": 7844 }, { "epoch": 0.5359704857552777, "grad_norm": 4.002775192260742, "learning_rate": 3.263570479469932e-06, "loss": 0.3088, "step": 7845 }, { "epoch": 0.5360388057662089, "grad_norm": 4.036348819732666, "learning_rate": 3.2627977545311727e-06, "loss": 0.3423, "step": 7846 }, { "epoch": 0.5361071257771401, "grad_norm": 3.800109386444092, "learning_rate": 3.262025041207581e-06, "loss": 0.2912, "step": 7847 }, { "epoch": 0.5361754457880713, "grad_norm": 4.3019514083862305, "learning_rate": 3.2612523395369897e-06, "loss": 0.3402, "step": 7848 }, { "epoch": 0.5362437657990026, "grad_norm": 4.185437202453613, "learning_rate": 3.260479649557238e-06, "loss": 0.4301, "step": 7849 }, { "epoch": 0.5363120858099337, "grad_norm": 3.8503806591033936, "learning_rate": 3.259706971306163e-06, "loss": 0.3397, "step": 7850 }, { "epoch": 0.5363804058208649, "grad_norm": 3.298854351043701, "learning_rate": 3.258934304821602e-06, "loss": 0.2629, "step": 7851 }, { "epoch": 0.5364487258317961, "grad_norm": 3.2231805324554443, "learning_rate": 3.2581616501413855e-06, "loss": 0.3042, "step": 7852 }, { "epoch": 0.5365170458427273, "grad_norm": 4.036623477935791, "learning_rate": 3.257389007303353e-06, "loss": 0.2151, "step": 7853 }, { "epoch": 0.5365853658536586, "grad_norm": 4.371991157531738, "learning_rate": 3.256616376345337e-06, "loss": 0.3289, "step": 7854 }, { "epoch": 0.5366536858645897, "grad_norm": 4.668450832366943, "learning_rate": 3.2558437573051725e-06, "loss": 0.3922, "step": 7855 }, { "epoch": 0.536722005875521, "grad_norm": 4.323612213134766, "learning_rate": 3.2550711502206908e-06, "loss": 0.3458, "step": 7856 }, { "epoch": 0.5367903258864521, "grad_norm": 4.679473400115967, "learning_rate": 3.2542985551297246e-06, "loss": 0.2896, "step": 7857 }, { "epoch": 0.5368586458973833, "grad_norm": 4.150384426116943, "learning_rate": 3.253525972070107e-06, "loss": 0.3585, "step": 7858 }, { "epoch": 0.5369269659083146, "grad_norm": 3.2638816833496094, "learning_rate": 3.252753401079669e-06, "loss": 0.2812, "step": 7859 }, { "epoch": 0.5369952859192457, "grad_norm": 5.286532878875732, "learning_rate": 3.2519808421962407e-06, "loss": 0.4872, "step": 7860 }, { "epoch": 0.537063605930177, "grad_norm": 4.4624762535095215, "learning_rate": 3.2512082954576528e-06, "loss": 0.2085, "step": 7861 }, { "epoch": 0.5371319259411081, "grad_norm": 3.5066280364990234, "learning_rate": 3.250435760901734e-06, "loss": 0.2905, "step": 7862 }, { "epoch": 0.5372002459520393, "grad_norm": 3.698664903640747, "learning_rate": 3.2496632385663163e-06, "loss": 0.2896, "step": 7863 }, { "epoch": 0.5372685659629706, "grad_norm": 6.694421291351318, "learning_rate": 3.248890728489224e-06, "loss": 0.3396, "step": 7864 }, { "epoch": 0.5373368859739017, "grad_norm": 5.3227858543396, "learning_rate": 3.2481182307082857e-06, "loss": 0.3801, "step": 7865 }, { "epoch": 0.537405205984833, "grad_norm": 3.5272204875946045, "learning_rate": 3.2473457452613305e-06, "loss": 0.285, "step": 7866 }, { "epoch": 0.5374735259957641, "grad_norm": 4.455972194671631, "learning_rate": 3.2465732721861855e-06, "loss": 0.2172, "step": 7867 }, { "epoch": 0.5375418460066954, "grad_norm": 4.253355503082275, "learning_rate": 3.245800811520673e-06, "loss": 0.3165, "step": 7868 }, { "epoch": 0.5376101660176266, "grad_norm": 3.974266529083252, "learning_rate": 3.245028363302621e-06, "loss": 0.3741, "step": 7869 }, { "epoch": 0.5376784860285577, "grad_norm": 3.7832159996032715, "learning_rate": 3.2442559275698536e-06, "loss": 0.3806, "step": 7870 }, { "epoch": 0.537746806039489, "grad_norm": 3.4196183681488037, "learning_rate": 3.2434835043601963e-06, "loss": 0.3204, "step": 7871 }, { "epoch": 0.5378151260504201, "grad_norm": 2.9506547451019287, "learning_rate": 3.2427110937114706e-06, "loss": 0.2581, "step": 7872 }, { "epoch": 0.5378834460613514, "grad_norm": 4.712214946746826, "learning_rate": 3.2419386956615e-06, "loss": 0.4096, "step": 7873 }, { "epoch": 0.5379517660722826, "grad_norm": 5.144037246704102, "learning_rate": 3.2411663102481065e-06, "loss": 0.3324, "step": 7874 }, { "epoch": 0.5380200860832137, "grad_norm": 4.509045600891113, "learning_rate": 3.2403939375091132e-06, "loss": 0.3544, "step": 7875 }, { "epoch": 0.538088406094145, "grad_norm": 3.72752046585083, "learning_rate": 3.23962157748234e-06, "loss": 0.2534, "step": 7876 }, { "epoch": 0.5381567261050761, "grad_norm": 4.534614562988281, "learning_rate": 3.238849230205607e-06, "loss": 0.3138, "step": 7877 }, { "epoch": 0.5382250461160074, "grad_norm": 4.456179141998291, "learning_rate": 3.2380768957167344e-06, "loss": 0.3156, "step": 7878 }, { "epoch": 0.5382933661269386, "grad_norm": 3.7145285606384277, "learning_rate": 3.237304574053542e-06, "loss": 0.3227, "step": 7879 }, { "epoch": 0.5383616861378698, "grad_norm": 4.631005764007568, "learning_rate": 3.2365322652538476e-06, "loss": 0.3458, "step": 7880 }, { "epoch": 0.538430006148801, "grad_norm": 4.570613384246826, "learning_rate": 3.235759969355469e-06, "loss": 0.3342, "step": 7881 }, { "epoch": 0.5384983261597321, "grad_norm": 4.4813618659973145, "learning_rate": 3.234987686396223e-06, "loss": 0.3208, "step": 7882 }, { "epoch": 0.5385666461706634, "grad_norm": 4.178490161895752, "learning_rate": 3.2342154164139297e-06, "loss": 0.3722, "step": 7883 }, { "epoch": 0.5386349661815946, "grad_norm": 3.899310827255249, "learning_rate": 3.2334431594464005e-06, "loss": 0.3347, "step": 7884 }, { "epoch": 0.5387032861925258, "grad_norm": 3.491511106491089, "learning_rate": 3.2326709155314522e-06, "loss": 0.345, "step": 7885 }, { "epoch": 0.538771606203457, "grad_norm": 3.52278995513916, "learning_rate": 3.2318986847069005e-06, "loss": 0.2966, "step": 7886 }, { "epoch": 0.5388399262143881, "grad_norm": 4.056612968444824, "learning_rate": 3.2311264670105606e-06, "loss": 0.339, "step": 7887 }, { "epoch": 0.5389082462253194, "grad_norm": 5.704577922821045, "learning_rate": 3.2303542624802426e-06, "loss": 0.3404, "step": 7888 }, { "epoch": 0.5389765662362506, "grad_norm": 3.180485725402832, "learning_rate": 3.2295820711537617e-06, "loss": 0.2933, "step": 7889 }, { "epoch": 0.5390448862471818, "grad_norm": 4.455805778503418, "learning_rate": 3.2288098930689295e-06, "loss": 0.401, "step": 7890 }, { "epoch": 0.539113206258113, "grad_norm": 3.941596031188965, "learning_rate": 3.228037728263558e-06, "loss": 0.3393, "step": 7891 }, { "epoch": 0.5391815262690443, "grad_norm": 3.5094404220581055, "learning_rate": 3.227265576775457e-06, "loss": 0.3733, "step": 7892 }, { "epoch": 0.5392498462799754, "grad_norm": 4.213711261749268, "learning_rate": 3.226493438642437e-06, "loss": 0.365, "step": 7893 }, { "epoch": 0.5393181662909066, "grad_norm": 3.3806819915771484, "learning_rate": 3.225721313902308e-06, "loss": 0.2704, "step": 7894 }, { "epoch": 0.5393864863018378, "grad_norm": 3.269170045852661, "learning_rate": 3.2249492025928797e-06, "loss": 0.2985, "step": 7895 }, { "epoch": 0.539454806312769, "grad_norm": 3.4524805545806885, "learning_rate": 3.224177104751958e-06, "loss": 0.218, "step": 7896 }, { "epoch": 0.5395231263237003, "grad_norm": 4.866065502166748, "learning_rate": 3.2234050204173525e-06, "loss": 0.4581, "step": 7897 }, { "epoch": 0.5395914463346314, "grad_norm": 4.817002773284912, "learning_rate": 3.222632949626869e-06, "loss": 0.2884, "step": 7898 }, { "epoch": 0.5396597663455626, "grad_norm": 4.421947479248047, "learning_rate": 3.2218608924183153e-06, "loss": 0.4424, "step": 7899 }, { "epoch": 0.5397280863564938, "grad_norm": 3.6746981143951416, "learning_rate": 3.221088848829495e-06, "loss": 0.2612, "step": 7900 }, { "epoch": 0.539796406367425, "grad_norm": 4.563771724700928, "learning_rate": 3.220316818898214e-06, "loss": 0.3738, "step": 7901 }, { "epoch": 0.5398647263783563, "grad_norm": 4.172909259796143, "learning_rate": 3.2195448026622755e-06, "loss": 0.3018, "step": 7902 }, { "epoch": 0.5399330463892874, "grad_norm": 3.8714964389801025, "learning_rate": 3.2187728001594866e-06, "loss": 0.2393, "step": 7903 }, { "epoch": 0.5400013664002187, "grad_norm": 3.70249080657959, "learning_rate": 3.2180008114276458e-06, "loss": 0.4036, "step": 7904 }, { "epoch": 0.5400696864111498, "grad_norm": 3.613454580307007, "learning_rate": 3.2172288365045575e-06, "loss": 0.2228, "step": 7905 }, { "epoch": 0.540138006422081, "grad_norm": 4.150599956512451, "learning_rate": 3.216456875428023e-06, "loss": 0.2903, "step": 7906 }, { "epoch": 0.5402063264330123, "grad_norm": 2.618269205093384, "learning_rate": 3.215684928235843e-06, "loss": 0.2441, "step": 7907 }, { "epoch": 0.5402746464439434, "grad_norm": 4.213080406188965, "learning_rate": 3.2149129949658194e-06, "loss": 0.2423, "step": 7908 }, { "epoch": 0.5403429664548747, "grad_norm": 5.21905517578125, "learning_rate": 3.2141410756557487e-06, "loss": 0.2867, "step": 7909 }, { "epoch": 0.5404112864658058, "grad_norm": 3.4346437454223633, "learning_rate": 3.213369170343432e-06, "loss": 0.2859, "step": 7910 }, { "epoch": 0.540479606476737, "grad_norm": 5.859413146972656, "learning_rate": 3.212597279066666e-06, "loss": 0.2847, "step": 7911 }, { "epoch": 0.5405479264876683, "grad_norm": 3.9078168869018555, "learning_rate": 3.2118254018632503e-06, "loss": 0.3071, "step": 7912 }, { "epoch": 0.5406162464985994, "grad_norm": 3.5459401607513428, "learning_rate": 3.2110535387709794e-06, "loss": 0.3702, "step": 7913 }, { "epoch": 0.5406845665095307, "grad_norm": 3.032686233520508, "learning_rate": 3.21028168982765e-06, "loss": 0.2319, "step": 7914 }, { "epoch": 0.5407528865204618, "grad_norm": 5.718920707702637, "learning_rate": 3.2095098550710565e-06, "loss": 0.3076, "step": 7915 }, { "epoch": 0.5408212065313931, "grad_norm": 4.049156665802002, "learning_rate": 3.208738034538998e-06, "loss": 0.2934, "step": 7916 }, { "epoch": 0.5408895265423243, "grad_norm": 3.8593833446502686, "learning_rate": 3.2079662282692636e-06, "loss": 0.3048, "step": 7917 }, { "epoch": 0.5409578465532554, "grad_norm": 4.732236862182617, "learning_rate": 3.2071944362996473e-06, "loss": 0.3754, "step": 7918 }, { "epoch": 0.5410261665641867, "grad_norm": 4.086203575134277, "learning_rate": 3.2064226586679437e-06, "loss": 0.3583, "step": 7919 }, { "epoch": 0.5410944865751178, "grad_norm": 4.981455326080322, "learning_rate": 3.2056508954119455e-06, "loss": 0.3015, "step": 7920 }, { "epoch": 0.5411628065860491, "grad_norm": 3.4756603240966797, "learning_rate": 3.20487914656944e-06, "loss": 0.3013, "step": 7921 }, { "epoch": 0.5412311265969802, "grad_norm": 3.6459579467773438, "learning_rate": 3.20410741217822e-06, "loss": 0.3473, "step": 7922 }, { "epoch": 0.5412994466079114, "grad_norm": 5.562938213348389, "learning_rate": 3.2033356922760758e-06, "loss": 0.4336, "step": 7923 }, { "epoch": 0.5413677666188427, "grad_norm": 3.691251039505005, "learning_rate": 3.2025639869007963e-06, "loss": 0.2117, "step": 7924 }, { "epoch": 0.5414360866297738, "grad_norm": 4.094139575958252, "learning_rate": 3.2017922960901688e-06, "loss": 0.3167, "step": 7925 }, { "epoch": 0.5415044066407051, "grad_norm": 3.9647910594940186, "learning_rate": 3.201020619881981e-06, "loss": 0.2931, "step": 7926 }, { "epoch": 0.5415727266516362, "grad_norm": 3.1209640502929688, "learning_rate": 3.200248958314021e-06, "loss": 0.2853, "step": 7927 }, { "epoch": 0.5416410466625675, "grad_norm": 3.462228298187256, "learning_rate": 3.199477311424075e-06, "loss": 0.3265, "step": 7928 }, { "epoch": 0.5417093666734987, "grad_norm": 3.6599178314208984, "learning_rate": 3.1987056792499267e-06, "loss": 0.3345, "step": 7929 }, { "epoch": 0.5417776866844298, "grad_norm": 4.359864711761475, "learning_rate": 3.197934061829363e-06, "loss": 0.3439, "step": 7930 }, { "epoch": 0.5418460066953611, "grad_norm": 5.179888725280762, "learning_rate": 3.1971624592001665e-06, "loss": 0.4039, "step": 7931 }, { "epoch": 0.5419143267062922, "grad_norm": 4.3910136222839355, "learning_rate": 3.196390871400122e-06, "loss": 0.3871, "step": 7932 }, { "epoch": 0.5419826467172235, "grad_norm": 4.214033126831055, "learning_rate": 3.19561929846701e-06, "loss": 0.3668, "step": 7933 }, { "epoch": 0.5420509667281547, "grad_norm": 4.394619941711426, "learning_rate": 3.194847740438614e-06, "loss": 0.4119, "step": 7934 }, { "epoch": 0.5421192867390858, "grad_norm": 6.241683483123779, "learning_rate": 3.1940761973527134e-06, "loss": 0.4674, "step": 7935 }, { "epoch": 0.5421876067500171, "grad_norm": 5.728989601135254, "learning_rate": 3.193304669247093e-06, "loss": 0.4783, "step": 7936 }, { "epoch": 0.5422559267609482, "grad_norm": 3.5315418243408203, "learning_rate": 3.1925331561595276e-06, "loss": 0.3062, "step": 7937 }, { "epoch": 0.5423242467718795, "grad_norm": 3.970341920852661, "learning_rate": 3.191761658127797e-06, "loss": 0.3414, "step": 7938 }, { "epoch": 0.5423925667828107, "grad_norm": 3.7639660835266113, "learning_rate": 3.1909901751896817e-06, "loss": 0.277, "step": 7939 }, { "epoch": 0.5424608867937419, "grad_norm": 3.762831211090088, "learning_rate": 3.1902187073829586e-06, "loss": 0.3226, "step": 7940 }, { "epoch": 0.5425292068046731, "grad_norm": 3.9819514751434326, "learning_rate": 3.1894472547454017e-06, "loss": 0.3416, "step": 7941 }, { "epoch": 0.5425975268156042, "grad_norm": 4.089503288269043, "learning_rate": 3.1886758173147895e-06, "loss": 0.3805, "step": 7942 }, { "epoch": 0.5426658468265355, "grad_norm": 3.4952926635742188, "learning_rate": 3.187904395128897e-06, "loss": 0.2227, "step": 7943 }, { "epoch": 0.5427341668374667, "grad_norm": 4.099998950958252, "learning_rate": 3.1871329882254994e-06, "loss": 0.3568, "step": 7944 }, { "epoch": 0.5428024868483979, "grad_norm": 3.582082509994507, "learning_rate": 3.1863615966423687e-06, "loss": 0.2791, "step": 7945 }, { "epoch": 0.5428708068593291, "grad_norm": 4.465821743011475, "learning_rate": 3.1855902204172782e-06, "loss": 0.3289, "step": 7946 }, { "epoch": 0.5429391268702602, "grad_norm": 4.022528648376465, "learning_rate": 3.184818859588001e-06, "loss": 0.3444, "step": 7947 }, { "epoch": 0.5430074468811915, "grad_norm": 4.113120079040527, "learning_rate": 3.1840475141923088e-06, "loss": 0.3884, "step": 7948 }, { "epoch": 0.5430757668921227, "grad_norm": 3.340487003326416, "learning_rate": 3.1832761842679712e-06, "loss": 0.2486, "step": 7949 }, { "epoch": 0.5431440869030539, "grad_norm": 3.2544949054718018, "learning_rate": 3.1825048698527583e-06, "loss": 0.3049, "step": 7950 }, { "epoch": 0.5432124069139851, "grad_norm": 3.743516206741333, "learning_rate": 3.1817335709844403e-06, "loss": 0.3977, "step": 7951 }, { "epoch": 0.5432807269249164, "grad_norm": 3.852329969406128, "learning_rate": 3.1809622877007857e-06, "loss": 0.2879, "step": 7952 }, { "epoch": 0.5433490469358475, "grad_norm": 3.6425771713256836, "learning_rate": 3.1801910200395603e-06, "loss": 0.2605, "step": 7953 }, { "epoch": 0.5434173669467787, "grad_norm": 3.6771628856658936, "learning_rate": 3.179419768038532e-06, "loss": 0.3267, "step": 7954 }, { "epoch": 0.5434856869577099, "grad_norm": 3.907130718231201, "learning_rate": 3.1786485317354685e-06, "loss": 0.3945, "step": 7955 }, { "epoch": 0.5435540069686411, "grad_norm": 2.8514139652252197, "learning_rate": 3.177877311168135e-06, "loss": 0.2832, "step": 7956 }, { "epoch": 0.5436223269795724, "grad_norm": 3.27346134185791, "learning_rate": 3.177106106374293e-06, "loss": 0.2581, "step": 7957 }, { "epoch": 0.5436906469905035, "grad_norm": 3.8206982612609863, "learning_rate": 3.176334917391709e-06, "loss": 0.2737, "step": 7958 }, { "epoch": 0.5437589670014347, "grad_norm": 4.796537399291992, "learning_rate": 3.175563744258146e-06, "loss": 0.4732, "step": 7959 }, { "epoch": 0.5438272870123659, "grad_norm": 3.152630090713501, "learning_rate": 3.174792587011366e-06, "loss": 0.2724, "step": 7960 }, { "epoch": 0.5438956070232971, "grad_norm": 3.9522769451141357, "learning_rate": 3.17402144568913e-06, "loss": 0.33, "step": 7961 }, { "epoch": 0.5439639270342284, "grad_norm": 4.702630519866943, "learning_rate": 3.173250320329199e-06, "loss": 0.4069, "step": 7962 }, { "epoch": 0.5440322470451595, "grad_norm": 3.732311725616455, "learning_rate": 3.1724792109693334e-06, "loss": 0.2493, "step": 7963 }, { "epoch": 0.5441005670560908, "grad_norm": 3.534815788269043, "learning_rate": 3.1717081176472926e-06, "loss": 0.2976, "step": 7964 }, { "epoch": 0.5441688870670219, "grad_norm": 3.7081477642059326, "learning_rate": 3.1709370404008335e-06, "loss": 0.2896, "step": 7965 }, { "epoch": 0.5442372070779531, "grad_norm": 4.514017581939697, "learning_rate": 3.1701659792677154e-06, "loss": 0.4872, "step": 7966 }, { "epoch": 0.5443055270888844, "grad_norm": 4.2601847648620605, "learning_rate": 3.1693949342856938e-06, "loss": 0.3562, "step": 7967 }, { "epoch": 0.5443738470998155, "grad_norm": 4.2922234535217285, "learning_rate": 3.1686239054925258e-06, "loss": 0.4369, "step": 7968 }, { "epoch": 0.5444421671107468, "grad_norm": 4.335413932800293, "learning_rate": 3.167852892925966e-06, "loss": 0.2378, "step": 7969 }, { "epoch": 0.5445104871216779, "grad_norm": 4.553098678588867, "learning_rate": 3.1670818966237687e-06, "loss": 0.3511, "step": 7970 }, { "epoch": 0.5445788071326091, "grad_norm": 4.9346513748168945, "learning_rate": 3.166310916623687e-06, "loss": 0.3266, "step": 7971 }, { "epoch": 0.5446471271435404, "grad_norm": 3.700324296951294, "learning_rate": 3.165539952963477e-06, "loss": 0.3062, "step": 7972 }, { "epoch": 0.5447154471544715, "grad_norm": 3.6162145137786865, "learning_rate": 3.164769005680887e-06, "loss": 0.304, "step": 7973 }, { "epoch": 0.5447837671654028, "grad_norm": 3.951674699783325, "learning_rate": 3.1639980748136684e-06, "loss": 0.316, "step": 7974 }, { "epoch": 0.5448520871763339, "grad_norm": 4.058411121368408, "learning_rate": 3.163227160399574e-06, "loss": 0.2373, "step": 7975 }, { "epoch": 0.5449204071872652, "grad_norm": 4.044488906860352, "learning_rate": 3.162456262476352e-06, "loss": 0.3598, "step": 7976 }, { "epoch": 0.5449887271981964, "grad_norm": 3.7000479698181152, "learning_rate": 3.161685381081752e-06, "loss": 0.3763, "step": 7977 }, { "epoch": 0.5450570472091275, "grad_norm": 3.2675230503082275, "learning_rate": 3.160914516253521e-06, "loss": 0.2998, "step": 7978 }, { "epoch": 0.5451253672200588, "grad_norm": 4.990968704223633, "learning_rate": 3.160143668029407e-06, "loss": 0.3038, "step": 7979 }, { "epoch": 0.5451936872309899, "grad_norm": 3.726627826690674, "learning_rate": 3.159372836447156e-06, "loss": 0.2986, "step": 7980 }, { "epoch": 0.5452620072419212, "grad_norm": 3.8608336448669434, "learning_rate": 3.158602021544514e-06, "loss": 0.2738, "step": 7981 }, { "epoch": 0.5453303272528524, "grad_norm": 4.844053268432617, "learning_rate": 3.157831223359225e-06, "loss": 0.4287, "step": 7982 }, { "epoch": 0.5453986472637835, "grad_norm": 4.001692295074463, "learning_rate": 3.1570604419290327e-06, "loss": 0.4336, "step": 7983 }, { "epoch": 0.5454669672747148, "grad_norm": 4.073052883148193, "learning_rate": 3.1562896772916816e-06, "loss": 0.3205, "step": 7984 }, { "epoch": 0.5455352872856459, "grad_norm": 4.642635822296143, "learning_rate": 3.1555189294849137e-06, "loss": 0.228, "step": 7985 }, { "epoch": 0.5456036072965772, "grad_norm": 4.9966139793396, "learning_rate": 3.15474819854647e-06, "loss": 0.416, "step": 7986 }, { "epoch": 0.5456719273075084, "grad_norm": 5.285464286804199, "learning_rate": 3.1539774845140904e-06, "loss": 0.3748, "step": 7987 }, { "epoch": 0.5457402473184396, "grad_norm": 3.9573941230773926, "learning_rate": 3.153206787425515e-06, "loss": 0.3387, "step": 7988 }, { "epoch": 0.5458085673293708, "grad_norm": 3.9351084232330322, "learning_rate": 3.152436107318486e-06, "loss": 0.259, "step": 7989 }, { "epoch": 0.5458768873403019, "grad_norm": 4.015241622924805, "learning_rate": 3.151665444230737e-06, "loss": 0.3626, "step": 7990 }, { "epoch": 0.5459452073512332, "grad_norm": 3.3688559532165527, "learning_rate": 3.1508947982000067e-06, "loss": 0.2696, "step": 7991 }, { "epoch": 0.5460135273621644, "grad_norm": 4.08907413482666, "learning_rate": 3.1501241692640325e-06, "loss": 0.2919, "step": 7992 }, { "epoch": 0.5460818473730956, "grad_norm": 4.654088497161865, "learning_rate": 3.149353557460551e-06, "loss": 0.3271, "step": 7993 }, { "epoch": 0.5461501673840268, "grad_norm": 5.256798267364502, "learning_rate": 3.148582962827294e-06, "loss": 0.3534, "step": 7994 }, { "epoch": 0.5462184873949579, "grad_norm": 4.2688212394714355, "learning_rate": 3.1478123854019977e-06, "loss": 0.315, "step": 7995 }, { "epoch": 0.5462868074058892, "grad_norm": 3.496084690093994, "learning_rate": 3.1470418252223945e-06, "loss": 0.2769, "step": 7996 }, { "epoch": 0.5463551274168204, "grad_norm": 3.679781436920166, "learning_rate": 3.146271282326218e-06, "loss": 0.3397, "step": 7997 }, { "epoch": 0.5464234474277516, "grad_norm": 4.918313026428223, "learning_rate": 3.1455007567511975e-06, "loss": 0.404, "step": 7998 }, { "epoch": 0.5464917674386828, "grad_norm": 4.427599906921387, "learning_rate": 3.1447302485350645e-06, "loss": 0.2494, "step": 7999 }, { "epoch": 0.546560087449614, "grad_norm": 3.7917633056640625, "learning_rate": 3.143959757715549e-06, "loss": 0.3265, "step": 8000 }, { "epoch": 0.5466284074605452, "grad_norm": 3.2612013816833496, "learning_rate": 3.143189284330381e-06, "loss": 0.2648, "step": 8001 }, { "epoch": 0.5466967274714764, "grad_norm": 3.873225212097168, "learning_rate": 3.142418828417286e-06, "loss": 0.231, "step": 8002 }, { "epoch": 0.5467650474824076, "grad_norm": 4.487840175628662, "learning_rate": 3.1416483900139923e-06, "loss": 0.2968, "step": 8003 }, { "epoch": 0.5468333674933388, "grad_norm": 4.196500778198242, "learning_rate": 3.140877969158226e-06, "loss": 0.3136, "step": 8004 }, { "epoch": 0.54690168750427, "grad_norm": 3.9649388790130615, "learning_rate": 3.140107565887715e-06, "loss": 0.3587, "step": 8005 }, { "epoch": 0.5469700075152012, "grad_norm": 4.676752090454102, "learning_rate": 3.1393371802401808e-06, "loss": 0.2989, "step": 8006 }, { "epoch": 0.5470383275261324, "grad_norm": 4.278329849243164, "learning_rate": 3.138566812253347e-06, "loss": 0.3646, "step": 8007 }, { "epoch": 0.5471066475370636, "grad_norm": 3.483996629714966, "learning_rate": 3.1377964619649383e-06, "loss": 0.3284, "step": 8008 }, { "epoch": 0.5471749675479948, "grad_norm": 4.106455326080322, "learning_rate": 3.137026129412678e-06, "loss": 0.335, "step": 8009 }, { "epoch": 0.547243287558926, "grad_norm": 3.0926239490509033, "learning_rate": 3.136255814634283e-06, "loss": 0.2706, "step": 8010 }, { "epoch": 0.5473116075698572, "grad_norm": 5.099630832672119, "learning_rate": 3.1354855176674765e-06, "loss": 0.4529, "step": 8011 }, { "epoch": 0.5473799275807885, "grad_norm": 4.471615791320801, "learning_rate": 3.134715238549977e-06, "loss": 0.3396, "step": 8012 }, { "epoch": 0.5474482475917196, "grad_norm": 3.6171345710754395, "learning_rate": 3.133944977319505e-06, "loss": 0.3717, "step": 8013 }, { "epoch": 0.5475165676026508, "grad_norm": 4.272401809692383, "learning_rate": 3.1331747340137753e-06, "loss": 0.2759, "step": 8014 }, { "epoch": 0.547584887613582, "grad_norm": 3.467745065689087, "learning_rate": 3.1324045086705055e-06, "loss": 0.3662, "step": 8015 }, { "epoch": 0.5476532076245132, "grad_norm": 4.03273344039917, "learning_rate": 3.131634301327412e-06, "loss": 0.3205, "step": 8016 }, { "epoch": 0.5477215276354445, "grad_norm": 3.8837249279022217, "learning_rate": 3.1308641120222103e-06, "loss": 0.4507, "step": 8017 }, { "epoch": 0.5477898476463756, "grad_norm": 4.074132442474365, "learning_rate": 3.1300939407926136e-06, "loss": 0.4611, "step": 8018 }, { "epoch": 0.5478581676573068, "grad_norm": 3.7334823608398438, "learning_rate": 3.129323787676335e-06, "loss": 0.3303, "step": 8019 }, { "epoch": 0.547926487668238, "grad_norm": 5.215118885040283, "learning_rate": 3.1285536527110876e-06, "loss": 0.3525, "step": 8020 }, { "epoch": 0.5479948076791692, "grad_norm": 3.114966630935669, "learning_rate": 3.1277835359345827e-06, "loss": 0.2907, "step": 8021 }, { "epoch": 0.5480631276901005, "grad_norm": 4.168737888336182, "learning_rate": 3.1270134373845307e-06, "loss": 0.3007, "step": 8022 }, { "epoch": 0.5481314477010316, "grad_norm": 4.264991283416748, "learning_rate": 3.1262433570986407e-06, "loss": 0.3524, "step": 8023 }, { "epoch": 0.5481997677119629, "grad_norm": 3.431309461593628, "learning_rate": 3.125473295114622e-06, "loss": 0.2796, "step": 8024 }, { "epoch": 0.548268087722894, "grad_norm": 3.679172992706299, "learning_rate": 3.1247032514701846e-06, "loss": 0.3334, "step": 8025 }, { "epoch": 0.5483364077338252, "grad_norm": 4.583754062652588, "learning_rate": 3.1239332262030316e-06, "loss": 0.3375, "step": 8026 }, { "epoch": 0.5484047277447565, "grad_norm": 4.470722675323486, "learning_rate": 3.1231632193508704e-06, "loss": 0.3494, "step": 8027 }, { "epoch": 0.5484730477556876, "grad_norm": 4.0436930656433105, "learning_rate": 3.1223932309514075e-06, "loss": 0.2632, "step": 8028 }, { "epoch": 0.5485413677666189, "grad_norm": 3.551726818084717, "learning_rate": 3.121623261042348e-06, "loss": 0.192, "step": 8029 }, { "epoch": 0.54860968777755, "grad_norm": 3.314100980758667, "learning_rate": 3.1208533096613917e-06, "loss": 0.2034, "step": 8030 }, { "epoch": 0.5486780077884812, "grad_norm": 4.3109965324401855, "learning_rate": 3.1200833768462433e-06, "loss": 0.2513, "step": 8031 }, { "epoch": 0.5487463277994125, "grad_norm": 4.579052925109863, "learning_rate": 3.1193134626346046e-06, "loss": 0.3808, "step": 8032 }, { "epoch": 0.5488146478103436, "grad_norm": 2.8679380416870117, "learning_rate": 3.1185435670641768e-06, "loss": 0.1959, "step": 8033 }, { "epoch": 0.5488829678212749, "grad_norm": 3.690890073776245, "learning_rate": 3.1177736901726576e-06, "loss": 0.4178, "step": 8034 }, { "epoch": 0.548951287832206, "grad_norm": 4.0724639892578125, "learning_rate": 3.117003831997747e-06, "loss": 0.3574, "step": 8035 }, { "epoch": 0.5490196078431373, "grad_norm": 3.9871513843536377, "learning_rate": 3.116233992577143e-06, "loss": 0.2373, "step": 8036 }, { "epoch": 0.5490879278540685, "grad_norm": 5.383340835571289, "learning_rate": 3.115464171948543e-06, "loss": 0.3963, "step": 8037 }, { "epoch": 0.5491562478649996, "grad_norm": 4.4601149559021, "learning_rate": 3.1146943701496416e-06, "loss": 0.2577, "step": 8038 }, { "epoch": 0.5492245678759309, "grad_norm": 3.873652458190918, "learning_rate": 3.113924587218135e-06, "loss": 0.3236, "step": 8039 }, { "epoch": 0.549292887886862, "grad_norm": 4.341653347015381, "learning_rate": 3.1131548231917173e-06, "loss": 0.308, "step": 8040 }, { "epoch": 0.5493612078977933, "grad_norm": 4.181788921356201, "learning_rate": 3.112385078108081e-06, "loss": 0.3531, "step": 8041 }, { "epoch": 0.5494295279087245, "grad_norm": 3.9429965019226074, "learning_rate": 3.1116153520049215e-06, "loss": 0.3079, "step": 8042 }, { "epoch": 0.5494978479196556, "grad_norm": 3.598270893096924, "learning_rate": 3.1108456449199255e-06, "loss": 0.2298, "step": 8043 }, { "epoch": 0.5495661679305869, "grad_norm": 6.435648441314697, "learning_rate": 3.1100759568907874e-06, "loss": 0.3553, "step": 8044 }, { "epoch": 0.549634487941518, "grad_norm": 5.305697917938232, "learning_rate": 3.109306287955195e-06, "loss": 0.3706, "step": 8045 }, { "epoch": 0.5497028079524493, "grad_norm": 3.9601752758026123, "learning_rate": 3.108536638150838e-06, "loss": 0.321, "step": 8046 }, { "epoch": 0.5497711279633805, "grad_norm": 3.3165643215179443, "learning_rate": 3.107767007515403e-06, "loss": 0.2865, "step": 8047 }, { "epoch": 0.5498394479743117, "grad_norm": 4.075982570648193, "learning_rate": 3.106997396086577e-06, "loss": 0.3288, "step": 8048 }, { "epoch": 0.5499077679852429, "grad_norm": 3.129127025604248, "learning_rate": 3.106227803902046e-06, "loss": 0.2751, "step": 8049 }, { "epoch": 0.549976087996174, "grad_norm": 3.406142473220825, "learning_rate": 3.105458230999496e-06, "loss": 0.2826, "step": 8050 }, { "epoch": 0.5500444080071053, "grad_norm": 3.250871181488037, "learning_rate": 3.1046886774166085e-06, "loss": 0.2806, "step": 8051 }, { "epoch": 0.5501127280180365, "grad_norm": 3.479224681854248, "learning_rate": 3.1039191431910683e-06, "loss": 0.2403, "step": 8052 }, { "epoch": 0.5501810480289677, "grad_norm": 4.329466342926025, "learning_rate": 3.103149628360557e-06, "loss": 0.3842, "step": 8053 }, { "epoch": 0.5502493680398989, "grad_norm": 4.7724223136901855, "learning_rate": 3.1023801329627568e-06, "loss": 0.2731, "step": 8054 }, { "epoch": 0.55031768805083, "grad_norm": 5.321407318115234, "learning_rate": 3.1016106570353455e-06, "loss": 0.3589, "step": 8055 }, { "epoch": 0.5503860080617613, "grad_norm": 4.5219407081604, "learning_rate": 3.100841200616004e-06, "loss": 0.212, "step": 8056 }, { "epoch": 0.5504543280726925, "grad_norm": 3.9102959632873535, "learning_rate": 3.100071763742409e-06, "loss": 0.2187, "step": 8057 }, { "epoch": 0.5505226480836237, "grad_norm": 4.9124345779418945, "learning_rate": 3.0993023464522414e-06, "loss": 0.3923, "step": 8058 }, { "epoch": 0.5505909680945549, "grad_norm": 3.5795440673828125, "learning_rate": 3.098532948783173e-06, "loss": 0.2687, "step": 8059 }, { "epoch": 0.5506592881054861, "grad_norm": 3.510392904281616, "learning_rate": 3.097763570772881e-06, "loss": 0.3028, "step": 8060 }, { "epoch": 0.5507276081164173, "grad_norm": 3.8745644092559814, "learning_rate": 3.096994212459041e-06, "loss": 0.3451, "step": 8061 }, { "epoch": 0.5507959281273485, "grad_norm": 3.906723737716675, "learning_rate": 3.0962248738793265e-06, "loss": 0.3402, "step": 8062 }, { "epoch": 0.5508642481382797, "grad_norm": 3.9306325912475586, "learning_rate": 3.0954555550714067e-06, "loss": 0.3335, "step": 8063 }, { "epoch": 0.5509325681492109, "grad_norm": 4.674007892608643, "learning_rate": 3.0946862560729557e-06, "loss": 0.3408, "step": 8064 }, { "epoch": 0.5510008881601421, "grad_norm": 3.852562665939331, "learning_rate": 3.093916976921644e-06, "loss": 0.3616, "step": 8065 }, { "epoch": 0.5510692081710733, "grad_norm": 3.6315577030181885, "learning_rate": 3.0931477176551413e-06, "loss": 0.4098, "step": 8066 }, { "epoch": 0.5511375281820045, "grad_norm": 4.155092239379883, "learning_rate": 3.092378478311115e-06, "loss": 0.3041, "step": 8067 }, { "epoch": 0.5512058481929357, "grad_norm": 3.2437944412231445, "learning_rate": 3.0916092589272335e-06, "loss": 0.3137, "step": 8068 }, { "epoch": 0.5512741682038669, "grad_norm": 5.335827827453613, "learning_rate": 3.0908400595411633e-06, "loss": 0.2891, "step": 8069 }, { "epoch": 0.5513424882147981, "grad_norm": 4.502349376678467, "learning_rate": 3.0900708801905707e-06, "loss": 0.3052, "step": 8070 }, { "epoch": 0.5514108082257293, "grad_norm": 5.828493118286133, "learning_rate": 3.0893017209131194e-06, "loss": 0.4274, "step": 8071 }, { "epoch": 0.5514791282366606, "grad_norm": 5.5807342529296875, "learning_rate": 3.088532581746473e-06, "loss": 0.4553, "step": 8072 }, { "epoch": 0.5515474482475917, "grad_norm": 3.654435634613037, "learning_rate": 3.0877634627282947e-06, "loss": 0.3174, "step": 8073 }, { "epoch": 0.5516157682585229, "grad_norm": 4.428308010101318, "learning_rate": 3.0869943638962467e-06, "loss": 0.3602, "step": 8074 }, { "epoch": 0.5516840882694541, "grad_norm": 4.535929203033447, "learning_rate": 3.0862252852879886e-06, "loss": 0.2711, "step": 8075 }, { "epoch": 0.5517524082803853, "grad_norm": 2.861992597579956, "learning_rate": 3.085456226941181e-06, "loss": 0.2354, "step": 8076 }, { "epoch": 0.5518207282913166, "grad_norm": 3.9985134601593018, "learning_rate": 3.0846871888934813e-06, "loss": 0.3002, "step": 8077 }, { "epoch": 0.5518890483022477, "grad_norm": 4.541345119476318, "learning_rate": 3.0839181711825506e-06, "loss": 0.3505, "step": 8078 }, { "epoch": 0.5519573683131789, "grad_norm": 4.415392875671387, "learning_rate": 3.083149173846042e-06, "loss": 0.3482, "step": 8079 }, { "epoch": 0.5520256883241101, "grad_norm": 4.374582767486572, "learning_rate": 3.0823801969216113e-06, "loss": 0.4162, "step": 8080 }, { "epoch": 0.5520940083350413, "grad_norm": 4.886824131011963, "learning_rate": 3.081611240446916e-06, "loss": 0.414, "step": 8081 }, { "epoch": 0.5521623283459726, "grad_norm": 3.8657610416412354, "learning_rate": 3.08084230445961e-06, "loss": 0.3484, "step": 8082 }, { "epoch": 0.5522306483569037, "grad_norm": 3.5552480220794678, "learning_rate": 3.0800733889973423e-06, "loss": 0.3351, "step": 8083 }, { "epoch": 0.552298968367835, "grad_norm": 2.860193967819214, "learning_rate": 3.079304494097767e-06, "loss": 0.2, "step": 8084 }, { "epoch": 0.5523672883787661, "grad_norm": 4.534232139587402, "learning_rate": 3.078535619798535e-06, "loss": 0.3494, "step": 8085 }, { "epoch": 0.5524356083896973, "grad_norm": 4.086740016937256, "learning_rate": 3.0777667661372968e-06, "loss": 0.2903, "step": 8086 }, { "epoch": 0.5525039284006286, "grad_norm": 3.914959669113159, "learning_rate": 3.076997933151699e-06, "loss": 0.2248, "step": 8087 }, { "epoch": 0.5525722484115597, "grad_norm": 4.201766014099121, "learning_rate": 3.0762291208793905e-06, "loss": 0.2832, "step": 8088 }, { "epoch": 0.552640568422491, "grad_norm": 3.2507758140563965, "learning_rate": 3.075460329358018e-06, "loss": 0.2846, "step": 8089 }, { "epoch": 0.5527088884334221, "grad_norm": 5.621804237365723, "learning_rate": 3.0746915586252273e-06, "loss": 0.2928, "step": 8090 }, { "epoch": 0.5527772084443533, "grad_norm": 3.155665636062622, "learning_rate": 3.0739228087186626e-06, "loss": 0.271, "step": 8091 }, { "epoch": 0.5528455284552846, "grad_norm": 4.270615100860596, "learning_rate": 3.0731540796759675e-06, "loss": 0.3371, "step": 8092 }, { "epoch": 0.5529138484662157, "grad_norm": 3.808671712875366, "learning_rate": 3.072385371534784e-06, "loss": 0.3582, "step": 8093 }, { "epoch": 0.552982168477147, "grad_norm": 4.306127548217773, "learning_rate": 3.0716166843327572e-06, "loss": 0.3641, "step": 8094 }, { "epoch": 0.5530504884880781, "grad_norm": 3.906238079071045, "learning_rate": 3.0708480181075234e-06, "loss": 0.3379, "step": 8095 }, { "epoch": 0.5531188084990094, "grad_norm": 4.138192176818848, "learning_rate": 3.0700793728967226e-06, "loss": 0.4159, "step": 8096 }, { "epoch": 0.5531871285099406, "grad_norm": 3.865304946899414, "learning_rate": 3.069310748737996e-06, "loss": 0.4118, "step": 8097 }, { "epoch": 0.5532554485208717, "grad_norm": 4.255003452301025, "learning_rate": 3.068542145668981e-06, "loss": 0.3734, "step": 8098 }, { "epoch": 0.553323768531803, "grad_norm": 3.87404203414917, "learning_rate": 3.06777356372731e-06, "loss": 0.3304, "step": 8099 }, { "epoch": 0.5533920885427341, "grad_norm": 4.682151794433594, "learning_rate": 3.0670050029506223e-06, "loss": 0.4539, "step": 8100 }, { "epoch": 0.5534604085536654, "grad_norm": 4.710636138916016, "learning_rate": 3.066236463376551e-06, "loss": 0.3175, "step": 8101 }, { "epoch": 0.5535287285645966, "grad_norm": 3.2802908420562744, "learning_rate": 3.0654679450427296e-06, "loss": 0.3212, "step": 8102 }, { "epoch": 0.5535970485755277, "grad_norm": 3.461122512817383, "learning_rate": 3.0646994479867903e-06, "loss": 0.3952, "step": 8103 }, { "epoch": 0.553665368586459, "grad_norm": 5.346607685089111, "learning_rate": 3.0639309722463643e-06, "loss": 0.4777, "step": 8104 }, { "epoch": 0.5537336885973901, "grad_norm": 4.676662921905518, "learning_rate": 3.063162517859081e-06, "loss": 0.2906, "step": 8105 }, { "epoch": 0.5538020086083214, "grad_norm": 4.230706691741943, "learning_rate": 3.0623940848625724e-06, "loss": 0.3571, "step": 8106 }, { "epoch": 0.5538703286192526, "grad_norm": 4.56614351272583, "learning_rate": 3.0616256732944633e-06, "loss": 0.3693, "step": 8107 }, { "epoch": 0.5539386486301838, "grad_norm": 3.512732982635498, "learning_rate": 3.0608572831923823e-06, "loss": 0.3101, "step": 8108 }, { "epoch": 0.554006968641115, "grad_norm": 3.374408721923828, "learning_rate": 3.060088914593955e-06, "loss": 0.2319, "step": 8109 }, { "epoch": 0.5540752886520461, "grad_norm": 3.453080177307129, "learning_rate": 3.0593205675368065e-06, "loss": 0.2423, "step": 8110 }, { "epoch": 0.5541436086629774, "grad_norm": 4.280364513397217, "learning_rate": 3.0585522420585626e-06, "loss": 0.3077, "step": 8111 }, { "epoch": 0.5542119286739086, "grad_norm": 3.738816976547241, "learning_rate": 3.0577839381968434e-06, "loss": 0.3372, "step": 8112 }, { "epoch": 0.5542802486848398, "grad_norm": 3.94083571434021, "learning_rate": 3.057015655989271e-06, "loss": 0.3997, "step": 8113 }, { "epoch": 0.554348568695771, "grad_norm": 3.782345771789551, "learning_rate": 3.056247395473467e-06, "loss": 0.3317, "step": 8114 }, { "epoch": 0.5544168887067021, "grad_norm": 3.274590253829956, "learning_rate": 3.055479156687053e-06, "loss": 0.2376, "step": 8115 }, { "epoch": 0.5544852087176334, "grad_norm": 4.023395538330078, "learning_rate": 3.0547109396676434e-06, "loss": 0.3461, "step": 8116 }, { "epoch": 0.5545535287285646, "grad_norm": 3.5146706104278564, "learning_rate": 3.0539427444528583e-06, "loss": 0.3254, "step": 8117 }, { "epoch": 0.5546218487394958, "grad_norm": 3.842529296875, "learning_rate": 3.053174571080314e-06, "loss": 0.3237, "step": 8118 }, { "epoch": 0.554690168750427, "grad_norm": 4.510336875915527, "learning_rate": 3.0524064195876264e-06, "loss": 0.3457, "step": 8119 }, { "epoch": 0.5547584887613582, "grad_norm": 2.915201187133789, "learning_rate": 3.051638290012409e-06, "loss": 0.2428, "step": 8120 }, { "epoch": 0.5548268087722894, "grad_norm": 4.890641689300537, "learning_rate": 3.0508701823922744e-06, "loss": 0.2124, "step": 8121 }, { "epoch": 0.5548951287832206, "grad_norm": 4.6130690574646, "learning_rate": 3.050102096764836e-06, "loss": 0.2781, "step": 8122 }, { "epoch": 0.5549634487941518, "grad_norm": 5.006263256072998, "learning_rate": 3.049334033167705e-06, "loss": 0.2705, "step": 8123 }, { "epoch": 0.555031768805083, "grad_norm": 3.8672826290130615, "learning_rate": 3.0485659916384904e-06, "loss": 0.3136, "step": 8124 }, { "epoch": 0.5551000888160142, "grad_norm": 3.5704023838043213, "learning_rate": 3.047797972214802e-06, "loss": 0.2349, "step": 8125 }, { "epoch": 0.5551684088269454, "grad_norm": 4.599846363067627, "learning_rate": 3.0470299749342476e-06, "loss": 0.3246, "step": 8126 }, { "epoch": 0.5552367288378766, "grad_norm": 3.8254587650299072, "learning_rate": 3.0462619998344336e-06, "loss": 0.3177, "step": 8127 }, { "epoch": 0.5553050488488078, "grad_norm": 3.8361170291900635, "learning_rate": 3.045494046952966e-06, "loss": 0.2928, "step": 8128 }, { "epoch": 0.555373368859739, "grad_norm": 4.101009368896484, "learning_rate": 3.0447261163274487e-06, "loss": 0.2956, "step": 8129 }, { "epoch": 0.5554416888706702, "grad_norm": 4.808731555938721, "learning_rate": 3.0439582079954853e-06, "loss": 0.42, "step": 8130 }, { "epoch": 0.5555100088816014, "grad_norm": 5.243772506713867, "learning_rate": 3.0431903219946817e-06, "loss": 0.3277, "step": 8131 }, { "epoch": 0.5555783288925327, "grad_norm": 5.144742965698242, "learning_rate": 3.042422458362634e-06, "loss": 0.3967, "step": 8132 }, { "epoch": 0.5556466489034638, "grad_norm": 3.272019624710083, "learning_rate": 3.041654617136944e-06, "loss": 0.3053, "step": 8133 }, { "epoch": 0.555714968914395, "grad_norm": 4.185593128204346, "learning_rate": 3.040886798355213e-06, "loss": 0.3125, "step": 8134 }, { "epoch": 0.5557832889253262, "grad_norm": 3.5474417209625244, "learning_rate": 3.040119002055038e-06, "loss": 0.3734, "step": 8135 }, { "epoch": 0.5558516089362574, "grad_norm": 3.496410608291626, "learning_rate": 3.0393512282740145e-06, "loss": 0.2791, "step": 8136 }, { "epoch": 0.5559199289471887, "grad_norm": 3.9724557399749756, "learning_rate": 3.03858347704974e-06, "loss": 0.3368, "step": 8137 }, { "epoch": 0.5559882489581198, "grad_norm": 4.503060817718506, "learning_rate": 3.0378157484198085e-06, "loss": 0.3966, "step": 8138 }, { "epoch": 0.5560565689690511, "grad_norm": 4.014015197753906, "learning_rate": 3.0370480424218147e-06, "loss": 0.3597, "step": 8139 }, { "epoch": 0.5561248889799822, "grad_norm": 4.416665077209473, "learning_rate": 3.0362803590933494e-06, "loss": 0.3336, "step": 8140 }, { "epoch": 0.5561932089909134, "grad_norm": 3.6149299144744873, "learning_rate": 3.035512698472005e-06, "loss": 0.3234, "step": 8141 }, { "epoch": 0.5562615290018447, "grad_norm": 5.129428863525391, "learning_rate": 3.0347450605953713e-06, "loss": 0.3355, "step": 8142 }, { "epoch": 0.5563298490127758, "grad_norm": 4.148093223571777, "learning_rate": 3.0339774455010394e-06, "loss": 0.3597, "step": 8143 }, { "epoch": 0.5563981690237071, "grad_norm": 3.35862398147583, "learning_rate": 3.033209853226594e-06, "loss": 0.2484, "step": 8144 }, { "epoch": 0.5564664890346382, "grad_norm": 3.7621281147003174, "learning_rate": 3.0324422838096247e-06, "loss": 0.3098, "step": 8145 }, { "epoch": 0.5565348090455694, "grad_norm": 3.290102005004883, "learning_rate": 3.0316747372877158e-06, "loss": 0.3284, "step": 8146 }, { "epoch": 0.5566031290565007, "grad_norm": 4.0426130294799805, "learning_rate": 3.0309072136984548e-06, "loss": 0.2631, "step": 8147 }, { "epoch": 0.5566714490674318, "grad_norm": 4.242602348327637, "learning_rate": 3.0301397130794216e-06, "loss": 0.4018, "step": 8148 }, { "epoch": 0.5567397690783631, "grad_norm": 3.033886432647705, "learning_rate": 3.0293722354682e-06, "loss": 0.3533, "step": 8149 }, { "epoch": 0.5568080890892942, "grad_norm": 3.7244300842285156, "learning_rate": 3.028604780902372e-06, "loss": 0.2539, "step": 8150 }, { "epoch": 0.5568764091002255, "grad_norm": 4.4998250007629395, "learning_rate": 3.027837349419519e-06, "loss": 0.3035, "step": 8151 }, { "epoch": 0.5569447291111567, "grad_norm": 3.3441786766052246, "learning_rate": 3.027069941057216e-06, "loss": 0.267, "step": 8152 }, { "epoch": 0.5570130491220878, "grad_norm": 4.163088321685791, "learning_rate": 3.026302555853045e-06, "loss": 0.3049, "step": 8153 }, { "epoch": 0.5570813691330191, "grad_norm": 3.37009859085083, "learning_rate": 3.025535193844581e-06, "loss": 0.3215, "step": 8154 }, { "epoch": 0.5571496891439502, "grad_norm": 3.160788059234619, "learning_rate": 3.0247678550694006e-06, "loss": 0.2847, "step": 8155 }, { "epoch": 0.5572180091548815, "grad_norm": 4.3458991050720215, "learning_rate": 3.024000539565077e-06, "loss": 0.2824, "step": 8156 }, { "epoch": 0.5572863291658127, "grad_norm": 3.6794605255126953, "learning_rate": 3.0232332473691845e-06, "loss": 0.3317, "step": 8157 }, { "epoch": 0.5573546491767438, "grad_norm": 4.498724460601807, "learning_rate": 3.0224659785192955e-06, "loss": 0.2847, "step": 8158 }, { "epoch": 0.5574229691876751, "grad_norm": 5.520112991333008, "learning_rate": 3.0216987330529812e-06, "loss": 0.3014, "step": 8159 }, { "epoch": 0.5574912891986062, "grad_norm": 3.9639272689819336, "learning_rate": 3.0209315110078107e-06, "loss": 0.3551, "step": 8160 }, { "epoch": 0.5575596092095375, "grad_norm": 3.4892287254333496, "learning_rate": 3.020164312421353e-06, "loss": 0.2079, "step": 8161 }, { "epoch": 0.5576279292204687, "grad_norm": 3.6050705909729004, "learning_rate": 3.0193971373311767e-06, "loss": 0.2964, "step": 8162 }, { "epoch": 0.5576962492313999, "grad_norm": 3.504411220550537, "learning_rate": 3.018629985774848e-06, "loss": 0.3004, "step": 8163 }, { "epoch": 0.5577645692423311, "grad_norm": 4.013301849365234, "learning_rate": 3.0178628577899317e-06, "loss": 0.2526, "step": 8164 }, { "epoch": 0.5578328892532622, "grad_norm": 3.965003490447998, "learning_rate": 3.017095753413992e-06, "loss": 0.3694, "step": 8165 }, { "epoch": 0.5579012092641935, "grad_norm": 4.201812267303467, "learning_rate": 3.0163286726845916e-06, "loss": 0.3984, "step": 8166 }, { "epoch": 0.5579695292751247, "grad_norm": 3.729203224182129, "learning_rate": 3.0155616156392953e-06, "loss": 0.3015, "step": 8167 }, { "epoch": 0.5580378492860559, "grad_norm": 2.9643051624298096, "learning_rate": 3.0147945823156608e-06, "loss": 0.2513, "step": 8168 }, { "epoch": 0.5581061692969871, "grad_norm": 4.761935710906982, "learning_rate": 3.014027572751247e-06, "loss": 0.3596, "step": 8169 }, { "epoch": 0.5581744893079182, "grad_norm": 3.9939372539520264, "learning_rate": 3.013260586983615e-06, "loss": 0.2905, "step": 8170 }, { "epoch": 0.5582428093188495, "grad_norm": 4.201735496520996, "learning_rate": 3.0124936250503227e-06, "loss": 0.2652, "step": 8171 }, { "epoch": 0.5583111293297807, "grad_norm": 3.795159101486206, "learning_rate": 3.0117266869889223e-06, "loss": 0.3795, "step": 8172 }, { "epoch": 0.5583794493407119, "grad_norm": 4.33571195602417, "learning_rate": 3.010959772836971e-06, "loss": 0.3917, "step": 8173 }, { "epoch": 0.5584477693516431, "grad_norm": 4.255688667297363, "learning_rate": 3.0101928826320223e-06, "loss": 0.4081, "step": 8174 }, { "epoch": 0.5585160893625744, "grad_norm": 4.5945539474487305, "learning_rate": 3.0094260164116295e-06, "loss": 0.3135, "step": 8175 }, { "epoch": 0.5585844093735055, "grad_norm": 4.686074256896973, "learning_rate": 3.0086591742133443e-06, "loss": 0.3859, "step": 8176 }, { "epoch": 0.5586527293844367, "grad_norm": 3.889719247817993, "learning_rate": 3.007892356074715e-06, "loss": 0.3875, "step": 8177 }, { "epoch": 0.5587210493953679, "grad_norm": 3.8740015029907227, "learning_rate": 3.007125562033291e-06, "loss": 0.3636, "step": 8178 }, { "epoch": 0.5587893694062991, "grad_norm": 3.9128975868225098, "learning_rate": 3.006358792126622e-06, "loss": 0.2391, "step": 8179 }, { "epoch": 0.5588576894172304, "grad_norm": 4.125805377960205, "learning_rate": 3.0055920463922536e-06, "loss": 0.317, "step": 8180 }, { "epoch": 0.5589260094281615, "grad_norm": 3.2751336097717285, "learning_rate": 3.0048253248677306e-06, "loss": 0.3153, "step": 8181 }, { "epoch": 0.5589943294390927, "grad_norm": 4.448874473571777, "learning_rate": 3.0040586275905972e-06, "loss": 0.4125, "step": 8182 }, { "epoch": 0.5590626494500239, "grad_norm": 4.276469707489014, "learning_rate": 3.003291954598398e-06, "loss": 0.2747, "step": 8183 }, { "epoch": 0.5591309694609551, "grad_norm": 3.8995401859283447, "learning_rate": 3.002525305928676e-06, "loss": 0.2391, "step": 8184 }, { "epoch": 0.5591992894718864, "grad_norm": 3.7691924571990967, "learning_rate": 3.0017586816189674e-06, "loss": 0.3738, "step": 8185 }, { "epoch": 0.5592676094828175, "grad_norm": 3.8724513053894043, "learning_rate": 3.0009920817068157e-06, "loss": 0.2659, "step": 8186 }, { "epoch": 0.5593359294937488, "grad_norm": 4.596986293792725, "learning_rate": 3.000225506229758e-06, "loss": 0.3183, "step": 8187 }, { "epoch": 0.5594042495046799, "grad_norm": 4.5058913230896, "learning_rate": 2.9994589552253325e-06, "loss": 0.2897, "step": 8188 }, { "epoch": 0.5594725695156111, "grad_norm": 4.501772403717041, "learning_rate": 2.9986924287310735e-06, "loss": 0.27, "step": 8189 }, { "epoch": 0.5595408895265424, "grad_norm": 4.091150760650635, "learning_rate": 2.9979259267845164e-06, "loss": 0.3389, "step": 8190 }, { "epoch": 0.5596092095374735, "grad_norm": 3.1081366539001465, "learning_rate": 2.9971594494231946e-06, "loss": 0.2125, "step": 8191 }, { "epoch": 0.5596775295484048, "grad_norm": 3.347547769546509, "learning_rate": 2.996392996684642e-06, "loss": 0.3063, "step": 8192 }, { "epoch": 0.5597458495593359, "grad_norm": 2.9033899307250977, "learning_rate": 2.9956265686063875e-06, "loss": 0.3404, "step": 8193 }, { "epoch": 0.5598141695702671, "grad_norm": 3.785719394683838, "learning_rate": 2.9948601652259623e-06, "loss": 0.3016, "step": 8194 }, { "epoch": 0.5598824895811984, "grad_norm": 5.8401031494140625, "learning_rate": 2.9940937865808947e-06, "loss": 0.3195, "step": 8195 }, { "epoch": 0.5599508095921295, "grad_norm": 4.148136138916016, "learning_rate": 2.993327432708713e-06, "loss": 0.3537, "step": 8196 }, { "epoch": 0.5600191296030608, "grad_norm": 3.736855983734131, "learning_rate": 2.9925611036469426e-06, "loss": 0.4057, "step": 8197 }, { "epoch": 0.5600874496139919, "grad_norm": 4.759865760803223, "learning_rate": 2.9917947994331085e-06, "loss": 0.3626, "step": 8198 }, { "epoch": 0.5601557696249232, "grad_norm": 4.29431676864624, "learning_rate": 2.9910285201047347e-06, "loss": 0.337, "step": 8199 }, { "epoch": 0.5602240896358543, "grad_norm": 4.950549602508545, "learning_rate": 2.990262265699346e-06, "loss": 0.2521, "step": 8200 }, { "epoch": 0.5602924096467855, "grad_norm": 3.381899356842041, "learning_rate": 2.9894960362544605e-06, "loss": 0.2734, "step": 8201 }, { "epoch": 0.5603607296577168, "grad_norm": 3.340280294418335, "learning_rate": 2.9887298318076e-06, "loss": 0.2666, "step": 8202 }, { "epoch": 0.5604290496686479, "grad_norm": 4.343057155609131, "learning_rate": 2.987963652396284e-06, "loss": 0.3266, "step": 8203 }, { "epoch": 0.5604973696795792, "grad_norm": 4.358138084411621, "learning_rate": 2.9871974980580302e-06, "loss": 0.3272, "step": 8204 }, { "epoch": 0.5605656896905103, "grad_norm": 4.43867826461792, "learning_rate": 2.9864313688303534e-06, "loss": 0.3241, "step": 8205 }, { "epoch": 0.5606340097014415, "grad_norm": 4.334312438964844, "learning_rate": 2.985665264750771e-06, "loss": 0.2902, "step": 8206 }, { "epoch": 0.5607023297123728, "grad_norm": 4.864526271820068, "learning_rate": 2.9848991858567958e-06, "loss": 0.3271, "step": 8207 }, { "epoch": 0.5607706497233039, "grad_norm": 3.920090436935425, "learning_rate": 2.9841331321859422e-06, "loss": 0.2717, "step": 8208 }, { "epoch": 0.5608389697342352, "grad_norm": 4.480238437652588, "learning_rate": 2.98336710377572e-06, "loss": 0.2637, "step": 8209 }, { "epoch": 0.5609072897451663, "grad_norm": 4.145420074462891, "learning_rate": 2.98260110066364e-06, "loss": 0.27, "step": 8210 }, { "epoch": 0.5609756097560976, "grad_norm": 3.015720844268799, "learning_rate": 2.981835122887212e-06, "loss": 0.2582, "step": 8211 }, { "epoch": 0.5610439297670288, "grad_norm": 3.965587615966797, "learning_rate": 2.9810691704839437e-06, "loss": 0.3592, "step": 8212 }, { "epoch": 0.5611122497779599, "grad_norm": 4.276705741882324, "learning_rate": 2.9803032434913415e-06, "loss": 0.3199, "step": 8213 }, { "epoch": 0.5611805697888912, "grad_norm": 3.889146566390991, "learning_rate": 2.9795373419469107e-06, "loss": 0.2721, "step": 8214 }, { "epoch": 0.5612488897998223, "grad_norm": 4.19555139541626, "learning_rate": 2.978771465888156e-06, "loss": 0.3795, "step": 8215 }, { "epoch": 0.5613172098107536, "grad_norm": 5.189528942108154, "learning_rate": 2.9780056153525807e-06, "loss": 0.3475, "step": 8216 }, { "epoch": 0.5613855298216848, "grad_norm": 4.663236618041992, "learning_rate": 2.977239790377685e-06, "loss": 0.3777, "step": 8217 }, { "epoch": 0.5614538498326159, "grad_norm": 3.255922555923462, "learning_rate": 2.9764739910009703e-06, "loss": 0.2164, "step": 8218 }, { "epoch": 0.5615221698435472, "grad_norm": 3.8724465370178223, "learning_rate": 2.9757082172599343e-06, "loss": 0.2827, "step": 8219 }, { "epoch": 0.5615904898544783, "grad_norm": 4.565827369689941, "learning_rate": 2.9749424691920786e-06, "loss": 0.2695, "step": 8220 }, { "epoch": 0.5616588098654096, "grad_norm": 4.795891284942627, "learning_rate": 2.974176746834896e-06, "loss": 0.3341, "step": 8221 }, { "epoch": 0.5617271298763408, "grad_norm": 4.411127090454102, "learning_rate": 2.9734110502258828e-06, "loss": 0.3232, "step": 8222 }, { "epoch": 0.561795449887272, "grad_norm": 4.224985599517822, "learning_rate": 2.9726453794025343e-06, "loss": 0.3343, "step": 8223 }, { "epoch": 0.5618637698982032, "grad_norm": 5.100974082946777, "learning_rate": 2.971879734402343e-06, "loss": 0.3165, "step": 8224 }, { "epoch": 0.5619320899091343, "grad_norm": 3.169057607650757, "learning_rate": 2.9711141152628003e-06, "loss": 0.267, "step": 8225 }, { "epoch": 0.5620004099200656, "grad_norm": 4.022390365600586, "learning_rate": 2.970348522021396e-06, "loss": 0.3386, "step": 8226 }, { "epoch": 0.5620687299309968, "grad_norm": 4.132258892059326, "learning_rate": 2.9695829547156197e-06, "loss": 0.2915, "step": 8227 }, { "epoch": 0.562137049941928, "grad_norm": 3.43047833442688, "learning_rate": 2.96881741338296e-06, "loss": 0.2618, "step": 8228 }, { "epoch": 0.5622053699528592, "grad_norm": 4.246367931365967, "learning_rate": 2.9680518980609015e-06, "loss": 0.3181, "step": 8229 }, { "epoch": 0.5622736899637903, "grad_norm": 4.535789966583252, "learning_rate": 2.967286408786931e-06, "loss": 0.2562, "step": 8230 }, { "epoch": 0.5623420099747216, "grad_norm": 3.7783496379852295, "learning_rate": 2.9665209455985316e-06, "loss": 0.3344, "step": 8231 }, { "epoch": 0.5624103299856528, "grad_norm": 3.8901467323303223, "learning_rate": 2.9657555085331878e-06, "loss": 0.3087, "step": 8232 }, { "epoch": 0.562478649996584, "grad_norm": 4.208967685699463, "learning_rate": 2.964990097628378e-06, "loss": 0.3529, "step": 8233 }, { "epoch": 0.5625469700075152, "grad_norm": 3.0341508388519287, "learning_rate": 2.9642247129215848e-06, "loss": 0.2291, "step": 8234 }, { "epoch": 0.5626152900184465, "grad_norm": 4.240809917449951, "learning_rate": 2.963459354450285e-06, "loss": 0.2974, "step": 8235 }, { "epoch": 0.5626836100293776, "grad_norm": 3.703397750854492, "learning_rate": 2.96269402225196e-06, "loss": 0.2691, "step": 8236 }, { "epoch": 0.5627519300403088, "grad_norm": 3.9213569164276123, "learning_rate": 2.9619287163640814e-06, "loss": 0.287, "step": 8237 }, { "epoch": 0.56282025005124, "grad_norm": 3.592768430709839, "learning_rate": 2.961163436824126e-06, "loss": 0.2423, "step": 8238 }, { "epoch": 0.5628885700621712, "grad_norm": 3.3753011226654053, "learning_rate": 2.9603981836695685e-06, "loss": 0.3905, "step": 8239 }, { "epoch": 0.5629568900731025, "grad_norm": 5.600413799285889, "learning_rate": 2.9596329569378803e-06, "loss": 0.3223, "step": 8240 }, { "epoch": 0.5630252100840336, "grad_norm": 3.9503791332244873, "learning_rate": 2.9588677566665337e-06, "loss": 0.4432, "step": 8241 }, { "epoch": 0.5630935300949648, "grad_norm": 3.787090301513672, "learning_rate": 2.958102582892997e-06, "loss": 0.2365, "step": 8242 }, { "epoch": 0.563161850105896, "grad_norm": 3.3013663291931152, "learning_rate": 2.95733743565474e-06, "loss": 0.2462, "step": 8243 }, { "epoch": 0.5632301701168272, "grad_norm": 4.141722679138184, "learning_rate": 2.9565723149892285e-06, "loss": 0.2929, "step": 8244 }, { "epoch": 0.5632984901277585, "grad_norm": 4.216622829437256, "learning_rate": 2.9558072209339306e-06, "loss": 0.4463, "step": 8245 }, { "epoch": 0.5633668101386896, "grad_norm": 4.04780912399292, "learning_rate": 2.9550421535263086e-06, "loss": 0.3273, "step": 8246 }, { "epoch": 0.5634351301496209, "grad_norm": 4.241066932678223, "learning_rate": 2.954277112803827e-06, "loss": 0.3139, "step": 8247 }, { "epoch": 0.563503450160552, "grad_norm": 4.677488803863525, "learning_rate": 2.9535120988039467e-06, "loss": 0.4264, "step": 8248 }, { "epoch": 0.5635717701714832, "grad_norm": 4.157680511474609, "learning_rate": 2.952747111564131e-06, "loss": 0.3816, "step": 8249 }, { "epoch": 0.5636400901824145, "grad_norm": 3.2031965255737305, "learning_rate": 2.951982151121837e-06, "loss": 0.249, "step": 8250 }, { "epoch": 0.5637084101933456, "grad_norm": 3.8406002521514893, "learning_rate": 2.951217217514523e-06, "loss": 0.3677, "step": 8251 }, { "epoch": 0.5637767302042769, "grad_norm": 3.899440050125122, "learning_rate": 2.950452310779645e-06, "loss": 0.288, "step": 8252 }, { "epoch": 0.563845050215208, "grad_norm": 4.894641399383545, "learning_rate": 2.9496874309546627e-06, "loss": 0.2776, "step": 8253 }, { "epoch": 0.5639133702261392, "grad_norm": 5.003360748291016, "learning_rate": 2.948922578077025e-06, "loss": 0.3311, "step": 8254 }, { "epoch": 0.5639816902370705, "grad_norm": 3.677112340927124, "learning_rate": 2.948157752184187e-06, "loss": 0.3067, "step": 8255 }, { "epoch": 0.5640500102480016, "grad_norm": 3.464111328125, "learning_rate": 2.9473929533136e-06, "loss": 0.3361, "step": 8256 }, { "epoch": 0.5641183302589329, "grad_norm": 4.034210205078125, "learning_rate": 2.946628181502716e-06, "loss": 0.3621, "step": 8257 }, { "epoch": 0.564186650269864, "grad_norm": 4.222460746765137, "learning_rate": 2.94586343678898e-06, "loss": 0.3599, "step": 8258 }, { "epoch": 0.5642549702807953, "grad_norm": 3.014920711517334, "learning_rate": 2.945098719209842e-06, "loss": 0.3309, "step": 8259 }, { "epoch": 0.5643232902917265, "grad_norm": 3.538997173309326, "learning_rate": 2.9443340288027485e-06, "loss": 0.2585, "step": 8260 }, { "epoch": 0.5643916103026576, "grad_norm": 3.7748281955718994, "learning_rate": 2.943569365605144e-06, "loss": 0.2443, "step": 8261 }, { "epoch": 0.5644599303135889, "grad_norm": 4.52616548538208, "learning_rate": 2.942804729654471e-06, "loss": 0.303, "step": 8262 }, { "epoch": 0.56452825032452, "grad_norm": 4.4763875007629395, "learning_rate": 2.942040120988172e-06, "loss": 0.2248, "step": 8263 }, { "epoch": 0.5645965703354513, "grad_norm": 3.7123076915740967, "learning_rate": 2.941275539643689e-06, "loss": 0.3239, "step": 8264 }, { "epoch": 0.5646648903463825, "grad_norm": 3.849652051925659, "learning_rate": 2.940510985658461e-06, "loss": 0.2901, "step": 8265 }, { "epoch": 0.5647332103573136, "grad_norm": 4.597590446472168, "learning_rate": 2.939746459069925e-06, "loss": 0.327, "step": 8266 }, { "epoch": 0.5648015303682449, "grad_norm": 4.109375476837158, "learning_rate": 2.9389819599155194e-06, "loss": 0.3379, "step": 8267 }, { "epoch": 0.564869850379176, "grad_norm": 3.5870916843414307, "learning_rate": 2.938217488232679e-06, "loss": 0.2296, "step": 8268 }, { "epoch": 0.5649381703901073, "grad_norm": 3.4395041465759277, "learning_rate": 2.937453044058839e-06, "loss": 0.2153, "step": 8269 }, { "epoch": 0.5650064904010385, "grad_norm": 5.6456403732299805, "learning_rate": 2.9366886274314296e-06, "loss": 0.3421, "step": 8270 }, { "epoch": 0.5650748104119697, "grad_norm": 6.066136837005615, "learning_rate": 2.9359242383878837e-06, "loss": 0.4313, "step": 8271 }, { "epoch": 0.5651431304229009, "grad_norm": 3.6276259422302246, "learning_rate": 2.9351598769656324e-06, "loss": 0.238, "step": 8272 }, { "epoch": 0.565211450433832, "grad_norm": 3.482598066329956, "learning_rate": 2.9343955432021053e-06, "loss": 0.245, "step": 8273 }, { "epoch": 0.5652797704447633, "grad_norm": 3.7087717056274414, "learning_rate": 2.9336312371347255e-06, "loss": 0.3223, "step": 8274 }, { "epoch": 0.5653480904556945, "grad_norm": 4.007534027099609, "learning_rate": 2.9328669588009227e-06, "loss": 0.4215, "step": 8275 }, { "epoch": 0.5654164104666257, "grad_norm": 4.4379143714904785, "learning_rate": 2.9321027082381204e-06, "loss": 0.3505, "step": 8276 }, { "epoch": 0.5654847304775569, "grad_norm": 4.317567348480225, "learning_rate": 2.931338485483743e-06, "loss": 0.2666, "step": 8277 }, { "epoch": 0.565553050488488, "grad_norm": 3.1800119876861572, "learning_rate": 2.930574290575211e-06, "loss": 0.2096, "step": 8278 }, { "epoch": 0.5656213704994193, "grad_norm": 4.5080742835998535, "learning_rate": 2.929810123549945e-06, "loss": 0.4548, "step": 8279 }, { "epoch": 0.5656896905103505, "grad_norm": 3.501866102218628, "learning_rate": 2.929045984445365e-06, "loss": 0.2887, "step": 8280 }, { "epoch": 0.5657580105212817, "grad_norm": 4.285065650939941, "learning_rate": 2.9282818732988897e-06, "loss": 0.4308, "step": 8281 }, { "epoch": 0.5658263305322129, "grad_norm": 4.001549243927002, "learning_rate": 2.9275177901479333e-06, "loss": 0.2667, "step": 8282 }, { "epoch": 0.5658946505431441, "grad_norm": 4.401519775390625, "learning_rate": 2.926753735029912e-06, "loss": 0.3253, "step": 8283 }, { "epoch": 0.5659629705540753, "grad_norm": 4.6126275062561035, "learning_rate": 2.9259897079822406e-06, "loss": 0.3842, "step": 8284 }, { "epoch": 0.5660312905650065, "grad_norm": 4.618966102600098, "learning_rate": 2.925225709042331e-06, "loss": 0.2909, "step": 8285 }, { "epoch": 0.5660996105759377, "grad_norm": 4.665844917297363, "learning_rate": 2.9244617382475933e-06, "loss": 0.4093, "step": 8286 }, { "epoch": 0.5661679305868689, "grad_norm": 3.9065868854522705, "learning_rate": 2.923697795635437e-06, "loss": 0.3149, "step": 8287 }, { "epoch": 0.5662362505978001, "grad_norm": 5.729527473449707, "learning_rate": 2.9229338812432704e-06, "loss": 0.3181, "step": 8288 }, { "epoch": 0.5663045706087313, "grad_norm": 3.6956257820129395, "learning_rate": 2.9221699951085035e-06, "loss": 0.2398, "step": 8289 }, { "epoch": 0.5663728906196625, "grad_norm": 3.7161693572998047, "learning_rate": 2.9214061372685376e-06, "loss": 0.2806, "step": 8290 }, { "epoch": 0.5664412106305937, "grad_norm": 4.655683994293213, "learning_rate": 2.9206423077607778e-06, "loss": 0.3077, "step": 8291 }, { "epoch": 0.5665095306415249, "grad_norm": 3.93493914604187, "learning_rate": 2.9198785066226276e-06, "loss": 0.2915, "step": 8292 }, { "epoch": 0.5665778506524561, "grad_norm": 3.357821226119995, "learning_rate": 2.9191147338914904e-06, "loss": 0.2991, "step": 8293 }, { "epoch": 0.5666461706633873, "grad_norm": 4.458127498626709, "learning_rate": 2.9183509896047606e-06, "loss": 0.3325, "step": 8294 }, { "epoch": 0.5667144906743186, "grad_norm": 4.018069267272949, "learning_rate": 2.9175872737998415e-06, "loss": 0.3658, "step": 8295 }, { "epoch": 0.5667828106852497, "grad_norm": 4.304079532623291, "learning_rate": 2.9168235865141286e-06, "loss": 0.317, "step": 8296 }, { "epoch": 0.5668511306961809, "grad_norm": 3.6203510761260986, "learning_rate": 2.9160599277850186e-06, "loss": 0.3035, "step": 8297 }, { "epoch": 0.5669194507071121, "grad_norm": 3.9194741249084473, "learning_rate": 2.915296297649904e-06, "loss": 0.3019, "step": 8298 }, { "epoch": 0.5669877707180433, "grad_norm": 3.2194037437438965, "learning_rate": 2.914532696146179e-06, "loss": 0.3132, "step": 8299 }, { "epoch": 0.5670560907289746, "grad_norm": 4.498065948486328, "learning_rate": 2.9137691233112345e-06, "loss": 0.3734, "step": 8300 }, { "epoch": 0.5671244107399057, "grad_norm": 3.707763671875, "learning_rate": 2.9130055791824622e-06, "loss": 0.319, "step": 8301 }, { "epoch": 0.5671927307508369, "grad_norm": 4.248396396636963, "learning_rate": 2.9122420637972486e-06, "loss": 0.2789, "step": 8302 }, { "epoch": 0.5672610507617681, "grad_norm": 3.62320613861084, "learning_rate": 2.9114785771929826e-06, "loss": 0.3294, "step": 8303 }, { "epoch": 0.5673293707726993, "grad_norm": 4.208742141723633, "learning_rate": 2.910715119407049e-06, "loss": 0.4143, "step": 8304 }, { "epoch": 0.5673976907836306, "grad_norm": 3.516680955886841, "learning_rate": 2.9099516904768334e-06, "loss": 0.354, "step": 8305 }, { "epoch": 0.5674660107945617, "grad_norm": 4.494412899017334, "learning_rate": 2.9091882904397182e-06, "loss": 0.3939, "step": 8306 }, { "epoch": 0.567534330805493, "grad_norm": 3.402308702468872, "learning_rate": 2.9084249193330845e-06, "loss": 0.2486, "step": 8307 }, { "epoch": 0.5676026508164241, "grad_norm": 2.738365411758423, "learning_rate": 2.907661577194314e-06, "loss": 0.2864, "step": 8308 }, { "epoch": 0.5676709708273553, "grad_norm": 4.6367902755737305, "learning_rate": 2.906898264060784e-06, "loss": 0.3479, "step": 8309 }, { "epoch": 0.5677392908382866, "grad_norm": 5.814846515655518, "learning_rate": 2.9061349799698755e-06, "loss": 0.2983, "step": 8310 }, { "epoch": 0.5678076108492177, "grad_norm": 5.950410842895508, "learning_rate": 2.905371724958959e-06, "loss": 0.3614, "step": 8311 }, { "epoch": 0.567875930860149, "grad_norm": 3.322490930557251, "learning_rate": 2.9046084990654127e-06, "loss": 0.2655, "step": 8312 }, { "epoch": 0.5679442508710801, "grad_norm": 3.841665029525757, "learning_rate": 2.9038453023266085e-06, "loss": 0.2586, "step": 8313 }, { "epoch": 0.5680125708820113, "grad_norm": 4.383139133453369, "learning_rate": 2.9030821347799195e-06, "loss": 0.4342, "step": 8314 }, { "epoch": 0.5680808908929426, "grad_norm": 4.212918281555176, "learning_rate": 2.9023189964627146e-06, "loss": 0.3614, "step": 8315 }, { "epoch": 0.5681492109038737, "grad_norm": 4.281619548797607, "learning_rate": 2.901555887412363e-06, "loss": 0.2307, "step": 8316 }, { "epoch": 0.568217530914805, "grad_norm": 4.079606056213379, "learning_rate": 2.9007928076662313e-06, "loss": 0.3564, "step": 8317 }, { "epoch": 0.5682858509257361, "grad_norm": 4.748884201049805, "learning_rate": 2.9000297572616883e-06, "loss": 0.2184, "step": 8318 }, { "epoch": 0.5683541709366674, "grad_norm": 3.4136693477630615, "learning_rate": 2.899266736236095e-06, "loss": 0.2573, "step": 8319 }, { "epoch": 0.5684224909475986, "grad_norm": 3.6950690746307373, "learning_rate": 2.898503744626817e-06, "loss": 0.4125, "step": 8320 }, { "epoch": 0.5684908109585297, "grad_norm": 4.218839168548584, "learning_rate": 2.897740782471214e-06, "loss": 0.375, "step": 8321 }, { "epoch": 0.568559130969461, "grad_norm": 5.431127548217773, "learning_rate": 2.896977849806649e-06, "loss": 0.3065, "step": 8322 }, { "epoch": 0.5686274509803921, "grad_norm": 3.4292941093444824, "learning_rate": 2.8962149466704784e-06, "loss": 0.2783, "step": 8323 }, { "epoch": 0.5686957709913234, "grad_norm": 3.645886182785034, "learning_rate": 2.895452073100059e-06, "loss": 0.33, "step": 8324 }, { "epoch": 0.5687640910022546, "grad_norm": 4.1073150634765625, "learning_rate": 2.894689229132749e-06, "loss": 0.3625, "step": 8325 }, { "epoch": 0.5688324110131857, "grad_norm": 4.587807655334473, "learning_rate": 2.893926414805904e-06, "loss": 0.4077, "step": 8326 }, { "epoch": 0.568900731024117, "grad_norm": 4.124634742736816, "learning_rate": 2.8931636301568714e-06, "loss": 0.3672, "step": 8327 }, { "epoch": 0.5689690510350481, "grad_norm": 4.038420677185059, "learning_rate": 2.8924008752230076e-06, "loss": 0.3658, "step": 8328 }, { "epoch": 0.5690373710459794, "grad_norm": 3.6152944564819336, "learning_rate": 2.8916381500416606e-06, "loss": 0.319, "step": 8329 }, { "epoch": 0.5691056910569106, "grad_norm": 4.268401622772217, "learning_rate": 2.8908754546501815e-06, "loss": 0.3062, "step": 8330 }, { "epoch": 0.5691740110678418, "grad_norm": 3.0318515300750732, "learning_rate": 2.8901127890859143e-06, "loss": 0.3368, "step": 8331 }, { "epoch": 0.569242331078773, "grad_norm": 4.248507022857666, "learning_rate": 2.8893501533862057e-06, "loss": 0.4253, "step": 8332 }, { "epoch": 0.5693106510897041, "grad_norm": 3.934389352798462, "learning_rate": 2.8885875475884008e-06, "loss": 0.3491, "step": 8333 }, { "epoch": 0.5693789711006354, "grad_norm": 3.811603307723999, "learning_rate": 2.8878249717298423e-06, "loss": 0.3432, "step": 8334 }, { "epoch": 0.5694472911115666, "grad_norm": 4.058487415313721, "learning_rate": 2.8870624258478705e-06, "loss": 0.3381, "step": 8335 }, { "epoch": 0.5695156111224978, "grad_norm": 3.91264009475708, "learning_rate": 2.886299909979826e-06, "loss": 0.256, "step": 8336 }, { "epoch": 0.569583931133429, "grad_norm": 3.6245667934417725, "learning_rate": 2.885537424163047e-06, "loss": 0.2351, "step": 8337 }, { "epoch": 0.5696522511443601, "grad_norm": 3.1568000316619873, "learning_rate": 2.884774968434871e-06, "loss": 0.2458, "step": 8338 }, { "epoch": 0.5697205711552914, "grad_norm": 4.438701629638672, "learning_rate": 2.8840125428326323e-06, "loss": 0.2916, "step": 8339 }, { "epoch": 0.5697888911662226, "grad_norm": 3.850778102874756, "learning_rate": 2.883250147393665e-06, "loss": 0.2746, "step": 8340 }, { "epoch": 0.5698572111771538, "grad_norm": 4.553271293640137, "learning_rate": 2.882487782155302e-06, "loss": 0.3309, "step": 8341 }, { "epoch": 0.569925531188085, "grad_norm": 3.9494612216949463, "learning_rate": 2.8817254471548765e-06, "loss": 0.2888, "step": 8342 }, { "epoch": 0.5699938511990162, "grad_norm": 4.01897668838501, "learning_rate": 2.8809631424297137e-06, "loss": 0.209, "step": 8343 }, { "epoch": 0.5700621712099474, "grad_norm": 4.736063480377197, "learning_rate": 2.8802008680171437e-06, "loss": 0.3398, "step": 8344 }, { "epoch": 0.5701304912208786, "grad_norm": 3.300142288208008, "learning_rate": 2.8794386239544934e-06, "loss": 0.3716, "step": 8345 }, { "epoch": 0.5701988112318098, "grad_norm": 3.9996798038482666, "learning_rate": 2.8786764102790896e-06, "loss": 0.2818, "step": 8346 }, { "epoch": 0.570267131242741, "grad_norm": 3.879720687866211, "learning_rate": 2.877914227028251e-06, "loss": 0.2301, "step": 8347 }, { "epoch": 0.5703354512536722, "grad_norm": 3.0952227115631104, "learning_rate": 2.8771520742393035e-06, "loss": 0.3295, "step": 8348 }, { "epoch": 0.5704037712646034, "grad_norm": 3.3050782680511475, "learning_rate": 2.8763899519495673e-06, "loss": 0.2477, "step": 8349 }, { "epoch": 0.5704720912755346, "grad_norm": 3.7561306953430176, "learning_rate": 2.875627860196361e-06, "loss": 0.2822, "step": 8350 }, { "epoch": 0.5705404112864658, "grad_norm": 4.075669288635254, "learning_rate": 2.874865799017002e-06, "loss": 0.3117, "step": 8351 }, { "epoch": 0.570608731297397, "grad_norm": 2.7932116985321045, "learning_rate": 2.8741037684488066e-06, "loss": 0.2923, "step": 8352 }, { "epoch": 0.5706770513083282, "grad_norm": 2.983262300491333, "learning_rate": 2.8733417685290894e-06, "loss": 0.2945, "step": 8353 }, { "epoch": 0.5707453713192594, "grad_norm": 4.614416599273682, "learning_rate": 2.8725797992951642e-06, "loss": 0.2338, "step": 8354 }, { "epoch": 0.5708136913301907, "grad_norm": 4.925442695617676, "learning_rate": 2.871817860784341e-06, "loss": 0.3439, "step": 8355 }, { "epoch": 0.5708820113411218, "grad_norm": 3.770679235458374, "learning_rate": 2.8710559530339314e-06, "loss": 0.3487, "step": 8356 }, { "epoch": 0.570950331352053, "grad_norm": 5.054464817047119, "learning_rate": 2.870294076081243e-06, "loss": 0.2935, "step": 8357 }, { "epoch": 0.5710186513629842, "grad_norm": 4.198545455932617, "learning_rate": 2.869532229963584e-06, "loss": 0.2291, "step": 8358 }, { "epoch": 0.5710869713739154, "grad_norm": 4.339919090270996, "learning_rate": 2.868770414718259e-06, "loss": 0.2766, "step": 8359 }, { "epoch": 0.5711552913848467, "grad_norm": 6.827444553375244, "learning_rate": 2.8680086303825727e-06, "loss": 0.3556, "step": 8360 }, { "epoch": 0.5712236113957778, "grad_norm": 3.3242809772491455, "learning_rate": 2.8672468769938255e-06, "loss": 0.2345, "step": 8361 }, { "epoch": 0.571291931406709, "grad_norm": 3.1061713695526123, "learning_rate": 2.8664851545893233e-06, "loss": 0.2171, "step": 8362 }, { "epoch": 0.5713602514176402, "grad_norm": 4.567415237426758, "learning_rate": 2.8657234632063605e-06, "loss": 0.3366, "step": 8363 }, { "epoch": 0.5714285714285714, "grad_norm": 5.440858364105225, "learning_rate": 2.8649618028822376e-06, "loss": 0.2608, "step": 8364 }, { "epoch": 0.5714968914395027, "grad_norm": 3.125208854675293, "learning_rate": 2.8642001736542513e-06, "loss": 0.2629, "step": 8365 }, { "epoch": 0.5715652114504338, "grad_norm": 3.2028450965881348, "learning_rate": 2.8634385755596963e-06, "loss": 0.3361, "step": 8366 }, { "epoch": 0.5716335314613651, "grad_norm": 4.591334342956543, "learning_rate": 2.8626770086358653e-06, "loss": 0.3375, "step": 8367 }, { "epoch": 0.5717018514722962, "grad_norm": 3.9640536308288574, "learning_rate": 2.8619154729200504e-06, "loss": 0.3211, "step": 8368 }, { "epoch": 0.5717701714832274, "grad_norm": 3.7356350421905518, "learning_rate": 2.8611539684495425e-06, "loss": 0.2522, "step": 8369 }, { "epoch": 0.5718384914941587, "grad_norm": 3.5746941566467285, "learning_rate": 2.860392495261631e-06, "loss": 0.3073, "step": 8370 }, { "epoch": 0.5719068115050898, "grad_norm": 4.697809219360352, "learning_rate": 2.859631053393601e-06, "loss": 0.3164, "step": 8371 }, { "epoch": 0.5719751315160211, "grad_norm": 3.616635799407959, "learning_rate": 2.85886964288274e-06, "loss": 0.2578, "step": 8372 }, { "epoch": 0.5720434515269522, "grad_norm": 4.767706394195557, "learning_rate": 2.8581082637663325e-06, "loss": 0.2943, "step": 8373 }, { "epoch": 0.5721117715378834, "grad_norm": 4.841503143310547, "learning_rate": 2.857346916081659e-06, "loss": 0.2235, "step": 8374 }, { "epoch": 0.5721800915488147, "grad_norm": 4.4454545974731445, "learning_rate": 2.8565855998660055e-06, "loss": 0.3373, "step": 8375 }, { "epoch": 0.5722484115597458, "grad_norm": 4.464327812194824, "learning_rate": 2.8558243151566465e-06, "loss": 0.3231, "step": 8376 }, { "epoch": 0.5723167315706771, "grad_norm": 4.0377326011657715, "learning_rate": 2.8550630619908614e-06, "loss": 0.2527, "step": 8377 }, { "epoch": 0.5723850515816082, "grad_norm": 3.5071916580200195, "learning_rate": 2.8543018404059285e-06, "loss": 0.2147, "step": 8378 }, { "epoch": 0.5724533715925395, "grad_norm": 3.4800913333892822, "learning_rate": 2.8535406504391228e-06, "loss": 0.2346, "step": 8379 }, { "epoch": 0.5725216916034707, "grad_norm": 3.8579189777374268, "learning_rate": 2.852779492127714e-06, "loss": 0.2194, "step": 8380 }, { "epoch": 0.5725900116144018, "grad_norm": 3.661724090576172, "learning_rate": 2.8520183655089783e-06, "loss": 0.2992, "step": 8381 }, { "epoch": 0.5726583316253331, "grad_norm": 4.014646053314209, "learning_rate": 2.851257270620184e-06, "loss": 0.4044, "step": 8382 }, { "epoch": 0.5727266516362642, "grad_norm": 4.571440696716309, "learning_rate": 2.8504962074986013e-06, "loss": 0.4196, "step": 8383 }, { "epoch": 0.5727949716471955, "grad_norm": 3.7048866748809814, "learning_rate": 2.849735176181495e-06, "loss": 0.2992, "step": 8384 }, { "epoch": 0.5728632916581267, "grad_norm": 4.116926670074463, "learning_rate": 2.8489741767061333e-06, "loss": 0.2674, "step": 8385 }, { "epoch": 0.5729316116690578, "grad_norm": 3.218125104904175, "learning_rate": 2.8482132091097785e-06, "loss": 0.21, "step": 8386 }, { "epoch": 0.5729999316799891, "grad_norm": 4.605668544769287, "learning_rate": 2.8474522734296954e-06, "loss": 0.2397, "step": 8387 }, { "epoch": 0.5730682516909202, "grad_norm": 4.645671844482422, "learning_rate": 2.846691369703142e-06, "loss": 0.2687, "step": 8388 }, { "epoch": 0.5731365717018515, "grad_norm": 2.8231537342071533, "learning_rate": 2.84593049796738e-06, "loss": 0.2618, "step": 8389 }, { "epoch": 0.5732048917127827, "grad_norm": 3.29311466217041, "learning_rate": 2.845169658259666e-06, "loss": 0.2732, "step": 8390 }, { "epoch": 0.5732732117237139, "grad_norm": 3.3921217918395996, "learning_rate": 2.844408850617258e-06, "loss": 0.3344, "step": 8391 }, { "epoch": 0.5733415317346451, "grad_norm": 4.7686767578125, "learning_rate": 2.843648075077409e-06, "loss": 0.384, "step": 8392 }, { "epoch": 0.5734098517455762, "grad_norm": 3.5439653396606445, "learning_rate": 2.8428873316773725e-06, "loss": 0.1889, "step": 8393 }, { "epoch": 0.5734781717565075, "grad_norm": 4.365605354309082, "learning_rate": 2.842126620454399e-06, "loss": 0.2647, "step": 8394 }, { "epoch": 0.5735464917674387, "grad_norm": 3.232222318649292, "learning_rate": 2.8413659414457426e-06, "loss": 0.268, "step": 8395 }, { "epoch": 0.5736148117783699, "grad_norm": 3.978415012359619, "learning_rate": 2.8406052946886474e-06, "loss": 0.2941, "step": 8396 }, { "epoch": 0.5736831317893011, "grad_norm": 3.8958566188812256, "learning_rate": 2.8398446802203617e-06, "loss": 0.3753, "step": 8397 }, { "epoch": 0.5737514518002322, "grad_norm": 4.312788963317871, "learning_rate": 2.839084098078131e-06, "loss": 0.3153, "step": 8398 }, { "epoch": 0.5738197718111635, "grad_norm": 3.473954916000366, "learning_rate": 2.8383235482992007e-06, "loss": 0.315, "step": 8399 }, { "epoch": 0.5738880918220947, "grad_norm": 4.490472316741943, "learning_rate": 2.8375630309208088e-06, "loss": 0.2924, "step": 8400 }, { "epoch": 0.5739564118330259, "grad_norm": 4.415393829345703, "learning_rate": 2.8368025459801987e-06, "loss": 0.3891, "step": 8401 }, { "epoch": 0.5740247318439571, "grad_norm": 4.010298252105713, "learning_rate": 2.8360420935146087e-06, "loss": 0.3436, "step": 8402 }, { "epoch": 0.5740930518548883, "grad_norm": 3.2451205253601074, "learning_rate": 2.8352816735612776e-06, "loss": 0.2979, "step": 8403 }, { "epoch": 0.5741613718658195, "grad_norm": 4.508078098297119, "learning_rate": 2.834521286157438e-06, "loss": 0.2688, "step": 8404 }, { "epoch": 0.5742296918767507, "grad_norm": 3.30098819732666, "learning_rate": 2.8337609313403265e-06, "loss": 0.2715, "step": 8405 }, { "epoch": 0.5742980118876819, "grad_norm": 5.113598823547363, "learning_rate": 2.8330006091471743e-06, "loss": 0.2939, "step": 8406 }, { "epoch": 0.5743663318986131, "grad_norm": 3.8220953941345215, "learning_rate": 2.832240319615215e-06, "loss": 0.3903, "step": 8407 }, { "epoch": 0.5744346519095443, "grad_norm": 4.243350982666016, "learning_rate": 2.831480062781674e-06, "loss": 0.2807, "step": 8408 }, { "epoch": 0.5745029719204755, "grad_norm": 4.271158695220947, "learning_rate": 2.8307198386837813e-06, "loss": 0.3366, "step": 8409 }, { "epoch": 0.5745712919314067, "grad_norm": 3.570122480392456, "learning_rate": 2.8299596473587623e-06, "loss": 0.2749, "step": 8410 }, { "epoch": 0.5746396119423379, "grad_norm": 3.478487968444824, "learning_rate": 2.829199488843844e-06, "loss": 0.3366, "step": 8411 }, { "epoch": 0.5747079319532691, "grad_norm": 3.5360357761383057, "learning_rate": 2.828439363176246e-06, "loss": 0.1895, "step": 8412 }, { "epoch": 0.5747762519642003, "grad_norm": 3.507383108139038, "learning_rate": 2.8276792703931906e-06, "loss": 0.2681, "step": 8413 }, { "epoch": 0.5748445719751315, "grad_norm": 4.808208465576172, "learning_rate": 2.8269192105318986e-06, "loss": 0.3856, "step": 8414 }, { "epoch": 0.5749128919860628, "grad_norm": 3.5593998432159424, "learning_rate": 2.8261591836295886e-06, "loss": 0.2921, "step": 8415 }, { "epoch": 0.5749812119969939, "grad_norm": 4.394647598266602, "learning_rate": 2.825399189723474e-06, "loss": 0.3294, "step": 8416 }, { "epoch": 0.5750495320079251, "grad_norm": 4.177145004272461, "learning_rate": 2.8246392288507726e-06, "loss": 0.3042, "step": 8417 }, { "epoch": 0.5751178520188563, "grad_norm": 3.6267383098602295, "learning_rate": 2.8238793010486963e-06, "loss": 0.293, "step": 8418 }, { "epoch": 0.5751861720297875, "grad_norm": 3.7457878589630127, "learning_rate": 2.8231194063544583e-06, "loss": 0.304, "step": 8419 }, { "epoch": 0.5752544920407188, "grad_norm": 3.915173292160034, "learning_rate": 2.822359544805267e-06, "loss": 0.3313, "step": 8420 }, { "epoch": 0.5753228120516499, "grad_norm": 3.854154109954834, "learning_rate": 2.8215997164383307e-06, "loss": 0.3273, "step": 8421 }, { "epoch": 0.5753911320625811, "grad_norm": 3.2673592567443848, "learning_rate": 2.8208399212908567e-06, "loss": 0.3085, "step": 8422 }, { "epoch": 0.5754594520735123, "grad_norm": 3.734842538833618, "learning_rate": 2.8200801594000516e-06, "loss": 0.264, "step": 8423 }, { "epoch": 0.5755277720844435, "grad_norm": 3.166327953338623, "learning_rate": 2.8193204308031165e-06, "loss": 0.2598, "step": 8424 }, { "epoch": 0.5755960920953748, "grad_norm": 3.5334038734436035, "learning_rate": 2.8185607355372543e-06, "loss": 0.265, "step": 8425 }, { "epoch": 0.5756644121063059, "grad_norm": 4.0504937171936035, "learning_rate": 2.8178010736396656e-06, "loss": 0.3172, "step": 8426 }, { "epoch": 0.5757327321172372, "grad_norm": 5.691551208496094, "learning_rate": 2.817041445147549e-06, "loss": 0.4892, "step": 8427 }, { "epoch": 0.5758010521281683, "grad_norm": 3.6132140159606934, "learning_rate": 2.816281850098101e-06, "loss": 0.3003, "step": 8428 }, { "epoch": 0.5758693721390995, "grad_norm": 3.7505228519439697, "learning_rate": 2.8155222885285167e-06, "loss": 0.2531, "step": 8429 }, { "epoch": 0.5759376921500308, "grad_norm": 3.279423475265503, "learning_rate": 2.81476276047599e-06, "loss": 0.2532, "step": 8430 }, { "epoch": 0.5760060121609619, "grad_norm": 4.696456432342529, "learning_rate": 2.8140032659777153e-06, "loss": 0.4042, "step": 8431 }, { "epoch": 0.5760743321718932, "grad_norm": 3.6356682777404785, "learning_rate": 2.8132438050708797e-06, "loss": 0.3481, "step": 8432 }, { "epoch": 0.5761426521828243, "grad_norm": 2.875021457672119, "learning_rate": 2.8124843777926726e-06, "loss": 0.3053, "step": 8433 }, { "epoch": 0.5762109721937555, "grad_norm": 6.255266189575195, "learning_rate": 2.811724984180282e-06, "loss": 0.3293, "step": 8434 }, { "epoch": 0.5762792922046868, "grad_norm": 5.88186502456665, "learning_rate": 2.810965624270896e-06, "loss": 0.3464, "step": 8435 }, { "epoch": 0.5763476122156179, "grad_norm": 4.837267875671387, "learning_rate": 2.8102062981016926e-06, "loss": 0.4073, "step": 8436 }, { "epoch": 0.5764159322265492, "grad_norm": 2.8632266521453857, "learning_rate": 2.8094470057098575e-06, "loss": 0.2437, "step": 8437 }, { "epoch": 0.5764842522374803, "grad_norm": 5.317376136779785, "learning_rate": 2.808687747132571e-06, "loss": 0.3067, "step": 8438 }, { "epoch": 0.5765525722484116, "grad_norm": 3.7476203441619873, "learning_rate": 2.8079285224070126e-06, "loss": 0.2832, "step": 8439 }, { "epoch": 0.5766208922593428, "grad_norm": 3.4897470474243164, "learning_rate": 2.8071693315703583e-06, "loss": 0.2567, "step": 8440 }, { "epoch": 0.5766892122702739, "grad_norm": 5.446495056152344, "learning_rate": 2.806410174659784e-06, "loss": 0.4053, "step": 8441 }, { "epoch": 0.5767575322812052, "grad_norm": 3.894434928894043, "learning_rate": 2.805651051712463e-06, "loss": 0.2765, "step": 8442 }, { "epoch": 0.5768258522921363, "grad_norm": 5.531675815582275, "learning_rate": 2.804891962765569e-06, "loss": 0.3168, "step": 8443 }, { "epoch": 0.5768941723030676, "grad_norm": 3.7616708278656006, "learning_rate": 2.804132907856272e-06, "loss": 0.4254, "step": 8444 }, { "epoch": 0.5769624923139988, "grad_norm": 3.318983793258667, "learning_rate": 2.80337388702174e-06, "loss": 0.2881, "step": 8445 }, { "epoch": 0.5770308123249299, "grad_norm": 4.025074481964111, "learning_rate": 2.802614900299141e-06, "loss": 0.2344, "step": 8446 }, { "epoch": 0.5770991323358612, "grad_norm": 4.514859676361084, "learning_rate": 2.8018559477256398e-06, "loss": 0.3288, "step": 8447 }, { "epoch": 0.5771674523467923, "grad_norm": 5.604260444641113, "learning_rate": 2.8010970293384027e-06, "loss": 0.39, "step": 8448 }, { "epoch": 0.5772357723577236, "grad_norm": 7.456281661987305, "learning_rate": 2.8003381451745895e-06, "loss": 0.2762, "step": 8449 }, { "epoch": 0.5773040923686548, "grad_norm": 4.265867710113525, "learning_rate": 2.7995792952713604e-06, "loss": 0.2991, "step": 8450 }, { "epoch": 0.577372412379586, "grad_norm": 3.1470627784729004, "learning_rate": 2.7988204796658755e-06, "loss": 0.4076, "step": 8451 }, { "epoch": 0.5774407323905172, "grad_norm": 5.036750793457031, "learning_rate": 2.7980616983952935e-06, "loss": 0.3594, "step": 8452 }, { "epoch": 0.5775090524014483, "grad_norm": 4.090817451477051, "learning_rate": 2.7973029514967667e-06, "loss": 0.3803, "step": 8453 }, { "epoch": 0.5775773724123796, "grad_norm": 3.7541663646698, "learning_rate": 2.796544239007451e-06, "loss": 0.2305, "step": 8454 }, { "epoch": 0.5776456924233108, "grad_norm": 3.84517765045166, "learning_rate": 2.7957855609644975e-06, "loss": 0.2902, "step": 8455 }, { "epoch": 0.577714012434242, "grad_norm": 2.8112857341766357, "learning_rate": 2.795026917405058e-06, "loss": 0.2572, "step": 8456 }, { "epoch": 0.5777823324451732, "grad_norm": 4.382497310638428, "learning_rate": 2.79426830836628e-06, "loss": 0.2736, "step": 8457 }, { "epoch": 0.5778506524561043, "grad_norm": 3.6518399715423584, "learning_rate": 2.79350973388531e-06, "loss": 0.2717, "step": 8458 }, { "epoch": 0.5779189724670356, "grad_norm": 4.194064617156982, "learning_rate": 2.792751193999295e-06, "loss": 0.3498, "step": 8459 }, { "epoch": 0.5779872924779668, "grad_norm": 3.3736696243286133, "learning_rate": 2.7919926887453787e-06, "loss": 0.3226, "step": 8460 }, { "epoch": 0.578055612488898, "grad_norm": 4.82313871383667, "learning_rate": 2.7912342181607015e-06, "loss": 0.2639, "step": 8461 }, { "epoch": 0.5781239324998292, "grad_norm": 4.740266799926758, "learning_rate": 2.790475782282405e-06, "loss": 0.4778, "step": 8462 }, { "epoch": 0.5781922525107605, "grad_norm": 3.5752789974212646, "learning_rate": 2.789717381147626e-06, "loss": 0.2706, "step": 8463 }, { "epoch": 0.5782605725216916, "grad_norm": 4.033843040466309, "learning_rate": 2.7889590147935056e-06, "loss": 0.3944, "step": 8464 }, { "epoch": 0.5783288925326228, "grad_norm": 3.6938226222991943, "learning_rate": 2.788200683257174e-06, "loss": 0.3744, "step": 8465 }, { "epoch": 0.578397212543554, "grad_norm": 3.663182258605957, "learning_rate": 2.787442386575767e-06, "loss": 0.2733, "step": 8466 }, { "epoch": 0.5784655325544852, "grad_norm": 3.328775644302368, "learning_rate": 2.7866841247864162e-06, "loss": 0.2652, "step": 8467 }, { "epoch": 0.5785338525654165, "grad_norm": 4.148248195648193, "learning_rate": 2.7859258979262535e-06, "loss": 0.3613, "step": 8468 }, { "epoch": 0.5786021725763476, "grad_norm": 4.1940083503723145, "learning_rate": 2.7851677060324032e-06, "loss": 0.3163, "step": 8469 }, { "epoch": 0.5786704925872788, "grad_norm": 3.069688081741333, "learning_rate": 2.784409549141995e-06, "loss": 0.2452, "step": 8470 }, { "epoch": 0.57873881259821, "grad_norm": 4.015169143676758, "learning_rate": 2.783651427292153e-06, "loss": 0.2914, "step": 8471 }, { "epoch": 0.5788071326091412, "grad_norm": 4.894075393676758, "learning_rate": 2.782893340520001e-06, "loss": 0.3769, "step": 8472 }, { "epoch": 0.5788754526200725, "grad_norm": 3.9208950996398926, "learning_rate": 2.7821352888626596e-06, "loss": 0.2915, "step": 8473 }, { "epoch": 0.5789437726310036, "grad_norm": 3.636904001235962, "learning_rate": 2.7813772723572487e-06, "loss": 0.3649, "step": 8474 }, { "epoch": 0.5790120926419349, "grad_norm": 3.0721395015716553, "learning_rate": 2.7806192910408866e-06, "loss": 0.2556, "step": 8475 }, { "epoch": 0.579080412652866, "grad_norm": 4.436734199523926, "learning_rate": 2.77986134495069e-06, "loss": 0.2023, "step": 8476 }, { "epoch": 0.5791487326637972, "grad_norm": 3.423868179321289, "learning_rate": 2.779103434123773e-06, "loss": 0.2762, "step": 8477 }, { "epoch": 0.5792170526747284, "grad_norm": 4.724982261657715, "learning_rate": 2.7783455585972485e-06, "loss": 0.352, "step": 8478 }, { "epoch": 0.5792853726856596, "grad_norm": 4.01289176940918, "learning_rate": 2.777587718408228e-06, "loss": 0.3705, "step": 8479 }, { "epoch": 0.5793536926965909, "grad_norm": 2.672667980194092, "learning_rate": 2.7768299135938203e-06, "loss": 0.2017, "step": 8480 }, { "epoch": 0.579422012707522, "grad_norm": 3.7852015495300293, "learning_rate": 2.776072144191134e-06, "loss": 0.273, "step": 8481 }, { "epoch": 0.5794903327184532, "grad_norm": 3.6749346256256104, "learning_rate": 2.7753144102372737e-06, "loss": 0.2812, "step": 8482 }, { "epoch": 0.5795586527293844, "grad_norm": 4.615044116973877, "learning_rate": 2.7745567117693442e-06, "loss": 0.3779, "step": 8483 }, { "epoch": 0.5796269727403156, "grad_norm": 4.934029579162598, "learning_rate": 2.7737990488244506e-06, "loss": 0.3263, "step": 8484 }, { "epoch": 0.5796952927512469, "grad_norm": 4.356973648071289, "learning_rate": 2.7730414214396895e-06, "loss": 0.2853, "step": 8485 }, { "epoch": 0.579763612762178, "grad_norm": 4.376649379730225, "learning_rate": 2.772283829652161e-06, "loss": 0.355, "step": 8486 }, { "epoch": 0.5798319327731093, "grad_norm": 3.307690382003784, "learning_rate": 2.7715262734989634e-06, "loss": 0.2345, "step": 8487 }, { "epoch": 0.5799002527840404, "grad_norm": 4.033560276031494, "learning_rate": 2.770768753017194e-06, "loss": 0.3274, "step": 8488 }, { "epoch": 0.5799685727949716, "grad_norm": 4.17089319229126, "learning_rate": 2.7700112682439416e-06, "loss": 0.2695, "step": 8489 }, { "epoch": 0.5800368928059029, "grad_norm": 3.599290132522583, "learning_rate": 2.7692538192163023e-06, "loss": 0.2826, "step": 8490 }, { "epoch": 0.580105212816834, "grad_norm": 4.464461803436279, "learning_rate": 2.768496405971365e-06, "loss": 0.4081, "step": 8491 }, { "epoch": 0.5801735328277653, "grad_norm": 3.3864927291870117, "learning_rate": 2.767739028546219e-06, "loss": 0.2573, "step": 8492 }, { "epoch": 0.5802418528386964, "grad_norm": 3.1420695781707764, "learning_rate": 2.7669816869779497e-06, "loss": 0.3206, "step": 8493 }, { "epoch": 0.5803101728496276, "grad_norm": 4.800455093383789, "learning_rate": 2.766224381303643e-06, "loss": 0.3838, "step": 8494 }, { "epoch": 0.5803784928605589, "grad_norm": 4.321641445159912, "learning_rate": 2.7654671115603816e-06, "loss": 0.2832, "step": 8495 }, { "epoch": 0.58044681287149, "grad_norm": 3.2783961296081543, "learning_rate": 2.764709877785248e-06, "loss": 0.2516, "step": 8496 }, { "epoch": 0.5805151328824213, "grad_norm": 3.1933727264404297, "learning_rate": 2.763952680015321e-06, "loss": 0.2291, "step": 8497 }, { "epoch": 0.5805834528933524, "grad_norm": 2.835649013519287, "learning_rate": 2.7631955182876788e-06, "loss": 0.2473, "step": 8498 }, { "epoch": 0.5806517729042837, "grad_norm": 4.187294960021973, "learning_rate": 2.7624383926393976e-06, "loss": 0.3764, "step": 8499 }, { "epoch": 0.5807200929152149, "grad_norm": 4.832720756530762, "learning_rate": 2.7616813031075526e-06, "loss": 0.3223, "step": 8500 }, { "epoch": 0.580788412926146, "grad_norm": 5.758338928222656, "learning_rate": 2.7609242497292148e-06, "loss": 0.3213, "step": 8501 }, { "epoch": 0.5808567329370773, "grad_norm": 4.124735355377197, "learning_rate": 2.7601672325414552e-06, "loss": 0.2993, "step": 8502 }, { "epoch": 0.5809250529480084, "grad_norm": 4.147117614746094, "learning_rate": 2.759410251581345e-06, "loss": 0.3247, "step": 8503 }, { "epoch": 0.5809933729589397, "grad_norm": 3.4735982418060303, "learning_rate": 2.7586533068859518e-06, "loss": 0.2797, "step": 8504 }, { "epoch": 0.5810616929698709, "grad_norm": 5.16066837310791, "learning_rate": 2.7578963984923372e-06, "loss": 0.2414, "step": 8505 }, { "epoch": 0.581130012980802, "grad_norm": 3.532155990600586, "learning_rate": 2.7571395264375677e-06, "loss": 0.1686, "step": 8506 }, { "epoch": 0.5811983329917333, "grad_norm": 2.9753620624542236, "learning_rate": 2.756382690758706e-06, "loss": 0.2148, "step": 8507 }, { "epoch": 0.5812666530026644, "grad_norm": 3.6568093299865723, "learning_rate": 2.7556258914928113e-06, "loss": 0.2202, "step": 8508 }, { "epoch": 0.5813349730135957, "grad_norm": 3.7106754779815674, "learning_rate": 2.7548691286769427e-06, "loss": 0.3266, "step": 8509 }, { "epoch": 0.5814032930245269, "grad_norm": 4.087608814239502, "learning_rate": 2.754112402348156e-06, "loss": 0.3202, "step": 8510 }, { "epoch": 0.5814716130354581, "grad_norm": 3.811718702316284, "learning_rate": 2.7533557125435064e-06, "loss": 0.3403, "step": 8511 }, { "epoch": 0.5815399330463893, "grad_norm": 3.8458094596862793, "learning_rate": 2.752599059300047e-06, "loss": 0.2881, "step": 8512 }, { "epoch": 0.5816082530573204, "grad_norm": 5.09836483001709, "learning_rate": 2.751842442654829e-06, "loss": 0.4098, "step": 8513 }, { "epoch": 0.5816765730682517, "grad_norm": 3.3022255897521973, "learning_rate": 2.751085862644902e-06, "loss": 0.2418, "step": 8514 }, { "epoch": 0.5817448930791829, "grad_norm": 4.565221309661865, "learning_rate": 2.750329319307314e-06, "loss": 0.4082, "step": 8515 }, { "epoch": 0.5818132130901141, "grad_norm": 4.167186737060547, "learning_rate": 2.7495728126791095e-06, "loss": 0.2365, "step": 8516 }, { "epoch": 0.5818815331010453, "grad_norm": 4.798982620239258, "learning_rate": 2.7488163427973363e-06, "loss": 0.3593, "step": 8517 }, { "epoch": 0.5819498531119764, "grad_norm": 4.211785316467285, "learning_rate": 2.7480599096990327e-06, "loss": 0.2454, "step": 8518 }, { "epoch": 0.5820181731229077, "grad_norm": 4.004752159118652, "learning_rate": 2.74730351342124e-06, "loss": 0.3188, "step": 8519 }, { "epoch": 0.5820864931338389, "grad_norm": 3.8294241428375244, "learning_rate": 2.7465471540009983e-06, "loss": 0.3593, "step": 8520 }, { "epoch": 0.5821548131447701, "grad_norm": 3.6952149868011475, "learning_rate": 2.7457908314753455e-06, "loss": 0.3056, "step": 8521 }, { "epoch": 0.5822231331557013, "grad_norm": 3.9567055702209473, "learning_rate": 2.745034545881313e-06, "loss": 0.2951, "step": 8522 }, { "epoch": 0.5822914531666326, "grad_norm": 4.174036026000977, "learning_rate": 2.744278297255937e-06, "loss": 0.2437, "step": 8523 }, { "epoch": 0.5823597731775637, "grad_norm": 3.1864850521087646, "learning_rate": 2.7435220856362476e-06, "loss": 0.3502, "step": 8524 }, { "epoch": 0.5824280931884949, "grad_norm": 2.9786083698272705, "learning_rate": 2.7427659110592766e-06, "loss": 0.3219, "step": 8525 }, { "epoch": 0.5824964131994261, "grad_norm": 5.191247940063477, "learning_rate": 2.7420097735620487e-06, "loss": 0.3583, "step": 8526 }, { "epoch": 0.5825647332103573, "grad_norm": 3.0199315547943115, "learning_rate": 2.7412536731815927e-06, "loss": 0.2345, "step": 8527 }, { "epoch": 0.5826330532212886, "grad_norm": 3.1406633853912354, "learning_rate": 2.740497609954931e-06, "loss": 0.2604, "step": 8528 }, { "epoch": 0.5827013732322197, "grad_norm": 3.8589038848876953, "learning_rate": 2.7397415839190875e-06, "loss": 0.2458, "step": 8529 }, { "epoch": 0.5827696932431509, "grad_norm": 3.536287546157837, "learning_rate": 2.738985595111081e-06, "loss": 0.2973, "step": 8530 }, { "epoch": 0.5828380132540821, "grad_norm": 3.393603563308716, "learning_rate": 2.7382296435679317e-06, "loss": 0.3015, "step": 8531 }, { "epoch": 0.5829063332650133, "grad_norm": 2.555185317993164, "learning_rate": 2.737473729326656e-06, "loss": 0.2036, "step": 8532 }, { "epoch": 0.5829746532759446, "grad_norm": 5.114118576049805, "learning_rate": 2.73671785242427e-06, "loss": 0.3852, "step": 8533 }, { "epoch": 0.5830429732868757, "grad_norm": 3.8754711151123047, "learning_rate": 2.735962012897785e-06, "loss": 0.3411, "step": 8534 }, { "epoch": 0.583111293297807, "grad_norm": 3.856919050216675, "learning_rate": 2.735206210784214e-06, "loss": 0.3389, "step": 8535 }, { "epoch": 0.5831796133087381, "grad_norm": 3.614560604095459, "learning_rate": 2.7344504461205652e-06, "loss": 0.2452, "step": 8536 }, { "epoch": 0.5832479333196693, "grad_norm": 3.946709394454956, "learning_rate": 2.7336947189438495e-06, "loss": 0.2724, "step": 8537 }, { "epoch": 0.5833162533306006, "grad_norm": 3.9430508613586426, "learning_rate": 2.7329390292910695e-06, "loss": 0.2595, "step": 8538 }, { "epoch": 0.5833845733415317, "grad_norm": 3.930422306060791, "learning_rate": 2.73218337719923e-06, "loss": 0.2507, "step": 8539 }, { "epoch": 0.583452893352463, "grad_norm": 3.6400046348571777, "learning_rate": 2.731427762705334e-06, "loss": 0.3699, "step": 8540 }, { "epoch": 0.5835212133633941, "grad_norm": 2.9272282123565674, "learning_rate": 2.730672185846384e-06, "loss": 0.2514, "step": 8541 }, { "epoch": 0.5835895333743253, "grad_norm": 3.3787126541137695, "learning_rate": 2.7299166466593736e-06, "loss": 0.2034, "step": 8542 }, { "epoch": 0.5836578533852566, "grad_norm": 4.234578609466553, "learning_rate": 2.729161145181303e-06, "loss": 0.3003, "step": 8543 }, { "epoch": 0.5837261733961877, "grad_norm": 2.916273593902588, "learning_rate": 2.728405681449167e-06, "loss": 0.2655, "step": 8544 }, { "epoch": 0.583794493407119, "grad_norm": 3.443572521209717, "learning_rate": 2.7276502554999585e-06, "loss": 0.2342, "step": 8545 }, { "epoch": 0.5838628134180501, "grad_norm": 4.790826320648193, "learning_rate": 2.7268948673706673e-06, "loss": 0.3199, "step": 8546 }, { "epoch": 0.5839311334289814, "grad_norm": 4.008310794830322, "learning_rate": 2.726139517098284e-06, "loss": 0.3052, "step": 8547 }, { "epoch": 0.5839994534399126, "grad_norm": 3.6363532543182373, "learning_rate": 2.7253842047197955e-06, "loss": 0.3247, "step": 8548 }, { "epoch": 0.5840677734508437, "grad_norm": 3.919520854949951, "learning_rate": 2.7246289302721892e-06, "loss": 0.2556, "step": 8549 }, { "epoch": 0.584136093461775, "grad_norm": 3.925187587738037, "learning_rate": 2.723873693792446e-06, "loss": 0.2344, "step": 8550 }, { "epoch": 0.5842044134727061, "grad_norm": 4.873880386352539, "learning_rate": 2.7231184953175494e-06, "loss": 0.4047, "step": 8551 }, { "epoch": 0.5842727334836374, "grad_norm": 3.8215131759643555, "learning_rate": 2.7223633348844787e-06, "loss": 0.3183, "step": 8552 }, { "epoch": 0.5843410534945686, "grad_norm": 3.293569326400757, "learning_rate": 2.721608212530215e-06, "loss": 0.257, "step": 8553 }, { "epoch": 0.5844093735054997, "grad_norm": 4.397093772888184, "learning_rate": 2.7208531282917316e-06, "loss": 0.3058, "step": 8554 }, { "epoch": 0.584477693516431, "grad_norm": 3.2043797969818115, "learning_rate": 2.7200980822060023e-06, "loss": 0.2486, "step": 8555 }, { "epoch": 0.5845460135273621, "grad_norm": 4.286147117614746, "learning_rate": 2.7193430743100023e-06, "loss": 0.4104, "step": 8556 }, { "epoch": 0.5846143335382934, "grad_norm": 4.934279918670654, "learning_rate": 2.7185881046407022e-06, "loss": 0.2354, "step": 8557 }, { "epoch": 0.5846826535492246, "grad_norm": 4.991907596588135, "learning_rate": 2.717833173235068e-06, "loss": 0.3163, "step": 8558 }, { "epoch": 0.5847509735601558, "grad_norm": 3.6402089595794678, "learning_rate": 2.7170782801300693e-06, "loss": 0.2684, "step": 8559 }, { "epoch": 0.584819293571087, "grad_norm": 3.8961069583892822, "learning_rate": 2.7163234253626706e-06, "loss": 0.2875, "step": 8560 }, { "epoch": 0.5848876135820181, "grad_norm": 4.29252815246582, "learning_rate": 2.7155686089698353e-06, "loss": 0.4989, "step": 8561 }, { "epoch": 0.5849559335929494, "grad_norm": 4.565150737762451, "learning_rate": 2.714813830988524e-06, "loss": 0.3203, "step": 8562 }, { "epoch": 0.5850242536038806, "grad_norm": 2.872645616531372, "learning_rate": 2.714059091455697e-06, "loss": 0.1952, "step": 8563 }, { "epoch": 0.5850925736148118, "grad_norm": 5.686465263366699, "learning_rate": 2.713304390408311e-06, "loss": 0.3041, "step": 8564 }, { "epoch": 0.585160893625743, "grad_norm": 3.8094608783721924, "learning_rate": 2.712549727883323e-06, "loss": 0.308, "step": 8565 }, { "epoch": 0.5852292136366741, "grad_norm": 2.5145881175994873, "learning_rate": 2.711795103917686e-06, "loss": 0.2675, "step": 8566 }, { "epoch": 0.5852975336476054, "grad_norm": 2.442230463027954, "learning_rate": 2.711040518548351e-06, "loss": 0.1797, "step": 8567 }, { "epoch": 0.5853658536585366, "grad_norm": 4.103476047515869, "learning_rate": 2.71028597181227e-06, "loss": 0.2879, "step": 8568 }, { "epoch": 0.5854341736694678, "grad_norm": 5.479560375213623, "learning_rate": 2.70953146374639e-06, "loss": 0.2484, "step": 8569 }, { "epoch": 0.585502493680399, "grad_norm": 3.358517646789551, "learning_rate": 2.7087769943876577e-06, "loss": 0.2202, "step": 8570 }, { "epoch": 0.5855708136913302, "grad_norm": 3.738393783569336, "learning_rate": 2.7080225637730166e-06, "loss": 0.2511, "step": 8571 }, { "epoch": 0.5856391337022614, "grad_norm": 2.947716236114502, "learning_rate": 2.707268171939409e-06, "loss": 0.217, "step": 8572 }, { "epoch": 0.5857074537131925, "grad_norm": 5.296523094177246, "learning_rate": 2.706513818923777e-06, "loss": 0.3041, "step": 8573 }, { "epoch": 0.5857757737241238, "grad_norm": 5.966710090637207, "learning_rate": 2.7057595047630607e-06, "loss": 0.4432, "step": 8574 }, { "epoch": 0.585844093735055, "grad_norm": 4.020462989807129, "learning_rate": 2.7050052294941914e-06, "loss": 0.2173, "step": 8575 }, { "epoch": 0.5859124137459862, "grad_norm": 4.092427730560303, "learning_rate": 2.704250993154109e-06, "loss": 0.2941, "step": 8576 }, { "epoch": 0.5859807337569174, "grad_norm": 3.148409605026245, "learning_rate": 2.703496795779744e-06, "loss": 0.2488, "step": 8577 }, { "epoch": 0.5860490537678485, "grad_norm": 4.48142671585083, "learning_rate": 2.702742637408029e-06, "loss": 0.3965, "step": 8578 }, { "epoch": 0.5861173737787798, "grad_norm": 3.809039831161499, "learning_rate": 2.701988518075892e-06, "loss": 0.3162, "step": 8579 }, { "epoch": 0.586185693789711, "grad_norm": 3.835590124130249, "learning_rate": 2.70123443782026e-06, "loss": 0.3341, "step": 8580 }, { "epoch": 0.5862540138006422, "grad_norm": 4.196529865264893, "learning_rate": 2.7004803966780586e-06, "loss": 0.2752, "step": 8581 }, { "epoch": 0.5863223338115734, "grad_norm": 4.413631916046143, "learning_rate": 2.6997263946862123e-06, "loss": 0.3292, "step": 8582 }, { "epoch": 0.5863906538225047, "grad_norm": 5.769351959228516, "learning_rate": 2.698972431881641e-06, "loss": 0.5224, "step": 8583 }, { "epoch": 0.5864589738334358, "grad_norm": 3.886012077331543, "learning_rate": 2.6982185083012655e-06, "loss": 0.3213, "step": 8584 }, { "epoch": 0.586527293844367, "grad_norm": 3.3001503944396973, "learning_rate": 2.6974646239820023e-06, "loss": 0.3462, "step": 8585 }, { "epoch": 0.5865956138552982, "grad_norm": 4.520765781402588, "learning_rate": 2.6967107789607686e-06, "loss": 0.3601, "step": 8586 }, { "epoch": 0.5866639338662294, "grad_norm": 4.0073723793029785, "learning_rate": 2.6959569732744766e-06, "loss": 0.3052, "step": 8587 }, { "epoch": 0.5867322538771607, "grad_norm": 4.912604808807373, "learning_rate": 2.6952032069600386e-06, "loss": 0.3575, "step": 8588 }, { "epoch": 0.5868005738880918, "grad_norm": 4.078853607177734, "learning_rate": 2.694449480054364e-06, "loss": 0.2582, "step": 8589 }, { "epoch": 0.586868893899023, "grad_norm": 3.542900800704956, "learning_rate": 2.6936957925943644e-06, "loss": 0.2405, "step": 8590 }, { "epoch": 0.5869372139099542, "grad_norm": 3.4828150272369385, "learning_rate": 2.6929421446169403e-06, "loss": 0.3174, "step": 8591 }, { "epoch": 0.5870055339208854, "grad_norm": 3.424858808517456, "learning_rate": 2.6921885361589993e-06, "loss": 0.2844, "step": 8592 }, { "epoch": 0.5870738539318167, "grad_norm": 4.655350685119629, "learning_rate": 2.6914349672574435e-06, "loss": 0.4035, "step": 8593 }, { "epoch": 0.5871421739427478, "grad_norm": 4.024553298950195, "learning_rate": 2.6906814379491727e-06, "loss": 0.2851, "step": 8594 }, { "epoch": 0.5872104939536791, "grad_norm": 3.514921188354492, "learning_rate": 2.689927948271085e-06, "loss": 0.3137, "step": 8595 }, { "epoch": 0.5872788139646102, "grad_norm": 4.255675792694092, "learning_rate": 2.689174498260076e-06, "loss": 0.3143, "step": 8596 }, { "epoch": 0.5873471339755414, "grad_norm": 3.1326751708984375, "learning_rate": 2.6884210879530416e-06, "loss": 0.2173, "step": 8597 }, { "epoch": 0.5874154539864727, "grad_norm": 3.3863513469696045, "learning_rate": 2.6876677173868745e-06, "loss": 0.2501, "step": 8598 }, { "epoch": 0.5874837739974038, "grad_norm": 4.056747913360596, "learning_rate": 2.686914386598464e-06, "loss": 0.3301, "step": 8599 }, { "epoch": 0.5875520940083351, "grad_norm": 3.397736072540283, "learning_rate": 2.686161095624699e-06, "loss": 0.284, "step": 8600 }, { "epoch": 0.5876204140192662, "grad_norm": 2.796696662902832, "learning_rate": 2.685407844502466e-06, "loss": 0.2792, "step": 8601 }, { "epoch": 0.5876887340301974, "grad_norm": 3.40187406539917, "learning_rate": 2.6846546332686513e-06, "loss": 0.3893, "step": 8602 }, { "epoch": 0.5877570540411287, "grad_norm": 4.305060386657715, "learning_rate": 2.6839014619601355e-06, "loss": 0.3211, "step": 8603 }, { "epoch": 0.5878253740520598, "grad_norm": 3.902724027633667, "learning_rate": 2.6831483306138007e-06, "loss": 0.2689, "step": 8604 }, { "epoch": 0.5878936940629911, "grad_norm": 4.790149688720703, "learning_rate": 2.6823952392665246e-06, "loss": 0.3958, "step": 8605 }, { "epoch": 0.5879620140739222, "grad_norm": 3.862776279449463, "learning_rate": 2.681642187955187e-06, "loss": 0.2981, "step": 8606 }, { "epoch": 0.5880303340848535, "grad_norm": 4.122937202453613, "learning_rate": 2.680889176716659e-06, "loss": 0.3549, "step": 8607 }, { "epoch": 0.5880986540957847, "grad_norm": 4.606778144836426, "learning_rate": 2.6801362055878145e-06, "loss": 0.3366, "step": 8608 }, { "epoch": 0.5881669741067158, "grad_norm": 4.237154006958008, "learning_rate": 2.6793832746055267e-06, "loss": 0.23, "step": 8609 }, { "epoch": 0.5882352941176471, "grad_norm": 3.6267783641815186, "learning_rate": 2.678630383806664e-06, "loss": 0.2444, "step": 8610 }, { "epoch": 0.5883036141285782, "grad_norm": 3.497101306915283, "learning_rate": 2.6778775332280907e-06, "loss": 0.2453, "step": 8611 }, { "epoch": 0.5883719341395095, "grad_norm": 3.9007644653320312, "learning_rate": 2.677124722906675e-06, "loss": 0.3505, "step": 8612 }, { "epoch": 0.5884402541504407, "grad_norm": 4.633070945739746, "learning_rate": 2.6763719528792784e-06, "loss": 0.3292, "step": 8613 }, { "epoch": 0.5885085741613718, "grad_norm": 2.9389090538024902, "learning_rate": 2.6756192231827633e-06, "loss": 0.2653, "step": 8614 }, { "epoch": 0.5885768941723031, "grad_norm": 6.788023471832275, "learning_rate": 2.6748665338539876e-06, "loss": 0.2738, "step": 8615 }, { "epoch": 0.5886452141832342, "grad_norm": 3.9401674270629883, "learning_rate": 2.6741138849298087e-06, "loss": 0.3106, "step": 8616 }, { "epoch": 0.5887135341941655, "grad_norm": 5.323347091674805, "learning_rate": 2.6733612764470824e-06, "loss": 0.3251, "step": 8617 }, { "epoch": 0.5887818542050967, "grad_norm": 4.221315860748291, "learning_rate": 2.6726087084426627e-06, "loss": 0.2989, "step": 8618 }, { "epoch": 0.5888501742160279, "grad_norm": 2.7993781566619873, "learning_rate": 2.6718561809533983e-06, "loss": 0.2083, "step": 8619 }, { "epoch": 0.5889184942269591, "grad_norm": 5.273470401763916, "learning_rate": 2.671103694016141e-06, "loss": 0.2262, "step": 8620 }, { "epoch": 0.5889868142378902, "grad_norm": 3.7235193252563477, "learning_rate": 2.6703512476677366e-06, "loss": 0.3514, "step": 8621 }, { "epoch": 0.5890551342488215, "grad_norm": 3.63895583152771, "learning_rate": 2.6695988419450314e-06, "loss": 0.2568, "step": 8622 }, { "epoch": 0.5891234542597527, "grad_norm": 4.353485107421875, "learning_rate": 2.6688464768848676e-06, "loss": 0.2868, "step": 8623 }, { "epoch": 0.5891917742706839, "grad_norm": 3.670341968536377, "learning_rate": 2.668094152524087e-06, "loss": 0.2551, "step": 8624 }, { "epoch": 0.5892600942816151, "grad_norm": 4.648011684417725, "learning_rate": 2.667341868899529e-06, "loss": 0.3599, "step": 8625 }, { "epoch": 0.5893284142925462, "grad_norm": 4.843110084533691, "learning_rate": 2.666589626048033e-06, "loss": 0.3841, "step": 8626 }, { "epoch": 0.5893967343034775, "grad_norm": 4.797299861907959, "learning_rate": 2.6658374240064304e-06, "loss": 0.2803, "step": 8627 }, { "epoch": 0.5894650543144087, "grad_norm": 3.8330154418945312, "learning_rate": 2.665085262811556e-06, "loss": 0.3449, "step": 8628 }, { "epoch": 0.5895333743253399, "grad_norm": 4.651729106903076, "learning_rate": 2.664333142500242e-06, "loss": 0.2676, "step": 8629 }, { "epoch": 0.5896016943362711, "grad_norm": 6.420537948608398, "learning_rate": 2.6635810631093196e-06, "loss": 0.3426, "step": 8630 }, { "epoch": 0.5896700143472023, "grad_norm": 4.226216793060303, "learning_rate": 2.6628290246756112e-06, "loss": 0.274, "step": 8631 }, { "epoch": 0.5897383343581335, "grad_norm": 4.247854709625244, "learning_rate": 2.6620770272359458e-06, "loss": 0.3703, "step": 8632 }, { "epoch": 0.5898066543690647, "grad_norm": 3.383530378341675, "learning_rate": 2.661325070827146e-06, "loss": 0.2196, "step": 8633 }, { "epoch": 0.5898749743799959, "grad_norm": 5.773009300231934, "learning_rate": 2.660573155486033e-06, "loss": 0.3199, "step": 8634 }, { "epoch": 0.5899432943909271, "grad_norm": 3.928454875946045, "learning_rate": 2.6598212812494253e-06, "loss": 0.2951, "step": 8635 }, { "epoch": 0.5900116144018583, "grad_norm": 3.802767515182495, "learning_rate": 2.6590694481541407e-06, "loss": 0.2245, "step": 8636 }, { "epoch": 0.5900799344127895, "grad_norm": 4.030580997467041, "learning_rate": 2.658317656236995e-06, "loss": 0.281, "step": 8637 }, { "epoch": 0.5901482544237207, "grad_norm": 3.4851551055908203, "learning_rate": 2.657565905534802e-06, "loss": 0.326, "step": 8638 }, { "epoch": 0.5902165744346519, "grad_norm": 4.323462963104248, "learning_rate": 2.6568141960843704e-06, "loss": 0.3486, "step": 8639 }, { "epoch": 0.5902848944455831, "grad_norm": 4.521346092224121, "learning_rate": 2.656062527922512e-06, "loss": 0.2075, "step": 8640 }, { "epoch": 0.5903532144565143, "grad_norm": 3.5280935764312744, "learning_rate": 2.6553109010860313e-06, "loss": 0.1696, "step": 8641 }, { "epoch": 0.5904215344674455, "grad_norm": 4.89918851852417, "learning_rate": 2.654559315611737e-06, "loss": 0.361, "step": 8642 }, { "epoch": 0.5904898544783768, "grad_norm": 3.235379457473755, "learning_rate": 2.653807771536431e-06, "loss": 0.2505, "step": 8643 }, { "epoch": 0.5905581744893079, "grad_norm": 4.098290920257568, "learning_rate": 2.653056268896912e-06, "loss": 0.3256, "step": 8644 }, { "epoch": 0.5906264945002391, "grad_norm": 3.170088529586792, "learning_rate": 2.6523048077299816e-06, "loss": 0.3023, "step": 8645 }, { "epoch": 0.5906948145111703, "grad_norm": 3.8869240283966064, "learning_rate": 2.651553388072436e-06, "loss": 0.2606, "step": 8646 }, { "epoch": 0.5907631345221015, "grad_norm": 5.242075443267822, "learning_rate": 2.6508020099610716e-06, "loss": 0.3852, "step": 8647 }, { "epoch": 0.5908314545330328, "grad_norm": 4.4008307456970215, "learning_rate": 2.650050673432679e-06, "loss": 0.3637, "step": 8648 }, { "epoch": 0.5908997745439639, "grad_norm": 2.9714038372039795, "learning_rate": 2.649299378524051e-06, "loss": 0.2656, "step": 8649 }, { "epoch": 0.5909680945548951, "grad_norm": 3.7813549041748047, "learning_rate": 2.6485481252719756e-06, "loss": 0.3474, "step": 8650 }, { "epoch": 0.5910364145658263, "grad_norm": 4.425690650939941, "learning_rate": 2.6477969137132407e-06, "loss": 0.3387, "step": 8651 }, { "epoch": 0.5911047345767575, "grad_norm": 3.8797013759613037, "learning_rate": 2.6470457438846296e-06, "loss": 0.3119, "step": 8652 }, { "epoch": 0.5911730545876888, "grad_norm": 4.1339192390441895, "learning_rate": 2.646294615822926e-06, "loss": 0.3742, "step": 8653 }, { "epoch": 0.5912413745986199, "grad_norm": 3.88273549079895, "learning_rate": 2.6455435295649106e-06, "loss": 0.3588, "step": 8654 }, { "epoch": 0.5913096946095512, "grad_norm": 5.0500383377075195, "learning_rate": 2.644792485147363e-06, "loss": 0.2521, "step": 8655 }, { "epoch": 0.5913780146204823, "grad_norm": 4.092228889465332, "learning_rate": 2.6440414826070574e-06, "loss": 0.2818, "step": 8656 }, { "epoch": 0.5914463346314135, "grad_norm": 3.888833999633789, "learning_rate": 2.643290521980771e-06, "loss": 0.4239, "step": 8657 }, { "epoch": 0.5915146546423448, "grad_norm": 4.421661376953125, "learning_rate": 2.642539603305274e-06, "loss": 0.3405, "step": 8658 }, { "epoch": 0.5915829746532759, "grad_norm": 2.9524736404418945, "learning_rate": 2.64178872661734e-06, "loss": 0.257, "step": 8659 }, { "epoch": 0.5916512946642072, "grad_norm": 2.7330737113952637, "learning_rate": 2.6410378919537347e-06, "loss": 0.1704, "step": 8660 }, { "epoch": 0.5917196146751383, "grad_norm": 3.8365328311920166, "learning_rate": 2.6402870993512246e-06, "loss": 0.2633, "step": 8661 }, { "epoch": 0.5917879346860695, "grad_norm": 2.729024648666382, "learning_rate": 2.639536348846575e-06, "loss": 0.2195, "step": 8662 }, { "epoch": 0.5918562546970008, "grad_norm": 3.762709617614746, "learning_rate": 2.63878564047655e-06, "loss": 0.405, "step": 8663 }, { "epoch": 0.5919245747079319, "grad_norm": 4.540624618530273, "learning_rate": 2.638034974277905e-06, "loss": 0.3811, "step": 8664 }, { "epoch": 0.5919928947188632, "grad_norm": 4.216101169586182, "learning_rate": 2.637284350287402e-06, "loss": 0.3204, "step": 8665 }, { "epoch": 0.5920612147297943, "grad_norm": 3.1903977394104004, "learning_rate": 2.6365337685417963e-06, "loss": 0.3308, "step": 8666 }, { "epoch": 0.5921295347407256, "grad_norm": 3.875652551651001, "learning_rate": 2.6357832290778418e-06, "loss": 0.2511, "step": 8667 }, { "epoch": 0.5921978547516568, "grad_norm": 4.0713419914245605, "learning_rate": 2.6350327319322894e-06, "loss": 0.3841, "step": 8668 }, { "epoch": 0.5922661747625879, "grad_norm": 3.396315813064575, "learning_rate": 2.6342822771418906e-06, "loss": 0.2542, "step": 8669 }, { "epoch": 0.5923344947735192, "grad_norm": 4.368788719177246, "learning_rate": 2.6335318647433915e-06, "loss": 0.2847, "step": 8670 }, { "epoch": 0.5924028147844503, "grad_norm": 3.195988178253174, "learning_rate": 2.6327814947735393e-06, "loss": 0.2595, "step": 8671 }, { "epoch": 0.5924711347953816, "grad_norm": 5.004416465759277, "learning_rate": 2.6320311672690766e-06, "loss": 0.3796, "step": 8672 }, { "epoch": 0.5925394548063128, "grad_norm": 3.2587709426879883, "learning_rate": 2.631280882266745e-06, "loss": 0.3283, "step": 8673 }, { "epoch": 0.592607774817244, "grad_norm": 3.799114942550659, "learning_rate": 2.6305306398032845e-06, "loss": 0.2296, "step": 8674 }, { "epoch": 0.5926760948281752, "grad_norm": 2.792656898498535, "learning_rate": 2.6297804399154326e-06, "loss": 0.1786, "step": 8675 }, { "epoch": 0.5927444148391063, "grad_norm": 3.9236109256744385, "learning_rate": 2.629030282639924e-06, "loss": 0.3725, "step": 8676 }, { "epoch": 0.5928127348500376, "grad_norm": 3.690046548843384, "learning_rate": 2.628280168013492e-06, "loss": 0.2537, "step": 8677 }, { "epoch": 0.5928810548609688, "grad_norm": 3.9215264320373535, "learning_rate": 2.627530096072867e-06, "loss": 0.307, "step": 8678 }, { "epoch": 0.5929493748719, "grad_norm": 3.964946985244751, "learning_rate": 2.6267800668547815e-06, "loss": 0.2646, "step": 8679 }, { "epoch": 0.5930176948828312, "grad_norm": 3.8404219150543213, "learning_rate": 2.626030080395957e-06, "loss": 0.2653, "step": 8680 }, { "epoch": 0.5930860148937623, "grad_norm": 4.4994587898254395, "learning_rate": 2.6252801367331227e-06, "loss": 0.2758, "step": 8681 }, { "epoch": 0.5931543349046936, "grad_norm": 3.513906717300415, "learning_rate": 2.6245302359029995e-06, "loss": 0.3107, "step": 8682 }, { "epoch": 0.5932226549156248, "grad_norm": 4.677910804748535, "learning_rate": 2.623780377942309e-06, "loss": 0.2628, "step": 8683 }, { "epoch": 0.593290974926556, "grad_norm": 4.172338962554932, "learning_rate": 2.623030562887769e-06, "loss": 0.2982, "step": 8684 }, { "epoch": 0.5933592949374872, "grad_norm": 4.097659587860107, "learning_rate": 2.6222807907760955e-06, "loss": 0.2712, "step": 8685 }, { "epoch": 0.5934276149484184, "grad_norm": 4.404809474945068, "learning_rate": 2.6215310616440035e-06, "loss": 0.401, "step": 8686 }, { "epoch": 0.5934959349593496, "grad_norm": 4.1760358810424805, "learning_rate": 2.6207813755282066e-06, "loss": 0.3266, "step": 8687 }, { "epoch": 0.5935642549702808, "grad_norm": 6.122134208679199, "learning_rate": 2.6200317324654122e-06, "loss": 0.2775, "step": 8688 }, { "epoch": 0.593632574981212, "grad_norm": 2.686647653579712, "learning_rate": 2.6192821324923295e-06, "loss": 0.248, "step": 8689 }, { "epoch": 0.5937008949921432, "grad_norm": 3.682655096054077, "learning_rate": 2.6185325756456654e-06, "loss": 0.2245, "step": 8690 }, { "epoch": 0.5937692150030744, "grad_norm": 3.643479824066162, "learning_rate": 2.617783061962123e-06, "loss": 0.2238, "step": 8691 }, { "epoch": 0.5938375350140056, "grad_norm": 4.489070892333984, "learning_rate": 2.6170335914784034e-06, "loss": 0.2726, "step": 8692 }, { "epoch": 0.5939058550249368, "grad_norm": 3.6416141986846924, "learning_rate": 2.6162841642312065e-06, "loss": 0.2417, "step": 8693 }, { "epoch": 0.593974175035868, "grad_norm": 4.1010422706604, "learning_rate": 2.6155347802572297e-06, "loss": 0.3163, "step": 8694 }, { "epoch": 0.5940424950467992, "grad_norm": 4.858270168304443, "learning_rate": 2.6147854395931706e-06, "loss": 0.2739, "step": 8695 }, { "epoch": 0.5941108150577304, "grad_norm": 4.759588718414307, "learning_rate": 2.614036142275719e-06, "loss": 0.3136, "step": 8696 }, { "epoch": 0.5941791350686616, "grad_norm": 3.6525633335113525, "learning_rate": 2.6132868883415664e-06, "loss": 0.2472, "step": 8697 }, { "epoch": 0.5942474550795929, "grad_norm": 4.373905181884766, "learning_rate": 2.612537677827404e-06, "loss": 0.3432, "step": 8698 }, { "epoch": 0.594315775090524, "grad_norm": 3.7294585704803467, "learning_rate": 2.611788510769919e-06, "loss": 0.299, "step": 8699 }, { "epoch": 0.5943840951014552, "grad_norm": 4.141368389129639, "learning_rate": 2.6110393872057916e-06, "loss": 0.3371, "step": 8700 }, { "epoch": 0.5944524151123864, "grad_norm": 3.480238199234009, "learning_rate": 2.610290307171709e-06, "loss": 0.3361, "step": 8701 }, { "epoch": 0.5945207351233176, "grad_norm": 3.9675095081329346, "learning_rate": 2.6095412707043496e-06, "loss": 0.2782, "step": 8702 }, { "epoch": 0.5945890551342489, "grad_norm": 4.488548755645752, "learning_rate": 2.6087922778403932e-06, "loss": 0.2814, "step": 8703 }, { "epoch": 0.59465737514518, "grad_norm": 4.863644123077393, "learning_rate": 2.608043328616514e-06, "loss": 0.3017, "step": 8704 }, { "epoch": 0.5947256951561112, "grad_norm": 4.212576866149902, "learning_rate": 2.607294423069387e-06, "loss": 0.3418, "step": 8705 }, { "epoch": 0.5947940151670424, "grad_norm": 4.098137855529785, "learning_rate": 2.6065455612356843e-06, "loss": 0.2913, "step": 8706 }, { "epoch": 0.5948623351779736, "grad_norm": 3.3041598796844482, "learning_rate": 2.6057967431520757e-06, "loss": 0.2808, "step": 8707 }, { "epoch": 0.5949306551889049, "grad_norm": 2.729779005050659, "learning_rate": 2.6050479688552292e-06, "loss": 0.2517, "step": 8708 }, { "epoch": 0.594998975199836, "grad_norm": 4.442595958709717, "learning_rate": 2.6042992383818088e-06, "loss": 0.2982, "step": 8709 }, { "epoch": 0.5950672952107673, "grad_norm": 4.2476887702941895, "learning_rate": 2.6035505517684794e-06, "loss": 0.2497, "step": 8710 }, { "epoch": 0.5951356152216984, "grad_norm": 4.165502548217773, "learning_rate": 2.6028019090519e-06, "loss": 0.289, "step": 8711 }, { "epoch": 0.5952039352326296, "grad_norm": 4.084407806396484, "learning_rate": 2.602053310268734e-06, "loss": 0.2859, "step": 8712 }, { "epoch": 0.5952722552435609, "grad_norm": 4.460696220397949, "learning_rate": 2.6013047554556342e-06, "loss": 0.1751, "step": 8713 }, { "epoch": 0.595340575254492, "grad_norm": 4.460668087005615, "learning_rate": 2.6005562446492556e-06, "loss": 0.3311, "step": 8714 }, { "epoch": 0.5954088952654233, "grad_norm": 3.176792621612549, "learning_rate": 2.599807777886253e-06, "loss": 0.1359, "step": 8715 }, { "epoch": 0.5954772152763544, "grad_norm": 4.230162143707275, "learning_rate": 2.5990593552032768e-06, "loss": 0.2213, "step": 8716 }, { "epoch": 0.5955455352872856, "grad_norm": 3.9779064655303955, "learning_rate": 2.598310976636972e-06, "loss": 0.2978, "step": 8717 }, { "epoch": 0.5956138552982169, "grad_norm": 3.8552825450897217, "learning_rate": 2.5975626422239874e-06, "loss": 0.2705, "step": 8718 }, { "epoch": 0.595682175309148, "grad_norm": 4.1790289878845215, "learning_rate": 2.5968143520009667e-06, "loss": 0.3158, "step": 8719 }, { "epoch": 0.5957504953200793, "grad_norm": 4.115687847137451, "learning_rate": 2.5960661060045525e-06, "loss": 0.2561, "step": 8720 }, { "epoch": 0.5958188153310104, "grad_norm": 4.251437664031982, "learning_rate": 2.5953179042713823e-06, "loss": 0.309, "step": 8721 }, { "epoch": 0.5958871353419417, "grad_norm": 3.6287362575531006, "learning_rate": 2.594569746838095e-06, "loss": 0.3465, "step": 8722 }, { "epoch": 0.5959554553528729, "grad_norm": 4.226949691772461, "learning_rate": 2.5938216337413246e-06, "loss": 0.256, "step": 8723 }, { "epoch": 0.596023775363804, "grad_norm": 3.849189281463623, "learning_rate": 2.5930735650177063e-06, "loss": 0.3811, "step": 8724 }, { "epoch": 0.5960920953747353, "grad_norm": 4.559587478637695, "learning_rate": 2.5923255407038696e-06, "loss": 0.2952, "step": 8725 }, { "epoch": 0.5961604153856664, "grad_norm": 4.140587329864502, "learning_rate": 2.5915775608364427e-06, "loss": 0.3791, "step": 8726 }, { "epoch": 0.5962287353965977, "grad_norm": 5.276636123657227, "learning_rate": 2.5908296254520535e-06, "loss": 0.3093, "step": 8727 }, { "epoch": 0.5962970554075289, "grad_norm": 4.150853633880615, "learning_rate": 2.5900817345873263e-06, "loss": 0.291, "step": 8728 }, { "epoch": 0.59636537541846, "grad_norm": 3.979721784591675, "learning_rate": 2.5893338882788827e-06, "loss": 0.2603, "step": 8729 }, { "epoch": 0.5964336954293913, "grad_norm": 4.748447895050049, "learning_rate": 2.588586086563342e-06, "loss": 0.2244, "step": 8730 }, { "epoch": 0.5965020154403224, "grad_norm": 3.8897109031677246, "learning_rate": 2.587838329477324e-06, "loss": 0.3312, "step": 8731 }, { "epoch": 0.5965703354512537, "grad_norm": 3.4661734104156494, "learning_rate": 2.587090617057445e-06, "loss": 0.2756, "step": 8732 }, { "epoch": 0.5966386554621849, "grad_norm": 2.952773094177246, "learning_rate": 2.586342949340314e-06, "loss": 0.2028, "step": 8733 }, { "epoch": 0.5967069754731161, "grad_norm": 4.0924577713012695, "learning_rate": 2.5855953263625463e-06, "loss": 0.3543, "step": 8734 }, { "epoch": 0.5967752954840473, "grad_norm": 2.423158645629883, "learning_rate": 2.5848477481607505e-06, "loss": 0.201, "step": 8735 }, { "epoch": 0.5968436154949784, "grad_norm": 6.358595848083496, "learning_rate": 2.5841002147715328e-06, "loss": 0.2987, "step": 8736 }, { "epoch": 0.5969119355059097, "grad_norm": 4.992985725402832, "learning_rate": 2.5833527262314983e-06, "loss": 0.3491, "step": 8737 }, { "epoch": 0.5969802555168409, "grad_norm": 5.759675025939941, "learning_rate": 2.5826052825772487e-06, "loss": 0.3059, "step": 8738 }, { "epoch": 0.5970485755277721, "grad_norm": 3.6331255435943604, "learning_rate": 2.581857883845385e-06, "loss": 0.2697, "step": 8739 }, { "epoch": 0.5971168955387033, "grad_norm": 3.379793405532837, "learning_rate": 2.5811105300725065e-06, "loss": 0.2302, "step": 8740 }, { "epoch": 0.5971852155496344, "grad_norm": 4.614860534667969, "learning_rate": 2.5803632212952064e-06, "loss": 0.4365, "step": 8741 }, { "epoch": 0.5972535355605657, "grad_norm": 4.755061626434326, "learning_rate": 2.5796159575500806e-06, "loss": 0.2686, "step": 8742 }, { "epoch": 0.5973218555714969, "grad_norm": 4.8434906005859375, "learning_rate": 2.57886873887372e-06, "loss": 0.2401, "step": 8743 }, { "epoch": 0.5973901755824281, "grad_norm": 4.545299053192139, "learning_rate": 2.5781215653027145e-06, "loss": 0.2426, "step": 8744 }, { "epoch": 0.5974584955933593, "grad_norm": 4.9562530517578125, "learning_rate": 2.5773744368736502e-06, "loss": 0.2353, "step": 8745 }, { "epoch": 0.5975268156042906, "grad_norm": 4.11366605758667, "learning_rate": 2.5766273536231114e-06, "loss": 0.2624, "step": 8746 }, { "epoch": 0.5975951356152217, "grad_norm": 2.711003303527832, "learning_rate": 2.575880315587682e-06, "loss": 0.2145, "step": 8747 }, { "epoch": 0.5976634556261529, "grad_norm": 3.757497549057007, "learning_rate": 2.575133322803944e-06, "loss": 0.2289, "step": 8748 }, { "epoch": 0.5977317756370841, "grad_norm": 4.40900182723999, "learning_rate": 2.574386375308473e-06, "loss": 0.309, "step": 8749 }, { "epoch": 0.5978000956480153, "grad_norm": 4.806961536407471, "learning_rate": 2.573639473137845e-06, "loss": 0.4045, "step": 8750 }, { "epoch": 0.5978684156589466, "grad_norm": 3.35949444770813, "learning_rate": 2.572892616328635e-06, "loss": 0.3086, "step": 8751 }, { "epoch": 0.5979367356698777, "grad_norm": 4.699203968048096, "learning_rate": 2.5721458049174168e-06, "loss": 0.3093, "step": 8752 }, { "epoch": 0.5980050556808089, "grad_norm": 3.733285903930664, "learning_rate": 2.571399038940754e-06, "loss": 0.3146, "step": 8753 }, { "epoch": 0.5980733756917401, "grad_norm": 4.609850883483887, "learning_rate": 2.5706523184352185e-06, "loss": 0.2472, "step": 8754 }, { "epoch": 0.5981416957026713, "grad_norm": 3.8469018936157227, "learning_rate": 2.5699056434373735e-06, "loss": 0.1781, "step": 8755 }, { "epoch": 0.5982100157136025, "grad_norm": 4.807186603546143, "learning_rate": 2.5691590139837832e-06, "loss": 0.3289, "step": 8756 }, { "epoch": 0.5982783357245337, "grad_norm": 7.131807327270508, "learning_rate": 2.568412430111006e-06, "loss": 0.4057, "step": 8757 }, { "epoch": 0.598346655735465, "grad_norm": 3.382657051086426, "learning_rate": 2.5676658918556007e-06, "loss": 0.2891, "step": 8758 }, { "epoch": 0.5984149757463961, "grad_norm": 3.119201421737671, "learning_rate": 2.566919399254124e-06, "loss": 0.2325, "step": 8759 }, { "epoch": 0.5984832957573273, "grad_norm": 3.108130931854248, "learning_rate": 2.5661729523431295e-06, "loss": 0.2268, "step": 8760 }, { "epoch": 0.5985516157682585, "grad_norm": 3.2871551513671875, "learning_rate": 2.5654265511591684e-06, "loss": 0.2503, "step": 8761 }, { "epoch": 0.5986199357791897, "grad_norm": 4.254929065704346, "learning_rate": 2.56468019573879e-06, "loss": 0.3351, "step": 8762 }, { "epoch": 0.598688255790121, "grad_norm": 4.645315170288086, "learning_rate": 2.563933886118541e-06, "loss": 0.3843, "step": 8763 }, { "epoch": 0.5987565758010521, "grad_norm": 3.3640592098236084, "learning_rate": 2.5631876223349677e-06, "loss": 0.2591, "step": 8764 }, { "epoch": 0.5988248958119833, "grad_norm": 4.985630035400391, "learning_rate": 2.5624414044246105e-06, "loss": 0.3753, "step": 8765 }, { "epoch": 0.5988932158229145, "grad_norm": 3.8247649669647217, "learning_rate": 2.561695232424011e-06, "loss": 0.2923, "step": 8766 }, { "epoch": 0.5989615358338457, "grad_norm": 4.330562591552734, "learning_rate": 2.5609491063697065e-06, "loss": 0.4091, "step": 8767 }, { "epoch": 0.599029855844777, "grad_norm": 3.812044143676758, "learning_rate": 2.560203026298235e-06, "loss": 0.3412, "step": 8768 }, { "epoch": 0.5990981758557081, "grad_norm": 5.041745185852051, "learning_rate": 2.559456992246128e-06, "loss": 0.3131, "step": 8769 }, { "epoch": 0.5991664958666394, "grad_norm": 3.1292104721069336, "learning_rate": 2.5587110042499152e-06, "loss": 0.2835, "step": 8770 }, { "epoch": 0.5992348158775705, "grad_norm": 4.067700386047363, "learning_rate": 2.557965062346129e-06, "loss": 0.3321, "step": 8771 }, { "epoch": 0.5993031358885017, "grad_norm": 3.040830373764038, "learning_rate": 2.5572191665712963e-06, "loss": 0.1515, "step": 8772 }, { "epoch": 0.599371455899433, "grad_norm": 2.997832775115967, "learning_rate": 2.5564733169619387e-06, "loss": 0.2147, "step": 8773 }, { "epoch": 0.5994397759103641, "grad_norm": 4.591594219207764, "learning_rate": 2.5557275135545805e-06, "loss": 0.3878, "step": 8774 }, { "epoch": 0.5995080959212954, "grad_norm": 3.928105354309082, "learning_rate": 2.554981756385741e-06, "loss": 0.2783, "step": 8775 }, { "epoch": 0.5995764159322265, "grad_norm": 3.9451687335968018, "learning_rate": 2.554236045491938e-06, "loss": 0.3092, "step": 8776 }, { "epoch": 0.5996447359431577, "grad_norm": 4.224898338317871, "learning_rate": 2.553490380909688e-06, "loss": 0.2958, "step": 8777 }, { "epoch": 0.599713055954089, "grad_norm": 4.604305744171143, "learning_rate": 2.5527447626755025e-06, "loss": 0.3327, "step": 8778 }, { "epoch": 0.5997813759650201, "grad_norm": 3.356637716293335, "learning_rate": 2.5519991908258937e-06, "loss": 0.3018, "step": 8779 }, { "epoch": 0.5998496959759514, "grad_norm": 4.304012775421143, "learning_rate": 2.551253665397369e-06, "loss": 0.3343, "step": 8780 }, { "epoch": 0.5999180159868825, "grad_norm": 4.660634994506836, "learning_rate": 2.5505081864264377e-06, "loss": 0.271, "step": 8781 }, { "epoch": 0.5999863359978138, "grad_norm": 4.947070598602295, "learning_rate": 2.5497627539496e-06, "loss": 0.3626, "step": 8782 }, { "epoch": 0.600054656008745, "grad_norm": 4.4863104820251465, "learning_rate": 2.54901736800336e-06, "loss": 0.3889, "step": 8783 }, { "epoch": 0.6001229760196761, "grad_norm": 7.096210479736328, "learning_rate": 2.5482720286242175e-06, "loss": 0.5227, "step": 8784 }, { "epoch": 0.6001912960306074, "grad_norm": 3.9953136444091797, "learning_rate": 2.54752673584867e-06, "loss": 0.2444, "step": 8785 }, { "epoch": 0.6002596160415385, "grad_norm": 4.662413120269775, "learning_rate": 2.5467814897132106e-06, "loss": 0.3658, "step": 8786 }, { "epoch": 0.6003279360524698, "grad_norm": 3.2346956729888916, "learning_rate": 2.546036290254333e-06, "loss": 0.2473, "step": 8787 }, { "epoch": 0.600396256063401, "grad_norm": 4.234926223754883, "learning_rate": 2.545291137508528e-06, "loss": 0.3465, "step": 8788 }, { "epoch": 0.6004645760743321, "grad_norm": 2.2632315158843994, "learning_rate": 2.5445460315122844e-06, "loss": 0.2176, "step": 8789 }, { "epoch": 0.6005328960852634, "grad_norm": 3.47351336479187, "learning_rate": 2.5438009723020865e-06, "loss": 0.2398, "step": 8790 }, { "epoch": 0.6006012160961945, "grad_norm": 3.810917377471924, "learning_rate": 2.543055959914419e-06, "loss": 0.2527, "step": 8791 }, { "epoch": 0.6006695361071258, "grad_norm": 4.738777160644531, "learning_rate": 2.5423109943857626e-06, "loss": 0.3099, "step": 8792 }, { "epoch": 0.600737856118057, "grad_norm": 3.775904893875122, "learning_rate": 2.5415660757525966e-06, "loss": 0.278, "step": 8793 }, { "epoch": 0.6008061761289882, "grad_norm": 4.018228054046631, "learning_rate": 2.5408212040513977e-06, "loss": 0.3339, "step": 8794 }, { "epoch": 0.6008744961399194, "grad_norm": 3.5554893016815186, "learning_rate": 2.5400763793186394e-06, "loss": 0.3313, "step": 8795 }, { "epoch": 0.6009428161508505, "grad_norm": 5.015514850616455, "learning_rate": 2.539331601590795e-06, "loss": 0.2248, "step": 8796 }, { "epoch": 0.6010111361617818, "grad_norm": 4.15749979019165, "learning_rate": 2.5385868709043345e-06, "loss": 0.3164, "step": 8797 }, { "epoch": 0.601079456172713, "grad_norm": 3.4006361961364746, "learning_rate": 2.537842187295724e-06, "loss": 0.2998, "step": 8798 }, { "epoch": 0.6011477761836442, "grad_norm": 3.708540916442871, "learning_rate": 2.5370975508014296e-06, "loss": 0.3854, "step": 8799 }, { "epoch": 0.6012160961945754, "grad_norm": 5.262868404388428, "learning_rate": 2.536352961457913e-06, "loss": 0.3263, "step": 8800 }, { "epoch": 0.6012844162055065, "grad_norm": 4.146543979644775, "learning_rate": 2.535608419301638e-06, "loss": 0.3686, "step": 8801 }, { "epoch": 0.6013527362164378, "grad_norm": 4.102266311645508, "learning_rate": 2.5348639243690587e-06, "loss": 0.2682, "step": 8802 }, { "epoch": 0.601421056227369, "grad_norm": 3.4387757778167725, "learning_rate": 2.534119476696633e-06, "loss": 0.3882, "step": 8803 }, { "epoch": 0.6014893762383002, "grad_norm": 4.149074077606201, "learning_rate": 2.533375076320815e-06, "loss": 0.2677, "step": 8804 }, { "epoch": 0.6015576962492314, "grad_norm": 4.027284145355225, "learning_rate": 2.5326307232780566e-06, "loss": 0.2758, "step": 8805 }, { "epoch": 0.6016260162601627, "grad_norm": 4.086045742034912, "learning_rate": 2.5318864176048036e-06, "loss": 0.4134, "step": 8806 }, { "epoch": 0.6016943362710938, "grad_norm": 4.875331401824951, "learning_rate": 2.5311421593375057e-06, "loss": 0.2935, "step": 8807 }, { "epoch": 0.601762656282025, "grad_norm": 4.432849884033203, "learning_rate": 2.5303979485126063e-06, "loss": 0.3292, "step": 8808 }, { "epoch": 0.6018309762929562, "grad_norm": 4.1913909912109375, "learning_rate": 2.529653785166548e-06, "loss": 0.2776, "step": 8809 }, { "epoch": 0.6018992963038874, "grad_norm": 2.890340805053711, "learning_rate": 2.5289096693357692e-06, "loss": 0.2443, "step": 8810 }, { "epoch": 0.6019676163148187, "grad_norm": 3.3114540576934814, "learning_rate": 2.528165601056708e-06, "loss": 0.3288, "step": 8811 }, { "epoch": 0.6020359363257498, "grad_norm": 4.834712505340576, "learning_rate": 2.5274215803657993e-06, "loss": 0.3786, "step": 8812 }, { "epoch": 0.602104256336681, "grad_norm": 3.112445831298828, "learning_rate": 2.526677607299477e-06, "loss": 0.3136, "step": 8813 }, { "epoch": 0.6021725763476122, "grad_norm": 3.12882399559021, "learning_rate": 2.52593368189417e-06, "loss": 0.229, "step": 8814 }, { "epoch": 0.6022408963585434, "grad_norm": 3.5536394119262695, "learning_rate": 2.5251898041863064e-06, "loss": 0.2227, "step": 8815 }, { "epoch": 0.6023092163694747, "grad_norm": 3.2887606620788574, "learning_rate": 2.5244459742123122e-06, "loss": 0.2361, "step": 8816 }, { "epoch": 0.6023775363804058, "grad_norm": 4.053205490112305, "learning_rate": 2.5237021920086118e-06, "loss": 0.279, "step": 8817 }, { "epoch": 0.6024458563913371, "grad_norm": 5.074942111968994, "learning_rate": 2.5229584576116253e-06, "loss": 0.3065, "step": 8818 }, { "epoch": 0.6025141764022682, "grad_norm": 4.33262825012207, "learning_rate": 2.52221477105777e-06, "loss": 0.3574, "step": 8819 }, { "epoch": 0.6025824964131994, "grad_norm": 3.7024178504943848, "learning_rate": 2.5214711323834647e-06, "loss": 0.2407, "step": 8820 }, { "epoch": 0.6026508164241307, "grad_norm": 3.443136215209961, "learning_rate": 2.5207275416251236e-06, "loss": 0.2457, "step": 8821 }, { "epoch": 0.6027191364350618, "grad_norm": 4.8525614738464355, "learning_rate": 2.519983998819155e-06, "loss": 0.2498, "step": 8822 }, { "epoch": 0.6027874564459931, "grad_norm": 2.891692638397217, "learning_rate": 2.519240504001972e-06, "loss": 0.2277, "step": 8823 }, { "epoch": 0.6028557764569242, "grad_norm": 4.070615291595459, "learning_rate": 2.5184970572099794e-06, "loss": 0.2781, "step": 8824 }, { "epoch": 0.6029240964678554, "grad_norm": 3.902322292327881, "learning_rate": 2.5177536584795834e-06, "loss": 0.2981, "step": 8825 }, { "epoch": 0.6029924164787867, "grad_norm": 4.650713920593262, "learning_rate": 2.517010307847184e-06, "loss": 0.2901, "step": 8826 }, { "epoch": 0.6030607364897178, "grad_norm": 4.329326152801514, "learning_rate": 2.5162670053491826e-06, "loss": 0.2203, "step": 8827 }, { "epoch": 0.6031290565006491, "grad_norm": 4.832791328430176, "learning_rate": 2.5155237510219764e-06, "loss": 0.3942, "step": 8828 }, { "epoch": 0.6031973765115802, "grad_norm": 5.111297607421875, "learning_rate": 2.5147805449019624e-06, "loss": 0.3097, "step": 8829 }, { "epoch": 0.6032656965225115, "grad_norm": 3.883650779724121, "learning_rate": 2.51403738702553e-06, "loss": 0.2356, "step": 8830 }, { "epoch": 0.6033340165334427, "grad_norm": 5.875015735626221, "learning_rate": 2.513294277429072e-06, "loss": 0.3085, "step": 8831 }, { "epoch": 0.6034023365443738, "grad_norm": 3.4738690853118896, "learning_rate": 2.512551216148976e-06, "loss": 0.2165, "step": 8832 }, { "epoch": 0.6034706565553051, "grad_norm": 3.7926526069641113, "learning_rate": 2.5118082032216275e-06, "loss": 0.2885, "step": 8833 }, { "epoch": 0.6035389765662362, "grad_norm": 4.3058037757873535, "learning_rate": 2.51106523868341e-06, "loss": 0.3285, "step": 8834 }, { "epoch": 0.6036072965771675, "grad_norm": 3.7893786430358887, "learning_rate": 2.5103223225707048e-06, "loss": 0.2752, "step": 8835 }, { "epoch": 0.6036756165880987, "grad_norm": 4.514995574951172, "learning_rate": 2.5095794549198895e-06, "loss": 0.2433, "step": 8836 }, { "epoch": 0.6037439365990298, "grad_norm": 4.231389999389648, "learning_rate": 2.508836635767343e-06, "loss": 0.3305, "step": 8837 }, { "epoch": 0.6038122566099611, "grad_norm": 3.2856075763702393, "learning_rate": 2.5080938651494364e-06, "loss": 0.3137, "step": 8838 }, { "epoch": 0.6038805766208922, "grad_norm": 3.8341031074523926, "learning_rate": 2.5073511431025408e-06, "loss": 0.221, "step": 8839 }, { "epoch": 0.6039488966318235, "grad_norm": 4.297191143035889, "learning_rate": 2.5066084696630273e-06, "loss": 0.2508, "step": 8840 }, { "epoch": 0.6040172166427547, "grad_norm": 4.9275407791137695, "learning_rate": 2.5058658448672624e-06, "loss": 0.2846, "step": 8841 }, { "epoch": 0.6040855366536859, "grad_norm": 4.308567523956299, "learning_rate": 2.5051232687516105e-06, "loss": 0.375, "step": 8842 }, { "epoch": 0.6041538566646171, "grad_norm": 4.214626312255859, "learning_rate": 2.5043807413524326e-06, "loss": 0.273, "step": 8843 }, { "epoch": 0.6042221766755482, "grad_norm": 2.873331308364868, "learning_rate": 2.503638262706089e-06, "loss": 0.2497, "step": 8844 }, { "epoch": 0.6042904966864795, "grad_norm": 3.806692600250244, "learning_rate": 2.5028958328489366e-06, "loss": 0.2854, "step": 8845 }, { "epoch": 0.6043588166974107, "grad_norm": 4.404019832611084, "learning_rate": 2.502153451817331e-06, "loss": 0.2834, "step": 8846 }, { "epoch": 0.6044271367083419, "grad_norm": 3.7763006687164307, "learning_rate": 2.5014111196476237e-06, "loss": 0.4347, "step": 8847 }, { "epoch": 0.6044954567192731, "grad_norm": 3.2116451263427734, "learning_rate": 2.500668836376165e-06, "loss": 0.2985, "step": 8848 }, { "epoch": 0.6045637767302042, "grad_norm": 4.536550998687744, "learning_rate": 2.499926602039302e-06, "loss": 0.3481, "step": 8849 }, { "epoch": 0.6046320967411355, "grad_norm": 4.809161186218262, "learning_rate": 2.499184416673382e-06, "loss": 0.2947, "step": 8850 }, { "epoch": 0.6047004167520666, "grad_norm": 4.10333776473999, "learning_rate": 2.4984422803147453e-06, "loss": 0.321, "step": 8851 }, { "epoch": 0.6047687367629979, "grad_norm": 4.693068981170654, "learning_rate": 2.497700192999733e-06, "loss": 0.2946, "step": 8852 }, { "epoch": 0.6048370567739291, "grad_norm": 5.538238048553467, "learning_rate": 2.4969581547646836e-06, "loss": 0.3532, "step": 8853 }, { "epoch": 0.6049053767848603, "grad_norm": 3.151287078857422, "learning_rate": 2.496216165645935e-06, "loss": 0.2238, "step": 8854 }, { "epoch": 0.6049736967957915, "grad_norm": 4.1192402839660645, "learning_rate": 2.495474225679816e-06, "loss": 0.3134, "step": 8855 }, { "epoch": 0.6050420168067226, "grad_norm": 5.081987380981445, "learning_rate": 2.4947323349026595e-06, "loss": 0.3878, "step": 8856 }, { "epoch": 0.6051103368176539, "grad_norm": 3.470149517059326, "learning_rate": 2.4939904933507943e-06, "loss": 0.3139, "step": 8857 }, { "epoch": 0.6051786568285851, "grad_norm": 3.965786933898926, "learning_rate": 2.4932487010605477e-06, "loss": 0.297, "step": 8858 }, { "epoch": 0.6052469768395163, "grad_norm": 5.053110122680664, "learning_rate": 2.49250695806824e-06, "loss": 0.3296, "step": 8859 }, { "epoch": 0.6053152968504475, "grad_norm": 3.4803168773651123, "learning_rate": 2.4917652644101946e-06, "loss": 0.2219, "step": 8860 }, { "epoch": 0.6053836168613786, "grad_norm": 2.7240872383117676, "learning_rate": 2.4910236201227292e-06, "loss": 0.2257, "step": 8861 }, { "epoch": 0.6054519368723099, "grad_norm": 4.218183994293213, "learning_rate": 2.4902820252421625e-06, "loss": 0.3148, "step": 8862 }, { "epoch": 0.6055202568832411, "grad_norm": 3.6224026679992676, "learning_rate": 2.4895404798048054e-06, "loss": 0.1725, "step": 8863 }, { "epoch": 0.6055885768941723, "grad_norm": 3.5006110668182373, "learning_rate": 2.4887989838469707e-06, "loss": 0.3745, "step": 8864 }, { "epoch": 0.6056568969051035, "grad_norm": 3.8593225479125977, "learning_rate": 2.4880575374049677e-06, "loss": 0.322, "step": 8865 }, { "epoch": 0.6057252169160348, "grad_norm": 4.423726558685303, "learning_rate": 2.4873161405151036e-06, "loss": 0.311, "step": 8866 }, { "epoch": 0.6057935369269659, "grad_norm": 3.5668697357177734, "learning_rate": 2.486574793213681e-06, "loss": 0.2998, "step": 8867 }, { "epoch": 0.6058618569378971, "grad_norm": 4.459712028503418, "learning_rate": 2.4858334955370024e-06, "loss": 0.272, "step": 8868 }, { "epoch": 0.6059301769488283, "grad_norm": 3.742506742477417, "learning_rate": 2.485092247521367e-06, "loss": 0.3034, "step": 8869 }, { "epoch": 0.6059984969597595, "grad_norm": 3.779374599456787, "learning_rate": 2.484351049203074e-06, "loss": 0.2894, "step": 8870 }, { "epoch": 0.6060668169706908, "grad_norm": 4.822783470153809, "learning_rate": 2.483609900618415e-06, "loss": 0.3163, "step": 8871 }, { "epoch": 0.6061351369816219, "grad_norm": 3.818979024887085, "learning_rate": 2.4828688018036824e-06, "loss": 0.2249, "step": 8872 }, { "epoch": 0.6062034569925531, "grad_norm": 5.128011703491211, "learning_rate": 2.482127752795167e-06, "loss": 0.3144, "step": 8873 }, { "epoch": 0.6062717770034843, "grad_norm": 3.3615787029266357, "learning_rate": 2.4813867536291572e-06, "loss": 0.2771, "step": 8874 }, { "epoch": 0.6063400970144155, "grad_norm": 4.58066463470459, "learning_rate": 2.480645804341934e-06, "loss": 0.3261, "step": 8875 }, { "epoch": 0.6064084170253468, "grad_norm": 4.809382438659668, "learning_rate": 2.479904904969782e-06, "loss": 0.3585, "step": 8876 }, { "epoch": 0.6064767370362779, "grad_norm": 4.514069080352783, "learning_rate": 2.4791640555489816e-06, "loss": 0.3253, "step": 8877 }, { "epoch": 0.6065450570472092, "grad_norm": 4.23560905456543, "learning_rate": 2.4784232561158105e-06, "loss": 0.3044, "step": 8878 }, { "epoch": 0.6066133770581403, "grad_norm": 5.011561393737793, "learning_rate": 2.477682506706542e-06, "loss": 0.2299, "step": 8879 }, { "epoch": 0.6066816970690715, "grad_norm": 3.5578184127807617, "learning_rate": 2.4769418073574484e-06, "loss": 0.2746, "step": 8880 }, { "epoch": 0.6067500170800028, "grad_norm": 4.5249481201171875, "learning_rate": 2.4762011581048016e-06, "loss": 0.2962, "step": 8881 }, { "epoch": 0.6068183370909339, "grad_norm": 4.373018741607666, "learning_rate": 2.4754605589848686e-06, "loss": 0.2997, "step": 8882 }, { "epoch": 0.6068866571018652, "grad_norm": 4.391305446624756, "learning_rate": 2.474720010033914e-06, "loss": 0.2878, "step": 8883 }, { "epoch": 0.6069549771127963, "grad_norm": 4.452069282531738, "learning_rate": 2.473979511288201e-06, "loss": 0.3091, "step": 8884 }, { "epoch": 0.6070232971237275, "grad_norm": 3.3654441833496094, "learning_rate": 2.47323906278399e-06, "loss": 0.2585, "step": 8885 }, { "epoch": 0.6070916171346588, "grad_norm": 3.3494629859924316, "learning_rate": 2.4724986645575385e-06, "loss": 0.2956, "step": 8886 }, { "epoch": 0.6071599371455899, "grad_norm": 3.722480297088623, "learning_rate": 2.4717583166451018e-06, "loss": 0.2298, "step": 8887 }, { "epoch": 0.6072282571565212, "grad_norm": 3.720090389251709, "learning_rate": 2.471018019082933e-06, "loss": 0.2959, "step": 8888 }, { "epoch": 0.6072965771674523, "grad_norm": 4.485962867736816, "learning_rate": 2.470277771907281e-06, "loss": 0.3524, "step": 8889 }, { "epoch": 0.6073648971783836, "grad_norm": 3.7640156745910645, "learning_rate": 2.4695375751543974e-06, "loss": 0.291, "step": 8890 }, { "epoch": 0.6074332171893148, "grad_norm": 4.016875743865967, "learning_rate": 2.4687974288605245e-06, "loss": 0.3517, "step": 8891 }, { "epoch": 0.6075015372002459, "grad_norm": 5.242239475250244, "learning_rate": 2.468057333061905e-06, "loss": 0.2943, "step": 8892 }, { "epoch": 0.6075698572111772, "grad_norm": 3.9489150047302246, "learning_rate": 2.467317287794781e-06, "loss": 0.2646, "step": 8893 }, { "epoch": 0.6076381772221083, "grad_norm": 2.7799861431121826, "learning_rate": 2.4665772930953924e-06, "loss": 0.2229, "step": 8894 }, { "epoch": 0.6077064972330396, "grad_norm": 5.324644088745117, "learning_rate": 2.46583734899997e-06, "loss": 0.354, "step": 8895 }, { "epoch": 0.6077748172439708, "grad_norm": 4.317009925842285, "learning_rate": 2.4650974555447503e-06, "loss": 0.3931, "step": 8896 }, { "epoch": 0.6078431372549019, "grad_norm": 3.2879116535186768, "learning_rate": 2.4643576127659626e-06, "loss": 0.3039, "step": 8897 }, { "epoch": 0.6079114572658332, "grad_norm": 3.1616618633270264, "learning_rate": 2.4636178206998367e-06, "loss": 0.2653, "step": 8898 }, { "epoch": 0.6079797772767643, "grad_norm": 4.4788594245910645, "learning_rate": 2.4628780793825963e-06, "loss": 0.2878, "step": 8899 }, { "epoch": 0.6080480972876956, "grad_norm": 3.5472607612609863, "learning_rate": 2.462138388850465e-06, "loss": 0.2553, "step": 8900 }, { "epoch": 0.6081164172986268, "grad_norm": 3.8611366748809814, "learning_rate": 2.461398749139664e-06, "loss": 0.3032, "step": 8901 }, { "epoch": 0.608184737309558, "grad_norm": 4.606494426727295, "learning_rate": 2.4606591602864124e-06, "loss": 0.349, "step": 8902 }, { "epoch": 0.6082530573204892, "grad_norm": 3.4040327072143555, "learning_rate": 2.459919622326924e-06, "loss": 0.2985, "step": 8903 }, { "epoch": 0.6083213773314203, "grad_norm": 4.3301801681518555, "learning_rate": 2.459180135297412e-06, "loss": 0.3948, "step": 8904 }, { "epoch": 0.6083896973423516, "grad_norm": 3.240323781967163, "learning_rate": 2.4584406992340885e-06, "loss": 0.2864, "step": 8905 }, { "epoch": 0.6084580173532828, "grad_norm": 4.794697284698486, "learning_rate": 2.4577013141731607e-06, "loss": 0.3135, "step": 8906 }, { "epoch": 0.608526337364214, "grad_norm": 3.497485637664795, "learning_rate": 2.4569619801508363e-06, "loss": 0.295, "step": 8907 }, { "epoch": 0.6085946573751452, "grad_norm": 3.7695298194885254, "learning_rate": 2.4562226972033162e-06, "loss": 0.3506, "step": 8908 }, { "epoch": 0.6086629773860763, "grad_norm": 5.224239349365234, "learning_rate": 2.4554834653668005e-06, "loss": 0.3344, "step": 8909 }, { "epoch": 0.6087312973970076, "grad_norm": 3.575350284576416, "learning_rate": 2.4547442846774905e-06, "loss": 0.2631, "step": 8910 }, { "epoch": 0.6087996174079388, "grad_norm": 3.8582353591918945, "learning_rate": 2.4540051551715804e-06, "loss": 0.3244, "step": 8911 }, { "epoch": 0.60886793741887, "grad_norm": 4.445860385894775, "learning_rate": 2.453266076885263e-06, "loss": 0.3476, "step": 8912 }, { "epoch": 0.6089362574298012, "grad_norm": 5.1329545974731445, "learning_rate": 2.452527049854729e-06, "loss": 0.3199, "step": 8913 }, { "epoch": 0.6090045774407324, "grad_norm": 2.8176701068878174, "learning_rate": 2.451788074116167e-06, "loss": 0.2503, "step": 8914 }, { "epoch": 0.6090728974516636, "grad_norm": 4.200029373168945, "learning_rate": 2.451049149705763e-06, "loss": 0.3518, "step": 8915 }, { "epoch": 0.6091412174625948, "grad_norm": 4.876670837402344, "learning_rate": 2.450310276659699e-06, "loss": 0.3334, "step": 8916 }, { "epoch": 0.609209537473526, "grad_norm": 5.383134365081787, "learning_rate": 2.449571455014157e-06, "loss": 0.3081, "step": 8917 }, { "epoch": 0.6092778574844572, "grad_norm": 3.6667003631591797, "learning_rate": 2.4488326848053138e-06, "loss": 0.2888, "step": 8918 }, { "epoch": 0.6093461774953884, "grad_norm": 3.548255443572998, "learning_rate": 2.448093966069347e-06, "loss": 0.2849, "step": 8919 }, { "epoch": 0.6094144975063196, "grad_norm": 3.6167571544647217, "learning_rate": 2.4473552988424275e-06, "loss": 0.251, "step": 8920 }, { "epoch": 0.6094828175172508, "grad_norm": 5.405863285064697, "learning_rate": 2.4466166831607267e-06, "loss": 0.2956, "step": 8921 }, { "epoch": 0.609551137528182, "grad_norm": 3.781928777694702, "learning_rate": 2.445878119060412e-06, "loss": 0.2615, "step": 8922 }, { "epoch": 0.6096194575391132, "grad_norm": 3.0342137813568115, "learning_rate": 2.4451396065776523e-06, "loss": 0.281, "step": 8923 }, { "epoch": 0.6096877775500444, "grad_norm": 4.9037322998046875, "learning_rate": 2.444401145748606e-06, "loss": 0.2751, "step": 8924 }, { "epoch": 0.6097560975609756, "grad_norm": 3.896479368209839, "learning_rate": 2.443662736609435e-06, "loss": 0.3409, "step": 8925 }, { "epoch": 0.6098244175719069, "grad_norm": 3.865394353866577, "learning_rate": 2.4429243791962986e-06, "loss": 0.2742, "step": 8926 }, { "epoch": 0.609892737582838, "grad_norm": 4.8494391441345215, "learning_rate": 2.442186073545353e-06, "loss": 0.2918, "step": 8927 }, { "epoch": 0.6099610575937692, "grad_norm": 4.305893421173096, "learning_rate": 2.4414478196927476e-06, "loss": 0.2857, "step": 8928 }, { "epoch": 0.6100293776047004, "grad_norm": 4.155515193939209, "learning_rate": 2.4407096176746345e-06, "loss": 0.3182, "step": 8929 }, { "epoch": 0.6100976976156316, "grad_norm": 4.250232219696045, "learning_rate": 2.439971467527162e-06, "loss": 0.336, "step": 8930 }, { "epoch": 0.6101660176265629, "grad_norm": 3.8159892559051514, "learning_rate": 2.4392333692864755e-06, "loss": 0.299, "step": 8931 }, { "epoch": 0.610234337637494, "grad_norm": 4.5171966552734375, "learning_rate": 2.4384953229887168e-06, "loss": 0.3969, "step": 8932 }, { "epoch": 0.6103026576484252, "grad_norm": 4.962459564208984, "learning_rate": 2.437757328670026e-06, "loss": 0.396, "step": 8933 }, { "epoch": 0.6103709776593564, "grad_norm": 3.7496423721313477, "learning_rate": 2.437019386366541e-06, "loss": 0.262, "step": 8934 }, { "epoch": 0.6104392976702876, "grad_norm": 4.803715229034424, "learning_rate": 2.4362814961143982e-06, "loss": 0.272, "step": 8935 }, { "epoch": 0.6105076176812189, "grad_norm": 3.991802930831909, "learning_rate": 2.435543657949728e-06, "loss": 0.2549, "step": 8936 }, { "epoch": 0.61057593769215, "grad_norm": 4.6107258796691895, "learning_rate": 2.434805871908662e-06, "loss": 0.2658, "step": 8937 }, { "epoch": 0.6106442577030813, "grad_norm": 4.5042853355407715, "learning_rate": 2.434068138027326e-06, "loss": 0.2507, "step": 8938 }, { "epoch": 0.6107125777140124, "grad_norm": 3.7212860584259033, "learning_rate": 2.4333304563418477e-06, "loss": 0.3497, "step": 8939 }, { "epoch": 0.6107808977249436, "grad_norm": 5.101291179656982, "learning_rate": 2.4325928268883455e-06, "loss": 0.2845, "step": 8940 }, { "epoch": 0.6108492177358749, "grad_norm": 4.601099967956543, "learning_rate": 2.431855249702942e-06, "loss": 0.2772, "step": 8941 }, { "epoch": 0.610917537746806, "grad_norm": 3.5354137420654297, "learning_rate": 2.4311177248217526e-06, "loss": 0.2731, "step": 8942 }, { "epoch": 0.6109858577577373, "grad_norm": 4.136716842651367, "learning_rate": 2.430380252280895e-06, "loss": 0.4073, "step": 8943 }, { "epoch": 0.6110541777686684, "grad_norm": 4.737143039703369, "learning_rate": 2.429642832116478e-06, "loss": 0.3554, "step": 8944 }, { "epoch": 0.6111224977795996, "grad_norm": 3.1925361156463623, "learning_rate": 2.4289054643646118e-06, "loss": 0.2742, "step": 8945 }, { "epoch": 0.6111908177905309, "grad_norm": 2.9442245960235596, "learning_rate": 2.4281681490614047e-06, "loss": 0.2116, "step": 8946 }, { "epoch": 0.611259137801462, "grad_norm": 3.658518075942993, "learning_rate": 2.4274308862429615e-06, "loss": 0.3149, "step": 8947 }, { "epoch": 0.6113274578123933, "grad_norm": 4.16516637802124, "learning_rate": 2.4266936759453803e-06, "loss": 0.3648, "step": 8948 }, { "epoch": 0.6113957778233244, "grad_norm": 5.011862754821777, "learning_rate": 2.4259565182047636e-06, "loss": 0.221, "step": 8949 }, { "epoch": 0.6114640978342557, "grad_norm": 6.35409688949585, "learning_rate": 2.4252194130572074e-06, "loss": 0.4281, "step": 8950 }, { "epoch": 0.6115324178451869, "grad_norm": 5.149960041046143, "learning_rate": 2.4244823605388063e-06, "loss": 0.3981, "step": 8951 }, { "epoch": 0.611600737856118, "grad_norm": 4.522498607635498, "learning_rate": 2.4237453606856503e-06, "loss": 0.3318, "step": 8952 }, { "epoch": 0.6116690578670493, "grad_norm": 3.478257656097412, "learning_rate": 2.4230084135338292e-06, "loss": 0.2309, "step": 8953 }, { "epoch": 0.6117373778779804, "grad_norm": 4.159120082855225, "learning_rate": 2.4222715191194293e-06, "loss": 0.2605, "step": 8954 }, { "epoch": 0.6118056978889117, "grad_norm": 2.954876661300659, "learning_rate": 2.4215346774785346e-06, "loss": 0.3038, "step": 8955 }, { "epoch": 0.6118740178998429, "grad_norm": 5.115668296813965, "learning_rate": 2.4207978886472265e-06, "loss": 0.3667, "step": 8956 }, { "epoch": 0.611942337910774, "grad_norm": 3.971221446990967, "learning_rate": 2.420061152661582e-06, "loss": 0.3992, "step": 8957 }, { "epoch": 0.6120106579217053, "grad_norm": 3.524358034133911, "learning_rate": 2.4193244695576784e-06, "loss": 0.3417, "step": 8958 }, { "epoch": 0.6120789779326364, "grad_norm": 3.9924497604370117, "learning_rate": 2.4185878393715904e-06, "loss": 0.357, "step": 8959 }, { "epoch": 0.6121472979435677, "grad_norm": 4.219514846801758, "learning_rate": 2.4178512621393867e-06, "loss": 0.2612, "step": 8960 }, { "epoch": 0.6122156179544989, "grad_norm": 3.9600868225097656, "learning_rate": 2.4171147378971346e-06, "loss": 0.27, "step": 8961 }, { "epoch": 0.6122839379654301, "grad_norm": 4.871627330780029, "learning_rate": 2.4163782666809027e-06, "loss": 0.3578, "step": 8962 }, { "epoch": 0.6123522579763613, "grad_norm": 4.632501602172852, "learning_rate": 2.415641848526754e-06, "loss": 0.2974, "step": 8963 }, { "epoch": 0.6124205779872924, "grad_norm": 4.140776634216309, "learning_rate": 2.414905483470746e-06, "loss": 0.4463, "step": 8964 }, { "epoch": 0.6124888979982237, "grad_norm": 3.9490158557891846, "learning_rate": 2.414169171548938e-06, "loss": 0.3489, "step": 8965 }, { "epoch": 0.6125572180091549, "grad_norm": 3.8063437938690186, "learning_rate": 2.4134329127973862e-06, "loss": 0.2747, "step": 8966 }, { "epoch": 0.6126255380200861, "grad_norm": 3.335853338241577, "learning_rate": 2.4126967072521433e-06, "loss": 0.1935, "step": 8967 }, { "epoch": 0.6126938580310173, "grad_norm": 3.560696840286255, "learning_rate": 2.4119605549492573e-06, "loss": 0.2367, "step": 8968 }, { "epoch": 0.6127621780419484, "grad_norm": 5.155357837677002, "learning_rate": 2.4112244559247768e-06, "loss": 0.3755, "step": 8969 }, { "epoch": 0.6128304980528797, "grad_norm": 3.833993673324585, "learning_rate": 2.4104884102147467e-06, "loss": 0.3559, "step": 8970 }, { "epoch": 0.6128988180638109, "grad_norm": 3.9479446411132812, "learning_rate": 2.4097524178552096e-06, "loss": 0.3063, "step": 8971 }, { "epoch": 0.6129671380747421, "grad_norm": 3.7814979553222656, "learning_rate": 2.4090164788822047e-06, "loss": 0.2533, "step": 8972 }, { "epoch": 0.6130354580856733, "grad_norm": 3.7772610187530518, "learning_rate": 2.4082805933317685e-06, "loss": 0.3021, "step": 8973 }, { "epoch": 0.6131037780966045, "grad_norm": 4.583524227142334, "learning_rate": 2.407544761239936e-06, "loss": 0.3836, "step": 8974 }, { "epoch": 0.6131720981075357, "grad_norm": 3.286954641342163, "learning_rate": 2.4068089826427377e-06, "loss": 0.2048, "step": 8975 }, { "epoch": 0.6132404181184669, "grad_norm": 3.929967164993286, "learning_rate": 2.406073257576206e-06, "loss": 0.2965, "step": 8976 }, { "epoch": 0.6133087381293981, "grad_norm": 3.8680684566497803, "learning_rate": 2.4053375860763637e-06, "loss": 0.22, "step": 8977 }, { "epoch": 0.6133770581403293, "grad_norm": 3.8879799842834473, "learning_rate": 2.4046019681792357e-06, "loss": 0.2982, "step": 8978 }, { "epoch": 0.6134453781512605, "grad_norm": 3.427609920501709, "learning_rate": 2.4038664039208444e-06, "loss": 0.2039, "step": 8979 }, { "epoch": 0.6135136981621917, "grad_norm": 4.114086627960205, "learning_rate": 2.403130893337209e-06, "loss": 0.2673, "step": 8980 }, { "epoch": 0.6135820181731229, "grad_norm": 3.5952842235565186, "learning_rate": 2.402395436464342e-06, "loss": 0.2349, "step": 8981 }, { "epoch": 0.6136503381840541, "grad_norm": 4.540695667266846, "learning_rate": 2.40166003333826e-06, "loss": 0.2692, "step": 8982 }, { "epoch": 0.6137186581949853, "grad_norm": 3.4891655445098877, "learning_rate": 2.4009246839949724e-06, "loss": 0.2877, "step": 8983 }, { "epoch": 0.6137869782059165, "grad_norm": 3.562321662902832, "learning_rate": 2.4001893884704886e-06, "loss": 0.309, "step": 8984 }, { "epoch": 0.6138552982168477, "grad_norm": 5.50165319442749, "learning_rate": 2.399454146800813e-06, "loss": 0.4431, "step": 8985 }, { "epoch": 0.613923618227779, "grad_norm": 3.964369058609009, "learning_rate": 2.3987189590219477e-06, "loss": 0.2569, "step": 8986 }, { "epoch": 0.6139919382387101, "grad_norm": 3.707833766937256, "learning_rate": 2.397983825169894e-06, "loss": 0.2953, "step": 8987 }, { "epoch": 0.6140602582496413, "grad_norm": 5.05224609375, "learning_rate": 2.39724874528065e-06, "loss": 0.2966, "step": 8988 }, { "epoch": 0.6141285782605725, "grad_norm": 4.5812668800354, "learning_rate": 2.396513719390209e-06, "loss": 0.3356, "step": 8989 }, { "epoch": 0.6141968982715037, "grad_norm": 3.6571171283721924, "learning_rate": 2.3957787475345643e-06, "loss": 0.2486, "step": 8990 }, { "epoch": 0.614265218282435, "grad_norm": 4.273266315460205, "learning_rate": 2.3950438297497055e-06, "loss": 0.3038, "step": 8991 }, { "epoch": 0.6143335382933661, "grad_norm": 3.759881019592285, "learning_rate": 2.3943089660716194e-06, "loss": 0.266, "step": 8992 }, { "epoch": 0.6144018583042973, "grad_norm": 4.1762309074401855, "learning_rate": 2.39357415653629e-06, "loss": 0.3062, "step": 8993 }, { "epoch": 0.6144701783152285, "grad_norm": 4.576512336730957, "learning_rate": 2.3928394011796996e-06, "loss": 0.3716, "step": 8994 }, { "epoch": 0.6145384983261597, "grad_norm": 4.227806091308594, "learning_rate": 2.392104700037826e-06, "loss": 0.2604, "step": 8995 }, { "epoch": 0.614606818337091, "grad_norm": 4.4506449699401855, "learning_rate": 2.3913700531466487e-06, "loss": 0.3778, "step": 8996 }, { "epoch": 0.6146751383480221, "grad_norm": 3.591808795928955, "learning_rate": 2.3906354605421374e-06, "loss": 0.2023, "step": 8997 }, { "epoch": 0.6147434583589534, "grad_norm": 3.586172342300415, "learning_rate": 2.3899009222602644e-06, "loss": 0.2366, "step": 8998 }, { "epoch": 0.6148117783698845, "grad_norm": 4.161261558532715, "learning_rate": 2.3891664383369996e-06, "loss": 0.3582, "step": 8999 }, { "epoch": 0.6148800983808157, "grad_norm": 4.629856109619141, "learning_rate": 2.3884320088083082e-06, "loss": 0.3535, "step": 9000 }, { "epoch": 0.614948418391747, "grad_norm": 5.496280193328857, "learning_rate": 2.3876976337101522e-06, "loss": 0.3407, "step": 9001 }, { "epoch": 0.6150167384026781, "grad_norm": 4.282445907592773, "learning_rate": 2.3869633130784924e-06, "loss": 0.3313, "step": 9002 }, { "epoch": 0.6150850584136094, "grad_norm": 3.329958915710449, "learning_rate": 2.386229046949287e-06, "loss": 0.3164, "step": 9003 }, { "epoch": 0.6151533784245405, "grad_norm": 3.656151533126831, "learning_rate": 2.3854948353584917e-06, "loss": 0.3116, "step": 9004 }, { "epoch": 0.6152216984354717, "grad_norm": 3.9233875274658203, "learning_rate": 2.384760678342057e-06, "loss": 0.2484, "step": 9005 }, { "epoch": 0.615290018446403, "grad_norm": 4.108064651489258, "learning_rate": 2.384026575935934e-06, "loss": 0.2525, "step": 9006 }, { "epoch": 0.6153583384573341, "grad_norm": 3.902576446533203, "learning_rate": 2.383292528176069e-06, "loss": 0.2675, "step": 9007 }, { "epoch": 0.6154266584682654, "grad_norm": 4.12043571472168, "learning_rate": 2.3825585350984078e-06, "loss": 0.303, "step": 9008 }, { "epoch": 0.6154949784791965, "grad_norm": 4.547950267791748, "learning_rate": 2.3818245967388906e-06, "loss": 0.3031, "step": 9009 }, { "epoch": 0.6155632984901278, "grad_norm": 4.400629997253418, "learning_rate": 2.3810907131334564e-06, "loss": 0.3593, "step": 9010 }, { "epoch": 0.615631618501059, "grad_norm": 4.354587078094482, "learning_rate": 2.380356884318041e-06, "loss": 0.3125, "step": 9011 }, { "epoch": 0.6156999385119901, "grad_norm": 4.113077163696289, "learning_rate": 2.379623110328582e-06, "loss": 0.3242, "step": 9012 }, { "epoch": 0.6157682585229214, "grad_norm": 4.563455581665039, "learning_rate": 2.378889391201005e-06, "loss": 0.425, "step": 9013 }, { "epoch": 0.6158365785338525, "grad_norm": 5.015744209289551, "learning_rate": 2.378155726971241e-06, "loss": 0.3072, "step": 9014 }, { "epoch": 0.6159048985447838, "grad_norm": 3.8753774166107178, "learning_rate": 2.3774221176752157e-06, "loss": 0.3317, "step": 9015 }, { "epoch": 0.615973218555715, "grad_norm": 4.607199192047119, "learning_rate": 2.376688563348853e-06, "loss": 0.2592, "step": 9016 }, { "epoch": 0.6160415385666461, "grad_norm": 3.258147716522217, "learning_rate": 2.3759550640280692e-06, "loss": 0.2647, "step": 9017 }, { "epoch": 0.6161098585775774, "grad_norm": 4.6530303955078125, "learning_rate": 2.375221619748785e-06, "loss": 0.383, "step": 9018 }, { "epoch": 0.6161781785885085, "grad_norm": 3.67733097076416, "learning_rate": 2.374488230546915e-06, "loss": 0.2722, "step": 9019 }, { "epoch": 0.6162464985994398, "grad_norm": 4.037845611572266, "learning_rate": 2.3737548964583718e-06, "loss": 0.3074, "step": 9020 }, { "epoch": 0.616314818610371, "grad_norm": 4.424005031585693, "learning_rate": 2.3730216175190626e-06, "loss": 0.2779, "step": 9021 }, { "epoch": 0.6163831386213022, "grad_norm": 4.016085147857666, "learning_rate": 2.372288393764895e-06, "loss": 0.2693, "step": 9022 }, { "epoch": 0.6164514586322334, "grad_norm": 4.1103835105896, "learning_rate": 2.3715552252317745e-06, "loss": 0.3397, "step": 9023 }, { "epoch": 0.6165197786431645, "grad_norm": 3.3312931060791016, "learning_rate": 2.3708221119556015e-06, "loss": 0.2751, "step": 9024 }, { "epoch": 0.6165880986540958, "grad_norm": 3.4568839073181152, "learning_rate": 2.370089053972274e-06, "loss": 0.2793, "step": 9025 }, { "epoch": 0.616656418665027, "grad_norm": 3.6413917541503906, "learning_rate": 2.369356051317688e-06, "loss": 0.3517, "step": 9026 }, { "epoch": 0.6167247386759582, "grad_norm": 3.886913299560547, "learning_rate": 2.3686231040277374e-06, "loss": 0.203, "step": 9027 }, { "epoch": 0.6167930586868894, "grad_norm": 3.8056623935699463, "learning_rate": 2.3678902121383127e-06, "loss": 0.2958, "step": 9028 }, { "epoch": 0.6168613786978205, "grad_norm": 4.44740104675293, "learning_rate": 2.367157375685301e-06, "loss": 0.3345, "step": 9029 }, { "epoch": 0.6169296987087518, "grad_norm": 3.7801175117492676, "learning_rate": 2.3664245947045874e-06, "loss": 0.4221, "step": 9030 }, { "epoch": 0.616998018719683, "grad_norm": 4.586207866668701, "learning_rate": 2.3656918692320537e-06, "loss": 0.3185, "step": 9031 }, { "epoch": 0.6170663387306142, "grad_norm": 4.17329740524292, "learning_rate": 2.364959199303583e-06, "loss": 0.2748, "step": 9032 }, { "epoch": 0.6171346587415454, "grad_norm": 3.5140979290008545, "learning_rate": 2.364226584955048e-06, "loss": 0.2142, "step": 9033 }, { "epoch": 0.6172029787524766, "grad_norm": 4.119091987609863, "learning_rate": 2.3634940262223237e-06, "loss": 0.3078, "step": 9034 }, { "epoch": 0.6172712987634078, "grad_norm": 4.480249881744385, "learning_rate": 2.362761523141283e-06, "loss": 0.3078, "step": 9035 }, { "epoch": 0.617339618774339, "grad_norm": 4.370704174041748, "learning_rate": 2.362029075747795e-06, "loss": 0.3077, "step": 9036 }, { "epoch": 0.6174079387852702, "grad_norm": 3.538593292236328, "learning_rate": 2.361296684077723e-06, "loss": 0.2258, "step": 9037 }, { "epoch": 0.6174762587962014, "grad_norm": 3.9684603214263916, "learning_rate": 2.3605643481669317e-06, "loss": 0.3811, "step": 9038 }, { "epoch": 0.6175445788071326, "grad_norm": 4.39290189743042, "learning_rate": 2.359832068051283e-06, "loss": 0.2999, "step": 9039 }, { "epoch": 0.6176128988180638, "grad_norm": 3.6094725131988525, "learning_rate": 2.3590998437666325e-06, "loss": 0.2647, "step": 9040 }, { "epoch": 0.617681218828995, "grad_norm": 2.8491475582122803, "learning_rate": 2.358367675348838e-06, "loss": 0.2701, "step": 9041 }, { "epoch": 0.6177495388399262, "grad_norm": 4.292740345001221, "learning_rate": 2.3576355628337486e-06, "loss": 0.3224, "step": 9042 }, { "epoch": 0.6178178588508574, "grad_norm": 3.742509365081787, "learning_rate": 2.356903506257216e-06, "loss": 0.3457, "step": 9043 }, { "epoch": 0.6178861788617886, "grad_norm": 3.777888774871826, "learning_rate": 2.3561715056550867e-06, "loss": 0.2971, "step": 9044 }, { "epoch": 0.6179544988727198, "grad_norm": 4.22520637512207, "learning_rate": 2.3554395610632058e-06, "loss": 0.2962, "step": 9045 }, { "epoch": 0.6180228188836511, "grad_norm": 3.361943483352661, "learning_rate": 2.3547076725174117e-06, "loss": 0.2753, "step": 9046 }, { "epoch": 0.6180911388945822, "grad_norm": 3.847007989883423, "learning_rate": 2.3539758400535455e-06, "loss": 0.3357, "step": 9047 }, { "epoch": 0.6181594589055134, "grad_norm": 4.175436496734619, "learning_rate": 2.353244063707442e-06, "loss": 0.3096, "step": 9048 }, { "epoch": 0.6182277789164446, "grad_norm": 4.0530781745910645, "learning_rate": 2.352512343514937e-06, "loss": 0.2892, "step": 9049 }, { "epoch": 0.6182960989273758, "grad_norm": 4.248539924621582, "learning_rate": 2.3517806795118565e-06, "loss": 0.3678, "step": 9050 }, { "epoch": 0.6183644189383071, "grad_norm": 3.521648406982422, "learning_rate": 2.351049071734031e-06, "loss": 0.2004, "step": 9051 }, { "epoch": 0.6184327389492382, "grad_norm": 4.825382232666016, "learning_rate": 2.350317520217285e-06, "loss": 0.304, "step": 9052 }, { "epoch": 0.6185010589601694, "grad_norm": 2.3998732566833496, "learning_rate": 2.3495860249974406e-06, "loss": 0.1872, "step": 9053 }, { "epoch": 0.6185693789711006, "grad_norm": 3.8825490474700928, "learning_rate": 2.348854586110317e-06, "loss": 0.2086, "step": 9054 }, { "epoch": 0.6186376989820318, "grad_norm": 3.4767227172851562, "learning_rate": 2.3481232035917305e-06, "loss": 0.2852, "step": 9055 }, { "epoch": 0.6187060189929631, "grad_norm": 5.349471092224121, "learning_rate": 2.3473918774774956e-06, "loss": 0.3589, "step": 9056 }, { "epoch": 0.6187743390038942, "grad_norm": 3.833601236343384, "learning_rate": 2.3466606078034237e-06, "loss": 0.3249, "step": 9057 }, { "epoch": 0.6188426590148255, "grad_norm": 3.5949573516845703, "learning_rate": 2.3459293946053215e-06, "loss": 0.2099, "step": 9058 }, { "epoch": 0.6189109790257566, "grad_norm": 4.109299659729004, "learning_rate": 2.345198237918996e-06, "loss": 0.3451, "step": 9059 }, { "epoch": 0.6189792990366878, "grad_norm": 4.000363349914551, "learning_rate": 2.344467137780249e-06, "loss": 0.3219, "step": 9060 }, { "epoch": 0.6190476190476191, "grad_norm": 4.634576797485352, "learning_rate": 2.3437360942248825e-06, "loss": 0.2973, "step": 9061 }, { "epoch": 0.6191159390585502, "grad_norm": 4.352885723114014, "learning_rate": 2.3430051072886915e-06, "loss": 0.3371, "step": 9062 }, { "epoch": 0.6191842590694815, "grad_norm": 4.730254650115967, "learning_rate": 2.3422741770074715e-06, "loss": 0.3338, "step": 9063 }, { "epoch": 0.6192525790804126, "grad_norm": 4.35487174987793, "learning_rate": 2.341543303417013e-06, "loss": 0.3592, "step": 9064 }, { "epoch": 0.6193208990913438, "grad_norm": 3.747286319732666, "learning_rate": 2.3408124865531086e-06, "loss": 0.3057, "step": 9065 }, { "epoch": 0.6193892191022751, "grad_norm": 4.016849517822266, "learning_rate": 2.3400817264515404e-06, "loss": 0.3278, "step": 9066 }, { "epoch": 0.6194575391132062, "grad_norm": 6.0080389976501465, "learning_rate": 2.339351023148093e-06, "loss": 0.2974, "step": 9067 }, { "epoch": 0.6195258591241375, "grad_norm": 3.3823111057281494, "learning_rate": 2.338620376678547e-06, "loss": 0.2857, "step": 9068 }, { "epoch": 0.6195941791350686, "grad_norm": 5.444587230682373, "learning_rate": 2.337889787078683e-06, "loss": 0.2335, "step": 9069 }, { "epoch": 0.6196624991459999, "grad_norm": 4.484408378601074, "learning_rate": 2.3371592543842705e-06, "loss": 0.296, "step": 9070 }, { "epoch": 0.6197308191569311, "grad_norm": 4.9514031410217285, "learning_rate": 2.336428778631086e-06, "loss": 0.4105, "step": 9071 }, { "epoch": 0.6197991391678622, "grad_norm": 4.703462600708008, "learning_rate": 2.335698359854898e-06, "loss": 0.185, "step": 9072 }, { "epoch": 0.6198674591787935, "grad_norm": 3.8327596187591553, "learning_rate": 2.3349679980914735e-06, "loss": 0.2969, "step": 9073 }, { "epoch": 0.6199357791897246, "grad_norm": 3.999131441116333, "learning_rate": 2.334237693376575e-06, "loss": 0.2102, "step": 9074 }, { "epoch": 0.6200040992006559, "grad_norm": 4.089545726776123, "learning_rate": 2.333507445745964e-06, "loss": 0.3795, "step": 9075 }, { "epoch": 0.6200724192115871, "grad_norm": 4.366141319274902, "learning_rate": 2.3327772552353995e-06, "loss": 0.3209, "step": 9076 }, { "epoch": 0.6201407392225182, "grad_norm": 4.15756893157959, "learning_rate": 2.332047121880638e-06, "loss": 0.3209, "step": 9077 }, { "epoch": 0.6202090592334495, "grad_norm": 4.408979415893555, "learning_rate": 2.3313170457174296e-06, "loss": 0.3755, "step": 9078 }, { "epoch": 0.6202773792443806, "grad_norm": 3.2374181747436523, "learning_rate": 2.330587026781525e-06, "loss": 0.2777, "step": 9079 }, { "epoch": 0.6203456992553119, "grad_norm": 3.995635747909546, "learning_rate": 2.329857065108673e-06, "loss": 0.2608, "step": 9080 }, { "epoch": 0.6204140192662431, "grad_norm": 3.7196476459503174, "learning_rate": 2.329127160734616e-06, "loss": 0.2377, "step": 9081 }, { "epoch": 0.6204823392771743, "grad_norm": 4.941783905029297, "learning_rate": 2.3283973136950967e-06, "loss": 0.3669, "step": 9082 }, { "epoch": 0.6205506592881055, "grad_norm": 4.360530853271484, "learning_rate": 2.3276675240258524e-06, "loss": 0.3782, "step": 9083 }, { "epoch": 0.6206189792990366, "grad_norm": 4.098020076751709, "learning_rate": 2.326937791762619e-06, "loss": 0.2946, "step": 9084 }, { "epoch": 0.6206872993099679, "grad_norm": 2.8573615550994873, "learning_rate": 2.326208116941133e-06, "loss": 0.2815, "step": 9085 }, { "epoch": 0.6207556193208991, "grad_norm": 4.6279120445251465, "learning_rate": 2.32547849959712e-06, "loss": 0.3159, "step": 9086 }, { "epoch": 0.6208239393318303, "grad_norm": 3.6081793308258057, "learning_rate": 2.324748939766309e-06, "loss": 0.3721, "step": 9087 }, { "epoch": 0.6208922593427615, "grad_norm": 3.5569727420806885, "learning_rate": 2.3240194374844256e-06, "loss": 0.2347, "step": 9088 }, { "epoch": 0.6209605793536926, "grad_norm": 4.921845436096191, "learning_rate": 2.3232899927871917e-06, "loss": 0.2829, "step": 9089 }, { "epoch": 0.6210288993646239, "grad_norm": 3.606963872909546, "learning_rate": 2.3225606057103254e-06, "loss": 0.3794, "step": 9090 }, { "epoch": 0.6210972193755551, "grad_norm": 3.9838573932647705, "learning_rate": 2.3218312762895424e-06, "loss": 0.2584, "step": 9091 }, { "epoch": 0.6211655393864863, "grad_norm": 4.0695319175720215, "learning_rate": 2.3211020045605564e-06, "loss": 0.2855, "step": 9092 }, { "epoch": 0.6212338593974175, "grad_norm": 5.282348155975342, "learning_rate": 2.320372790559079e-06, "loss": 0.3052, "step": 9093 }, { "epoch": 0.6213021794083488, "grad_norm": 4.315634727478027, "learning_rate": 2.3196436343208166e-06, "loss": 0.2693, "step": 9094 }, { "epoch": 0.6213704994192799, "grad_norm": 3.523488759994507, "learning_rate": 2.3189145358814744e-06, "loss": 0.2071, "step": 9095 }, { "epoch": 0.6214388194302111, "grad_norm": 3.57334566116333, "learning_rate": 2.318185495276754e-06, "loss": 0.2635, "step": 9096 }, { "epoch": 0.6215071394411423, "grad_norm": 3.350005626678467, "learning_rate": 2.3174565125423565e-06, "loss": 0.1858, "step": 9097 }, { "epoch": 0.6215754594520735, "grad_norm": 3.9584712982177734, "learning_rate": 2.316727587713976e-06, "loss": 0.2893, "step": 9098 }, { "epoch": 0.6216437794630048, "grad_norm": 4.609442710876465, "learning_rate": 2.3159987208273068e-06, "loss": 0.3407, "step": 9099 }, { "epoch": 0.6217120994739359, "grad_norm": 3.3737525939941406, "learning_rate": 2.3152699119180383e-06, "loss": 0.3216, "step": 9100 }, { "epoch": 0.6217804194848671, "grad_norm": 3.592416763305664, "learning_rate": 2.3145411610218625e-06, "loss": 0.218, "step": 9101 }, { "epoch": 0.6218487394957983, "grad_norm": 5.371716022491455, "learning_rate": 2.3138124681744597e-06, "loss": 0.2433, "step": 9102 }, { "epoch": 0.6219170595067295, "grad_norm": 4.370206832885742, "learning_rate": 2.3130838334115132e-06, "loss": 0.3416, "step": 9103 }, { "epoch": 0.6219853795176608, "grad_norm": 5.815556526184082, "learning_rate": 2.312355256768704e-06, "loss": 0.3054, "step": 9104 }, { "epoch": 0.6220536995285919, "grad_norm": 3.458289623260498, "learning_rate": 2.311626738281709e-06, "loss": 0.3075, "step": 9105 }, { "epoch": 0.6221220195395232, "grad_norm": 4.9369893074035645, "learning_rate": 2.3108982779861977e-06, "loss": 0.2735, "step": 9106 }, { "epoch": 0.6221903395504543, "grad_norm": 5.342353820800781, "learning_rate": 2.310169875917845e-06, "loss": 0.3367, "step": 9107 }, { "epoch": 0.6222586595613855, "grad_norm": 4.078559875488281, "learning_rate": 2.3094415321123167e-06, "loss": 0.2905, "step": 9108 }, { "epoch": 0.6223269795723168, "grad_norm": 3.6261074542999268, "learning_rate": 2.308713246605279e-06, "loss": 0.2913, "step": 9109 }, { "epoch": 0.6223952995832479, "grad_norm": 2.5835275650024414, "learning_rate": 2.3079850194323944e-06, "loss": 0.1842, "step": 9110 }, { "epoch": 0.6224636195941792, "grad_norm": 3.571268081665039, "learning_rate": 2.307256850629321e-06, "loss": 0.2572, "step": 9111 }, { "epoch": 0.6225319396051103, "grad_norm": 3.6740055084228516, "learning_rate": 2.306528740231715e-06, "loss": 0.2036, "step": 9112 }, { "epoch": 0.6226002596160415, "grad_norm": 2.6857247352600098, "learning_rate": 2.3058006882752315e-06, "loss": 0.2148, "step": 9113 }, { "epoch": 0.6226685796269728, "grad_norm": 4.629032611846924, "learning_rate": 2.3050726947955215e-06, "loss": 0.4717, "step": 9114 }, { "epoch": 0.6227368996379039, "grad_norm": 3.849648952484131, "learning_rate": 2.3043447598282305e-06, "loss": 0.3012, "step": 9115 }, { "epoch": 0.6228052196488352, "grad_norm": 3.3922431468963623, "learning_rate": 2.3036168834090057e-06, "loss": 0.2534, "step": 9116 }, { "epoch": 0.6228735396597663, "grad_norm": 4.530058860778809, "learning_rate": 2.3028890655734875e-06, "loss": 0.3299, "step": 9117 }, { "epoch": 0.6229418596706976, "grad_norm": 4.904822826385498, "learning_rate": 2.302161306357319e-06, "loss": 0.2711, "step": 9118 }, { "epoch": 0.6230101796816288, "grad_norm": 3.740617036819458, "learning_rate": 2.3014336057961326e-06, "loss": 0.2258, "step": 9119 }, { "epoch": 0.6230784996925599, "grad_norm": 4.297337055206299, "learning_rate": 2.3007059639255618e-06, "loss": 0.2389, "step": 9120 }, { "epoch": 0.6231468197034912, "grad_norm": 3.8772430419921875, "learning_rate": 2.29997838078124e-06, "loss": 0.3321, "step": 9121 }, { "epoch": 0.6232151397144223, "grad_norm": 3.521127939224243, "learning_rate": 2.299250856398795e-06, "loss": 0.2581, "step": 9122 }, { "epoch": 0.6232834597253536, "grad_norm": 4.101109504699707, "learning_rate": 2.2985233908138488e-06, "loss": 0.3109, "step": 9123 }, { "epoch": 0.6233517797362848, "grad_norm": 4.11191987991333, "learning_rate": 2.2977959840620253e-06, "loss": 0.3196, "step": 9124 }, { "epoch": 0.6234200997472159, "grad_norm": 3.5621695518493652, "learning_rate": 2.297068636178944e-06, "loss": 0.347, "step": 9125 }, { "epoch": 0.6234884197581472, "grad_norm": 4.7060627937316895, "learning_rate": 2.2963413472002213e-06, "loss": 0.3245, "step": 9126 }, { "epoch": 0.6235567397690783, "grad_norm": 4.621176242828369, "learning_rate": 2.295614117161469e-06, "loss": 0.2255, "step": 9127 }, { "epoch": 0.6236250597800096, "grad_norm": 5.614509105682373, "learning_rate": 2.294886946098299e-06, "loss": 0.3607, "step": 9128 }, { "epoch": 0.6236933797909407, "grad_norm": 4.155248641967773, "learning_rate": 2.294159834046319e-06, "loss": 0.2978, "step": 9129 }, { "epoch": 0.623761699801872, "grad_norm": 5.596879482269287, "learning_rate": 2.2934327810411333e-06, "loss": 0.37, "step": 9130 }, { "epoch": 0.6238300198128032, "grad_norm": 4.12957763671875, "learning_rate": 2.2927057871183432e-06, "loss": 0.4218, "step": 9131 }, { "epoch": 0.6238983398237343, "grad_norm": 3.8686165809631348, "learning_rate": 2.291978852313549e-06, "loss": 0.2099, "step": 9132 }, { "epoch": 0.6239666598346656, "grad_norm": 4.216334342956543, "learning_rate": 2.2912519766623457e-06, "loss": 0.3445, "step": 9133 }, { "epoch": 0.6240349798455967, "grad_norm": 3.7077131271362305, "learning_rate": 2.2905251602003272e-06, "loss": 0.3235, "step": 9134 }, { "epoch": 0.624103299856528, "grad_norm": 4.307738780975342, "learning_rate": 2.2897984029630832e-06, "loss": 0.2532, "step": 9135 }, { "epoch": 0.6241716198674592, "grad_norm": 4.493906021118164, "learning_rate": 2.289071704986201e-06, "loss": 0.3148, "step": 9136 }, { "epoch": 0.6242399398783903, "grad_norm": 3.51374888420105, "learning_rate": 2.2883450663052643e-06, "loss": 0.2527, "step": 9137 }, { "epoch": 0.6243082598893216, "grad_norm": 4.737118721008301, "learning_rate": 2.2876184869558583e-06, "loss": 0.3221, "step": 9138 }, { "epoch": 0.6243765799002527, "grad_norm": 2.8882293701171875, "learning_rate": 2.2868919669735567e-06, "loss": 0.2724, "step": 9139 }, { "epoch": 0.624444899911184, "grad_norm": 4.360501289367676, "learning_rate": 2.2861655063939388e-06, "loss": 0.3831, "step": 9140 }, { "epoch": 0.6245132199221152, "grad_norm": 5.40994119644165, "learning_rate": 2.285439105252576e-06, "loss": 0.3884, "step": 9141 }, { "epoch": 0.6245815399330464, "grad_norm": 5.985200881958008, "learning_rate": 2.284712763585039e-06, "loss": 0.2455, "step": 9142 }, { "epoch": 0.6246498599439776, "grad_norm": 3.971195936203003, "learning_rate": 2.2839864814268936e-06, "loss": 0.2375, "step": 9143 }, { "epoch": 0.6247181799549087, "grad_norm": 3.5495481491088867, "learning_rate": 2.2832602588137054e-06, "loss": 0.264, "step": 9144 }, { "epoch": 0.62478649996584, "grad_norm": 4.0457072257995605, "learning_rate": 2.282534095781034e-06, "loss": 0.2806, "step": 9145 }, { "epoch": 0.6248548199767712, "grad_norm": 3.960057258605957, "learning_rate": 2.2818079923644403e-06, "loss": 0.2404, "step": 9146 }, { "epoch": 0.6249231399877024, "grad_norm": 4.082526683807373, "learning_rate": 2.281081948599476e-06, "loss": 0.2058, "step": 9147 }, { "epoch": 0.6249914599986336, "grad_norm": 4.351716995239258, "learning_rate": 2.2803559645216958e-06, "loss": 0.4318, "step": 9148 }, { "epoch": 0.6250597800095647, "grad_norm": 5.773955821990967, "learning_rate": 2.2796300401666494e-06, "loss": 0.4553, "step": 9149 }, { "epoch": 0.625128100020496, "grad_norm": 4.019444942474365, "learning_rate": 2.278904175569883e-06, "loss": 0.2557, "step": 9150 }, { "epoch": 0.6251964200314272, "grad_norm": 3.502591848373413, "learning_rate": 2.2781783707669394e-06, "loss": 0.2106, "step": 9151 }, { "epoch": 0.6252647400423584, "grad_norm": 3.6168739795684814, "learning_rate": 2.27745262579336e-06, "loss": 0.2183, "step": 9152 }, { "epoch": 0.6253330600532896, "grad_norm": 3.7248237133026123, "learning_rate": 2.276726940684682e-06, "loss": 0.2488, "step": 9153 }, { "epoch": 0.6254013800642209, "grad_norm": 4.285985469818115, "learning_rate": 2.276001315476443e-06, "loss": 0.2879, "step": 9154 }, { "epoch": 0.625469700075152, "grad_norm": 3.760556697845459, "learning_rate": 2.2752757502041718e-06, "loss": 0.3162, "step": 9155 }, { "epoch": 0.6255380200860832, "grad_norm": 3.6088380813598633, "learning_rate": 2.2745502449033976e-06, "loss": 0.2606, "step": 9156 }, { "epoch": 0.6256063400970144, "grad_norm": 3.491100788116455, "learning_rate": 2.2738247996096473e-06, "loss": 0.2938, "step": 9157 }, { "epoch": 0.6256746601079456, "grad_norm": 4.423503398895264, "learning_rate": 2.2730994143584462e-06, "loss": 0.3408, "step": 9158 }, { "epoch": 0.6257429801188769, "grad_norm": 3.990706443786621, "learning_rate": 2.27237408918531e-06, "loss": 0.2913, "step": 9159 }, { "epoch": 0.625811300129808, "grad_norm": 3.549058198928833, "learning_rate": 2.2716488241257595e-06, "loss": 0.2689, "step": 9160 }, { "epoch": 0.6258796201407392, "grad_norm": 3.315020799636841, "learning_rate": 2.2709236192153073e-06, "loss": 0.27, "step": 9161 }, { "epoch": 0.6259479401516704, "grad_norm": 7.484966278076172, "learning_rate": 2.270198474489466e-06, "loss": 0.4955, "step": 9162 }, { "epoch": 0.6260162601626016, "grad_norm": 4.458646297454834, "learning_rate": 2.2694733899837428e-06, "loss": 0.2818, "step": 9163 }, { "epoch": 0.6260845801735329, "grad_norm": 4.591877460479736, "learning_rate": 2.2687483657336436e-06, "loss": 0.36, "step": 9164 }, { "epoch": 0.626152900184464, "grad_norm": 2.7467994689941406, "learning_rate": 2.2680234017746707e-06, "loss": 0.2592, "step": 9165 }, { "epoch": 0.6262212201953953, "grad_norm": 4.651007652282715, "learning_rate": 2.2672984981423245e-06, "loss": 0.4157, "step": 9166 }, { "epoch": 0.6262895402063264, "grad_norm": 4.171921253204346, "learning_rate": 2.266573654872101e-06, "loss": 0.283, "step": 9167 }, { "epoch": 0.6263578602172576, "grad_norm": 4.36801290512085, "learning_rate": 2.2658488719994937e-06, "loss": 0.2962, "step": 9168 }, { "epoch": 0.6264261802281889, "grad_norm": 3.730628490447998, "learning_rate": 2.265124149559993e-06, "loss": 0.2876, "step": 9169 }, { "epoch": 0.62649450023912, "grad_norm": 5.4998459815979, "learning_rate": 2.264399487589088e-06, "loss": 0.3146, "step": 9170 }, { "epoch": 0.6265628202500513, "grad_norm": 3.9747512340545654, "learning_rate": 2.2636748861222626e-06, "loss": 0.3035, "step": 9171 }, { "epoch": 0.6266311402609824, "grad_norm": 4.1653971672058105, "learning_rate": 2.2629503451949975e-06, "loss": 0.2585, "step": 9172 }, { "epoch": 0.6266994602719136, "grad_norm": 4.504354953765869, "learning_rate": 2.2622258648427727e-06, "loss": 0.2748, "step": 9173 }, { "epoch": 0.6267677802828449, "grad_norm": 4.834123134613037, "learning_rate": 2.2615014451010645e-06, "loss": 0.3048, "step": 9174 }, { "epoch": 0.626836100293776, "grad_norm": 4.9853057861328125, "learning_rate": 2.260777086005347e-06, "loss": 0.3674, "step": 9175 }, { "epoch": 0.6269044203047073, "grad_norm": 4.153550148010254, "learning_rate": 2.260052787591086e-06, "loss": 0.3119, "step": 9176 }, { "epoch": 0.6269727403156384, "grad_norm": 4.101962566375732, "learning_rate": 2.259328549893752e-06, "loss": 0.374, "step": 9177 }, { "epoch": 0.6270410603265697, "grad_norm": 3.425123453140259, "learning_rate": 2.2586043729488074e-06, "loss": 0.2106, "step": 9178 }, { "epoch": 0.6271093803375009, "grad_norm": 3.4994544982910156, "learning_rate": 2.257880256791715e-06, "loss": 0.2626, "step": 9179 }, { "epoch": 0.627177700348432, "grad_norm": 4.135837078094482, "learning_rate": 2.257156201457931e-06, "loss": 0.2595, "step": 9180 }, { "epoch": 0.6272460203593633, "grad_norm": 3.530000925064087, "learning_rate": 2.2564322069829104e-06, "loss": 0.3407, "step": 9181 }, { "epoch": 0.6273143403702944, "grad_norm": 4.297816753387451, "learning_rate": 2.2557082734021066e-06, "loss": 0.2345, "step": 9182 }, { "epoch": 0.6273826603812257, "grad_norm": 3.403789758682251, "learning_rate": 2.254984400750969e-06, "loss": 0.2796, "step": 9183 }, { "epoch": 0.6274509803921569, "grad_norm": 4.109457015991211, "learning_rate": 2.2542605890649414e-06, "loss": 0.2276, "step": 9184 }, { "epoch": 0.627519300403088, "grad_norm": 4.223748207092285, "learning_rate": 2.2535368383794688e-06, "loss": 0.2462, "step": 9185 }, { "epoch": 0.6275876204140193, "grad_norm": 3.4317235946655273, "learning_rate": 2.25281314872999e-06, "loss": 0.2258, "step": 9186 }, { "epoch": 0.6276559404249504, "grad_norm": 5.130075454711914, "learning_rate": 2.252089520151945e-06, "loss": 0.2306, "step": 9187 }, { "epoch": 0.6277242604358817, "grad_norm": 4.582544803619385, "learning_rate": 2.251365952680765e-06, "loss": 0.3338, "step": 9188 }, { "epoch": 0.6277925804468129, "grad_norm": 3.890773296356201, "learning_rate": 2.2506424463518818e-06, "loss": 0.3846, "step": 9189 }, { "epoch": 0.6278609004577441, "grad_norm": 4.288197040557861, "learning_rate": 2.2499190012007244e-06, "loss": 0.2919, "step": 9190 }, { "epoch": 0.6279292204686753, "grad_norm": 4.462541103363037, "learning_rate": 2.249195617262719e-06, "loss": 0.292, "step": 9191 }, { "epoch": 0.6279975404796064, "grad_norm": 4.649036884307861, "learning_rate": 2.248472294573284e-06, "loss": 0.3307, "step": 9192 }, { "epoch": 0.6280658604905377, "grad_norm": 3.3974783420562744, "learning_rate": 2.247749033167842e-06, "loss": 0.2203, "step": 9193 }, { "epoch": 0.6281341805014689, "grad_norm": 3.3200225830078125, "learning_rate": 2.2470258330818084e-06, "loss": 0.3423, "step": 9194 }, { "epoch": 0.6282025005124001, "grad_norm": 2.9445247650146484, "learning_rate": 2.2463026943505965e-06, "loss": 0.2326, "step": 9195 }, { "epoch": 0.6282708205233313, "grad_norm": 6.1920552253723145, "learning_rate": 2.2455796170096157e-06, "loss": 0.2473, "step": 9196 }, { "epoch": 0.6283391405342624, "grad_norm": 4.162729263305664, "learning_rate": 2.2448566010942734e-06, "loss": 0.3172, "step": 9197 }, { "epoch": 0.6284074605451937, "grad_norm": 5.120251178741455, "learning_rate": 2.244133646639974e-06, "loss": 0.3213, "step": 9198 }, { "epoch": 0.6284757805561249, "grad_norm": 3.4929730892181396, "learning_rate": 2.2434107536821194e-06, "loss": 0.2809, "step": 9199 }, { "epoch": 0.6285441005670561, "grad_norm": 4.315192222595215, "learning_rate": 2.2426879222561065e-06, "loss": 0.2815, "step": 9200 }, { "epoch": 0.6286124205779873, "grad_norm": 4.077929496765137, "learning_rate": 2.241965152397331e-06, "loss": 0.3042, "step": 9201 }, { "epoch": 0.6286807405889185, "grad_norm": 4.519874095916748, "learning_rate": 2.2412424441411843e-06, "loss": 0.307, "step": 9202 }, { "epoch": 0.6287490605998497, "grad_norm": 4.334991931915283, "learning_rate": 2.2405197975230578e-06, "loss": 0.3585, "step": 9203 }, { "epoch": 0.6288173806107809, "grad_norm": 5.677831172943115, "learning_rate": 2.239797212578335e-06, "loss": 0.3566, "step": 9204 }, { "epoch": 0.6288857006217121, "grad_norm": 4.063526153564453, "learning_rate": 2.239074689342399e-06, "loss": 0.2971, "step": 9205 }, { "epoch": 0.6289540206326433, "grad_norm": 3.675424098968506, "learning_rate": 2.238352227850631e-06, "loss": 0.2818, "step": 9206 }, { "epoch": 0.6290223406435745, "grad_norm": 2.2950165271759033, "learning_rate": 2.2376298281384096e-06, "loss": 0.1958, "step": 9207 }, { "epoch": 0.6290906606545057, "grad_norm": 4.15023946762085, "learning_rate": 2.2369074902411062e-06, "loss": 0.2954, "step": 9208 }, { "epoch": 0.6291589806654369, "grad_norm": 3.889167547225952, "learning_rate": 2.2361852141940915e-06, "loss": 0.3684, "step": 9209 }, { "epoch": 0.6292273006763681, "grad_norm": 3.6734566688537598, "learning_rate": 2.235463000032735e-06, "loss": 0.1742, "step": 9210 }, { "epoch": 0.6292956206872993, "grad_norm": 4.394802093505859, "learning_rate": 2.2347408477924027e-06, "loss": 0.2879, "step": 9211 }, { "epoch": 0.6293639406982305, "grad_norm": 4.430087566375732, "learning_rate": 2.2340187575084526e-06, "loss": 0.3684, "step": 9212 }, { "epoch": 0.6294322607091617, "grad_norm": 4.470284938812256, "learning_rate": 2.233296729216247e-06, "loss": 0.3107, "step": 9213 }, { "epoch": 0.629500580720093, "grad_norm": 3.3952009677886963, "learning_rate": 2.2325747629511402e-06, "loss": 0.3502, "step": 9214 }, { "epoch": 0.6295689007310241, "grad_norm": 3.715632200241089, "learning_rate": 2.2318528587484862e-06, "loss": 0.322, "step": 9215 }, { "epoch": 0.6296372207419553, "grad_norm": 4.655247688293457, "learning_rate": 2.231131016643633e-06, "loss": 0.3927, "step": 9216 }, { "epoch": 0.6297055407528865, "grad_norm": 3.5670387744903564, "learning_rate": 2.2304092366719287e-06, "loss": 0.317, "step": 9217 }, { "epoch": 0.6297738607638177, "grad_norm": 4.351548671722412, "learning_rate": 2.2296875188687156e-06, "loss": 0.3648, "step": 9218 }, { "epoch": 0.629842180774749, "grad_norm": 4.415576457977295, "learning_rate": 2.2289658632693363e-06, "loss": 0.2883, "step": 9219 }, { "epoch": 0.6299105007856801, "grad_norm": 6.081542491912842, "learning_rate": 2.228244269909127e-06, "loss": 0.3233, "step": 9220 }, { "epoch": 0.6299788207966114, "grad_norm": 5.426986217498779, "learning_rate": 2.2275227388234214e-06, "loss": 0.4191, "step": 9221 }, { "epoch": 0.6300471408075425, "grad_norm": 4.099833965301514, "learning_rate": 2.2268012700475526e-06, "loss": 0.406, "step": 9222 }, { "epoch": 0.6301154608184737, "grad_norm": 2.9520809650421143, "learning_rate": 2.226079863616849e-06, "loss": 0.2899, "step": 9223 }, { "epoch": 0.630183780829405, "grad_norm": 4.256582736968994, "learning_rate": 2.2253585195666344e-06, "loss": 0.2807, "step": 9224 }, { "epoch": 0.6302521008403361, "grad_norm": 4.796685695648193, "learning_rate": 2.224637237932232e-06, "loss": 0.3141, "step": 9225 }, { "epoch": 0.6303204208512674, "grad_norm": 3.426274299621582, "learning_rate": 2.2239160187489604e-06, "loss": 0.2626, "step": 9226 }, { "epoch": 0.6303887408621985, "grad_norm": 3.578024387359619, "learning_rate": 2.2231948620521386e-06, "loss": 0.2943, "step": 9227 }, { "epoch": 0.6304570608731297, "grad_norm": 3.9096014499664307, "learning_rate": 2.2224737678770756e-06, "loss": 0.3016, "step": 9228 }, { "epoch": 0.630525380884061, "grad_norm": 3.8607492446899414, "learning_rate": 2.2217527362590846e-06, "loss": 0.2907, "step": 9229 }, { "epoch": 0.6305937008949921, "grad_norm": 2.523921251296997, "learning_rate": 2.221031767233471e-06, "loss": 0.2557, "step": 9230 }, { "epoch": 0.6306620209059234, "grad_norm": 4.539708614349365, "learning_rate": 2.2203108608355397e-06, "loss": 0.2514, "step": 9231 }, { "epoch": 0.6307303409168545, "grad_norm": 4.957020282745361, "learning_rate": 2.2195900171005907e-06, "loss": 0.2746, "step": 9232 }, { "epoch": 0.6307986609277858, "grad_norm": 5.888706684112549, "learning_rate": 2.218869236063922e-06, "loss": 0.4493, "step": 9233 }, { "epoch": 0.630866980938717, "grad_norm": 3.7276761531829834, "learning_rate": 2.218148517760829e-06, "loss": 0.2646, "step": 9234 }, { "epoch": 0.6309353009496481, "grad_norm": 4.4347405433654785, "learning_rate": 2.217427862226603e-06, "loss": 0.2946, "step": 9235 }, { "epoch": 0.6310036209605794, "grad_norm": 4.210602283477783, "learning_rate": 2.2167072694965323e-06, "loss": 0.3242, "step": 9236 }, { "epoch": 0.6310719409715105, "grad_norm": 4.1145195960998535, "learning_rate": 2.215986739605902e-06, "loss": 0.2865, "step": 9237 }, { "epoch": 0.6311402609824418, "grad_norm": 2.7870516777038574, "learning_rate": 2.215266272589996e-06, "loss": 0.2719, "step": 9238 }, { "epoch": 0.631208580993373, "grad_norm": 3.7133891582489014, "learning_rate": 2.214545868484093e-06, "loss": 0.2193, "step": 9239 }, { "epoch": 0.6312769010043041, "grad_norm": 4.272994041442871, "learning_rate": 2.213825527323468e-06, "loss": 0.3711, "step": 9240 }, { "epoch": 0.6313452210152354, "grad_norm": 3.126647710800171, "learning_rate": 2.213105249143396e-06, "loss": 0.2667, "step": 9241 }, { "epoch": 0.6314135410261665, "grad_norm": 3.949136972427368, "learning_rate": 2.212385033979145e-06, "loss": 0.3022, "step": 9242 }, { "epoch": 0.6314818610370978, "grad_norm": 3.5137391090393066, "learning_rate": 2.211664881865985e-06, "loss": 0.3137, "step": 9243 }, { "epoch": 0.631550181048029, "grad_norm": 3.926250457763672, "learning_rate": 2.2109447928391786e-06, "loss": 0.3624, "step": 9244 }, { "epoch": 0.6316185010589602, "grad_norm": 3.641289710998535, "learning_rate": 2.2102247669339854e-06, "loss": 0.28, "step": 9245 }, { "epoch": 0.6316868210698914, "grad_norm": 3.036198854446411, "learning_rate": 2.2095048041856644e-06, "loss": 0.2413, "step": 9246 }, { "epoch": 0.6317551410808225, "grad_norm": 6.2466206550598145, "learning_rate": 2.2087849046294702e-06, "loss": 0.3087, "step": 9247 }, { "epoch": 0.6318234610917538, "grad_norm": 4.276885986328125, "learning_rate": 2.2080650683006543e-06, "loss": 0.3117, "step": 9248 }, { "epoch": 0.631891781102685, "grad_norm": 5.346490859985352, "learning_rate": 2.2073452952344648e-06, "loss": 0.2481, "step": 9249 }, { "epoch": 0.6319601011136162, "grad_norm": 3.6475162506103516, "learning_rate": 2.2066255854661474e-06, "loss": 0.3454, "step": 9250 }, { "epoch": 0.6320284211245474, "grad_norm": 4.601018905639648, "learning_rate": 2.2059059390309446e-06, "loss": 0.3932, "step": 9251 }, { "epoch": 0.6320967411354785, "grad_norm": 3.9833970069885254, "learning_rate": 2.2051863559640957e-06, "loss": 0.2256, "step": 9252 }, { "epoch": 0.6321650611464098, "grad_norm": 3.9070847034454346, "learning_rate": 2.2044668363008354e-06, "loss": 0.3634, "step": 9253 }, { "epoch": 0.632233381157341, "grad_norm": 4.174132347106934, "learning_rate": 2.203747380076398e-06, "loss": 0.3937, "step": 9254 }, { "epoch": 0.6323017011682722, "grad_norm": 4.011155605316162, "learning_rate": 2.203027987326013e-06, "loss": 0.2622, "step": 9255 }, { "epoch": 0.6323700211792034, "grad_norm": 3.8767476081848145, "learning_rate": 2.2023086580849074e-06, "loss": 0.2659, "step": 9256 }, { "epoch": 0.6324383411901346, "grad_norm": 4.330173492431641, "learning_rate": 2.2015893923883043e-06, "loss": 0.3446, "step": 9257 }, { "epoch": 0.6325066612010658, "grad_norm": 3.4154584407806396, "learning_rate": 2.2008701902714245e-06, "loss": 0.3556, "step": 9258 }, { "epoch": 0.632574981211997, "grad_norm": 4.031851768493652, "learning_rate": 2.200151051769485e-06, "loss": 0.2979, "step": 9259 }, { "epoch": 0.6326433012229282, "grad_norm": 3.578005790710449, "learning_rate": 2.199431976917702e-06, "loss": 0.2983, "step": 9260 }, { "epoch": 0.6327116212338594, "grad_norm": 3.4566152095794678, "learning_rate": 2.1987129657512845e-06, "loss": 0.2502, "step": 9261 }, { "epoch": 0.6327799412447906, "grad_norm": 4.606147289276123, "learning_rate": 2.1979940183054406e-06, "loss": 0.2732, "step": 9262 }, { "epoch": 0.6328482612557218, "grad_norm": 4.0179548263549805, "learning_rate": 2.1972751346153766e-06, "loss": 0.1925, "step": 9263 }, { "epoch": 0.632916581266653, "grad_norm": 4.679784774780273, "learning_rate": 2.196556314716295e-06, "loss": 0.3403, "step": 9264 }, { "epoch": 0.6329849012775842, "grad_norm": 5.701273441314697, "learning_rate": 2.195837558643391e-06, "loss": 0.1971, "step": 9265 }, { "epoch": 0.6330532212885154, "grad_norm": 3.4144649505615234, "learning_rate": 2.1951188664318636e-06, "loss": 0.2716, "step": 9266 }, { "epoch": 0.6331215412994466, "grad_norm": 3.3159406185150146, "learning_rate": 2.194400238116903e-06, "loss": 0.1838, "step": 9267 }, { "epoch": 0.6331898613103778, "grad_norm": 5.107249736785889, "learning_rate": 2.1936816737337014e-06, "loss": 0.3346, "step": 9268 }, { "epoch": 0.6332581813213091, "grad_norm": 4.850197792053223, "learning_rate": 2.192963173317442e-06, "loss": 0.3658, "step": 9269 }, { "epoch": 0.6333265013322402, "grad_norm": 4.530911922454834, "learning_rate": 2.192244736903309e-06, "loss": 0.3241, "step": 9270 }, { "epoch": 0.6333948213431714, "grad_norm": 3.9567177295684814, "learning_rate": 2.1915263645264823e-06, "loss": 0.2703, "step": 9271 }, { "epoch": 0.6334631413541026, "grad_norm": 3.8971478939056396, "learning_rate": 2.1908080562221397e-06, "loss": 0.4237, "step": 9272 }, { "epoch": 0.6335314613650338, "grad_norm": 4.683571815490723, "learning_rate": 2.1900898120254524e-06, "loss": 0.2438, "step": 9273 }, { "epoch": 0.6335997813759651, "grad_norm": 3.0657551288604736, "learning_rate": 2.189371631971593e-06, "loss": 0.1929, "step": 9274 }, { "epoch": 0.6336681013868962, "grad_norm": 4.5441083908081055, "learning_rate": 2.188653516095728e-06, "loss": 0.3269, "step": 9275 }, { "epoch": 0.6337364213978274, "grad_norm": 7.662209510803223, "learning_rate": 2.187935464433023e-06, "loss": 0.3145, "step": 9276 }, { "epoch": 0.6338047414087586, "grad_norm": 4.808516979217529, "learning_rate": 2.187217477018637e-06, "loss": 0.2527, "step": 9277 }, { "epoch": 0.6338730614196898, "grad_norm": 3.561429500579834, "learning_rate": 2.1864995538877283e-06, "loss": 0.269, "step": 9278 }, { "epoch": 0.6339413814306211, "grad_norm": 3.5348551273345947, "learning_rate": 2.185781695075453e-06, "loss": 0.1793, "step": 9279 }, { "epoch": 0.6340097014415522, "grad_norm": 4.124418258666992, "learning_rate": 2.1850639006169635e-06, "loss": 0.2653, "step": 9280 }, { "epoch": 0.6340780214524835, "grad_norm": 4.426787376403809, "learning_rate": 2.1843461705474048e-06, "loss": 0.2357, "step": 9281 }, { "epoch": 0.6341463414634146, "grad_norm": 4.106478214263916, "learning_rate": 2.1836285049019247e-06, "loss": 0.3241, "step": 9282 }, { "epoch": 0.6342146614743458, "grad_norm": 3.369518995285034, "learning_rate": 2.182910903715665e-06, "loss": 0.3079, "step": 9283 }, { "epoch": 0.6342829814852771, "grad_norm": 4.440991401672363, "learning_rate": 2.182193367023766e-06, "loss": 0.4072, "step": 9284 }, { "epoch": 0.6343513014962082, "grad_norm": 4.738173007965088, "learning_rate": 2.1814758948613607e-06, "loss": 0.3624, "step": 9285 }, { "epoch": 0.6344196215071395, "grad_norm": 5.483972549438477, "learning_rate": 2.1807584872635838e-06, "loss": 0.3602, "step": 9286 }, { "epoch": 0.6344879415180706, "grad_norm": 5.465360164642334, "learning_rate": 2.180041144265564e-06, "loss": 0.2358, "step": 9287 }, { "epoch": 0.6345562615290018, "grad_norm": 3.7812743186950684, "learning_rate": 2.1793238659024294e-06, "loss": 0.3771, "step": 9288 }, { "epoch": 0.6346245815399331, "grad_norm": 4.522963047027588, "learning_rate": 2.1786066522093008e-06, "loss": 0.4447, "step": 9289 }, { "epoch": 0.6346929015508642, "grad_norm": 2.5602705478668213, "learning_rate": 2.1778895032212997e-06, "loss": 0.2501, "step": 9290 }, { "epoch": 0.6347612215617955, "grad_norm": 4.172297477722168, "learning_rate": 2.1771724189735426e-06, "loss": 0.2679, "step": 9291 }, { "epoch": 0.6348295415727266, "grad_norm": 3.384674310684204, "learning_rate": 2.1764553995011436e-06, "loss": 0.2738, "step": 9292 }, { "epoch": 0.6348978615836579, "grad_norm": 3.2884433269500732, "learning_rate": 2.1757384448392126e-06, "loss": 0.2306, "step": 9293 }, { "epoch": 0.6349661815945891, "grad_norm": 3.9779605865478516, "learning_rate": 2.175021555022857e-06, "loss": 0.3378, "step": 9294 }, { "epoch": 0.6350345016055202, "grad_norm": 4.756565093994141, "learning_rate": 2.174304730087181e-06, "loss": 0.4174, "step": 9295 }, { "epoch": 0.6351028216164515, "grad_norm": 5.004354000091553, "learning_rate": 2.1735879700672875e-06, "loss": 0.29, "step": 9296 }, { "epoch": 0.6351711416273826, "grad_norm": 4.03468656539917, "learning_rate": 2.1728712749982713e-06, "loss": 0.3417, "step": 9297 }, { "epoch": 0.6352394616383139, "grad_norm": 3.681452512741089, "learning_rate": 2.172154644915228e-06, "loss": 0.304, "step": 9298 }, { "epoch": 0.6353077816492451, "grad_norm": 4.611705780029297, "learning_rate": 2.1714380798532504e-06, "loss": 0.3024, "step": 9299 }, { "epoch": 0.6353761016601762, "grad_norm": 4.07162618637085, "learning_rate": 2.170721579847427e-06, "loss": 0.2966, "step": 9300 }, { "epoch": 0.6354444216711075, "grad_norm": 4.165815830230713, "learning_rate": 2.17000514493284e-06, "loss": 0.2579, "step": 9301 }, { "epoch": 0.6355127416820386, "grad_norm": 3.5266284942626953, "learning_rate": 2.1692887751445735e-06, "loss": 0.2804, "step": 9302 }, { "epoch": 0.6355810616929699, "grad_norm": 3.997434616088867, "learning_rate": 2.1685724705177063e-06, "loss": 0.3015, "step": 9303 }, { "epoch": 0.6356493817039011, "grad_norm": 3.3726108074188232, "learning_rate": 2.1678562310873145e-06, "loss": 0.2433, "step": 9304 }, { "epoch": 0.6357177017148323, "grad_norm": 4.003909587860107, "learning_rate": 2.167140056888468e-06, "loss": 0.3074, "step": 9305 }, { "epoch": 0.6357860217257635, "grad_norm": 3.910675525665283, "learning_rate": 2.166423947956238e-06, "loss": 0.2583, "step": 9306 }, { "epoch": 0.6358543417366946, "grad_norm": 3.8851258754730225, "learning_rate": 2.1657079043256894e-06, "loss": 0.3171, "step": 9307 }, { "epoch": 0.6359226617476259, "grad_norm": 3.077413320541382, "learning_rate": 2.164991926031886e-06, "loss": 0.2357, "step": 9308 }, { "epoch": 0.6359909817585571, "grad_norm": 3.7180397510528564, "learning_rate": 2.164276013109887e-06, "loss": 0.2362, "step": 9309 }, { "epoch": 0.6360593017694883, "grad_norm": 4.003891468048096, "learning_rate": 2.163560165594748e-06, "loss": 0.2348, "step": 9310 }, { "epoch": 0.6361276217804195, "grad_norm": 3.8229262828826904, "learning_rate": 2.162844383521523e-06, "loss": 0.3615, "step": 9311 }, { "epoch": 0.6361959417913506, "grad_norm": 4.562912940979004, "learning_rate": 2.1621286669252603e-06, "loss": 0.3371, "step": 9312 }, { "epoch": 0.6362642618022819, "grad_norm": 3.3521368503570557, "learning_rate": 2.1614130158410106e-06, "loss": 0.2983, "step": 9313 }, { "epoch": 0.6363325818132131, "grad_norm": 3.8500635623931885, "learning_rate": 2.1606974303038135e-06, "loss": 0.381, "step": 9314 }, { "epoch": 0.6364009018241443, "grad_norm": 5.414047718048096, "learning_rate": 2.15998191034871e-06, "loss": 0.2182, "step": 9315 }, { "epoch": 0.6364692218350755, "grad_norm": 3.8680548667907715, "learning_rate": 2.159266456010738e-06, "loss": 0.2639, "step": 9316 }, { "epoch": 0.6365375418460067, "grad_norm": 5.235780239105225, "learning_rate": 2.1585510673249333e-06, "loss": 0.355, "step": 9317 }, { "epoch": 0.6366058618569379, "grad_norm": 4.55716609954834, "learning_rate": 2.1578357443263224e-06, "loss": 0.2895, "step": 9318 }, { "epoch": 0.6366741818678691, "grad_norm": 3.205998420715332, "learning_rate": 2.157120487049935e-06, "loss": 0.2852, "step": 9319 }, { "epoch": 0.6367425018788003, "grad_norm": 4.136711597442627, "learning_rate": 2.156405295530796e-06, "loss": 0.3025, "step": 9320 }, { "epoch": 0.6368108218897315, "grad_norm": 3.9183244705200195, "learning_rate": 2.1556901698039264e-06, "loss": 0.2497, "step": 9321 }, { "epoch": 0.6368791419006627, "grad_norm": 3.7748630046844482, "learning_rate": 2.1549751099043423e-06, "loss": 0.2421, "step": 9322 }, { "epoch": 0.6369474619115939, "grad_norm": 5.10552978515625, "learning_rate": 2.1542601158670595e-06, "loss": 0.3119, "step": 9323 }, { "epoch": 0.6370157819225251, "grad_norm": 3.9031903743743896, "learning_rate": 2.1535451877270888e-06, "loss": 0.2362, "step": 9324 }, { "epoch": 0.6370841019334563, "grad_norm": 3.2906534671783447, "learning_rate": 2.1528303255194397e-06, "loss": 0.2854, "step": 9325 }, { "epoch": 0.6371524219443875, "grad_norm": 5.320740222930908, "learning_rate": 2.152115529279115e-06, "loss": 0.2854, "step": 9326 }, { "epoch": 0.6372207419553187, "grad_norm": 3.4990804195404053, "learning_rate": 2.1514007990411183e-06, "loss": 0.2358, "step": 9327 }, { "epoch": 0.6372890619662499, "grad_norm": 4.028634548187256, "learning_rate": 2.150686134840446e-06, "loss": 0.244, "step": 9328 }, { "epoch": 0.6373573819771812, "grad_norm": 3.970032215118408, "learning_rate": 2.1499715367120966e-06, "loss": 0.2379, "step": 9329 }, { "epoch": 0.6374257019881123, "grad_norm": 4.558985710144043, "learning_rate": 2.1492570046910592e-06, "loss": 0.358, "step": 9330 }, { "epoch": 0.6374940219990435, "grad_norm": 3.557231903076172, "learning_rate": 2.148542538812322e-06, "loss": 0.2693, "step": 9331 }, { "epoch": 0.6375623420099747, "grad_norm": 3.431910514831543, "learning_rate": 2.147828139110873e-06, "loss": 0.2138, "step": 9332 }, { "epoch": 0.6376306620209059, "grad_norm": 4.953211784362793, "learning_rate": 2.1471138056216944e-06, "loss": 0.2615, "step": 9333 }, { "epoch": 0.6376989820318372, "grad_norm": 3.1706948280334473, "learning_rate": 2.1463995383797623e-06, "loss": 0.232, "step": 9334 }, { "epoch": 0.6377673020427683, "grad_norm": 3.981748342514038, "learning_rate": 2.1456853374200543e-06, "loss": 0.3586, "step": 9335 }, { "epoch": 0.6378356220536995, "grad_norm": 3.1115643978118896, "learning_rate": 2.1449712027775438e-06, "loss": 0.1919, "step": 9336 }, { "epoch": 0.6379039420646307, "grad_norm": 3.7257273197174072, "learning_rate": 2.1442571344871995e-06, "loss": 0.2724, "step": 9337 }, { "epoch": 0.6379722620755619, "grad_norm": 4.789478302001953, "learning_rate": 2.1435431325839865e-06, "loss": 0.3338, "step": 9338 }, { "epoch": 0.6380405820864932, "grad_norm": 4.356687545776367, "learning_rate": 2.142829197102868e-06, "loss": 0.2528, "step": 9339 }, { "epoch": 0.6381089020974243, "grad_norm": 3.635859489440918, "learning_rate": 2.142115328078804e-06, "loss": 0.2933, "step": 9340 }, { "epoch": 0.6381772221083556, "grad_norm": 4.28905725479126, "learning_rate": 2.141401525546751e-06, "loss": 0.2594, "step": 9341 }, { "epoch": 0.6382455421192867, "grad_norm": 4.271822929382324, "learning_rate": 2.1406877895416612e-06, "loss": 0.3088, "step": 9342 }, { "epoch": 0.6383138621302179, "grad_norm": 4.107973098754883, "learning_rate": 2.1399741200984845e-06, "loss": 0.2495, "step": 9343 }, { "epoch": 0.6383821821411492, "grad_norm": 3.2430107593536377, "learning_rate": 2.1392605172521675e-06, "loss": 0.2906, "step": 9344 }, { "epoch": 0.6384505021520803, "grad_norm": 2.8981645107269287, "learning_rate": 2.1385469810376544e-06, "loss": 0.2336, "step": 9345 }, { "epoch": 0.6385188221630116, "grad_norm": 4.99744176864624, "learning_rate": 2.1378335114898834e-06, "loss": 0.2625, "step": 9346 }, { "epoch": 0.6385871421739427, "grad_norm": 4.166303634643555, "learning_rate": 2.1371201086437924e-06, "loss": 0.2432, "step": 9347 }, { "epoch": 0.6386554621848739, "grad_norm": 3.970974922180176, "learning_rate": 2.1364067725343135e-06, "loss": 0.324, "step": 9348 }, { "epoch": 0.6387237821958052, "grad_norm": 3.933314561843872, "learning_rate": 2.1356935031963803e-06, "loss": 0.3392, "step": 9349 }, { "epoch": 0.6387921022067363, "grad_norm": 3.293256998062134, "learning_rate": 2.1349803006649158e-06, "loss": 0.2901, "step": 9350 }, { "epoch": 0.6388604222176676, "grad_norm": 3.1234896183013916, "learning_rate": 2.1342671649748447e-06, "loss": 0.2412, "step": 9351 }, { "epoch": 0.6389287422285987, "grad_norm": 3.1575164794921875, "learning_rate": 2.1335540961610884e-06, "loss": 0.1857, "step": 9352 }, { "epoch": 0.63899706223953, "grad_norm": 4.8605828285217285, "learning_rate": 2.1328410942585648e-06, "loss": 0.3371, "step": 9353 }, { "epoch": 0.6390653822504612, "grad_norm": 3.619410753250122, "learning_rate": 2.132128159302184e-06, "loss": 0.1264, "step": 9354 }, { "epoch": 0.6391337022613923, "grad_norm": 3.9592771530151367, "learning_rate": 2.1314152913268604e-06, "loss": 0.3333, "step": 9355 }, { "epoch": 0.6392020222723236, "grad_norm": 3.4665701389312744, "learning_rate": 2.130702490367499e-06, "loss": 0.3075, "step": 9356 }, { "epoch": 0.6392703422832547, "grad_norm": 3.9338226318359375, "learning_rate": 2.1299897564590055e-06, "loss": 0.2103, "step": 9357 }, { "epoch": 0.639338662294186, "grad_norm": 5.30410099029541, "learning_rate": 2.1292770896362783e-06, "loss": 0.4409, "step": 9358 }, { "epoch": 0.6394069823051172, "grad_norm": 4.056968688964844, "learning_rate": 2.1285644899342164e-06, "loss": 0.2834, "step": 9359 }, { "epoch": 0.6394753023160483, "grad_norm": 3.5845885276794434, "learning_rate": 2.127851957387714e-06, "loss": 0.3125, "step": 9360 }, { "epoch": 0.6395436223269796, "grad_norm": 5.063882350921631, "learning_rate": 2.127139492031662e-06, "loss": 0.3391, "step": 9361 }, { "epoch": 0.6396119423379107, "grad_norm": 2.984973192214966, "learning_rate": 2.126427093900947e-06, "loss": 0.2466, "step": 9362 }, { "epoch": 0.639680262348842, "grad_norm": 3.769129753112793, "learning_rate": 2.1257147630304535e-06, "loss": 0.2944, "step": 9363 }, { "epoch": 0.6397485823597732, "grad_norm": 4.010416507720947, "learning_rate": 2.1250024994550628e-06, "loss": 0.3421, "step": 9364 }, { "epoch": 0.6398169023707044, "grad_norm": 4.5781989097595215, "learning_rate": 2.124290303209653e-06, "loss": 0.3999, "step": 9365 }, { "epoch": 0.6398852223816356, "grad_norm": 3.737070083618164, "learning_rate": 2.1235781743290974e-06, "loss": 0.2839, "step": 9366 }, { "epoch": 0.6399535423925667, "grad_norm": 3.874868631362915, "learning_rate": 2.1228661128482666e-06, "loss": 0.2566, "step": 9367 }, { "epoch": 0.640021862403498, "grad_norm": 4.000757217407227, "learning_rate": 2.1221541188020305e-06, "loss": 0.3647, "step": 9368 }, { "epoch": 0.6400901824144292, "grad_norm": 3.879716396331787, "learning_rate": 2.121442192225253e-06, "loss": 0.2347, "step": 9369 }, { "epoch": 0.6401585024253604, "grad_norm": 4.098133563995361, "learning_rate": 2.120730333152793e-06, "loss": 0.2782, "step": 9370 }, { "epoch": 0.6402268224362916, "grad_norm": 4.959881782531738, "learning_rate": 2.120018541619511e-06, "loss": 0.2932, "step": 9371 }, { "epoch": 0.6402951424472227, "grad_norm": 4.434749126434326, "learning_rate": 2.11930681766026e-06, "loss": 0.3517, "step": 9372 }, { "epoch": 0.640363462458154, "grad_norm": 4.009285926818848, "learning_rate": 2.118595161309892e-06, "loss": 0.4095, "step": 9373 }, { "epoch": 0.6404317824690852, "grad_norm": 3.2342607975006104, "learning_rate": 2.1178835726032554e-06, "loss": 0.293, "step": 9374 }, { "epoch": 0.6405001024800164, "grad_norm": 2.7822299003601074, "learning_rate": 2.117172051575193e-06, "loss": 0.2033, "step": 9375 }, { "epoch": 0.6405684224909476, "grad_norm": 3.828413486480713, "learning_rate": 2.116460598260548e-06, "loss": 0.2678, "step": 9376 }, { "epoch": 0.6406367425018789, "grad_norm": 3.443775177001953, "learning_rate": 2.115749212694157e-06, "loss": 0.321, "step": 9377 }, { "epoch": 0.64070506251281, "grad_norm": 3.899778127670288, "learning_rate": 2.1150378949108566e-06, "loss": 0.2467, "step": 9378 }, { "epoch": 0.6407733825237412, "grad_norm": 3.4601192474365234, "learning_rate": 2.1143266449454754e-06, "loss": 0.2076, "step": 9379 }, { "epoch": 0.6408417025346724, "grad_norm": 2.6844866275787354, "learning_rate": 2.113615462832843e-06, "loss": 0.2157, "step": 9380 }, { "epoch": 0.6409100225456036, "grad_norm": 5.380710601806641, "learning_rate": 2.1129043486077832e-06, "loss": 0.3708, "step": 9381 }, { "epoch": 0.6409783425565349, "grad_norm": 4.3716936111450195, "learning_rate": 2.1121933023051202e-06, "loss": 0.3464, "step": 9382 }, { "epoch": 0.641046662567466, "grad_norm": 4.144900321960449, "learning_rate": 2.111482323959669e-06, "loss": 0.3348, "step": 9383 }, { "epoch": 0.6411149825783972, "grad_norm": 3.017937421798706, "learning_rate": 2.1107714136062436e-06, "loss": 0.2282, "step": 9384 }, { "epoch": 0.6411833025893284, "grad_norm": 4.862890243530273, "learning_rate": 2.1100605712796587e-06, "loss": 0.2897, "step": 9385 }, { "epoch": 0.6412516226002596, "grad_norm": 3.61037278175354, "learning_rate": 2.109349797014722e-06, "loss": 0.2414, "step": 9386 }, { "epoch": 0.6413199426111909, "grad_norm": 2.6375632286071777, "learning_rate": 2.1086390908462343e-06, "loss": 0.2151, "step": 9387 }, { "epoch": 0.641388262622122, "grad_norm": 3.337477445602417, "learning_rate": 2.1079284528090004e-06, "loss": 0.2906, "step": 9388 }, { "epoch": 0.6414565826330533, "grad_norm": 3.9846720695495605, "learning_rate": 2.107217882937818e-06, "loss": 0.2924, "step": 9389 }, { "epoch": 0.6415249026439844, "grad_norm": 4.191930294036865, "learning_rate": 2.106507381267482e-06, "loss": 0.2924, "step": 9390 }, { "epoch": 0.6415932226549156, "grad_norm": 5.156216621398926, "learning_rate": 2.1057969478327827e-06, "loss": 0.2964, "step": 9391 }, { "epoch": 0.6416615426658469, "grad_norm": 2.828718423843384, "learning_rate": 2.1050865826685087e-06, "loss": 0.1813, "step": 9392 }, { "epoch": 0.641729862676778, "grad_norm": 2.7017414569854736, "learning_rate": 2.104376285809444e-06, "loss": 0.1797, "step": 9393 }, { "epoch": 0.6417981826877093, "grad_norm": 3.442791700363159, "learning_rate": 2.103666057290372e-06, "loss": 0.281, "step": 9394 }, { "epoch": 0.6418665026986404, "grad_norm": 4.572200775146484, "learning_rate": 2.1029558971460687e-06, "loss": 0.3022, "step": 9395 }, { "epoch": 0.6419348227095716, "grad_norm": 4.28535270690918, "learning_rate": 2.1022458054113086e-06, "loss": 0.2567, "step": 9396 }, { "epoch": 0.6420031427205029, "grad_norm": 4.863293170928955, "learning_rate": 2.1015357821208645e-06, "loss": 0.2836, "step": 9397 }, { "epoch": 0.642071462731434, "grad_norm": 4.039546012878418, "learning_rate": 2.100825827309504e-06, "loss": 0.2897, "step": 9398 }, { "epoch": 0.6421397827423653, "grad_norm": 3.1595990657806396, "learning_rate": 2.1001159410119907e-06, "loss": 0.1573, "step": 9399 }, { "epoch": 0.6422081027532964, "grad_norm": 4.412696361541748, "learning_rate": 2.0994061232630868e-06, "loss": 0.2789, "step": 9400 }, { "epoch": 0.6422764227642277, "grad_norm": 3.2226438522338867, "learning_rate": 2.0986963740975487e-06, "loss": 0.2026, "step": 9401 }, { "epoch": 0.6423447427751589, "grad_norm": 3.3388993740081787, "learning_rate": 2.0979866935501347e-06, "loss": 0.2396, "step": 9402 }, { "epoch": 0.64241306278609, "grad_norm": 4.607992649078369, "learning_rate": 2.0972770816555916e-06, "loss": 0.1939, "step": 9403 }, { "epoch": 0.6424813827970213, "grad_norm": 3.8745720386505127, "learning_rate": 2.0965675384486684e-06, "loss": 0.3113, "step": 9404 }, { "epoch": 0.6425497028079524, "grad_norm": 3.892568826675415, "learning_rate": 2.095858063964111e-06, "loss": 0.3089, "step": 9405 }, { "epoch": 0.6426180228188837, "grad_norm": 4.505077362060547, "learning_rate": 2.0951486582366608e-06, "loss": 0.3432, "step": 9406 }, { "epoch": 0.6426863428298148, "grad_norm": 4.562150955200195, "learning_rate": 2.094439321301052e-06, "loss": 0.1681, "step": 9407 }, { "epoch": 0.642754662840746, "grad_norm": 4.293436050415039, "learning_rate": 2.093730053192023e-06, "loss": 0.4101, "step": 9408 }, { "epoch": 0.6428229828516773, "grad_norm": 5.651000499725342, "learning_rate": 2.0930208539443027e-06, "loss": 0.3009, "step": 9409 }, { "epoch": 0.6428913028626084, "grad_norm": 3.6278889179229736, "learning_rate": 2.092311723592619e-06, "loss": 0.2543, "step": 9410 }, { "epoch": 0.6429596228735397, "grad_norm": 3.898554563522339, "learning_rate": 2.0916026621716967e-06, "loss": 0.2791, "step": 9411 }, { "epoch": 0.6430279428844708, "grad_norm": 5.175069808959961, "learning_rate": 2.0908936697162553e-06, "loss": 0.3435, "step": 9412 }, { "epoch": 0.6430962628954021, "grad_norm": 4.590674877166748, "learning_rate": 2.090184746261013e-06, "loss": 0.2983, "step": 9413 }, { "epoch": 0.6431645829063333, "grad_norm": 4.641266345977783, "learning_rate": 2.0894758918406857e-06, "loss": 0.2644, "step": 9414 }, { "epoch": 0.6432329029172644, "grad_norm": 5.328960418701172, "learning_rate": 2.088767106489981e-06, "loss": 0.3237, "step": 9415 }, { "epoch": 0.6433012229281957, "grad_norm": 4.075146198272705, "learning_rate": 2.088058390243608e-06, "loss": 0.3392, "step": 9416 }, { "epoch": 0.6433695429391268, "grad_norm": 5.059293746948242, "learning_rate": 2.0873497431362694e-06, "loss": 0.2714, "step": 9417 }, { "epoch": 0.6434378629500581, "grad_norm": 7.102657318115234, "learning_rate": 2.0866411652026694e-06, "loss": 0.2721, "step": 9418 }, { "epoch": 0.6435061829609893, "grad_norm": 4.065240859985352, "learning_rate": 2.085932656477501e-06, "loss": 0.3154, "step": 9419 }, { "epoch": 0.6435745029719204, "grad_norm": 4.272047519683838, "learning_rate": 2.0852242169954582e-06, "loss": 0.2715, "step": 9420 }, { "epoch": 0.6436428229828517, "grad_norm": 4.570220947265625, "learning_rate": 2.084515846791234e-06, "loss": 0.3339, "step": 9421 }, { "epoch": 0.6437111429937828, "grad_norm": 5.412914276123047, "learning_rate": 2.0838075458995155e-06, "loss": 0.382, "step": 9422 }, { "epoch": 0.6437794630047141, "grad_norm": 3.522832155227661, "learning_rate": 2.083099314354983e-06, "loss": 0.3151, "step": 9423 }, { "epoch": 0.6438477830156453, "grad_norm": 4.343008995056152, "learning_rate": 2.0823911521923187e-06, "loss": 0.35, "step": 9424 }, { "epoch": 0.6439161030265765, "grad_norm": 3.355029344558716, "learning_rate": 2.0816830594462e-06, "loss": 0.1873, "step": 9425 }, { "epoch": 0.6439844230375077, "grad_norm": 4.978331565856934, "learning_rate": 2.0809750361513e-06, "loss": 0.2956, "step": 9426 }, { "epoch": 0.6440527430484388, "grad_norm": 4.548273086547852, "learning_rate": 2.080267082342288e-06, "loss": 0.3136, "step": 9427 }, { "epoch": 0.6441210630593701, "grad_norm": 11.640081405639648, "learning_rate": 2.0795591980538306e-06, "loss": 0.3648, "step": 9428 }, { "epoch": 0.6441893830703013, "grad_norm": 3.2828171253204346, "learning_rate": 2.0788513833205923e-06, "loss": 0.2859, "step": 9429 }, { "epoch": 0.6442577030812325, "grad_norm": 4.132570266723633, "learning_rate": 2.078143638177232e-06, "loss": 0.3113, "step": 9430 }, { "epoch": 0.6443260230921637, "grad_norm": 2.9736828804016113, "learning_rate": 2.077435962658406e-06, "loss": 0.2372, "step": 9431 }, { "epoch": 0.6443943431030948, "grad_norm": 3.438382148742676, "learning_rate": 2.0767283567987672e-06, "loss": 0.3898, "step": 9432 }, { "epoch": 0.6444626631140261, "grad_norm": 4.582846164703369, "learning_rate": 2.076020820632966e-06, "loss": 0.3124, "step": 9433 }, { "epoch": 0.6445309831249573, "grad_norm": 3.8966100215911865, "learning_rate": 2.075313354195648e-06, "loss": 0.2901, "step": 9434 }, { "epoch": 0.6445993031358885, "grad_norm": 5.7811808586120605, "learning_rate": 2.074605957521455e-06, "loss": 0.2185, "step": 9435 }, { "epoch": 0.6446676231468197, "grad_norm": 3.421186923980713, "learning_rate": 2.073898630645028e-06, "loss": 0.2431, "step": 9436 }, { "epoch": 0.644735943157751, "grad_norm": 3.85916805267334, "learning_rate": 2.073191373601002e-06, "loss": 0.2842, "step": 9437 }, { "epoch": 0.6448042631686821, "grad_norm": 4.344943046569824, "learning_rate": 2.072484186424011e-06, "loss": 0.3133, "step": 9438 }, { "epoch": 0.6448725831796133, "grad_norm": 4.312243938446045, "learning_rate": 2.0717770691486813e-06, "loss": 0.252, "step": 9439 }, { "epoch": 0.6449409031905445, "grad_norm": 3.8049635887145996, "learning_rate": 2.07107002180964e-06, "loss": 0.2515, "step": 9440 }, { "epoch": 0.6450092232014757, "grad_norm": 5.3458251953125, "learning_rate": 2.0703630444415095e-06, "loss": 0.3678, "step": 9441 }, { "epoch": 0.645077543212407, "grad_norm": 4.392329692840576, "learning_rate": 2.069656137078909e-06, "loss": 0.2702, "step": 9442 }, { "epoch": 0.6451458632233381, "grad_norm": 3.757904052734375, "learning_rate": 2.0689492997564542e-06, "loss": 0.3856, "step": 9443 }, { "epoch": 0.6452141832342693, "grad_norm": 3.7714686393737793, "learning_rate": 2.068242532508755e-06, "loss": 0.2479, "step": 9444 }, { "epoch": 0.6452825032452005, "grad_norm": 3.870938301086426, "learning_rate": 2.0675358353704217e-06, "loss": 0.1647, "step": 9445 }, { "epoch": 0.6453508232561317, "grad_norm": 4.223256587982178, "learning_rate": 2.0668292083760584e-06, "loss": 0.3544, "step": 9446 }, { "epoch": 0.645419143267063, "grad_norm": 3.785538673400879, "learning_rate": 2.066122651560268e-06, "loss": 0.2377, "step": 9447 }, { "epoch": 0.6454874632779941, "grad_norm": 4.236582279205322, "learning_rate": 2.0654161649576473e-06, "loss": 0.2913, "step": 9448 }, { "epoch": 0.6455557832889254, "grad_norm": 4.676376819610596, "learning_rate": 2.064709748602792e-06, "loss": 0.3509, "step": 9449 }, { "epoch": 0.6456241032998565, "grad_norm": 5.0989813804626465, "learning_rate": 2.0640034025302925e-06, "loss": 0.3833, "step": 9450 }, { "epoch": 0.6456924233107877, "grad_norm": 3.6428892612457275, "learning_rate": 2.0632971267747384e-06, "loss": 0.2141, "step": 9451 }, { "epoch": 0.645760743321719, "grad_norm": 5.716113567352295, "learning_rate": 2.0625909213707124e-06, "loss": 0.2795, "step": 9452 }, { "epoch": 0.6458290633326501, "grad_norm": 3.218554735183716, "learning_rate": 2.061884786352796e-06, "loss": 0.2299, "step": 9453 }, { "epoch": 0.6458973833435814, "grad_norm": 3.6988437175750732, "learning_rate": 2.0611787217555666e-06, "loss": 0.2271, "step": 9454 }, { "epoch": 0.6459657033545125, "grad_norm": 4.305085182189941, "learning_rate": 2.0604727276136003e-06, "loss": 0.2218, "step": 9455 }, { "epoch": 0.6460340233654437, "grad_norm": 3.9355735778808594, "learning_rate": 2.0597668039614647e-06, "loss": 0.3503, "step": 9456 }, { "epoch": 0.646102343376375, "grad_norm": 3.978994846343994, "learning_rate": 2.0590609508337277e-06, "loss": 0.3015, "step": 9457 }, { "epoch": 0.6461706633873061, "grad_norm": 3.334261655807495, "learning_rate": 2.058355168264955e-06, "loss": 0.2735, "step": 9458 }, { "epoch": 0.6462389833982374, "grad_norm": 5.563270092010498, "learning_rate": 2.057649456289706e-06, "loss": 0.23, "step": 9459 }, { "epoch": 0.6463073034091685, "grad_norm": 4.177560806274414, "learning_rate": 2.0569438149425364e-06, "loss": 0.4501, "step": 9460 }, { "epoch": 0.6463756234200998, "grad_norm": 4.109071731567383, "learning_rate": 2.0562382442580004e-06, "loss": 0.2649, "step": 9461 }, { "epoch": 0.646443943431031, "grad_norm": 3.1392741203308105, "learning_rate": 2.055532744270648e-06, "loss": 0.2899, "step": 9462 }, { "epoch": 0.6465122634419621, "grad_norm": 3.5620994567871094, "learning_rate": 2.0548273150150262e-06, "loss": 0.2433, "step": 9463 }, { "epoch": 0.6465805834528934, "grad_norm": 5.314451217651367, "learning_rate": 2.0541219565256767e-06, "loss": 0.3737, "step": 9464 }, { "epoch": 0.6466489034638245, "grad_norm": 4.090257167816162, "learning_rate": 2.0534166688371396e-06, "loss": 0.3349, "step": 9465 }, { "epoch": 0.6467172234747558, "grad_norm": 4.366032600402832, "learning_rate": 2.0527114519839516e-06, "loss": 0.3038, "step": 9466 }, { "epoch": 0.646785543485687, "grad_norm": 4.9506306648254395, "learning_rate": 2.0520063060006452e-06, "loss": 0.3309, "step": 9467 }, { "epoch": 0.6468538634966181, "grad_norm": 3.2558140754699707, "learning_rate": 2.051301230921748e-06, "loss": 0.3015, "step": 9468 }, { "epoch": 0.6469221835075494, "grad_norm": 3.5906808376312256, "learning_rate": 2.0505962267817872e-06, "loss": 0.1936, "step": 9469 }, { "epoch": 0.6469905035184805, "grad_norm": 3.5836498737335205, "learning_rate": 2.0498912936152836e-06, "loss": 0.2243, "step": 9470 }, { "epoch": 0.6470588235294118, "grad_norm": 3.6287717819213867, "learning_rate": 2.049186431456759e-06, "loss": 0.2519, "step": 9471 }, { "epoch": 0.647127143540343, "grad_norm": 4.31257438659668, "learning_rate": 2.048481640340725e-06, "loss": 0.4389, "step": 9472 }, { "epoch": 0.6471954635512742, "grad_norm": 4.728707313537598, "learning_rate": 2.0477769203016943e-06, "loss": 0.3627, "step": 9473 }, { "epoch": 0.6472637835622054, "grad_norm": 4.078899383544922, "learning_rate": 2.047072271374177e-06, "loss": 0.3573, "step": 9474 }, { "epoch": 0.6473321035731365, "grad_norm": 5.308177471160889, "learning_rate": 2.046367693592677e-06, "loss": 0.345, "step": 9475 }, { "epoch": 0.6474004235840678, "grad_norm": 3.442065477371216, "learning_rate": 2.0456631869916933e-06, "loss": 0.2156, "step": 9476 }, { "epoch": 0.647468743594999, "grad_norm": 5.593814849853516, "learning_rate": 2.0449587516057267e-06, "loss": 0.2837, "step": 9477 }, { "epoch": 0.6475370636059302, "grad_norm": 2.940523624420166, "learning_rate": 2.0442543874692705e-06, "loss": 0.2497, "step": 9478 }, { "epoch": 0.6476053836168614, "grad_norm": 3.2934553623199463, "learning_rate": 2.043550094616816e-06, "loss": 0.262, "step": 9479 }, { "epoch": 0.6476737036277925, "grad_norm": 4.292658805847168, "learning_rate": 2.042845873082849e-06, "loss": 0.3164, "step": 9480 }, { "epoch": 0.6477420236387238, "grad_norm": 6.3044586181640625, "learning_rate": 2.042141722901855e-06, "loss": 0.3585, "step": 9481 }, { "epoch": 0.647810343649655, "grad_norm": 3.9192702770233154, "learning_rate": 2.0414376441083137e-06, "loss": 0.2758, "step": 9482 }, { "epoch": 0.6478786636605862, "grad_norm": 4.109919548034668, "learning_rate": 2.0407336367367034e-06, "loss": 0.3791, "step": 9483 }, { "epoch": 0.6479469836715174, "grad_norm": 4.967620849609375, "learning_rate": 2.040029700821493e-06, "loss": 0.2659, "step": 9484 }, { "epoch": 0.6480153036824486, "grad_norm": 3.596221923828125, "learning_rate": 2.0393258363971566e-06, "loss": 0.3546, "step": 9485 }, { "epoch": 0.6480836236933798, "grad_norm": 2.7705700397491455, "learning_rate": 2.0386220434981603e-06, "loss": 0.2192, "step": 9486 }, { "epoch": 0.648151943704311, "grad_norm": 4.794600963592529, "learning_rate": 2.037918322158967e-06, "loss": 0.2829, "step": 9487 }, { "epoch": 0.6482202637152422, "grad_norm": 4.2293829917907715, "learning_rate": 2.0372146724140335e-06, "loss": 0.3071, "step": 9488 }, { "epoch": 0.6482885837261734, "grad_norm": 5.878322601318359, "learning_rate": 2.0365110942978186e-06, "loss": 0.3642, "step": 9489 }, { "epoch": 0.6483569037371046, "grad_norm": 4.584773063659668, "learning_rate": 2.0358075878447724e-06, "loss": 0.2289, "step": 9490 }, { "epoch": 0.6484252237480358, "grad_norm": 3.253748893737793, "learning_rate": 2.035104153089345e-06, "loss": 0.2739, "step": 9491 }, { "epoch": 0.648493543758967, "grad_norm": 2.68068528175354, "learning_rate": 2.0344007900659814e-06, "loss": 0.2098, "step": 9492 }, { "epoch": 0.6485618637698982, "grad_norm": 4.400241374969482, "learning_rate": 2.033697498809124e-06, "loss": 0.2786, "step": 9493 }, { "epoch": 0.6486301837808294, "grad_norm": 3.4290220737457275, "learning_rate": 2.03299427935321e-06, "loss": 0.2424, "step": 9494 }, { "epoch": 0.6486985037917606, "grad_norm": 4.8581976890563965, "learning_rate": 2.0322911317326764e-06, "loss": 0.2964, "step": 9495 }, { "epoch": 0.6487668238026918, "grad_norm": 4.472208499908447, "learning_rate": 2.031588055981952e-06, "loss": 0.3128, "step": 9496 }, { "epoch": 0.6488351438136231, "grad_norm": 4.582041263580322, "learning_rate": 2.0308850521354646e-06, "loss": 0.2888, "step": 9497 }, { "epoch": 0.6489034638245542, "grad_norm": 3.640918731689453, "learning_rate": 2.030182120227639e-06, "loss": 0.3154, "step": 9498 }, { "epoch": 0.6489717838354854, "grad_norm": 3.63964581489563, "learning_rate": 2.0294792602928976e-06, "loss": 0.2965, "step": 9499 }, { "epoch": 0.6490401038464166, "grad_norm": 4.427668571472168, "learning_rate": 2.028776472365656e-06, "loss": 0.2587, "step": 9500 }, { "epoch": 0.6491084238573478, "grad_norm": 3.5573179721832275, "learning_rate": 2.0280737564803276e-06, "loss": 0.2408, "step": 9501 }, { "epoch": 0.6491767438682791, "grad_norm": 3.5638628005981445, "learning_rate": 2.027371112671323e-06, "loss": 0.3598, "step": 9502 }, { "epoch": 0.6492450638792102, "grad_norm": 3.505953311920166, "learning_rate": 2.0266685409730497e-06, "loss": 0.1623, "step": 9503 }, { "epoch": 0.6493133838901414, "grad_norm": 3.6621029376983643, "learning_rate": 2.025966041419908e-06, "loss": 0.213, "step": 9504 }, { "epoch": 0.6493817039010726, "grad_norm": 3.343357563018799, "learning_rate": 2.0252636140462995e-06, "loss": 0.2743, "step": 9505 }, { "epoch": 0.6494500239120038, "grad_norm": 4.427972316741943, "learning_rate": 2.024561258886621e-06, "loss": 0.3033, "step": 9506 }, { "epoch": 0.6495183439229351, "grad_norm": 3.0018863677978516, "learning_rate": 2.0238589759752633e-06, "loss": 0.1925, "step": 9507 }, { "epoch": 0.6495866639338662, "grad_norm": 4.4554877281188965, "learning_rate": 2.0231567653466183e-06, "loss": 0.2657, "step": 9508 }, { "epoch": 0.6496549839447975, "grad_norm": 3.494357109069824, "learning_rate": 2.0224546270350667e-06, "loss": 0.3747, "step": 9509 }, { "epoch": 0.6497233039557286, "grad_norm": 3.548707962036133, "learning_rate": 2.021752561074993e-06, "loss": 0.2686, "step": 9510 }, { "epoch": 0.6497916239666598, "grad_norm": 4.856220245361328, "learning_rate": 2.0210505675007756e-06, "loss": 0.2886, "step": 9511 }, { "epoch": 0.6498599439775911, "grad_norm": 4.822778224945068, "learning_rate": 2.0203486463467884e-06, "loss": 0.286, "step": 9512 }, { "epoch": 0.6499282639885222, "grad_norm": 3.61712646484375, "learning_rate": 2.0196467976474033e-06, "loss": 0.2703, "step": 9513 }, { "epoch": 0.6499965839994535, "grad_norm": 4.046648979187012, "learning_rate": 2.018945021436988e-06, "loss": 0.3611, "step": 9514 }, { "epoch": 0.6500649040103846, "grad_norm": 4.2143378257751465, "learning_rate": 2.018243317749907e-06, "loss": 0.2515, "step": 9515 }, { "epoch": 0.6501332240213158, "grad_norm": 4.026451110839844, "learning_rate": 2.0175416866205206e-06, "loss": 0.1752, "step": 9516 }, { "epoch": 0.6502015440322471, "grad_norm": 4.3217387199401855, "learning_rate": 2.0168401280831848e-06, "loss": 0.3002, "step": 9517 }, { "epoch": 0.6502698640431782, "grad_norm": 4.715149402618408, "learning_rate": 2.016138642172252e-06, "loss": 0.2347, "step": 9518 }, { "epoch": 0.6503381840541095, "grad_norm": 3.3738343715667725, "learning_rate": 2.015437228922075e-06, "loss": 0.232, "step": 9519 }, { "epoch": 0.6504065040650406, "grad_norm": 2.3596062660217285, "learning_rate": 2.014735888367001e-06, "loss": 0.199, "step": 9520 }, { "epoch": 0.6504748240759719, "grad_norm": 3.1691932678222656, "learning_rate": 2.0140346205413687e-06, "loss": 0.2547, "step": 9521 }, { "epoch": 0.6505431440869031, "grad_norm": 4.077073097229004, "learning_rate": 2.01333342547952e-06, "loss": 0.1751, "step": 9522 }, { "epoch": 0.6506114640978342, "grad_norm": 4.9076666831970215, "learning_rate": 2.0126323032157897e-06, "loss": 0.2845, "step": 9523 }, { "epoch": 0.6506797841087655, "grad_norm": 5.130451679229736, "learning_rate": 2.0119312537845104e-06, "loss": 0.3176, "step": 9524 }, { "epoch": 0.6507481041196966, "grad_norm": 4.202709197998047, "learning_rate": 2.0112302772200104e-06, "loss": 0.3421, "step": 9525 }, { "epoch": 0.6508164241306279, "grad_norm": 3.967299699783325, "learning_rate": 2.0105293735566144e-06, "loss": 0.3636, "step": 9526 }, { "epoch": 0.6508847441415591, "grad_norm": 4.295354843139648, "learning_rate": 2.009828542828645e-06, "loss": 0.3139, "step": 9527 }, { "epoch": 0.6509530641524902, "grad_norm": 5.145328044891357, "learning_rate": 2.0091277850704196e-06, "loss": 0.3378, "step": 9528 }, { "epoch": 0.6510213841634215, "grad_norm": 4.6488518714904785, "learning_rate": 2.0084271003162505e-06, "loss": 0.2811, "step": 9529 }, { "epoch": 0.6510897041743526, "grad_norm": 3.64275860786438, "learning_rate": 2.00772648860045e-06, "loss": 0.1935, "step": 9530 }, { "epoch": 0.6511580241852839, "grad_norm": 4.1188812255859375, "learning_rate": 2.007025949957324e-06, "loss": 0.22, "step": 9531 }, { "epoch": 0.6512263441962151, "grad_norm": 5.687528610229492, "learning_rate": 2.006325484421179e-06, "loss": 0.2711, "step": 9532 }, { "epoch": 0.6512946642071463, "grad_norm": 3.0810647010803223, "learning_rate": 2.005625092026312e-06, "loss": 0.2608, "step": 9533 }, { "epoch": 0.6513629842180775, "grad_norm": 7.280255317687988, "learning_rate": 2.0049247728070205e-06, "loss": 0.2462, "step": 9534 }, { "epoch": 0.6514313042290086, "grad_norm": 3.276460647583008, "learning_rate": 2.0042245267975968e-06, "loss": 0.2434, "step": 9535 }, { "epoch": 0.6514996242399399, "grad_norm": 5.5719757080078125, "learning_rate": 2.003524354032332e-06, "loss": 0.3462, "step": 9536 }, { "epoch": 0.6515679442508711, "grad_norm": 4.667758941650391, "learning_rate": 2.0028242545455072e-06, "loss": 0.3769, "step": 9537 }, { "epoch": 0.6516362642618023, "grad_norm": 4.322475433349609, "learning_rate": 2.002124228371409e-06, "loss": 0.1977, "step": 9538 }, { "epoch": 0.6517045842727335, "grad_norm": 4.506265163421631, "learning_rate": 2.001424275544314e-06, "loss": 0.3092, "step": 9539 }, { "epoch": 0.6517729042836646, "grad_norm": 6.873916149139404, "learning_rate": 2.0007243960984987e-06, "loss": 0.2909, "step": 9540 }, { "epoch": 0.6518412242945959, "grad_norm": 4.876538276672363, "learning_rate": 2.000024590068231e-06, "loss": 0.3986, "step": 9541 }, { "epoch": 0.6519095443055271, "grad_norm": 5.433753490447998, "learning_rate": 1.9993248574877804e-06, "loss": 0.2803, "step": 9542 }, { "epoch": 0.6519778643164583, "grad_norm": 5.038136005401611, "learning_rate": 1.9986251983914113e-06, "loss": 0.3473, "step": 9543 }, { "epoch": 0.6520461843273895, "grad_norm": 3.892911672592163, "learning_rate": 1.997925612813383e-06, "loss": 0.1679, "step": 9544 }, { "epoch": 0.6521145043383207, "grad_norm": 4.521301746368408, "learning_rate": 1.9972261007879536e-06, "loss": 0.2777, "step": 9545 }, { "epoch": 0.6521828243492519, "grad_norm": 4.143197536468506, "learning_rate": 1.996526662349376e-06, "loss": 0.2926, "step": 9546 }, { "epoch": 0.652251144360183, "grad_norm": 4.445615291595459, "learning_rate": 1.9958272975318992e-06, "loss": 0.3395, "step": 9547 }, { "epoch": 0.6523194643711143, "grad_norm": 4.70892858505249, "learning_rate": 1.9951280063697717e-06, "loss": 0.3288, "step": 9548 }, { "epoch": 0.6523877843820455, "grad_norm": 3.609675645828247, "learning_rate": 1.994428788897232e-06, "loss": 0.2354, "step": 9549 }, { "epoch": 0.6524561043929767, "grad_norm": 2.9213786125183105, "learning_rate": 1.9937296451485212e-06, "loss": 0.2195, "step": 9550 }, { "epoch": 0.6525244244039079, "grad_norm": 4.627295970916748, "learning_rate": 1.993030575157873e-06, "loss": 0.2925, "step": 9551 }, { "epoch": 0.652592744414839, "grad_norm": 4.842071056365967, "learning_rate": 1.9923315789595227e-06, "loss": 0.2533, "step": 9552 }, { "epoch": 0.6526610644257703, "grad_norm": 3.944295883178711, "learning_rate": 1.9916326565876948e-06, "loss": 0.2207, "step": 9553 }, { "epoch": 0.6527293844367015, "grad_norm": 3.7203192710876465, "learning_rate": 1.9909338080766147e-06, "loss": 0.2506, "step": 9554 }, { "epoch": 0.6527977044476327, "grad_norm": 3.9772796630859375, "learning_rate": 1.9902350334605025e-06, "loss": 0.3567, "step": 9555 }, { "epoch": 0.6528660244585639, "grad_norm": 3.5674080848693848, "learning_rate": 1.9895363327735777e-06, "loss": 0.3115, "step": 9556 }, { "epoch": 0.6529343444694952, "grad_norm": 4.531823635101318, "learning_rate": 1.988837706050049e-06, "loss": 0.2715, "step": 9557 }, { "epoch": 0.6530026644804263, "grad_norm": 4.3511433601379395, "learning_rate": 1.9881391533241313e-06, "loss": 0.3204, "step": 9558 }, { "epoch": 0.6530709844913575, "grad_norm": 4.7333197593688965, "learning_rate": 1.987440674630029e-06, "loss": 0.3205, "step": 9559 }, { "epoch": 0.6531393045022887, "grad_norm": 3.6554455757141113, "learning_rate": 1.986742270001946e-06, "loss": 0.2769, "step": 9560 }, { "epoch": 0.6532076245132199, "grad_norm": 4.0042805671691895, "learning_rate": 1.9860439394740784e-06, "loss": 0.2938, "step": 9561 }, { "epoch": 0.6532759445241512, "grad_norm": 3.707897901535034, "learning_rate": 1.985345683080623e-06, "loss": 0.3266, "step": 9562 }, { "epoch": 0.6533442645350823, "grad_norm": 3.402147054672241, "learning_rate": 1.984647500855772e-06, "loss": 0.2681, "step": 9563 }, { "epoch": 0.6534125845460135, "grad_norm": 4.496431827545166, "learning_rate": 1.9839493928337136e-06, "loss": 0.2793, "step": 9564 }, { "epoch": 0.6534809045569447, "grad_norm": 3.492187738418579, "learning_rate": 1.983251359048632e-06, "loss": 0.2778, "step": 9565 }, { "epoch": 0.6535492245678759, "grad_norm": 2.901421308517456, "learning_rate": 1.9825533995347077e-06, "loss": 0.2112, "step": 9566 }, { "epoch": 0.6536175445788072, "grad_norm": 3.5683324337005615, "learning_rate": 1.9818555143261185e-06, "loss": 0.3146, "step": 9567 }, { "epoch": 0.6536858645897383, "grad_norm": 3.3942248821258545, "learning_rate": 1.9811577034570393e-06, "loss": 0.2515, "step": 9568 }, { "epoch": 0.6537541846006696, "grad_norm": 2.6882247924804688, "learning_rate": 1.980459966961637e-06, "loss": 0.246, "step": 9569 }, { "epoch": 0.6538225046116007, "grad_norm": 3.3560447692871094, "learning_rate": 1.9797623048740797e-06, "loss": 0.3041, "step": 9570 }, { "epoch": 0.6538908246225319, "grad_norm": 3.9411656856536865, "learning_rate": 1.9790647172285283e-06, "loss": 0.2949, "step": 9571 }, { "epoch": 0.6539591446334632, "grad_norm": 4.0556640625, "learning_rate": 1.978367204059146e-06, "loss": 0.3182, "step": 9572 }, { "epoch": 0.6540274646443943, "grad_norm": 3.9442365169525146, "learning_rate": 1.9776697654000838e-06, "loss": 0.3108, "step": 9573 }, { "epoch": 0.6540957846553256, "grad_norm": 4.319756031036377, "learning_rate": 1.9769724012854954e-06, "loss": 0.3305, "step": 9574 }, { "epoch": 0.6541641046662567, "grad_norm": 3.970534324645996, "learning_rate": 1.976275111749529e-06, "loss": 0.2512, "step": 9575 }, { "epoch": 0.6542324246771879, "grad_norm": 1.9248368740081787, "learning_rate": 1.9755778968263286e-06, "loss": 0.1774, "step": 9576 }, { "epoch": 0.6543007446881192, "grad_norm": 3.4816293716430664, "learning_rate": 1.9748807565500344e-06, "loss": 0.2992, "step": 9577 }, { "epoch": 0.6543690646990503, "grad_norm": 4.348275661468506, "learning_rate": 1.9741836909547844e-06, "loss": 0.2981, "step": 9578 }, { "epoch": 0.6544373847099816, "grad_norm": 5.190357208251953, "learning_rate": 1.973486700074712e-06, "loss": 0.2592, "step": 9579 }, { "epoch": 0.6545057047209127, "grad_norm": 3.8837249279022217, "learning_rate": 1.972789783943946e-06, "loss": 0.3062, "step": 9580 }, { "epoch": 0.654574024731844, "grad_norm": 3.755438804626465, "learning_rate": 1.9720929425966154e-06, "loss": 0.3432, "step": 9581 }, { "epoch": 0.6546423447427752, "grad_norm": 3.112297773361206, "learning_rate": 1.9713961760668385e-06, "loss": 0.2671, "step": 9582 }, { "epoch": 0.6547106647537063, "grad_norm": 3.7594869136810303, "learning_rate": 1.9706994843887363e-06, "loss": 0.3133, "step": 9583 }, { "epoch": 0.6547789847646376, "grad_norm": 4.953070640563965, "learning_rate": 1.9700028675964223e-06, "loss": 0.359, "step": 9584 }, { "epoch": 0.6548473047755687, "grad_norm": 3.8333935737609863, "learning_rate": 1.9693063257240127e-06, "loss": 0.3127, "step": 9585 }, { "epoch": 0.6549156247865, "grad_norm": 4.975511074066162, "learning_rate": 1.9686098588056106e-06, "loss": 0.3653, "step": 9586 }, { "epoch": 0.6549839447974312, "grad_norm": 4.482421875, "learning_rate": 1.9679134668753215e-06, "loss": 0.3942, "step": 9587 }, { "epoch": 0.6550522648083623, "grad_norm": 4.214100360870361, "learning_rate": 1.9672171499672462e-06, "loss": 0.3242, "step": 9588 }, { "epoch": 0.6551205848192936, "grad_norm": 4.772873878479004, "learning_rate": 1.966520908115482e-06, "loss": 0.2853, "step": 9589 }, { "epoch": 0.6551889048302247, "grad_norm": 3.328023672103882, "learning_rate": 1.9658247413541192e-06, "loss": 0.2491, "step": 9590 }, { "epoch": 0.655257224841156, "grad_norm": 2.5654284954071045, "learning_rate": 1.9651286497172507e-06, "loss": 0.2194, "step": 9591 }, { "epoch": 0.6553255448520872, "grad_norm": 4.033681869506836, "learning_rate": 1.96443263323896e-06, "loss": 0.2387, "step": 9592 }, { "epoch": 0.6553938648630184, "grad_norm": 3.5950748920440674, "learning_rate": 1.9637366919533323e-06, "loss": 0.2912, "step": 9593 }, { "epoch": 0.6554621848739496, "grad_norm": 5.362418174743652, "learning_rate": 1.963040825894442e-06, "loss": 0.3133, "step": 9594 }, { "epoch": 0.6555305048848807, "grad_norm": 3.7728700637817383, "learning_rate": 1.9623450350963663e-06, "loss": 0.354, "step": 9595 }, { "epoch": 0.655598824895812, "grad_norm": 4.288103103637695, "learning_rate": 1.961649319593175e-06, "loss": 0.3878, "step": 9596 }, { "epoch": 0.6556671449067432, "grad_norm": 4.694637775421143, "learning_rate": 1.960953679418936e-06, "loss": 0.3355, "step": 9597 }, { "epoch": 0.6557354649176744, "grad_norm": 4.6297760009765625, "learning_rate": 1.9602581146077127e-06, "loss": 0.3561, "step": 9598 }, { "epoch": 0.6558037849286056, "grad_norm": 3.2395856380462646, "learning_rate": 1.9595626251935653e-06, "loss": 0.2648, "step": 9599 }, { "epoch": 0.6558721049395367, "grad_norm": 5.665063858032227, "learning_rate": 1.9588672112105505e-06, "loss": 0.3576, "step": 9600 }, { "epoch": 0.655940424950468, "grad_norm": 3.7331418991088867, "learning_rate": 1.958171872692721e-06, "loss": 0.3352, "step": 9601 }, { "epoch": 0.6560087449613992, "grad_norm": 3.78629732131958, "learning_rate": 1.9574766096741244e-06, "loss": 0.1875, "step": 9602 }, { "epoch": 0.6560770649723304, "grad_norm": 3.086106538772583, "learning_rate": 1.9567814221888064e-06, "loss": 0.2723, "step": 9603 }, { "epoch": 0.6561453849832616, "grad_norm": 4.754950523376465, "learning_rate": 1.9560863102708073e-06, "loss": 0.3974, "step": 9604 }, { "epoch": 0.6562137049941928, "grad_norm": 2.482602834701538, "learning_rate": 1.955391273954169e-06, "loss": 0.2362, "step": 9605 }, { "epoch": 0.656282025005124, "grad_norm": 2.891187906265259, "learning_rate": 1.9546963132729214e-06, "loss": 0.2589, "step": 9606 }, { "epoch": 0.6563503450160552, "grad_norm": 4.180107116699219, "learning_rate": 1.9540014282610966e-06, "loss": 0.3102, "step": 9607 }, { "epoch": 0.6564186650269864, "grad_norm": 3.4912378787994385, "learning_rate": 1.9533066189527203e-06, "loss": 0.3164, "step": 9608 }, { "epoch": 0.6564869850379176, "grad_norm": 4.029937267303467, "learning_rate": 1.9526118853818187e-06, "loss": 0.2582, "step": 9609 }, { "epoch": 0.6565553050488488, "grad_norm": 2.7122137546539307, "learning_rate": 1.951917227582405e-06, "loss": 0.272, "step": 9610 }, { "epoch": 0.65662362505978, "grad_norm": 4.484364032745361, "learning_rate": 1.9512226455884994e-06, "loss": 0.2646, "step": 9611 }, { "epoch": 0.6566919450707112, "grad_norm": 3.621079683303833, "learning_rate": 1.950528139434113e-06, "loss": 0.3179, "step": 9612 }, { "epoch": 0.6567602650816424, "grad_norm": 4.247912883758545, "learning_rate": 1.949833709153255e-06, "loss": 0.3106, "step": 9613 }, { "epoch": 0.6568285850925736, "grad_norm": 4.133389472961426, "learning_rate": 1.949139354779927e-06, "loss": 0.3161, "step": 9614 }, { "epoch": 0.6568969051035048, "grad_norm": 3.422365427017212, "learning_rate": 1.9484450763481313e-06, "loss": 0.2311, "step": 9615 }, { "epoch": 0.656965225114436, "grad_norm": 4.583613395690918, "learning_rate": 1.947750873891864e-06, "loss": 0.3683, "step": 9616 }, { "epoch": 0.6570335451253673, "grad_norm": 5.038590908050537, "learning_rate": 1.9470567474451195e-06, "loss": 0.4215, "step": 9617 }, { "epoch": 0.6571018651362984, "grad_norm": 3.7938408851623535, "learning_rate": 1.9463626970418864e-06, "loss": 0.2228, "step": 9618 }, { "epoch": 0.6571701851472296, "grad_norm": 4.2629899978637695, "learning_rate": 1.945668722716151e-06, "loss": 0.3422, "step": 9619 }, { "epoch": 0.6572385051581608, "grad_norm": 4.520116806030273, "learning_rate": 1.9449748245018956e-06, "loss": 0.2619, "step": 9620 }, { "epoch": 0.657306825169092, "grad_norm": 2.9082541465759277, "learning_rate": 1.9442810024330995e-06, "loss": 0.2166, "step": 9621 }, { "epoch": 0.6573751451800233, "grad_norm": 5.197153568267822, "learning_rate": 1.9435872565437344e-06, "loss": 0.2918, "step": 9622 }, { "epoch": 0.6574434651909544, "grad_norm": 3.7194087505340576, "learning_rate": 1.9428935868677716e-06, "loss": 0.2807, "step": 9623 }, { "epoch": 0.6575117852018856, "grad_norm": 3.9682207107543945, "learning_rate": 1.942199993439181e-06, "loss": 0.2538, "step": 9624 }, { "epoch": 0.6575801052128168, "grad_norm": 3.8763647079467773, "learning_rate": 1.941506476291926e-06, "loss": 0.3154, "step": 9625 }, { "epoch": 0.657648425223748, "grad_norm": 4.172832012176514, "learning_rate": 1.9408130354599635e-06, "loss": 0.269, "step": 9626 }, { "epoch": 0.6577167452346793, "grad_norm": 4.941012382507324, "learning_rate": 1.940119670977251e-06, "loss": 0.2884, "step": 9627 }, { "epoch": 0.6577850652456104, "grad_norm": 3.2321906089782715, "learning_rate": 1.9394263828777402e-06, "loss": 0.2941, "step": 9628 }, { "epoch": 0.6578533852565417, "grad_norm": 5.26002836227417, "learning_rate": 1.93873317119538e-06, "loss": 0.2986, "step": 9629 }, { "epoch": 0.6579217052674728, "grad_norm": 4.217459201812744, "learning_rate": 1.938040035964115e-06, "loss": 0.3608, "step": 9630 }, { "epoch": 0.657990025278404, "grad_norm": 3.8681294918060303, "learning_rate": 1.9373469772178862e-06, "loss": 0.2411, "step": 9631 }, { "epoch": 0.6580583452893353, "grad_norm": 3.6743783950805664, "learning_rate": 1.936653994990631e-06, "loss": 0.2342, "step": 9632 }, { "epoch": 0.6581266653002664, "grad_norm": 3.957280158996582, "learning_rate": 1.935961089316284e-06, "loss": 0.3613, "step": 9633 }, { "epoch": 0.6581949853111977, "grad_norm": 4.12285041809082, "learning_rate": 1.9352682602287726e-06, "loss": 0.2437, "step": 9634 }, { "epoch": 0.6582633053221288, "grad_norm": 4.625939846038818, "learning_rate": 1.934575507762024e-06, "loss": 0.2857, "step": 9635 }, { "epoch": 0.65833162533306, "grad_norm": 4.350297451019287, "learning_rate": 1.9338828319499605e-06, "loss": 0.3183, "step": 9636 }, { "epoch": 0.6583999453439913, "grad_norm": 4.8545660972595215, "learning_rate": 1.9331902328265e-06, "loss": 0.2789, "step": 9637 }, { "epoch": 0.6584682653549224, "grad_norm": 3.6439950466156006, "learning_rate": 1.9324977104255583e-06, "loss": 0.2792, "step": 9638 }, { "epoch": 0.6585365853658537, "grad_norm": 4.395077228546143, "learning_rate": 1.931805264781046e-06, "loss": 0.3539, "step": 9639 }, { "epoch": 0.6586049053767848, "grad_norm": 4.002048015594482, "learning_rate": 1.9311128959268695e-06, "loss": 0.3562, "step": 9640 }, { "epoch": 0.6586732253877161, "grad_norm": 3.5691041946411133, "learning_rate": 1.930420603896933e-06, "loss": 0.3972, "step": 9641 }, { "epoch": 0.6587415453986473, "grad_norm": 4.733166694641113, "learning_rate": 1.9297283887251377e-06, "loss": 0.3196, "step": 9642 }, { "epoch": 0.6588098654095784, "grad_norm": 2.824054718017578, "learning_rate": 1.929036250445375e-06, "loss": 0.1811, "step": 9643 }, { "epoch": 0.6588781854205097, "grad_norm": 6.108663558959961, "learning_rate": 1.9283441890915417e-06, "loss": 0.3347, "step": 9644 }, { "epoch": 0.6589465054314408, "grad_norm": 3.5696043968200684, "learning_rate": 1.927652204697525e-06, "loss": 0.2107, "step": 9645 }, { "epoch": 0.6590148254423721, "grad_norm": 3.4733705520629883, "learning_rate": 1.9269602972972097e-06, "loss": 0.3223, "step": 9646 }, { "epoch": 0.6590831454533033, "grad_norm": 3.8666889667510986, "learning_rate": 1.9262684669244745e-06, "loss": 0.3237, "step": 9647 }, { "epoch": 0.6591514654642344, "grad_norm": 3.5523500442504883, "learning_rate": 1.9255767136131984e-06, "loss": 0.2558, "step": 9648 }, { "epoch": 0.6592197854751657, "grad_norm": 4.269216537475586, "learning_rate": 1.924885037397255e-06, "loss": 0.3841, "step": 9649 }, { "epoch": 0.6592881054860968, "grad_norm": 3.623885154724121, "learning_rate": 1.924193438310513e-06, "loss": 0.2562, "step": 9650 }, { "epoch": 0.6593564254970281, "grad_norm": 3.5994997024536133, "learning_rate": 1.923501916386838e-06, "loss": 0.2323, "step": 9651 }, { "epoch": 0.6594247455079593, "grad_norm": 4.184093952178955, "learning_rate": 1.9228104716600924e-06, "loss": 0.2211, "step": 9652 }, { "epoch": 0.6594930655188905, "grad_norm": 2.7344672679901123, "learning_rate": 1.9221191041641343e-06, "loss": 0.2262, "step": 9653 }, { "epoch": 0.6595613855298217, "grad_norm": 4.358567237854004, "learning_rate": 1.9214278139328195e-06, "loss": 0.3998, "step": 9654 }, { "epoch": 0.6596297055407528, "grad_norm": 4.383909225463867, "learning_rate": 1.920736600999997e-06, "loss": 0.4421, "step": 9655 }, { "epoch": 0.6596980255516841, "grad_norm": 3.2091238498687744, "learning_rate": 1.920045465399513e-06, "loss": 0.2503, "step": 9656 }, { "epoch": 0.6597663455626153, "grad_norm": 4.455700397491455, "learning_rate": 1.9193544071652105e-06, "loss": 0.2679, "step": 9657 }, { "epoch": 0.6598346655735465, "grad_norm": 3.2862279415130615, "learning_rate": 1.9186634263309333e-06, "loss": 0.3093, "step": 9658 }, { "epoch": 0.6599029855844777, "grad_norm": 4.21366548538208, "learning_rate": 1.917972522930511e-06, "loss": 0.3333, "step": 9659 }, { "epoch": 0.6599713055954088, "grad_norm": 4.208853244781494, "learning_rate": 1.917281696997779e-06, "loss": 0.3644, "step": 9660 }, { "epoch": 0.6600396256063401, "grad_norm": 4.111928939819336, "learning_rate": 1.9165909485665636e-06, "loss": 0.2911, "step": 9661 }, { "epoch": 0.6601079456172713, "grad_norm": 4.6265363693237305, "learning_rate": 1.9159002776706894e-06, "loss": 0.3714, "step": 9662 }, { "epoch": 0.6601762656282025, "grad_norm": 2.833000659942627, "learning_rate": 1.9152096843439766e-06, "loss": 0.1837, "step": 9663 }, { "epoch": 0.6602445856391337, "grad_norm": 3.5475149154663086, "learning_rate": 1.9145191686202417e-06, "loss": 0.4108, "step": 9664 }, { "epoch": 0.660312905650065, "grad_norm": 4.115403175354004, "learning_rate": 1.9138287305332975e-06, "loss": 0.1966, "step": 9665 }, { "epoch": 0.6603812256609961, "grad_norm": 7.289072513580322, "learning_rate": 1.913138370116955e-06, "loss": 0.2403, "step": 9666 }, { "epoch": 0.6604495456719273, "grad_norm": 3.046950340270996, "learning_rate": 1.912448087405015e-06, "loss": 0.2211, "step": 9667 }, { "epoch": 0.6605178656828585, "grad_norm": 3.793255090713501, "learning_rate": 1.9117578824312816e-06, "loss": 0.3028, "step": 9668 }, { "epoch": 0.6605861856937897, "grad_norm": 3.7901530265808105, "learning_rate": 1.9110677552295517e-06, "loss": 0.2177, "step": 9669 }, { "epoch": 0.660654505704721, "grad_norm": 3.558276414871216, "learning_rate": 1.910377705833619e-06, "loss": 0.3038, "step": 9670 }, { "epoch": 0.6607228257156521, "grad_norm": 4.528357028961182, "learning_rate": 1.9096877342772736e-06, "loss": 0.3637, "step": 9671 }, { "epoch": 0.6607911457265833, "grad_norm": 4.652063369750977, "learning_rate": 1.9089978405943017e-06, "loss": 0.2824, "step": 9672 }, { "epoch": 0.6608594657375145, "grad_norm": 3.0697808265686035, "learning_rate": 1.908308024818485e-06, "loss": 0.2685, "step": 9673 }, { "epoch": 0.6609277857484457, "grad_norm": 3.7881758213043213, "learning_rate": 1.9076182869836037e-06, "loss": 0.2402, "step": 9674 }, { "epoch": 0.660996105759377, "grad_norm": 3.2677969932556152, "learning_rate": 1.9069286271234293e-06, "loss": 0.2529, "step": 9675 }, { "epoch": 0.6610644257703081, "grad_norm": 4.791398525238037, "learning_rate": 1.9062390452717334e-06, "loss": 0.3086, "step": 9676 }, { "epoch": 0.6611327457812394, "grad_norm": 3.6785855293273926, "learning_rate": 1.905549541462285e-06, "loss": 0.2861, "step": 9677 }, { "epoch": 0.6612010657921705, "grad_norm": 3.3543009757995605, "learning_rate": 1.9048601157288475e-06, "loss": 0.2287, "step": 9678 }, { "epoch": 0.6612693858031017, "grad_norm": 2.8584516048431396, "learning_rate": 1.904170768105178e-06, "loss": 0.2037, "step": 9679 }, { "epoch": 0.661337705814033, "grad_norm": 5.478919982910156, "learning_rate": 1.9034814986250327e-06, "loss": 0.2827, "step": 9680 }, { "epoch": 0.6614060258249641, "grad_norm": 2.9644439220428467, "learning_rate": 1.9027923073221636e-06, "loss": 0.2073, "step": 9681 }, { "epoch": 0.6614743458358954, "grad_norm": 3.322983503341675, "learning_rate": 1.9021031942303183e-06, "loss": 0.2247, "step": 9682 }, { "epoch": 0.6615426658468265, "grad_norm": 6.344610214233398, "learning_rate": 1.9014141593832415e-06, "loss": 0.4349, "step": 9683 }, { "epoch": 0.6616109858577577, "grad_norm": 2.749033212661743, "learning_rate": 1.9007252028146723e-06, "loss": 0.2224, "step": 9684 }, { "epoch": 0.661679305868689, "grad_norm": 3.9856059551239014, "learning_rate": 1.9000363245583483e-06, "loss": 0.3934, "step": 9685 }, { "epoch": 0.6617476258796201, "grad_norm": 5.6909098625183105, "learning_rate": 1.899347524648003e-06, "loss": 0.3993, "step": 9686 }, { "epoch": 0.6618159458905514, "grad_norm": 4.555417060852051, "learning_rate": 1.8986588031173618e-06, "loss": 0.3264, "step": 9687 }, { "epoch": 0.6618842659014825, "grad_norm": 4.348962783813477, "learning_rate": 1.8979701600001517e-06, "loss": 0.3494, "step": 9688 }, { "epoch": 0.6619525859124138, "grad_norm": 3.6041791439056396, "learning_rate": 1.8972815953300928e-06, "loss": 0.2254, "step": 9689 }, { "epoch": 0.662020905923345, "grad_norm": 3.3992111682891846, "learning_rate": 1.8965931091409034e-06, "loss": 0.3071, "step": 9690 }, { "epoch": 0.6620892259342761, "grad_norm": 4.628875255584717, "learning_rate": 1.8959047014662962e-06, "loss": 0.3207, "step": 9691 }, { "epoch": 0.6621575459452074, "grad_norm": 4.301292419433594, "learning_rate": 1.8952163723399806e-06, "loss": 0.2982, "step": 9692 }, { "epoch": 0.6622258659561385, "grad_norm": 5.526942253112793, "learning_rate": 1.894528121795663e-06, "loss": 0.4051, "step": 9693 }, { "epoch": 0.6622941859670698, "grad_norm": 3.4262895584106445, "learning_rate": 1.8938399498670455e-06, "loss": 0.3322, "step": 9694 }, { "epoch": 0.662362505978001, "grad_norm": 5.47754430770874, "learning_rate": 1.8931518565878241e-06, "loss": 0.3591, "step": 9695 }, { "epoch": 0.6624308259889321, "grad_norm": 3.985365629196167, "learning_rate": 1.8924638419916925e-06, "loss": 0.3376, "step": 9696 }, { "epoch": 0.6624991459998634, "grad_norm": 3.2566535472869873, "learning_rate": 1.891775906112344e-06, "loss": 0.2618, "step": 9697 }, { "epoch": 0.6625674660107945, "grad_norm": 4.43767786026001, "learning_rate": 1.8910880489834648e-06, "loss": 0.2007, "step": 9698 }, { "epoch": 0.6626357860217258, "grad_norm": 4.008073329925537, "learning_rate": 1.8904002706387345e-06, "loss": 0.2526, "step": 9699 }, { "epoch": 0.662704106032657, "grad_norm": 4.26936149597168, "learning_rate": 1.889712571111834e-06, "loss": 0.2569, "step": 9700 }, { "epoch": 0.6627724260435882, "grad_norm": 4.448143482208252, "learning_rate": 1.8890249504364368e-06, "loss": 0.2623, "step": 9701 }, { "epoch": 0.6628407460545194, "grad_norm": 5.956096649169922, "learning_rate": 1.8883374086462147e-06, "loss": 0.3611, "step": 9702 }, { "epoch": 0.6629090660654505, "grad_norm": 3.8276422023773193, "learning_rate": 1.8876499457748348e-06, "loss": 0.3702, "step": 9703 }, { "epoch": 0.6629773860763818, "grad_norm": 3.7920286655426025, "learning_rate": 1.8869625618559602e-06, "loss": 0.2717, "step": 9704 }, { "epoch": 0.663045706087313, "grad_norm": 5.389698505401611, "learning_rate": 1.8862752569232505e-06, "loss": 0.3713, "step": 9705 }, { "epoch": 0.6631140260982442, "grad_norm": 4.246241092681885, "learning_rate": 1.885588031010361e-06, "loss": 0.2432, "step": 9706 }, { "epoch": 0.6631823461091754, "grad_norm": 3.2569985389709473, "learning_rate": 1.8849008841509444e-06, "loss": 0.2882, "step": 9707 }, { "epoch": 0.6632506661201065, "grad_norm": 2.918210029602051, "learning_rate": 1.884213816378646e-06, "loss": 0.1664, "step": 9708 }, { "epoch": 0.6633189861310378, "grad_norm": 3.438316822052002, "learning_rate": 1.8835268277271111e-06, "loss": 0.2804, "step": 9709 }, { "epoch": 0.663387306141969, "grad_norm": 2.920311212539673, "learning_rate": 1.8828399182299783e-06, "loss": 0.2935, "step": 9710 }, { "epoch": 0.6634556261529002, "grad_norm": 3.1706197261810303, "learning_rate": 1.8821530879208876e-06, "loss": 0.2285, "step": 9711 }, { "epoch": 0.6635239461638314, "grad_norm": 3.5098159313201904, "learning_rate": 1.881466336833468e-06, "loss": 0.3381, "step": 9712 }, { "epoch": 0.6635922661747626, "grad_norm": 4.313921928405762, "learning_rate": 1.8807796650013481e-06, "loss": 0.2813, "step": 9713 }, { "epoch": 0.6636605861856938, "grad_norm": 4.705272674560547, "learning_rate": 1.880093072458153e-06, "loss": 0.3359, "step": 9714 }, { "epoch": 0.663728906196625, "grad_norm": 3.6376562118530273, "learning_rate": 1.8794065592375033e-06, "loss": 0.1563, "step": 9715 }, { "epoch": 0.6637972262075562, "grad_norm": 3.017910957336426, "learning_rate": 1.8787201253730155e-06, "loss": 0.2218, "step": 9716 }, { "epoch": 0.6638655462184874, "grad_norm": 4.140507698059082, "learning_rate": 1.8780337708983032e-06, "loss": 0.4052, "step": 9717 }, { "epoch": 0.6639338662294186, "grad_norm": 2.4773008823394775, "learning_rate": 1.8773474958469741e-06, "loss": 0.1727, "step": 9718 }, { "epoch": 0.6640021862403498, "grad_norm": 3.5427446365356445, "learning_rate": 1.8766613002526358e-06, "loss": 0.2197, "step": 9719 }, { "epoch": 0.664070506251281, "grad_norm": 3.8559629917144775, "learning_rate": 1.875975184148886e-06, "loss": 0.3573, "step": 9720 }, { "epoch": 0.6641388262622122, "grad_norm": 3.635143280029297, "learning_rate": 1.8752891475693234e-06, "loss": 0.2587, "step": 9721 }, { "epoch": 0.6642071462731434, "grad_norm": 4.231071472167969, "learning_rate": 1.874603190547542e-06, "loss": 0.3259, "step": 9722 }, { "epoch": 0.6642754662840746, "grad_norm": 5.965099334716797, "learning_rate": 1.8739173131171307e-06, "loss": 0.3117, "step": 9723 }, { "epoch": 0.6643437862950058, "grad_norm": 3.7299160957336426, "learning_rate": 1.8732315153116753e-06, "loss": 0.2392, "step": 9724 }, { "epoch": 0.6644121063059371, "grad_norm": 4.621878147125244, "learning_rate": 1.8725457971647577e-06, "loss": 0.2452, "step": 9725 }, { "epoch": 0.6644804263168682, "grad_norm": 3.378291130065918, "learning_rate": 1.8718601587099554e-06, "loss": 0.1765, "step": 9726 }, { "epoch": 0.6645487463277994, "grad_norm": 3.9378721714019775, "learning_rate": 1.8711745999808437e-06, "loss": 0.2957, "step": 9727 }, { "epoch": 0.6646170663387306, "grad_norm": 4.2352423667907715, "learning_rate": 1.8704891210109896e-06, "loss": 0.3247, "step": 9728 }, { "epoch": 0.6646853863496618, "grad_norm": 3.652830123901367, "learning_rate": 1.86980372183396e-06, "loss": 0.2862, "step": 9729 }, { "epoch": 0.664753706360593, "grad_norm": 4.745821475982666, "learning_rate": 1.8691184024833192e-06, "loss": 0.3546, "step": 9730 }, { "epoch": 0.6648220263715242, "grad_norm": 3.5261569023132324, "learning_rate": 1.8684331629926254e-06, "loss": 0.1711, "step": 9731 }, { "epoch": 0.6648903463824554, "grad_norm": 6.721527099609375, "learning_rate": 1.8677480033954309e-06, "loss": 0.4383, "step": 9732 }, { "epoch": 0.6649586663933866, "grad_norm": 4.7813239097595215, "learning_rate": 1.8670629237252863e-06, "loss": 0.2374, "step": 9733 }, { "epoch": 0.6650269864043178, "grad_norm": 7.128182888031006, "learning_rate": 1.8663779240157394e-06, "loss": 0.2663, "step": 9734 }, { "epoch": 0.665095306415249, "grad_norm": 4.54365873336792, "learning_rate": 1.8656930043003319e-06, "loss": 0.3232, "step": 9735 }, { "epoch": 0.6651636264261802, "grad_norm": 3.6668519973754883, "learning_rate": 1.8650081646126031e-06, "loss": 0.2479, "step": 9736 }, { "epoch": 0.6652319464371115, "grad_norm": 4.735178470611572, "learning_rate": 1.8643234049860879e-06, "loss": 0.2542, "step": 9737 }, { "epoch": 0.6653002664480426, "grad_norm": 4.487210273742676, "learning_rate": 1.8636387254543165e-06, "loss": 0.292, "step": 9738 }, { "epoch": 0.6653685864589738, "grad_norm": 4.555368423461914, "learning_rate": 1.8629541260508178e-06, "loss": 0.2992, "step": 9739 }, { "epoch": 0.665436906469905, "grad_norm": 4.73776388168335, "learning_rate": 1.8622696068091113e-06, "loss": 0.2739, "step": 9740 }, { "epoch": 0.6655052264808362, "grad_norm": 4.260216236114502, "learning_rate": 1.861585167762718e-06, "loss": 0.2867, "step": 9741 }, { "epoch": 0.6655735464917675, "grad_norm": 3.4842278957366943, "learning_rate": 1.8609008089451537e-06, "loss": 0.2602, "step": 9742 }, { "epoch": 0.6656418665026986, "grad_norm": 5.717491149902344, "learning_rate": 1.8602165303899285e-06, "loss": 0.3188, "step": 9743 }, { "epoch": 0.6657101865136298, "grad_norm": 5.003137588500977, "learning_rate": 1.8595323321305501e-06, "loss": 0.3033, "step": 9744 }, { "epoch": 0.665778506524561, "grad_norm": 4.617451190948486, "learning_rate": 1.858848214200522e-06, "loss": 0.272, "step": 9745 }, { "epoch": 0.6658468265354922, "grad_norm": 4.739840030670166, "learning_rate": 1.8581641766333438e-06, "loss": 0.3561, "step": 9746 }, { "epoch": 0.6659151465464235, "grad_norm": 3.4173645973205566, "learning_rate": 1.857480219462512e-06, "loss": 0.2233, "step": 9747 }, { "epoch": 0.6659834665573546, "grad_norm": 4.2023234367370605, "learning_rate": 1.8567963427215156e-06, "loss": 0.3492, "step": 9748 }, { "epoch": 0.6660517865682859, "grad_norm": 4.217238426208496, "learning_rate": 1.8561125464438424e-06, "loss": 0.3987, "step": 9749 }, { "epoch": 0.666120106579217, "grad_norm": 3.51536226272583, "learning_rate": 1.8554288306629789e-06, "loss": 0.2235, "step": 9750 }, { "epoch": 0.6661884265901482, "grad_norm": 3.631894826889038, "learning_rate": 1.8547451954124044e-06, "loss": 0.2821, "step": 9751 }, { "epoch": 0.6662567466010795, "grad_norm": 3.5020864009857178, "learning_rate": 1.8540616407255919e-06, "loss": 0.3348, "step": 9752 }, { "epoch": 0.6663250666120106, "grad_norm": 3.538668155670166, "learning_rate": 1.8533781666360153e-06, "loss": 0.2107, "step": 9753 }, { "epoch": 0.6663933866229419, "grad_norm": 3.81061053276062, "learning_rate": 1.8526947731771415e-06, "loss": 0.2302, "step": 9754 }, { "epoch": 0.666461706633873, "grad_norm": 5.246432781219482, "learning_rate": 1.8520114603824355e-06, "loss": 0.2488, "step": 9755 }, { "epoch": 0.6665300266448042, "grad_norm": 3.2731924057006836, "learning_rate": 1.8513282282853568e-06, "loss": 0.1918, "step": 9756 }, { "epoch": 0.6665983466557355, "grad_norm": 2.671623706817627, "learning_rate": 1.8506450769193619e-06, "loss": 0.1529, "step": 9757 }, { "epoch": 0.6666666666666666, "grad_norm": 4.426097869873047, "learning_rate": 1.8499620063179026e-06, "loss": 0.2544, "step": 9758 }, { "epoch": 0.6667349866775979, "grad_norm": 5.664732933044434, "learning_rate": 1.8492790165144278e-06, "loss": 0.3198, "step": 9759 }, { "epoch": 0.666803306688529, "grad_norm": 3.8963727951049805, "learning_rate": 1.8485961075423797e-06, "loss": 0.2794, "step": 9760 }, { "epoch": 0.6668716266994603, "grad_norm": 4.573329925537109, "learning_rate": 1.8479132794352001e-06, "loss": 0.3087, "step": 9761 }, { "epoch": 0.6669399467103915, "grad_norm": 4.049487590789795, "learning_rate": 1.8472305322263237e-06, "loss": 0.3653, "step": 9762 }, { "epoch": 0.6670082667213226, "grad_norm": 5.090599536895752, "learning_rate": 1.8465478659491865e-06, "loss": 0.3248, "step": 9763 }, { "epoch": 0.6670765867322539, "grad_norm": 3.154315710067749, "learning_rate": 1.8458652806372128e-06, "loss": 0.2507, "step": 9764 }, { "epoch": 0.667144906743185, "grad_norm": 4.657652854919434, "learning_rate": 1.845182776323829e-06, "loss": 0.3469, "step": 9765 }, { "epoch": 0.6672132267541163, "grad_norm": 6.080886363983154, "learning_rate": 1.844500353042455e-06, "loss": 0.3501, "step": 9766 }, { "epoch": 0.6672815467650475, "grad_norm": 4.281428337097168, "learning_rate": 1.8438180108265086e-06, "loss": 0.3661, "step": 9767 }, { "epoch": 0.6673498667759787, "grad_norm": 3.405754566192627, "learning_rate": 1.8431357497093987e-06, "loss": 0.238, "step": 9768 }, { "epoch": 0.6674181867869099, "grad_norm": 4.124093532562256, "learning_rate": 1.8424535697245375e-06, "loss": 0.3876, "step": 9769 }, { "epoch": 0.667486506797841, "grad_norm": 4.158698081970215, "learning_rate": 1.8417714709053282e-06, "loss": 0.2985, "step": 9770 }, { "epoch": 0.6675548268087723, "grad_norm": 3.752364158630371, "learning_rate": 1.8410894532851725e-06, "loss": 0.3005, "step": 9771 }, { "epoch": 0.6676231468197035, "grad_norm": 4.220123767852783, "learning_rate": 1.8404075168974652e-06, "loss": 0.2864, "step": 9772 }, { "epoch": 0.6676914668306347, "grad_norm": 3.9731314182281494, "learning_rate": 1.839725661775599e-06, "loss": 0.3097, "step": 9773 }, { "epoch": 0.6677597868415659, "grad_norm": 6.079761981964111, "learning_rate": 1.8390438879529637e-06, "loss": 0.4229, "step": 9774 }, { "epoch": 0.667828106852497, "grad_norm": 3.8360695838928223, "learning_rate": 1.8383621954629433e-06, "loss": 0.2323, "step": 9775 }, { "epoch": 0.6678964268634283, "grad_norm": 3.448361873626709, "learning_rate": 1.8376805843389183e-06, "loss": 0.2322, "step": 9776 }, { "epoch": 0.6679647468743595, "grad_norm": 4.4545063972473145, "learning_rate": 1.836999054614266e-06, "loss": 0.2394, "step": 9777 }, { "epoch": 0.6680330668852907, "grad_norm": 3.6485471725463867, "learning_rate": 1.8363176063223585e-06, "loss": 0.2891, "step": 9778 }, { "epoch": 0.6681013868962219, "grad_norm": 3.2516889572143555, "learning_rate": 1.8356362394965651e-06, "loss": 0.2386, "step": 9779 }, { "epoch": 0.6681697069071532, "grad_norm": 3.714949131011963, "learning_rate": 1.8349549541702514e-06, "loss": 0.3119, "step": 9780 }, { "epoch": 0.6682380269180843, "grad_norm": 3.70743465423584, "learning_rate": 1.8342737503767755e-06, "loss": 0.3491, "step": 9781 }, { "epoch": 0.6683063469290155, "grad_norm": 3.8722469806671143, "learning_rate": 1.8335926281494946e-06, "loss": 0.2606, "step": 9782 }, { "epoch": 0.6683746669399467, "grad_norm": 2.686230182647705, "learning_rate": 1.8329115875217632e-06, "loss": 0.2755, "step": 9783 }, { "epoch": 0.6684429869508779, "grad_norm": 4.625156402587891, "learning_rate": 1.8322306285269313e-06, "loss": 0.2655, "step": 9784 }, { "epoch": 0.6685113069618092, "grad_norm": 3.990935802459717, "learning_rate": 1.8315497511983399e-06, "loss": 0.3496, "step": 9785 }, { "epoch": 0.6685796269727403, "grad_norm": 3.5821144580841064, "learning_rate": 1.8308689555693318e-06, "loss": 0.339, "step": 9786 }, { "epoch": 0.6686479469836715, "grad_norm": 3.116595506668091, "learning_rate": 1.8301882416732433e-06, "loss": 0.228, "step": 9787 }, { "epoch": 0.6687162669946027, "grad_norm": 3.5918917655944824, "learning_rate": 1.8295076095434076e-06, "loss": 0.281, "step": 9788 }, { "epoch": 0.6687845870055339, "grad_norm": 4.702404022216797, "learning_rate": 1.8288270592131528e-06, "loss": 0.3141, "step": 9789 }, { "epoch": 0.6688529070164652, "grad_norm": 4.7129716873168945, "learning_rate": 1.8281465907158044e-06, "loss": 0.2465, "step": 9790 }, { "epoch": 0.6689212270273963, "grad_norm": 4.668518543243408, "learning_rate": 1.8274662040846822e-06, "loss": 0.2518, "step": 9791 }, { "epoch": 0.6689895470383276, "grad_norm": 4.0340728759765625, "learning_rate": 1.8267858993531054e-06, "loss": 0.3005, "step": 9792 }, { "epoch": 0.6690578670492587, "grad_norm": 3.379364490509033, "learning_rate": 1.8261056765543833e-06, "loss": 0.3045, "step": 9793 }, { "epoch": 0.6691261870601899, "grad_norm": 3.8564798831939697, "learning_rate": 1.8254255357218261e-06, "loss": 0.296, "step": 9794 }, { "epoch": 0.6691945070711212, "grad_norm": 4.2152485847473145, "learning_rate": 1.8247454768887386e-06, "loss": 0.3169, "step": 9795 }, { "epoch": 0.6692628270820523, "grad_norm": 4.81980037689209, "learning_rate": 1.8240655000884214e-06, "loss": 0.3414, "step": 9796 }, { "epoch": 0.6693311470929836, "grad_norm": 4.249809265136719, "learning_rate": 1.823385605354171e-06, "loss": 0.3996, "step": 9797 }, { "epoch": 0.6693994671039147, "grad_norm": 4.760293006896973, "learning_rate": 1.82270579271928e-06, "loss": 0.3238, "step": 9798 }, { "epoch": 0.6694677871148459, "grad_norm": 3.9742586612701416, "learning_rate": 1.8220260622170373e-06, "loss": 0.1976, "step": 9799 }, { "epoch": 0.6695361071257772, "grad_norm": 5.9262518882751465, "learning_rate": 1.8213464138807291e-06, "loss": 0.2965, "step": 9800 }, { "epoch": 0.6696044271367083, "grad_norm": 3.760971784591675, "learning_rate": 1.8206668477436313e-06, "loss": 0.3239, "step": 9801 }, { "epoch": 0.6696727471476396, "grad_norm": 3.7565863132476807, "learning_rate": 1.819987363839025e-06, "loss": 0.2454, "step": 9802 }, { "epoch": 0.6697410671585707, "grad_norm": 3.9699437618255615, "learning_rate": 1.8193079622001807e-06, "loss": 0.2667, "step": 9803 }, { "epoch": 0.669809387169502, "grad_norm": 4.547824859619141, "learning_rate": 1.8186286428603697e-06, "loss": 0.3702, "step": 9804 }, { "epoch": 0.6698777071804332, "grad_norm": 4.031076431274414, "learning_rate": 1.817949405852852e-06, "loss": 0.258, "step": 9805 }, { "epoch": 0.6699460271913643, "grad_norm": 4.150110721588135, "learning_rate": 1.81727025121089e-06, "loss": 0.3184, "step": 9806 }, { "epoch": 0.6700143472022956, "grad_norm": 4.486241817474365, "learning_rate": 1.8165911789677407e-06, "loss": 0.306, "step": 9807 }, { "epoch": 0.6700826672132267, "grad_norm": 4.491544723510742, "learning_rate": 1.815912189156656e-06, "loss": 0.328, "step": 9808 }, { "epoch": 0.670150987224158, "grad_norm": 4.660648345947266, "learning_rate": 1.8152332818108836e-06, "loss": 0.2662, "step": 9809 }, { "epoch": 0.6702193072350892, "grad_norm": 4.606199741363525, "learning_rate": 1.814554456963669e-06, "loss": 0.333, "step": 9810 }, { "epoch": 0.6702876272460203, "grad_norm": 4.521092414855957, "learning_rate": 1.813875714648252e-06, "loss": 0.3453, "step": 9811 }, { "epoch": 0.6703559472569516, "grad_norm": 4.665366172790527, "learning_rate": 1.81319705489787e-06, "loss": 0.2532, "step": 9812 }, { "epoch": 0.6704242672678827, "grad_norm": 3.9725759029388428, "learning_rate": 1.8125184777457524e-06, "loss": 0.2783, "step": 9813 }, { "epoch": 0.670492587278814, "grad_norm": 3.2127933502197266, "learning_rate": 1.811839983225129e-06, "loss": 0.2524, "step": 9814 }, { "epoch": 0.6705609072897452, "grad_norm": 4.835397243499756, "learning_rate": 1.8111615713692223e-06, "loss": 0.3087, "step": 9815 }, { "epoch": 0.6706292273006764, "grad_norm": 4.536190032958984, "learning_rate": 1.810483242211256e-06, "loss": 0.3054, "step": 9816 }, { "epoch": 0.6706975473116076, "grad_norm": 5.413248538970947, "learning_rate": 1.8098049957844425e-06, "loss": 0.3175, "step": 9817 }, { "epoch": 0.6707658673225387, "grad_norm": 5.2292914390563965, "learning_rate": 1.8091268321219955e-06, "loss": 0.305, "step": 9818 }, { "epoch": 0.67083418733347, "grad_norm": 3.694899320602417, "learning_rate": 1.808448751257122e-06, "loss": 0.1828, "step": 9819 }, { "epoch": 0.6709025073444012, "grad_norm": 4.045827865600586, "learning_rate": 1.8077707532230276e-06, "loss": 0.2826, "step": 9820 }, { "epoch": 0.6709708273553324, "grad_norm": 4.243731498718262, "learning_rate": 1.8070928380529083e-06, "loss": 0.3242, "step": 9821 }, { "epoch": 0.6710391473662636, "grad_norm": 4.411529064178467, "learning_rate": 1.8064150057799633e-06, "loss": 0.2645, "step": 9822 }, { "epoch": 0.6711074673771947, "grad_norm": 3.9215571880340576, "learning_rate": 1.805737256437383e-06, "loss": 0.2999, "step": 9823 }, { "epoch": 0.671175787388126, "grad_norm": 3.0444259643554688, "learning_rate": 1.8050595900583566e-06, "loss": 0.2293, "step": 9824 }, { "epoch": 0.6712441073990572, "grad_norm": 4.417538166046143, "learning_rate": 1.8043820066760646e-06, "loss": 0.2101, "step": 9825 }, { "epoch": 0.6713124274099884, "grad_norm": 3.4727981090545654, "learning_rate": 1.8037045063236884e-06, "loss": 0.2031, "step": 9826 }, { "epoch": 0.6713807474209196, "grad_norm": 3.285517692565918, "learning_rate": 1.803027089034403e-06, "loss": 0.2889, "step": 9827 }, { "epoch": 0.6714490674318508, "grad_norm": 3.817546844482422, "learning_rate": 1.8023497548413794e-06, "loss": 0.2796, "step": 9828 }, { "epoch": 0.671517387442782, "grad_norm": 4.151528835296631, "learning_rate": 1.8016725037777854e-06, "loss": 0.265, "step": 9829 }, { "epoch": 0.6715857074537132, "grad_norm": 3.0199615955352783, "learning_rate": 1.8009953358767838e-06, "loss": 0.2248, "step": 9830 }, { "epoch": 0.6716540274646444, "grad_norm": 3.077319860458374, "learning_rate": 1.8003182511715339e-06, "loss": 0.1984, "step": 9831 }, { "epoch": 0.6717223474755756, "grad_norm": 2.727309226989746, "learning_rate": 1.7996412496951925e-06, "loss": 0.2237, "step": 9832 }, { "epoch": 0.6717906674865068, "grad_norm": 4.569345474243164, "learning_rate": 1.798964331480907e-06, "loss": 0.3015, "step": 9833 }, { "epoch": 0.671858987497438, "grad_norm": 3.0194239616394043, "learning_rate": 1.7982874965618261e-06, "loss": 0.251, "step": 9834 }, { "epoch": 0.6719273075083692, "grad_norm": 5.266173362731934, "learning_rate": 1.7976107449710915e-06, "loss": 0.4248, "step": 9835 }, { "epoch": 0.6719956275193004, "grad_norm": 4.623787879943848, "learning_rate": 1.7969340767418455e-06, "loss": 0.3058, "step": 9836 }, { "epoch": 0.6720639475302316, "grad_norm": 2.6972408294677734, "learning_rate": 1.7962574919072188e-06, "loss": 0.2361, "step": 9837 }, { "epoch": 0.6721322675411628, "grad_norm": 3.302950143814087, "learning_rate": 1.7955809905003441e-06, "loss": 0.1837, "step": 9838 }, { "epoch": 0.672200587552094, "grad_norm": 5.4109392166137695, "learning_rate": 1.7949045725543469e-06, "loss": 0.3601, "step": 9839 }, { "epoch": 0.6722689075630253, "grad_norm": 3.229678153991699, "learning_rate": 1.7942282381023494e-06, "loss": 0.2744, "step": 9840 }, { "epoch": 0.6723372275739564, "grad_norm": 4.397501468658447, "learning_rate": 1.793551987177471e-06, "loss": 0.2783, "step": 9841 }, { "epoch": 0.6724055475848876, "grad_norm": 3.560342311859131, "learning_rate": 1.792875819812825e-06, "loss": 0.2477, "step": 9842 }, { "epoch": 0.6724738675958188, "grad_norm": 3.7260031700134277, "learning_rate": 1.7921997360415225e-06, "loss": 0.2103, "step": 9843 }, { "epoch": 0.67254218760675, "grad_norm": 5.557295799255371, "learning_rate": 1.7915237358966682e-06, "loss": 0.3198, "step": 9844 }, { "epoch": 0.6726105076176813, "grad_norm": 4.191572666168213, "learning_rate": 1.7908478194113662e-06, "loss": 0.1605, "step": 9845 }, { "epoch": 0.6726788276286124, "grad_norm": 3.3761045932769775, "learning_rate": 1.7901719866187115e-06, "loss": 0.257, "step": 9846 }, { "epoch": 0.6727471476395436, "grad_norm": 4.353034973144531, "learning_rate": 1.7894962375517996e-06, "loss": 0.2153, "step": 9847 }, { "epoch": 0.6728154676504748, "grad_norm": 3.952023506164551, "learning_rate": 1.788820572243718e-06, "loss": 0.2124, "step": 9848 }, { "epoch": 0.672883787661406, "grad_norm": 5.50042200088501, "learning_rate": 1.7881449907275569e-06, "loss": 0.2755, "step": 9849 }, { "epoch": 0.6729521076723373, "grad_norm": 5.504140377044678, "learning_rate": 1.7874694930363932e-06, "loss": 0.3818, "step": 9850 }, { "epoch": 0.6730204276832684, "grad_norm": 3.6166486740112305, "learning_rate": 1.7867940792033061e-06, "loss": 0.2283, "step": 9851 }, { "epoch": 0.6730887476941997, "grad_norm": 4.635888576507568, "learning_rate": 1.7861187492613682e-06, "loss": 0.2403, "step": 9852 }, { "epoch": 0.6731570677051308, "grad_norm": 4.176936149597168, "learning_rate": 1.7854435032436503e-06, "loss": 0.3525, "step": 9853 }, { "epoch": 0.673225387716062, "grad_norm": 3.9837772846221924, "learning_rate": 1.784768341183214e-06, "loss": 0.2878, "step": 9854 }, { "epoch": 0.6732937077269933, "grad_norm": 4.17889404296875, "learning_rate": 1.7840932631131232e-06, "loss": 0.2438, "step": 9855 }, { "epoch": 0.6733620277379244, "grad_norm": 3.7555201053619385, "learning_rate": 1.7834182690664337e-06, "loss": 0.2331, "step": 9856 }, { "epoch": 0.6734303477488557, "grad_norm": 4.2487053871154785, "learning_rate": 1.7827433590761995e-06, "loss": 0.3479, "step": 9857 }, { "epoch": 0.6734986677597868, "grad_norm": 3.185063123703003, "learning_rate": 1.7820685331754668e-06, "loss": 0.2447, "step": 9858 }, { "epoch": 0.673566987770718, "grad_norm": 3.893627405166626, "learning_rate": 1.7813937913972811e-06, "loss": 0.3063, "step": 9859 }, { "epoch": 0.6736353077816493, "grad_norm": 3.5163280963897705, "learning_rate": 1.780719133774683e-06, "loss": 0.2788, "step": 9860 }, { "epoch": 0.6737036277925804, "grad_norm": 2.9168429374694824, "learning_rate": 1.7800445603407077e-06, "loss": 0.3324, "step": 9861 }, { "epoch": 0.6737719478035117, "grad_norm": 5.622344493865967, "learning_rate": 1.7793700711283884e-06, "loss": 0.2861, "step": 9862 }, { "epoch": 0.6738402678144428, "grad_norm": 3.0857112407684326, "learning_rate": 1.7786956661707531e-06, "loss": 0.2536, "step": 9863 }, { "epoch": 0.6739085878253741, "grad_norm": 3.6286799907684326, "learning_rate": 1.7780213455008245e-06, "loss": 0.3307, "step": 9864 }, { "epoch": 0.6739769078363053, "grad_norm": 3.5294840335845947, "learning_rate": 1.7773471091516242e-06, "loss": 0.274, "step": 9865 }, { "epoch": 0.6740452278472364, "grad_norm": 3.8249847888946533, "learning_rate": 1.7766729571561657e-06, "loss": 0.176, "step": 9866 }, { "epoch": 0.6741135478581677, "grad_norm": 4.328030586242676, "learning_rate": 1.775998889547461e-06, "loss": 0.2917, "step": 9867 }, { "epoch": 0.6741818678690988, "grad_norm": 4.173280239105225, "learning_rate": 1.7753249063585163e-06, "loss": 0.2775, "step": 9868 }, { "epoch": 0.6742501878800301, "grad_norm": 4.415442943572998, "learning_rate": 1.7746510076223392e-06, "loss": 0.318, "step": 9869 }, { "epoch": 0.6743185078909613, "grad_norm": 4.0413408279418945, "learning_rate": 1.7739771933719242e-06, "loss": 0.2427, "step": 9870 }, { "epoch": 0.6743868279018924, "grad_norm": 3.6671409606933594, "learning_rate": 1.7733034636402675e-06, "loss": 0.2701, "step": 9871 }, { "epoch": 0.6744551479128237, "grad_norm": 3.547600507736206, "learning_rate": 1.7726298184603598e-06, "loss": 0.265, "step": 9872 }, { "epoch": 0.6745234679237548, "grad_norm": 3.9936015605926514, "learning_rate": 1.7719562578651897e-06, "loss": 0.2678, "step": 9873 }, { "epoch": 0.6745917879346861, "grad_norm": 4.2142205238342285, "learning_rate": 1.7712827818877352e-06, "loss": 0.1867, "step": 9874 }, { "epoch": 0.6746601079456173, "grad_norm": 3.817542552947998, "learning_rate": 1.770609390560978e-06, "loss": 0.3073, "step": 9875 }, { "epoch": 0.6747284279565485, "grad_norm": 4.010817527770996, "learning_rate": 1.7699360839178922e-06, "loss": 0.3643, "step": 9876 }, { "epoch": 0.6747967479674797, "grad_norm": 3.7180161476135254, "learning_rate": 1.7692628619914489e-06, "loss": 0.2156, "step": 9877 }, { "epoch": 0.6748650679784108, "grad_norm": 3.654707908630371, "learning_rate": 1.7685897248146107e-06, "loss": 0.3358, "step": 9878 }, { "epoch": 0.6749333879893421, "grad_norm": 5.338021755218506, "learning_rate": 1.7679166724203407e-06, "loss": 0.2686, "step": 9879 }, { "epoch": 0.6750017080002733, "grad_norm": 4.721706390380859, "learning_rate": 1.7672437048415972e-06, "loss": 0.2959, "step": 9880 }, { "epoch": 0.6750700280112045, "grad_norm": 4.035952568054199, "learning_rate": 1.7665708221113334e-06, "loss": 0.2823, "step": 9881 }, { "epoch": 0.6751383480221357, "grad_norm": 3.8560726642608643, "learning_rate": 1.765898024262498e-06, "loss": 0.248, "step": 9882 }, { "epoch": 0.6752066680330668, "grad_norm": 4.783965587615967, "learning_rate": 1.7652253113280367e-06, "loss": 0.2892, "step": 9883 }, { "epoch": 0.6752749880439981, "grad_norm": 4.556495189666748, "learning_rate": 1.7645526833408905e-06, "loss": 0.2309, "step": 9884 }, { "epoch": 0.6753433080549293, "grad_norm": 3.950554370880127, "learning_rate": 1.7638801403339969e-06, "loss": 0.2157, "step": 9885 }, { "epoch": 0.6754116280658605, "grad_norm": 3.5454154014587402, "learning_rate": 1.7632076823402864e-06, "loss": 0.25, "step": 9886 }, { "epoch": 0.6754799480767917, "grad_norm": 6.503325462341309, "learning_rate": 1.7625353093926895e-06, "loss": 0.2395, "step": 9887 }, { "epoch": 0.675548268087723, "grad_norm": 3.9237775802612305, "learning_rate": 1.7618630215241277e-06, "loss": 0.2938, "step": 9888 }, { "epoch": 0.6756165880986541, "grad_norm": 4.849203109741211, "learning_rate": 1.7611908187675261e-06, "loss": 0.3693, "step": 9889 }, { "epoch": 0.6756849081095853, "grad_norm": 4.279439926147461, "learning_rate": 1.7605187011557966e-06, "loss": 0.2539, "step": 9890 }, { "epoch": 0.6757532281205165, "grad_norm": 4.199918746948242, "learning_rate": 1.759846668721852e-06, "loss": 0.3144, "step": 9891 }, { "epoch": 0.6758215481314477, "grad_norm": 3.7643582820892334, "learning_rate": 1.7591747214986004e-06, "loss": 0.336, "step": 9892 }, { "epoch": 0.675889868142379, "grad_norm": 3.9749016761779785, "learning_rate": 1.7585028595189448e-06, "loss": 0.4018, "step": 9893 }, { "epoch": 0.6759581881533101, "grad_norm": 4.4997477531433105, "learning_rate": 1.757831082815785e-06, "loss": 0.2747, "step": 9894 }, { "epoch": 0.6760265081642413, "grad_norm": 6.558140754699707, "learning_rate": 1.7571593914220158e-06, "loss": 0.3866, "step": 9895 }, { "epoch": 0.6760948281751725, "grad_norm": 4.4057159423828125, "learning_rate": 1.7564877853705279e-06, "loss": 0.2556, "step": 9896 }, { "epoch": 0.6761631481861037, "grad_norm": 3.7167844772338867, "learning_rate": 1.7558162646942101e-06, "loss": 0.2044, "step": 9897 }, { "epoch": 0.676231468197035, "grad_norm": 5.892935276031494, "learning_rate": 1.755144829425942e-06, "loss": 0.2, "step": 9898 }, { "epoch": 0.6762997882079661, "grad_norm": 3.664238214492798, "learning_rate": 1.7544734795986028e-06, "loss": 0.2794, "step": 9899 }, { "epoch": 0.6763681082188974, "grad_norm": 3.4754793643951416, "learning_rate": 1.7538022152450673e-06, "loss": 0.216, "step": 9900 }, { "epoch": 0.6764364282298285, "grad_norm": 4.393466949462891, "learning_rate": 1.7531310363982058e-06, "loss": 0.3474, "step": 9901 }, { "epoch": 0.6765047482407597, "grad_norm": 3.6946825981140137, "learning_rate": 1.7524599430908835e-06, "loss": 0.2622, "step": 9902 }, { "epoch": 0.676573068251691, "grad_norm": 5.6115851402282715, "learning_rate": 1.7517889353559625e-06, "loss": 0.38, "step": 9903 }, { "epoch": 0.6766413882626221, "grad_norm": 3.4607033729553223, "learning_rate": 1.7511180132263002e-06, "loss": 0.2724, "step": 9904 }, { "epoch": 0.6767097082735534, "grad_norm": 4.773101329803467, "learning_rate": 1.7504471767347507e-06, "loss": 0.2676, "step": 9905 }, { "epoch": 0.6767780282844845, "grad_norm": 3.7588558197021484, "learning_rate": 1.749776425914161e-06, "loss": 0.2052, "step": 9906 }, { "epoch": 0.6768463482954157, "grad_norm": 4.210696697235107, "learning_rate": 1.749105760797376e-06, "loss": 0.1952, "step": 9907 }, { "epoch": 0.676914668306347, "grad_norm": 4.062680721282959, "learning_rate": 1.7484351814172388e-06, "loss": 0.3921, "step": 9908 }, { "epoch": 0.6769829883172781, "grad_norm": 3.6928250789642334, "learning_rate": 1.7477646878065845e-06, "loss": 0.2524, "step": 9909 }, { "epoch": 0.6770513083282094, "grad_norm": 4.910633087158203, "learning_rate": 1.747094279998247e-06, "loss": 0.2932, "step": 9910 }, { "epoch": 0.6771196283391405, "grad_norm": 2.975203514099121, "learning_rate": 1.7464239580250516e-06, "loss": 0.2175, "step": 9911 }, { "epoch": 0.6771879483500718, "grad_norm": 5.49910306930542, "learning_rate": 1.7457537219198234e-06, "loss": 0.2781, "step": 9912 }, { "epoch": 0.6772562683610029, "grad_norm": 4.708981037139893, "learning_rate": 1.7450835717153823e-06, "loss": 0.3615, "step": 9913 }, { "epoch": 0.6773245883719341, "grad_norm": 3.9717843532562256, "learning_rate": 1.7444135074445436e-06, "loss": 0.3839, "step": 9914 }, { "epoch": 0.6773929083828654, "grad_norm": 3.4980947971343994, "learning_rate": 1.7437435291401187e-06, "loss": 0.2115, "step": 9915 }, { "epoch": 0.6774612283937965, "grad_norm": 4.572143077850342, "learning_rate": 1.7430736368349144e-06, "loss": 0.2299, "step": 9916 }, { "epoch": 0.6775295484047278, "grad_norm": 4.75024938583374, "learning_rate": 1.742403830561734e-06, "loss": 0.3338, "step": 9917 }, { "epoch": 0.6775978684156589, "grad_norm": 4.151112079620361, "learning_rate": 1.7417341103533773e-06, "loss": 0.2957, "step": 9918 }, { "epoch": 0.6776661884265901, "grad_norm": 4.8426289558410645, "learning_rate": 1.7410644762426358e-06, "loss": 0.3074, "step": 9919 }, { "epoch": 0.6777345084375214, "grad_norm": 4.877649307250977, "learning_rate": 1.7403949282623012e-06, "loss": 0.3536, "step": 9920 }, { "epoch": 0.6778028284484525, "grad_norm": 3.4450225830078125, "learning_rate": 1.7397254664451583e-06, "loss": 0.1969, "step": 9921 }, { "epoch": 0.6778711484593838, "grad_norm": 3.0763626098632812, "learning_rate": 1.7390560908239928e-06, "loss": 0.2219, "step": 9922 }, { "epoch": 0.6779394684703149, "grad_norm": 3.647251605987549, "learning_rate": 1.738386801431578e-06, "loss": 0.143, "step": 9923 }, { "epoch": 0.6780077884812462, "grad_norm": 4.520187854766846, "learning_rate": 1.7377175983006889e-06, "loss": 0.3002, "step": 9924 }, { "epoch": 0.6780761084921774, "grad_norm": 5.197363376617432, "learning_rate": 1.7370484814640943e-06, "loss": 0.324, "step": 9925 }, { "epoch": 0.6781444285031085, "grad_norm": 3.844741106033325, "learning_rate": 1.7363794509545606e-06, "loss": 0.2605, "step": 9926 }, { "epoch": 0.6782127485140398, "grad_norm": 3.6359446048736572, "learning_rate": 1.7357105068048447e-06, "loss": 0.2675, "step": 9927 }, { "epoch": 0.6782810685249709, "grad_norm": 6.825600624084473, "learning_rate": 1.7350416490477063e-06, "loss": 0.3872, "step": 9928 }, { "epoch": 0.6783493885359022, "grad_norm": 4.923151969909668, "learning_rate": 1.734372877715897e-06, "loss": 0.3196, "step": 9929 }, { "epoch": 0.6784177085468334, "grad_norm": 4.762673854827881, "learning_rate": 1.7337041928421655e-06, "loss": 0.3111, "step": 9930 }, { "epoch": 0.6784860285577645, "grad_norm": 5.671697616577148, "learning_rate": 1.7330355944592531e-06, "loss": 0.2302, "step": 9931 }, { "epoch": 0.6785543485686958, "grad_norm": 3.8672921657562256, "learning_rate": 1.732367082599901e-06, "loss": 0.2766, "step": 9932 }, { "epoch": 0.6786226685796269, "grad_norm": 4.323828220367432, "learning_rate": 1.7316986572968442e-06, "loss": 0.3546, "step": 9933 }, { "epoch": 0.6786909885905582, "grad_norm": 2.753899335861206, "learning_rate": 1.7310303185828134e-06, "loss": 0.2365, "step": 9934 }, { "epoch": 0.6787593086014894, "grad_norm": 3.673583507537842, "learning_rate": 1.7303620664905354e-06, "loss": 0.1363, "step": 9935 }, { "epoch": 0.6788276286124206, "grad_norm": 4.2981133460998535, "learning_rate": 1.7296939010527332e-06, "loss": 0.2423, "step": 9936 }, { "epoch": 0.6788959486233518, "grad_norm": 4.366027355194092, "learning_rate": 1.7290258223021247e-06, "loss": 0.2479, "step": 9937 }, { "epoch": 0.6789642686342829, "grad_norm": 3.5698626041412354, "learning_rate": 1.7283578302714255e-06, "loss": 0.2407, "step": 9938 }, { "epoch": 0.6790325886452142, "grad_norm": 3.8892881870269775, "learning_rate": 1.727689924993343e-06, "loss": 0.2668, "step": 9939 }, { "epoch": 0.6791009086561454, "grad_norm": 4.659445285797119, "learning_rate": 1.7270221065005817e-06, "loss": 0.2848, "step": 9940 }, { "epoch": 0.6791692286670766, "grad_norm": 3.9321959018707275, "learning_rate": 1.7263543748258467e-06, "loss": 0.2165, "step": 9941 }, { "epoch": 0.6792375486780078, "grad_norm": 4.258413791656494, "learning_rate": 1.7256867300018344e-06, "loss": 0.2538, "step": 9942 }, { "epoch": 0.6793058686889389, "grad_norm": 4.352418422698975, "learning_rate": 1.7250191720612349e-06, "loss": 0.3223, "step": 9943 }, { "epoch": 0.6793741886998702, "grad_norm": 3.1247246265411377, "learning_rate": 1.7243517010367388e-06, "loss": 0.2027, "step": 9944 }, { "epoch": 0.6794425087108014, "grad_norm": 3.631326675415039, "learning_rate": 1.7236843169610298e-06, "loss": 0.3295, "step": 9945 }, { "epoch": 0.6795108287217326, "grad_norm": 5.145957946777344, "learning_rate": 1.7230170198667877e-06, "loss": 0.2909, "step": 9946 }, { "epoch": 0.6795791487326638, "grad_norm": 3.746764898300171, "learning_rate": 1.722349809786689e-06, "loss": 0.2358, "step": 9947 }, { "epoch": 0.679647468743595, "grad_norm": 3.236377000808716, "learning_rate": 1.7216826867534045e-06, "loss": 0.2391, "step": 9948 }, { "epoch": 0.6797157887545262, "grad_norm": 4.116025447845459, "learning_rate": 1.7210156507996016e-06, "loss": 0.3324, "step": 9949 }, { "epoch": 0.6797841087654574, "grad_norm": 3.980895519256592, "learning_rate": 1.7203487019579452e-06, "loss": 0.2928, "step": 9950 }, { "epoch": 0.6798524287763886, "grad_norm": 4.438775062561035, "learning_rate": 1.7196818402610907e-06, "loss": 0.3118, "step": 9951 }, { "epoch": 0.6799207487873198, "grad_norm": 2.889441728591919, "learning_rate": 1.719015065741694e-06, "loss": 0.2541, "step": 9952 }, { "epoch": 0.679989068798251, "grad_norm": 5.317218780517578, "learning_rate": 1.718348378432405e-06, "loss": 0.4056, "step": 9953 }, { "epoch": 0.6800573888091822, "grad_norm": 3.3638367652893066, "learning_rate": 1.7176817783658709e-06, "loss": 0.277, "step": 9954 }, { "epoch": 0.6801257088201134, "grad_norm": 2.824909210205078, "learning_rate": 1.7170152655747316e-06, "loss": 0.269, "step": 9955 }, { "epoch": 0.6801940288310446, "grad_norm": 4.286909580230713, "learning_rate": 1.7163488400916254e-06, "loss": 0.3471, "step": 9956 }, { "epoch": 0.6802623488419758, "grad_norm": 3.8422014713287354, "learning_rate": 1.7156825019491858e-06, "loss": 0.3438, "step": 9957 }, { "epoch": 0.680330668852907, "grad_norm": 4.591217517852783, "learning_rate": 1.715016251180042e-06, "loss": 0.3001, "step": 9958 }, { "epoch": 0.6803989888638382, "grad_norm": 4.108627796173096, "learning_rate": 1.7143500878168163e-06, "loss": 0.2213, "step": 9959 }, { "epoch": 0.6804673088747695, "grad_norm": 4.285722255706787, "learning_rate": 1.713684011892129e-06, "loss": 0.3198, "step": 9960 }, { "epoch": 0.6805356288857006, "grad_norm": 5.133144855499268, "learning_rate": 1.7130180234385985e-06, "loss": 0.351, "step": 9961 }, { "epoch": 0.6806039488966318, "grad_norm": 3.4843673706054688, "learning_rate": 1.7123521224888366e-06, "loss": 0.2313, "step": 9962 }, { "epoch": 0.680672268907563, "grad_norm": 2.7653756141662598, "learning_rate": 1.7116863090754482e-06, "loss": 0.2039, "step": 9963 }, { "epoch": 0.6807405889184942, "grad_norm": 3.4273202419281006, "learning_rate": 1.711020583231038e-06, "loss": 0.2661, "step": 9964 }, { "epoch": 0.6808089089294255, "grad_norm": 3.903564214706421, "learning_rate": 1.7103549449882037e-06, "loss": 0.18, "step": 9965 }, { "epoch": 0.6808772289403566, "grad_norm": 4.986099720001221, "learning_rate": 1.709689394379541e-06, "loss": 0.3016, "step": 9966 }, { "epoch": 0.6809455489512878, "grad_norm": 5.308919429779053, "learning_rate": 1.70902393143764e-06, "loss": 0.3117, "step": 9967 }, { "epoch": 0.681013868962219, "grad_norm": 3.9025824069976807, "learning_rate": 1.7083585561950861e-06, "loss": 0.3024, "step": 9968 }, { "epoch": 0.6810821889731502, "grad_norm": 4.355384826660156, "learning_rate": 1.7076932686844614e-06, "loss": 0.3331, "step": 9969 }, { "epoch": 0.6811505089840815, "grad_norm": 3.9780373573303223, "learning_rate": 1.7070280689383447e-06, "loss": 0.2909, "step": 9970 }, { "epoch": 0.6812188289950126, "grad_norm": 4.357957363128662, "learning_rate": 1.7063629569893059e-06, "loss": 0.3084, "step": 9971 }, { "epoch": 0.6812871490059439, "grad_norm": 4.050161361694336, "learning_rate": 1.7056979328699153e-06, "loss": 0.2386, "step": 9972 }, { "epoch": 0.681355469016875, "grad_norm": 3.3018641471862793, "learning_rate": 1.7050329966127373e-06, "loss": 0.3122, "step": 9973 }, { "epoch": 0.6814237890278062, "grad_norm": 3.8628714084625244, "learning_rate": 1.704368148250331e-06, "loss": 0.2665, "step": 9974 }, { "epoch": 0.6814921090387375, "grad_norm": 4.135935306549072, "learning_rate": 1.7037033878152563e-06, "loss": 0.3057, "step": 9975 }, { "epoch": 0.6815604290496686, "grad_norm": 2.7398486137390137, "learning_rate": 1.7030387153400604e-06, "loss": 0.2392, "step": 9976 }, { "epoch": 0.6816287490605999, "grad_norm": 4.183191776275635, "learning_rate": 1.7023741308572924e-06, "loss": 0.2324, "step": 9977 }, { "epoch": 0.681697069071531, "grad_norm": 3.8400840759277344, "learning_rate": 1.7017096343994946e-06, "loss": 0.254, "step": 9978 }, { "epoch": 0.6817653890824622, "grad_norm": 4.175837516784668, "learning_rate": 1.7010452259992074e-06, "loss": 0.3375, "step": 9979 }, { "epoch": 0.6818337090933935, "grad_norm": 4.345763683319092, "learning_rate": 1.7003809056889609e-06, "loss": 0.3324, "step": 9980 }, { "epoch": 0.6819020291043246, "grad_norm": 4.218606948852539, "learning_rate": 1.6997166735012894e-06, "loss": 0.2869, "step": 9981 }, { "epoch": 0.6819703491152559, "grad_norm": 3.848609447479248, "learning_rate": 1.6990525294687165e-06, "loss": 0.3048, "step": 9982 }, { "epoch": 0.682038669126187, "grad_norm": 3.8527047634124756, "learning_rate": 1.698388473623766e-06, "loss": 0.264, "step": 9983 }, { "epoch": 0.6821069891371183, "grad_norm": 3.8026580810546875, "learning_rate": 1.6977245059989514e-06, "loss": 0.252, "step": 9984 }, { "epoch": 0.6821753091480495, "grad_norm": 4.150226593017578, "learning_rate": 1.6970606266267874e-06, "loss": 0.2619, "step": 9985 }, { "epoch": 0.6822436291589806, "grad_norm": 3.164217948913574, "learning_rate": 1.6963968355397817e-06, "loss": 0.2571, "step": 9986 }, { "epoch": 0.6823119491699119, "grad_norm": 4.011612415313721, "learning_rate": 1.6957331327704392e-06, "loss": 0.2647, "step": 9987 }, { "epoch": 0.682380269180843, "grad_norm": 4.770676136016846, "learning_rate": 1.6950695183512591e-06, "loss": 0.3364, "step": 9988 }, { "epoch": 0.6824485891917743, "grad_norm": 3.352151393890381, "learning_rate": 1.6944059923147369e-06, "loss": 0.2502, "step": 9989 }, { "epoch": 0.6825169092027055, "grad_norm": 4.689779281616211, "learning_rate": 1.6937425546933642e-06, "loss": 0.2801, "step": 9990 }, { "epoch": 0.6825852292136366, "grad_norm": 5.0884881019592285, "learning_rate": 1.6930792055196282e-06, "loss": 0.3468, "step": 9991 }, { "epoch": 0.6826535492245679, "grad_norm": 3.552551031112671, "learning_rate": 1.6924159448260095e-06, "loss": 0.2636, "step": 9992 }, { "epoch": 0.682721869235499, "grad_norm": 3.80022931098938, "learning_rate": 1.691752772644986e-06, "loss": 0.2528, "step": 9993 }, { "epoch": 0.6827901892464303, "grad_norm": 4.39415979385376, "learning_rate": 1.6910896890090338e-06, "loss": 0.2782, "step": 9994 }, { "epoch": 0.6828585092573615, "grad_norm": 3.9274559020996094, "learning_rate": 1.690426693950623e-06, "loss": 0.2382, "step": 9995 }, { "epoch": 0.6829268292682927, "grad_norm": 3.4563486576080322, "learning_rate": 1.6897637875022153e-06, "loss": 0.3082, "step": 9996 }, { "epoch": 0.6829951492792239, "grad_norm": 3.7477493286132812, "learning_rate": 1.6891009696962734e-06, "loss": 0.2144, "step": 9997 }, { "epoch": 0.683063469290155, "grad_norm": 3.505826950073242, "learning_rate": 1.6884382405652535e-06, "loss": 0.2446, "step": 9998 }, { "epoch": 0.6831317893010863, "grad_norm": 3.9283034801483154, "learning_rate": 1.6877756001416071e-06, "loss": 0.3405, "step": 9999 }, { "epoch": 0.6832001093120175, "grad_norm": 3.2863311767578125, "learning_rate": 1.6871130484577832e-06, "loss": 0.2157, "step": 10000 }, { "epoch": 0.6832684293229487, "grad_norm": 3.812871217727661, "learning_rate": 1.6864505855462243e-06, "loss": 0.2629, "step": 10001 }, { "epoch": 0.6833367493338799, "grad_norm": 3.7472074031829834, "learning_rate": 1.6857882114393697e-06, "loss": 0.2414, "step": 10002 }, { "epoch": 0.683405069344811, "grad_norm": 3.737774133682251, "learning_rate": 1.6851259261696556e-06, "loss": 0.2943, "step": 10003 }, { "epoch": 0.6834733893557423, "grad_norm": 4.683539390563965, "learning_rate": 1.6844637297695097e-06, "loss": 0.3765, "step": 10004 }, { "epoch": 0.6835417093666735, "grad_norm": 3.721634864807129, "learning_rate": 1.6838016222713585e-06, "loss": 0.2909, "step": 10005 }, { "epoch": 0.6836100293776047, "grad_norm": 4.039627552032471, "learning_rate": 1.6831396037076245e-06, "loss": 0.2706, "step": 10006 }, { "epoch": 0.6836783493885359, "grad_norm": 3.1644649505615234, "learning_rate": 1.6824776741107247e-06, "loss": 0.2726, "step": 10007 }, { "epoch": 0.6837466693994672, "grad_norm": 2.9438445568084717, "learning_rate": 1.6818158335130725e-06, "loss": 0.3083, "step": 10008 }, { "epoch": 0.6838149894103983, "grad_norm": 4.813957214355469, "learning_rate": 1.6811540819470759e-06, "loss": 0.3382, "step": 10009 }, { "epoch": 0.6838833094213295, "grad_norm": 3.4803249835968018, "learning_rate": 1.6804924194451393e-06, "loss": 0.2001, "step": 10010 }, { "epoch": 0.6839516294322607, "grad_norm": 4.156682014465332, "learning_rate": 1.6798308460396637e-06, "loss": 0.2268, "step": 10011 }, { "epoch": 0.6840199494431919, "grad_norm": 4.243354797363281, "learning_rate": 1.6791693617630423e-06, "loss": 0.3464, "step": 10012 }, { "epoch": 0.6840882694541232, "grad_norm": 3.6806795597076416, "learning_rate": 1.6785079666476664e-06, "loss": 0.3363, "step": 10013 }, { "epoch": 0.6841565894650543, "grad_norm": 2.8071465492248535, "learning_rate": 1.6778466607259247e-06, "loss": 0.2088, "step": 10014 }, { "epoch": 0.6842249094759855, "grad_norm": 3.6219167709350586, "learning_rate": 1.6771854440302e-06, "loss": 0.2479, "step": 10015 }, { "epoch": 0.6842932294869167, "grad_norm": 7.133606433868408, "learning_rate": 1.6765243165928674e-06, "loss": 0.2599, "step": 10016 }, { "epoch": 0.6843615494978479, "grad_norm": 4.691498279571533, "learning_rate": 1.6758632784463026e-06, "loss": 0.366, "step": 10017 }, { "epoch": 0.6844298695087792, "grad_norm": 4.462756633758545, "learning_rate": 1.6752023296228743e-06, "loss": 0.3472, "step": 10018 }, { "epoch": 0.6844981895197103, "grad_norm": 3.7945778369903564, "learning_rate": 1.6745414701549476e-06, "loss": 0.2721, "step": 10019 }, { "epoch": 0.6845665095306416, "grad_norm": 3.4935925006866455, "learning_rate": 1.6738807000748828e-06, "loss": 0.2385, "step": 10020 }, { "epoch": 0.6846348295415727, "grad_norm": 3.210296869277954, "learning_rate": 1.6732200194150364e-06, "loss": 0.254, "step": 10021 }, { "epoch": 0.6847031495525039, "grad_norm": 5.751941680908203, "learning_rate": 1.6725594282077597e-06, "loss": 0.3064, "step": 10022 }, { "epoch": 0.6847714695634352, "grad_norm": 4.545648097991943, "learning_rate": 1.6718989264854021e-06, "loss": 0.3741, "step": 10023 }, { "epoch": 0.6848397895743663, "grad_norm": 3.5814945697784424, "learning_rate": 1.6712385142803035e-06, "loss": 0.3011, "step": 10024 }, { "epoch": 0.6849081095852976, "grad_norm": 4.145343780517578, "learning_rate": 1.6705781916248038e-06, "loss": 0.3703, "step": 10025 }, { "epoch": 0.6849764295962287, "grad_norm": 4.0382184982299805, "learning_rate": 1.6699179585512374e-06, "loss": 0.2873, "step": 10026 }, { "epoch": 0.6850447496071599, "grad_norm": 4.536962985992432, "learning_rate": 1.6692578150919346e-06, "loss": 0.3083, "step": 10027 }, { "epoch": 0.6851130696180912, "grad_norm": 3.3771438598632812, "learning_rate": 1.66859776127922e-06, "loss": 0.2933, "step": 10028 }, { "epoch": 0.6851813896290223, "grad_norm": 5.020133018493652, "learning_rate": 1.6679377971454154e-06, "loss": 0.4127, "step": 10029 }, { "epoch": 0.6852497096399536, "grad_norm": 4.817233562469482, "learning_rate": 1.6672779227228366e-06, "loss": 0.3813, "step": 10030 }, { "epoch": 0.6853180296508847, "grad_norm": 3.749927043914795, "learning_rate": 1.6666181380437988e-06, "loss": 0.3025, "step": 10031 }, { "epoch": 0.685386349661816, "grad_norm": 4.289163589477539, "learning_rate": 1.6659584431406044e-06, "loss": 0.4408, "step": 10032 }, { "epoch": 0.6854546696727472, "grad_norm": 4.003791332244873, "learning_rate": 1.6652988380455615e-06, "loss": 0.3218, "step": 10033 }, { "epoch": 0.6855229896836783, "grad_norm": 3.4082114696502686, "learning_rate": 1.6646393227909677e-06, "loss": 0.2241, "step": 10034 }, { "epoch": 0.6855913096946096, "grad_norm": 3.5987915992736816, "learning_rate": 1.6639798974091198e-06, "loss": 0.2628, "step": 10035 }, { "epoch": 0.6856596297055407, "grad_norm": 3.584611415863037, "learning_rate": 1.6633205619323044e-06, "loss": 0.2139, "step": 10036 }, { "epoch": 0.685727949716472, "grad_norm": 3.819215774536133, "learning_rate": 1.662661316392809e-06, "loss": 0.2596, "step": 10037 }, { "epoch": 0.6857962697274032, "grad_norm": 3.0970866680145264, "learning_rate": 1.6620021608229155e-06, "loss": 0.2546, "step": 10038 }, { "epoch": 0.6858645897383343, "grad_norm": 3.2049331665039062, "learning_rate": 1.6613430952549008e-06, "loss": 0.233, "step": 10039 }, { "epoch": 0.6859329097492656, "grad_norm": 3.432278871536255, "learning_rate": 1.6606841197210376e-06, "loss": 0.1507, "step": 10040 }, { "epoch": 0.6860012297601967, "grad_norm": 4.269147872924805, "learning_rate": 1.6600252342535943e-06, "loss": 0.2512, "step": 10041 }, { "epoch": 0.686069549771128, "grad_norm": 4.115718364715576, "learning_rate": 1.6593664388848343e-06, "loss": 0.3257, "step": 10042 }, { "epoch": 0.6861378697820592, "grad_norm": 5.211476802825928, "learning_rate": 1.6587077336470174e-06, "loss": 0.4119, "step": 10043 }, { "epoch": 0.6862061897929904, "grad_norm": 4.261236190795898, "learning_rate": 1.6580491185724002e-06, "loss": 0.2966, "step": 10044 }, { "epoch": 0.6862745098039216, "grad_norm": 4.3451828956604, "learning_rate": 1.6573905936932307e-06, "loss": 0.271, "step": 10045 }, { "epoch": 0.6863428298148527, "grad_norm": 3.9604923725128174, "learning_rate": 1.6567321590417543e-06, "loss": 0.2992, "step": 10046 }, { "epoch": 0.686411149825784, "grad_norm": 4.119492530822754, "learning_rate": 1.6560738146502164e-06, "loss": 0.3403, "step": 10047 }, { "epoch": 0.6864794698367152, "grad_norm": 5.448282718658447, "learning_rate": 1.655415560550854e-06, "loss": 0.3405, "step": 10048 }, { "epoch": 0.6865477898476464, "grad_norm": 3.7712790966033936, "learning_rate": 1.6547573967758973e-06, "loss": 0.2927, "step": 10049 }, { "epoch": 0.6866161098585776, "grad_norm": 4.943894863128662, "learning_rate": 1.6540993233575764e-06, "loss": 0.2829, "step": 10050 }, { "epoch": 0.6866844298695087, "grad_norm": 3.5146703720092773, "learning_rate": 1.653441340328115e-06, "loss": 0.2534, "step": 10051 }, { "epoch": 0.68675274988044, "grad_norm": 3.5013086795806885, "learning_rate": 1.6527834477197333e-06, "loss": 0.237, "step": 10052 }, { "epoch": 0.6868210698913711, "grad_norm": 3.4381964206695557, "learning_rate": 1.6521256455646463e-06, "loss": 0.2832, "step": 10053 }, { "epoch": 0.6868893899023024, "grad_norm": 5.056427478790283, "learning_rate": 1.6514679338950646e-06, "loss": 0.2732, "step": 10054 }, { "epoch": 0.6869577099132336, "grad_norm": 3.737159013748169, "learning_rate": 1.6508103127431952e-06, "loss": 0.2883, "step": 10055 }, { "epoch": 0.6870260299241648, "grad_norm": 5.081972599029541, "learning_rate": 1.6501527821412408e-06, "loss": 0.2323, "step": 10056 }, { "epoch": 0.687094349935096, "grad_norm": 2.3520193099975586, "learning_rate": 1.6494953421213966e-06, "loss": 0.1293, "step": 10057 }, { "epoch": 0.6871626699460271, "grad_norm": 3.0878610610961914, "learning_rate": 1.6488379927158567e-06, "loss": 0.3788, "step": 10058 }, { "epoch": 0.6872309899569584, "grad_norm": 2.863785982131958, "learning_rate": 1.6481807339568104e-06, "loss": 0.1855, "step": 10059 }, { "epoch": 0.6872993099678896, "grad_norm": 4.653815746307373, "learning_rate": 1.6475235658764412e-06, "loss": 0.4064, "step": 10060 }, { "epoch": 0.6873676299788208, "grad_norm": 4.148265361785889, "learning_rate": 1.646866488506929e-06, "loss": 0.3542, "step": 10061 }, { "epoch": 0.687435949989752, "grad_norm": 3.4531514644622803, "learning_rate": 1.6462095018804497e-06, "loss": 0.2181, "step": 10062 }, { "epoch": 0.6875042700006831, "grad_norm": 4.035280227661133, "learning_rate": 1.6455526060291737e-06, "loss": 0.2971, "step": 10063 }, { "epoch": 0.6875725900116144, "grad_norm": 4.70766544342041, "learning_rate": 1.6448958009852686e-06, "loss": 0.3861, "step": 10064 }, { "epoch": 0.6876409100225456, "grad_norm": 3.430788278579712, "learning_rate": 1.6442390867808945e-06, "loss": 0.2328, "step": 10065 }, { "epoch": 0.6877092300334768, "grad_norm": 4.795309066772461, "learning_rate": 1.6435824634482082e-06, "loss": 0.296, "step": 10066 }, { "epoch": 0.687777550044408, "grad_norm": 4.080753803253174, "learning_rate": 1.6429259310193658e-06, "loss": 0.2526, "step": 10067 }, { "epoch": 0.6878458700553393, "grad_norm": 6.091126441955566, "learning_rate": 1.6422694895265158e-06, "loss": 0.4697, "step": 10068 }, { "epoch": 0.6879141900662704, "grad_norm": 3.74410080909729, "learning_rate": 1.6416131390018e-06, "loss": 0.2339, "step": 10069 }, { "epoch": 0.6879825100772016, "grad_norm": 4.61980676651001, "learning_rate": 1.6409568794773588e-06, "loss": 0.1917, "step": 10070 }, { "epoch": 0.6880508300881328, "grad_norm": 3.9487030506134033, "learning_rate": 1.6403007109853283e-06, "loss": 0.2181, "step": 10071 }, { "epoch": 0.688119150099064, "grad_norm": 4.542557239532471, "learning_rate": 1.6396446335578393e-06, "loss": 0.3051, "step": 10072 }, { "epoch": 0.6881874701099953, "grad_norm": 4.447736740112305, "learning_rate": 1.6389886472270179e-06, "loss": 0.3104, "step": 10073 }, { "epoch": 0.6882557901209264, "grad_norm": 3.3038270473480225, "learning_rate": 1.6383327520249856e-06, "loss": 0.2035, "step": 10074 }, { "epoch": 0.6883241101318576, "grad_norm": 4.594448566436768, "learning_rate": 1.6376769479838605e-06, "loss": 0.337, "step": 10075 }, { "epoch": 0.6883924301427888, "grad_norm": 4.28684663772583, "learning_rate": 1.6370212351357567e-06, "loss": 0.2762, "step": 10076 }, { "epoch": 0.68846075015372, "grad_norm": 3.557253360748291, "learning_rate": 1.6363656135127798e-06, "loss": 0.2497, "step": 10077 }, { "epoch": 0.6885290701646513, "grad_norm": 2.816746234893799, "learning_rate": 1.6357100831470357e-06, "loss": 0.1815, "step": 10078 }, { "epoch": 0.6885973901755824, "grad_norm": 3.5308187007904053, "learning_rate": 1.6350546440706223e-06, "loss": 0.1831, "step": 10079 }, { "epoch": 0.6886657101865137, "grad_norm": 3.428129196166992, "learning_rate": 1.6343992963156385e-06, "loss": 0.2614, "step": 10080 }, { "epoch": 0.6887340301974448, "grad_norm": 4.867459774017334, "learning_rate": 1.6337440399141714e-06, "loss": 0.3742, "step": 10081 }, { "epoch": 0.688802350208376, "grad_norm": 4.064301013946533, "learning_rate": 1.6330888748983083e-06, "loss": 0.3297, "step": 10082 }, { "epoch": 0.6888706702193073, "grad_norm": 6.0959930419921875, "learning_rate": 1.6324338013001306e-06, "loss": 0.3101, "step": 10083 }, { "epoch": 0.6889389902302384, "grad_norm": 3.2614548206329346, "learning_rate": 1.6317788191517177e-06, "loss": 0.1841, "step": 10084 }, { "epoch": 0.6890073102411697, "grad_norm": 3.566736936569214, "learning_rate": 1.6311239284851376e-06, "loss": 0.2524, "step": 10085 }, { "epoch": 0.6890756302521008, "grad_norm": 3.4988791942596436, "learning_rate": 1.6304691293324628e-06, "loss": 0.2698, "step": 10086 }, { "epoch": 0.689143950263032, "grad_norm": 2.8560407161712646, "learning_rate": 1.6298144217257554e-06, "loss": 0.2323, "step": 10087 }, { "epoch": 0.6892122702739633, "grad_norm": 3.794621706008911, "learning_rate": 1.6291598056970768e-06, "loss": 0.2233, "step": 10088 }, { "epoch": 0.6892805902848944, "grad_norm": 4.307900428771973, "learning_rate": 1.6285052812784781e-06, "loss": 0.271, "step": 10089 }, { "epoch": 0.6893489102958257, "grad_norm": 3.656050682067871, "learning_rate": 1.627850848502012e-06, "loss": 0.2207, "step": 10090 }, { "epoch": 0.6894172303067568, "grad_norm": 3.330029010772705, "learning_rate": 1.6271965073997237e-06, "loss": 0.2349, "step": 10091 }, { "epoch": 0.6894855503176881, "grad_norm": 3.3476784229278564, "learning_rate": 1.6265422580036545e-06, "loss": 0.2253, "step": 10092 }, { "epoch": 0.6895538703286193, "grad_norm": 4.107776165008545, "learning_rate": 1.6258881003458419e-06, "loss": 0.2889, "step": 10093 }, { "epoch": 0.6896221903395504, "grad_norm": 3.7091033458709717, "learning_rate": 1.6252340344583174e-06, "loss": 0.2625, "step": 10094 }, { "epoch": 0.6896905103504817, "grad_norm": 4.5715436935424805, "learning_rate": 1.6245800603731095e-06, "loss": 0.2345, "step": 10095 }, { "epoch": 0.6897588303614128, "grad_norm": 3.669905424118042, "learning_rate": 1.6239261781222426e-06, "loss": 0.2767, "step": 10096 }, { "epoch": 0.6898271503723441, "grad_norm": 4.003756046295166, "learning_rate": 1.6232723877377332e-06, "loss": 0.3997, "step": 10097 }, { "epoch": 0.6898954703832753, "grad_norm": 3.104935884475708, "learning_rate": 1.622618689251597e-06, "loss": 0.2143, "step": 10098 }, { "epoch": 0.6899637903942064, "grad_norm": 4.7919487953186035, "learning_rate": 1.6219650826958423e-06, "loss": 0.2635, "step": 10099 }, { "epoch": 0.6900321104051377, "grad_norm": 5.225671768188477, "learning_rate": 1.6213115681024784e-06, "loss": 0.4054, "step": 10100 }, { "epoch": 0.6901004304160688, "grad_norm": 3.7948784828186035, "learning_rate": 1.6206581455035023e-06, "loss": 0.3664, "step": 10101 }, { "epoch": 0.6901687504270001, "grad_norm": 2.8238885402679443, "learning_rate": 1.620004814930912e-06, "loss": 0.1843, "step": 10102 }, { "epoch": 0.6902370704379313, "grad_norm": 3.6296756267547607, "learning_rate": 1.6193515764166987e-06, "loss": 0.3547, "step": 10103 }, { "epoch": 0.6903053904488625, "grad_norm": 3.352874517440796, "learning_rate": 1.6186984299928518e-06, "loss": 0.2763, "step": 10104 }, { "epoch": 0.6903737104597937, "grad_norm": 3.6979706287384033, "learning_rate": 1.61804537569135e-06, "loss": 0.3147, "step": 10105 }, { "epoch": 0.6904420304707248, "grad_norm": 3.7492778301239014, "learning_rate": 1.6173924135441751e-06, "loss": 0.1759, "step": 10106 }, { "epoch": 0.6905103504816561, "grad_norm": 6.169955730438232, "learning_rate": 1.6167395435833002e-06, "loss": 0.2395, "step": 10107 }, { "epoch": 0.6905786704925873, "grad_norm": 4.714298248291016, "learning_rate": 1.6160867658406946e-06, "loss": 0.3917, "step": 10108 }, { "epoch": 0.6906469905035185, "grad_norm": 3.867609977722168, "learning_rate": 1.615434080348324e-06, "loss": 0.2146, "step": 10109 }, { "epoch": 0.6907153105144497, "grad_norm": 4.613580703735352, "learning_rate": 1.6147814871381463e-06, "loss": 0.3392, "step": 10110 }, { "epoch": 0.6907836305253808, "grad_norm": 3.8283417224884033, "learning_rate": 1.6141289862421183e-06, "loss": 0.1954, "step": 10111 }, { "epoch": 0.6908519505363121, "grad_norm": 4.48789119720459, "learning_rate": 1.6134765776921915e-06, "loss": 0.2046, "step": 10112 }, { "epoch": 0.6909202705472433, "grad_norm": 5.673102855682373, "learning_rate": 1.6128242615203127e-06, "loss": 0.4147, "step": 10113 }, { "epoch": 0.6909885905581745, "grad_norm": 3.931511640548706, "learning_rate": 1.6121720377584243e-06, "loss": 0.2253, "step": 10114 }, { "epoch": 0.6910569105691057, "grad_norm": 4.886856555938721, "learning_rate": 1.6115199064384632e-06, "loss": 0.2667, "step": 10115 }, { "epoch": 0.6911252305800369, "grad_norm": 3.3086373805999756, "learning_rate": 1.6108678675923629e-06, "loss": 0.2322, "step": 10116 }, { "epoch": 0.6911935505909681, "grad_norm": 4.161778926849365, "learning_rate": 1.6102159212520538e-06, "loss": 0.3316, "step": 10117 }, { "epoch": 0.6912618706018993, "grad_norm": 3.600982427597046, "learning_rate": 1.6095640674494569e-06, "loss": 0.2684, "step": 10118 }, { "epoch": 0.6913301906128305, "grad_norm": 3.1703131198883057, "learning_rate": 1.6089123062164916e-06, "loss": 0.169, "step": 10119 }, { "epoch": 0.6913985106237617, "grad_norm": 4.350849628448486, "learning_rate": 1.608260637585076e-06, "loss": 0.4347, "step": 10120 }, { "epoch": 0.6914668306346929, "grad_norm": 3.955798387527466, "learning_rate": 1.60760906158712e-06, "loss": 0.3183, "step": 10121 }, { "epoch": 0.6915351506456241, "grad_norm": 3.894136428833008, "learning_rate": 1.6069575782545271e-06, "loss": 0.2639, "step": 10122 }, { "epoch": 0.6916034706565553, "grad_norm": 4.413161754608154, "learning_rate": 1.6063061876192006e-06, "loss": 0.3495, "step": 10123 }, { "epoch": 0.6916717906674865, "grad_norm": 3.427406072616577, "learning_rate": 1.6056548897130369e-06, "loss": 0.2751, "step": 10124 }, { "epoch": 0.6917401106784177, "grad_norm": 2.936718702316284, "learning_rate": 1.6050036845679283e-06, "loss": 0.2777, "step": 10125 }, { "epoch": 0.6918084306893489, "grad_norm": 4.89363431930542, "learning_rate": 1.6043525722157627e-06, "loss": 0.4222, "step": 10126 }, { "epoch": 0.6918767507002801, "grad_norm": 3.314653158187866, "learning_rate": 1.6037015526884236e-06, "loss": 0.1972, "step": 10127 }, { "epoch": 0.6919450707112114, "grad_norm": 3.770880937576294, "learning_rate": 1.6030506260177892e-06, "loss": 0.2935, "step": 10128 }, { "epoch": 0.6920133907221425, "grad_norm": 4.185431957244873, "learning_rate": 1.602399792235735e-06, "loss": 0.3769, "step": 10129 }, { "epoch": 0.6920817107330737, "grad_norm": 4.094621658325195, "learning_rate": 1.6017490513741283e-06, "loss": 0.3474, "step": 10130 }, { "epoch": 0.6921500307440049, "grad_norm": 3.032292127609253, "learning_rate": 1.6010984034648354e-06, "loss": 0.2436, "step": 10131 }, { "epoch": 0.6922183507549361, "grad_norm": 3.316025972366333, "learning_rate": 1.6004478485397153e-06, "loss": 0.2539, "step": 10132 }, { "epoch": 0.6922866707658674, "grad_norm": 4.371910572052002, "learning_rate": 1.5997973866306283e-06, "loss": 0.2742, "step": 10133 }, { "epoch": 0.6923549907767985, "grad_norm": 3.971956491470337, "learning_rate": 1.5991470177694213e-06, "loss": 0.3234, "step": 10134 }, { "epoch": 0.6924233107877297, "grad_norm": 7.008084297180176, "learning_rate": 1.5984967419879426e-06, "loss": 0.3914, "step": 10135 }, { "epoch": 0.6924916307986609, "grad_norm": 5.698514938354492, "learning_rate": 1.597846559318035e-06, "loss": 0.4155, "step": 10136 }, { "epoch": 0.6925599508095921, "grad_norm": 3.7346179485321045, "learning_rate": 1.5971964697915368e-06, "loss": 0.2785, "step": 10137 }, { "epoch": 0.6926282708205234, "grad_norm": 3.486138343811035, "learning_rate": 1.596546473440278e-06, "loss": 0.2816, "step": 10138 }, { "epoch": 0.6926965908314545, "grad_norm": 4.685161113739014, "learning_rate": 1.5958965702960902e-06, "loss": 0.3458, "step": 10139 }, { "epoch": 0.6927649108423858, "grad_norm": 4.5817084312438965, "learning_rate": 1.5952467603907969e-06, "loss": 0.4425, "step": 10140 }, { "epoch": 0.6928332308533169, "grad_norm": 3.3526813983917236, "learning_rate": 1.5945970437562182e-06, "loss": 0.2052, "step": 10141 }, { "epoch": 0.6929015508642481, "grad_norm": 3.6765637397766113, "learning_rate": 1.5939474204241672e-06, "loss": 0.3596, "step": 10142 }, { "epoch": 0.6929698708751794, "grad_norm": 4.808382987976074, "learning_rate": 1.5932978904264546e-06, "loss": 0.2119, "step": 10143 }, { "epoch": 0.6930381908861105, "grad_norm": 3.5228452682495117, "learning_rate": 1.592648453794887e-06, "loss": 0.286, "step": 10144 }, { "epoch": 0.6931065108970418, "grad_norm": 3.9625020027160645, "learning_rate": 1.5919991105612653e-06, "loss": 0.3048, "step": 10145 }, { "epoch": 0.6931748309079729, "grad_norm": 4.875919818878174, "learning_rate": 1.5913498607573863e-06, "loss": 0.3863, "step": 10146 }, { "epoch": 0.6932431509189041, "grad_norm": 3.394005537033081, "learning_rate": 1.5907007044150414e-06, "loss": 0.2999, "step": 10147 }, { "epoch": 0.6933114709298354, "grad_norm": 7.512362003326416, "learning_rate": 1.5900516415660186e-06, "loss": 0.3292, "step": 10148 }, { "epoch": 0.6933797909407665, "grad_norm": 4.608662128448486, "learning_rate": 1.5894026722421016e-06, "loss": 0.2679, "step": 10149 }, { "epoch": 0.6934481109516978, "grad_norm": 3.854440689086914, "learning_rate": 1.588753796475067e-06, "loss": 0.3072, "step": 10150 }, { "epoch": 0.6935164309626289, "grad_norm": 3.6741867065429688, "learning_rate": 1.588105014296689e-06, "loss": 0.2166, "step": 10151 }, { "epoch": 0.6935847509735602, "grad_norm": 3.3272805213928223, "learning_rate": 1.5874563257387359e-06, "loss": 0.2483, "step": 10152 }, { "epoch": 0.6936530709844914, "grad_norm": 3.501756191253662, "learning_rate": 1.586807730832976e-06, "loss": 0.2832, "step": 10153 }, { "epoch": 0.6937213909954225, "grad_norm": 3.331256628036499, "learning_rate": 1.5861592296111654e-06, "loss": 0.3285, "step": 10154 }, { "epoch": 0.6937897110063538, "grad_norm": 5.499710559844971, "learning_rate": 1.5855108221050607e-06, "loss": 0.3849, "step": 10155 }, { "epoch": 0.6938580310172849, "grad_norm": 5.149737358093262, "learning_rate": 1.5848625083464128e-06, "loss": 0.2174, "step": 10156 }, { "epoch": 0.6939263510282162, "grad_norm": 4.021364688873291, "learning_rate": 1.584214288366969e-06, "loss": 0.3174, "step": 10157 }, { "epoch": 0.6939946710391474, "grad_norm": 2.269967794418335, "learning_rate": 1.583566162198467e-06, "loss": 0.1821, "step": 10158 }, { "epoch": 0.6940629910500785, "grad_norm": 3.0609772205352783, "learning_rate": 1.5829181298726482e-06, "loss": 0.1604, "step": 10159 }, { "epoch": 0.6941313110610098, "grad_norm": 5.3353095054626465, "learning_rate": 1.5822701914212435e-06, "loss": 0.2697, "step": 10160 }, { "epoch": 0.6941996310719409, "grad_norm": 3.9701426029205322, "learning_rate": 1.5816223468759818e-06, "loss": 0.2826, "step": 10161 }, { "epoch": 0.6942679510828722, "grad_norm": 5.860738754272461, "learning_rate": 1.5809745962685837e-06, "loss": 0.2209, "step": 10162 }, { "epoch": 0.6943362710938034, "grad_norm": 4.034273624420166, "learning_rate": 1.5803269396307695e-06, "loss": 0.3413, "step": 10163 }, { "epoch": 0.6944045911047346, "grad_norm": 3.6648190021514893, "learning_rate": 1.5796793769942528e-06, "loss": 0.2101, "step": 10164 }, { "epoch": 0.6944729111156658, "grad_norm": 3.487520217895508, "learning_rate": 1.5790319083907437e-06, "loss": 0.2601, "step": 10165 }, { "epoch": 0.6945412311265969, "grad_norm": 3.935192108154297, "learning_rate": 1.5783845338519459e-06, "loss": 0.1888, "step": 10166 }, { "epoch": 0.6946095511375282, "grad_norm": 3.1602585315704346, "learning_rate": 1.577737253409561e-06, "loss": 0.2425, "step": 10167 }, { "epoch": 0.6946778711484594, "grad_norm": 4.209742069244385, "learning_rate": 1.5770900670952831e-06, "loss": 0.3043, "step": 10168 }, { "epoch": 0.6947461911593906, "grad_norm": 5.328949928283691, "learning_rate": 1.5764429749408062e-06, "loss": 0.3495, "step": 10169 }, { "epoch": 0.6948145111703218, "grad_norm": 3.6661269664764404, "learning_rate": 1.5757959769778125e-06, "loss": 0.2255, "step": 10170 }, { "epoch": 0.6948828311812529, "grad_norm": 4.179604530334473, "learning_rate": 1.5751490732379847e-06, "loss": 0.3252, "step": 10171 }, { "epoch": 0.6949511511921842, "grad_norm": 4.4910454750061035, "learning_rate": 1.574502263753002e-06, "loss": 0.2041, "step": 10172 }, { "epoch": 0.6950194712031154, "grad_norm": 3.246180295944214, "learning_rate": 1.5738555485545361e-06, "loss": 0.2463, "step": 10173 }, { "epoch": 0.6950877912140466, "grad_norm": 3.929443120956421, "learning_rate": 1.5732089276742563e-06, "loss": 0.2341, "step": 10174 }, { "epoch": 0.6951561112249778, "grad_norm": 3.7659966945648193, "learning_rate": 1.5725624011438229e-06, "loss": 0.2866, "step": 10175 }, { "epoch": 0.695224431235909, "grad_norm": 4.543381214141846, "learning_rate": 1.5719159689948961e-06, "loss": 0.2934, "step": 10176 }, { "epoch": 0.6952927512468402, "grad_norm": 4.717530727386475, "learning_rate": 1.57126963125913e-06, "loss": 0.3765, "step": 10177 }, { "epoch": 0.6953610712577714, "grad_norm": 6.786770820617676, "learning_rate": 1.570623387968174e-06, "loss": 0.218, "step": 10178 }, { "epoch": 0.6954293912687026, "grad_norm": 3.3980791568756104, "learning_rate": 1.5699772391536729e-06, "loss": 0.2535, "step": 10179 }, { "epoch": 0.6954977112796338, "grad_norm": 3.664156198501587, "learning_rate": 1.5693311848472665e-06, "loss": 0.3328, "step": 10180 }, { "epoch": 0.695566031290565, "grad_norm": 5.114970684051514, "learning_rate": 1.5686852250805914e-06, "loss": 0.3669, "step": 10181 }, { "epoch": 0.6956343513014962, "grad_norm": 4.003361225128174, "learning_rate": 1.568039359885279e-06, "loss": 0.3046, "step": 10182 }, { "epoch": 0.6957026713124274, "grad_norm": 3.0422770977020264, "learning_rate": 1.567393589292953e-06, "loss": 0.2061, "step": 10183 }, { "epoch": 0.6957709913233586, "grad_norm": 5.270010471343994, "learning_rate": 1.5667479133352368e-06, "loss": 0.2884, "step": 10184 }, { "epoch": 0.6958393113342898, "grad_norm": 3.917131185531616, "learning_rate": 1.5661023320437458e-06, "loss": 0.1911, "step": 10185 }, { "epoch": 0.695907631345221, "grad_norm": 4.990530014038086, "learning_rate": 1.565456845450096e-06, "loss": 0.2449, "step": 10186 }, { "epoch": 0.6959759513561522, "grad_norm": 3.3340580463409424, "learning_rate": 1.5648114535858927e-06, "loss": 0.3113, "step": 10187 }, { "epoch": 0.6960442713670835, "grad_norm": 4.060601234436035, "learning_rate": 1.5641661564827385e-06, "loss": 0.2481, "step": 10188 }, { "epoch": 0.6961125913780146, "grad_norm": 3.533123254776001, "learning_rate": 1.5635209541722329e-06, "loss": 0.2208, "step": 10189 }, { "epoch": 0.6961809113889458, "grad_norm": 3.443918228149414, "learning_rate": 1.562875846685971e-06, "loss": 0.2236, "step": 10190 }, { "epoch": 0.696249231399877, "grad_norm": 4.057753086090088, "learning_rate": 1.562230834055538e-06, "loss": 0.2748, "step": 10191 }, { "epoch": 0.6963175514108082, "grad_norm": 3.2881481647491455, "learning_rate": 1.561585916312522e-06, "loss": 0.2635, "step": 10192 }, { "epoch": 0.6963858714217395, "grad_norm": 3.5920798778533936, "learning_rate": 1.560941093488502e-06, "loss": 0.3248, "step": 10193 }, { "epoch": 0.6964541914326706, "grad_norm": 3.473555088043213, "learning_rate": 1.5602963656150545e-06, "loss": 0.2133, "step": 10194 }, { "epoch": 0.6965225114436018, "grad_norm": 4.790879726409912, "learning_rate": 1.559651732723748e-06, "loss": 0.2482, "step": 10195 }, { "epoch": 0.696590831454533, "grad_norm": 3.436846971511841, "learning_rate": 1.5590071948461485e-06, "loss": 0.1802, "step": 10196 }, { "epoch": 0.6966591514654642, "grad_norm": 4.307508945465088, "learning_rate": 1.5583627520138185e-06, "loss": 0.2266, "step": 10197 }, { "epoch": 0.6967274714763955, "grad_norm": 4.83402156829834, "learning_rate": 1.5577184042583141e-06, "loss": 0.2303, "step": 10198 }, { "epoch": 0.6967957914873266, "grad_norm": 3.4988999366760254, "learning_rate": 1.5570741516111875e-06, "loss": 0.2635, "step": 10199 }, { "epoch": 0.6968641114982579, "grad_norm": 4.297914981842041, "learning_rate": 1.5564299941039858e-06, "loss": 0.2621, "step": 10200 }, { "epoch": 0.696932431509189, "grad_norm": 4.215307712554932, "learning_rate": 1.5557859317682522e-06, "loss": 0.2754, "step": 10201 }, { "epoch": 0.6970007515201202, "grad_norm": 3.176305055618286, "learning_rate": 1.5551419646355256e-06, "loss": 0.2663, "step": 10202 }, { "epoch": 0.6970690715310515, "grad_norm": 5.293613433837891, "learning_rate": 1.5544980927373364e-06, "loss": 0.3923, "step": 10203 }, { "epoch": 0.6971373915419826, "grad_norm": 2.644390344619751, "learning_rate": 1.5538543161052156e-06, "loss": 0.2458, "step": 10204 }, { "epoch": 0.6972057115529139, "grad_norm": 3.6831138134002686, "learning_rate": 1.553210634770685e-06, "loss": 0.2512, "step": 10205 }, { "epoch": 0.697274031563845, "grad_norm": 4.177581787109375, "learning_rate": 1.5525670487652685e-06, "loss": 0.2879, "step": 10206 }, { "epoch": 0.6973423515747762, "grad_norm": 4.81027889251709, "learning_rate": 1.5519235581204763e-06, "loss": 0.2225, "step": 10207 }, { "epoch": 0.6974106715857075, "grad_norm": 3.9861223697662354, "learning_rate": 1.55128016286782e-06, "loss": 0.2637, "step": 10208 }, { "epoch": 0.6974789915966386, "grad_norm": 2.732527732849121, "learning_rate": 1.5506368630388056e-06, "loss": 0.2096, "step": 10209 }, { "epoch": 0.6975473116075699, "grad_norm": 4.063854694366455, "learning_rate": 1.549993658664933e-06, "loss": 0.3864, "step": 10210 }, { "epoch": 0.697615631618501, "grad_norm": 4.6931304931640625, "learning_rate": 1.5493505497776987e-06, "loss": 0.259, "step": 10211 }, { "epoch": 0.6976839516294323, "grad_norm": 4.153534412384033, "learning_rate": 1.5487075364085931e-06, "loss": 0.2173, "step": 10212 }, { "epoch": 0.6977522716403635, "grad_norm": 3.4328577518463135, "learning_rate": 1.5480646185891038e-06, "loss": 0.264, "step": 10213 }, { "epoch": 0.6978205916512946, "grad_norm": 4.162262439727783, "learning_rate": 1.5474217963507139e-06, "loss": 0.2892, "step": 10214 }, { "epoch": 0.6978889116622259, "grad_norm": 4.665855407714844, "learning_rate": 1.546779069724898e-06, "loss": 0.2461, "step": 10215 }, { "epoch": 0.697957231673157, "grad_norm": 4.241872310638428, "learning_rate": 1.54613643874313e-06, "loss": 0.2716, "step": 10216 }, { "epoch": 0.6980255516840883, "grad_norm": 3.1540157794952393, "learning_rate": 1.5454939034368779e-06, "loss": 0.1958, "step": 10217 }, { "epoch": 0.6980938716950195, "grad_norm": 4.101859092712402, "learning_rate": 1.5448514638376047e-06, "loss": 0.3599, "step": 10218 }, { "epoch": 0.6981621917059506, "grad_norm": 3.393655300140381, "learning_rate": 1.5442091199767693e-06, "loss": 0.2453, "step": 10219 }, { "epoch": 0.6982305117168819, "grad_norm": 2.9405763149261475, "learning_rate": 1.5435668718858257e-06, "loss": 0.2097, "step": 10220 }, { "epoch": 0.698298831727813, "grad_norm": 4.101827621459961, "learning_rate": 1.5429247195962225e-06, "loss": 0.2639, "step": 10221 }, { "epoch": 0.6983671517387443, "grad_norm": 4.4785237312316895, "learning_rate": 1.5422826631394059e-06, "loss": 0.338, "step": 10222 }, { "epoch": 0.6984354717496755, "grad_norm": 3.5913403034210205, "learning_rate": 1.541640702546813e-06, "loss": 0.2287, "step": 10223 }, { "epoch": 0.6985037917606067, "grad_norm": 3.1214780807495117, "learning_rate": 1.5409988378498788e-06, "loss": 0.2633, "step": 10224 }, { "epoch": 0.6985721117715379, "grad_norm": 7.010303974151611, "learning_rate": 1.5403570690800367e-06, "loss": 0.2414, "step": 10225 }, { "epoch": 0.698640431782469, "grad_norm": 3.8706510066986084, "learning_rate": 1.5397153962687123e-06, "loss": 0.3295, "step": 10226 }, { "epoch": 0.6987087517934003, "grad_norm": 4.672990798950195, "learning_rate": 1.5390738194473234e-06, "loss": 0.2999, "step": 10227 }, { "epoch": 0.6987770718043315, "grad_norm": 5.159854412078857, "learning_rate": 1.5384323386472886e-06, "loss": 0.2694, "step": 10228 }, { "epoch": 0.6988453918152627, "grad_norm": 2.802666664123535, "learning_rate": 1.5377909539000187e-06, "loss": 0.2043, "step": 10229 }, { "epoch": 0.6989137118261939, "grad_norm": 4.529232501983643, "learning_rate": 1.537149665236921e-06, "loss": 0.2966, "step": 10230 }, { "epoch": 0.698982031837125, "grad_norm": 4.584545135498047, "learning_rate": 1.5365084726893976e-06, "loss": 0.3333, "step": 10231 }, { "epoch": 0.6990503518480563, "grad_norm": 2.8154823780059814, "learning_rate": 1.535867376288846e-06, "loss": 0.152, "step": 10232 }, { "epoch": 0.6991186718589875, "grad_norm": 4.683619022369385, "learning_rate": 1.5352263760666588e-06, "loss": 0.3498, "step": 10233 }, { "epoch": 0.6991869918699187, "grad_norm": 5.108297348022461, "learning_rate": 1.534585472054226e-06, "loss": 0.2889, "step": 10234 }, { "epoch": 0.6992553118808499, "grad_norm": 5.6835761070251465, "learning_rate": 1.533944664282928e-06, "loss": 0.4268, "step": 10235 }, { "epoch": 0.6993236318917812, "grad_norm": 3.9910755157470703, "learning_rate": 1.5333039527841443e-06, "loss": 0.2891, "step": 10236 }, { "epoch": 0.6993919519027123, "grad_norm": 4.694355010986328, "learning_rate": 1.5326633375892495e-06, "loss": 0.3204, "step": 10237 }, { "epoch": 0.6994602719136435, "grad_norm": 5.096558570861816, "learning_rate": 1.5320228187296125e-06, "loss": 0.4352, "step": 10238 }, { "epoch": 0.6995285919245747, "grad_norm": 4.233419418334961, "learning_rate": 1.5313823962365976e-06, "loss": 0.2495, "step": 10239 }, { "epoch": 0.6995969119355059, "grad_norm": 3.700232982635498, "learning_rate": 1.5307420701415652e-06, "loss": 0.2205, "step": 10240 }, { "epoch": 0.6996652319464371, "grad_norm": 4.763369083404541, "learning_rate": 1.5301018404758702e-06, "loss": 0.3305, "step": 10241 }, { "epoch": 0.6997335519573683, "grad_norm": 4.536303997039795, "learning_rate": 1.5294617072708625e-06, "loss": 0.2854, "step": 10242 }, { "epoch": 0.6998018719682995, "grad_norm": 4.784301280975342, "learning_rate": 1.5288216705578895e-06, "loss": 0.3012, "step": 10243 }, { "epoch": 0.6998701919792307, "grad_norm": 3.5730741024017334, "learning_rate": 1.528181730368288e-06, "loss": 0.3554, "step": 10244 }, { "epoch": 0.6999385119901619, "grad_norm": 5.6249566078186035, "learning_rate": 1.5275418867333983e-06, "loss": 0.2791, "step": 10245 }, { "epoch": 0.7000068320010931, "grad_norm": 4.895020961761475, "learning_rate": 1.5269021396845498e-06, "loss": 0.2552, "step": 10246 }, { "epoch": 0.7000751520120243, "grad_norm": 4.71208381652832, "learning_rate": 1.5262624892530717e-06, "loss": 0.1665, "step": 10247 }, { "epoch": 0.7001434720229556, "grad_norm": 3.129239082336426, "learning_rate": 1.5256229354702824e-06, "loss": 0.3166, "step": 10248 }, { "epoch": 0.7002117920338867, "grad_norm": 5.288112163543701, "learning_rate": 1.5249834783675005e-06, "loss": 0.3107, "step": 10249 }, { "epoch": 0.7002801120448179, "grad_norm": 4.02602481842041, "learning_rate": 1.5243441179760393e-06, "loss": 0.2562, "step": 10250 }, { "epoch": 0.7003484320557491, "grad_norm": 3.6757028102874756, "learning_rate": 1.5237048543272062e-06, "loss": 0.3475, "step": 10251 }, { "epoch": 0.7004167520666803, "grad_norm": 4.255478858947754, "learning_rate": 1.523065687452304e-06, "loss": 0.348, "step": 10252 }, { "epoch": 0.7004850720776116, "grad_norm": 4.843113899230957, "learning_rate": 1.522426617382631e-06, "loss": 0.319, "step": 10253 }, { "epoch": 0.7005533920885427, "grad_norm": 4.105876445770264, "learning_rate": 1.5217876441494806e-06, "loss": 0.2893, "step": 10254 }, { "epoch": 0.7006217120994739, "grad_norm": 2.6637980937957764, "learning_rate": 1.521148767784144e-06, "loss": 0.1903, "step": 10255 }, { "epoch": 0.7006900321104051, "grad_norm": 3.797995090484619, "learning_rate": 1.5205099883179015e-06, "loss": 0.2807, "step": 10256 }, { "epoch": 0.7007583521213363, "grad_norm": 4.345890522003174, "learning_rate": 1.5198713057820327e-06, "loss": 0.2771, "step": 10257 }, { "epoch": 0.7008266721322676, "grad_norm": 3.684969902038574, "learning_rate": 1.5192327202078152e-06, "loss": 0.3677, "step": 10258 }, { "epoch": 0.7008949921431987, "grad_norm": 4.701109409332275, "learning_rate": 1.5185942316265186e-06, "loss": 0.3249, "step": 10259 }, { "epoch": 0.70096331215413, "grad_norm": 3.931185007095337, "learning_rate": 1.5179558400694055e-06, "loss": 0.2548, "step": 10260 }, { "epoch": 0.7010316321650611, "grad_norm": 3.8660731315612793, "learning_rate": 1.517317545567737e-06, "loss": 0.2963, "step": 10261 }, { "epoch": 0.7010999521759923, "grad_norm": 4.669135570526123, "learning_rate": 1.5166793481527694e-06, "loss": 0.3343, "step": 10262 }, { "epoch": 0.7011682721869236, "grad_norm": 4.222990036010742, "learning_rate": 1.5160412478557533e-06, "loss": 0.3191, "step": 10263 }, { "epoch": 0.7012365921978547, "grad_norm": 3.9503111839294434, "learning_rate": 1.5154032447079345e-06, "loss": 0.2862, "step": 10264 }, { "epoch": 0.701304912208786, "grad_norm": 3.305917739868164, "learning_rate": 1.5147653387405547e-06, "loss": 0.3294, "step": 10265 }, { "epoch": 0.7013732322197171, "grad_norm": 3.8383193016052246, "learning_rate": 1.5141275299848503e-06, "loss": 0.2535, "step": 10266 }, { "epoch": 0.7014415522306483, "grad_norm": 4.186342716217041, "learning_rate": 1.5134898184720545e-06, "loss": 0.2449, "step": 10267 }, { "epoch": 0.7015098722415796, "grad_norm": 5.14029598236084, "learning_rate": 1.5128522042333917e-06, "loss": 0.318, "step": 10268 }, { "epoch": 0.7015781922525107, "grad_norm": 4.183638095855713, "learning_rate": 1.5122146873000848e-06, "loss": 0.2896, "step": 10269 }, { "epoch": 0.701646512263442, "grad_norm": 3.5981194972991943, "learning_rate": 1.5115772677033525e-06, "loss": 0.2071, "step": 10270 }, { "epoch": 0.7017148322743731, "grad_norm": 5.1930460929870605, "learning_rate": 1.510939945474407e-06, "loss": 0.4014, "step": 10271 }, { "epoch": 0.7017831522853044, "grad_norm": 3.8523874282836914, "learning_rate": 1.5103027206444564e-06, "loss": 0.2874, "step": 10272 }, { "epoch": 0.7018514722962356, "grad_norm": 5.273778438568115, "learning_rate": 1.5096655932447035e-06, "loss": 0.301, "step": 10273 }, { "epoch": 0.7019197923071667, "grad_norm": 5.997122287750244, "learning_rate": 1.509028563306347e-06, "loss": 0.318, "step": 10274 }, { "epoch": 0.701988112318098, "grad_norm": 7.270479202270508, "learning_rate": 1.508391630860582e-06, "loss": 0.344, "step": 10275 }, { "epoch": 0.7020564323290291, "grad_norm": 2.870389938354492, "learning_rate": 1.507754795938595e-06, "loss": 0.231, "step": 10276 }, { "epoch": 0.7021247523399604, "grad_norm": 2.6893157958984375, "learning_rate": 1.5071180585715694e-06, "loss": 0.153, "step": 10277 }, { "epoch": 0.7021930723508916, "grad_norm": 3.0785629749298096, "learning_rate": 1.5064814187906875e-06, "loss": 0.2903, "step": 10278 }, { "epoch": 0.7022613923618227, "grad_norm": 4.893639087677002, "learning_rate": 1.5058448766271241e-06, "loss": 0.3392, "step": 10279 }, { "epoch": 0.702329712372754, "grad_norm": 4.269574165344238, "learning_rate": 1.505208432112046e-06, "loss": 0.1817, "step": 10280 }, { "epoch": 0.7023980323836851, "grad_norm": 5.355473041534424, "learning_rate": 1.5045720852766196e-06, "loss": 0.4463, "step": 10281 }, { "epoch": 0.7024663523946164, "grad_norm": 3.993178606033325, "learning_rate": 1.5039358361520055e-06, "loss": 0.2009, "step": 10282 }, { "epoch": 0.7025346724055476, "grad_norm": 4.535934925079346, "learning_rate": 1.5032996847693587e-06, "loss": 0.3394, "step": 10283 }, { "epoch": 0.7026029924164788, "grad_norm": 3.544081926345825, "learning_rate": 1.5026636311598297e-06, "loss": 0.3541, "step": 10284 }, { "epoch": 0.70267131242741, "grad_norm": 3.8991079330444336, "learning_rate": 1.5020276753545651e-06, "loss": 0.2096, "step": 10285 }, { "epoch": 0.7027396324383411, "grad_norm": 3.7895569801330566, "learning_rate": 1.5013918173847052e-06, "loss": 0.2496, "step": 10286 }, { "epoch": 0.7028079524492724, "grad_norm": 4.662783145904541, "learning_rate": 1.5007560572813878e-06, "loss": 0.3185, "step": 10287 }, { "epoch": 0.7028762724602036, "grad_norm": 2.9962332248687744, "learning_rate": 1.5001203950757423e-06, "loss": 0.2868, "step": 10288 }, { "epoch": 0.7029445924711348, "grad_norm": 3.5405263900756836, "learning_rate": 1.4994848307988959e-06, "loss": 0.298, "step": 10289 }, { "epoch": 0.703012912482066, "grad_norm": 5.1182098388671875, "learning_rate": 1.4988493644819707e-06, "loss": 0.2768, "step": 10290 }, { "epoch": 0.7030812324929971, "grad_norm": 5.025762557983398, "learning_rate": 1.4982139961560845e-06, "loss": 0.2962, "step": 10291 }, { "epoch": 0.7031495525039284, "grad_norm": 3.2591288089752197, "learning_rate": 1.4975787258523488e-06, "loss": 0.2285, "step": 10292 }, { "epoch": 0.7032178725148596, "grad_norm": 4.783292293548584, "learning_rate": 1.4969435536018713e-06, "loss": 0.2773, "step": 10293 }, { "epoch": 0.7032861925257908, "grad_norm": 3.722196578979492, "learning_rate": 1.4963084794357545e-06, "loss": 0.2013, "step": 10294 }, { "epoch": 0.703354512536722, "grad_norm": 4.574776649475098, "learning_rate": 1.495673503385098e-06, "loss": 0.3057, "step": 10295 }, { "epoch": 0.7034228325476533, "grad_norm": 2.7254037857055664, "learning_rate": 1.4950386254809924e-06, "loss": 0.2045, "step": 10296 }, { "epoch": 0.7034911525585844, "grad_norm": 4.721736431121826, "learning_rate": 1.4944038457545258e-06, "loss": 0.2715, "step": 10297 }, { "epoch": 0.7035594725695156, "grad_norm": 3.36474609375, "learning_rate": 1.493769164236784e-06, "loss": 0.2557, "step": 10298 }, { "epoch": 0.7036277925804468, "grad_norm": 3.1956470012664795, "learning_rate": 1.4931345809588458e-06, "loss": 0.2681, "step": 10299 }, { "epoch": 0.703696112591378, "grad_norm": 6.780584335327148, "learning_rate": 1.4925000959517827e-06, "loss": 0.4082, "step": 10300 }, { "epoch": 0.7037644326023093, "grad_norm": 4.353774547576904, "learning_rate": 1.4918657092466648e-06, "loss": 0.39, "step": 10301 }, { "epoch": 0.7038327526132404, "grad_norm": 4.16865873336792, "learning_rate": 1.4912314208745563e-06, "loss": 0.2074, "step": 10302 }, { "epoch": 0.7039010726241717, "grad_norm": 3.821525812149048, "learning_rate": 1.490597230866517e-06, "loss": 0.2929, "step": 10303 }, { "epoch": 0.7039693926351028, "grad_norm": 6.071303367614746, "learning_rate": 1.489963139253601e-06, "loss": 0.2987, "step": 10304 }, { "epoch": 0.704037712646034, "grad_norm": 4.08361291885376, "learning_rate": 1.4893291460668586e-06, "loss": 0.2101, "step": 10305 }, { "epoch": 0.7041060326569653, "grad_norm": 3.462252378463745, "learning_rate": 1.4886952513373343e-06, "loss": 0.2959, "step": 10306 }, { "epoch": 0.7041743526678964, "grad_norm": 3.7535648345947266, "learning_rate": 1.4880614550960682e-06, "loss": 0.2669, "step": 10307 }, { "epoch": 0.7042426726788277, "grad_norm": 4.727083683013916, "learning_rate": 1.4874277573740975e-06, "loss": 0.2309, "step": 10308 }, { "epoch": 0.7043109926897588, "grad_norm": 5.4220404624938965, "learning_rate": 1.4867941582024496e-06, "loss": 0.2944, "step": 10309 }, { "epoch": 0.70437931270069, "grad_norm": 5.6297712326049805, "learning_rate": 1.4861606576121507e-06, "loss": 0.382, "step": 10310 }, { "epoch": 0.7044476327116213, "grad_norm": 3.990511894226074, "learning_rate": 1.4855272556342236e-06, "loss": 0.2932, "step": 10311 }, { "epoch": 0.7045159527225524, "grad_norm": 4.548180103302002, "learning_rate": 1.4848939522996843e-06, "loss": 0.2264, "step": 10312 }, { "epoch": 0.7045842727334837, "grad_norm": 4.117648124694824, "learning_rate": 1.4842607476395422e-06, "loss": 0.3527, "step": 10313 }, { "epoch": 0.7046525927444148, "grad_norm": 4.358964920043945, "learning_rate": 1.483627641684804e-06, "loss": 0.276, "step": 10314 }, { "epoch": 0.7047209127553461, "grad_norm": 4.132629871368408, "learning_rate": 1.482994634466472e-06, "loss": 0.358, "step": 10315 }, { "epoch": 0.7047892327662773, "grad_norm": 3.378973960876465, "learning_rate": 1.4823617260155425e-06, "loss": 0.2336, "step": 10316 }, { "epoch": 0.7048575527772084, "grad_norm": 3.24812388420105, "learning_rate": 1.4817289163630072e-06, "loss": 0.2723, "step": 10317 }, { "epoch": 0.7049258727881397, "grad_norm": 3.9841885566711426, "learning_rate": 1.481096205539853e-06, "loss": 0.302, "step": 10318 }, { "epoch": 0.7049941927990708, "grad_norm": 4.904389381408691, "learning_rate": 1.480463593577063e-06, "loss": 0.2525, "step": 10319 }, { "epoch": 0.7050625128100021, "grad_norm": 3.3686392307281494, "learning_rate": 1.4798310805056148e-06, "loss": 0.2048, "step": 10320 }, { "epoch": 0.7051308328209333, "grad_norm": 4.099684238433838, "learning_rate": 1.4791986663564785e-06, "loss": 0.2777, "step": 10321 }, { "epoch": 0.7051991528318644, "grad_norm": 3.2228870391845703, "learning_rate": 1.4785663511606234e-06, "loss": 0.3116, "step": 10322 }, { "epoch": 0.7052674728427957, "grad_norm": 4.135218143463135, "learning_rate": 1.4779341349490121e-06, "loss": 0.2636, "step": 10323 }, { "epoch": 0.7053357928537268, "grad_norm": 5.653976917266846, "learning_rate": 1.477302017752603e-06, "loss": 0.3458, "step": 10324 }, { "epoch": 0.7054041128646581, "grad_norm": 4.563522815704346, "learning_rate": 1.4766699996023479e-06, "loss": 0.2116, "step": 10325 }, { "epoch": 0.7054724328755893, "grad_norm": 4.028326988220215, "learning_rate": 1.4760380805291963e-06, "loss": 0.129, "step": 10326 }, { "epoch": 0.7055407528865205, "grad_norm": 3.0382955074310303, "learning_rate": 1.4754062605640915e-06, "loss": 0.222, "step": 10327 }, { "epoch": 0.7056090728974517, "grad_norm": 3.190992593765259, "learning_rate": 1.474774539737973e-06, "loss": 0.2208, "step": 10328 }, { "epoch": 0.7056773929083828, "grad_norm": 3.7369165420532227, "learning_rate": 1.4741429180817718e-06, "loss": 0.3281, "step": 10329 }, { "epoch": 0.7057457129193141, "grad_norm": 3.720933437347412, "learning_rate": 1.4735113956264172e-06, "loss": 0.3151, "step": 10330 }, { "epoch": 0.7058140329302452, "grad_norm": 2.8859989643096924, "learning_rate": 1.472879972402835e-06, "loss": 0.2153, "step": 10331 }, { "epoch": 0.7058823529411765, "grad_norm": 4.137972831726074, "learning_rate": 1.472248648441945e-06, "loss": 0.3183, "step": 10332 }, { "epoch": 0.7059506729521077, "grad_norm": 3.17551589012146, "learning_rate": 1.4716174237746587e-06, "loss": 0.1462, "step": 10333 }, { "epoch": 0.7060189929630388, "grad_norm": 3.2353506088256836, "learning_rate": 1.4709862984318867e-06, "loss": 0.2601, "step": 10334 }, { "epoch": 0.7060873129739701, "grad_norm": 5.260566234588623, "learning_rate": 1.4703552724445334e-06, "loss": 0.2798, "step": 10335 }, { "epoch": 0.7061556329849012, "grad_norm": 4.460172653198242, "learning_rate": 1.469724345843499e-06, "loss": 0.3211, "step": 10336 }, { "epoch": 0.7062239529958325, "grad_norm": 5.0275044441223145, "learning_rate": 1.4690935186596778e-06, "loss": 0.2948, "step": 10337 }, { "epoch": 0.7062922730067637, "grad_norm": 4.487692356109619, "learning_rate": 1.46846279092396e-06, "loss": 0.4038, "step": 10338 }, { "epoch": 0.7063605930176949, "grad_norm": 2.779698371887207, "learning_rate": 1.4678321626672304e-06, "loss": 0.1969, "step": 10339 }, { "epoch": 0.7064289130286261, "grad_norm": 4.460308074951172, "learning_rate": 1.4672016339203705e-06, "loss": 0.2224, "step": 10340 }, { "epoch": 0.7064972330395572, "grad_norm": 5.203380107879639, "learning_rate": 1.4665712047142533e-06, "loss": 0.1889, "step": 10341 }, { "epoch": 0.7065655530504885, "grad_norm": 5.146070957183838, "learning_rate": 1.4659408750797506e-06, "loss": 0.147, "step": 10342 }, { "epoch": 0.7066338730614197, "grad_norm": 4.910027027130127, "learning_rate": 1.465310645047728e-06, "loss": 0.3018, "step": 10343 }, { "epoch": 0.7067021930723509, "grad_norm": 5.451884746551514, "learning_rate": 1.4646805146490453e-06, "loss": 0.3374, "step": 10344 }, { "epoch": 0.7067705130832821, "grad_norm": 4.9135236740112305, "learning_rate": 1.4640504839145596e-06, "loss": 0.3889, "step": 10345 }, { "epoch": 0.7068388330942132, "grad_norm": 5.550090789794922, "learning_rate": 1.4634205528751212e-06, "loss": 0.3604, "step": 10346 }, { "epoch": 0.7069071531051445, "grad_norm": 3.065072774887085, "learning_rate": 1.4627907215615758e-06, "loss": 0.2128, "step": 10347 }, { "epoch": 0.7069754731160757, "grad_norm": 4.085695266723633, "learning_rate": 1.4621609900047662e-06, "loss": 0.2345, "step": 10348 }, { "epoch": 0.7070437931270069, "grad_norm": 4.3206787109375, "learning_rate": 1.4615313582355253e-06, "loss": 0.2795, "step": 10349 }, { "epoch": 0.7071121131379381, "grad_norm": 5.135733127593994, "learning_rate": 1.4609018262846877e-06, "loss": 0.2767, "step": 10350 }, { "epoch": 0.7071804331488694, "grad_norm": 3.881800889968872, "learning_rate": 1.460272394183079e-06, "loss": 0.3462, "step": 10351 }, { "epoch": 0.7072487531598005, "grad_norm": 4.382108688354492, "learning_rate": 1.4596430619615217e-06, "loss": 0.3122, "step": 10352 }, { "epoch": 0.7073170731707317, "grad_norm": 4.063671112060547, "learning_rate": 1.4590138296508298e-06, "loss": 0.2333, "step": 10353 }, { "epoch": 0.7073853931816629, "grad_norm": 4.026269435882568, "learning_rate": 1.4583846972818176e-06, "loss": 0.3013, "step": 10354 }, { "epoch": 0.7074537131925941, "grad_norm": 4.373703956604004, "learning_rate": 1.4577556648852908e-06, "loss": 0.23, "step": 10355 }, { "epoch": 0.7075220332035254, "grad_norm": 4.453375339508057, "learning_rate": 1.4571267324920522e-06, "loss": 0.3999, "step": 10356 }, { "epoch": 0.7075903532144565, "grad_norm": 3.9159793853759766, "learning_rate": 1.4564979001328988e-06, "loss": 0.3653, "step": 10357 }, { "epoch": 0.7076586732253877, "grad_norm": 4.435400485992432, "learning_rate": 1.455869167838622e-06, "loss": 0.304, "step": 10358 }, { "epoch": 0.7077269932363189, "grad_norm": 3.318856716156006, "learning_rate": 1.4552405356400107e-06, "loss": 0.2601, "step": 10359 }, { "epoch": 0.7077953132472501, "grad_norm": 5.608332633972168, "learning_rate": 1.4546120035678473e-06, "loss": 0.3184, "step": 10360 }, { "epoch": 0.7078636332581814, "grad_norm": 3.9779508113861084, "learning_rate": 1.4539835716529076e-06, "loss": 0.3076, "step": 10361 }, { "epoch": 0.7079319532691125, "grad_norm": 3.636307716369629, "learning_rate": 1.4533552399259649e-06, "loss": 0.2949, "step": 10362 }, { "epoch": 0.7080002732800438, "grad_norm": 4.106476306915283, "learning_rate": 1.4527270084177863e-06, "loss": 0.3023, "step": 10363 }, { "epoch": 0.7080685932909749, "grad_norm": 3.261899471282959, "learning_rate": 1.4520988771591379e-06, "loss": 0.2505, "step": 10364 }, { "epoch": 0.7081369133019061, "grad_norm": 3.6611034870147705, "learning_rate": 1.451470846180774e-06, "loss": 0.2106, "step": 10365 }, { "epoch": 0.7082052333128374, "grad_norm": 4.284729480743408, "learning_rate": 1.4508429155134494e-06, "loss": 0.3169, "step": 10366 }, { "epoch": 0.7082735533237685, "grad_norm": 4.044494152069092, "learning_rate": 1.4502150851879112e-06, "loss": 0.2635, "step": 10367 }, { "epoch": 0.7083418733346998, "grad_norm": 3.19976544380188, "learning_rate": 1.4495873552349045e-06, "loss": 0.276, "step": 10368 }, { "epoch": 0.7084101933456309, "grad_norm": 3.7607710361480713, "learning_rate": 1.4489597256851638e-06, "loss": 0.3127, "step": 10369 }, { "epoch": 0.7084785133565621, "grad_norm": 3.2214553356170654, "learning_rate": 1.4483321965694258e-06, "loss": 0.2769, "step": 10370 }, { "epoch": 0.7085468333674934, "grad_norm": 4.593674659729004, "learning_rate": 1.4477047679184183e-06, "loss": 0.215, "step": 10371 }, { "epoch": 0.7086151533784245, "grad_norm": 4.501918792724609, "learning_rate": 1.4470774397628653e-06, "loss": 0.4045, "step": 10372 }, { "epoch": 0.7086834733893558, "grad_norm": 4.344035625457764, "learning_rate": 1.446450212133484e-06, "loss": 0.2542, "step": 10373 }, { "epoch": 0.7087517934002869, "grad_norm": 4.7187418937683105, "learning_rate": 1.445823085060988e-06, "loss": 0.2996, "step": 10374 }, { "epoch": 0.7088201134112182, "grad_norm": 3.2703309059143066, "learning_rate": 1.445196058576087e-06, "loss": 0.21, "step": 10375 }, { "epoch": 0.7088884334221494, "grad_norm": 3.6903631687164307, "learning_rate": 1.4445691327094845e-06, "loss": 0.1763, "step": 10376 }, { "epoch": 0.7089567534330805, "grad_norm": 5.6022725105285645, "learning_rate": 1.4439423074918796e-06, "loss": 0.4367, "step": 10377 }, { "epoch": 0.7090250734440118, "grad_norm": 3.4377431869506836, "learning_rate": 1.4433155829539658e-06, "loss": 0.2475, "step": 10378 }, { "epoch": 0.7090933934549429, "grad_norm": 3.789937973022461, "learning_rate": 1.4426889591264322e-06, "loss": 0.2625, "step": 10379 }, { "epoch": 0.7091617134658742, "grad_norm": 5.396628379821777, "learning_rate": 1.4420624360399635e-06, "loss": 0.3973, "step": 10380 }, { "epoch": 0.7092300334768054, "grad_norm": 4.299612522125244, "learning_rate": 1.4414360137252397e-06, "loss": 0.2807, "step": 10381 }, { "epoch": 0.7092983534877365, "grad_norm": 3.139040946960449, "learning_rate": 1.440809692212933e-06, "loss": 0.2274, "step": 10382 }, { "epoch": 0.7093666734986678, "grad_norm": 5.07480525970459, "learning_rate": 1.4401834715337117e-06, "loss": 0.2266, "step": 10383 }, { "epoch": 0.7094349935095989, "grad_norm": 4.996472358703613, "learning_rate": 1.4395573517182432e-06, "loss": 0.2647, "step": 10384 }, { "epoch": 0.7095033135205302, "grad_norm": 3.7967894077301025, "learning_rate": 1.4389313327971872e-06, "loss": 0.2947, "step": 10385 }, { "epoch": 0.7095716335314614, "grad_norm": 5.074301242828369, "learning_rate": 1.4383054148011958e-06, "loss": 0.2781, "step": 10386 }, { "epoch": 0.7096399535423926, "grad_norm": 4.412695407867432, "learning_rate": 1.437679597760919e-06, "loss": 0.291, "step": 10387 }, { "epoch": 0.7097082735533238, "grad_norm": 4.50929069519043, "learning_rate": 1.4370538817070022e-06, "loss": 0.3003, "step": 10388 }, { "epoch": 0.7097765935642549, "grad_norm": 3.3239006996154785, "learning_rate": 1.4364282666700843e-06, "loss": 0.2394, "step": 10389 }, { "epoch": 0.7098449135751862, "grad_norm": 5.51987886428833, "learning_rate": 1.4358027526808012e-06, "loss": 0.2269, "step": 10390 }, { "epoch": 0.7099132335861174, "grad_norm": 3.406944751739502, "learning_rate": 1.4351773397697811e-06, "loss": 0.2194, "step": 10391 }, { "epoch": 0.7099815535970486, "grad_norm": 4.319650650024414, "learning_rate": 1.4345520279676502e-06, "loss": 0.306, "step": 10392 }, { "epoch": 0.7100498736079798, "grad_norm": 3.412332773208618, "learning_rate": 1.4339268173050286e-06, "loss": 0.2501, "step": 10393 }, { "epoch": 0.7101181936189109, "grad_norm": 5.442495346069336, "learning_rate": 1.4333017078125293e-06, "loss": 0.2238, "step": 10394 }, { "epoch": 0.7101865136298422, "grad_norm": 3.2782204151153564, "learning_rate": 1.4326766995207632e-06, "loss": 0.301, "step": 10395 }, { "epoch": 0.7102548336407734, "grad_norm": 3.099292516708374, "learning_rate": 1.4320517924603343e-06, "loss": 0.2724, "step": 10396 }, { "epoch": 0.7103231536517046, "grad_norm": 4.118289947509766, "learning_rate": 1.4314269866618464e-06, "loss": 0.2334, "step": 10397 }, { "epoch": 0.7103914736626358, "grad_norm": 3.4599270820617676, "learning_rate": 1.4308022821558906e-06, "loss": 0.1742, "step": 10398 }, { "epoch": 0.710459793673567, "grad_norm": 4.359234809875488, "learning_rate": 1.4301776789730583e-06, "loss": 0.2789, "step": 10399 }, { "epoch": 0.7105281136844982, "grad_norm": 4.4310688972473145, "learning_rate": 1.4295531771439347e-06, "loss": 0.4117, "step": 10400 }, { "epoch": 0.7105964336954294, "grad_norm": 3.728360891342163, "learning_rate": 1.4289287766991014e-06, "loss": 0.2043, "step": 10401 }, { "epoch": 0.7106647537063606, "grad_norm": 4.387049674987793, "learning_rate": 1.42830447766913e-06, "loss": 0.3377, "step": 10402 }, { "epoch": 0.7107330737172918, "grad_norm": 4.305124759674072, "learning_rate": 1.427680280084594e-06, "loss": 0.294, "step": 10403 }, { "epoch": 0.710801393728223, "grad_norm": 4.167579650878906, "learning_rate": 1.4270561839760577e-06, "loss": 0.3277, "step": 10404 }, { "epoch": 0.7108697137391542, "grad_norm": 4.219839572906494, "learning_rate": 1.4264321893740828e-06, "loss": 0.2852, "step": 10405 }, { "epoch": 0.7109380337500854, "grad_norm": 3.2477030754089355, "learning_rate": 1.4258082963092219e-06, "loss": 0.1843, "step": 10406 }, { "epoch": 0.7110063537610166, "grad_norm": 4.34011697769165, "learning_rate": 1.425184504812027e-06, "loss": 0.2829, "step": 10407 }, { "epoch": 0.7110746737719478, "grad_norm": 4.105612277984619, "learning_rate": 1.4245608149130433e-06, "loss": 0.3196, "step": 10408 }, { "epoch": 0.711142993782879, "grad_norm": 4.86906623840332, "learning_rate": 1.4239372266428114e-06, "loss": 0.3443, "step": 10409 }, { "epoch": 0.7112113137938102, "grad_norm": 4.723255157470703, "learning_rate": 1.423313740031866e-06, "loss": 0.2822, "step": 10410 }, { "epoch": 0.7112796338047415, "grad_norm": 2.8491690158843994, "learning_rate": 1.422690355110739e-06, "loss": 0.2151, "step": 10411 }, { "epoch": 0.7113479538156726, "grad_norm": 4.272146701812744, "learning_rate": 1.4220670719099548e-06, "loss": 0.2253, "step": 10412 }, { "epoch": 0.7114162738266038, "grad_norm": 4.867216110229492, "learning_rate": 1.4214438904600355e-06, "loss": 0.2202, "step": 10413 }, { "epoch": 0.711484593837535, "grad_norm": 4.116021156311035, "learning_rate": 1.4208208107914942e-06, "loss": 0.2818, "step": 10414 }, { "epoch": 0.7115529138484662, "grad_norm": 3.342104434967041, "learning_rate": 1.4201978329348425e-06, "loss": 0.3246, "step": 10415 }, { "epoch": 0.7116212338593975, "grad_norm": 3.8473637104034424, "learning_rate": 1.4195749569205852e-06, "loss": 0.2944, "step": 10416 }, { "epoch": 0.7116895538703286, "grad_norm": 5.326265335083008, "learning_rate": 1.4189521827792261e-06, "loss": 0.4064, "step": 10417 }, { "epoch": 0.7117578738812598, "grad_norm": 5.11160945892334, "learning_rate": 1.4183295105412572e-06, "loss": 0.238, "step": 10418 }, { "epoch": 0.711826193892191, "grad_norm": 5.359591484069824, "learning_rate": 1.4177069402371705e-06, "loss": 0.3739, "step": 10419 }, { "epoch": 0.7118945139031222, "grad_norm": 7.301584720611572, "learning_rate": 1.4170844718974516e-06, "loss": 0.4026, "step": 10420 }, { "epoch": 0.7119628339140535, "grad_norm": 3.6160385608673096, "learning_rate": 1.4164621055525823e-06, "loss": 0.323, "step": 10421 }, { "epoch": 0.7120311539249846, "grad_norm": 3.6732940673828125, "learning_rate": 1.4158398412330343e-06, "loss": 0.2951, "step": 10422 }, { "epoch": 0.7120994739359159, "grad_norm": 2.9718143939971924, "learning_rate": 1.4152176789692822e-06, "loss": 0.2441, "step": 10423 }, { "epoch": 0.712167793946847, "grad_norm": 4.712131023406982, "learning_rate": 1.4145956187917905e-06, "loss": 0.3596, "step": 10424 }, { "epoch": 0.7122361139577782, "grad_norm": 3.697514295578003, "learning_rate": 1.4139736607310209e-06, "loss": 0.1861, "step": 10425 }, { "epoch": 0.7123044339687095, "grad_norm": 3.903129816055298, "learning_rate": 1.4133518048174266e-06, "loss": 0.2561, "step": 10426 }, { "epoch": 0.7123727539796406, "grad_norm": 4.148380279541016, "learning_rate": 1.4127300510814595e-06, "loss": 0.2291, "step": 10427 }, { "epoch": 0.7124410739905719, "grad_norm": 3.967008113861084, "learning_rate": 1.4121083995535651e-06, "loss": 0.1756, "step": 10428 }, { "epoch": 0.712509394001503, "grad_norm": 4.715822696685791, "learning_rate": 1.4114868502641842e-06, "loss": 0.2837, "step": 10429 }, { "epoch": 0.7125777140124342, "grad_norm": 4.374115943908691, "learning_rate": 1.4108654032437522e-06, "loss": 0.2445, "step": 10430 }, { "epoch": 0.7126460340233655, "grad_norm": 4.99275016784668, "learning_rate": 1.4102440585227e-06, "loss": 0.3407, "step": 10431 }, { "epoch": 0.7127143540342966, "grad_norm": 3.070188522338867, "learning_rate": 1.409622816131453e-06, "loss": 0.2404, "step": 10432 }, { "epoch": 0.7127826740452279, "grad_norm": 5.005785942077637, "learning_rate": 1.409001676100433e-06, "loss": 0.4078, "step": 10433 }, { "epoch": 0.712850994056159, "grad_norm": 3.695284366607666, "learning_rate": 1.408380638460053e-06, "loss": 0.2946, "step": 10434 }, { "epoch": 0.7129193140670903, "grad_norm": 4.407217979431152, "learning_rate": 1.4077597032407254e-06, "loss": 0.3123, "step": 10435 }, { "epoch": 0.7129876340780215, "grad_norm": 4.197594165802002, "learning_rate": 1.407138870472854e-06, "loss": 0.3578, "step": 10436 }, { "epoch": 0.7130559540889526, "grad_norm": 4.4560723304748535, "learning_rate": 1.4065181401868431e-06, "loss": 0.2471, "step": 10437 }, { "epoch": 0.7131242740998839, "grad_norm": 3.5814144611358643, "learning_rate": 1.4058975124130842e-06, "loss": 0.2858, "step": 10438 }, { "epoch": 0.713192594110815, "grad_norm": 4.2395806312561035, "learning_rate": 1.4052769871819702e-06, "loss": 0.3805, "step": 10439 }, { "epoch": 0.7132609141217463, "grad_norm": 3.979099750518799, "learning_rate": 1.404656564523885e-06, "loss": 0.3054, "step": 10440 }, { "epoch": 0.7133292341326775, "grad_norm": 4.706780433654785, "learning_rate": 1.4040362444692103e-06, "loss": 0.3397, "step": 10441 }, { "epoch": 0.7133975541436086, "grad_norm": 3.938847064971924, "learning_rate": 1.4034160270483207e-06, "loss": 0.2752, "step": 10442 }, { "epoch": 0.7134658741545399, "grad_norm": 4.326015472412109, "learning_rate": 1.4027959122915874e-06, "loss": 0.3384, "step": 10443 }, { "epoch": 0.713534194165471, "grad_norm": 3.968410015106201, "learning_rate": 1.4021759002293748e-06, "loss": 0.2926, "step": 10444 }, { "epoch": 0.7136025141764023, "grad_norm": 2.2602896690368652, "learning_rate": 1.4015559908920441e-06, "loss": 0.1304, "step": 10445 }, { "epoch": 0.7136708341873335, "grad_norm": 4.377340316772461, "learning_rate": 1.4009361843099511e-06, "loss": 0.3267, "step": 10446 }, { "epoch": 0.7137391541982647, "grad_norm": 4.030993938446045, "learning_rate": 1.4003164805134447e-06, "loss": 0.263, "step": 10447 }, { "epoch": 0.7138074742091959, "grad_norm": 3.9566519260406494, "learning_rate": 1.39969687953287e-06, "loss": 0.2157, "step": 10448 }, { "epoch": 0.713875794220127, "grad_norm": 3.444063663482666, "learning_rate": 1.3990773813985674e-06, "loss": 0.2279, "step": 10449 }, { "epoch": 0.7139441142310583, "grad_norm": 3.9015965461730957, "learning_rate": 1.3984579861408746e-06, "loss": 0.2547, "step": 10450 }, { "epoch": 0.7140124342419895, "grad_norm": 3.8217673301696777, "learning_rate": 1.3978386937901183e-06, "loss": 0.2598, "step": 10451 }, { "epoch": 0.7140807542529207, "grad_norm": 6.307848930358887, "learning_rate": 1.3972195043766255e-06, "loss": 0.2555, "step": 10452 }, { "epoch": 0.7141490742638519, "grad_norm": 3.7227272987365723, "learning_rate": 1.3966004179307158e-06, "loss": 0.3388, "step": 10453 }, { "epoch": 0.714217394274783, "grad_norm": 4.618172645568848, "learning_rate": 1.3959814344827055e-06, "loss": 0.3037, "step": 10454 }, { "epoch": 0.7142857142857143, "grad_norm": 5.176080226898193, "learning_rate": 1.3953625540629006e-06, "loss": 0.3377, "step": 10455 }, { "epoch": 0.7143540342966455, "grad_norm": 5.652438640594482, "learning_rate": 1.3947437767016102e-06, "loss": 0.2726, "step": 10456 }, { "epoch": 0.7144223543075767, "grad_norm": 3.412477970123291, "learning_rate": 1.3941251024291329e-06, "loss": 0.2362, "step": 10457 }, { "epoch": 0.7144906743185079, "grad_norm": 3.3953866958618164, "learning_rate": 1.3935065312757642e-06, "loss": 0.2585, "step": 10458 }, { "epoch": 0.7145589943294391, "grad_norm": 3.0700418949127197, "learning_rate": 1.3928880632717922e-06, "loss": 0.2987, "step": 10459 }, { "epoch": 0.7146273143403703, "grad_norm": 3.935063123703003, "learning_rate": 1.392269698447502e-06, "loss": 0.2489, "step": 10460 }, { "epoch": 0.7146956343513015, "grad_norm": 3.9950571060180664, "learning_rate": 1.3916514368331742e-06, "loss": 0.269, "step": 10461 }, { "epoch": 0.7147639543622327, "grad_norm": 4.76222038269043, "learning_rate": 1.3910332784590833e-06, "loss": 0.32, "step": 10462 }, { "epoch": 0.7148322743731639, "grad_norm": 2.693765163421631, "learning_rate": 1.390415223355498e-06, "loss": 0.2154, "step": 10463 }, { "epoch": 0.7149005943840951, "grad_norm": 4.1057305335998535, "learning_rate": 1.3897972715526839e-06, "loss": 0.2903, "step": 10464 }, { "epoch": 0.7149689143950263, "grad_norm": 2.793299674987793, "learning_rate": 1.3891794230808997e-06, "loss": 0.2684, "step": 10465 }, { "epoch": 0.7150372344059575, "grad_norm": 5.204946041107178, "learning_rate": 1.3885616779704013e-06, "loss": 0.3948, "step": 10466 }, { "epoch": 0.7151055544168887, "grad_norm": 3.964005708694458, "learning_rate": 1.3879440362514355e-06, "loss": 0.1641, "step": 10467 }, { "epoch": 0.7151738744278199, "grad_norm": 4.659752368927002, "learning_rate": 1.3873264979542482e-06, "loss": 0.3221, "step": 10468 }, { "epoch": 0.7152421944387511, "grad_norm": 2.6087634563446045, "learning_rate": 1.3867090631090766e-06, "loss": 0.1983, "step": 10469 }, { "epoch": 0.7153105144496823, "grad_norm": 2.7193007469177246, "learning_rate": 1.3860917317461593e-06, "loss": 0.2161, "step": 10470 }, { "epoch": 0.7153788344606136, "grad_norm": 6.057454586029053, "learning_rate": 1.3854745038957215e-06, "loss": 0.3415, "step": 10471 }, { "epoch": 0.7154471544715447, "grad_norm": 3.9469821453094482, "learning_rate": 1.3848573795879884e-06, "loss": 0.2088, "step": 10472 }, { "epoch": 0.7155154744824759, "grad_norm": 3.5655691623687744, "learning_rate": 1.3842403588531785e-06, "loss": 0.2687, "step": 10473 }, { "epoch": 0.7155837944934071, "grad_norm": 4.3109965324401855, "learning_rate": 1.3836234417215077e-06, "loss": 0.267, "step": 10474 }, { "epoch": 0.7156521145043383, "grad_norm": 3.0090410709381104, "learning_rate": 1.3830066282231805e-06, "loss": 0.1494, "step": 10475 }, { "epoch": 0.7157204345152696, "grad_norm": 4.691463470458984, "learning_rate": 1.3823899183884044e-06, "loss": 0.3151, "step": 10476 }, { "epoch": 0.7157887545262007, "grad_norm": 6.387932777404785, "learning_rate": 1.381773312247377e-06, "loss": 0.3748, "step": 10477 }, { "epoch": 0.7158570745371319, "grad_norm": 5.959107398986816, "learning_rate": 1.381156809830293e-06, "loss": 0.3391, "step": 10478 }, { "epoch": 0.7159253945480631, "grad_norm": 4.2612738609313965, "learning_rate": 1.3805404111673385e-06, "loss": 0.2386, "step": 10479 }, { "epoch": 0.7159937145589943, "grad_norm": 4.458054065704346, "learning_rate": 1.379924116288698e-06, "loss": 0.2897, "step": 10480 }, { "epoch": 0.7160620345699256, "grad_norm": 4.085663795471191, "learning_rate": 1.37930792522455e-06, "loss": 0.2769, "step": 10481 }, { "epoch": 0.7161303545808567, "grad_norm": 4.161209583282471, "learning_rate": 1.3786918380050677e-06, "loss": 0.2556, "step": 10482 }, { "epoch": 0.716198674591788, "grad_norm": 4.333175182342529, "learning_rate": 1.3780758546604192e-06, "loss": 0.2845, "step": 10483 }, { "epoch": 0.7162669946027191, "grad_norm": 3.375840187072754, "learning_rate": 1.377459975220767e-06, "loss": 0.2313, "step": 10484 }, { "epoch": 0.7163353146136503, "grad_norm": 3.7630560398101807, "learning_rate": 1.3768441997162701e-06, "loss": 0.3062, "step": 10485 }, { "epoch": 0.7164036346245816, "grad_norm": 3.967308759689331, "learning_rate": 1.3762285281770822e-06, "loss": 0.2235, "step": 10486 }, { "epoch": 0.7164719546355127, "grad_norm": 3.5574259757995605, "learning_rate": 1.3756129606333488e-06, "loss": 0.2656, "step": 10487 }, { "epoch": 0.716540274646444, "grad_norm": 4.059977054595947, "learning_rate": 1.374997497115212e-06, "loss": 0.2526, "step": 10488 }, { "epoch": 0.7166085946573751, "grad_norm": 3.4800608158111572, "learning_rate": 1.3743821376528126e-06, "loss": 0.2776, "step": 10489 }, { "epoch": 0.7166769146683063, "grad_norm": 4.058716773986816, "learning_rate": 1.3737668822762827e-06, "loss": 0.2332, "step": 10490 }, { "epoch": 0.7167452346792376, "grad_norm": 2.683988332748413, "learning_rate": 1.3731517310157474e-06, "loss": 0.2261, "step": 10491 }, { "epoch": 0.7168135546901687, "grad_norm": 3.428987979888916, "learning_rate": 1.3725366839013303e-06, "loss": 0.2281, "step": 10492 }, { "epoch": 0.7168818747011, "grad_norm": 5.16069221496582, "learning_rate": 1.3719217409631487e-06, "loss": 0.3057, "step": 10493 }, { "epoch": 0.7169501947120311, "grad_norm": 4.147243976593018, "learning_rate": 1.3713069022313143e-06, "loss": 0.338, "step": 10494 }, { "epoch": 0.7170185147229624, "grad_norm": 4.69193696975708, "learning_rate": 1.3706921677359345e-06, "loss": 0.2746, "step": 10495 }, { "epoch": 0.7170868347338936, "grad_norm": 3.561582326889038, "learning_rate": 1.3700775375071114e-06, "loss": 0.2145, "step": 10496 }, { "epoch": 0.7171551547448247, "grad_norm": 4.285763263702393, "learning_rate": 1.3694630115749413e-06, "loss": 0.2642, "step": 10497 }, { "epoch": 0.717223474755756, "grad_norm": 3.6288704872131348, "learning_rate": 1.3688485899695172e-06, "loss": 0.2561, "step": 10498 }, { "epoch": 0.7172917947666871, "grad_norm": 3.8409087657928467, "learning_rate": 1.3682342727209234e-06, "loss": 0.3048, "step": 10499 }, { "epoch": 0.7173601147776184, "grad_norm": 3.5339181423187256, "learning_rate": 1.367620059859243e-06, "loss": 0.2936, "step": 10500 }, { "epoch": 0.7174284347885496, "grad_norm": 3.7150893211364746, "learning_rate": 1.367005951414552e-06, "loss": 0.2493, "step": 10501 }, { "epoch": 0.7174967547994807, "grad_norm": 5.096451759338379, "learning_rate": 1.3663919474169213e-06, "loss": 0.3414, "step": 10502 }, { "epoch": 0.717565074810412, "grad_norm": 2.864384174346924, "learning_rate": 1.3657780478964175e-06, "loss": 0.1947, "step": 10503 }, { "epoch": 0.7176333948213431, "grad_norm": 1.8940231800079346, "learning_rate": 1.3651642528831017e-06, "loss": 0.1663, "step": 10504 }, { "epoch": 0.7177017148322744, "grad_norm": 3.8731353282928467, "learning_rate": 1.36455056240703e-06, "loss": 0.2457, "step": 10505 }, { "epoch": 0.7177700348432056, "grad_norm": 4.24818754196167, "learning_rate": 1.3639369764982524e-06, "loss": 0.2562, "step": 10506 }, { "epoch": 0.7178383548541368, "grad_norm": 4.239482402801514, "learning_rate": 1.3633234951868164e-06, "loss": 0.2794, "step": 10507 }, { "epoch": 0.717906674865068, "grad_norm": 4.988409996032715, "learning_rate": 1.3627101185027585e-06, "loss": 0.2548, "step": 10508 }, { "epoch": 0.7179749948759991, "grad_norm": 4.381262302398682, "learning_rate": 1.3620968464761183e-06, "loss": 0.3852, "step": 10509 }, { "epoch": 0.7180433148869304, "grad_norm": 4.158920764923096, "learning_rate": 1.3614836791369248e-06, "loss": 0.2495, "step": 10510 }, { "epoch": 0.7181116348978616, "grad_norm": 4.352282524108887, "learning_rate": 1.3608706165152039e-06, "loss": 0.2619, "step": 10511 }, { "epoch": 0.7181799549087928, "grad_norm": 3.9957618713378906, "learning_rate": 1.3602576586409734e-06, "loss": 0.245, "step": 10512 }, { "epoch": 0.718248274919724, "grad_norm": 3.2778358459472656, "learning_rate": 1.3596448055442497e-06, "loss": 0.188, "step": 10513 }, { "epoch": 0.7183165949306551, "grad_norm": 3.982578754425049, "learning_rate": 1.3590320572550426e-06, "loss": 0.219, "step": 10514 }, { "epoch": 0.7183849149415864, "grad_norm": 3.071706533432007, "learning_rate": 1.3584194138033565e-06, "loss": 0.2341, "step": 10515 }, { "epoch": 0.7184532349525176, "grad_norm": 4.768503665924072, "learning_rate": 1.3578068752191912e-06, "loss": 0.2176, "step": 10516 }, { "epoch": 0.7185215549634488, "grad_norm": 4.415010452270508, "learning_rate": 1.3571944415325408e-06, "loss": 0.2576, "step": 10517 }, { "epoch": 0.71858987497438, "grad_norm": 5.090269088745117, "learning_rate": 1.3565821127733942e-06, "loss": 0.2882, "step": 10518 }, { "epoch": 0.7186581949853112, "grad_norm": 3.668755531311035, "learning_rate": 1.3559698889717376e-06, "loss": 0.1977, "step": 10519 }, { "epoch": 0.7187265149962424, "grad_norm": 2.9553539752960205, "learning_rate": 1.3553577701575474e-06, "loss": 0.232, "step": 10520 }, { "epoch": 0.7187948350071736, "grad_norm": 5.033618450164795, "learning_rate": 1.3547457563607982e-06, "loss": 0.2897, "step": 10521 }, { "epoch": 0.7188631550181048, "grad_norm": 4.023041725158691, "learning_rate": 1.3541338476114578e-06, "loss": 0.2651, "step": 10522 }, { "epoch": 0.718931475029036, "grad_norm": 3.5623955726623535, "learning_rate": 1.3535220439394932e-06, "loss": 0.2133, "step": 10523 }, { "epoch": 0.7189997950399672, "grad_norm": 3.9015214443206787, "learning_rate": 1.3529103453748592e-06, "loss": 0.2842, "step": 10524 }, { "epoch": 0.7190681150508984, "grad_norm": 3.7478222846984863, "learning_rate": 1.3522987519475103e-06, "loss": 0.2717, "step": 10525 }, { "epoch": 0.7191364350618296, "grad_norm": 3.3192577362060547, "learning_rate": 1.3516872636873945e-06, "loss": 0.2676, "step": 10526 }, { "epoch": 0.7192047550727608, "grad_norm": 4.370886325836182, "learning_rate": 1.3510758806244562e-06, "loss": 0.2625, "step": 10527 }, { "epoch": 0.719273075083692, "grad_norm": 4.6334004402160645, "learning_rate": 1.3504646027886298e-06, "loss": 0.292, "step": 10528 }, { "epoch": 0.7193413950946232, "grad_norm": 8.80473518371582, "learning_rate": 1.3498534302098508e-06, "loss": 0.3656, "step": 10529 }, { "epoch": 0.7194097151055544, "grad_norm": 2.8148255348205566, "learning_rate": 1.349242362918046e-06, "loss": 0.1687, "step": 10530 }, { "epoch": 0.7194780351164857, "grad_norm": 3.7124924659729004, "learning_rate": 1.348631400943139e-06, "loss": 0.1625, "step": 10531 }, { "epoch": 0.7195463551274168, "grad_norm": 4.718760967254639, "learning_rate": 1.3480205443150442e-06, "loss": 0.3376, "step": 10532 }, { "epoch": 0.719614675138348, "grad_norm": 3.480632781982422, "learning_rate": 1.3474097930636753e-06, "loss": 0.3328, "step": 10533 }, { "epoch": 0.7196829951492792, "grad_norm": 3.899259090423584, "learning_rate": 1.3467991472189391e-06, "loss": 0.275, "step": 10534 }, { "epoch": 0.7197513151602104, "grad_norm": 3.371095895767212, "learning_rate": 1.3461886068107373e-06, "loss": 0.2652, "step": 10535 }, { "epoch": 0.7198196351711417, "grad_norm": 4.933217525482178, "learning_rate": 1.3455781718689658e-06, "loss": 0.355, "step": 10536 }, { "epoch": 0.7198879551820728, "grad_norm": 3.8202574253082275, "learning_rate": 1.3449678424235168e-06, "loss": 0.2424, "step": 10537 }, { "epoch": 0.719956275193004, "grad_norm": 4.767594814300537, "learning_rate": 1.3443576185042758e-06, "loss": 0.2511, "step": 10538 }, { "epoch": 0.7200245952039352, "grad_norm": 3.870027780532837, "learning_rate": 1.3437475001411262e-06, "loss": 0.3736, "step": 10539 }, { "epoch": 0.7200929152148664, "grad_norm": 4.586700439453125, "learning_rate": 1.3431374873639402e-06, "loss": 0.3177, "step": 10540 }, { "epoch": 0.7201612352257977, "grad_norm": 5.2759270668029785, "learning_rate": 1.3425275802025893e-06, "loss": 0.2892, "step": 10541 }, { "epoch": 0.7202295552367288, "grad_norm": 4.765860080718994, "learning_rate": 1.3419177786869409e-06, "loss": 0.2547, "step": 10542 }, { "epoch": 0.7202978752476601, "grad_norm": 4.624880790710449, "learning_rate": 1.341308082846856e-06, "loss": 0.2289, "step": 10543 }, { "epoch": 0.7203661952585912, "grad_norm": 3.1118905544281006, "learning_rate": 1.3406984927121867e-06, "loss": 0.2458, "step": 10544 }, { "epoch": 0.7204345152695224, "grad_norm": 4.774821758270264, "learning_rate": 1.3400890083127847e-06, "loss": 0.3379, "step": 10545 }, { "epoch": 0.7205028352804537, "grad_norm": 3.8461058139801025, "learning_rate": 1.3394796296784945e-06, "loss": 0.2483, "step": 10546 }, { "epoch": 0.7205711552913848, "grad_norm": 4.730382919311523, "learning_rate": 1.3388703568391558e-06, "loss": 0.2988, "step": 10547 }, { "epoch": 0.7206394753023161, "grad_norm": 5.728384971618652, "learning_rate": 1.3382611898246034e-06, "loss": 0.2655, "step": 10548 }, { "epoch": 0.7207077953132472, "grad_norm": 4.928520202636719, "learning_rate": 1.3376521286646664e-06, "loss": 0.298, "step": 10549 }, { "epoch": 0.7207761153241784, "grad_norm": 6.145047664642334, "learning_rate": 1.3370431733891692e-06, "loss": 0.299, "step": 10550 }, { "epoch": 0.7208444353351097, "grad_norm": 5.254673957824707, "learning_rate": 1.3364343240279316e-06, "loss": 0.2135, "step": 10551 }, { "epoch": 0.7209127553460408, "grad_norm": 5.4268269538879395, "learning_rate": 1.3358255806107646e-06, "loss": 0.4, "step": 10552 }, { "epoch": 0.7209810753569721, "grad_norm": 4.112092018127441, "learning_rate": 1.3352169431674782e-06, "loss": 0.2892, "step": 10553 }, { "epoch": 0.7210493953679032, "grad_norm": 5.563740253448486, "learning_rate": 1.3346084117278764e-06, "loss": 0.2061, "step": 10554 }, { "epoch": 0.7211177153788345, "grad_norm": 5.015347003936768, "learning_rate": 1.3339999863217565e-06, "loss": 0.308, "step": 10555 }, { "epoch": 0.7211860353897657, "grad_norm": 3.5578103065490723, "learning_rate": 1.333391666978912e-06, "loss": 0.2634, "step": 10556 }, { "epoch": 0.7212543554006968, "grad_norm": 4.1457905769348145, "learning_rate": 1.3327834537291302e-06, "loss": 0.2177, "step": 10557 }, { "epoch": 0.7213226754116281, "grad_norm": 4.405083179473877, "learning_rate": 1.3321753466021943e-06, "loss": 0.2816, "step": 10558 }, { "epoch": 0.7213909954225592, "grad_norm": 4.05672550201416, "learning_rate": 1.3315673456278828e-06, "loss": 0.2742, "step": 10559 }, { "epoch": 0.7214593154334905, "grad_norm": 4.714196681976318, "learning_rate": 1.3309594508359649e-06, "loss": 0.3287, "step": 10560 }, { "epoch": 0.7215276354444217, "grad_norm": 3.287835121154785, "learning_rate": 1.330351662256208e-06, "loss": 0.3515, "step": 10561 }, { "epoch": 0.7215959554553528, "grad_norm": 4.774267673492432, "learning_rate": 1.3297439799183763e-06, "loss": 0.2929, "step": 10562 }, { "epoch": 0.7216642754662841, "grad_norm": 3.4373769760131836, "learning_rate": 1.3291364038522269e-06, "loss": 0.3249, "step": 10563 }, { "epoch": 0.7217325954772152, "grad_norm": 3.05362606048584, "learning_rate": 1.3285289340875076e-06, "loss": 0.2679, "step": 10564 }, { "epoch": 0.7218009154881465, "grad_norm": 3.522005319595337, "learning_rate": 1.3279215706539665e-06, "loss": 0.2002, "step": 10565 }, { "epoch": 0.7218692354990777, "grad_norm": 4.412306785583496, "learning_rate": 1.3273143135813449e-06, "loss": 0.3077, "step": 10566 }, { "epoch": 0.7219375555100089, "grad_norm": 5.067180633544922, "learning_rate": 1.326707162899378e-06, "loss": 0.2667, "step": 10567 }, { "epoch": 0.7220058755209401, "grad_norm": 3.309711456298828, "learning_rate": 1.3261001186377964e-06, "loss": 0.2508, "step": 10568 }, { "epoch": 0.7220741955318712, "grad_norm": 3.5007519721984863, "learning_rate": 1.3254931808263258e-06, "loss": 0.3356, "step": 10569 }, { "epoch": 0.7221425155428025, "grad_norm": 3.429788589477539, "learning_rate": 1.324886349494686e-06, "loss": 0.1556, "step": 10570 }, { "epoch": 0.7222108355537337, "grad_norm": 3.153914213180542, "learning_rate": 1.3242796246725935e-06, "loss": 0.1973, "step": 10571 }, { "epoch": 0.7222791555646649, "grad_norm": 2.956350088119507, "learning_rate": 1.3236730063897548e-06, "loss": 0.2617, "step": 10572 }, { "epoch": 0.7223474755755961, "grad_norm": 3.3499577045440674, "learning_rate": 1.3230664946758767e-06, "loss": 0.2832, "step": 10573 }, { "epoch": 0.7224157955865272, "grad_norm": 5.017319202423096, "learning_rate": 1.3224600895606579e-06, "loss": 0.2649, "step": 10574 }, { "epoch": 0.7224841155974585, "grad_norm": 4.171113014221191, "learning_rate": 1.3218537910737905e-06, "loss": 0.2942, "step": 10575 }, { "epoch": 0.7225524356083897, "grad_norm": 5.385615348815918, "learning_rate": 1.3212475992449686e-06, "loss": 0.2928, "step": 10576 }, { "epoch": 0.7226207556193209, "grad_norm": 4.047399520874023, "learning_rate": 1.3206415141038712e-06, "loss": 0.1921, "step": 10577 }, { "epoch": 0.7226890756302521, "grad_norm": 3.57892107963562, "learning_rate": 1.3200355356801775e-06, "loss": 0.2099, "step": 10578 }, { "epoch": 0.7227573956411834, "grad_norm": 4.028188705444336, "learning_rate": 1.3194296640035614e-06, "loss": 0.2911, "step": 10579 }, { "epoch": 0.7228257156521145, "grad_norm": 4.179756164550781, "learning_rate": 1.3188238991036906e-06, "loss": 0.3306, "step": 10580 }, { "epoch": 0.7228940356630457, "grad_norm": 3.340367317199707, "learning_rate": 1.3182182410102276e-06, "loss": 0.2826, "step": 10581 }, { "epoch": 0.7229623556739769, "grad_norm": 3.033262014389038, "learning_rate": 1.31761268975283e-06, "loss": 0.2159, "step": 10582 }, { "epoch": 0.7230306756849081, "grad_norm": 3.6839945316314697, "learning_rate": 1.3170072453611503e-06, "loss": 0.2332, "step": 10583 }, { "epoch": 0.7230989956958394, "grad_norm": 4.815561771392822, "learning_rate": 1.3164019078648364e-06, "loss": 0.2454, "step": 10584 }, { "epoch": 0.7231673157067705, "grad_norm": 3.9773671627044678, "learning_rate": 1.3157966772935275e-06, "loss": 0.263, "step": 10585 }, { "epoch": 0.7232356357177017, "grad_norm": 3.7736434936523438, "learning_rate": 1.315191553676862e-06, "loss": 0.2778, "step": 10586 }, { "epoch": 0.7233039557286329, "grad_norm": 3.038743257522583, "learning_rate": 1.3145865370444706e-06, "loss": 0.2067, "step": 10587 }, { "epoch": 0.7233722757395641, "grad_norm": 3.411965847015381, "learning_rate": 1.3139816274259796e-06, "loss": 0.1724, "step": 10588 }, { "epoch": 0.7234405957504954, "grad_norm": 3.011392593383789, "learning_rate": 1.3133768248510098e-06, "loss": 0.1984, "step": 10589 }, { "epoch": 0.7235089157614265, "grad_norm": 4.7542500495910645, "learning_rate": 1.3127721293491766e-06, "loss": 0.29, "step": 10590 }, { "epoch": 0.7235772357723578, "grad_norm": 3.3072595596313477, "learning_rate": 1.3121675409500905e-06, "loss": 0.2302, "step": 10591 }, { "epoch": 0.7236455557832889, "grad_norm": 4.694758415222168, "learning_rate": 1.3115630596833578e-06, "loss": 0.3274, "step": 10592 }, { "epoch": 0.7237138757942201, "grad_norm": 3.912224769592285, "learning_rate": 1.3109586855785758e-06, "loss": 0.2996, "step": 10593 }, { "epoch": 0.7237821958051514, "grad_norm": 4.043060302734375, "learning_rate": 1.3103544186653393e-06, "loss": 0.2922, "step": 10594 }, { "epoch": 0.7238505158160825, "grad_norm": 5.23725700378418, "learning_rate": 1.30975025897324e-06, "loss": 0.2242, "step": 10595 }, { "epoch": 0.7239188358270138, "grad_norm": 3.889495611190796, "learning_rate": 1.309146206531862e-06, "loss": 0.2339, "step": 10596 }, { "epoch": 0.7239871558379449, "grad_norm": 2.9367098808288574, "learning_rate": 1.3085422613707814e-06, "loss": 0.2593, "step": 10597 }, { "epoch": 0.7240554758488761, "grad_norm": 3.8138914108276367, "learning_rate": 1.3079384235195738e-06, "loss": 0.2778, "step": 10598 }, { "epoch": 0.7241237958598074, "grad_norm": 3.169558048248291, "learning_rate": 1.3073346930078066e-06, "loss": 0.1722, "step": 10599 }, { "epoch": 0.7241921158707385, "grad_norm": 4.158839702606201, "learning_rate": 1.3067310698650435e-06, "loss": 0.3309, "step": 10600 }, { "epoch": 0.7242604358816698, "grad_norm": 3.150907039642334, "learning_rate": 1.3061275541208418e-06, "loss": 0.1674, "step": 10601 }, { "epoch": 0.7243287558926009, "grad_norm": 3.4644675254821777, "learning_rate": 1.3055241458047548e-06, "loss": 0.2024, "step": 10602 }, { "epoch": 0.7243970759035322, "grad_norm": 3.6498234272003174, "learning_rate": 1.3049208449463291e-06, "loss": 0.2379, "step": 10603 }, { "epoch": 0.7244653959144634, "grad_norm": 4.8422932624816895, "learning_rate": 1.3043176515751088e-06, "loss": 0.3524, "step": 10604 }, { "epoch": 0.7245337159253945, "grad_norm": 3.880382537841797, "learning_rate": 1.3037145657206274e-06, "loss": 0.2844, "step": 10605 }, { "epoch": 0.7246020359363258, "grad_norm": 4.372976779937744, "learning_rate": 1.3031115874124177e-06, "loss": 0.3189, "step": 10606 }, { "epoch": 0.7246703559472569, "grad_norm": 3.706873893737793, "learning_rate": 1.3025087166800064e-06, "loss": 0.2351, "step": 10607 }, { "epoch": 0.7247386759581882, "grad_norm": 4.5822296142578125, "learning_rate": 1.3019059535529142e-06, "loss": 0.3268, "step": 10608 }, { "epoch": 0.7248069959691193, "grad_norm": 3.7642135620117188, "learning_rate": 1.301303298060657e-06, "loss": 0.2214, "step": 10609 }, { "epoch": 0.7248753159800505, "grad_norm": 4.136916160583496, "learning_rate": 1.300700750232745e-06, "loss": 0.3166, "step": 10610 }, { "epoch": 0.7249436359909818, "grad_norm": 4.995746612548828, "learning_rate": 1.3000983100986836e-06, "loss": 0.2825, "step": 10611 }, { "epoch": 0.7250119560019129, "grad_norm": 3.954054594039917, "learning_rate": 1.2994959776879736e-06, "loss": 0.2786, "step": 10612 }, { "epoch": 0.7250802760128442, "grad_norm": 3.6717991828918457, "learning_rate": 1.2988937530301079e-06, "loss": 0.2882, "step": 10613 }, { "epoch": 0.7251485960237753, "grad_norm": 5.025681495666504, "learning_rate": 1.298291636154575e-06, "loss": 0.1909, "step": 10614 }, { "epoch": 0.7252169160347066, "grad_norm": 5.3929643630981445, "learning_rate": 1.2976896270908617e-06, "loss": 0.3271, "step": 10615 }, { "epoch": 0.7252852360456378, "grad_norm": 4.67171049118042, "learning_rate": 1.2970877258684465e-06, "loss": 0.1771, "step": 10616 }, { "epoch": 0.7253535560565689, "grad_norm": 4.864197254180908, "learning_rate": 1.296485932516801e-06, "loss": 0.2513, "step": 10617 }, { "epoch": 0.7254218760675002, "grad_norm": 5.201406955718994, "learning_rate": 1.2958842470653948e-06, "loss": 0.2141, "step": 10618 }, { "epoch": 0.7254901960784313, "grad_norm": 4.245782375335693, "learning_rate": 1.2952826695436902e-06, "loss": 0.2583, "step": 10619 }, { "epoch": 0.7255585160893626, "grad_norm": 4.868922233581543, "learning_rate": 1.2946811999811453e-06, "loss": 0.229, "step": 10620 }, { "epoch": 0.7256268361002938, "grad_norm": 4.4480462074279785, "learning_rate": 1.294079838407212e-06, "loss": 0.2948, "step": 10621 }, { "epoch": 0.7256951561112249, "grad_norm": 4.149014949798584, "learning_rate": 1.293478584851338e-06, "loss": 0.3537, "step": 10622 }, { "epoch": 0.7257634761221562, "grad_norm": 3.7405831813812256, "learning_rate": 1.2928774393429646e-06, "loss": 0.2134, "step": 10623 }, { "epoch": 0.7258317961330873, "grad_norm": 5.414229869842529, "learning_rate": 1.2922764019115298e-06, "loss": 0.2749, "step": 10624 }, { "epoch": 0.7259001161440186, "grad_norm": 4.812239646911621, "learning_rate": 1.291675472586462e-06, "loss": 0.3024, "step": 10625 }, { "epoch": 0.7259684361549498, "grad_norm": 3.8086013793945312, "learning_rate": 1.2910746513971889e-06, "loss": 0.2749, "step": 10626 }, { "epoch": 0.726036756165881, "grad_norm": 4.1579270362854, "learning_rate": 1.2904739383731293e-06, "loss": 0.3403, "step": 10627 }, { "epoch": 0.7261050761768122, "grad_norm": 3.8064732551574707, "learning_rate": 1.2898733335437027e-06, "loss": 0.2595, "step": 10628 }, { "epoch": 0.7261733961877433, "grad_norm": 5.093757152557373, "learning_rate": 1.2892728369383152e-06, "loss": 0.3544, "step": 10629 }, { "epoch": 0.7262417161986746, "grad_norm": 5.207265377044678, "learning_rate": 1.2886724485863728e-06, "loss": 0.3329, "step": 10630 }, { "epoch": 0.7263100362096058, "grad_norm": 3.7989115715026855, "learning_rate": 1.2880721685172748e-06, "loss": 0.2707, "step": 10631 }, { "epoch": 0.726378356220537, "grad_norm": 2.9882724285125732, "learning_rate": 1.287471996760417e-06, "loss": 0.251, "step": 10632 }, { "epoch": 0.7264466762314682, "grad_norm": 4.050894737243652, "learning_rate": 1.2868719333451838e-06, "loss": 0.3229, "step": 10633 }, { "epoch": 0.7265149962423993, "grad_norm": 5.479717254638672, "learning_rate": 1.2862719783009634e-06, "loss": 0.4048, "step": 10634 }, { "epoch": 0.7265833162533306, "grad_norm": 4.364316463470459, "learning_rate": 1.2856721316571316e-06, "loss": 0.4106, "step": 10635 }, { "epoch": 0.7266516362642618, "grad_norm": 4.421105861663818, "learning_rate": 1.285072393443063e-06, "loss": 0.2493, "step": 10636 }, { "epoch": 0.726719956275193, "grad_norm": 4.397767543792725, "learning_rate": 1.2844727636881233e-06, "loss": 0.2821, "step": 10637 }, { "epoch": 0.7267882762861242, "grad_norm": 4.333643913269043, "learning_rate": 1.283873242421675e-06, "loss": 0.2949, "step": 10638 }, { "epoch": 0.7268565962970555, "grad_norm": 4.913967609405518, "learning_rate": 1.2832738296730764e-06, "loss": 0.3012, "step": 10639 }, { "epoch": 0.7269249163079866, "grad_norm": 5.833409309387207, "learning_rate": 1.2826745254716777e-06, "loss": 0.2232, "step": 10640 }, { "epoch": 0.7269932363189178, "grad_norm": 3.93099308013916, "learning_rate": 1.2820753298468262e-06, "loss": 0.2973, "step": 10641 }, { "epoch": 0.727061556329849, "grad_norm": 4.540489673614502, "learning_rate": 1.2814762428278628e-06, "loss": 0.1965, "step": 10642 }, { "epoch": 0.7271298763407802, "grad_norm": 3.7675716876983643, "learning_rate": 1.2808772644441227e-06, "loss": 0.3059, "step": 10643 }, { "epoch": 0.7271981963517115, "grad_norm": 4.607097625732422, "learning_rate": 1.2802783947249363e-06, "loss": 0.4222, "step": 10644 }, { "epoch": 0.7272665163626426, "grad_norm": 3.2244484424591064, "learning_rate": 1.279679633699631e-06, "loss": 0.2428, "step": 10645 }, { "epoch": 0.7273348363735738, "grad_norm": 2.9923267364501953, "learning_rate": 1.2790809813975226e-06, "loss": 0.1557, "step": 10646 }, { "epoch": 0.727403156384505, "grad_norm": 4.710912704467773, "learning_rate": 1.2784824378479266e-06, "loss": 0.3748, "step": 10647 }, { "epoch": 0.7274714763954362, "grad_norm": 4.31955099105835, "learning_rate": 1.277884003080154e-06, "loss": 0.2929, "step": 10648 }, { "epoch": 0.7275397964063675, "grad_norm": 3.9598281383514404, "learning_rate": 1.2772856771235087e-06, "loss": 0.2304, "step": 10649 }, { "epoch": 0.7276081164172986, "grad_norm": 3.555100202560425, "learning_rate": 1.2766874600072868e-06, "loss": 0.1792, "step": 10650 }, { "epoch": 0.7276764364282299, "grad_norm": 4.018316268920898, "learning_rate": 1.2760893517607823e-06, "loss": 0.2162, "step": 10651 }, { "epoch": 0.727744756439161, "grad_norm": 3.381484270095825, "learning_rate": 1.2754913524132833e-06, "loss": 0.2535, "step": 10652 }, { "epoch": 0.7278130764500922, "grad_norm": 4.32705020904541, "learning_rate": 1.2748934619940721e-06, "loss": 0.2373, "step": 10653 }, { "epoch": 0.7278813964610235, "grad_norm": 4.23558235168457, "learning_rate": 1.2742956805324259e-06, "loss": 0.3205, "step": 10654 }, { "epoch": 0.7279497164719546, "grad_norm": 3.5283026695251465, "learning_rate": 1.2736980080576164e-06, "loss": 0.292, "step": 10655 }, { "epoch": 0.7280180364828859, "grad_norm": 4.522618770599365, "learning_rate": 1.2731004445989102e-06, "loss": 0.3199, "step": 10656 }, { "epoch": 0.728086356493817, "grad_norm": 4.228614807128906, "learning_rate": 1.2725029901855692e-06, "loss": 0.2341, "step": 10657 }, { "epoch": 0.7281546765047482, "grad_norm": 4.008824825286865, "learning_rate": 1.2719056448468472e-06, "loss": 0.2913, "step": 10658 }, { "epoch": 0.7282229965156795, "grad_norm": 4.0394744873046875, "learning_rate": 1.2713084086119953e-06, "loss": 0.2338, "step": 10659 }, { "epoch": 0.7282913165266106, "grad_norm": 4.494678020477295, "learning_rate": 1.270711281510259e-06, "loss": 0.2698, "step": 10660 }, { "epoch": 0.7283596365375419, "grad_norm": 3.888289451599121, "learning_rate": 1.2701142635708783e-06, "loss": 0.3154, "step": 10661 }, { "epoch": 0.728427956548473, "grad_norm": 4.639837741851807, "learning_rate": 1.2695173548230868e-06, "loss": 0.2412, "step": 10662 }, { "epoch": 0.7284962765594043, "grad_norm": 4.080217361450195, "learning_rate": 1.268920555296114e-06, "loss": 0.2384, "step": 10663 }, { "epoch": 0.7285645965703355, "grad_norm": 4.122745513916016, "learning_rate": 1.2683238650191837e-06, "loss": 0.1985, "step": 10664 }, { "epoch": 0.7286329165812666, "grad_norm": 4.495713710784912, "learning_rate": 1.267727284021515e-06, "loss": 0.2086, "step": 10665 }, { "epoch": 0.7287012365921979, "grad_norm": 3.588806390762329, "learning_rate": 1.267130812332318e-06, "loss": 0.2341, "step": 10666 }, { "epoch": 0.728769556603129, "grad_norm": 3.251931667327881, "learning_rate": 1.2665344499808032e-06, "loss": 0.1968, "step": 10667 }, { "epoch": 0.7288378766140603, "grad_norm": 3.8511569499969482, "learning_rate": 1.265938196996172e-06, "loss": 0.1965, "step": 10668 }, { "epoch": 0.7289061966249915, "grad_norm": 5.241744518280029, "learning_rate": 1.2653420534076227e-06, "loss": 0.3772, "step": 10669 }, { "epoch": 0.7289745166359226, "grad_norm": 5.686784744262695, "learning_rate": 1.2647460192443443e-06, "loss": 0.2635, "step": 10670 }, { "epoch": 0.7290428366468539, "grad_norm": 5.20137357711792, "learning_rate": 1.264150094535524e-06, "loss": 0.2589, "step": 10671 }, { "epoch": 0.729111156657785, "grad_norm": 4.458532333374023, "learning_rate": 1.2635542793103428e-06, "loss": 0.3031, "step": 10672 }, { "epoch": 0.7291794766687163, "grad_norm": 5.021819114685059, "learning_rate": 1.2629585735979764e-06, "loss": 0.2916, "step": 10673 }, { "epoch": 0.7292477966796475, "grad_norm": 3.2088022232055664, "learning_rate": 1.2623629774275945e-06, "loss": 0.2399, "step": 10674 }, { "epoch": 0.7293161166905787, "grad_norm": 3.0277223587036133, "learning_rate": 1.2617674908283624e-06, "loss": 0.1661, "step": 10675 }, { "epoch": 0.7293844367015099, "grad_norm": 6.807760238647461, "learning_rate": 1.261172113829439e-06, "loss": 0.333, "step": 10676 }, { "epoch": 0.729452756712441, "grad_norm": 3.7199833393096924, "learning_rate": 1.2605768464599803e-06, "loss": 0.281, "step": 10677 }, { "epoch": 0.7295210767233723, "grad_norm": 4.485163688659668, "learning_rate": 1.2599816887491317e-06, "loss": 0.3416, "step": 10678 }, { "epoch": 0.7295893967343035, "grad_norm": 4.554069995880127, "learning_rate": 1.2593866407260377e-06, "loss": 0.2712, "step": 10679 }, { "epoch": 0.7296577167452347, "grad_norm": 5.5011372566223145, "learning_rate": 1.2587917024198358e-06, "loss": 0.3196, "step": 10680 }, { "epoch": 0.7297260367561659, "grad_norm": 4.1948137283325195, "learning_rate": 1.2581968738596616e-06, "loss": 0.3593, "step": 10681 }, { "epoch": 0.729794356767097, "grad_norm": 3.616600751876831, "learning_rate": 1.2576021550746386e-06, "loss": 0.25, "step": 10682 }, { "epoch": 0.7298626767780283, "grad_norm": 4.8242669105529785, "learning_rate": 1.2570075460938903e-06, "loss": 0.3275, "step": 10683 }, { "epoch": 0.7299309967889595, "grad_norm": 3.4053142070770264, "learning_rate": 1.2564130469465325e-06, "loss": 0.2082, "step": 10684 }, { "epoch": 0.7299993167998907, "grad_norm": 3.525813579559326, "learning_rate": 1.2558186576616777e-06, "loss": 0.19, "step": 10685 }, { "epoch": 0.7300676368108219, "grad_norm": 4.965883731842041, "learning_rate": 1.2552243782684281e-06, "loss": 0.3349, "step": 10686 }, { "epoch": 0.7301359568217531, "grad_norm": 4.720338821411133, "learning_rate": 1.2546302087958877e-06, "loss": 0.3006, "step": 10687 }, { "epoch": 0.7302042768326843, "grad_norm": 3.974229574203491, "learning_rate": 1.2540361492731496e-06, "loss": 0.3232, "step": 10688 }, { "epoch": 0.7302725968436155, "grad_norm": 3.6856911182403564, "learning_rate": 1.2534421997293051e-06, "loss": 0.3009, "step": 10689 }, { "epoch": 0.7303409168545467, "grad_norm": 3.5732059478759766, "learning_rate": 1.2528483601934363e-06, "loss": 0.252, "step": 10690 }, { "epoch": 0.7304092368654779, "grad_norm": 6.204128742218018, "learning_rate": 1.2522546306946219e-06, "loss": 0.3415, "step": 10691 }, { "epoch": 0.7304775568764091, "grad_norm": 4.846205234527588, "learning_rate": 1.2516610112619362e-06, "loss": 0.276, "step": 10692 }, { "epoch": 0.7305458768873403, "grad_norm": 4.932292938232422, "learning_rate": 1.2510675019244466e-06, "loss": 0.299, "step": 10693 }, { "epoch": 0.7306141968982715, "grad_norm": 3.3716013431549072, "learning_rate": 1.2504741027112165e-06, "loss": 0.281, "step": 10694 }, { "epoch": 0.7306825169092027, "grad_norm": 3.4609742164611816, "learning_rate": 1.2498808136513025e-06, "loss": 0.2766, "step": 10695 }, { "epoch": 0.7307508369201339, "grad_norm": 4.3109307289123535, "learning_rate": 1.2492876347737562e-06, "loss": 0.224, "step": 10696 }, { "epoch": 0.7308191569310651, "grad_norm": 4.328110694885254, "learning_rate": 1.2486945661076255e-06, "loss": 0.2931, "step": 10697 }, { "epoch": 0.7308874769419963, "grad_norm": 4.0095720291137695, "learning_rate": 1.2481016076819492e-06, "loss": 0.275, "step": 10698 }, { "epoch": 0.7309557969529276, "grad_norm": 3.7043113708496094, "learning_rate": 1.2475087595257638e-06, "loss": 0.2949, "step": 10699 }, { "epoch": 0.7310241169638587, "grad_norm": 3.448695659637451, "learning_rate": 1.2469160216680984e-06, "loss": 0.3436, "step": 10700 }, { "epoch": 0.7310924369747899, "grad_norm": 4.475603103637695, "learning_rate": 1.2463233941379816e-06, "loss": 0.3464, "step": 10701 }, { "epoch": 0.7311607569857211, "grad_norm": 4.190629482269287, "learning_rate": 1.2457308769644287e-06, "loss": 0.3154, "step": 10702 }, { "epoch": 0.7312290769966523, "grad_norm": 3.5446937084198, "learning_rate": 1.2451384701764551e-06, "loss": 0.2819, "step": 10703 }, { "epoch": 0.7312973970075836, "grad_norm": 3.983830690383911, "learning_rate": 1.2445461738030698e-06, "loss": 0.2953, "step": 10704 }, { "epoch": 0.7313657170185147, "grad_norm": 3.499936103820801, "learning_rate": 1.2439539878732767e-06, "loss": 0.1535, "step": 10705 }, { "epoch": 0.7314340370294459, "grad_norm": 3.7811343669891357, "learning_rate": 1.2433619124160701e-06, "loss": 0.2275, "step": 10706 }, { "epoch": 0.7315023570403771, "grad_norm": 5.238065242767334, "learning_rate": 1.2427699474604458e-06, "loss": 0.2684, "step": 10707 }, { "epoch": 0.7315706770513083, "grad_norm": 4.012139797210693, "learning_rate": 1.2421780930353899e-06, "loss": 0.2832, "step": 10708 }, { "epoch": 0.7316389970622396, "grad_norm": 2.8191425800323486, "learning_rate": 1.2415863491698835e-06, "loss": 0.2051, "step": 10709 }, { "epoch": 0.7317073170731707, "grad_norm": 4.881267547607422, "learning_rate": 1.2409947158929047e-06, "loss": 0.4403, "step": 10710 }, { "epoch": 0.731775637084102, "grad_norm": 3.04982590675354, "learning_rate": 1.240403193233421e-06, "loss": 0.2187, "step": 10711 }, { "epoch": 0.7318439570950331, "grad_norm": 3.4805104732513428, "learning_rate": 1.2398117812203995e-06, "loss": 0.2834, "step": 10712 }, { "epoch": 0.7319122771059643, "grad_norm": 6.125149726867676, "learning_rate": 1.2392204798827995e-06, "loss": 0.3975, "step": 10713 }, { "epoch": 0.7319805971168956, "grad_norm": 3.5855019092559814, "learning_rate": 1.2386292892495756e-06, "loss": 0.1584, "step": 10714 }, { "epoch": 0.7320489171278267, "grad_norm": 4.598171710968018, "learning_rate": 1.2380382093496775e-06, "loss": 0.2737, "step": 10715 }, { "epoch": 0.732117237138758, "grad_norm": 2.929804801940918, "learning_rate": 1.2374472402120478e-06, "loss": 0.2278, "step": 10716 }, { "epoch": 0.7321855571496891, "grad_norm": 4.624726295471191, "learning_rate": 1.2368563818656256e-06, "loss": 0.3786, "step": 10717 }, { "epoch": 0.7322538771606203, "grad_norm": 4.8661041259765625, "learning_rate": 1.2362656343393443e-06, "loss": 0.336, "step": 10718 }, { "epoch": 0.7323221971715516, "grad_norm": 4.522604465484619, "learning_rate": 1.2356749976621277e-06, "loss": 0.3676, "step": 10719 }, { "epoch": 0.7323905171824827, "grad_norm": 4.0485310554504395, "learning_rate": 1.2350844718629018e-06, "loss": 0.3204, "step": 10720 }, { "epoch": 0.732458837193414, "grad_norm": 4.931859493255615, "learning_rate": 1.2344940569705812e-06, "loss": 0.2059, "step": 10721 }, { "epoch": 0.7325271572043451, "grad_norm": 4.058084011077881, "learning_rate": 1.2339037530140784e-06, "loss": 0.2402, "step": 10722 }, { "epoch": 0.7325954772152764, "grad_norm": 4.149600028991699, "learning_rate": 1.2333135600222964e-06, "loss": 0.3129, "step": 10723 }, { "epoch": 0.7326637972262076, "grad_norm": 3.8842408657073975, "learning_rate": 1.2327234780241372e-06, "loss": 0.1935, "step": 10724 }, { "epoch": 0.7327321172371387, "grad_norm": 3.434887647628784, "learning_rate": 1.232133507048495e-06, "loss": 0.2712, "step": 10725 }, { "epoch": 0.73280043724807, "grad_norm": 2.8636207580566406, "learning_rate": 1.2315436471242598e-06, "loss": 0.2105, "step": 10726 }, { "epoch": 0.7328687572590011, "grad_norm": 3.8629302978515625, "learning_rate": 1.2309538982803143e-06, "loss": 0.2317, "step": 10727 }, { "epoch": 0.7329370772699324, "grad_norm": 4.282279014587402, "learning_rate": 1.2303642605455382e-06, "loss": 0.2815, "step": 10728 }, { "epoch": 0.7330053972808636, "grad_norm": 3.985490322113037, "learning_rate": 1.2297747339488037e-06, "loss": 0.2439, "step": 10729 }, { "epoch": 0.7330737172917947, "grad_norm": 3.2274057865142822, "learning_rate": 1.2291853185189795e-06, "loss": 0.2348, "step": 10730 }, { "epoch": 0.733142037302726, "grad_norm": 3.039534330368042, "learning_rate": 1.228596014284926e-06, "loss": 0.2511, "step": 10731 }, { "epoch": 0.7332103573136571, "grad_norm": 4.832247734069824, "learning_rate": 1.2280068212755004e-06, "loss": 0.2337, "step": 10732 }, { "epoch": 0.7332786773245884, "grad_norm": 4.047199726104736, "learning_rate": 1.227417739519553e-06, "loss": 0.2899, "step": 10733 }, { "epoch": 0.7333469973355196, "grad_norm": 4.487539291381836, "learning_rate": 1.2268287690459334e-06, "loss": 0.3176, "step": 10734 }, { "epoch": 0.7334153173464508, "grad_norm": 4.726891994476318, "learning_rate": 1.2262399098834779e-06, "loss": 0.258, "step": 10735 }, { "epoch": 0.733483637357382, "grad_norm": 3.697512626647949, "learning_rate": 1.2256511620610226e-06, "loss": 0.3276, "step": 10736 }, { "epoch": 0.7335519573683131, "grad_norm": 3.665544033050537, "learning_rate": 1.225062525607397e-06, "loss": 0.2998, "step": 10737 }, { "epoch": 0.7336202773792444, "grad_norm": 4.816009998321533, "learning_rate": 1.2244740005514265e-06, "loss": 0.2526, "step": 10738 }, { "epoch": 0.7336885973901756, "grad_norm": 5.722267150878906, "learning_rate": 1.2238855869219258e-06, "loss": 0.2793, "step": 10739 }, { "epoch": 0.7337569174011068, "grad_norm": 3.3807075023651123, "learning_rate": 1.2232972847477117e-06, "loss": 0.2239, "step": 10740 }, { "epoch": 0.733825237412038, "grad_norm": 4.027153968811035, "learning_rate": 1.2227090940575903e-06, "loss": 0.2383, "step": 10741 }, { "epoch": 0.7338935574229691, "grad_norm": 4.327499866485596, "learning_rate": 1.222121014880365e-06, "loss": 0.1915, "step": 10742 }, { "epoch": 0.7339618774339004, "grad_norm": 3.980224609375, "learning_rate": 1.2215330472448304e-06, "loss": 0.2771, "step": 10743 }, { "epoch": 0.7340301974448316, "grad_norm": 5.389658451080322, "learning_rate": 1.2209451911797787e-06, "loss": 0.2679, "step": 10744 }, { "epoch": 0.7340985174557628, "grad_norm": 4.7582550048828125, "learning_rate": 1.2203574467139956e-06, "loss": 0.3177, "step": 10745 }, { "epoch": 0.734166837466694, "grad_norm": 4.578681468963623, "learning_rate": 1.2197698138762613e-06, "loss": 0.2979, "step": 10746 }, { "epoch": 0.7342351574776252, "grad_norm": 3.230897903442383, "learning_rate": 1.2191822926953512e-06, "loss": 0.2171, "step": 10747 }, { "epoch": 0.7343034774885564, "grad_norm": 4.8726348876953125, "learning_rate": 1.2185948832000339e-06, "loss": 0.3696, "step": 10748 }, { "epoch": 0.7343717974994876, "grad_norm": 4.897977352142334, "learning_rate": 1.2180075854190737e-06, "loss": 0.296, "step": 10749 }, { "epoch": 0.7344401175104188, "grad_norm": 3.59417462348938, "learning_rate": 1.2174203993812302e-06, "loss": 0.2093, "step": 10750 }, { "epoch": 0.73450843752135, "grad_norm": 4.385024070739746, "learning_rate": 1.2168333251152537e-06, "loss": 0.2857, "step": 10751 }, { "epoch": 0.7345767575322812, "grad_norm": 4.90126895904541, "learning_rate": 1.216246362649893e-06, "loss": 0.2729, "step": 10752 }, { "epoch": 0.7346450775432124, "grad_norm": 3.4411609172821045, "learning_rate": 1.215659512013889e-06, "loss": 0.2881, "step": 10753 }, { "epoch": 0.7347133975541436, "grad_norm": 4.158459186553955, "learning_rate": 1.2150727732359821e-06, "loss": 0.2659, "step": 10754 }, { "epoch": 0.7347817175650748, "grad_norm": 3.8781940937042236, "learning_rate": 1.214486146344899e-06, "loss": 0.217, "step": 10755 }, { "epoch": 0.734850037576006, "grad_norm": 3.8981454372406006, "learning_rate": 1.213899631369367e-06, "loss": 0.3353, "step": 10756 }, { "epoch": 0.7349183575869372, "grad_norm": 4.860176086425781, "learning_rate": 1.2133132283381057e-06, "loss": 0.3086, "step": 10757 }, { "epoch": 0.7349866775978684, "grad_norm": 3.714625835418701, "learning_rate": 1.2127269372798303e-06, "loss": 0.1759, "step": 10758 }, { "epoch": 0.7350549976087997, "grad_norm": 3.4155142307281494, "learning_rate": 1.2121407582232496e-06, "loss": 0.216, "step": 10759 }, { "epoch": 0.7351233176197308, "grad_norm": 4.51765775680542, "learning_rate": 1.2115546911970673e-06, "loss": 0.1708, "step": 10760 }, { "epoch": 0.735191637630662, "grad_norm": 5.439807891845703, "learning_rate": 1.210968736229982e-06, "loss": 0.3367, "step": 10761 }, { "epoch": 0.7352599576415932, "grad_norm": 3.056466817855835, "learning_rate": 1.2103828933506862e-06, "loss": 0.2857, "step": 10762 }, { "epoch": 0.7353282776525244, "grad_norm": 5.640998840332031, "learning_rate": 1.2097971625878662e-06, "loss": 0.2515, "step": 10763 }, { "epoch": 0.7353965976634557, "grad_norm": 4.353093147277832, "learning_rate": 1.2092115439702043e-06, "loss": 0.3536, "step": 10764 }, { "epoch": 0.7354649176743868, "grad_norm": 3.4080066680908203, "learning_rate": 1.2086260375263764e-06, "loss": 0.2015, "step": 10765 }, { "epoch": 0.735533237685318, "grad_norm": 4.158008575439453, "learning_rate": 1.2080406432850533e-06, "loss": 0.3073, "step": 10766 }, { "epoch": 0.7356015576962492, "grad_norm": 3.455357551574707, "learning_rate": 1.2074553612749006e-06, "loss": 0.242, "step": 10767 }, { "epoch": 0.7356698777071804, "grad_norm": 5.00103759765625, "learning_rate": 1.2068701915245779e-06, "loss": 0.342, "step": 10768 }, { "epoch": 0.7357381977181117, "grad_norm": 2.472304582595825, "learning_rate": 1.2062851340627387e-06, "loss": 0.2206, "step": 10769 }, { "epoch": 0.7358065177290428, "grad_norm": 3.3818819522857666, "learning_rate": 1.2057001889180338e-06, "loss": 0.227, "step": 10770 }, { "epoch": 0.7358748377399741, "grad_norm": 3.6755142211914062, "learning_rate": 1.2051153561191038e-06, "loss": 0.2877, "step": 10771 }, { "epoch": 0.7359431577509052, "grad_norm": 4.1440510749816895, "learning_rate": 1.2045306356945862e-06, "loss": 0.2469, "step": 10772 }, { "epoch": 0.7360114777618364, "grad_norm": 4.657116889953613, "learning_rate": 1.2039460276731157e-06, "loss": 0.2909, "step": 10773 }, { "epoch": 0.7360797977727677, "grad_norm": 5.132954120635986, "learning_rate": 1.203361532083318e-06, "loss": 0.2811, "step": 10774 }, { "epoch": 0.7361481177836988, "grad_norm": 4.384451389312744, "learning_rate": 1.202777148953815e-06, "loss": 0.2932, "step": 10775 }, { "epoch": 0.7362164377946301, "grad_norm": 3.8438589572906494, "learning_rate": 1.2021928783132202e-06, "loss": 0.1968, "step": 10776 }, { "epoch": 0.7362847578055612, "grad_norm": 4.562028408050537, "learning_rate": 1.2016087201901455e-06, "loss": 0.2787, "step": 10777 }, { "epoch": 0.7363530778164924, "grad_norm": 5.292978286743164, "learning_rate": 1.2010246746131948e-06, "loss": 0.3103, "step": 10778 }, { "epoch": 0.7364213978274237, "grad_norm": 4.434116840362549, "learning_rate": 1.2004407416109677e-06, "loss": 0.2511, "step": 10779 }, { "epoch": 0.7364897178383548, "grad_norm": 6.411589622497559, "learning_rate": 1.1998569212120581e-06, "loss": 0.3922, "step": 10780 }, { "epoch": 0.7365580378492861, "grad_norm": 3.2016007900238037, "learning_rate": 1.1992732134450532e-06, "loss": 0.2782, "step": 10781 }, { "epoch": 0.7366263578602172, "grad_norm": 3.9222919940948486, "learning_rate": 1.1986896183385365e-06, "loss": 0.2556, "step": 10782 }, { "epoch": 0.7366946778711485, "grad_norm": 4.849614143371582, "learning_rate": 1.1981061359210862e-06, "loss": 0.2676, "step": 10783 }, { "epoch": 0.7367629978820797, "grad_norm": 3.986734390258789, "learning_rate": 1.1975227662212712e-06, "loss": 0.2935, "step": 10784 }, { "epoch": 0.7368313178930108, "grad_norm": 3.903510808944702, "learning_rate": 1.1969395092676588e-06, "loss": 0.2664, "step": 10785 }, { "epoch": 0.7368996379039421, "grad_norm": 3.9797844886779785, "learning_rate": 1.1963563650888088e-06, "loss": 0.2843, "step": 10786 }, { "epoch": 0.7369679579148732, "grad_norm": 3.228698253631592, "learning_rate": 1.1957733337132788e-06, "loss": 0.2168, "step": 10787 }, { "epoch": 0.7370362779258045, "grad_norm": 3.229745626449585, "learning_rate": 1.195190415169616e-06, "loss": 0.234, "step": 10788 }, { "epoch": 0.7371045979367357, "grad_norm": 5.2088727951049805, "learning_rate": 1.1946076094863643e-06, "loss": 0.4446, "step": 10789 }, { "epoch": 0.7371729179476668, "grad_norm": 3.4374349117279053, "learning_rate": 1.1940249166920633e-06, "loss": 0.2997, "step": 10790 }, { "epoch": 0.7372412379585981, "grad_norm": 3.935966730117798, "learning_rate": 1.1934423368152465e-06, "loss": 0.2851, "step": 10791 }, { "epoch": 0.7373095579695292, "grad_norm": 3.3881125450134277, "learning_rate": 1.1928598698844376e-06, "loss": 0.2416, "step": 10792 }, { "epoch": 0.7373778779804605, "grad_norm": 4.829874515533447, "learning_rate": 1.1922775159281627e-06, "loss": 0.2758, "step": 10793 }, { "epoch": 0.7374461979913917, "grad_norm": 3.787261486053467, "learning_rate": 1.191695274974936e-06, "loss": 0.2269, "step": 10794 }, { "epoch": 0.7375145180023229, "grad_norm": 4.464798450469971, "learning_rate": 1.19111314705327e-06, "loss": 0.3797, "step": 10795 }, { "epoch": 0.7375828380132541, "grad_norm": 5.445372581481934, "learning_rate": 1.1905311321916678e-06, "loss": 0.341, "step": 10796 }, { "epoch": 0.7376511580241852, "grad_norm": 4.230011940002441, "learning_rate": 1.1899492304186296e-06, "loss": 0.2727, "step": 10797 }, { "epoch": 0.7377194780351165, "grad_norm": 3.433645486831665, "learning_rate": 1.1893674417626504e-06, "loss": 0.2401, "step": 10798 }, { "epoch": 0.7377877980460477, "grad_norm": 4.595880508422852, "learning_rate": 1.1887857662522184e-06, "loss": 0.388, "step": 10799 }, { "epoch": 0.7378561180569789, "grad_norm": 4.074073314666748, "learning_rate": 1.188204203915817e-06, "loss": 0.2764, "step": 10800 }, { "epoch": 0.7379244380679101, "grad_norm": 4.28987455368042, "learning_rate": 1.187622754781923e-06, "loss": 0.2276, "step": 10801 }, { "epoch": 0.7379927580788412, "grad_norm": 4.341294288635254, "learning_rate": 1.1870414188790096e-06, "loss": 0.2997, "step": 10802 }, { "epoch": 0.7380610780897725, "grad_norm": 4.720865249633789, "learning_rate": 1.1864601962355434e-06, "loss": 0.2473, "step": 10803 }, { "epoch": 0.7381293981007037, "grad_norm": 4.720569610595703, "learning_rate": 1.185879086879984e-06, "loss": 0.2144, "step": 10804 }, { "epoch": 0.7381977181116349, "grad_norm": 4.435263156890869, "learning_rate": 1.1852980908407856e-06, "loss": 0.2628, "step": 10805 }, { "epoch": 0.7382660381225661, "grad_norm": 4.554040431976318, "learning_rate": 1.1847172081464012e-06, "loss": 0.3544, "step": 10806 }, { "epoch": 0.7383343581334973, "grad_norm": 4.031370162963867, "learning_rate": 1.1841364388252746e-06, "loss": 0.3226, "step": 10807 }, { "epoch": 0.7384026781444285, "grad_norm": 4.091009616851807, "learning_rate": 1.183555782905843e-06, "loss": 0.3183, "step": 10808 }, { "epoch": 0.7384709981553597, "grad_norm": 4.47030782699585, "learning_rate": 1.1829752404165398e-06, "loss": 0.2555, "step": 10809 }, { "epoch": 0.7385393181662909, "grad_norm": 6.059315204620361, "learning_rate": 1.182394811385793e-06, "loss": 0.3556, "step": 10810 }, { "epoch": 0.7386076381772221, "grad_norm": 2.6603100299835205, "learning_rate": 1.1818144958420249e-06, "loss": 0.2452, "step": 10811 }, { "epoch": 0.7386759581881533, "grad_norm": 5.263760566711426, "learning_rate": 1.1812342938136518e-06, "loss": 0.3774, "step": 10812 }, { "epoch": 0.7387442781990845, "grad_norm": 4.718023777008057, "learning_rate": 1.1806542053290848e-06, "loss": 0.2378, "step": 10813 }, { "epoch": 0.7388125982100157, "grad_norm": 4.472245216369629, "learning_rate": 1.1800742304167288e-06, "loss": 0.3553, "step": 10814 }, { "epoch": 0.7388809182209469, "grad_norm": 3.8779826164245605, "learning_rate": 1.1794943691049856e-06, "loss": 0.2717, "step": 10815 }, { "epoch": 0.7389492382318781, "grad_norm": 4.65954065322876, "learning_rate": 1.1789146214222464e-06, "loss": 0.2734, "step": 10816 }, { "epoch": 0.7390175582428093, "grad_norm": 4.3059563636779785, "learning_rate": 1.1783349873969014e-06, "loss": 0.242, "step": 10817 }, { "epoch": 0.7390858782537405, "grad_norm": 4.896071434020996, "learning_rate": 1.1777554670573336e-06, "loss": 0.3608, "step": 10818 }, { "epoch": 0.7391541982646718, "grad_norm": 4.623600006103516, "learning_rate": 1.177176060431921e-06, "loss": 0.2513, "step": 10819 }, { "epoch": 0.7392225182756029, "grad_norm": 3.7959060668945312, "learning_rate": 1.176596767549035e-06, "loss": 0.2682, "step": 10820 }, { "epoch": 0.7392908382865341, "grad_norm": 6.326565265655518, "learning_rate": 1.1760175884370426e-06, "loss": 0.2831, "step": 10821 }, { "epoch": 0.7393591582974653, "grad_norm": 4.5439066886901855, "learning_rate": 1.1754385231243045e-06, "loss": 0.2935, "step": 10822 }, { "epoch": 0.7394274783083965, "grad_norm": 3.366994857788086, "learning_rate": 1.1748595716391767e-06, "loss": 0.2569, "step": 10823 }, { "epoch": 0.7394957983193278, "grad_norm": 4.787656307220459, "learning_rate": 1.1742807340100068e-06, "loss": 0.2903, "step": 10824 }, { "epoch": 0.7395641183302589, "grad_norm": 5.701282501220703, "learning_rate": 1.1737020102651395e-06, "loss": 0.3121, "step": 10825 }, { "epoch": 0.7396324383411901, "grad_norm": 6.379132270812988, "learning_rate": 1.1731234004329154e-06, "loss": 0.3462, "step": 10826 }, { "epoch": 0.7397007583521213, "grad_norm": 4.258517742156982, "learning_rate": 1.1725449045416668e-06, "loss": 0.2741, "step": 10827 }, { "epoch": 0.7397690783630525, "grad_norm": 2.8713245391845703, "learning_rate": 1.1719665226197194e-06, "loss": 0.2237, "step": 10828 }, { "epoch": 0.7398373983739838, "grad_norm": 5.5549211502075195, "learning_rate": 1.1713882546953968e-06, "loss": 0.3223, "step": 10829 }, { "epoch": 0.7399057183849149, "grad_norm": 3.892517328262329, "learning_rate": 1.1708101007970138e-06, "loss": 0.2711, "step": 10830 }, { "epoch": 0.7399740383958462, "grad_norm": 4.518335342407227, "learning_rate": 1.1702320609528827e-06, "loss": 0.3588, "step": 10831 }, { "epoch": 0.7400423584067773, "grad_norm": 4.368980407714844, "learning_rate": 1.169654135191307e-06, "loss": 0.2734, "step": 10832 }, { "epoch": 0.7401106784177085, "grad_norm": 4.5847368240356445, "learning_rate": 1.1690763235405874e-06, "loss": 0.301, "step": 10833 }, { "epoch": 0.7401789984286398, "grad_norm": 4.985349178314209, "learning_rate": 1.168498626029017e-06, "loss": 0.3123, "step": 10834 }, { "epoch": 0.7402473184395709, "grad_norm": 3.8844447135925293, "learning_rate": 1.1679210426848856e-06, "loss": 0.1871, "step": 10835 }, { "epoch": 0.7403156384505022, "grad_norm": 4.669567108154297, "learning_rate": 1.1673435735364738e-06, "loss": 0.2285, "step": 10836 }, { "epoch": 0.7403839584614333, "grad_norm": 4.650909900665283, "learning_rate": 1.1667662186120595e-06, "loss": 0.2013, "step": 10837 }, { "epoch": 0.7404522784723645, "grad_norm": 4.820308685302734, "learning_rate": 1.1661889779399146e-06, "loss": 0.2671, "step": 10838 }, { "epoch": 0.7405205984832958, "grad_norm": 4.253692150115967, "learning_rate": 1.1656118515483037e-06, "loss": 0.2266, "step": 10839 }, { "epoch": 0.7405889184942269, "grad_norm": 3.6953134536743164, "learning_rate": 1.1650348394654905e-06, "loss": 0.3311, "step": 10840 }, { "epoch": 0.7406572385051582, "grad_norm": 3.749399423599243, "learning_rate": 1.1644579417197265e-06, "loss": 0.3351, "step": 10841 }, { "epoch": 0.7407255585160893, "grad_norm": 4.173879623413086, "learning_rate": 1.1638811583392621e-06, "loss": 0.2814, "step": 10842 }, { "epoch": 0.7407938785270206, "grad_norm": 4.988337993621826, "learning_rate": 1.1633044893523405e-06, "loss": 0.3265, "step": 10843 }, { "epoch": 0.7408621985379518, "grad_norm": 3.5629806518554688, "learning_rate": 1.162727934787201e-06, "loss": 0.2051, "step": 10844 }, { "epoch": 0.7409305185488829, "grad_norm": 4.372827529907227, "learning_rate": 1.1621514946720727e-06, "loss": 0.3116, "step": 10845 }, { "epoch": 0.7409988385598142, "grad_norm": 2.915529489517212, "learning_rate": 1.1615751690351852e-06, "loss": 0.2387, "step": 10846 }, { "epoch": 0.7410671585707453, "grad_norm": 4.172065734863281, "learning_rate": 1.160998957904759e-06, "loss": 0.2699, "step": 10847 }, { "epoch": 0.7411354785816766, "grad_norm": 3.6107892990112305, "learning_rate": 1.1604228613090107e-06, "loss": 0.2436, "step": 10848 }, { "epoch": 0.7412037985926078, "grad_norm": 4.050515651702881, "learning_rate": 1.159846879276148e-06, "loss": 0.1391, "step": 10849 }, { "epoch": 0.741272118603539, "grad_norm": 6.051455020904541, "learning_rate": 1.159271011834376e-06, "loss": 0.218, "step": 10850 }, { "epoch": 0.7413404386144702, "grad_norm": 3.837930202484131, "learning_rate": 1.158695259011894e-06, "loss": 0.2334, "step": 10851 }, { "epoch": 0.7414087586254013, "grad_norm": 3.7255351543426514, "learning_rate": 1.1581196208368942e-06, "loss": 0.1805, "step": 10852 }, { "epoch": 0.7414770786363326, "grad_norm": 4.649930000305176, "learning_rate": 1.1575440973375648e-06, "loss": 0.2337, "step": 10853 }, { "epoch": 0.7415453986472638, "grad_norm": 3.2602884769439697, "learning_rate": 1.1569686885420878e-06, "loss": 0.225, "step": 10854 }, { "epoch": 0.741613718658195, "grad_norm": 4.7166852951049805, "learning_rate": 1.1563933944786384e-06, "loss": 0.1964, "step": 10855 }, { "epoch": 0.7416820386691262, "grad_norm": 4.307231426239014, "learning_rate": 1.1558182151753897e-06, "loss": 0.2464, "step": 10856 }, { "epoch": 0.7417503586800573, "grad_norm": 5.6439948081970215, "learning_rate": 1.1552431506605036e-06, "loss": 0.2891, "step": 10857 }, { "epoch": 0.7418186786909886, "grad_norm": 4.318972587585449, "learning_rate": 1.1546682009621394e-06, "loss": 0.2983, "step": 10858 }, { "epoch": 0.7418869987019198, "grad_norm": 4.517829418182373, "learning_rate": 1.1540933661084536e-06, "loss": 0.2544, "step": 10859 }, { "epoch": 0.741955318712851, "grad_norm": 4.3473687171936035, "learning_rate": 1.153518646127594e-06, "loss": 0.2904, "step": 10860 }, { "epoch": 0.7420236387237822, "grad_norm": 4.147871494293213, "learning_rate": 1.152944041047701e-06, "loss": 0.3008, "step": 10861 }, { "epoch": 0.7420919587347135, "grad_norm": 4.632801532745361, "learning_rate": 1.152369550896913e-06, "loss": 0.421, "step": 10862 }, { "epoch": 0.7421602787456446, "grad_norm": 4.444302558898926, "learning_rate": 1.1517951757033606e-06, "loss": 0.2555, "step": 10863 }, { "epoch": 0.7422285987565758, "grad_norm": 3.3671603202819824, "learning_rate": 1.1512209154951698e-06, "loss": 0.1766, "step": 10864 }, { "epoch": 0.742296918767507, "grad_norm": 4.3976922035217285, "learning_rate": 1.1506467703004602e-06, "loss": 0.2843, "step": 10865 }, { "epoch": 0.7423652387784382, "grad_norm": 4.73635196685791, "learning_rate": 1.150072740147347e-06, "loss": 0.2451, "step": 10866 }, { "epoch": 0.7424335587893695, "grad_norm": 3.504774570465088, "learning_rate": 1.1494988250639377e-06, "loss": 0.2099, "step": 10867 }, { "epoch": 0.7425018788003006, "grad_norm": 4.594570636749268, "learning_rate": 1.1489250250783378e-06, "loss": 0.2352, "step": 10868 }, { "epoch": 0.7425701988112318, "grad_norm": 3.725356101989746, "learning_rate": 1.1483513402186414e-06, "loss": 0.1961, "step": 10869 }, { "epoch": 0.742638518822163, "grad_norm": 4.864328861236572, "learning_rate": 1.1477777705129423e-06, "loss": 0.2453, "step": 10870 }, { "epoch": 0.7427068388330942, "grad_norm": 4.548094272613525, "learning_rate": 1.1472043159893267e-06, "loss": 0.3775, "step": 10871 }, { "epoch": 0.7427751588440255, "grad_norm": 5.676962375640869, "learning_rate": 1.1466309766758744e-06, "loss": 0.2516, "step": 10872 }, { "epoch": 0.7428434788549566, "grad_norm": 4.081577777862549, "learning_rate": 1.1460577526006607e-06, "loss": 0.3414, "step": 10873 }, { "epoch": 0.7429117988658879, "grad_norm": 4.2125749588012695, "learning_rate": 1.1454846437917551e-06, "loss": 0.3581, "step": 10874 }, { "epoch": 0.742980118876819, "grad_norm": 3.049407958984375, "learning_rate": 1.144911650277221e-06, "loss": 0.3027, "step": 10875 }, { "epoch": 0.7430484388877502, "grad_norm": 3.612272024154663, "learning_rate": 1.1443387720851173e-06, "loss": 0.2195, "step": 10876 }, { "epoch": 0.7431167588986815, "grad_norm": 4.29765510559082, "learning_rate": 1.1437660092434944e-06, "loss": 0.3107, "step": 10877 }, { "epoch": 0.7431850789096126, "grad_norm": 3.28756046295166, "learning_rate": 1.1431933617803984e-06, "loss": 0.2456, "step": 10878 }, { "epoch": 0.7432533989205439, "grad_norm": 4.590488433837891, "learning_rate": 1.1426208297238737e-06, "loss": 0.251, "step": 10879 }, { "epoch": 0.743321718931475, "grad_norm": 4.011130332946777, "learning_rate": 1.1420484131019546e-06, "loss": 0.2474, "step": 10880 }, { "epoch": 0.7433900389424062, "grad_norm": 4.607738018035889, "learning_rate": 1.1414761119426694e-06, "loss": 0.2261, "step": 10881 }, { "epoch": 0.7434583589533375, "grad_norm": 4.134452819824219, "learning_rate": 1.1409039262740427e-06, "loss": 0.2604, "step": 10882 }, { "epoch": 0.7435266789642686, "grad_norm": 9.38508129119873, "learning_rate": 1.1403318561240927e-06, "loss": 0.2141, "step": 10883 }, { "epoch": 0.7435949989751999, "grad_norm": 4.482766628265381, "learning_rate": 1.139759901520833e-06, "loss": 0.22, "step": 10884 }, { "epoch": 0.743663318986131, "grad_norm": 3.100984811782837, "learning_rate": 1.13918806249227e-06, "loss": 0.195, "step": 10885 }, { "epoch": 0.7437316389970623, "grad_norm": 4.766585350036621, "learning_rate": 1.1386163390664056e-06, "loss": 0.3653, "step": 10886 }, { "epoch": 0.7437999590079934, "grad_norm": 3.2093520164489746, "learning_rate": 1.1380447312712353e-06, "loss": 0.2634, "step": 10887 }, { "epoch": 0.7438682790189246, "grad_norm": 3.7031030654907227, "learning_rate": 1.1374732391347504e-06, "loss": 0.2609, "step": 10888 }, { "epoch": 0.7439365990298559, "grad_norm": 4.330747127532959, "learning_rate": 1.136901862684933e-06, "loss": 0.3645, "step": 10889 }, { "epoch": 0.744004919040787, "grad_norm": 4.963832378387451, "learning_rate": 1.1363306019497634e-06, "loss": 0.3043, "step": 10890 }, { "epoch": 0.7440732390517183, "grad_norm": 4.420423984527588, "learning_rate": 1.135759456957214e-06, "loss": 0.426, "step": 10891 }, { "epoch": 0.7441415590626494, "grad_norm": 3.3236358165740967, "learning_rate": 1.1351884277352532e-06, "loss": 0.1854, "step": 10892 }, { "epoch": 0.7442098790735806, "grad_norm": 4.4662909507751465, "learning_rate": 1.1346175143118417e-06, "loss": 0.2843, "step": 10893 }, { "epoch": 0.7442781990845119, "grad_norm": 2.9918386936187744, "learning_rate": 1.1340467167149368e-06, "loss": 0.2031, "step": 10894 }, { "epoch": 0.744346519095443, "grad_norm": 5.019748687744141, "learning_rate": 1.1334760349724878e-06, "loss": 0.3328, "step": 10895 }, { "epoch": 0.7444148391063743, "grad_norm": 4.251860618591309, "learning_rate": 1.1329054691124415e-06, "loss": 0.2673, "step": 10896 }, { "epoch": 0.7444831591173054, "grad_norm": 3.4140028953552246, "learning_rate": 1.1323350191627335e-06, "loss": 0.2144, "step": 10897 }, { "epoch": 0.7445514791282367, "grad_norm": 3.7259068489074707, "learning_rate": 1.1317646851512998e-06, "loss": 0.2427, "step": 10898 }, { "epoch": 0.7446197991391679, "grad_norm": 2.88472056388855, "learning_rate": 1.131194467106068e-06, "loss": 0.174, "step": 10899 }, { "epoch": 0.744688119150099, "grad_norm": 5.015794277191162, "learning_rate": 1.130624365054961e-06, "loss": 0.2855, "step": 10900 }, { "epoch": 0.7447564391610303, "grad_norm": 4.036139488220215, "learning_rate": 1.1300543790258923e-06, "loss": 0.2337, "step": 10901 }, { "epoch": 0.7448247591719614, "grad_norm": 4.311898231506348, "learning_rate": 1.1294845090467746e-06, "loss": 0.2355, "step": 10902 }, { "epoch": 0.7448930791828927, "grad_norm": 4.022953987121582, "learning_rate": 1.1289147551455126e-06, "loss": 0.2249, "step": 10903 }, { "epoch": 0.7449613991938239, "grad_norm": 4.5508713722229, "learning_rate": 1.1283451173500054e-06, "loss": 0.2969, "step": 10904 }, { "epoch": 0.745029719204755, "grad_norm": 3.848965883255005, "learning_rate": 1.127775595688147e-06, "loss": 0.2395, "step": 10905 }, { "epoch": 0.7450980392156863, "grad_norm": 3.7202744483947754, "learning_rate": 1.127206190187825e-06, "loss": 0.334, "step": 10906 }, { "epoch": 0.7451663592266174, "grad_norm": 3.830181121826172, "learning_rate": 1.1266369008769222e-06, "loss": 0.2312, "step": 10907 }, { "epoch": 0.7452346792375487, "grad_norm": 3.024582862854004, "learning_rate": 1.1260677277833147e-06, "loss": 0.3303, "step": 10908 }, { "epoch": 0.7453029992484799, "grad_norm": 4.981906890869141, "learning_rate": 1.1254986709348746e-06, "loss": 0.2929, "step": 10909 }, { "epoch": 0.7453713192594111, "grad_norm": 5.516674518585205, "learning_rate": 1.1249297303594654e-06, "loss": 0.2728, "step": 10910 }, { "epoch": 0.7454396392703423, "grad_norm": 3.22162127494812, "learning_rate": 1.1243609060849456e-06, "loss": 0.2544, "step": 10911 }, { "epoch": 0.7455079592812734, "grad_norm": 3.382235050201416, "learning_rate": 1.1237921981391722e-06, "loss": 0.3208, "step": 10912 }, { "epoch": 0.7455762792922047, "grad_norm": 4.302907466888428, "learning_rate": 1.1232236065499927e-06, "loss": 0.2529, "step": 10913 }, { "epoch": 0.7456445993031359, "grad_norm": 4.996603012084961, "learning_rate": 1.1226551313452475e-06, "loss": 0.3572, "step": 10914 }, { "epoch": 0.7457129193140671, "grad_norm": 3.4345648288726807, "learning_rate": 1.1220867725527749e-06, "loss": 0.2286, "step": 10915 }, { "epoch": 0.7457812393249983, "grad_norm": 3.831623077392578, "learning_rate": 1.121518530200405e-06, "loss": 0.2862, "step": 10916 }, { "epoch": 0.7458495593359294, "grad_norm": 4.816682815551758, "learning_rate": 1.1209504043159639e-06, "loss": 0.3268, "step": 10917 }, { "epoch": 0.7459178793468607, "grad_norm": 5.318861484527588, "learning_rate": 1.120382394927271e-06, "loss": 0.2723, "step": 10918 }, { "epoch": 0.7459861993577919, "grad_norm": 4.682007789611816, "learning_rate": 1.1198145020621398e-06, "loss": 0.3358, "step": 10919 }, { "epoch": 0.7460545193687231, "grad_norm": 3.8495726585388184, "learning_rate": 1.1192467257483794e-06, "loss": 0.2543, "step": 10920 }, { "epoch": 0.7461228393796543, "grad_norm": 3.9576783180236816, "learning_rate": 1.118679066013792e-06, "loss": 0.3153, "step": 10921 }, { "epoch": 0.7461911593905856, "grad_norm": 2.932513952255249, "learning_rate": 1.118111522886174e-06, "loss": 0.2276, "step": 10922 }, { "epoch": 0.7462594794015167, "grad_norm": 4.236752033233643, "learning_rate": 1.1175440963933161e-06, "loss": 0.2906, "step": 10923 }, { "epoch": 0.7463277994124479, "grad_norm": 4.474943161010742, "learning_rate": 1.1169767865630043e-06, "loss": 0.3578, "step": 10924 }, { "epoch": 0.7463961194233791, "grad_norm": 3.8218328952789307, "learning_rate": 1.116409593423018e-06, "loss": 0.3093, "step": 10925 }, { "epoch": 0.7464644394343103, "grad_norm": 6.239551067352295, "learning_rate": 1.1158425170011314e-06, "loss": 0.2257, "step": 10926 }, { "epoch": 0.7465327594452416, "grad_norm": 4.442816734313965, "learning_rate": 1.1152755573251125e-06, "loss": 0.27, "step": 10927 }, { "epoch": 0.7466010794561727, "grad_norm": 3.62619686126709, "learning_rate": 1.1147087144227242e-06, "loss": 0.2471, "step": 10928 }, { "epoch": 0.7466693994671039, "grad_norm": 2.7692606449127197, "learning_rate": 1.1141419883217242e-06, "loss": 0.224, "step": 10929 }, { "epoch": 0.7467377194780351, "grad_norm": 4.804455280303955, "learning_rate": 1.113575379049861e-06, "loss": 0.3063, "step": 10930 }, { "epoch": 0.7468060394889663, "grad_norm": 3.871992826461792, "learning_rate": 1.1130088866348804e-06, "loss": 0.2755, "step": 10931 }, { "epoch": 0.7468743594998976, "grad_norm": 6.409246444702148, "learning_rate": 1.1124425111045242e-06, "loss": 0.3634, "step": 10932 }, { "epoch": 0.7469426795108287, "grad_norm": 3.804032325744629, "learning_rate": 1.1118762524865261e-06, "loss": 0.2365, "step": 10933 }, { "epoch": 0.74701099952176, "grad_norm": 4.204543590545654, "learning_rate": 1.111310110808612e-06, "loss": 0.2564, "step": 10934 }, { "epoch": 0.7470793195326911, "grad_norm": 4.199655532836914, "learning_rate": 1.1107440860985058e-06, "loss": 0.3292, "step": 10935 }, { "epoch": 0.7471476395436223, "grad_norm": 3.1896071434020996, "learning_rate": 1.110178178383924e-06, "loss": 0.1697, "step": 10936 }, { "epoch": 0.7472159595545536, "grad_norm": 2.9619832038879395, "learning_rate": 1.1096123876925778e-06, "loss": 0.2145, "step": 10937 }, { "epoch": 0.7472842795654847, "grad_norm": 3.4157121181488037, "learning_rate": 1.1090467140521723e-06, "loss": 0.296, "step": 10938 }, { "epoch": 0.747352599576416, "grad_norm": 2.655975103378296, "learning_rate": 1.1084811574904069e-06, "loss": 0.1695, "step": 10939 }, { "epoch": 0.7474209195873471, "grad_norm": 4.143889904022217, "learning_rate": 1.1079157180349758e-06, "loss": 0.3404, "step": 10940 }, { "epoch": 0.7474892395982783, "grad_norm": 2.878378391265869, "learning_rate": 1.107350395713568e-06, "loss": 0.1441, "step": 10941 }, { "epoch": 0.7475575596092096, "grad_norm": 4.2326340675354, "learning_rate": 1.1067851905538632e-06, "loss": 0.2425, "step": 10942 }, { "epoch": 0.7476258796201407, "grad_norm": 2.98525333404541, "learning_rate": 1.1062201025835398e-06, "loss": 0.201, "step": 10943 }, { "epoch": 0.747694199631072, "grad_norm": 3.80914044380188, "learning_rate": 1.105655131830267e-06, "loss": 0.3573, "step": 10944 }, { "epoch": 0.7477625196420031, "grad_norm": 5.1697001457214355, "learning_rate": 1.1050902783217134e-06, "loss": 0.2522, "step": 10945 }, { "epoch": 0.7478308396529344, "grad_norm": 4.5882182121276855, "learning_rate": 1.1045255420855352e-06, "loss": 0.3128, "step": 10946 }, { "epoch": 0.7478991596638656, "grad_norm": 4.143200874328613, "learning_rate": 1.1039609231493873e-06, "loss": 0.2857, "step": 10947 }, { "epoch": 0.7479674796747967, "grad_norm": 5.3979878425598145, "learning_rate": 1.1033964215409173e-06, "loss": 0.2265, "step": 10948 }, { "epoch": 0.748035799685728, "grad_norm": 3.921900987625122, "learning_rate": 1.1028320372877686e-06, "loss": 0.2241, "step": 10949 }, { "epoch": 0.7481041196966591, "grad_norm": 4.8462114334106445, "learning_rate": 1.1022677704175737e-06, "loss": 0.3028, "step": 10950 }, { "epoch": 0.7481724397075904, "grad_norm": 2.5202927589416504, "learning_rate": 1.1017036209579678e-06, "loss": 0.1977, "step": 10951 }, { "epoch": 0.7482407597185216, "grad_norm": 4.730627536773682, "learning_rate": 1.1011395889365734e-06, "loss": 0.3561, "step": 10952 }, { "epoch": 0.7483090797294527, "grad_norm": 3.9611310958862305, "learning_rate": 1.1005756743810113e-06, "loss": 0.268, "step": 10953 }, { "epoch": 0.748377399740384, "grad_norm": 4.1789937019348145, "learning_rate": 1.100011877318893e-06, "loss": 0.1871, "step": 10954 }, { "epoch": 0.7484457197513151, "grad_norm": 4.979367256164551, "learning_rate": 1.0994481977778268e-06, "loss": 0.2527, "step": 10955 }, { "epoch": 0.7485140397622464, "grad_norm": 3.3101086616516113, "learning_rate": 1.0988846357854151e-06, "loss": 0.2445, "step": 10956 }, { "epoch": 0.7485823597731776, "grad_norm": 4.581556797027588, "learning_rate": 1.098321191369253e-06, "loss": 0.2785, "step": 10957 }, { "epoch": 0.7486506797841088, "grad_norm": 4.730275630950928, "learning_rate": 1.0977578645569321e-06, "loss": 0.3304, "step": 10958 }, { "epoch": 0.74871899979504, "grad_norm": 4.453466892242432, "learning_rate": 1.0971946553760365e-06, "loss": 0.1968, "step": 10959 }, { "epoch": 0.7487873198059711, "grad_norm": 5.587663173675537, "learning_rate": 1.0966315638541447e-06, "loss": 0.3472, "step": 10960 }, { "epoch": 0.7488556398169024, "grad_norm": 3.7663798332214355, "learning_rate": 1.0960685900188314e-06, "loss": 0.222, "step": 10961 }, { "epoch": 0.7489239598278336, "grad_norm": 5.104623794555664, "learning_rate": 1.0955057338976617e-06, "loss": 0.2902, "step": 10962 }, { "epoch": 0.7489922798387648, "grad_norm": 2.671863317489624, "learning_rate": 1.0949429955181981e-06, "loss": 0.1963, "step": 10963 }, { "epoch": 0.749060599849696, "grad_norm": 4.347640037536621, "learning_rate": 1.094380374907995e-06, "loss": 0.2081, "step": 10964 }, { "epoch": 0.7491289198606271, "grad_norm": 4.029059886932373, "learning_rate": 1.0938178720946066e-06, "loss": 0.3518, "step": 10965 }, { "epoch": 0.7491972398715584, "grad_norm": 3.3938112258911133, "learning_rate": 1.0932554871055733e-06, "loss": 0.2564, "step": 10966 }, { "epoch": 0.7492655598824896, "grad_norm": 4.48965311050415, "learning_rate": 1.0926932199684344e-06, "loss": 0.3595, "step": 10967 }, { "epoch": 0.7493338798934208, "grad_norm": 4.287538528442383, "learning_rate": 1.0921310707107233e-06, "loss": 0.2928, "step": 10968 }, { "epoch": 0.749402199904352, "grad_norm": 4.726912021636963, "learning_rate": 1.0915690393599677e-06, "loss": 0.2633, "step": 10969 }, { "epoch": 0.7494705199152832, "grad_norm": 5.118366241455078, "learning_rate": 1.0910071259436855e-06, "loss": 0.4083, "step": 10970 }, { "epoch": 0.7495388399262144, "grad_norm": 4.882385730743408, "learning_rate": 1.0904453304893956e-06, "loss": 0.3485, "step": 10971 }, { "epoch": 0.7496071599371456, "grad_norm": 3.09757661819458, "learning_rate": 1.0898836530246061e-06, "loss": 0.2536, "step": 10972 }, { "epoch": 0.7496754799480768, "grad_norm": 3.3951773643493652, "learning_rate": 1.0893220935768213e-06, "loss": 0.2061, "step": 10973 }, { "epoch": 0.749743799959008, "grad_norm": 4.0710225105285645, "learning_rate": 1.0887606521735404e-06, "loss": 0.2675, "step": 10974 }, { "epoch": 0.7498121199699392, "grad_norm": 5.20920991897583, "learning_rate": 1.0881993288422528e-06, "loss": 0.2563, "step": 10975 }, { "epoch": 0.7498804399808704, "grad_norm": 5.1143341064453125, "learning_rate": 1.087638123610447e-06, "loss": 0.2915, "step": 10976 }, { "epoch": 0.7499487599918016, "grad_norm": 4.468410968780518, "learning_rate": 1.0870770365056033e-06, "loss": 0.2271, "step": 10977 }, { "epoch": 0.7500170800027328, "grad_norm": 3.316624164581299, "learning_rate": 1.0865160675551965e-06, "loss": 0.2407, "step": 10978 }, { "epoch": 0.750085400013664, "grad_norm": 3.9805216789245605, "learning_rate": 1.0859552167866963e-06, "loss": 0.1988, "step": 10979 }, { "epoch": 0.7501537200245952, "grad_norm": 5.2989583015441895, "learning_rate": 1.0853944842275657e-06, "loss": 0.3633, "step": 10980 }, { "epoch": 0.7502220400355264, "grad_norm": 3.957942247390747, "learning_rate": 1.0848338699052621e-06, "loss": 0.3854, "step": 10981 }, { "epoch": 0.7502903600464577, "grad_norm": 5.175034046173096, "learning_rate": 1.084273373847239e-06, "loss": 0.2843, "step": 10982 }, { "epoch": 0.7503586800573888, "grad_norm": 4.186328887939453, "learning_rate": 1.0837129960809395e-06, "loss": 0.3963, "step": 10983 }, { "epoch": 0.75042700006832, "grad_norm": 4.970858573913574, "learning_rate": 1.0831527366338042e-06, "loss": 0.2905, "step": 10984 }, { "epoch": 0.7504953200792512, "grad_norm": 3.2145769596099854, "learning_rate": 1.0825925955332694e-06, "loss": 0.2417, "step": 10985 }, { "epoch": 0.7505636400901824, "grad_norm": 4.254011631011963, "learning_rate": 1.0820325728067641e-06, "loss": 0.3018, "step": 10986 }, { "epoch": 0.7506319601011137, "grad_norm": 3.840825080871582, "learning_rate": 1.0814726684817092e-06, "loss": 0.3851, "step": 10987 }, { "epoch": 0.7507002801120448, "grad_norm": 3.1910760402679443, "learning_rate": 1.0809128825855221e-06, "loss": 0.2395, "step": 10988 }, { "epoch": 0.750768600122976, "grad_norm": 5.328285217285156, "learning_rate": 1.0803532151456148e-06, "loss": 0.2265, "step": 10989 }, { "epoch": 0.7508369201339072, "grad_norm": 3.9874162673950195, "learning_rate": 1.0797936661893916e-06, "loss": 0.2707, "step": 10990 }, { "epoch": 0.7509052401448384, "grad_norm": 4.017014503479004, "learning_rate": 1.0792342357442533e-06, "loss": 0.2719, "step": 10991 }, { "epoch": 0.7509735601557697, "grad_norm": 4.852911949157715, "learning_rate": 1.0786749238375931e-06, "loss": 0.2654, "step": 10992 }, { "epoch": 0.7510418801667008, "grad_norm": 3.0496106147766113, "learning_rate": 1.0781157304967992e-06, "loss": 0.2272, "step": 10993 }, { "epoch": 0.7511102001776321, "grad_norm": 5.302966117858887, "learning_rate": 1.0775566557492551e-06, "loss": 0.3449, "step": 10994 }, { "epoch": 0.7511785201885632, "grad_norm": 4.272471904754639, "learning_rate": 1.0769976996223344e-06, "loss": 0.2977, "step": 10995 }, { "epoch": 0.7512468401994944, "grad_norm": 4.968545913696289, "learning_rate": 1.0764388621434094e-06, "loss": 0.2401, "step": 10996 }, { "epoch": 0.7513151602104257, "grad_norm": 5.048311233520508, "learning_rate": 1.0758801433398436e-06, "loss": 0.3215, "step": 10997 }, { "epoch": 0.7513834802213568, "grad_norm": 2.9846556186676025, "learning_rate": 1.0753215432389992e-06, "loss": 0.2225, "step": 10998 }, { "epoch": 0.7514518002322881, "grad_norm": 3.263580560684204, "learning_rate": 1.0747630618682261e-06, "loss": 0.2579, "step": 10999 }, { "epoch": 0.7515201202432192, "grad_norm": 3.416745901107788, "learning_rate": 1.074204699254873e-06, "loss": 0.2514, "step": 11000 }, { "epoch": 0.7515884402541504, "grad_norm": 3.1559181213378906, "learning_rate": 1.073646455426281e-06, "loss": 0.1952, "step": 11001 }, { "epoch": 0.7516567602650817, "grad_norm": 3.5582804679870605, "learning_rate": 1.0730883304097873e-06, "loss": 0.2114, "step": 11002 }, { "epoch": 0.7517250802760128, "grad_norm": 5.370107173919678, "learning_rate": 1.0725303242327185e-06, "loss": 0.3155, "step": 11003 }, { "epoch": 0.7517934002869441, "grad_norm": 4.341961860656738, "learning_rate": 1.0719724369224011e-06, "loss": 0.2305, "step": 11004 }, { "epoch": 0.7518617202978752, "grad_norm": 4.785800933837891, "learning_rate": 1.0714146685061535e-06, "loss": 0.3405, "step": 11005 }, { "epoch": 0.7519300403088065, "grad_norm": 4.073336124420166, "learning_rate": 1.0708570190112889e-06, "loss": 0.275, "step": 11006 }, { "epoch": 0.7519983603197377, "grad_norm": 3.632157325744629, "learning_rate": 1.0702994884651109e-06, "loss": 0.3181, "step": 11007 }, { "epoch": 0.7520666803306688, "grad_norm": 3.182180881500244, "learning_rate": 1.0697420768949226e-06, "loss": 0.2514, "step": 11008 }, { "epoch": 0.7521350003416001, "grad_norm": 2.8252265453338623, "learning_rate": 1.0691847843280183e-06, "loss": 0.166, "step": 11009 }, { "epoch": 0.7522033203525312, "grad_norm": 4.027969837188721, "learning_rate": 1.068627610791687e-06, "loss": 0.2052, "step": 11010 }, { "epoch": 0.7522716403634625, "grad_norm": 3.407468557357788, "learning_rate": 1.0680705563132124e-06, "loss": 0.2681, "step": 11011 }, { "epoch": 0.7523399603743937, "grad_norm": 6.470906734466553, "learning_rate": 1.0675136209198719e-06, "loss": 0.2935, "step": 11012 }, { "epoch": 0.7524082803853248, "grad_norm": 4.706686496734619, "learning_rate": 1.066956804638937e-06, "loss": 0.245, "step": 11013 }, { "epoch": 0.7524766003962561, "grad_norm": 4.0220184326171875, "learning_rate": 1.0664001074976744e-06, "loss": 0.2034, "step": 11014 }, { "epoch": 0.7525449204071872, "grad_norm": 4.569455623626709, "learning_rate": 1.0658435295233425e-06, "loss": 0.3743, "step": 11015 }, { "epoch": 0.7526132404181185, "grad_norm": 4.392451286315918, "learning_rate": 1.0652870707431963e-06, "loss": 0.3037, "step": 11016 }, { "epoch": 0.7526815604290497, "grad_norm": 4.291525363922119, "learning_rate": 1.0647307311844824e-06, "loss": 0.2384, "step": 11017 }, { "epoch": 0.7527498804399809, "grad_norm": 5.8808207511901855, "learning_rate": 1.064174510874448e-06, "loss": 0.2701, "step": 11018 }, { "epoch": 0.7528182004509121, "grad_norm": 2.9650206565856934, "learning_rate": 1.063618409840325e-06, "loss": 0.2452, "step": 11019 }, { "epoch": 0.7528865204618432, "grad_norm": 5.122222423553467, "learning_rate": 1.0630624281093457e-06, "loss": 0.2684, "step": 11020 }, { "epoch": 0.7529548404727745, "grad_norm": 3.350956916809082, "learning_rate": 1.0625065657087353e-06, "loss": 0.2208, "step": 11021 }, { "epoch": 0.7530231604837057, "grad_norm": 3.1891162395477295, "learning_rate": 1.061950822665714e-06, "loss": 0.2386, "step": 11022 }, { "epoch": 0.7530914804946369, "grad_norm": 4.230383396148682, "learning_rate": 1.0613951990074911e-06, "loss": 0.3265, "step": 11023 }, { "epoch": 0.7531598005055681, "grad_norm": 6.390013217926025, "learning_rate": 1.0608396947612786e-06, "loss": 0.4526, "step": 11024 }, { "epoch": 0.7532281205164992, "grad_norm": 4.868422031402588, "learning_rate": 1.0602843099542758e-06, "loss": 0.3659, "step": 11025 }, { "epoch": 0.7532964405274305, "grad_norm": 3.135251760482788, "learning_rate": 1.0597290446136801e-06, "loss": 0.1922, "step": 11026 }, { "epoch": 0.7533647605383617, "grad_norm": 3.764857292175293, "learning_rate": 1.059173898766679e-06, "loss": 0.2558, "step": 11027 }, { "epoch": 0.7534330805492929, "grad_norm": 3.8352370262145996, "learning_rate": 1.0586188724404577e-06, "loss": 0.2436, "step": 11028 }, { "epoch": 0.7535014005602241, "grad_norm": 4.925297737121582, "learning_rate": 1.058063965662194e-06, "loss": 0.341, "step": 11029 }, { "epoch": 0.7535697205711553, "grad_norm": 4.372638702392578, "learning_rate": 1.057509178459061e-06, "loss": 0.2738, "step": 11030 }, { "epoch": 0.7536380405820865, "grad_norm": 3.865746021270752, "learning_rate": 1.0569545108582243e-06, "loss": 0.2532, "step": 11031 }, { "epoch": 0.7537063605930177, "grad_norm": 3.8667244911193848, "learning_rate": 1.0563999628868452e-06, "loss": 0.2007, "step": 11032 }, { "epoch": 0.7537746806039489, "grad_norm": 3.815906286239624, "learning_rate": 1.055845534572078e-06, "loss": 0.286, "step": 11033 }, { "epoch": 0.7538430006148801, "grad_norm": 4.44061803817749, "learning_rate": 1.0552912259410726e-06, "loss": 0.2612, "step": 11034 }, { "epoch": 0.7539113206258113, "grad_norm": 4.942377090454102, "learning_rate": 1.0547370370209704e-06, "loss": 0.2638, "step": 11035 }, { "epoch": 0.7539796406367425, "grad_norm": 4.824163913726807, "learning_rate": 1.0541829678389082e-06, "loss": 0.3011, "step": 11036 }, { "epoch": 0.7540479606476737, "grad_norm": 6.645432472229004, "learning_rate": 1.053629018422019e-06, "loss": 0.192, "step": 11037 }, { "epoch": 0.7541162806586049, "grad_norm": 4.2627153396606445, "learning_rate": 1.053075188797429e-06, "loss": 0.2456, "step": 11038 }, { "epoch": 0.7541846006695361, "grad_norm": 5.578389644622803, "learning_rate": 1.0525214789922553e-06, "loss": 0.3054, "step": 11039 }, { "epoch": 0.7542529206804673, "grad_norm": 4.611590385437012, "learning_rate": 1.051967889033613e-06, "loss": 0.3305, "step": 11040 }, { "epoch": 0.7543212406913985, "grad_norm": 4.619884014129639, "learning_rate": 1.051414418948609e-06, "loss": 0.266, "step": 11041 }, { "epoch": 0.7543895607023298, "grad_norm": 2.4289472103118896, "learning_rate": 1.0508610687643462e-06, "loss": 0.1881, "step": 11042 }, { "epoch": 0.7544578807132609, "grad_norm": 2.558225154876709, "learning_rate": 1.0503078385079205e-06, "loss": 0.1205, "step": 11043 }, { "epoch": 0.7545262007241921, "grad_norm": 4.8009819984436035, "learning_rate": 1.049754728206422e-06, "loss": 0.3179, "step": 11044 }, { "epoch": 0.7545945207351233, "grad_norm": 3.570227861404419, "learning_rate": 1.0492017378869349e-06, "loss": 0.2197, "step": 11045 }, { "epoch": 0.7546628407460545, "grad_norm": 3.796266794204712, "learning_rate": 1.048648867576538e-06, "loss": 0.2821, "step": 11046 }, { "epoch": 0.7547311607569858, "grad_norm": 4.316915035247803, "learning_rate": 1.0480961173023043e-06, "loss": 0.3406, "step": 11047 }, { "epoch": 0.7547994807679169, "grad_norm": 3.5627055168151855, "learning_rate": 1.0475434870912994e-06, "loss": 0.2627, "step": 11048 }, { "epoch": 0.7548678007788481, "grad_norm": 3.7558412551879883, "learning_rate": 1.0469909769705841e-06, "loss": 0.2641, "step": 11049 }, { "epoch": 0.7549361207897793, "grad_norm": 4.038384914398193, "learning_rate": 1.0464385869672134e-06, "loss": 0.3056, "step": 11050 }, { "epoch": 0.7550044408007105, "grad_norm": 2.9405388832092285, "learning_rate": 1.0458863171082388e-06, "loss": 0.1525, "step": 11051 }, { "epoch": 0.7550727608116418, "grad_norm": 5.626940727233887, "learning_rate": 1.0453341674207002e-06, "loss": 0.3162, "step": 11052 }, { "epoch": 0.7551410808225729, "grad_norm": 5.253147125244141, "learning_rate": 1.0447821379316368e-06, "loss": 0.2817, "step": 11053 }, { "epoch": 0.7552094008335042, "grad_norm": 3.8871026039123535, "learning_rate": 1.044230228668079e-06, "loss": 0.4049, "step": 11054 }, { "epoch": 0.7552777208444353, "grad_norm": 4.8843584060668945, "learning_rate": 1.0436784396570544e-06, "loss": 0.2856, "step": 11055 }, { "epoch": 0.7553460408553665, "grad_norm": 4.2258782386779785, "learning_rate": 1.0431267709255789e-06, "loss": 0.2358, "step": 11056 }, { "epoch": 0.7554143608662978, "grad_norm": 5.469635009765625, "learning_rate": 1.0425752225006692e-06, "loss": 0.3212, "step": 11057 }, { "epoch": 0.7554826808772289, "grad_norm": 4.319515228271484, "learning_rate": 1.0420237944093323e-06, "loss": 0.3235, "step": 11058 }, { "epoch": 0.7555510008881602, "grad_norm": 4.293184280395508, "learning_rate": 1.0414724866785717e-06, "loss": 0.3746, "step": 11059 }, { "epoch": 0.7556193208990913, "grad_norm": 4.158994197845459, "learning_rate": 1.0409212993353807e-06, "loss": 0.2531, "step": 11060 }, { "epoch": 0.7556876409100225, "grad_norm": 3.6704845428466797, "learning_rate": 1.040370232406751e-06, "loss": 0.2804, "step": 11061 }, { "epoch": 0.7557559609209538, "grad_norm": 6.211381435394287, "learning_rate": 1.0398192859196664e-06, "loss": 0.2653, "step": 11062 }, { "epoch": 0.7558242809318849, "grad_norm": 3.3324880599975586, "learning_rate": 1.0392684599011056e-06, "loss": 0.2486, "step": 11063 }, { "epoch": 0.7558926009428162, "grad_norm": 4.326152324676514, "learning_rate": 1.0387177543780417e-06, "loss": 0.2753, "step": 11064 }, { "epoch": 0.7559609209537473, "grad_norm": 6.188669204711914, "learning_rate": 1.0381671693774399e-06, "loss": 0.2332, "step": 11065 }, { "epoch": 0.7560292409646786, "grad_norm": 4.327916145324707, "learning_rate": 1.0376167049262621e-06, "loss": 0.419, "step": 11066 }, { "epoch": 0.7560975609756098, "grad_norm": 3.456829786300659, "learning_rate": 1.0370663610514639e-06, "loss": 0.2075, "step": 11067 }, { "epoch": 0.7561658809865409, "grad_norm": 4.066357612609863, "learning_rate": 1.0365161377799915e-06, "loss": 0.2147, "step": 11068 }, { "epoch": 0.7562342009974722, "grad_norm": 4.736393928527832, "learning_rate": 1.0359660351387896e-06, "loss": 0.3509, "step": 11069 }, { "epoch": 0.7563025210084033, "grad_norm": 4.266510486602783, "learning_rate": 1.0354160531547935e-06, "loss": 0.1855, "step": 11070 }, { "epoch": 0.7563708410193346, "grad_norm": 5.700790882110596, "learning_rate": 1.034866191854938e-06, "loss": 0.3198, "step": 11071 }, { "epoch": 0.7564391610302658, "grad_norm": 4.2428879737854, "learning_rate": 1.0343164512661454e-06, "loss": 0.3459, "step": 11072 }, { "epoch": 0.7565074810411969, "grad_norm": 3.938173532485962, "learning_rate": 1.0337668314153354e-06, "loss": 0.213, "step": 11073 }, { "epoch": 0.7565758010521282, "grad_norm": 4.994911193847656, "learning_rate": 1.0332173323294224e-06, "loss": 0.2923, "step": 11074 }, { "epoch": 0.7566441210630593, "grad_norm": 3.4421980381011963, "learning_rate": 1.0326679540353131e-06, "loss": 0.2792, "step": 11075 }, { "epoch": 0.7567124410739906, "grad_norm": 4.72484827041626, "learning_rate": 1.032118696559909e-06, "loss": 0.4305, "step": 11076 }, { "epoch": 0.7567807610849218, "grad_norm": 3.8916423320770264, "learning_rate": 1.0315695599301068e-06, "loss": 0.292, "step": 11077 }, { "epoch": 0.756849081095853, "grad_norm": 5.351559638977051, "learning_rate": 1.0310205441727949e-06, "loss": 0.2057, "step": 11078 }, { "epoch": 0.7569174011067842, "grad_norm": 3.207096815109253, "learning_rate": 1.0304716493148596e-06, "loss": 0.1692, "step": 11079 }, { "epoch": 0.7569857211177153, "grad_norm": 4.44789981842041, "learning_rate": 1.0299228753831756e-06, "loss": 0.2963, "step": 11080 }, { "epoch": 0.7570540411286466, "grad_norm": 2.792220115661621, "learning_rate": 1.029374222404616e-06, "loss": 0.2148, "step": 11081 }, { "epoch": 0.7571223611395778, "grad_norm": 4.430724143981934, "learning_rate": 1.028825690406048e-06, "loss": 0.2796, "step": 11082 }, { "epoch": 0.757190681150509, "grad_norm": 4.616994380950928, "learning_rate": 1.0282772794143306e-06, "loss": 0.3422, "step": 11083 }, { "epoch": 0.7572590011614402, "grad_norm": 4.361711025238037, "learning_rate": 1.0277289894563182e-06, "loss": 0.2686, "step": 11084 }, { "epoch": 0.7573273211723713, "grad_norm": 4.2773590087890625, "learning_rate": 1.0271808205588598e-06, "loss": 0.2733, "step": 11085 }, { "epoch": 0.7573956411833026, "grad_norm": 3.9513638019561768, "learning_rate": 1.0266327727487972e-06, "loss": 0.3479, "step": 11086 }, { "epoch": 0.7574639611942338, "grad_norm": 4.543659687042236, "learning_rate": 1.0260848460529678e-06, "loss": 0.2962, "step": 11087 }, { "epoch": 0.757532281205165, "grad_norm": 5.3063530921936035, "learning_rate": 1.0255370404982007e-06, "loss": 0.3566, "step": 11088 }, { "epoch": 0.7576006012160962, "grad_norm": 6.411733627319336, "learning_rate": 1.0249893561113195e-06, "loss": 0.3266, "step": 11089 }, { "epoch": 0.7576689212270274, "grad_norm": 4.68740701675415, "learning_rate": 1.0244417929191456e-06, "loss": 0.2802, "step": 11090 }, { "epoch": 0.7577372412379586, "grad_norm": 4.081657886505127, "learning_rate": 1.023894350948492e-06, "loss": 0.3611, "step": 11091 }, { "epoch": 0.7578055612488898, "grad_norm": 5.481057643890381, "learning_rate": 1.0233470302261624e-06, "loss": 0.4061, "step": 11092 }, { "epoch": 0.757873881259821, "grad_norm": 4.620609283447266, "learning_rate": 1.0227998307789594e-06, "loss": 0.3444, "step": 11093 }, { "epoch": 0.7579422012707522, "grad_norm": 4.290679931640625, "learning_rate": 1.022252752633678e-06, "loss": 0.2167, "step": 11094 }, { "epoch": 0.7580105212816834, "grad_norm": 4.089045524597168, "learning_rate": 1.0217057958171067e-06, "loss": 0.3505, "step": 11095 }, { "epoch": 0.7580788412926146, "grad_norm": 3.570513963699341, "learning_rate": 1.021158960356029e-06, "loss": 0.2455, "step": 11096 }, { "epoch": 0.7581471613035458, "grad_norm": 4.012719631195068, "learning_rate": 1.0206122462772218e-06, "loss": 0.2873, "step": 11097 }, { "epoch": 0.758215481314477, "grad_norm": 5.464109420776367, "learning_rate": 1.0200656536074558e-06, "loss": 0.2992, "step": 11098 }, { "epoch": 0.7582838013254082, "grad_norm": 5.4112229347229, "learning_rate": 1.019519182373498e-06, "loss": 0.3792, "step": 11099 }, { "epoch": 0.7583521213363394, "grad_norm": 2.421719551086426, "learning_rate": 1.0189728326021051e-06, "loss": 0.2232, "step": 11100 }, { "epoch": 0.7584204413472706, "grad_norm": 3.648564338684082, "learning_rate": 1.0184266043200313e-06, "loss": 0.3279, "step": 11101 }, { "epoch": 0.7584887613582019, "grad_norm": 3.8251612186431885, "learning_rate": 1.017880497554024e-06, "loss": 0.299, "step": 11102 }, { "epoch": 0.758557081369133, "grad_norm": 3.575010061264038, "learning_rate": 1.0173345123308253e-06, "loss": 0.2663, "step": 11103 }, { "epoch": 0.7586254013800642, "grad_norm": 4.403907299041748, "learning_rate": 1.0167886486771697e-06, "loss": 0.2305, "step": 11104 }, { "epoch": 0.7586937213909954, "grad_norm": 4.342677593231201, "learning_rate": 1.0162429066197872e-06, "loss": 0.3346, "step": 11105 }, { "epoch": 0.7587620414019266, "grad_norm": 4.542914390563965, "learning_rate": 1.0156972861854013e-06, "loss": 0.2294, "step": 11106 }, { "epoch": 0.7588303614128579, "grad_norm": 5.4855217933654785, "learning_rate": 1.0151517874007298e-06, "loss": 0.2775, "step": 11107 }, { "epoch": 0.758898681423789, "grad_norm": 4.067325592041016, "learning_rate": 1.0146064102924849e-06, "loss": 0.3368, "step": 11108 }, { "epoch": 0.7589670014347202, "grad_norm": 4.515944480895996, "learning_rate": 1.0140611548873694e-06, "loss": 0.2734, "step": 11109 }, { "epoch": 0.7590353214456514, "grad_norm": 3.150054931640625, "learning_rate": 1.013516021212086e-06, "loss": 0.2549, "step": 11110 }, { "epoch": 0.7591036414565826, "grad_norm": 4.298287391662598, "learning_rate": 1.0129710092933271e-06, "loss": 0.258, "step": 11111 }, { "epoch": 0.7591719614675139, "grad_norm": 4.20013427734375, "learning_rate": 1.0124261191577826e-06, "loss": 0.2412, "step": 11112 }, { "epoch": 0.759240281478445, "grad_norm": 4.411806583404541, "learning_rate": 1.0118813508321312e-06, "loss": 0.2653, "step": 11113 }, { "epoch": 0.7593086014893763, "grad_norm": 5.345635890960693, "learning_rate": 1.0113367043430506e-06, "loss": 0.3217, "step": 11114 }, { "epoch": 0.7593769215003074, "grad_norm": 4.150559425354004, "learning_rate": 1.0107921797172097e-06, "loss": 0.2779, "step": 11115 }, { "epoch": 0.7594452415112386, "grad_norm": 6.861724376678467, "learning_rate": 1.0102477769812732e-06, "loss": 0.3196, "step": 11116 }, { "epoch": 0.7595135615221699, "grad_norm": 3.715894937515259, "learning_rate": 1.009703496161899e-06, "loss": 0.2822, "step": 11117 }, { "epoch": 0.759581881533101, "grad_norm": 3.287368059158325, "learning_rate": 1.0091593372857387e-06, "loss": 0.1633, "step": 11118 }, { "epoch": 0.7596502015440323, "grad_norm": 3.548063278198242, "learning_rate": 1.0086153003794386e-06, "loss": 0.2615, "step": 11119 }, { "epoch": 0.7597185215549634, "grad_norm": 3.582643508911133, "learning_rate": 1.0080713854696397e-06, "loss": 0.2721, "step": 11120 }, { "epoch": 0.7597868415658946, "grad_norm": 4.737072944641113, "learning_rate": 1.0075275925829745e-06, "loss": 0.2752, "step": 11121 }, { "epoch": 0.7598551615768259, "grad_norm": 3.26420259475708, "learning_rate": 1.0069839217460711e-06, "loss": 0.2349, "step": 11122 }, { "epoch": 0.759923481587757, "grad_norm": 3.88118052482605, "learning_rate": 1.0064403729855513e-06, "loss": 0.3305, "step": 11123 }, { "epoch": 0.7599918015986883, "grad_norm": 4.970526695251465, "learning_rate": 1.0058969463280342e-06, "loss": 0.2949, "step": 11124 }, { "epoch": 0.7600601216096194, "grad_norm": 3.3273346424102783, "learning_rate": 1.0053536418001266e-06, "loss": 0.2674, "step": 11125 }, { "epoch": 0.7601284416205507, "grad_norm": 3.8261239528656006, "learning_rate": 1.0048104594284346e-06, "loss": 0.1977, "step": 11126 }, { "epoch": 0.7601967616314819, "grad_norm": 4.272781848907471, "learning_rate": 1.0042673992395552e-06, "loss": 0.3009, "step": 11127 }, { "epoch": 0.760265081642413, "grad_norm": 3.698442220687866, "learning_rate": 1.0037244612600816e-06, "loss": 0.3652, "step": 11128 }, { "epoch": 0.7603334016533443, "grad_norm": 4.434323787689209, "learning_rate": 1.0031816455165993e-06, "loss": 0.4506, "step": 11129 }, { "epoch": 0.7604017216642754, "grad_norm": 2.925929307937622, "learning_rate": 1.002638952035689e-06, "loss": 0.1757, "step": 11130 }, { "epoch": 0.7604700416752067, "grad_norm": 3.596129894256592, "learning_rate": 1.0020963808439247e-06, "loss": 0.1977, "step": 11131 }, { "epoch": 0.7605383616861379, "grad_norm": 3.2163050174713135, "learning_rate": 1.0015539319678759e-06, "loss": 0.1983, "step": 11132 }, { "epoch": 0.760606681697069, "grad_norm": 3.935429334640503, "learning_rate": 1.0010116054341028e-06, "loss": 0.2932, "step": 11133 }, { "epoch": 0.7606750017080003, "grad_norm": 2.4311225414276123, "learning_rate": 1.0004694012691628e-06, "loss": 0.2484, "step": 11134 }, { "epoch": 0.7607433217189314, "grad_norm": 3.5844016075134277, "learning_rate": 9.999273194996057e-07, "loss": 0.3257, "step": 11135 }, { "epoch": 0.7608116417298627, "grad_norm": 3.5789854526519775, "learning_rate": 9.993853601519765e-07, "loss": 0.2476, "step": 11136 }, { "epoch": 0.7608799617407939, "grad_norm": 4.030813217163086, "learning_rate": 9.988435232528132e-07, "loss": 0.2311, "step": 11137 }, { "epoch": 0.7609482817517251, "grad_norm": 3.4661240577697754, "learning_rate": 9.98301808828648e-07, "loss": 0.2365, "step": 11138 }, { "epoch": 0.7610166017626563, "grad_norm": 3.7556698322296143, "learning_rate": 9.977602169060069e-07, "loss": 0.3136, "step": 11139 }, { "epoch": 0.7610849217735874, "grad_norm": 3.809609889984131, "learning_rate": 9.972187475114121e-07, "loss": 0.1565, "step": 11140 }, { "epoch": 0.7611532417845187, "grad_norm": 4.4133524894714355, "learning_rate": 9.966774006713756e-07, "loss": 0.3529, "step": 11141 }, { "epoch": 0.7612215617954499, "grad_norm": 3.66919207572937, "learning_rate": 9.961361764124051e-07, "loss": 0.1463, "step": 11142 }, { "epoch": 0.7612898818063811, "grad_norm": 2.875694751739502, "learning_rate": 9.95595074761006e-07, "loss": 0.2187, "step": 11143 }, { "epoch": 0.7613582018173123, "grad_norm": 5.436810493469238, "learning_rate": 9.950540957436734e-07, "loss": 0.3567, "step": 11144 }, { "epoch": 0.7614265218282434, "grad_norm": 3.426673650741577, "learning_rate": 9.945132393868965e-07, "loss": 0.2713, "step": 11145 }, { "epoch": 0.7614948418391747, "grad_norm": 4.010721206665039, "learning_rate": 9.939725057171604e-07, "loss": 0.3127, "step": 11146 }, { "epoch": 0.7615631618501059, "grad_norm": 4.229510307312012, "learning_rate": 9.93431894760943e-07, "loss": 0.3319, "step": 11147 }, { "epoch": 0.7616314818610371, "grad_norm": 4.229679107666016, "learning_rate": 9.928914065447172e-07, "loss": 0.3807, "step": 11148 }, { "epoch": 0.7616998018719683, "grad_norm": 3.267190456390381, "learning_rate": 9.923510410949488e-07, "loss": 0.2017, "step": 11149 }, { "epoch": 0.7617681218828996, "grad_norm": 4.0538554191589355, "learning_rate": 9.918107984380983e-07, "loss": 0.2911, "step": 11150 }, { "epoch": 0.7618364418938307, "grad_norm": 4.305261135101318, "learning_rate": 9.912706786006201e-07, "loss": 0.2477, "step": 11151 }, { "epoch": 0.7619047619047619, "grad_norm": 4.683229923248291, "learning_rate": 9.90730681608963e-07, "loss": 0.3223, "step": 11152 }, { "epoch": 0.7619730819156931, "grad_norm": 5.417736530303955, "learning_rate": 9.901908074895673e-07, "loss": 0.3095, "step": 11153 }, { "epoch": 0.7620414019266243, "grad_norm": 2.8991191387176514, "learning_rate": 9.896510562688704e-07, "loss": 0.2656, "step": 11154 }, { "epoch": 0.7621097219375556, "grad_norm": 4.551339626312256, "learning_rate": 9.891114279733027e-07, "loss": 0.2841, "step": 11155 }, { "epoch": 0.7621780419484867, "grad_norm": 3.5920095443725586, "learning_rate": 9.885719226292878e-07, "loss": 0.2649, "step": 11156 }, { "epoch": 0.7622463619594179, "grad_norm": 4.1280837059021, "learning_rate": 9.880325402632443e-07, "loss": 0.2728, "step": 11157 }, { "epoch": 0.7623146819703491, "grad_norm": 4.3830485343933105, "learning_rate": 9.874932809015842e-07, "loss": 0.383, "step": 11158 }, { "epoch": 0.7623830019812803, "grad_norm": 3.149775743484497, "learning_rate": 9.869541445707134e-07, "loss": 0.2119, "step": 11159 }, { "epoch": 0.7624513219922116, "grad_norm": 4.57755184173584, "learning_rate": 9.864151312970334e-07, "loss": 0.3418, "step": 11160 }, { "epoch": 0.7625196420031427, "grad_norm": 3.641079902648926, "learning_rate": 9.858762411069359e-07, "loss": 0.2571, "step": 11161 }, { "epoch": 0.762587962014074, "grad_norm": 5.95427131652832, "learning_rate": 9.85337474026809e-07, "loss": 0.3367, "step": 11162 }, { "epoch": 0.7626562820250051, "grad_norm": 4.693990230560303, "learning_rate": 9.847988300830364e-07, "loss": 0.276, "step": 11163 }, { "epoch": 0.7627246020359363, "grad_norm": 4.16876745223999, "learning_rate": 9.842603093019947e-07, "loss": 0.2109, "step": 11164 }, { "epoch": 0.7627929220468675, "grad_norm": 4.4041337966918945, "learning_rate": 9.837219117100514e-07, "loss": 0.2738, "step": 11165 }, { "epoch": 0.7628612420577987, "grad_norm": 4.870697498321533, "learning_rate": 9.831836373335715e-07, "loss": 0.3903, "step": 11166 }, { "epoch": 0.76292956206873, "grad_norm": 5.068111896514893, "learning_rate": 9.826454861989125e-07, "loss": 0.2669, "step": 11167 }, { "epoch": 0.7629978820796611, "grad_norm": 6.245829105377197, "learning_rate": 9.821074583324272e-07, "loss": 0.3124, "step": 11168 }, { "epoch": 0.7630662020905923, "grad_norm": 4.161472320556641, "learning_rate": 9.815695537604607e-07, "loss": 0.2374, "step": 11169 }, { "epoch": 0.7631345221015235, "grad_norm": 3.7988882064819336, "learning_rate": 9.810317725093525e-07, "loss": 0.2589, "step": 11170 }, { "epoch": 0.7632028421124547, "grad_norm": 3.85099720954895, "learning_rate": 9.804941146054371e-07, "loss": 0.2415, "step": 11171 }, { "epoch": 0.763271162123386, "grad_norm": 3.392037868499756, "learning_rate": 9.79956580075043e-07, "loss": 0.2568, "step": 11172 }, { "epoch": 0.7633394821343171, "grad_norm": 3.7017245292663574, "learning_rate": 9.794191689444891e-07, "loss": 0.2553, "step": 11173 }, { "epoch": 0.7634078021452484, "grad_norm": 3.9323177337646484, "learning_rate": 9.788818812400928e-07, "loss": 0.3311, "step": 11174 }, { "epoch": 0.7634761221561795, "grad_norm": 4.751031875610352, "learning_rate": 9.78344716988162e-07, "loss": 0.3464, "step": 11175 }, { "epoch": 0.7635444421671107, "grad_norm": 5.804032802581787, "learning_rate": 9.778076762150029e-07, "loss": 0.3221, "step": 11176 }, { "epoch": 0.763612762178042, "grad_norm": 3.4983954429626465, "learning_rate": 9.772707589469123e-07, "loss": 0.2214, "step": 11177 }, { "epoch": 0.7636810821889731, "grad_norm": 4.120029449462891, "learning_rate": 9.767339652101802e-07, "loss": 0.3052, "step": 11178 }, { "epoch": 0.7637494021999044, "grad_norm": 2.652920961380005, "learning_rate": 9.761972950310928e-07, "loss": 0.233, "step": 11179 }, { "epoch": 0.7638177222108355, "grad_norm": 4.2924418449401855, "learning_rate": 9.75660748435929e-07, "loss": 0.2418, "step": 11180 }, { "epoch": 0.7638860422217667, "grad_norm": 4.8175811767578125, "learning_rate": 9.751243254509627e-07, "loss": 0.2737, "step": 11181 }, { "epoch": 0.763954362232698, "grad_norm": 3.615351676940918, "learning_rate": 9.745880261024606e-07, "loss": 0.2397, "step": 11182 }, { "epoch": 0.7640226822436291, "grad_norm": 5.642849445343018, "learning_rate": 9.740518504166845e-07, "loss": 0.3375, "step": 11183 }, { "epoch": 0.7640910022545604, "grad_norm": 2.9472744464874268, "learning_rate": 9.735157984198886e-07, "loss": 0.2755, "step": 11184 }, { "epoch": 0.7641593222654915, "grad_norm": 3.9079015254974365, "learning_rate": 9.729798701383235e-07, "loss": 0.2127, "step": 11185 }, { "epoch": 0.7642276422764228, "grad_norm": 4.219180583953857, "learning_rate": 9.724440655982302e-07, "loss": 0.247, "step": 11186 }, { "epoch": 0.764295962287354, "grad_norm": 3.3291053771972656, "learning_rate": 9.719083848258464e-07, "loss": 0.2478, "step": 11187 }, { "epoch": 0.7643642822982851, "grad_norm": 3.3205606937408447, "learning_rate": 9.713728278474032e-07, "loss": 0.2804, "step": 11188 }, { "epoch": 0.7644326023092164, "grad_norm": 2.3449244499206543, "learning_rate": 9.708373946891253e-07, "loss": 0.1663, "step": 11189 }, { "epoch": 0.7645009223201475, "grad_norm": 4.840405464172363, "learning_rate": 9.703020853772313e-07, "loss": 0.2039, "step": 11190 }, { "epoch": 0.7645692423310788, "grad_norm": 3.05912184715271, "learning_rate": 9.697668999379339e-07, "loss": 0.1882, "step": 11191 }, { "epoch": 0.76463756234201, "grad_norm": 4.0805253982543945, "learning_rate": 9.692318383974402e-07, "loss": 0.2254, "step": 11192 }, { "epoch": 0.7647058823529411, "grad_norm": 4.72450065612793, "learning_rate": 9.686969007819509e-07, "loss": 0.3719, "step": 11193 }, { "epoch": 0.7647742023638724, "grad_norm": 3.1292507648468018, "learning_rate": 9.681620871176591e-07, "loss": 0.3085, "step": 11194 }, { "epoch": 0.7648425223748035, "grad_norm": 3.864163398742676, "learning_rate": 9.676273974307533e-07, "loss": 0.3048, "step": 11195 }, { "epoch": 0.7649108423857348, "grad_norm": 3.687225341796875, "learning_rate": 9.670928317474172e-07, "loss": 0.2885, "step": 11196 }, { "epoch": 0.764979162396666, "grad_norm": 4.889645576477051, "learning_rate": 9.66558390093828e-07, "loss": 0.3386, "step": 11197 }, { "epoch": 0.7650474824075972, "grad_norm": 4.671293258666992, "learning_rate": 9.660240724961525e-07, "loss": 0.2587, "step": 11198 }, { "epoch": 0.7651158024185284, "grad_norm": 4.235963344573975, "learning_rate": 9.654898789805573e-07, "loss": 0.3321, "step": 11199 }, { "epoch": 0.7651841224294595, "grad_norm": 2.433108329772949, "learning_rate": 9.649558095731997e-07, "loss": 0.1443, "step": 11200 }, { "epoch": 0.7652524424403908, "grad_norm": 3.5607686042785645, "learning_rate": 9.644218643002316e-07, "loss": 0.2351, "step": 11201 }, { "epoch": 0.765320762451322, "grad_norm": 3.698044538497925, "learning_rate": 9.638880431877992e-07, "loss": 0.2697, "step": 11202 }, { "epoch": 0.7653890824622532, "grad_norm": 3.33135724067688, "learning_rate": 9.633543462620419e-07, "loss": 0.2128, "step": 11203 }, { "epoch": 0.7654574024731844, "grad_norm": 6.241321563720703, "learning_rate": 9.62820773549094e-07, "loss": 0.3514, "step": 11204 }, { "epoch": 0.7655257224841155, "grad_norm": 5.510430812835693, "learning_rate": 9.622873250750837e-07, "loss": 0.2602, "step": 11205 }, { "epoch": 0.7655940424950468, "grad_norm": 3.001880645751953, "learning_rate": 9.617540008661306e-07, "loss": 0.179, "step": 11206 }, { "epoch": 0.765662362505978, "grad_norm": 4.477391719818115, "learning_rate": 9.61220800948351e-07, "loss": 0.2957, "step": 11207 }, { "epoch": 0.7657306825169092, "grad_norm": 3.732452392578125, "learning_rate": 9.606877253478548e-07, "loss": 0.2659, "step": 11208 }, { "epoch": 0.7657990025278404, "grad_norm": 3.8974361419677734, "learning_rate": 9.60154774090745e-07, "loss": 0.3084, "step": 11209 }, { "epoch": 0.7658673225387717, "grad_norm": 3.8648147583007812, "learning_rate": 9.596219472031187e-07, "loss": 0.2733, "step": 11210 }, { "epoch": 0.7659356425497028, "grad_norm": 3.138779878616333, "learning_rate": 9.590892447110672e-07, "loss": 0.2132, "step": 11211 }, { "epoch": 0.766003962560634, "grad_norm": 3.7225961685180664, "learning_rate": 9.585566666406753e-07, "loss": 0.2545, "step": 11212 }, { "epoch": 0.7660722825715652, "grad_norm": 3.859931707382202, "learning_rate": 9.580242130180236e-07, "loss": 0.2991, "step": 11213 }, { "epoch": 0.7661406025824964, "grad_norm": 5.143956184387207, "learning_rate": 9.57491883869181e-07, "loss": 0.3072, "step": 11214 }, { "epoch": 0.7662089225934277, "grad_norm": 5.3121018409729, "learning_rate": 9.569596792202178e-07, "loss": 0.354, "step": 11215 }, { "epoch": 0.7662772426043588, "grad_norm": 3.6521525382995605, "learning_rate": 9.564275990971936e-07, "loss": 0.276, "step": 11216 }, { "epoch": 0.76634556261529, "grad_norm": 4.054306507110596, "learning_rate": 9.558956435261637e-07, "loss": 0.3253, "step": 11217 }, { "epoch": 0.7664138826262212, "grad_norm": 6.991160869598389, "learning_rate": 9.553638125331753e-07, "loss": 0.242, "step": 11218 }, { "epoch": 0.7664822026371524, "grad_norm": 5.22837495803833, "learning_rate": 9.548321061442708e-07, "loss": 0.3462, "step": 11219 }, { "epoch": 0.7665505226480837, "grad_norm": 5.459592819213867, "learning_rate": 9.54300524385487e-07, "loss": 0.2105, "step": 11220 }, { "epoch": 0.7666188426590148, "grad_norm": 3.540794849395752, "learning_rate": 9.537690672828535e-07, "loss": 0.2068, "step": 11221 }, { "epoch": 0.7666871626699461, "grad_norm": 3.3772976398468018, "learning_rate": 9.532377348623952e-07, "loss": 0.2114, "step": 11222 }, { "epoch": 0.7667554826808772, "grad_norm": 3.4790701866149902, "learning_rate": 9.527065271501294e-07, "loss": 0.2033, "step": 11223 }, { "epoch": 0.7668238026918084, "grad_norm": 3.43430495262146, "learning_rate": 9.52175444172068e-07, "loss": 0.2483, "step": 11224 }, { "epoch": 0.7668921227027397, "grad_norm": 4.052552223205566, "learning_rate": 9.51644485954218e-07, "loss": 0.3977, "step": 11225 }, { "epoch": 0.7669604427136708, "grad_norm": 4.061938285827637, "learning_rate": 9.511136525225768e-07, "loss": 0.3469, "step": 11226 }, { "epoch": 0.7670287627246021, "grad_norm": 3.634559154510498, "learning_rate": 9.50582943903139e-07, "loss": 0.2001, "step": 11227 }, { "epoch": 0.7670970827355332, "grad_norm": 4.4464898109436035, "learning_rate": 9.500523601218909e-07, "loss": 0.3039, "step": 11228 }, { "epoch": 0.7671654027464644, "grad_norm": 3.0049262046813965, "learning_rate": 9.495219012048169e-07, "loss": 0.1936, "step": 11229 }, { "epoch": 0.7672337227573957, "grad_norm": 5.427870750427246, "learning_rate": 9.489915671778894e-07, "loss": 0.2721, "step": 11230 }, { "epoch": 0.7673020427683268, "grad_norm": 3.244281053543091, "learning_rate": 9.484613580670777e-07, "loss": 0.2867, "step": 11231 }, { "epoch": 0.7673703627792581, "grad_norm": 4.330465316772461, "learning_rate": 9.479312738983455e-07, "loss": 0.2997, "step": 11232 }, { "epoch": 0.7674386827901892, "grad_norm": 5.063919544219971, "learning_rate": 9.4740131469765e-07, "loss": 0.4407, "step": 11233 }, { "epoch": 0.7675070028011205, "grad_norm": 2.951124429702759, "learning_rate": 9.468714804909396e-07, "loss": 0.2593, "step": 11234 }, { "epoch": 0.7675753228120517, "grad_norm": 3.8114967346191406, "learning_rate": 9.463417713041611e-07, "loss": 0.2494, "step": 11235 }, { "epoch": 0.7676436428229828, "grad_norm": 3.9197516441345215, "learning_rate": 9.458121871632528e-07, "loss": 0.2981, "step": 11236 }, { "epoch": 0.7677119628339141, "grad_norm": 5.08396577835083, "learning_rate": 9.452827280941471e-07, "loss": 0.2656, "step": 11237 }, { "epoch": 0.7677802828448452, "grad_norm": 4.620965480804443, "learning_rate": 9.447533941227686e-07, "loss": 0.2856, "step": 11238 }, { "epoch": 0.7678486028557765, "grad_norm": 3.9162051677703857, "learning_rate": 9.442241852750389e-07, "loss": 0.2486, "step": 11239 }, { "epoch": 0.7679169228667077, "grad_norm": 3.7247307300567627, "learning_rate": 9.43695101576871e-07, "loss": 0.1597, "step": 11240 }, { "epoch": 0.7679852428776388, "grad_norm": 4.442137718200684, "learning_rate": 9.431661430541735e-07, "loss": 0.3095, "step": 11241 }, { "epoch": 0.7680535628885701, "grad_norm": 5.135228633880615, "learning_rate": 9.426373097328481e-07, "loss": 0.2855, "step": 11242 }, { "epoch": 0.7681218828995012, "grad_norm": 4.161400318145752, "learning_rate": 9.421086016387898e-07, "loss": 0.3299, "step": 11243 }, { "epoch": 0.7681902029104325, "grad_norm": 4.608334541320801, "learning_rate": 9.415800187978882e-07, "loss": 0.293, "step": 11244 }, { "epoch": 0.7682585229213637, "grad_norm": 4.741596221923828, "learning_rate": 9.41051561236027e-07, "loss": 0.2177, "step": 11245 }, { "epoch": 0.7683268429322949, "grad_norm": 4.42338228225708, "learning_rate": 9.405232289790839e-07, "loss": 0.3001, "step": 11246 }, { "epoch": 0.7683951629432261, "grad_norm": 5.1057233810424805, "learning_rate": 9.399950220529281e-07, "loss": 0.2317, "step": 11247 }, { "epoch": 0.7684634829541572, "grad_norm": 4.532284259796143, "learning_rate": 9.394669404834246e-07, "loss": 0.2855, "step": 11248 }, { "epoch": 0.7685318029650885, "grad_norm": 3.4919686317443848, "learning_rate": 9.389389842964339e-07, "loss": 0.2627, "step": 11249 }, { "epoch": 0.7686001229760197, "grad_norm": 2.984165668487549, "learning_rate": 9.384111535178089e-07, "loss": 0.2269, "step": 11250 }, { "epoch": 0.7686684429869509, "grad_norm": 4.739665985107422, "learning_rate": 9.37883448173394e-07, "loss": 0.3342, "step": 11251 }, { "epoch": 0.7687367629978821, "grad_norm": 3.997239351272583, "learning_rate": 9.373558682890304e-07, "loss": 0.2527, "step": 11252 }, { "epoch": 0.7688050830088132, "grad_norm": 3.590301990509033, "learning_rate": 9.368284138905524e-07, "loss": 0.2818, "step": 11253 }, { "epoch": 0.7688734030197445, "grad_norm": 4.914782524108887, "learning_rate": 9.363010850037878e-07, "loss": 0.3935, "step": 11254 }, { "epoch": 0.7689417230306757, "grad_norm": 6.1133246421813965, "learning_rate": 9.35773881654559e-07, "loss": 0.3122, "step": 11255 }, { "epoch": 0.7690100430416069, "grad_norm": 2.475883722305298, "learning_rate": 9.352468038686814e-07, "loss": 0.141, "step": 11256 }, { "epoch": 0.7690783630525381, "grad_norm": 3.3820626735687256, "learning_rate": 9.347198516719645e-07, "loss": 0.2301, "step": 11257 }, { "epoch": 0.7691466830634693, "grad_norm": 3.9695703983306885, "learning_rate": 9.341930250902132e-07, "loss": 0.2418, "step": 11258 }, { "epoch": 0.7692150030744005, "grad_norm": 4.235678195953369, "learning_rate": 9.336663241492223e-07, "loss": 0.2346, "step": 11259 }, { "epoch": 0.7692833230853316, "grad_norm": 3.9517312049865723, "learning_rate": 9.331397488747843e-07, "loss": 0.2379, "step": 11260 }, { "epoch": 0.7693516430962629, "grad_norm": 3.1152541637420654, "learning_rate": 9.326132992926837e-07, "loss": 0.2134, "step": 11261 }, { "epoch": 0.7694199631071941, "grad_norm": 3.5550336837768555, "learning_rate": 9.320869754287e-07, "loss": 0.2177, "step": 11262 }, { "epoch": 0.7694882831181253, "grad_norm": 4.208772659301758, "learning_rate": 9.315607773086056e-07, "loss": 0.2565, "step": 11263 }, { "epoch": 0.7695566031290565, "grad_norm": 3.7386837005615234, "learning_rate": 9.310347049581667e-07, "loss": 0.247, "step": 11264 }, { "epoch": 0.7696249231399876, "grad_norm": 3.5635058879852295, "learning_rate": 9.305087584031445e-07, "loss": 0.2512, "step": 11265 }, { "epoch": 0.7696932431509189, "grad_norm": 4.247020244598389, "learning_rate": 9.299829376692932e-07, "loss": 0.2974, "step": 11266 }, { "epoch": 0.7697615631618501, "grad_norm": 3.1362361907958984, "learning_rate": 9.294572427823583e-07, "loss": 0.2038, "step": 11267 }, { "epoch": 0.7698298831727813, "grad_norm": 4.324026584625244, "learning_rate": 9.289316737680848e-07, "loss": 0.2561, "step": 11268 }, { "epoch": 0.7698982031837125, "grad_norm": 3.5616672039031982, "learning_rate": 9.284062306522075e-07, "loss": 0.2595, "step": 11269 }, { "epoch": 0.7699665231946438, "grad_norm": 6.5568389892578125, "learning_rate": 9.278809134604567e-07, "loss": 0.2861, "step": 11270 }, { "epoch": 0.7700348432055749, "grad_norm": 3.984511375427246, "learning_rate": 9.27355722218554e-07, "loss": 0.1862, "step": 11271 }, { "epoch": 0.7701031632165061, "grad_norm": 2.90099835395813, "learning_rate": 9.268306569522174e-07, "loss": 0.2663, "step": 11272 }, { "epoch": 0.7701714832274373, "grad_norm": 2.856168746948242, "learning_rate": 9.26305717687158e-07, "loss": 0.1934, "step": 11273 }, { "epoch": 0.7702398032383685, "grad_norm": 4.42318058013916, "learning_rate": 9.257809044490807e-07, "loss": 0.4134, "step": 11274 }, { "epoch": 0.7703081232492998, "grad_norm": 3.8880527019500732, "learning_rate": 9.25256217263684e-07, "loss": 0.2682, "step": 11275 }, { "epoch": 0.7703764432602309, "grad_norm": 3.4852895736694336, "learning_rate": 9.24731656156661e-07, "loss": 0.2643, "step": 11276 }, { "epoch": 0.7704447632711621, "grad_norm": 7.397267818450928, "learning_rate": 9.242072211536977e-07, "loss": 0.2522, "step": 11277 }, { "epoch": 0.7705130832820933, "grad_norm": 3.8704159259796143, "learning_rate": 9.236829122804755e-07, "loss": 0.2303, "step": 11278 }, { "epoch": 0.7705814032930245, "grad_norm": 5.116611957550049, "learning_rate": 9.231587295626661e-07, "loss": 0.2507, "step": 11279 }, { "epoch": 0.7706497233039558, "grad_norm": 4.902030944824219, "learning_rate": 9.226346730259383e-07, "loss": 0.3189, "step": 11280 }, { "epoch": 0.7707180433148869, "grad_norm": 3.783510446548462, "learning_rate": 9.221107426959531e-07, "loss": 0.2926, "step": 11281 }, { "epoch": 0.7707863633258182, "grad_norm": 3.1386749744415283, "learning_rate": 9.215869385983689e-07, "loss": 0.2317, "step": 11282 }, { "epoch": 0.7708546833367493, "grad_norm": 3.0037198066711426, "learning_rate": 9.21063260758832e-07, "loss": 0.2389, "step": 11283 }, { "epoch": 0.7709230033476805, "grad_norm": 4.1581645011901855, "learning_rate": 9.205397092029865e-07, "loss": 0.358, "step": 11284 }, { "epoch": 0.7709913233586118, "grad_norm": 4.111618518829346, "learning_rate": 9.200162839564688e-07, "loss": 0.2612, "step": 11285 }, { "epoch": 0.7710596433695429, "grad_norm": 5.07423734664917, "learning_rate": 9.194929850449117e-07, "loss": 0.3896, "step": 11286 }, { "epoch": 0.7711279633804742, "grad_norm": 3.586512804031372, "learning_rate": 9.189698124939359e-07, "loss": 0.2136, "step": 11287 }, { "epoch": 0.7711962833914053, "grad_norm": 4.4560136795043945, "learning_rate": 9.18446766329163e-07, "loss": 0.3673, "step": 11288 }, { "epoch": 0.7712646034023365, "grad_norm": 3.9311580657958984, "learning_rate": 9.17923846576204e-07, "loss": 0.195, "step": 11289 }, { "epoch": 0.7713329234132678, "grad_norm": 3.6105380058288574, "learning_rate": 9.174010532606665e-07, "loss": 0.216, "step": 11290 }, { "epoch": 0.7714012434241989, "grad_norm": 3.570741653442383, "learning_rate": 9.168783864081479e-07, "loss": 0.2705, "step": 11291 }, { "epoch": 0.7714695634351302, "grad_norm": 4.239686012268066, "learning_rate": 9.163558460442426e-07, "loss": 0.278, "step": 11292 }, { "epoch": 0.7715378834460613, "grad_norm": 3.7708592414855957, "learning_rate": 9.158334321945386e-07, "loss": 0.3188, "step": 11293 }, { "epoch": 0.7716062034569926, "grad_norm": 3.343890905380249, "learning_rate": 9.153111448846165e-07, "loss": 0.2206, "step": 11294 }, { "epoch": 0.7716745234679238, "grad_norm": 2.443161964416504, "learning_rate": 9.147889841400517e-07, "loss": 0.1714, "step": 11295 }, { "epoch": 0.7717428434788549, "grad_norm": 3.9837241172790527, "learning_rate": 9.14266949986413e-07, "loss": 0.2003, "step": 11296 }, { "epoch": 0.7718111634897862, "grad_norm": 3.9229509830474854, "learning_rate": 9.137450424492627e-07, "loss": 0.2461, "step": 11297 }, { "epoch": 0.7718794835007173, "grad_norm": 4.254953861236572, "learning_rate": 9.132232615541587e-07, "loss": 0.2168, "step": 11298 }, { "epoch": 0.7719478035116486, "grad_norm": 3.6913139820098877, "learning_rate": 9.127016073266488e-07, "loss": 0.2721, "step": 11299 }, { "epoch": 0.7720161235225798, "grad_norm": 3.4829180240631104, "learning_rate": 9.121800797922786e-07, "loss": 0.2962, "step": 11300 }, { "epoch": 0.7720844435335109, "grad_norm": 3.6020681858062744, "learning_rate": 9.116586789765842e-07, "loss": 0.288, "step": 11301 }, { "epoch": 0.7721527635444422, "grad_norm": 4.8792805671691895, "learning_rate": 9.111374049051007e-07, "loss": 0.3071, "step": 11302 }, { "epoch": 0.7722210835553733, "grad_norm": 3.8103079795837402, "learning_rate": 9.106162576033505e-07, "loss": 0.2689, "step": 11303 }, { "epoch": 0.7722894035663046, "grad_norm": 2.966658592224121, "learning_rate": 9.100952370968533e-07, "loss": 0.2602, "step": 11304 }, { "epoch": 0.7723577235772358, "grad_norm": 4.403379440307617, "learning_rate": 9.095743434111226e-07, "loss": 0.3045, "step": 11305 }, { "epoch": 0.772426043588167, "grad_norm": 4.917736053466797, "learning_rate": 9.090535765716651e-07, "loss": 0.2318, "step": 11306 }, { "epoch": 0.7724943635990982, "grad_norm": 6.022473335266113, "learning_rate": 9.085329366039817e-07, "loss": 0.2519, "step": 11307 }, { "epoch": 0.7725626836100293, "grad_norm": 3.5214221477508545, "learning_rate": 9.080124235335658e-07, "loss": 0.278, "step": 11308 }, { "epoch": 0.7726310036209606, "grad_norm": 6.202938556671143, "learning_rate": 9.074920373859065e-07, "loss": 0.2647, "step": 11309 }, { "epoch": 0.7726993236318918, "grad_norm": 3.150636672973633, "learning_rate": 9.069717781864852e-07, "loss": 0.1946, "step": 11310 }, { "epoch": 0.772767643642823, "grad_norm": 3.5478386878967285, "learning_rate": 9.064516459607787e-07, "loss": 0.2366, "step": 11311 }, { "epoch": 0.7728359636537542, "grad_norm": 3.3895628452301025, "learning_rate": 9.059316407342547e-07, "loss": 0.2568, "step": 11312 }, { "epoch": 0.7729042836646853, "grad_norm": 4.09794282913208, "learning_rate": 9.054117625323773e-07, "loss": 0.268, "step": 11313 }, { "epoch": 0.7729726036756166, "grad_norm": 3.766026496887207, "learning_rate": 9.048920113806023e-07, "loss": 0.3082, "step": 11314 }, { "epoch": 0.7730409236865478, "grad_norm": 4.008050918579102, "learning_rate": 9.043723873043843e-07, "loss": 0.2922, "step": 11315 }, { "epoch": 0.773109243697479, "grad_norm": 2.7190258502960205, "learning_rate": 9.038528903291643e-07, "loss": 0.2785, "step": 11316 }, { "epoch": 0.7731775637084102, "grad_norm": 4.112956523895264, "learning_rate": 9.033335204803816e-07, "loss": 0.2901, "step": 11317 }, { "epoch": 0.7732458837193414, "grad_norm": 6.069885730743408, "learning_rate": 9.028142777834686e-07, "loss": 0.4841, "step": 11318 }, { "epoch": 0.7733142037302726, "grad_norm": 4.252133846282959, "learning_rate": 9.022951622638526e-07, "loss": 0.2563, "step": 11319 }, { "epoch": 0.7733825237412038, "grad_norm": 3.9849042892456055, "learning_rate": 9.017761739469496e-07, "loss": 0.3018, "step": 11320 }, { "epoch": 0.773450843752135, "grad_norm": 5.118330001831055, "learning_rate": 9.012573128581764e-07, "loss": 0.2699, "step": 11321 }, { "epoch": 0.7735191637630662, "grad_norm": 3.7757885456085205, "learning_rate": 9.007385790229396e-07, "loss": 0.2542, "step": 11322 }, { "epoch": 0.7735874837739974, "grad_norm": 4.4551496505737305, "learning_rate": 9.002199724666406e-07, "loss": 0.3095, "step": 11323 }, { "epoch": 0.7736558037849286, "grad_norm": 3.69624400138855, "learning_rate": 8.997014932146729e-07, "loss": 0.3283, "step": 11324 }, { "epoch": 0.7737241237958598, "grad_norm": 3.0262885093688965, "learning_rate": 8.99183141292425e-07, "loss": 0.2081, "step": 11325 }, { "epoch": 0.773792443806791, "grad_norm": 4.344710826873779, "learning_rate": 8.986649167252803e-07, "loss": 0.2773, "step": 11326 }, { "epoch": 0.7738607638177222, "grad_norm": 4.04285192489624, "learning_rate": 8.981468195386144e-07, "loss": 0.2741, "step": 11327 }, { "epoch": 0.7739290838286534, "grad_norm": 5.208704948425293, "learning_rate": 8.976288497577968e-07, "loss": 0.2407, "step": 11328 }, { "epoch": 0.7739974038395846, "grad_norm": 4.170088768005371, "learning_rate": 8.971110074081918e-07, "loss": 0.2511, "step": 11329 }, { "epoch": 0.7740657238505159, "grad_norm": 3.8257386684417725, "learning_rate": 8.965932925151564e-07, "loss": 0.243, "step": 11330 }, { "epoch": 0.774134043861447, "grad_norm": 4.297859191894531, "learning_rate": 8.960757051040429e-07, "loss": 0.2944, "step": 11331 }, { "epoch": 0.7742023638723782, "grad_norm": 3.767549514770508, "learning_rate": 8.955582452001938e-07, "loss": 0.2201, "step": 11332 }, { "epoch": 0.7742706838833094, "grad_norm": 3.4926509857177734, "learning_rate": 8.950409128289494e-07, "loss": 0.2522, "step": 11333 }, { "epoch": 0.7743390038942406, "grad_norm": 4.165724277496338, "learning_rate": 8.945237080156401e-07, "loss": 0.2527, "step": 11334 }, { "epoch": 0.7744073239051719, "grad_norm": 6.7410383224487305, "learning_rate": 8.940066307855961e-07, "loss": 0.3817, "step": 11335 }, { "epoch": 0.774475643916103, "grad_norm": 4.903865337371826, "learning_rate": 8.934896811641338e-07, "loss": 0.2961, "step": 11336 }, { "epoch": 0.7745439639270342, "grad_norm": 4.771302700042725, "learning_rate": 8.929728591765679e-07, "loss": 0.2955, "step": 11337 }, { "epoch": 0.7746122839379654, "grad_norm": 4.529160499572754, "learning_rate": 8.924561648482058e-07, "loss": 0.2243, "step": 11338 }, { "epoch": 0.7746806039488966, "grad_norm": 4.329599380493164, "learning_rate": 8.919395982043494e-07, "loss": 0.2182, "step": 11339 }, { "epoch": 0.7747489239598279, "grad_norm": 4.242663860321045, "learning_rate": 8.914231592702911e-07, "loss": 0.2602, "step": 11340 }, { "epoch": 0.774817243970759, "grad_norm": 3.1786675453186035, "learning_rate": 8.90906848071322e-07, "loss": 0.2114, "step": 11341 }, { "epoch": 0.7748855639816903, "grad_norm": 3.516385555267334, "learning_rate": 8.903906646327241e-07, "loss": 0.2429, "step": 11342 }, { "epoch": 0.7749538839926214, "grad_norm": 4.313797473907471, "learning_rate": 8.898746089797741e-07, "loss": 0.3251, "step": 11343 }, { "epoch": 0.7750222040035526, "grad_norm": 3.7133119106292725, "learning_rate": 8.893586811377399e-07, "loss": 0.3436, "step": 11344 }, { "epoch": 0.7750905240144839, "grad_norm": 4.156843662261963, "learning_rate": 8.888428811318863e-07, "loss": 0.2027, "step": 11345 }, { "epoch": 0.775158844025415, "grad_norm": 3.1116268634796143, "learning_rate": 8.883272089874706e-07, "loss": 0.2218, "step": 11346 }, { "epoch": 0.7752271640363463, "grad_norm": 2.7577130794525146, "learning_rate": 8.878116647297436e-07, "loss": 0.2213, "step": 11347 }, { "epoch": 0.7752954840472774, "grad_norm": 4.920604228973389, "learning_rate": 8.872962483839508e-07, "loss": 0.3602, "step": 11348 }, { "epoch": 0.7753638040582086, "grad_norm": 3.7180593013763428, "learning_rate": 8.867809599753303e-07, "loss": 0.2584, "step": 11349 }, { "epoch": 0.7754321240691399, "grad_norm": 3.7344634532928467, "learning_rate": 8.86265799529114e-07, "loss": 0.2807, "step": 11350 }, { "epoch": 0.775500444080071, "grad_norm": 3.0603384971618652, "learning_rate": 8.8575076707053e-07, "loss": 0.1513, "step": 11351 }, { "epoch": 0.7755687640910023, "grad_norm": 4.340585231781006, "learning_rate": 8.85235862624795e-07, "loss": 0.317, "step": 11352 }, { "epoch": 0.7756370841019334, "grad_norm": 5.6500420570373535, "learning_rate": 8.847210862171235e-07, "loss": 0.2418, "step": 11353 }, { "epoch": 0.7757054041128647, "grad_norm": 4.855808734893799, "learning_rate": 8.842064378727237e-07, "loss": 0.279, "step": 11354 }, { "epoch": 0.7757737241237959, "grad_norm": 3.7018063068389893, "learning_rate": 8.836919176167975e-07, "loss": 0.239, "step": 11355 }, { "epoch": 0.775842044134727, "grad_norm": 4.642219066619873, "learning_rate": 8.831775254745372e-07, "loss": 0.2727, "step": 11356 }, { "epoch": 0.7759103641456583, "grad_norm": 3.9199435710906982, "learning_rate": 8.826632614711319e-07, "loss": 0.2817, "step": 11357 }, { "epoch": 0.7759786841565894, "grad_norm": 5.386734485626221, "learning_rate": 8.82149125631764e-07, "loss": 0.305, "step": 11358 }, { "epoch": 0.7760470041675207, "grad_norm": 4.2372660636901855, "learning_rate": 8.816351179816098e-07, "loss": 0.2693, "step": 11359 }, { "epoch": 0.7761153241784519, "grad_norm": 3.4215474128723145, "learning_rate": 8.811212385458381e-07, "loss": 0.2026, "step": 11360 }, { "epoch": 0.776183644189383, "grad_norm": 3.5935120582580566, "learning_rate": 8.806074873496129e-07, "loss": 0.2251, "step": 11361 }, { "epoch": 0.7762519642003143, "grad_norm": 3.2515151500701904, "learning_rate": 8.800938644180909e-07, "loss": 0.1967, "step": 11362 }, { "epoch": 0.7763202842112454, "grad_norm": 3.7982444763183594, "learning_rate": 8.79580369776424e-07, "loss": 0.3281, "step": 11363 }, { "epoch": 0.7763886042221767, "grad_norm": 3.899780511856079, "learning_rate": 8.790670034497544e-07, "loss": 0.2555, "step": 11364 }, { "epoch": 0.7764569242331079, "grad_norm": 4.850103855133057, "learning_rate": 8.785537654632214e-07, "loss": 0.3753, "step": 11365 }, { "epoch": 0.7765252442440391, "grad_norm": 4.270620346069336, "learning_rate": 8.780406558419572e-07, "loss": 0.3056, "step": 11366 }, { "epoch": 0.7765935642549703, "grad_norm": 3.6333658695220947, "learning_rate": 8.775276746110872e-07, "loss": 0.1937, "step": 11367 }, { "epoch": 0.7766618842659014, "grad_norm": 4.958290100097656, "learning_rate": 8.770148217957303e-07, "loss": 0.289, "step": 11368 }, { "epoch": 0.7767302042768327, "grad_norm": 4.505704879760742, "learning_rate": 8.765020974210001e-07, "loss": 0.3494, "step": 11369 }, { "epoch": 0.7767985242877639, "grad_norm": 3.4758803844451904, "learning_rate": 8.759895015120037e-07, "loss": 0.2199, "step": 11370 }, { "epoch": 0.7768668442986951, "grad_norm": 4.248950958251953, "learning_rate": 8.754770340938416e-07, "loss": 0.2051, "step": 11371 }, { "epoch": 0.7769351643096263, "grad_norm": 4.544951915740967, "learning_rate": 8.749646951916064e-07, "loss": 0.3961, "step": 11372 }, { "epoch": 0.7770034843205574, "grad_norm": 4.901768207550049, "learning_rate": 8.744524848303865e-07, "loss": 0.2968, "step": 11373 }, { "epoch": 0.7770718043314887, "grad_norm": 4.478786468505859, "learning_rate": 8.739404030352646e-07, "loss": 0.2005, "step": 11374 }, { "epoch": 0.7771401243424199, "grad_norm": 2.901214122772217, "learning_rate": 8.734284498313154e-07, "loss": 0.2705, "step": 11375 }, { "epoch": 0.7772084443533511, "grad_norm": 4.152034759521484, "learning_rate": 8.729166252436087e-07, "loss": 0.3199, "step": 11376 }, { "epoch": 0.7772767643642823, "grad_norm": 3.767096996307373, "learning_rate": 8.724049292972057e-07, "loss": 0.2467, "step": 11377 }, { "epoch": 0.7773450843752135, "grad_norm": 4.505956172943115, "learning_rate": 8.718933620171636e-07, "loss": 0.3303, "step": 11378 }, { "epoch": 0.7774134043861447, "grad_norm": 4.850427150726318, "learning_rate": 8.713819234285319e-07, "loss": 0.2808, "step": 11379 }, { "epoch": 0.7774817243970759, "grad_norm": 4.623610019683838, "learning_rate": 8.708706135563552e-07, "loss": 0.2652, "step": 11380 }, { "epoch": 0.7775500444080071, "grad_norm": 2.6851511001586914, "learning_rate": 8.703594324256706e-07, "loss": 0.2029, "step": 11381 }, { "epoch": 0.7776183644189383, "grad_norm": 4.460541248321533, "learning_rate": 8.698483800615089e-07, "loss": 0.2754, "step": 11382 }, { "epoch": 0.7776866844298695, "grad_norm": 2.9478087425231934, "learning_rate": 8.693374564888958e-07, "loss": 0.2244, "step": 11383 }, { "epoch": 0.7777550044408007, "grad_norm": 3.5071616172790527, "learning_rate": 8.688266617328504e-07, "loss": 0.2422, "step": 11384 }, { "epoch": 0.777823324451732, "grad_norm": 4.155479907989502, "learning_rate": 8.683159958183832e-07, "loss": 0.3306, "step": 11385 }, { "epoch": 0.7778916444626631, "grad_norm": 3.869779586791992, "learning_rate": 8.678054587705007e-07, "loss": 0.3207, "step": 11386 }, { "epoch": 0.7779599644735943, "grad_norm": 3.449965000152588, "learning_rate": 8.67295050614202e-07, "loss": 0.2604, "step": 11387 }, { "epoch": 0.7780282844845255, "grad_norm": 3.530332565307617, "learning_rate": 8.66784771374483e-07, "loss": 0.2689, "step": 11388 }, { "epoch": 0.7780966044954567, "grad_norm": 3.752535343170166, "learning_rate": 8.662746210763283e-07, "loss": 0.2398, "step": 11389 }, { "epoch": 0.778164924506388, "grad_norm": 4.219845294952393, "learning_rate": 8.657645997447194e-07, "loss": 0.2049, "step": 11390 }, { "epoch": 0.7782332445173191, "grad_norm": 4.065384864807129, "learning_rate": 8.652547074046303e-07, "loss": 0.2714, "step": 11391 }, { "epoch": 0.7783015645282503, "grad_norm": 3.6894328594207764, "learning_rate": 8.647449440810303e-07, "loss": 0.2471, "step": 11392 }, { "epoch": 0.7783698845391815, "grad_norm": 3.9276087284088135, "learning_rate": 8.642353097988784e-07, "loss": 0.2189, "step": 11393 }, { "epoch": 0.7784382045501127, "grad_norm": 2.5192861557006836, "learning_rate": 8.637258045831327e-07, "loss": 0.2121, "step": 11394 }, { "epoch": 0.778506524561044, "grad_norm": 3.399848699569702, "learning_rate": 8.632164284587419e-07, "loss": 0.2405, "step": 11395 }, { "epoch": 0.7785748445719751, "grad_norm": 4.660792827606201, "learning_rate": 8.627071814506491e-07, "loss": 0.2033, "step": 11396 }, { "epoch": 0.7786431645829064, "grad_norm": 3.842453718185425, "learning_rate": 8.62198063583789e-07, "loss": 0.3365, "step": 11397 }, { "epoch": 0.7787114845938375, "grad_norm": 4.4002180099487305, "learning_rate": 8.616890748830928e-07, "loss": 0.2416, "step": 11398 }, { "epoch": 0.7787798046047687, "grad_norm": 3.307168483734131, "learning_rate": 8.611802153734843e-07, "loss": 0.2024, "step": 11399 }, { "epoch": 0.7788481246157, "grad_norm": 3.8343684673309326, "learning_rate": 8.606714850798809e-07, "loss": 0.3477, "step": 11400 }, { "epoch": 0.7789164446266311, "grad_norm": 3.707331657409668, "learning_rate": 8.601628840271942e-07, "loss": 0.2023, "step": 11401 }, { "epoch": 0.7789847646375624, "grad_norm": 3.035172700881958, "learning_rate": 8.596544122403288e-07, "loss": 0.2075, "step": 11402 }, { "epoch": 0.7790530846484935, "grad_norm": 3.3756978511810303, "learning_rate": 8.591460697441828e-07, "loss": 0.2351, "step": 11403 }, { "epoch": 0.7791214046594247, "grad_norm": 3.2402536869049072, "learning_rate": 8.586378565636502e-07, "loss": 0.2211, "step": 11404 }, { "epoch": 0.779189724670356, "grad_norm": 4.813853740692139, "learning_rate": 8.581297727236139e-07, "loss": 0.3901, "step": 11405 }, { "epoch": 0.7792580446812871, "grad_norm": 3.9302284717559814, "learning_rate": 8.576218182489542e-07, "loss": 0.2761, "step": 11406 }, { "epoch": 0.7793263646922184, "grad_norm": 4.7409987449646, "learning_rate": 8.571139931645462e-07, "loss": 0.3389, "step": 11407 }, { "epoch": 0.7793946847031495, "grad_norm": 4.498868465423584, "learning_rate": 8.566062974952563e-07, "loss": 0.2746, "step": 11408 }, { "epoch": 0.7794630047140808, "grad_norm": 4.428112506866455, "learning_rate": 8.56098731265943e-07, "loss": 0.3163, "step": 11409 }, { "epoch": 0.779531324725012, "grad_norm": 4.284769535064697, "learning_rate": 8.555912945014621e-07, "loss": 0.3185, "step": 11410 }, { "epoch": 0.7795996447359431, "grad_norm": 4.628399848937988, "learning_rate": 8.550839872266612e-07, "loss": 0.2915, "step": 11411 }, { "epoch": 0.7796679647468744, "grad_norm": 3.8434720039367676, "learning_rate": 8.545768094663811e-07, "loss": 0.2098, "step": 11412 }, { "epoch": 0.7797362847578055, "grad_norm": 6.230965614318848, "learning_rate": 8.540697612454578e-07, "loss": 0.3186, "step": 11413 }, { "epoch": 0.7798046047687368, "grad_norm": 3.8452258110046387, "learning_rate": 8.535628425887198e-07, "loss": 0.3183, "step": 11414 }, { "epoch": 0.779872924779668, "grad_norm": 3.1186792850494385, "learning_rate": 8.530560535209896e-07, "loss": 0.1242, "step": 11415 }, { "epoch": 0.7799412447905991, "grad_norm": 4.849484920501709, "learning_rate": 8.525493940670845e-07, "loss": 0.4081, "step": 11416 }, { "epoch": 0.7800095648015304, "grad_norm": 2.6428210735321045, "learning_rate": 8.520428642518114e-07, "loss": 0.1691, "step": 11417 }, { "epoch": 0.7800778848124615, "grad_norm": 4.3668212890625, "learning_rate": 8.515364640999761e-07, "loss": 0.1862, "step": 11418 }, { "epoch": 0.7801462048233928, "grad_norm": 3.8442981243133545, "learning_rate": 8.510301936363744e-07, "loss": 0.3153, "step": 11419 }, { "epoch": 0.780214524834324, "grad_norm": 4.817015647888184, "learning_rate": 8.50524052885798e-07, "loss": 0.225, "step": 11420 }, { "epoch": 0.7802828448452552, "grad_norm": 4.707502365112305, "learning_rate": 8.500180418730304e-07, "loss": 0.3238, "step": 11421 }, { "epoch": 0.7803511648561864, "grad_norm": 2.5894317626953125, "learning_rate": 8.495121606228506e-07, "loss": 0.1426, "step": 11422 }, { "epoch": 0.7804194848671175, "grad_norm": 4.246269226074219, "learning_rate": 8.490064091600291e-07, "loss": 0.2802, "step": 11423 }, { "epoch": 0.7804878048780488, "grad_norm": 4.363049507141113, "learning_rate": 8.485007875093335e-07, "loss": 0.2808, "step": 11424 }, { "epoch": 0.78055612488898, "grad_norm": 3.2961056232452393, "learning_rate": 8.479952956955195e-07, "loss": 0.2804, "step": 11425 }, { "epoch": 0.7806244448999112, "grad_norm": 5.242316246032715, "learning_rate": 8.474899337433404e-07, "loss": 0.2983, "step": 11426 }, { "epoch": 0.7806927649108424, "grad_norm": 4.469101428985596, "learning_rate": 8.469847016775446e-07, "loss": 0.3319, "step": 11427 }, { "epoch": 0.7807610849217735, "grad_norm": 3.6840012073516846, "learning_rate": 8.464795995228715e-07, "loss": 0.2517, "step": 11428 }, { "epoch": 0.7808294049327048, "grad_norm": 3.7959136962890625, "learning_rate": 8.459746273040527e-07, "loss": 0.2352, "step": 11429 }, { "epoch": 0.780897724943636, "grad_norm": 4.663221836090088, "learning_rate": 8.454697850458165e-07, "loss": 0.3491, "step": 11430 }, { "epoch": 0.7809660449545672, "grad_norm": 4.074378490447998, "learning_rate": 8.449650727728839e-07, "loss": 0.3668, "step": 11431 }, { "epoch": 0.7810343649654984, "grad_norm": 3.060262441635132, "learning_rate": 8.444604905099686e-07, "loss": 0.1777, "step": 11432 }, { "epoch": 0.7811026849764297, "grad_norm": 5.770873069763184, "learning_rate": 8.439560382817793e-07, "loss": 0.339, "step": 11433 }, { "epoch": 0.7811710049873608, "grad_norm": 3.6053857803344727, "learning_rate": 8.434517161130176e-07, "loss": 0.2839, "step": 11434 }, { "epoch": 0.781239324998292, "grad_norm": 4.099149703979492, "learning_rate": 8.429475240283787e-07, "loss": 0.2765, "step": 11435 }, { "epoch": 0.7813076450092232, "grad_norm": 3.3246536254882812, "learning_rate": 8.424434620525524e-07, "loss": 0.2751, "step": 11436 }, { "epoch": 0.7813759650201544, "grad_norm": 5.739389896392822, "learning_rate": 8.419395302102195e-07, "loss": 0.2723, "step": 11437 }, { "epoch": 0.7814442850310857, "grad_norm": 3.946199655532837, "learning_rate": 8.41435728526057e-07, "loss": 0.2865, "step": 11438 }, { "epoch": 0.7815126050420168, "grad_norm": 5.267997741699219, "learning_rate": 8.409320570247353e-07, "loss": 0.3515, "step": 11439 }, { "epoch": 0.781580925052948, "grad_norm": 4.170090198516846, "learning_rate": 8.404285157309161e-07, "loss": 0.2589, "step": 11440 }, { "epoch": 0.7816492450638792, "grad_norm": 3.068845510482788, "learning_rate": 8.399251046692598e-07, "loss": 0.2892, "step": 11441 }, { "epoch": 0.7817175650748104, "grad_norm": 4.372420787811279, "learning_rate": 8.394218238644143e-07, "loss": 0.3892, "step": 11442 }, { "epoch": 0.7817858850857416, "grad_norm": 4.491340637207031, "learning_rate": 8.389186733410247e-07, "loss": 0.2159, "step": 11443 }, { "epoch": 0.7818542050966728, "grad_norm": 5.053823947906494, "learning_rate": 8.384156531237289e-07, "loss": 0.3121, "step": 11444 }, { "epoch": 0.7819225251076041, "grad_norm": 4.7809624671936035, "learning_rate": 8.379127632371588e-07, "loss": 0.2616, "step": 11445 }, { "epoch": 0.7819908451185352, "grad_norm": 4.833752155303955, "learning_rate": 8.37410003705939e-07, "loss": 0.2669, "step": 11446 }, { "epoch": 0.7820591651294664, "grad_norm": 4.959475994110107, "learning_rate": 8.36907374554689e-07, "loss": 0.3196, "step": 11447 }, { "epoch": 0.7821274851403976, "grad_norm": 4.719292163848877, "learning_rate": 8.364048758080206e-07, "loss": 0.2501, "step": 11448 }, { "epoch": 0.7821958051513288, "grad_norm": 2.8358967304229736, "learning_rate": 8.359025074905408e-07, "loss": 0.1549, "step": 11449 }, { "epoch": 0.7822641251622601, "grad_norm": 3.27809476852417, "learning_rate": 8.354002696268479e-07, "loss": 0.2775, "step": 11450 }, { "epoch": 0.7823324451731912, "grad_norm": 3.340796709060669, "learning_rate": 8.348981622415362e-07, "loss": 0.2771, "step": 11451 }, { "epoch": 0.7824007651841224, "grad_norm": 5.600986480712891, "learning_rate": 8.343961853591916e-07, "loss": 0.3407, "step": 11452 }, { "epoch": 0.7824690851950536, "grad_norm": 4.625959873199463, "learning_rate": 8.338943390043952e-07, "loss": 0.3734, "step": 11453 }, { "epoch": 0.7825374052059848, "grad_norm": 4.66489315032959, "learning_rate": 8.333926232017212e-07, "loss": 0.3127, "step": 11454 }, { "epoch": 0.7826057252169161, "grad_norm": 5.297080039978027, "learning_rate": 8.32891037975737e-07, "loss": 0.3464, "step": 11455 }, { "epoch": 0.7826740452278472, "grad_norm": 5.057108402252197, "learning_rate": 8.323895833510039e-07, "loss": 0.2606, "step": 11456 }, { "epoch": 0.7827423652387785, "grad_norm": 4.4543633460998535, "learning_rate": 8.318882593520778e-07, "loss": 0.3056, "step": 11457 }, { "epoch": 0.7828106852497096, "grad_norm": 3.691582679748535, "learning_rate": 8.313870660035056e-07, "loss": 0.3289, "step": 11458 }, { "epoch": 0.7828790052606408, "grad_norm": 5.6386823654174805, "learning_rate": 8.308860033298289e-07, "loss": 0.2651, "step": 11459 }, { "epoch": 0.7829473252715721, "grad_norm": 2.9562430381774902, "learning_rate": 8.30385071355586e-07, "loss": 0.1902, "step": 11460 }, { "epoch": 0.7830156452825032, "grad_norm": 4.218028545379639, "learning_rate": 8.298842701053056e-07, "loss": 0.2023, "step": 11461 }, { "epoch": 0.7830839652934345, "grad_norm": 3.7390074729919434, "learning_rate": 8.293835996035087e-07, "loss": 0.2614, "step": 11462 }, { "epoch": 0.7831522853043656, "grad_norm": 3.5966591835021973, "learning_rate": 8.288830598747133e-07, "loss": 0.3139, "step": 11463 }, { "epoch": 0.7832206053152968, "grad_norm": 4.1331892013549805, "learning_rate": 8.283826509434293e-07, "loss": 0.2477, "step": 11464 }, { "epoch": 0.7832889253262281, "grad_norm": 3.8989968299865723, "learning_rate": 8.278823728341598e-07, "loss": 0.2393, "step": 11465 }, { "epoch": 0.7833572453371592, "grad_norm": 3.916428565979004, "learning_rate": 8.273822255714025e-07, "loss": 0.1952, "step": 11466 }, { "epoch": 0.7834255653480905, "grad_norm": 3.9179024696350098, "learning_rate": 8.268822091796487e-07, "loss": 0.2336, "step": 11467 }, { "epoch": 0.7834938853590216, "grad_norm": 4.834298133850098, "learning_rate": 8.263823236833823e-07, "loss": 0.3566, "step": 11468 }, { "epoch": 0.7835622053699529, "grad_norm": 3.9061057567596436, "learning_rate": 8.258825691070825e-07, "loss": 0.2471, "step": 11469 }, { "epoch": 0.7836305253808841, "grad_norm": 4.040836811065674, "learning_rate": 8.253829454752194e-07, "loss": 0.2887, "step": 11470 }, { "epoch": 0.7836988453918152, "grad_norm": 4.339016437530518, "learning_rate": 8.248834528122588e-07, "loss": 0.375, "step": 11471 }, { "epoch": 0.7837671654027465, "grad_norm": 3.9904661178588867, "learning_rate": 8.243840911426595e-07, "loss": 0.1877, "step": 11472 }, { "epoch": 0.7838354854136776, "grad_norm": 3.898679733276367, "learning_rate": 8.238848604908739e-07, "loss": 0.3485, "step": 11473 }, { "epoch": 0.7839038054246089, "grad_norm": 3.672804355621338, "learning_rate": 8.233857608813483e-07, "loss": 0.1771, "step": 11474 }, { "epoch": 0.7839721254355401, "grad_norm": 4.285305976867676, "learning_rate": 8.228867923385222e-07, "loss": 0.2731, "step": 11475 }, { "epoch": 0.7840404454464712, "grad_norm": 3.8064448833465576, "learning_rate": 8.223879548868282e-07, "loss": 0.2499, "step": 11476 }, { "epoch": 0.7841087654574025, "grad_norm": 5.4290618896484375, "learning_rate": 8.218892485506952e-07, "loss": 0.2722, "step": 11477 }, { "epoch": 0.7841770854683336, "grad_norm": 3.8778600692749023, "learning_rate": 8.213906733545406e-07, "loss": 0.3266, "step": 11478 }, { "epoch": 0.7842454054792649, "grad_norm": 4.426769256591797, "learning_rate": 8.208922293227782e-07, "loss": 0.2259, "step": 11479 }, { "epoch": 0.7843137254901961, "grad_norm": 3.9030704498291016, "learning_rate": 8.203939164798184e-07, "loss": 0.3157, "step": 11480 }, { "epoch": 0.7843820455011273, "grad_norm": 5.098936557769775, "learning_rate": 8.198957348500614e-07, "loss": 0.2744, "step": 11481 }, { "epoch": 0.7844503655120585, "grad_norm": 4.39483642578125, "learning_rate": 8.193976844579001e-07, "loss": 0.2889, "step": 11482 }, { "epoch": 0.7845186855229896, "grad_norm": 4.5664262771606445, "learning_rate": 8.188997653277237e-07, "loss": 0.3086, "step": 11483 }, { "epoch": 0.7845870055339209, "grad_norm": 3.1163382530212402, "learning_rate": 8.184019774839143e-07, "loss": 0.2068, "step": 11484 }, { "epoch": 0.7846553255448521, "grad_norm": 5.102339744567871, "learning_rate": 8.179043209508466e-07, "loss": 0.2886, "step": 11485 }, { "epoch": 0.7847236455557833, "grad_norm": 3.6941354274749756, "learning_rate": 8.1740679575289e-07, "loss": 0.2006, "step": 11486 }, { "epoch": 0.7847919655667145, "grad_norm": 4.582832336425781, "learning_rate": 8.16909401914407e-07, "loss": 0.3401, "step": 11487 }, { "epoch": 0.7848602855776456, "grad_norm": 2.9065589904785156, "learning_rate": 8.164121394597534e-07, "loss": 0.1961, "step": 11488 }, { "epoch": 0.7849286055885769, "grad_norm": 3.7033746242523193, "learning_rate": 8.159150084132802e-07, "loss": 0.2144, "step": 11489 }, { "epoch": 0.7849969255995081, "grad_norm": 5.263885974884033, "learning_rate": 8.15418008799328e-07, "loss": 0.3001, "step": 11490 }, { "epoch": 0.7850652456104393, "grad_norm": 3.535487413406372, "learning_rate": 8.149211406422352e-07, "loss": 0.1939, "step": 11491 }, { "epoch": 0.7851335656213705, "grad_norm": 3.265073537826538, "learning_rate": 8.144244039663308e-07, "loss": 0.3366, "step": 11492 }, { "epoch": 0.7852018856323018, "grad_norm": 2.940185070037842, "learning_rate": 8.139277987959413e-07, "loss": 0.1652, "step": 11493 }, { "epoch": 0.7852702056432329, "grad_norm": 3.6761794090270996, "learning_rate": 8.134313251553821e-07, "loss": 0.2407, "step": 11494 }, { "epoch": 0.7853385256541641, "grad_norm": 7.098692893981934, "learning_rate": 8.129349830689644e-07, "loss": 0.2716, "step": 11495 }, { "epoch": 0.7854068456650953, "grad_norm": 3.8183465003967285, "learning_rate": 8.124387725609927e-07, "loss": 0.1895, "step": 11496 }, { "epoch": 0.7854751656760265, "grad_norm": 6.955235958099365, "learning_rate": 8.119426936557666e-07, "loss": 0.3567, "step": 11497 }, { "epoch": 0.7855434856869578, "grad_norm": 4.284249782562256, "learning_rate": 8.114467463775748e-07, "loss": 0.3335, "step": 11498 }, { "epoch": 0.7856118056978889, "grad_norm": 4.319952964782715, "learning_rate": 8.109509307507048e-07, "loss": 0.2239, "step": 11499 }, { "epoch": 0.7856801257088201, "grad_norm": 2.9696145057678223, "learning_rate": 8.104552467994354e-07, "loss": 0.2435, "step": 11500 }, { "epoch": 0.7857484457197513, "grad_norm": 3.080996036529541, "learning_rate": 8.099596945480391e-07, "loss": 0.2431, "step": 11501 }, { "epoch": 0.7858167657306825, "grad_norm": 3.5845134258270264, "learning_rate": 8.094642740207799e-07, "loss": 0.3143, "step": 11502 }, { "epoch": 0.7858850857416138, "grad_norm": 4.415707588195801, "learning_rate": 8.089689852419186e-07, "loss": 0.2373, "step": 11503 }, { "epoch": 0.7859534057525449, "grad_norm": 3.553010940551758, "learning_rate": 8.084738282357081e-07, "loss": 0.2724, "step": 11504 }, { "epoch": 0.7860217257634762, "grad_norm": 4.720917701721191, "learning_rate": 8.079788030263947e-07, "loss": 0.3053, "step": 11505 }, { "epoch": 0.7860900457744073, "grad_norm": 3.3297948837280273, "learning_rate": 8.074839096382183e-07, "loss": 0.2357, "step": 11506 }, { "epoch": 0.7861583657853385, "grad_norm": 3.186896800994873, "learning_rate": 8.069891480954132e-07, "loss": 0.1421, "step": 11507 }, { "epoch": 0.7862266857962698, "grad_norm": 4.1905059814453125, "learning_rate": 8.064945184222057e-07, "loss": 0.2491, "step": 11508 }, { "epoch": 0.7862950058072009, "grad_norm": 4.812110424041748, "learning_rate": 8.060000206428174e-07, "loss": 0.2303, "step": 11509 }, { "epoch": 0.7863633258181322, "grad_norm": 3.6703577041625977, "learning_rate": 8.055056547814627e-07, "loss": 0.2382, "step": 11510 }, { "epoch": 0.7864316458290633, "grad_norm": 4.0824480056762695, "learning_rate": 8.050114208623481e-07, "loss": 0.2678, "step": 11511 }, { "epoch": 0.7864999658399945, "grad_norm": 3.575521469116211, "learning_rate": 8.045173189096747e-07, "loss": 0.1998, "step": 11512 }, { "epoch": 0.7865682858509258, "grad_norm": 3.347921371459961, "learning_rate": 8.040233489476388e-07, "loss": 0.2431, "step": 11513 }, { "epoch": 0.7866366058618569, "grad_norm": 4.019567966461182, "learning_rate": 8.035295110004295e-07, "loss": 0.2758, "step": 11514 }, { "epoch": 0.7867049258727882, "grad_norm": 4.329036235809326, "learning_rate": 8.030358050922267e-07, "loss": 0.2533, "step": 11515 }, { "epoch": 0.7867732458837193, "grad_norm": 4.527327060699463, "learning_rate": 8.025422312472066e-07, "loss": 0.233, "step": 11516 }, { "epoch": 0.7868415658946506, "grad_norm": 3.496168851852417, "learning_rate": 8.02048789489538e-07, "loss": 0.2352, "step": 11517 }, { "epoch": 0.7869098859055818, "grad_norm": 4.6542792320251465, "learning_rate": 8.015554798433839e-07, "loss": 0.2219, "step": 11518 }, { "epoch": 0.7869782059165129, "grad_norm": 3.1941378116607666, "learning_rate": 8.010623023329001e-07, "loss": 0.2595, "step": 11519 }, { "epoch": 0.7870465259274442, "grad_norm": 4.539802074432373, "learning_rate": 8.005692569822365e-07, "loss": 0.3283, "step": 11520 }, { "epoch": 0.7871148459383753, "grad_norm": 5.200253963470459, "learning_rate": 8.000763438155358e-07, "loss": 0.2512, "step": 11521 }, { "epoch": 0.7871831659493066, "grad_norm": 4.419107437133789, "learning_rate": 7.995835628569357e-07, "loss": 0.2153, "step": 11522 }, { "epoch": 0.7872514859602378, "grad_norm": 4.706418037414551, "learning_rate": 7.990909141305646e-07, "loss": 0.3298, "step": 11523 }, { "epoch": 0.7873198059711689, "grad_norm": 4.418118000030518, "learning_rate": 7.985983976605472e-07, "loss": 0.2967, "step": 11524 }, { "epoch": 0.7873881259821002, "grad_norm": 4.831419467926025, "learning_rate": 7.981060134710008e-07, "loss": 0.3077, "step": 11525 }, { "epoch": 0.7874564459930313, "grad_norm": 3.628361225128174, "learning_rate": 7.976137615860358e-07, "loss": 0.1701, "step": 11526 }, { "epoch": 0.7875247660039626, "grad_norm": 7.585418224334717, "learning_rate": 7.971216420297564e-07, "loss": 0.2609, "step": 11527 }, { "epoch": 0.7875930860148938, "grad_norm": 3.856048107147217, "learning_rate": 7.966296548262611e-07, "loss": 0.2469, "step": 11528 }, { "epoch": 0.787661406025825, "grad_norm": 3.58890438079834, "learning_rate": 7.961377999996406e-07, "loss": 0.2554, "step": 11529 }, { "epoch": 0.7877297260367562, "grad_norm": 3.0699400901794434, "learning_rate": 7.956460775739807e-07, "loss": 0.2441, "step": 11530 }, { "epoch": 0.7877980460476873, "grad_norm": 5.182253837585449, "learning_rate": 7.951544875733581e-07, "loss": 0.2646, "step": 11531 }, { "epoch": 0.7878663660586186, "grad_norm": 4.2803053855896, "learning_rate": 7.946630300218447e-07, "loss": 0.2225, "step": 11532 }, { "epoch": 0.7879346860695498, "grad_norm": 3.390397548675537, "learning_rate": 7.941717049435074e-07, "loss": 0.204, "step": 11533 }, { "epoch": 0.788003006080481, "grad_norm": 3.4707906246185303, "learning_rate": 7.936805123624053e-07, "loss": 0.249, "step": 11534 }, { "epoch": 0.7880713260914122, "grad_norm": 3.8597450256347656, "learning_rate": 7.931894523025888e-07, "loss": 0.2592, "step": 11535 }, { "epoch": 0.7881396461023433, "grad_norm": 3.9169483184814453, "learning_rate": 7.926985247881046e-07, "loss": 0.2427, "step": 11536 }, { "epoch": 0.7882079661132746, "grad_norm": 6.011513710021973, "learning_rate": 7.92207729842993e-07, "loss": 0.3036, "step": 11537 }, { "epoch": 0.7882762861242057, "grad_norm": 4.466972351074219, "learning_rate": 7.917170674912858e-07, "loss": 0.2615, "step": 11538 }, { "epoch": 0.788344606135137, "grad_norm": 3.7720866203308105, "learning_rate": 7.912265377570105e-07, "loss": 0.2856, "step": 11539 }, { "epoch": 0.7884129261460682, "grad_norm": 3.3859729766845703, "learning_rate": 7.907361406641861e-07, "loss": 0.1905, "step": 11540 }, { "epoch": 0.7884812461569994, "grad_norm": 3.041574478149414, "learning_rate": 7.90245876236826e-07, "loss": 0.151, "step": 11541 }, { "epoch": 0.7885495661679306, "grad_norm": 4.363929271697998, "learning_rate": 7.89755744498939e-07, "loss": 0.2303, "step": 11542 }, { "epoch": 0.7886178861788617, "grad_norm": 3.1327760219573975, "learning_rate": 7.892657454745232e-07, "loss": 0.2015, "step": 11543 }, { "epoch": 0.788686206189793, "grad_norm": 3.248558282852173, "learning_rate": 7.887758791875733e-07, "loss": 0.2564, "step": 11544 }, { "epoch": 0.7887545262007242, "grad_norm": 4.4978203773498535, "learning_rate": 7.882861456620762e-07, "loss": 0.2412, "step": 11545 }, { "epoch": 0.7888228462116554, "grad_norm": 4.046473026275635, "learning_rate": 7.877965449220149e-07, "loss": 0.2518, "step": 11546 }, { "epoch": 0.7888911662225866, "grad_norm": 3.966506004333496, "learning_rate": 7.873070769913622e-07, "loss": 0.1979, "step": 11547 }, { "epoch": 0.7889594862335177, "grad_norm": 4.271816253662109, "learning_rate": 7.868177418940861e-07, "loss": 0.2435, "step": 11548 }, { "epoch": 0.789027806244449, "grad_norm": 3.692548990249634, "learning_rate": 7.863285396541483e-07, "loss": 0.2212, "step": 11549 }, { "epoch": 0.7890961262553802, "grad_norm": 6.477598667144775, "learning_rate": 7.858394702955047e-07, "loss": 0.1773, "step": 11550 }, { "epoch": 0.7891644462663114, "grad_norm": 4.639930725097656, "learning_rate": 7.853505338421006e-07, "loss": 0.2352, "step": 11551 }, { "epoch": 0.7892327662772426, "grad_norm": 4.589292526245117, "learning_rate": 7.848617303178815e-07, "loss": 0.3632, "step": 11552 }, { "epoch": 0.7893010862881739, "grad_norm": 2.6010971069335938, "learning_rate": 7.843730597467814e-07, "loss": 0.1705, "step": 11553 }, { "epoch": 0.789369406299105, "grad_norm": 3.872260808944702, "learning_rate": 7.838845221527299e-07, "loss": 0.2458, "step": 11554 }, { "epoch": 0.7894377263100362, "grad_norm": 4.006788730621338, "learning_rate": 7.833961175596477e-07, "loss": 0.2871, "step": 11555 }, { "epoch": 0.7895060463209674, "grad_norm": 4.816256046295166, "learning_rate": 7.829078459914518e-07, "loss": 0.2567, "step": 11556 }, { "epoch": 0.7895743663318986, "grad_norm": 4.619613170623779, "learning_rate": 7.824197074720512e-07, "loss": 0.226, "step": 11557 }, { "epoch": 0.7896426863428299, "grad_norm": 6.14579439163208, "learning_rate": 7.819317020253495e-07, "loss": 0.3117, "step": 11558 }, { "epoch": 0.789711006353761, "grad_norm": 4.4813079833984375, "learning_rate": 7.814438296752421e-07, "loss": 0.3219, "step": 11559 }, { "epoch": 0.7897793263646922, "grad_norm": 3.2841227054595947, "learning_rate": 7.809560904456194e-07, "loss": 0.2088, "step": 11560 }, { "epoch": 0.7898476463756234, "grad_norm": 3.394047498703003, "learning_rate": 7.804684843603644e-07, "loss": 0.2011, "step": 11561 }, { "epoch": 0.7899159663865546, "grad_norm": 3.611860990524292, "learning_rate": 7.799810114433548e-07, "loss": 0.1948, "step": 11562 }, { "epoch": 0.7899842863974859, "grad_norm": 4.655341148376465, "learning_rate": 7.794936717184596e-07, "loss": 0.2616, "step": 11563 }, { "epoch": 0.790052606408417, "grad_norm": 4.499206066131592, "learning_rate": 7.790064652095428e-07, "loss": 0.3208, "step": 11564 }, { "epoch": 0.7901209264193483, "grad_norm": 5.0868940353393555, "learning_rate": 7.785193919404608e-07, "loss": 0.419, "step": 11565 }, { "epoch": 0.7901892464302794, "grad_norm": 3.7729482650756836, "learning_rate": 7.780324519350672e-07, "loss": 0.2599, "step": 11566 }, { "epoch": 0.7902575664412106, "grad_norm": 3.4105353355407715, "learning_rate": 7.775456452172031e-07, "loss": 0.1955, "step": 11567 }, { "epoch": 0.7903258864521419, "grad_norm": 3.730971336364746, "learning_rate": 7.770589718107076e-07, "loss": 0.2513, "step": 11568 }, { "epoch": 0.790394206463073, "grad_norm": 5.341244697570801, "learning_rate": 7.765724317394113e-07, "loss": 0.2461, "step": 11569 }, { "epoch": 0.7904625264740043, "grad_norm": 4.113864898681641, "learning_rate": 7.7608602502714e-07, "loss": 0.2465, "step": 11570 }, { "epoch": 0.7905308464849354, "grad_norm": 2.6412858963012695, "learning_rate": 7.755997516977086e-07, "loss": 0.1903, "step": 11571 }, { "epoch": 0.7905991664958666, "grad_norm": 3.8464105129241943, "learning_rate": 7.751136117749321e-07, "loss": 0.286, "step": 11572 }, { "epoch": 0.7906674865067979, "grad_norm": 3.940178155899048, "learning_rate": 7.746276052826135e-07, "loss": 0.3715, "step": 11573 }, { "epoch": 0.790735806517729, "grad_norm": 5.897779941558838, "learning_rate": 7.741417322445526e-07, "loss": 0.3467, "step": 11574 }, { "epoch": 0.7908041265286603, "grad_norm": 3.9464621543884277, "learning_rate": 7.73655992684541e-07, "loss": 0.3004, "step": 11575 }, { "epoch": 0.7908724465395914, "grad_norm": 5.50777530670166, "learning_rate": 7.731703866263631e-07, "loss": 0.2342, "step": 11576 }, { "epoch": 0.7909407665505227, "grad_norm": 4.263643264770508, "learning_rate": 7.72684914093798e-07, "loss": 0.2382, "step": 11577 }, { "epoch": 0.7910090865614539, "grad_norm": 4.334808349609375, "learning_rate": 7.721995751106184e-07, "loss": 0.3592, "step": 11578 }, { "epoch": 0.791077406572385, "grad_norm": 3.0606472492218018, "learning_rate": 7.717143697005895e-07, "loss": 0.3152, "step": 11579 }, { "epoch": 0.7911457265833163, "grad_norm": 4.042362689971924, "learning_rate": 7.712292978874715e-07, "loss": 0.3107, "step": 11580 }, { "epoch": 0.7912140465942474, "grad_norm": 3.6361281871795654, "learning_rate": 7.707443596950165e-07, "loss": 0.2817, "step": 11581 }, { "epoch": 0.7912823666051787, "grad_norm": 4.0441155433654785, "learning_rate": 7.702595551469706e-07, "loss": 0.2132, "step": 11582 }, { "epoch": 0.7913506866161099, "grad_norm": 3.998452663421631, "learning_rate": 7.697748842670746e-07, "loss": 0.3, "step": 11583 }, { "epoch": 0.791419006627041, "grad_norm": 4.2635393142700195, "learning_rate": 7.692903470790584e-07, "loss": 0.2163, "step": 11584 }, { "epoch": 0.7914873266379723, "grad_norm": 3.0320539474487305, "learning_rate": 7.688059436066515e-07, "loss": 0.2148, "step": 11585 }, { "epoch": 0.7915556466489034, "grad_norm": 4.281739711761475, "learning_rate": 7.683216738735727e-07, "loss": 0.2802, "step": 11586 }, { "epoch": 0.7916239666598347, "grad_norm": 3.686603307723999, "learning_rate": 7.678375379035366e-07, "loss": 0.2615, "step": 11587 }, { "epoch": 0.7916922866707659, "grad_norm": 4.298026084899902, "learning_rate": 7.673535357202485e-07, "loss": 0.2295, "step": 11588 }, { "epoch": 0.7917606066816971, "grad_norm": 5.019933700561523, "learning_rate": 7.668696673474088e-07, "loss": 0.3491, "step": 11589 }, { "epoch": 0.7918289266926283, "grad_norm": 4.3301496505737305, "learning_rate": 7.663859328087119e-07, "loss": 0.2891, "step": 11590 }, { "epoch": 0.7918972467035594, "grad_norm": 5.405754089355469, "learning_rate": 7.65902332127845e-07, "loss": 0.2997, "step": 11591 }, { "epoch": 0.7919655667144907, "grad_norm": 4.185233116149902, "learning_rate": 7.654188653284882e-07, "loss": 0.2921, "step": 11592 }, { "epoch": 0.7920338867254219, "grad_norm": 5.973466873168945, "learning_rate": 7.649355324343159e-07, "loss": 0.3994, "step": 11593 }, { "epoch": 0.7921022067363531, "grad_norm": 3.7762463092803955, "learning_rate": 7.644523334689958e-07, "loss": 0.2702, "step": 11594 }, { "epoch": 0.7921705267472843, "grad_norm": 3.5435168743133545, "learning_rate": 7.639692684561897e-07, "loss": 0.3435, "step": 11595 }, { "epoch": 0.7922388467582154, "grad_norm": 3.315202474594116, "learning_rate": 7.634863374195502e-07, "loss": 0.2067, "step": 11596 }, { "epoch": 0.7923071667691467, "grad_norm": 3.2469706535339355, "learning_rate": 7.630035403827261e-07, "loss": 0.2771, "step": 11597 }, { "epoch": 0.7923754867800779, "grad_norm": 3.789403200149536, "learning_rate": 7.625208773693571e-07, "loss": 0.3218, "step": 11598 }, { "epoch": 0.7924438067910091, "grad_norm": 3.54709529876709, "learning_rate": 7.620383484030819e-07, "loss": 0.2567, "step": 11599 }, { "epoch": 0.7925121268019403, "grad_norm": 3.4028666019439697, "learning_rate": 7.615559535075246e-07, "loss": 0.2367, "step": 11600 }, { "epoch": 0.7925804468128715, "grad_norm": 3.65940260887146, "learning_rate": 7.61073692706309e-07, "loss": 0.1589, "step": 11601 }, { "epoch": 0.7926487668238027, "grad_norm": 3.8721070289611816, "learning_rate": 7.605915660230489e-07, "loss": 0.2819, "step": 11602 }, { "epoch": 0.7927170868347339, "grad_norm": 3.9308862686157227, "learning_rate": 7.601095734813548e-07, "loss": 0.2314, "step": 11603 }, { "epoch": 0.7927854068456651, "grad_norm": 5.6093974113464355, "learning_rate": 7.596277151048253e-07, "loss": 0.3391, "step": 11604 }, { "epoch": 0.7928537268565963, "grad_norm": 4.262086391448975, "learning_rate": 7.591459909170583e-07, "loss": 0.389, "step": 11605 }, { "epoch": 0.7929220468675275, "grad_norm": 5.150961875915527, "learning_rate": 7.586644009416418e-07, "loss": 0.3748, "step": 11606 }, { "epoch": 0.7929903668784587, "grad_norm": 3.8596208095550537, "learning_rate": 7.58182945202159e-07, "loss": 0.2279, "step": 11607 }, { "epoch": 0.7930586868893899, "grad_norm": 4.0095906257629395, "learning_rate": 7.577016237221837e-07, "loss": 0.2858, "step": 11608 }, { "epoch": 0.7931270069003211, "grad_norm": 4.89683723449707, "learning_rate": 7.572204365252858e-07, "loss": 0.2354, "step": 11609 }, { "epoch": 0.7931953269112523, "grad_norm": 4.150171279907227, "learning_rate": 7.567393836350277e-07, "loss": 0.2877, "step": 11610 }, { "epoch": 0.7932636469221835, "grad_norm": 5.208874702453613, "learning_rate": 7.562584650749653e-07, "loss": 0.3486, "step": 11611 }, { "epoch": 0.7933319669331147, "grad_norm": 3.4754226207733154, "learning_rate": 7.557776808686479e-07, "loss": 0.1943, "step": 11612 }, { "epoch": 0.793400286944046, "grad_norm": 3.968902111053467, "learning_rate": 7.552970310396184e-07, "loss": 0.2186, "step": 11613 }, { "epoch": 0.7934686069549771, "grad_norm": 5.2656168937683105, "learning_rate": 7.548165156114129e-07, "loss": 0.3325, "step": 11614 }, { "epoch": 0.7935369269659083, "grad_norm": 4.362019062042236, "learning_rate": 7.543361346075614e-07, "loss": 0.2407, "step": 11615 }, { "epoch": 0.7936052469768395, "grad_norm": 3.2732322216033936, "learning_rate": 7.538558880515858e-07, "loss": 0.2423, "step": 11616 }, { "epoch": 0.7936735669877707, "grad_norm": 3.8357083797454834, "learning_rate": 7.533757759670028e-07, "loss": 0.2367, "step": 11617 }, { "epoch": 0.793741886998702, "grad_norm": 3.6472270488739014, "learning_rate": 7.528957983773215e-07, "loss": 0.2145, "step": 11618 }, { "epoch": 0.7938102070096331, "grad_norm": 4.760129451751709, "learning_rate": 7.524159553060484e-07, "loss": 0.2748, "step": 11619 }, { "epoch": 0.7938785270205643, "grad_norm": 4.663644313812256, "learning_rate": 7.519362467766763e-07, "loss": 0.24, "step": 11620 }, { "epoch": 0.7939468470314955, "grad_norm": 2.7221951484680176, "learning_rate": 7.514566728126974e-07, "loss": 0.2075, "step": 11621 }, { "epoch": 0.7940151670424267, "grad_norm": 4.245726585388184, "learning_rate": 7.50977233437594e-07, "loss": 0.2559, "step": 11622 }, { "epoch": 0.794083487053358, "grad_norm": 4.744621753692627, "learning_rate": 7.504979286748441e-07, "loss": 0.2297, "step": 11623 }, { "epoch": 0.7941518070642891, "grad_norm": 3.7596232891082764, "learning_rate": 7.500187585479171e-07, "loss": 0.3752, "step": 11624 }, { "epoch": 0.7942201270752204, "grad_norm": 3.8853209018707275, "learning_rate": 7.495397230802768e-07, "loss": 0.3051, "step": 11625 }, { "epoch": 0.7942884470861515, "grad_norm": 5.8365302085876465, "learning_rate": 7.490608222953809e-07, "loss": 0.3443, "step": 11626 }, { "epoch": 0.7943567670970827, "grad_norm": 4.679698944091797, "learning_rate": 7.485820562166804e-07, "loss": 0.3296, "step": 11627 }, { "epoch": 0.794425087108014, "grad_norm": 5.634565353393555, "learning_rate": 7.481034248676171e-07, "loss": 0.3736, "step": 11628 }, { "epoch": 0.7944934071189451, "grad_norm": 3.595503091812134, "learning_rate": 7.476249282716298e-07, "loss": 0.2194, "step": 11629 }, { "epoch": 0.7945617271298764, "grad_norm": 4.793607711791992, "learning_rate": 7.471465664521488e-07, "loss": 0.3108, "step": 11630 }, { "epoch": 0.7946300471408075, "grad_norm": 3.7393617630004883, "learning_rate": 7.466683394325984e-07, "loss": 0.2797, "step": 11631 }, { "epoch": 0.7946983671517387, "grad_norm": 5.458176612854004, "learning_rate": 7.461902472363958e-07, "loss": 0.3001, "step": 11632 }, { "epoch": 0.79476668716267, "grad_norm": 3.493455648422241, "learning_rate": 7.457122898869521e-07, "loss": 0.2552, "step": 11633 }, { "epoch": 0.7948350071736011, "grad_norm": 3.9166202545166016, "learning_rate": 7.452344674076719e-07, "loss": 0.1843, "step": 11634 }, { "epoch": 0.7949033271845324, "grad_norm": 3.1956448554992676, "learning_rate": 7.447567798219536e-07, "loss": 0.168, "step": 11635 }, { "epoch": 0.7949716471954635, "grad_norm": 5.3076605796813965, "learning_rate": 7.442792271531864e-07, "loss": 0.2179, "step": 11636 }, { "epoch": 0.7950399672063948, "grad_norm": 4.62615442276001, "learning_rate": 7.438018094247549e-07, "loss": 0.2764, "step": 11637 }, { "epoch": 0.795108287217326, "grad_norm": 4.653709411621094, "learning_rate": 7.433245266600385e-07, "loss": 0.4323, "step": 11638 }, { "epoch": 0.7951766072282571, "grad_norm": 2.8811049461364746, "learning_rate": 7.428473788824078e-07, "loss": 0.2134, "step": 11639 }, { "epoch": 0.7952449272391884, "grad_norm": 3.371115207672119, "learning_rate": 7.423703661152285e-07, "loss": 0.2604, "step": 11640 }, { "epoch": 0.7953132472501195, "grad_norm": 3.158752202987671, "learning_rate": 7.418934883818568e-07, "loss": 0.1787, "step": 11641 }, { "epoch": 0.7953815672610508, "grad_norm": 3.5732033252716064, "learning_rate": 7.41416745705645e-07, "loss": 0.295, "step": 11642 }, { "epoch": 0.795449887271982, "grad_norm": 3.6123223304748535, "learning_rate": 7.409401381099375e-07, "loss": 0.2458, "step": 11643 }, { "epoch": 0.7955182072829131, "grad_norm": 2.9699337482452393, "learning_rate": 7.404636656180731e-07, "loss": 0.1797, "step": 11644 }, { "epoch": 0.7955865272938444, "grad_norm": 3.656346559524536, "learning_rate": 7.399873282533832e-07, "loss": 0.2756, "step": 11645 }, { "epoch": 0.7956548473047755, "grad_norm": 3.574953556060791, "learning_rate": 7.395111260391924e-07, "loss": 0.2209, "step": 11646 }, { "epoch": 0.7957231673157068, "grad_norm": 4.8480000495910645, "learning_rate": 7.390350589988196e-07, "loss": 0.2041, "step": 11647 }, { "epoch": 0.795791487326638, "grad_norm": 5.888564109802246, "learning_rate": 7.385591271555774e-07, "loss": 0.3198, "step": 11648 }, { "epoch": 0.7958598073375692, "grad_norm": 4.256382465362549, "learning_rate": 7.380833305327689e-07, "loss": 0.2751, "step": 11649 }, { "epoch": 0.7959281273485004, "grad_norm": 3.483203887939453, "learning_rate": 7.376076691536935e-07, "loss": 0.172, "step": 11650 }, { "epoch": 0.7959964473594315, "grad_norm": 3.9650444984436035, "learning_rate": 7.37132143041642e-07, "loss": 0.2572, "step": 11651 }, { "epoch": 0.7960647673703628, "grad_norm": 3.8741819858551025, "learning_rate": 7.366567522199031e-07, "loss": 0.2618, "step": 11652 }, { "epoch": 0.796133087381294, "grad_norm": 3.7235965728759766, "learning_rate": 7.361814967117519e-07, "loss": 0.2824, "step": 11653 }, { "epoch": 0.7962014073922252, "grad_norm": 4.362067699432373, "learning_rate": 7.357063765404615e-07, "loss": 0.313, "step": 11654 }, { "epoch": 0.7962697274031564, "grad_norm": 3.4901256561279297, "learning_rate": 7.352313917292976e-07, "loss": 0.3608, "step": 11655 }, { "epoch": 0.7963380474140875, "grad_norm": 5.534635543823242, "learning_rate": 7.347565423015196e-07, "loss": 0.3198, "step": 11656 }, { "epoch": 0.7964063674250188, "grad_norm": 4.300023078918457, "learning_rate": 7.342818282803767e-07, "loss": 0.2756, "step": 11657 }, { "epoch": 0.79647468743595, "grad_norm": 3.8053290843963623, "learning_rate": 7.338072496891178e-07, "loss": 0.2703, "step": 11658 }, { "epoch": 0.7965430074468812, "grad_norm": 3.353891372680664, "learning_rate": 7.333328065509799e-07, "loss": 0.3435, "step": 11659 }, { "epoch": 0.7966113274578124, "grad_norm": 4.501417636871338, "learning_rate": 7.32858498889197e-07, "loss": 0.2094, "step": 11660 }, { "epoch": 0.7966796474687436, "grad_norm": 4.054618835449219, "learning_rate": 7.323843267269926e-07, "loss": 0.2773, "step": 11661 }, { "epoch": 0.7967479674796748, "grad_norm": 3.7703208923339844, "learning_rate": 7.319102900875863e-07, "loss": 0.2476, "step": 11662 }, { "epoch": 0.796816287490606, "grad_norm": 3.0588812828063965, "learning_rate": 7.314363889941905e-07, "loss": 0.2134, "step": 11663 }, { "epoch": 0.7968846075015372, "grad_norm": 4.614760875701904, "learning_rate": 7.309626234700109e-07, "loss": 0.282, "step": 11664 }, { "epoch": 0.7969529275124684, "grad_norm": 4.231185436248779, "learning_rate": 7.304889935382465e-07, "loss": 0.1844, "step": 11665 }, { "epoch": 0.7970212475233996, "grad_norm": 4.088746547698975, "learning_rate": 7.300154992220898e-07, "loss": 0.3336, "step": 11666 }, { "epoch": 0.7970895675343308, "grad_norm": 4.924482345581055, "learning_rate": 7.295421405447268e-07, "loss": 0.2608, "step": 11667 }, { "epoch": 0.797157887545262, "grad_norm": 4.053433895111084, "learning_rate": 7.290689175293374e-07, "loss": 0.2502, "step": 11668 }, { "epoch": 0.7972262075561932, "grad_norm": 3.3675198554992676, "learning_rate": 7.28595830199092e-07, "loss": 0.2465, "step": 11669 }, { "epoch": 0.7972945275671244, "grad_norm": 3.2511990070343018, "learning_rate": 7.281228785771574e-07, "loss": 0.2465, "step": 11670 }, { "epoch": 0.7973628475780556, "grad_norm": 3.9448060989379883, "learning_rate": 7.276500626866919e-07, "loss": 0.271, "step": 11671 }, { "epoch": 0.7974311675889868, "grad_norm": 3.3253519535064697, "learning_rate": 7.271773825508509e-07, "loss": 0.2143, "step": 11672 }, { "epoch": 0.7974994875999181, "grad_norm": 4.8788347244262695, "learning_rate": 7.267048381927774e-07, "loss": 0.2869, "step": 11673 }, { "epoch": 0.7975678076108492, "grad_norm": 2.931840419769287, "learning_rate": 7.262324296356117e-07, "loss": 0.1852, "step": 11674 }, { "epoch": 0.7976361276217804, "grad_norm": 4.591677665710449, "learning_rate": 7.257601569024864e-07, "loss": 0.1861, "step": 11675 }, { "epoch": 0.7977044476327116, "grad_norm": 4.415395259857178, "learning_rate": 7.252880200165274e-07, "loss": 0.1894, "step": 11676 }, { "epoch": 0.7977727676436428, "grad_norm": 4.425388336181641, "learning_rate": 7.248160190008541e-07, "loss": 0.2833, "step": 11677 }, { "epoch": 0.7978410876545741, "grad_norm": 3.5444376468658447, "learning_rate": 7.24344153878579e-07, "loss": 0.332, "step": 11678 }, { "epoch": 0.7979094076655052, "grad_norm": 3.5976266860961914, "learning_rate": 7.238724246728077e-07, "loss": 0.2055, "step": 11679 }, { "epoch": 0.7979777276764364, "grad_norm": 3.5429701805114746, "learning_rate": 7.234008314066411e-07, "loss": 0.2443, "step": 11680 }, { "epoch": 0.7980460476873676, "grad_norm": 3.6322526931762695, "learning_rate": 7.229293741031698e-07, "loss": 0.3062, "step": 11681 }, { "epoch": 0.7981143676982988, "grad_norm": 4.538482666015625, "learning_rate": 7.224580527854808e-07, "loss": 0.1668, "step": 11682 }, { "epoch": 0.7981826877092301, "grad_norm": 5.315415859222412, "learning_rate": 7.219868674766533e-07, "loss": 0.2519, "step": 11683 }, { "epoch": 0.7982510077201612, "grad_norm": 4.604053974151611, "learning_rate": 7.215158181997598e-07, "loss": 0.3289, "step": 11684 }, { "epoch": 0.7983193277310925, "grad_norm": 4.171956539154053, "learning_rate": 7.210449049778671e-07, "loss": 0.2683, "step": 11685 }, { "epoch": 0.7983876477420236, "grad_norm": 3.037537097930908, "learning_rate": 7.205741278340334e-07, "loss": 0.2093, "step": 11686 }, { "epoch": 0.7984559677529548, "grad_norm": 3.628713607788086, "learning_rate": 7.201034867913124e-07, "loss": 0.1652, "step": 11687 }, { "epoch": 0.7985242877638861, "grad_norm": 3.9156932830810547, "learning_rate": 7.196329818727509e-07, "loss": 0.1484, "step": 11688 }, { "epoch": 0.7985926077748172, "grad_norm": 3.3410215377807617, "learning_rate": 7.191626131013862e-07, "loss": 0.2467, "step": 11689 }, { "epoch": 0.7986609277857485, "grad_norm": 3.571499824523926, "learning_rate": 7.186923805002509e-07, "loss": 0.246, "step": 11690 }, { "epoch": 0.7987292477966796, "grad_norm": 3.5033767223358154, "learning_rate": 7.182222840923733e-07, "loss": 0.2004, "step": 11691 }, { "epoch": 0.7987975678076108, "grad_norm": 4.249207496643066, "learning_rate": 7.177523239007722e-07, "loss": 0.2383, "step": 11692 }, { "epoch": 0.7988658878185421, "grad_norm": 3.6348648071289062, "learning_rate": 7.172824999484591e-07, "loss": 0.2879, "step": 11693 }, { "epoch": 0.7989342078294732, "grad_norm": 4.29256010055542, "learning_rate": 7.168128122584407e-07, "loss": 0.2274, "step": 11694 }, { "epoch": 0.7990025278404045, "grad_norm": 3.4344089031219482, "learning_rate": 7.163432608537163e-07, "loss": 0.3161, "step": 11695 }, { "epoch": 0.7990708478513356, "grad_norm": 4.134175777435303, "learning_rate": 7.158738457572784e-07, "loss": 0.2998, "step": 11696 }, { "epoch": 0.7991391678622669, "grad_norm": 3.738616943359375, "learning_rate": 7.154045669921133e-07, "loss": 0.2347, "step": 11697 }, { "epoch": 0.7992074878731981, "grad_norm": 3.5617733001708984, "learning_rate": 7.149354245812003e-07, "loss": 0.2894, "step": 11698 }, { "epoch": 0.7992758078841292, "grad_norm": 3.425818920135498, "learning_rate": 7.144664185475119e-07, "loss": 0.2549, "step": 11699 }, { "epoch": 0.7993441278950605, "grad_norm": 2.6503567695617676, "learning_rate": 7.139975489140152e-07, "loss": 0.2219, "step": 11700 }, { "epoch": 0.7994124479059916, "grad_norm": 4.114627838134766, "learning_rate": 7.135288157036676e-07, "loss": 0.3206, "step": 11701 }, { "epoch": 0.7994807679169229, "grad_norm": 4.197556972503662, "learning_rate": 7.130602189394226e-07, "loss": 0.3171, "step": 11702 }, { "epoch": 0.7995490879278541, "grad_norm": 2.9797892570495605, "learning_rate": 7.125917586442264e-07, "loss": 0.1965, "step": 11703 }, { "epoch": 0.7996174079387852, "grad_norm": 4.950660228729248, "learning_rate": 7.121234348410175e-07, "loss": 0.2964, "step": 11704 }, { "epoch": 0.7996857279497165, "grad_norm": 5.236757278442383, "learning_rate": 7.116552475527294e-07, "loss": 0.2867, "step": 11705 }, { "epoch": 0.7997540479606476, "grad_norm": 3.7907416820526123, "learning_rate": 7.111871968022873e-07, "loss": 0.2631, "step": 11706 }, { "epoch": 0.7998223679715789, "grad_norm": 4.197178363800049, "learning_rate": 7.107192826126111e-07, "loss": 0.4156, "step": 11707 }, { "epoch": 0.7998906879825101, "grad_norm": 4.409280776977539, "learning_rate": 7.102515050066123e-07, "loss": 0.2648, "step": 11708 }, { "epoch": 0.7999590079934413, "grad_norm": 3.676041841506958, "learning_rate": 7.097838640071987e-07, "loss": 0.1753, "step": 11709 }, { "epoch": 0.8000273280043725, "grad_norm": 4.0616607666015625, "learning_rate": 7.093163596372658e-07, "loss": 0.2652, "step": 11710 }, { "epoch": 0.8000956480153036, "grad_norm": 2.735809326171875, "learning_rate": 7.088489919197097e-07, "loss": 0.1678, "step": 11711 }, { "epoch": 0.8001639680262349, "grad_norm": 5.531935214996338, "learning_rate": 7.083817608774146e-07, "loss": 0.3513, "step": 11712 }, { "epoch": 0.8002322880371661, "grad_norm": 3.628598690032959, "learning_rate": 7.079146665332607e-07, "loss": 0.2468, "step": 11713 }, { "epoch": 0.8003006080480973, "grad_norm": 3.288931369781494, "learning_rate": 7.074477089101186e-07, "loss": 0.2232, "step": 11714 }, { "epoch": 0.8003689280590285, "grad_norm": 4.788961887359619, "learning_rate": 7.069808880308544e-07, "loss": 0.3413, "step": 11715 }, { "epoch": 0.8004372480699596, "grad_norm": 4.815098762512207, "learning_rate": 7.065142039183277e-07, "loss": 0.3389, "step": 11716 }, { "epoch": 0.8005055680808909, "grad_norm": 3.5520401000976562, "learning_rate": 7.060476565953908e-07, "loss": 0.257, "step": 11717 }, { "epoch": 0.8005738880918221, "grad_norm": 2.9957079887390137, "learning_rate": 7.055812460848885e-07, "loss": 0.3466, "step": 11718 }, { "epoch": 0.8006422081027533, "grad_norm": 4.148945331573486, "learning_rate": 7.051149724096607e-07, "loss": 0.1847, "step": 11719 }, { "epoch": 0.8007105281136845, "grad_norm": 3.83373761177063, "learning_rate": 7.046488355925387e-07, "loss": 0.3363, "step": 11720 }, { "epoch": 0.8007788481246157, "grad_norm": 4.266904354095459, "learning_rate": 7.041828356563496e-07, "loss": 0.3006, "step": 11721 }, { "epoch": 0.8008471681355469, "grad_norm": 3.9868130683898926, "learning_rate": 7.037169726239102e-07, "loss": 0.3199, "step": 11722 }, { "epoch": 0.8009154881464781, "grad_norm": 3.3870792388916016, "learning_rate": 7.032512465180323e-07, "loss": 0.1992, "step": 11723 }, { "epoch": 0.8009838081574093, "grad_norm": 4.3926801681518555, "learning_rate": 7.027856573615228e-07, "loss": 0.387, "step": 11724 }, { "epoch": 0.8010521281683405, "grad_norm": 3.1395351886749268, "learning_rate": 7.023202051771811e-07, "loss": 0.2226, "step": 11725 }, { "epoch": 0.8011204481792717, "grad_norm": 4.779182434082031, "learning_rate": 7.018548899877971e-07, "loss": 0.363, "step": 11726 }, { "epoch": 0.8011887681902029, "grad_norm": 3.9830386638641357, "learning_rate": 7.013897118161571e-07, "loss": 0.2635, "step": 11727 }, { "epoch": 0.8012570882011341, "grad_norm": 4.4601545333862305, "learning_rate": 7.00924670685039e-07, "loss": 0.3313, "step": 11728 }, { "epoch": 0.8013254082120653, "grad_norm": 4.281652450561523, "learning_rate": 7.004597666172154e-07, "loss": 0.2992, "step": 11729 }, { "epoch": 0.8013937282229965, "grad_norm": 3.7946064472198486, "learning_rate": 6.999949996354509e-07, "loss": 0.2791, "step": 11730 }, { "epoch": 0.8014620482339277, "grad_norm": 5.176578998565674, "learning_rate": 6.995303697625041e-07, "loss": 0.3268, "step": 11731 }, { "epoch": 0.8015303682448589, "grad_norm": 4.660160541534424, "learning_rate": 6.990658770211268e-07, "loss": 0.2756, "step": 11732 }, { "epoch": 0.8015986882557902, "grad_norm": 5.0773515701293945, "learning_rate": 6.986015214340648e-07, "loss": 0.2658, "step": 11733 }, { "epoch": 0.8016670082667213, "grad_norm": 4.266173839569092, "learning_rate": 6.981373030240542e-07, "loss": 0.2927, "step": 11734 }, { "epoch": 0.8017353282776525, "grad_norm": 4.5573201179504395, "learning_rate": 6.976732218138282e-07, "loss": 0.2635, "step": 11735 }, { "epoch": 0.8018036482885837, "grad_norm": 4.589416027069092, "learning_rate": 6.972092778261109e-07, "loss": 0.2532, "step": 11736 }, { "epoch": 0.8018719682995149, "grad_norm": 3.1180033683776855, "learning_rate": 6.967454710836207e-07, "loss": 0.2304, "step": 11737 }, { "epoch": 0.8019402883104462, "grad_norm": 3.908674716949463, "learning_rate": 6.962818016090692e-07, "loss": 0.3123, "step": 11738 }, { "epoch": 0.8020086083213773, "grad_norm": 5.063393592834473, "learning_rate": 6.958182694251605e-07, "loss": 0.3193, "step": 11739 }, { "epoch": 0.8020769283323085, "grad_norm": 4.239829063415527, "learning_rate": 6.953548745545933e-07, "loss": 0.3129, "step": 11740 }, { "epoch": 0.8021452483432397, "grad_norm": 3.595837354660034, "learning_rate": 6.948916170200591e-07, "loss": 0.2105, "step": 11741 }, { "epoch": 0.8022135683541709, "grad_norm": 3.42499041557312, "learning_rate": 6.944284968442405e-07, "loss": 0.2946, "step": 11742 }, { "epoch": 0.8022818883651022, "grad_norm": 5.643292427062988, "learning_rate": 6.93965514049816e-07, "loss": 0.2763, "step": 11743 }, { "epoch": 0.8023502083760333, "grad_norm": 3.141411304473877, "learning_rate": 6.935026686594577e-07, "loss": 0.2287, "step": 11744 }, { "epoch": 0.8024185283869646, "grad_norm": 3.9284610748291016, "learning_rate": 6.930399606958303e-07, "loss": 0.2593, "step": 11745 }, { "epoch": 0.8024868483978957, "grad_norm": 2.852428674697876, "learning_rate": 6.925773901815899e-07, "loss": 0.2147, "step": 11746 }, { "epoch": 0.8025551684088269, "grad_norm": 5.3252387046813965, "learning_rate": 6.921149571393876e-07, "loss": 0.3254, "step": 11747 }, { "epoch": 0.8026234884197582, "grad_norm": 4.065101146697998, "learning_rate": 6.916526615918673e-07, "loss": 0.2743, "step": 11748 }, { "epoch": 0.8026918084306893, "grad_norm": 3.013655424118042, "learning_rate": 6.911905035616675e-07, "loss": 0.2407, "step": 11749 }, { "epoch": 0.8027601284416206, "grad_norm": 3.328692674636841, "learning_rate": 6.907284830714179e-07, "loss": 0.2965, "step": 11750 }, { "epoch": 0.8028284484525517, "grad_norm": 3.7990024089813232, "learning_rate": 6.902666001437428e-07, "loss": 0.2616, "step": 11751 }, { "epoch": 0.8028967684634829, "grad_norm": 3.428169012069702, "learning_rate": 6.898048548012595e-07, "loss": 0.2963, "step": 11752 }, { "epoch": 0.8029650884744142, "grad_norm": 4.553971290588379, "learning_rate": 6.893432470665787e-07, "loss": 0.293, "step": 11753 }, { "epoch": 0.8030334084853453, "grad_norm": 4.197880268096924, "learning_rate": 6.888817769623031e-07, "loss": 0.3101, "step": 11754 }, { "epoch": 0.8031017284962766, "grad_norm": 5.832830905914307, "learning_rate": 6.884204445110302e-07, "loss": 0.251, "step": 11755 }, { "epoch": 0.8031700485072077, "grad_norm": 4.555658340454102, "learning_rate": 6.879592497353502e-07, "loss": 0.185, "step": 11756 }, { "epoch": 0.803238368518139, "grad_norm": 3.563767910003662, "learning_rate": 6.874981926578468e-07, "loss": 0.2715, "step": 11757 }, { "epoch": 0.8033066885290702, "grad_norm": 3.4767651557922363, "learning_rate": 6.870372733010963e-07, "loss": 0.2461, "step": 11758 }, { "epoch": 0.8033750085400013, "grad_norm": 3.9293503761291504, "learning_rate": 6.86576491687669e-07, "loss": 0.2684, "step": 11759 }, { "epoch": 0.8034433285509326, "grad_norm": 3.6262431144714355, "learning_rate": 6.861158478401283e-07, "loss": 0.1401, "step": 11760 }, { "epoch": 0.8035116485618637, "grad_norm": 2.5845961570739746, "learning_rate": 6.856553417810313e-07, "loss": 0.1784, "step": 11761 }, { "epoch": 0.803579968572795, "grad_norm": 3.2488725185394287, "learning_rate": 6.851949735329253e-07, "loss": 0.2146, "step": 11762 }, { "epoch": 0.8036482885837262, "grad_norm": 5.449948787689209, "learning_rate": 6.847347431183558e-07, "loss": 0.3908, "step": 11763 }, { "epoch": 0.8037166085946573, "grad_norm": 2.9740548133850098, "learning_rate": 6.842746505598585e-07, "loss": 0.1897, "step": 11764 }, { "epoch": 0.8037849286055886, "grad_norm": 4.363295078277588, "learning_rate": 6.838146958799634e-07, "loss": 0.2262, "step": 11765 }, { "epoch": 0.8038532486165197, "grad_norm": 3.8768556118011475, "learning_rate": 6.833548791011915e-07, "loss": 0.296, "step": 11766 }, { "epoch": 0.803921568627451, "grad_norm": 3.9551737308502197, "learning_rate": 6.828952002460601e-07, "loss": 0.2181, "step": 11767 }, { "epoch": 0.8039898886383822, "grad_norm": 5.608160018920898, "learning_rate": 6.824356593370783e-07, "loss": 0.3126, "step": 11768 }, { "epoch": 0.8040582086493134, "grad_norm": 4.0414838790893555, "learning_rate": 6.819762563967483e-07, "loss": 0.189, "step": 11769 }, { "epoch": 0.8041265286602446, "grad_norm": 5.760669708251953, "learning_rate": 6.81516991447566e-07, "loss": 0.2747, "step": 11770 }, { "epoch": 0.8041948486711757, "grad_norm": 3.922055959701538, "learning_rate": 6.810578645120208e-07, "loss": 0.2733, "step": 11771 }, { "epoch": 0.804263168682107, "grad_norm": 4.07960844039917, "learning_rate": 6.805988756125942e-07, "loss": 0.2475, "step": 11772 }, { "epoch": 0.8043314886930382, "grad_norm": 3.8386385440826416, "learning_rate": 6.80140024771762e-07, "loss": 0.2113, "step": 11773 }, { "epoch": 0.8043998087039694, "grad_norm": 3.558718204498291, "learning_rate": 6.796813120119941e-07, "loss": 0.3486, "step": 11774 }, { "epoch": 0.8044681287149006, "grad_norm": 3.716466188430786, "learning_rate": 6.792227373557503e-07, "loss": 0.3149, "step": 11775 }, { "epoch": 0.8045364487258317, "grad_norm": 4.823958873748779, "learning_rate": 6.787643008254859e-07, "loss": 0.2427, "step": 11776 }, { "epoch": 0.804604768736763, "grad_norm": 4.236669540405273, "learning_rate": 6.783060024436511e-07, "loss": 0.3167, "step": 11777 }, { "epoch": 0.8046730887476942, "grad_norm": 4.591721057891846, "learning_rate": 6.778478422326873e-07, "loss": 0.2529, "step": 11778 }, { "epoch": 0.8047414087586254, "grad_norm": 5.184838771820068, "learning_rate": 6.773898202150281e-07, "loss": 0.2851, "step": 11779 }, { "epoch": 0.8048097287695566, "grad_norm": 3.066174268722534, "learning_rate": 6.769319364131024e-07, "loss": 0.2395, "step": 11780 }, { "epoch": 0.8048780487804879, "grad_norm": 4.349093437194824, "learning_rate": 6.764741908493309e-07, "loss": 0.2835, "step": 11781 }, { "epoch": 0.804946368791419, "grad_norm": 7.282095432281494, "learning_rate": 6.760165835461291e-07, "loss": 0.3389, "step": 11782 }, { "epoch": 0.8050146888023502, "grad_norm": 5.257335186004639, "learning_rate": 6.755591145259044e-07, "loss": 0.293, "step": 11783 }, { "epoch": 0.8050830088132814, "grad_norm": 3.643390655517578, "learning_rate": 6.751017838110578e-07, "loss": 0.2752, "step": 11784 }, { "epoch": 0.8051513288242126, "grad_norm": 2.6956796646118164, "learning_rate": 6.746445914239837e-07, "loss": 0.2385, "step": 11785 }, { "epoch": 0.8052196488351439, "grad_norm": 7.246084213256836, "learning_rate": 6.741875373870703e-07, "loss": 0.2317, "step": 11786 }, { "epoch": 0.805287968846075, "grad_norm": 4.139432430267334, "learning_rate": 6.737306217226968e-07, "loss": 0.2489, "step": 11787 }, { "epoch": 0.8053562888570062, "grad_norm": 4.166531085968018, "learning_rate": 6.732738444532379e-07, "loss": 0.2846, "step": 11788 }, { "epoch": 0.8054246088679374, "grad_norm": 5.788026332855225, "learning_rate": 6.728172056010607e-07, "loss": 0.2539, "step": 11789 }, { "epoch": 0.8054929288788686, "grad_norm": 3.730448007583618, "learning_rate": 6.723607051885258e-07, "loss": 0.217, "step": 11790 }, { "epoch": 0.8055612488897999, "grad_norm": 6.095815658569336, "learning_rate": 6.719043432379868e-07, "loss": 0.2411, "step": 11791 }, { "epoch": 0.805629568900731, "grad_norm": 5.359914302825928, "learning_rate": 6.714481197717901e-07, "loss": 0.3092, "step": 11792 }, { "epoch": 0.8056978889116623, "grad_norm": 3.5487184524536133, "learning_rate": 6.709920348122761e-07, "loss": 0.1527, "step": 11793 }, { "epoch": 0.8057662089225934, "grad_norm": 5.521548748016357, "learning_rate": 6.705360883817793e-07, "loss": 0.2665, "step": 11794 }, { "epoch": 0.8058345289335246, "grad_norm": 4.1200056076049805, "learning_rate": 6.700802805026237e-07, "loss": 0.3111, "step": 11795 }, { "epoch": 0.8059028489444559, "grad_norm": 5.435249328613281, "learning_rate": 6.696246111971294e-07, "loss": 0.3601, "step": 11796 }, { "epoch": 0.805971168955387, "grad_norm": 3.9875142574310303, "learning_rate": 6.691690804876111e-07, "loss": 0.2091, "step": 11797 }, { "epoch": 0.8060394889663183, "grad_norm": 4.050394535064697, "learning_rate": 6.687136883963749e-07, "loss": 0.2997, "step": 11798 }, { "epoch": 0.8061078089772494, "grad_norm": 3.989443302154541, "learning_rate": 6.682584349457184e-07, "loss": 0.2568, "step": 11799 }, { "epoch": 0.8061761289881806, "grad_norm": 3.556058406829834, "learning_rate": 6.678033201579349e-07, "loss": 0.2813, "step": 11800 }, { "epoch": 0.8062444489991119, "grad_norm": 4.756643295288086, "learning_rate": 6.6734834405531e-07, "loss": 0.277, "step": 11801 }, { "epoch": 0.806312769010043, "grad_norm": 5.384213924407959, "learning_rate": 6.668935066601232e-07, "loss": 0.3166, "step": 11802 }, { "epoch": 0.8063810890209743, "grad_norm": 3.768531084060669, "learning_rate": 6.664388079946464e-07, "loss": 0.223, "step": 11803 }, { "epoch": 0.8064494090319054, "grad_norm": 4.352479934692383, "learning_rate": 6.659842480811449e-07, "loss": 0.1515, "step": 11804 }, { "epoch": 0.8065177290428367, "grad_norm": 4.508915901184082, "learning_rate": 6.655298269418774e-07, "loss": 0.3418, "step": 11805 }, { "epoch": 0.8065860490537679, "grad_norm": 4.015735149383545, "learning_rate": 6.650755445990965e-07, "loss": 0.2059, "step": 11806 }, { "epoch": 0.806654369064699, "grad_norm": 4.348640441894531, "learning_rate": 6.646214010750456e-07, "loss": 0.387, "step": 11807 }, { "epoch": 0.8067226890756303, "grad_norm": 4.538684844970703, "learning_rate": 6.641673963919635e-07, "loss": 0.2684, "step": 11808 }, { "epoch": 0.8067910090865614, "grad_norm": 3.643266439437866, "learning_rate": 6.637135305720822e-07, "loss": 0.2423, "step": 11809 }, { "epoch": 0.8068593290974927, "grad_norm": 4.894707679748535, "learning_rate": 6.632598036376256e-07, "loss": 0.3409, "step": 11810 }, { "epoch": 0.8069276491084239, "grad_norm": 3.5355803966522217, "learning_rate": 6.628062156108116e-07, "loss": 0.2402, "step": 11811 }, { "epoch": 0.806995969119355, "grad_norm": 5.573309898376465, "learning_rate": 6.623527665138519e-07, "loss": 0.3705, "step": 11812 }, { "epoch": 0.8070642891302863, "grad_norm": 5.321414470672607, "learning_rate": 6.618994563689496e-07, "loss": 0.2597, "step": 11813 }, { "epoch": 0.8071326091412174, "grad_norm": 4.15818977355957, "learning_rate": 6.614462851983041e-07, "loss": 0.3757, "step": 11814 }, { "epoch": 0.8072009291521487, "grad_norm": 5.980101585388184, "learning_rate": 6.609932530241025e-07, "loss": 0.3793, "step": 11815 }, { "epoch": 0.8072692491630798, "grad_norm": 4.021087169647217, "learning_rate": 6.605403598685317e-07, "loss": 0.297, "step": 11816 }, { "epoch": 0.8073375691740111, "grad_norm": 3.0309622287750244, "learning_rate": 6.600876057537677e-07, "loss": 0.189, "step": 11817 }, { "epoch": 0.8074058891849423, "grad_norm": 4.319680690765381, "learning_rate": 6.596349907019813e-07, "loss": 0.308, "step": 11818 }, { "epoch": 0.8074742091958734, "grad_norm": 4.032246112823486, "learning_rate": 6.591825147353345e-07, "loss": 0.2143, "step": 11819 }, { "epoch": 0.8075425292068047, "grad_norm": 3.689331531524658, "learning_rate": 6.587301778759844e-07, "loss": 0.2788, "step": 11820 }, { "epoch": 0.8076108492177358, "grad_norm": 4.113719463348389, "learning_rate": 6.582779801460809e-07, "loss": 0.3542, "step": 11821 }, { "epoch": 0.8076791692286671, "grad_norm": 4.151234149932861, "learning_rate": 6.578259215677669e-07, "loss": 0.2124, "step": 11822 }, { "epoch": 0.8077474892395983, "grad_norm": 4.746397972106934, "learning_rate": 6.573740021631781e-07, "loss": 0.2263, "step": 11823 }, { "epoch": 0.8078158092505294, "grad_norm": 3.563236713409424, "learning_rate": 6.569222219544448e-07, "loss": 0.2686, "step": 11824 }, { "epoch": 0.8078841292614607, "grad_norm": 3.832479476928711, "learning_rate": 6.564705809636884e-07, "loss": 0.2525, "step": 11825 }, { "epoch": 0.8079524492723918, "grad_norm": 4.221083164215088, "learning_rate": 6.560190792130259e-07, "loss": 0.1972, "step": 11826 }, { "epoch": 0.8080207692833231, "grad_norm": 4.420125484466553, "learning_rate": 6.555677167245649e-07, "loss": 0.3832, "step": 11827 }, { "epoch": 0.8080890892942543, "grad_norm": 4.1543731689453125, "learning_rate": 6.551164935204074e-07, "loss": 0.2447, "step": 11828 }, { "epoch": 0.8081574093051855, "grad_norm": 3.342802047729492, "learning_rate": 6.546654096226481e-07, "loss": 0.2658, "step": 11829 }, { "epoch": 0.8082257293161167, "grad_norm": 4.954988956451416, "learning_rate": 6.542144650533781e-07, "loss": 0.3018, "step": 11830 }, { "epoch": 0.8082940493270478, "grad_norm": 4.86751651763916, "learning_rate": 6.537636598346763e-07, "loss": 0.2058, "step": 11831 }, { "epoch": 0.8083623693379791, "grad_norm": 3.7126967906951904, "learning_rate": 6.533129939886183e-07, "loss": 0.1846, "step": 11832 }, { "epoch": 0.8084306893489103, "grad_norm": 3.911649465560913, "learning_rate": 6.528624675372718e-07, "loss": 0.2599, "step": 11833 }, { "epoch": 0.8084990093598415, "grad_norm": 6.08276891708374, "learning_rate": 6.52412080502699e-07, "loss": 0.3199, "step": 11834 }, { "epoch": 0.8085673293707727, "grad_norm": 5.0564117431640625, "learning_rate": 6.519618329069515e-07, "loss": 0.28, "step": 11835 }, { "epoch": 0.8086356493817038, "grad_norm": 4.445830345153809, "learning_rate": 6.515117247720794e-07, "loss": 0.2898, "step": 11836 }, { "epoch": 0.8087039693926351, "grad_norm": 3.6739513874053955, "learning_rate": 6.510617561201223e-07, "loss": 0.3062, "step": 11837 }, { "epoch": 0.8087722894035663, "grad_norm": 3.8205366134643555, "learning_rate": 6.506119269731149e-07, "loss": 0.2775, "step": 11838 }, { "epoch": 0.8088406094144975, "grad_norm": 6.0272417068481445, "learning_rate": 6.501622373530821e-07, "loss": 0.2975, "step": 11839 }, { "epoch": 0.8089089294254287, "grad_norm": 6.3254570960998535, "learning_rate": 6.497126872820457e-07, "loss": 0.3652, "step": 11840 }, { "epoch": 0.80897724943636, "grad_norm": 3.6199631690979004, "learning_rate": 6.492632767820184e-07, "loss": 0.2609, "step": 11841 }, { "epoch": 0.8090455694472911, "grad_norm": 4.398098945617676, "learning_rate": 6.488140058750063e-07, "loss": 0.2983, "step": 11842 }, { "epoch": 0.8091138894582223, "grad_norm": 3.7394747734069824, "learning_rate": 6.483648745830097e-07, "loss": 0.1259, "step": 11843 }, { "epoch": 0.8091822094691535, "grad_norm": 3.464057445526123, "learning_rate": 6.479158829280208e-07, "loss": 0.1443, "step": 11844 }, { "epoch": 0.8092505294800847, "grad_norm": 3.4233977794647217, "learning_rate": 6.474670309320259e-07, "loss": 0.1818, "step": 11845 }, { "epoch": 0.809318849491016, "grad_norm": 5.158996105194092, "learning_rate": 6.470183186170041e-07, "loss": 0.3706, "step": 11846 }, { "epoch": 0.8093871695019471, "grad_norm": 4.07289457321167, "learning_rate": 6.465697460049286e-07, "loss": 0.2518, "step": 11847 }, { "epoch": 0.8094554895128783, "grad_norm": 3.7288436889648438, "learning_rate": 6.461213131177626e-07, "loss": 0.3185, "step": 11848 }, { "epoch": 0.8095238095238095, "grad_norm": 3.19106125831604, "learning_rate": 6.456730199774649e-07, "loss": 0.1723, "step": 11849 }, { "epoch": 0.8095921295347407, "grad_norm": 4.098758697509766, "learning_rate": 6.452248666059892e-07, "loss": 0.3125, "step": 11850 }, { "epoch": 0.809660449545672, "grad_norm": 3.882927656173706, "learning_rate": 6.4477685302528e-07, "loss": 0.2515, "step": 11851 }, { "epoch": 0.8097287695566031, "grad_norm": 4.901756763458252, "learning_rate": 6.443289792572738e-07, "loss": 0.3249, "step": 11852 }, { "epoch": 0.8097970895675344, "grad_norm": 4.581380367279053, "learning_rate": 6.438812453239027e-07, "loss": 0.231, "step": 11853 }, { "epoch": 0.8098654095784655, "grad_norm": 4.50883150100708, "learning_rate": 6.43433651247091e-07, "loss": 0.2384, "step": 11854 }, { "epoch": 0.8099337295893967, "grad_norm": 4.853156566619873, "learning_rate": 6.429861970487561e-07, "loss": 0.276, "step": 11855 }, { "epoch": 0.810002049600328, "grad_norm": 3.330031633377075, "learning_rate": 6.425388827508093e-07, "loss": 0.2747, "step": 11856 }, { "epoch": 0.8100703696112591, "grad_norm": 3.714813709259033, "learning_rate": 6.420917083751532e-07, "loss": 0.2862, "step": 11857 }, { "epoch": 0.8101386896221904, "grad_norm": 3.173605442047119, "learning_rate": 6.416446739436853e-07, "loss": 0.168, "step": 11858 }, { "epoch": 0.8102070096331215, "grad_norm": 3.4050443172454834, "learning_rate": 6.411977794782972e-07, "loss": 0.2711, "step": 11859 }, { "epoch": 0.8102753296440527, "grad_norm": 5.322225093841553, "learning_rate": 6.407510250008692e-07, "loss": 0.2336, "step": 11860 }, { "epoch": 0.810343649654984, "grad_norm": 4.279419898986816, "learning_rate": 6.403044105332796e-07, "loss": 0.2747, "step": 11861 }, { "epoch": 0.8104119696659151, "grad_norm": 2.9114670753479004, "learning_rate": 6.398579360973964e-07, "loss": 0.1609, "step": 11862 }, { "epoch": 0.8104802896768464, "grad_norm": 3.3949368000030518, "learning_rate": 6.394116017150856e-07, "loss": 0.2473, "step": 11863 }, { "epoch": 0.8105486096877775, "grad_norm": 3.462618350982666, "learning_rate": 6.389654074081996e-07, "loss": 0.2623, "step": 11864 }, { "epoch": 0.8106169296987088, "grad_norm": 4.294506549835205, "learning_rate": 6.385193531985882e-07, "loss": 0.2954, "step": 11865 }, { "epoch": 0.81068524970964, "grad_norm": 4.537067413330078, "learning_rate": 6.380734391080942e-07, "loss": 0.2974, "step": 11866 }, { "epoch": 0.8107535697205711, "grad_norm": 3.395277976989746, "learning_rate": 6.376276651585534e-07, "loss": 0.2476, "step": 11867 }, { "epoch": 0.8108218897315024, "grad_norm": 3.027827262878418, "learning_rate": 6.371820313717912e-07, "loss": 0.1587, "step": 11868 }, { "epoch": 0.8108902097424335, "grad_norm": 2.98775053024292, "learning_rate": 6.36736537769632e-07, "loss": 0.2612, "step": 11869 }, { "epoch": 0.8109585297533648, "grad_norm": 5.198929309844971, "learning_rate": 6.362911843738896e-07, "loss": 0.2742, "step": 11870 }, { "epoch": 0.811026849764296, "grad_norm": 4.239519119262695, "learning_rate": 6.358459712063731e-07, "loss": 0.2721, "step": 11871 }, { "epoch": 0.8110951697752271, "grad_norm": 3.3166987895965576, "learning_rate": 6.354008982888808e-07, "loss": 0.1942, "step": 11872 }, { "epoch": 0.8111634897861584, "grad_norm": 5.422000408172607, "learning_rate": 6.349559656432077e-07, "loss": 0.3071, "step": 11873 }, { "epoch": 0.8112318097970895, "grad_norm": 4.338289737701416, "learning_rate": 6.345111732911418e-07, "loss": 0.2496, "step": 11874 }, { "epoch": 0.8113001298080208, "grad_norm": 3.6897501945495605, "learning_rate": 6.340665212544625e-07, "loss": 0.2469, "step": 11875 }, { "epoch": 0.811368449818952, "grad_norm": 4.249990940093994, "learning_rate": 6.336220095549438e-07, "loss": 0.2743, "step": 11876 }, { "epoch": 0.8114367698298832, "grad_norm": 3.3392715454101562, "learning_rate": 6.331776382143521e-07, "loss": 0.2409, "step": 11877 }, { "epoch": 0.8115050898408144, "grad_norm": 3.663379430770874, "learning_rate": 6.327334072544468e-07, "loss": 0.2791, "step": 11878 }, { "epoch": 0.8115734098517455, "grad_norm": 3.872199773788452, "learning_rate": 6.322893166969821e-07, "loss": 0.2045, "step": 11879 }, { "epoch": 0.8116417298626768, "grad_norm": 5.630107402801514, "learning_rate": 6.31845366563702e-07, "loss": 0.2592, "step": 11880 }, { "epoch": 0.811710049873608, "grad_norm": 3.1557271480560303, "learning_rate": 6.314015568763457e-07, "loss": 0.224, "step": 11881 }, { "epoch": 0.8117783698845392, "grad_norm": 3.3988258838653564, "learning_rate": 6.309578876566458e-07, "loss": 0.2653, "step": 11882 }, { "epoch": 0.8118466898954704, "grad_norm": 5.633166313171387, "learning_rate": 6.305143589263291e-07, "loss": 0.2306, "step": 11883 }, { "epoch": 0.8119150099064015, "grad_norm": 3.236332416534424, "learning_rate": 6.300709707071123e-07, "loss": 0.2363, "step": 11884 }, { "epoch": 0.8119833299173328, "grad_norm": 3.006850004196167, "learning_rate": 6.296277230207074e-07, "loss": 0.2127, "step": 11885 }, { "epoch": 0.812051649928264, "grad_norm": 4.662961483001709, "learning_rate": 6.291846158888188e-07, "loss": 0.3498, "step": 11886 }, { "epoch": 0.8121199699391952, "grad_norm": 4.781907558441162, "learning_rate": 6.287416493331456e-07, "loss": 0.3204, "step": 11887 }, { "epoch": 0.8121882899501264, "grad_norm": 4.990349769592285, "learning_rate": 6.282988233753763e-07, "loss": 0.3151, "step": 11888 }, { "epoch": 0.8122566099610576, "grad_norm": 8.539173126220703, "learning_rate": 6.278561380371964e-07, "loss": 0.2835, "step": 11889 }, { "epoch": 0.8123249299719888, "grad_norm": 4.0266876220703125, "learning_rate": 6.274135933402834e-07, "loss": 0.301, "step": 11890 }, { "epoch": 0.81239324998292, "grad_norm": 4.512084484100342, "learning_rate": 6.269711893063077e-07, "loss": 0.3806, "step": 11891 }, { "epoch": 0.8124615699938512, "grad_norm": 3.7428786754608154, "learning_rate": 6.265289259569309e-07, "loss": 0.2797, "step": 11892 }, { "epoch": 0.8125298900047824, "grad_norm": 7.424208164215088, "learning_rate": 6.260868033138108e-07, "loss": 0.2818, "step": 11893 }, { "epoch": 0.8125982100157136, "grad_norm": 6.774956703186035, "learning_rate": 6.256448213985969e-07, "loss": 0.3313, "step": 11894 }, { "epoch": 0.8126665300266448, "grad_norm": 3.905618906021118, "learning_rate": 6.252029802329313e-07, "loss": 0.2328, "step": 11895 }, { "epoch": 0.812734850037576, "grad_norm": 4.171415328979492, "learning_rate": 6.247612798384505e-07, "loss": 0.3898, "step": 11896 }, { "epoch": 0.8128031700485072, "grad_norm": 3.336057424545288, "learning_rate": 6.24319720236783e-07, "loss": 0.2198, "step": 11897 }, { "epoch": 0.8128714900594384, "grad_norm": 3.5855021476745605, "learning_rate": 6.238783014495507e-07, "loss": 0.2472, "step": 11898 }, { "epoch": 0.8129398100703696, "grad_norm": 3.545793056488037, "learning_rate": 6.2343702349837e-07, "loss": 0.2976, "step": 11899 }, { "epoch": 0.8130081300813008, "grad_norm": 3.7501680850982666, "learning_rate": 6.22995886404847e-07, "loss": 0.1697, "step": 11900 }, { "epoch": 0.8130764500922321, "grad_norm": 4.108203411102295, "learning_rate": 6.225548901905833e-07, "loss": 0.2315, "step": 11901 }, { "epoch": 0.8131447701031632, "grad_norm": 2.7199277877807617, "learning_rate": 6.221140348771747e-07, "loss": 0.2183, "step": 11902 }, { "epoch": 0.8132130901140944, "grad_norm": 5.152907371520996, "learning_rate": 6.216733204862093e-07, "loss": 0.3555, "step": 11903 }, { "epoch": 0.8132814101250256, "grad_norm": 6.173268795013428, "learning_rate": 6.212327470392653e-07, "loss": 0.2595, "step": 11904 }, { "epoch": 0.8133497301359568, "grad_norm": 3.1993963718414307, "learning_rate": 6.207923145579176e-07, "loss": 0.1957, "step": 11905 }, { "epoch": 0.8134180501468881, "grad_norm": 4.392627239227295, "learning_rate": 6.203520230637329e-07, "loss": 0.3049, "step": 11906 }, { "epoch": 0.8134863701578192, "grad_norm": 5.349631309509277, "learning_rate": 6.199118725782716e-07, "loss": 0.3472, "step": 11907 }, { "epoch": 0.8135546901687504, "grad_norm": 4.542244911193848, "learning_rate": 6.19471863123086e-07, "loss": 0.3144, "step": 11908 }, { "epoch": 0.8136230101796816, "grad_norm": 4.686586856842041, "learning_rate": 6.190319947197225e-07, "loss": 0.1744, "step": 11909 }, { "epoch": 0.8136913301906128, "grad_norm": 4.604970932006836, "learning_rate": 6.185922673897203e-07, "loss": 0.2263, "step": 11910 }, { "epoch": 0.8137596502015441, "grad_norm": 2.8955469131469727, "learning_rate": 6.181526811546113e-07, "loss": 0.2156, "step": 11911 }, { "epoch": 0.8138279702124752, "grad_norm": 3.270078659057617, "learning_rate": 6.177132360359227e-07, "loss": 0.2078, "step": 11912 }, { "epoch": 0.8138962902234065, "grad_norm": 6.177737712860107, "learning_rate": 6.172739320551702e-07, "loss": 0.2804, "step": 11913 }, { "epoch": 0.8139646102343376, "grad_norm": 3.044426441192627, "learning_rate": 6.16834769233867e-07, "loss": 0.2285, "step": 11914 }, { "epoch": 0.8140329302452688, "grad_norm": 4.6441330909729, "learning_rate": 6.163957475935164e-07, "loss": 0.2834, "step": 11915 }, { "epoch": 0.8141012502562001, "grad_norm": 3.4421730041503906, "learning_rate": 6.159568671556186e-07, "loss": 0.1911, "step": 11916 }, { "epoch": 0.8141695702671312, "grad_norm": 3.467859983444214, "learning_rate": 6.155181279416624e-07, "loss": 0.2209, "step": 11917 }, { "epoch": 0.8142378902780625, "grad_norm": 3.4379446506500244, "learning_rate": 6.150795299731318e-07, "loss": 0.2436, "step": 11918 }, { "epoch": 0.8143062102889936, "grad_norm": 3.808419704437256, "learning_rate": 6.146410732715048e-07, "loss": 0.2285, "step": 11919 }, { "epoch": 0.8143745302999248, "grad_norm": 4.462516784667969, "learning_rate": 6.142027578582513e-07, "loss": 0.1982, "step": 11920 }, { "epoch": 0.8144428503108561, "grad_norm": 4.096871376037598, "learning_rate": 6.137645837548323e-07, "loss": 0.2649, "step": 11921 }, { "epoch": 0.8145111703217872, "grad_norm": 5.745120525360107, "learning_rate": 6.133265509827069e-07, "loss": 0.4261, "step": 11922 }, { "epoch": 0.8145794903327185, "grad_norm": 4.906059741973877, "learning_rate": 6.128886595633226e-07, "loss": 0.284, "step": 11923 }, { "epoch": 0.8146478103436496, "grad_norm": 4.084592819213867, "learning_rate": 6.124509095181239e-07, "loss": 0.3028, "step": 11924 }, { "epoch": 0.8147161303545809, "grad_norm": 3.318873405456543, "learning_rate": 6.120133008685437e-07, "loss": 0.2542, "step": 11925 }, { "epoch": 0.8147844503655121, "grad_norm": 4.5023417472839355, "learning_rate": 6.115758336360112e-07, "loss": 0.2526, "step": 11926 }, { "epoch": 0.8148527703764432, "grad_norm": 3.322695016860962, "learning_rate": 6.111385078419488e-07, "loss": 0.2343, "step": 11927 }, { "epoch": 0.8149210903873745, "grad_norm": 4.227212429046631, "learning_rate": 6.10701323507771e-07, "loss": 0.2282, "step": 11928 }, { "epoch": 0.8149894103983056, "grad_norm": 4.695022106170654, "learning_rate": 6.102642806548849e-07, "loss": 0.2651, "step": 11929 }, { "epoch": 0.8150577304092369, "grad_norm": 3.624864101409912, "learning_rate": 6.098273793046922e-07, "loss": 0.2451, "step": 11930 }, { "epoch": 0.8151260504201681, "grad_norm": 3.553471326828003, "learning_rate": 6.093906194785862e-07, "loss": 0.2769, "step": 11931 }, { "epoch": 0.8151943704310993, "grad_norm": 2.671509027481079, "learning_rate": 6.089540011979549e-07, "loss": 0.1832, "step": 11932 }, { "epoch": 0.8152626904420305, "grad_norm": 5.04869270324707, "learning_rate": 6.085175244841768e-07, "loss": 0.2201, "step": 11933 }, { "epoch": 0.8153310104529616, "grad_norm": 3.611729145050049, "learning_rate": 6.080811893586255e-07, "loss": 0.2782, "step": 11934 }, { "epoch": 0.8153993304638929, "grad_norm": 3.617985486984253, "learning_rate": 6.076449958426662e-07, "loss": 0.2327, "step": 11935 }, { "epoch": 0.8154676504748241, "grad_norm": 5.2344160079956055, "learning_rate": 6.072089439576615e-07, "loss": 0.3311, "step": 11936 }, { "epoch": 0.8155359704857553, "grad_norm": 3.2951221466064453, "learning_rate": 6.067730337249605e-07, "loss": 0.264, "step": 11937 }, { "epoch": 0.8156042904966865, "grad_norm": 3.996466875076294, "learning_rate": 6.063372651659099e-07, "loss": 0.2869, "step": 11938 }, { "epoch": 0.8156726105076176, "grad_norm": 4.209892272949219, "learning_rate": 6.059016383018472e-07, "loss": 0.161, "step": 11939 }, { "epoch": 0.8157409305185489, "grad_norm": 3.8611974716186523, "learning_rate": 6.054661531541061e-07, "loss": 0.1859, "step": 11940 }, { "epoch": 0.8158092505294801, "grad_norm": 3.580810546875, "learning_rate": 6.05030809744007e-07, "loss": 0.2839, "step": 11941 }, { "epoch": 0.8158775705404113, "grad_norm": 3.567640781402588, "learning_rate": 6.045956080928715e-07, "loss": 0.2401, "step": 11942 }, { "epoch": 0.8159458905513425, "grad_norm": 4.243797302246094, "learning_rate": 6.041605482220089e-07, "loss": 0.3104, "step": 11943 }, { "epoch": 0.8160142105622737, "grad_norm": 3.7531018257141113, "learning_rate": 6.037256301527236e-07, "loss": 0.1894, "step": 11944 }, { "epoch": 0.8160825305732049, "grad_norm": 4.282856464385986, "learning_rate": 6.032908539063106e-07, "loss": 0.3286, "step": 11945 }, { "epoch": 0.8161508505841361, "grad_norm": 2.9934022426605225, "learning_rate": 6.028562195040611e-07, "loss": 0.1254, "step": 11946 }, { "epoch": 0.8162191705950673, "grad_norm": 3.4312996864318848, "learning_rate": 6.024217269672575e-07, "loss": 0.1794, "step": 11947 }, { "epoch": 0.8162874906059985, "grad_norm": 3.0764331817626953, "learning_rate": 6.019873763171759e-07, "loss": 0.223, "step": 11948 }, { "epoch": 0.8163558106169297, "grad_norm": 3.2699472904205322, "learning_rate": 6.015531675750854e-07, "loss": 0.3039, "step": 11949 }, { "epoch": 0.8164241306278609, "grad_norm": 4.431440353393555, "learning_rate": 6.011191007622478e-07, "loss": 0.2891, "step": 11950 }, { "epoch": 0.8164924506387921, "grad_norm": 4.206689834594727, "learning_rate": 6.006851758999188e-07, "loss": 0.2459, "step": 11951 }, { "epoch": 0.8165607706497233, "grad_norm": 4.723163604736328, "learning_rate": 6.002513930093467e-07, "loss": 0.2571, "step": 11952 }, { "epoch": 0.8166290906606545, "grad_norm": 3.525559186935425, "learning_rate": 5.998177521117712e-07, "loss": 0.2586, "step": 11953 }, { "epoch": 0.8166974106715857, "grad_norm": 4.704823017120361, "learning_rate": 5.993842532284268e-07, "loss": 0.284, "step": 11954 }, { "epoch": 0.8167657306825169, "grad_norm": 4.114904880523682, "learning_rate": 5.989508963805421e-07, "loss": 0.265, "step": 11955 }, { "epoch": 0.8168340506934482, "grad_norm": 4.566954135894775, "learning_rate": 5.98517681589338e-07, "loss": 0.2063, "step": 11956 }, { "epoch": 0.8169023707043793, "grad_norm": 3.3893043994903564, "learning_rate": 5.980846088760255e-07, "loss": 0.2795, "step": 11957 }, { "epoch": 0.8169706907153105, "grad_norm": 4.169647693634033, "learning_rate": 5.976516782618124e-07, "loss": 0.2503, "step": 11958 }, { "epoch": 0.8170390107262417, "grad_norm": 4.088161468505859, "learning_rate": 5.972188897678979e-07, "loss": 0.2858, "step": 11959 }, { "epoch": 0.8171073307371729, "grad_norm": 3.2778570652008057, "learning_rate": 5.967862434154748e-07, "loss": 0.1745, "step": 11960 }, { "epoch": 0.8171756507481042, "grad_norm": 4.695367813110352, "learning_rate": 5.963537392257282e-07, "loss": 0.2076, "step": 11961 }, { "epoch": 0.8172439707590353, "grad_norm": 4.138233661651611, "learning_rate": 5.959213772198367e-07, "loss": 0.265, "step": 11962 }, { "epoch": 0.8173122907699665, "grad_norm": 3.3116202354431152, "learning_rate": 5.954891574189724e-07, "loss": 0.2347, "step": 11963 }, { "epoch": 0.8173806107808977, "grad_norm": 2.6420114040374756, "learning_rate": 5.950570798443007e-07, "loss": 0.2058, "step": 11964 }, { "epoch": 0.8174489307918289, "grad_norm": 3.8212404251098633, "learning_rate": 5.946251445169771e-07, "loss": 0.2405, "step": 11965 }, { "epoch": 0.8175172508027602, "grad_norm": 2.8120410442352295, "learning_rate": 5.941933514581535e-07, "loss": 0.1594, "step": 11966 }, { "epoch": 0.8175855708136913, "grad_norm": 3.9778995513916016, "learning_rate": 5.937617006889737e-07, "loss": 0.2219, "step": 11967 }, { "epoch": 0.8176538908246226, "grad_norm": 3.2511515617370605, "learning_rate": 5.933301922305745e-07, "loss": 0.2349, "step": 11968 }, { "epoch": 0.8177222108355537, "grad_norm": 3.447026252746582, "learning_rate": 5.928988261040855e-07, "loss": 0.345, "step": 11969 }, { "epoch": 0.8177905308464849, "grad_norm": 4.78444242477417, "learning_rate": 5.9246760233063e-07, "loss": 0.2492, "step": 11970 }, { "epoch": 0.8178588508574162, "grad_norm": 4.220332622528076, "learning_rate": 5.920365209313236e-07, "loss": 0.2256, "step": 11971 }, { "epoch": 0.8179271708683473, "grad_norm": 3.426906108856201, "learning_rate": 5.91605581927275e-07, "loss": 0.2233, "step": 11972 }, { "epoch": 0.8179954908792786, "grad_norm": 4.376595497131348, "learning_rate": 5.911747853395874e-07, "loss": 0.2927, "step": 11973 }, { "epoch": 0.8180638108902097, "grad_norm": 3.756305456161499, "learning_rate": 5.907441311893528e-07, "loss": 0.389, "step": 11974 }, { "epoch": 0.8181321309011409, "grad_norm": 4.623882293701172, "learning_rate": 5.903136194976624e-07, "loss": 0.2513, "step": 11975 }, { "epoch": 0.8182004509120722, "grad_norm": 3.8095831871032715, "learning_rate": 5.898832502855957e-07, "loss": 0.282, "step": 11976 }, { "epoch": 0.8182687709230033, "grad_norm": 3.569298505783081, "learning_rate": 5.894530235742277e-07, "loss": 0.2369, "step": 11977 }, { "epoch": 0.8183370909339346, "grad_norm": 3.4054219722747803, "learning_rate": 5.89022939384624e-07, "loss": 0.3007, "step": 11978 }, { "epoch": 0.8184054109448657, "grad_norm": 5.211380958557129, "learning_rate": 5.885929977378455e-07, "loss": 0.2362, "step": 11979 }, { "epoch": 0.818473730955797, "grad_norm": 5.041520595550537, "learning_rate": 5.881631986549453e-07, "loss": 0.378, "step": 11980 }, { "epoch": 0.8185420509667282, "grad_norm": 4.166159152984619, "learning_rate": 5.877335421569693e-07, "loss": 0.2259, "step": 11981 }, { "epoch": 0.8186103709776593, "grad_norm": 4.2458906173706055, "learning_rate": 5.873040282649566e-07, "loss": 0.2837, "step": 11982 }, { "epoch": 0.8186786909885906, "grad_norm": 4.203566551208496, "learning_rate": 5.868746569999399e-07, "loss": 0.316, "step": 11983 }, { "epoch": 0.8187470109995217, "grad_norm": 3.0696496963500977, "learning_rate": 5.864454283829438e-07, "loss": 0.2527, "step": 11984 }, { "epoch": 0.818815331010453, "grad_norm": 4.876514911651611, "learning_rate": 5.860163424349876e-07, "loss": 0.2261, "step": 11985 }, { "epoch": 0.8188836510213842, "grad_norm": 4.3699188232421875, "learning_rate": 5.855873991770804e-07, "loss": 0.3311, "step": 11986 }, { "epoch": 0.8189519710323153, "grad_norm": 4.045523643493652, "learning_rate": 5.851585986302283e-07, "loss": 0.3854, "step": 11987 }, { "epoch": 0.8190202910432466, "grad_norm": 3.511807441711426, "learning_rate": 5.847299408154263e-07, "loss": 0.174, "step": 11988 }, { "epoch": 0.8190886110541777, "grad_norm": 3.894982099533081, "learning_rate": 5.843014257536685e-07, "loss": 0.2774, "step": 11989 }, { "epoch": 0.819156931065109, "grad_norm": 3.5038132667541504, "learning_rate": 5.838730534659346e-07, "loss": 0.3449, "step": 11990 }, { "epoch": 0.8192252510760402, "grad_norm": 4.606349945068359, "learning_rate": 5.83444823973202e-07, "loss": 0.3039, "step": 11991 }, { "epoch": 0.8192935710869714, "grad_norm": 3.6992528438568115, "learning_rate": 5.830167372964404e-07, "loss": 0.2807, "step": 11992 }, { "epoch": 0.8193618910979026, "grad_norm": 4.996673583984375, "learning_rate": 5.825887934566114e-07, "loss": 0.2836, "step": 11993 }, { "epoch": 0.8194302111088337, "grad_norm": 4.040063381195068, "learning_rate": 5.821609924746705e-07, "loss": 0.322, "step": 11994 }, { "epoch": 0.819498531119765, "grad_norm": 3.499386787414551, "learning_rate": 5.817333343715664e-07, "loss": 0.2147, "step": 11995 }, { "epoch": 0.8195668511306962, "grad_norm": 3.5174660682678223, "learning_rate": 5.813058191682398e-07, "loss": 0.2101, "step": 11996 }, { "epoch": 0.8196351711416274, "grad_norm": 4.73719596862793, "learning_rate": 5.808784468856261e-07, "loss": 0.2957, "step": 11997 }, { "epoch": 0.8197034911525586, "grad_norm": 4.126672744750977, "learning_rate": 5.804512175446509e-07, "loss": 0.2217, "step": 11998 }, { "epoch": 0.8197718111634897, "grad_norm": 4.219008445739746, "learning_rate": 5.800241311662355e-07, "loss": 0.2829, "step": 11999 }, { "epoch": 0.819840131174421, "grad_norm": 3.518916606903076, "learning_rate": 5.795971877712928e-07, "loss": 0.2684, "step": 12000 }, { "epoch": 0.8199084511853522, "grad_norm": 6.463168621063232, "learning_rate": 5.791703873807292e-07, "loss": 0.3817, "step": 12001 }, { "epoch": 0.8199767711962834, "grad_norm": 3.020716667175293, "learning_rate": 5.787437300154443e-07, "loss": 0.1704, "step": 12002 }, { "epoch": 0.8200450912072146, "grad_norm": 4.559600830078125, "learning_rate": 5.783172156963299e-07, "loss": 0.3641, "step": 12003 }, { "epoch": 0.8201134112181458, "grad_norm": 3.4295217990875244, "learning_rate": 5.778908444442714e-07, "loss": 0.201, "step": 12004 }, { "epoch": 0.820181731229077, "grad_norm": 3.7174315452575684, "learning_rate": 5.774646162801486e-07, "loss": 0.2734, "step": 12005 }, { "epoch": 0.8202500512400082, "grad_norm": 3.5541326999664307, "learning_rate": 5.7703853122483e-07, "loss": 0.2546, "step": 12006 }, { "epoch": 0.8203183712509394, "grad_norm": 4.39409875869751, "learning_rate": 5.766125892991801e-07, "loss": 0.2064, "step": 12007 }, { "epoch": 0.8203866912618706, "grad_norm": 3.966017961502075, "learning_rate": 5.761867905240586e-07, "loss": 0.1896, "step": 12008 }, { "epoch": 0.8204550112728018, "grad_norm": 2.529573440551758, "learning_rate": 5.757611349203152e-07, "loss": 0.1908, "step": 12009 }, { "epoch": 0.820523331283733, "grad_norm": 4.52035665512085, "learning_rate": 5.753356225087914e-07, "loss": 0.3596, "step": 12010 }, { "epoch": 0.8205916512946642, "grad_norm": 2.8224031925201416, "learning_rate": 5.749102533103242e-07, "loss": 0.1933, "step": 12011 }, { "epoch": 0.8206599713055954, "grad_norm": 3.066297769546509, "learning_rate": 5.74485027345743e-07, "loss": 0.2403, "step": 12012 }, { "epoch": 0.8207282913165266, "grad_norm": 4.468993186950684, "learning_rate": 5.7405994463587e-07, "loss": 0.2807, "step": 12013 }, { "epoch": 0.8207966113274578, "grad_norm": 4.669862747192383, "learning_rate": 5.736350052015202e-07, "loss": 0.1712, "step": 12014 }, { "epoch": 0.820864931338389, "grad_norm": 4.486301898956299, "learning_rate": 5.732102090635017e-07, "loss": 0.3358, "step": 12015 }, { "epoch": 0.8209332513493203, "grad_norm": 4.769626617431641, "learning_rate": 5.727855562426162e-07, "loss": 0.2706, "step": 12016 }, { "epoch": 0.8210015713602514, "grad_norm": 5.290122032165527, "learning_rate": 5.723610467596579e-07, "loss": 0.1993, "step": 12017 }, { "epoch": 0.8210698913711826, "grad_norm": 4.351301670074463, "learning_rate": 5.719366806354126e-07, "loss": 0.259, "step": 12018 }, { "epoch": 0.8211382113821138, "grad_norm": 4.874310493469238, "learning_rate": 5.715124578906611e-07, "loss": 0.3193, "step": 12019 }, { "epoch": 0.821206531393045, "grad_norm": 3.124760866165161, "learning_rate": 5.710883785461769e-07, "loss": 0.2823, "step": 12020 }, { "epoch": 0.8212748514039763, "grad_norm": 5.95000696182251, "learning_rate": 5.706644426227253e-07, "loss": 0.2869, "step": 12021 }, { "epoch": 0.8213431714149074, "grad_norm": 5.682680130004883, "learning_rate": 5.702406501410661e-07, "loss": 0.3021, "step": 12022 }, { "epoch": 0.8214114914258386, "grad_norm": 3.5618624687194824, "learning_rate": 5.698170011219506e-07, "loss": 0.3152, "step": 12023 }, { "epoch": 0.8214798114367698, "grad_norm": 4.555853366851807, "learning_rate": 5.69393495586124e-07, "loss": 0.3815, "step": 12024 }, { "epoch": 0.821548131447701, "grad_norm": 3.9283249378204346, "learning_rate": 5.689701335543253e-07, "loss": 0.2942, "step": 12025 }, { "epoch": 0.8216164514586323, "grad_norm": 3.226714611053467, "learning_rate": 5.685469150472839e-07, "loss": 0.2182, "step": 12026 }, { "epoch": 0.8216847714695634, "grad_norm": 4.2356767654418945, "learning_rate": 5.68123840085723e-07, "loss": 0.3017, "step": 12027 }, { "epoch": 0.8217530914804947, "grad_norm": 4.671561241149902, "learning_rate": 5.677009086903613e-07, "loss": 0.3002, "step": 12028 }, { "epoch": 0.8218214114914258, "grad_norm": 5.436834812164307, "learning_rate": 5.672781208819092e-07, "loss": 0.2503, "step": 12029 }, { "epoch": 0.821889731502357, "grad_norm": 4.148519515991211, "learning_rate": 5.668554766810674e-07, "loss": 0.2864, "step": 12030 }, { "epoch": 0.8219580515132883, "grad_norm": 3.257420778274536, "learning_rate": 5.664329761085327e-07, "loss": 0.197, "step": 12031 }, { "epoch": 0.8220263715242194, "grad_norm": 4.8830742835998535, "learning_rate": 5.660106191849934e-07, "loss": 0.2238, "step": 12032 }, { "epoch": 0.8220946915351507, "grad_norm": 4.335320472717285, "learning_rate": 5.655884059311321e-07, "loss": 0.2935, "step": 12033 }, { "epoch": 0.8221630115460818, "grad_norm": 6.638853549957275, "learning_rate": 5.651663363676224e-07, "loss": 0.3738, "step": 12034 }, { "epoch": 0.822231331557013, "grad_norm": 3.297380208969116, "learning_rate": 5.647444105151323e-07, "loss": 0.2444, "step": 12035 }, { "epoch": 0.8222996515679443, "grad_norm": 6.075325965881348, "learning_rate": 5.643226283943228e-07, "loss": 0.2688, "step": 12036 }, { "epoch": 0.8223679715788754, "grad_norm": 3.2327470779418945, "learning_rate": 5.639009900258474e-07, "loss": 0.1461, "step": 12037 }, { "epoch": 0.8224362915898067, "grad_norm": 3.1591134071350098, "learning_rate": 5.63479495430352e-07, "loss": 0.2376, "step": 12038 }, { "epoch": 0.8225046116007378, "grad_norm": 3.260180950164795, "learning_rate": 5.630581446284764e-07, "loss": 0.2176, "step": 12039 }, { "epoch": 0.8225729316116691, "grad_norm": 4.014407634735107, "learning_rate": 5.626369376408522e-07, "loss": 0.2527, "step": 12040 }, { "epoch": 0.8226412516226003, "grad_norm": 5.408356666564941, "learning_rate": 5.622158744881064e-07, "loss": 0.3125, "step": 12041 }, { "epoch": 0.8227095716335314, "grad_norm": 4.047483921051025, "learning_rate": 5.617949551908573e-07, "loss": 0.2924, "step": 12042 }, { "epoch": 0.8227778916444627, "grad_norm": 4.69095516204834, "learning_rate": 5.61374179769715e-07, "loss": 0.2556, "step": 12043 }, { "epoch": 0.8228462116553938, "grad_norm": 2.6860055923461914, "learning_rate": 5.60953548245284e-07, "loss": 0.1994, "step": 12044 }, { "epoch": 0.8229145316663251, "grad_norm": 4.605876922607422, "learning_rate": 5.605330606381614e-07, "loss": 0.2143, "step": 12045 }, { "epoch": 0.8229828516772563, "grad_norm": 3.318118095397949, "learning_rate": 5.601127169689383e-07, "loss": 0.2707, "step": 12046 }, { "epoch": 0.8230511716881874, "grad_norm": 3.727794885635376, "learning_rate": 5.59692517258197e-07, "loss": 0.2536, "step": 12047 }, { "epoch": 0.8231194916991187, "grad_norm": 3.398280143737793, "learning_rate": 5.592724615265137e-07, "loss": 0.232, "step": 12048 }, { "epoch": 0.8231878117100498, "grad_norm": 4.293588638305664, "learning_rate": 5.588525497944576e-07, "loss": 0.2938, "step": 12049 }, { "epoch": 0.8232561317209811, "grad_norm": 3.1952896118164062, "learning_rate": 5.584327820825916e-07, "loss": 0.1699, "step": 12050 }, { "epoch": 0.8233244517319123, "grad_norm": 3.1126489639282227, "learning_rate": 5.580131584114682e-07, "loss": 0.2999, "step": 12051 }, { "epoch": 0.8233927717428435, "grad_norm": 4.7747907638549805, "learning_rate": 5.575936788016373e-07, "loss": 0.2752, "step": 12052 }, { "epoch": 0.8234610917537747, "grad_norm": 4.086633682250977, "learning_rate": 5.571743432736387e-07, "loss": 0.2313, "step": 12053 }, { "epoch": 0.8235294117647058, "grad_norm": 3.8735995292663574, "learning_rate": 5.567551518480064e-07, "loss": 0.3142, "step": 12054 }, { "epoch": 0.8235977317756371, "grad_norm": 3.5501463413238525, "learning_rate": 5.563361045452673e-07, "loss": 0.1913, "step": 12055 }, { "epoch": 0.8236660517865683, "grad_norm": 4.469837188720703, "learning_rate": 5.559172013859409e-07, "loss": 0.364, "step": 12056 }, { "epoch": 0.8237343717974995, "grad_norm": 4.307292461395264, "learning_rate": 5.5549844239054e-07, "loss": 0.2551, "step": 12057 }, { "epoch": 0.8238026918084307, "grad_norm": 3.6235673427581787, "learning_rate": 5.550798275795706e-07, "loss": 0.2405, "step": 12058 }, { "epoch": 0.8238710118193618, "grad_norm": 1.7352144718170166, "learning_rate": 5.546613569735294e-07, "loss": 0.1133, "step": 12059 }, { "epoch": 0.8239393318302931, "grad_norm": 5.292325496673584, "learning_rate": 5.542430305929081e-07, "loss": 0.4057, "step": 12060 }, { "epoch": 0.8240076518412243, "grad_norm": 5.199055194854736, "learning_rate": 5.53824848458193e-07, "loss": 0.2375, "step": 12061 }, { "epoch": 0.8240759718521555, "grad_norm": 3.430443525314331, "learning_rate": 5.534068105898608e-07, "loss": 0.2267, "step": 12062 }, { "epoch": 0.8241442918630867, "grad_norm": 4.571047782897949, "learning_rate": 5.529889170083804e-07, "loss": 0.32, "step": 12063 }, { "epoch": 0.824212611874018, "grad_norm": 3.1039810180664062, "learning_rate": 5.525711677342154e-07, "loss": 0.236, "step": 12064 }, { "epoch": 0.8242809318849491, "grad_norm": 3.9890999794006348, "learning_rate": 5.521535627878221e-07, "loss": 0.2471, "step": 12065 }, { "epoch": 0.8243492518958803, "grad_norm": 4.501777648925781, "learning_rate": 5.517361021896495e-07, "loss": 0.285, "step": 12066 }, { "epoch": 0.8244175719068115, "grad_norm": 3.8174123764038086, "learning_rate": 5.513187859601395e-07, "loss": 0.3011, "step": 12067 }, { "epoch": 0.8244858919177427, "grad_norm": 5.3194122314453125, "learning_rate": 5.50901614119727e-07, "loss": 0.3354, "step": 12068 }, { "epoch": 0.824554211928674, "grad_norm": 4.08754301071167, "learning_rate": 5.5048458668884e-07, "loss": 0.21, "step": 12069 }, { "epoch": 0.8246225319396051, "grad_norm": 3.9220259189605713, "learning_rate": 5.500677036878997e-07, "loss": 0.2319, "step": 12070 }, { "epoch": 0.8246908519505363, "grad_norm": 4.889350414276123, "learning_rate": 5.496509651373181e-07, "loss": 0.405, "step": 12071 }, { "epoch": 0.8247591719614675, "grad_norm": 4.507148265838623, "learning_rate": 5.492343710575028e-07, "loss": 0.3801, "step": 12072 }, { "epoch": 0.8248274919723987, "grad_norm": 4.034814357757568, "learning_rate": 5.488179214688535e-07, "loss": 0.247, "step": 12073 }, { "epoch": 0.82489581198333, "grad_norm": 4.612389087677002, "learning_rate": 5.484016163917625e-07, "loss": 0.2109, "step": 12074 }, { "epoch": 0.8249641319942611, "grad_norm": 5.146475791931152, "learning_rate": 5.479854558466146e-07, "loss": 0.2086, "step": 12075 }, { "epoch": 0.8250324520051924, "grad_norm": 4.672728538513184, "learning_rate": 5.47569439853789e-07, "loss": 0.3562, "step": 12076 }, { "epoch": 0.8251007720161235, "grad_norm": 4.312057018280029, "learning_rate": 5.47153568433656e-07, "loss": 0.4373, "step": 12077 }, { "epoch": 0.8251690920270547, "grad_norm": 5.072148323059082, "learning_rate": 5.467378416065813e-07, "loss": 0.2635, "step": 12078 }, { "epoch": 0.825237412037986, "grad_norm": 5.7668585777282715, "learning_rate": 5.463222593929201e-07, "loss": 0.3184, "step": 12079 }, { "epoch": 0.8253057320489171, "grad_norm": 3.2519266605377197, "learning_rate": 5.459068218130218e-07, "loss": 0.2913, "step": 12080 }, { "epoch": 0.8253740520598484, "grad_norm": 5.345665454864502, "learning_rate": 5.454915288872317e-07, "loss": 0.2431, "step": 12081 }, { "epoch": 0.8254423720707795, "grad_norm": 3.9763736724853516, "learning_rate": 5.450763806358852e-07, "loss": 0.3211, "step": 12082 }, { "epoch": 0.8255106920817107, "grad_norm": 3.218405246734619, "learning_rate": 5.446613770793092e-07, "loss": 0.2184, "step": 12083 }, { "epoch": 0.825579012092642, "grad_norm": 4.811639785766602, "learning_rate": 5.442465182378263e-07, "loss": 0.3605, "step": 12084 }, { "epoch": 0.8256473321035731, "grad_norm": 4.668636798858643, "learning_rate": 5.438318041317514e-07, "loss": 0.1964, "step": 12085 }, { "epoch": 0.8257156521145044, "grad_norm": 4.625537395477295, "learning_rate": 5.434172347813915e-07, "loss": 0.3521, "step": 12086 }, { "epoch": 0.8257839721254355, "grad_norm": 6.275354385375977, "learning_rate": 5.430028102070473e-07, "loss": 0.2797, "step": 12087 }, { "epoch": 0.8258522921363668, "grad_norm": 5.646534442901611, "learning_rate": 5.425885304290117e-07, "loss": 0.4118, "step": 12088 }, { "epoch": 0.825920612147298, "grad_norm": 4.56453800201416, "learning_rate": 5.421743954675708e-07, "loss": 0.3401, "step": 12089 }, { "epoch": 0.8259889321582291, "grad_norm": 2.351850748062134, "learning_rate": 5.41760405343005e-07, "loss": 0.1367, "step": 12090 }, { "epoch": 0.8260572521691604, "grad_norm": 3.9667916297912598, "learning_rate": 5.413465600755845e-07, "loss": 0.3386, "step": 12091 }, { "epoch": 0.8261255721800915, "grad_norm": 2.7683029174804688, "learning_rate": 5.409328596855751e-07, "loss": 0.2182, "step": 12092 }, { "epoch": 0.8261938921910228, "grad_norm": 4.0979743003845215, "learning_rate": 5.405193041932333e-07, "loss": 0.2098, "step": 12093 }, { "epoch": 0.826262212201954, "grad_norm": 4.034728050231934, "learning_rate": 5.401058936188127e-07, "loss": 0.2857, "step": 12094 }, { "epoch": 0.8263305322128851, "grad_norm": 3.357855796813965, "learning_rate": 5.396926279825544e-07, "loss": 0.2567, "step": 12095 }, { "epoch": 0.8263988522238164, "grad_norm": 4.132151126861572, "learning_rate": 5.392795073046957e-07, "loss": 0.2887, "step": 12096 }, { "epoch": 0.8264671722347475, "grad_norm": 3.4202544689178467, "learning_rate": 5.388665316054662e-07, "loss": 0.2346, "step": 12097 }, { "epoch": 0.8265354922456788, "grad_norm": 4.146193504333496, "learning_rate": 5.384537009050889e-07, "loss": 0.2465, "step": 12098 }, { "epoch": 0.82660381225661, "grad_norm": 4.400948524475098, "learning_rate": 5.380410152237766e-07, "loss": 0.327, "step": 12099 }, { "epoch": 0.8266721322675412, "grad_norm": 4.013453006744385, "learning_rate": 5.376284745817395e-07, "loss": 0.2399, "step": 12100 }, { "epoch": 0.8267404522784724, "grad_norm": 3.4610376358032227, "learning_rate": 5.372160789991782e-07, "loss": 0.2485, "step": 12101 }, { "epoch": 0.8268087722894035, "grad_norm": 3.472769260406494, "learning_rate": 5.368038284962876e-07, "loss": 0.2322, "step": 12102 }, { "epoch": 0.8268770923003348, "grad_norm": 3.378303289413452, "learning_rate": 5.363917230932524e-07, "loss": 0.2368, "step": 12103 }, { "epoch": 0.826945412311266, "grad_norm": 5.562863826751709, "learning_rate": 5.359797628102538e-07, "loss": 0.4502, "step": 12104 }, { "epoch": 0.8270137323221972, "grad_norm": 4.839356422424316, "learning_rate": 5.355679476674635e-07, "loss": 0.2299, "step": 12105 }, { "epoch": 0.8270820523331284, "grad_norm": 3.2486791610717773, "learning_rate": 5.351562776850477e-07, "loss": 0.1895, "step": 12106 }, { "epoch": 0.8271503723440595, "grad_norm": 4.319755554199219, "learning_rate": 5.347447528831646e-07, "loss": 0.2371, "step": 12107 }, { "epoch": 0.8272186923549908, "grad_norm": 3.572139263153076, "learning_rate": 5.343333732819655e-07, "loss": 0.3372, "step": 12108 }, { "epoch": 0.827287012365922, "grad_norm": 3.5850327014923096, "learning_rate": 5.339221389015942e-07, "loss": 0.3968, "step": 12109 }, { "epoch": 0.8273553323768532, "grad_norm": 4.4691901206970215, "learning_rate": 5.33511049762188e-07, "loss": 0.2693, "step": 12110 }, { "epoch": 0.8274236523877844, "grad_norm": 3.517934560775757, "learning_rate": 5.331001058838781e-07, "loss": 0.2895, "step": 12111 }, { "epoch": 0.8274919723987156, "grad_norm": 4.218206882476807, "learning_rate": 5.326893072867854e-07, "loss": 0.2766, "step": 12112 }, { "epoch": 0.8275602924096468, "grad_norm": 4.962223529815674, "learning_rate": 5.322786539910253e-07, "loss": 0.2102, "step": 12113 }, { "epoch": 0.827628612420578, "grad_norm": 4.785215854644775, "learning_rate": 5.31868146016708e-07, "loss": 0.3539, "step": 12114 }, { "epoch": 0.8276969324315092, "grad_norm": 5.362037181854248, "learning_rate": 5.314577833839358e-07, "loss": 0.2362, "step": 12115 }, { "epoch": 0.8277652524424404, "grad_norm": 3.960407257080078, "learning_rate": 5.310475661128005e-07, "loss": 0.2333, "step": 12116 }, { "epoch": 0.8278335724533716, "grad_norm": 3.706890344619751, "learning_rate": 5.306374942233908e-07, "loss": 0.2529, "step": 12117 }, { "epoch": 0.8279018924643028, "grad_norm": 3.4611668586730957, "learning_rate": 5.302275677357864e-07, "loss": 0.2665, "step": 12118 }, { "epoch": 0.827970212475234, "grad_norm": 4.250833034515381, "learning_rate": 5.29817786670061e-07, "loss": 0.2659, "step": 12119 }, { "epoch": 0.8280385324861652, "grad_norm": 3.917539119720459, "learning_rate": 5.294081510462794e-07, "loss": 0.269, "step": 12120 }, { "epoch": 0.8281068524970964, "grad_norm": 4.044164180755615, "learning_rate": 5.289986608845011e-07, "loss": 0.205, "step": 12121 }, { "epoch": 0.8281751725080276, "grad_norm": 3.604050397872925, "learning_rate": 5.28589316204778e-07, "loss": 0.2562, "step": 12122 }, { "epoch": 0.8282434925189588, "grad_norm": 3.282517910003662, "learning_rate": 5.281801170271546e-07, "loss": 0.2656, "step": 12123 }, { "epoch": 0.8283118125298901, "grad_norm": 3.3120388984680176, "learning_rate": 5.277710633716676e-07, "loss": 0.2481, "step": 12124 }, { "epoch": 0.8283801325408212, "grad_norm": 3.2510528564453125, "learning_rate": 5.273621552583472e-07, "loss": 0.22, "step": 12125 }, { "epoch": 0.8284484525517524, "grad_norm": 6.237862586975098, "learning_rate": 5.269533927072171e-07, "loss": 0.2501, "step": 12126 }, { "epoch": 0.8285167725626836, "grad_norm": 3.916692018508911, "learning_rate": 5.265447757382934e-07, "loss": 0.2124, "step": 12127 }, { "epoch": 0.8285850925736148, "grad_norm": 3.4219584465026855, "learning_rate": 5.261363043715844e-07, "loss": 0.3406, "step": 12128 }, { "epoch": 0.8286534125845461, "grad_norm": 3.388745069503784, "learning_rate": 5.257279786270923e-07, "loss": 0.2315, "step": 12129 }, { "epoch": 0.8287217325954772, "grad_norm": 3.4618821144104004, "learning_rate": 5.253197985248116e-07, "loss": 0.2327, "step": 12130 }, { "epoch": 0.8287900526064084, "grad_norm": 4.410609245300293, "learning_rate": 5.249117640847309e-07, "loss": 0.2074, "step": 12131 }, { "epoch": 0.8288583726173396, "grad_norm": 3.6137099266052246, "learning_rate": 5.245038753268279e-07, "loss": 0.2423, "step": 12132 }, { "epoch": 0.8289266926282708, "grad_norm": 4.493754863739014, "learning_rate": 5.240961322710781e-07, "loss": 0.2434, "step": 12133 }, { "epoch": 0.8289950126392021, "grad_norm": 3.9832870960235596, "learning_rate": 5.236885349374471e-07, "loss": 0.3675, "step": 12134 }, { "epoch": 0.8290633326501332, "grad_norm": 3.522702932357788, "learning_rate": 5.232810833458943e-07, "loss": 0.2076, "step": 12135 }, { "epoch": 0.8291316526610645, "grad_norm": 4.407016277313232, "learning_rate": 5.228737775163704e-07, "loss": 0.2903, "step": 12136 }, { "epoch": 0.8291999726719956, "grad_norm": 5.209782600402832, "learning_rate": 5.224666174688204e-07, "loss": 0.291, "step": 12137 }, { "epoch": 0.8292682926829268, "grad_norm": 4.414952754974365, "learning_rate": 5.22059603223182e-07, "loss": 0.2745, "step": 12138 }, { "epoch": 0.829336612693858, "grad_norm": 4.603198528289795, "learning_rate": 5.216527347993856e-07, "loss": 0.2348, "step": 12139 }, { "epoch": 0.8294049327047892, "grad_norm": 3.6135964393615723, "learning_rate": 5.212460122173547e-07, "loss": 0.1744, "step": 12140 }, { "epoch": 0.8294732527157205, "grad_norm": 4.020873546600342, "learning_rate": 5.208394354970055e-07, "loss": 0.2195, "step": 12141 }, { "epoch": 0.8295415727266516, "grad_norm": 5.735432147979736, "learning_rate": 5.204330046582464e-07, "loss": 0.3126, "step": 12142 }, { "epoch": 0.8296098927375828, "grad_norm": 3.6942362785339355, "learning_rate": 5.200267197209807e-07, "loss": 0.3486, "step": 12143 }, { "epoch": 0.829678212748514, "grad_norm": 3.4533214569091797, "learning_rate": 5.196205807051009e-07, "loss": 0.2776, "step": 12144 }, { "epoch": 0.8297465327594452, "grad_norm": 3.5757458209991455, "learning_rate": 5.192145876304957e-07, "loss": 0.2914, "step": 12145 }, { "epoch": 0.8298148527703765, "grad_norm": 4.924195289611816, "learning_rate": 5.188087405170445e-07, "loss": 0.3389, "step": 12146 }, { "epoch": 0.8298831727813076, "grad_norm": 3.6861987113952637, "learning_rate": 5.184030393846234e-07, "loss": 0.3265, "step": 12147 }, { "epoch": 0.8299514927922389, "grad_norm": 3.9105820655822754, "learning_rate": 5.179974842530955e-07, "loss": 0.2375, "step": 12148 }, { "epoch": 0.83001981280317, "grad_norm": 5.252742290496826, "learning_rate": 5.175920751423211e-07, "loss": 0.2158, "step": 12149 }, { "epoch": 0.8300881328141012, "grad_norm": 5.397589683532715, "learning_rate": 5.171868120721519e-07, "loss": 0.2923, "step": 12150 }, { "epoch": 0.8301564528250325, "grad_norm": 4.985729217529297, "learning_rate": 5.167816950624328e-07, "loss": 0.2163, "step": 12151 }, { "epoch": 0.8302247728359636, "grad_norm": 5.0846757888793945, "learning_rate": 5.163767241329997e-07, "loss": 0.2368, "step": 12152 }, { "epoch": 0.8302930928468949, "grad_norm": 5.056708335876465, "learning_rate": 5.159718993036845e-07, "loss": 0.2631, "step": 12153 }, { "epoch": 0.830361412857826, "grad_norm": 5.179205417633057, "learning_rate": 5.155672205943105e-07, "loss": 0.2011, "step": 12154 }, { "epoch": 0.8304297328687572, "grad_norm": 2.9799532890319824, "learning_rate": 5.151626880246943e-07, "loss": 0.192, "step": 12155 }, { "epoch": 0.8304980528796885, "grad_norm": 5.780887126922607, "learning_rate": 5.147583016146425e-07, "loss": 0.2503, "step": 12156 }, { "epoch": 0.8305663728906196, "grad_norm": 4.515979766845703, "learning_rate": 5.143540613839584e-07, "loss": 0.3085, "step": 12157 }, { "epoch": 0.8306346929015509, "grad_norm": 4.070642948150635, "learning_rate": 5.139499673524363e-07, "loss": 0.388, "step": 12158 }, { "epoch": 0.830703012912482, "grad_norm": 4.370751857757568, "learning_rate": 5.135460195398639e-07, "loss": 0.3198, "step": 12159 }, { "epoch": 0.8307713329234133, "grad_norm": 4.749828338623047, "learning_rate": 5.131422179660206e-07, "loss": 0.2523, "step": 12160 }, { "epoch": 0.8308396529343445, "grad_norm": 4.090279579162598, "learning_rate": 5.127385626506806e-07, "loss": 0.3036, "step": 12161 }, { "epoch": 0.8309079729452756, "grad_norm": 3.4045727252960205, "learning_rate": 5.123350536136091e-07, "loss": 0.2159, "step": 12162 }, { "epoch": 0.8309762929562069, "grad_norm": 5.549650192260742, "learning_rate": 5.119316908745656e-07, "loss": 0.3147, "step": 12163 }, { "epoch": 0.831044612967138, "grad_norm": 3.8531088829040527, "learning_rate": 5.115284744533009e-07, "loss": 0.287, "step": 12164 }, { "epoch": 0.8311129329780693, "grad_norm": 3.7810750007629395, "learning_rate": 5.111254043695592e-07, "loss": 0.2862, "step": 12165 }, { "epoch": 0.8311812529890005, "grad_norm": 3.667038679122925, "learning_rate": 5.107224806430776e-07, "loss": 0.2669, "step": 12166 }, { "epoch": 0.8312495729999316, "grad_norm": 4.246114253997803, "learning_rate": 5.103197032935886e-07, "loss": 0.2478, "step": 12167 }, { "epoch": 0.8313178930108629, "grad_norm": 4.397198677062988, "learning_rate": 5.099170723408129e-07, "loss": 0.2658, "step": 12168 }, { "epoch": 0.831386213021794, "grad_norm": 3.86932635307312, "learning_rate": 5.095145878044665e-07, "loss": 0.1985, "step": 12169 }, { "epoch": 0.8314545330327253, "grad_norm": 5.432529926300049, "learning_rate": 5.091122497042583e-07, "loss": 0.2171, "step": 12170 }, { "epoch": 0.8315228530436565, "grad_norm": 4.044617176055908, "learning_rate": 5.087100580598896e-07, "loss": 0.2737, "step": 12171 }, { "epoch": 0.8315911730545877, "grad_norm": 3.6076290607452393, "learning_rate": 5.083080128910552e-07, "loss": 0.2687, "step": 12172 }, { "epoch": 0.8316594930655189, "grad_norm": 4.927012920379639, "learning_rate": 5.079061142174413e-07, "loss": 0.2752, "step": 12173 }, { "epoch": 0.83172781307645, "grad_norm": 4.021744728088379, "learning_rate": 5.075043620587287e-07, "loss": 0.2721, "step": 12174 }, { "epoch": 0.8317961330873813, "grad_norm": 3.3054428100585938, "learning_rate": 5.071027564345893e-07, "loss": 0.2571, "step": 12175 }, { "epoch": 0.8318644530983125, "grad_norm": 4.56431245803833, "learning_rate": 5.0670129736469e-07, "loss": 0.224, "step": 12176 }, { "epoch": 0.8319327731092437, "grad_norm": 5.011070728302002, "learning_rate": 5.062999848686878e-07, "loss": 0.2108, "step": 12177 }, { "epoch": 0.8320010931201749, "grad_norm": 3.948835611343384, "learning_rate": 5.058988189662339e-07, "loss": 0.3416, "step": 12178 }, { "epoch": 0.832069413131106, "grad_norm": 3.9633636474609375, "learning_rate": 5.054977996769725e-07, "loss": 0.2286, "step": 12179 }, { "epoch": 0.8321377331420373, "grad_norm": 4.682118892669678, "learning_rate": 5.05096927020542e-07, "loss": 0.2173, "step": 12180 }, { "epoch": 0.8322060531529685, "grad_norm": 3.1794581413269043, "learning_rate": 5.0469620101657e-07, "loss": 0.1776, "step": 12181 }, { "epoch": 0.8322743731638997, "grad_norm": 4.006266117095947, "learning_rate": 5.0429562168468e-07, "loss": 0.2309, "step": 12182 }, { "epoch": 0.8323426931748309, "grad_norm": 4.672455310821533, "learning_rate": 5.03895189044487e-07, "loss": 0.2788, "step": 12183 }, { "epoch": 0.8324110131857622, "grad_norm": 4.2708234786987305, "learning_rate": 5.034949031156004e-07, "loss": 0.4093, "step": 12184 }, { "epoch": 0.8324793331966933, "grad_norm": 4.010239601135254, "learning_rate": 5.030947639176181e-07, "loss": 0.2435, "step": 12185 }, { "epoch": 0.8325476532076245, "grad_norm": 3.58585262298584, "learning_rate": 5.026947714701366e-07, "loss": 0.2361, "step": 12186 }, { "epoch": 0.8326159732185557, "grad_norm": 3.381993532180786, "learning_rate": 5.022949257927413e-07, "loss": 0.2412, "step": 12187 }, { "epoch": 0.8326842932294869, "grad_norm": 3.8813652992248535, "learning_rate": 5.018952269050132e-07, "loss": 0.2085, "step": 12188 }, { "epoch": 0.8327526132404182, "grad_norm": 4.934501647949219, "learning_rate": 5.01495674826522e-07, "loss": 0.2546, "step": 12189 }, { "epoch": 0.8328209332513493, "grad_norm": 3.41672682762146, "learning_rate": 5.01096269576834e-07, "loss": 0.224, "step": 12190 }, { "epoch": 0.8328892532622805, "grad_norm": 4.038393497467041, "learning_rate": 5.00697011175507e-07, "loss": 0.2399, "step": 12191 }, { "epoch": 0.8329575732732117, "grad_norm": 5.729598522186279, "learning_rate": 5.002978996420916e-07, "loss": 0.2426, "step": 12192 }, { "epoch": 0.8330258932841429, "grad_norm": 3.9626224040985107, "learning_rate": 4.998989349961309e-07, "loss": 0.2755, "step": 12193 }, { "epoch": 0.8330942132950742, "grad_norm": 2.8724944591522217, "learning_rate": 4.995001172571618e-07, "loss": 0.2199, "step": 12194 }, { "epoch": 0.8331625333060053, "grad_norm": 3.5597333908081055, "learning_rate": 4.991014464447127e-07, "loss": 0.284, "step": 12195 }, { "epoch": 0.8332308533169366, "grad_norm": 3.4461894035339355, "learning_rate": 4.987029225783064e-07, "loss": 0.2603, "step": 12196 }, { "epoch": 0.8332991733278677, "grad_norm": 3.897944450378418, "learning_rate": 4.983045456774561e-07, "loss": 0.2442, "step": 12197 }, { "epoch": 0.8333674933387989, "grad_norm": 4.163177013397217, "learning_rate": 4.979063157616705e-07, "loss": 0.3415, "step": 12198 }, { "epoch": 0.8334358133497302, "grad_norm": 3.946047782897949, "learning_rate": 4.975082328504481e-07, "loss": 0.2711, "step": 12199 }, { "epoch": 0.8335041333606613, "grad_norm": 4.939801216125488, "learning_rate": 4.971102969632849e-07, "loss": 0.2687, "step": 12200 }, { "epoch": 0.8335724533715926, "grad_norm": 3.427445888519287, "learning_rate": 4.967125081196644e-07, "loss": 0.3096, "step": 12201 }, { "epoch": 0.8336407733825237, "grad_norm": 4.3301849365234375, "learning_rate": 4.963148663390659e-07, "loss": 0.3352, "step": 12202 }, { "epoch": 0.8337090933934549, "grad_norm": 3.6813273429870605, "learning_rate": 4.959173716409611e-07, "loss": 0.3038, "step": 12203 }, { "epoch": 0.8337774134043862, "grad_norm": 3.058382749557495, "learning_rate": 4.955200240448147e-07, "loss": 0.2413, "step": 12204 }, { "epoch": 0.8338457334153173, "grad_norm": 4.459123611450195, "learning_rate": 4.951228235700819e-07, "loss": 0.1946, "step": 12205 }, { "epoch": 0.8339140534262486, "grad_norm": 4.08712100982666, "learning_rate": 4.947257702362142e-07, "loss": 0.3553, "step": 12206 }, { "epoch": 0.8339823734371797, "grad_norm": 3.827493190765381, "learning_rate": 4.94328864062654e-07, "loss": 0.294, "step": 12207 }, { "epoch": 0.834050693448111, "grad_norm": 3.709463357925415, "learning_rate": 4.939321050688375e-07, "loss": 0.2384, "step": 12208 }, { "epoch": 0.8341190134590422, "grad_norm": 4.3088603019714355, "learning_rate": 4.935354932741914e-07, "loss": 0.2229, "step": 12209 }, { "epoch": 0.8341873334699733, "grad_norm": 3.5764524936676025, "learning_rate": 4.931390286981373e-07, "loss": 0.2069, "step": 12210 }, { "epoch": 0.8342556534809046, "grad_norm": 3.640205144882202, "learning_rate": 4.927427113600888e-07, "loss": 0.2087, "step": 12211 }, { "epoch": 0.8343239734918357, "grad_norm": 4.089507102966309, "learning_rate": 4.923465412794529e-07, "loss": 0.2702, "step": 12212 }, { "epoch": 0.834392293502767, "grad_norm": 2.520965576171875, "learning_rate": 4.919505184756292e-07, "loss": 0.1573, "step": 12213 }, { "epoch": 0.8344606135136982, "grad_norm": 3.2736659049987793, "learning_rate": 4.915546429680095e-07, "loss": 0.1318, "step": 12214 }, { "epoch": 0.8345289335246293, "grad_norm": 4.807438373565674, "learning_rate": 4.911589147759784e-07, "loss": 0.2902, "step": 12215 }, { "epoch": 0.8345972535355606, "grad_norm": 5.409212589263916, "learning_rate": 4.907633339189154e-07, "loss": 0.289, "step": 12216 }, { "epoch": 0.8346655735464917, "grad_norm": 3.447503089904785, "learning_rate": 4.903679004161888e-07, "loss": 0.2525, "step": 12217 }, { "epoch": 0.834733893557423, "grad_norm": 3.421720266342163, "learning_rate": 4.899726142871627e-07, "loss": 0.2304, "step": 12218 }, { "epoch": 0.8348022135683542, "grad_norm": 4.013233661651611, "learning_rate": 4.895774755511929e-07, "loss": 0.164, "step": 12219 }, { "epoch": 0.8348705335792854, "grad_norm": 3.7871880531311035, "learning_rate": 4.891824842276304e-07, "loss": 0.2844, "step": 12220 }, { "epoch": 0.8349388535902166, "grad_norm": 4.247317790985107, "learning_rate": 4.887876403358143e-07, "loss": 0.2363, "step": 12221 }, { "epoch": 0.8350071736011477, "grad_norm": 4.342831611633301, "learning_rate": 4.8839294389508e-07, "loss": 0.2459, "step": 12222 }, { "epoch": 0.835075493612079, "grad_norm": 3.3480446338653564, "learning_rate": 4.879983949247549e-07, "loss": 0.1997, "step": 12223 }, { "epoch": 0.8351438136230102, "grad_norm": 3.581383466720581, "learning_rate": 4.876039934441592e-07, "loss": 0.2357, "step": 12224 }, { "epoch": 0.8352121336339414, "grad_norm": 3.426955461502075, "learning_rate": 4.87209739472605e-07, "loss": 0.2277, "step": 12225 }, { "epoch": 0.8352804536448726, "grad_norm": 4.725368499755859, "learning_rate": 4.868156330293983e-07, "loss": 0.2808, "step": 12226 }, { "epoch": 0.8353487736558037, "grad_norm": 4.240091800689697, "learning_rate": 4.864216741338372e-07, "loss": 0.2913, "step": 12227 }, { "epoch": 0.835417093666735, "grad_norm": 3.521833658218384, "learning_rate": 4.86027862805214e-07, "loss": 0.2981, "step": 12228 }, { "epoch": 0.8354854136776662, "grad_norm": 3.3986291885375977, "learning_rate": 4.856341990628111e-07, "loss": 0.2543, "step": 12229 }, { "epoch": 0.8355537336885974, "grad_norm": 4.7426886558532715, "learning_rate": 4.852406829259053e-07, "loss": 0.2515, "step": 12230 }, { "epoch": 0.8356220536995286, "grad_norm": 4.47467565536499, "learning_rate": 4.848473144137668e-07, "loss": 0.2251, "step": 12231 }, { "epoch": 0.8356903737104598, "grad_norm": 5.5296950340271, "learning_rate": 4.844540935456572e-07, "loss": 0.3443, "step": 12232 }, { "epoch": 0.835758693721391, "grad_norm": 3.361476421356201, "learning_rate": 4.840610203408314e-07, "loss": 0.2005, "step": 12233 }, { "epoch": 0.8358270137323222, "grad_norm": 2.908346176147461, "learning_rate": 4.836680948185381e-07, "loss": 0.2286, "step": 12234 }, { "epoch": 0.8358953337432534, "grad_norm": 4.01909065246582, "learning_rate": 4.832753169980169e-07, "loss": 0.2422, "step": 12235 }, { "epoch": 0.8359636537541846, "grad_norm": 4.222534656524658, "learning_rate": 4.828826868985024e-07, "loss": 0.2849, "step": 12236 }, { "epoch": 0.8360319737651158, "grad_norm": 4.621047019958496, "learning_rate": 4.824902045392184e-07, "loss": 0.2956, "step": 12237 }, { "epoch": 0.836100293776047, "grad_norm": 3.6713714599609375, "learning_rate": 4.820978699393845e-07, "loss": 0.2381, "step": 12238 }, { "epoch": 0.8361686137869782, "grad_norm": 4.455852031707764, "learning_rate": 4.817056831182136e-07, "loss": 0.2706, "step": 12239 }, { "epoch": 0.8362369337979094, "grad_norm": 3.901113510131836, "learning_rate": 4.813136440949092e-07, "loss": 0.288, "step": 12240 }, { "epoch": 0.8363052538088406, "grad_norm": 3.674811601638794, "learning_rate": 4.809217528886688e-07, "loss": 0.2102, "step": 12241 }, { "epoch": 0.8363735738197718, "grad_norm": 5.135401725769043, "learning_rate": 4.805300095186815e-07, "loss": 0.2275, "step": 12242 }, { "epoch": 0.836441893830703, "grad_norm": 5.189859390258789, "learning_rate": 4.801384140041299e-07, "loss": 0.2409, "step": 12243 }, { "epoch": 0.8365102138416343, "grad_norm": 3.8898961544036865, "learning_rate": 4.797469663641903e-07, "loss": 0.1801, "step": 12244 }, { "epoch": 0.8365785338525654, "grad_norm": 4.999659538269043, "learning_rate": 4.793556666180301e-07, "loss": 0.2247, "step": 12245 }, { "epoch": 0.8366468538634966, "grad_norm": 4.559657096862793, "learning_rate": 4.789645147848104e-07, "loss": 0.2529, "step": 12246 }, { "epoch": 0.8367151738744278, "grad_norm": 5.28312349319458, "learning_rate": 4.785735108836852e-07, "loss": 0.2494, "step": 12247 }, { "epoch": 0.836783493885359, "grad_norm": 4.094806671142578, "learning_rate": 4.781826549338007e-07, "loss": 0.2947, "step": 12248 }, { "epoch": 0.8368518138962903, "grad_norm": 2.894554376602173, "learning_rate": 4.777919469542963e-07, "loss": 0.2585, "step": 12249 }, { "epoch": 0.8369201339072214, "grad_norm": 3.605480909347534, "learning_rate": 4.774013869643034e-07, "loss": 0.3517, "step": 12250 }, { "epoch": 0.8369884539181526, "grad_norm": 5.918199062347412, "learning_rate": 4.77010974982947e-07, "loss": 0.3286, "step": 12251 }, { "epoch": 0.8370567739290838, "grad_norm": 4.5465593338012695, "learning_rate": 4.7662071102934337e-07, "loss": 0.2113, "step": 12252 }, { "epoch": 0.837125093940015, "grad_norm": 4.4714741706848145, "learning_rate": 4.762305951226054e-07, "loss": 0.2448, "step": 12253 }, { "epoch": 0.8371934139509463, "grad_norm": 2.949164628982544, "learning_rate": 4.758406272818339e-07, "loss": 0.1513, "step": 12254 }, { "epoch": 0.8372617339618774, "grad_norm": 4.484110355377197, "learning_rate": 4.754508075261248e-07, "loss": 0.3526, "step": 12255 }, { "epoch": 0.8373300539728087, "grad_norm": 4.542839050292969, "learning_rate": 4.7506113587456735e-07, "loss": 0.3414, "step": 12256 }, { "epoch": 0.8373983739837398, "grad_norm": 3.127547264099121, "learning_rate": 4.7467161234624264e-07, "loss": 0.2452, "step": 12257 }, { "epoch": 0.837466693994671, "grad_norm": 3.896686553955078, "learning_rate": 4.7428223696022296e-07, "loss": 0.1981, "step": 12258 }, { "epoch": 0.8375350140056023, "grad_norm": 4.483586311340332, "learning_rate": 4.7389300973557657e-07, "loss": 0.3451, "step": 12259 }, { "epoch": 0.8376033340165334, "grad_norm": 3.7963693141937256, "learning_rate": 4.735039306913628e-07, "loss": 0.207, "step": 12260 }, { "epoch": 0.8376716540274647, "grad_norm": 4.1590447425842285, "learning_rate": 4.731149998466345e-07, "loss": 0.3423, "step": 12261 }, { "epoch": 0.8377399740383958, "grad_norm": 2.6326749324798584, "learning_rate": 4.7272621722043494e-07, "loss": 0.2069, "step": 12262 }, { "epoch": 0.837808294049327, "grad_norm": 8.079724311828613, "learning_rate": 4.7233758283180226e-07, "loss": 0.3801, "step": 12263 }, { "epoch": 0.8378766140602583, "grad_norm": 2.7749946117401123, "learning_rate": 4.719490966997674e-07, "loss": 0.2402, "step": 12264 }, { "epoch": 0.8379449340711894, "grad_norm": 5.507546901702881, "learning_rate": 4.7156075884335287e-07, "loss": 0.4111, "step": 12265 }, { "epoch": 0.8380132540821207, "grad_norm": 3.0877506732940674, "learning_rate": 4.7117256928157477e-07, "loss": 0.1714, "step": 12266 }, { "epoch": 0.8380815740930518, "grad_norm": 3.7474448680877686, "learning_rate": 4.7078452803344213e-07, "loss": 0.286, "step": 12267 }, { "epoch": 0.8381498941039831, "grad_norm": 4.499997615814209, "learning_rate": 4.7039663511795585e-07, "loss": 0.1747, "step": 12268 }, { "epoch": 0.8382182141149143, "grad_norm": 3.852092742919922, "learning_rate": 4.700088905541109e-07, "loss": 0.1979, "step": 12269 }, { "epoch": 0.8382865341258454, "grad_norm": 5.328375339508057, "learning_rate": 4.696212943608928e-07, "loss": 0.3007, "step": 12270 }, { "epoch": 0.8383548541367767, "grad_norm": 4.613188743591309, "learning_rate": 4.6923384655728087e-07, "loss": 0.2808, "step": 12271 }, { "epoch": 0.8384231741477078, "grad_norm": 3.4016520977020264, "learning_rate": 4.688465471622486e-07, "loss": 0.2736, "step": 12272 }, { "epoch": 0.8384914941586391, "grad_norm": 4.463062286376953, "learning_rate": 4.6845939619476165e-07, "loss": 0.2756, "step": 12273 }, { "epoch": 0.8385598141695703, "grad_norm": 4.297812461853027, "learning_rate": 4.6807239367377573e-07, "loss": 0.2229, "step": 12274 }, { "epoch": 0.8386281341805014, "grad_norm": 5.3201775550842285, "learning_rate": 4.676855396182425e-07, "loss": 0.3576, "step": 12275 }, { "epoch": 0.8386964541914327, "grad_norm": 6.28256368637085, "learning_rate": 4.6729883404710516e-07, "loss": 0.2717, "step": 12276 }, { "epoch": 0.8387647742023638, "grad_norm": 2.919175624847412, "learning_rate": 4.6691227697929946e-07, "loss": 0.182, "step": 12277 }, { "epoch": 0.8388330942132951, "grad_norm": 4.7602715492248535, "learning_rate": 4.665258684337539e-07, "loss": 0.3592, "step": 12278 }, { "epoch": 0.8389014142242263, "grad_norm": 2.9209046363830566, "learning_rate": 4.661396084293903e-07, "loss": 0.2285, "step": 12279 }, { "epoch": 0.8389697342351575, "grad_norm": 3.863837718963623, "learning_rate": 4.6575349698512216e-07, "loss": 0.311, "step": 12280 }, { "epoch": 0.8390380542460887, "grad_norm": 5.511746883392334, "learning_rate": 4.6536753411985785e-07, "loss": 0.2385, "step": 12281 }, { "epoch": 0.8391063742570198, "grad_norm": 4.387101650238037, "learning_rate": 4.6498171985249505e-07, "loss": 0.2599, "step": 12282 }, { "epoch": 0.8391746942679511, "grad_norm": 4.098962306976318, "learning_rate": 4.645960542019271e-07, "loss": 0.2398, "step": 12283 }, { "epoch": 0.8392430142788823, "grad_norm": 4.025897026062012, "learning_rate": 4.6421053718703863e-07, "loss": 0.1656, "step": 12284 }, { "epoch": 0.8393113342898135, "grad_norm": 3.9268760681152344, "learning_rate": 4.638251688267075e-07, "loss": 0.2545, "step": 12285 }, { "epoch": 0.8393796543007447, "grad_norm": 4.0190019607543945, "learning_rate": 4.6343994913980416e-07, "loss": 0.2892, "step": 12286 }, { "epoch": 0.8394479743116758, "grad_norm": 5.336305618286133, "learning_rate": 4.6305487814519184e-07, "loss": 0.3359, "step": 12287 }, { "epoch": 0.8395162943226071, "grad_norm": 3.5925867557525635, "learning_rate": 4.6266995586172626e-07, "loss": 0.2709, "step": 12288 }, { "epoch": 0.8395846143335383, "grad_norm": 2.476923704147339, "learning_rate": 4.622851823082573e-07, "loss": 0.2041, "step": 12289 }, { "epoch": 0.8396529343444695, "grad_norm": 5.246253967285156, "learning_rate": 4.6190055750362444e-07, "loss": 0.4369, "step": 12290 }, { "epoch": 0.8397212543554007, "grad_norm": 4.267975330352783, "learning_rate": 4.6151608146666205e-07, "loss": 0.3397, "step": 12291 }, { "epoch": 0.839789574366332, "grad_norm": 3.8153469562530518, "learning_rate": 4.6113175421619786e-07, "loss": 0.3429, "step": 12292 }, { "epoch": 0.8398578943772631, "grad_norm": 3.187155246734619, "learning_rate": 4.607475757710516e-07, "loss": 0.2368, "step": 12293 }, { "epoch": 0.8399262143881943, "grad_norm": 4.842059135437012, "learning_rate": 4.603635461500342e-07, "loss": 0.2553, "step": 12294 }, { "epoch": 0.8399945343991255, "grad_norm": 3.774354934692383, "learning_rate": 4.599796653719509e-07, "loss": 0.3203, "step": 12295 }, { "epoch": 0.8400628544100567, "grad_norm": 3.438668727874756, "learning_rate": 4.595959334555996e-07, "loss": 0.1982, "step": 12296 }, { "epoch": 0.840131174420988, "grad_norm": 4.766022205352783, "learning_rate": 4.5921235041977084e-07, "loss": 0.3664, "step": 12297 }, { "epoch": 0.8401994944319191, "grad_norm": 5.0474066734313965, "learning_rate": 4.5882891628324715e-07, "loss": 0.2155, "step": 12298 }, { "epoch": 0.8402678144428503, "grad_norm": 5.340352535247803, "learning_rate": 4.584456310648048e-07, "loss": 0.2175, "step": 12299 }, { "epoch": 0.8403361344537815, "grad_norm": 3.645449638366699, "learning_rate": 4.5806249478321156e-07, "loss": 0.2326, "step": 12300 }, { "epoch": 0.8404044544647127, "grad_norm": 3.61199688911438, "learning_rate": 4.5767950745723047e-07, "loss": 0.3624, "step": 12301 }, { "epoch": 0.840472774475644, "grad_norm": 3.9984610080718994, "learning_rate": 4.572966691056126e-07, "loss": 0.263, "step": 12302 }, { "epoch": 0.8405410944865751, "grad_norm": 5.315504550933838, "learning_rate": 4.5691397974710616e-07, "loss": 0.2561, "step": 12303 }, { "epoch": 0.8406094144975064, "grad_norm": 2.88836669921875, "learning_rate": 4.565314394004499e-07, "loss": 0.2408, "step": 12304 }, { "epoch": 0.8406777345084375, "grad_norm": 4.527339935302734, "learning_rate": 4.5614904808437645e-07, "loss": 0.2195, "step": 12305 }, { "epoch": 0.8407460545193687, "grad_norm": 4.229075908660889, "learning_rate": 4.5576680581761016e-07, "loss": 0.2329, "step": 12306 }, { "epoch": 0.8408143745303, "grad_norm": 5.328653812408447, "learning_rate": 4.5538471261886804e-07, "loss": 0.2624, "step": 12307 }, { "epoch": 0.8408826945412311, "grad_norm": 4.813487529754639, "learning_rate": 4.5500276850686057e-07, "loss": 0.2582, "step": 12308 }, { "epoch": 0.8409510145521624, "grad_norm": 6.132042407989502, "learning_rate": 4.546209735002907e-07, "loss": 0.3124, "step": 12309 }, { "epoch": 0.8410193345630935, "grad_norm": 4.447208881378174, "learning_rate": 4.5423932761785343e-07, "loss": 0.395, "step": 12310 }, { "epoch": 0.8410876545740247, "grad_norm": 5.600240230560303, "learning_rate": 4.538578308782374e-07, "loss": 0.2666, "step": 12311 }, { "epoch": 0.841155974584956, "grad_norm": 3.799941301345825, "learning_rate": 4.534764833001234e-07, "loss": 0.2374, "step": 12312 }, { "epoch": 0.8412242945958871, "grad_norm": 5.809014320373535, "learning_rate": 4.5309528490218463e-07, "loss": 0.3248, "step": 12313 }, { "epoch": 0.8412926146068184, "grad_norm": 3.58147931098938, "learning_rate": 4.5271423570308874e-07, "loss": 0.2397, "step": 12314 }, { "epoch": 0.8413609346177495, "grad_norm": 4.619055271148682, "learning_rate": 4.52333335721493e-07, "loss": 0.2275, "step": 12315 }, { "epoch": 0.8414292546286808, "grad_norm": 3.5302932262420654, "learning_rate": 4.519525849760494e-07, "loss": 0.2424, "step": 12316 }, { "epoch": 0.841497574639612, "grad_norm": 5.522772789001465, "learning_rate": 4.5157198348540277e-07, "loss": 0.3343, "step": 12317 }, { "epoch": 0.8415658946505431, "grad_norm": 2.971996307373047, "learning_rate": 4.5119153126818974e-07, "loss": 0.1765, "step": 12318 }, { "epoch": 0.8416342146614744, "grad_norm": 3.100738286972046, "learning_rate": 4.508112283430402e-07, "loss": 0.1944, "step": 12319 }, { "epoch": 0.8417025346724055, "grad_norm": 4.5686492919921875, "learning_rate": 4.5043107472857684e-07, "loss": 0.2087, "step": 12320 }, { "epoch": 0.8417708546833368, "grad_norm": 3.4485745429992676, "learning_rate": 4.5005107044341487e-07, "loss": 0.2638, "step": 12321 }, { "epoch": 0.8418391746942679, "grad_norm": 5.068578243255615, "learning_rate": 4.4967121550616205e-07, "loss": 0.3125, "step": 12322 }, { "epoch": 0.8419074947051991, "grad_norm": 2.709770679473877, "learning_rate": 4.492915099354182e-07, "loss": 0.1796, "step": 12323 }, { "epoch": 0.8419758147161304, "grad_norm": 2.8639438152313232, "learning_rate": 4.489119537497762e-07, "loss": 0.1958, "step": 12324 }, { "epoch": 0.8420441347270615, "grad_norm": 4.470039367675781, "learning_rate": 4.485325469678235e-07, "loss": 0.3514, "step": 12325 }, { "epoch": 0.8421124547379928, "grad_norm": 3.6809639930725098, "learning_rate": 4.4815328960813823e-07, "loss": 0.2634, "step": 12326 }, { "epoch": 0.8421807747489239, "grad_norm": 4.512203216552734, "learning_rate": 4.4777418168929023e-07, "loss": 0.2817, "step": 12327 }, { "epoch": 0.8422490947598552, "grad_norm": 3.862377643585205, "learning_rate": 4.473952232298446e-07, "loss": 0.22, "step": 12328 }, { "epoch": 0.8423174147707864, "grad_norm": 4.338425159454346, "learning_rate": 4.4701641424835785e-07, "loss": 0.235, "step": 12329 }, { "epoch": 0.8423857347817175, "grad_norm": 4.530489921569824, "learning_rate": 4.466377547633788e-07, "loss": 0.2793, "step": 12330 }, { "epoch": 0.8424540547926488, "grad_norm": 3.8185198307037354, "learning_rate": 4.4625924479344995e-07, "loss": 0.1873, "step": 12331 }, { "epoch": 0.8425223748035799, "grad_norm": 3.208022117614746, "learning_rate": 4.458808843571054e-07, "loss": 0.1864, "step": 12332 }, { "epoch": 0.8425906948145112, "grad_norm": 3.4843530654907227, "learning_rate": 4.455026734728728e-07, "loss": 0.1872, "step": 12333 }, { "epoch": 0.8426590148254424, "grad_norm": 3.8886353969573975, "learning_rate": 4.4512461215927306e-07, "loss": 0.2556, "step": 12334 }, { "epoch": 0.8427273348363735, "grad_norm": 3.493546485900879, "learning_rate": 4.4474670043481685e-07, "loss": 0.2536, "step": 12335 }, { "epoch": 0.8427956548473048, "grad_norm": 3.184439182281494, "learning_rate": 4.4436893831801017e-07, "loss": 0.2357, "step": 12336 }, { "epoch": 0.8428639748582359, "grad_norm": 3.4495368003845215, "learning_rate": 4.439913258273517e-07, "loss": 0.2289, "step": 12337 }, { "epoch": 0.8429322948691672, "grad_norm": 4.395766258239746, "learning_rate": 4.436138629813319e-07, "loss": 0.2537, "step": 12338 }, { "epoch": 0.8430006148800984, "grad_norm": 3.2563536167144775, "learning_rate": 4.4323654979843387e-07, "loss": 0.1976, "step": 12339 }, { "epoch": 0.8430689348910296, "grad_norm": 4.654868125915527, "learning_rate": 4.428593862971333e-07, "loss": 0.2608, "step": 12340 }, { "epoch": 0.8431372549019608, "grad_norm": 4.932120323181152, "learning_rate": 4.4248237249589984e-07, "loss": 0.2602, "step": 12341 }, { "epoch": 0.8432055749128919, "grad_norm": 3.564460515975952, "learning_rate": 4.4210550841319505e-07, "loss": 0.2198, "step": 12342 }, { "epoch": 0.8432738949238232, "grad_norm": 2.7513370513916016, "learning_rate": 4.4172879406747146e-07, "loss": 0.2396, "step": 12343 }, { "epoch": 0.8433422149347544, "grad_norm": 2.3878328800201416, "learning_rate": 4.4135222947717596e-07, "loss": 0.1408, "step": 12344 }, { "epoch": 0.8434105349456856, "grad_norm": 4.616620063781738, "learning_rate": 4.4097581466074894e-07, "loss": 0.3543, "step": 12345 }, { "epoch": 0.8434788549566168, "grad_norm": 5.275394916534424, "learning_rate": 4.4059954963662294e-07, "loss": 0.2841, "step": 12346 }, { "epoch": 0.8435471749675479, "grad_norm": 4.132693290710449, "learning_rate": 4.402234344232213e-07, "loss": 0.2011, "step": 12347 }, { "epoch": 0.8436154949784792, "grad_norm": 4.010519027709961, "learning_rate": 4.3984746903896144e-07, "loss": 0.2693, "step": 12348 }, { "epoch": 0.8436838149894104, "grad_norm": 6.524487495422363, "learning_rate": 4.3947165350225413e-07, "loss": 0.2993, "step": 12349 }, { "epoch": 0.8437521350003416, "grad_norm": 5.4849467277526855, "learning_rate": 4.3909598783150134e-07, "loss": 0.3528, "step": 12350 }, { "epoch": 0.8438204550112728, "grad_norm": 2.725051164627075, "learning_rate": 4.3872047204509907e-07, "loss": 0.1476, "step": 12351 }, { "epoch": 0.843888775022204, "grad_norm": 3.7850735187530518, "learning_rate": 4.3834510616143483e-07, "loss": 0.3284, "step": 12352 }, { "epoch": 0.8439570950331352, "grad_norm": 3.8388614654541016, "learning_rate": 4.379698901988896e-07, "loss": 0.2949, "step": 12353 }, { "epoch": 0.8440254150440664, "grad_norm": 4.690072536468506, "learning_rate": 4.375948241758376e-07, "loss": 0.2844, "step": 12354 }, { "epoch": 0.8440937350549976, "grad_norm": 4.493520259857178, "learning_rate": 4.372199081106431e-07, "loss": 0.3332, "step": 12355 }, { "epoch": 0.8441620550659288, "grad_norm": 2.9500949382781982, "learning_rate": 4.368451420216652e-07, "loss": 0.2501, "step": 12356 }, { "epoch": 0.84423037507686, "grad_norm": 3.101785182952881, "learning_rate": 4.3647052592725564e-07, "loss": 0.2347, "step": 12357 }, { "epoch": 0.8442986950877912, "grad_norm": 5.332686424255371, "learning_rate": 4.3609605984575847e-07, "loss": 0.2956, "step": 12358 }, { "epoch": 0.8443670150987224, "grad_norm": 4.1457624435424805, "learning_rate": 4.3572174379550953e-07, "loss": 0.2357, "step": 12359 }, { "epoch": 0.8444353351096536, "grad_norm": 3.6421778202056885, "learning_rate": 4.353475777948392e-07, "loss": 0.2552, "step": 12360 }, { "epoch": 0.8445036551205848, "grad_norm": 4.386711120605469, "learning_rate": 4.3497356186206845e-07, "loss": 0.2022, "step": 12361 }, { "epoch": 0.844571975131516, "grad_norm": 3.1107823848724365, "learning_rate": 4.345996960155131e-07, "loss": 0.1716, "step": 12362 }, { "epoch": 0.8446402951424472, "grad_norm": 6.049479961395264, "learning_rate": 4.342259802734777e-07, "loss": 0.4371, "step": 12363 }, { "epoch": 0.8447086151533785, "grad_norm": 2.7078840732574463, "learning_rate": 4.3385241465426514e-07, "loss": 0.2039, "step": 12364 }, { "epoch": 0.8447769351643096, "grad_norm": 3.921985149383545, "learning_rate": 4.3347899917616624e-07, "loss": 0.1714, "step": 12365 }, { "epoch": 0.8448452551752408, "grad_norm": 6.425976753234863, "learning_rate": 4.3310573385746773e-07, "loss": 0.3116, "step": 12366 }, { "epoch": 0.844913575186172, "grad_norm": 4.7364397048950195, "learning_rate": 4.3273261871644535e-07, "loss": 0.2429, "step": 12367 }, { "epoch": 0.8449818951971032, "grad_norm": 4.5251946449279785, "learning_rate": 4.323596537713707e-07, "loss": 0.412, "step": 12368 }, { "epoch": 0.8450502152080345, "grad_norm": 3.1810104846954346, "learning_rate": 4.319868390405067e-07, "loss": 0.241, "step": 12369 }, { "epoch": 0.8451185352189656, "grad_norm": 3.761910915374756, "learning_rate": 4.31614174542109e-07, "loss": 0.2563, "step": 12370 }, { "epoch": 0.8451868552298968, "grad_norm": 4.8481316566467285, "learning_rate": 4.3124166029442614e-07, "loss": 0.2931, "step": 12371 }, { "epoch": 0.845255175240828, "grad_norm": 4.9445414543151855, "learning_rate": 4.3086929631569895e-07, "loss": 0.2922, "step": 12372 }, { "epoch": 0.8453234952517592, "grad_norm": 6.07014799118042, "learning_rate": 4.3049708262416164e-07, "loss": 0.2426, "step": 12373 }, { "epoch": 0.8453918152626905, "grad_norm": 4.178619384765625, "learning_rate": 4.301250192380396e-07, "loss": 0.3044, "step": 12374 }, { "epoch": 0.8454601352736216, "grad_norm": 4.364474296569824, "learning_rate": 4.297531061755536e-07, "loss": 0.3029, "step": 12375 }, { "epoch": 0.8455284552845529, "grad_norm": 4.006925582885742, "learning_rate": 4.293813434549129e-07, "loss": 0.2427, "step": 12376 }, { "epoch": 0.845596775295484, "grad_norm": 4.113940238952637, "learning_rate": 4.2900973109432234e-07, "loss": 0.2128, "step": 12377 }, { "epoch": 0.8456650953064152, "grad_norm": 4.502935409545898, "learning_rate": 4.286382691119798e-07, "loss": 0.3267, "step": 12378 }, { "epoch": 0.8457334153173465, "grad_norm": 3.9362995624542236, "learning_rate": 4.2826695752607536e-07, "loss": 0.2887, "step": 12379 }, { "epoch": 0.8458017353282776, "grad_norm": 3.7475666999816895, "learning_rate": 4.278957963547891e-07, "loss": 0.1915, "step": 12380 }, { "epoch": 0.8458700553392089, "grad_norm": 4.2641191482543945, "learning_rate": 4.2752478561629667e-07, "loss": 0.2129, "step": 12381 }, { "epoch": 0.84593837535014, "grad_norm": 4.63284969329834, "learning_rate": 4.2715392532876536e-07, "loss": 0.2923, "step": 12382 }, { "epoch": 0.8460066953610712, "grad_norm": 4.646280765533447, "learning_rate": 4.2678321551035575e-07, "loss": 0.2114, "step": 12383 }, { "epoch": 0.8460750153720025, "grad_norm": 3.7781295776367188, "learning_rate": 4.2641265617922017e-07, "loss": 0.3021, "step": 12384 }, { "epoch": 0.8461433353829336, "grad_norm": 3.671124219894409, "learning_rate": 4.2604224735350377e-07, "loss": 0.1967, "step": 12385 }, { "epoch": 0.8462116553938649, "grad_norm": 4.909953594207764, "learning_rate": 4.256719890513449e-07, "loss": 0.2355, "step": 12386 }, { "epoch": 0.846279975404796, "grad_norm": 2.8613123893737793, "learning_rate": 4.2530188129087463e-07, "loss": 0.1818, "step": 12387 }, { "epoch": 0.8463482954157273, "grad_norm": 3.2978222370147705, "learning_rate": 4.2493192409021433e-07, "loss": 0.322, "step": 12388 }, { "epoch": 0.8464166154266585, "grad_norm": 4.273952484130859, "learning_rate": 4.2456211746748107e-07, "loss": 0.265, "step": 12389 }, { "epoch": 0.8464849354375896, "grad_norm": 2.284832000732422, "learning_rate": 4.241924614407833e-07, "loss": 0.1394, "step": 12390 }, { "epoch": 0.8465532554485209, "grad_norm": 3.087700366973877, "learning_rate": 4.2382295602822184e-07, "loss": 0.2557, "step": 12391 }, { "epoch": 0.846621575459452, "grad_norm": 5.493760108947754, "learning_rate": 4.2345360124789037e-07, "loss": 0.2653, "step": 12392 }, { "epoch": 0.8466898954703833, "grad_norm": 4.116891860961914, "learning_rate": 4.230843971178757e-07, "loss": 0.2429, "step": 12393 }, { "epoch": 0.8467582154813145, "grad_norm": 3.5166940689086914, "learning_rate": 4.22715343656256e-07, "loss": 0.2229, "step": 12394 }, { "epoch": 0.8468265354922456, "grad_norm": 4.7508769035339355, "learning_rate": 4.223464408811041e-07, "loss": 0.2691, "step": 12395 }, { "epoch": 0.8468948555031769, "grad_norm": 3.574817419052124, "learning_rate": 4.219776888104825e-07, "loss": 0.2316, "step": 12396 }, { "epoch": 0.846963175514108, "grad_norm": 4.787837028503418, "learning_rate": 4.2160908746244865e-07, "loss": 0.3044, "step": 12397 }, { "epoch": 0.8470314955250393, "grad_norm": 3.2339892387390137, "learning_rate": 4.212406368550522e-07, "loss": 0.1493, "step": 12398 }, { "epoch": 0.8470998155359705, "grad_norm": 5.012094974517822, "learning_rate": 4.208723370063365e-07, "loss": 0.2628, "step": 12399 }, { "epoch": 0.8471681355469017, "grad_norm": 5.190948009490967, "learning_rate": 4.205041879343338e-07, "loss": 0.2467, "step": 12400 }, { "epoch": 0.8472364555578329, "grad_norm": 5.271564483642578, "learning_rate": 4.2013618965707225e-07, "loss": 0.2414, "step": 12401 }, { "epoch": 0.847304775568764, "grad_norm": 5.027198791503906, "learning_rate": 4.197683421925726e-07, "loss": 0.2415, "step": 12402 }, { "epoch": 0.8473730955796953, "grad_norm": 5.504014492034912, "learning_rate": 4.194006455588461e-07, "loss": 0.353, "step": 12403 }, { "epoch": 0.8474414155906265, "grad_norm": 3.3790197372436523, "learning_rate": 4.190330997738987e-07, "loss": 0.1918, "step": 12404 }, { "epoch": 0.8475097356015577, "grad_norm": 3.8006510734558105, "learning_rate": 4.186657048557283e-07, "loss": 0.3221, "step": 12405 }, { "epoch": 0.8475780556124889, "grad_norm": 3.4066619873046875, "learning_rate": 4.1829846082232474e-07, "loss": 0.2718, "step": 12406 }, { "epoch": 0.84764637562342, "grad_norm": 3.1310834884643555, "learning_rate": 4.179313676916719e-07, "loss": 0.2035, "step": 12407 }, { "epoch": 0.8477146956343513, "grad_norm": 5.017972469329834, "learning_rate": 4.1756442548174374e-07, "loss": 0.3373, "step": 12408 }, { "epoch": 0.8477830156452825, "grad_norm": 3.0891623497009277, "learning_rate": 4.1719763421050944e-07, "loss": 0.2468, "step": 12409 }, { "epoch": 0.8478513356562137, "grad_norm": 4.719934940338135, "learning_rate": 4.1683099389592894e-07, "loss": 0.3422, "step": 12410 }, { "epoch": 0.8479196556671449, "grad_norm": 3.06915545463562, "learning_rate": 4.164645045559577e-07, "loss": 0.2421, "step": 12411 }, { "epoch": 0.8479879756780762, "grad_norm": 6.034300804138184, "learning_rate": 4.160981662085398e-07, "loss": 0.3019, "step": 12412 }, { "epoch": 0.8480562956890073, "grad_norm": 5.135015964508057, "learning_rate": 4.157319788716145e-07, "loss": 0.225, "step": 12413 }, { "epoch": 0.8481246156999385, "grad_norm": 4.045066833496094, "learning_rate": 4.15365942563113e-07, "loss": 0.2821, "step": 12414 }, { "epoch": 0.8481929357108697, "grad_norm": 3.9009227752685547, "learning_rate": 4.1500005730095964e-07, "loss": 0.3501, "step": 12415 }, { "epoch": 0.8482612557218009, "grad_norm": 3.5101375579833984, "learning_rate": 4.14634323103069e-07, "loss": 0.2663, "step": 12416 }, { "epoch": 0.8483295757327322, "grad_norm": 4.501421928405762, "learning_rate": 4.142687399873523e-07, "loss": 0.2771, "step": 12417 }, { "epoch": 0.8483978957436633, "grad_norm": 3.1433591842651367, "learning_rate": 4.139033079717105e-07, "loss": 0.2242, "step": 12418 }, { "epoch": 0.8484662157545945, "grad_norm": 4.349353790283203, "learning_rate": 4.1353802707403796e-07, "loss": 0.3094, "step": 12419 }, { "epoch": 0.8485345357655257, "grad_norm": 3.4851877689361572, "learning_rate": 4.131728973122206e-07, "loss": 0.2773, "step": 12420 }, { "epoch": 0.8486028557764569, "grad_norm": 4.619515419006348, "learning_rate": 4.1280791870413846e-07, "loss": 0.4083, "step": 12421 }, { "epoch": 0.8486711757873882, "grad_norm": 4.3537774085998535, "learning_rate": 4.124430912676638e-07, "loss": 0.2163, "step": 12422 }, { "epoch": 0.8487394957983193, "grad_norm": 5.518752574920654, "learning_rate": 4.12078415020661e-07, "loss": 0.2843, "step": 12423 }, { "epoch": 0.8488078158092506, "grad_norm": 4.510322093963623, "learning_rate": 4.117138899809873e-07, "loss": 0.2987, "step": 12424 }, { "epoch": 0.8488761358201817, "grad_norm": 4.010684967041016, "learning_rate": 4.1134951616649235e-07, "loss": 0.2452, "step": 12425 }, { "epoch": 0.8489444558311129, "grad_norm": 5.615786552429199, "learning_rate": 4.109852935950189e-07, "loss": 0.3162, "step": 12426 }, { "epoch": 0.8490127758420442, "grad_norm": 4.212878227233887, "learning_rate": 4.1062122228440226e-07, "loss": 0.236, "step": 12427 }, { "epoch": 0.8490810958529753, "grad_norm": 4.378360271453857, "learning_rate": 4.102573022524693e-07, "loss": 0.3332, "step": 12428 }, { "epoch": 0.8491494158639066, "grad_norm": 5.68220329284668, "learning_rate": 4.0989353351704054e-07, "loss": 0.2584, "step": 12429 }, { "epoch": 0.8492177358748377, "grad_norm": 3.115251302719116, "learning_rate": 4.0952991609592765e-07, "loss": 0.2559, "step": 12430 }, { "epoch": 0.8492860558857689, "grad_norm": 3.4215168952941895, "learning_rate": 4.091664500069387e-07, "loss": 0.2213, "step": 12431 }, { "epoch": 0.8493543758967002, "grad_norm": 3.509390115737915, "learning_rate": 4.0880313526786956e-07, "loss": 0.2883, "step": 12432 }, { "epoch": 0.8494226959076313, "grad_norm": 4.358392715454102, "learning_rate": 4.084399718965111e-07, "loss": 0.2653, "step": 12433 }, { "epoch": 0.8494910159185626, "grad_norm": 4.432206630706787, "learning_rate": 4.080769599106467e-07, "loss": 0.2841, "step": 12434 }, { "epoch": 0.8495593359294937, "grad_norm": 3.4774203300476074, "learning_rate": 4.07714099328053e-07, "loss": 0.2465, "step": 12435 }, { "epoch": 0.849627655940425, "grad_norm": 3.339435338973999, "learning_rate": 4.073513901664957e-07, "loss": 0.2545, "step": 12436 }, { "epoch": 0.8496959759513562, "grad_norm": 4.292806148529053, "learning_rate": 4.0698883244373845e-07, "loss": 0.3122, "step": 12437 }, { "epoch": 0.8497642959622873, "grad_norm": 3.656621217727661, "learning_rate": 4.066264261775333e-07, "loss": 0.3029, "step": 12438 }, { "epoch": 0.8498326159732186, "grad_norm": 3.236706256866455, "learning_rate": 4.0626417138562665e-07, "loss": 0.261, "step": 12439 }, { "epoch": 0.8499009359841497, "grad_norm": 4.012002944946289, "learning_rate": 4.059020680857582e-07, "loss": 0.2738, "step": 12440 }, { "epoch": 0.849969255995081, "grad_norm": 4.939096927642822, "learning_rate": 4.0554011629565734e-07, "loss": 0.2478, "step": 12441 }, { "epoch": 0.8500375760060122, "grad_norm": 4.2087626457214355, "learning_rate": 4.051783160330489e-07, "loss": 0.1848, "step": 12442 }, { "epoch": 0.8501058960169433, "grad_norm": 3.6343274116516113, "learning_rate": 4.048166673156491e-07, "loss": 0.1965, "step": 12443 }, { "epoch": 0.8501742160278746, "grad_norm": 3.8870627880096436, "learning_rate": 4.044551701611668e-07, "loss": 0.2979, "step": 12444 }, { "epoch": 0.8502425360388057, "grad_norm": 3.0532608032226562, "learning_rate": 4.0409382458730386e-07, "loss": 0.2693, "step": 12445 }, { "epoch": 0.850310856049737, "grad_norm": 3.7608771324157715, "learning_rate": 4.037326306117541e-07, "loss": 0.2534, "step": 12446 }, { "epoch": 0.8503791760606682, "grad_norm": 4.608693599700928, "learning_rate": 4.0337158825220436e-07, "loss": 0.258, "step": 12447 }, { "epoch": 0.8504474960715994, "grad_norm": 4.1741943359375, "learning_rate": 4.0301069752633504e-07, "loss": 0.2774, "step": 12448 }, { "epoch": 0.8505158160825306, "grad_norm": 3.9313268661499023, "learning_rate": 4.026499584518152e-07, "loss": 0.2998, "step": 12449 }, { "epoch": 0.8505841360934617, "grad_norm": 4.499410629272461, "learning_rate": 4.0228937104631177e-07, "loss": 0.3072, "step": 12450 }, { "epoch": 0.850652456104393, "grad_norm": 3.928737163543701, "learning_rate": 4.019289353274807e-07, "loss": 0.2349, "step": 12451 }, { "epoch": 0.8507207761153242, "grad_norm": 3.4355039596557617, "learning_rate": 4.015686513129732e-07, "loss": 0.2317, "step": 12452 }, { "epoch": 0.8507890961262554, "grad_norm": 5.784128665924072, "learning_rate": 4.0120851902042864e-07, "loss": 0.3087, "step": 12453 }, { "epoch": 0.8508574161371866, "grad_norm": 3.1517317295074463, "learning_rate": 4.0084853846748345e-07, "loss": 0.1833, "step": 12454 }, { "epoch": 0.8509257361481177, "grad_norm": 3.7571048736572266, "learning_rate": 4.0048870967176477e-07, "loss": 0.255, "step": 12455 }, { "epoch": 0.850994056159049, "grad_norm": 4.185311794281006, "learning_rate": 4.001290326508926e-07, "loss": 0.3049, "step": 12456 }, { "epoch": 0.8510623761699802, "grad_norm": 3.119312047958374, "learning_rate": 3.997695074224788e-07, "loss": 0.2899, "step": 12457 }, { "epoch": 0.8511306961809114, "grad_norm": 4.892844200134277, "learning_rate": 3.994101340041284e-07, "loss": 0.2681, "step": 12458 }, { "epoch": 0.8511990161918426, "grad_norm": 2.648749828338623, "learning_rate": 3.9905091241343963e-07, "loss": 0.1836, "step": 12459 }, { "epoch": 0.8512673362027738, "grad_norm": 3.5934181213378906, "learning_rate": 3.9869184266800273e-07, "loss": 0.2467, "step": 12460 }, { "epoch": 0.851335656213705, "grad_norm": 3.3579823970794678, "learning_rate": 3.983329247853996e-07, "loss": 0.2455, "step": 12461 }, { "epoch": 0.8514039762246361, "grad_norm": 3.5227012634277344, "learning_rate": 3.979741587832053e-07, "loss": 0.2673, "step": 12462 }, { "epoch": 0.8514722962355674, "grad_norm": 4.085907936096191, "learning_rate": 3.976155446789874e-07, "loss": 0.3176, "step": 12463 }, { "epoch": 0.8515406162464986, "grad_norm": 4.911481857299805, "learning_rate": 3.972570824903089e-07, "loss": 0.3815, "step": 12464 }, { "epoch": 0.8516089362574298, "grad_norm": 3.4047515392303467, "learning_rate": 3.968987722347199e-07, "loss": 0.2353, "step": 12465 }, { "epoch": 0.851677256268361, "grad_norm": 4.679632186889648, "learning_rate": 3.9654061392976697e-07, "loss": 0.3016, "step": 12466 }, { "epoch": 0.8517455762792921, "grad_norm": 3.6184029579162598, "learning_rate": 3.961826075929879e-07, "loss": 0.2252, "step": 12467 }, { "epoch": 0.8518138962902234, "grad_norm": 4.173841953277588, "learning_rate": 3.9582475324191413e-07, "loss": 0.3206, "step": 12468 }, { "epoch": 0.8518822163011546, "grad_norm": 3.7098429203033447, "learning_rate": 3.95467050894067e-07, "loss": 0.2241, "step": 12469 }, { "epoch": 0.8519505363120858, "grad_norm": 3.958270788192749, "learning_rate": 3.951095005669641e-07, "loss": 0.2913, "step": 12470 }, { "epoch": 0.852018856323017, "grad_norm": 5.291477680206299, "learning_rate": 3.947521022781127e-07, "loss": 0.2733, "step": 12471 }, { "epoch": 0.8520871763339483, "grad_norm": 4.048717021942139, "learning_rate": 3.9439485604501515e-07, "loss": 0.2305, "step": 12472 }, { "epoch": 0.8521554963448794, "grad_norm": 4.222655773162842, "learning_rate": 3.9403776188516296e-07, "loss": 0.2561, "step": 12473 }, { "epoch": 0.8522238163558106, "grad_norm": 4.340779781341553, "learning_rate": 3.936808198160425e-07, "loss": 0.2806, "step": 12474 }, { "epoch": 0.8522921363667418, "grad_norm": 3.9029006958007812, "learning_rate": 3.933240298551328e-07, "loss": 0.2577, "step": 12475 }, { "epoch": 0.852360456377673, "grad_norm": 4.122600078582764, "learning_rate": 3.929673920199047e-07, "loss": 0.4708, "step": 12476 }, { "epoch": 0.8524287763886043, "grad_norm": 5.104711532592773, "learning_rate": 3.926109063278221e-07, "loss": 0.2666, "step": 12477 }, { "epoch": 0.8524970963995354, "grad_norm": 4.081175804138184, "learning_rate": 3.922545727963406e-07, "loss": 0.2311, "step": 12478 }, { "epoch": 0.8525654164104667, "grad_norm": 3.9643869400024414, "learning_rate": 3.91898391442909e-07, "loss": 0.2786, "step": 12479 }, { "epoch": 0.8526337364213978, "grad_norm": 5.677982807159424, "learning_rate": 3.9154236228496985e-07, "loss": 0.3219, "step": 12480 }, { "epoch": 0.852702056432329, "grad_norm": 4.247122764587402, "learning_rate": 3.9118648533995504e-07, "loss": 0.3039, "step": 12481 }, { "epoch": 0.8527703764432603, "grad_norm": 4.3892364501953125, "learning_rate": 3.9083076062529154e-07, "loss": 0.344, "step": 12482 }, { "epoch": 0.8528386964541914, "grad_norm": 3.661370277404785, "learning_rate": 3.9047518815839776e-07, "loss": 0.2111, "step": 12483 }, { "epoch": 0.8529070164651227, "grad_norm": 5.042244911193848, "learning_rate": 3.9011976795668745e-07, "loss": 0.2513, "step": 12484 }, { "epoch": 0.8529753364760538, "grad_norm": 4.962064266204834, "learning_rate": 3.897645000375623e-07, "loss": 0.2211, "step": 12485 }, { "epoch": 0.853043656486985, "grad_norm": 3.143911600112915, "learning_rate": 3.8940938441841933e-07, "loss": 0.2088, "step": 12486 }, { "epoch": 0.8531119764979163, "grad_norm": 3.0039732456207275, "learning_rate": 3.8905442111664777e-07, "loss": 0.1789, "step": 12487 }, { "epoch": 0.8531802965088474, "grad_norm": 4.123202323913574, "learning_rate": 3.8869961014962995e-07, "loss": 0.2391, "step": 12488 }, { "epoch": 0.8532486165197787, "grad_norm": 4.820057392120361, "learning_rate": 3.8834495153473794e-07, "loss": 0.2828, "step": 12489 }, { "epoch": 0.8533169365307098, "grad_norm": 3.855625629425049, "learning_rate": 3.879904452893405e-07, "loss": 0.259, "step": 12490 }, { "epoch": 0.8533852565416411, "grad_norm": 3.251358985900879, "learning_rate": 3.8763609143079614e-07, "loss": 0.2416, "step": 12491 }, { "epoch": 0.8534535765525723, "grad_norm": 4.189003944396973, "learning_rate": 3.872818899764572e-07, "loss": 0.248, "step": 12492 }, { "epoch": 0.8535218965635034, "grad_norm": 4.133047580718994, "learning_rate": 3.8692784094366706e-07, "loss": 0.2717, "step": 12493 }, { "epoch": 0.8535902165744347, "grad_norm": 3.9355528354644775, "learning_rate": 3.8657394434976295e-07, "loss": 0.278, "step": 12494 }, { "epoch": 0.8536585365853658, "grad_norm": 4.441517353057861, "learning_rate": 3.8622020021207387e-07, "loss": 0.3737, "step": 12495 }, { "epoch": 0.8537268565962971, "grad_norm": 3.979252338409424, "learning_rate": 3.8586660854792225e-07, "loss": 0.2733, "step": 12496 }, { "epoch": 0.8537951766072283, "grad_norm": 5.017833709716797, "learning_rate": 3.8551316937462274e-07, "loss": 0.2186, "step": 12497 }, { "epoch": 0.8538634966181594, "grad_norm": 4.001369953155518, "learning_rate": 3.8515988270948145e-07, "loss": 0.3005, "step": 12498 }, { "epoch": 0.8539318166290907, "grad_norm": 3.21160888671875, "learning_rate": 3.8480674856979866e-07, "loss": 0.2382, "step": 12499 }, { "epoch": 0.8540001366400218, "grad_norm": 5.275918483734131, "learning_rate": 3.8445376697286687e-07, "loss": 0.3821, "step": 12500 }, { "epoch": 0.8540684566509531, "grad_norm": 3.9799463748931885, "learning_rate": 3.841009379359692e-07, "loss": 0.2492, "step": 12501 }, { "epoch": 0.8541367766618843, "grad_norm": 4.1907548904418945, "learning_rate": 3.8374826147638305e-07, "loss": 0.2252, "step": 12502 }, { "epoch": 0.8542050966728155, "grad_norm": 4.051751136779785, "learning_rate": 3.833957376113788e-07, "loss": 0.2329, "step": 12503 }, { "epoch": 0.8542734166837467, "grad_norm": 4.517014026641846, "learning_rate": 3.8304336635821934e-07, "loss": 0.232, "step": 12504 }, { "epoch": 0.8543417366946778, "grad_norm": 5.07868766784668, "learning_rate": 3.826911477341576e-07, "loss": 0.2415, "step": 12505 }, { "epoch": 0.8544100567056091, "grad_norm": 3.0991439819335938, "learning_rate": 3.823390817564413e-07, "loss": 0.2384, "step": 12506 }, { "epoch": 0.8544783767165403, "grad_norm": 3.564934730529785, "learning_rate": 3.819871684423102e-07, "loss": 0.2705, "step": 12507 }, { "epoch": 0.8545466967274715, "grad_norm": 3.186443567276001, "learning_rate": 3.8163540780899697e-07, "loss": 0.1433, "step": 12508 }, { "epoch": 0.8546150167384027, "grad_norm": 2.9174203872680664, "learning_rate": 3.812837998737264e-07, "loss": 0.2523, "step": 12509 }, { "epoch": 0.8546833367493338, "grad_norm": 3.7134811878204346, "learning_rate": 3.809323446537153e-07, "loss": 0.1987, "step": 12510 }, { "epoch": 0.8547516567602651, "grad_norm": 3.127171277999878, "learning_rate": 3.805810421661741e-07, "loss": 0.2336, "step": 12511 }, { "epoch": 0.8548199767711963, "grad_norm": 4.239248275756836, "learning_rate": 3.802298924283045e-07, "loss": 0.2566, "step": 12512 }, { "epoch": 0.8548882967821275, "grad_norm": 5.28801155090332, "learning_rate": 3.7987889545730245e-07, "loss": 0.232, "step": 12513 }, { "epoch": 0.8549566167930587, "grad_norm": 3.3416507244110107, "learning_rate": 3.795280512703538e-07, "loss": 0.2291, "step": 12514 }, { "epoch": 0.8550249368039899, "grad_norm": 4.791290283203125, "learning_rate": 3.7917735988463934e-07, "loss": 0.2478, "step": 12515 }, { "epoch": 0.8550932568149211, "grad_norm": 3.758087158203125, "learning_rate": 3.7882682131733086e-07, "loss": 0.2138, "step": 12516 }, { "epoch": 0.8551615768258523, "grad_norm": 4.369235515594482, "learning_rate": 3.784764355855949e-07, "loss": 0.1793, "step": 12517 }, { "epoch": 0.8552298968367835, "grad_norm": 5.546069622039795, "learning_rate": 3.781262027065876e-07, "loss": 0.2479, "step": 12518 }, { "epoch": 0.8552982168477147, "grad_norm": 2.9682445526123047, "learning_rate": 3.777761226974588e-07, "loss": 0.2933, "step": 12519 }, { "epoch": 0.8553665368586459, "grad_norm": 5.329949855804443, "learning_rate": 3.7742619557535144e-07, "loss": 0.2558, "step": 12520 }, { "epoch": 0.8554348568695771, "grad_norm": 3.249269723892212, "learning_rate": 3.7707642135740145e-07, "loss": 0.281, "step": 12521 }, { "epoch": 0.8555031768805083, "grad_norm": 2.5163092613220215, "learning_rate": 3.767268000607338e-07, "loss": 0.1458, "step": 12522 }, { "epoch": 0.8555714968914395, "grad_norm": 3.75935959815979, "learning_rate": 3.763773317024708e-07, "loss": 0.2333, "step": 12523 }, { "epoch": 0.8556398169023707, "grad_norm": 3.9090898036956787, "learning_rate": 3.7602801629972427e-07, "loss": 0.2797, "step": 12524 }, { "epoch": 0.8557081369133019, "grad_norm": 4.6384758949279785, "learning_rate": 3.756788538696002e-07, "loss": 0.3309, "step": 12525 }, { "epoch": 0.8557764569242331, "grad_norm": 4.615711688995361, "learning_rate": 3.753298444291941e-07, "loss": 0.2052, "step": 12526 }, { "epoch": 0.8558447769351644, "grad_norm": 7.810046195983887, "learning_rate": 3.7498098799559715e-07, "loss": 0.2947, "step": 12527 }, { "epoch": 0.8559130969460955, "grad_norm": 4.291934013366699, "learning_rate": 3.746322845858924e-07, "loss": 0.2662, "step": 12528 }, { "epoch": 0.8559814169570267, "grad_norm": 3.8843002319335938, "learning_rate": 3.7428373421715403e-07, "loss": 0.2761, "step": 12529 }, { "epoch": 0.8560497369679579, "grad_norm": 7.456010818481445, "learning_rate": 3.739353369064503e-07, "loss": 0.2181, "step": 12530 }, { "epoch": 0.8561180569788891, "grad_norm": 5.985503196716309, "learning_rate": 3.735870926708409e-07, "loss": 0.2582, "step": 12531 }, { "epoch": 0.8561863769898204, "grad_norm": 4.3636980056762695, "learning_rate": 3.7323900152737833e-07, "loss": 0.2515, "step": 12532 }, { "epoch": 0.8562546970007515, "grad_norm": 3.7033326625823975, "learning_rate": 3.7289106349310896e-07, "loss": 0.3101, "step": 12533 }, { "epoch": 0.8563230170116827, "grad_norm": 3.5176568031311035, "learning_rate": 3.725432785850689e-07, "loss": 0.2169, "step": 12534 }, { "epoch": 0.8563913370226139, "grad_norm": 2.9227116107940674, "learning_rate": 3.721956468202884e-07, "loss": 0.1828, "step": 12535 }, { "epoch": 0.8564596570335451, "grad_norm": 4.53683614730835, "learning_rate": 3.718481682157898e-07, "loss": 0.3147, "step": 12536 }, { "epoch": 0.8565279770444764, "grad_norm": 3.422799825668335, "learning_rate": 3.715008427885898e-07, "loss": 0.175, "step": 12537 }, { "epoch": 0.8565962970554075, "grad_norm": 3.851489305496216, "learning_rate": 3.7115367055569486e-07, "loss": 0.2178, "step": 12538 }, { "epoch": 0.8566646170663388, "grad_norm": 3.0013504028320312, "learning_rate": 3.7080665153410493e-07, "loss": 0.1695, "step": 12539 }, { "epoch": 0.8567329370772699, "grad_norm": 4.400666236877441, "learning_rate": 3.7045978574081286e-07, "loss": 0.3014, "step": 12540 }, { "epoch": 0.8568012570882011, "grad_norm": 3.135728120803833, "learning_rate": 3.701130731928042e-07, "loss": 0.2337, "step": 12541 }, { "epoch": 0.8568695770991324, "grad_norm": 3.9749674797058105, "learning_rate": 3.697665139070559e-07, "loss": 0.2043, "step": 12542 }, { "epoch": 0.8569378971100635, "grad_norm": 3.4068925380706787, "learning_rate": 3.6942010790053795e-07, "loss": 0.2804, "step": 12543 }, { "epoch": 0.8570062171209948, "grad_norm": 3.459456443786621, "learning_rate": 3.690738551902138e-07, "loss": 0.2789, "step": 12544 }, { "epoch": 0.8570745371319259, "grad_norm": 4.150545120239258, "learning_rate": 3.687277557930382e-07, "loss": 0.2407, "step": 12545 }, { "epoch": 0.8571428571428571, "grad_norm": 4.6287736892700195, "learning_rate": 3.683818097259582e-07, "loss": 0.2369, "step": 12546 }, { "epoch": 0.8572111771537884, "grad_norm": 5.245401859283447, "learning_rate": 3.680360170059139e-07, "loss": 0.2884, "step": 12547 }, { "epoch": 0.8572794971647195, "grad_norm": 4.6363606452941895, "learning_rate": 3.676903776498383e-07, "loss": 0.3157, "step": 12548 }, { "epoch": 0.8573478171756508, "grad_norm": 4.726417541503906, "learning_rate": 3.673448916746564e-07, "loss": 0.302, "step": 12549 }, { "epoch": 0.8574161371865819, "grad_norm": 3.3231306076049805, "learning_rate": 3.6699955909728526e-07, "loss": 0.2365, "step": 12550 }, { "epoch": 0.8574844571975132, "grad_norm": 3.9920215606689453, "learning_rate": 3.6665437993463544e-07, "loss": 0.2669, "step": 12551 }, { "epoch": 0.8575527772084444, "grad_norm": 8.402643203735352, "learning_rate": 3.6630935420360934e-07, "loss": 0.2492, "step": 12552 }, { "epoch": 0.8576210972193755, "grad_norm": 3.606259346008301, "learning_rate": 3.659644819211026e-07, "loss": 0.207, "step": 12553 }, { "epoch": 0.8576894172303068, "grad_norm": 4.413259029388428, "learning_rate": 3.656197631040011e-07, "loss": 0.2654, "step": 12554 }, { "epoch": 0.8577577372412379, "grad_norm": 4.234328746795654, "learning_rate": 3.652751977691852e-07, "loss": 0.2533, "step": 12555 }, { "epoch": 0.8578260572521692, "grad_norm": 4.449993133544922, "learning_rate": 3.6493078593352826e-07, "loss": 0.2444, "step": 12556 }, { "epoch": 0.8578943772631004, "grad_norm": 3.720003843307495, "learning_rate": 3.6458652761389535e-07, "loss": 0.3055, "step": 12557 }, { "epoch": 0.8579626972740315, "grad_norm": 4.7824320793151855, "learning_rate": 3.6424242282714295e-07, "loss": 0.2748, "step": 12558 }, { "epoch": 0.8580310172849628, "grad_norm": 5.380431652069092, "learning_rate": 3.638984715901214e-07, "loss": 0.3307, "step": 12559 }, { "epoch": 0.8580993372958939, "grad_norm": 4.435417652130127, "learning_rate": 3.6355467391967275e-07, "loss": 0.3692, "step": 12560 }, { "epoch": 0.8581676573068252, "grad_norm": 3.664855480194092, "learning_rate": 3.6321102983263186e-07, "loss": 0.2718, "step": 12561 }, { "epoch": 0.8582359773177564, "grad_norm": 3.983394145965576, "learning_rate": 3.628675393458268e-07, "loss": 0.1916, "step": 12562 }, { "epoch": 0.8583042973286876, "grad_norm": 3.7361252307891846, "learning_rate": 3.6252420247607687e-07, "loss": 0.2905, "step": 12563 }, { "epoch": 0.8583726173396188, "grad_norm": 3.8719427585601807, "learning_rate": 3.6218101924019416e-07, "loss": 0.265, "step": 12564 }, { "epoch": 0.8584409373505499, "grad_norm": 4.784316062927246, "learning_rate": 3.618379896549844e-07, "loss": 0.2701, "step": 12565 }, { "epoch": 0.8585092573614812, "grad_norm": 4.429901123046875, "learning_rate": 3.6149511373724346e-07, "loss": 0.4027, "step": 12566 }, { "epoch": 0.8585775773724124, "grad_norm": 5.663375377655029, "learning_rate": 3.6115239150376217e-07, "loss": 0.2154, "step": 12567 }, { "epoch": 0.8586458973833436, "grad_norm": 3.6409432888031006, "learning_rate": 3.6080982297132164e-07, "loss": 0.3053, "step": 12568 }, { "epoch": 0.8587142173942748, "grad_norm": 3.659059762954712, "learning_rate": 3.6046740815669757e-07, "loss": 0.1814, "step": 12569 }, { "epoch": 0.8587825374052059, "grad_norm": 3.742459774017334, "learning_rate": 3.6012514707665675e-07, "loss": 0.191, "step": 12570 }, { "epoch": 0.8588508574161372, "grad_norm": 4.33010721206665, "learning_rate": 3.5978303974795854e-07, "loss": 0.2965, "step": 12571 }, { "epoch": 0.8589191774270684, "grad_norm": 3.137413501739502, "learning_rate": 3.594410861873553e-07, "loss": 0.179, "step": 12572 }, { "epoch": 0.8589874974379996, "grad_norm": 3.8027236461639404, "learning_rate": 3.5909928641159166e-07, "loss": 0.2719, "step": 12573 }, { "epoch": 0.8590558174489308, "grad_norm": 3.803375005722046, "learning_rate": 3.587576404374052e-07, "loss": 0.1948, "step": 12574 }, { "epoch": 0.859124137459862, "grad_norm": 4.083523273468018, "learning_rate": 3.584161482815235e-07, "loss": 0.2202, "step": 12575 }, { "epoch": 0.8591924574707932, "grad_norm": 3.612318992614746, "learning_rate": 3.580748099606704e-07, "loss": 0.1882, "step": 12576 }, { "epoch": 0.8592607774817244, "grad_norm": 3.4828546047210693, "learning_rate": 3.577336254915596e-07, "loss": 0.2248, "step": 12577 }, { "epoch": 0.8593290974926556, "grad_norm": 6.053522109985352, "learning_rate": 3.5739259489089913e-07, "loss": 0.2935, "step": 12578 }, { "epoch": 0.8593974175035868, "grad_norm": 3.65708589553833, "learning_rate": 3.570517181753862e-07, "loss": 0.2679, "step": 12579 }, { "epoch": 0.859465737514518, "grad_norm": 3.8999173641204834, "learning_rate": 3.56710995361714e-07, "loss": 0.1998, "step": 12580 }, { "epoch": 0.8595340575254492, "grad_norm": 5.218055725097656, "learning_rate": 3.563704264665667e-07, "loss": 0.2589, "step": 12581 }, { "epoch": 0.8596023775363804, "grad_norm": 4.356103897094727, "learning_rate": 3.5603001150662076e-07, "loss": 0.191, "step": 12582 }, { "epoch": 0.8596706975473116, "grad_norm": 3.8100554943084717, "learning_rate": 3.556897504985459e-07, "loss": 0.1871, "step": 12583 }, { "epoch": 0.8597390175582428, "grad_norm": 3.454212188720703, "learning_rate": 3.553496434590031e-07, "loss": 0.2602, "step": 12584 }, { "epoch": 0.859807337569174, "grad_norm": 3.701805830001831, "learning_rate": 3.5500969040464727e-07, "loss": 0.2129, "step": 12585 }, { "epoch": 0.8598756575801052, "grad_norm": 5.358824729919434, "learning_rate": 3.54669891352125e-07, "loss": 0.2803, "step": 12586 }, { "epoch": 0.8599439775910365, "grad_norm": 6.511092662811279, "learning_rate": 3.543302463180741e-07, "loss": 0.3466, "step": 12587 }, { "epoch": 0.8600122976019676, "grad_norm": 4.480403423309326, "learning_rate": 3.539907553191269e-07, "loss": 0.3122, "step": 12588 }, { "epoch": 0.8600806176128988, "grad_norm": 4.29949951171875, "learning_rate": 3.5365141837190755e-07, "loss": 0.278, "step": 12589 }, { "epoch": 0.86014893762383, "grad_norm": 4.544164180755615, "learning_rate": 3.533122354930335e-07, "loss": 0.3801, "step": 12590 }, { "epoch": 0.8602172576347612, "grad_norm": 3.9150123596191406, "learning_rate": 3.529732066991115e-07, "loss": 0.2386, "step": 12591 }, { "epoch": 0.8602855776456925, "grad_norm": 3.943118095397949, "learning_rate": 3.526343320067439e-07, "loss": 0.207, "step": 12592 }, { "epoch": 0.8603538976566236, "grad_norm": 5.873480319976807, "learning_rate": 3.522956114325246e-07, "loss": 0.4135, "step": 12593 }, { "epoch": 0.8604222176675548, "grad_norm": 3.117924690246582, "learning_rate": 3.5195704499303994e-07, "loss": 0.2579, "step": 12594 }, { "epoch": 0.860490537678486, "grad_norm": 3.5370171070098877, "learning_rate": 3.5161863270486824e-07, "loss": 0.1814, "step": 12595 }, { "epoch": 0.8605588576894172, "grad_norm": 5.780205726623535, "learning_rate": 3.512803745845809e-07, "loss": 0.221, "step": 12596 }, { "epoch": 0.8606271777003485, "grad_norm": 4.299999713897705, "learning_rate": 3.5094227064874134e-07, "loss": 0.3078, "step": 12597 }, { "epoch": 0.8606954977112796, "grad_norm": 4.633594512939453, "learning_rate": 3.5060432091390655e-07, "loss": 0.3455, "step": 12598 }, { "epoch": 0.8607638177222109, "grad_norm": 3.049966335296631, "learning_rate": 3.502665253966233e-07, "loss": 0.2711, "step": 12599 }, { "epoch": 0.860832137733142, "grad_norm": 4.168185234069824, "learning_rate": 3.4992888411343337e-07, "loss": 0.193, "step": 12600 }, { "epoch": 0.8609004577440732, "grad_norm": 4.299478530883789, "learning_rate": 3.495913970808704e-07, "loss": 0.317, "step": 12601 }, { "epoch": 0.8609687777550045, "grad_norm": 3.0481178760528564, "learning_rate": 3.4925406431546017e-07, "loss": 0.2821, "step": 12602 }, { "epoch": 0.8610370977659356, "grad_norm": 4.3220133781433105, "learning_rate": 3.489168858337208e-07, "loss": 0.2802, "step": 12603 }, { "epoch": 0.8611054177768669, "grad_norm": 4.452492713928223, "learning_rate": 3.4857986165216337e-07, "loss": 0.27, "step": 12604 }, { "epoch": 0.861173737787798, "grad_norm": 3.0626721382141113, "learning_rate": 3.4824299178729033e-07, "loss": 0.2005, "step": 12605 }, { "epoch": 0.8612420577987292, "grad_norm": 4.29856014251709, "learning_rate": 3.4790627625559885e-07, "loss": 0.2754, "step": 12606 }, { "epoch": 0.8613103778096605, "grad_norm": 3.8409829139709473, "learning_rate": 3.4756971507357466e-07, "loss": 0.2375, "step": 12607 }, { "epoch": 0.8613786978205916, "grad_norm": 4.741753101348877, "learning_rate": 3.472333082576993e-07, "loss": 0.2408, "step": 12608 }, { "epoch": 0.8614470178315229, "grad_norm": 3.8389856815338135, "learning_rate": 3.4689705582444663e-07, "loss": 0.2345, "step": 12609 }, { "epoch": 0.861515337842454, "grad_norm": 2.790039539337158, "learning_rate": 3.4656095779028177e-07, "loss": 0.1729, "step": 12610 }, { "epoch": 0.8615836578533853, "grad_norm": 4.072659969329834, "learning_rate": 3.4622501417166146e-07, "loss": 0.2413, "step": 12611 }, { "epoch": 0.8616519778643165, "grad_norm": 5.1009626388549805, "learning_rate": 3.458892249850369e-07, "loss": 0.2673, "step": 12612 }, { "epoch": 0.8617202978752476, "grad_norm": 4.203946590423584, "learning_rate": 3.4555359024685036e-07, "loss": 0.3131, "step": 12613 }, { "epoch": 0.8617886178861789, "grad_norm": 3.7057721614837646, "learning_rate": 3.4521810997353715e-07, "loss": 0.1685, "step": 12614 }, { "epoch": 0.86185693789711, "grad_norm": 5.064365386962891, "learning_rate": 3.4488278418152476e-07, "loss": 0.2348, "step": 12615 }, { "epoch": 0.8619252579080413, "grad_norm": 5.057336807250977, "learning_rate": 3.4454761288723347e-07, "loss": 0.3454, "step": 12616 }, { "epoch": 0.8619935779189725, "grad_norm": 3.987813711166382, "learning_rate": 3.442125961070755e-07, "loss": 0.2539, "step": 12617 }, { "epoch": 0.8620618979299036, "grad_norm": 5.1738176345825195, "learning_rate": 3.4387773385745666e-07, "loss": 0.385, "step": 12618 }, { "epoch": 0.8621302179408349, "grad_norm": 4.665053367614746, "learning_rate": 3.4354302615477265e-07, "loss": 0.294, "step": 12619 }, { "epoch": 0.862198537951766, "grad_norm": 4.357758522033691, "learning_rate": 3.432084730154137e-07, "loss": 0.2281, "step": 12620 }, { "epoch": 0.8622668579626973, "grad_norm": 3.5616295337677, "learning_rate": 3.428740744557622e-07, "loss": 0.1779, "step": 12621 }, { "epoch": 0.8623351779736285, "grad_norm": 3.986147165298462, "learning_rate": 3.4253983049219323e-07, "loss": 0.2051, "step": 12622 }, { "epoch": 0.8624034979845597, "grad_norm": 3.6411354541778564, "learning_rate": 3.422057411410729e-07, "loss": 0.2326, "step": 12623 }, { "epoch": 0.8624718179954909, "grad_norm": 4.818999290466309, "learning_rate": 3.418718064187616e-07, "loss": 0.2575, "step": 12624 }, { "epoch": 0.862540138006422, "grad_norm": 3.728987455368042, "learning_rate": 3.415380263416107e-07, "loss": 0.2812, "step": 12625 }, { "epoch": 0.8626084580173533, "grad_norm": 5.4382195472717285, "learning_rate": 3.4120440092596523e-07, "loss": 0.2198, "step": 12626 }, { "epoch": 0.8626767780282845, "grad_norm": 3.8787853717803955, "learning_rate": 3.4087093018816106e-07, "loss": 0.3464, "step": 12627 }, { "epoch": 0.8627450980392157, "grad_norm": 2.9314024448394775, "learning_rate": 3.4053761414452667e-07, "loss": 0.2469, "step": 12628 }, { "epoch": 0.8628134180501469, "grad_norm": 4.4099202156066895, "learning_rate": 3.402044528113854e-07, "loss": 0.1976, "step": 12629 }, { "epoch": 0.862881738061078, "grad_norm": 2.808093786239624, "learning_rate": 3.3987144620505094e-07, "loss": 0.1687, "step": 12630 }, { "epoch": 0.8629500580720093, "grad_norm": 5.136310577392578, "learning_rate": 3.395385943418288e-07, "loss": 0.3774, "step": 12631 }, { "epoch": 0.8630183780829405, "grad_norm": 5.045929908752441, "learning_rate": 3.3920589723801866e-07, "loss": 0.5279, "step": 12632 }, { "epoch": 0.8630866980938717, "grad_norm": 3.914473533630371, "learning_rate": 3.3887335490991133e-07, "loss": 0.2352, "step": 12633 }, { "epoch": 0.8631550181048029, "grad_norm": 5.074233055114746, "learning_rate": 3.3854096737379085e-07, "loss": 0.2501, "step": 12634 }, { "epoch": 0.8632233381157342, "grad_norm": 3.2166547775268555, "learning_rate": 3.3820873464593284e-07, "loss": 0.168, "step": 12635 }, { "epoch": 0.8632916581266653, "grad_norm": 4.149311542510986, "learning_rate": 3.3787665674260675e-07, "loss": 0.2734, "step": 12636 }, { "epoch": 0.8633599781375965, "grad_norm": 5.374462604522705, "learning_rate": 3.375447336800729e-07, "loss": 0.2989, "step": 12637 }, { "epoch": 0.8634282981485277, "grad_norm": 3.6209020614624023, "learning_rate": 3.372129654745856e-07, "loss": 0.289, "step": 12638 }, { "epoch": 0.8634966181594589, "grad_norm": 4.45487642288208, "learning_rate": 3.3688135214238896e-07, "loss": 0.3028, "step": 12639 }, { "epoch": 0.8635649381703902, "grad_norm": 4.276027679443359, "learning_rate": 3.365498936997224e-07, "loss": 0.3102, "step": 12640 }, { "epoch": 0.8636332581813213, "grad_norm": 3.9114036560058594, "learning_rate": 3.362185901628156e-07, "loss": 0.2131, "step": 12641 }, { "epoch": 0.8637015781922525, "grad_norm": 3.6264641284942627, "learning_rate": 3.35887441547893e-07, "loss": 0.2158, "step": 12642 }, { "epoch": 0.8637698982031837, "grad_norm": 6.062687397003174, "learning_rate": 3.355564478711699e-07, "loss": 0.2406, "step": 12643 }, { "epoch": 0.8638382182141149, "grad_norm": 4.914855480194092, "learning_rate": 3.352256091488531e-07, "loss": 0.2907, "step": 12644 }, { "epoch": 0.8639065382250462, "grad_norm": 3.4300012588500977, "learning_rate": 3.3489492539714355e-07, "loss": 0.2679, "step": 12645 }, { "epoch": 0.8639748582359773, "grad_norm": 4.220017910003662, "learning_rate": 3.3456439663223377e-07, "loss": 0.3241, "step": 12646 }, { "epoch": 0.8640431782469086, "grad_norm": 5.864713668823242, "learning_rate": 3.342340228703091e-07, "loss": 0.2961, "step": 12647 }, { "epoch": 0.8641114982578397, "grad_norm": 3.5693609714508057, "learning_rate": 3.3390380412754694e-07, "loss": 0.2372, "step": 12648 }, { "epoch": 0.8641798182687709, "grad_norm": 3.9478228092193604, "learning_rate": 3.335737404201171e-07, "loss": 0.1815, "step": 12649 }, { "epoch": 0.8642481382797021, "grad_norm": 3.814363479614258, "learning_rate": 3.332438317641821e-07, "loss": 0.2415, "step": 12650 }, { "epoch": 0.8643164582906333, "grad_norm": 3.3636324405670166, "learning_rate": 3.329140781758972e-07, "loss": 0.2326, "step": 12651 }, { "epoch": 0.8643847783015646, "grad_norm": 4.419479846954346, "learning_rate": 3.325844796714088e-07, "loss": 0.2815, "step": 12652 }, { "epoch": 0.8644530983124957, "grad_norm": 3.761824607849121, "learning_rate": 3.322550362668562e-07, "loss": 0.2303, "step": 12653 }, { "epoch": 0.8645214183234269, "grad_norm": 4.847682476043701, "learning_rate": 3.31925747978372e-07, "loss": 0.2458, "step": 12654 }, { "epoch": 0.8645897383343581, "grad_norm": 4.3100504875183105, "learning_rate": 3.3159661482208055e-07, "loss": 0.265, "step": 12655 }, { "epoch": 0.8646580583452893, "grad_norm": 3.516324043273926, "learning_rate": 3.3126763681409827e-07, "loss": 0.3114, "step": 12656 }, { "epoch": 0.8647263783562206, "grad_norm": 3.9491679668426514, "learning_rate": 3.309388139705347e-07, "loss": 0.2068, "step": 12657 }, { "epoch": 0.8647946983671517, "grad_norm": 2.9951610565185547, "learning_rate": 3.306101463074914e-07, "loss": 0.2569, "step": 12658 }, { "epoch": 0.864863018378083, "grad_norm": 3.504821300506592, "learning_rate": 3.302816338410629e-07, "loss": 0.2571, "step": 12659 }, { "epoch": 0.8649313383890141, "grad_norm": 4.581240177154541, "learning_rate": 3.2995327658733395e-07, "loss": 0.2164, "step": 12660 }, { "epoch": 0.8649996583999453, "grad_norm": 3.189114809036255, "learning_rate": 3.2962507456238397e-07, "loss": 0.2306, "step": 12661 }, { "epoch": 0.8650679784108766, "grad_norm": 5.210043430328369, "learning_rate": 3.2929702778228493e-07, "loss": 0.3003, "step": 12662 }, { "epoch": 0.8651362984218077, "grad_norm": 5.593164443969727, "learning_rate": 3.289691362631006e-07, "loss": 0.3805, "step": 12663 }, { "epoch": 0.865204618432739, "grad_norm": 5.348151683807373, "learning_rate": 3.286414000208859e-07, "loss": 0.3098, "step": 12664 }, { "epoch": 0.8652729384436701, "grad_norm": 5.614386558532715, "learning_rate": 3.283138190716894e-07, "loss": 0.4215, "step": 12665 }, { "epoch": 0.8653412584546013, "grad_norm": 3.342977523803711, "learning_rate": 3.279863934315522e-07, "loss": 0.2014, "step": 12666 }, { "epoch": 0.8654095784655326, "grad_norm": 5.0464653968811035, "learning_rate": 3.276591231165076e-07, "loss": 0.2417, "step": 12667 }, { "epoch": 0.8654778984764637, "grad_norm": 3.806939125061035, "learning_rate": 3.273320081425806e-07, "loss": 0.2141, "step": 12668 }, { "epoch": 0.865546218487395, "grad_norm": 4.037089824676514, "learning_rate": 3.2700504852578966e-07, "loss": 0.2212, "step": 12669 }, { "epoch": 0.8656145384983261, "grad_norm": 4.196271896362305, "learning_rate": 3.2667824428214487e-07, "loss": 0.307, "step": 12670 }, { "epoch": 0.8656828585092574, "grad_norm": 4.211350917816162, "learning_rate": 3.2635159542764943e-07, "loss": 0.3407, "step": 12671 }, { "epoch": 0.8657511785201886, "grad_norm": 4.6171040534973145, "learning_rate": 3.260251019782977e-07, "loss": 0.3017, "step": 12672 }, { "epoch": 0.8658194985311197, "grad_norm": 4.065362453460693, "learning_rate": 3.256987639500776e-07, "loss": 0.2939, "step": 12673 }, { "epoch": 0.865887818542051, "grad_norm": 4.605491638183594, "learning_rate": 3.253725813589691e-07, "loss": 0.3429, "step": 12674 }, { "epoch": 0.8659561385529821, "grad_norm": 3.861192464828491, "learning_rate": 3.250465542209439e-07, "loss": 0.2556, "step": 12675 }, { "epoch": 0.8660244585639134, "grad_norm": 4.317089557647705, "learning_rate": 3.247206825519679e-07, "loss": 0.2172, "step": 12676 }, { "epoch": 0.8660927785748446, "grad_norm": 3.3752806186676025, "learning_rate": 3.243949663679969e-07, "loss": 0.231, "step": 12677 }, { "epoch": 0.8661610985857757, "grad_norm": 3.298027753829956, "learning_rate": 3.2406940568498117e-07, "loss": 0.2032, "step": 12678 }, { "epoch": 0.866229418596707, "grad_norm": 4.029231071472168, "learning_rate": 3.2374400051886297e-07, "loss": 0.2458, "step": 12679 }, { "epoch": 0.8662977386076381, "grad_norm": 3.2719902992248535, "learning_rate": 3.234187508855744e-07, "loss": 0.2689, "step": 12680 }, { "epoch": 0.8663660586185694, "grad_norm": 3.949577569961548, "learning_rate": 3.230936568010441e-07, "loss": 0.3477, "step": 12681 }, { "epoch": 0.8664343786295006, "grad_norm": 5.2379560470581055, "learning_rate": 3.227687182811908e-07, "loss": 0.1407, "step": 12682 }, { "epoch": 0.8665026986404318, "grad_norm": 6.237131118774414, "learning_rate": 3.2244393534192613e-07, "loss": 0.2382, "step": 12683 }, { "epoch": 0.866571018651363, "grad_norm": 3.6269571781158447, "learning_rate": 3.2211930799915255e-07, "loss": 0.277, "step": 12684 }, { "epoch": 0.8666393386622941, "grad_norm": 4.171607494354248, "learning_rate": 3.217948362687669e-07, "loss": 0.2083, "step": 12685 }, { "epoch": 0.8667076586732254, "grad_norm": 4.691490650177002, "learning_rate": 3.21470520166658e-07, "loss": 0.2918, "step": 12686 }, { "epoch": 0.8667759786841566, "grad_norm": 4.142725944519043, "learning_rate": 3.2114635970870634e-07, "loss": 0.2179, "step": 12687 }, { "epoch": 0.8668442986950878, "grad_norm": 7.393949508666992, "learning_rate": 3.2082235491078536e-07, "loss": 0.3013, "step": 12688 }, { "epoch": 0.866912618706019, "grad_norm": 4.153208255767822, "learning_rate": 3.2049850578876065e-07, "loss": 0.2992, "step": 12689 }, { "epoch": 0.8669809387169501, "grad_norm": 4.525501251220703, "learning_rate": 3.201748123584901e-07, "loss": 0.2897, "step": 12690 }, { "epoch": 0.8670492587278814, "grad_norm": 4.106122970581055, "learning_rate": 3.1985127463582535e-07, "loss": 0.1884, "step": 12691 }, { "epoch": 0.8671175787388126, "grad_norm": 3.572300910949707, "learning_rate": 3.195278926366072e-07, "loss": 0.2929, "step": 12692 }, { "epoch": 0.8671858987497438, "grad_norm": 4.845074653625488, "learning_rate": 3.192046663766721e-07, "loss": 0.2451, "step": 12693 }, { "epoch": 0.867254218760675, "grad_norm": 3.9918556213378906, "learning_rate": 3.188815958718465e-07, "loss": 0.2611, "step": 12694 }, { "epoch": 0.8673225387716063, "grad_norm": 3.819493055343628, "learning_rate": 3.185586811379521e-07, "loss": 0.373, "step": 12695 }, { "epoch": 0.8673908587825374, "grad_norm": 3.3461761474609375, "learning_rate": 3.182359221907998e-07, "loss": 0.2494, "step": 12696 }, { "epoch": 0.8674591787934686, "grad_norm": 3.256988048553467, "learning_rate": 3.1791331904619415e-07, "loss": 0.2141, "step": 12697 }, { "epoch": 0.8675274988043998, "grad_norm": 3.845696210861206, "learning_rate": 3.175908717199328e-07, "loss": 0.2599, "step": 12698 }, { "epoch": 0.867595818815331, "grad_norm": 3.752822160720825, "learning_rate": 3.172685802278057e-07, "loss": 0.2528, "step": 12699 }, { "epoch": 0.8676641388262623, "grad_norm": 4.426399230957031, "learning_rate": 3.1694644458559264e-07, "loss": 0.2941, "step": 12700 }, { "epoch": 0.8677324588371934, "grad_norm": 4.191329479217529, "learning_rate": 3.1662446480906933e-07, "loss": 0.2581, "step": 12701 }, { "epoch": 0.8678007788481246, "grad_norm": 3.341676712036133, "learning_rate": 3.163026409140021e-07, "loss": 0.2637, "step": 12702 }, { "epoch": 0.8678690988590558, "grad_norm": 4.902732849121094, "learning_rate": 3.159809729161501e-07, "loss": 0.2772, "step": 12703 }, { "epoch": 0.867937418869987, "grad_norm": 3.3938827514648438, "learning_rate": 3.156594608312634e-07, "loss": 0.2379, "step": 12704 }, { "epoch": 0.8680057388809183, "grad_norm": 4.539539337158203, "learning_rate": 3.153381046750863e-07, "loss": 0.2595, "step": 12705 }, { "epoch": 0.8680740588918494, "grad_norm": 3.644946575164795, "learning_rate": 3.150169044633548e-07, "loss": 0.2783, "step": 12706 }, { "epoch": 0.8681423789027807, "grad_norm": 4.869890213012695, "learning_rate": 3.146958602117971e-07, "loss": 0.2938, "step": 12707 }, { "epoch": 0.8682106989137118, "grad_norm": 4.333909034729004, "learning_rate": 3.1437497193613383e-07, "loss": 0.2708, "step": 12708 }, { "epoch": 0.868279018924643, "grad_norm": 3.527203321456909, "learning_rate": 3.14054239652078e-07, "loss": 0.2562, "step": 12709 }, { "epoch": 0.8683473389355743, "grad_norm": 5.663466930389404, "learning_rate": 3.1373366337533554e-07, "loss": 0.2367, "step": 12710 }, { "epoch": 0.8684156589465054, "grad_norm": 5.013830661773682, "learning_rate": 3.134132431216034e-07, "loss": 0.2737, "step": 12711 }, { "epoch": 0.8684839789574367, "grad_norm": 4.40455436706543, "learning_rate": 3.130929789065732e-07, "loss": 0.2701, "step": 12712 }, { "epoch": 0.8685522989683678, "grad_norm": 6.286356449127197, "learning_rate": 3.1277287074592553e-07, "loss": 0.3734, "step": 12713 }, { "epoch": 0.868620618979299, "grad_norm": 4.010592937469482, "learning_rate": 3.1245291865533533e-07, "loss": 0.2389, "step": 12714 }, { "epoch": 0.8686889389902303, "grad_norm": 3.0697412490844727, "learning_rate": 3.1213312265047114e-07, "loss": 0.2878, "step": 12715 }, { "epoch": 0.8687572590011614, "grad_norm": 4.36063289642334, "learning_rate": 3.1181348274699276e-07, "loss": 0.3926, "step": 12716 }, { "epoch": 0.8688255790120927, "grad_norm": 4.501978874206543, "learning_rate": 3.114939989605505e-07, "loss": 0.2518, "step": 12717 }, { "epoch": 0.8688938990230238, "grad_norm": 4.099759101867676, "learning_rate": 3.111746713067897e-07, "loss": 0.2606, "step": 12718 }, { "epoch": 0.8689622190339551, "grad_norm": 5.104969024658203, "learning_rate": 3.1085549980134637e-07, "loss": 0.3566, "step": 12719 }, { "epoch": 0.8690305390448863, "grad_norm": 5.334834098815918, "learning_rate": 3.105364844598498e-07, "loss": 0.2689, "step": 12720 }, { "epoch": 0.8690988590558174, "grad_norm": 4.430591106414795, "learning_rate": 3.1021762529792176e-07, "loss": 0.2632, "step": 12721 }, { "epoch": 0.8691671790667487, "grad_norm": 3.188835859298706, "learning_rate": 3.0989892233117516e-07, "loss": 0.1895, "step": 12722 }, { "epoch": 0.8692354990776798, "grad_norm": 2.880702018737793, "learning_rate": 3.0958037557521656e-07, "loss": 0.1956, "step": 12723 }, { "epoch": 0.8693038190886111, "grad_norm": 5.554949760437012, "learning_rate": 3.092619850456446e-07, "loss": 0.2777, "step": 12724 }, { "epoch": 0.8693721390995423, "grad_norm": 4.632697582244873, "learning_rate": 3.0894375075804945e-07, "loss": 0.2023, "step": 12725 }, { "epoch": 0.8694404591104734, "grad_norm": 4.018604755401611, "learning_rate": 3.0862567272801386e-07, "loss": 0.2304, "step": 12726 }, { "epoch": 0.8695087791214047, "grad_norm": 3.426736354827881, "learning_rate": 3.0830775097111325e-07, "loss": 0.2518, "step": 12727 }, { "epoch": 0.8695770991323358, "grad_norm": 3.897594690322876, "learning_rate": 3.0798998550291744e-07, "loss": 0.2704, "step": 12728 }, { "epoch": 0.8696454191432671, "grad_norm": 4.014583587646484, "learning_rate": 3.0767237633898443e-07, "loss": 0.2337, "step": 12729 }, { "epoch": 0.8697137391541983, "grad_norm": 4.659801006317139, "learning_rate": 3.0735492349486696e-07, "loss": 0.2879, "step": 12730 }, { "epoch": 0.8697820591651295, "grad_norm": 4.517146587371826, "learning_rate": 3.070376269861102e-07, "loss": 0.2347, "step": 12731 }, { "epoch": 0.8698503791760607, "grad_norm": 4.903753757476807, "learning_rate": 3.0672048682825216e-07, "loss": 0.4456, "step": 12732 }, { "epoch": 0.8699186991869918, "grad_norm": 3.9304516315460205, "learning_rate": 3.064035030368204e-07, "loss": 0.275, "step": 12733 }, { "epoch": 0.8699870191979231, "grad_norm": 3.1311659812927246, "learning_rate": 3.0608667562733824e-07, "loss": 0.2147, "step": 12734 }, { "epoch": 0.8700553392088543, "grad_norm": 4.479076385498047, "learning_rate": 3.057700046153197e-07, "loss": 0.2793, "step": 12735 }, { "epoch": 0.8701236592197855, "grad_norm": 4.204498291015625, "learning_rate": 3.0545349001627175e-07, "loss": 0.3278, "step": 12736 }, { "epoch": 0.8701919792307167, "grad_norm": 4.59055757522583, "learning_rate": 3.05137131845692e-07, "loss": 0.2367, "step": 12737 }, { "epoch": 0.8702602992416478, "grad_norm": 3.803982734680176, "learning_rate": 3.0482093011907233e-07, "loss": 0.2821, "step": 12738 }, { "epoch": 0.8703286192525791, "grad_norm": 3.870722770690918, "learning_rate": 3.045048848518964e-07, "loss": 0.2754, "step": 12739 }, { "epoch": 0.8703969392635102, "grad_norm": 4.685425281524658, "learning_rate": 3.041889960596401e-07, "loss": 0.2611, "step": 12740 }, { "epoch": 0.8704652592744415, "grad_norm": 4.172398567199707, "learning_rate": 3.0387326375777157e-07, "loss": 0.2507, "step": 12741 }, { "epoch": 0.8705335792853727, "grad_norm": 5.096667766571045, "learning_rate": 3.0355768796175115e-07, "loss": 0.3574, "step": 12742 }, { "epoch": 0.8706018992963039, "grad_norm": 4.091765880584717, "learning_rate": 3.032422686870322e-07, "loss": 0.2657, "step": 12743 }, { "epoch": 0.8706702193072351, "grad_norm": 4.244617462158203, "learning_rate": 3.029270059490603e-07, "loss": 0.2475, "step": 12744 }, { "epoch": 0.8707385393181662, "grad_norm": 4.314030647277832, "learning_rate": 3.026118997632721e-07, "loss": 0.2366, "step": 12745 }, { "epoch": 0.8708068593290975, "grad_norm": 6.251728534698486, "learning_rate": 3.022969501450977e-07, "loss": 0.2842, "step": 12746 }, { "epoch": 0.8708751793400287, "grad_norm": 5.748011589050293, "learning_rate": 3.0198215710995896e-07, "loss": 0.2628, "step": 12747 }, { "epoch": 0.8709434993509599, "grad_norm": 3.7206778526306152, "learning_rate": 3.0166752067327227e-07, "loss": 0.1849, "step": 12748 }, { "epoch": 0.8710118193618911, "grad_norm": 4.458171844482422, "learning_rate": 3.013530408504425e-07, "loss": 0.3202, "step": 12749 }, { "epoch": 0.8710801393728222, "grad_norm": 3.34366512298584, "learning_rate": 3.0103871765687014e-07, "loss": 0.2761, "step": 12750 }, { "epoch": 0.8711484593837535, "grad_norm": 4.288573265075684, "learning_rate": 3.007245511079463e-07, "loss": 0.201, "step": 12751 }, { "epoch": 0.8712167793946847, "grad_norm": 2.9106409549713135, "learning_rate": 3.004105412190553e-07, "loss": 0.2494, "step": 12752 }, { "epoch": 0.8712850994056159, "grad_norm": 3.9749832153320312, "learning_rate": 3.0009668800557194e-07, "loss": 0.2234, "step": 12753 }, { "epoch": 0.8713534194165471, "grad_norm": 3.9719431400299072, "learning_rate": 2.9978299148286643e-07, "loss": 0.3412, "step": 12754 }, { "epoch": 0.8714217394274784, "grad_norm": 4.00042724609375, "learning_rate": 2.9946945166629887e-07, "loss": 0.4094, "step": 12755 }, { "epoch": 0.8714900594384095, "grad_norm": 3.017169237136841, "learning_rate": 2.991560685712235e-07, "loss": 0.2028, "step": 12756 }, { "epoch": 0.8715583794493407, "grad_norm": 4.894667625427246, "learning_rate": 2.988428422129842e-07, "loss": 0.2295, "step": 12757 }, { "epoch": 0.8716266994602719, "grad_norm": 3.1950502395629883, "learning_rate": 2.9852977260692e-07, "loss": 0.2468, "step": 12758 }, { "epoch": 0.8716950194712031, "grad_norm": 6.175729274749756, "learning_rate": 2.9821685976836034e-07, "loss": 0.2762, "step": 12759 }, { "epoch": 0.8717633394821344, "grad_norm": 3.780724287033081, "learning_rate": 2.9790410371262835e-07, "loss": 0.2013, "step": 12760 }, { "epoch": 0.8718316594930655, "grad_norm": 3.4063453674316406, "learning_rate": 2.9759150445503876e-07, "loss": 0.1913, "step": 12761 }, { "epoch": 0.8718999795039967, "grad_norm": 2.8492255210876465, "learning_rate": 2.972790620108984e-07, "loss": 0.2014, "step": 12762 }, { "epoch": 0.8719682995149279, "grad_norm": 2.5863752365112305, "learning_rate": 2.9696677639550716e-07, "loss": 0.185, "step": 12763 }, { "epoch": 0.8720366195258591, "grad_norm": 3.066956043243408, "learning_rate": 2.966546476241571e-07, "loss": 0.2712, "step": 12764 }, { "epoch": 0.8721049395367904, "grad_norm": 4.833408832550049, "learning_rate": 2.963426757121314e-07, "loss": 0.31, "step": 12765 }, { "epoch": 0.8721732595477215, "grad_norm": 4.003361701965332, "learning_rate": 2.9603086067470704e-07, "loss": 0.2948, "step": 12766 }, { "epoch": 0.8722415795586528, "grad_norm": 5.237508773803711, "learning_rate": 2.9571920252715193e-07, "loss": 0.2785, "step": 12767 }, { "epoch": 0.8723098995695839, "grad_norm": 3.761233329772949, "learning_rate": 2.9540770128472954e-07, "loss": 0.2385, "step": 12768 }, { "epoch": 0.8723782195805151, "grad_norm": 3.989797592163086, "learning_rate": 2.950963569626907e-07, "loss": 0.2152, "step": 12769 }, { "epoch": 0.8724465395914464, "grad_norm": 5.4976606369018555, "learning_rate": 2.947851695762821e-07, "loss": 0.3051, "step": 12770 }, { "epoch": 0.8725148596023775, "grad_norm": 4.828826904296875, "learning_rate": 2.944741391407414e-07, "loss": 0.3306, "step": 12771 }, { "epoch": 0.8725831796133088, "grad_norm": 4.928962230682373, "learning_rate": 2.941632656712997e-07, "loss": 0.2637, "step": 12772 }, { "epoch": 0.8726514996242399, "grad_norm": 3.924203872680664, "learning_rate": 2.9385254918317917e-07, "loss": 0.3183, "step": 12773 }, { "epoch": 0.8727198196351711, "grad_norm": 3.065575361251831, "learning_rate": 2.9354198969159455e-07, "loss": 0.1521, "step": 12774 }, { "epoch": 0.8727881396461024, "grad_norm": 3.8897886276245117, "learning_rate": 2.9323158721175366e-07, "loss": 0.2553, "step": 12775 }, { "epoch": 0.8728564596570335, "grad_norm": 4.48320198059082, "learning_rate": 2.9292134175885576e-07, "loss": 0.2936, "step": 12776 }, { "epoch": 0.8729247796679648, "grad_norm": 3.67966365814209, "learning_rate": 2.926112533480934e-07, "loss": 0.2536, "step": 12777 }, { "epoch": 0.8729930996788959, "grad_norm": 3.5509660243988037, "learning_rate": 2.923013219946495e-07, "loss": 0.2504, "step": 12778 }, { "epoch": 0.8730614196898272, "grad_norm": 4.475486755371094, "learning_rate": 2.9199154771370124e-07, "loss": 0.3265, "step": 12779 }, { "epoch": 0.8731297397007584, "grad_norm": 3.1696012020111084, "learning_rate": 2.9168193052041703e-07, "loss": 0.1628, "step": 12780 }, { "epoch": 0.8731980597116895, "grad_norm": 6.773506164550781, "learning_rate": 2.913724704299592e-07, "loss": 0.2424, "step": 12781 }, { "epoch": 0.8732663797226208, "grad_norm": 4.083436965942383, "learning_rate": 2.9106316745748044e-07, "loss": 0.263, "step": 12782 }, { "epoch": 0.8733346997335519, "grad_norm": 4.5399298667907715, "learning_rate": 2.907540216181259e-07, "loss": 0.3381, "step": 12783 }, { "epoch": 0.8734030197444832, "grad_norm": 6.114186763763428, "learning_rate": 2.904450329270346e-07, "loss": 0.3224, "step": 12784 }, { "epoch": 0.8734713397554144, "grad_norm": 4.804893493652344, "learning_rate": 2.90136201399337e-07, "loss": 0.3502, "step": 12785 }, { "epoch": 0.8735396597663455, "grad_norm": 3.6661200523376465, "learning_rate": 2.898275270501538e-07, "loss": 0.2181, "step": 12786 }, { "epoch": 0.8736079797772768, "grad_norm": 3.484726667404175, "learning_rate": 2.895190098946023e-07, "loss": 0.2996, "step": 12787 }, { "epoch": 0.8736762997882079, "grad_norm": 4.1355204582214355, "learning_rate": 2.892106499477888e-07, "loss": 0.2869, "step": 12788 }, { "epoch": 0.8737446197991392, "grad_norm": 4.127165794372559, "learning_rate": 2.8890244722481386e-07, "loss": 0.2323, "step": 12789 }, { "epoch": 0.8738129398100704, "grad_norm": 5.2070512771606445, "learning_rate": 2.885944017407675e-07, "loss": 0.3773, "step": 12790 }, { "epoch": 0.8738812598210016, "grad_norm": 3.6721808910369873, "learning_rate": 2.8828651351073514e-07, "loss": 0.271, "step": 12791 }, { "epoch": 0.8739495798319328, "grad_norm": 4.649238109588623, "learning_rate": 2.879787825497928e-07, "loss": 0.3379, "step": 12792 }, { "epoch": 0.8740178998428639, "grad_norm": 3.48504638671875, "learning_rate": 2.876712088730092e-07, "loss": 0.2446, "step": 12793 }, { "epoch": 0.8740862198537952, "grad_norm": 4.565316677093506, "learning_rate": 2.873637924954459e-07, "loss": 0.2493, "step": 12794 }, { "epoch": 0.8741545398647264, "grad_norm": 4.020078659057617, "learning_rate": 2.8705653343215573e-07, "loss": 0.2171, "step": 12795 }, { "epoch": 0.8742228598756576, "grad_norm": 11.22336483001709, "learning_rate": 2.8674943169818485e-07, "loss": 0.1927, "step": 12796 }, { "epoch": 0.8742911798865888, "grad_norm": 4.397779941558838, "learning_rate": 2.8644248730857155e-07, "loss": 0.2322, "step": 12797 }, { "epoch": 0.8743594998975199, "grad_norm": 2.9880871772766113, "learning_rate": 2.8613570027834446e-07, "loss": 0.2952, "step": 12798 }, { "epoch": 0.8744278199084512, "grad_norm": 4.176165580749512, "learning_rate": 2.858290706225276e-07, "loss": 0.3085, "step": 12799 }, { "epoch": 0.8744961399193824, "grad_norm": 2.883394241333008, "learning_rate": 2.855225983561349e-07, "loss": 0.1647, "step": 12800 }, { "epoch": 0.8745644599303136, "grad_norm": 5.139040470123291, "learning_rate": 2.8521628349417466e-07, "loss": 0.3176, "step": 12801 }, { "epoch": 0.8746327799412448, "grad_norm": 3.4528615474700928, "learning_rate": 2.8491012605164535e-07, "loss": 0.1999, "step": 12802 }, { "epoch": 0.874701099952176, "grad_norm": 4.631282329559326, "learning_rate": 2.84604126043539e-07, "loss": 0.3326, "step": 12803 }, { "epoch": 0.8747694199631072, "grad_norm": 4.943780422210693, "learning_rate": 2.8429828348483964e-07, "loss": 0.3059, "step": 12804 }, { "epoch": 0.8748377399740384, "grad_norm": 4.154438018798828, "learning_rate": 2.839925983905238e-07, "loss": 0.3053, "step": 12805 }, { "epoch": 0.8749060599849696, "grad_norm": 5.695854187011719, "learning_rate": 2.8368707077555875e-07, "loss": 0.2934, "step": 12806 }, { "epoch": 0.8749743799959008, "grad_norm": 3.9789984226226807, "learning_rate": 2.8338170065490667e-07, "loss": 0.2583, "step": 12807 }, { "epoch": 0.875042700006832, "grad_norm": 4.051143169403076, "learning_rate": 2.8307648804352085e-07, "loss": 0.2481, "step": 12808 }, { "epoch": 0.8751110200177632, "grad_norm": 4.9423418045043945, "learning_rate": 2.827714329563468e-07, "loss": 0.323, "step": 12809 }, { "epoch": 0.8751793400286944, "grad_norm": 3.753618001937866, "learning_rate": 2.8246653540832133e-07, "loss": 0.2598, "step": 12810 }, { "epoch": 0.8752476600396256, "grad_norm": 4.490667819976807, "learning_rate": 2.821617954143749e-07, "loss": 0.2942, "step": 12811 }, { "epoch": 0.8753159800505568, "grad_norm": 3.862416982650757, "learning_rate": 2.818572129894297e-07, "loss": 0.3054, "step": 12812 }, { "epoch": 0.875384300061488, "grad_norm": 3.857262134552002, "learning_rate": 2.815527881484004e-07, "loss": 0.3212, "step": 12813 }, { "epoch": 0.8754526200724192, "grad_norm": 3.975313186645508, "learning_rate": 2.812485209061941e-07, "loss": 0.2692, "step": 12814 }, { "epoch": 0.8755209400833505, "grad_norm": 3.2987403869628906, "learning_rate": 2.809444112777097e-07, "loss": 0.2663, "step": 12815 }, { "epoch": 0.8755892600942816, "grad_norm": 3.710930109024048, "learning_rate": 2.806404592778386e-07, "loss": 0.2668, "step": 12816 }, { "epoch": 0.8756575801052128, "grad_norm": 2.733607769012451, "learning_rate": 2.803366649214657e-07, "loss": 0.116, "step": 12817 }, { "epoch": 0.875725900116144, "grad_norm": 2.8829710483551025, "learning_rate": 2.80033028223465e-07, "loss": 0.172, "step": 12818 }, { "epoch": 0.8757942201270752, "grad_norm": 4.98438024520874, "learning_rate": 2.797295491987054e-07, "loss": 0.1925, "step": 12819 }, { "epoch": 0.8758625401380065, "grad_norm": 3.461611747741699, "learning_rate": 2.794262278620481e-07, "loss": 0.2971, "step": 12820 }, { "epoch": 0.8759308601489376, "grad_norm": 4.268073081970215, "learning_rate": 2.7912306422834655e-07, "loss": 0.2922, "step": 12821 }, { "epoch": 0.8759991801598688, "grad_norm": 3.4751393795013428, "learning_rate": 2.788200583124443e-07, "loss": 0.2292, "step": 12822 }, { "epoch": 0.8760675001708, "grad_norm": 3.781096935272217, "learning_rate": 2.785172101291793e-07, "loss": 0.3529, "step": 12823 }, { "epoch": 0.8761358201817312, "grad_norm": 3.9384210109710693, "learning_rate": 2.782145196933817e-07, "loss": 0.2119, "step": 12824 }, { "epoch": 0.8762041401926625, "grad_norm": 3.7473669052124023, "learning_rate": 2.7791198701987286e-07, "loss": 0.2711, "step": 12825 }, { "epoch": 0.8762724602035936, "grad_norm": 4.081953525543213, "learning_rate": 2.776096121234674e-07, "loss": 0.3046, "step": 12826 }, { "epoch": 0.8763407802145249, "grad_norm": 3.796093463897705, "learning_rate": 2.7730739501897133e-07, "loss": 0.2012, "step": 12827 }, { "epoch": 0.876409100225456, "grad_norm": 3.1538140773773193, "learning_rate": 2.77005335721184e-07, "loss": 0.183, "step": 12828 }, { "epoch": 0.8764774202363872, "grad_norm": 5.343568325042725, "learning_rate": 2.767034342448967e-07, "loss": 0.2879, "step": 12829 }, { "epoch": 0.8765457402473185, "grad_norm": 4.379587650299072, "learning_rate": 2.764016906048916e-07, "loss": 0.1927, "step": 12830 }, { "epoch": 0.8766140602582496, "grad_norm": 4.569782733917236, "learning_rate": 2.7610010481594525e-07, "loss": 0.2872, "step": 12831 }, { "epoch": 0.8766823802691809, "grad_norm": 5.074582576751709, "learning_rate": 2.7579867689282475e-07, "loss": 0.2762, "step": 12832 }, { "epoch": 0.876750700280112, "grad_norm": 6.334705352783203, "learning_rate": 2.7549740685029107e-07, "loss": 0.2369, "step": 12833 }, { "epoch": 0.8768190202910432, "grad_norm": 2.862510919570923, "learning_rate": 2.751962947030957e-07, "loss": 0.204, "step": 12834 }, { "epoch": 0.8768873403019745, "grad_norm": 3.7157747745513916, "learning_rate": 2.7489534046598405e-07, "loss": 0.2092, "step": 12835 }, { "epoch": 0.8769556603129056, "grad_norm": 4.363408088684082, "learning_rate": 2.74594544153693e-07, "loss": 0.2373, "step": 12836 }, { "epoch": 0.8770239803238369, "grad_norm": 4.409677028656006, "learning_rate": 2.742939057809519e-07, "loss": 0.2309, "step": 12837 }, { "epoch": 0.877092300334768, "grad_norm": 4.070281982421875, "learning_rate": 2.739934253624813e-07, "loss": 0.2677, "step": 12838 }, { "epoch": 0.8771606203456993, "grad_norm": 3.043290376663208, "learning_rate": 2.736931029129948e-07, "loss": 0.2231, "step": 12839 }, { "epoch": 0.8772289403566305, "grad_norm": 2.764817476272583, "learning_rate": 2.7339293844719984e-07, "loss": 0.1352, "step": 12840 }, { "epoch": 0.8772972603675616, "grad_norm": 3.210472822189331, "learning_rate": 2.7309293197979383e-07, "loss": 0.1717, "step": 12841 }, { "epoch": 0.8773655803784929, "grad_norm": 4.163244247436523, "learning_rate": 2.7279308352546807e-07, "loss": 0.2315, "step": 12842 }, { "epoch": 0.877433900389424, "grad_norm": 3.3836445808410645, "learning_rate": 2.724933930989041e-07, "loss": 0.2315, "step": 12843 }, { "epoch": 0.8775022204003553, "grad_norm": 4.989278793334961, "learning_rate": 2.721938607147775e-07, "loss": 0.3711, "step": 12844 }, { "epoch": 0.8775705404112865, "grad_norm": 4.741950988769531, "learning_rate": 2.718944863877555e-07, "loss": 0.2133, "step": 12845 }, { "epoch": 0.8776388604222176, "grad_norm": 4.308967590332031, "learning_rate": 2.715952701324975e-07, "loss": 0.3403, "step": 12846 }, { "epoch": 0.8777071804331489, "grad_norm": 2.831962823867798, "learning_rate": 2.712962119636561e-07, "loss": 0.2477, "step": 12847 }, { "epoch": 0.87777550044408, "grad_norm": 2.836122989654541, "learning_rate": 2.709973118958744e-07, "loss": 0.182, "step": 12848 }, { "epoch": 0.8778438204550113, "grad_norm": 2.9342360496520996, "learning_rate": 2.706985699437895e-07, "loss": 0.1788, "step": 12849 }, { "epoch": 0.8779121404659425, "grad_norm": 2.9974985122680664, "learning_rate": 2.7039998612203044e-07, "loss": 0.199, "step": 12850 }, { "epoch": 0.8779804604768737, "grad_norm": 4.357113361358643, "learning_rate": 2.7010156044521673e-07, "loss": 0.2336, "step": 12851 }, { "epoch": 0.8780487804878049, "grad_norm": 4.859326362609863, "learning_rate": 2.69803292927962e-07, "loss": 0.259, "step": 12852 }, { "epoch": 0.878117100498736, "grad_norm": 2.6951498985290527, "learning_rate": 2.6950518358487104e-07, "loss": 0.2126, "step": 12853 }, { "epoch": 0.8781854205096673, "grad_norm": 4.102459907531738, "learning_rate": 2.692072324305434e-07, "loss": 0.2148, "step": 12854 }, { "epoch": 0.8782537405205985, "grad_norm": 6.162263870239258, "learning_rate": 2.6890943947956715e-07, "loss": 0.254, "step": 12855 }, { "epoch": 0.8783220605315297, "grad_norm": 3.507333517074585, "learning_rate": 2.686118047465248e-07, "loss": 0.2671, "step": 12856 }, { "epoch": 0.8783903805424609, "grad_norm": 2.766801595687866, "learning_rate": 2.6831432824599115e-07, "loss": 0.1844, "step": 12857 }, { "epoch": 0.878458700553392, "grad_norm": 4.1718363761901855, "learning_rate": 2.6801700999253287e-07, "loss": 0.2516, "step": 12858 }, { "epoch": 0.8785270205643233, "grad_norm": 4.228996276855469, "learning_rate": 2.6771985000070833e-07, "loss": 0.2423, "step": 12859 }, { "epoch": 0.8785953405752545, "grad_norm": 3.2231945991516113, "learning_rate": 2.6742284828506873e-07, "loss": 0.1603, "step": 12860 }, { "epoch": 0.8786636605861857, "grad_norm": 4.1863627433776855, "learning_rate": 2.671260048601577e-07, "loss": 0.3324, "step": 12861 }, { "epoch": 0.8787319805971169, "grad_norm": 3.938188314437866, "learning_rate": 2.6682931974051155e-07, "loss": 0.344, "step": 12862 }, { "epoch": 0.8788003006080481, "grad_norm": 4.396957874298096, "learning_rate": 2.665327929406569e-07, "loss": 0.2733, "step": 12863 }, { "epoch": 0.8788686206189793, "grad_norm": 3.8312253952026367, "learning_rate": 2.662364244751145e-07, "loss": 0.2739, "step": 12864 }, { "epoch": 0.8789369406299105, "grad_norm": 6.31022310256958, "learning_rate": 2.6594021435839626e-07, "loss": 0.3619, "step": 12865 }, { "epoch": 0.8790052606408417, "grad_norm": 5.457581996917725, "learning_rate": 2.6564416260500773e-07, "loss": 0.3839, "step": 12866 }, { "epoch": 0.8790735806517729, "grad_norm": 4.966736793518066, "learning_rate": 2.653482692294448e-07, "loss": 0.2971, "step": 12867 }, { "epoch": 0.8791419006627041, "grad_norm": 3.5102169513702393, "learning_rate": 2.650525342461972e-07, "loss": 0.3399, "step": 12868 }, { "epoch": 0.8792102206736353, "grad_norm": 4.258433818817139, "learning_rate": 2.647569576697461e-07, "loss": 0.2771, "step": 12869 }, { "epoch": 0.8792785406845665, "grad_norm": 5.14807653427124, "learning_rate": 2.644615395145656e-07, "loss": 0.2734, "step": 12870 }, { "epoch": 0.8793468606954977, "grad_norm": 3.765644073486328, "learning_rate": 2.641662797951205e-07, "loss": 0.2683, "step": 12871 }, { "epoch": 0.8794151807064289, "grad_norm": 5.618635177612305, "learning_rate": 2.63871178525869e-07, "loss": 0.436, "step": 12872 }, { "epoch": 0.8794835007173601, "grad_norm": 4.077479839324951, "learning_rate": 2.6357623572126235e-07, "loss": 0.3236, "step": 12873 }, { "epoch": 0.8795518207282913, "grad_norm": 3.9134480953216553, "learning_rate": 2.6328145139574325e-07, "loss": 0.2304, "step": 12874 }, { "epoch": 0.8796201407392226, "grad_norm": 2.713750123977661, "learning_rate": 2.62986825563745e-07, "loss": 0.2331, "step": 12875 }, { "epoch": 0.8796884607501537, "grad_norm": 3.9637017250061035, "learning_rate": 2.6269235823969593e-07, "loss": 0.1176, "step": 12876 }, { "epoch": 0.8797567807610849, "grad_norm": 4.665065288543701, "learning_rate": 2.623980494380146e-07, "loss": 0.2319, "step": 12877 }, { "epoch": 0.8798251007720161, "grad_norm": 2.7757647037506104, "learning_rate": 2.621038991731133e-07, "loss": 0.226, "step": 12878 }, { "epoch": 0.8798934207829473, "grad_norm": 3.4492650032043457, "learning_rate": 2.6180990745939517e-07, "loss": 0.1778, "step": 12879 }, { "epoch": 0.8799617407938786, "grad_norm": 3.1841371059417725, "learning_rate": 2.615160743112562e-07, "loss": 0.2429, "step": 12880 }, { "epoch": 0.8800300608048097, "grad_norm": 6.043997764587402, "learning_rate": 2.612223997430851e-07, "loss": 0.3263, "step": 12881 }, { "epoch": 0.8800983808157409, "grad_norm": 4.936105728149414, "learning_rate": 2.6092888376926227e-07, "loss": 0.3096, "step": 12882 }, { "epoch": 0.8801667008266721, "grad_norm": 3.1970086097717285, "learning_rate": 2.606355264041597e-07, "loss": 0.2013, "step": 12883 }, { "epoch": 0.8802350208376033, "grad_norm": 3.961085319519043, "learning_rate": 2.603423276621428e-07, "loss": 0.2913, "step": 12884 }, { "epoch": 0.8803033408485346, "grad_norm": 5.903151988983154, "learning_rate": 2.6004928755756903e-07, "loss": 0.2718, "step": 12885 }, { "epoch": 0.8803716608594657, "grad_norm": 4.327644348144531, "learning_rate": 2.5975640610478706e-07, "loss": 0.228, "step": 12886 }, { "epoch": 0.880439980870397, "grad_norm": 2.9763405323028564, "learning_rate": 2.5946368331813937e-07, "loss": 0.2365, "step": 12887 }, { "epoch": 0.8805083008813281, "grad_norm": 4.353935718536377, "learning_rate": 2.59171119211959e-07, "loss": 0.2982, "step": 12888 }, { "epoch": 0.8805766208922593, "grad_norm": 4.711178302764893, "learning_rate": 2.5887871380057276e-07, "loss": 0.2755, "step": 12889 }, { "epoch": 0.8806449409031906, "grad_norm": 4.3943071365356445, "learning_rate": 2.5858646709829906e-07, "loss": 0.1346, "step": 12890 }, { "epoch": 0.8807132609141217, "grad_norm": 4.506491661071777, "learning_rate": 2.582943791194473e-07, "loss": 0.299, "step": 12891 }, { "epoch": 0.880781580925053, "grad_norm": 5.063217639923096, "learning_rate": 2.580024498783206e-07, "loss": 0.2998, "step": 12892 }, { "epoch": 0.8808499009359841, "grad_norm": 4.638827323913574, "learning_rate": 2.577106793892148e-07, "loss": 0.3312, "step": 12893 }, { "epoch": 0.8809182209469153, "grad_norm": 3.850963830947876, "learning_rate": 2.574190676664174e-07, "loss": 0.254, "step": 12894 }, { "epoch": 0.8809865409578466, "grad_norm": 4.62106466293335, "learning_rate": 2.5712761472420645e-07, "loss": 0.3349, "step": 12895 }, { "epoch": 0.8810548609687777, "grad_norm": 4.944026470184326, "learning_rate": 2.568363205768544e-07, "loss": 0.2649, "step": 12896 }, { "epoch": 0.881123180979709, "grad_norm": 4.656867027282715, "learning_rate": 2.565451852386251e-07, "loss": 0.331, "step": 12897 }, { "epoch": 0.8811915009906401, "grad_norm": 3.9432594776153564, "learning_rate": 2.562542087237744e-07, "loss": 0.16, "step": 12898 }, { "epoch": 0.8812598210015714, "grad_norm": 3.2917442321777344, "learning_rate": 2.55963391046551e-07, "loss": 0.2995, "step": 12899 }, { "epoch": 0.8813281410125026, "grad_norm": 5.4964280128479, "learning_rate": 2.5567273222119516e-07, "loss": 0.3181, "step": 12900 }, { "epoch": 0.8813964610234337, "grad_norm": 4.701969623565674, "learning_rate": 2.553822322619401e-07, "loss": 0.2741, "step": 12901 }, { "epoch": 0.881464781034365, "grad_norm": 4.200233459472656, "learning_rate": 2.550918911830109e-07, "loss": 0.2201, "step": 12902 }, { "epoch": 0.8815331010452961, "grad_norm": 3.230670213699341, "learning_rate": 2.548017089986244e-07, "loss": 0.2142, "step": 12903 }, { "epoch": 0.8816014210562274, "grad_norm": 3.4280364513397217, "learning_rate": 2.5451168572298983e-07, "loss": 0.1275, "step": 12904 }, { "epoch": 0.8816697410671586, "grad_norm": 3.202883243560791, "learning_rate": 2.5422182137030846e-07, "loss": 0.2383, "step": 12905 }, { "epoch": 0.8817380610780897, "grad_norm": 4.152599334716797, "learning_rate": 2.539321159547755e-07, "loss": 0.3091, "step": 12906 }, { "epoch": 0.881806381089021, "grad_norm": 3.663630247116089, "learning_rate": 2.536425694905771e-07, "loss": 0.2537, "step": 12907 }, { "epoch": 0.8818747010999521, "grad_norm": 2.84787917137146, "learning_rate": 2.533531819918902e-07, "loss": 0.1874, "step": 12908 }, { "epoch": 0.8819430211108834, "grad_norm": 3.831230878829956, "learning_rate": 2.530639534728862e-07, "loss": 0.2557, "step": 12909 }, { "epoch": 0.8820113411218146, "grad_norm": 4.528861999511719, "learning_rate": 2.527748839477277e-07, "loss": 0.2167, "step": 12910 }, { "epoch": 0.8820796611327458, "grad_norm": 2.3881936073303223, "learning_rate": 2.524859734305697e-07, "loss": 0.1263, "step": 12911 }, { "epoch": 0.882147981143677, "grad_norm": 2.2603206634521484, "learning_rate": 2.5219722193555914e-07, "loss": 0.0919, "step": 12912 }, { "epoch": 0.8822163011546081, "grad_norm": 3.7866392135620117, "learning_rate": 2.519086294768361e-07, "loss": 0.3541, "step": 12913 }, { "epoch": 0.8822846211655394, "grad_norm": 5.6475725173950195, "learning_rate": 2.516201960685312e-07, "loss": 0.2963, "step": 12914 }, { "epoch": 0.8823529411764706, "grad_norm": 5.739160537719727, "learning_rate": 2.513319217247696e-07, "loss": 0.207, "step": 12915 }, { "epoch": 0.8824212611874018, "grad_norm": 4.383932113647461, "learning_rate": 2.51043806459666e-07, "loss": 0.199, "step": 12916 }, { "epoch": 0.882489581198333, "grad_norm": 5.195690631866455, "learning_rate": 2.5075585028732934e-07, "loss": 0.2261, "step": 12917 }, { "epoch": 0.8825579012092641, "grad_norm": 4.179557800292969, "learning_rate": 2.504680532218595e-07, "loss": 0.2556, "step": 12918 }, { "epoch": 0.8826262212201954, "grad_norm": 5.4015350341796875, "learning_rate": 2.5018041527734945e-07, "loss": 0.2038, "step": 12919 }, { "epoch": 0.8826945412311266, "grad_norm": 4.817237377166748, "learning_rate": 2.498929364678844e-07, "loss": 0.3003, "step": 12920 }, { "epoch": 0.8827628612420578, "grad_norm": 2.333169460296631, "learning_rate": 2.4960561680754094e-07, "loss": 0.19, "step": 12921 }, { "epoch": 0.882831181252989, "grad_norm": 4.77129602432251, "learning_rate": 2.4931845631038873e-07, "loss": 0.2136, "step": 12922 }, { "epoch": 0.8828995012639202, "grad_norm": 3.628864049911499, "learning_rate": 2.490314549904896e-07, "loss": 0.169, "step": 12923 }, { "epoch": 0.8829678212748514, "grad_norm": 4.229070663452148, "learning_rate": 2.487446128618965e-07, "loss": 0.2491, "step": 12924 }, { "epoch": 0.8830361412857826, "grad_norm": 2.735666036605835, "learning_rate": 2.4845792993865464e-07, "loss": 0.187, "step": 12925 }, { "epoch": 0.8831044612967138, "grad_norm": 5.002098560333252, "learning_rate": 2.481714062348036e-07, "loss": 0.2947, "step": 12926 }, { "epoch": 0.883172781307645, "grad_norm": 5.086452484130859, "learning_rate": 2.4788504176437385e-07, "loss": 0.3626, "step": 12927 }, { "epoch": 0.8832411013185762, "grad_norm": 3.241255760192871, "learning_rate": 2.475988365413865e-07, "loss": 0.1704, "step": 12928 }, { "epoch": 0.8833094213295074, "grad_norm": 4.008424758911133, "learning_rate": 2.4731279057985707e-07, "loss": 0.3375, "step": 12929 }, { "epoch": 0.8833777413404386, "grad_norm": 6.625540733337402, "learning_rate": 2.470269038937923e-07, "loss": 0.2768, "step": 12930 }, { "epoch": 0.8834460613513698, "grad_norm": 4.116179943084717, "learning_rate": 2.4674117649719106e-07, "loss": 0.2292, "step": 12931 }, { "epoch": 0.883514381362301, "grad_norm": 4.3688554763793945, "learning_rate": 2.4645560840404536e-07, "loss": 0.3396, "step": 12932 }, { "epoch": 0.8835827013732322, "grad_norm": 3.5475125312805176, "learning_rate": 2.4617019962833806e-07, "loss": 0.3054, "step": 12933 }, { "epoch": 0.8836510213841634, "grad_norm": 3.8023197650909424, "learning_rate": 2.458849501840449e-07, "loss": 0.1947, "step": 12934 }, { "epoch": 0.8837193413950947, "grad_norm": 4.049623489379883, "learning_rate": 2.455998600851351e-07, "loss": 0.2099, "step": 12935 }, { "epoch": 0.8837876614060258, "grad_norm": 5.089493274688721, "learning_rate": 2.453149293455665e-07, "loss": 0.1474, "step": 12936 }, { "epoch": 0.883855981416957, "grad_norm": 3.515605926513672, "learning_rate": 2.4503015797929296e-07, "loss": 0.2463, "step": 12937 }, { "epoch": 0.8839243014278882, "grad_norm": 4.493338108062744, "learning_rate": 2.447455460002585e-07, "loss": 0.3499, "step": 12938 }, { "epoch": 0.8839926214388194, "grad_norm": 4.245057582855225, "learning_rate": 2.444610934223999e-07, "loss": 0.2659, "step": 12939 }, { "epoch": 0.8840609414497507, "grad_norm": 4.923420429229736, "learning_rate": 2.4417680025964614e-07, "loss": 0.4095, "step": 12940 }, { "epoch": 0.8841292614606818, "grad_norm": 3.273122787475586, "learning_rate": 2.438926665259181e-07, "loss": 0.162, "step": 12941 }, { "epoch": 0.884197581471613, "grad_norm": 4.876485347747803, "learning_rate": 2.4360869223512943e-07, "loss": 0.2477, "step": 12942 }, { "epoch": 0.8842659014825442, "grad_norm": 4.75294303894043, "learning_rate": 2.433248774011855e-07, "loss": 0.2363, "step": 12943 }, { "epoch": 0.8843342214934754, "grad_norm": 3.428926706314087, "learning_rate": 2.4304122203798347e-07, "loss": 0.2664, "step": 12944 }, { "epoch": 0.8844025415044067, "grad_norm": 5.290650844573975, "learning_rate": 2.427577261594129e-07, "loss": 0.341, "step": 12945 }, { "epoch": 0.8844708615153378, "grad_norm": 4.719996929168701, "learning_rate": 2.424743897793571e-07, "loss": 0.2453, "step": 12946 }, { "epoch": 0.8845391815262691, "grad_norm": 2.7201030254364014, "learning_rate": 2.4219121291169007e-07, "loss": 0.1874, "step": 12947 }, { "epoch": 0.8846075015372002, "grad_norm": 5.126185417175293, "learning_rate": 2.419081955702772e-07, "loss": 0.2659, "step": 12948 }, { "epoch": 0.8846758215481314, "grad_norm": 4.214986324310303, "learning_rate": 2.416253377689778e-07, "loss": 0.3812, "step": 12949 }, { "epoch": 0.8847441415590627, "grad_norm": 2.935450315475464, "learning_rate": 2.413426395216428e-07, "loss": 0.1741, "step": 12950 }, { "epoch": 0.8848124615699938, "grad_norm": 5.0936079025268555, "learning_rate": 2.4106010084211487e-07, "loss": 0.2842, "step": 12951 }, { "epoch": 0.8848807815809251, "grad_norm": 3.7621846199035645, "learning_rate": 2.4077772174422894e-07, "loss": 0.3722, "step": 12952 }, { "epoch": 0.8849491015918562, "grad_norm": 3.2812886238098145, "learning_rate": 2.40495502241813e-07, "loss": 0.2675, "step": 12953 }, { "epoch": 0.8850174216027874, "grad_norm": 5.3179144859313965, "learning_rate": 2.4021344234868635e-07, "loss": 0.3042, "step": 12954 }, { "epoch": 0.8850857416137187, "grad_norm": 5.138421535491943, "learning_rate": 2.399315420786611e-07, "loss": 0.3557, "step": 12955 }, { "epoch": 0.8851540616246498, "grad_norm": 3.9656753540039062, "learning_rate": 2.396498014455402e-07, "loss": 0.2357, "step": 12956 }, { "epoch": 0.8852223816355811, "grad_norm": 4.722225189208984, "learning_rate": 2.393682204631198e-07, "loss": 0.318, "step": 12957 }, { "epoch": 0.8852907016465122, "grad_norm": 4.362865924835205, "learning_rate": 2.3908679914518863e-07, "loss": 0.2176, "step": 12958 }, { "epoch": 0.8853590216574435, "grad_norm": 4.522510528564453, "learning_rate": 2.3880553750552797e-07, "loss": 0.2411, "step": 12959 }, { "epoch": 0.8854273416683747, "grad_norm": 3.7569940090179443, "learning_rate": 2.3852443555790936e-07, "loss": 0.2974, "step": 12960 }, { "epoch": 0.8854956616793058, "grad_norm": 3.5438997745513916, "learning_rate": 2.3824349331609783e-07, "loss": 0.2552, "step": 12961 }, { "epoch": 0.8855639816902371, "grad_norm": 3.0252885818481445, "learning_rate": 2.3796271079385022e-07, "loss": 0.2057, "step": 12962 }, { "epoch": 0.8856323017011682, "grad_norm": 3.881246328353882, "learning_rate": 2.3768208800491673e-07, "loss": 0.2479, "step": 12963 }, { "epoch": 0.8857006217120995, "grad_norm": 3.5080816745758057, "learning_rate": 2.3740162496303634e-07, "loss": 0.192, "step": 12964 }, { "epoch": 0.8857689417230307, "grad_norm": 3.9799532890319824, "learning_rate": 2.3712132168194527e-07, "loss": 0.1881, "step": 12965 }, { "epoch": 0.8858372617339618, "grad_norm": 4.909971237182617, "learning_rate": 2.368411781753677e-07, "loss": 0.2806, "step": 12966 }, { "epoch": 0.8859055817448931, "grad_norm": 4.339603900909424, "learning_rate": 2.3656119445702278e-07, "loss": 0.2448, "step": 12967 }, { "epoch": 0.8859739017558242, "grad_norm": 3.5611510276794434, "learning_rate": 2.3628137054061877e-07, "loss": 0.2399, "step": 12968 }, { "epoch": 0.8860422217667555, "grad_norm": 4.0960373878479, "learning_rate": 2.3600170643985922e-07, "loss": 0.1935, "step": 12969 }, { "epoch": 0.8861105417776867, "grad_norm": 5.036384582519531, "learning_rate": 2.357222021684377e-07, "loss": 0.3845, "step": 12970 }, { "epoch": 0.8861788617886179, "grad_norm": 4.594369888305664, "learning_rate": 2.3544285774004182e-07, "loss": 0.1855, "step": 12971 }, { "epoch": 0.8862471817995491, "grad_norm": 5.090483665466309, "learning_rate": 2.3516367316834913e-07, "loss": 0.2478, "step": 12972 }, { "epoch": 0.8863155018104802, "grad_norm": 5.116391658782959, "learning_rate": 2.3488464846703178e-07, "loss": 0.2492, "step": 12973 }, { "epoch": 0.8863838218214115, "grad_norm": 4.086554527282715, "learning_rate": 2.3460578364975215e-07, "loss": 0.3389, "step": 12974 }, { "epoch": 0.8864521418323427, "grad_norm": 4.797199726104736, "learning_rate": 2.343270787301653e-07, "loss": 0.2542, "step": 12975 }, { "epoch": 0.8865204618432739, "grad_norm": 3.4404704570770264, "learning_rate": 2.3404853372191997e-07, "loss": 0.3029, "step": 12976 }, { "epoch": 0.8865887818542051, "grad_norm": 3.8027853965759277, "learning_rate": 2.3377014863865415e-07, "loss": 0.2378, "step": 12977 }, { "epoch": 0.8866571018651362, "grad_norm": 5.4244704246521, "learning_rate": 2.3349192349399948e-07, "loss": 0.3669, "step": 12978 }, { "epoch": 0.8867254218760675, "grad_norm": 4.173957824707031, "learning_rate": 2.3321385830158153e-07, "loss": 0.2849, "step": 12979 }, { "epoch": 0.8867937418869987, "grad_norm": 3.7467715740203857, "learning_rate": 2.3293595307501602e-07, "loss": 0.1801, "step": 12980 }, { "epoch": 0.8868620618979299, "grad_norm": 4.097174167633057, "learning_rate": 2.3265820782791057e-07, "loss": 0.2438, "step": 12981 }, { "epoch": 0.8869303819088611, "grad_norm": 3.987543821334839, "learning_rate": 2.3238062257386543e-07, "loss": 0.2649, "step": 12982 }, { "epoch": 0.8869987019197924, "grad_norm": 3.7506322860717773, "learning_rate": 2.3210319732647384e-07, "loss": 0.2229, "step": 12983 }, { "epoch": 0.8870670219307235, "grad_norm": 4.636627197265625, "learning_rate": 2.3182593209932046e-07, "loss": 0.2916, "step": 12984 }, { "epoch": 0.8871353419416547, "grad_norm": 5.505546569824219, "learning_rate": 2.3154882690598221e-07, "loss": 0.285, "step": 12985 }, { "epoch": 0.8872036619525859, "grad_norm": 4.120919704437256, "learning_rate": 2.312718817600279e-07, "loss": 0.2686, "step": 12986 }, { "epoch": 0.8872719819635171, "grad_norm": 3.7431118488311768, "learning_rate": 2.309950966750192e-07, "loss": 0.3149, "step": 12987 }, { "epoch": 0.8873403019744484, "grad_norm": 5.520092487335205, "learning_rate": 2.3071847166450982e-07, "loss": 0.249, "step": 12988 }, { "epoch": 0.8874086219853795, "grad_norm": 4.463207721710205, "learning_rate": 2.3044200674204474e-07, "loss": 0.2174, "step": 12989 }, { "epoch": 0.8874769419963107, "grad_norm": 5.67183256149292, "learning_rate": 2.3016570192116164e-07, "loss": 0.2613, "step": 12990 }, { "epoch": 0.8875452620072419, "grad_norm": 4.219589710235596, "learning_rate": 2.2988955721539046e-07, "loss": 0.1801, "step": 12991 }, { "epoch": 0.8876135820181731, "grad_norm": 3.897609233856201, "learning_rate": 2.2961357263825366e-07, "loss": 0.2774, "step": 12992 }, { "epoch": 0.8876819020291044, "grad_norm": 3.7546939849853516, "learning_rate": 2.2933774820326564e-07, "loss": 0.2303, "step": 12993 }, { "epoch": 0.8877502220400355, "grad_norm": 4.579154014587402, "learning_rate": 2.2906208392393218e-07, "loss": 0.2442, "step": 12994 }, { "epoch": 0.8878185420509668, "grad_norm": 4.486947536468506, "learning_rate": 2.287865798137521e-07, "loss": 0.3065, "step": 12995 }, { "epoch": 0.8878868620618979, "grad_norm": 5.010332107543945, "learning_rate": 2.2851123588621686e-07, "loss": 0.3323, "step": 12996 }, { "epoch": 0.8879551820728291, "grad_norm": 5.107842922210693, "learning_rate": 2.2823605215480735e-07, "loss": 0.3433, "step": 12997 }, { "epoch": 0.8880235020837604, "grad_norm": 4.576278209686279, "learning_rate": 2.2796102863300028e-07, "loss": 0.2417, "step": 12998 }, { "epoch": 0.8880918220946915, "grad_norm": 3.8264541625976562, "learning_rate": 2.2768616533426256e-07, "loss": 0.2212, "step": 12999 }, { "epoch": 0.8881601421056228, "grad_norm": 3.73528790473938, "learning_rate": 2.2741146227205338e-07, "loss": 0.1817, "step": 13000 }, { "epoch": 0.8882284621165539, "grad_norm": 4.163084983825684, "learning_rate": 2.2713691945982417e-07, "loss": 0.3446, "step": 13001 }, { "epoch": 0.8882967821274851, "grad_norm": 3.75779128074646, "learning_rate": 2.2686253691101853e-07, "loss": 0.2422, "step": 13002 }, { "epoch": 0.8883651021384164, "grad_norm": 5.042704105377197, "learning_rate": 2.2658831463907198e-07, "loss": 0.3042, "step": 13003 }, { "epoch": 0.8884334221493475, "grad_norm": 4.767394542694092, "learning_rate": 2.2631425265741296e-07, "loss": 0.2035, "step": 13004 }, { "epoch": 0.8885017421602788, "grad_norm": 3.6540825366973877, "learning_rate": 2.2604035097946102e-07, "loss": 0.2537, "step": 13005 }, { "epoch": 0.8885700621712099, "grad_norm": 3.9691479206085205, "learning_rate": 2.257666096186291e-07, "loss": 0.2644, "step": 13006 }, { "epoch": 0.8886383821821412, "grad_norm": 3.338392972946167, "learning_rate": 2.254930285883212e-07, "loss": 0.2225, "step": 13007 }, { "epoch": 0.8887067021930724, "grad_norm": 2.593303680419922, "learning_rate": 2.2521960790193435e-07, "loss": 0.2012, "step": 13008 }, { "epoch": 0.8887750222040035, "grad_norm": 4.067846298217773, "learning_rate": 2.2494634757285582e-07, "loss": 0.2692, "step": 13009 }, { "epoch": 0.8888433422149348, "grad_norm": 4.585488319396973, "learning_rate": 2.2467324761446783e-07, "loss": 0.2511, "step": 13010 }, { "epoch": 0.8889116622258659, "grad_norm": 4.076488018035889, "learning_rate": 2.2440030804014217e-07, "loss": 0.2461, "step": 13011 }, { "epoch": 0.8889799822367972, "grad_norm": 3.17452073097229, "learning_rate": 2.241275288632459e-07, "loss": 0.2295, "step": 13012 }, { "epoch": 0.8890483022477284, "grad_norm": 5.077775955200195, "learning_rate": 2.2385491009713444e-07, "loss": 0.2539, "step": 13013 }, { "epoch": 0.8891166222586596, "grad_norm": 4.690742015838623, "learning_rate": 2.2358245175515816e-07, "loss": 0.2979, "step": 13014 }, { "epoch": 0.8891849422695908, "grad_norm": 3.1939408779144287, "learning_rate": 2.2331015385065817e-07, "loss": 0.2499, "step": 13015 }, { "epoch": 0.8892532622805219, "grad_norm": 5.912555694580078, "learning_rate": 2.2303801639696884e-07, "loss": 0.2623, "step": 13016 }, { "epoch": 0.8893215822914532, "grad_norm": 3.9165079593658447, "learning_rate": 2.2276603940741417e-07, "loss": 0.3382, "step": 13017 }, { "epoch": 0.8893899023023843, "grad_norm": 3.531550168991089, "learning_rate": 2.2249422289531424e-07, "loss": 0.2428, "step": 13018 }, { "epoch": 0.8894582223133156, "grad_norm": 3.862091064453125, "learning_rate": 2.2222256687397855e-07, "loss": 0.2668, "step": 13019 }, { "epoch": 0.8895265423242468, "grad_norm": 4.339593887329102, "learning_rate": 2.2195107135670976e-07, "loss": 0.3403, "step": 13020 }, { "epoch": 0.8895948623351779, "grad_norm": 3.4192538261413574, "learning_rate": 2.216797363568015e-07, "loss": 0.1793, "step": 13021 }, { "epoch": 0.8896631823461092, "grad_norm": 4.713221549987793, "learning_rate": 2.2140856188753999e-07, "loss": 0.3649, "step": 13022 }, { "epoch": 0.8897315023570403, "grad_norm": 3.842357873916626, "learning_rate": 2.2113754796220492e-07, "loss": 0.2434, "step": 13023 }, { "epoch": 0.8897998223679716, "grad_norm": 5.179317951202393, "learning_rate": 2.20866694594067e-07, "loss": 0.26, "step": 13024 }, { "epoch": 0.8898681423789028, "grad_norm": 3.5978567600250244, "learning_rate": 2.2059600179638837e-07, "loss": 0.163, "step": 13025 }, { "epoch": 0.889936462389834, "grad_norm": 4.112588882446289, "learning_rate": 2.2032546958242503e-07, "loss": 0.3154, "step": 13026 }, { "epoch": 0.8900047824007652, "grad_norm": 3.6613576412200928, "learning_rate": 2.20055097965424e-07, "loss": 0.2051, "step": 13027 }, { "epoch": 0.8900731024116963, "grad_norm": 3.807342767715454, "learning_rate": 2.1978488695862524e-07, "loss": 0.1932, "step": 13028 }, { "epoch": 0.8901414224226276, "grad_norm": 4.097719192504883, "learning_rate": 2.1951483657525872e-07, "loss": 0.307, "step": 13029 }, { "epoch": 0.8902097424335588, "grad_norm": 2.779201030731201, "learning_rate": 2.1924494682854928e-07, "loss": 0.2476, "step": 13030 }, { "epoch": 0.89027806244449, "grad_norm": 4.329422473907471, "learning_rate": 2.189752177317117e-07, "loss": 0.2967, "step": 13031 }, { "epoch": 0.8903463824554212, "grad_norm": 3.231229305267334, "learning_rate": 2.1870564929795567e-07, "loss": 0.2133, "step": 13032 }, { "epoch": 0.8904147024663523, "grad_norm": 3.829812526702881, "learning_rate": 2.1843624154047964e-07, "loss": 0.2388, "step": 13033 }, { "epoch": 0.8904830224772836, "grad_norm": 4.397648334503174, "learning_rate": 2.1816699447247622e-07, "loss": 0.2891, "step": 13034 }, { "epoch": 0.8905513424882148, "grad_norm": 4.069960594177246, "learning_rate": 2.178979081071299e-07, "loss": 0.2318, "step": 13035 }, { "epoch": 0.890619662499146, "grad_norm": 4.1626505851745605, "learning_rate": 2.1762898245761769e-07, "loss": 0.2395, "step": 13036 }, { "epoch": 0.8906879825100772, "grad_norm": 4.578632354736328, "learning_rate": 2.1736021753710621e-07, "loss": 0.36, "step": 13037 }, { "epoch": 0.8907563025210085, "grad_norm": 4.029205799102783, "learning_rate": 2.1709161335875815e-07, "loss": 0.2286, "step": 13038 }, { "epoch": 0.8908246225319396, "grad_norm": 4.184306621551514, "learning_rate": 2.168231699357257e-07, "loss": 0.2689, "step": 13039 }, { "epoch": 0.8908929425428708, "grad_norm": 3.7340214252471924, "learning_rate": 2.1655488728115367e-07, "loss": 0.2151, "step": 13040 }, { "epoch": 0.890961262553802, "grad_norm": 2.689870834350586, "learning_rate": 2.162867654081803e-07, "loss": 0.2283, "step": 13041 }, { "epoch": 0.8910295825647332, "grad_norm": 8.140138626098633, "learning_rate": 2.160188043299329e-07, "loss": 0.2386, "step": 13042 }, { "epoch": 0.8910979025756645, "grad_norm": 5.23397159576416, "learning_rate": 2.157510040595334e-07, "loss": 0.3814, "step": 13043 }, { "epoch": 0.8911662225865956, "grad_norm": 5.668813228607178, "learning_rate": 2.1548336461009549e-07, "loss": 0.3453, "step": 13044 }, { "epoch": 0.8912345425975268, "grad_norm": 5.090968132019043, "learning_rate": 2.152158859947259e-07, "loss": 0.1694, "step": 13045 }, { "epoch": 0.891302862608458, "grad_norm": 3.5310704708099365, "learning_rate": 2.1494856822652053e-07, "loss": 0.2849, "step": 13046 }, { "epoch": 0.8913711826193892, "grad_norm": 6.532289981842041, "learning_rate": 2.1468141131857015e-07, "loss": 0.3404, "step": 13047 }, { "epoch": 0.8914395026303205, "grad_norm": 4.703789234161377, "learning_rate": 2.1441441528395628e-07, "loss": 0.2108, "step": 13048 }, { "epoch": 0.8915078226412516, "grad_norm": 3.896946430206299, "learning_rate": 2.1414758013575413e-07, "loss": 0.177, "step": 13049 }, { "epoch": 0.8915761426521829, "grad_norm": 4.288315296173096, "learning_rate": 2.1388090588702773e-07, "loss": 0.2946, "step": 13050 }, { "epoch": 0.891644462663114, "grad_norm": 2.4282076358795166, "learning_rate": 2.1361439255083758e-07, "loss": 0.2375, "step": 13051 }, { "epoch": 0.8917127826740452, "grad_norm": 4.004718780517578, "learning_rate": 2.133480401402333e-07, "loss": 0.2039, "step": 13052 }, { "epoch": 0.8917811026849765, "grad_norm": 4.558579921722412, "learning_rate": 2.1308184866825786e-07, "loss": 0.3493, "step": 13053 }, { "epoch": 0.8918494226959076, "grad_norm": 5.771393299102783, "learning_rate": 2.1281581814794496e-07, "loss": 0.3036, "step": 13054 }, { "epoch": 0.8919177427068389, "grad_norm": 4.098357677459717, "learning_rate": 2.1254994859232173e-07, "loss": 0.2061, "step": 13055 }, { "epoch": 0.89198606271777, "grad_norm": 4.2754974365234375, "learning_rate": 2.122842400144078e-07, "loss": 0.2422, "step": 13056 }, { "epoch": 0.8920543827287012, "grad_norm": 4.248308181762695, "learning_rate": 2.120186924272136e-07, "loss": 0.2101, "step": 13057 }, { "epoch": 0.8921227027396325, "grad_norm": 3.0030903816223145, "learning_rate": 2.1175330584374246e-07, "loss": 0.2229, "step": 13058 }, { "epoch": 0.8921910227505636, "grad_norm": 4.667335033416748, "learning_rate": 2.1148808027699e-07, "loss": 0.2607, "step": 13059 }, { "epoch": 0.8922593427614949, "grad_norm": 4.4039106369018555, "learning_rate": 2.112230157399429e-07, "loss": 0.3496, "step": 13060 }, { "epoch": 0.892327662772426, "grad_norm": 3.9279849529266357, "learning_rate": 2.1095811224558157e-07, "loss": 0.3116, "step": 13061 }, { "epoch": 0.8923959827833573, "grad_norm": 4.62321662902832, "learning_rate": 2.106933698068768e-07, "loss": 0.2288, "step": 13062 }, { "epoch": 0.8924643027942885, "grad_norm": 3.44193959236145, "learning_rate": 2.104287884367923e-07, "loss": 0.2568, "step": 13063 }, { "epoch": 0.8925326228052196, "grad_norm": 2.5326201915740967, "learning_rate": 2.1016436814828404e-07, "loss": 0.1368, "step": 13064 }, { "epoch": 0.8926009428161509, "grad_norm": 3.855743885040283, "learning_rate": 2.099001089543014e-07, "loss": 0.2721, "step": 13065 }, { "epoch": 0.892669262827082, "grad_norm": 3.8074285984039307, "learning_rate": 2.0963601086778244e-07, "loss": 0.2441, "step": 13066 }, { "epoch": 0.8927375828380133, "grad_norm": 3.524468421936035, "learning_rate": 2.0937207390165985e-07, "loss": 0.2355, "step": 13067 }, { "epoch": 0.8928059028489445, "grad_norm": 4.640817642211914, "learning_rate": 2.091082980688585e-07, "loss": 0.2927, "step": 13068 }, { "epoch": 0.8928742228598756, "grad_norm": 3.011124849319458, "learning_rate": 2.0884468338229512e-07, "loss": 0.2071, "step": 13069 }, { "epoch": 0.8929425428708069, "grad_norm": 5.026020526885986, "learning_rate": 2.085812298548767e-07, "loss": 0.268, "step": 13070 }, { "epoch": 0.893010862881738, "grad_norm": 4.826179027557373, "learning_rate": 2.083179374995052e-07, "loss": 0.1957, "step": 13071 }, { "epoch": 0.8930791828926693, "grad_norm": 4.2687482833862305, "learning_rate": 2.0805480632907252e-07, "loss": 0.1692, "step": 13072 }, { "epoch": 0.8931475029036005, "grad_norm": 4.689302444458008, "learning_rate": 2.0779183635646503e-07, "loss": 0.2253, "step": 13073 }, { "epoch": 0.8932158229145317, "grad_norm": 5.477966785430908, "learning_rate": 2.075290275945575e-07, "loss": 0.1983, "step": 13074 }, { "epoch": 0.8932841429254629, "grad_norm": 3.780700922012329, "learning_rate": 2.0726638005622038e-07, "loss": 0.2279, "step": 13075 }, { "epoch": 0.893352462936394, "grad_norm": 2.6510322093963623, "learning_rate": 2.070038937543141e-07, "loss": 0.2226, "step": 13076 }, { "epoch": 0.8934207829473253, "grad_norm": 4.857934474945068, "learning_rate": 2.067415687016928e-07, "loss": 0.2608, "step": 13077 }, { "epoch": 0.8934891029582565, "grad_norm": 4.761612892150879, "learning_rate": 2.0647940491120092e-07, "loss": 0.3527, "step": 13078 }, { "epoch": 0.8935574229691877, "grad_norm": 5.327435493469238, "learning_rate": 2.0621740239567633e-07, "loss": 0.241, "step": 13079 }, { "epoch": 0.8936257429801189, "grad_norm": 4.399095058441162, "learning_rate": 2.0595556116794865e-07, "loss": 0.3496, "step": 13080 }, { "epoch": 0.89369406299105, "grad_norm": 5.380720138549805, "learning_rate": 2.0569388124084025e-07, "loss": 0.4109, "step": 13081 }, { "epoch": 0.8937623830019813, "grad_norm": 3.2806506156921387, "learning_rate": 2.054323626271633e-07, "loss": 0.2036, "step": 13082 }, { "epoch": 0.8938307030129125, "grad_norm": 4.390605449676514, "learning_rate": 2.0517100533972453e-07, "loss": 0.3143, "step": 13083 }, { "epoch": 0.8938990230238437, "grad_norm": 3.2617669105529785, "learning_rate": 2.049098093913214e-07, "loss": 0.2172, "step": 13084 }, { "epoch": 0.8939673430347749, "grad_norm": 4.246429920196533, "learning_rate": 2.0464877479474592e-07, "loss": 0.2972, "step": 13085 }, { "epoch": 0.8940356630457061, "grad_norm": 4.081501483917236, "learning_rate": 2.0438790156277798e-07, "loss": 0.2812, "step": 13086 }, { "epoch": 0.8941039830566373, "grad_norm": 3.726614475250244, "learning_rate": 2.0412718970819248e-07, "loss": 0.1757, "step": 13087 }, { "epoch": 0.8941723030675685, "grad_norm": 7.349577903747559, "learning_rate": 2.038666392437566e-07, "loss": 0.2249, "step": 13088 }, { "epoch": 0.8942406230784997, "grad_norm": 4.617528915405273, "learning_rate": 2.0360625018222766e-07, "loss": 0.3076, "step": 13089 }, { "epoch": 0.8943089430894309, "grad_norm": 3.1930582523345947, "learning_rate": 2.033460225363569e-07, "loss": 0.1593, "step": 13090 }, { "epoch": 0.8943772631003621, "grad_norm": 3.4083871841430664, "learning_rate": 2.03085956318887e-07, "loss": 0.1747, "step": 13091 }, { "epoch": 0.8944455831112933, "grad_norm": 2.7515854835510254, "learning_rate": 2.0282605154255234e-07, "loss": 0.2104, "step": 13092 }, { "epoch": 0.8945139031222245, "grad_norm": 3.390981674194336, "learning_rate": 2.0256630822008086e-07, "loss": 0.3266, "step": 13093 }, { "epoch": 0.8945822231331557, "grad_norm": 3.735675096511841, "learning_rate": 2.0230672636418994e-07, "loss": 0.3467, "step": 13094 }, { "epoch": 0.8946505431440869, "grad_norm": 2.9825375080108643, "learning_rate": 2.0204730598759113e-07, "loss": 0.1904, "step": 13095 }, { "epoch": 0.8947188631550181, "grad_norm": 5.208960056304932, "learning_rate": 2.0178804710298782e-07, "loss": 0.2194, "step": 13096 }, { "epoch": 0.8947871831659493, "grad_norm": 3.029963970184326, "learning_rate": 2.015289497230752e-07, "loss": 0.2021, "step": 13097 }, { "epoch": 0.8948555031768806, "grad_norm": 4.7058610916137695, "learning_rate": 2.0127001386054045e-07, "loss": 0.2176, "step": 13098 }, { "epoch": 0.8949238231878117, "grad_norm": 2.8954529762268066, "learning_rate": 2.0101123952806315e-07, "loss": 0.1645, "step": 13099 }, { "epoch": 0.8949921431987429, "grad_norm": 3.730771541595459, "learning_rate": 2.0075262673831488e-07, "loss": 0.3271, "step": 13100 }, { "epoch": 0.8950604632096741, "grad_norm": 4.807953834533691, "learning_rate": 2.0049417550395903e-07, "loss": 0.3422, "step": 13101 }, { "epoch": 0.8951287832206053, "grad_norm": 4.538983345031738, "learning_rate": 2.0023588583765116e-07, "loss": 0.3406, "step": 13102 }, { "epoch": 0.8951971032315366, "grad_norm": 3.7142693996429443, "learning_rate": 1.9997775775203874e-07, "loss": 0.2038, "step": 13103 }, { "epoch": 0.8952654232424677, "grad_norm": 3.761643171310425, "learning_rate": 1.997197912597622e-07, "loss": 0.1905, "step": 13104 }, { "epoch": 0.8953337432533989, "grad_norm": 5.802999496459961, "learning_rate": 1.994619863734534e-07, "loss": 0.2423, "step": 13105 }, { "epoch": 0.8954020632643301, "grad_norm": 5.196573734283447, "learning_rate": 1.9920434310573654e-07, "loss": 0.2138, "step": 13106 }, { "epoch": 0.8954703832752613, "grad_norm": 3.4406933784484863, "learning_rate": 1.9894686146922752e-07, "loss": 0.3005, "step": 13107 }, { "epoch": 0.8955387032861926, "grad_norm": 4.393585681915283, "learning_rate": 1.986895414765338e-07, "loss": 0.2996, "step": 13108 }, { "epoch": 0.8956070232971237, "grad_norm": 3.998171091079712, "learning_rate": 1.984323831402569e-07, "loss": 0.3593, "step": 13109 }, { "epoch": 0.895675343308055, "grad_norm": 3.8715322017669678, "learning_rate": 1.9817538647298837e-07, "loss": 0.2196, "step": 13110 }, { "epoch": 0.8957436633189861, "grad_norm": 3.679426908493042, "learning_rate": 1.979185514873127e-07, "loss": 0.2304, "step": 13111 }, { "epoch": 0.8958119833299173, "grad_norm": 4.867925643920898, "learning_rate": 1.9766187819580654e-07, "loss": 0.2951, "step": 13112 }, { "epoch": 0.8958803033408486, "grad_norm": 3.7840137481689453, "learning_rate": 1.9740536661103892e-07, "loss": 0.1972, "step": 13113 }, { "epoch": 0.8959486233517797, "grad_norm": 3.741987705230713, "learning_rate": 1.9714901674557057e-07, "loss": 0.2153, "step": 13114 }, { "epoch": 0.896016943362711, "grad_norm": 3.340984344482422, "learning_rate": 1.9689282861195333e-07, "loss": 0.1857, "step": 13115 }, { "epoch": 0.8960852633736421, "grad_norm": 4.763922691345215, "learning_rate": 1.966368022227325e-07, "loss": 0.1957, "step": 13116 }, { "epoch": 0.8961535833845733, "grad_norm": 4.877518653869629, "learning_rate": 1.9638093759044472e-07, "loss": 0.276, "step": 13117 }, { "epoch": 0.8962219033955046, "grad_norm": 4.188104629516602, "learning_rate": 1.9612523472762011e-07, "loss": 0.224, "step": 13118 }, { "epoch": 0.8962902234064357, "grad_norm": 4.005407333374023, "learning_rate": 1.9586969364677863e-07, "loss": 0.2703, "step": 13119 }, { "epoch": 0.896358543417367, "grad_norm": 4.176821231842041, "learning_rate": 1.9561431436043409e-07, "loss": 0.321, "step": 13120 }, { "epoch": 0.8964268634282981, "grad_norm": 4.007422924041748, "learning_rate": 1.9535909688109127e-07, "loss": 0.2742, "step": 13121 }, { "epoch": 0.8964951834392294, "grad_norm": 6.444922924041748, "learning_rate": 1.951040412212484e-07, "loss": 0.261, "step": 13122 }, { "epoch": 0.8965635034501606, "grad_norm": 4.139121055603027, "learning_rate": 1.9484914739339326e-07, "loss": 0.2903, "step": 13123 }, { "epoch": 0.8966318234610917, "grad_norm": 4.02526330947876, "learning_rate": 1.945944154100085e-07, "loss": 0.2149, "step": 13124 }, { "epoch": 0.896700143472023, "grad_norm": 3.9248008728027344, "learning_rate": 1.9433984528356784e-07, "loss": 0.2228, "step": 13125 }, { "epoch": 0.8967684634829541, "grad_norm": 3.5172667503356934, "learning_rate": 1.9408543702653651e-07, "loss": 0.1608, "step": 13126 }, { "epoch": 0.8968367834938854, "grad_norm": 3.689220428466797, "learning_rate": 1.9383119065137195e-07, "loss": 0.2632, "step": 13127 }, { "epoch": 0.8969051035048166, "grad_norm": 4.225705623626709, "learning_rate": 1.9357710617052414e-07, "loss": 0.2136, "step": 13128 }, { "epoch": 0.8969734235157477, "grad_norm": 3.0450682640075684, "learning_rate": 1.93323183596435e-07, "loss": 0.1717, "step": 13129 }, { "epoch": 0.897041743526679, "grad_norm": 4.910749912261963, "learning_rate": 1.9306942294153828e-07, "loss": 0.2109, "step": 13130 }, { "epoch": 0.8971100635376101, "grad_norm": 3.662485361099243, "learning_rate": 1.9281582421826027e-07, "loss": 0.2178, "step": 13131 }, { "epoch": 0.8971783835485414, "grad_norm": 3.655970811843872, "learning_rate": 1.9256238743901877e-07, "loss": 0.2666, "step": 13132 }, { "epoch": 0.8972467035594726, "grad_norm": 4.681794166564941, "learning_rate": 1.9230911261622419e-07, "loss": 0.2506, "step": 13133 }, { "epoch": 0.8973150235704038, "grad_norm": 3.8742847442626953, "learning_rate": 1.9205599976227876e-07, "loss": 0.2949, "step": 13134 }, { "epoch": 0.897383343581335, "grad_norm": 5.228948593139648, "learning_rate": 1.9180304888957584e-07, "loss": 0.2035, "step": 13135 }, { "epoch": 0.8974516635922661, "grad_norm": 3.674516201019287, "learning_rate": 1.9155026001050245e-07, "loss": 0.203, "step": 13136 }, { "epoch": 0.8975199836031974, "grad_norm": 3.1917617321014404, "learning_rate": 1.912976331374368e-07, "loss": 0.1445, "step": 13137 }, { "epoch": 0.8975883036141286, "grad_norm": 4.923120021820068, "learning_rate": 1.9104516828275043e-07, "loss": 0.2935, "step": 13138 }, { "epoch": 0.8976566236250598, "grad_norm": 3.8138625621795654, "learning_rate": 1.9079286545880476e-07, "loss": 0.1947, "step": 13139 }, { "epoch": 0.897724943635991, "grad_norm": 4.24542236328125, "learning_rate": 1.9054072467795423e-07, "loss": 0.3012, "step": 13140 }, { "epoch": 0.8977932636469221, "grad_norm": 2.810776472091675, "learning_rate": 1.9028874595254595e-07, "loss": 0.2181, "step": 13141 }, { "epoch": 0.8978615836578534, "grad_norm": 4.532049179077148, "learning_rate": 1.9003692929491845e-07, "loss": 0.3035, "step": 13142 }, { "epoch": 0.8979299036687846, "grad_norm": 4.95497465133667, "learning_rate": 1.897852747174028e-07, "loss": 0.1924, "step": 13143 }, { "epoch": 0.8979982236797158, "grad_norm": 3.6945323944091797, "learning_rate": 1.8953378223232164e-07, "loss": 0.1587, "step": 13144 }, { "epoch": 0.898066543690647, "grad_norm": 5.589807510375977, "learning_rate": 1.8928245185199016e-07, "loss": 0.2861, "step": 13145 }, { "epoch": 0.8981348637015782, "grad_norm": 3.940060615539551, "learning_rate": 1.890312835887154e-07, "loss": 0.2317, "step": 13146 }, { "epoch": 0.8982031837125094, "grad_norm": 4.1117963790893555, "learning_rate": 1.8878027745479547e-07, "loss": 0.2201, "step": 13147 }, { "epoch": 0.8982715037234406, "grad_norm": 3.6593427658081055, "learning_rate": 1.8852943346252265e-07, "loss": 0.1735, "step": 13148 }, { "epoch": 0.8983398237343718, "grad_norm": 3.8933820724487305, "learning_rate": 1.8827875162417912e-07, "loss": 0.2643, "step": 13149 }, { "epoch": 0.898408143745303, "grad_norm": 4.028716087341309, "learning_rate": 1.8802823195204082e-07, "loss": 0.2416, "step": 13150 }, { "epoch": 0.8984764637562342, "grad_norm": 4.959017276763916, "learning_rate": 1.8777787445837479e-07, "loss": 0.1656, "step": 13151 }, { "epoch": 0.8985447837671654, "grad_norm": 3.9854745864868164, "learning_rate": 1.8752767915544067e-07, "loss": 0.2963, "step": 13152 }, { "epoch": 0.8986131037780966, "grad_norm": 4.3747124671936035, "learning_rate": 1.8727764605548913e-07, "loss": 0.2767, "step": 13153 }, { "epoch": 0.8986814237890278, "grad_norm": 4.036706924438477, "learning_rate": 1.8702777517076468e-07, "loss": 0.1881, "step": 13154 }, { "epoch": 0.898749743799959, "grad_norm": 3.521348476409912, "learning_rate": 1.8677806651350209e-07, "loss": 0.2484, "step": 13155 }, { "epoch": 0.8988180638108902, "grad_norm": 4.355363368988037, "learning_rate": 1.8652852009592796e-07, "loss": 0.3061, "step": 13156 }, { "epoch": 0.8988863838218214, "grad_norm": 4.4536356925964355, "learning_rate": 1.8627913593026384e-07, "loss": 0.2549, "step": 13157 }, { "epoch": 0.8989547038327527, "grad_norm": 5.318011283874512, "learning_rate": 1.860299140287212e-07, "loss": 0.3154, "step": 13158 }, { "epoch": 0.8990230238436838, "grad_norm": 3.2183890342712402, "learning_rate": 1.8578085440350256e-07, "loss": 0.1947, "step": 13159 }, { "epoch": 0.899091343854615, "grad_norm": 4.365662097930908, "learning_rate": 1.855319570668042e-07, "loss": 0.1889, "step": 13160 }, { "epoch": 0.8991596638655462, "grad_norm": 4.512696266174316, "learning_rate": 1.8528322203081425e-07, "loss": 0.1765, "step": 13161 }, { "epoch": 0.8992279838764774, "grad_norm": 3.0403332710266113, "learning_rate": 1.8503464930771234e-07, "loss": 0.2257, "step": 13162 }, { "epoch": 0.8992963038874087, "grad_norm": 3.8538711071014404, "learning_rate": 1.8478623890967065e-07, "loss": 0.2548, "step": 13163 }, { "epoch": 0.8993646238983398, "grad_norm": 3.2765867710113525, "learning_rate": 1.8453799084885285e-07, "loss": 0.2356, "step": 13164 }, { "epoch": 0.899432943909271, "grad_norm": 3.3727264404296875, "learning_rate": 1.842899051374156e-07, "loss": 0.2303, "step": 13165 }, { "epoch": 0.8995012639202022, "grad_norm": 3.589736223220825, "learning_rate": 1.8404198178750663e-07, "loss": 0.1949, "step": 13166 }, { "epoch": 0.8995695839311334, "grad_norm": 4.689190864562988, "learning_rate": 1.8379422081126551e-07, "loss": 0.3041, "step": 13167 }, { "epoch": 0.8996379039420647, "grad_norm": 4.785168170928955, "learning_rate": 1.8354662222082517e-07, "loss": 0.1976, "step": 13168 }, { "epoch": 0.8997062239529958, "grad_norm": 4.13515567779541, "learning_rate": 1.8329918602830969e-07, "loss": 0.201, "step": 13169 }, { "epoch": 0.8997745439639271, "grad_norm": 3.9544286727905273, "learning_rate": 1.830519122458353e-07, "loss": 0.2324, "step": 13170 }, { "epoch": 0.8998428639748582, "grad_norm": 4.200780391693115, "learning_rate": 1.8280480088551044e-07, "loss": 0.2565, "step": 13171 }, { "epoch": 0.8999111839857894, "grad_norm": 4.654654026031494, "learning_rate": 1.8255785195943507e-07, "loss": 0.3423, "step": 13172 }, { "epoch": 0.8999795039967207, "grad_norm": 3.8309128284454346, "learning_rate": 1.8231106547970215e-07, "loss": 0.2605, "step": 13173 }, { "epoch": 0.9000478240076518, "grad_norm": 2.9161324501037598, "learning_rate": 1.8206444145839646e-07, "loss": 0.1729, "step": 13174 }, { "epoch": 0.9001161440185831, "grad_norm": 4.101001739501953, "learning_rate": 1.818179799075938e-07, "loss": 0.2232, "step": 13175 }, { "epoch": 0.9001844640295142, "grad_norm": 3.830608606338501, "learning_rate": 1.8157168083936265e-07, "loss": 0.2776, "step": 13176 }, { "epoch": 0.9002527840404454, "grad_norm": 2.6433584690093994, "learning_rate": 1.8132554426576408e-07, "loss": 0.1975, "step": 13177 }, { "epoch": 0.9003211040513767, "grad_norm": 4.208939075469971, "learning_rate": 1.8107957019885063e-07, "loss": 0.239, "step": 13178 }, { "epoch": 0.9003894240623078, "grad_norm": 6.0634236335754395, "learning_rate": 1.8083375865066743e-07, "loss": 0.3279, "step": 13179 }, { "epoch": 0.9004577440732391, "grad_norm": 4.303891181945801, "learning_rate": 1.8058810963325067e-07, "loss": 0.2466, "step": 13180 }, { "epoch": 0.9005260640841702, "grad_norm": 3.130556583404541, "learning_rate": 1.803426231586288e-07, "loss": 0.2197, "step": 13181 }, { "epoch": 0.9005943840951015, "grad_norm": 3.390716552734375, "learning_rate": 1.8009729923882327e-07, "loss": 0.2306, "step": 13182 }, { "epoch": 0.9006627041060327, "grad_norm": 3.7549383640289307, "learning_rate": 1.7985213788584697e-07, "loss": 0.2197, "step": 13183 }, { "epoch": 0.9007310241169638, "grad_norm": 4.779079914093018, "learning_rate": 1.7960713911170422e-07, "loss": 0.2605, "step": 13184 }, { "epoch": 0.9007993441278951, "grad_norm": 4.263868808746338, "learning_rate": 1.793623029283928e-07, "loss": 0.321, "step": 13185 }, { "epoch": 0.9008676641388262, "grad_norm": 3.8679275512695312, "learning_rate": 1.7911762934790066e-07, "loss": 0.2084, "step": 13186 }, { "epoch": 0.9009359841497575, "grad_norm": 5.075686931610107, "learning_rate": 1.7887311838221043e-07, "loss": 0.2482, "step": 13187 }, { "epoch": 0.9010043041606887, "grad_norm": 4.5401129722595215, "learning_rate": 1.786287700432934e-07, "loss": 0.17, "step": 13188 }, { "epoch": 0.9010726241716198, "grad_norm": 4.394189834594727, "learning_rate": 1.7838458434311472e-07, "loss": 0.2983, "step": 13189 }, { "epoch": 0.9011409441825511, "grad_norm": 3.9518744945526123, "learning_rate": 1.7814056129363281e-07, "loss": 0.2218, "step": 13190 }, { "epoch": 0.9012092641934822, "grad_norm": 4.174501419067383, "learning_rate": 1.778967009067965e-07, "loss": 0.3218, "step": 13191 }, { "epoch": 0.9012775842044135, "grad_norm": 4.344613075256348, "learning_rate": 1.7765300319454636e-07, "loss": 0.2555, "step": 13192 }, { "epoch": 0.9013459042153447, "grad_norm": 3.385427951812744, "learning_rate": 1.7740946816881565e-07, "loss": 0.2598, "step": 13193 }, { "epoch": 0.9014142242262759, "grad_norm": 3.513960361480713, "learning_rate": 1.771660958415302e-07, "loss": 0.2766, "step": 13194 }, { "epoch": 0.9014825442372071, "grad_norm": 3.9984936714172363, "learning_rate": 1.7692288622460694e-07, "loss": 0.2853, "step": 13195 }, { "epoch": 0.9015508642481382, "grad_norm": 3.638526439666748, "learning_rate": 1.766798393299554e-07, "loss": 0.2016, "step": 13196 }, { "epoch": 0.9016191842590695, "grad_norm": 4.399878025054932, "learning_rate": 1.7643695516947695e-07, "loss": 0.2453, "step": 13197 }, { "epoch": 0.9016875042700007, "grad_norm": 3.18052339553833, "learning_rate": 1.7619423375506438e-07, "loss": 0.13, "step": 13198 }, { "epoch": 0.9017558242809319, "grad_norm": 2.8701000213623047, "learning_rate": 1.7595167509860432e-07, "loss": 0.1277, "step": 13199 }, { "epoch": 0.9018241442918631, "grad_norm": 3.1329917907714844, "learning_rate": 1.7570927921197326e-07, "loss": 0.1807, "step": 13200 }, { "epoch": 0.9018924643027942, "grad_norm": 3.8151369094848633, "learning_rate": 1.7546704610704072e-07, "loss": 0.2717, "step": 13201 }, { "epoch": 0.9019607843137255, "grad_norm": 4.074719429016113, "learning_rate": 1.7522497579566842e-07, "loss": 0.1803, "step": 13202 }, { "epoch": 0.9020291043246567, "grad_norm": 6.2138471603393555, "learning_rate": 1.7498306828970995e-07, "loss": 0.3662, "step": 13203 }, { "epoch": 0.9020974243355879, "grad_norm": 4.693608283996582, "learning_rate": 1.747413236010107e-07, "loss": 0.1479, "step": 13204 }, { "epoch": 0.9021657443465191, "grad_norm": 4.777554512023926, "learning_rate": 1.744997417414083e-07, "loss": 0.3095, "step": 13205 }, { "epoch": 0.9022340643574503, "grad_norm": 3.812134027481079, "learning_rate": 1.7425832272273266e-07, "loss": 0.2214, "step": 13206 }, { "epoch": 0.9023023843683815, "grad_norm": 4.098163604736328, "learning_rate": 1.740170665568055e-07, "loss": 0.2595, "step": 13207 }, { "epoch": 0.9023707043793127, "grad_norm": 4.538025379180908, "learning_rate": 1.7377597325543993e-07, "loss": 0.306, "step": 13208 }, { "epoch": 0.9024390243902439, "grad_norm": 3.683278799057007, "learning_rate": 1.7353504283044173e-07, "loss": 0.2947, "step": 13209 }, { "epoch": 0.9025073444011751, "grad_norm": 2.7181642055511475, "learning_rate": 1.7329427529360897e-07, "loss": 0.145, "step": 13210 }, { "epoch": 0.9025756644121063, "grad_norm": 3.4356071949005127, "learning_rate": 1.7305367065673183e-07, "loss": 0.2273, "step": 13211 }, { "epoch": 0.9026439844230375, "grad_norm": 5.1379714012146, "learning_rate": 1.7281322893159085e-07, "loss": 0.2683, "step": 13212 }, { "epoch": 0.9027123044339687, "grad_norm": 4.2736053466796875, "learning_rate": 1.725729501299607e-07, "loss": 0.3495, "step": 13213 }, { "epoch": 0.9027806244448999, "grad_norm": 4.203914165496826, "learning_rate": 1.7233283426360678e-07, "loss": 0.3149, "step": 13214 }, { "epoch": 0.9028489444558311, "grad_norm": 4.601449012756348, "learning_rate": 1.7209288134428747e-07, "loss": 0.2985, "step": 13215 }, { "epoch": 0.9029172644667623, "grad_norm": 4.242033958435059, "learning_rate": 1.7185309138375215e-07, "loss": 0.2444, "step": 13216 }, { "epoch": 0.9029855844776935, "grad_norm": 3.9432895183563232, "learning_rate": 1.7161346439374293e-07, "loss": 0.2374, "step": 13217 }, { "epoch": 0.9030539044886248, "grad_norm": 3.5625219345092773, "learning_rate": 1.7137400038599327e-07, "loss": 0.2353, "step": 13218 }, { "epoch": 0.9031222244995559, "grad_norm": 5.185794353485107, "learning_rate": 1.7113469937223008e-07, "loss": 0.2654, "step": 13219 }, { "epoch": 0.9031905445104871, "grad_norm": 3.589221954345703, "learning_rate": 1.7089556136417056e-07, "loss": 0.1972, "step": 13220 }, { "epoch": 0.9032588645214183, "grad_norm": 4.811890602111816, "learning_rate": 1.7065658637352408e-07, "loss": 0.242, "step": 13221 }, { "epoch": 0.9033271845323495, "grad_norm": 4.716381549835205, "learning_rate": 1.704177744119939e-07, "loss": 0.2944, "step": 13222 }, { "epoch": 0.9033955045432808, "grad_norm": 4.88252067565918, "learning_rate": 1.7017912549127306e-07, "loss": 0.231, "step": 13223 }, { "epoch": 0.9034638245542119, "grad_norm": 6.121945858001709, "learning_rate": 1.6994063962304807e-07, "loss": 0.3331, "step": 13224 }, { "epoch": 0.9035321445651431, "grad_norm": 3.2361271381378174, "learning_rate": 1.6970231681899645e-07, "loss": 0.219, "step": 13225 }, { "epoch": 0.9036004645760743, "grad_norm": 3.818434000015259, "learning_rate": 1.6946415709078878e-07, "loss": 0.2536, "step": 13226 }, { "epoch": 0.9036687845870055, "grad_norm": 4.206772327423096, "learning_rate": 1.6922616045008743e-07, "loss": 0.2249, "step": 13227 }, { "epoch": 0.9037371045979368, "grad_norm": 4.228823184967041, "learning_rate": 1.6898832690854508e-07, "loss": 0.169, "step": 13228 }, { "epoch": 0.9038054246088679, "grad_norm": 6.423652172088623, "learning_rate": 1.68750656477809e-07, "loss": 0.238, "step": 13229 }, { "epoch": 0.9038737446197992, "grad_norm": 3.5155868530273438, "learning_rate": 1.6851314916951708e-07, "loss": 0.2247, "step": 13230 }, { "epoch": 0.9039420646307303, "grad_norm": 3.9170775413513184, "learning_rate": 1.6827580499529947e-07, "loss": 0.2628, "step": 13231 }, { "epoch": 0.9040103846416615, "grad_norm": 3.81467866897583, "learning_rate": 1.6803862396677775e-07, "loss": 0.2061, "step": 13232 }, { "epoch": 0.9040787046525928, "grad_norm": 4.606302738189697, "learning_rate": 1.6780160609556655e-07, "loss": 0.2238, "step": 13233 }, { "epoch": 0.9041470246635239, "grad_norm": 4.109469890594482, "learning_rate": 1.6756475139327147e-07, "loss": 0.2864, "step": 13234 }, { "epoch": 0.9042153446744552, "grad_norm": 3.4272708892822266, "learning_rate": 1.6732805987149124e-07, "loss": 0.1778, "step": 13235 }, { "epoch": 0.9042836646853863, "grad_norm": 3.7227394580841064, "learning_rate": 1.6709153154181594e-07, "loss": 0.279, "step": 13236 }, { "epoch": 0.9043519846963175, "grad_norm": 5.134792327880859, "learning_rate": 1.6685516641582755e-07, "loss": 0.2882, "step": 13237 }, { "epoch": 0.9044203047072488, "grad_norm": 3.922764539718628, "learning_rate": 1.6661896450510022e-07, "loss": 0.2225, "step": 13238 }, { "epoch": 0.9044886247181799, "grad_norm": 4.154425621032715, "learning_rate": 1.663829258212004e-07, "loss": 0.3016, "step": 13239 }, { "epoch": 0.9045569447291112, "grad_norm": 3.5885393619537354, "learning_rate": 1.6614705037568633e-07, "loss": 0.2165, "step": 13240 }, { "epoch": 0.9046252647400423, "grad_norm": 3.568073034286499, "learning_rate": 1.6591133818010733e-07, "loss": 0.2933, "step": 13241 }, { "epoch": 0.9046935847509736, "grad_norm": 3.8747079372406006, "learning_rate": 1.6567578924600612e-07, "loss": 0.1611, "step": 13242 }, { "epoch": 0.9047619047619048, "grad_norm": 5.433393955230713, "learning_rate": 1.6544040358491763e-07, "loss": 0.3245, "step": 13243 }, { "epoch": 0.9048302247728359, "grad_norm": 3.922346830368042, "learning_rate": 1.6520518120836746e-07, "loss": 0.2417, "step": 13244 }, { "epoch": 0.9048985447837672, "grad_norm": 3.7301177978515625, "learning_rate": 1.649701221278735e-07, "loss": 0.2605, "step": 13245 }, { "epoch": 0.9049668647946983, "grad_norm": 3.2136192321777344, "learning_rate": 1.6473522635494653e-07, "loss": 0.2382, "step": 13246 }, { "epoch": 0.9050351848056296, "grad_norm": 4.135942459106445, "learning_rate": 1.645004939010885e-07, "loss": 0.2696, "step": 13247 }, { "epoch": 0.9051035048165608, "grad_norm": 5.252523422241211, "learning_rate": 1.6426592477779355e-07, "loss": 0.3671, "step": 13248 }, { "epoch": 0.9051718248274919, "grad_norm": 4.9708943367004395, "learning_rate": 1.6403151899654805e-07, "loss": 0.2511, "step": 13249 }, { "epoch": 0.9052401448384232, "grad_norm": 3.0754411220550537, "learning_rate": 1.637972765688302e-07, "loss": 0.2468, "step": 13250 }, { "epoch": 0.9053084648493543, "grad_norm": 5.176520824432373, "learning_rate": 1.6356319750611004e-07, "loss": 0.4822, "step": 13251 }, { "epoch": 0.9053767848602856, "grad_norm": 3.7781500816345215, "learning_rate": 1.6332928181985063e-07, "loss": 0.1745, "step": 13252 }, { "epoch": 0.9054451048712168, "grad_norm": 7.278445243835449, "learning_rate": 1.630955295215053e-07, "loss": 0.3795, "step": 13253 }, { "epoch": 0.905513424882148, "grad_norm": 3.4980881214141846, "learning_rate": 1.628619406225204e-07, "loss": 0.1726, "step": 13254 }, { "epoch": 0.9055817448930792, "grad_norm": 3.0291216373443604, "learning_rate": 1.6262851513433414e-07, "loss": 0.1751, "step": 13255 }, { "epoch": 0.9056500649040103, "grad_norm": 4.631341934204102, "learning_rate": 1.6239525306837727e-07, "loss": 0.3603, "step": 13256 }, { "epoch": 0.9057183849149416, "grad_norm": 3.9657399654388428, "learning_rate": 1.6216215443607167e-07, "loss": 0.2325, "step": 13257 }, { "epoch": 0.9057867049258728, "grad_norm": 4.106828689575195, "learning_rate": 1.619292192488311e-07, "loss": 0.1891, "step": 13258 }, { "epoch": 0.905855024936804, "grad_norm": 3.0293142795562744, "learning_rate": 1.6169644751806256e-07, "loss": 0.2719, "step": 13259 }, { "epoch": 0.9059233449477352, "grad_norm": 5.347117900848389, "learning_rate": 1.6146383925516428e-07, "loss": 0.3181, "step": 13260 }, { "epoch": 0.9059916649586663, "grad_norm": 5.13423490524292, "learning_rate": 1.6123139447152585e-07, "loss": 0.2576, "step": 13261 }, { "epoch": 0.9060599849695976, "grad_norm": 4.426768779754639, "learning_rate": 1.609991131785291e-07, "loss": 0.2556, "step": 13262 }, { "epoch": 0.9061283049805288, "grad_norm": 3.60841703414917, "learning_rate": 1.607669953875497e-07, "loss": 0.2345, "step": 13263 }, { "epoch": 0.90619662499146, "grad_norm": 4.26772403717041, "learning_rate": 1.605350411099531e-07, "loss": 0.2921, "step": 13264 }, { "epoch": 0.9062649450023912, "grad_norm": 4.258065700531006, "learning_rate": 1.6030325035709708e-07, "loss": 0.276, "step": 13265 }, { "epoch": 0.9063332650133225, "grad_norm": 3.5966808795928955, "learning_rate": 1.60071623140332e-07, "loss": 0.2964, "step": 13266 }, { "epoch": 0.9064015850242536, "grad_norm": 6.323652744293213, "learning_rate": 1.598401594710005e-07, "loss": 0.2919, "step": 13267 }, { "epoch": 0.9064699050351848, "grad_norm": 4.644239902496338, "learning_rate": 1.5960885936043616e-07, "loss": 0.2779, "step": 13268 }, { "epoch": 0.906538225046116, "grad_norm": 3.6295077800750732, "learning_rate": 1.593777228199657e-07, "loss": 0.2641, "step": 13269 }, { "epoch": 0.9066065450570472, "grad_norm": 4.059085369110107, "learning_rate": 1.5914674986090682e-07, "loss": 0.181, "step": 13270 }, { "epoch": 0.9066748650679785, "grad_norm": 4.208731651306152, "learning_rate": 1.5891594049456947e-07, "loss": 0.2761, "step": 13271 }, { "epoch": 0.9067431850789096, "grad_norm": 6.310253620147705, "learning_rate": 1.58685294732257e-07, "loss": 0.2761, "step": 13272 }, { "epoch": 0.9068115050898408, "grad_norm": 3.999768018722534, "learning_rate": 1.584548125852619e-07, "loss": 0.1967, "step": 13273 }, { "epoch": 0.906879825100772, "grad_norm": 3.7896358966827393, "learning_rate": 1.5822449406487078e-07, "loss": 0.2632, "step": 13274 }, { "epoch": 0.9069481451117032, "grad_norm": 3.6320579051971436, "learning_rate": 1.579943391823617e-07, "loss": 0.3167, "step": 13275 }, { "epoch": 0.9070164651226345, "grad_norm": 2.958132028579712, "learning_rate": 1.5776434794900624e-07, "loss": 0.244, "step": 13276 }, { "epoch": 0.9070847851335656, "grad_norm": 3.969242572784424, "learning_rate": 1.5753452037606415e-07, "loss": 0.194, "step": 13277 }, { "epoch": 0.9071531051444969, "grad_norm": 2.9181206226348877, "learning_rate": 1.5730485647479098e-07, "loss": 0.1944, "step": 13278 }, { "epoch": 0.907221425155428, "grad_norm": 4.472327709197998, "learning_rate": 1.5707535625643183e-07, "loss": 0.2731, "step": 13279 }, { "epoch": 0.9072897451663592, "grad_norm": 3.7820799350738525, "learning_rate": 1.5684601973222588e-07, "loss": 0.2791, "step": 13280 }, { "epoch": 0.9073580651772905, "grad_norm": 4.01786470413208, "learning_rate": 1.5661684691340155e-07, "loss": 0.172, "step": 13281 }, { "epoch": 0.9074263851882216, "grad_norm": 4.631997108459473, "learning_rate": 1.563878378111821e-07, "loss": 0.2635, "step": 13282 }, { "epoch": 0.9074947051991529, "grad_norm": 4.103423118591309, "learning_rate": 1.561589924367811e-07, "loss": 0.2636, "step": 13283 }, { "epoch": 0.907563025210084, "grad_norm": 4.314847946166992, "learning_rate": 1.5593031080140517e-07, "loss": 0.2337, "step": 13284 }, { "epoch": 0.9076313452210152, "grad_norm": 3.952552556991577, "learning_rate": 1.557017929162508e-07, "loss": 0.2537, "step": 13285 }, { "epoch": 0.9076996652319465, "grad_norm": 4.817608833312988, "learning_rate": 1.5547343879250902e-07, "loss": 0.2475, "step": 13286 }, { "epoch": 0.9077679852428776, "grad_norm": 5.322210788726807, "learning_rate": 1.5524524844136117e-07, "loss": 0.2417, "step": 13287 }, { "epoch": 0.9078363052538089, "grad_norm": 4.556116104125977, "learning_rate": 1.550172218739812e-07, "loss": 0.2168, "step": 13288 }, { "epoch": 0.90790462526474, "grad_norm": 4.46253776550293, "learning_rate": 1.547893591015353e-07, "loss": 0.3364, "step": 13289 }, { "epoch": 0.9079729452756713, "grad_norm": 4.210557460784912, "learning_rate": 1.5456166013518147e-07, "loss": 0.2567, "step": 13290 }, { "epoch": 0.9080412652866025, "grad_norm": 4.229702949523926, "learning_rate": 1.5433412498606878e-07, "loss": 0.1697, "step": 13291 }, { "epoch": 0.9081095852975336, "grad_norm": 3.5559587478637695, "learning_rate": 1.5410675366533977e-07, "loss": 0.247, "step": 13292 }, { "epoch": 0.9081779053084649, "grad_norm": 4.60088586807251, "learning_rate": 1.538795461841275e-07, "loss": 0.2175, "step": 13293 }, { "epoch": 0.908246225319396, "grad_norm": 4.243644714355469, "learning_rate": 1.536525025535582e-07, "loss": 0.3047, "step": 13294 }, { "epoch": 0.9083145453303273, "grad_norm": 4.306098937988281, "learning_rate": 1.534256227847487e-07, "loss": 0.3063, "step": 13295 }, { "epoch": 0.9083828653412584, "grad_norm": 4.713160991668701, "learning_rate": 1.531989068888107e-07, "loss": 0.4222, "step": 13296 }, { "epoch": 0.9084511853521896, "grad_norm": 4.241543769836426, "learning_rate": 1.5297235487684402e-07, "loss": 0.2695, "step": 13297 }, { "epoch": 0.9085195053631209, "grad_norm": 4.241107940673828, "learning_rate": 1.527459667599433e-07, "loss": 0.2842, "step": 13298 }, { "epoch": 0.908587825374052, "grad_norm": 4.914922714233398, "learning_rate": 1.525197425491935e-07, "loss": 0.2274, "step": 13299 }, { "epoch": 0.9086561453849833, "grad_norm": 3.822063446044922, "learning_rate": 1.5229368225567296e-07, "loss": 0.2878, "step": 13300 }, { "epoch": 0.9087244653959144, "grad_norm": 4.16832971572876, "learning_rate": 1.520677858904496e-07, "loss": 0.2384, "step": 13301 }, { "epoch": 0.9087927854068457, "grad_norm": 3.6018574237823486, "learning_rate": 1.51842053464587e-07, "loss": 0.2654, "step": 13302 }, { "epoch": 0.9088611054177769, "grad_norm": 4.102698802947998, "learning_rate": 1.5161648498913787e-07, "loss": 0.2956, "step": 13303 }, { "epoch": 0.908929425428708, "grad_norm": 4.988727569580078, "learning_rate": 1.5139108047514833e-07, "loss": 0.1999, "step": 13304 }, { "epoch": 0.9089977454396393, "grad_norm": 3.2713022232055664, "learning_rate": 1.5116583993365434e-07, "loss": 0.1857, "step": 13305 }, { "epoch": 0.9090660654505704, "grad_norm": 5.424086093902588, "learning_rate": 1.5094076337568652e-07, "loss": 0.3085, "step": 13306 }, { "epoch": 0.9091343854615017, "grad_norm": 5.253338813781738, "learning_rate": 1.5071585081226568e-07, "loss": 0.2695, "step": 13307 }, { "epoch": 0.9092027054724329, "grad_norm": 3.625825881958008, "learning_rate": 1.504911022544053e-07, "loss": 0.2701, "step": 13308 }, { "epoch": 0.909271025483364, "grad_norm": 3.995384931564331, "learning_rate": 1.502665177131115e-07, "loss": 0.2369, "step": 13309 }, { "epoch": 0.9093393454942953, "grad_norm": 3.449263572692871, "learning_rate": 1.5004209719938022e-07, "loss": 0.3044, "step": 13310 }, { "epoch": 0.9094076655052264, "grad_norm": 3.2330174446105957, "learning_rate": 1.49817840724202e-07, "loss": 0.2997, "step": 13311 }, { "epoch": 0.9094759855161577, "grad_norm": 4.052210330963135, "learning_rate": 1.4959374829855732e-07, "loss": 0.2181, "step": 13312 }, { "epoch": 0.9095443055270889, "grad_norm": 3.803128719329834, "learning_rate": 1.4936981993342035e-07, "loss": 0.2038, "step": 13313 }, { "epoch": 0.9096126255380201, "grad_norm": 4.604508876800537, "learning_rate": 1.4914605563975447e-07, "loss": 0.2289, "step": 13314 }, { "epoch": 0.9096809455489513, "grad_norm": 6.050687789916992, "learning_rate": 1.4892245542851833e-07, "loss": 0.242, "step": 13315 }, { "epoch": 0.9097492655598824, "grad_norm": 5.212674140930176, "learning_rate": 1.4869901931066055e-07, "loss": 0.2477, "step": 13316 }, { "epoch": 0.9098175855708137, "grad_norm": 2.873148202896118, "learning_rate": 1.4847574729712303e-07, "loss": 0.2049, "step": 13317 }, { "epoch": 0.9098859055817449, "grad_norm": 5.671762943267822, "learning_rate": 1.482526393988373e-07, "loss": 0.3922, "step": 13318 }, { "epoch": 0.9099542255926761, "grad_norm": 3.337231397628784, "learning_rate": 1.480296956267294e-07, "loss": 0.2305, "step": 13319 }, { "epoch": 0.9100225456036073, "grad_norm": 3.542811155319214, "learning_rate": 1.4780691599171563e-07, "loss": 0.1436, "step": 13320 }, { "epoch": 0.9100908656145384, "grad_norm": 3.729572057723999, "learning_rate": 1.4758430050470573e-07, "loss": 0.2335, "step": 13321 }, { "epoch": 0.9101591856254697, "grad_norm": 5.325178146362305, "learning_rate": 1.4736184917659967e-07, "loss": 0.351, "step": 13322 }, { "epoch": 0.9102275056364009, "grad_norm": 5.13903284072876, "learning_rate": 1.4713956201829126e-07, "loss": 0.2285, "step": 13323 }, { "epoch": 0.9102958256473321, "grad_norm": 3.2682394981384277, "learning_rate": 1.4691743904066457e-07, "loss": 0.1954, "step": 13324 }, { "epoch": 0.9103641456582633, "grad_norm": 6.883387088775635, "learning_rate": 1.4669548025459706e-07, "loss": 0.3586, "step": 13325 }, { "epoch": 0.9104324656691946, "grad_norm": 5.597380638122559, "learning_rate": 1.4647368567095646e-07, "loss": 0.2281, "step": 13326 }, { "epoch": 0.9105007856801257, "grad_norm": 3.0887227058410645, "learning_rate": 1.4625205530060436e-07, "loss": 0.207, "step": 13327 }, { "epoch": 0.9105691056910569, "grad_norm": 3.7392983436584473, "learning_rate": 1.4603058915439254e-07, "loss": 0.1371, "step": 13328 }, { "epoch": 0.9106374257019881, "grad_norm": 4.805637359619141, "learning_rate": 1.45809287243167e-07, "loss": 0.2882, "step": 13329 }, { "epoch": 0.9107057457129193, "grad_norm": 4.484814643859863, "learning_rate": 1.4558814957776322e-07, "loss": 0.2972, "step": 13330 }, { "epoch": 0.9107740657238506, "grad_norm": 4.681083679199219, "learning_rate": 1.4536717616901014e-07, "loss": 0.2302, "step": 13331 }, { "epoch": 0.9108423857347817, "grad_norm": 3.9809396266937256, "learning_rate": 1.4514636702772765e-07, "loss": 0.2131, "step": 13332 }, { "epoch": 0.9109107057457129, "grad_norm": 3.2327072620391846, "learning_rate": 1.4492572216472915e-07, "loss": 0.1881, "step": 13333 }, { "epoch": 0.9109790257566441, "grad_norm": 3.569906711578369, "learning_rate": 1.4470524159081787e-07, "loss": 0.2352, "step": 13334 }, { "epoch": 0.9110473457675753, "grad_norm": 3.0876729488372803, "learning_rate": 1.4448492531679086e-07, "loss": 0.1671, "step": 13335 }, { "epoch": 0.9111156657785066, "grad_norm": 4.782747745513916, "learning_rate": 1.4426477335343657e-07, "loss": 0.3329, "step": 13336 }, { "epoch": 0.9111839857894377, "grad_norm": 3.281075954437256, "learning_rate": 1.4404478571153534e-07, "loss": 0.2577, "step": 13337 }, { "epoch": 0.911252305800369, "grad_norm": 5.825052261352539, "learning_rate": 1.4382496240185893e-07, "loss": 0.2993, "step": 13338 }, { "epoch": 0.9113206258113001, "grad_norm": 3.2183682918548584, "learning_rate": 1.4360530343517136e-07, "loss": 0.2199, "step": 13339 }, { "epoch": 0.9113889458222313, "grad_norm": 3.4771807193756104, "learning_rate": 1.4338580882222923e-07, "loss": 0.2186, "step": 13340 }, { "epoch": 0.9114572658331626, "grad_norm": 5.3486008644104, "learning_rate": 1.431664785737806e-07, "loss": 0.1739, "step": 13341 }, { "epoch": 0.9115255858440937, "grad_norm": 4.25420618057251, "learning_rate": 1.4294731270056505e-07, "loss": 0.3255, "step": 13342 }, { "epoch": 0.911593905855025, "grad_norm": 4.934554576873779, "learning_rate": 1.4272831121331503e-07, "loss": 0.2808, "step": 13343 }, { "epoch": 0.9116622258659561, "grad_norm": 4.43315315246582, "learning_rate": 1.4250947412275415e-07, "loss": 0.2289, "step": 13344 }, { "epoch": 0.9117305458768873, "grad_norm": 6.020153045654297, "learning_rate": 1.4229080143959864e-07, "loss": 0.3052, "step": 13345 }, { "epoch": 0.9117988658878186, "grad_norm": 4.349206447601318, "learning_rate": 1.4207229317455576e-07, "loss": 0.2533, "step": 13346 }, { "epoch": 0.9118671858987497, "grad_norm": 3.8406999111175537, "learning_rate": 1.418539493383254e-07, "loss": 0.1641, "step": 13347 }, { "epoch": 0.911935505909681, "grad_norm": 5.550808906555176, "learning_rate": 1.4163576994159927e-07, "loss": 0.3371, "step": 13348 }, { "epoch": 0.9120038259206121, "grad_norm": 4.467075824737549, "learning_rate": 1.414177549950621e-07, "loss": 0.2553, "step": 13349 }, { "epoch": 0.9120721459315434, "grad_norm": 3.665266752243042, "learning_rate": 1.4119990450938813e-07, "loss": 0.2313, "step": 13350 }, { "epoch": 0.9121404659424746, "grad_norm": 5.081510066986084, "learning_rate": 1.409822184952454e-07, "loss": 0.2378, "step": 13351 }, { "epoch": 0.9122087859534057, "grad_norm": 2.6739604473114014, "learning_rate": 1.4076469696329337e-07, "loss": 0.2445, "step": 13352 }, { "epoch": 0.912277105964337, "grad_norm": 5.172435760498047, "learning_rate": 1.405473399241841e-07, "loss": 0.3573, "step": 13353 }, { "epoch": 0.9123454259752681, "grad_norm": 4.3714728355407715, "learning_rate": 1.4033014738855965e-07, "loss": 0.3288, "step": 13354 }, { "epoch": 0.9124137459861994, "grad_norm": 4.141313552856445, "learning_rate": 1.401131193670565e-07, "loss": 0.2271, "step": 13355 }, { "epoch": 0.9124820659971306, "grad_norm": 3.5405547618865967, "learning_rate": 1.3989625587030152e-07, "loss": 0.2435, "step": 13356 }, { "epoch": 0.9125503860080617, "grad_norm": 2.7211318016052246, "learning_rate": 1.3967955690891492e-07, "loss": 0.1689, "step": 13357 }, { "epoch": 0.912618706018993, "grad_norm": 4.78314733505249, "learning_rate": 1.3946302249350605e-07, "loss": 0.3307, "step": 13358 }, { "epoch": 0.9126870260299241, "grad_norm": 4.64674186706543, "learning_rate": 1.3924665263467883e-07, "loss": 0.3093, "step": 13359 }, { "epoch": 0.9127553460408554, "grad_norm": 4.548959732055664, "learning_rate": 1.39030447343029e-07, "loss": 0.3066, "step": 13360 }, { "epoch": 0.9128236660517866, "grad_norm": 3.736790657043457, "learning_rate": 1.3881440662914257e-07, "loss": 0.141, "step": 13361 }, { "epoch": 0.9128919860627178, "grad_norm": 3.4977550506591797, "learning_rate": 1.38598530503599e-07, "loss": 0.2463, "step": 13362 }, { "epoch": 0.912960306073649, "grad_norm": 5.163126468658447, "learning_rate": 1.3838281897696913e-07, "loss": 0.2773, "step": 13363 }, { "epoch": 0.9130286260845801, "grad_norm": 3.490211248397827, "learning_rate": 1.3816727205981572e-07, "loss": 0.2648, "step": 13364 }, { "epoch": 0.9130969460955114, "grad_norm": 3.1842565536499023, "learning_rate": 1.3795188976269368e-07, "loss": 0.206, "step": 13365 }, { "epoch": 0.9131652661064426, "grad_norm": 9.354742050170898, "learning_rate": 1.3773667209614942e-07, "loss": 0.2974, "step": 13366 }, { "epoch": 0.9132335861173738, "grad_norm": 2.8396928310394287, "learning_rate": 1.375216190707212e-07, "loss": 0.2444, "step": 13367 }, { "epoch": 0.913301906128305, "grad_norm": 3.8029425144195557, "learning_rate": 1.3730673069694061e-07, "loss": 0.2735, "step": 13368 }, { "epoch": 0.9133702261392361, "grad_norm": 3.0503711700439453, "learning_rate": 1.3709200698533036e-07, "loss": 0.241, "step": 13369 }, { "epoch": 0.9134385461501674, "grad_norm": 5.1313066482543945, "learning_rate": 1.3687744794640343e-07, "loss": 0.2634, "step": 13370 }, { "epoch": 0.9135068661610986, "grad_norm": 3.4359724521636963, "learning_rate": 1.3666305359066698e-07, "loss": 0.1605, "step": 13371 }, { "epoch": 0.9135751861720298, "grad_norm": 4.0266194343566895, "learning_rate": 1.364488239286196e-07, "loss": 0.2603, "step": 13372 }, { "epoch": 0.913643506182961, "grad_norm": 3.886345863342285, "learning_rate": 1.3623475897075137e-07, "loss": 0.2191, "step": 13373 }, { "epoch": 0.9137118261938922, "grad_norm": 2.508495569229126, "learning_rate": 1.3602085872754417e-07, "loss": 0.1658, "step": 13374 }, { "epoch": 0.9137801462048234, "grad_norm": 3.648263454437256, "learning_rate": 1.3580712320947245e-07, "loss": 0.2592, "step": 13375 }, { "epoch": 0.9138484662157546, "grad_norm": 4.093700408935547, "learning_rate": 1.355935524270019e-07, "loss": 0.2729, "step": 13376 }, { "epoch": 0.9139167862266858, "grad_norm": 5.513304233551025, "learning_rate": 1.35380146390591e-07, "loss": 0.2549, "step": 13377 }, { "epoch": 0.913985106237617, "grad_norm": 4.432642459869385, "learning_rate": 1.351669051106902e-07, "loss": 0.3266, "step": 13378 }, { "epoch": 0.9140534262485482, "grad_norm": 6.107931613922119, "learning_rate": 1.3495382859773979e-07, "loss": 0.2685, "step": 13379 }, { "epoch": 0.9141217462594794, "grad_norm": 4.411496639251709, "learning_rate": 1.347409168621747e-07, "loss": 0.2636, "step": 13380 }, { "epoch": 0.9141900662704106, "grad_norm": 5.34352445602417, "learning_rate": 1.345281699144197e-07, "loss": 0.2519, "step": 13381 }, { "epoch": 0.9142583862813418, "grad_norm": 4.78695011138916, "learning_rate": 1.3431558776489405e-07, "loss": 0.3054, "step": 13382 }, { "epoch": 0.914326706292273, "grad_norm": 6.062073230743408, "learning_rate": 1.3410317042400627e-07, "loss": 0.3333, "step": 13383 }, { "epoch": 0.9143950263032042, "grad_norm": 3.851896047592163, "learning_rate": 1.3389091790215778e-07, "loss": 0.2361, "step": 13384 }, { "epoch": 0.9144633463141354, "grad_norm": 3.3261914253234863, "learning_rate": 1.336788302097419e-07, "loss": 0.2074, "step": 13385 }, { "epoch": 0.9145316663250667, "grad_norm": 4.115556716918945, "learning_rate": 1.3346690735714535e-07, "loss": 0.3043, "step": 13386 }, { "epoch": 0.9145999863359978, "grad_norm": 5.392540454864502, "learning_rate": 1.3325514935474308e-07, "loss": 0.2534, "step": 13387 }, { "epoch": 0.914668306346929, "grad_norm": 3.192579746246338, "learning_rate": 1.330435562129063e-07, "loss": 0.2319, "step": 13388 }, { "epoch": 0.9147366263578602, "grad_norm": 3.9785315990448, "learning_rate": 1.3283212794199566e-07, "loss": 0.2096, "step": 13389 }, { "epoch": 0.9148049463687914, "grad_norm": 3.0122690200805664, "learning_rate": 1.3262086455236444e-07, "loss": 0.2945, "step": 13390 }, { "epoch": 0.9148732663797227, "grad_norm": 3.6363577842712402, "learning_rate": 1.3240976605435657e-07, "loss": 0.2399, "step": 13391 }, { "epoch": 0.9149415863906538, "grad_norm": 3.5645222663879395, "learning_rate": 1.3219883245831016e-07, "loss": 0.2079, "step": 13392 }, { "epoch": 0.915009906401585, "grad_norm": 5.334932804107666, "learning_rate": 1.3198806377455323e-07, "loss": 0.2298, "step": 13393 }, { "epoch": 0.9150782264125162, "grad_norm": 4.818181037902832, "learning_rate": 1.3177746001340724e-07, "loss": 0.3111, "step": 13394 }, { "epoch": 0.9151465464234474, "grad_norm": 4.43942928314209, "learning_rate": 1.315670211851846e-07, "loss": 0.1876, "step": 13395 }, { "epoch": 0.9152148664343787, "grad_norm": 3.4609792232513428, "learning_rate": 1.3135674730019002e-07, "loss": 0.2152, "step": 13396 }, { "epoch": 0.9152831864453098, "grad_norm": 4.572158336639404, "learning_rate": 1.311466383687201e-07, "loss": 0.3159, "step": 13397 }, { "epoch": 0.9153515064562411, "grad_norm": 4.6796488761901855, "learning_rate": 1.3093669440106314e-07, "loss": 0.2831, "step": 13398 }, { "epoch": 0.9154198264671722, "grad_norm": 3.8458566665649414, "learning_rate": 1.307269154074998e-07, "loss": 0.2384, "step": 13399 }, { "epoch": 0.9154881464781034, "grad_norm": 6.444874286651611, "learning_rate": 1.3051730139830174e-07, "loss": 0.2397, "step": 13400 }, { "epoch": 0.9155564664890347, "grad_norm": 4.554252624511719, "learning_rate": 1.3030785238373366e-07, "loss": 0.2007, "step": 13401 }, { "epoch": 0.9156247864999658, "grad_norm": 4.208062171936035, "learning_rate": 1.3009856837405242e-07, "loss": 0.2268, "step": 13402 }, { "epoch": 0.9156931065108971, "grad_norm": 3.6709024906158447, "learning_rate": 1.2988944937950485e-07, "loss": 0.2546, "step": 13403 }, { "epoch": 0.9157614265218282, "grad_norm": 4.56534481048584, "learning_rate": 1.2968049541033156e-07, "loss": 0.2772, "step": 13404 }, { "epoch": 0.9158297465327594, "grad_norm": 4.158039093017578, "learning_rate": 1.2947170647676454e-07, "loss": 0.2098, "step": 13405 }, { "epoch": 0.9158980665436907, "grad_norm": 3.547670841217041, "learning_rate": 1.2926308258902768e-07, "loss": 0.2881, "step": 13406 }, { "epoch": 0.9159663865546218, "grad_norm": 5.528045177459717, "learning_rate": 1.2905462375733634e-07, "loss": 0.345, "step": 13407 }, { "epoch": 0.9160347065655531, "grad_norm": 4.748624324798584, "learning_rate": 1.2884632999189844e-07, "loss": 0.3762, "step": 13408 }, { "epoch": 0.9161030265764842, "grad_norm": 3.7018373012542725, "learning_rate": 1.2863820130291376e-07, "loss": 0.3294, "step": 13409 }, { "epoch": 0.9161713465874155, "grad_norm": 3.5396177768707275, "learning_rate": 1.2843023770057394e-07, "loss": 0.1832, "step": 13410 }, { "epoch": 0.9162396665983467, "grad_norm": 3.6042697429656982, "learning_rate": 1.2822243919506165e-07, "loss": 0.1872, "step": 13411 }, { "epoch": 0.9163079866092778, "grad_norm": 4.9287567138671875, "learning_rate": 1.28014805796553e-07, "loss": 0.262, "step": 13412 }, { "epoch": 0.9163763066202091, "grad_norm": 4.546202659606934, "learning_rate": 1.2780733751521433e-07, "loss": 0.2674, "step": 13413 }, { "epoch": 0.9164446266311402, "grad_norm": 3.790304660797119, "learning_rate": 1.2760003436120581e-07, "loss": 0.2661, "step": 13414 }, { "epoch": 0.9165129466420715, "grad_norm": 4.666128635406494, "learning_rate": 1.2739289634467788e-07, "loss": 0.2918, "step": 13415 }, { "epoch": 0.9165812666530027, "grad_norm": 3.190450668334961, "learning_rate": 1.2718592347577396e-07, "loss": 0.2406, "step": 13416 }, { "epoch": 0.9166495866639338, "grad_norm": 3.990028142929077, "learning_rate": 1.2697911576462893e-07, "loss": 0.2286, "step": 13417 }, { "epoch": 0.9167179066748651, "grad_norm": 3.595471143722534, "learning_rate": 1.2677247322136959e-07, "loss": 0.1875, "step": 13418 }, { "epoch": 0.9167862266857962, "grad_norm": 3.528536558151245, "learning_rate": 1.2656599585611444e-07, "loss": 0.1922, "step": 13419 }, { "epoch": 0.9168545466967275, "grad_norm": 4.563926696777344, "learning_rate": 1.2635968367897355e-07, "loss": 0.1811, "step": 13420 }, { "epoch": 0.9169228667076587, "grad_norm": 3.3752102851867676, "learning_rate": 1.261535367000507e-07, "loss": 0.2575, "step": 13421 }, { "epoch": 0.9169911867185899, "grad_norm": 2.448258876800537, "learning_rate": 1.2594755492944e-07, "loss": 0.1384, "step": 13422 }, { "epoch": 0.9170595067295211, "grad_norm": 5.061403274536133, "learning_rate": 1.2574173837722773e-07, "loss": 0.2651, "step": 13423 }, { "epoch": 0.9171278267404522, "grad_norm": 5.027768135070801, "learning_rate": 1.2553608705349174e-07, "loss": 0.2167, "step": 13424 }, { "epoch": 0.9171961467513835, "grad_norm": 4.409302711486816, "learning_rate": 1.2533060096830274e-07, "loss": 0.2872, "step": 13425 }, { "epoch": 0.9172644667623147, "grad_norm": 4.5744524002075195, "learning_rate": 1.2512528013172258e-07, "loss": 0.26, "step": 13426 }, { "epoch": 0.9173327867732459, "grad_norm": 4.359010696411133, "learning_rate": 1.2492012455380574e-07, "loss": 0.274, "step": 13427 }, { "epoch": 0.9174011067841771, "grad_norm": 4.159878253936768, "learning_rate": 1.2471513424459736e-07, "loss": 0.2466, "step": 13428 }, { "epoch": 0.9174694267951082, "grad_norm": 4.982776165008545, "learning_rate": 1.2451030921413591e-07, "loss": 0.304, "step": 13429 }, { "epoch": 0.9175377468060395, "grad_norm": 2.950195550918579, "learning_rate": 1.2430564947245142e-07, "loss": 0.1705, "step": 13430 }, { "epoch": 0.9176060668169707, "grad_norm": 3.5037176609039307, "learning_rate": 1.2410115502956453e-07, "loss": 0.2911, "step": 13431 }, { "epoch": 0.9176743868279019, "grad_norm": 4.12448787689209, "learning_rate": 1.2389682589548928e-07, "loss": 0.2641, "step": 13432 }, { "epoch": 0.9177427068388331, "grad_norm": 4.1558356285095215, "learning_rate": 1.2369266208023116e-07, "loss": 0.1743, "step": 13433 }, { "epoch": 0.9178110268497643, "grad_norm": 5.001534461975098, "learning_rate": 1.234886635937879e-07, "loss": 0.308, "step": 13434 }, { "epoch": 0.9178793468606955, "grad_norm": 5.633107662200928, "learning_rate": 1.232848304461479e-07, "loss": 0.3426, "step": 13435 }, { "epoch": 0.9179476668716267, "grad_norm": 3.5375890731811523, "learning_rate": 1.2308116264729296e-07, "loss": 0.3316, "step": 13436 }, { "epoch": 0.9180159868825579, "grad_norm": 2.902060031890869, "learning_rate": 1.228776602071963e-07, "loss": 0.2106, "step": 13437 }, { "epoch": 0.9180843068934891, "grad_norm": 4.111266136169434, "learning_rate": 1.2267432313582267e-07, "loss": 0.3278, "step": 13438 }, { "epoch": 0.9181526269044203, "grad_norm": 2.766077995300293, "learning_rate": 1.2247115144312852e-07, "loss": 0.14, "step": 13439 }, { "epoch": 0.9182209469153515, "grad_norm": 4.204154968261719, "learning_rate": 1.222681451390627e-07, "loss": 0.2229, "step": 13440 }, { "epoch": 0.9182892669262827, "grad_norm": 3.5381124019622803, "learning_rate": 1.2206530423356655e-07, "loss": 0.2216, "step": 13441 }, { "epoch": 0.9183575869372139, "grad_norm": 3.687467336654663, "learning_rate": 1.218626287365721e-07, "loss": 0.233, "step": 13442 }, { "epoch": 0.9184259069481451, "grad_norm": 4.67631721496582, "learning_rate": 1.2166011865800447e-07, "loss": 0.3137, "step": 13443 }, { "epoch": 0.9184942269590763, "grad_norm": 4.23135232925415, "learning_rate": 1.2145777400777934e-07, "loss": 0.251, "step": 13444 }, { "epoch": 0.9185625469700075, "grad_norm": 3.235945463180542, "learning_rate": 1.212555947958051e-07, "loss": 0.2508, "step": 13445 }, { "epoch": 0.9186308669809388, "grad_norm": 4.043428421020508, "learning_rate": 1.2105358103198233e-07, "loss": 0.1919, "step": 13446 }, { "epoch": 0.9186991869918699, "grad_norm": 6.203104496002197, "learning_rate": 1.2085173272620265e-07, "loss": 0.3559, "step": 13447 }, { "epoch": 0.9187675070028011, "grad_norm": 4.20858097076416, "learning_rate": 1.2065004988835032e-07, "loss": 0.2894, "step": 13448 }, { "epoch": 0.9188358270137323, "grad_norm": 4.673062801361084, "learning_rate": 1.2044853252830106e-07, "loss": 0.254, "step": 13449 }, { "epoch": 0.9189041470246635, "grad_norm": 4.097467422485352, "learning_rate": 1.2024718065592243e-07, "loss": 0.2114, "step": 13450 }, { "epoch": 0.9189724670355948, "grad_norm": 4.038395404815674, "learning_rate": 1.2004599428107538e-07, "loss": 0.3205, "step": 13451 }, { "epoch": 0.9190407870465259, "grad_norm": 3.150381088256836, "learning_rate": 1.1984497341360956e-07, "loss": 0.1817, "step": 13452 }, { "epoch": 0.9191091070574571, "grad_norm": 4.529663562774658, "learning_rate": 1.1964411806336922e-07, "loss": 0.2837, "step": 13453 }, { "epoch": 0.9191774270683883, "grad_norm": 3.5459275245666504, "learning_rate": 1.1944342824019005e-07, "loss": 0.2059, "step": 13454 }, { "epoch": 0.9192457470793195, "grad_norm": 4.408910751342773, "learning_rate": 1.1924290395389957e-07, "loss": 0.2979, "step": 13455 }, { "epoch": 0.9193140670902508, "grad_norm": 5.431634902954102, "learning_rate": 1.1904254521431562e-07, "loss": 0.3542, "step": 13456 }, { "epoch": 0.9193823871011819, "grad_norm": 4.2690324783325195, "learning_rate": 1.1884235203125055e-07, "loss": 0.2085, "step": 13457 }, { "epoch": 0.9194507071121132, "grad_norm": 3.9698739051818848, "learning_rate": 1.1864232441450662e-07, "loss": 0.2245, "step": 13458 }, { "epoch": 0.9195190271230443, "grad_norm": 4.16731595993042, "learning_rate": 1.1844246237387873e-07, "loss": 0.247, "step": 13459 }, { "epoch": 0.9195873471339755, "grad_norm": 5.439449787139893, "learning_rate": 1.18242765919154e-07, "loss": 0.3144, "step": 13460 }, { "epoch": 0.9196556671449068, "grad_norm": 3.407601833343506, "learning_rate": 1.1804323506011022e-07, "loss": 0.1408, "step": 13461 }, { "epoch": 0.9197239871558379, "grad_norm": 3.4973936080932617, "learning_rate": 1.1784386980651895e-07, "loss": 0.2041, "step": 13462 }, { "epoch": 0.9197923071667692, "grad_norm": 3.7115297317504883, "learning_rate": 1.1764467016814206e-07, "loss": 0.2146, "step": 13463 }, { "epoch": 0.9198606271777003, "grad_norm": 3.1228983402252197, "learning_rate": 1.1744563615473325e-07, "loss": 0.1654, "step": 13464 }, { "epoch": 0.9199289471886315, "grad_norm": 4.685136795043945, "learning_rate": 1.1724676777603959e-07, "loss": 0.1699, "step": 13465 }, { "epoch": 0.9199972671995628, "grad_norm": 2.8788561820983887, "learning_rate": 1.1704806504179848e-07, "loss": 0.2182, "step": 13466 }, { "epoch": 0.9200655872104939, "grad_norm": 4.189076900482178, "learning_rate": 1.168495279617403e-07, "loss": 0.2078, "step": 13467 }, { "epoch": 0.9201339072214252, "grad_norm": 4.872132301330566, "learning_rate": 1.166511565455865e-07, "loss": 0.2922, "step": 13468 }, { "epoch": 0.9202022272323563, "grad_norm": 5.401614189147949, "learning_rate": 1.1645295080305113e-07, "loss": 0.3406, "step": 13469 }, { "epoch": 0.9202705472432876, "grad_norm": 4.5664873123168945, "learning_rate": 1.1625491074384008e-07, "loss": 0.2055, "step": 13470 }, { "epoch": 0.9203388672542188, "grad_norm": 4.104379177093506, "learning_rate": 1.1605703637765036e-07, "loss": 0.2141, "step": 13471 }, { "epoch": 0.9204071872651499, "grad_norm": 5.624873638153076, "learning_rate": 1.1585932771417112e-07, "loss": 0.3316, "step": 13472 }, { "epoch": 0.9204755072760812, "grad_norm": 5.866299152374268, "learning_rate": 1.1566178476308381e-07, "loss": 0.2953, "step": 13473 }, { "epoch": 0.9205438272870123, "grad_norm": 4.532373905181885, "learning_rate": 1.1546440753406206e-07, "loss": 0.2683, "step": 13474 }, { "epoch": 0.9206121472979436, "grad_norm": 5.76646089553833, "learning_rate": 1.1526719603677099e-07, "loss": 0.2139, "step": 13475 }, { "epoch": 0.9206804673088748, "grad_norm": 4.225186347961426, "learning_rate": 1.1507015028086675e-07, "loss": 0.2404, "step": 13476 }, { "epoch": 0.9207487873198059, "grad_norm": 3.7558610439300537, "learning_rate": 1.1487327027599852e-07, "loss": 0.1959, "step": 13477 }, { "epoch": 0.9208171073307372, "grad_norm": 4.948535442352295, "learning_rate": 1.1467655603180693e-07, "loss": 0.3679, "step": 13478 }, { "epoch": 0.9208854273416683, "grad_norm": 3.511240243911743, "learning_rate": 1.1448000755792482e-07, "loss": 0.2617, "step": 13479 }, { "epoch": 0.9209537473525996, "grad_norm": 4.028995037078857, "learning_rate": 1.1428362486397609e-07, "loss": 0.2753, "step": 13480 }, { "epoch": 0.9210220673635308, "grad_norm": 7.437253475189209, "learning_rate": 1.1408740795957766e-07, "loss": 0.3641, "step": 13481 }, { "epoch": 0.921090387374462, "grad_norm": 4.423912048339844, "learning_rate": 1.1389135685433754e-07, "loss": 0.2655, "step": 13482 }, { "epoch": 0.9211587073853932, "grad_norm": 5.375180244445801, "learning_rate": 1.1369547155785631e-07, "loss": 0.2462, "step": 13483 }, { "epoch": 0.9212270273963243, "grad_norm": 3.2056643962860107, "learning_rate": 1.1349975207972485e-07, "loss": 0.2959, "step": 13484 }, { "epoch": 0.9212953474072556, "grad_norm": 3.919804096221924, "learning_rate": 1.1330419842952782e-07, "loss": 0.2125, "step": 13485 }, { "epoch": 0.9213636674181868, "grad_norm": 4.392906665802002, "learning_rate": 1.1310881061684058e-07, "loss": 0.3878, "step": 13486 }, { "epoch": 0.921431987429118, "grad_norm": 7.213086128234863, "learning_rate": 1.1291358865123108e-07, "loss": 0.1603, "step": 13487 }, { "epoch": 0.9215003074400492, "grad_norm": 3.6092944145202637, "learning_rate": 1.1271853254225872e-07, "loss": 0.2261, "step": 13488 }, { "epoch": 0.9215686274509803, "grad_norm": 4.174281597137451, "learning_rate": 1.1252364229947475e-07, "loss": 0.1705, "step": 13489 }, { "epoch": 0.9216369474619116, "grad_norm": 5.143202781677246, "learning_rate": 1.1232891793242266e-07, "loss": 0.2778, "step": 13490 }, { "epoch": 0.9217052674728428, "grad_norm": 4.06520938873291, "learning_rate": 1.1213435945063776e-07, "loss": 0.2685, "step": 13491 }, { "epoch": 0.921773587483774, "grad_norm": 3.3523550033569336, "learning_rate": 1.1193996686364643e-07, "loss": 0.2859, "step": 13492 }, { "epoch": 0.9218419074947052, "grad_norm": 3.7364754676818848, "learning_rate": 1.1174574018096766e-07, "loss": 0.252, "step": 13493 }, { "epoch": 0.9219102275056364, "grad_norm": 3.1553356647491455, "learning_rate": 1.1155167941211269e-07, "loss": 0.2771, "step": 13494 }, { "epoch": 0.9219785475165676, "grad_norm": 3.520076274871826, "learning_rate": 1.1135778456658418e-07, "loss": 0.2606, "step": 13495 }, { "epoch": 0.9220468675274988, "grad_norm": 3.870875835418701, "learning_rate": 1.1116405565387627e-07, "loss": 0.3117, "step": 13496 }, { "epoch": 0.92211518753843, "grad_norm": 5.4289422035217285, "learning_rate": 1.1097049268347531e-07, "loss": 0.3171, "step": 13497 }, { "epoch": 0.9221835075493612, "grad_norm": 4.411963939666748, "learning_rate": 1.1077709566485988e-07, "loss": 0.2493, "step": 13498 }, { "epoch": 0.9222518275602924, "grad_norm": 3.3234903812408447, "learning_rate": 1.1058386460749964e-07, "loss": 0.2454, "step": 13499 }, { "epoch": 0.9223201475712236, "grad_norm": 3.893963575363159, "learning_rate": 1.1039079952085723e-07, "loss": 0.1893, "step": 13500 }, { "epoch": 0.9223884675821548, "grad_norm": 4.689470291137695, "learning_rate": 1.1019790041438599e-07, "loss": 0.2825, "step": 13501 }, { "epoch": 0.922456787593086, "grad_norm": 3.701362371444702, "learning_rate": 1.1000516729753223e-07, "loss": 0.2414, "step": 13502 }, { "epoch": 0.9225251076040172, "grad_norm": 4.323471546173096, "learning_rate": 1.0981260017973337e-07, "loss": 0.1888, "step": 13503 }, { "epoch": 0.9225934276149484, "grad_norm": 3.989708662033081, "learning_rate": 1.0962019907041865e-07, "loss": 0.2459, "step": 13504 }, { "epoch": 0.9226617476258796, "grad_norm": 3.4719104766845703, "learning_rate": 1.094279639790095e-07, "loss": 0.2054, "step": 13505 }, { "epoch": 0.9227300676368109, "grad_norm": 4.655343532562256, "learning_rate": 1.0923589491491886e-07, "loss": 0.2556, "step": 13506 }, { "epoch": 0.922798387647742, "grad_norm": 3.3543970584869385, "learning_rate": 1.0904399188755265e-07, "loss": 0.249, "step": 13507 }, { "epoch": 0.9228667076586732, "grad_norm": 2.8386452198028564, "learning_rate": 1.0885225490630785e-07, "loss": 0.1856, "step": 13508 }, { "epoch": 0.9229350276696044, "grad_norm": 5.373129367828369, "learning_rate": 1.0866068398057249e-07, "loss": 0.2026, "step": 13509 }, { "epoch": 0.9230033476805356, "grad_norm": 5.801807880401611, "learning_rate": 1.0846927911972764e-07, "loss": 0.2867, "step": 13510 }, { "epoch": 0.9230716676914669, "grad_norm": 6.28096342086792, "learning_rate": 1.0827804033314581e-07, "loss": 0.3762, "step": 13511 }, { "epoch": 0.923139987702398, "grad_norm": 4.502951145172119, "learning_rate": 1.080869676301921e-07, "loss": 0.3566, "step": 13512 }, { "epoch": 0.9232083077133292, "grad_norm": 4.673813343048096, "learning_rate": 1.0789606102022192e-07, "loss": 0.3112, "step": 13513 }, { "epoch": 0.9232766277242604, "grad_norm": 4.590301990509033, "learning_rate": 1.0770532051258407e-07, "loss": 0.2361, "step": 13514 }, { "epoch": 0.9233449477351916, "grad_norm": 3.4672532081604004, "learning_rate": 1.0751474611661844e-07, "loss": 0.2844, "step": 13515 }, { "epoch": 0.9234132677461229, "grad_norm": 4.543034076690674, "learning_rate": 1.0732433784165707e-07, "loss": 0.2578, "step": 13516 }, { "epoch": 0.923481587757054, "grad_norm": 5.332230091094971, "learning_rate": 1.0713409569702353e-07, "loss": 0.3426, "step": 13517 }, { "epoch": 0.9235499077679853, "grad_norm": 5.497230529785156, "learning_rate": 1.0694401969203321e-07, "loss": 0.421, "step": 13518 }, { "epoch": 0.9236182277789164, "grad_norm": 4.35994291305542, "learning_rate": 1.0675410983599409e-07, "loss": 0.2283, "step": 13519 }, { "epoch": 0.9236865477898476, "grad_norm": 3.000537395477295, "learning_rate": 1.0656436613820563e-07, "loss": 0.2171, "step": 13520 }, { "epoch": 0.9237548678007789, "grad_norm": 3.9432334899902344, "learning_rate": 1.0637478860795874e-07, "loss": 0.2718, "step": 13521 }, { "epoch": 0.92382318781171, "grad_norm": 3.0596907138824463, "learning_rate": 1.0618537725453652e-07, "loss": 0.1349, "step": 13522 }, { "epoch": 0.9238915078226413, "grad_norm": 3.004180431365967, "learning_rate": 1.0599613208721437e-07, "loss": 0.2128, "step": 13523 }, { "epoch": 0.9239598278335724, "grad_norm": 5.107791900634766, "learning_rate": 1.0580705311525906e-07, "loss": 0.2731, "step": 13524 }, { "epoch": 0.9240281478445036, "grad_norm": 5.271121025085449, "learning_rate": 1.0561814034792887e-07, "loss": 0.2311, "step": 13525 }, { "epoch": 0.9240964678554349, "grad_norm": 3.4121832847595215, "learning_rate": 1.0542939379447391e-07, "loss": 0.2929, "step": 13526 }, { "epoch": 0.924164787866366, "grad_norm": 4.451507568359375, "learning_rate": 1.0524081346413766e-07, "loss": 0.371, "step": 13527 }, { "epoch": 0.9242331078772973, "grad_norm": 3.3117287158966064, "learning_rate": 1.0505239936615429e-07, "loss": 0.2, "step": 13528 }, { "epoch": 0.9243014278882284, "grad_norm": 3.5185039043426514, "learning_rate": 1.0486415150974943e-07, "loss": 0.2543, "step": 13529 }, { "epoch": 0.9243697478991597, "grad_norm": 4.4599785804748535, "learning_rate": 1.046760699041413e-07, "loss": 0.3221, "step": 13530 }, { "epoch": 0.9244380679100909, "grad_norm": 3.198625326156616, "learning_rate": 1.044881545585396e-07, "loss": 0.1583, "step": 13531 }, { "epoch": 0.924506387921022, "grad_norm": 4.460411548614502, "learning_rate": 1.0430040548214626e-07, "loss": 0.2823, "step": 13532 }, { "epoch": 0.9245747079319533, "grad_norm": 4.702836036682129, "learning_rate": 1.0411282268415462e-07, "loss": 0.2858, "step": 13533 }, { "epoch": 0.9246430279428844, "grad_norm": 4.080735683441162, "learning_rate": 1.0392540617375028e-07, "loss": 0.197, "step": 13534 }, { "epoch": 0.9247113479538157, "grad_norm": 5.38531494140625, "learning_rate": 1.0373815596011071e-07, "loss": 0.2778, "step": 13535 }, { "epoch": 0.9247796679647469, "grad_norm": 3.982957124710083, "learning_rate": 1.0355107205240516e-07, "loss": 0.2736, "step": 13536 }, { "epoch": 0.924847987975678, "grad_norm": 4.58801794052124, "learning_rate": 1.0336415445979397e-07, "loss": 0.2561, "step": 13537 }, { "epoch": 0.9249163079866093, "grad_norm": 2.9268524646759033, "learning_rate": 1.031774031914301e-07, "loss": 0.1648, "step": 13538 }, { "epoch": 0.9249846279975404, "grad_norm": 2.473217725753784, "learning_rate": 1.0299081825645872e-07, "loss": 0.1848, "step": 13539 }, { "epoch": 0.9250529480084717, "grad_norm": 2.9745075702667236, "learning_rate": 1.028043996640165e-07, "loss": 0.1468, "step": 13540 }, { "epoch": 0.9251212680194029, "grad_norm": 4.089197635650635, "learning_rate": 1.026181474232311e-07, "loss": 0.2042, "step": 13541 }, { "epoch": 0.9251895880303341, "grad_norm": 4.335411071777344, "learning_rate": 1.0243206154322365e-07, "loss": 0.2415, "step": 13542 }, { "epoch": 0.9252579080412653, "grad_norm": 5.224900722503662, "learning_rate": 1.022461420331055e-07, "loss": 0.2687, "step": 13543 }, { "epoch": 0.9253262280521964, "grad_norm": 3.5247950553894043, "learning_rate": 1.0206038890198182e-07, "loss": 0.2942, "step": 13544 }, { "epoch": 0.9253945480631277, "grad_norm": 4.241222858428955, "learning_rate": 1.0187480215894651e-07, "loss": 0.1482, "step": 13545 }, { "epoch": 0.9254628680740589, "grad_norm": 3.7732062339782715, "learning_rate": 1.0168938181308917e-07, "loss": 0.3068, "step": 13546 }, { "epoch": 0.9255311880849901, "grad_norm": 5.625832557678223, "learning_rate": 1.0150412787348817e-07, "loss": 0.276, "step": 13547 }, { "epoch": 0.9255995080959213, "grad_norm": 3.9666476249694824, "learning_rate": 1.0131904034921601e-07, "loss": 0.2054, "step": 13548 }, { "epoch": 0.9256678281068524, "grad_norm": 4.510969161987305, "learning_rate": 1.0113411924933474e-07, "loss": 0.2549, "step": 13549 }, { "epoch": 0.9257361481177837, "grad_norm": 5.2135396003723145, "learning_rate": 1.0094936458290016e-07, "loss": 0.2499, "step": 13550 }, { "epoch": 0.9258044681287149, "grad_norm": 4.08479642868042, "learning_rate": 1.0076477635895876e-07, "loss": 0.1907, "step": 13551 }, { "epoch": 0.9258727881396461, "grad_norm": 5.104111671447754, "learning_rate": 1.0058035458654962e-07, "loss": 0.3436, "step": 13552 }, { "epoch": 0.9259411081505773, "grad_norm": 3.9177517890930176, "learning_rate": 1.0039609927470371e-07, "loss": 0.2353, "step": 13553 }, { "epoch": 0.9260094281615086, "grad_norm": 3.287473201751709, "learning_rate": 1.0021201043244266e-07, "loss": 0.2701, "step": 13554 }, { "epoch": 0.9260777481724397, "grad_norm": 4.974564075469971, "learning_rate": 1.0002808806878183e-07, "loss": 0.3356, "step": 13555 }, { "epoch": 0.9261460681833709, "grad_norm": 3.936899423599243, "learning_rate": 9.984433219272692e-08, "loss": 0.2951, "step": 13556 }, { "epoch": 0.9262143881943021, "grad_norm": 4.890305042266846, "learning_rate": 9.966074281327547e-08, "loss": 0.2442, "step": 13557 }, { "epoch": 0.9262827082052333, "grad_norm": 4.926356792449951, "learning_rate": 9.9477319939418e-08, "loss": 0.2164, "step": 13558 }, { "epoch": 0.9263510282161646, "grad_norm": 3.7134432792663574, "learning_rate": 9.92940635801357e-08, "loss": 0.2328, "step": 13559 }, { "epoch": 0.9264193482270957, "grad_norm": 8.210877418518066, "learning_rate": 9.911097374440358e-08, "loss": 0.1918, "step": 13560 }, { "epoch": 0.926487668238027, "grad_norm": 3.821274518966675, "learning_rate": 9.892805044118535e-08, "loss": 0.2765, "step": 13561 }, { "epoch": 0.9265559882489581, "grad_norm": 5.027040958404541, "learning_rate": 9.874529367943891e-08, "loss": 0.2453, "step": 13562 }, { "epoch": 0.9266243082598893, "grad_norm": 3.9116854667663574, "learning_rate": 9.856270346811358e-08, "loss": 0.2289, "step": 13563 }, { "epoch": 0.9266926282708206, "grad_norm": 3.8040506839752197, "learning_rate": 9.838027981615055e-08, "loss": 0.2917, "step": 13564 }, { "epoch": 0.9267609482817517, "grad_norm": 2.714801073074341, "learning_rate": 9.819802273248129e-08, "loss": 0.1739, "step": 13565 }, { "epoch": 0.926829268292683, "grad_norm": 3.3844356536865234, "learning_rate": 9.801593222603183e-08, "loss": 0.2398, "step": 13566 }, { "epoch": 0.9268975883036141, "grad_norm": 4.1307268142700195, "learning_rate": 9.783400830571808e-08, "loss": 0.2791, "step": 13567 }, { "epoch": 0.9269659083145453, "grad_norm": 2.807480812072754, "learning_rate": 9.765225098044899e-08, "loss": 0.1658, "step": 13568 }, { "epoch": 0.9270342283254766, "grad_norm": 3.4279563426971436, "learning_rate": 9.747066025912376e-08, "loss": 0.2935, "step": 13569 }, { "epoch": 0.9271025483364077, "grad_norm": 3.353499412536621, "learning_rate": 9.7289236150635e-08, "loss": 0.3087, "step": 13570 }, { "epoch": 0.927170868347339, "grad_norm": 3.971342086791992, "learning_rate": 9.710797866386678e-08, "loss": 0.2578, "step": 13571 }, { "epoch": 0.9272391883582701, "grad_norm": 4.047178745269775, "learning_rate": 9.692688780769382e-08, "loss": 0.2872, "step": 13572 }, { "epoch": 0.9273075083692014, "grad_norm": 3.6469902992248535, "learning_rate": 9.674596359098464e-08, "loss": 0.2507, "step": 13573 }, { "epoch": 0.9273758283801325, "grad_norm": 4.475594997406006, "learning_rate": 9.656520602259844e-08, "loss": 0.2105, "step": 13574 }, { "epoch": 0.9274441483910637, "grad_norm": 4.089027404785156, "learning_rate": 9.638461511138624e-08, "loss": 0.2277, "step": 13575 }, { "epoch": 0.927512468401995, "grad_norm": 4.338169574737549, "learning_rate": 9.620419086619092e-08, "loss": 0.289, "step": 13576 }, { "epoch": 0.9275807884129261, "grad_norm": 3.499617338180542, "learning_rate": 9.602393329584835e-08, "loss": 0.2411, "step": 13577 }, { "epoch": 0.9276491084238574, "grad_norm": 3.5307505130767822, "learning_rate": 9.584384240918393e-08, "loss": 0.2662, "step": 13578 }, { "epoch": 0.9277174284347885, "grad_norm": 5.6240434646606445, "learning_rate": 9.566391821501641e-08, "loss": 0.3287, "step": 13579 }, { "epoch": 0.9277857484457197, "grad_norm": 4.02776575088501, "learning_rate": 9.548416072215682e-08, "loss": 0.2944, "step": 13580 }, { "epoch": 0.927854068456651, "grad_norm": 4.812607288360596, "learning_rate": 9.530456993940761e-08, "loss": 0.255, "step": 13581 }, { "epoch": 0.9279223884675821, "grad_norm": 4.705231666564941, "learning_rate": 9.512514587556231e-08, "loss": 0.2349, "step": 13582 }, { "epoch": 0.9279907084785134, "grad_norm": 4.4278106689453125, "learning_rate": 9.494588853940706e-08, "loss": 0.3844, "step": 13583 }, { "epoch": 0.9280590284894445, "grad_norm": 4.085874080657959, "learning_rate": 9.476679793971943e-08, "loss": 0.2548, "step": 13584 }, { "epoch": 0.9281273485003758, "grad_norm": 4.02828311920166, "learning_rate": 9.458787408526925e-08, "loss": 0.2328, "step": 13585 }, { "epoch": 0.928195668511307, "grad_norm": 4.054621696472168, "learning_rate": 9.440911698481741e-08, "loss": 0.2189, "step": 13586 }, { "epoch": 0.9282639885222381, "grad_norm": 3.4333977699279785, "learning_rate": 9.423052664711779e-08, "loss": 0.2545, "step": 13587 }, { "epoch": 0.9283323085331694, "grad_norm": 4.351890563964844, "learning_rate": 9.405210308091533e-08, "loss": 0.2415, "step": 13588 }, { "epoch": 0.9284006285441005, "grad_norm": 4.419431209564209, "learning_rate": 9.387384629494722e-08, "loss": 0.2686, "step": 13589 }, { "epoch": 0.9284689485550318, "grad_norm": 3.9047188758850098, "learning_rate": 9.369575629794169e-08, "loss": 0.2852, "step": 13590 }, { "epoch": 0.928537268565963, "grad_norm": 4.4380340576171875, "learning_rate": 9.35178330986196e-08, "loss": 0.3242, "step": 13591 }, { "epoch": 0.9286055885768941, "grad_norm": 4.988393783569336, "learning_rate": 9.334007670569289e-08, "loss": 0.3413, "step": 13592 }, { "epoch": 0.9286739085878254, "grad_norm": 5.0896759033203125, "learning_rate": 9.316248712786684e-08, "loss": 0.2508, "step": 13593 }, { "epoch": 0.9287422285987565, "grad_norm": 4.254123210906982, "learning_rate": 9.298506437383668e-08, "loss": 0.2523, "step": 13594 }, { "epoch": 0.9288105486096878, "grad_norm": 4.089828014373779, "learning_rate": 9.280780845229064e-08, "loss": 0.2513, "step": 13595 }, { "epoch": 0.928878868620619, "grad_norm": 3.2765414714813232, "learning_rate": 9.263071937190836e-08, "loss": 0.1888, "step": 13596 }, { "epoch": 0.9289471886315502, "grad_norm": 5.43747615814209, "learning_rate": 9.245379714136176e-08, "loss": 0.2456, "step": 13597 }, { "epoch": 0.9290155086424814, "grad_norm": 4.252595901489258, "learning_rate": 9.227704176931339e-08, "loss": 0.3776, "step": 13598 }, { "epoch": 0.9290838286534125, "grad_norm": 5.858763694763184, "learning_rate": 9.210045326441962e-08, "loss": 0.2607, "step": 13599 }, { "epoch": 0.9291521486643438, "grad_norm": 5.472280502319336, "learning_rate": 9.192403163532708e-08, "loss": 0.2232, "step": 13600 }, { "epoch": 0.929220468675275, "grad_norm": 2.9456965923309326, "learning_rate": 9.174777689067464e-08, "loss": 0.1605, "step": 13601 }, { "epoch": 0.9292887886862062, "grad_norm": 2.8347320556640625, "learning_rate": 9.157168903909263e-08, "loss": 0.1285, "step": 13602 }, { "epoch": 0.9293571086971374, "grad_norm": 5.192698955535889, "learning_rate": 9.139576808920396e-08, "loss": 0.3452, "step": 13603 }, { "epoch": 0.9294254287080685, "grad_norm": 4.171304225921631, "learning_rate": 9.122001404962343e-08, "loss": 0.2604, "step": 13604 }, { "epoch": 0.9294937487189998, "grad_norm": 3.882573366165161, "learning_rate": 9.104442692895648e-08, "loss": 0.1561, "step": 13605 }, { "epoch": 0.929562068729931, "grad_norm": 3.2690248489379883, "learning_rate": 9.086900673580156e-08, "loss": 0.1674, "step": 13606 }, { "epoch": 0.9296303887408622, "grad_norm": 3.8749091625213623, "learning_rate": 9.069375347874819e-08, "loss": 0.2228, "step": 13607 }, { "epoch": 0.9296987087517934, "grad_norm": 5.763411521911621, "learning_rate": 9.051866716637851e-08, "loss": 0.3065, "step": 13608 }, { "epoch": 0.9297670287627247, "grad_norm": 2.8998372554779053, "learning_rate": 9.034374780726611e-08, "loss": 0.2018, "step": 13609 }, { "epoch": 0.9298353487736558, "grad_norm": 4.407770156860352, "learning_rate": 9.016899540997603e-08, "loss": 0.2788, "step": 13610 }, { "epoch": 0.929903668784587, "grad_norm": 5.098852157592773, "learning_rate": 8.999440998306512e-08, "loss": 0.2759, "step": 13611 }, { "epoch": 0.9299719887955182, "grad_norm": 4.132100582122803, "learning_rate": 8.981999153508252e-08, "loss": 0.256, "step": 13612 }, { "epoch": 0.9300403088064494, "grad_norm": 3.0340209007263184, "learning_rate": 8.964574007456994e-08, "loss": 0.1564, "step": 13613 }, { "epoch": 0.9301086288173807, "grad_norm": 4.866656303405762, "learning_rate": 8.947165561005899e-08, "loss": 0.2083, "step": 13614 }, { "epoch": 0.9301769488283118, "grad_norm": 4.159021854400635, "learning_rate": 8.929773815007469e-08, "loss": 0.242, "step": 13615 }, { "epoch": 0.930245268839243, "grad_norm": 3.788419485092163, "learning_rate": 8.912398770313273e-08, "loss": 0.2346, "step": 13616 }, { "epoch": 0.9303135888501742, "grad_norm": 4.555079460144043, "learning_rate": 8.895040427774181e-08, "loss": 0.2557, "step": 13617 }, { "epoch": 0.9303819088611054, "grad_norm": 3.754857301712036, "learning_rate": 8.877698788240091e-08, "loss": 0.2865, "step": 13618 }, { "epoch": 0.9304502288720367, "grad_norm": 5.806417465209961, "learning_rate": 8.860373852560315e-08, "loss": 0.3179, "step": 13619 }, { "epoch": 0.9305185488829678, "grad_norm": 4.639980792999268, "learning_rate": 8.843065621583085e-08, "loss": 0.2831, "step": 13620 }, { "epoch": 0.9305868688938991, "grad_norm": 3.1075000762939453, "learning_rate": 8.82577409615608e-08, "loss": 0.2136, "step": 13621 }, { "epoch": 0.9306551889048302, "grad_norm": 6.250763416290283, "learning_rate": 8.808499277125858e-08, "loss": 0.3854, "step": 13622 }, { "epoch": 0.9307235089157614, "grad_norm": 5.858055591583252, "learning_rate": 8.791241165338431e-08, "loss": 0.3204, "step": 13623 }, { "epoch": 0.9307918289266927, "grad_norm": 3.2011189460754395, "learning_rate": 8.773999761638801e-08, "loss": 0.1873, "step": 13624 }, { "epoch": 0.9308601489376238, "grad_norm": 4.537455081939697, "learning_rate": 8.756775066871308e-08, "loss": 0.2158, "step": 13625 }, { "epoch": 0.9309284689485551, "grad_norm": 4.552728652954102, "learning_rate": 8.739567081879401e-08, "loss": 0.2257, "step": 13626 }, { "epoch": 0.9309967889594862, "grad_norm": 3.652400016784668, "learning_rate": 8.722375807505633e-08, "loss": 0.2466, "step": 13627 }, { "epoch": 0.9310651089704174, "grad_norm": 4.479336738586426, "learning_rate": 8.705201244591898e-08, "loss": 0.2416, "step": 13628 }, { "epoch": 0.9311334289813487, "grad_norm": 4.80872106552124, "learning_rate": 8.688043393979156e-08, "loss": 0.3029, "step": 13629 }, { "epoch": 0.9312017489922798, "grad_norm": 3.858046770095825, "learning_rate": 8.67090225650759e-08, "loss": 0.2804, "step": 13630 }, { "epoch": 0.9312700690032111, "grad_norm": 4.066470146179199, "learning_rate": 8.65377783301649e-08, "loss": 0.3235, "step": 13631 }, { "epoch": 0.9313383890141422, "grad_norm": 5.212569713592529, "learning_rate": 8.636670124344448e-08, "loss": 0.3492, "step": 13632 }, { "epoch": 0.9314067090250735, "grad_norm": 2.669768810272217, "learning_rate": 8.619579131329236e-08, "loss": 0.1019, "step": 13633 }, { "epoch": 0.9314750290360047, "grad_norm": 3.270688772201538, "learning_rate": 8.602504854807696e-08, "loss": 0.2034, "step": 13634 }, { "epoch": 0.9315433490469358, "grad_norm": 4.873747825622559, "learning_rate": 8.585447295615894e-08, "loss": 0.2015, "step": 13635 }, { "epoch": 0.9316116690578671, "grad_norm": 4.256372451782227, "learning_rate": 8.568406454589117e-08, "loss": 0.354, "step": 13636 }, { "epoch": 0.9316799890687982, "grad_norm": 4.845798492431641, "learning_rate": 8.551382332561835e-08, "loss": 0.2273, "step": 13637 }, { "epoch": 0.9317483090797295, "grad_norm": 3.3787131309509277, "learning_rate": 8.534374930367589e-08, "loss": 0.2739, "step": 13638 }, { "epoch": 0.9318166290906607, "grad_norm": 3.2034714221954346, "learning_rate": 8.517384248839294e-08, "loss": 0.1774, "step": 13639 }, { "epoch": 0.9318849491015918, "grad_norm": 5.122707366943359, "learning_rate": 8.500410288808896e-08, "loss": 0.3235, "step": 13640 }, { "epoch": 0.9319532691125231, "grad_norm": 3.2129437923431396, "learning_rate": 8.483453051107526e-08, "loss": 0.246, "step": 13641 }, { "epoch": 0.9320215891234542, "grad_norm": 4.439233303070068, "learning_rate": 8.466512536565612e-08, "loss": 0.2215, "step": 13642 }, { "epoch": 0.9320899091343855, "grad_norm": 2.5717275142669678, "learning_rate": 8.449588746012616e-08, "loss": 0.2227, "step": 13643 }, { "epoch": 0.9321582291453167, "grad_norm": 3.7289466857910156, "learning_rate": 8.432681680277254e-08, "loss": 0.1899, "step": 13644 }, { "epoch": 0.9322265491562479, "grad_norm": 3.090893030166626, "learning_rate": 8.415791340187434e-08, "loss": 0.192, "step": 13645 }, { "epoch": 0.9322948691671791, "grad_norm": 6.0919189453125, "learning_rate": 8.39891772657032e-08, "loss": 0.3072, "step": 13646 }, { "epoch": 0.9323631891781102, "grad_norm": 3.628361463546753, "learning_rate": 8.382060840252031e-08, "loss": 0.2904, "step": 13647 }, { "epoch": 0.9324315091890415, "grad_norm": 3.126990556716919, "learning_rate": 8.36522068205806e-08, "loss": 0.1958, "step": 13648 }, { "epoch": 0.9324998291999727, "grad_norm": 3.702486515045166, "learning_rate": 8.348397252813011e-08, "loss": 0.24, "step": 13649 }, { "epoch": 0.9325681492109039, "grad_norm": 4.337964057922363, "learning_rate": 8.331590553340745e-08, "loss": 0.2957, "step": 13650 }, { "epoch": 0.9326364692218351, "grad_norm": 4.741244316101074, "learning_rate": 8.314800584464155e-08, "loss": 0.2425, "step": 13651 }, { "epoch": 0.9327047892327662, "grad_norm": 3.9890329837799072, "learning_rate": 8.298027347005432e-08, "loss": 0.2093, "step": 13652 }, { "epoch": 0.9327731092436975, "grad_norm": 3.6166505813598633, "learning_rate": 8.281270841785954e-08, "loss": 0.2099, "step": 13653 }, { "epoch": 0.9328414292546287, "grad_norm": 6.119770050048828, "learning_rate": 8.26453106962624e-08, "loss": 0.3146, "step": 13654 }, { "epoch": 0.9329097492655599, "grad_norm": 4.582235813140869, "learning_rate": 8.247808031345921e-08, "loss": 0.3018, "step": 13655 }, { "epoch": 0.9329780692764911, "grad_norm": 3.4237051010131836, "learning_rate": 8.231101727763923e-08, "loss": 0.2149, "step": 13656 }, { "epoch": 0.9330463892874223, "grad_norm": 3.9505021572113037, "learning_rate": 8.214412159698359e-08, "loss": 0.1964, "step": 13657 }, { "epoch": 0.9331147092983535, "grad_norm": 3.4716663360595703, "learning_rate": 8.197739327966369e-08, "loss": 0.1849, "step": 13658 }, { "epoch": 0.9331830293092847, "grad_norm": 4.623603820800781, "learning_rate": 8.181083233384473e-08, "loss": 0.3407, "step": 13659 }, { "epoch": 0.9332513493202159, "grad_norm": 3.1283085346221924, "learning_rate": 8.164443876768219e-08, "loss": 0.2276, "step": 13660 }, { "epoch": 0.9333196693311471, "grad_norm": 4.593173503875732, "learning_rate": 8.147821258932375e-08, "loss": 0.3238, "step": 13661 }, { "epoch": 0.9333879893420783, "grad_norm": 3.8381357192993164, "learning_rate": 8.131215380691014e-08, "loss": 0.2275, "step": 13662 }, { "epoch": 0.9334563093530095, "grad_norm": 3.514411211013794, "learning_rate": 8.11462624285716e-08, "loss": 0.167, "step": 13663 }, { "epoch": 0.9335246293639407, "grad_norm": 5.853823184967041, "learning_rate": 8.098053846243209e-08, "loss": 0.3817, "step": 13664 }, { "epoch": 0.9335929493748719, "grad_norm": 3.6038410663604736, "learning_rate": 8.081498191660591e-08, "loss": 0.2688, "step": 13665 }, { "epoch": 0.9336612693858031, "grad_norm": 2.8686790466308594, "learning_rate": 8.064959279920153e-08, "loss": 0.1594, "step": 13666 }, { "epoch": 0.9337295893967343, "grad_norm": 3.4990651607513428, "learning_rate": 8.048437111831575e-08, "loss": 0.2442, "step": 13667 }, { "epoch": 0.9337979094076655, "grad_norm": 4.631331920623779, "learning_rate": 8.031931688203992e-08, "loss": 0.2445, "step": 13668 }, { "epoch": 0.9338662294185968, "grad_norm": 4.035116195678711, "learning_rate": 8.015443009845647e-08, "loss": 0.2729, "step": 13669 }, { "epoch": 0.9339345494295279, "grad_norm": 3.9655141830444336, "learning_rate": 7.998971077563965e-08, "loss": 0.1793, "step": 13670 }, { "epoch": 0.9340028694404591, "grad_norm": 5.497297286987305, "learning_rate": 7.982515892165404e-08, "loss": 0.257, "step": 13671 }, { "epoch": 0.9340711894513903, "grad_norm": 4.6061506271362305, "learning_rate": 7.966077454455833e-08, "loss": 0.26, "step": 13672 }, { "epoch": 0.9341395094623215, "grad_norm": 2.6435718536376953, "learning_rate": 7.949655765240232e-08, "loss": 0.1986, "step": 13673 }, { "epoch": 0.9342078294732528, "grad_norm": 6.1918463706970215, "learning_rate": 7.933250825322685e-08, "loss": 0.3308, "step": 13674 }, { "epoch": 0.9342761494841839, "grad_norm": 3.5805108547210693, "learning_rate": 7.916862635506461e-08, "loss": 0.2138, "step": 13675 }, { "epoch": 0.9343444694951151, "grad_norm": 3.9363174438476562, "learning_rate": 7.90049119659405e-08, "loss": 0.2673, "step": 13676 }, { "epoch": 0.9344127895060463, "grad_norm": 3.054147481918335, "learning_rate": 7.884136509387168e-08, "loss": 0.2731, "step": 13677 }, { "epoch": 0.9344811095169775, "grad_norm": 3.8562734127044678, "learning_rate": 7.867798574686675e-08, "loss": 0.3892, "step": 13678 }, { "epoch": 0.9345494295279088, "grad_norm": 2.641554355621338, "learning_rate": 7.851477393292533e-08, "loss": 0.2026, "step": 13679 }, { "epoch": 0.9346177495388399, "grad_norm": 5.947962284088135, "learning_rate": 7.835172966003973e-08, "loss": 0.2427, "step": 13680 }, { "epoch": 0.9346860695497712, "grad_norm": 5.1549787521362305, "learning_rate": 7.818885293619365e-08, "loss": 0.2139, "step": 13681 }, { "epoch": 0.9347543895607023, "grad_norm": 3.743774652481079, "learning_rate": 7.802614376936346e-08, "loss": 0.3237, "step": 13682 }, { "epoch": 0.9348227095716335, "grad_norm": 4.490734100341797, "learning_rate": 7.786360216751536e-08, "loss": 0.19, "step": 13683 }, { "epoch": 0.9348910295825648, "grad_norm": 3.7309978008270264, "learning_rate": 7.77012281386094e-08, "loss": 0.2291, "step": 13684 }, { "epoch": 0.9349593495934959, "grad_norm": 3.1936962604522705, "learning_rate": 7.753902169059629e-08, "loss": 0.2291, "step": 13685 }, { "epoch": 0.9350276696044272, "grad_norm": 4.8548665046691895, "learning_rate": 7.737698283141969e-08, "loss": 0.2892, "step": 13686 }, { "epoch": 0.9350959896153583, "grad_norm": 6.334661483764648, "learning_rate": 7.721511156901284e-08, "loss": 0.3153, "step": 13687 }, { "epoch": 0.9351643096262895, "grad_norm": 4.449168682098389, "learning_rate": 7.705340791130311e-08, "loss": 0.1836, "step": 13688 }, { "epoch": 0.9352326296372208, "grad_norm": 3.605421781539917, "learning_rate": 7.689187186620855e-08, "loss": 0.2691, "step": 13689 }, { "epoch": 0.9353009496481519, "grad_norm": 5.357752323150635, "learning_rate": 7.673050344163906e-08, "loss": 0.1965, "step": 13690 }, { "epoch": 0.9353692696590832, "grad_norm": 4.578482151031494, "learning_rate": 7.656930264549599e-08, "loss": 0.2434, "step": 13691 }, { "epoch": 0.9354375896700143, "grad_norm": 4.425868988037109, "learning_rate": 7.64082694856737e-08, "loss": 0.3221, "step": 13692 }, { "epoch": 0.9355059096809456, "grad_norm": 2.8830132484436035, "learning_rate": 7.624740397005681e-08, "loss": 0.2361, "step": 13693 }, { "epoch": 0.9355742296918768, "grad_norm": 3.2496254444122314, "learning_rate": 7.608670610652334e-08, "loss": 0.1719, "step": 13694 }, { "epoch": 0.9356425497028079, "grad_norm": 4.424688816070557, "learning_rate": 7.592617590294165e-08, "loss": 0.2776, "step": 13695 }, { "epoch": 0.9357108697137392, "grad_norm": 4.06417179107666, "learning_rate": 7.576581336717225e-08, "loss": 0.1817, "step": 13696 }, { "epoch": 0.9357791897246703, "grad_norm": 3.6498594284057617, "learning_rate": 7.560561850706832e-08, "loss": 0.1817, "step": 13697 }, { "epoch": 0.9358475097356016, "grad_norm": 3.020237684249878, "learning_rate": 7.544559133047369e-08, "loss": 0.1956, "step": 13698 }, { "epoch": 0.9359158297465328, "grad_norm": 3.2453815937042236, "learning_rate": 7.52857318452244e-08, "loss": 0.232, "step": 13699 }, { "epoch": 0.9359841497574639, "grad_norm": 5.075618743896484, "learning_rate": 7.512604005914881e-08, "loss": 0.2937, "step": 13700 }, { "epoch": 0.9360524697683952, "grad_norm": 4.925901412963867, "learning_rate": 7.496651598006621e-08, "loss": 0.2359, "step": 13701 }, { "epoch": 0.9361207897793263, "grad_norm": 3.745889663696289, "learning_rate": 7.48071596157886e-08, "loss": 0.2708, "step": 13702 }, { "epoch": 0.9361891097902576, "grad_norm": 4.905290603637695, "learning_rate": 7.464797097411863e-08, "loss": 0.2027, "step": 13703 }, { "epoch": 0.9362574298011888, "grad_norm": 3.6467161178588867, "learning_rate": 7.448895006285117e-08, "loss": 0.2382, "step": 13704 }, { "epoch": 0.93632574981212, "grad_norm": 3.6900553703308105, "learning_rate": 7.433009688977371e-08, "loss": 0.3361, "step": 13705 }, { "epoch": 0.9363940698230512, "grad_norm": 3.8742775917053223, "learning_rate": 7.417141146266482e-08, "loss": 0.2185, "step": 13706 }, { "epoch": 0.9364623898339823, "grad_norm": 3.4436068534851074, "learning_rate": 7.401289378929488e-08, "loss": 0.2427, "step": 13707 }, { "epoch": 0.9365307098449136, "grad_norm": 3.070215940475464, "learning_rate": 7.385454387742574e-08, "loss": 0.2264, "step": 13708 }, { "epoch": 0.9365990298558448, "grad_norm": 4.108850955963135, "learning_rate": 7.369636173481147e-08, "loss": 0.2758, "step": 13709 }, { "epoch": 0.936667349866776, "grad_norm": 3.8004543781280518, "learning_rate": 7.353834736919762e-08, "loss": 0.3199, "step": 13710 }, { "epoch": 0.9367356698777072, "grad_norm": 3.573495388031006, "learning_rate": 7.338050078832231e-08, "loss": 0.255, "step": 13711 }, { "epoch": 0.9368039898886383, "grad_norm": 2.983154058456421, "learning_rate": 7.322282199991437e-08, "loss": 0.1787, "step": 13712 }, { "epoch": 0.9368723098995696, "grad_norm": 4.226447582244873, "learning_rate": 7.306531101169522e-08, "loss": 0.1968, "step": 13713 }, { "epoch": 0.9369406299105008, "grad_norm": 3.839744806289673, "learning_rate": 7.290796783137776e-08, "loss": 0.3393, "step": 13714 }, { "epoch": 0.937008949921432, "grad_norm": 3.4250776767730713, "learning_rate": 7.275079246666671e-08, "loss": 0.2854, "step": 13715 }, { "epoch": 0.9370772699323632, "grad_norm": 2.675807237625122, "learning_rate": 7.259378492525786e-08, "loss": 0.1818, "step": 13716 }, { "epoch": 0.9371455899432944, "grad_norm": 4.585783004760742, "learning_rate": 7.243694521484039e-08, "loss": 0.2002, "step": 13717 }, { "epoch": 0.9372139099542256, "grad_norm": 4.630276203155518, "learning_rate": 7.228027334309339e-08, "loss": 0.2565, "step": 13718 }, { "epoch": 0.9372822299651568, "grad_norm": 3.5138025283813477, "learning_rate": 7.212376931768971e-08, "loss": 0.2584, "step": 13719 }, { "epoch": 0.937350549976088, "grad_norm": 3.694840669631958, "learning_rate": 7.196743314629211e-08, "loss": 0.2076, "step": 13720 }, { "epoch": 0.9374188699870192, "grad_norm": 4.029086112976074, "learning_rate": 7.181126483655637e-08, "loss": 0.2142, "step": 13721 }, { "epoch": 0.9374871899979504, "grad_norm": 4.232611656188965, "learning_rate": 7.165526439612968e-08, "loss": 0.2652, "step": 13722 }, { "epoch": 0.9375555100088816, "grad_norm": 3.556185483932495, "learning_rate": 7.149943183265072e-08, "loss": 0.1924, "step": 13723 }, { "epoch": 0.9376238300198128, "grad_norm": 2.153411865234375, "learning_rate": 7.134376715375001e-08, "loss": 0.1485, "step": 13724 }, { "epoch": 0.937692150030744, "grad_norm": 3.314317226409912, "learning_rate": 7.118827036705028e-08, "loss": 0.1564, "step": 13725 }, { "epoch": 0.9377604700416752, "grad_norm": 4.565471649169922, "learning_rate": 7.103294148016609e-08, "loss": 0.2686, "step": 13726 }, { "epoch": 0.9378287900526064, "grad_norm": 5.4641008377075195, "learning_rate": 7.087778050070348e-08, "loss": 0.2832, "step": 13727 }, { "epoch": 0.9378971100635376, "grad_norm": 6.870445728302002, "learning_rate": 7.072278743625992e-08, "loss": 0.2844, "step": 13728 }, { "epoch": 0.9379654300744689, "grad_norm": 5.271274566650391, "learning_rate": 7.056796229442474e-08, "loss": 0.2585, "step": 13729 }, { "epoch": 0.9380337500854, "grad_norm": 6.820046901702881, "learning_rate": 7.041330508277987e-08, "loss": 0.2529, "step": 13730 }, { "epoch": 0.9381020700963312, "grad_norm": 12.209716796875, "learning_rate": 7.02588158088983e-08, "loss": 0.3012, "step": 13731 }, { "epoch": 0.9381703901072624, "grad_norm": 3.814324378967285, "learning_rate": 7.01044944803445e-08, "loss": 0.2617, "step": 13732 }, { "epoch": 0.9382387101181936, "grad_norm": 3.453774929046631, "learning_rate": 6.99503411046763e-08, "loss": 0.2216, "step": 13733 }, { "epoch": 0.9383070301291249, "grad_norm": 4.513197898864746, "learning_rate": 6.979635568944104e-08, "loss": 0.2752, "step": 13734 }, { "epoch": 0.938375350140056, "grad_norm": 3.5580687522888184, "learning_rate": 6.964253824217986e-08, "loss": 0.1968, "step": 13735 }, { "epoch": 0.9384436701509872, "grad_norm": 3.7741103172302246, "learning_rate": 6.94888887704238e-08, "loss": 0.2503, "step": 13736 }, { "epoch": 0.9385119901619184, "grad_norm": 3.6170361042022705, "learning_rate": 6.93354072816973e-08, "loss": 0.2065, "step": 13737 }, { "epoch": 0.9385803101728496, "grad_norm": 4.4397873878479, "learning_rate": 6.918209378351581e-08, "loss": 0.2783, "step": 13738 }, { "epoch": 0.9386486301837809, "grad_norm": 4.2992095947265625, "learning_rate": 6.902894828338746e-08, "loss": 0.1787, "step": 13739 }, { "epoch": 0.938716950194712, "grad_norm": 4.633798599243164, "learning_rate": 6.887597078881024e-08, "loss": 0.261, "step": 13740 }, { "epoch": 0.9387852702056433, "grad_norm": 5.187243938446045, "learning_rate": 6.872316130727518e-08, "loss": 0.2329, "step": 13741 }, { "epoch": 0.9388535902165744, "grad_norm": 5.508032321929932, "learning_rate": 6.85705198462655e-08, "loss": 0.3237, "step": 13742 }, { "epoch": 0.9389219102275056, "grad_norm": 3.8806347846984863, "learning_rate": 6.84180464132555e-08, "loss": 0.3238, "step": 13743 }, { "epoch": 0.9389902302384369, "grad_norm": 3.8188397884368896, "learning_rate": 6.826574101571092e-08, "loss": 0.2218, "step": 13744 }, { "epoch": 0.939058550249368, "grad_norm": 3.5078816413879395, "learning_rate": 6.811360366109054e-08, "loss": 0.278, "step": 13745 }, { "epoch": 0.9391268702602993, "grad_norm": 3.4166510105133057, "learning_rate": 6.796163435684338e-08, "loss": 0.2932, "step": 13746 }, { "epoch": 0.9391951902712304, "grad_norm": 3.4984960556030273, "learning_rate": 6.780983311041188e-08, "loss": 0.1913, "step": 13747 }, { "epoch": 0.9392635102821616, "grad_norm": 3.6204092502593994, "learning_rate": 6.7658199929228e-08, "loss": 0.2636, "step": 13748 }, { "epoch": 0.9393318302930929, "grad_norm": 2.8612778186798096, "learning_rate": 6.750673482071823e-08, "loss": 0.2079, "step": 13749 }, { "epoch": 0.939400150304024, "grad_norm": 3.610525131225586, "learning_rate": 6.735543779229819e-08, "loss": 0.2377, "step": 13750 }, { "epoch": 0.9394684703149553, "grad_norm": 2.9059062004089355, "learning_rate": 6.720430885137729e-08, "loss": 0.2633, "step": 13751 }, { "epoch": 0.9395367903258864, "grad_norm": 4.099400520324707, "learning_rate": 6.7053348005356e-08, "loss": 0.2511, "step": 13752 }, { "epoch": 0.9396051103368177, "grad_norm": 4.565703868865967, "learning_rate": 6.690255526162586e-08, "loss": 0.2555, "step": 13753 }, { "epoch": 0.9396734303477489, "grad_norm": 4.2851738929748535, "learning_rate": 6.675193062757101e-08, "loss": 0.2641, "step": 13754 }, { "epoch": 0.93974175035868, "grad_norm": 4.168163299560547, "learning_rate": 6.660147411056783e-08, "loss": 0.3205, "step": 13755 }, { "epoch": 0.9398100703696113, "grad_norm": 4.824312210083008, "learning_rate": 6.645118571798258e-08, "loss": 0.2727, "step": 13756 }, { "epoch": 0.9398783903805424, "grad_norm": 3.1830661296844482, "learning_rate": 6.630106545717496e-08, "loss": 0.2255, "step": 13757 }, { "epoch": 0.9399467103914737, "grad_norm": 5.717076301574707, "learning_rate": 6.615111333549606e-08, "loss": 0.2484, "step": 13758 }, { "epoch": 0.9400150304024049, "grad_norm": 3.6270620822906494, "learning_rate": 6.600132936028923e-08, "loss": 0.3673, "step": 13759 }, { "epoch": 0.940083350413336, "grad_norm": 4.534330368041992, "learning_rate": 6.585171353888814e-08, "loss": 0.4111, "step": 13760 }, { "epoch": 0.9401516704242673, "grad_norm": 5.382364273071289, "learning_rate": 6.570226587861938e-08, "loss": 0.3528, "step": 13761 }, { "epoch": 0.9402199904351984, "grad_norm": 4.596237659454346, "learning_rate": 6.555298638680069e-08, "loss": 0.3024, "step": 13762 }, { "epoch": 0.9402883104461297, "grad_norm": 5.2359089851379395, "learning_rate": 6.540387507074274e-08, "loss": 0.2529, "step": 13763 }, { "epoch": 0.9403566304570609, "grad_norm": 3.9822256565093994, "learning_rate": 6.525493193774618e-08, "loss": 0.2661, "step": 13764 }, { "epoch": 0.9404249504679921, "grad_norm": 6.873991966247559, "learning_rate": 6.510615699510457e-08, "loss": 0.2906, "step": 13765 }, { "epoch": 0.9404932704789233, "grad_norm": 3.3666725158691406, "learning_rate": 6.49575502501034e-08, "loss": 0.2383, "step": 13766 }, { "epoch": 0.9405615904898544, "grad_norm": 3.126420021057129, "learning_rate": 6.480911171001957e-08, "loss": 0.3033, "step": 13767 }, { "epoch": 0.9406299105007857, "grad_norm": 3.971672773361206, "learning_rate": 6.466084138212141e-08, "loss": 0.2993, "step": 13768 }, { "epoch": 0.9406982305117169, "grad_norm": 3.915437698364258, "learning_rate": 6.451273927366951e-08, "loss": 0.1596, "step": 13769 }, { "epoch": 0.9407665505226481, "grad_norm": 2.763352394104004, "learning_rate": 6.436480539191553e-08, "loss": 0.165, "step": 13770 }, { "epoch": 0.9408348705335793, "grad_norm": 4.144481182098389, "learning_rate": 6.421703974410409e-08, "loss": 0.2228, "step": 13771 }, { "epoch": 0.9409031905445104, "grad_norm": 3.5693883895874023, "learning_rate": 6.406944233747053e-08, "loss": 0.252, "step": 13772 }, { "epoch": 0.9409715105554417, "grad_norm": 3.4869325160980225, "learning_rate": 6.392201317924239e-08, "loss": 0.1556, "step": 13773 }, { "epoch": 0.9410398305663729, "grad_norm": 3.354471445083618, "learning_rate": 6.37747522766387e-08, "loss": 0.2292, "step": 13774 }, { "epoch": 0.9411081505773041, "grad_norm": 3.0941073894500732, "learning_rate": 6.362765963687067e-08, "loss": 0.2566, "step": 13775 }, { "epoch": 0.9411764705882353, "grad_norm": 3.5780105590820312, "learning_rate": 6.348073526714099e-08, "loss": 0.1977, "step": 13776 }, { "epoch": 0.9412447905991665, "grad_norm": 5.683834552764893, "learning_rate": 6.333397917464417e-08, "loss": 0.2019, "step": 13777 }, { "epoch": 0.9413131106100977, "grad_norm": 5.074209690093994, "learning_rate": 6.31873913665666e-08, "loss": 0.304, "step": 13778 }, { "epoch": 0.9413814306210289, "grad_norm": 3.2686760425567627, "learning_rate": 6.304097185008567e-08, "loss": 0.279, "step": 13779 }, { "epoch": 0.9414497506319601, "grad_norm": 4.192019939422607, "learning_rate": 6.289472063237222e-08, "loss": 0.2065, "step": 13780 }, { "epoch": 0.9415180706428913, "grad_norm": 4.771342754364014, "learning_rate": 6.274863772058698e-08, "loss": 0.3908, "step": 13781 }, { "epoch": 0.9415863906538225, "grad_norm": 4.980447769165039, "learning_rate": 6.260272312188325e-08, "loss": 0.3367, "step": 13782 }, { "epoch": 0.9416547106647537, "grad_norm": 3.3183045387268066, "learning_rate": 6.245697684340622e-08, "loss": 0.2018, "step": 13783 }, { "epoch": 0.9417230306756849, "grad_norm": 3.452749729156494, "learning_rate": 6.231139889229292e-08, "loss": 0.2297, "step": 13784 }, { "epoch": 0.9417913506866161, "grad_norm": 4.475455284118652, "learning_rate": 6.21659892756718e-08, "loss": 0.2719, "step": 13785 }, { "epoch": 0.9418596706975473, "grad_norm": 3.9109926223754883, "learning_rate": 6.202074800066314e-08, "loss": 0.2529, "step": 13786 }, { "epoch": 0.9419279907084785, "grad_norm": 3.7034261226654053, "learning_rate": 6.187567507437875e-08, "loss": 0.2434, "step": 13787 }, { "epoch": 0.9419963107194097, "grad_norm": 4.986611366271973, "learning_rate": 6.173077050392295e-08, "loss": 0.3053, "step": 13788 }, { "epoch": 0.942064630730341, "grad_norm": 3.1500351428985596, "learning_rate": 6.15860342963912e-08, "loss": 0.2466, "step": 13789 }, { "epoch": 0.9421329507412721, "grad_norm": 3.5052289962768555, "learning_rate": 6.144146645887038e-08, "loss": 0.2574, "step": 13790 }, { "epoch": 0.9422012707522033, "grad_norm": 4.032787799835205, "learning_rate": 6.129706699844e-08, "loss": 0.3228, "step": 13791 }, { "epoch": 0.9422695907631345, "grad_norm": 3.0446012020111084, "learning_rate": 6.115283592217142e-08, "loss": 0.2315, "step": 13792 }, { "epoch": 0.9423379107740657, "grad_norm": 5.814069747924805, "learning_rate": 6.100877323712622e-08, "loss": 0.2557, "step": 13793 }, { "epoch": 0.942406230784997, "grad_norm": 3.9114487171173096, "learning_rate": 6.086487895035947e-08, "loss": 0.2069, "step": 13794 }, { "epoch": 0.9424745507959281, "grad_norm": 3.569597005844116, "learning_rate": 6.072115306891724e-08, "loss": 0.2226, "step": 13795 }, { "epoch": 0.9425428708068593, "grad_norm": 5.6563005447387695, "learning_rate": 6.057759559983705e-08, "loss": 0.3811, "step": 13796 }, { "epoch": 0.9426111908177905, "grad_norm": 4.6898651123046875, "learning_rate": 6.04342065501487e-08, "loss": 0.2171, "step": 13797 }, { "epoch": 0.9426795108287217, "grad_norm": 4.602001667022705, "learning_rate": 6.029098592687338e-08, "loss": 0.2741, "step": 13798 }, { "epoch": 0.942747830839653, "grad_norm": 4.782357215881348, "learning_rate": 6.014793373702493e-08, "loss": 0.2034, "step": 13799 }, { "epoch": 0.9428161508505841, "grad_norm": 3.5493693351745605, "learning_rate": 6.000504998760786e-08, "loss": 0.2051, "step": 13800 }, { "epoch": 0.9428844708615154, "grad_norm": 4.397519111633301, "learning_rate": 5.986233468561813e-08, "loss": 0.226, "step": 13801 }, { "epoch": 0.9429527908724465, "grad_norm": 2.2684812545776367, "learning_rate": 5.97197878380451e-08, "loss": 0.1654, "step": 13802 }, { "epoch": 0.9430211108833777, "grad_norm": 4.092310905456543, "learning_rate": 5.95774094518684e-08, "loss": 0.2593, "step": 13803 }, { "epoch": 0.943089430894309, "grad_norm": 5.028842449188232, "learning_rate": 5.94351995340599e-08, "loss": 0.3206, "step": 13804 }, { "epoch": 0.9431577509052401, "grad_norm": 3.8104305267333984, "learning_rate": 5.9293158091583695e-08, "loss": 0.2453, "step": 13805 }, { "epoch": 0.9432260709161714, "grad_norm": 4.002154350280762, "learning_rate": 5.9151285131394556e-08, "loss": 0.2986, "step": 13806 }, { "epoch": 0.9432943909271025, "grad_norm": 3.065281391143799, "learning_rate": 5.9009580660439876e-08, "loss": 0.2441, "step": 13807 }, { "epoch": 0.9433627109380337, "grad_norm": 4.249060153961182, "learning_rate": 5.8868044685659257e-08, "loss": 0.1944, "step": 13808 }, { "epoch": 0.943431030948965, "grad_norm": 4.204752445220947, "learning_rate": 5.872667721398223e-08, "loss": 0.2393, "step": 13809 }, { "epoch": 0.9434993509598961, "grad_norm": 4.337080001831055, "learning_rate": 5.858547825233129e-08, "loss": 0.186, "step": 13810 }, { "epoch": 0.9435676709708274, "grad_norm": 3.248833656311035, "learning_rate": 5.84444478076212e-08, "loss": 0.1554, "step": 13811 }, { "epoch": 0.9436359909817585, "grad_norm": 4.053175926208496, "learning_rate": 5.830358588675777e-08, "loss": 0.239, "step": 13812 }, { "epoch": 0.9437043109926898, "grad_norm": 4.753659248352051, "learning_rate": 5.816289249663825e-08, "loss": 0.278, "step": 13813 }, { "epoch": 0.943772631003621, "grad_norm": 3.407102584838867, "learning_rate": 5.802236764415214e-08, "loss": 0.2284, "step": 13814 }, { "epoch": 0.9438409510145521, "grad_norm": 4.8727922439575195, "learning_rate": 5.788201133618076e-08, "loss": 0.2582, "step": 13815 }, { "epoch": 0.9439092710254834, "grad_norm": 3.616941213607788, "learning_rate": 5.7741823579596894e-08, "loss": 0.2358, "step": 13816 }, { "epoch": 0.9439775910364145, "grad_norm": 3.3060319423675537, "learning_rate": 5.7601804381265166e-08, "loss": 0.2179, "step": 13817 }, { "epoch": 0.9440459110473458, "grad_norm": 4.234038829803467, "learning_rate": 5.746195374804164e-08, "loss": 0.3446, "step": 13818 }, { "epoch": 0.944114231058277, "grad_norm": 4.250232696533203, "learning_rate": 5.732227168677501e-08, "loss": 0.1855, "step": 13819 }, { "epoch": 0.9441825510692081, "grad_norm": 4.1543989181518555, "learning_rate": 5.7182758204305014e-08, "loss": 0.2667, "step": 13820 }, { "epoch": 0.9442508710801394, "grad_norm": 4.003087043762207, "learning_rate": 5.7043413307462874e-08, "loss": 0.172, "step": 13821 }, { "epoch": 0.9443191910910705, "grad_norm": 5.507321834564209, "learning_rate": 5.690423700307201e-08, "loss": 0.3332, "step": 13822 }, { "epoch": 0.9443875111020018, "grad_norm": 3.849858045578003, "learning_rate": 5.67652292979473e-08, "loss": 0.1946, "step": 13823 }, { "epoch": 0.944455831112933, "grad_norm": 5.005902290344238, "learning_rate": 5.662639019889665e-08, "loss": 0.3956, "step": 13824 }, { "epoch": 0.9445241511238642, "grad_norm": 4.030824661254883, "learning_rate": 5.648771971271743e-08, "loss": 0.2621, "step": 13825 }, { "epoch": 0.9445924711347954, "grad_norm": 6.4534478187561035, "learning_rate": 5.634921784620084e-08, "loss": 0.3861, "step": 13826 }, { "epoch": 0.9446607911457265, "grad_norm": 4.456081867218018, "learning_rate": 5.621088460612833e-08, "loss": 0.1389, "step": 13827 }, { "epoch": 0.9447291111566578, "grad_norm": 4.964757919311523, "learning_rate": 5.6072719999274376e-08, "loss": 0.1943, "step": 13828 }, { "epoch": 0.944797431167589, "grad_norm": 3.102611541748047, "learning_rate": 5.593472403240335e-08, "loss": 0.2537, "step": 13829 }, { "epoch": 0.9448657511785202, "grad_norm": 3.4230711460113525, "learning_rate": 5.5796896712273393e-08, "loss": 0.2132, "step": 13830 }, { "epoch": 0.9449340711894514, "grad_norm": 6.361598491668701, "learning_rate": 5.565923804563372e-08, "loss": 0.2658, "step": 13831 }, { "epoch": 0.9450023912003825, "grad_norm": 3.7405521869659424, "learning_rate": 5.552174803922538e-08, "loss": 0.2813, "step": 13832 }, { "epoch": 0.9450707112113138, "grad_norm": 3.6674675941467285, "learning_rate": 5.538442669977972e-08, "loss": 0.2597, "step": 13833 }, { "epoch": 0.945139031222245, "grad_norm": 3.3907899856567383, "learning_rate": 5.524727403402146e-08, "loss": 0.2364, "step": 13834 }, { "epoch": 0.9452073512331762, "grad_norm": 4.452376365661621, "learning_rate": 5.5110290048667186e-08, "loss": 0.3421, "step": 13835 }, { "epoch": 0.9452756712441074, "grad_norm": 5.031730651855469, "learning_rate": 5.497347475042413e-08, "loss": 0.2832, "step": 13836 }, { "epoch": 0.9453439912550387, "grad_norm": 4.735201358795166, "learning_rate": 5.4836828145991775e-08, "loss": 0.1936, "step": 13837 }, { "epoch": 0.9454123112659698, "grad_norm": 5.20804500579834, "learning_rate": 5.4700350242061435e-08, "loss": 0.3995, "step": 13838 }, { "epoch": 0.945480631276901, "grad_norm": 3.4157443046569824, "learning_rate": 5.456404104531626e-08, "loss": 0.2619, "step": 13839 }, { "epoch": 0.9455489512878322, "grad_norm": 3.0854501724243164, "learning_rate": 5.4427900562430854e-08, "loss": 0.2066, "step": 13840 }, { "epoch": 0.9456172712987634, "grad_norm": 6.108434200286865, "learning_rate": 5.4291928800071665e-08, "loss": 0.2695, "step": 13841 }, { "epoch": 0.9456855913096947, "grad_norm": 4.335752964019775, "learning_rate": 5.415612576489659e-08, "loss": 0.225, "step": 13842 }, { "epoch": 0.9457539113206258, "grad_norm": 5.588141441345215, "learning_rate": 5.402049146355536e-08, "loss": 0.337, "step": 13843 }, { "epoch": 0.945822231331557, "grad_norm": 5.239070892333984, "learning_rate": 5.3885025902690344e-08, "loss": 0.2965, "step": 13844 }, { "epoch": 0.9458905513424882, "grad_norm": 4.2774176597595215, "learning_rate": 5.3749729088934546e-08, "loss": 0.4574, "step": 13845 }, { "epoch": 0.9459588713534194, "grad_norm": 4.00728178024292, "learning_rate": 5.361460102891324e-08, "loss": 0.2884, "step": 13846 }, { "epoch": 0.9460271913643507, "grad_norm": 3.725904941558838, "learning_rate": 5.347964172924313e-08, "loss": 0.21, "step": 13847 }, { "epoch": 0.9460955113752818, "grad_norm": 3.800685167312622, "learning_rate": 5.334485119653276e-08, "loss": 0.1804, "step": 13848 }, { "epoch": 0.9461638313862131, "grad_norm": 4.038520336151123, "learning_rate": 5.321022943738291e-08, "loss": 0.2453, "step": 13849 }, { "epoch": 0.9462321513971442, "grad_norm": 3.5889086723327637, "learning_rate": 5.3075776458385015e-08, "loss": 0.2294, "step": 13850 }, { "epoch": 0.9463004714080754, "grad_norm": 4.3094072341918945, "learning_rate": 5.2941492266123156e-08, "loss": 0.2377, "step": 13851 }, { "epoch": 0.9463687914190066, "grad_norm": 4.956653118133545, "learning_rate": 5.280737686717285e-08, "loss": 0.3566, "step": 13852 }, { "epoch": 0.9464371114299378, "grad_norm": 3.8335700035095215, "learning_rate": 5.267343026810184e-08, "loss": 0.2172, "step": 13853 }, { "epoch": 0.9465054314408691, "grad_norm": 3.862170934677124, "learning_rate": 5.2539652475468155e-08, "loss": 0.2786, "step": 13854 }, { "epoch": 0.9465737514518002, "grad_norm": 4.612180709838867, "learning_rate": 5.2406043495823617e-08, "loss": 0.2584, "step": 13855 }, { "epoch": 0.9466420714627314, "grad_norm": 4.12070369720459, "learning_rate": 5.227260333570993e-08, "loss": 0.2391, "step": 13856 }, { "epoch": 0.9467103914736626, "grad_norm": 3.5050480365753174, "learning_rate": 5.213933200166143e-08, "loss": 0.2871, "step": 13857 }, { "epoch": 0.9467787114845938, "grad_norm": 3.9056556224823, "learning_rate": 5.200622950020428e-08, "loss": 0.1896, "step": 13858 }, { "epoch": 0.9468470314955251, "grad_norm": 2.4960029125213623, "learning_rate": 5.18732958378561e-08, "loss": 0.1603, "step": 13859 }, { "epoch": 0.9469153515064562, "grad_norm": 5.756089210510254, "learning_rate": 5.174053102112636e-08, "loss": 0.2886, "step": 13860 }, { "epoch": 0.9469836715173875, "grad_norm": 3.5538463592529297, "learning_rate": 5.1607935056515954e-08, "loss": 0.1458, "step": 13861 }, { "epoch": 0.9470519915283186, "grad_norm": 3.7209856510162354, "learning_rate": 5.147550795051764e-08, "loss": 0.2437, "step": 13862 }, { "epoch": 0.9471203115392498, "grad_norm": 3.374490261077881, "learning_rate": 5.134324970961679e-08, "loss": 0.2298, "step": 13863 }, { "epoch": 0.9471886315501811, "grad_norm": 5.31111478805542, "learning_rate": 5.1211160340289054e-08, "loss": 0.3623, "step": 13864 }, { "epoch": 0.9472569515611122, "grad_norm": 4.721531867980957, "learning_rate": 5.10792398490027e-08, "loss": 0.3718, "step": 13865 }, { "epoch": 0.9473252715720435, "grad_norm": 3.9926016330718994, "learning_rate": 5.094748824221745e-08, "loss": 0.2935, "step": 13866 }, { "epoch": 0.9473935915829746, "grad_norm": 5.139297008514404, "learning_rate": 5.081590552638526e-08, "loss": 0.2782, "step": 13867 }, { "epoch": 0.9474619115939058, "grad_norm": 4.344863414764404, "learning_rate": 5.0684491707948364e-08, "loss": 0.1986, "step": 13868 }, { "epoch": 0.9475302316048371, "grad_norm": 3.393651008605957, "learning_rate": 5.055324679334277e-08, "loss": 0.3017, "step": 13869 }, { "epoch": 0.9475985516157682, "grad_norm": 3.434025526046753, "learning_rate": 5.04221707889948e-08, "loss": 0.2057, "step": 13870 }, { "epoch": 0.9476668716266995, "grad_norm": 4.008497714996338, "learning_rate": 5.0291263701322976e-08, "loss": 0.2275, "step": 13871 }, { "epoch": 0.9477351916376306, "grad_norm": 4.797817230224609, "learning_rate": 5.0160525536737276e-08, "loss": 0.2324, "step": 13872 }, { "epoch": 0.9478035116485619, "grad_norm": 4.576079845428467, "learning_rate": 5.002995630163992e-08, "loss": 0.2407, "step": 13873 }, { "epoch": 0.9478718316594931, "grad_norm": 4.298495292663574, "learning_rate": 4.989955600242418e-08, "loss": 0.2514, "step": 13874 }, { "epoch": 0.9479401516704242, "grad_norm": 3.7801733016967773, "learning_rate": 4.9769324645475566e-08, "loss": 0.2098, "step": 13875 }, { "epoch": 0.9480084716813555, "grad_norm": 5.1479082107543945, "learning_rate": 4.963926223717102e-08, "loss": 0.29, "step": 13876 }, { "epoch": 0.9480767916922866, "grad_norm": 4.960011959075928, "learning_rate": 4.9509368783880125e-08, "loss": 0.3305, "step": 13877 }, { "epoch": 0.9481451117032179, "grad_norm": 5.1315693855285645, "learning_rate": 4.9379644291962346e-08, "loss": 0.1935, "step": 13878 }, { "epoch": 0.9482134317141491, "grad_norm": 4.417801856994629, "learning_rate": 4.925008876777054e-08, "loss": 0.1797, "step": 13879 }, { "epoch": 0.9482817517250802, "grad_norm": 4.114107608795166, "learning_rate": 4.9120702217648634e-08, "loss": 0.3086, "step": 13880 }, { "epoch": 0.9483500717360115, "grad_norm": 4.176301002502441, "learning_rate": 4.89914846479324e-08, "loss": 0.154, "step": 13881 }, { "epoch": 0.9484183917469426, "grad_norm": 4.44336462020874, "learning_rate": 4.8862436064948664e-08, "loss": 0.2344, "step": 13882 }, { "epoch": 0.9484867117578739, "grad_norm": 2.933244466781616, "learning_rate": 4.873355647501726e-08, "loss": 0.2367, "step": 13883 }, { "epoch": 0.9485550317688051, "grad_norm": 3.8867688179016113, "learning_rate": 4.860484588444908e-08, "loss": 0.2432, "step": 13884 }, { "epoch": 0.9486233517797363, "grad_norm": 3.3513224124908447, "learning_rate": 4.8476304299546466e-08, "loss": 0.2099, "step": 13885 }, { "epoch": 0.9486916717906675, "grad_norm": 3.033080577850342, "learning_rate": 4.834793172660401e-08, "loss": 0.2115, "step": 13886 }, { "epoch": 0.9487599918015986, "grad_norm": 4.026317596435547, "learning_rate": 4.821972817190773e-08, "loss": 0.1951, "step": 13887 }, { "epoch": 0.9488283118125299, "grad_norm": 3.445807695388794, "learning_rate": 4.809169364173471e-08, "loss": 0.2383, "step": 13888 }, { "epoch": 0.9488966318234611, "grad_norm": 5.025571346282959, "learning_rate": 4.7963828142355444e-08, "loss": 0.2316, "step": 13889 }, { "epoch": 0.9489649518343923, "grad_norm": 4.062117099761963, "learning_rate": 4.783613168003109e-08, "loss": 0.1891, "step": 13890 }, { "epoch": 0.9490332718453235, "grad_norm": 4.185990333557129, "learning_rate": 4.7708604261013865e-08, "loss": 0.3259, "step": 13891 }, { "epoch": 0.9491015918562546, "grad_norm": 7.889100074768066, "learning_rate": 4.758124589154899e-08, "loss": 0.2457, "step": 13892 }, { "epoch": 0.9491699118671859, "grad_norm": 4.29128360748291, "learning_rate": 4.745405657787316e-08, "loss": 0.2487, "step": 13893 }, { "epoch": 0.9492382318781171, "grad_norm": 3.3361148834228516, "learning_rate": 4.732703632621371e-08, "loss": 0.252, "step": 13894 }, { "epoch": 0.9493065518890483, "grad_norm": 3.013749361038208, "learning_rate": 4.7200185142791e-08, "loss": 0.2343, "step": 13895 }, { "epoch": 0.9493748718999795, "grad_norm": 3.4459638595581055, "learning_rate": 4.7073503033816075e-08, "loss": 0.3124, "step": 13896 }, { "epoch": 0.9494431919109108, "grad_norm": 2.6983351707458496, "learning_rate": 4.694699000549296e-08, "loss": 0.1651, "step": 13897 }, { "epoch": 0.9495115119218419, "grad_norm": 5.244021892547607, "learning_rate": 4.682064606401637e-08, "loss": 0.2038, "step": 13898 }, { "epoch": 0.9495798319327731, "grad_norm": 4.0416364669799805, "learning_rate": 4.669447121557285e-08, "loss": 0.2004, "step": 13899 }, { "epoch": 0.9496481519437043, "grad_norm": 3.1157472133636475, "learning_rate": 4.65684654663408e-08, "loss": 0.1648, "step": 13900 }, { "epoch": 0.9497164719546355, "grad_norm": 2.918163537979126, "learning_rate": 4.644262882249123e-08, "loss": 0.2211, "step": 13901 }, { "epoch": 0.9497847919655668, "grad_norm": 2.9526588916778564, "learning_rate": 4.631696129018464e-08, "loss": 0.2085, "step": 13902 }, { "epoch": 0.9498531119764979, "grad_norm": 4.466782569885254, "learning_rate": 4.6191462875575736e-08, "loss": 0.2542, "step": 13903 }, { "epoch": 0.9499214319874291, "grad_norm": 4.680208206176758, "learning_rate": 4.60661335848091e-08, "loss": 0.2675, "step": 13904 }, { "epoch": 0.9499897519983603, "grad_norm": 4.388944149017334, "learning_rate": 4.5940973424022704e-08, "loss": 0.1324, "step": 13905 }, { "epoch": 0.9500580720092915, "grad_norm": 3.067171573638916, "learning_rate": 4.581598239934442e-08, "loss": 0.2232, "step": 13906 }, { "epoch": 0.9501263920202228, "grad_norm": 4.149310111999512, "learning_rate": 4.5691160516895146e-08, "loss": 0.2971, "step": 13907 }, { "epoch": 0.9501947120311539, "grad_norm": 7.2482147216796875, "learning_rate": 4.5566507782786814e-08, "loss": 0.2561, "step": 13908 }, { "epoch": 0.9502630320420852, "grad_norm": 5.052797794342041, "learning_rate": 4.54420242031236e-08, "loss": 0.3302, "step": 13909 }, { "epoch": 0.9503313520530163, "grad_norm": 4.473222732543945, "learning_rate": 4.531770978400112e-08, "loss": 0.3343, "step": 13910 }, { "epoch": 0.9503996720639475, "grad_norm": 8.039555549621582, "learning_rate": 4.519356453150647e-08, "loss": 0.2299, "step": 13911 }, { "epoch": 0.9504679920748788, "grad_norm": 4.066229343414307, "learning_rate": 4.506958845171932e-08, "loss": 0.3306, "step": 13912 }, { "epoch": 0.9505363120858099, "grad_norm": 3.610440492630005, "learning_rate": 4.4945781550709665e-08, "loss": 0.2482, "step": 13913 }, { "epoch": 0.9506046320967412, "grad_norm": 2.9023170471191406, "learning_rate": 4.4822143834540474e-08, "loss": 0.1792, "step": 13914 }, { "epoch": 0.9506729521076723, "grad_norm": 3.015503168106079, "learning_rate": 4.469867530926541e-08, "loss": 0.1684, "step": 13915 }, { "epoch": 0.9507412721186035, "grad_norm": 5.48190975189209, "learning_rate": 4.457537598093114e-08, "loss": 0.2058, "step": 13916 }, { "epoch": 0.9508095921295348, "grad_norm": 2.7890818119049072, "learning_rate": 4.4452245855575e-08, "loss": 0.1764, "step": 13917 }, { "epoch": 0.9508779121404659, "grad_norm": 3.708167314529419, "learning_rate": 4.432928493922655e-08, "loss": 0.1909, "step": 13918 }, { "epoch": 0.9509462321513972, "grad_norm": 4.971653938293457, "learning_rate": 4.420649323790682e-08, "loss": 0.2832, "step": 13919 }, { "epoch": 0.9510145521623283, "grad_norm": 4.3470635414123535, "learning_rate": 4.4083870757627876e-08, "loss": 0.2612, "step": 13920 }, { "epoch": 0.9510828721732596, "grad_norm": 3.021182060241699, "learning_rate": 4.396141750439481e-08, "loss": 0.2478, "step": 13921 }, { "epoch": 0.9511511921841908, "grad_norm": 5.617950439453125, "learning_rate": 4.383913348420415e-08, "loss": 0.2838, "step": 13922 }, { "epoch": 0.9512195121951219, "grad_norm": 6.0260701179504395, "learning_rate": 4.371701870304312e-08, "loss": 0.2384, "step": 13923 }, { "epoch": 0.9512878322060532, "grad_norm": 4.831273555755615, "learning_rate": 4.359507316689193e-08, "loss": 0.255, "step": 13924 }, { "epoch": 0.9513561522169843, "grad_norm": 3.838735818862915, "learning_rate": 4.3473296881721464e-08, "loss": 0.2371, "step": 13925 }, { "epoch": 0.9514244722279156, "grad_norm": 5.17485237121582, "learning_rate": 4.3351689853495246e-08, "loss": 0.2883, "step": 13926 }, { "epoch": 0.9514927922388468, "grad_norm": 4.918763160705566, "learning_rate": 4.323025208816783e-08, "loss": 0.2515, "step": 13927 }, { "epoch": 0.9515611122497779, "grad_norm": 4.100543022155762, "learning_rate": 4.310898359168525e-08, "loss": 0.3012, "step": 13928 }, { "epoch": 0.9516294322607092, "grad_norm": 4.816221237182617, "learning_rate": 4.2987884369986126e-08, "loss": 0.2957, "step": 13929 }, { "epoch": 0.9516977522716403, "grad_norm": 3.725792407989502, "learning_rate": 4.286695442900096e-08, "loss": 0.2721, "step": 13930 }, { "epoch": 0.9517660722825716, "grad_norm": 3.4411683082580566, "learning_rate": 4.27461937746505e-08, "loss": 0.1613, "step": 13931 }, { "epoch": 0.9518343922935028, "grad_norm": 4.353887557983398, "learning_rate": 4.262560241284852e-08, "loss": 0.3497, "step": 13932 }, { "epoch": 0.951902712304434, "grad_norm": 7.505589008331299, "learning_rate": 4.250518034949985e-08, "loss": 0.2427, "step": 13933 }, { "epoch": 0.9519710323153652, "grad_norm": 2.7988967895507812, "learning_rate": 4.238492759050116e-08, "loss": 0.1822, "step": 13934 }, { "epoch": 0.9520393523262963, "grad_norm": 3.617420196533203, "learning_rate": 4.2264844141740976e-08, "loss": 0.2073, "step": 13935 }, { "epoch": 0.9521076723372276, "grad_norm": 4.8827338218688965, "learning_rate": 4.214493000909964e-08, "loss": 0.3524, "step": 13936 }, { "epoch": 0.9521759923481588, "grad_norm": 4.806201934814453, "learning_rate": 4.2025185198448944e-08, "loss": 0.2657, "step": 13937 }, { "epoch": 0.95224431235909, "grad_norm": 4.682076454162598, "learning_rate": 4.190560971565255e-08, "loss": 0.32, "step": 13938 }, { "epoch": 0.9523126323700212, "grad_norm": 7.436021327972412, "learning_rate": 4.178620356656554e-08, "loss": 0.2871, "step": 13939 }, { "epoch": 0.9523809523809523, "grad_norm": 4.374111652374268, "learning_rate": 4.166696675703485e-08, "loss": 0.2482, "step": 13940 }, { "epoch": 0.9524492723918836, "grad_norm": 6.078279495239258, "learning_rate": 4.1547899292899646e-08, "loss": 0.2406, "step": 13941 }, { "epoch": 0.9525175924028148, "grad_norm": 3.83425235748291, "learning_rate": 4.142900117998977e-08, "loss": 0.1925, "step": 13942 }, { "epoch": 0.952585912413746, "grad_norm": 4.490410804748535, "learning_rate": 4.131027242412766e-08, "loss": 0.2657, "step": 13943 }, { "epoch": 0.9526542324246772, "grad_norm": 4.461927890777588, "learning_rate": 4.119171303112723e-08, "loss": 0.2649, "step": 13944 }, { "epoch": 0.9527225524356084, "grad_norm": 4.632152557373047, "learning_rate": 4.1073323006794224e-08, "loss": 0.2569, "step": 13945 }, { "epoch": 0.9527908724465396, "grad_norm": 3.0652267932891846, "learning_rate": 4.0955102356925454e-08, "loss": 0.1214, "step": 13946 }, { "epoch": 0.9528591924574707, "grad_norm": 4.162593364715576, "learning_rate": 4.083705108730995e-08, "loss": 0.2741, "step": 13947 }, { "epoch": 0.952927512468402, "grad_norm": 5.171548843383789, "learning_rate": 4.07191692037282e-08, "loss": 0.3175, "step": 13948 }, { "epoch": 0.9529958324793332, "grad_norm": 4.364040851593018, "learning_rate": 4.0601456711952534e-08, "loss": 0.2944, "step": 13949 }, { "epoch": 0.9530641524902644, "grad_norm": 2.7249786853790283, "learning_rate": 4.048391361774789e-08, "loss": 0.2655, "step": 13950 }, { "epoch": 0.9531324725011956, "grad_norm": 4.424685955047607, "learning_rate": 4.0366539926869504e-08, "loss": 0.1849, "step": 13951 }, { "epoch": 0.9532007925121267, "grad_norm": 4.437219142913818, "learning_rate": 4.024933564506444e-08, "loss": 0.3883, "step": 13952 }, { "epoch": 0.953269112523058, "grad_norm": 4.192808151245117, "learning_rate": 4.0132300778072006e-08, "loss": 0.2355, "step": 13953 }, { "epoch": 0.9533374325339892, "grad_norm": 4.7427144050598145, "learning_rate": 4.0015435331623716e-08, "loss": 0.3325, "step": 13954 }, { "epoch": 0.9534057525449204, "grad_norm": 3.9673423767089844, "learning_rate": 3.989873931144178e-08, "loss": 0.1746, "step": 13955 }, { "epoch": 0.9534740725558516, "grad_norm": 3.544032096862793, "learning_rate": 3.978221272324023e-08, "loss": 0.2649, "step": 13956 }, { "epoch": 0.9535423925667829, "grad_norm": 3.803818702697754, "learning_rate": 3.966585557272534e-08, "loss": 0.2592, "step": 13957 }, { "epoch": 0.953610712577714, "grad_norm": 4.397026538848877, "learning_rate": 3.954966786559483e-08, "loss": 0.2077, "step": 13958 }, { "epoch": 0.9536790325886452, "grad_norm": 3.5872557163238525, "learning_rate": 3.943364960753787e-08, "loss": 0.3122, "step": 13959 }, { "epoch": 0.9537473525995764, "grad_norm": 3.5564000606536865, "learning_rate": 3.931780080423586e-08, "loss": 0.2477, "step": 13960 }, { "epoch": 0.9538156726105076, "grad_norm": 4.404290676116943, "learning_rate": 3.920212146136165e-08, "loss": 0.3521, "step": 13961 }, { "epoch": 0.9538839926214389, "grad_norm": 3.7877068519592285, "learning_rate": 3.9086611584579156e-08, "loss": 0.1942, "step": 13962 }, { "epoch": 0.95395231263237, "grad_norm": 5.360866546630859, "learning_rate": 3.89712711795453e-08, "loss": 0.2557, "step": 13963 }, { "epoch": 0.9540206326433012, "grad_norm": 3.3952603340148926, "learning_rate": 3.8856100251907664e-08, "loss": 0.212, "step": 13964 }, { "epoch": 0.9540889526542324, "grad_norm": 4.195403575897217, "learning_rate": 3.8741098807306084e-08, "loss": 0.3046, "step": 13965 }, { "epoch": 0.9541572726651636, "grad_norm": 3.3222692012786865, "learning_rate": 3.862626685137144e-08, "loss": 0.1778, "step": 13966 }, { "epoch": 0.9542255926760949, "grad_norm": 6.249027729034424, "learning_rate": 3.8511604389727225e-08, "loss": 0.2302, "step": 13967 }, { "epoch": 0.954293912687026, "grad_norm": 4.362079620361328, "learning_rate": 3.839711142798763e-08, "loss": 0.3023, "step": 13968 }, { "epoch": 0.9543622326979573, "grad_norm": 4.781299591064453, "learning_rate": 3.8282787971759436e-08, "loss": 0.1819, "step": 13969 }, { "epoch": 0.9544305527088884, "grad_norm": 3.109576463699341, "learning_rate": 3.8168634026641276e-08, "loss": 0.2179, "step": 13970 }, { "epoch": 0.9544988727198196, "grad_norm": 4.487426280975342, "learning_rate": 3.805464959822169e-08, "loss": 0.2577, "step": 13971 }, { "epoch": 0.9545671927307509, "grad_norm": 3.1159722805023193, "learning_rate": 3.7940834692083366e-08, "loss": 0.2769, "step": 13972 }, { "epoch": 0.954635512741682, "grad_norm": 4.705898761749268, "learning_rate": 3.7827189313798915e-08, "loss": 0.3681, "step": 13973 }, { "epoch": 0.9547038327526133, "grad_norm": 4.5069169998168945, "learning_rate": 3.771371346893315e-08, "loss": 0.2093, "step": 13974 }, { "epoch": 0.9547721527635444, "grad_norm": 3.740724802017212, "learning_rate": 3.7600407163043135e-08, "loss": 0.3165, "step": 13975 }, { "epoch": 0.9548404727744756, "grad_norm": 4.588109970092773, "learning_rate": 3.748727040167698e-08, "loss": 0.1756, "step": 13976 }, { "epoch": 0.9549087927854069, "grad_norm": 3.316087007522583, "learning_rate": 3.737430319037466e-08, "loss": 0.19, "step": 13977 }, { "epoch": 0.954977112796338, "grad_norm": 3.338935613632202, "learning_rate": 3.7261505534667946e-08, "loss": 0.208, "step": 13978 }, { "epoch": 0.9550454328072693, "grad_norm": 4.458852767944336, "learning_rate": 3.7148877440080105e-08, "loss": 0.175, "step": 13979 }, { "epoch": 0.9551137528182004, "grad_norm": 4.255149841308594, "learning_rate": 3.703641891212661e-08, "loss": 0.2463, "step": 13980 }, { "epoch": 0.9551820728291317, "grad_norm": 6.882785320281982, "learning_rate": 3.692412995631361e-08, "loss": 0.2368, "step": 13981 }, { "epoch": 0.9552503928400629, "grad_norm": 4.670780181884766, "learning_rate": 3.681201057813987e-08, "loss": 0.3033, "step": 13982 }, { "epoch": 0.955318712850994, "grad_norm": 3.8863725662231445, "learning_rate": 3.670006078309601e-08, "loss": 0.2053, "step": 13983 }, { "epoch": 0.9553870328619253, "grad_norm": 5.327614784240723, "learning_rate": 3.65882805766633e-08, "loss": 0.2529, "step": 13984 }, { "epoch": 0.9554553528728564, "grad_norm": 3.4677107334136963, "learning_rate": 3.6476669964315644e-08, "loss": 0.1776, "step": 13985 }, { "epoch": 0.9555236728837877, "grad_norm": 4.055099964141846, "learning_rate": 3.6365228951518394e-08, "loss": 0.2846, "step": 13986 }, { "epoch": 0.9555919928947189, "grad_norm": 4.68991756439209, "learning_rate": 3.625395754372834e-08, "loss": 0.2877, "step": 13987 }, { "epoch": 0.95566031290565, "grad_norm": 3.459994316101074, "learning_rate": 3.614285574639375e-08, "loss": 0.2684, "step": 13988 }, { "epoch": 0.9557286329165813, "grad_norm": 3.9233193397521973, "learning_rate": 3.603192356495588e-08, "loss": 0.3314, "step": 13989 }, { "epoch": 0.9557969529275124, "grad_norm": 4.410370349884033, "learning_rate": 3.592116100484627e-08, "loss": 0.2194, "step": 13990 }, { "epoch": 0.9558652729384437, "grad_norm": 7.076732158660889, "learning_rate": 3.581056807148908e-08, "loss": 0.2958, "step": 13991 }, { "epoch": 0.9559335929493749, "grad_norm": 3.1277482509613037, "learning_rate": 3.570014477029915e-08, "loss": 0.2769, "step": 13992 }, { "epoch": 0.9560019129603061, "grad_norm": 3.4332258701324463, "learning_rate": 3.558989110668353e-08, "loss": 0.2166, "step": 13993 }, { "epoch": 0.9560702329712373, "grad_norm": 4.957153797149658, "learning_rate": 3.547980708604192e-08, "loss": 0.267, "step": 13994 }, { "epoch": 0.9561385529821684, "grad_norm": 4.780466556549072, "learning_rate": 3.53698927137639e-08, "loss": 0.2688, "step": 13995 }, { "epoch": 0.9562068729930997, "grad_norm": 2.958331346511841, "learning_rate": 3.5260147995232025e-08, "loss": 0.2147, "step": 13996 }, { "epoch": 0.9562751930040309, "grad_norm": 4.060258865356445, "learning_rate": 3.5150572935820745e-08, "loss": 0.2747, "step": 13997 }, { "epoch": 0.9563435130149621, "grad_norm": 4.2908830642700195, "learning_rate": 3.504116754089476e-08, "loss": 0.27, "step": 13998 }, { "epoch": 0.9564118330258933, "grad_norm": 4.09237813949585, "learning_rate": 3.4931931815812166e-08, "loss": 0.2457, "step": 13999 }, { "epoch": 0.9564801530368244, "grad_norm": 2.4857733249664307, "learning_rate": 3.4822865765921364e-08, "loss": 0.1913, "step": 14000 }, { "epoch": 0.9565484730477557, "grad_norm": 2.566145658493042, "learning_rate": 3.471396939656335e-08, "loss": 0.2154, "step": 14001 }, { "epoch": 0.9566167930586869, "grad_norm": 3.9018142223358154, "learning_rate": 3.4605242713070196e-08, "loss": 0.219, "step": 14002 }, { "epoch": 0.9566851130696181, "grad_norm": 3.266389846801758, "learning_rate": 3.449668572076619e-08, "loss": 0.1894, "step": 14003 }, { "epoch": 0.9567534330805493, "grad_norm": 5.367807865142822, "learning_rate": 3.43882984249671e-08, "loss": 0.2645, "step": 14004 }, { "epoch": 0.9568217530914805, "grad_norm": 3.3208155632019043, "learning_rate": 3.4280080830980494e-08, "loss": 0.2262, "step": 14005 }, { "epoch": 0.9568900731024117, "grad_norm": 5.8599677085876465, "learning_rate": 3.4172032944105034e-08, "loss": 0.3182, "step": 14006 }, { "epoch": 0.9569583931133429, "grad_norm": 4.6273698806762695, "learning_rate": 3.406415476963198e-08, "loss": 0.2365, "step": 14007 }, { "epoch": 0.9570267131242741, "grad_norm": 5.916577339172363, "learning_rate": 3.395644631284328e-08, "loss": 0.3255, "step": 14008 }, { "epoch": 0.9570950331352053, "grad_norm": 6.587430477142334, "learning_rate": 3.3848907579013877e-08, "loss": 0.2787, "step": 14009 }, { "epoch": 0.9571633531461365, "grad_norm": 4.826356410980225, "learning_rate": 3.374153857340939e-08, "loss": 0.2666, "step": 14010 }, { "epoch": 0.9572316731570677, "grad_norm": 4.650415897369385, "learning_rate": 3.3634339301287284e-08, "loss": 0.1702, "step": 14011 }, { "epoch": 0.9572999931679989, "grad_norm": 4.023136138916016, "learning_rate": 3.3527309767896865e-08, "loss": 0.268, "step": 14012 }, { "epoch": 0.9573683131789301, "grad_norm": 5.7448954582214355, "learning_rate": 3.3420449978478873e-08, "loss": 0.249, "step": 14013 }, { "epoch": 0.9574366331898613, "grad_norm": 4.041322708129883, "learning_rate": 3.331375993826591e-08, "loss": 0.2444, "step": 14014 }, { "epoch": 0.9575049532007925, "grad_norm": 4.3991522789001465, "learning_rate": 3.320723965248279e-08, "loss": 0.2745, "step": 14015 }, { "epoch": 0.9575732732117237, "grad_norm": 4.761181354522705, "learning_rate": 3.31008891263454e-08, "loss": 0.2444, "step": 14016 }, { "epoch": 0.957641593222655, "grad_norm": 4.4644927978515625, "learning_rate": 3.2994708365061074e-08, "loss": 0.3501, "step": 14017 }, { "epoch": 0.9577099132335861, "grad_norm": 5.210318565368652, "learning_rate": 3.288869737382977e-08, "loss": 0.2517, "step": 14018 }, { "epoch": 0.9577782332445173, "grad_norm": 5.9766364097595215, "learning_rate": 3.27828561578421e-08, "loss": 0.3835, "step": 14019 }, { "epoch": 0.9578465532554485, "grad_norm": 2.660792589187622, "learning_rate": 3.267718472228093e-08, "loss": 0.1638, "step": 14020 }, { "epoch": 0.9579148732663797, "grad_norm": 4.688914775848389, "learning_rate": 3.257168307232056e-08, "loss": 0.3519, "step": 14021 }, { "epoch": 0.957983193277311, "grad_norm": 5.228686332702637, "learning_rate": 3.246635121312752e-08, "loss": 0.2335, "step": 14022 }, { "epoch": 0.9580515132882421, "grad_norm": 3.54975962638855, "learning_rate": 3.2361189149859417e-08, "loss": 0.2108, "step": 14023 }, { "epoch": 0.9581198332991733, "grad_norm": 4.45084810256958, "learning_rate": 3.225619688766568e-08, "loss": 0.2577, "step": 14024 }, { "epoch": 0.9581881533101045, "grad_norm": 5.385603427886963, "learning_rate": 3.2151374431687583e-08, "loss": 0.3164, "step": 14025 }, { "epoch": 0.9582564733210357, "grad_norm": 3.3200690746307373, "learning_rate": 3.204672178705825e-08, "loss": 0.1953, "step": 14026 }, { "epoch": 0.958324793331967, "grad_norm": 3.249450445175171, "learning_rate": 3.194223895890186e-08, "loss": 0.2222, "step": 14027 }, { "epoch": 0.9583931133428981, "grad_norm": 3.237112522125244, "learning_rate": 3.183792595233481e-08, "loss": 0.1696, "step": 14028 }, { "epoch": 0.9584614333538294, "grad_norm": 4.056130409240723, "learning_rate": 3.1733782772464966e-08, "loss": 0.2108, "step": 14029 }, { "epoch": 0.9585297533647605, "grad_norm": 6.347014904022217, "learning_rate": 3.162980942439203e-08, "loss": 0.2863, "step": 14030 }, { "epoch": 0.9585980733756917, "grad_norm": 4.853764533996582, "learning_rate": 3.1526005913207144e-08, "loss": 0.3496, "step": 14031 }, { "epoch": 0.958666393386623, "grad_norm": 3.407827615737915, "learning_rate": 3.142237224399369e-08, "loss": 0.2535, "step": 14032 }, { "epoch": 0.9587347133975541, "grad_norm": 6.384562015533447, "learning_rate": 3.131890842182572e-08, "loss": 0.2752, "step": 14033 }, { "epoch": 0.9588030334084854, "grad_norm": 3.1584136486053467, "learning_rate": 3.121561445177029e-08, "loss": 0.2399, "step": 14034 }, { "epoch": 0.9588713534194165, "grad_norm": 3.8455443382263184, "learning_rate": 3.1112490338884745e-08, "loss": 0.2836, "step": 14035 }, { "epoch": 0.9589396734303477, "grad_norm": 3.8526225090026855, "learning_rate": 3.1009536088219436e-08, "loss": 0.2134, "step": 14036 }, { "epoch": 0.959007993441279, "grad_norm": 4.986140727996826, "learning_rate": 3.0906751704814994e-08, "loss": 0.3093, "step": 14037 }, { "epoch": 0.9590763134522101, "grad_norm": 4.738772392272949, "learning_rate": 3.0804137193705447e-08, "loss": 0.1423, "step": 14038 }, { "epoch": 0.9591446334631414, "grad_norm": 4.036554336547852, "learning_rate": 3.0701692559914725e-08, "loss": 0.1896, "step": 14039 }, { "epoch": 0.9592129534740725, "grad_norm": 4.139853477478027, "learning_rate": 3.059941780845976e-08, "loss": 0.2851, "step": 14040 }, { "epoch": 0.9592812734850038, "grad_norm": 3.8254785537719727, "learning_rate": 3.049731294434854e-08, "loss": 0.1304, "step": 14041 }, { "epoch": 0.959349593495935, "grad_norm": 3.3397374153137207, "learning_rate": 3.0395377972580514e-08, "loss": 0.2545, "step": 14042 }, { "epoch": 0.9594179135068661, "grad_norm": 3.1872549057006836, "learning_rate": 3.029361289814775e-08, "loss": 0.2303, "step": 14043 }, { "epoch": 0.9594862335177974, "grad_norm": 5.389784336090088, "learning_rate": 3.0192017726033356e-08, "loss": 0.3246, "step": 14044 }, { "epoch": 0.9595545535287285, "grad_norm": 2.3474037647247314, "learning_rate": 3.009059246121193e-08, "loss": 0.1299, "step": 14045 }, { "epoch": 0.9596228735396598, "grad_norm": 2.846970796585083, "learning_rate": 2.998933710864987e-08, "loss": 0.1924, "step": 14046 }, { "epoch": 0.959691193550591, "grad_norm": 3.437392473220825, "learning_rate": 2.9888251673305835e-08, "loss": 0.2347, "step": 14047 }, { "epoch": 0.9597595135615221, "grad_norm": 5.342563629150391, "learning_rate": 2.978733616012913e-08, "loss": 0.2972, "step": 14048 }, { "epoch": 0.9598278335724534, "grad_norm": 3.9501967430114746, "learning_rate": 2.96865905740617e-08, "loss": 0.2395, "step": 14049 }, { "epoch": 0.9598961535833845, "grad_norm": 5.9788665771484375, "learning_rate": 2.958601492003654e-08, "loss": 0.2357, "step": 14050 }, { "epoch": 0.9599644735943158, "grad_norm": 6.069711208343506, "learning_rate": 2.9485609202979268e-08, "loss": 0.3662, "step": 14051 }, { "epoch": 0.960032793605247, "grad_norm": 3.0969977378845215, "learning_rate": 2.9385373427805783e-08, "loss": 0.1978, "step": 14052 }, { "epoch": 0.9601011136161782, "grad_norm": 3.1374032497406006, "learning_rate": 2.9285307599424215e-08, "loss": 0.1905, "step": 14053 }, { "epoch": 0.9601694336271094, "grad_norm": 3.198993444442749, "learning_rate": 2.918541172273492e-08, "loss": 0.2585, "step": 14054 }, { "epoch": 0.9602377536380405, "grad_norm": 2.650571346282959, "learning_rate": 2.9085685802629712e-08, "loss": 0.2028, "step": 14055 }, { "epoch": 0.9603060736489718, "grad_norm": 3.3746540546417236, "learning_rate": 2.8986129843991848e-08, "loss": 0.2094, "step": 14056 }, { "epoch": 0.960374393659903, "grad_norm": 3.23720383644104, "learning_rate": 2.8886743851695652e-08, "loss": 0.2873, "step": 14057 }, { "epoch": 0.9604427136708342, "grad_norm": 4.242792129516602, "learning_rate": 2.878752783060845e-08, "loss": 0.2548, "step": 14058 }, { "epoch": 0.9605110336817654, "grad_norm": 3.6141748428344727, "learning_rate": 2.8688481785588638e-08, "loss": 0.2113, "step": 14059 }, { "epoch": 0.9605793536926965, "grad_norm": 3.395597219467163, "learning_rate": 2.8589605721485667e-08, "loss": 0.2654, "step": 14060 }, { "epoch": 0.9606476737036278, "grad_norm": 3.1103382110595703, "learning_rate": 2.8490899643141997e-08, "loss": 0.2702, "step": 14061 }, { "epoch": 0.960715993714559, "grad_norm": 5.160185813903809, "learning_rate": 2.8392363555390375e-08, "loss": 0.1985, "step": 14062 }, { "epoch": 0.9607843137254902, "grad_norm": 3.6886212825775146, "learning_rate": 2.8293997463055774e-08, "loss": 0.2195, "step": 14063 }, { "epoch": 0.9608526337364214, "grad_norm": 4.3833818435668945, "learning_rate": 2.8195801370955786e-08, "loss": 0.2891, "step": 14064 }, { "epoch": 0.9609209537473526, "grad_norm": 3.0424182415008545, "learning_rate": 2.8097775283897898e-08, "loss": 0.144, "step": 14065 }, { "epoch": 0.9609892737582838, "grad_norm": 6.802420616149902, "learning_rate": 2.79999192066826e-08, "loss": 0.2584, "step": 14066 }, { "epoch": 0.961057593769215, "grad_norm": 4.099384784698486, "learning_rate": 2.790223314410184e-08, "loss": 0.2131, "step": 14067 }, { "epoch": 0.9611259137801462, "grad_norm": 5.117990970611572, "learning_rate": 2.7804717100938235e-08, "loss": 0.2066, "step": 14068 }, { "epoch": 0.9611942337910774, "grad_norm": 3.7441558837890625, "learning_rate": 2.7707371081967802e-08, "loss": 0.2069, "step": 14069 }, { "epoch": 0.9612625538020086, "grad_norm": 4.534829139709473, "learning_rate": 2.7610195091956835e-08, "loss": 0.2792, "step": 14070 }, { "epoch": 0.9613308738129398, "grad_norm": 6.787880897521973, "learning_rate": 2.751318913566386e-08, "loss": 0.3421, "step": 14071 }, { "epoch": 0.961399193823871, "grad_norm": 3.4689154624938965, "learning_rate": 2.741635321783925e-08, "loss": 0.2163, "step": 14072 }, { "epoch": 0.9614675138348022, "grad_norm": 3.9571781158447266, "learning_rate": 2.731968734322443e-08, "loss": 0.2299, "step": 14073 }, { "epoch": 0.9615358338457334, "grad_norm": 4.341765403747559, "learning_rate": 2.722319151655267e-08, "loss": 0.2842, "step": 14074 }, { "epoch": 0.9616041538566646, "grad_norm": 4.268698692321777, "learning_rate": 2.7126865742549854e-08, "loss": 0.3527, "step": 14075 }, { "epoch": 0.9616724738675958, "grad_norm": 5.24461555480957, "learning_rate": 2.7030710025932548e-08, "loss": 0.2988, "step": 14076 }, { "epoch": 0.9617407938785271, "grad_norm": 4.083198547363281, "learning_rate": 2.6934724371409145e-08, "loss": 0.2026, "step": 14077 }, { "epoch": 0.9618091138894582, "grad_norm": 4.462440013885498, "learning_rate": 2.6838908783679505e-08, "loss": 0.2383, "step": 14078 }, { "epoch": 0.9618774339003894, "grad_norm": 3.8408026695251465, "learning_rate": 2.674326326743609e-08, "loss": 0.2133, "step": 14079 }, { "epoch": 0.9619457539113206, "grad_norm": 4.223066329956055, "learning_rate": 2.6647787827361656e-08, "loss": 0.3331, "step": 14080 }, { "epoch": 0.9620140739222518, "grad_norm": 4.085244655609131, "learning_rate": 2.6552482468131965e-08, "loss": 0.3117, "step": 14081 }, { "epoch": 0.9620823939331831, "grad_norm": 4.690220355987549, "learning_rate": 2.645734719441345e-08, "loss": 0.2985, "step": 14082 }, { "epoch": 0.9621507139441142, "grad_norm": 3.1069302558898926, "learning_rate": 2.636238201086516e-08, "loss": 0.1957, "step": 14083 }, { "epoch": 0.9622190339550454, "grad_norm": 2.3654026985168457, "learning_rate": 2.626758692213721e-08, "loss": 0.1842, "step": 14084 }, { "epoch": 0.9622873539659766, "grad_norm": 3.77986216545105, "learning_rate": 2.6172961932870776e-08, "loss": 0.2556, "step": 14085 }, { "epoch": 0.9623556739769078, "grad_norm": 3.7038633823394775, "learning_rate": 2.6078507047700038e-08, "loss": 0.2329, "step": 14086 }, { "epoch": 0.9624239939878391, "grad_norm": 3.262066602706909, "learning_rate": 2.5984222271250245e-08, "loss": 0.2426, "step": 14087 }, { "epoch": 0.9624923139987702, "grad_norm": 3.5373356342315674, "learning_rate": 2.58901076081377e-08, "loss": 0.1912, "step": 14088 }, { "epoch": 0.9625606340097015, "grad_norm": 3.4365744590759277, "learning_rate": 2.579616306297133e-08, "loss": 0.1326, "step": 14089 }, { "epoch": 0.9626289540206326, "grad_norm": 4.082604885101318, "learning_rate": 2.5702388640351514e-08, "loss": 0.2075, "step": 14090 }, { "epoch": 0.9626972740315638, "grad_norm": 3.8037168979644775, "learning_rate": 2.5608784344870072e-08, "loss": 0.2907, "step": 14091 }, { "epoch": 0.9627655940424951, "grad_norm": 4.002998352050781, "learning_rate": 2.5515350181110285e-08, "loss": 0.1762, "step": 14092 }, { "epoch": 0.9628339140534262, "grad_norm": 4.431595802307129, "learning_rate": 2.5422086153647272e-08, "loss": 0.2805, "step": 14093 }, { "epoch": 0.9629022340643575, "grad_norm": 4.038757801055908, "learning_rate": 2.5328992267048377e-08, "loss": 0.2619, "step": 14094 }, { "epoch": 0.9629705540752886, "grad_norm": 3.7738168239593506, "learning_rate": 2.523606852587201e-08, "loss": 0.25, "step": 14095 }, { "epoch": 0.9630388740862199, "grad_norm": 3.7568962574005127, "learning_rate": 2.5143314934668425e-08, "loss": 0.2642, "step": 14096 }, { "epoch": 0.9631071940971511, "grad_norm": 4.142624855041504, "learning_rate": 2.5050731497979316e-08, "loss": 0.2623, "step": 14097 }, { "epoch": 0.9631755141080822, "grad_norm": 4.014171600341797, "learning_rate": 2.4958318220338614e-08, "loss": 0.2309, "step": 14098 }, { "epoch": 0.9632438341190135, "grad_norm": 2.040451765060425, "learning_rate": 2.4866075106270924e-08, "loss": 0.1459, "step": 14099 }, { "epoch": 0.9633121541299446, "grad_norm": 3.8484303951263428, "learning_rate": 2.477400216029385e-08, "loss": 0.3075, "step": 14100 }, { "epoch": 0.9633804741408759, "grad_norm": 5.233817100524902, "learning_rate": 2.468209938691529e-08, "loss": 0.2601, "step": 14101 }, { "epoch": 0.9634487941518071, "grad_norm": 3.8925821781158447, "learning_rate": 2.4590366790636142e-08, "loss": 0.3489, "step": 14102 }, { "epoch": 0.9635171141627382, "grad_norm": 4.023733139038086, "learning_rate": 2.449880437594798e-08, "loss": 0.2864, "step": 14103 }, { "epoch": 0.9635854341736695, "grad_norm": 8.05605697631836, "learning_rate": 2.4407412147334605e-08, "loss": 0.2682, "step": 14104 }, { "epoch": 0.9636537541846006, "grad_norm": 4.920704364776611, "learning_rate": 2.4316190109270885e-08, "loss": 0.3478, "step": 14105 }, { "epoch": 0.9637220741955319, "grad_norm": 3.6852025985717773, "learning_rate": 2.4225138266223522e-08, "loss": 0.2235, "step": 14106 }, { "epoch": 0.9637903942064631, "grad_norm": 4.894929885864258, "learning_rate": 2.413425662265184e-08, "loss": 0.2637, "step": 14107 }, { "epoch": 0.9638587142173943, "grad_norm": 9.569369316101074, "learning_rate": 2.4043545183005443e-08, "loss": 0.2132, "step": 14108 }, { "epoch": 0.9639270342283255, "grad_norm": 3.0246055126190186, "learning_rate": 2.3953003951726557e-08, "loss": 0.2236, "step": 14109 }, { "epoch": 0.9639953542392566, "grad_norm": 4.588312149047852, "learning_rate": 2.3862632933248863e-08, "loss": 0.224, "step": 14110 }, { "epoch": 0.9640636742501879, "grad_norm": 4.897657871246338, "learning_rate": 2.377243213199709e-08, "loss": 0.2517, "step": 14111 }, { "epoch": 0.9641319942611191, "grad_norm": 5.351536750793457, "learning_rate": 2.3682401552388598e-08, "loss": 0.343, "step": 14112 }, { "epoch": 0.9642003142720503, "grad_norm": 3.170441150665283, "learning_rate": 2.3592541198831416e-08, "loss": 0.2257, "step": 14113 }, { "epoch": 0.9642686342829815, "grad_norm": 4.265718936920166, "learning_rate": 2.3502851075726583e-08, "loss": 0.2831, "step": 14114 }, { "epoch": 0.9643369542939126, "grad_norm": 2.5755722522735596, "learning_rate": 2.3413331187465026e-08, "loss": 0.2009, "step": 14115 }, { "epoch": 0.9644052743048439, "grad_norm": 5.11683988571167, "learning_rate": 2.332398153843107e-08, "loss": 0.2531, "step": 14116 }, { "epoch": 0.9644735943157751, "grad_norm": 4.19166374206543, "learning_rate": 2.323480213300011e-08, "loss": 0.2904, "step": 14117 }, { "epoch": 0.9645419143267063, "grad_norm": 3.362813711166382, "learning_rate": 2.3145792975537815e-08, "loss": 0.1985, "step": 14118 }, { "epoch": 0.9646102343376375, "grad_norm": 4.9162492752075195, "learning_rate": 2.3056954070404033e-08, "loss": 0.2502, "step": 14119 }, { "epoch": 0.9646785543485688, "grad_norm": 3.0378105640411377, "learning_rate": 2.296828542194812e-08, "loss": 0.1535, "step": 14120 }, { "epoch": 0.9647468743594999, "grad_norm": 3.427698850631714, "learning_rate": 2.2879787034512045e-08, "loss": 0.2458, "step": 14121 }, { "epoch": 0.9648151943704311, "grad_norm": 3.6354777812957764, "learning_rate": 2.2791458912430008e-08, "loss": 0.2595, "step": 14122 }, { "epoch": 0.9648835143813623, "grad_norm": 4.364974021911621, "learning_rate": 2.2703301060026493e-08, "loss": 0.2951, "step": 14123 }, { "epoch": 0.9649518343922935, "grad_norm": 4.789905548095703, "learning_rate": 2.2615313481618604e-08, "loss": 0.2242, "step": 14124 }, { "epoch": 0.9650201544032248, "grad_norm": 3.591538667678833, "learning_rate": 2.2527496181514895e-08, "loss": 0.3129, "step": 14125 }, { "epoch": 0.9650884744141559, "grad_norm": 3.4519448280334473, "learning_rate": 2.243984916401537e-08, "loss": 0.2264, "step": 14126 }, { "epoch": 0.9651567944250871, "grad_norm": 4.32307243347168, "learning_rate": 2.235237243341187e-08, "loss": 0.1977, "step": 14127 }, { "epoch": 0.9652251144360183, "grad_norm": 4.754485607147217, "learning_rate": 2.2265065993988474e-08, "loss": 0.2753, "step": 14128 }, { "epoch": 0.9652934344469495, "grad_norm": 3.059695243835449, "learning_rate": 2.2177929850019538e-08, "loss": 0.1828, "step": 14129 }, { "epoch": 0.9653617544578807, "grad_norm": 4.174712181091309, "learning_rate": 2.2090964005772428e-08, "loss": 0.3449, "step": 14130 }, { "epoch": 0.9654300744688119, "grad_norm": 4.2718305587768555, "learning_rate": 2.2004168465505568e-08, "loss": 0.2692, "step": 14131 }, { "epoch": 0.9654983944797432, "grad_norm": 4.1646552085876465, "learning_rate": 2.191754323346845e-08, "loss": 0.3069, "step": 14132 }, { "epoch": 0.9655667144906743, "grad_norm": 3.9808340072631836, "learning_rate": 2.1831088313903955e-08, "loss": 0.2253, "step": 14133 }, { "epoch": 0.9656350345016055, "grad_norm": 2.9290125370025635, "learning_rate": 2.174480371104487e-08, "loss": 0.1716, "step": 14134 }, { "epoch": 0.9657033545125367, "grad_norm": 3.805628776550293, "learning_rate": 2.1658689429116585e-08, "loss": 0.1884, "step": 14135 }, { "epoch": 0.9657716745234679, "grad_norm": 4.495532035827637, "learning_rate": 2.1572745472335564e-08, "loss": 0.349, "step": 14136 }, { "epoch": 0.9658399945343992, "grad_norm": 4.233419418334961, "learning_rate": 2.1486971844910498e-08, "loss": 0.1731, "step": 14137 }, { "epoch": 0.9659083145453303, "grad_norm": 3.231753349304199, "learning_rate": 2.140136855104152e-08, "loss": 0.138, "step": 14138 }, { "epoch": 0.9659766345562615, "grad_norm": 3.573333978652954, "learning_rate": 2.131593559492023e-08, "loss": 0.2734, "step": 14139 }, { "epoch": 0.9660449545671927, "grad_norm": 4.452225208282471, "learning_rate": 2.1230672980730445e-08, "loss": 0.2813, "step": 14140 }, { "epoch": 0.9661132745781239, "grad_norm": 5.246598243713379, "learning_rate": 2.1145580712646662e-08, "loss": 0.2622, "step": 14141 }, { "epoch": 0.9661815945890552, "grad_norm": 3.450937032699585, "learning_rate": 2.1060658794835984e-08, "loss": 0.1735, "step": 14142 }, { "epoch": 0.9662499145999863, "grad_norm": 3.8417422771453857, "learning_rate": 2.097590723145698e-08, "loss": 0.1833, "step": 14143 }, { "epoch": 0.9663182346109176, "grad_norm": 3.880258560180664, "learning_rate": 2.0891326026659273e-08, "loss": 0.2978, "step": 14144 }, { "epoch": 0.9663865546218487, "grad_norm": 4.997775554656982, "learning_rate": 2.0806915184585107e-08, "loss": 0.2501, "step": 14145 }, { "epoch": 0.9664548746327799, "grad_norm": 6.242799282073975, "learning_rate": 2.0722674709367007e-08, "loss": 0.2976, "step": 14146 }, { "epoch": 0.9665231946437112, "grad_norm": 3.5742666721343994, "learning_rate": 2.0638604605130896e-08, "loss": 0.2555, "step": 14147 }, { "epoch": 0.9665915146546423, "grad_norm": 5.4524126052856445, "learning_rate": 2.055470487599298e-08, "loss": 0.277, "step": 14148 }, { "epoch": 0.9666598346655736, "grad_norm": 3.958933115005493, "learning_rate": 2.0470975526061697e-08, "loss": 0.2648, "step": 14149 }, { "epoch": 0.9667281546765047, "grad_norm": 5.228418350219727, "learning_rate": 2.0387416559437322e-08, "loss": 0.3644, "step": 14150 }, { "epoch": 0.9667964746874359, "grad_norm": 3.6309738159179688, "learning_rate": 2.0304027980211193e-08, "loss": 0.2578, "step": 14151 }, { "epoch": 0.9668647946983672, "grad_norm": 3.6993520259857178, "learning_rate": 2.0220809792466487e-08, "loss": 0.2582, "step": 14152 }, { "epoch": 0.9669331147092983, "grad_norm": 3.431598424911499, "learning_rate": 2.0137762000278613e-08, "loss": 0.1791, "step": 14153 }, { "epoch": 0.9670014347202296, "grad_norm": 3.939497709274292, "learning_rate": 2.005488460771404e-08, "loss": 0.1646, "step": 14154 }, { "epoch": 0.9670697547311607, "grad_norm": 4.334873199462891, "learning_rate": 1.997217761883069e-08, "loss": 0.1713, "step": 14155 }, { "epoch": 0.967138074742092, "grad_norm": 2.346615791320801, "learning_rate": 1.9889641037679095e-08, "loss": 0.1346, "step": 14156 }, { "epoch": 0.9672063947530232, "grad_norm": 4.0420918464660645, "learning_rate": 1.980727486830086e-08, "loss": 0.3382, "step": 14157 }, { "epoch": 0.9672747147639543, "grad_norm": 3.2179014682769775, "learning_rate": 1.972507911472865e-08, "loss": 0.114, "step": 14158 }, { "epoch": 0.9673430347748856, "grad_norm": 4.660645484924316, "learning_rate": 1.9643053780987353e-08, "loss": 0.2404, "step": 14159 }, { "epoch": 0.9674113547858167, "grad_norm": 4.838193893432617, "learning_rate": 1.956119887109409e-08, "loss": 0.2458, "step": 14160 }, { "epoch": 0.967479674796748, "grad_norm": 5.118243217468262, "learning_rate": 1.947951438905704e-08, "loss": 0.2352, "step": 14161 }, { "epoch": 0.9675479948076792, "grad_norm": 2.3350131511688232, "learning_rate": 1.9398000338875842e-08, "loss": 0.1191, "step": 14162 }, { "epoch": 0.9676163148186103, "grad_norm": 4.406408309936523, "learning_rate": 1.931665672454197e-08, "loss": 0.3399, "step": 14163 }, { "epoch": 0.9676846348295416, "grad_norm": 3.544903039932251, "learning_rate": 1.9235483550038733e-08, "loss": 0.1604, "step": 14164 }, { "epoch": 0.9677529548404727, "grad_norm": 3.146453619003296, "learning_rate": 1.915448081934129e-08, "loss": 0.1998, "step": 14165 }, { "epoch": 0.967821274851404, "grad_norm": 4.234166145324707, "learning_rate": 1.9073648536415076e-08, "loss": 0.2817, "step": 14166 }, { "epoch": 0.9678895948623352, "grad_norm": 3.3085715770721436, "learning_rate": 1.8992986705219315e-08, "loss": 0.2398, "step": 14167 }, { "epoch": 0.9679579148732664, "grad_norm": 4.00118350982666, "learning_rate": 1.8912495329703515e-08, "loss": 0.2666, "step": 14168 }, { "epoch": 0.9680262348841976, "grad_norm": 4.664091110229492, "learning_rate": 1.883217441380902e-08, "loss": 0.251, "step": 14169 }, { "epoch": 0.9680945548951287, "grad_norm": 4.449556827545166, "learning_rate": 1.8752023961469023e-08, "loss": 0.2404, "step": 14170 }, { "epoch": 0.96816287490606, "grad_norm": 4.000760078430176, "learning_rate": 1.8672043976607767e-08, "loss": 0.2707, "step": 14171 }, { "epoch": 0.9682311949169912, "grad_norm": 3.602266311645508, "learning_rate": 1.859223446314251e-08, "loss": 0.2701, "step": 14172 }, { "epoch": 0.9682995149279224, "grad_norm": 3.572298526763916, "learning_rate": 1.8512595424980403e-08, "loss": 0.2061, "step": 14173 }, { "epoch": 0.9683678349388536, "grad_norm": 3.6001017093658447, "learning_rate": 1.8433126866021997e-08, "loss": 0.2454, "step": 14174 }, { "epoch": 0.9684361549497847, "grad_norm": 4.07478141784668, "learning_rate": 1.835382879015851e-08, "loss": 0.1996, "step": 14175 }, { "epoch": 0.968504474960716, "grad_norm": 3.873190402984619, "learning_rate": 1.8274701201272224e-08, "loss": 0.223, "step": 14176 }, { "epoch": 0.9685727949716472, "grad_norm": 4.586373805999756, "learning_rate": 1.8195744103238818e-08, "loss": 0.1949, "step": 14177 }, { "epoch": 0.9686411149825784, "grad_norm": 3.6943013668060303, "learning_rate": 1.8116957499924257e-08, "loss": 0.2456, "step": 14178 }, { "epoch": 0.9687094349935096, "grad_norm": 3.1989619731903076, "learning_rate": 1.8038341395185953e-08, "loss": 0.2719, "step": 14179 }, { "epoch": 0.9687777550044409, "grad_norm": 3.5825295448303223, "learning_rate": 1.7959895792873935e-08, "loss": 0.2194, "step": 14180 }, { "epoch": 0.968846075015372, "grad_norm": 5.228213787078857, "learning_rate": 1.7881620696829692e-08, "loss": 0.1883, "step": 14181 }, { "epoch": 0.9689143950263032, "grad_norm": 6.246530532836914, "learning_rate": 1.780351611088615e-08, "loss": 0.2864, "step": 14182 }, { "epoch": 0.9689827150372344, "grad_norm": 5.085816383361816, "learning_rate": 1.7725582038867316e-08, "loss": 0.3832, "step": 14183 }, { "epoch": 0.9690510350481656, "grad_norm": 4.350870132446289, "learning_rate": 1.7647818484589794e-08, "loss": 0.1482, "step": 14184 }, { "epoch": 0.9691193550590969, "grad_norm": 4.384940147399902, "learning_rate": 1.7570225451861654e-08, "loss": 0.2708, "step": 14185 }, { "epoch": 0.969187675070028, "grad_norm": 3.4506449699401855, "learning_rate": 1.749280294448241e-08, "loss": 0.1854, "step": 14186 }, { "epoch": 0.9692559950809592, "grad_norm": 4.728800296783447, "learning_rate": 1.7415550966242644e-08, "loss": 0.3082, "step": 14187 }, { "epoch": 0.9693243150918904, "grad_norm": 4.672534942626953, "learning_rate": 1.7338469520925938e-08, "loss": 0.2316, "step": 14188 }, { "epoch": 0.9693926351028216, "grad_norm": 4.389451503753662, "learning_rate": 1.726155861230616e-08, "loss": 0.2046, "step": 14189 }, { "epoch": 0.9694609551137529, "grad_norm": 3.5254104137420654, "learning_rate": 1.7184818244149802e-08, "loss": 0.261, "step": 14190 }, { "epoch": 0.969529275124684, "grad_norm": 3.8894121646881104, "learning_rate": 1.71082484202148e-08, "loss": 0.2207, "step": 14191 }, { "epoch": 0.9695975951356153, "grad_norm": 5.081105709075928, "learning_rate": 1.7031849144249765e-08, "loss": 0.2431, "step": 14192 }, { "epoch": 0.9696659151465464, "grad_norm": 3.865180015563965, "learning_rate": 1.6955620419996318e-08, "loss": 0.3126, "step": 14193 }, { "epoch": 0.9697342351574776, "grad_norm": 4.797886371612549, "learning_rate": 1.687956225118753e-08, "loss": 0.2638, "step": 14194 }, { "epoch": 0.9698025551684089, "grad_norm": 4.259920120239258, "learning_rate": 1.6803674641547527e-08, "loss": 0.2757, "step": 14195 }, { "epoch": 0.96987087517934, "grad_norm": 4.078639030456543, "learning_rate": 1.6727957594791898e-08, "loss": 0.2426, "step": 14196 }, { "epoch": 0.9699391951902713, "grad_norm": 3.6335158348083496, "learning_rate": 1.665241111462845e-08, "loss": 0.2144, "step": 14197 }, { "epoch": 0.9700075152012024, "grad_norm": 4.150928497314453, "learning_rate": 1.657703520475684e-08, "loss": 0.2199, "step": 14198 }, { "epoch": 0.9700758352121336, "grad_norm": 4.785776138305664, "learning_rate": 1.650182986886778e-08, "loss": 0.2313, "step": 14199 }, { "epoch": 0.9701441552230649, "grad_norm": 3.963408946990967, "learning_rate": 1.642679511064421e-08, "loss": 0.2835, "step": 14200 }, { "epoch": 0.970212475233996, "grad_norm": 4.694112300872803, "learning_rate": 1.635193093375975e-08, "loss": 0.1891, "step": 14201 }, { "epoch": 0.9702807952449273, "grad_norm": 4.352455139160156, "learning_rate": 1.6277237341880636e-08, "loss": 0.304, "step": 14202 }, { "epoch": 0.9703491152558584, "grad_norm": 5.693536758422852, "learning_rate": 1.620271433866455e-08, "loss": 0.2315, "step": 14203 }, { "epoch": 0.9704174352667897, "grad_norm": 3.7352700233459473, "learning_rate": 1.6128361927760626e-08, "loss": 0.1841, "step": 14204 }, { "epoch": 0.9704857552777209, "grad_norm": 3.3144326210021973, "learning_rate": 1.6054180112809847e-08, "loss": 0.2285, "step": 14205 }, { "epoch": 0.970554075288652, "grad_norm": 5.11912202835083, "learning_rate": 1.5980168897444248e-08, "loss": 0.2944, "step": 14206 }, { "epoch": 0.9706223952995833, "grad_norm": 4.62114953994751, "learning_rate": 1.5906328285288097e-08, "loss": 0.3178, "step": 14207 }, { "epoch": 0.9706907153105144, "grad_norm": 3.463897228240967, "learning_rate": 1.5832658279957502e-08, "loss": 0.2099, "step": 14208 }, { "epoch": 0.9707590353214457, "grad_norm": 4.637325286865234, "learning_rate": 1.5759158885059244e-08, "loss": 0.2508, "step": 14209 }, { "epoch": 0.9708273553323769, "grad_norm": 4.186702728271484, "learning_rate": 1.5685830104193497e-08, "loss": 0.3134, "step": 14210 }, { "epoch": 0.970895675343308, "grad_norm": 2.46673846244812, "learning_rate": 1.5612671940949952e-08, "loss": 0.2215, "step": 14211 }, { "epoch": 0.9709639953542393, "grad_norm": 5.80738639831543, "learning_rate": 1.5539684398911292e-08, "loss": 0.2459, "step": 14212 }, { "epoch": 0.9710323153651704, "grad_norm": 3.1875014305114746, "learning_rate": 1.546686748165127e-08, "loss": 0.2224, "step": 14213 }, { "epoch": 0.9711006353761017, "grad_norm": 5.468564033508301, "learning_rate": 1.5394221192736256e-08, "loss": 0.377, "step": 14214 }, { "epoch": 0.9711689553870329, "grad_norm": 3.934535264968872, "learning_rate": 1.532174553572291e-08, "loss": 0.2666, "step": 14215 }, { "epoch": 0.9712372753979641, "grad_norm": 3.893704414367676, "learning_rate": 1.5249440514160495e-08, "loss": 0.2564, "step": 14216 }, { "epoch": 0.9713055954088953, "grad_norm": 3.0334925651550293, "learning_rate": 1.5177306131589353e-08, "loss": 0.2147, "step": 14217 }, { "epoch": 0.9713739154198264, "grad_norm": 4.5037360191345215, "learning_rate": 1.5105342391542044e-08, "loss": 0.2762, "step": 14218 }, { "epoch": 0.9714422354307577, "grad_norm": 4.704814910888672, "learning_rate": 1.5033549297542193e-08, "loss": 0.2345, "step": 14219 }, { "epoch": 0.9715105554416889, "grad_norm": 4.3409528732299805, "learning_rate": 1.496192685310527e-08, "loss": 0.2595, "step": 14220 }, { "epoch": 0.9715788754526201, "grad_norm": 4.042525768280029, "learning_rate": 1.4890475061738573e-08, "loss": 0.243, "step": 14221 }, { "epoch": 0.9716471954635513, "grad_norm": 4.788126468658447, "learning_rate": 1.4819193926940865e-08, "loss": 0.3966, "step": 14222 }, { "epoch": 0.9717155154744824, "grad_norm": 4.010177135467529, "learning_rate": 1.4748083452202354e-08, "loss": 0.2959, "step": 14223 }, { "epoch": 0.9717838354854137, "grad_norm": 4.226286888122559, "learning_rate": 1.4677143641005475e-08, "loss": 0.3346, "step": 14224 }, { "epoch": 0.9718521554963448, "grad_norm": 5.121162414550781, "learning_rate": 1.4606374496824115e-08, "loss": 0.3065, "step": 14225 }, { "epoch": 0.9719204755072761, "grad_norm": 3.41018009185791, "learning_rate": 1.4535776023122836e-08, "loss": 0.1969, "step": 14226 }, { "epoch": 0.9719887955182073, "grad_norm": 6.414452075958252, "learning_rate": 1.4465348223359598e-08, "loss": 0.1857, "step": 14227 }, { "epoch": 0.9720571155291385, "grad_norm": 3.9486143589019775, "learning_rate": 1.4395091100982638e-08, "loss": 0.197, "step": 14228 }, { "epoch": 0.9721254355400697, "grad_norm": 3.49099063873291, "learning_rate": 1.4325004659432428e-08, "loss": 0.2101, "step": 14229 }, { "epoch": 0.9721937555510008, "grad_norm": 3.599532127380371, "learning_rate": 1.42550889021405e-08, "loss": 0.2687, "step": 14230 }, { "epoch": 0.9722620755619321, "grad_norm": 3.2160589694976807, "learning_rate": 1.4185343832531005e-08, "loss": 0.251, "step": 14231 }, { "epoch": 0.9723303955728633, "grad_norm": 3.3820981979370117, "learning_rate": 1.4115769454018767e-08, "loss": 0.2333, "step": 14232 }, { "epoch": 0.9723987155837945, "grad_norm": 4.071512222290039, "learning_rate": 1.4046365770010449e-08, "loss": 0.3096, "step": 14233 }, { "epoch": 0.9724670355947257, "grad_norm": 4.193127632141113, "learning_rate": 1.3977132783905333e-08, "loss": 0.2609, "step": 14234 }, { "epoch": 0.9725353556056568, "grad_norm": 3.817110300064087, "learning_rate": 1.3908070499092984e-08, "loss": 0.2486, "step": 14235 }, { "epoch": 0.9726036756165881, "grad_norm": 5.392463207244873, "learning_rate": 1.3839178918955586e-08, "loss": 0.295, "step": 14236 }, { "epoch": 0.9726719956275193, "grad_norm": 3.50441837310791, "learning_rate": 1.3770458046865996e-08, "loss": 0.1753, "step": 14237 }, { "epoch": 0.9727403156384505, "grad_norm": 3.1849093437194824, "learning_rate": 1.3701907886190079e-08, "loss": 0.183, "step": 14238 }, { "epoch": 0.9728086356493817, "grad_norm": 4.61875057220459, "learning_rate": 1.3633528440283593e-08, "loss": 0.2309, "step": 14239 }, { "epoch": 0.972876955660313, "grad_norm": 5.394072532653809, "learning_rate": 1.3565319712495693e-08, "loss": 0.3708, "step": 14240 }, { "epoch": 0.9729452756712441, "grad_norm": 4.107297420501709, "learning_rate": 1.3497281706166208e-08, "loss": 0.2825, "step": 14241 }, { "epoch": 0.9730135956821753, "grad_norm": 3.65873122215271, "learning_rate": 1.3429414424626807e-08, "loss": 0.3247, "step": 14242 }, { "epoch": 0.9730819156931065, "grad_norm": 7.623343467712402, "learning_rate": 1.3361717871200218e-08, "loss": 0.3727, "step": 14243 }, { "epoch": 0.9731502357040377, "grad_norm": 4.933572292327881, "learning_rate": 1.329419204920218e-08, "loss": 0.2218, "step": 14244 }, { "epoch": 0.973218555714969, "grad_norm": 4.3999247550964355, "learning_rate": 1.3226836961938714e-08, "loss": 0.2401, "step": 14245 }, { "epoch": 0.9732868757259001, "grad_norm": 5.228680610656738, "learning_rate": 1.3159652612708072e-08, "loss": 0.2744, "step": 14246 }, { "epoch": 0.9733551957368313, "grad_norm": 3.899890422821045, "learning_rate": 1.3092639004800343e-08, "loss": 0.2871, "step": 14247 }, { "epoch": 0.9734235157477625, "grad_norm": 3.5053751468658447, "learning_rate": 1.3025796141496682e-08, "loss": 0.2598, "step": 14248 }, { "epoch": 0.9734918357586937, "grad_norm": 3.765825033187866, "learning_rate": 1.2959124026070467e-08, "loss": 0.3458, "step": 14249 }, { "epoch": 0.973560155769625, "grad_norm": 2.907604932785034, "learning_rate": 1.2892622661786534e-08, "loss": 0.2067, "step": 14250 }, { "epoch": 0.9736284757805561, "grad_norm": 3.793843984603882, "learning_rate": 1.2826292051901167e-08, "loss": 0.2139, "step": 14251 }, { "epoch": 0.9736967957914874, "grad_norm": 3.097189426422119, "learning_rate": 1.2760132199662099e-08, "loss": 0.2476, "step": 14252 }, { "epoch": 0.9737651158024185, "grad_norm": 5.305685043334961, "learning_rate": 1.2694143108309298e-08, "loss": 0.2166, "step": 14253 }, { "epoch": 0.9738334358133497, "grad_norm": 6.649468898773193, "learning_rate": 1.2628324781073785e-08, "loss": 0.3542, "step": 14254 }, { "epoch": 0.973901755824281, "grad_norm": 4.933762073516846, "learning_rate": 1.2562677221179208e-08, "loss": 0.2085, "step": 14255 }, { "epoch": 0.9739700758352121, "grad_norm": 4.272726535797119, "learning_rate": 1.2497200431839494e-08, "loss": 0.2213, "step": 14256 }, { "epoch": 0.9740383958461434, "grad_norm": 5.9509758949279785, "learning_rate": 1.243189441626119e-08, "loss": 0.2159, "step": 14257 }, { "epoch": 0.9741067158570745, "grad_norm": 3.0457866191864014, "learning_rate": 1.2366759177641905e-08, "loss": 0.1925, "step": 14258 }, { "epoch": 0.9741750358680057, "grad_norm": 4.41744327545166, "learning_rate": 1.2301794719171088e-08, "loss": 0.2097, "step": 14259 }, { "epoch": 0.974243355878937, "grad_norm": 3.880017042160034, "learning_rate": 1.2237001044030415e-08, "loss": 0.2416, "step": 14260 }, { "epoch": 0.9743116758898681, "grad_norm": 5.062867164611816, "learning_rate": 1.217237815539185e-08, "loss": 0.2637, "step": 14261 }, { "epoch": 0.9743799959007994, "grad_norm": 3.6948390007019043, "learning_rate": 1.2107926056420748e-08, "loss": 0.2914, "step": 14262 }, { "epoch": 0.9744483159117305, "grad_norm": 3.5367491245269775, "learning_rate": 1.2043644750272364e-08, "loss": 0.2614, "step": 14263 }, { "epoch": 0.9745166359226618, "grad_norm": 4.085489273071289, "learning_rate": 1.1979534240094957e-08, "loss": 0.2813, "step": 14264 }, { "epoch": 0.974584955933593, "grad_norm": 3.9191737174987793, "learning_rate": 1.1915594529027462e-08, "loss": 0.1959, "step": 14265 }, { "epoch": 0.9746532759445241, "grad_norm": 7.224788188934326, "learning_rate": 1.185182562020065e-08, "loss": 0.3159, "step": 14266 }, { "epoch": 0.9747215959554554, "grad_norm": 5.706302642822266, "learning_rate": 1.1788227516737914e-08, "loss": 0.2529, "step": 14267 }, { "epoch": 0.9747899159663865, "grad_norm": 5.730257034301758, "learning_rate": 1.172480022175254e-08, "loss": 0.186, "step": 14268 }, { "epoch": 0.9748582359773178, "grad_norm": 4.618454933166504, "learning_rate": 1.1661543738351209e-08, "loss": 0.2437, "step": 14269 }, { "epoch": 0.974926555988249, "grad_norm": 3.7043778896331787, "learning_rate": 1.159845806963089e-08, "loss": 0.2511, "step": 14270 }, { "epoch": 0.9749948759991801, "grad_norm": 3.116576671600342, "learning_rate": 1.1535543218680777e-08, "loss": 0.236, "step": 14271 }, { "epoch": 0.9750631960101114, "grad_norm": 3.94490385055542, "learning_rate": 1.1472799188581905e-08, "loss": 0.1883, "step": 14272 }, { "epoch": 0.9751315160210425, "grad_norm": 3.417978286743164, "learning_rate": 1.1410225982406764e-08, "loss": 0.2429, "step": 14273 }, { "epoch": 0.9751998360319738, "grad_norm": 4.606127738952637, "learning_rate": 1.1347823603218899e-08, "loss": 0.3114, "step": 14274 }, { "epoch": 0.975268156042905, "grad_norm": 3.9986050128936768, "learning_rate": 1.1285592054074477e-08, "loss": 0.2223, "step": 14275 }, { "epoch": 0.9753364760538362, "grad_norm": 3.2765886783599854, "learning_rate": 1.1223531338020342e-08, "loss": 0.251, "step": 14276 }, { "epoch": 0.9754047960647674, "grad_norm": 5.248148441314697, "learning_rate": 1.1161641458095562e-08, "loss": 0.2508, "step": 14277 }, { "epoch": 0.9754731160756985, "grad_norm": 4.095773220062256, "learning_rate": 1.1099922417331042e-08, "loss": 0.2158, "step": 14278 }, { "epoch": 0.9755414360866298, "grad_norm": 3.5925166606903076, "learning_rate": 1.103837421874876e-08, "loss": 0.2041, "step": 14279 }, { "epoch": 0.975609756097561, "grad_norm": 3.5288355350494385, "learning_rate": 1.0976996865362909e-08, "loss": 0.2588, "step": 14280 }, { "epoch": 0.9756780761084922, "grad_norm": 3.1394879817962646, "learning_rate": 1.0915790360178368e-08, "loss": 0.213, "step": 14281 }, { "epoch": 0.9757463961194234, "grad_norm": 4.329687118530273, "learning_rate": 1.0854754706192626e-08, "loss": 0.2729, "step": 14282 }, { "epoch": 0.9758147161303545, "grad_norm": 3.66872239112854, "learning_rate": 1.0793889906394627e-08, "loss": 0.2666, "step": 14283 }, { "epoch": 0.9758830361412858, "grad_norm": 3.4714322090148926, "learning_rate": 1.0733195963763986e-08, "loss": 0.2243, "step": 14284 }, { "epoch": 0.975951356152217, "grad_norm": 3.8995251655578613, "learning_rate": 1.0672672881273715e-08, "loss": 0.2154, "step": 14285 }, { "epoch": 0.9760196761631482, "grad_norm": 3.696622610092163, "learning_rate": 1.0612320661886721e-08, "loss": 0.1688, "step": 14286 }, { "epoch": 0.9760879961740794, "grad_norm": 4.686995983123779, "learning_rate": 1.0552139308558916e-08, "loss": 0.2797, "step": 14287 }, { "epoch": 0.9761563161850106, "grad_norm": 4.868488788604736, "learning_rate": 1.0492128824236501e-08, "loss": 0.2061, "step": 14288 }, { "epoch": 0.9762246361959418, "grad_norm": 3.2386810779571533, "learning_rate": 1.0432289211858681e-08, "loss": 0.2013, "step": 14289 }, { "epoch": 0.976292956206873, "grad_norm": 4.144886493682861, "learning_rate": 1.0372620474355332e-08, "loss": 0.2207, "step": 14290 }, { "epoch": 0.9763612762178042, "grad_norm": 3.534396171569824, "learning_rate": 1.0313122614648173e-08, "loss": 0.173, "step": 14291 }, { "epoch": 0.9764295962287354, "grad_norm": 3.7952866554260254, "learning_rate": 1.0253795635651153e-08, "loss": 0.2753, "step": 14292 }, { "epoch": 0.9764979162396666, "grad_norm": 7.021458625793457, "learning_rate": 1.0194639540268501e-08, "loss": 0.3811, "step": 14293 }, { "epoch": 0.9765662362505978, "grad_norm": 3.682140588760376, "learning_rate": 1.0135654331397847e-08, "loss": 0.1801, "step": 14294 }, { "epoch": 0.976634556261529, "grad_norm": 6.346162796020508, "learning_rate": 1.0076840011927101e-08, "loss": 0.359, "step": 14295 }, { "epoch": 0.9767028762724602, "grad_norm": 4.119283676147461, "learning_rate": 1.0018196584736017e-08, "loss": 0.2379, "step": 14296 }, { "epoch": 0.9767711962833914, "grad_norm": 6.16506814956665, "learning_rate": 9.959724052696572e-09, "loss": 0.3191, "step": 14297 }, { "epoch": 0.9768395162943226, "grad_norm": 4.015295505523682, "learning_rate": 9.9014224186722e-09, "loss": 0.1853, "step": 14298 }, { "epoch": 0.9769078363052538, "grad_norm": 2.755805492401123, "learning_rate": 9.843291685517008e-09, "loss": 0.2663, "step": 14299 }, { "epoch": 0.9769761563161851, "grad_norm": 3.8583309650421143, "learning_rate": 9.785331856078105e-09, "loss": 0.2096, "step": 14300 }, { "epoch": 0.9770444763271162, "grad_norm": 4.419081211090088, "learning_rate": 9.727542933193667e-09, "loss": 0.1933, "step": 14301 }, { "epoch": 0.9771127963380474, "grad_norm": 4.096506118774414, "learning_rate": 9.669924919693318e-09, "loss": 0.3279, "step": 14302 }, { "epoch": 0.9771811163489786, "grad_norm": 4.973808765411377, "learning_rate": 9.612477818398524e-09, "loss": 0.1984, "step": 14303 }, { "epoch": 0.9772494363599098, "grad_norm": 4.936200141906738, "learning_rate": 9.555201632121812e-09, "loss": 0.2915, "step": 14304 }, { "epoch": 0.9773177563708411, "grad_norm": 4.513254642486572, "learning_rate": 9.49809636366833e-09, "loss": 0.3324, "step": 14305 }, { "epoch": 0.9773860763817722, "grad_norm": 3.9419100284576416, "learning_rate": 9.441162015834669e-09, "loss": 0.2951, "step": 14306 }, { "epoch": 0.9774543963927034, "grad_norm": 4.143322467803955, "learning_rate": 9.384398591408104e-09, "loss": 0.2663, "step": 14307 }, { "epoch": 0.9775227164036346, "grad_norm": 5.511620044708252, "learning_rate": 9.327806093168522e-09, "loss": 0.3628, "step": 14308 }, { "epoch": 0.9775910364145658, "grad_norm": 3.426213502883911, "learning_rate": 9.27138452388726e-09, "loss": 0.1996, "step": 14309 }, { "epoch": 0.9776593564254971, "grad_norm": 5.065614223480225, "learning_rate": 9.21513388632672e-09, "loss": 0.1785, "step": 14310 }, { "epoch": 0.9777276764364282, "grad_norm": 4.166222095489502, "learning_rate": 9.159054183241922e-09, "loss": 0.3257, "step": 14311 }, { "epoch": 0.9777959964473595, "grad_norm": 4.3545379638671875, "learning_rate": 9.103145417378167e-09, "loss": 0.2427, "step": 14312 }, { "epoch": 0.9778643164582906, "grad_norm": 4.200120449066162, "learning_rate": 9.047407591473765e-09, "loss": 0.2586, "step": 14313 }, { "epoch": 0.9779326364692218, "grad_norm": 3.776823043823242, "learning_rate": 8.991840708257704e-09, "loss": 0.2312, "step": 14314 }, { "epoch": 0.9780009564801531, "grad_norm": 3.822270393371582, "learning_rate": 8.93644477045119e-09, "loss": 0.2422, "step": 14315 }, { "epoch": 0.9780692764910842, "grad_norm": 4.391430854797363, "learning_rate": 8.881219780766892e-09, "loss": 0.3299, "step": 14316 }, { "epoch": 0.9781375965020155, "grad_norm": 4.520754337310791, "learning_rate": 8.82616574190853e-09, "loss": 0.1994, "step": 14317 }, { "epoch": 0.9782059165129466, "grad_norm": 4.572635650634766, "learning_rate": 8.771282656572454e-09, "loss": 0.2113, "step": 14318 }, { "epoch": 0.9782742365238778, "grad_norm": 3.5413880348205566, "learning_rate": 8.716570527446066e-09, "loss": 0.2248, "step": 14319 }, { "epoch": 0.9783425565348091, "grad_norm": 3.3652451038360596, "learning_rate": 8.662029357208223e-09, "loss": 0.2351, "step": 14320 }, { "epoch": 0.9784108765457402, "grad_norm": 4.713329315185547, "learning_rate": 8.60765914853001e-09, "loss": 0.2916, "step": 14321 }, { "epoch": 0.9784791965566715, "grad_norm": 6.50744104385376, "learning_rate": 8.55345990407358e-09, "loss": 0.1541, "step": 14322 }, { "epoch": 0.9785475165676026, "grad_norm": 3.8431079387664795, "learning_rate": 8.499431626492915e-09, "loss": 0.3543, "step": 14323 }, { "epoch": 0.9786158365785339, "grad_norm": 4.247878074645996, "learning_rate": 8.445574318433457e-09, "loss": 0.2395, "step": 14324 }, { "epoch": 0.9786841565894651, "grad_norm": 6.929589748382568, "learning_rate": 8.391887982532874e-09, "loss": 0.3064, "step": 14325 }, { "epoch": 0.9787524766003962, "grad_norm": 3.989539623260498, "learning_rate": 8.338372621419898e-09, "loss": 0.1902, "step": 14326 }, { "epoch": 0.9788207966113275, "grad_norm": 4.138282775878906, "learning_rate": 8.28502823771471e-09, "loss": 0.2275, "step": 14327 }, { "epoch": 0.9788891166222586, "grad_norm": 4.095611572265625, "learning_rate": 8.231854834030105e-09, "loss": 0.2058, "step": 14328 }, { "epoch": 0.9789574366331899, "grad_norm": 3.944206476211548, "learning_rate": 8.178852412969172e-09, "loss": 0.3138, "step": 14329 }, { "epoch": 0.9790257566441211, "grad_norm": 3.737302303314209, "learning_rate": 8.126020977127612e-09, "loss": 0.2699, "step": 14330 }, { "epoch": 0.9790940766550522, "grad_norm": 3.633164405822754, "learning_rate": 8.073360529092577e-09, "loss": 0.2229, "step": 14331 }, { "epoch": 0.9791623966659835, "grad_norm": 5.1989359855651855, "learning_rate": 8.02087107144228e-09, "loss": 0.4509, "step": 14332 }, { "epoch": 0.9792307166769146, "grad_norm": 4.128684043884277, "learning_rate": 7.968552606747559e-09, "loss": 0.2123, "step": 14333 }, { "epoch": 0.9792990366878459, "grad_norm": 5.084318161010742, "learning_rate": 7.916405137569526e-09, "loss": 0.2717, "step": 14334 }, { "epoch": 0.9793673566987771, "grad_norm": 3.2063217163085938, "learning_rate": 7.864428666462698e-09, "loss": 0.2418, "step": 14335 }, { "epoch": 0.9794356767097083, "grad_norm": 4.300771236419678, "learning_rate": 7.812623195971479e-09, "loss": 0.2096, "step": 14336 }, { "epoch": 0.9795039967206395, "grad_norm": 3.790471076965332, "learning_rate": 7.7609887286329e-09, "loss": 0.2999, "step": 14337 }, { "epoch": 0.9795723167315706, "grad_norm": 4.495595932006836, "learning_rate": 7.709525266975047e-09, "loss": 0.3325, "step": 14338 }, { "epoch": 0.9796406367425019, "grad_norm": 4.756397724151611, "learning_rate": 7.658232813518629e-09, "loss": 0.2063, "step": 14339 }, { "epoch": 0.9797089567534331, "grad_norm": 4.188876628875732, "learning_rate": 7.607111370775021e-09, "loss": 0.2922, "step": 14340 }, { "epoch": 0.9797772767643643, "grad_norm": 3.570016384124756, "learning_rate": 7.556160941247058e-09, "loss": 0.279, "step": 14341 }, { "epoch": 0.9798455967752955, "grad_norm": 5.910639762878418, "learning_rate": 7.5053815274298e-09, "loss": 0.2924, "step": 14342 }, { "epoch": 0.9799139167862266, "grad_norm": 3.7741289138793945, "learning_rate": 7.454773131810532e-09, "loss": 0.2374, "step": 14343 }, { "epoch": 0.9799822367971579, "grad_norm": 4.801538944244385, "learning_rate": 7.404335756866443e-09, "loss": 0.3086, "step": 14344 }, { "epoch": 0.9800505568080891, "grad_norm": 3.866795539855957, "learning_rate": 7.35406940506772e-09, "loss": 0.2684, "step": 14345 }, { "epoch": 0.9801188768190203, "grad_norm": 3.4344518184661865, "learning_rate": 7.303974078876008e-09, "loss": 0.2494, "step": 14346 }, { "epoch": 0.9801871968299515, "grad_norm": 3.6469528675079346, "learning_rate": 7.25404978074401e-09, "loss": 0.1491, "step": 14347 }, { "epoch": 0.9802555168408827, "grad_norm": 4.0225510597229, "learning_rate": 7.20429651311627e-09, "loss": 0.2761, "step": 14348 }, { "epoch": 0.9803238368518139, "grad_norm": 4.324227809906006, "learning_rate": 7.154714278429563e-09, "loss": 0.2551, "step": 14349 }, { "epoch": 0.9803921568627451, "grad_norm": 4.255499839782715, "learning_rate": 7.105303079111336e-09, "loss": 0.2899, "step": 14350 }, { "epoch": 0.9804604768736763, "grad_norm": 4.753640174865723, "learning_rate": 7.056062917581263e-09, "loss": 0.2669, "step": 14351 }, { "epoch": 0.9805287968846075, "grad_norm": 2.827785015106201, "learning_rate": 7.00699379625086e-09, "loss": 0.14, "step": 14352 }, { "epoch": 0.9805971168955387, "grad_norm": 4.311821460723877, "learning_rate": 6.958095717522317e-09, "loss": 0.174, "step": 14353 }, { "epoch": 0.9806654369064699, "grad_norm": 3.6525936126708984, "learning_rate": 6.90936868379044e-09, "loss": 0.2012, "step": 14354 }, { "epoch": 0.9807337569174011, "grad_norm": 5.845663547515869, "learning_rate": 6.860812697441099e-09, "loss": 0.2448, "step": 14355 }, { "epoch": 0.9808020769283323, "grad_norm": 3.6767306327819824, "learning_rate": 6.812427760852002e-09, "loss": 0.3031, "step": 14356 }, { "epoch": 0.9808703969392635, "grad_norm": 4.327517032623291, "learning_rate": 6.764213876392699e-09, "loss": 0.261, "step": 14357 }, { "epoch": 0.9809387169501947, "grad_norm": 4.009260654449463, "learning_rate": 6.716171046423414e-09, "loss": 0.2756, "step": 14358 }, { "epoch": 0.9810070369611259, "grad_norm": 7.48527717590332, "learning_rate": 6.668299273297373e-09, "loss": 0.3147, "step": 14359 }, { "epoch": 0.9810753569720572, "grad_norm": 4.2686262130737305, "learning_rate": 6.620598559358481e-09, "loss": 0.2976, "step": 14360 }, { "epoch": 0.9811436769829883, "grad_norm": 5.112276077270508, "learning_rate": 6.573068906942092e-09, "loss": 0.2595, "step": 14361 }, { "epoch": 0.9812119969939195, "grad_norm": 3.791001558303833, "learning_rate": 6.525710318376565e-09, "loss": 0.2152, "step": 14362 }, { "epoch": 0.9812803170048507, "grad_norm": 3.8243002891540527, "learning_rate": 6.478522795979769e-09, "loss": 0.3024, "step": 14363 }, { "epoch": 0.9813486370157819, "grad_norm": 3.9099888801574707, "learning_rate": 6.431506342063353e-09, "loss": 0.2979, "step": 14364 }, { "epoch": 0.9814169570267132, "grad_norm": 3.5920543670654297, "learning_rate": 6.384660958928867e-09, "loss": 0.2203, "step": 14365 }, { "epoch": 0.9814852770376443, "grad_norm": 6.256784439086914, "learning_rate": 6.337986648870864e-09, "loss": 0.3224, "step": 14366 }, { "epoch": 0.9815535970485755, "grad_norm": 3.7157628536224365, "learning_rate": 6.291483414174182e-09, "loss": 0.2661, "step": 14367 }, { "epoch": 0.9816219170595067, "grad_norm": 3.1137216091156006, "learning_rate": 6.245151257116666e-09, "loss": 0.2092, "step": 14368 }, { "epoch": 0.9816902370704379, "grad_norm": 4.42003059387207, "learning_rate": 6.198990179966446e-09, "loss": 0.2865, "step": 14369 }, { "epoch": 0.9817585570813692, "grad_norm": 5.043473243713379, "learning_rate": 6.1530001849842696e-09, "loss": 0.3342, "step": 14370 }, { "epoch": 0.9818268770923003, "grad_norm": 4.557610988616943, "learning_rate": 6.107181274421946e-09, "loss": 0.2983, "step": 14371 }, { "epoch": 0.9818951971032316, "grad_norm": 4.714426517486572, "learning_rate": 6.0615334505231265e-09, "loss": 0.2921, "step": 14372 }, { "epoch": 0.9819635171141627, "grad_norm": 6.387899398803711, "learning_rate": 6.016056715523299e-09, "loss": 0.2175, "step": 14373 }, { "epoch": 0.9820318371250939, "grad_norm": 3.6858513355255127, "learning_rate": 5.970751071649405e-09, "loss": 0.2053, "step": 14374 }, { "epoch": 0.9821001571360252, "grad_norm": 5.208673477172852, "learning_rate": 5.925616521119448e-09, "loss": 0.2945, "step": 14375 }, { "epoch": 0.9821684771469563, "grad_norm": 3.1970455646514893, "learning_rate": 5.88065306614366e-09, "loss": 0.1957, "step": 14376 }, { "epoch": 0.9822367971578876, "grad_norm": 3.655421495437622, "learning_rate": 5.835860708924112e-09, "loss": 0.2794, "step": 14377 }, { "epoch": 0.9823051171688187, "grad_norm": 3.7511706352233887, "learning_rate": 5.79123945165394e-09, "loss": 0.1967, "step": 14378 }, { "epoch": 0.9823734371797499, "grad_norm": 4.014549732208252, "learning_rate": 5.746789296518118e-09, "loss": 0.2467, "step": 14379 }, { "epoch": 0.9824417571906812, "grad_norm": 5.084699630737305, "learning_rate": 5.7025102456934596e-09, "loss": 0.3301, "step": 14380 }, { "epoch": 0.9825100772016123, "grad_norm": 5.844682216644287, "learning_rate": 5.658402301348231e-09, "loss": 0.4063, "step": 14381 }, { "epoch": 0.9825783972125436, "grad_norm": 4.5015082359313965, "learning_rate": 5.6144654656417605e-09, "loss": 0.2252, "step": 14382 }, { "epoch": 0.9826467172234747, "grad_norm": 5.704818248748779, "learning_rate": 5.570699740725993e-09, "loss": 0.3376, "step": 14383 }, { "epoch": 0.982715037234406, "grad_norm": 4.565707206726074, "learning_rate": 5.527105128743548e-09, "loss": 0.2452, "step": 14384 }, { "epoch": 0.9827833572453372, "grad_norm": 5.0016303062438965, "learning_rate": 5.4836816318300505e-09, "loss": 0.2359, "step": 14385 }, { "epoch": 0.9828516772562683, "grad_norm": 4.8470916748046875, "learning_rate": 5.440429252111023e-09, "loss": 0.3822, "step": 14386 }, { "epoch": 0.9829199972671996, "grad_norm": 5.301812171936035, "learning_rate": 5.397347991704215e-09, "loss": 0.224, "step": 14387 }, { "epoch": 0.9829883172781307, "grad_norm": 3.4918367862701416, "learning_rate": 5.354437852719995e-09, "loss": 0.3292, "step": 14388 }, { "epoch": 0.983056637289062, "grad_norm": 5.47722053527832, "learning_rate": 5.311698837259404e-09, "loss": 0.2252, "step": 14389 }, { "epoch": 0.9831249572999932, "grad_norm": 5.235077381134033, "learning_rate": 5.269130947414935e-09, "loss": 0.2462, "step": 14390 }, { "epoch": 0.9831932773109243, "grad_norm": 3.319667100906372, "learning_rate": 5.22673418527092e-09, "loss": 0.2482, "step": 14391 }, { "epoch": 0.9832615973218556, "grad_norm": 3.2602951526641846, "learning_rate": 5.184508552903921e-09, "loss": 0.1355, "step": 14392 }, { "epoch": 0.9833299173327867, "grad_norm": 3.609630584716797, "learning_rate": 5.142454052381173e-09, "loss": 0.2752, "step": 14393 }, { "epoch": 0.983398237343718, "grad_norm": 3.8328216075897217, "learning_rate": 5.10057068576214e-09, "loss": 0.2666, "step": 14394 }, { "epoch": 0.9834665573546492, "grad_norm": 3.9517459869384766, "learning_rate": 5.058858455097737e-09, "loss": 0.1904, "step": 14395 }, { "epoch": 0.9835348773655804, "grad_norm": 3.370649814605713, "learning_rate": 5.017317362430718e-09, "loss": 0.2685, "step": 14396 }, { "epoch": 0.9836031973765116, "grad_norm": 4.699897766113281, "learning_rate": 4.9759474097949e-09, "loss": 0.2941, "step": 14397 }, { "epoch": 0.9836715173874427, "grad_norm": 4.026394367218018, "learning_rate": 4.934748599215944e-09, "loss": 0.2981, "step": 14398 }, { "epoch": 0.983739837398374, "grad_norm": 3.725752115249634, "learning_rate": 4.893720932711731e-09, "loss": 0.2754, "step": 14399 }, { "epoch": 0.9838081574093052, "grad_norm": 4.800041675567627, "learning_rate": 4.852864412290825e-09, "loss": 0.2699, "step": 14400 }, { "epoch": 0.9838764774202364, "grad_norm": 3.700561046600342, "learning_rate": 4.8121790399544005e-09, "loss": 0.2636, "step": 14401 }, { "epoch": 0.9839447974311676, "grad_norm": 3.6178460121154785, "learning_rate": 4.77166481769431e-09, "loss": 0.2718, "step": 14402 }, { "epoch": 0.9840131174420987, "grad_norm": 5.3089189529418945, "learning_rate": 4.7313217474942436e-09, "loss": 0.2696, "step": 14403 }, { "epoch": 0.98408143745303, "grad_norm": 2.6237571239471436, "learning_rate": 4.691149831330121e-09, "loss": 0.1956, "step": 14404 }, { "epoch": 0.9841497574639612, "grad_norm": 4.654031753540039, "learning_rate": 4.651149071168925e-09, "loss": 0.223, "step": 14405 }, { "epoch": 0.9842180774748924, "grad_norm": 4.994807243347168, "learning_rate": 4.611319468969089e-09, "loss": 0.3132, "step": 14406 }, { "epoch": 0.9842863974858236, "grad_norm": 4.972214698791504, "learning_rate": 4.571661026681273e-09, "loss": 0.2229, "step": 14407 }, { "epoch": 0.9843547174967548, "grad_norm": 4.473289966583252, "learning_rate": 4.5321737462475915e-09, "loss": 0.2643, "step": 14408 }, { "epoch": 0.984423037507686, "grad_norm": 3.766026020050049, "learning_rate": 4.492857629601221e-09, "loss": 0.26, "step": 14409 }, { "epoch": 0.9844913575186172, "grad_norm": 4.094738483428955, "learning_rate": 4.453712678667953e-09, "loss": 0.2797, "step": 14410 }, { "epoch": 0.9845596775295484, "grad_norm": 4.62681245803833, "learning_rate": 4.414738895363867e-09, "loss": 0.2982, "step": 14411 }, { "epoch": 0.9846279975404796, "grad_norm": 3.4837541580200195, "learning_rate": 4.3759362815976585e-09, "loss": 0.2954, "step": 14412 }, { "epoch": 0.9846963175514108, "grad_norm": 3.728177785873413, "learning_rate": 4.337304839269862e-09, "loss": 0.2765, "step": 14413 }, { "epoch": 0.984764637562342, "grad_norm": 4.408207416534424, "learning_rate": 4.298844570271687e-09, "loss": 0.2427, "step": 14414 }, { "epoch": 0.9848329575732732, "grad_norm": 4.199907302856445, "learning_rate": 4.260555476486571e-09, "loss": 0.2973, "step": 14415 }, { "epoch": 0.9849012775842044, "grad_norm": 5.056538105010986, "learning_rate": 4.222437559789405e-09, "loss": 0.2257, "step": 14416 }, { "epoch": 0.9849695975951356, "grad_norm": 4.025671005249023, "learning_rate": 4.184490822046527e-09, "loss": 0.2181, "step": 14417 }, { "epoch": 0.9850379176060668, "grad_norm": 5.7812910079956055, "learning_rate": 4.146715265116507e-09, "loss": 0.3496, "step": 14418 }, { "epoch": 0.985106237616998, "grad_norm": 5.857226371765137, "learning_rate": 4.109110890848589e-09, "loss": 0.309, "step": 14419 }, { "epoch": 0.9851745576279293, "grad_norm": 3.5173022747039795, "learning_rate": 4.0716777010846305e-09, "loss": 0.2125, "step": 14420 }, { "epoch": 0.9852428776388604, "grad_norm": 3.5665698051452637, "learning_rate": 4.034415697657556e-09, "loss": 0.2176, "step": 14421 }, { "epoch": 0.9853111976497916, "grad_norm": 4.066441535949707, "learning_rate": 3.99732488239174e-09, "loss": 0.3182, "step": 14422 }, { "epoch": 0.9853795176607228, "grad_norm": 4.219202995300293, "learning_rate": 3.960405257103783e-09, "loss": 0.26, "step": 14423 }, { "epoch": 0.985447837671654, "grad_norm": 4.209702968597412, "learning_rate": 3.9236568236009624e-09, "loss": 0.1837, "step": 14424 }, { "epoch": 0.9855161576825853, "grad_norm": 3.8449134826660156, "learning_rate": 3.887079583683561e-09, "loss": 0.2071, "step": 14425 }, { "epoch": 0.9855844776935164, "grad_norm": 4.191067218780518, "learning_rate": 3.850673539142146e-09, "loss": 0.2401, "step": 14426 }, { "epoch": 0.9856527977044476, "grad_norm": 3.880476236343384, "learning_rate": 3.814438691759126e-09, "loss": 0.2679, "step": 14427 }, { "epoch": 0.9857211177153788, "grad_norm": 5.7203688621521, "learning_rate": 3.778375043309523e-09, "loss": 0.2884, "step": 14428 }, { "epoch": 0.98578943772631, "grad_norm": 4.1149420738220215, "learning_rate": 3.742482595559038e-09, "loss": 0.2378, "step": 14429 }, { "epoch": 0.9858577577372413, "grad_norm": 3.439561367034912, "learning_rate": 3.70676135026482e-09, "loss": 0.2706, "step": 14430 }, { "epoch": 0.9859260777481724, "grad_norm": 3.9290151596069336, "learning_rate": 3.6712113091766364e-09, "loss": 0.2428, "step": 14431 }, { "epoch": 0.9859943977591037, "grad_norm": 3.0384247303009033, "learning_rate": 3.6358324740349278e-09, "loss": 0.2574, "step": 14432 }, { "epoch": 0.9860627177700348, "grad_norm": 4.064198017120361, "learning_rate": 3.6006248465723644e-09, "loss": 0.2431, "step": 14433 }, { "epoch": 0.986131037780966, "grad_norm": 3.4627206325531006, "learning_rate": 3.565588428512678e-09, "loss": 0.2175, "step": 14434 }, { "epoch": 0.9861993577918973, "grad_norm": 4.389651775360107, "learning_rate": 3.530723221571441e-09, "loss": 0.3783, "step": 14435 }, { "epoch": 0.9862676778028284, "grad_norm": 5.427654266357422, "learning_rate": 3.496029227456454e-09, "loss": 0.339, "step": 14436 }, { "epoch": 0.9863359978137597, "grad_norm": 3.9040699005126953, "learning_rate": 3.461506447865803e-09, "loss": 0.2432, "step": 14437 }, { "epoch": 0.9864043178246908, "grad_norm": 4.3512187004089355, "learning_rate": 3.42715488449058e-09, "loss": 0.2233, "step": 14438 }, { "epoch": 0.986472637835622, "grad_norm": 3.1960501670837402, "learning_rate": 3.3929745390129383e-09, "loss": 0.2423, "step": 14439 }, { "epoch": 0.9865409578465533, "grad_norm": 4.673553466796875, "learning_rate": 3.3589654131060963e-09, "loss": 0.2736, "step": 14440 }, { "epoch": 0.9866092778574844, "grad_norm": 5.529869556427002, "learning_rate": 3.325127508435888e-09, "loss": 0.1723, "step": 14441 }, { "epoch": 0.9866775978684157, "grad_norm": 4.136258602142334, "learning_rate": 3.2914608266588205e-09, "loss": 0.1998, "step": 14442 }, { "epoch": 0.9867459178793468, "grad_norm": 4.3119916915893555, "learning_rate": 3.257965369424021e-09, "loss": 0.2455, "step": 14443 }, { "epoch": 0.9868142378902781, "grad_norm": 5.800519943237305, "learning_rate": 3.2246411383708982e-09, "loss": 0.1589, "step": 14444 }, { "epoch": 0.9868825579012093, "grad_norm": 4.095444202423096, "learning_rate": 3.191488135132259e-09, "loss": 0.2969, "step": 14445 }, { "epoch": 0.9869508779121404, "grad_norm": 4.754659652709961, "learning_rate": 3.1585063613304154e-09, "loss": 0.1742, "step": 14446 }, { "epoch": 0.9870191979230717, "grad_norm": 4.920273780822754, "learning_rate": 3.1256958185814643e-09, "loss": 0.2918, "step": 14447 }, { "epoch": 0.9870875179340028, "grad_norm": 4.120257377624512, "learning_rate": 3.09305650849101e-09, "loss": 0.2249, "step": 14448 }, { "epoch": 0.9871558379449341, "grad_norm": 6.5285964012146, "learning_rate": 3.0605884326580513e-09, "loss": 0.448, "step": 14449 }, { "epoch": 0.9872241579558653, "grad_norm": 3.1760294437408447, "learning_rate": 3.0282915926722607e-09, "loss": 0.2465, "step": 14450 }, { "epoch": 0.9872924779667964, "grad_norm": 3.988234758377075, "learning_rate": 2.9961659901151515e-09, "loss": 0.2087, "step": 14451 }, { "epoch": 0.9873607979777277, "grad_norm": 7.801355361938477, "learning_rate": 2.9642116265596872e-09, "loss": 0.3222, "step": 14452 }, { "epoch": 0.9874291179886588, "grad_norm": 3.188941240310669, "learning_rate": 2.932428503570672e-09, "loss": 0.232, "step": 14453 }, { "epoch": 0.9874974379995901, "grad_norm": 3.2925355434417725, "learning_rate": 2.9008166227047492e-09, "loss": 0.2152, "step": 14454 }, { "epoch": 0.9875657580105213, "grad_norm": 5.195062637329102, "learning_rate": 2.8693759855092373e-09, "loss": 0.2305, "step": 14455 }, { "epoch": 0.9876340780214525, "grad_norm": 4.832563877105713, "learning_rate": 2.83810659352407e-09, "loss": 0.2778, "step": 14456 }, { "epoch": 0.9877023980323837, "grad_norm": 3.7436866760253906, "learning_rate": 2.8070084482806345e-09, "loss": 0.2522, "step": 14457 }, { "epoch": 0.9877707180433148, "grad_norm": 4.142847061157227, "learning_rate": 2.776081551301379e-09, "loss": 0.1994, "step": 14458 }, { "epoch": 0.9878390380542461, "grad_norm": 5.61443567276001, "learning_rate": 2.745325904100593e-09, "loss": 0.3289, "step": 14459 }, { "epoch": 0.9879073580651773, "grad_norm": 4.011586666107178, "learning_rate": 2.7147415081847927e-09, "loss": 0.3219, "step": 14460 }, { "epoch": 0.9879756780761085, "grad_norm": 3.8604612350463867, "learning_rate": 2.6843283650511694e-09, "loss": 0.1852, "step": 14461 }, { "epoch": 0.9880439980870397, "grad_norm": 4.223409175872803, "learning_rate": 2.6540864761891437e-09, "loss": 0.2864, "step": 14462 }, { "epoch": 0.9881123180979708, "grad_norm": 2.8319106101989746, "learning_rate": 2.6240158430799742e-09, "loss": 0.1869, "step": 14463 }, { "epoch": 0.9881806381089021, "grad_norm": 4.182962894439697, "learning_rate": 2.5941164671952067e-09, "loss": 0.3083, "step": 14464 }, { "epoch": 0.9882489581198333, "grad_norm": 3.3354461193084717, "learning_rate": 2.56438834999978e-09, "loss": 0.2358, "step": 14465 }, { "epoch": 0.9883172781307645, "grad_norm": 3.289963483810425, "learning_rate": 2.5348314929493078e-09, "loss": 0.174, "step": 14466 }, { "epoch": 0.9883855981416957, "grad_norm": 6.735445976257324, "learning_rate": 2.5054458974904658e-09, "loss": 0.2923, "step": 14467 }, { "epoch": 0.988453918152627, "grad_norm": 3.9424166679382324, "learning_rate": 2.476231565062936e-09, "loss": 0.2577, "step": 14468 }, { "epoch": 0.9885222381635581, "grad_norm": 4.957606792449951, "learning_rate": 2.4471884970966863e-09, "loss": 0.2681, "step": 14469 }, { "epoch": 0.9885905581744893, "grad_norm": 4.463254928588867, "learning_rate": 2.4183166950142997e-09, "loss": 0.3002, "step": 14470 }, { "epoch": 0.9886588781854205, "grad_norm": 3.8188304901123047, "learning_rate": 2.389616160229424e-09, "loss": 0.2069, "step": 14471 }, { "epoch": 0.9887271981963517, "grad_norm": 4.269947052001953, "learning_rate": 2.361086894147546e-09, "loss": 0.292, "step": 14472 }, { "epoch": 0.988795518207283, "grad_norm": 5.4301557540893555, "learning_rate": 2.332728898165215e-09, "loss": 0.3153, "step": 14473 }, { "epoch": 0.9888638382182141, "grad_norm": 3.3749399185180664, "learning_rate": 2.3045421736715974e-09, "loss": 0.1862, "step": 14474 }, { "epoch": 0.9889321582291453, "grad_norm": 4.604598522186279, "learning_rate": 2.276526722046923e-09, "loss": 0.2074, "step": 14475 }, { "epoch": 0.9890004782400765, "grad_norm": 3.2092480659484863, "learning_rate": 2.2486825446624836e-09, "loss": 0.1878, "step": 14476 }, { "epoch": 0.9890687982510077, "grad_norm": 5.112948417663574, "learning_rate": 2.221009642882188e-09, "loss": 0.2673, "step": 14477 }, { "epoch": 0.989137118261939, "grad_norm": 4.562286376953125, "learning_rate": 2.1935080180610077e-09, "loss": 0.3001, "step": 14478 }, { "epoch": 0.9892054382728701, "grad_norm": 3.0035085678100586, "learning_rate": 2.1661776715457547e-09, "loss": 0.1987, "step": 14479 }, { "epoch": 0.9892737582838014, "grad_norm": 4.430942535400391, "learning_rate": 2.1390186046743032e-09, "loss": 0.1687, "step": 14480 }, { "epoch": 0.9893420782947325, "grad_norm": 4.280961513519287, "learning_rate": 2.1120308187767555e-09, "loss": 0.2995, "step": 14481 }, { "epoch": 0.9894103983056637, "grad_norm": 5.198007583618164, "learning_rate": 2.0852143151750545e-09, "loss": 0.1996, "step": 14482 }, { "epoch": 0.989478718316595, "grad_norm": 4.554967880249023, "learning_rate": 2.058569095181817e-09, "loss": 0.3205, "step": 14483 }, { "epoch": 0.9895470383275261, "grad_norm": 3.7292990684509277, "learning_rate": 2.0320951601018875e-09, "loss": 0.2137, "step": 14484 }, { "epoch": 0.9896153583384574, "grad_norm": 3.236419677734375, "learning_rate": 2.0057925112315633e-09, "loss": 0.2569, "step": 14485 }, { "epoch": 0.9896836783493885, "grad_norm": 4.162407398223877, "learning_rate": 1.9796611498593687e-09, "loss": 0.2997, "step": 14486 }, { "epoch": 0.9897519983603197, "grad_norm": 5.768928050994873, "learning_rate": 1.953701077264114e-09, "loss": 0.3469, "step": 14487 }, { "epoch": 0.989820318371251, "grad_norm": 4.192006587982178, "learning_rate": 1.9279122947176152e-09, "loss": 0.2193, "step": 14488 }, { "epoch": 0.9898886383821821, "grad_norm": 4.380094051361084, "learning_rate": 1.902294803482363e-09, "loss": 0.2706, "step": 14489 }, { "epoch": 0.9899569583931134, "grad_norm": 3.1285510063171387, "learning_rate": 1.8768486048126863e-09, "loss": 0.1354, "step": 14490 }, { "epoch": 0.9900252784040445, "grad_norm": 4.5065412521362305, "learning_rate": 1.851573699954756e-09, "loss": 0.2555, "step": 14491 }, { "epoch": 0.9900935984149758, "grad_norm": 3.9637598991394043, "learning_rate": 1.8264700901461928e-09, "loss": 0.2163, "step": 14492 }, { "epoch": 0.990161918425907, "grad_norm": 3.2669577598571777, "learning_rate": 1.8015377766164574e-09, "loss": 0.3377, "step": 14493 }, { "epoch": 0.9902302384368381, "grad_norm": 4.111731052398682, "learning_rate": 1.776776760586074e-09, "loss": 0.2543, "step": 14494 }, { "epoch": 0.9902985584477694, "grad_norm": 3.220829486846924, "learning_rate": 1.7521870432677944e-09, "loss": 0.2032, "step": 14495 }, { "epoch": 0.9903668784587005, "grad_norm": 4.505678176879883, "learning_rate": 1.7277686258654335e-09, "loss": 0.2965, "step": 14496 }, { "epoch": 0.9904351984696318, "grad_norm": 4.757819175720215, "learning_rate": 1.7035215095750343e-09, "loss": 0.2436, "step": 14497 }, { "epoch": 0.990503518480563, "grad_norm": 4.10587739944458, "learning_rate": 1.6794456955837033e-09, "loss": 0.2493, "step": 14498 }, { "epoch": 0.9905718384914941, "grad_norm": 3.6800661087036133, "learning_rate": 1.655541185070386e-09, "loss": 0.3163, "step": 14499 }, { "epoch": 0.9906401585024254, "grad_norm": 3.501939058303833, "learning_rate": 1.6318079792058682e-09, "loss": 0.2396, "step": 14500 }, { "epoch": 0.9907084785133565, "grad_norm": 3.420004367828369, "learning_rate": 1.6082460791516095e-09, "loss": 0.2266, "step": 14501 }, { "epoch": 0.9907767985242878, "grad_norm": 3.983199119567871, "learning_rate": 1.5848554860620756e-09, "loss": 0.2456, "step": 14502 }, { "epoch": 0.990845118535219, "grad_norm": 1.9887017011642456, "learning_rate": 1.5616362010827943e-09, "loss": 0.1244, "step": 14503 }, { "epoch": 0.9909134385461502, "grad_norm": 3.8468918800354004, "learning_rate": 1.5385882253499683e-09, "loss": 0.2052, "step": 14504 }, { "epoch": 0.9909817585570814, "grad_norm": 3.973161220550537, "learning_rate": 1.515711559992805e-09, "loss": 0.2618, "step": 14505 }, { "epoch": 0.9910500785680125, "grad_norm": 4.395277500152588, "learning_rate": 1.4930062061311866e-09, "loss": 0.2786, "step": 14506 }, { "epoch": 0.9911183985789438, "grad_norm": 3.623757839202881, "learning_rate": 1.4704721648772235e-09, "loss": 0.2731, "step": 14507 }, { "epoch": 0.991186718589875, "grad_norm": 4.658561706542969, "learning_rate": 1.4481094373337e-09, "loss": 0.3095, "step": 14508 }, { "epoch": 0.9912550386008062, "grad_norm": 4.63050651550293, "learning_rate": 1.4259180245967951e-09, "loss": 0.1922, "step": 14509 }, { "epoch": 0.9913233586117374, "grad_norm": 3.8407089710235596, "learning_rate": 1.4038979277521956e-09, "loss": 0.2167, "step": 14510 }, { "epoch": 0.9913916786226685, "grad_norm": 3.5817883014678955, "learning_rate": 1.382049147878206e-09, "loss": 0.1626, "step": 14511 }, { "epoch": 0.9914599986335998, "grad_norm": 5.573488712310791, "learning_rate": 1.3603716860453585e-09, "loss": 0.2754, "step": 14512 }, { "epoch": 0.991528318644531, "grad_norm": 3.535278558731079, "learning_rate": 1.3388655433148599e-09, "loss": 0.1701, "step": 14513 }, { "epoch": 0.9915966386554622, "grad_norm": 5.114072322845459, "learning_rate": 1.3175307207393684e-09, "loss": 0.3037, "step": 14514 }, { "epoch": 0.9916649586663934, "grad_norm": 4.109646797180176, "learning_rate": 1.2963672193641584e-09, "loss": 0.2474, "step": 14515 }, { "epoch": 0.9917332786773246, "grad_norm": 3.9760372638702393, "learning_rate": 1.2753750402251795e-09, "loss": 0.2317, "step": 14516 }, { "epoch": 0.9918015986882558, "grad_norm": 5.127110004425049, "learning_rate": 1.2545541843506091e-09, "loss": 0.2898, "step": 14517 }, { "epoch": 0.991869918699187, "grad_norm": 6.524141311645508, "learning_rate": 1.2339046527600762e-09, "loss": 0.3025, "step": 14518 }, { "epoch": 0.9919382387101182, "grad_norm": 4.223613262176514, "learning_rate": 1.2134264464642718e-09, "loss": 0.2099, "step": 14519 }, { "epoch": 0.9920065587210494, "grad_norm": 4.353693962097168, "learning_rate": 1.1931195664665051e-09, "loss": 0.2361, "step": 14520 }, { "epoch": 0.9920748787319806, "grad_norm": 3.182481050491333, "learning_rate": 1.1729840137607583e-09, "loss": 0.2009, "step": 14521 }, { "epoch": 0.9921431987429118, "grad_norm": 4.0395612716674805, "learning_rate": 1.1530197893328541e-09, "loss": 0.2212, "step": 14522 }, { "epoch": 0.992211518753843, "grad_norm": 3.7722716331481934, "learning_rate": 1.1332268941608436e-09, "loss": 0.2465, "step": 14523 }, { "epoch": 0.9922798387647742, "grad_norm": 7.574491500854492, "learning_rate": 1.1136053292138404e-09, "loss": 0.278, "step": 14524 }, { "epoch": 0.9923481587757054, "grad_norm": 3.957233428955078, "learning_rate": 1.0941550954527979e-09, "loss": 0.1821, "step": 14525 }, { "epoch": 0.9924164787866366, "grad_norm": 5.113203525543213, "learning_rate": 1.0748761938297324e-09, "loss": 0.2774, "step": 14526 }, { "epoch": 0.9924847987975678, "grad_norm": 2.7412326335906982, "learning_rate": 1.0557686252888886e-09, "loss": 0.1911, "step": 14527 }, { "epoch": 0.9925531188084991, "grad_norm": 3.563694477081299, "learning_rate": 1.036832390765574e-09, "loss": 0.205, "step": 14528 }, { "epoch": 0.9926214388194302, "grad_norm": 3.2009949684143066, "learning_rate": 1.018067491187713e-09, "loss": 0.1978, "step": 14529 }, { "epoch": 0.9926897588303614, "grad_norm": 3.931992292404175, "learning_rate": 9.99473927473904e-10, "loss": 0.1653, "step": 14530 }, { "epoch": 0.9927580788412926, "grad_norm": 4.034735202789307, "learning_rate": 9.810517005345853e-10, "loss": 0.2705, "step": 14531 }, { "epoch": 0.9928263988522238, "grad_norm": 3.7469615936279297, "learning_rate": 9.628008112716468e-10, "loss": 0.2649, "step": 14532 }, { "epoch": 0.9928947188631551, "grad_norm": 4.3450751304626465, "learning_rate": 9.44721260578818e-10, "loss": 0.214, "step": 14533 }, { "epoch": 0.9929630388740862, "grad_norm": 4.165452003479004, "learning_rate": 9.268130493416682e-10, "loss": 0.2458, "step": 14534 }, { "epoch": 0.9930313588850174, "grad_norm": 3.551945447921753, "learning_rate": 9.090761784372181e-10, "loss": 0.1994, "step": 14535 }, { "epoch": 0.9930996788959486, "grad_norm": 4.621650695800781, "learning_rate": 8.915106487335511e-10, "loss": 0.3357, "step": 14536 }, { "epoch": 0.9931679989068798, "grad_norm": 4.6885175704956055, "learning_rate": 8.741164610909791e-10, "loss": 0.2885, "step": 14537 }, { "epoch": 0.9932363189178111, "grad_norm": 4.052745342254639, "learning_rate": 8.568936163612651e-10, "loss": 0.2, "step": 14538 }, { "epoch": 0.9933046389287422, "grad_norm": 3.192901611328125, "learning_rate": 8.398421153876234e-10, "loss": 0.2495, "step": 14539 }, { "epoch": 0.9933729589396735, "grad_norm": 8.036092758178711, "learning_rate": 8.22961959005497e-10, "loss": 0.3335, "step": 14540 }, { "epoch": 0.9934412789506046, "grad_norm": 4.06540060043335, "learning_rate": 8.062531480410029e-10, "loss": 0.2802, "step": 14541 }, { "epoch": 0.9935095989615358, "grad_norm": 3.451843738555908, "learning_rate": 7.897156833128749e-10, "loss": 0.2534, "step": 14542 }, { "epoch": 0.9935779189724671, "grad_norm": 4.507925033569336, "learning_rate": 7.733495656301326e-10, "loss": 0.3045, "step": 14543 }, { "epoch": 0.9936462389833982, "grad_norm": 4.153252124786377, "learning_rate": 7.571547957944124e-10, "loss": 0.2059, "step": 14544 }, { "epoch": 0.9937145589943295, "grad_norm": 5.378134727478027, "learning_rate": 7.41131374599191e-10, "loss": 0.2488, "step": 14545 }, { "epoch": 0.9937828790052606, "grad_norm": 4.080507278442383, "learning_rate": 7.252793028286186e-10, "loss": 0.2173, "step": 14546 }, { "epoch": 0.9938511990161918, "grad_norm": 3.7979750633239746, "learning_rate": 7.095985812590743e-10, "loss": 0.3245, "step": 14547 }, { "epoch": 0.993919519027123, "grad_norm": 4.751473426818848, "learning_rate": 6.940892106583884e-10, "loss": 0.2846, "step": 14548 }, { "epoch": 0.9939878390380542, "grad_norm": 3.3238959312438965, "learning_rate": 6.78751191786231e-10, "loss": 0.1523, "step": 14549 }, { "epoch": 0.9940561590489855, "grad_norm": 4.754688739776611, "learning_rate": 6.635845253933348e-10, "loss": 0.3678, "step": 14550 }, { "epoch": 0.9941244790599166, "grad_norm": 5.229087829589844, "learning_rate": 6.485892122226611e-10, "loss": 0.2674, "step": 14551 }, { "epoch": 0.9941927990708479, "grad_norm": 4.13501501083374, "learning_rate": 6.337652530082338e-10, "loss": 0.3811, "step": 14552 }, { "epoch": 0.994261119081779, "grad_norm": 3.2629005908966064, "learning_rate": 6.191126484759169e-10, "loss": 0.2034, "step": 14553 }, { "epoch": 0.9943294390927102, "grad_norm": 3.6427464485168457, "learning_rate": 6.046313993434139e-10, "loss": 0.2575, "step": 14554 }, { "epoch": 0.9943977591036415, "grad_norm": 5.091838836669922, "learning_rate": 5.903215063198796e-10, "loss": 0.2478, "step": 14555 }, { "epoch": 0.9944660791145726, "grad_norm": 3.533487319946289, "learning_rate": 5.761829701059206e-10, "loss": 0.1865, "step": 14556 }, { "epoch": 0.9945343991255039, "grad_norm": 5.4524641036987305, "learning_rate": 5.622157913935943e-10, "loss": 0.316, "step": 14557 }, { "epoch": 0.994602719136435, "grad_norm": 4.484199523925781, "learning_rate": 5.484199708671866e-10, "loss": 0.3174, "step": 14558 }, { "epoch": 0.9946710391473662, "grad_norm": 4.950183868408203, "learning_rate": 5.347955092024347e-10, "loss": 0.2984, "step": 14559 }, { "epoch": 0.9947393591582975, "grad_norm": 4.6331281661987305, "learning_rate": 5.213424070661388e-10, "loss": 0.3555, "step": 14560 }, { "epoch": 0.9948076791692286, "grad_norm": 4.577620983123779, "learning_rate": 5.080606651169384e-10, "loss": 0.1815, "step": 14561 }, { "epoch": 0.9948759991801599, "grad_norm": 5.127532958984375, "learning_rate": 4.949502840053133e-10, "loss": 0.2551, "step": 14562 }, { "epoch": 0.994944319191091, "grad_norm": 2.704928398132324, "learning_rate": 4.82011264373583e-10, "loss": 0.1581, "step": 14563 }, { "epoch": 0.9950126392020223, "grad_norm": 6.146534442901611, "learning_rate": 4.692436068551298e-10, "loss": 0.3877, "step": 14564 }, { "epoch": 0.9950809592129535, "grad_norm": 3.6689910888671875, "learning_rate": 4.566473120747871e-10, "loss": 0.2082, "step": 14565 }, { "epoch": 0.9951492792238846, "grad_norm": 6.127915859222412, "learning_rate": 4.442223806500056e-10, "loss": 0.2236, "step": 14566 }, { "epoch": 0.9952175992348159, "grad_norm": 6.3898491859436035, "learning_rate": 4.319688131885213e-10, "loss": 0.3384, "step": 14567 }, { "epoch": 0.995285919245747, "grad_norm": 3.5257198810577393, "learning_rate": 4.198866102910759e-10, "loss": 0.2246, "step": 14568 }, { "epoch": 0.9953542392566783, "grad_norm": 2.5478203296661377, "learning_rate": 4.0797577254869654e-10, "loss": 0.2093, "step": 14569 }, { "epoch": 0.9954225592676095, "grad_norm": 4.803579807281494, "learning_rate": 3.962363005450276e-10, "loss": 0.2493, "step": 14570 }, { "epoch": 0.9954908792785406, "grad_norm": 4.702914237976074, "learning_rate": 3.846681948547759e-10, "loss": 0.2989, "step": 14571 }, { "epoch": 0.9955591992894719, "grad_norm": 4.147317409515381, "learning_rate": 3.732714560440997e-10, "loss": 0.2788, "step": 14572 }, { "epoch": 0.995627519300403, "grad_norm": 4.860592842102051, "learning_rate": 3.6204608467177436e-10, "loss": 0.2553, "step": 14573 }, { "epoch": 0.9956958393113343, "grad_norm": 4.584429740905762, "learning_rate": 3.5099208128686055e-10, "loss": 0.2336, "step": 14574 }, { "epoch": 0.9957641593222655, "grad_norm": 3.2982022762298584, "learning_rate": 3.401094464306475e-10, "loss": 0.2462, "step": 14575 }, { "epoch": 0.9958324793331967, "grad_norm": 4.067894458770752, "learning_rate": 3.2939818063665303e-10, "loss": 0.229, "step": 14576 }, { "epoch": 0.9959007993441279, "grad_norm": 4.269998073577881, "learning_rate": 3.1885828442868024e-10, "loss": 0.2929, "step": 14577 }, { "epoch": 0.995969119355059, "grad_norm": 4.084346771240234, "learning_rate": 3.084897583231494e-10, "loss": 0.2339, "step": 14578 }, { "epoch": 0.9960374393659903, "grad_norm": 4.531469821929932, "learning_rate": 2.9829260282754344e-10, "loss": 0.3103, "step": 14579 }, { "epoch": 0.9961057593769215, "grad_norm": 4.307982444763184, "learning_rate": 2.8826681844157376e-10, "loss": 0.2701, "step": 14580 }, { "epoch": 0.9961740793878527, "grad_norm": 3.172661542892456, "learning_rate": 2.7841240565601444e-10, "loss": 0.1784, "step": 14581 }, { "epoch": 0.9962423993987839, "grad_norm": 4.852576732635498, "learning_rate": 2.6872936495347944e-10, "loss": 0.266, "step": 14582 }, { "epoch": 0.996310719409715, "grad_norm": 3.3958189487457275, "learning_rate": 2.5921769680764537e-10, "loss": 0.1841, "step": 14583 }, { "epoch": 0.9963790394206463, "grad_norm": 3.7263739109039307, "learning_rate": 2.4987740168519456e-10, "loss": 0.2297, "step": 14584 }, { "epoch": 0.9964473594315775, "grad_norm": 4.255266189575195, "learning_rate": 2.4070848004231757e-10, "loss": 0.2571, "step": 14585 }, { "epoch": 0.9965156794425087, "grad_norm": 3.3426103591918945, "learning_rate": 2.3171093232898787e-10, "loss": 0.2548, "step": 14586 }, { "epoch": 0.9965839994534399, "grad_norm": 2.9929659366607666, "learning_rate": 2.2288475898546434e-10, "loss": 0.1865, "step": 14587 }, { "epoch": 0.9966523194643712, "grad_norm": 4.821292877197266, "learning_rate": 2.1422996044384578e-10, "loss": 0.2098, "step": 14588 }, { "epoch": 0.9967206394753023, "grad_norm": 4.021329402923584, "learning_rate": 2.057465371280709e-10, "loss": 0.2061, "step": 14589 }, { "epoch": 0.9967889594862335, "grad_norm": 4.535852909088135, "learning_rate": 1.974344894535296e-10, "loss": 0.201, "step": 14590 }, { "epoch": 0.9968572794971647, "grad_norm": 4.422484874725342, "learning_rate": 1.892938178270631e-10, "loss": 0.2505, "step": 14591 }, { "epoch": 0.9969255995080959, "grad_norm": 3.487759828567505, "learning_rate": 1.8132452264735254e-10, "loss": 0.2143, "step": 14592 }, { "epoch": 0.9969939195190272, "grad_norm": 3.249573230743408, "learning_rate": 1.7352660430491883e-10, "loss": 0.1726, "step": 14593 }, { "epoch": 0.9970622395299583, "grad_norm": 3.229743242263794, "learning_rate": 1.6590006318134566e-10, "loss": 0.1964, "step": 14594 }, { "epoch": 0.9971305595408895, "grad_norm": 3.752119779586792, "learning_rate": 1.5844489965005647e-10, "loss": 0.2014, "step": 14595 }, { "epoch": 0.9971988795518207, "grad_norm": 4.114258766174316, "learning_rate": 1.511611140763147e-10, "loss": 0.2124, "step": 14596 }, { "epoch": 0.9972671995627519, "grad_norm": 5.12037467956543, "learning_rate": 1.4404870681644644e-10, "loss": 0.307, "step": 14597 }, { "epoch": 0.9973355195736832, "grad_norm": 4.383820056915283, "learning_rate": 1.3710767821900614e-10, "loss": 0.226, "step": 14598 }, { "epoch": 0.9974038395846143, "grad_norm": 4.453164577484131, "learning_rate": 1.3033802862399968e-10, "loss": 0.2246, "step": 14599 }, { "epoch": 0.9974721595955456, "grad_norm": 4.408101558685303, "learning_rate": 1.2373975836249552e-10, "loss": 0.2721, "step": 14600 }, { "epoch": 0.9975404796064767, "grad_norm": 3.6654577255249023, "learning_rate": 1.173128677577906e-10, "loss": 0.2869, "step": 14601 }, { "epoch": 0.9976087996174079, "grad_norm": 5.306695938110352, "learning_rate": 1.1105735712463316e-10, "loss": 0.3355, "step": 14602 }, { "epoch": 0.9976771196283392, "grad_norm": 3.6981680393218994, "learning_rate": 1.0497322676961129e-10, "loss": 0.211, "step": 14603 }, { "epoch": 0.9977454396392703, "grad_norm": 6.136325359344482, "learning_rate": 9.90604769903758e-11, "loss": 0.2773, "step": 14604 }, { "epoch": 0.9978137596502016, "grad_norm": 3.566359281539917, "learning_rate": 9.331910807602873e-11, "loss": 0.2548, "step": 14605 }, { "epoch": 0.9978820796611327, "grad_norm": 2.77876615524292, "learning_rate": 8.774912030867776e-11, "loss": 0.194, "step": 14606 }, { "epoch": 0.9979503996720639, "grad_norm": 4.337716579437256, "learning_rate": 8.235051396032756e-11, "loss": 0.2369, "step": 14607 }, { "epoch": 0.9980187196829952, "grad_norm": 4.751617908477783, "learning_rate": 7.712328929521118e-11, "loss": 0.2413, "step": 14608 }, { "epoch": 0.9980870396939263, "grad_norm": 5.611706256866455, "learning_rate": 7.206744656979013e-11, "loss": 0.2491, "step": 14609 }, { "epoch": 0.9981553597048576, "grad_norm": 4.9299397468566895, "learning_rate": 6.718298603158867e-11, "loss": 0.2752, "step": 14610 }, { "epoch": 0.9982236797157887, "grad_norm": 3.9153480529785156, "learning_rate": 6.246990791958229e-11, "loss": 0.3321, "step": 14611 }, { "epoch": 0.99829199972672, "grad_norm": 3.081084966659546, "learning_rate": 5.7928212464974923e-11, "loss": 0.1978, "step": 14612 }, { "epoch": 0.9983603197376512, "grad_norm": 6.068963050842285, "learning_rate": 5.3557899889256074e-11, "loss": 0.2499, "step": 14613 }, { "epoch": 0.9984286397485823, "grad_norm": 4.984313011169434, "learning_rate": 4.935897040730941e-11, "loss": 0.2646, "step": 14614 }, { "epoch": 0.9984969597595136, "grad_norm": 3.8421502113342285, "learning_rate": 4.533142422430414e-11, "loss": 0.2026, "step": 14615 }, { "epoch": 0.9985652797704447, "grad_norm": 4.396087646484375, "learning_rate": 4.1475261537637917e-11, "loss": 0.2424, "step": 14616 }, { "epoch": 0.998633599781376, "grad_norm": 4.894076824188232, "learning_rate": 3.779048253615969e-11, "loss": 0.1997, "step": 14617 }, { "epoch": 0.9987019197923072, "grad_norm": 5.550635814666748, "learning_rate": 3.427708740016966e-11, "loss": 0.2986, "step": 14618 }, { "epoch": 0.9987702398032383, "grad_norm": 4.194211006164551, "learning_rate": 3.0935076301807916e-11, "loss": 0.2519, "step": 14619 }, { "epoch": 0.9988385598141696, "grad_norm": 4.2139177322387695, "learning_rate": 2.7764449404665825e-11, "loss": 0.195, "step": 14620 }, { "epoch": 0.9989068798251007, "grad_norm": 5.373988151550293, "learning_rate": 2.4765206864174605e-11, "loss": 0.3552, "step": 14621 }, { "epoch": 0.998975199836032, "grad_norm": 4.100463390350342, "learning_rate": 2.1937348826828183e-11, "loss": 0.2924, "step": 14622 }, { "epoch": 0.9990435198469632, "grad_norm": 5.331884860992432, "learning_rate": 1.9280875431348932e-11, "loss": 0.3266, "step": 14623 }, { "epoch": 0.9991118398578944, "grad_norm": 3.900144100189209, "learning_rate": 1.6795786807521916e-11, "loss": 0.2663, "step": 14624 }, { "epoch": 0.9991801598688256, "grad_norm": 2.7908525466918945, "learning_rate": 1.448208307774923e-11, "loss": 0.1451, "step": 14625 }, { "epoch": 0.9992484798797567, "grad_norm": 4.777917385101318, "learning_rate": 1.2339764354718507e-11, "loss": 0.3362, "step": 14626 }, { "epoch": 0.999316799890688, "grad_norm": 3.6800899505615234, "learning_rate": 1.0368830743345826e-11, "loss": 0.196, "step": 14627 }, { "epoch": 0.9993851199016192, "grad_norm": 4.916032791137695, "learning_rate": 8.56928234038712e-12, "loss": 0.2193, "step": 14628 }, { "epoch": 0.9994534399125504, "grad_norm": 4.537232875823975, "learning_rate": 6.9411192340496085e-12, "loss": 0.2946, "step": 14629 }, { "epoch": 0.9995217599234816, "grad_norm": 4.280489444732666, "learning_rate": 5.484341503603219e-12, "loss": 0.2499, "step": 14630 }, { "epoch": 0.9995900799344127, "grad_norm": 4.542267799377441, "learning_rate": 4.198949220546311e-12, "loss": 0.3653, "step": 14631 }, { "epoch": 0.999658399945344, "grad_norm": 3.4038100242614746, "learning_rate": 3.0849424482171093e-12, "loss": 0.1608, "step": 14632 }, { "epoch": 0.9997267199562752, "grad_norm": 3.746758222579956, "learning_rate": 2.1423212406279645e-12, "loss": 0.2184, "step": 14633 }, { "epoch": 0.9997950399672064, "grad_norm": 3.5625178813934326, "learning_rate": 1.371085644408243e-12, "loss": 0.2403, "step": 14634 }, { "epoch": 0.9998633599781376, "grad_norm": 3.792663335800171, "learning_rate": 7.712356968614387e-13, "loss": 0.2342, "step": 14635 }, { "epoch": 0.9999316799890688, "grad_norm": 3.313319683074951, "learning_rate": 3.427714279080618e-13, "loss": 0.2518, "step": 14636 }, { "epoch": 1.0, "grad_norm": 6.427085876464844, "learning_rate": 8.569285814274963e-14, "loss": 0.3075, "step": 14637 } ], "logging_steps": 1.0, "max_steps": 14637, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4575, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.032963025023638e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }