{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 3740, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0013377926421404682, "grad_norm": 4.409006604383345, "learning_rate": 0.0, "loss": 1.1741, "step": 1 }, { "epoch": 0.0026755852842809363, "grad_norm": 4.2588495033175375, "learning_rate": 2.6737967914438503e-08, "loss": 0.9687, "step": 2 }, { "epoch": 0.004013377926421404, "grad_norm": 3.883276179424439, "learning_rate": 5.3475935828877005e-08, "loss": 0.9567, "step": 3 }, { "epoch": 0.005351170568561873, "grad_norm": 4.520466009450144, "learning_rate": 8.021390374331552e-08, "loss": 0.9858, "step": 4 }, { "epoch": 0.006688963210702341, "grad_norm": 3.843189430341132, "learning_rate": 1.0695187165775401e-07, "loss": 1.0237, "step": 5 }, { "epoch": 0.008026755852842809, "grad_norm": 4.729842109044172, "learning_rate": 1.3368983957219251e-07, "loss": 1.0852, "step": 6 }, { "epoch": 0.009364548494983277, "grad_norm": 3.6623845349410087, "learning_rate": 1.6042780748663104e-07, "loss": 0.9431, "step": 7 }, { "epoch": 0.010702341137123745, "grad_norm": 4.055622804188012, "learning_rate": 1.8716577540106952e-07, "loss": 0.9773, "step": 8 }, { "epoch": 0.012040133779264214, "grad_norm": 4.270882156637532, "learning_rate": 2.1390374331550802e-07, "loss": 1.0507, "step": 9 }, { "epoch": 0.013377926421404682, "grad_norm": 3.5965037451482544, "learning_rate": 2.4064171122994655e-07, "loss": 0.9944, "step": 10 }, { "epoch": 0.01471571906354515, "grad_norm": 3.9455598891900414, "learning_rate": 2.6737967914438503e-07, "loss": 0.869, "step": 11 }, { "epoch": 0.016053511705685617, "grad_norm": 4.594611271877487, "learning_rate": 2.9411764705882356e-07, "loss": 0.9876, "step": 12 }, { "epoch": 0.017391304347826087, "grad_norm": 3.889148594307988, "learning_rate": 3.208556149732621e-07, "loss": 1.0413, "step": 13 }, { "epoch": 0.018729096989966554, "grad_norm": 3.9472344268693553, "learning_rate": 3.4759358288770056e-07, "loss": 0.9566, "step": 14 }, { "epoch": 0.020066889632107024, "grad_norm": 3.8123056120576497, "learning_rate": 3.7433155080213904e-07, "loss": 0.9845, "step": 15 }, { "epoch": 0.02140468227424749, "grad_norm": 3.5876143670191114, "learning_rate": 4.0106951871657757e-07, "loss": 1.0202, "step": 16 }, { "epoch": 0.02274247491638796, "grad_norm": 4.364437167449153, "learning_rate": 4.2780748663101604e-07, "loss": 0.9941, "step": 17 }, { "epoch": 0.024080267558528427, "grad_norm": 3.638698110118719, "learning_rate": 4.5454545454545457e-07, "loss": 1.0718, "step": 18 }, { "epoch": 0.025418060200668897, "grad_norm": 3.959337231197812, "learning_rate": 4.812834224598931e-07, "loss": 0.9735, "step": 19 }, { "epoch": 0.026755852842809364, "grad_norm": 3.680021074479617, "learning_rate": 5.080213903743316e-07, "loss": 1.0297, "step": 20 }, { "epoch": 0.028093645484949834, "grad_norm": 2.9508621175859866, "learning_rate": 5.347593582887701e-07, "loss": 0.9486, "step": 21 }, { "epoch": 0.0294314381270903, "grad_norm": 3.8478095921773434, "learning_rate": 5.614973262032086e-07, "loss": 1.0068, "step": 22 }, { "epoch": 0.03076923076923077, "grad_norm": 3.211443817071394, "learning_rate": 5.882352941176471e-07, "loss": 1.0051, "step": 23 }, { "epoch": 0.032107023411371234, "grad_norm": 3.8005396309260924, "learning_rate": 6.149732620320856e-07, "loss": 1.046, "step": 24 }, { "epoch": 0.033444816053511704, "grad_norm": 3.4432384022652016, "learning_rate": 6.417112299465242e-07, "loss": 1.0701, "step": 25 }, { "epoch": 0.034782608695652174, "grad_norm": 2.666772164794055, "learning_rate": 6.684491978609627e-07, "loss": 0.8799, "step": 26 }, { "epoch": 0.036120401337792644, "grad_norm": 2.443820883897326, "learning_rate": 6.951871657754011e-07, "loss": 0.8989, "step": 27 }, { "epoch": 0.03745819397993311, "grad_norm": 2.9105094453387683, "learning_rate": 7.219251336898397e-07, "loss": 1.1292, "step": 28 }, { "epoch": 0.03879598662207358, "grad_norm": 2.376722212697692, "learning_rate": 7.486631016042781e-07, "loss": 0.9469, "step": 29 }, { "epoch": 0.04013377926421405, "grad_norm": 1.9900460583068758, "learning_rate": 7.754010695187167e-07, "loss": 0.9585, "step": 30 }, { "epoch": 0.04147157190635452, "grad_norm": 2.5775959398010237, "learning_rate": 8.021390374331551e-07, "loss": 0.9432, "step": 31 }, { "epoch": 0.04280936454849498, "grad_norm": 2.0749513191024, "learning_rate": 8.288770053475937e-07, "loss": 0.959, "step": 32 }, { "epoch": 0.04414715719063545, "grad_norm": 1.9716141091828596, "learning_rate": 8.556149732620321e-07, "loss": 0.9468, "step": 33 }, { "epoch": 0.04548494983277592, "grad_norm": 1.9864977008642357, "learning_rate": 8.823529411764707e-07, "loss": 0.9623, "step": 34 }, { "epoch": 0.046822742474916385, "grad_norm": 2.0334225235198957, "learning_rate": 9.090909090909091e-07, "loss": 0.9289, "step": 35 }, { "epoch": 0.048160535117056855, "grad_norm": 1.933721594229697, "learning_rate": 9.358288770053477e-07, "loss": 0.9976, "step": 36 }, { "epoch": 0.049498327759197325, "grad_norm": 1.5449701837183902, "learning_rate": 9.625668449197862e-07, "loss": 0.9533, "step": 37 }, { "epoch": 0.050836120401337795, "grad_norm": 1.9335421316879244, "learning_rate": 9.893048128342248e-07, "loss": 1.0073, "step": 38 }, { "epoch": 0.05217391304347826, "grad_norm": 2.6319834887582934, "learning_rate": 1.0160427807486633e-06, "loss": 1.0522, "step": 39 }, { "epoch": 0.05351170568561873, "grad_norm": 2.0314036026646027, "learning_rate": 1.0427807486631017e-06, "loss": 0.9388, "step": 40 }, { "epoch": 0.0548494983277592, "grad_norm": 2.0856233717073094, "learning_rate": 1.0695187165775401e-06, "loss": 0.9518, "step": 41 }, { "epoch": 0.05618729096989967, "grad_norm": 1.7660095837681888, "learning_rate": 1.0962566844919787e-06, "loss": 0.9093, "step": 42 }, { "epoch": 0.05752508361204013, "grad_norm": 1.9005937668739956, "learning_rate": 1.1229946524064172e-06, "loss": 0.8974, "step": 43 }, { "epoch": 0.0588628762541806, "grad_norm": 2.011634790636646, "learning_rate": 1.1497326203208558e-06, "loss": 0.991, "step": 44 }, { "epoch": 0.06020066889632107, "grad_norm": 1.8534282405143565, "learning_rate": 1.1764705882352942e-06, "loss": 0.9327, "step": 45 }, { "epoch": 0.06153846153846154, "grad_norm": 1.762043051753459, "learning_rate": 1.2032085561497326e-06, "loss": 0.9768, "step": 46 }, { "epoch": 0.06287625418060201, "grad_norm": 1.9007576840385252, "learning_rate": 1.2299465240641713e-06, "loss": 0.9367, "step": 47 }, { "epoch": 0.06421404682274247, "grad_norm": 1.5705876994049355, "learning_rate": 1.2566844919786097e-06, "loss": 0.8876, "step": 48 }, { "epoch": 0.06555183946488294, "grad_norm": 1.5282281428283275, "learning_rate": 1.2834224598930483e-06, "loss": 0.9458, "step": 49 }, { "epoch": 0.06688963210702341, "grad_norm": 1.6131281934431567, "learning_rate": 1.3101604278074868e-06, "loss": 0.9962, "step": 50 }, { "epoch": 0.06822742474916388, "grad_norm": 1.627853228798658, "learning_rate": 1.3368983957219254e-06, "loss": 0.9917, "step": 51 }, { "epoch": 0.06956521739130435, "grad_norm": 1.9077606064713974, "learning_rate": 1.3636363636363636e-06, "loss": 0.9417, "step": 52 }, { "epoch": 0.07090301003344482, "grad_norm": 1.468413867227117, "learning_rate": 1.3903743315508022e-06, "loss": 1.0608, "step": 53 }, { "epoch": 0.07224080267558529, "grad_norm": 1.6159036417496382, "learning_rate": 1.4171122994652409e-06, "loss": 1.1052, "step": 54 }, { "epoch": 0.07357859531772576, "grad_norm": 1.4942323813596345, "learning_rate": 1.4438502673796793e-06, "loss": 1.0127, "step": 55 }, { "epoch": 0.07491638795986622, "grad_norm": 1.7700182543378875, "learning_rate": 1.4705882352941177e-06, "loss": 1.0152, "step": 56 }, { "epoch": 0.07625418060200669, "grad_norm": 1.1838322000682198, "learning_rate": 1.4973262032085562e-06, "loss": 0.9107, "step": 57 }, { "epoch": 0.07759197324414716, "grad_norm": 1.2864311112872926, "learning_rate": 1.5240641711229948e-06, "loss": 0.873, "step": 58 }, { "epoch": 0.07892976588628763, "grad_norm": 1.3048174632972713, "learning_rate": 1.5508021390374334e-06, "loss": 0.892, "step": 59 }, { "epoch": 0.0802675585284281, "grad_norm": 1.2016067566097997, "learning_rate": 1.5775401069518716e-06, "loss": 0.9159, "step": 60 }, { "epoch": 0.08160535117056857, "grad_norm": 1.229925864844518, "learning_rate": 1.6042780748663103e-06, "loss": 0.8868, "step": 61 }, { "epoch": 0.08294314381270904, "grad_norm": 1.1994969499047456, "learning_rate": 1.631016042780749e-06, "loss": 0.9137, "step": 62 }, { "epoch": 0.08428093645484949, "grad_norm": 1.2391570855614096, "learning_rate": 1.6577540106951873e-06, "loss": 1.0021, "step": 63 }, { "epoch": 0.08561872909698996, "grad_norm": 1.0790539516252011, "learning_rate": 1.684491978609626e-06, "loss": 0.8464, "step": 64 }, { "epoch": 0.08695652173913043, "grad_norm": 1.5685371282657372, "learning_rate": 1.7112299465240642e-06, "loss": 0.9787, "step": 65 }, { "epoch": 0.0882943143812709, "grad_norm": 1.0579516293302924, "learning_rate": 1.7379679144385028e-06, "loss": 0.7923, "step": 66 }, { "epoch": 0.08963210702341137, "grad_norm": 1.3975028353942327, "learning_rate": 1.7647058823529414e-06, "loss": 1.0296, "step": 67 }, { "epoch": 0.09096989966555184, "grad_norm": 1.0177067114618514, "learning_rate": 1.7914438502673799e-06, "loss": 0.8029, "step": 68 }, { "epoch": 0.09230769230769231, "grad_norm": 1.266487516263961, "learning_rate": 1.8181818181818183e-06, "loss": 0.9702, "step": 69 }, { "epoch": 0.09364548494983277, "grad_norm": 1.0820695347096654, "learning_rate": 1.8449197860962567e-06, "loss": 0.8641, "step": 70 }, { "epoch": 0.09498327759197324, "grad_norm": 1.2136049524549672, "learning_rate": 1.8716577540106954e-06, "loss": 0.8575, "step": 71 }, { "epoch": 0.09632107023411371, "grad_norm": 1.3736351935928732, "learning_rate": 1.898395721925134e-06, "loss": 1.0145, "step": 72 }, { "epoch": 0.09765886287625418, "grad_norm": 1.0212629831192908, "learning_rate": 1.9251336898395724e-06, "loss": 0.7621, "step": 73 }, { "epoch": 0.09899665551839465, "grad_norm": 1.062349156020465, "learning_rate": 1.951871657754011e-06, "loss": 0.87, "step": 74 }, { "epoch": 0.10033444816053512, "grad_norm": 1.0339115744949383, "learning_rate": 1.9786096256684497e-06, "loss": 0.8839, "step": 75 }, { "epoch": 0.10167224080267559, "grad_norm": 1.0054353922687012, "learning_rate": 2.0053475935828877e-06, "loss": 0.8845, "step": 76 }, { "epoch": 0.10301003344481606, "grad_norm": 0.9928331369006105, "learning_rate": 2.0320855614973265e-06, "loss": 0.9148, "step": 77 }, { "epoch": 0.10434782608695652, "grad_norm": 1.6993559565139011, "learning_rate": 2.058823529411765e-06, "loss": 0.9484, "step": 78 }, { "epoch": 0.10568561872909699, "grad_norm": 1.2287771848866011, "learning_rate": 2.0855614973262034e-06, "loss": 0.8514, "step": 79 }, { "epoch": 0.10702341137123746, "grad_norm": 1.0463420431526307, "learning_rate": 2.112299465240642e-06, "loss": 0.8509, "step": 80 }, { "epoch": 0.10836120401337793, "grad_norm": 1.0831678810450969, "learning_rate": 2.1390374331550802e-06, "loss": 0.8448, "step": 81 }, { "epoch": 0.1096989966555184, "grad_norm": 1.0574487645138468, "learning_rate": 2.165775401069519e-06, "loss": 0.8052, "step": 82 }, { "epoch": 0.11103678929765887, "grad_norm": 1.3571444228395675, "learning_rate": 2.1925133689839575e-06, "loss": 0.8152, "step": 83 }, { "epoch": 0.11237458193979934, "grad_norm": 1.0505429628414498, "learning_rate": 2.219251336898396e-06, "loss": 0.8363, "step": 84 }, { "epoch": 0.11371237458193979, "grad_norm": 1.038113299565345, "learning_rate": 2.2459893048128343e-06, "loss": 0.851, "step": 85 }, { "epoch": 0.11505016722408026, "grad_norm": 1.4558931336210472, "learning_rate": 2.2727272727272728e-06, "loss": 1.0758, "step": 86 }, { "epoch": 0.11638795986622073, "grad_norm": 1.0273732942063736, "learning_rate": 2.2994652406417116e-06, "loss": 0.7341, "step": 87 }, { "epoch": 0.1177257525083612, "grad_norm": 1.0797250391414512, "learning_rate": 2.32620320855615e-06, "loss": 0.8979, "step": 88 }, { "epoch": 0.11906354515050167, "grad_norm": 1.2157341923748606, "learning_rate": 2.3529411764705885e-06, "loss": 0.8635, "step": 89 }, { "epoch": 0.12040133779264214, "grad_norm": 1.1359136544397592, "learning_rate": 2.379679144385027e-06, "loss": 0.8618, "step": 90 }, { "epoch": 0.12173913043478261, "grad_norm": 1.1156846381106096, "learning_rate": 2.4064171122994653e-06, "loss": 0.8399, "step": 91 }, { "epoch": 0.12307692307692308, "grad_norm": 0.9172188149731915, "learning_rate": 2.433155080213904e-06, "loss": 0.7737, "step": 92 }, { "epoch": 0.12441471571906354, "grad_norm": 0.926633021683806, "learning_rate": 2.4598930481283426e-06, "loss": 0.8437, "step": 93 }, { "epoch": 0.12575250836120402, "grad_norm": 0.9360473950364757, "learning_rate": 2.486631016042781e-06, "loss": 0.9189, "step": 94 }, { "epoch": 0.12709030100334448, "grad_norm": 1.1730303669440025, "learning_rate": 2.5133689839572194e-06, "loss": 0.8637, "step": 95 }, { "epoch": 0.12842809364548494, "grad_norm": 0.9888171831019306, "learning_rate": 2.5401069518716583e-06, "loss": 0.8106, "step": 96 }, { "epoch": 0.12976588628762542, "grad_norm": 1.3636294281679404, "learning_rate": 2.5668449197860967e-06, "loss": 0.9112, "step": 97 }, { "epoch": 0.13110367892976588, "grad_norm": 1.1098041288735345, "learning_rate": 2.5935828877005347e-06, "loss": 0.7818, "step": 98 }, { "epoch": 0.13244147157190636, "grad_norm": 0.8028265884838518, "learning_rate": 2.6203208556149735e-06, "loss": 0.712, "step": 99 }, { "epoch": 0.13377926421404682, "grad_norm": 1.051264264832606, "learning_rate": 2.647058823529412e-06, "loss": 0.866, "step": 100 }, { "epoch": 0.1351170568561873, "grad_norm": 1.11679591893264, "learning_rate": 2.673796791443851e-06, "loss": 0.92, "step": 101 }, { "epoch": 0.13645484949832776, "grad_norm": 1.1469690025487778, "learning_rate": 2.7005347593582892e-06, "loss": 0.8519, "step": 102 }, { "epoch": 0.13779264214046824, "grad_norm": 0.8859286926713444, "learning_rate": 2.7272727272727272e-06, "loss": 0.9101, "step": 103 }, { "epoch": 0.1391304347826087, "grad_norm": 1.400632329170443, "learning_rate": 2.754010695187166e-06, "loss": 1.0297, "step": 104 }, { "epoch": 0.14046822742474915, "grad_norm": 1.1246426233062796, "learning_rate": 2.7807486631016045e-06, "loss": 0.8854, "step": 105 }, { "epoch": 0.14180602006688964, "grad_norm": 0.9316700895750073, "learning_rate": 2.807486631016043e-06, "loss": 0.8383, "step": 106 }, { "epoch": 0.1431438127090301, "grad_norm": 1.2698881387943322, "learning_rate": 2.8342245989304818e-06, "loss": 0.8838, "step": 107 }, { "epoch": 0.14448160535117058, "grad_norm": 0.9689561272459126, "learning_rate": 2.8609625668449198e-06, "loss": 0.967, "step": 108 }, { "epoch": 0.14581939799331103, "grad_norm": 1.0215115408527753, "learning_rate": 2.8877005347593586e-06, "loss": 0.8442, "step": 109 }, { "epoch": 0.14715719063545152, "grad_norm": 1.1548338601274062, "learning_rate": 2.914438502673797e-06, "loss": 0.816, "step": 110 }, { "epoch": 0.14849498327759197, "grad_norm": 1.1148004161065879, "learning_rate": 2.9411764705882355e-06, "loss": 0.8435, "step": 111 }, { "epoch": 0.14983277591973243, "grad_norm": 1.091547079663197, "learning_rate": 2.9679144385026743e-06, "loss": 0.9947, "step": 112 }, { "epoch": 0.15117056856187291, "grad_norm": 0.9844161748248134, "learning_rate": 2.9946524064171123e-06, "loss": 0.8321, "step": 113 }, { "epoch": 0.15250836120401337, "grad_norm": 0.8471188722994232, "learning_rate": 3.0213903743315507e-06, "loss": 0.8454, "step": 114 }, { "epoch": 0.15384615384615385, "grad_norm": 0.97131136984459, "learning_rate": 3.0481283422459896e-06, "loss": 0.8343, "step": 115 }, { "epoch": 0.1551839464882943, "grad_norm": 0.9085967034667118, "learning_rate": 3.074866310160428e-06, "loss": 0.8429, "step": 116 }, { "epoch": 0.1565217391304348, "grad_norm": 0.9688441814434292, "learning_rate": 3.101604278074867e-06, "loss": 0.74, "step": 117 }, { "epoch": 0.15785953177257525, "grad_norm": 1.3566313688219835, "learning_rate": 3.128342245989305e-06, "loss": 0.8803, "step": 118 }, { "epoch": 0.1591973244147157, "grad_norm": 0.8997803895235135, "learning_rate": 3.1550802139037433e-06, "loss": 0.8058, "step": 119 }, { "epoch": 0.1605351170568562, "grad_norm": 1.2720389155081744, "learning_rate": 3.181818181818182e-06, "loss": 0.8391, "step": 120 }, { "epoch": 0.16187290969899665, "grad_norm": 0.9754711341904101, "learning_rate": 3.2085561497326205e-06, "loss": 0.89, "step": 121 }, { "epoch": 0.16321070234113713, "grad_norm": 0.9120433797566744, "learning_rate": 3.2352941176470594e-06, "loss": 0.7764, "step": 122 }, { "epoch": 0.1645484949832776, "grad_norm": 1.0652178112663513, "learning_rate": 3.262032085561498e-06, "loss": 0.9042, "step": 123 }, { "epoch": 0.16588628762541807, "grad_norm": 0.8976528257342651, "learning_rate": 3.288770053475936e-06, "loss": 0.9376, "step": 124 }, { "epoch": 0.16722408026755853, "grad_norm": 0.9718098868974586, "learning_rate": 3.3155080213903747e-06, "loss": 0.7802, "step": 125 }, { "epoch": 0.16856187290969898, "grad_norm": 1.5339311054850115, "learning_rate": 3.342245989304813e-06, "loss": 0.9, "step": 126 }, { "epoch": 0.16989966555183947, "grad_norm": 1.0191538532300253, "learning_rate": 3.368983957219252e-06, "loss": 0.9104, "step": 127 }, { "epoch": 0.17123745819397992, "grad_norm": 0.9602508483559264, "learning_rate": 3.3957219251336904e-06, "loss": 0.8606, "step": 128 }, { "epoch": 0.1725752508361204, "grad_norm": 0.786968355504978, "learning_rate": 3.4224598930481284e-06, "loss": 0.8314, "step": 129 }, { "epoch": 0.17391304347826086, "grad_norm": 0.8598727289267617, "learning_rate": 3.449197860962567e-06, "loss": 0.8568, "step": 130 }, { "epoch": 0.17525083612040135, "grad_norm": 0.9348593119430254, "learning_rate": 3.4759358288770056e-06, "loss": 0.8591, "step": 131 }, { "epoch": 0.1765886287625418, "grad_norm": 1.2484971616561633, "learning_rate": 3.5026737967914445e-06, "loss": 0.8807, "step": 132 }, { "epoch": 0.17792642140468226, "grad_norm": 1.1129185163916502, "learning_rate": 3.529411764705883e-06, "loss": 0.838, "step": 133 }, { "epoch": 0.17926421404682275, "grad_norm": 0.8694044925853882, "learning_rate": 3.556149732620321e-06, "loss": 0.7181, "step": 134 }, { "epoch": 0.1806020066889632, "grad_norm": 0.9210087276004763, "learning_rate": 3.5828877005347597e-06, "loss": 0.7351, "step": 135 }, { "epoch": 0.18193979933110369, "grad_norm": 0.9609885545319682, "learning_rate": 3.609625668449198e-06, "loss": 0.7886, "step": 136 }, { "epoch": 0.18327759197324414, "grad_norm": 1.3634089720182487, "learning_rate": 3.6363636363636366e-06, "loss": 1.0025, "step": 137 }, { "epoch": 0.18461538461538463, "grad_norm": 1.0633905393504577, "learning_rate": 3.6631016042780754e-06, "loss": 0.9899, "step": 138 }, { "epoch": 0.18595317725752508, "grad_norm": 1.0888467033909266, "learning_rate": 3.6898395721925134e-06, "loss": 0.8273, "step": 139 }, { "epoch": 0.18729096989966554, "grad_norm": 0.822777137513094, "learning_rate": 3.716577540106952e-06, "loss": 0.7931, "step": 140 }, { "epoch": 0.18862876254180602, "grad_norm": 0.9363225414591415, "learning_rate": 3.7433155080213907e-06, "loss": 0.7333, "step": 141 }, { "epoch": 0.18996655518394648, "grad_norm": 0.8992711836069108, "learning_rate": 3.770053475935829e-06, "loss": 0.7499, "step": 142 }, { "epoch": 0.19130434782608696, "grad_norm": 1.2288912644263246, "learning_rate": 3.796791443850268e-06, "loss": 0.9082, "step": 143 }, { "epoch": 0.19264214046822742, "grad_norm": 1.2330867315144167, "learning_rate": 3.8235294117647055e-06, "loss": 0.8973, "step": 144 }, { "epoch": 0.1939799331103679, "grad_norm": 0.8925670227568923, "learning_rate": 3.850267379679145e-06, "loss": 0.8591, "step": 145 }, { "epoch": 0.19531772575250836, "grad_norm": 1.2920204470348093, "learning_rate": 3.877005347593583e-06, "loss": 0.9189, "step": 146 }, { "epoch": 0.19665551839464884, "grad_norm": 1.1160618658773087, "learning_rate": 3.903743315508022e-06, "loss": 0.8312, "step": 147 }, { "epoch": 0.1979933110367893, "grad_norm": 0.9059950787117953, "learning_rate": 3.93048128342246e-06, "loss": 0.7689, "step": 148 }, { "epoch": 0.19933110367892976, "grad_norm": 0.9525918508278345, "learning_rate": 3.957219251336899e-06, "loss": 0.8097, "step": 149 }, { "epoch": 0.20066889632107024, "grad_norm": 1.170797719487341, "learning_rate": 3.983957219251337e-06, "loss": 0.9051, "step": 150 }, { "epoch": 0.2020066889632107, "grad_norm": 1.0824245141965827, "learning_rate": 4.010695187165775e-06, "loss": 0.8647, "step": 151 }, { "epoch": 0.20334448160535118, "grad_norm": 2.051248876736371, "learning_rate": 4.037433155080215e-06, "loss": 0.9468, "step": 152 }, { "epoch": 0.20468227424749164, "grad_norm": 1.059828562816712, "learning_rate": 4.064171122994653e-06, "loss": 0.9126, "step": 153 }, { "epoch": 0.20602006688963212, "grad_norm": 1.0787659947059414, "learning_rate": 4.0909090909090915e-06, "loss": 0.9407, "step": 154 }, { "epoch": 0.20735785953177258, "grad_norm": 1.6982363761853583, "learning_rate": 4.11764705882353e-06, "loss": 0.9084, "step": 155 }, { "epoch": 0.20869565217391303, "grad_norm": 1.4599297813908798, "learning_rate": 4.144385026737968e-06, "loss": 0.9583, "step": 156 }, { "epoch": 0.21003344481605352, "grad_norm": 1.0258066924024372, "learning_rate": 4.171122994652407e-06, "loss": 0.758, "step": 157 }, { "epoch": 0.21137123745819397, "grad_norm": 1.290121648092725, "learning_rate": 4.197860962566845e-06, "loss": 0.8293, "step": 158 }, { "epoch": 0.21270903010033446, "grad_norm": 0.8771836924764271, "learning_rate": 4.224598930481284e-06, "loss": 0.743, "step": 159 }, { "epoch": 0.2140468227424749, "grad_norm": 1.2230800108259272, "learning_rate": 4.251336898395722e-06, "loss": 0.8618, "step": 160 }, { "epoch": 0.2153846153846154, "grad_norm": 1.4475396411466466, "learning_rate": 4.2780748663101604e-06, "loss": 0.8493, "step": 161 }, { "epoch": 0.21672240802675585, "grad_norm": 1.2067806512937118, "learning_rate": 4.304812834224599e-06, "loss": 0.8971, "step": 162 }, { "epoch": 0.2180602006688963, "grad_norm": 0.8305045767557514, "learning_rate": 4.331550802139038e-06, "loss": 0.5994, "step": 163 }, { "epoch": 0.2193979933110368, "grad_norm": 1.5987623627404155, "learning_rate": 4.3582887700534766e-06, "loss": 0.9945, "step": 164 }, { "epoch": 0.22073578595317725, "grad_norm": 1.0933951940430426, "learning_rate": 4.385026737967915e-06, "loss": 0.8, "step": 165 }, { "epoch": 0.22207357859531773, "grad_norm": 1.058263910044373, "learning_rate": 4.411764705882353e-06, "loss": 0.8746, "step": 166 }, { "epoch": 0.2234113712374582, "grad_norm": 1.3349968859839787, "learning_rate": 4.438502673796792e-06, "loss": 0.8149, "step": 167 }, { "epoch": 0.22474916387959867, "grad_norm": 0.8847971876325429, "learning_rate": 4.46524064171123e-06, "loss": 0.7871, "step": 168 }, { "epoch": 0.22608695652173913, "grad_norm": 0.8674300757163639, "learning_rate": 4.491978609625669e-06, "loss": 0.7146, "step": 169 }, { "epoch": 0.22742474916387959, "grad_norm": 0.9702045651447103, "learning_rate": 4.518716577540107e-06, "loss": 0.8148, "step": 170 }, { "epoch": 0.22876254180602007, "grad_norm": 1.0606904797044934, "learning_rate": 4.5454545454545455e-06, "loss": 0.8319, "step": 171 }, { "epoch": 0.23010033444816053, "grad_norm": 1.243354702450188, "learning_rate": 4.572192513368984e-06, "loss": 0.866, "step": 172 }, { "epoch": 0.231438127090301, "grad_norm": 1.2761283765175806, "learning_rate": 4.598930481283423e-06, "loss": 0.9011, "step": 173 }, { "epoch": 0.23277591973244147, "grad_norm": 1.1115089185663325, "learning_rate": 4.625668449197862e-06, "loss": 0.8563, "step": 174 }, { "epoch": 0.23411371237458195, "grad_norm": 1.3880177103852658, "learning_rate": 4.6524064171123e-06, "loss": 0.8921, "step": 175 }, { "epoch": 0.2354515050167224, "grad_norm": 0.9351473489328439, "learning_rate": 4.6791443850267385e-06, "loss": 0.8301, "step": 176 }, { "epoch": 0.23678929765886286, "grad_norm": 1.3900398931783902, "learning_rate": 4.705882352941177e-06, "loss": 0.9247, "step": 177 }, { "epoch": 0.23812709030100335, "grad_norm": 0.9760157064504643, "learning_rate": 4.732620320855615e-06, "loss": 0.7642, "step": 178 }, { "epoch": 0.2394648829431438, "grad_norm": 1.3674699747423797, "learning_rate": 4.759358288770054e-06, "loss": 0.872, "step": 179 }, { "epoch": 0.2408026755852843, "grad_norm": 1.063692940766443, "learning_rate": 4.786096256684493e-06, "loss": 0.8298, "step": 180 }, { "epoch": 0.24214046822742474, "grad_norm": 1.0402460932202997, "learning_rate": 4.812834224598931e-06, "loss": 0.7875, "step": 181 }, { "epoch": 0.24347826086956523, "grad_norm": 0.7965050559593347, "learning_rate": 4.839572192513369e-06, "loss": 0.7527, "step": 182 }, { "epoch": 0.24481605351170568, "grad_norm": 1.33131761430513, "learning_rate": 4.866310160427808e-06, "loss": 0.9945, "step": 183 }, { "epoch": 0.24615384615384617, "grad_norm": 0.9042651982504675, "learning_rate": 4.893048128342247e-06, "loss": 0.8026, "step": 184 }, { "epoch": 0.24749163879598662, "grad_norm": 1.350384932376468, "learning_rate": 4.919786096256685e-06, "loss": 0.8143, "step": 185 }, { "epoch": 0.24882943143812708, "grad_norm": 1.2971219032296686, "learning_rate": 4.9465240641711236e-06, "loss": 0.8607, "step": 186 }, { "epoch": 0.25016722408026754, "grad_norm": 1.0517485660729502, "learning_rate": 4.973262032085562e-06, "loss": 0.7102, "step": 187 }, { "epoch": 0.25150501672240805, "grad_norm": 1.1694840437184875, "learning_rate": 5e-06, "loss": 0.8474, "step": 188 }, { "epoch": 0.2528428093645485, "grad_norm": 1.0863918254056437, "learning_rate": 5.026737967914439e-06, "loss": 0.8264, "step": 189 }, { "epoch": 0.25418060200668896, "grad_norm": 1.0218199053818744, "learning_rate": 5.053475935828877e-06, "loss": 0.856, "step": 190 }, { "epoch": 0.2555183946488294, "grad_norm": 0.9445279901974053, "learning_rate": 5.0802139037433165e-06, "loss": 0.7354, "step": 191 }, { "epoch": 0.2568561872909699, "grad_norm": 1.07216143653346, "learning_rate": 5.106951871657755e-06, "loss": 0.8516, "step": 192 }, { "epoch": 0.2581939799331104, "grad_norm": 0.890854966684192, "learning_rate": 5.133689839572193e-06, "loss": 0.7947, "step": 193 }, { "epoch": 0.25953177257525084, "grad_norm": 1.097605462632299, "learning_rate": 5.160427807486631e-06, "loss": 0.7377, "step": 194 }, { "epoch": 0.2608695652173913, "grad_norm": 1.317694109564137, "learning_rate": 5.187165775401069e-06, "loss": 0.8855, "step": 195 }, { "epoch": 0.26220735785953175, "grad_norm": 0.969048578828747, "learning_rate": 5.213903743315508e-06, "loss": 0.8206, "step": 196 }, { "epoch": 0.26354515050167227, "grad_norm": 0.9869187974516757, "learning_rate": 5.240641711229947e-06, "loss": 0.8029, "step": 197 }, { "epoch": 0.2648829431438127, "grad_norm": 0.8570166498963627, "learning_rate": 5.2673796791443855e-06, "loss": 0.7131, "step": 198 }, { "epoch": 0.2662207357859532, "grad_norm": 0.9303023871864873, "learning_rate": 5.294117647058824e-06, "loss": 0.7382, "step": 199 }, { "epoch": 0.26755852842809363, "grad_norm": 0.9705065708189378, "learning_rate": 5.320855614973262e-06, "loss": 0.7857, "step": 200 }, { "epoch": 0.2688963210702341, "grad_norm": 1.4946442267485498, "learning_rate": 5.347593582887702e-06, "loss": 0.915, "step": 201 }, { "epoch": 0.2702341137123746, "grad_norm": 0.9624184209162681, "learning_rate": 5.37433155080214e-06, "loss": 0.8017, "step": 202 }, { "epoch": 0.27157190635451506, "grad_norm": 0.9715167670677858, "learning_rate": 5.4010695187165785e-06, "loss": 0.7648, "step": 203 }, { "epoch": 0.2729096989966555, "grad_norm": 1.0882930735337346, "learning_rate": 5.427807486631016e-06, "loss": 0.7931, "step": 204 }, { "epoch": 0.27424749163879597, "grad_norm": 1.0244196320431183, "learning_rate": 5.4545454545454545e-06, "loss": 0.8548, "step": 205 }, { "epoch": 0.2755852842809365, "grad_norm": 1.0532578627761224, "learning_rate": 5.481283422459893e-06, "loss": 0.7822, "step": 206 }, { "epoch": 0.27692307692307694, "grad_norm": 1.0007404284843104, "learning_rate": 5.508021390374332e-06, "loss": 0.826, "step": 207 }, { "epoch": 0.2782608695652174, "grad_norm": 1.240361077987632, "learning_rate": 5.5347593582887706e-06, "loss": 0.713, "step": 208 }, { "epoch": 0.27959866220735785, "grad_norm": 1.0865928074109263, "learning_rate": 5.561497326203209e-06, "loss": 0.8939, "step": 209 }, { "epoch": 0.2809364548494983, "grad_norm": 1.239270039020005, "learning_rate": 5.588235294117647e-06, "loss": 0.8549, "step": 210 }, { "epoch": 0.2822742474916388, "grad_norm": 1.1988780364284353, "learning_rate": 5.614973262032086e-06, "loss": 0.8535, "step": 211 }, { "epoch": 0.2836120401337793, "grad_norm": 0.8489070503285858, "learning_rate": 5.641711229946525e-06, "loss": 0.6998, "step": 212 }, { "epoch": 0.28494983277591973, "grad_norm": 1.0338714753361717, "learning_rate": 5.6684491978609635e-06, "loss": 0.8124, "step": 213 }, { "epoch": 0.2862876254180602, "grad_norm": 1.3453404615978342, "learning_rate": 5.695187165775401e-06, "loss": 0.8088, "step": 214 }, { "epoch": 0.28762541806020064, "grad_norm": 1.381852242824755, "learning_rate": 5.7219251336898395e-06, "loss": 0.8331, "step": 215 }, { "epoch": 0.28896321070234116, "grad_norm": 1.1069638877948618, "learning_rate": 5.748663101604278e-06, "loss": 0.794, "step": 216 }, { "epoch": 0.2903010033444816, "grad_norm": 1.4722733289441194, "learning_rate": 5.775401069518717e-06, "loss": 0.8171, "step": 217 }, { "epoch": 0.29163879598662207, "grad_norm": 1.314778707806668, "learning_rate": 5.802139037433156e-06, "loss": 0.7518, "step": 218 }, { "epoch": 0.2929765886287625, "grad_norm": 1.0652435349740372, "learning_rate": 5.828877005347594e-06, "loss": 0.8568, "step": 219 }, { "epoch": 0.29431438127090304, "grad_norm": 1.2417732992160615, "learning_rate": 5.8556149732620325e-06, "loss": 0.6678, "step": 220 }, { "epoch": 0.2956521739130435, "grad_norm": 1.248350292560151, "learning_rate": 5.882352941176471e-06, "loss": 0.8472, "step": 221 }, { "epoch": 0.29698996655518395, "grad_norm": 1.0628223410194053, "learning_rate": 5.90909090909091e-06, "loss": 0.7846, "step": 222 }, { "epoch": 0.2983277591973244, "grad_norm": 1.5902616477991165, "learning_rate": 5.935828877005349e-06, "loss": 0.7006, "step": 223 }, { "epoch": 0.29966555183946486, "grad_norm": 1.2516914370479306, "learning_rate": 5.962566844919787e-06, "loss": 0.7769, "step": 224 }, { "epoch": 0.3010033444816054, "grad_norm": 1.1607637123185148, "learning_rate": 5.989304812834225e-06, "loss": 0.822, "step": 225 }, { "epoch": 0.30234113712374583, "grad_norm": 0.7719067702060238, "learning_rate": 6.016042780748663e-06, "loss": 0.5901, "step": 226 }, { "epoch": 0.3036789297658863, "grad_norm": 1.1502645067971662, "learning_rate": 6.0427807486631015e-06, "loss": 0.7857, "step": 227 }, { "epoch": 0.30501672240802674, "grad_norm": 1.1693000162686173, "learning_rate": 6.069518716577541e-06, "loss": 0.7777, "step": 228 }, { "epoch": 0.3063545150501672, "grad_norm": 1.0657091712962299, "learning_rate": 6.096256684491979e-06, "loss": 0.7738, "step": 229 }, { "epoch": 0.3076923076923077, "grad_norm": 1.253644745645477, "learning_rate": 6.122994652406418e-06, "loss": 0.7566, "step": 230 }, { "epoch": 0.30903010033444817, "grad_norm": 0.968576621556058, "learning_rate": 6.149732620320856e-06, "loss": 0.8427, "step": 231 }, { "epoch": 0.3103678929765886, "grad_norm": 0.981200335556971, "learning_rate": 6.176470588235295e-06, "loss": 0.7036, "step": 232 }, { "epoch": 0.3117056856187291, "grad_norm": 1.298583692154219, "learning_rate": 6.203208556149734e-06, "loss": 0.8291, "step": 233 }, { "epoch": 0.3130434782608696, "grad_norm": 1.0755567252956262, "learning_rate": 6.229946524064172e-06, "loss": 0.7005, "step": 234 }, { "epoch": 0.31438127090301005, "grad_norm": 0.8678405636165536, "learning_rate": 6.25668449197861e-06, "loss": 0.7189, "step": 235 }, { "epoch": 0.3157190635451505, "grad_norm": 0.9163123252333754, "learning_rate": 6.283422459893048e-06, "loss": 0.8356, "step": 236 }, { "epoch": 0.31705685618729096, "grad_norm": 0.9908733617509841, "learning_rate": 6.3101604278074865e-06, "loss": 0.835, "step": 237 }, { "epoch": 0.3183946488294314, "grad_norm": 1.0617466442638301, "learning_rate": 6.336898395721926e-06, "loss": 0.8202, "step": 238 }, { "epoch": 0.3197324414715719, "grad_norm": 1.0160899021609813, "learning_rate": 6.363636363636364e-06, "loss": 0.8092, "step": 239 }, { "epoch": 0.3210702341137124, "grad_norm": 0.9077714077885142, "learning_rate": 6.390374331550803e-06, "loss": 0.7476, "step": 240 }, { "epoch": 0.32240802675585284, "grad_norm": 1.4170840976251275, "learning_rate": 6.417112299465241e-06, "loss": 0.8719, "step": 241 }, { "epoch": 0.3237458193979933, "grad_norm": 1.017885339112884, "learning_rate": 6.4438502673796795e-06, "loss": 0.7492, "step": 242 }, { "epoch": 0.3250836120401338, "grad_norm": 0.8750287772792433, "learning_rate": 6.470588235294119e-06, "loss": 0.7186, "step": 243 }, { "epoch": 0.32642140468227426, "grad_norm": 1.300142874923966, "learning_rate": 6.497326203208557e-06, "loss": 0.6788, "step": 244 }, { "epoch": 0.3277591973244147, "grad_norm": 0.797651145538199, "learning_rate": 6.524064171122996e-06, "loss": 0.7311, "step": 245 }, { "epoch": 0.3290969899665552, "grad_norm": 1.4189771624485765, "learning_rate": 6.550802139037433e-06, "loss": 0.9633, "step": 246 }, { "epoch": 0.33043478260869563, "grad_norm": 1.1151999028069053, "learning_rate": 6.577540106951872e-06, "loss": 0.8157, "step": 247 }, { "epoch": 0.33177257525083614, "grad_norm": 1.2031946604537214, "learning_rate": 6.60427807486631e-06, "loss": 0.8873, "step": 248 }, { "epoch": 0.3331103678929766, "grad_norm": 0.9800773862235514, "learning_rate": 6.631016042780749e-06, "loss": 0.7424, "step": 249 }, { "epoch": 0.33444816053511706, "grad_norm": 1.2356289787372106, "learning_rate": 6.657754010695188e-06, "loss": 0.8408, "step": 250 }, { "epoch": 0.3357859531772575, "grad_norm": 1.04161049118866, "learning_rate": 6.684491978609626e-06, "loss": 0.8017, "step": 251 }, { "epoch": 0.33712374581939797, "grad_norm": 1.288818027538431, "learning_rate": 6.711229946524065e-06, "loss": 0.8429, "step": 252 }, { "epoch": 0.3384615384615385, "grad_norm": 1.0997645662180697, "learning_rate": 6.737967914438504e-06, "loss": 0.827, "step": 253 }, { "epoch": 0.33979933110367894, "grad_norm": 1.044504844120835, "learning_rate": 6.764705882352942e-06, "loss": 0.7355, "step": 254 }, { "epoch": 0.3411371237458194, "grad_norm": 0.9623249132194232, "learning_rate": 6.791443850267381e-06, "loss": 0.7989, "step": 255 }, { "epoch": 0.34247491638795985, "grad_norm": 1.174616762365448, "learning_rate": 6.818181818181818e-06, "loss": 0.8511, "step": 256 }, { "epoch": 0.34381270903010036, "grad_norm": 1.0073290117526987, "learning_rate": 6.844919786096257e-06, "loss": 0.7924, "step": 257 }, { "epoch": 0.3451505016722408, "grad_norm": 0.7697869892907622, "learning_rate": 6.871657754010695e-06, "loss": 0.6829, "step": 258 }, { "epoch": 0.3464882943143813, "grad_norm": 0.9867747013109816, "learning_rate": 6.898395721925134e-06, "loss": 0.7387, "step": 259 }, { "epoch": 0.34782608695652173, "grad_norm": 0.9849487103013835, "learning_rate": 6.925133689839573e-06, "loss": 0.8035, "step": 260 }, { "epoch": 0.3491638795986622, "grad_norm": 0.9137369359397817, "learning_rate": 6.951871657754011e-06, "loss": 0.7783, "step": 261 }, { "epoch": 0.3505016722408027, "grad_norm": 1.2059457694274331, "learning_rate": 6.97860962566845e-06, "loss": 0.8144, "step": 262 }, { "epoch": 0.35183946488294315, "grad_norm": 1.0145810941936102, "learning_rate": 7.005347593582889e-06, "loss": 0.8346, "step": 263 }, { "epoch": 0.3531772575250836, "grad_norm": 1.1931480642309062, "learning_rate": 7.032085561497327e-06, "loss": 0.8544, "step": 264 }, { "epoch": 0.35451505016722407, "grad_norm": 1.0329316680338936, "learning_rate": 7.058823529411766e-06, "loss": 0.6778, "step": 265 }, { "epoch": 0.3558528428093645, "grad_norm": 0.9130053526709693, "learning_rate": 7.085561497326203e-06, "loss": 0.7126, "step": 266 }, { "epoch": 0.35719063545150503, "grad_norm": 0.730230228772404, "learning_rate": 7.112299465240642e-06, "loss": 0.6969, "step": 267 }, { "epoch": 0.3585284280936455, "grad_norm": 1.5083385749421403, "learning_rate": 7.13903743315508e-06, "loss": 0.969, "step": 268 }, { "epoch": 0.35986622073578595, "grad_norm": 0.9667729075130154, "learning_rate": 7.1657754010695195e-06, "loss": 0.6985, "step": 269 }, { "epoch": 0.3612040133779264, "grad_norm": 1.2784234997048811, "learning_rate": 7.192513368983958e-06, "loss": 0.6701, "step": 270 }, { "epoch": 0.3625418060200669, "grad_norm": 1.354608989987979, "learning_rate": 7.219251336898396e-06, "loss": 0.9068, "step": 271 }, { "epoch": 0.36387959866220737, "grad_norm": 0.9392201652222124, "learning_rate": 7.245989304812835e-06, "loss": 0.799, "step": 272 }, { "epoch": 0.3652173913043478, "grad_norm": 1.028701492481212, "learning_rate": 7.272727272727273e-06, "loss": 0.868, "step": 273 }, { "epoch": 0.3665551839464883, "grad_norm": 1.0348230884501048, "learning_rate": 7.2994652406417124e-06, "loss": 0.8324, "step": 274 }, { "epoch": 0.36789297658862874, "grad_norm": 1.346705301980157, "learning_rate": 7.326203208556151e-06, "loss": 0.8468, "step": 275 }, { "epoch": 0.36923076923076925, "grad_norm": 1.1664983162409515, "learning_rate": 7.352941176470589e-06, "loss": 0.8871, "step": 276 }, { "epoch": 0.3705685618729097, "grad_norm": 1.2590075563010288, "learning_rate": 7.379679144385027e-06, "loss": 0.7677, "step": 277 }, { "epoch": 0.37190635451505016, "grad_norm": 1.157435950135683, "learning_rate": 7.406417112299465e-06, "loss": 0.6854, "step": 278 }, { "epoch": 0.3732441471571906, "grad_norm": 1.3689888343539693, "learning_rate": 7.433155080213904e-06, "loss": 0.8547, "step": 279 }, { "epoch": 0.3745819397993311, "grad_norm": 1.0517391986878584, "learning_rate": 7.459893048128343e-06, "loss": 0.882, "step": 280 }, { "epoch": 0.3759197324414716, "grad_norm": 1.1371968572945703, "learning_rate": 7.486631016042781e-06, "loss": 0.7252, "step": 281 }, { "epoch": 0.37725752508361204, "grad_norm": 1.0335896700403238, "learning_rate": 7.51336898395722e-06, "loss": 0.7657, "step": 282 }, { "epoch": 0.3785953177257525, "grad_norm": 1.0460391132777844, "learning_rate": 7.540106951871658e-06, "loss": 0.8309, "step": 283 }, { "epoch": 0.37993311036789296, "grad_norm": 1.1168753203738513, "learning_rate": 7.5668449197860975e-06, "loss": 0.8495, "step": 284 }, { "epoch": 0.38127090301003347, "grad_norm": 1.1471020410416726, "learning_rate": 7.593582887700536e-06, "loss": 0.7852, "step": 285 }, { "epoch": 0.3826086956521739, "grad_norm": 1.2757645255411707, "learning_rate": 7.620320855614974e-06, "loss": 0.9235, "step": 286 }, { "epoch": 0.3839464882943144, "grad_norm": 1.087094287130164, "learning_rate": 7.647058823529411e-06, "loss": 0.7629, "step": 287 }, { "epoch": 0.38528428093645484, "grad_norm": 1.2506313755148732, "learning_rate": 7.67379679144385e-06, "loss": 0.8683, "step": 288 }, { "epoch": 0.3866220735785953, "grad_norm": 0.9762197008212815, "learning_rate": 7.70053475935829e-06, "loss": 0.7798, "step": 289 }, { "epoch": 0.3879598662207358, "grad_norm": 1.1092148431548492, "learning_rate": 7.727272727272727e-06, "loss": 0.9053, "step": 290 }, { "epoch": 0.38929765886287626, "grad_norm": 1.110064734750379, "learning_rate": 7.754010695187166e-06, "loss": 0.7881, "step": 291 }, { "epoch": 0.3906354515050167, "grad_norm": 1.2552214323701134, "learning_rate": 7.780748663101606e-06, "loss": 0.9549, "step": 292 }, { "epoch": 0.3919732441471572, "grad_norm": 1.1578820817429678, "learning_rate": 7.807486631016043e-06, "loss": 0.8208, "step": 293 }, { "epoch": 0.3933110367892977, "grad_norm": 1.2083914522948587, "learning_rate": 7.834224598930483e-06, "loss": 0.8779, "step": 294 }, { "epoch": 0.39464882943143814, "grad_norm": 0.8835099548531078, "learning_rate": 7.86096256684492e-06, "loss": 0.7558, "step": 295 }, { "epoch": 0.3959866220735786, "grad_norm": 0.9818713219628681, "learning_rate": 7.88770053475936e-06, "loss": 0.6565, "step": 296 }, { "epoch": 0.39732441471571905, "grad_norm": 1.0316499242512907, "learning_rate": 7.914438502673799e-06, "loss": 0.7456, "step": 297 }, { "epoch": 0.3986622073578595, "grad_norm": 1.3473602627406067, "learning_rate": 7.941176470588236e-06, "loss": 0.8467, "step": 298 }, { "epoch": 0.4, "grad_norm": 1.1735660466466404, "learning_rate": 7.967914438502674e-06, "loss": 0.8572, "step": 299 }, { "epoch": 0.4013377926421405, "grad_norm": 1.1977793478093022, "learning_rate": 7.994652406417113e-06, "loss": 0.8717, "step": 300 }, { "epoch": 0.40267558528428093, "grad_norm": 1.4200265527177158, "learning_rate": 8.02139037433155e-06, "loss": 0.9007, "step": 301 }, { "epoch": 0.4040133779264214, "grad_norm": 1.4366388260920049, "learning_rate": 8.04812834224599e-06, "loss": 0.7591, "step": 302 }, { "epoch": 0.40535117056856185, "grad_norm": 1.0151158584409758, "learning_rate": 8.07486631016043e-06, "loss": 0.6822, "step": 303 }, { "epoch": 0.40668896321070236, "grad_norm": 0.9560028067098651, "learning_rate": 8.101604278074867e-06, "loss": 0.7203, "step": 304 }, { "epoch": 0.4080267558528428, "grad_norm": 1.270575839133999, "learning_rate": 8.128342245989306e-06, "loss": 0.8112, "step": 305 }, { "epoch": 0.40936454849498327, "grad_norm": 1.4093440191428086, "learning_rate": 8.155080213903744e-06, "loss": 0.8217, "step": 306 }, { "epoch": 0.4107023411371237, "grad_norm": 0.9610938276814698, "learning_rate": 8.181818181818183e-06, "loss": 0.7997, "step": 307 }, { "epoch": 0.41204013377926424, "grad_norm": 1.160686024142558, "learning_rate": 8.20855614973262e-06, "loss": 0.7527, "step": 308 }, { "epoch": 0.4133779264214047, "grad_norm": 1.058065727995255, "learning_rate": 8.23529411764706e-06, "loss": 0.8044, "step": 309 }, { "epoch": 0.41471571906354515, "grad_norm": 0.9281558251222778, "learning_rate": 8.262032085561497e-06, "loss": 0.6421, "step": 310 }, { "epoch": 0.4160535117056856, "grad_norm": 1.1314281702814468, "learning_rate": 8.288770053475937e-06, "loss": 0.7184, "step": 311 }, { "epoch": 0.41739130434782606, "grad_norm": 1.004809460177363, "learning_rate": 8.315508021390374e-06, "loss": 0.7292, "step": 312 }, { "epoch": 0.4187290969899666, "grad_norm": 1.4083805113648278, "learning_rate": 8.342245989304813e-06, "loss": 0.8666, "step": 313 }, { "epoch": 0.42006688963210703, "grad_norm": 1.5243015543117464, "learning_rate": 8.368983957219253e-06, "loss": 0.9639, "step": 314 }, { "epoch": 0.4214046822742475, "grad_norm": 1.0599038709672737, "learning_rate": 8.39572192513369e-06, "loss": 0.7971, "step": 315 }, { "epoch": 0.42274247491638794, "grad_norm": 0.9295187715322848, "learning_rate": 8.42245989304813e-06, "loss": 0.8149, "step": 316 }, { "epoch": 0.4240802675585284, "grad_norm": 1.1674328533300677, "learning_rate": 8.449197860962567e-06, "loss": 0.8395, "step": 317 }, { "epoch": 0.4254180602006689, "grad_norm": 0.9718977539208824, "learning_rate": 8.475935828877005e-06, "loss": 0.8022, "step": 318 }, { "epoch": 0.42675585284280937, "grad_norm": 1.0397683069544352, "learning_rate": 8.502673796791444e-06, "loss": 0.7291, "step": 319 }, { "epoch": 0.4280936454849498, "grad_norm": 1.3127673854188049, "learning_rate": 8.529411764705883e-06, "loss": 0.9036, "step": 320 }, { "epoch": 0.4294314381270903, "grad_norm": 1.1273288838914497, "learning_rate": 8.556149732620321e-06, "loss": 0.6249, "step": 321 }, { "epoch": 0.4307692307692308, "grad_norm": 1.1109566569195661, "learning_rate": 8.58288770053476e-06, "loss": 0.8116, "step": 322 }, { "epoch": 0.43210702341137125, "grad_norm": 1.3531095474512809, "learning_rate": 8.609625668449198e-06, "loss": 0.8517, "step": 323 }, { "epoch": 0.4334448160535117, "grad_norm": 0.9597330287769932, "learning_rate": 8.636363636363637e-06, "loss": 0.6951, "step": 324 }, { "epoch": 0.43478260869565216, "grad_norm": 1.2303001671820695, "learning_rate": 8.663101604278076e-06, "loss": 0.7917, "step": 325 }, { "epoch": 0.4361204013377926, "grad_norm": 1.3574939535981787, "learning_rate": 8.689839572192514e-06, "loss": 0.7775, "step": 326 }, { "epoch": 0.43745819397993313, "grad_norm": 1.271436149603729, "learning_rate": 8.716577540106953e-06, "loss": 0.7905, "step": 327 }, { "epoch": 0.4387959866220736, "grad_norm": 0.8642962006624595, "learning_rate": 8.743315508021392e-06, "loss": 0.6777, "step": 328 }, { "epoch": 0.44013377926421404, "grad_norm": 1.422528543404114, "learning_rate": 8.77005347593583e-06, "loss": 0.9195, "step": 329 }, { "epoch": 0.4414715719063545, "grad_norm": 1.375616783306796, "learning_rate": 8.796791443850268e-06, "loss": 0.9031, "step": 330 }, { "epoch": 0.442809364548495, "grad_norm": 1.420837639885293, "learning_rate": 8.823529411764707e-06, "loss": 0.8919, "step": 331 }, { "epoch": 0.44414715719063547, "grad_norm": 1.2695390877445398, "learning_rate": 8.850267379679144e-06, "loss": 0.7762, "step": 332 }, { "epoch": 0.4454849498327759, "grad_norm": 1.2886436510981405, "learning_rate": 8.877005347593584e-06, "loss": 0.9079, "step": 333 }, { "epoch": 0.4468227424749164, "grad_norm": 1.2968572144180457, "learning_rate": 8.903743315508023e-06, "loss": 0.816, "step": 334 }, { "epoch": 0.44816053511705684, "grad_norm": 1.0565533279245416, "learning_rate": 8.93048128342246e-06, "loss": 0.8139, "step": 335 }, { "epoch": 0.44949832775919735, "grad_norm": 1.12543180320438, "learning_rate": 8.9572192513369e-06, "loss": 0.8299, "step": 336 }, { "epoch": 0.4508361204013378, "grad_norm": 0.8828262927203159, "learning_rate": 8.983957219251337e-06, "loss": 0.7175, "step": 337 }, { "epoch": 0.45217391304347826, "grad_norm": 1.423309034677749, "learning_rate": 9.010695187165777e-06, "loss": 0.7131, "step": 338 }, { "epoch": 0.4535117056856187, "grad_norm": 0.8906089356282173, "learning_rate": 9.037433155080214e-06, "loss": 0.6457, "step": 339 }, { "epoch": 0.45484949832775917, "grad_norm": 1.0320611748141035, "learning_rate": 9.064171122994653e-06, "loss": 0.6909, "step": 340 }, { "epoch": 0.4561872909698997, "grad_norm": 1.034664738992666, "learning_rate": 9.090909090909091e-06, "loss": 0.7703, "step": 341 }, { "epoch": 0.45752508361204014, "grad_norm": 1.5995569823064906, "learning_rate": 9.11764705882353e-06, "loss": 0.8089, "step": 342 }, { "epoch": 0.4588628762541806, "grad_norm": 1.5549259777085425, "learning_rate": 9.144385026737968e-06, "loss": 0.8446, "step": 343 }, { "epoch": 0.46020066889632105, "grad_norm": 1.3789942578979042, "learning_rate": 9.171122994652407e-06, "loss": 0.8241, "step": 344 }, { "epoch": 0.46153846153846156, "grad_norm": 1.144658292414686, "learning_rate": 9.197860962566846e-06, "loss": 0.7124, "step": 345 }, { "epoch": 0.462876254180602, "grad_norm": 0.9723604149767651, "learning_rate": 9.224598930481284e-06, "loss": 0.7383, "step": 346 }, { "epoch": 0.4642140468227425, "grad_norm": 0.9503905917523253, "learning_rate": 9.251336898395723e-06, "loss": 0.7879, "step": 347 }, { "epoch": 0.46555183946488293, "grad_norm": 1.4162843493328081, "learning_rate": 9.278074866310161e-06, "loss": 0.8772, "step": 348 }, { "epoch": 0.4668896321070234, "grad_norm": 1.0145046091144407, "learning_rate": 9.3048128342246e-06, "loss": 0.7026, "step": 349 }, { "epoch": 0.4682274247491639, "grad_norm": 1.2493872855752464, "learning_rate": 9.331550802139038e-06, "loss": 0.8339, "step": 350 }, { "epoch": 0.46956521739130436, "grad_norm": 1.2652680037243147, "learning_rate": 9.358288770053477e-06, "loss": 0.738, "step": 351 }, { "epoch": 0.4709030100334448, "grad_norm": 1.0217678271324466, "learning_rate": 9.385026737967915e-06, "loss": 0.8733, "step": 352 }, { "epoch": 0.47224080267558527, "grad_norm": 0.8455432766069636, "learning_rate": 9.411764705882354e-06, "loss": 0.6995, "step": 353 }, { "epoch": 0.4735785953177257, "grad_norm": 1.6369093719479353, "learning_rate": 9.438502673796791e-06, "loss": 0.8765, "step": 354 }, { "epoch": 0.47491638795986624, "grad_norm": 1.6463013967124578, "learning_rate": 9.46524064171123e-06, "loss": 0.839, "step": 355 }, { "epoch": 0.4762541806020067, "grad_norm": 0.9340251975568328, "learning_rate": 9.49197860962567e-06, "loss": 0.7307, "step": 356 }, { "epoch": 0.47759197324414715, "grad_norm": 1.194620824429809, "learning_rate": 9.518716577540108e-06, "loss": 0.781, "step": 357 }, { "epoch": 0.4789297658862876, "grad_norm": 1.1898497998826214, "learning_rate": 9.545454545454547e-06, "loss": 0.8015, "step": 358 }, { "epoch": 0.4802675585284281, "grad_norm": 0.9813463685988366, "learning_rate": 9.572192513368986e-06, "loss": 0.706, "step": 359 }, { "epoch": 0.4816053511705686, "grad_norm": 1.238337781108697, "learning_rate": 9.598930481283422e-06, "loss": 0.7569, "step": 360 }, { "epoch": 0.48294314381270903, "grad_norm": 1.2362445366073271, "learning_rate": 9.625668449197861e-06, "loss": 0.7892, "step": 361 }, { "epoch": 0.4842809364548495, "grad_norm": 0.7462093009771003, "learning_rate": 9.6524064171123e-06, "loss": 0.6256, "step": 362 }, { "epoch": 0.48561872909698994, "grad_norm": 1.0159811448276082, "learning_rate": 9.679144385026738e-06, "loss": 0.7832, "step": 363 }, { "epoch": 0.48695652173913045, "grad_norm": 1.150445792702278, "learning_rate": 9.705882352941177e-06, "loss": 0.8456, "step": 364 }, { "epoch": 0.4882943143812709, "grad_norm": 1.0281287550895575, "learning_rate": 9.732620320855617e-06, "loss": 0.7641, "step": 365 }, { "epoch": 0.48963210702341137, "grad_norm": 1.4323388723756687, "learning_rate": 9.759358288770054e-06, "loss": 0.8846, "step": 366 }, { "epoch": 0.4909698996655518, "grad_norm": 1.212645730177698, "learning_rate": 9.786096256684493e-06, "loss": 0.8084, "step": 367 }, { "epoch": 0.49230769230769234, "grad_norm": 1.1211643355501109, "learning_rate": 9.812834224598931e-06, "loss": 0.5766, "step": 368 }, { "epoch": 0.4936454849498328, "grad_norm": 0.8951509432505766, "learning_rate": 9.83957219251337e-06, "loss": 0.6659, "step": 369 }, { "epoch": 0.49498327759197325, "grad_norm": 1.0436263737317217, "learning_rate": 9.866310160427808e-06, "loss": 0.8005, "step": 370 }, { "epoch": 0.4963210702341137, "grad_norm": 1.0667180983929563, "learning_rate": 9.893048128342247e-06, "loss": 0.74, "step": 371 }, { "epoch": 0.49765886287625416, "grad_norm": 1.5012322801124787, "learning_rate": 9.919786096256685e-06, "loss": 0.7221, "step": 372 }, { "epoch": 0.49899665551839467, "grad_norm": 1.3389419486844563, "learning_rate": 9.946524064171124e-06, "loss": 0.9096, "step": 373 }, { "epoch": 0.5003344481605351, "grad_norm": 1.27319962910146, "learning_rate": 9.973262032085562e-06, "loss": 0.7487, "step": 374 }, { "epoch": 0.5016722408026756, "grad_norm": 1.1037782533297351, "learning_rate": 1e-05, "loss": 0.8838, "step": 375 }, { "epoch": 0.5030100334448161, "grad_norm": 1.2087065868777167, "learning_rate": 9.999997822232566e-06, "loss": 0.7712, "step": 376 }, { "epoch": 0.5043478260869565, "grad_norm": 1.0994810962282677, "learning_rate": 9.99999128893216e-06, "loss": 0.8138, "step": 377 }, { "epoch": 0.505685618729097, "grad_norm": 1.115671921767239, "learning_rate": 9.999980400104472e-06, "loss": 0.7935, "step": 378 }, { "epoch": 0.5070234113712374, "grad_norm": 1.4623711439981164, "learning_rate": 9.99996515575899e-06, "loss": 0.7869, "step": 379 }, { "epoch": 0.5083612040133779, "grad_norm": 1.1649450239909762, "learning_rate": 9.99994555590899e-06, "loss": 0.8062, "step": 380 }, { "epoch": 0.5096989966555184, "grad_norm": 1.4049904711686325, "learning_rate": 9.99992160057155e-06, "loss": 0.6932, "step": 381 }, { "epoch": 0.5110367892976588, "grad_norm": 0.9471824793612531, "learning_rate": 9.999893289767533e-06, "loss": 0.6072, "step": 382 }, { "epoch": 0.5123745819397993, "grad_norm": 1.3275481282751187, "learning_rate": 9.999860623521604e-06, "loss": 0.841, "step": 383 }, { "epoch": 0.5137123745819397, "grad_norm": 0.8896871122824314, "learning_rate": 9.999823601862217e-06, "loss": 0.635, "step": 384 }, { "epoch": 0.5150501672240803, "grad_norm": 0.9545607827106793, "learning_rate": 9.999782224821624e-06, "loss": 0.7998, "step": 385 }, { "epoch": 0.5163879598662208, "grad_norm": 0.9886891431584565, "learning_rate": 9.999736492435867e-06, "loss": 0.7754, "step": 386 }, { "epoch": 0.5177257525083612, "grad_norm": 0.9242586635494241, "learning_rate": 9.999686404744782e-06, "loss": 0.7313, "step": 387 }, { "epoch": 0.5190635451505017, "grad_norm": 1.2566989435195657, "learning_rate": 9.999631961792006e-06, "loss": 1.0095, "step": 388 }, { "epoch": 0.5204013377926422, "grad_norm": 1.2617382323555209, "learning_rate": 9.99957316362496e-06, "loss": 0.7698, "step": 389 }, { "epoch": 0.5217391304347826, "grad_norm": 1.0206345727084716, "learning_rate": 9.999510010294864e-06, "loss": 0.822, "step": 390 }, { "epoch": 0.5230769230769231, "grad_norm": 1.0274626938153113, "learning_rate": 9.999442501856736e-06, "loss": 0.7524, "step": 391 }, { "epoch": 0.5244147157190635, "grad_norm": 1.1560365506868073, "learning_rate": 9.999370638369377e-06, "loss": 0.7516, "step": 392 }, { "epoch": 0.525752508361204, "grad_norm": 1.1071924985366115, "learning_rate": 9.999294419895389e-06, "loss": 0.7075, "step": 393 }, { "epoch": 0.5270903010033445, "grad_norm": 1.287082911608142, "learning_rate": 9.99921384650117e-06, "loss": 0.714, "step": 394 }, { "epoch": 0.5284280936454849, "grad_norm": 1.392659619801044, "learning_rate": 9.999128918256904e-06, "loss": 0.7465, "step": 395 }, { "epoch": 0.5297658862876254, "grad_norm": 1.288889532779329, "learning_rate": 9.999039635236576e-06, "loss": 0.6343, "step": 396 }, { "epoch": 0.5311036789297658, "grad_norm": 0.913493405962881, "learning_rate": 9.998945997517957e-06, "loss": 0.7293, "step": 397 }, { "epoch": 0.5324414715719064, "grad_norm": 0.8446982955965048, "learning_rate": 9.99884800518262e-06, "loss": 0.6782, "step": 398 }, { "epoch": 0.5337792642140469, "grad_norm": 1.1880968184906773, "learning_rate": 9.998745658315924e-06, "loss": 0.6948, "step": 399 }, { "epoch": 0.5351170568561873, "grad_norm": 0.9935581235712179, "learning_rate": 9.998638957007024e-06, "loss": 0.797, "step": 400 }, { "epoch": 0.5364548494983278, "grad_norm": 1.1210786016647916, "learning_rate": 9.998527901348869e-06, "loss": 0.8706, "step": 401 }, { "epoch": 0.5377926421404682, "grad_norm": 1.093259870124014, "learning_rate": 9.998412491438201e-06, "loss": 0.7424, "step": 402 }, { "epoch": 0.5391304347826087, "grad_norm": 1.282232491376853, "learning_rate": 9.998292727375554e-06, "loss": 0.8911, "step": 403 }, { "epoch": 0.5404682274247492, "grad_norm": 1.3040255843370845, "learning_rate": 9.998168609265254e-06, "loss": 0.8267, "step": 404 }, { "epoch": 0.5418060200668896, "grad_norm": 1.007606949624056, "learning_rate": 9.998040137215423e-06, "loss": 0.7775, "step": 405 }, { "epoch": 0.5431438127090301, "grad_norm": 1.0331507100978425, "learning_rate": 9.997907311337973e-06, "loss": 0.7145, "step": 406 }, { "epoch": 0.5444816053511705, "grad_norm": 1.5247217423507322, "learning_rate": 9.99777013174861e-06, "loss": 0.8268, "step": 407 }, { "epoch": 0.545819397993311, "grad_norm": 1.1808431828123707, "learning_rate": 9.99762859856683e-06, "loss": 0.7474, "step": 408 }, { "epoch": 0.5471571906354515, "grad_norm": 1.1790478122447219, "learning_rate": 9.997482711915926e-06, "loss": 0.824, "step": 409 }, { "epoch": 0.5484949832775919, "grad_norm": 1.456471522416305, "learning_rate": 9.997332471922981e-06, "loss": 0.8494, "step": 410 }, { "epoch": 0.5498327759197325, "grad_norm": 0.9332051566947629, "learning_rate": 9.99717787871887e-06, "loss": 0.7727, "step": 411 }, { "epoch": 0.551170568561873, "grad_norm": 1.1511428237150294, "learning_rate": 9.997018932438256e-06, "loss": 0.7245, "step": 412 }, { "epoch": 0.5525083612040134, "grad_norm": 1.0716257588697145, "learning_rate": 9.996855633219605e-06, "loss": 0.7932, "step": 413 }, { "epoch": 0.5538461538461539, "grad_norm": 1.138083197149699, "learning_rate": 9.996687981205163e-06, "loss": 0.841, "step": 414 }, { "epoch": 0.5551839464882943, "grad_norm": 1.0871666040562573, "learning_rate": 9.996515976540974e-06, "loss": 0.7804, "step": 415 }, { "epoch": 0.5565217391304348, "grad_norm": 0.7559631800923657, "learning_rate": 9.996339619376876e-06, "loss": 0.5985, "step": 416 }, { "epoch": 0.5578595317725753, "grad_norm": 1.2157204245574154, "learning_rate": 9.99615890986649e-06, "loss": 0.8163, "step": 417 }, { "epoch": 0.5591973244147157, "grad_norm": 1.1021174918723826, "learning_rate": 9.995973848167234e-06, "loss": 0.8259, "step": 418 }, { "epoch": 0.5605351170568562, "grad_norm": 0.9184681300710551, "learning_rate": 9.99578443444032e-06, "loss": 0.7411, "step": 419 }, { "epoch": 0.5618729096989966, "grad_norm": 1.377119642877741, "learning_rate": 9.995590668850745e-06, "loss": 0.832, "step": 420 }, { "epoch": 0.5632107023411371, "grad_norm": 1.0896215242355805, "learning_rate": 9.9953925515673e-06, "loss": 0.7925, "step": 421 }, { "epoch": 0.5645484949832776, "grad_norm": 1.215778856783488, "learning_rate": 9.995190082762566e-06, "loss": 0.8576, "step": 422 }, { "epoch": 0.565886287625418, "grad_norm": 1.089970559280234, "learning_rate": 9.994983262612916e-06, "loss": 0.8431, "step": 423 }, { "epoch": 0.5672240802675586, "grad_norm": 1.0731260379312166, "learning_rate": 9.99477209129851e-06, "loss": 0.7774, "step": 424 }, { "epoch": 0.568561872909699, "grad_norm": 1.2280860518296275, "learning_rate": 9.994556569003305e-06, "loss": 0.9003, "step": 425 }, { "epoch": 0.5698996655518395, "grad_norm": 1.2467516867170996, "learning_rate": 9.994336695915041e-06, "loss": 0.8572, "step": 426 }, { "epoch": 0.57123745819398, "grad_norm": 0.9244837423344162, "learning_rate": 9.99411247222525e-06, "loss": 0.6292, "step": 427 }, { "epoch": 0.5725752508361204, "grad_norm": 1.4158823072186852, "learning_rate": 9.993883898129259e-06, "loss": 0.9345, "step": 428 }, { "epoch": 0.5739130434782609, "grad_norm": 1.1584841444856042, "learning_rate": 9.993650973826177e-06, "loss": 0.7144, "step": 429 }, { "epoch": 0.5752508361204013, "grad_norm": 1.6476655632448989, "learning_rate": 9.993413699518906e-06, "loss": 0.8421, "step": 430 }, { "epoch": 0.5765886287625418, "grad_norm": 1.1713818533483766, "learning_rate": 9.99317207541414e-06, "loss": 0.8949, "step": 431 }, { "epoch": 0.5779264214046823, "grad_norm": 1.4762656049169056, "learning_rate": 9.992926101722355e-06, "loss": 0.8636, "step": 432 }, { "epoch": 0.5792642140468227, "grad_norm": 1.1639080492677774, "learning_rate": 9.992675778657824e-06, "loss": 0.8094, "step": 433 }, { "epoch": 0.5806020066889632, "grad_norm": 0.9393239063695691, "learning_rate": 9.992421106438606e-06, "loss": 0.7055, "step": 434 }, { "epoch": 0.5819397993311036, "grad_norm": 1.0259528433981415, "learning_rate": 9.992162085286543e-06, "loss": 0.739, "step": 435 }, { "epoch": 0.5832775919732441, "grad_norm": 1.0231596009410362, "learning_rate": 9.991898715427274e-06, "loss": 0.7822, "step": 436 }, { "epoch": 0.5846153846153846, "grad_norm": 1.2672487311025133, "learning_rate": 9.991630997090222e-06, "loss": 0.8038, "step": 437 }, { "epoch": 0.585953177257525, "grad_norm": 0.9747756571373525, "learning_rate": 9.991358930508599e-06, "loss": 0.6201, "step": 438 }, { "epoch": 0.5872909698996656, "grad_norm": 1.0876203427798814, "learning_rate": 9.991082515919402e-06, "loss": 0.7547, "step": 439 }, { "epoch": 0.5886287625418061, "grad_norm": 1.0439857079645132, "learning_rate": 9.990801753563418e-06, "loss": 0.7494, "step": 440 }, { "epoch": 0.5899665551839465, "grad_norm": 1.4783052228509717, "learning_rate": 9.990516643685222e-06, "loss": 0.7844, "step": 441 }, { "epoch": 0.591304347826087, "grad_norm": 0.9283324352675925, "learning_rate": 9.990227186533174e-06, "loss": 0.7267, "step": 442 }, { "epoch": 0.5926421404682274, "grad_norm": 0.9922757205874483, "learning_rate": 9.989933382359423e-06, "loss": 0.735, "step": 443 }, { "epoch": 0.5939799331103679, "grad_norm": 1.1155039129998388, "learning_rate": 9.989635231419903e-06, "loss": 0.74, "step": 444 }, { "epoch": 0.5953177257525084, "grad_norm": 1.1773119645508705, "learning_rate": 9.989332733974337e-06, "loss": 0.7418, "step": 445 }, { "epoch": 0.5966555183946488, "grad_norm": 1.2733328660163108, "learning_rate": 9.989025890286233e-06, "loss": 0.8867, "step": 446 }, { "epoch": 0.5979933110367893, "grad_norm": 1.2163299453552718, "learning_rate": 9.988714700622882e-06, "loss": 0.7556, "step": 447 }, { "epoch": 0.5993311036789297, "grad_norm": 1.0363441222292755, "learning_rate": 9.988399165255365e-06, "loss": 0.7822, "step": 448 }, { "epoch": 0.6006688963210702, "grad_norm": 1.3724845590693693, "learning_rate": 9.988079284458547e-06, "loss": 0.7511, "step": 449 }, { "epoch": 0.6020066889632107, "grad_norm": 1.3793928323485753, "learning_rate": 9.987755058511079e-06, "loss": 0.7291, "step": 450 }, { "epoch": 0.6033444816053511, "grad_norm": 1.2605034511228408, "learning_rate": 9.987426487695396e-06, "loss": 0.7292, "step": 451 }, { "epoch": 0.6046822742474917, "grad_norm": 1.375841646875911, "learning_rate": 9.987093572297716e-06, "loss": 0.9275, "step": 452 }, { "epoch": 0.6060200668896321, "grad_norm": 1.2114703812870238, "learning_rate": 9.986756312608048e-06, "loss": 0.7948, "step": 453 }, { "epoch": 0.6073578595317726, "grad_norm": 0.862345616706627, "learning_rate": 9.98641470892018e-06, "loss": 0.7437, "step": 454 }, { "epoch": 0.6086956521739131, "grad_norm": 0.9355821267538272, "learning_rate": 9.986068761531681e-06, "loss": 0.6814, "step": 455 }, { "epoch": 0.6100334448160535, "grad_norm": 1.209386039261872, "learning_rate": 9.985718470743916e-06, "loss": 0.8362, "step": 456 }, { "epoch": 0.611371237458194, "grad_norm": 0.9657681075171413, "learning_rate": 9.985363836862021e-06, "loss": 0.6458, "step": 457 }, { "epoch": 0.6127090301003344, "grad_norm": 1.1161545022621853, "learning_rate": 9.98500486019492e-06, "loss": 0.8888, "step": 458 }, { "epoch": 0.6140468227424749, "grad_norm": 1.2492387462736416, "learning_rate": 9.98464154105532e-06, "loss": 0.8126, "step": 459 }, { "epoch": 0.6153846153846154, "grad_norm": 0.9069084632796247, "learning_rate": 9.984273879759713e-06, "loss": 0.7433, "step": 460 }, { "epoch": 0.6167224080267558, "grad_norm": 1.0169911288644369, "learning_rate": 9.983901876628369e-06, "loss": 0.7665, "step": 461 }, { "epoch": 0.6180602006688963, "grad_norm": 1.0977461382124665, "learning_rate": 9.983525531985343e-06, "loss": 0.7599, "step": 462 }, { "epoch": 0.6193979933110368, "grad_norm": 1.3134060533667233, "learning_rate": 9.983144846158472e-06, "loss": 0.8954, "step": 463 }, { "epoch": 0.6207357859531772, "grad_norm": 1.3103338882456954, "learning_rate": 9.982759819479375e-06, "loss": 0.7542, "step": 464 }, { "epoch": 0.6220735785953178, "grad_norm": 1.490542830184032, "learning_rate": 9.982370452283451e-06, "loss": 0.9651, "step": 465 }, { "epoch": 0.6234113712374582, "grad_norm": 1.1912820093759666, "learning_rate": 9.981976744909878e-06, "loss": 0.8444, "step": 466 }, { "epoch": 0.6247491638795987, "grad_norm": 1.1751211565447184, "learning_rate": 9.98157869770162e-06, "loss": 0.949, "step": 467 }, { "epoch": 0.6260869565217392, "grad_norm": 1.0166832223253168, "learning_rate": 9.981176311005419e-06, "loss": 0.642, "step": 468 }, { "epoch": 0.6274247491638796, "grad_norm": 1.2508130338338952, "learning_rate": 9.980769585171795e-06, "loss": 0.8578, "step": 469 }, { "epoch": 0.6287625418060201, "grad_norm": 1.0994341513718409, "learning_rate": 9.980358520555048e-06, "loss": 0.7666, "step": 470 }, { "epoch": 0.6301003344481605, "grad_norm": 1.3653831385120847, "learning_rate": 9.979943117513265e-06, "loss": 0.8924, "step": 471 }, { "epoch": 0.631438127090301, "grad_norm": 0.9952627355995922, "learning_rate": 9.9795233764083e-06, "loss": 0.7727, "step": 472 }, { "epoch": 0.6327759197324415, "grad_norm": 1.0263894396014055, "learning_rate": 9.979099297605798e-06, "loss": 0.7743, "step": 473 }, { "epoch": 0.6341137123745819, "grad_norm": 0.8763111072591719, "learning_rate": 9.978670881475173e-06, "loss": 0.7731, "step": 474 }, { "epoch": 0.6354515050167224, "grad_norm": 0.8116935537834303, "learning_rate": 9.978238128389623e-06, "loss": 0.7158, "step": 475 }, { "epoch": 0.6367892976588628, "grad_norm": 1.0456584904260309, "learning_rate": 9.977801038726123e-06, "loss": 0.6593, "step": 476 }, { "epoch": 0.6381270903010033, "grad_norm": 1.045547395279884, "learning_rate": 9.977359612865424e-06, "loss": 0.7749, "step": 477 }, { "epoch": 0.6394648829431439, "grad_norm": 1.1054100850428168, "learning_rate": 9.976913851192053e-06, "loss": 0.7538, "step": 478 }, { "epoch": 0.6408026755852843, "grad_norm": 0.9185776124935355, "learning_rate": 9.976463754094321e-06, "loss": 0.7002, "step": 479 }, { "epoch": 0.6421404682274248, "grad_norm": 0.828593284167028, "learning_rate": 9.976009321964306e-06, "loss": 0.6186, "step": 480 }, { "epoch": 0.6434782608695652, "grad_norm": 0.9768797389433662, "learning_rate": 9.97555055519787e-06, "loss": 0.8174, "step": 481 }, { "epoch": 0.6448160535117057, "grad_norm": 1.4758357810703209, "learning_rate": 9.975087454194645e-06, "loss": 0.9146, "step": 482 }, { "epoch": 0.6461538461538462, "grad_norm": 1.186509933029167, "learning_rate": 9.974620019358046e-06, "loss": 0.8964, "step": 483 }, { "epoch": 0.6474916387959866, "grad_norm": 1.4562942135484718, "learning_rate": 9.974148251095253e-06, "loss": 0.8554, "step": 484 }, { "epoch": 0.6488294314381271, "grad_norm": 1.350893058280438, "learning_rate": 9.973672149817232e-06, "loss": 0.8116, "step": 485 }, { "epoch": 0.6501672240802676, "grad_norm": 0.8911335385328688, "learning_rate": 9.973191715938715e-06, "loss": 0.7057, "step": 486 }, { "epoch": 0.651505016722408, "grad_norm": 1.0689268149177158, "learning_rate": 9.972706949878212e-06, "loss": 0.7937, "step": 487 }, { "epoch": 0.6528428093645485, "grad_norm": 1.0909724511974244, "learning_rate": 9.972217852058006e-06, "loss": 0.8353, "step": 488 }, { "epoch": 0.6541806020066889, "grad_norm": 1.1051943371922381, "learning_rate": 9.971724422904154e-06, "loss": 0.7228, "step": 489 }, { "epoch": 0.6555183946488294, "grad_norm": 0.9337541032453491, "learning_rate": 9.971226662846485e-06, "loss": 0.6934, "step": 490 }, { "epoch": 0.65685618729097, "grad_norm": 0.9332294397848354, "learning_rate": 9.970724572318602e-06, "loss": 0.7298, "step": 491 }, { "epoch": 0.6581939799331104, "grad_norm": 1.219816119615376, "learning_rate": 9.97021815175788e-06, "loss": 0.7899, "step": 492 }, { "epoch": 0.6595317725752509, "grad_norm": 1.1258594507016408, "learning_rate": 9.969707401605464e-06, "loss": 0.7968, "step": 493 }, { "epoch": 0.6608695652173913, "grad_norm": 1.0560425272676537, "learning_rate": 9.969192322306271e-06, "loss": 0.7073, "step": 494 }, { "epoch": 0.6622073578595318, "grad_norm": 0.9326669264269886, "learning_rate": 9.968672914308995e-06, "loss": 0.7559, "step": 495 }, { "epoch": 0.6635451505016723, "grad_norm": 1.0406721490297282, "learning_rate": 9.96814917806609e-06, "loss": 0.6091, "step": 496 }, { "epoch": 0.6648829431438127, "grad_norm": 1.186575521069599, "learning_rate": 9.96762111403379e-06, "loss": 0.7521, "step": 497 }, { "epoch": 0.6662207357859532, "grad_norm": 1.128108299351593, "learning_rate": 9.967088722672094e-06, "loss": 0.7529, "step": 498 }, { "epoch": 0.6675585284280936, "grad_norm": 0.9902441775261838, "learning_rate": 9.966552004444772e-06, "loss": 0.7149, "step": 499 }, { "epoch": 0.6688963210702341, "grad_norm": 1.0539055833415776, "learning_rate": 9.966010959819363e-06, "loss": 0.7713, "step": 500 }, { "epoch": 0.6702341137123746, "grad_norm": 1.1390109738784728, "learning_rate": 9.965465589267176e-06, "loss": 0.8484, "step": 501 }, { "epoch": 0.671571906354515, "grad_norm": 0.9820886791971261, "learning_rate": 9.964915893263285e-06, "loss": 0.7672, "step": 502 }, { "epoch": 0.6729096989966555, "grad_norm": 0.8605662483164124, "learning_rate": 9.964361872286534e-06, "loss": 0.6001, "step": 503 }, { "epoch": 0.6742474916387959, "grad_norm": 1.1384687458112128, "learning_rate": 9.963803526819537e-06, "loss": 0.7227, "step": 504 }, { "epoch": 0.6755852842809364, "grad_norm": 1.239629130507622, "learning_rate": 9.963240857348671e-06, "loss": 0.8428, "step": 505 }, { "epoch": 0.676923076923077, "grad_norm": 1.089846125263126, "learning_rate": 9.962673864364081e-06, "loss": 0.7314, "step": 506 }, { "epoch": 0.6782608695652174, "grad_norm": 0.7717755575116174, "learning_rate": 9.96210254835968e-06, "loss": 0.6975, "step": 507 }, { "epoch": 0.6795986622073579, "grad_norm": 0.8266415315750472, "learning_rate": 9.961526909833143e-06, "loss": 0.6569, "step": 508 }, { "epoch": 0.6809364548494983, "grad_norm": 1.3743221795339051, "learning_rate": 9.960946949285915e-06, "loss": 0.8183, "step": 509 }, { "epoch": 0.6822742474916388, "grad_norm": 0.7939549071705042, "learning_rate": 9.960362667223202e-06, "loss": 0.6541, "step": 510 }, { "epoch": 0.6836120401337793, "grad_norm": 0.9735914794157698, "learning_rate": 9.959774064153977e-06, "loss": 0.6474, "step": 511 }, { "epoch": 0.6849498327759197, "grad_norm": 0.8929043262048764, "learning_rate": 9.959181140590977e-06, "loss": 0.67, "step": 512 }, { "epoch": 0.6862876254180602, "grad_norm": 1.027158299174972, "learning_rate": 9.9585838970507e-06, "loss": 0.7265, "step": 513 }, { "epoch": 0.6876254180602007, "grad_norm": 1.5066248361268761, "learning_rate": 9.95798233405341e-06, "loss": 0.9131, "step": 514 }, { "epoch": 0.6889632107023411, "grad_norm": 1.1466861369522137, "learning_rate": 9.957376452123133e-06, "loss": 0.6022, "step": 515 }, { "epoch": 0.6903010033444816, "grad_norm": 1.1062286446650411, "learning_rate": 9.956766251787657e-06, "loss": 0.8182, "step": 516 }, { "epoch": 0.691638795986622, "grad_norm": 1.0580693686639098, "learning_rate": 9.956151733578533e-06, "loss": 0.8305, "step": 517 }, { "epoch": 0.6929765886287625, "grad_norm": 1.071622926332025, "learning_rate": 9.955532898031069e-06, "loss": 0.8359, "step": 518 }, { "epoch": 0.6943143812709031, "grad_norm": 1.1211002550642661, "learning_rate": 9.954909745684339e-06, "loss": 0.8531, "step": 519 }, { "epoch": 0.6956521739130435, "grad_norm": 1.0322121132370203, "learning_rate": 9.954282277081173e-06, "loss": 0.7783, "step": 520 }, { "epoch": 0.696989966555184, "grad_norm": 0.805259362945679, "learning_rate": 9.953650492768167e-06, "loss": 0.7123, "step": 521 }, { "epoch": 0.6983277591973244, "grad_norm": 1.1590620202239807, "learning_rate": 9.95301439329567e-06, "loss": 0.7698, "step": 522 }, { "epoch": 0.6996655518394649, "grad_norm": 1.0383076493439127, "learning_rate": 9.952373979217795e-06, "loss": 0.747, "step": 523 }, { "epoch": 0.7010033444816054, "grad_norm": 1.003072403141758, "learning_rate": 9.951729251092408e-06, "loss": 0.7145, "step": 524 }, { "epoch": 0.7023411371237458, "grad_norm": 0.973607461010513, "learning_rate": 9.951080209481138e-06, "loss": 0.8251, "step": 525 }, { "epoch": 0.7036789297658863, "grad_norm": 1.6023326881158115, "learning_rate": 9.950426854949371e-06, "loss": 0.8818, "step": 526 }, { "epoch": 0.7050167224080267, "grad_norm": 1.1172761156285935, "learning_rate": 9.949769188066247e-06, "loss": 0.865, "step": 527 }, { "epoch": 0.7063545150501672, "grad_norm": 0.973590986672907, "learning_rate": 9.949107209404664e-06, "loss": 0.8914, "step": 528 }, { "epoch": 0.7076923076923077, "grad_norm": 1.7179646369539727, "learning_rate": 9.948440919541277e-06, "loss": 0.9443, "step": 529 }, { "epoch": 0.7090301003344481, "grad_norm": 1.2968112922757646, "learning_rate": 9.947770319056496e-06, "loss": 0.8375, "step": 530 }, { "epoch": 0.7103678929765886, "grad_norm": 1.7656150091760483, "learning_rate": 9.947095408534483e-06, "loss": 0.8684, "step": 531 }, { "epoch": 0.711705685618729, "grad_norm": 1.316456081380093, "learning_rate": 9.946416188563163e-06, "loss": 0.8638, "step": 532 }, { "epoch": 0.7130434782608696, "grad_norm": 1.6314722890481832, "learning_rate": 9.945732659734204e-06, "loss": 1.0177, "step": 533 }, { "epoch": 0.7143812709030101, "grad_norm": 1.1816456666748054, "learning_rate": 9.945044822643033e-06, "loss": 0.8009, "step": 534 }, { "epoch": 0.7157190635451505, "grad_norm": 1.4140310481138245, "learning_rate": 9.944352677888833e-06, "loss": 0.7934, "step": 535 }, { "epoch": 0.717056856187291, "grad_norm": 0.9982753917329991, "learning_rate": 9.943656226074534e-06, "loss": 0.8167, "step": 536 }, { "epoch": 0.7183946488294315, "grad_norm": 1.274114202257041, "learning_rate": 9.94295546780682e-06, "loss": 0.7877, "step": 537 }, { "epoch": 0.7197324414715719, "grad_norm": 1.463981558014625, "learning_rate": 9.942250403696126e-06, "loss": 0.8086, "step": 538 }, { "epoch": 0.7210702341137124, "grad_norm": 1.2142798357768603, "learning_rate": 9.94154103435664e-06, "loss": 0.7036, "step": 539 }, { "epoch": 0.7224080267558528, "grad_norm": 1.1442963144908913, "learning_rate": 9.940827360406297e-06, "loss": 0.8155, "step": 540 }, { "epoch": 0.7237458193979933, "grad_norm": 1.3174955408182305, "learning_rate": 9.940109382466785e-06, "loss": 0.7638, "step": 541 }, { "epoch": 0.7250836120401338, "grad_norm": 1.3632827072379041, "learning_rate": 9.939387101163538e-06, "loss": 0.8093, "step": 542 }, { "epoch": 0.7264214046822742, "grad_norm": 0.8640523560926667, "learning_rate": 9.93866051712574e-06, "loss": 0.6999, "step": 543 }, { "epoch": 0.7277591973244147, "grad_norm": 1.120927102256978, "learning_rate": 9.937929630986324e-06, "loss": 0.7708, "step": 544 }, { "epoch": 0.7290969899665551, "grad_norm": 0.9107810029762359, "learning_rate": 9.937194443381972e-06, "loss": 0.6408, "step": 545 }, { "epoch": 0.7304347826086957, "grad_norm": 1.3063333537158934, "learning_rate": 9.936454954953108e-06, "loss": 0.8753, "step": 546 }, { "epoch": 0.7317725752508362, "grad_norm": 1.1383707189199281, "learning_rate": 9.935711166343909e-06, "loss": 0.823, "step": 547 }, { "epoch": 0.7331103678929766, "grad_norm": 0.9458031522431164, "learning_rate": 9.934963078202289e-06, "loss": 0.6513, "step": 548 }, { "epoch": 0.7344481605351171, "grad_norm": 1.0519490187750211, "learning_rate": 9.934210691179918e-06, "loss": 0.7382, "step": 549 }, { "epoch": 0.7357859531772575, "grad_norm": 0.9241305522129186, "learning_rate": 9.933454005932204e-06, "loss": 0.7274, "step": 550 }, { "epoch": 0.737123745819398, "grad_norm": 0.8667535375329348, "learning_rate": 9.932693023118299e-06, "loss": 0.6542, "step": 551 }, { "epoch": 0.7384615384615385, "grad_norm": 1.2021439519895922, "learning_rate": 9.931927743401102e-06, "loss": 0.8749, "step": 552 }, { "epoch": 0.7397993311036789, "grad_norm": 1.4259519076871945, "learning_rate": 9.931158167447254e-06, "loss": 0.867, "step": 553 }, { "epoch": 0.7411371237458194, "grad_norm": 0.9692723083203503, "learning_rate": 9.930384295927137e-06, "loss": 0.8397, "step": 554 }, { "epoch": 0.7424749163879598, "grad_norm": 0.9356383353574651, "learning_rate": 9.929606129514875e-06, "loss": 0.7057, "step": 555 }, { "epoch": 0.7438127090301003, "grad_norm": 1.0924452477331636, "learning_rate": 9.928823668888337e-06, "loss": 0.7222, "step": 556 }, { "epoch": 0.7451505016722408, "grad_norm": 1.169989391984794, "learning_rate": 9.928036914729129e-06, "loss": 0.8295, "step": 557 }, { "epoch": 0.7464882943143812, "grad_norm": 1.2075939369000215, "learning_rate": 9.927245867722596e-06, "loss": 0.8211, "step": 558 }, { "epoch": 0.7478260869565218, "grad_norm": 1.1953403463533385, "learning_rate": 9.926450528557828e-06, "loss": 0.7681, "step": 559 }, { "epoch": 0.7491638795986622, "grad_norm": 1.270177654425022, "learning_rate": 9.925650897927646e-06, "loss": 0.8179, "step": 560 }, { "epoch": 0.7505016722408027, "grad_norm": 1.142932915635066, "learning_rate": 9.924846976528618e-06, "loss": 0.7882, "step": 561 }, { "epoch": 0.7518394648829432, "grad_norm": 0.975688490545168, "learning_rate": 9.924038765061042e-06, "loss": 0.7067, "step": 562 }, { "epoch": 0.7531772575250836, "grad_norm": 1.0256854537554396, "learning_rate": 9.923226264228958e-06, "loss": 0.6768, "step": 563 }, { "epoch": 0.7545150501672241, "grad_norm": 1.2051310715226342, "learning_rate": 9.922409474740142e-06, "loss": 0.8074, "step": 564 }, { "epoch": 0.7558528428093646, "grad_norm": 1.1047843276621692, "learning_rate": 9.921588397306105e-06, "loss": 0.6625, "step": 565 }, { "epoch": 0.757190635451505, "grad_norm": 0.9948264831625293, "learning_rate": 9.920763032642094e-06, "loss": 0.7806, "step": 566 }, { "epoch": 0.7585284280936455, "grad_norm": 1.2348919513175205, "learning_rate": 9.919933381467088e-06, "loss": 0.8265, "step": 567 }, { "epoch": 0.7598662207357859, "grad_norm": 1.1009206006032963, "learning_rate": 9.919099444503804e-06, "loss": 0.7029, "step": 568 }, { "epoch": 0.7612040133779264, "grad_norm": 0.918200974011368, "learning_rate": 9.918261222478687e-06, "loss": 0.7232, "step": 569 }, { "epoch": 0.7625418060200669, "grad_norm": 1.1359470665493787, "learning_rate": 9.91741871612192e-06, "loss": 0.7104, "step": 570 }, { "epoch": 0.7638795986622073, "grad_norm": 0.9844769636605958, "learning_rate": 9.916571926167417e-06, "loss": 0.757, "step": 571 }, { "epoch": 0.7652173913043478, "grad_norm": 1.0651117094535125, "learning_rate": 9.915720853352821e-06, "loss": 0.6946, "step": 572 }, { "epoch": 0.7665551839464882, "grad_norm": 1.2112418835080145, "learning_rate": 9.91486549841951e-06, "loss": 0.862, "step": 573 }, { "epoch": 0.7678929765886288, "grad_norm": 1.0631879246160831, "learning_rate": 9.914005862112587e-06, "loss": 0.7827, "step": 574 }, { "epoch": 0.7692307692307693, "grad_norm": 1.0580230609901387, "learning_rate": 9.913141945180888e-06, "loss": 0.6937, "step": 575 }, { "epoch": 0.7705685618729097, "grad_norm": 0.9936303844498378, "learning_rate": 9.912273748376976e-06, "loss": 0.6813, "step": 576 }, { "epoch": 0.7719063545150502, "grad_norm": 1.24621973293181, "learning_rate": 9.911401272457145e-06, "loss": 0.7486, "step": 577 }, { "epoch": 0.7732441471571906, "grad_norm": 1.1461721836360215, "learning_rate": 9.910524518181416e-06, "loss": 0.805, "step": 578 }, { "epoch": 0.7745819397993311, "grad_norm": 1.1597223910004457, "learning_rate": 9.909643486313533e-06, "loss": 0.8122, "step": 579 }, { "epoch": 0.7759197324414716, "grad_norm": 1.0286382150617528, "learning_rate": 9.908758177620972e-06, "loss": 0.68, "step": 580 }, { "epoch": 0.777257525083612, "grad_norm": 0.9049281498329772, "learning_rate": 9.907868592874927e-06, "loss": 0.6691, "step": 581 }, { "epoch": 0.7785953177257525, "grad_norm": 1.2118496469608169, "learning_rate": 9.906974732850327e-06, "loss": 0.7443, "step": 582 }, { "epoch": 0.7799331103678929, "grad_norm": 1.0672270948023133, "learning_rate": 9.906076598325815e-06, "loss": 0.766, "step": 583 }, { "epoch": 0.7812709030100334, "grad_norm": 0.9800879677494242, "learning_rate": 9.905174190083763e-06, "loss": 0.7878, "step": 584 }, { "epoch": 0.782608695652174, "grad_norm": 0.8840062761626756, "learning_rate": 9.904267508910269e-06, "loss": 0.6993, "step": 585 }, { "epoch": 0.7839464882943143, "grad_norm": 1.1009919881208539, "learning_rate": 9.903356555595143e-06, "loss": 0.7721, "step": 586 }, { "epoch": 0.7852842809364549, "grad_norm": 1.171863629979334, "learning_rate": 9.90244133093193e-06, "loss": 0.8099, "step": 587 }, { "epoch": 0.7866220735785954, "grad_norm": 1.1767093644337023, "learning_rate": 9.901521835717882e-06, "loss": 0.6783, "step": 588 }, { "epoch": 0.7879598662207358, "grad_norm": 1.373146333818288, "learning_rate": 9.900598070753981e-06, "loss": 0.6767, "step": 589 }, { "epoch": 0.7892976588628763, "grad_norm": 0.8478604227978296, "learning_rate": 9.899670036844926e-06, "loss": 0.6673, "step": 590 }, { "epoch": 0.7906354515050167, "grad_norm": 1.0634729917143413, "learning_rate": 9.898737734799134e-06, "loss": 0.7901, "step": 591 }, { "epoch": 0.7919732441471572, "grad_norm": 0.7916086117555936, "learning_rate": 9.897801165428736e-06, "loss": 0.6708, "step": 592 }, { "epoch": 0.7933110367892977, "grad_norm": 1.3826478480689792, "learning_rate": 9.896860329549585e-06, "loss": 0.8659, "step": 593 }, { "epoch": 0.7946488294314381, "grad_norm": 1.4469918642226134, "learning_rate": 9.895915227981254e-06, "loss": 0.8367, "step": 594 }, { "epoch": 0.7959866220735786, "grad_norm": 1.0047777291936801, "learning_rate": 9.894965861547023e-06, "loss": 0.8031, "step": 595 }, { "epoch": 0.797324414715719, "grad_norm": 1.2028763600947787, "learning_rate": 9.894012231073895e-06, "loss": 0.7291, "step": 596 }, { "epoch": 0.7986622073578595, "grad_norm": 0.8458967737172843, "learning_rate": 9.89305433739258e-06, "loss": 0.6764, "step": 597 }, { "epoch": 0.8, "grad_norm": 1.449757648557503, "learning_rate": 9.892092181337512e-06, "loss": 0.7562, "step": 598 }, { "epoch": 0.8013377926421404, "grad_norm": 1.2504196401044978, "learning_rate": 9.891125763746824e-06, "loss": 0.6878, "step": 599 }, { "epoch": 0.802675585284281, "grad_norm": 1.2650203674268132, "learning_rate": 9.890155085462376e-06, "loss": 0.9178, "step": 600 }, { "epoch": 0.8040133779264214, "grad_norm": 1.0570200862642583, "learning_rate": 9.889180147329731e-06, "loss": 0.7853, "step": 601 }, { "epoch": 0.8053511705685619, "grad_norm": 0.8572883094050113, "learning_rate": 9.888200950198162e-06, "loss": 0.6348, "step": 602 }, { "epoch": 0.8066889632107024, "grad_norm": 0.9974490392584062, "learning_rate": 9.887217494920655e-06, "loss": 0.7699, "step": 603 }, { "epoch": 0.8080267558528428, "grad_norm": 0.9760885182150358, "learning_rate": 9.886229782353907e-06, "loss": 0.7013, "step": 604 }, { "epoch": 0.8093645484949833, "grad_norm": 1.1487194965216376, "learning_rate": 9.88523781335832e-06, "loss": 0.7612, "step": 605 }, { "epoch": 0.8107023411371237, "grad_norm": 1.1701108034676848, "learning_rate": 9.884241588798004e-06, "loss": 0.8473, "step": 606 }, { "epoch": 0.8120401337792642, "grad_norm": 0.9982677630500792, "learning_rate": 9.88324110954078e-06, "loss": 0.7873, "step": 607 }, { "epoch": 0.8133779264214047, "grad_norm": 1.0713634306555846, "learning_rate": 9.88223637645817e-06, "loss": 0.8451, "step": 608 }, { "epoch": 0.8147157190635451, "grad_norm": 0.7993870962130153, "learning_rate": 9.881227390425404e-06, "loss": 0.6347, "step": 609 }, { "epoch": 0.8160535117056856, "grad_norm": 1.013840301640455, "learning_rate": 9.880214152321417e-06, "loss": 0.7619, "step": 610 }, { "epoch": 0.8173913043478261, "grad_norm": 1.2055288931390697, "learning_rate": 9.879196663028847e-06, "loss": 0.8076, "step": 611 }, { "epoch": 0.8187290969899665, "grad_norm": 0.9969646751383683, "learning_rate": 9.87817492343404e-06, "loss": 0.7791, "step": 612 }, { "epoch": 0.820066889632107, "grad_norm": 0.8053604276820112, "learning_rate": 9.877148934427037e-06, "loss": 0.6718, "step": 613 }, { "epoch": 0.8214046822742475, "grad_norm": 0.9555779600771894, "learning_rate": 9.876118696901585e-06, "loss": 0.6373, "step": 614 }, { "epoch": 0.822742474916388, "grad_norm": 1.2520186582146162, "learning_rate": 9.875084211755127e-06, "loss": 0.8323, "step": 615 }, { "epoch": 0.8240802675585285, "grad_norm": 0.9645368728267706, "learning_rate": 9.874045479888819e-06, "loss": 0.7536, "step": 616 }, { "epoch": 0.8254180602006689, "grad_norm": 1.0577722573961799, "learning_rate": 9.873002502207502e-06, "loss": 0.8801, "step": 617 }, { "epoch": 0.8267558528428094, "grad_norm": 1.4961219759007043, "learning_rate": 9.871955279619721e-06, "loss": 0.852, "step": 618 }, { "epoch": 0.8280936454849498, "grad_norm": 1.1129482681908867, "learning_rate": 9.87090381303772e-06, "loss": 0.8589, "step": 619 }, { "epoch": 0.8294314381270903, "grad_norm": 1.3861809484975318, "learning_rate": 9.86984810337744e-06, "loss": 0.8353, "step": 620 }, { "epoch": 0.8307692307692308, "grad_norm": 1.2730706516786325, "learning_rate": 9.868788151558513e-06, "loss": 0.8272, "step": 621 }, { "epoch": 0.8321070234113712, "grad_norm": 1.241548707419779, "learning_rate": 9.867723958504275e-06, "loss": 0.7729, "step": 622 }, { "epoch": 0.8334448160535117, "grad_norm": 1.0062750714506854, "learning_rate": 9.86665552514175e-06, "loss": 0.7294, "step": 623 }, { "epoch": 0.8347826086956521, "grad_norm": 1.136041777315869, "learning_rate": 9.865582852401659e-06, "loss": 0.8688, "step": 624 }, { "epoch": 0.8361204013377926, "grad_norm": 0.9958406599014873, "learning_rate": 9.86450594121841e-06, "loss": 0.629, "step": 625 }, { "epoch": 0.8374581939799332, "grad_norm": 1.2028387169565808, "learning_rate": 9.863424792530114e-06, "loss": 0.7891, "step": 626 }, { "epoch": 0.8387959866220736, "grad_norm": 1.028193555772259, "learning_rate": 9.862339407278564e-06, "loss": 0.7042, "step": 627 }, { "epoch": 0.8401337792642141, "grad_norm": 1.0729798309920937, "learning_rate": 9.861249786409248e-06, "loss": 0.7236, "step": 628 }, { "epoch": 0.8414715719063545, "grad_norm": 1.525073456951817, "learning_rate": 9.860155930871341e-06, "loss": 0.9533, "step": 629 }, { "epoch": 0.842809364548495, "grad_norm": 1.245389366060029, "learning_rate": 9.859057841617709e-06, "loss": 0.7421, "step": 630 }, { "epoch": 0.8441471571906355, "grad_norm": 0.9858821351978345, "learning_rate": 9.857955519604906e-06, "loss": 0.834, "step": 631 }, { "epoch": 0.8454849498327759, "grad_norm": 1.1330442898757735, "learning_rate": 9.856848965793168e-06, "loss": 0.7139, "step": 632 }, { "epoch": 0.8468227424749164, "grad_norm": 1.202694601188144, "learning_rate": 9.855738181146427e-06, "loss": 0.7953, "step": 633 }, { "epoch": 0.8481605351170568, "grad_norm": 1.1198260087970293, "learning_rate": 9.854623166632296e-06, "loss": 0.7198, "step": 634 }, { "epoch": 0.8494983277591973, "grad_norm": 0.961711492618783, "learning_rate": 9.853503923222066e-06, "loss": 0.8245, "step": 635 }, { "epoch": 0.8508361204013378, "grad_norm": 1.0599390449893915, "learning_rate": 9.852380451890723e-06, "loss": 0.7666, "step": 636 }, { "epoch": 0.8521739130434782, "grad_norm": 1.0330122168114069, "learning_rate": 9.851252753616928e-06, "loss": 0.6175, "step": 637 }, { "epoch": 0.8535117056856187, "grad_norm": 1.1429627066549046, "learning_rate": 9.850120829383027e-06, "loss": 0.7177, "step": 638 }, { "epoch": 0.8548494983277592, "grad_norm": 0.9333653555195208, "learning_rate": 9.848984680175049e-06, "loss": 0.7848, "step": 639 }, { "epoch": 0.8561872909698997, "grad_norm": 1.1799044081925265, "learning_rate": 9.847844306982698e-06, "loss": 0.7538, "step": 640 }, { "epoch": 0.8575250836120402, "grad_norm": 0.9646440962101648, "learning_rate": 9.846699710799365e-06, "loss": 0.7065, "step": 641 }, { "epoch": 0.8588628762541806, "grad_norm": 1.287432549788801, "learning_rate": 9.845550892622113e-06, "loss": 0.8746, "step": 642 }, { "epoch": 0.8602006688963211, "grad_norm": 1.4524741195552076, "learning_rate": 9.844397853451687e-06, "loss": 0.8863, "step": 643 }, { "epoch": 0.8615384615384616, "grad_norm": 1.1165628951043483, "learning_rate": 9.843240594292507e-06, "loss": 0.7333, "step": 644 }, { "epoch": 0.862876254180602, "grad_norm": 1.558832703009952, "learning_rate": 9.84207911615267e-06, "loss": 0.7945, "step": 645 }, { "epoch": 0.8642140468227425, "grad_norm": 1.4287736135238647, "learning_rate": 9.840913420043945e-06, "loss": 0.9283, "step": 646 }, { "epoch": 0.8655518394648829, "grad_norm": 0.9700187068692222, "learning_rate": 9.839743506981783e-06, "loss": 0.7515, "step": 647 }, { "epoch": 0.8668896321070234, "grad_norm": 1.2690770237371467, "learning_rate": 9.838569377985298e-06, "loss": 0.8336, "step": 648 }, { "epoch": 0.8682274247491639, "grad_norm": 0.9401212890535413, "learning_rate": 9.837391034077286e-06, "loss": 0.6687, "step": 649 }, { "epoch": 0.8695652173913043, "grad_norm": 0.9657180453230656, "learning_rate": 9.836208476284208e-06, "loss": 0.7309, "step": 650 }, { "epoch": 0.8709030100334448, "grad_norm": 1.170542822490556, "learning_rate": 9.835021705636201e-06, "loss": 0.6824, "step": 651 }, { "epoch": 0.8722408026755852, "grad_norm": 1.234654989220532, "learning_rate": 9.833830723167067e-06, "loss": 0.9209, "step": 652 }, { "epoch": 0.8735785953177257, "grad_norm": 1.0584312392798658, "learning_rate": 9.83263552991428e-06, "loss": 0.7862, "step": 653 }, { "epoch": 0.8749163879598663, "grad_norm": 1.1446278217888108, "learning_rate": 9.83143612691898e-06, "loss": 0.8011, "step": 654 }, { "epoch": 0.8762541806020067, "grad_norm": 0.9164380851796017, "learning_rate": 9.830232515225976e-06, "loss": 0.6814, "step": 655 }, { "epoch": 0.8775919732441472, "grad_norm": 1.433205519801478, "learning_rate": 9.829024695883746e-06, "loss": 0.8451, "step": 656 }, { "epoch": 0.8789297658862876, "grad_norm": 0.9660612125128512, "learning_rate": 9.827812669944423e-06, "loss": 0.7517, "step": 657 }, { "epoch": 0.8802675585284281, "grad_norm": 1.1143122079693755, "learning_rate": 9.826596438463818e-06, "loss": 0.8043, "step": 658 }, { "epoch": 0.8816053511705686, "grad_norm": 1.1100380529470573, "learning_rate": 9.825376002501393e-06, "loss": 0.7847, "step": 659 }, { "epoch": 0.882943143812709, "grad_norm": 1.1564209852079868, "learning_rate": 9.824151363120283e-06, "loss": 0.794, "step": 660 }, { "epoch": 0.8842809364548495, "grad_norm": 1.3503441404906356, "learning_rate": 9.822922521387277e-06, "loss": 0.7868, "step": 661 }, { "epoch": 0.88561872909699, "grad_norm": 1.1355147928151914, "learning_rate": 9.821689478372827e-06, "loss": 0.7705, "step": 662 }, { "epoch": 0.8869565217391304, "grad_norm": 1.1429022666635937, "learning_rate": 9.82045223515105e-06, "loss": 0.7373, "step": 663 }, { "epoch": 0.8882943143812709, "grad_norm": 1.2029789548291703, "learning_rate": 9.819210792799711e-06, "loss": 0.7466, "step": 664 }, { "epoch": 0.8896321070234113, "grad_norm": 1.2865378611760687, "learning_rate": 9.817965152400244e-06, "loss": 0.781, "step": 665 }, { "epoch": 0.8909698996655518, "grad_norm": 0.922091939989735, "learning_rate": 9.816715315037733e-06, "loss": 0.8166, "step": 666 }, { "epoch": 0.8923076923076924, "grad_norm": 0.9238957667466119, "learning_rate": 9.815461281800917e-06, "loss": 0.7781, "step": 667 }, { "epoch": 0.8936454849498328, "grad_norm": 0.996567399665391, "learning_rate": 9.814203053782201e-06, "loss": 0.7078, "step": 668 }, { "epoch": 0.8949832775919733, "grad_norm": 1.111072602629524, "learning_rate": 9.812940632077629e-06, "loss": 0.6899, "step": 669 }, { "epoch": 0.8963210702341137, "grad_norm": 1.5590009013569257, "learning_rate": 9.811674017786908e-06, "loss": 0.7567, "step": 670 }, { "epoch": 0.8976588628762542, "grad_norm": 1.183742837334987, "learning_rate": 9.810403212013395e-06, "loss": 0.7363, "step": 671 }, { "epoch": 0.8989966555183947, "grad_norm": 1.1008257182340009, "learning_rate": 9.809128215864096e-06, "loss": 0.674, "step": 672 }, { "epoch": 0.9003344481605351, "grad_norm": 1.1218668775227685, "learning_rate": 9.807849030449671e-06, "loss": 0.854, "step": 673 }, { "epoch": 0.9016722408026756, "grad_norm": 1.3136715218622053, "learning_rate": 9.806565656884426e-06, "loss": 0.7128, "step": 674 }, { "epoch": 0.903010033444816, "grad_norm": 1.131855371506293, "learning_rate": 9.805278096286318e-06, "loss": 0.796, "step": 675 }, { "epoch": 0.9043478260869565, "grad_norm": 0.8828129749940323, "learning_rate": 9.803986349776948e-06, "loss": 0.7125, "step": 676 }, { "epoch": 0.905685618729097, "grad_norm": 1.0232090049861313, "learning_rate": 9.802690418481569e-06, "loss": 0.737, "step": 677 }, { "epoch": 0.9070234113712374, "grad_norm": 1.1468118881726923, "learning_rate": 9.80139030352907e-06, "loss": 0.7539, "step": 678 }, { "epoch": 0.9083612040133779, "grad_norm": 1.1807638172183443, "learning_rate": 9.800086006051996e-06, "loss": 0.7833, "step": 679 }, { "epoch": 0.9096989966555183, "grad_norm": 1.1723418929957148, "learning_rate": 9.798777527186527e-06, "loss": 0.8223, "step": 680 }, { "epoch": 0.9110367892976589, "grad_norm": 1.0215241967460398, "learning_rate": 9.797464868072489e-06, "loss": 0.709, "step": 681 }, { "epoch": 0.9123745819397994, "grad_norm": 0.9621148188380786, "learning_rate": 9.796148029853345e-06, "loss": 0.7021, "step": 682 }, { "epoch": 0.9137123745819398, "grad_norm": 1.0941896546186929, "learning_rate": 9.794827013676206e-06, "loss": 0.77, "step": 683 }, { "epoch": 0.9150501672240803, "grad_norm": 0.9631903037817073, "learning_rate": 9.793501820691818e-06, "loss": 0.7061, "step": 684 }, { "epoch": 0.9163879598662207, "grad_norm": 1.0104783106077269, "learning_rate": 9.792172452054565e-06, "loss": 0.7599, "step": 685 }, { "epoch": 0.9177257525083612, "grad_norm": 1.536284399347717, "learning_rate": 9.790838908922468e-06, "loss": 0.9895, "step": 686 }, { "epoch": 0.9190635451505017, "grad_norm": 1.1314011513186724, "learning_rate": 9.789501192457188e-06, "loss": 0.8363, "step": 687 }, { "epoch": 0.9204013377926421, "grad_norm": 1.3390072396528603, "learning_rate": 9.788159303824018e-06, "loss": 0.8584, "step": 688 }, { "epoch": 0.9217391304347826, "grad_norm": 0.8597613583056176, "learning_rate": 9.786813244191885e-06, "loss": 0.7376, "step": 689 }, { "epoch": 0.9230769230769231, "grad_norm": 0.9280585309261845, "learning_rate": 9.785463014733356e-06, "loss": 0.8027, "step": 690 }, { "epoch": 0.9244147157190635, "grad_norm": 0.8972728664978195, "learning_rate": 9.78410861662462e-06, "loss": 0.6055, "step": 691 }, { "epoch": 0.925752508361204, "grad_norm": 1.1312367094234308, "learning_rate": 9.782750051045504e-06, "loss": 0.7252, "step": 692 }, { "epoch": 0.9270903010033444, "grad_norm": 0.8052916068201594, "learning_rate": 9.781387319179465e-06, "loss": 0.6334, "step": 693 }, { "epoch": 0.928428093645485, "grad_norm": 1.4481442224990329, "learning_rate": 9.78002042221359e-06, "loss": 0.7208, "step": 694 }, { "epoch": 0.9297658862876255, "grad_norm": 0.931833411135937, "learning_rate": 9.778649361338588e-06, "loss": 0.72, "step": 695 }, { "epoch": 0.9311036789297659, "grad_norm": 1.3467833338295412, "learning_rate": 9.777274137748802e-06, "loss": 0.8535, "step": 696 }, { "epoch": 0.9324414715719064, "grad_norm": 1.0880840021080387, "learning_rate": 9.775894752642199e-06, "loss": 0.7508, "step": 697 }, { "epoch": 0.9337792642140468, "grad_norm": 0.880480857009773, "learning_rate": 9.774511207220369e-06, "loss": 0.759, "step": 698 }, { "epoch": 0.9351170568561873, "grad_norm": 1.1547648464475904, "learning_rate": 9.773123502688532e-06, "loss": 0.7526, "step": 699 }, { "epoch": 0.9364548494983278, "grad_norm": 1.1011712516734795, "learning_rate": 9.771731640255525e-06, "loss": 0.8049, "step": 700 }, { "epoch": 0.9377926421404682, "grad_norm": 1.4612102860073197, "learning_rate": 9.770335621133808e-06, "loss": 0.8071, "step": 701 }, { "epoch": 0.9391304347826087, "grad_norm": 1.064905088061253, "learning_rate": 9.768935446539464e-06, "loss": 0.7059, "step": 702 }, { "epoch": 0.9404682274247491, "grad_norm": 0.9821559283199923, "learning_rate": 9.767531117692196e-06, "loss": 0.8071, "step": 703 }, { "epoch": 0.9418060200668896, "grad_norm": 1.0198940689787943, "learning_rate": 9.766122635815322e-06, "loss": 0.781, "step": 704 }, { "epoch": 0.9431438127090301, "grad_norm": 1.1376451977690434, "learning_rate": 9.764710002135784e-06, "loss": 0.774, "step": 705 }, { "epoch": 0.9444816053511705, "grad_norm": 0.8518375113632961, "learning_rate": 9.763293217884133e-06, "loss": 0.7728, "step": 706 }, { "epoch": 0.945819397993311, "grad_norm": 0.9523003446252343, "learning_rate": 9.761872284294542e-06, "loss": 0.5846, "step": 707 }, { "epoch": 0.9471571906354515, "grad_norm": 1.091451126534351, "learning_rate": 9.760447202604796e-06, "loss": 0.6543, "step": 708 }, { "epoch": 0.948494983277592, "grad_norm": 0.9334585239236298, "learning_rate": 9.759017974056292e-06, "loss": 0.752, "step": 709 }, { "epoch": 0.9498327759197325, "grad_norm": 1.2124222004649214, "learning_rate": 9.757584599894045e-06, "loss": 0.794, "step": 710 }, { "epoch": 0.9511705685618729, "grad_norm": 1.0662100489453816, "learning_rate": 9.756147081366673e-06, "loss": 0.6955, "step": 711 }, { "epoch": 0.9525083612040134, "grad_norm": 0.9848953811376512, "learning_rate": 9.75470541972641e-06, "loss": 0.7726, "step": 712 }, { "epoch": 0.9538461538461539, "grad_norm": 1.3755053429267878, "learning_rate": 9.753259616229096e-06, "loss": 0.9022, "step": 713 }, { "epoch": 0.9551839464882943, "grad_norm": 0.9671511712176996, "learning_rate": 9.751809672134184e-06, "loss": 0.6666, "step": 714 }, { "epoch": 0.9565217391304348, "grad_norm": 0.9175827464669697, "learning_rate": 9.750355588704728e-06, "loss": 0.7347, "step": 715 }, { "epoch": 0.9578595317725752, "grad_norm": 0.9972605175511805, "learning_rate": 9.748897367207391e-06, "loss": 0.7011, "step": 716 }, { "epoch": 0.9591973244147157, "grad_norm": 0.8927508493783339, "learning_rate": 9.747435008912438e-06, "loss": 0.6677, "step": 717 }, { "epoch": 0.9605351170568562, "grad_norm": 0.9671916780277281, "learning_rate": 9.745968515093741e-06, "loss": 0.688, "step": 718 }, { "epoch": 0.9618729096989966, "grad_norm": 0.9300708583877648, "learning_rate": 9.744497887028774e-06, "loss": 0.6573, "step": 719 }, { "epoch": 0.9632107023411371, "grad_norm": 1.0476310647980138, "learning_rate": 9.74302312599861e-06, "loss": 0.7366, "step": 720 }, { "epoch": 0.9645484949832775, "grad_norm": 0.913408371050379, "learning_rate": 9.741544233287924e-06, "loss": 0.5969, "step": 721 }, { "epoch": 0.9658862876254181, "grad_norm": 1.338128879615913, "learning_rate": 9.74006121018499e-06, "loss": 0.8325, "step": 722 }, { "epoch": 0.9672240802675586, "grad_norm": 0.9539250641834541, "learning_rate": 9.73857405798168e-06, "loss": 0.6964, "step": 723 }, { "epoch": 0.968561872909699, "grad_norm": 1.4120635723138721, "learning_rate": 9.73708277797346e-06, "loss": 0.8528, "step": 724 }, { "epoch": 0.9698996655518395, "grad_norm": 0.8517078374367245, "learning_rate": 9.735587371459399e-06, "loss": 0.6629, "step": 725 }, { "epoch": 0.9712374581939799, "grad_norm": 0.9192849673951148, "learning_rate": 9.734087839742152e-06, "loss": 0.6805, "step": 726 }, { "epoch": 0.9725752508361204, "grad_norm": 0.9027761809188563, "learning_rate": 9.732584184127973e-06, "loss": 0.6689, "step": 727 }, { "epoch": 0.9739130434782609, "grad_norm": 1.0762998611250811, "learning_rate": 9.731076405926706e-06, "loss": 0.8407, "step": 728 }, { "epoch": 0.9752508361204013, "grad_norm": 0.9979729583789638, "learning_rate": 9.729564506451791e-06, "loss": 0.6128, "step": 729 }, { "epoch": 0.9765886287625418, "grad_norm": 0.9848250824309499, "learning_rate": 9.72804848702025e-06, "loss": 0.761, "step": 730 }, { "epoch": 0.9779264214046822, "grad_norm": 1.5105414041100287, "learning_rate": 9.7265283489527e-06, "loss": 0.8292, "step": 731 }, { "epoch": 0.9792642140468227, "grad_norm": 1.0737388782816408, "learning_rate": 9.725004093573343e-06, "loss": 0.6822, "step": 732 }, { "epoch": 0.9806020066889632, "grad_norm": 1.315970864404672, "learning_rate": 9.72347572220997e-06, "loss": 0.9287, "step": 733 }, { "epoch": 0.9819397993311036, "grad_norm": 1.3904394798388233, "learning_rate": 9.721943236193952e-06, "loss": 0.8411, "step": 734 }, { "epoch": 0.9832775919732442, "grad_norm": 0.9879174590048183, "learning_rate": 9.720406636860252e-06, "loss": 0.7454, "step": 735 }, { "epoch": 0.9846153846153847, "grad_norm": 1.1700071283932125, "learning_rate": 9.718865925547411e-06, "loss": 0.7798, "step": 736 }, { "epoch": 0.9859531772575251, "grad_norm": 0.9256549902057899, "learning_rate": 9.717321103597556e-06, "loss": 0.7281, "step": 737 }, { "epoch": 0.9872909698996656, "grad_norm": 0.9466777155582704, "learning_rate": 9.715772172356388e-06, "loss": 0.7262, "step": 738 }, { "epoch": 0.988628762541806, "grad_norm": 1.4158289021905108, "learning_rate": 9.714219133173194e-06, "loss": 0.8899, "step": 739 }, { "epoch": 0.9899665551839465, "grad_norm": 0.9806513324837907, "learning_rate": 9.712661987400838e-06, "loss": 0.6163, "step": 740 }, { "epoch": 0.991304347826087, "grad_norm": 0.9985590919477962, "learning_rate": 9.711100736395758e-06, "loss": 0.6408, "step": 741 }, { "epoch": 0.9926421404682274, "grad_norm": 0.9078423759149866, "learning_rate": 9.709535381517973e-06, "loss": 0.6741, "step": 742 }, { "epoch": 0.9939799331103679, "grad_norm": 1.1575295206950553, "learning_rate": 9.707965924131074e-06, "loss": 0.7332, "step": 743 }, { "epoch": 0.9953177257525083, "grad_norm": 1.259080517355542, "learning_rate": 9.706392365602224e-06, "loss": 0.7693, "step": 744 }, { "epoch": 0.9966555183946488, "grad_norm": 0.7981392962501053, "learning_rate": 9.704814707302166e-06, "loss": 0.7292, "step": 745 }, { "epoch": 0.9979933110367893, "grad_norm": 0.934609960418448, "learning_rate": 9.703232950605203e-06, "loss": 0.6757, "step": 746 }, { "epoch": 0.9993311036789297, "grad_norm": 1.2069376396436848, "learning_rate": 9.70164709688922e-06, "loss": 0.7748, "step": 747 }, { "epoch": 1.0, "grad_norm": 1.2069376396436848, "learning_rate": 9.70005714753566e-06, "loss": 0.8285, "step": 748 }, { "epoch": 1.0013377926421405, "grad_norm": 2.1598628136573286, "learning_rate": 9.698463103929542e-06, "loss": 0.6026, "step": 749 }, { "epoch": 1.002675585284281, "grad_norm": 1.147316518713372, "learning_rate": 9.69686496745945e-06, "loss": 0.5862, "step": 750 }, { "epoch": 1.0040133779264213, "grad_norm": 1.1179243856562098, "learning_rate": 9.695262739517528e-06, "loss": 0.6592, "step": 751 }, { "epoch": 1.0053511705685618, "grad_norm": 1.1183941638732344, "learning_rate": 9.69365642149949e-06, "loss": 0.7373, "step": 752 }, { "epoch": 1.0066889632107023, "grad_norm": 1.1555195571696777, "learning_rate": 9.69204601480461e-06, "loss": 0.6588, "step": 753 }, { "epoch": 1.0080267558528428, "grad_norm": 1.257180581788991, "learning_rate": 9.690431520835725e-06, "loss": 0.7405, "step": 754 }, { "epoch": 1.0093645484949834, "grad_norm": 0.9692684646087125, "learning_rate": 9.688812940999232e-06, "loss": 0.6554, "step": 755 }, { "epoch": 1.0107023411371236, "grad_norm": 1.1247774435081714, "learning_rate": 9.687190276705088e-06, "loss": 0.6161, "step": 756 }, { "epoch": 1.0120401337792642, "grad_norm": 0.8344439397017793, "learning_rate": 9.685563529366806e-06, "loss": 0.5723, "step": 757 }, { "epoch": 1.0133779264214047, "grad_norm": 1.0514210575415828, "learning_rate": 9.683932700401457e-06, "loss": 0.6563, "step": 758 }, { "epoch": 1.0147157190635452, "grad_norm": 1.1470563205035973, "learning_rate": 9.682297791229668e-06, "loss": 0.667, "step": 759 }, { "epoch": 1.0160535117056857, "grad_norm": 1.0600233017059435, "learning_rate": 9.68065880327562e-06, "loss": 0.7068, "step": 760 }, { "epoch": 1.017391304347826, "grad_norm": 1.6624577530135407, "learning_rate": 9.679015737967046e-06, "loss": 0.6746, "step": 761 }, { "epoch": 1.0187290969899665, "grad_norm": 1.4868949251104884, "learning_rate": 9.677368596735232e-06, "loss": 0.6491, "step": 762 }, { "epoch": 1.020066889632107, "grad_norm": 1.1915293240291902, "learning_rate": 9.675717381015014e-06, "loss": 0.6532, "step": 763 }, { "epoch": 1.0214046822742475, "grad_norm": 0.8858773702447198, "learning_rate": 9.674062092244779e-06, "loss": 0.6415, "step": 764 }, { "epoch": 1.022742474916388, "grad_norm": 0.9746404099126087, "learning_rate": 9.67240273186646e-06, "loss": 0.5688, "step": 765 }, { "epoch": 1.0240802675585283, "grad_norm": 1.0335676929553566, "learning_rate": 9.670739301325534e-06, "loss": 0.6816, "step": 766 }, { "epoch": 1.0254180602006688, "grad_norm": 1.2298654491105236, "learning_rate": 9.669071802071032e-06, "loss": 0.647, "step": 767 }, { "epoch": 1.0267558528428093, "grad_norm": 0.9813864095796442, "learning_rate": 9.66740023555552e-06, "loss": 0.6877, "step": 768 }, { "epoch": 1.0280936454849499, "grad_norm": 1.1920723609544563, "learning_rate": 9.665724603235115e-06, "loss": 0.6177, "step": 769 }, { "epoch": 1.0294314381270904, "grad_norm": 1.1738122110191824, "learning_rate": 9.66404490656947e-06, "loss": 0.681, "step": 770 }, { "epoch": 1.0307692307692307, "grad_norm": 0.9665567027785782, "learning_rate": 9.66236114702178e-06, "loss": 0.6422, "step": 771 }, { "epoch": 1.0321070234113712, "grad_norm": 1.2003089404561278, "learning_rate": 9.66067332605878e-06, "loss": 0.6709, "step": 772 }, { "epoch": 1.0334448160535117, "grad_norm": 0.9158230816830558, "learning_rate": 9.658981445150744e-06, "loss": 0.6629, "step": 773 }, { "epoch": 1.0347826086956522, "grad_norm": 1.3006927998232192, "learning_rate": 9.65728550577148e-06, "loss": 0.7265, "step": 774 }, { "epoch": 1.0361204013377927, "grad_norm": 0.883194827666467, "learning_rate": 9.655585509398334e-06, "loss": 0.6911, "step": 775 }, { "epoch": 1.037458193979933, "grad_norm": 1.1269133043620845, "learning_rate": 9.65388145751218e-06, "loss": 0.7062, "step": 776 }, { "epoch": 1.0387959866220735, "grad_norm": 1.1636362926189352, "learning_rate": 9.652173351597435e-06, "loss": 0.6496, "step": 777 }, { "epoch": 1.040133779264214, "grad_norm": 1.1473916553509989, "learning_rate": 9.650461193142042e-06, "loss": 0.8089, "step": 778 }, { "epoch": 1.0414715719063545, "grad_norm": 1.0172657522656683, "learning_rate": 9.648744983637471e-06, "loss": 0.6373, "step": 779 }, { "epoch": 1.042809364548495, "grad_norm": 1.4315816928377205, "learning_rate": 9.647024724578724e-06, "loss": 0.7188, "step": 780 }, { "epoch": 1.0441471571906356, "grad_norm": 1.2979727584967542, "learning_rate": 9.645300417464332e-06, "loss": 0.6679, "step": 781 }, { "epoch": 1.0454849498327758, "grad_norm": 1.2391869393971362, "learning_rate": 9.643572063796352e-06, "loss": 0.6485, "step": 782 }, { "epoch": 1.0468227424749164, "grad_norm": 1.1139387027000052, "learning_rate": 9.641839665080363e-06, "loss": 0.6359, "step": 783 }, { "epoch": 1.0481605351170569, "grad_norm": 1.0127266086969418, "learning_rate": 9.640103222825472e-06, "loss": 0.5831, "step": 784 }, { "epoch": 1.0494983277591974, "grad_norm": 1.0229829783037296, "learning_rate": 9.638362738544302e-06, "loss": 0.7971, "step": 785 }, { "epoch": 1.050836120401338, "grad_norm": 1.4560668513480817, "learning_rate": 9.636618213753006e-06, "loss": 0.7035, "step": 786 }, { "epoch": 1.0521739130434782, "grad_norm": 0.6940195707539007, "learning_rate": 9.634869649971247e-06, "loss": 0.7272, "step": 787 }, { "epoch": 1.0535117056856187, "grad_norm": 1.3847963130005694, "learning_rate": 9.633117048722213e-06, "loss": 0.6424, "step": 788 }, { "epoch": 1.0548494983277592, "grad_norm": 0.9580122947131917, "learning_rate": 9.631360411532609e-06, "loss": 0.7028, "step": 789 }, { "epoch": 1.0561872909698997, "grad_norm": 1.0837975363781809, "learning_rate": 9.629599739932652e-06, "loss": 0.6935, "step": 790 }, { "epoch": 1.0575250836120402, "grad_norm": 1.0883144593226444, "learning_rate": 9.627835035456074e-06, "loss": 0.7132, "step": 791 }, { "epoch": 1.0588628762541805, "grad_norm": 1.199957963380447, "learning_rate": 9.626066299640124e-06, "loss": 0.6577, "step": 792 }, { "epoch": 1.060200668896321, "grad_norm": 1.3031361081925925, "learning_rate": 9.62429353402556e-06, "loss": 0.7392, "step": 793 }, { "epoch": 1.0615384615384615, "grad_norm": 1.162966225753283, "learning_rate": 9.62251674015665e-06, "loss": 0.609, "step": 794 }, { "epoch": 1.062876254180602, "grad_norm": 1.0952659751227272, "learning_rate": 9.620735919581168e-06, "loss": 0.722, "step": 795 }, { "epoch": 1.0642140468227426, "grad_norm": 0.9143090325916488, "learning_rate": 9.618951073850404e-06, "loss": 0.6832, "step": 796 }, { "epoch": 1.0655518394648829, "grad_norm": 1.0060214025914485, "learning_rate": 9.617162204519147e-06, "loss": 0.6635, "step": 797 }, { "epoch": 1.0668896321070234, "grad_norm": 1.1507790314188533, "learning_rate": 9.615369313145695e-06, "loss": 0.689, "step": 798 }, { "epoch": 1.0682274247491639, "grad_norm": 1.1184396565188204, "learning_rate": 9.61357240129185e-06, "loss": 0.7136, "step": 799 }, { "epoch": 1.0695652173913044, "grad_norm": 1.4326211626082266, "learning_rate": 9.611771470522908e-06, "loss": 0.8081, "step": 800 }, { "epoch": 1.070903010033445, "grad_norm": 0.953037738423754, "learning_rate": 9.609966522407678e-06, "loss": 0.5463, "step": 801 }, { "epoch": 1.0722408026755852, "grad_norm": 0.8524355439347789, "learning_rate": 9.60815755851846e-06, "loss": 0.6399, "step": 802 }, { "epoch": 1.0735785953177257, "grad_norm": 1.1677112980608688, "learning_rate": 9.60634458043106e-06, "loss": 0.7265, "step": 803 }, { "epoch": 1.0749163879598662, "grad_norm": 1.311266220287425, "learning_rate": 9.60452758972477e-06, "loss": 0.6442, "step": 804 }, { "epoch": 1.0762541806020067, "grad_norm": 1.13046118786126, "learning_rate": 9.602706587982384e-06, "loss": 0.6144, "step": 805 }, { "epoch": 1.0775919732441472, "grad_norm": 1.0379030690697797, "learning_rate": 9.600881576790194e-06, "loss": 0.6742, "step": 806 }, { "epoch": 1.0789297658862875, "grad_norm": 1.1180032674994291, "learning_rate": 9.599052557737973e-06, "loss": 0.594, "step": 807 }, { "epoch": 1.080267558528428, "grad_norm": 0.9404946287959991, "learning_rate": 9.597219532418997e-06, "loss": 0.6634, "step": 808 }, { "epoch": 1.0816053511705686, "grad_norm": 1.3740183316574865, "learning_rate": 9.59538250243003e-06, "loss": 0.7124, "step": 809 }, { "epoch": 1.082943143812709, "grad_norm": 1.4123292020776053, "learning_rate": 9.593541469371313e-06, "loss": 0.7209, "step": 810 }, { "epoch": 1.0842809364548496, "grad_norm": 1.1476915750687426, "learning_rate": 9.591696434846589e-06, "loss": 0.7904, "step": 811 }, { "epoch": 1.0856187290969899, "grad_norm": 1.146715618039771, "learning_rate": 9.589847400463079e-06, "loss": 0.7282, "step": 812 }, { "epoch": 1.0869565217391304, "grad_norm": 1.1116507623659515, "learning_rate": 9.58799436783149e-06, "loss": 0.7019, "step": 813 }, { "epoch": 1.0882943143812709, "grad_norm": 0.9602825938831175, "learning_rate": 9.586137338566012e-06, "loss": 0.7321, "step": 814 }, { "epoch": 1.0896321070234114, "grad_norm": 1.0120791550584878, "learning_rate": 9.584276314284316e-06, "loss": 0.6705, "step": 815 }, { "epoch": 1.090969899665552, "grad_norm": 1.4068616259395446, "learning_rate": 9.58241129660755e-06, "loss": 0.7869, "step": 816 }, { "epoch": 1.0923076923076924, "grad_norm": 1.050471282442121, "learning_rate": 9.580542287160348e-06, "loss": 0.5831, "step": 817 }, { "epoch": 1.0936454849498327, "grad_norm": 1.1711732600806042, "learning_rate": 9.578669287570817e-06, "loss": 0.6434, "step": 818 }, { "epoch": 1.0949832775919732, "grad_norm": 1.0230290460933156, "learning_rate": 9.576792299470537e-06, "loss": 0.6257, "step": 819 }, { "epoch": 1.0963210702341137, "grad_norm": 1.0114648091033522, "learning_rate": 9.574911324494569e-06, "loss": 0.6862, "step": 820 }, { "epoch": 1.0976588628762542, "grad_norm": 0.9513163696076965, "learning_rate": 9.573026364281441e-06, "loss": 0.7158, "step": 821 }, { "epoch": 1.0989966555183948, "grad_norm": 1.1937600222537785, "learning_rate": 9.571137420473154e-06, "loss": 0.631, "step": 822 }, { "epoch": 1.100334448160535, "grad_norm": 1.2419055934120167, "learning_rate": 9.569244494715183e-06, "loss": 0.6569, "step": 823 }, { "epoch": 1.1016722408026756, "grad_norm": 1.185165920467502, "learning_rate": 9.567347588656468e-06, "loss": 0.6834, "step": 824 }, { "epoch": 1.103010033444816, "grad_norm": 1.2296666013685438, "learning_rate": 9.565446703949417e-06, "loss": 0.6871, "step": 825 }, { "epoch": 1.1043478260869566, "grad_norm": 1.1922229542697311, "learning_rate": 9.563541842249903e-06, "loss": 0.799, "step": 826 }, { "epoch": 1.105685618729097, "grad_norm": 1.1899454347451357, "learning_rate": 9.561633005217264e-06, "loss": 0.769, "step": 827 }, { "epoch": 1.1070234113712374, "grad_norm": 1.0859268111905063, "learning_rate": 9.559720194514303e-06, "loss": 0.6567, "step": 828 }, { "epoch": 1.108361204013378, "grad_norm": 1.3028073301176175, "learning_rate": 9.557803411807283e-06, "loss": 0.7145, "step": 829 }, { "epoch": 1.1096989966555184, "grad_norm": 1.1467862588160271, "learning_rate": 9.555882658765924e-06, "loss": 0.7446, "step": 830 }, { "epoch": 1.111036789297659, "grad_norm": 0.9445293638725968, "learning_rate": 9.55395793706341e-06, "loss": 0.7536, "step": 831 }, { "epoch": 1.1123745819397994, "grad_norm": 1.1714480725823733, "learning_rate": 9.552029248376378e-06, "loss": 0.669, "step": 832 }, { "epoch": 1.1137123745819397, "grad_norm": 1.414644098010865, "learning_rate": 9.550096594384923e-06, "loss": 0.755, "step": 833 }, { "epoch": 1.1150501672240802, "grad_norm": 1.3099450277416262, "learning_rate": 9.548159976772593e-06, "loss": 0.7326, "step": 834 }, { "epoch": 1.1163879598662207, "grad_norm": 1.4263397733600391, "learning_rate": 9.54621939722639e-06, "loss": 0.6938, "step": 835 }, { "epoch": 1.1177257525083613, "grad_norm": 1.0583819263380843, "learning_rate": 9.544274857436763e-06, "loss": 0.7181, "step": 836 }, { "epoch": 1.1190635451505018, "grad_norm": 1.0451257241227974, "learning_rate": 9.542326359097619e-06, "loss": 0.6505, "step": 837 }, { "epoch": 1.120401337792642, "grad_norm": 1.2195277136069689, "learning_rate": 9.540373903906306e-06, "loss": 0.694, "step": 838 }, { "epoch": 1.1217391304347826, "grad_norm": 1.1272928909930273, "learning_rate": 9.538417493563621e-06, "loss": 0.6434, "step": 839 }, { "epoch": 1.123076923076923, "grad_norm": 1.1472935731323006, "learning_rate": 9.536457129773808e-06, "loss": 0.6672, "step": 840 }, { "epoch": 1.1244147157190636, "grad_norm": 1.101199471220356, "learning_rate": 9.534492814244552e-06, "loss": 0.6171, "step": 841 }, { "epoch": 1.125752508361204, "grad_norm": 0.9645268653308842, "learning_rate": 9.532524548686984e-06, "loss": 0.6378, "step": 842 }, { "epoch": 1.1270903010033444, "grad_norm": 1.0323279525594302, "learning_rate": 9.530552334815672e-06, "loss": 0.859, "step": 843 }, { "epoch": 1.128428093645485, "grad_norm": 1.1404099440120947, "learning_rate": 9.528576174348625e-06, "loss": 0.7069, "step": 844 }, { "epoch": 1.1297658862876254, "grad_norm": 0.9892298713624359, "learning_rate": 9.526596069007292e-06, "loss": 0.6314, "step": 845 }, { "epoch": 1.131103678929766, "grad_norm": 0.914444232135347, "learning_rate": 9.524612020516556e-06, "loss": 0.6416, "step": 846 }, { "epoch": 1.1324414715719064, "grad_norm": 0.9731009547773307, "learning_rate": 9.522624030604735e-06, "loss": 0.6497, "step": 847 }, { "epoch": 1.1337792642140467, "grad_norm": 1.3752579857566276, "learning_rate": 9.520632101003579e-06, "loss": 0.7741, "step": 848 }, { "epoch": 1.1351170568561872, "grad_norm": 0.723570443584989, "learning_rate": 9.518636233448276e-06, "loss": 0.6882, "step": 849 }, { "epoch": 1.1364548494983278, "grad_norm": 1.1738007901668799, "learning_rate": 9.516636429677437e-06, "loss": 0.7561, "step": 850 }, { "epoch": 1.1377926421404683, "grad_norm": 1.1974221531156632, "learning_rate": 9.514632691433108e-06, "loss": 0.6424, "step": 851 }, { "epoch": 1.1391304347826088, "grad_norm": 0.9146498345066884, "learning_rate": 9.512625020460754e-06, "loss": 0.7586, "step": 852 }, { "epoch": 1.140468227424749, "grad_norm": 0.9826837380763861, "learning_rate": 9.510613418509276e-06, "loss": 0.6675, "step": 853 }, { "epoch": 1.1418060200668896, "grad_norm": 1.553966386748512, "learning_rate": 9.508597887330993e-06, "loss": 0.692, "step": 854 }, { "epoch": 1.14314381270903, "grad_norm": 0.9828748878712401, "learning_rate": 9.506578428681648e-06, "loss": 0.6333, "step": 855 }, { "epoch": 1.1444816053511706, "grad_norm": 1.2935417431285716, "learning_rate": 9.504555044320407e-06, "loss": 0.7219, "step": 856 }, { "epoch": 1.1458193979933111, "grad_norm": 1.0060977864391403, "learning_rate": 9.50252773600985e-06, "loss": 0.6636, "step": 857 }, { "epoch": 1.1471571906354514, "grad_norm": 1.1012180421437594, "learning_rate": 9.500496505515986e-06, "loss": 0.7066, "step": 858 }, { "epoch": 1.148494983277592, "grad_norm": 0.9785601375791119, "learning_rate": 9.498461354608228e-06, "loss": 0.6824, "step": 859 }, { "epoch": 1.1498327759197324, "grad_norm": 1.406316839002803, "learning_rate": 9.496422285059412e-06, "loss": 0.6806, "step": 860 }, { "epoch": 1.151170568561873, "grad_norm": 1.3250987301541661, "learning_rate": 9.494379298645788e-06, "loss": 0.5632, "step": 861 }, { "epoch": 1.1525083612040135, "grad_norm": 1.1785150023272017, "learning_rate": 9.492332397147013e-06, "loss": 0.7106, "step": 862 }, { "epoch": 1.1538461538461537, "grad_norm": 1.0453683712709296, "learning_rate": 9.490281582346159e-06, "loss": 0.6698, "step": 863 }, { "epoch": 1.1551839464882943, "grad_norm": 1.2495634289584954, "learning_rate": 9.488226856029704e-06, "loss": 0.7676, "step": 864 }, { "epoch": 1.1565217391304348, "grad_norm": 1.4276311827788013, "learning_rate": 9.486168219987534e-06, "loss": 0.5103, "step": 865 }, { "epoch": 1.1578595317725753, "grad_norm": 1.0700366960631773, "learning_rate": 9.484105676012943e-06, "loss": 0.6204, "step": 866 }, { "epoch": 1.1591973244147158, "grad_norm": 1.2913423642360096, "learning_rate": 9.482039225902623e-06, "loss": 0.8039, "step": 867 }, { "epoch": 1.160535117056856, "grad_norm": 1.1814413482392654, "learning_rate": 9.47996887145668e-06, "loss": 0.7202, "step": 868 }, { "epoch": 1.1618729096989966, "grad_norm": 0.848553322828833, "learning_rate": 9.47789461447861e-06, "loss": 0.6673, "step": 869 }, { "epoch": 1.163210702341137, "grad_norm": 1.1777839563784138, "learning_rate": 9.475816456775313e-06, "loss": 0.7221, "step": 870 }, { "epoch": 1.1645484949832776, "grad_norm": 0.975829062427739, "learning_rate": 9.473734400157086e-06, "loss": 0.8435, "step": 871 }, { "epoch": 1.1658862876254181, "grad_norm": 1.3312419768797978, "learning_rate": 9.471648446437625e-06, "loss": 0.6177, "step": 872 }, { "epoch": 1.1672240802675584, "grad_norm": 0.9008121701335665, "learning_rate": 9.469558597434018e-06, "loss": 0.6357, "step": 873 }, { "epoch": 1.168561872909699, "grad_norm": 1.0940651346032848, "learning_rate": 9.467464854966746e-06, "loss": 0.6401, "step": 874 }, { "epoch": 1.1698996655518394, "grad_norm": 1.146341305285185, "learning_rate": 9.465367220859684e-06, "loss": 0.6703, "step": 875 }, { "epoch": 1.17123745819398, "grad_norm": 1.1746091982105682, "learning_rate": 9.463265696940095e-06, "loss": 0.7164, "step": 876 }, { "epoch": 1.1725752508361205, "grad_norm": 1.0129176046301196, "learning_rate": 9.461160285038632e-06, "loss": 0.713, "step": 877 }, { "epoch": 1.1739130434782608, "grad_norm": 1.0617115767821774, "learning_rate": 9.459050986989333e-06, "loss": 0.7099, "step": 878 }, { "epoch": 1.1752508361204013, "grad_norm": 0.9808329443709954, "learning_rate": 9.456937804629623e-06, "loss": 0.7747, "step": 879 }, { "epoch": 1.1765886287625418, "grad_norm": 1.3068793990710392, "learning_rate": 9.45482073980031e-06, "loss": 0.7452, "step": 880 }, { "epoch": 1.1779264214046823, "grad_norm": 1.2948899850601499, "learning_rate": 9.452699794345583e-06, "loss": 0.6927, "step": 881 }, { "epoch": 1.1792642140468228, "grad_norm": 1.2955392612664316, "learning_rate": 9.45057497011301e-06, "loss": 0.6999, "step": 882 }, { "epoch": 1.180602006688963, "grad_norm": 0.9137382003719366, "learning_rate": 9.448446268953549e-06, "loss": 0.688, "step": 883 }, { "epoch": 1.1819397993311036, "grad_norm": 0.8125969636639577, "learning_rate": 9.446313692721515e-06, "loss": 0.6465, "step": 884 }, { "epoch": 1.1832775919732441, "grad_norm": 1.2063665551065692, "learning_rate": 9.444177243274619e-06, "loss": 0.6432, "step": 885 }, { "epoch": 1.1846153846153846, "grad_norm": 1.3252777983506112, "learning_rate": 9.442036922473932e-06, "loss": 0.7232, "step": 886 }, { "epoch": 1.1859531772575251, "grad_norm": 0.943624425241358, "learning_rate": 9.439892732183903e-06, "loss": 0.7308, "step": 887 }, { "epoch": 1.1872909698996654, "grad_norm": 1.1023650269029386, "learning_rate": 9.437744674272353e-06, "loss": 0.6301, "step": 888 }, { "epoch": 1.188628762541806, "grad_norm": 1.0819221674336803, "learning_rate": 9.435592750610469e-06, "loss": 0.678, "step": 889 }, { "epoch": 1.1899665551839465, "grad_norm": 1.4841016863878036, "learning_rate": 9.433436963072807e-06, "loss": 0.68, "step": 890 }, { "epoch": 1.191304347826087, "grad_norm": 1.2511049132490948, "learning_rate": 9.43127731353729e-06, "loss": 0.5213, "step": 891 }, { "epoch": 1.1926421404682275, "grad_norm": 1.1204647518596393, "learning_rate": 9.429113803885199e-06, "loss": 0.6171, "step": 892 }, { "epoch": 1.193979933110368, "grad_norm": 0.9396848993317021, "learning_rate": 9.426946436001189e-06, "loss": 0.6822, "step": 893 }, { "epoch": 1.1953177257525083, "grad_norm": 1.1325663409278672, "learning_rate": 9.424775211773263e-06, "loss": 0.6857, "step": 894 }, { "epoch": 1.1966555183946488, "grad_norm": 1.0305749418260157, "learning_rate": 9.422600133092795e-06, "loss": 0.7688, "step": 895 }, { "epoch": 1.1979933110367893, "grad_norm": 1.1429398798525654, "learning_rate": 9.420421201854507e-06, "loss": 0.595, "step": 896 }, { "epoch": 1.1993311036789298, "grad_norm": 0.9610502009123489, "learning_rate": 9.418238419956484e-06, "loss": 0.7123, "step": 897 }, { "epoch": 1.2006688963210703, "grad_norm": 1.1413481829131749, "learning_rate": 9.41605178930016e-06, "loss": 0.7138, "step": 898 }, { "epoch": 1.2020066889632106, "grad_norm": 1.1302316412354012, "learning_rate": 9.413861311790327e-06, "loss": 0.7526, "step": 899 }, { "epoch": 1.2033444816053511, "grad_norm": 1.0338478301633198, "learning_rate": 9.411666989335123e-06, "loss": 0.6267, "step": 900 }, { "epoch": 1.2046822742474916, "grad_norm": 1.0258042533448166, "learning_rate": 9.409468823846038e-06, "loss": 0.5899, "step": 901 }, { "epoch": 1.2060200668896321, "grad_norm": 1.0836518431496711, "learning_rate": 9.40726681723791e-06, "loss": 0.6709, "step": 902 }, { "epoch": 1.2073578595317727, "grad_norm": 0.9146123832825053, "learning_rate": 9.405060971428924e-06, "loss": 0.6423, "step": 903 }, { "epoch": 1.208695652173913, "grad_norm": 1.3068488126768536, "learning_rate": 9.402851288340604e-06, "loss": 0.7362, "step": 904 }, { "epoch": 1.2100334448160535, "grad_norm": 1.2993006156353712, "learning_rate": 9.400637769897823e-06, "loss": 0.7824, "step": 905 }, { "epoch": 1.211371237458194, "grad_norm": 0.9658290352457473, "learning_rate": 9.398420418028789e-06, "loss": 0.5823, "step": 906 }, { "epoch": 1.2127090301003345, "grad_norm": 1.1015370898720191, "learning_rate": 9.396199234665056e-06, "loss": 0.8457, "step": 907 }, { "epoch": 1.214046822742475, "grad_norm": 1.2608149500981585, "learning_rate": 9.393974221741514e-06, "loss": 0.6071, "step": 908 }, { "epoch": 1.2153846153846155, "grad_norm": 0.9716777386506078, "learning_rate": 9.391745381196382e-06, "loss": 0.6843, "step": 909 }, { "epoch": 1.2167224080267558, "grad_norm": 1.0004454742472435, "learning_rate": 9.38951271497122e-06, "loss": 0.614, "step": 910 }, { "epoch": 1.2180602006688963, "grad_norm": 1.1887210528770231, "learning_rate": 9.387276225010925e-06, "loss": 0.824, "step": 911 }, { "epoch": 1.2193979933110368, "grad_norm": 0.8467343984900518, "learning_rate": 9.38503591326371e-06, "loss": 0.7734, "step": 912 }, { "epoch": 1.2207357859531773, "grad_norm": 1.1831506152013358, "learning_rate": 9.382791781681133e-06, "loss": 0.614, "step": 913 }, { "epoch": 1.2220735785953178, "grad_norm": 1.062647729108463, "learning_rate": 9.380543832218069e-06, "loss": 0.6515, "step": 914 }, { "epoch": 1.2234113712374581, "grad_norm": 1.0788702929015248, "learning_rate": 9.378292066832723e-06, "loss": 0.7316, "step": 915 }, { "epoch": 1.2247491638795986, "grad_norm": 0.9559421320773824, "learning_rate": 9.376036487486626e-06, "loss": 0.7081, "step": 916 }, { "epoch": 1.2260869565217392, "grad_norm": 1.1459077982962216, "learning_rate": 9.373777096144625e-06, "loss": 0.6107, "step": 917 }, { "epoch": 1.2274247491638797, "grad_norm": 1.069170657480157, "learning_rate": 9.371513894774894e-06, "loss": 0.5637, "step": 918 }, { "epoch": 1.2287625418060202, "grad_norm": 0.7843671035604909, "learning_rate": 9.369246885348926e-06, "loss": 0.6647, "step": 919 }, { "epoch": 1.2301003344481605, "grad_norm": 1.0368216803373222, "learning_rate": 9.366976069841524e-06, "loss": 0.718, "step": 920 }, { "epoch": 1.231438127090301, "grad_norm": 1.1693083153271895, "learning_rate": 9.364701450230813e-06, "loss": 0.6749, "step": 921 }, { "epoch": 1.2327759197324415, "grad_norm": 0.8735907579784534, "learning_rate": 9.362423028498229e-06, "loss": 0.5505, "step": 922 }, { "epoch": 1.234113712374582, "grad_norm": 0.9348327527850412, "learning_rate": 9.360140806628523e-06, "loss": 0.7235, "step": 923 }, { "epoch": 1.2354515050167225, "grad_norm": 1.308268730749401, "learning_rate": 9.357854786609754e-06, "loss": 0.7463, "step": 924 }, { "epoch": 1.2367892976588628, "grad_norm": 1.2261256990498843, "learning_rate": 9.355564970433288e-06, "loss": 0.6832, "step": 925 }, { "epoch": 1.2381270903010033, "grad_norm": 0.9111700982115823, "learning_rate": 9.353271360093802e-06, "loss": 0.5991, "step": 926 }, { "epoch": 1.2394648829431438, "grad_norm": 1.1505235531588005, "learning_rate": 9.350973957589278e-06, "loss": 0.6524, "step": 927 }, { "epoch": 1.2408026755852843, "grad_norm": 1.1318001894115224, "learning_rate": 9.348672764920995e-06, "loss": 0.6547, "step": 928 }, { "epoch": 1.2421404682274249, "grad_norm": 0.8424382065238454, "learning_rate": 9.346367784093538e-06, "loss": 0.5844, "step": 929 }, { "epoch": 1.2434782608695651, "grad_norm": 0.9458216325666865, "learning_rate": 9.344059017114796e-06, "loss": 0.6184, "step": 930 }, { "epoch": 1.2448160535117057, "grad_norm": 1.2650630990475367, "learning_rate": 9.341746465995947e-06, "loss": 0.771, "step": 931 }, { "epoch": 1.2461538461538462, "grad_norm": 1.4126509984796878, "learning_rate": 9.339430132751474e-06, "loss": 0.6222, "step": 932 }, { "epoch": 1.2474916387959867, "grad_norm": 1.0957874376351802, "learning_rate": 9.33711001939915e-06, "loss": 0.6954, "step": 933 }, { "epoch": 1.2488294314381272, "grad_norm": 1.0158884544157123, "learning_rate": 9.33478612796004e-06, "loss": 0.722, "step": 934 }, { "epoch": 1.2501672240802675, "grad_norm": 1.126185525039112, "learning_rate": 9.332458460458507e-06, "loss": 0.6991, "step": 935 }, { "epoch": 1.251505016722408, "grad_norm": 1.461957059067002, "learning_rate": 9.330127018922195e-06, "loss": 0.7397, "step": 936 }, { "epoch": 1.2528428093645485, "grad_norm": 1.1458406298628143, "learning_rate": 9.327791805382038e-06, "loss": 0.8283, "step": 937 }, { "epoch": 1.254180602006689, "grad_norm": 1.1954527706193734, "learning_rate": 9.325452821872258e-06, "loss": 0.6614, "step": 938 }, { "epoch": 1.2555183946488295, "grad_norm": 0.9950663822211678, "learning_rate": 9.32311007043036e-06, "loss": 0.6789, "step": 939 }, { "epoch": 1.2568561872909698, "grad_norm": 1.034614860297284, "learning_rate": 9.320763553097132e-06, "loss": 0.655, "step": 940 }, { "epoch": 1.2581939799331103, "grad_norm": 1.3725509192454186, "learning_rate": 9.31841327191664e-06, "loss": 0.6883, "step": 941 }, { "epoch": 1.2595317725752508, "grad_norm": 1.0284720322607999, "learning_rate": 9.316059228936231e-06, "loss": 0.6678, "step": 942 }, { "epoch": 1.2608695652173914, "grad_norm": 1.340515962864512, "learning_rate": 9.31370142620653e-06, "loss": 0.6449, "step": 943 }, { "epoch": 1.2622073578595319, "grad_norm": 0.9588161485670318, "learning_rate": 9.311339865781432e-06, "loss": 0.6217, "step": 944 }, { "epoch": 1.2635451505016722, "grad_norm": 0.9690880656675217, "learning_rate": 9.30897454971811e-06, "loss": 0.707, "step": 945 }, { "epoch": 1.2648829431438127, "grad_norm": 1.088688175567279, "learning_rate": 9.30660548007701e-06, "loss": 0.6794, "step": 946 }, { "epoch": 1.2662207357859532, "grad_norm": 0.8082217936991014, "learning_rate": 9.30423265892184e-06, "loss": 0.7028, "step": 947 }, { "epoch": 1.2675585284280937, "grad_norm": 1.1939474140501312, "learning_rate": 9.301856088319584e-06, "loss": 0.6927, "step": 948 }, { "epoch": 1.2688963210702342, "grad_norm": 1.1763592563556242, "learning_rate": 9.299475770340492e-06, "loss": 0.6597, "step": 949 }, { "epoch": 1.2702341137123745, "grad_norm": 1.336852309952923, "learning_rate": 9.297091707058071e-06, "loss": 0.659, "step": 950 }, { "epoch": 1.271571906354515, "grad_norm": 1.0123963167761783, "learning_rate": 9.294703900549096e-06, "loss": 0.5698, "step": 951 }, { "epoch": 1.2729096989966555, "grad_norm": 1.3381725691877617, "learning_rate": 9.292312352893603e-06, "loss": 0.6993, "step": 952 }, { "epoch": 1.274247491638796, "grad_norm": 0.9922204639862421, "learning_rate": 9.289917066174887e-06, "loss": 0.7352, "step": 953 }, { "epoch": 1.2755852842809365, "grad_norm": 0.8338229666579892, "learning_rate": 9.287518042479495e-06, "loss": 0.6689, "step": 954 }, { "epoch": 1.2769230769230768, "grad_norm": 0.9521398474749191, "learning_rate": 9.285115283897237e-06, "loss": 0.7701, "step": 955 }, { "epoch": 1.2782608695652173, "grad_norm": 1.2916040645255276, "learning_rate": 9.282708792521173e-06, "loss": 0.6114, "step": 956 }, { "epoch": 1.2795986622073579, "grad_norm": 1.072411952725776, "learning_rate": 9.280298570447612e-06, "loss": 0.5765, "step": 957 }, { "epoch": 1.2809364548494984, "grad_norm": 1.377106390443161, "learning_rate": 9.277884619776116e-06, "loss": 0.7377, "step": 958 }, { "epoch": 1.2822742474916389, "grad_norm": 1.3368797339939626, "learning_rate": 9.275466942609495e-06, "loss": 0.7168, "step": 959 }, { "epoch": 1.2836120401337792, "grad_norm": 1.4093799700101912, "learning_rate": 9.273045541053805e-06, "loss": 0.6612, "step": 960 }, { "epoch": 1.2849498327759197, "grad_norm": 1.3344463286774118, "learning_rate": 9.270620417218344e-06, "loss": 0.6794, "step": 961 }, { "epoch": 1.2862876254180602, "grad_norm": 1.2335836019943416, "learning_rate": 9.268191573215653e-06, "loss": 0.7201, "step": 962 }, { "epoch": 1.2876254180602007, "grad_norm": 1.0743163442894044, "learning_rate": 9.265759011161519e-06, "loss": 0.6967, "step": 963 }, { "epoch": 1.2889632107023412, "grad_norm": 0.9405858161206572, "learning_rate": 9.263322733174962e-06, "loss": 0.7478, "step": 964 }, { "epoch": 1.2903010033444815, "grad_norm": 1.1633949356269762, "learning_rate": 9.26088274137824e-06, "loss": 0.7418, "step": 965 }, { "epoch": 1.291638795986622, "grad_norm": 1.2281362866035093, "learning_rate": 9.258439037896846e-06, "loss": 0.6932, "step": 966 }, { "epoch": 1.2929765886287625, "grad_norm": 1.0771086062890798, "learning_rate": 9.25599162485951e-06, "loss": 0.6707, "step": 967 }, { "epoch": 1.294314381270903, "grad_norm": 0.9917483915239256, "learning_rate": 9.25354050439819e-06, "loss": 0.5355, "step": 968 }, { "epoch": 1.2956521739130435, "grad_norm": 1.0801542705811298, "learning_rate": 9.251085678648072e-06, "loss": 0.7269, "step": 969 }, { "epoch": 1.2969899665551838, "grad_norm": 0.8408404332459946, "learning_rate": 9.248627149747573e-06, "loss": 0.6398, "step": 970 }, { "epoch": 1.2983277591973243, "grad_norm": 0.7583064874234128, "learning_rate": 9.246164919838334e-06, "loss": 0.724, "step": 971 }, { "epoch": 1.2996655518394649, "grad_norm": 0.9742099518774369, "learning_rate": 9.243698991065222e-06, "loss": 0.6414, "step": 972 }, { "epoch": 1.3010033444816054, "grad_norm": 0.8463696082977692, "learning_rate": 9.241229365576325e-06, "loss": 0.6771, "step": 973 }, { "epoch": 1.3023411371237459, "grad_norm": 1.078940919038303, "learning_rate": 9.238756045522949e-06, "loss": 0.6742, "step": 974 }, { "epoch": 1.3036789297658862, "grad_norm": 1.0231980503874716, "learning_rate": 9.236279033059622e-06, "loss": 0.7014, "step": 975 }, { "epoch": 1.3050167224080267, "grad_norm": 0.9031326606686917, "learning_rate": 9.233798330344085e-06, "loss": 0.68, "step": 976 }, { "epoch": 1.3063545150501672, "grad_norm": 1.0430850334296933, "learning_rate": 9.231313939537298e-06, "loss": 0.6909, "step": 977 }, { "epoch": 1.3076923076923077, "grad_norm": 1.0385652077062277, "learning_rate": 9.22882586280343e-06, "loss": 0.6995, "step": 978 }, { "epoch": 1.3090301003344482, "grad_norm": 1.023537572724792, "learning_rate": 9.226334102309862e-06, "loss": 0.6742, "step": 979 }, { "epoch": 1.3103678929765885, "grad_norm": 0.9100759634243084, "learning_rate": 9.223838660227183e-06, "loss": 0.6593, "step": 980 }, { "epoch": 1.311705685618729, "grad_norm": 0.9450177372372109, "learning_rate": 9.221339538729191e-06, "loss": 0.5808, "step": 981 }, { "epoch": 1.3130434782608695, "grad_norm": 0.9498566728808606, "learning_rate": 9.21883673999289e-06, "loss": 0.7376, "step": 982 }, { "epoch": 1.31438127090301, "grad_norm": 1.0668007555023493, "learning_rate": 9.21633026619848e-06, "loss": 0.7571, "step": 983 }, { "epoch": 1.3157190635451506, "grad_norm": 1.2457776435631316, "learning_rate": 9.213820119529372e-06, "loss": 0.6205, "step": 984 }, { "epoch": 1.3170568561872908, "grad_norm": 1.1543011110648975, "learning_rate": 9.211306302172174e-06, "loss": 0.7317, "step": 985 }, { "epoch": 1.3183946488294314, "grad_norm": 0.866417665036379, "learning_rate": 9.208788816316684e-06, "loss": 0.7586, "step": 986 }, { "epoch": 1.3197324414715719, "grad_norm": 1.0705945274499022, "learning_rate": 9.206267664155906e-06, "loss": 0.7722, "step": 987 }, { "epoch": 1.3210702341137124, "grad_norm": 0.9551386593909016, "learning_rate": 9.203742847886033e-06, "loss": 0.6607, "step": 988 }, { "epoch": 1.322408026755853, "grad_norm": 0.8148337594819525, "learning_rate": 9.201214369706448e-06, "loss": 0.699, "step": 989 }, { "epoch": 1.3237458193979932, "grad_norm": 1.115986356444048, "learning_rate": 9.198682231819727e-06, "loss": 0.6908, "step": 990 }, { "epoch": 1.325083612040134, "grad_norm": 0.9854020489991547, "learning_rate": 9.196146436431635e-06, "loss": 0.6419, "step": 991 }, { "epoch": 1.3264214046822742, "grad_norm": 1.3861846612041966, "learning_rate": 9.193606985751117e-06, "loss": 0.6575, "step": 992 }, { "epoch": 1.3277591973244147, "grad_norm": 1.2143614624209806, "learning_rate": 9.191063881990308e-06, "loss": 0.7249, "step": 993 }, { "epoch": 1.3290969899665552, "grad_norm": 1.0146253529851508, "learning_rate": 9.188517127364524e-06, "loss": 0.6596, "step": 994 }, { "epoch": 1.3304347826086955, "grad_norm": 1.0242470429994814, "learning_rate": 9.185966724092261e-06, "loss": 0.7477, "step": 995 }, { "epoch": 1.3317725752508363, "grad_norm": 1.070678394653194, "learning_rate": 9.183412674395193e-06, "loss": 0.5987, "step": 996 }, { "epoch": 1.3331103678929765, "grad_norm": 0.9604056849133985, "learning_rate": 9.180854980498168e-06, "loss": 0.7349, "step": 997 }, { "epoch": 1.334448160535117, "grad_norm": 0.8410681840700408, "learning_rate": 9.178293644629214e-06, "loss": 0.6633, "step": 998 }, { "epoch": 1.3357859531772576, "grad_norm": 1.0354849974674951, "learning_rate": 9.17572866901953e-06, "loss": 0.7057, "step": 999 }, { "epoch": 1.3371237458193979, "grad_norm": 0.894351596654042, "learning_rate": 9.173160055903478e-06, "loss": 0.5906, "step": 1000 }, { "epoch": 1.3384615384615386, "grad_norm": 0.9353148278847161, "learning_rate": 9.1705878075186e-06, "loss": 0.7104, "step": 1001 }, { "epoch": 1.3397993311036789, "grad_norm": 0.9479764067234489, "learning_rate": 9.168011926105598e-06, "loss": 0.5989, "step": 1002 }, { "epoch": 1.3411371237458194, "grad_norm": 1.0490202751668305, "learning_rate": 9.165432413908341e-06, "loss": 0.6457, "step": 1003 }, { "epoch": 1.34247491638796, "grad_norm": 0.8877774985479381, "learning_rate": 9.162849273173857e-06, "loss": 0.7737, "step": 1004 }, { "epoch": 1.3438127090301004, "grad_norm": 1.1794460967605593, "learning_rate": 9.160262506152343e-06, "loss": 0.7132, "step": 1005 }, { "epoch": 1.345150501672241, "grad_norm": 1.1198093531741986, "learning_rate": 9.157672115097145e-06, "loss": 0.6377, "step": 1006 }, { "epoch": 1.3464882943143812, "grad_norm": 1.026529410910508, "learning_rate": 9.155078102264773e-06, "loss": 0.7076, "step": 1007 }, { "epoch": 1.3478260869565217, "grad_norm": 1.0902480888171542, "learning_rate": 9.15248046991489e-06, "loss": 0.7164, "step": 1008 }, { "epoch": 1.3491638795986622, "grad_norm": 1.3106213774024482, "learning_rate": 9.14987922031031e-06, "loss": 0.5969, "step": 1009 }, { "epoch": 1.3505016722408028, "grad_norm": 1.0072107461073883, "learning_rate": 9.147274355717002e-06, "loss": 0.6868, "step": 1010 }, { "epoch": 1.3518394648829433, "grad_norm": 1.2845952408605426, "learning_rate": 9.14466587840408e-06, "loss": 0.8372, "step": 1011 }, { "epoch": 1.3531772575250836, "grad_norm": 1.138423488906652, "learning_rate": 9.142053790643806e-06, "loss": 0.6707, "step": 1012 }, { "epoch": 1.354515050167224, "grad_norm": 1.1310510236721125, "learning_rate": 9.13943809471159e-06, "loss": 0.6767, "step": 1013 }, { "epoch": 1.3558528428093646, "grad_norm": 0.8648819799576265, "learning_rate": 9.136818792885981e-06, "loss": 0.6187, "step": 1014 }, { "epoch": 1.357190635451505, "grad_norm": 1.142942738519383, "learning_rate": 9.134195887448673e-06, "loss": 0.674, "step": 1015 }, { "epoch": 1.3585284280936456, "grad_norm": 1.3473816694489704, "learning_rate": 9.131569380684497e-06, "loss": 0.6861, "step": 1016 }, { "epoch": 1.359866220735786, "grad_norm": 1.0190136123822604, "learning_rate": 9.12893927488142e-06, "loss": 0.6278, "step": 1017 }, { "epoch": 1.3612040133779264, "grad_norm": 0.998434603439526, "learning_rate": 9.126305572330547e-06, "loss": 0.6737, "step": 1018 }, { "epoch": 1.362541806020067, "grad_norm": 0.8675466577189269, "learning_rate": 9.123668275326113e-06, "loss": 0.6491, "step": 1019 }, { "epoch": 1.3638795986622074, "grad_norm": 0.7099901954774915, "learning_rate": 9.121027386165487e-06, "loss": 0.6525, "step": 1020 }, { "epoch": 1.365217391304348, "grad_norm": 0.9201168289877366, "learning_rate": 9.118382907149164e-06, "loss": 0.6835, "step": 1021 }, { "epoch": 1.3665551839464882, "grad_norm": 1.3127444983057697, "learning_rate": 9.115734840580772e-06, "loss": 0.7132, "step": 1022 }, { "epoch": 1.3678929765886287, "grad_norm": 0.8214556634939508, "learning_rate": 9.113083188767057e-06, "loss": 0.6316, "step": 1023 }, { "epoch": 1.3692307692307693, "grad_norm": 0.9042454475891798, "learning_rate": 9.110427954017891e-06, "loss": 0.7006, "step": 1024 }, { "epoch": 1.3705685618729098, "grad_norm": 0.9447502606765443, "learning_rate": 9.107769138646273e-06, "loss": 0.4576, "step": 1025 }, { "epoch": 1.3719063545150503, "grad_norm": 0.8357718581752007, "learning_rate": 9.105106744968308e-06, "loss": 0.5938, "step": 1026 }, { "epoch": 1.3732441471571906, "grad_norm": 0.8422010333239957, "learning_rate": 9.10244077530323e-06, "loss": 0.6656, "step": 1027 }, { "epoch": 1.374581939799331, "grad_norm": 0.9996882440428304, "learning_rate": 9.099771231973382e-06, "loss": 0.6682, "step": 1028 }, { "epoch": 1.3759197324414716, "grad_norm": 0.9687015245660848, "learning_rate": 9.097098117304223e-06, "loss": 0.5855, "step": 1029 }, { "epoch": 1.377257525083612, "grad_norm": 1.4695699871937766, "learning_rate": 9.094421433624322e-06, "loss": 0.762, "step": 1030 }, { "epoch": 1.3785953177257526, "grad_norm": 1.2346868807626512, "learning_rate": 9.09174118326536e-06, "loss": 0.8152, "step": 1031 }, { "epoch": 1.379933110367893, "grad_norm": 1.2860063245992435, "learning_rate": 9.089057368562113e-06, "loss": 0.6512, "step": 1032 }, { "epoch": 1.3812709030100334, "grad_norm": 0.909851024263668, "learning_rate": 9.086369991852478e-06, "loss": 0.6785, "step": 1033 }, { "epoch": 1.382608695652174, "grad_norm": 0.9337392096684539, "learning_rate": 9.083679055477446e-06, "loss": 0.8062, "step": 1034 }, { "epoch": 1.3839464882943144, "grad_norm": 1.434493605768086, "learning_rate": 9.08098456178111e-06, "loss": 0.6942, "step": 1035 }, { "epoch": 1.385284280936455, "grad_norm": 0.9005824827295957, "learning_rate": 9.078286513110661e-06, "loss": 0.6991, "step": 1036 }, { "epoch": 1.3866220735785952, "grad_norm": 1.0896129837211428, "learning_rate": 9.07558491181639e-06, "loss": 0.5863, "step": 1037 }, { "epoch": 1.3879598662207357, "grad_norm": 0.9263433760174503, "learning_rate": 9.07287976025168e-06, "loss": 0.6783, "step": 1038 }, { "epoch": 1.3892976588628763, "grad_norm": 0.7985888163434259, "learning_rate": 9.070171060773007e-06, "loss": 0.6544, "step": 1039 }, { "epoch": 1.3906354515050168, "grad_norm": 1.3366195814749982, "learning_rate": 9.067458815739938e-06, "loss": 0.7266, "step": 1040 }, { "epoch": 1.3919732441471573, "grad_norm": 1.0117505830231661, "learning_rate": 9.064743027515127e-06, "loss": 0.7445, "step": 1041 }, { "epoch": 1.3933110367892976, "grad_norm": 1.1516687063052915, "learning_rate": 9.062023698464322e-06, "loss": 0.6888, "step": 1042 }, { "epoch": 1.394648829431438, "grad_norm": 0.9772283570419628, "learning_rate": 9.059300830956343e-06, "loss": 0.6915, "step": 1043 }, { "epoch": 1.3959866220735786, "grad_norm": 1.3936448515783213, "learning_rate": 9.056574427363102e-06, "loss": 0.7952, "step": 1044 }, { "epoch": 1.397324414715719, "grad_norm": 1.2884872232838458, "learning_rate": 9.053844490059589e-06, "loss": 0.6567, "step": 1045 }, { "epoch": 1.3986622073578596, "grad_norm": 0.9764035412332709, "learning_rate": 9.051111021423868e-06, "loss": 0.6427, "step": 1046 }, { "epoch": 1.4, "grad_norm": 1.12187401455489, "learning_rate": 9.048374023837086e-06, "loss": 0.6026, "step": 1047 }, { "epoch": 1.4013377926421404, "grad_norm": 1.2276784401479182, "learning_rate": 9.045633499683457e-06, "loss": 0.8525, "step": 1048 }, { "epoch": 1.402675585284281, "grad_norm": 1.6824985964887091, "learning_rate": 9.042889451350274e-06, "loss": 0.6927, "step": 1049 }, { "epoch": 1.4040133779264214, "grad_norm": 1.235724138382472, "learning_rate": 9.040141881227897e-06, "loss": 0.8126, "step": 1050 }, { "epoch": 1.405351170568562, "grad_norm": 1.0844661175578072, "learning_rate": 9.03739079170975e-06, "loss": 0.7173, "step": 1051 }, { "epoch": 1.4066889632107022, "grad_norm": 0.7671127333950281, "learning_rate": 9.034636185192329e-06, "loss": 0.6355, "step": 1052 }, { "epoch": 1.4080267558528428, "grad_norm": 1.1728950319754212, "learning_rate": 9.03187806407519e-06, "loss": 0.653, "step": 1053 }, { "epoch": 1.4093645484949833, "grad_norm": 0.9704133571557073, "learning_rate": 9.029116430760952e-06, "loss": 0.5906, "step": 1054 }, { "epoch": 1.4107023411371238, "grad_norm": 0.9589143266743498, "learning_rate": 9.026351287655294e-06, "loss": 0.6809, "step": 1055 }, { "epoch": 1.4120401337792643, "grad_norm": 1.2210731300135411, "learning_rate": 9.023582637166948e-06, "loss": 0.6817, "step": 1056 }, { "epoch": 1.4133779264214046, "grad_norm": 1.028904886868192, "learning_rate": 9.020810481707709e-06, "loss": 0.6775, "step": 1057 }, { "epoch": 1.414715719063545, "grad_norm": 0.822396931220635, "learning_rate": 9.01803482369242e-06, "loss": 0.7387, "step": 1058 }, { "epoch": 1.4160535117056856, "grad_norm": 1.1930077817330123, "learning_rate": 9.015255665538972e-06, "loss": 0.7454, "step": 1059 }, { "epoch": 1.4173913043478261, "grad_norm": 0.7801092465029583, "learning_rate": 9.012473009668314e-06, "loss": 0.614, "step": 1060 }, { "epoch": 1.4187290969899666, "grad_norm": 1.165979666895002, "learning_rate": 9.009686858504434e-06, "loss": 0.6517, "step": 1061 }, { "epoch": 1.420066889632107, "grad_norm": 0.963911512076559, "learning_rate": 9.00689721447437e-06, "loss": 0.7212, "step": 1062 }, { "epoch": 1.4214046822742474, "grad_norm": 0.980440383644304, "learning_rate": 9.004104080008198e-06, "loss": 0.642, "step": 1063 }, { "epoch": 1.422742474916388, "grad_norm": 0.8706645240190537, "learning_rate": 9.001307457539038e-06, "loss": 0.7115, "step": 1064 }, { "epoch": 1.4240802675585285, "grad_norm": 1.1906169886358273, "learning_rate": 8.998507349503048e-06, "loss": 0.698, "step": 1065 }, { "epoch": 1.425418060200669, "grad_norm": 1.113560911120748, "learning_rate": 8.99570375833942e-06, "loss": 0.6714, "step": 1066 }, { "epoch": 1.4267558528428093, "grad_norm": 1.3914605396373874, "learning_rate": 8.992896686490384e-06, "loss": 0.7084, "step": 1067 }, { "epoch": 1.4280936454849498, "grad_norm": 1.150583188935114, "learning_rate": 8.990086136401199e-06, "loss": 0.7172, "step": 1068 }, { "epoch": 1.4294314381270903, "grad_norm": 1.2744241257971656, "learning_rate": 8.987272110520154e-06, "loss": 0.7294, "step": 1069 }, { "epoch": 1.4307692307692308, "grad_norm": 1.006406864891483, "learning_rate": 8.984454611298565e-06, "loss": 0.7095, "step": 1070 }, { "epoch": 1.4321070234113713, "grad_norm": 1.4368398621687675, "learning_rate": 8.981633641190779e-06, "loss": 0.6377, "step": 1071 }, { "epoch": 1.4334448160535116, "grad_norm": 0.8467734304305675, "learning_rate": 8.978809202654161e-06, "loss": 0.6553, "step": 1072 }, { "epoch": 1.434782608695652, "grad_norm": 1.1954405357468518, "learning_rate": 8.975981298149099e-06, "loss": 0.5623, "step": 1073 }, { "epoch": 1.4361204013377926, "grad_norm": 1.0630343491069691, "learning_rate": 8.973149930139e-06, "loss": 0.6016, "step": 1074 }, { "epoch": 1.4374581939799331, "grad_norm": 0.953241144768789, "learning_rate": 8.97031510109029e-06, "loss": 0.8156, "step": 1075 }, { "epoch": 1.4387959866220736, "grad_norm": 1.2529657338663749, "learning_rate": 8.967476813472407e-06, "loss": 0.7104, "step": 1076 }, { "epoch": 1.440133779264214, "grad_norm": 1.357009288384862, "learning_rate": 8.964635069757803e-06, "loss": 0.6397, "step": 1077 }, { "epoch": 1.4414715719063544, "grad_norm": 1.0208689818317953, "learning_rate": 8.96178987242194e-06, "loss": 0.7071, "step": 1078 }, { "epoch": 1.442809364548495, "grad_norm": 1.2005134217825393, "learning_rate": 8.958941223943292e-06, "loss": 0.7656, "step": 1079 }, { "epoch": 1.4441471571906355, "grad_norm": 1.538551927961479, "learning_rate": 8.956089126803333e-06, "loss": 0.5969, "step": 1080 }, { "epoch": 1.445484949832776, "grad_norm": 1.1017960205402473, "learning_rate": 8.953233583486548e-06, "loss": 0.608, "step": 1081 }, { "epoch": 1.4468227424749163, "grad_norm": 0.9381554609878687, "learning_rate": 8.950374596480419e-06, "loss": 0.6257, "step": 1082 }, { "epoch": 1.4481605351170568, "grad_norm": 1.1702532269982404, "learning_rate": 8.94751216827543e-06, "loss": 0.6476, "step": 1083 }, { "epoch": 1.4494983277591973, "grad_norm": 0.846239300762405, "learning_rate": 8.944646301365061e-06, "loss": 0.7191, "step": 1084 }, { "epoch": 1.4508361204013378, "grad_norm": 1.0420349290679425, "learning_rate": 8.94177699824579e-06, "loss": 0.7462, "step": 1085 }, { "epoch": 1.4521739130434783, "grad_norm": 1.1091588179108058, "learning_rate": 8.938904261417088e-06, "loss": 0.6557, "step": 1086 }, { "epoch": 1.4535117056856186, "grad_norm": 1.0837094733211858, "learning_rate": 8.936028093381414e-06, "loss": 0.6911, "step": 1087 }, { "epoch": 1.4548494983277591, "grad_norm": 1.3183423338617823, "learning_rate": 8.933148496644218e-06, "loss": 0.6658, "step": 1088 }, { "epoch": 1.4561872909698996, "grad_norm": 1.3570965485478566, "learning_rate": 8.930265473713939e-06, "loss": 0.7992, "step": 1089 }, { "epoch": 1.4575250836120401, "grad_norm": 1.2796660953943115, "learning_rate": 8.927379027101994e-06, "loss": 0.6737, "step": 1090 }, { "epoch": 1.4588628762541807, "grad_norm": 1.156806571014319, "learning_rate": 8.924489159322792e-06, "loss": 0.7093, "step": 1091 }, { "epoch": 1.460200668896321, "grad_norm": 1.0098329598024778, "learning_rate": 8.921595872893714e-06, "loss": 0.6646, "step": 1092 }, { "epoch": 1.4615384615384617, "grad_norm": 1.021914058779153, "learning_rate": 8.918699170335123e-06, "loss": 0.7079, "step": 1093 }, { "epoch": 1.462876254180602, "grad_norm": 1.4505981145000273, "learning_rate": 8.915799054170357e-06, "loss": 0.7181, "step": 1094 }, { "epoch": 1.4642140468227425, "grad_norm": 1.5193870833560326, "learning_rate": 8.912895526925726e-06, "loss": 0.6227, "step": 1095 }, { "epoch": 1.465551839464883, "grad_norm": 1.4882128270288892, "learning_rate": 8.909988591130514e-06, "loss": 0.7326, "step": 1096 }, { "epoch": 1.4668896321070233, "grad_norm": 1.3828357770031683, "learning_rate": 8.907078249316972e-06, "loss": 0.6319, "step": 1097 }, { "epoch": 1.468227424749164, "grad_norm": 1.338726984617655, "learning_rate": 8.904164504020321e-06, "loss": 0.7871, "step": 1098 }, { "epoch": 1.4695652173913043, "grad_norm": 1.1787064593872147, "learning_rate": 8.901247357778742e-06, "loss": 0.6343, "step": 1099 }, { "epoch": 1.4709030100334448, "grad_norm": 0.8471970602249911, "learning_rate": 8.898326813133385e-06, "loss": 0.7, "step": 1100 }, { "epoch": 1.4722408026755853, "grad_norm": 0.9792363301666451, "learning_rate": 8.895402872628352e-06, "loss": 0.5896, "step": 1101 }, { "epoch": 1.4735785953177256, "grad_norm": 0.8978201752037943, "learning_rate": 8.892475538810714e-06, "loss": 0.6173, "step": 1102 }, { "epoch": 1.4749163879598663, "grad_norm": 1.1586414698327567, "learning_rate": 8.889544814230487e-06, "loss": 0.6804, "step": 1103 }, { "epoch": 1.4762541806020066, "grad_norm": 0.8186027446550553, "learning_rate": 8.886610701440648e-06, "loss": 0.6821, "step": 1104 }, { "epoch": 1.4775919732441471, "grad_norm": 1.0309008078607145, "learning_rate": 8.883673202997121e-06, "loss": 0.7659, "step": 1105 }, { "epoch": 1.4789297658862877, "grad_norm": 0.9931718865696157, "learning_rate": 8.880732321458785e-06, "loss": 0.6007, "step": 1106 }, { "epoch": 1.4802675585284282, "grad_norm": 1.1617839504118215, "learning_rate": 8.87778805938746e-06, "loss": 0.6497, "step": 1107 }, { "epoch": 1.4816053511705687, "grad_norm": 1.0339451636588093, "learning_rate": 8.874840419347912e-06, "loss": 0.6677, "step": 1108 }, { "epoch": 1.482943143812709, "grad_norm": 0.8445393531740648, "learning_rate": 8.871889403907853e-06, "loss": 0.6545, "step": 1109 }, { "epoch": 1.4842809364548495, "grad_norm": 1.302444142200421, "learning_rate": 8.868935015637932e-06, "loss": 0.6693, "step": 1110 }, { "epoch": 1.48561872909699, "grad_norm": 0.9880697586140219, "learning_rate": 8.865977257111738e-06, "loss": 0.6996, "step": 1111 }, { "epoch": 1.4869565217391305, "grad_norm": 0.842919716996344, "learning_rate": 8.863016130905795e-06, "loss": 0.639, "step": 1112 }, { "epoch": 1.488294314381271, "grad_norm": 0.9890367143943758, "learning_rate": 8.86005163959956e-06, "loss": 0.627, "step": 1113 }, { "epoch": 1.4896321070234113, "grad_norm": 0.9724606965722867, "learning_rate": 8.857083785775423e-06, "loss": 0.6992, "step": 1114 }, { "epoch": 1.4909698996655518, "grad_norm": 1.010339648672021, "learning_rate": 8.854112572018702e-06, "loss": 0.7468, "step": 1115 }, { "epoch": 1.4923076923076923, "grad_norm": 1.3345098555810924, "learning_rate": 8.851138000917641e-06, "loss": 0.734, "step": 1116 }, { "epoch": 1.4936454849498328, "grad_norm": 1.0410738087716667, "learning_rate": 8.84816007506341e-06, "loss": 0.7191, "step": 1117 }, { "epoch": 1.4949832775919734, "grad_norm": 1.2100643987246105, "learning_rate": 8.845178797050102e-06, "loss": 0.5693, "step": 1118 }, { "epoch": 1.4963210702341136, "grad_norm": 0.855346582021968, "learning_rate": 8.842194169474727e-06, "loss": 0.6892, "step": 1119 }, { "epoch": 1.4976588628762542, "grad_norm": 1.00782462244818, "learning_rate": 8.839206194937218e-06, "loss": 0.6644, "step": 1120 }, { "epoch": 1.4989966555183947, "grad_norm": 1.1953565478075479, "learning_rate": 8.836214876040416e-06, "loss": 0.6533, "step": 1121 }, { "epoch": 1.500334448160535, "grad_norm": 0.9021852321313739, "learning_rate": 8.833220215390085e-06, "loss": 0.5427, "step": 1122 }, { "epoch": 1.5016722408026757, "grad_norm": 0.9874068980453881, "learning_rate": 8.83022221559489e-06, "loss": 0.7104, "step": 1123 }, { "epoch": 1.503010033444816, "grad_norm": 0.752832839797581, "learning_rate": 8.827220879266414e-06, "loss": 0.592, "step": 1124 }, { "epoch": 1.5043478260869565, "grad_norm": 1.2410347808385545, "learning_rate": 8.824216209019139e-06, "loss": 0.6649, "step": 1125 }, { "epoch": 1.505685618729097, "grad_norm": 1.024699476565982, "learning_rate": 8.821208207470454e-06, "loss": 0.6786, "step": 1126 }, { "epoch": 1.5070234113712373, "grad_norm": 1.2880451452366413, "learning_rate": 8.818196877240652e-06, "loss": 0.6359, "step": 1127 }, { "epoch": 1.508361204013378, "grad_norm": 1.084015191802355, "learning_rate": 8.815182220952922e-06, "loss": 0.5642, "step": 1128 }, { "epoch": 1.5096989966555183, "grad_norm": 0.9261792688826928, "learning_rate": 8.812164241233354e-06, "loss": 0.7671, "step": 1129 }, { "epoch": 1.5110367892976588, "grad_norm": 1.1945420420067507, "learning_rate": 8.80914294071093e-06, "loss": 0.6209, "step": 1130 }, { "epoch": 1.5123745819397993, "grad_norm": 1.0342587076396617, "learning_rate": 8.806118322017525e-06, "loss": 0.7256, "step": 1131 }, { "epoch": 1.5137123745819396, "grad_norm": 0.9951950445118575, "learning_rate": 8.803090387787909e-06, "loss": 0.6731, "step": 1132 }, { "epoch": 1.5150501672240804, "grad_norm": 1.1598801058810515, "learning_rate": 8.800059140659731e-06, "loss": 0.7182, "step": 1133 }, { "epoch": 1.5163879598662207, "grad_norm": 1.3939816167719798, "learning_rate": 8.797024583273536e-06, "loss": 0.7474, "step": 1134 }, { "epoch": 1.5177257525083612, "grad_norm": 1.0512318141377657, "learning_rate": 8.793986718272747e-06, "loss": 0.7107, "step": 1135 }, { "epoch": 1.5190635451505017, "grad_norm": 1.3388932043848125, "learning_rate": 8.790945548303669e-06, "loss": 0.6879, "step": 1136 }, { "epoch": 1.5204013377926422, "grad_norm": 1.351662447786594, "learning_rate": 8.787901076015487e-06, "loss": 0.7919, "step": 1137 }, { "epoch": 1.5217391304347827, "grad_norm": 1.2296270559234415, "learning_rate": 8.784853304060262e-06, "loss": 0.7287, "step": 1138 }, { "epoch": 1.523076923076923, "grad_norm": 0.797140294522404, "learning_rate": 8.781802235092927e-06, "loss": 0.7121, "step": 1139 }, { "epoch": 1.5244147157190635, "grad_norm": 1.1592606294169467, "learning_rate": 8.778747871771293e-06, "loss": 0.6326, "step": 1140 }, { "epoch": 1.525752508361204, "grad_norm": 1.092233584239412, "learning_rate": 8.775690216756035e-06, "loss": 0.7124, "step": 1141 }, { "epoch": 1.5270903010033445, "grad_norm": 1.1784710283394748, "learning_rate": 8.772629272710698e-06, "loss": 0.6842, "step": 1142 }, { "epoch": 1.528428093645485, "grad_norm": 1.288100346782028, "learning_rate": 8.769565042301692e-06, "loss": 0.7346, "step": 1143 }, { "epoch": 1.5297658862876253, "grad_norm": 1.2127499720036266, "learning_rate": 8.766497528198289e-06, "loss": 0.7972, "step": 1144 }, { "epoch": 1.5311036789297658, "grad_norm": 1.3575019587036647, "learning_rate": 8.763426733072624e-06, "loss": 0.6676, "step": 1145 }, { "epoch": 1.5324414715719064, "grad_norm": 1.006727208465721, "learning_rate": 8.760352659599684e-06, "loss": 0.6718, "step": 1146 }, { "epoch": 1.5337792642140469, "grad_norm": 1.151721944786687, "learning_rate": 8.757275310457321e-06, "loss": 0.7816, "step": 1147 }, { "epoch": 1.5351170568561874, "grad_norm": 1.1951563322339749, "learning_rate": 8.754194688326229e-06, "loss": 0.8429, "step": 1148 }, { "epoch": 1.5364548494983277, "grad_norm": 1.048637861393218, "learning_rate": 8.751110795889966e-06, "loss": 0.5823, "step": 1149 }, { "epoch": 1.5377926421404682, "grad_norm": 1.0018053982719854, "learning_rate": 8.748023635834927e-06, "loss": 0.7104, "step": 1150 }, { "epoch": 1.5391304347826087, "grad_norm": 1.2293833739784585, "learning_rate": 8.744933210850363e-06, "loss": 0.6378, "step": 1151 }, { "epoch": 1.5404682274247492, "grad_norm": 0.955811720466793, "learning_rate": 8.741839523628361e-06, "loss": 0.6102, "step": 1152 }, { "epoch": 1.5418060200668897, "grad_norm": 1.1428810682432229, "learning_rate": 8.738742576863855e-06, "loss": 0.7268, "step": 1153 }, { "epoch": 1.54314381270903, "grad_norm": 1.1742849047998754, "learning_rate": 8.735642373254617e-06, "loss": 0.6587, "step": 1154 }, { "epoch": 1.5444816053511705, "grad_norm": 0.8841646338681431, "learning_rate": 8.732538915501257e-06, "loss": 0.7332, "step": 1155 }, { "epoch": 1.545819397993311, "grad_norm": 0.8580081649028514, "learning_rate": 8.729432206307218e-06, "loss": 0.6419, "step": 1156 }, { "epoch": 1.5471571906354515, "grad_norm": 1.3698209602685436, "learning_rate": 8.726322248378775e-06, "loss": 0.727, "step": 1157 }, { "epoch": 1.548494983277592, "grad_norm": 1.1658759130448284, "learning_rate": 8.723209044425034e-06, "loss": 0.6647, "step": 1158 }, { "epoch": 1.5498327759197323, "grad_norm": 1.1550613961629574, "learning_rate": 8.72009259715793e-06, "loss": 0.6095, "step": 1159 }, { "epoch": 1.551170568561873, "grad_norm": 0.9023562952064945, "learning_rate": 8.71697290929222e-06, "loss": 0.6349, "step": 1160 }, { "epoch": 1.5525083612040134, "grad_norm": 0.9306438373032466, "learning_rate": 8.71384998354549e-06, "loss": 0.5559, "step": 1161 }, { "epoch": 1.5538461538461539, "grad_norm": 0.8952748102922993, "learning_rate": 8.710723822638138e-06, "loss": 0.5352, "step": 1162 }, { "epoch": 1.5551839464882944, "grad_norm": 0.8563138387786008, "learning_rate": 8.707594429293387e-06, "loss": 0.6386, "step": 1163 }, { "epoch": 1.5565217391304347, "grad_norm": 1.1053194862574311, "learning_rate": 8.704461806237272e-06, "loss": 0.7917, "step": 1164 }, { "epoch": 1.5578595317725754, "grad_norm": 1.316655562943971, "learning_rate": 8.701325956198643e-06, "loss": 0.6281, "step": 1165 }, { "epoch": 1.5591973244147157, "grad_norm": 0.9878737353172802, "learning_rate": 8.69818688190916e-06, "loss": 0.6703, "step": 1166 }, { "epoch": 1.5605351170568562, "grad_norm": 0.9898644262255035, "learning_rate": 8.695044586103297e-06, "loss": 0.6489, "step": 1167 }, { "epoch": 1.5618729096989967, "grad_norm": 1.0345265711935012, "learning_rate": 8.691899071518323e-06, "loss": 0.7265, "step": 1168 }, { "epoch": 1.563210702341137, "grad_norm": 0.8044117445568152, "learning_rate": 8.688750340894324e-06, "loss": 0.5798, "step": 1169 }, { "epoch": 1.5645484949832777, "grad_norm": 0.804369357145086, "learning_rate": 8.685598396974178e-06, "loss": 0.5945, "step": 1170 }, { "epoch": 1.565886287625418, "grad_norm": 1.0494525194646054, "learning_rate": 8.682443242503564e-06, "loss": 0.6581, "step": 1171 }, { "epoch": 1.5672240802675586, "grad_norm": 0.9047324292774804, "learning_rate": 8.679284880230963e-06, "loss": 0.6536, "step": 1172 }, { "epoch": 1.568561872909699, "grad_norm": 0.7991382099606802, "learning_rate": 8.676123312907641e-06, "loss": 0.5656, "step": 1173 }, { "epoch": 1.5698996655518394, "grad_norm": 1.1141788540842406, "learning_rate": 8.672958543287666e-06, "loss": 0.7769, "step": 1174 }, { "epoch": 1.57123745819398, "grad_norm": 1.2591333637016342, "learning_rate": 8.66979057412789e-06, "loss": 0.7055, "step": 1175 }, { "epoch": 1.5725752508361204, "grad_norm": 1.2152036256909204, "learning_rate": 8.666619408187953e-06, "loss": 0.6342, "step": 1176 }, { "epoch": 1.5739130434782609, "grad_norm": 0.8835529964089501, "learning_rate": 8.663445048230278e-06, "loss": 0.6721, "step": 1177 }, { "epoch": 1.5752508361204014, "grad_norm": 1.0668194224401752, "learning_rate": 8.660267497020074e-06, "loss": 0.6275, "step": 1178 }, { "epoch": 1.5765886287625417, "grad_norm": 0.7659752972277236, "learning_rate": 8.657086757325328e-06, "loss": 0.5327, "step": 1179 }, { "epoch": 1.5779264214046824, "grad_norm": 0.9676701108687129, "learning_rate": 8.653902831916803e-06, "loss": 0.5929, "step": 1180 }, { "epoch": 1.5792642140468227, "grad_norm": 0.9073307065470096, "learning_rate": 8.650715723568039e-06, "loss": 0.7025, "step": 1181 }, { "epoch": 1.5806020066889632, "grad_norm": 1.1838434723935698, "learning_rate": 8.64752543505535e-06, "loss": 0.7029, "step": 1182 }, { "epoch": 1.5819397993311037, "grad_norm": 1.4461253691659657, "learning_rate": 8.644331969157815e-06, "loss": 0.7039, "step": 1183 }, { "epoch": 1.583277591973244, "grad_norm": 0.9446953040212871, "learning_rate": 8.641135328657288e-06, "loss": 0.6605, "step": 1184 }, { "epoch": 1.5846153846153848, "grad_norm": 1.024895761857663, "learning_rate": 8.637935516338384e-06, "loss": 0.664, "step": 1185 }, { "epoch": 1.585953177257525, "grad_norm": 0.8992337220483263, "learning_rate": 8.63473253498848e-06, "loss": 0.5644, "step": 1186 }, { "epoch": 1.5872909698996656, "grad_norm": 0.9585736085648028, "learning_rate": 8.63152638739772e-06, "loss": 0.6152, "step": 1187 }, { "epoch": 1.588628762541806, "grad_norm": 1.0355940235994165, "learning_rate": 8.628317076358997e-06, "loss": 0.7238, "step": 1188 }, { "epoch": 1.5899665551839464, "grad_norm": 0.6578624360444107, "learning_rate": 8.625104604667965e-06, "loss": 0.677, "step": 1189 }, { "epoch": 1.591304347826087, "grad_norm": 0.9045364513728077, "learning_rate": 8.62188897512303e-06, "loss": 0.7007, "step": 1190 }, { "epoch": 1.5926421404682274, "grad_norm": 1.1594021402131633, "learning_rate": 8.61867019052535e-06, "loss": 0.6899, "step": 1191 }, { "epoch": 1.593979933110368, "grad_norm": 0.8908038167666917, "learning_rate": 8.615448253678834e-06, "loss": 0.7955, "step": 1192 }, { "epoch": 1.5953177257525084, "grad_norm": 1.0439777610597625, "learning_rate": 8.61222316739013e-06, "loss": 0.5643, "step": 1193 }, { "epoch": 1.5966555183946487, "grad_norm": 1.260125250369829, "learning_rate": 8.608994934468633e-06, "loss": 0.698, "step": 1194 }, { "epoch": 1.5979933110367894, "grad_norm": 1.3478656411228962, "learning_rate": 8.60576355772648e-06, "loss": 0.6572, "step": 1195 }, { "epoch": 1.5993311036789297, "grad_norm": 1.2773246229339568, "learning_rate": 8.602529039978546e-06, "loss": 0.7319, "step": 1196 }, { "epoch": 1.6006688963210702, "grad_norm": 1.52868873795585, "learning_rate": 8.599291384042442e-06, "loss": 0.6402, "step": 1197 }, { "epoch": 1.6020066889632107, "grad_norm": 0.828229638858939, "learning_rate": 8.596050592738514e-06, "loss": 0.6234, "step": 1198 }, { "epoch": 1.603344481605351, "grad_norm": 1.173847482470892, "learning_rate": 8.592806668889835e-06, "loss": 0.6125, "step": 1199 }, { "epoch": 1.6046822742474918, "grad_norm": 1.1421603023531524, "learning_rate": 8.58955961532221e-06, "loss": 0.747, "step": 1200 }, { "epoch": 1.606020066889632, "grad_norm": 0.933929799660904, "learning_rate": 8.586309434864173e-06, "loss": 0.7335, "step": 1201 }, { "epoch": 1.6073578595317726, "grad_norm": 1.0430395162120005, "learning_rate": 8.583056130346977e-06, "loss": 0.6279, "step": 1202 }, { "epoch": 1.608695652173913, "grad_norm": 1.359307529655613, "learning_rate": 8.579799704604597e-06, "loss": 0.7254, "step": 1203 }, { "epoch": 1.6100334448160534, "grad_norm": 1.1842202496254846, "learning_rate": 8.57654016047373e-06, "loss": 0.605, "step": 1204 }, { "epoch": 1.611371237458194, "grad_norm": 0.7674692877464087, "learning_rate": 8.573277500793788e-06, "loss": 0.634, "step": 1205 }, { "epoch": 1.6127090301003344, "grad_norm": 1.1320127114606005, "learning_rate": 8.570011728406895e-06, "loss": 0.6482, "step": 1206 }, { "epoch": 1.614046822742475, "grad_norm": 0.7465511318272045, "learning_rate": 8.56674284615789e-06, "loss": 0.6499, "step": 1207 }, { "epoch": 1.6153846153846154, "grad_norm": 0.81747113551735, "learning_rate": 8.563470856894316e-06, "loss": 0.6797, "step": 1208 }, { "epoch": 1.6167224080267557, "grad_norm": 1.0124709788974366, "learning_rate": 8.560195763466428e-06, "loss": 0.6414, "step": 1209 }, { "epoch": 1.6180602006688964, "grad_norm": 1.001736049403648, "learning_rate": 8.556917568727182e-06, "loss": 0.6095, "step": 1210 }, { "epoch": 1.6193979933110367, "grad_norm": 1.0758930033442353, "learning_rate": 8.553636275532236e-06, "loss": 0.7285, "step": 1211 }, { "epoch": 1.6207357859531772, "grad_norm": 0.9768119847447594, "learning_rate": 8.550351886739949e-06, "loss": 0.7044, "step": 1212 }, { "epoch": 1.6220735785953178, "grad_norm": 1.3425689570461412, "learning_rate": 8.547064405211376e-06, "loss": 0.7585, "step": 1213 }, { "epoch": 1.623411371237458, "grad_norm": 1.1915015604418435, "learning_rate": 8.54377383381026e-06, "loss": 0.6208, "step": 1214 }, { "epoch": 1.6247491638795988, "grad_norm": 1.1040504817078043, "learning_rate": 8.540480175403045e-06, "loss": 0.6024, "step": 1215 }, { "epoch": 1.626086956521739, "grad_norm": 0.825888549654254, "learning_rate": 8.53718343285886e-06, "loss": 0.6022, "step": 1216 }, { "epoch": 1.6274247491638796, "grad_norm": 0.8483131416819834, "learning_rate": 8.533883609049517e-06, "loss": 0.7805, "step": 1217 }, { "epoch": 1.62876254180602, "grad_norm": 1.0000424151944831, "learning_rate": 8.530580706849518e-06, "loss": 0.7239, "step": 1218 }, { "epoch": 1.6301003344481604, "grad_norm": 1.1682818570448161, "learning_rate": 8.527274729136042e-06, "loss": 0.7374, "step": 1219 }, { "epoch": 1.6314381270903011, "grad_norm": 1.0813932790237637, "learning_rate": 8.523965678788952e-06, "loss": 0.6304, "step": 1220 }, { "epoch": 1.6327759197324414, "grad_norm": 1.1936298910251737, "learning_rate": 8.520653558690785e-06, "loss": 0.6274, "step": 1221 }, { "epoch": 1.634113712374582, "grad_norm": 1.2237846319133912, "learning_rate": 8.51733837172675e-06, "loss": 0.7705, "step": 1222 }, { "epoch": 1.6354515050167224, "grad_norm": 1.0403084793238806, "learning_rate": 8.51402012078473e-06, "loss": 0.7067, "step": 1223 }, { "epoch": 1.6367892976588627, "grad_norm": 0.9612043349324197, "learning_rate": 8.510698808755275e-06, "loss": 0.6824, "step": 1224 }, { "epoch": 1.6381270903010035, "grad_norm": 0.9855485122776091, "learning_rate": 8.507374438531606e-06, "loss": 0.6375, "step": 1225 }, { "epoch": 1.6394648829431437, "grad_norm": 1.071902806705292, "learning_rate": 8.504047013009605e-06, "loss": 0.594, "step": 1226 }, { "epoch": 1.6408026755852843, "grad_norm": 0.8758713017981613, "learning_rate": 8.500716535087815e-06, "loss": 0.6152, "step": 1227 }, { "epoch": 1.6421404682274248, "grad_norm": 0.8756223534116362, "learning_rate": 8.497383007667435e-06, "loss": 0.7213, "step": 1228 }, { "epoch": 1.643478260869565, "grad_norm": 1.2716030376676666, "learning_rate": 8.494046433652327e-06, "loss": 0.7737, "step": 1229 }, { "epoch": 1.6448160535117058, "grad_norm": 0.94829160275124, "learning_rate": 8.490706815949006e-06, "loss": 0.7584, "step": 1230 }, { "epoch": 1.646153846153846, "grad_norm": 1.1375893153041914, "learning_rate": 8.487364157466633e-06, "loss": 0.5732, "step": 1231 }, { "epoch": 1.6474916387959866, "grad_norm": 0.9003007412528179, "learning_rate": 8.484018461117023e-06, "loss": 0.685, "step": 1232 }, { "epoch": 1.648829431438127, "grad_norm": 1.0299835244857694, "learning_rate": 8.480669729814635e-06, "loss": 0.6707, "step": 1233 }, { "epoch": 1.6501672240802676, "grad_norm": 0.9802440451924674, "learning_rate": 8.477317966476569e-06, "loss": 0.752, "step": 1234 }, { "epoch": 1.6515050167224081, "grad_norm": 0.9591710324886016, "learning_rate": 8.473963174022574e-06, "loss": 0.6577, "step": 1235 }, { "epoch": 1.6528428093645484, "grad_norm": 1.3707397776338057, "learning_rate": 8.470605355375033e-06, "loss": 0.6306, "step": 1236 }, { "epoch": 1.654180602006689, "grad_norm": 1.0990653992476742, "learning_rate": 8.467244513458961e-06, "loss": 0.6661, "step": 1237 }, { "epoch": 1.6555183946488294, "grad_norm": 0.9351421201573539, "learning_rate": 8.463880651202014e-06, "loss": 0.6066, "step": 1238 }, { "epoch": 1.65685618729097, "grad_norm": 0.9786909806894342, "learning_rate": 8.460513771534475e-06, "loss": 0.6815, "step": 1239 }, { "epoch": 1.6581939799331105, "grad_norm": 1.2508605984132315, "learning_rate": 8.457143877389258e-06, "loss": 0.8012, "step": 1240 }, { "epoch": 1.6595317725752508, "grad_norm": 1.2248709564321334, "learning_rate": 8.453770971701899e-06, "loss": 0.6815, "step": 1241 }, { "epoch": 1.6608695652173913, "grad_norm": 1.2518983247210251, "learning_rate": 8.450395057410561e-06, "loss": 0.7095, "step": 1242 }, { "epoch": 1.6622073578595318, "grad_norm": 1.3620394573987118, "learning_rate": 8.447016137456025e-06, "loss": 0.7222, "step": 1243 }, { "epoch": 1.6635451505016723, "grad_norm": 1.3176144517467268, "learning_rate": 8.443634214781693e-06, "loss": 0.7329, "step": 1244 }, { "epoch": 1.6648829431438128, "grad_norm": 1.2285838137998646, "learning_rate": 8.440249292333583e-06, "loss": 0.6959, "step": 1245 }, { "epoch": 1.666220735785953, "grad_norm": 1.075790982156241, "learning_rate": 8.43686137306032e-06, "loss": 0.6548, "step": 1246 }, { "epoch": 1.6675585284280936, "grad_norm": 0.9614966192435662, "learning_rate": 8.43347045991315e-06, "loss": 0.6751, "step": 1247 }, { "epoch": 1.6688963210702341, "grad_norm": 1.2258902183912541, "learning_rate": 8.430076555845917e-06, "loss": 0.632, "step": 1248 }, { "epoch": 1.6702341137123746, "grad_norm": 0.8861727750003745, "learning_rate": 8.426679663815073e-06, "loss": 0.6375, "step": 1249 }, { "epoch": 1.6715719063545151, "grad_norm": 1.506695305533459, "learning_rate": 8.42327978677968e-06, "loss": 0.7596, "step": 1250 }, { "epoch": 1.6729096989966554, "grad_norm": 1.1150729248923241, "learning_rate": 8.41987692770139e-06, "loss": 0.8232, "step": 1251 }, { "epoch": 1.674247491638796, "grad_norm": 1.2291480404843722, "learning_rate": 8.41647108954446e-06, "loss": 0.7008, "step": 1252 }, { "epoch": 1.6755852842809364, "grad_norm": 1.0370037602812983, "learning_rate": 8.413062275275737e-06, "loss": 0.6845, "step": 1253 }, { "epoch": 1.676923076923077, "grad_norm": 0.8055757352305363, "learning_rate": 8.409650487864662e-06, "loss": 0.7641, "step": 1254 }, { "epoch": 1.6782608695652175, "grad_norm": 1.290587580257566, "learning_rate": 8.40623573028327e-06, "loss": 0.6878, "step": 1255 }, { "epoch": 1.6795986622073578, "grad_norm": 0.7902829685572961, "learning_rate": 8.402818005506181e-06, "loss": 0.7631, "step": 1256 }, { "epoch": 1.6809364548494983, "grad_norm": 1.1997519972892676, "learning_rate": 8.399397316510596e-06, "loss": 0.7484, "step": 1257 }, { "epoch": 1.6822742474916388, "grad_norm": 1.1291317987862215, "learning_rate": 8.395973666276301e-06, "loss": 0.7124, "step": 1258 }, { "epoch": 1.6836120401337793, "grad_norm": 1.0249891213135573, "learning_rate": 8.392547057785662e-06, "loss": 0.6228, "step": 1259 }, { "epoch": 1.6849498327759198, "grad_norm": 0.9911755103005523, "learning_rate": 8.389117494023622e-06, "loss": 0.6554, "step": 1260 }, { "epoch": 1.68628762541806, "grad_norm": 1.439579558166103, "learning_rate": 8.385684977977698e-06, "loss": 0.8368, "step": 1261 }, { "epoch": 1.6876254180602008, "grad_norm": 1.4996964268127602, "learning_rate": 8.382249512637978e-06, "loss": 0.7013, "step": 1262 }, { "epoch": 1.6889632107023411, "grad_norm": 1.41179434970428, "learning_rate": 8.378811100997122e-06, "loss": 0.7726, "step": 1263 }, { "epoch": 1.6903010033444816, "grad_norm": 1.2819783791701294, "learning_rate": 8.375369746050353e-06, "loss": 0.7579, "step": 1264 }, { "epoch": 1.6916387959866221, "grad_norm": 1.0492733380317012, "learning_rate": 8.371925450795458e-06, "loss": 0.5994, "step": 1265 }, { "epoch": 1.6929765886287624, "grad_norm": 1.279662814537098, "learning_rate": 8.368478218232787e-06, "loss": 0.691, "step": 1266 }, { "epoch": 1.6943143812709032, "grad_norm": 1.3519595371203201, "learning_rate": 8.365028051365249e-06, "loss": 0.6159, "step": 1267 }, { "epoch": 1.6956521739130435, "grad_norm": 0.8671637929688717, "learning_rate": 8.361574953198309e-06, "loss": 0.6839, "step": 1268 }, { "epoch": 1.696989966555184, "grad_norm": 1.3538233609572046, "learning_rate": 8.358118926739984e-06, "loss": 0.6951, "step": 1269 }, { "epoch": 1.6983277591973245, "grad_norm": 0.9256919528212204, "learning_rate": 8.354659975000843e-06, "loss": 0.7068, "step": 1270 }, { "epoch": 1.6996655518394648, "grad_norm": 1.123849273976157, "learning_rate": 8.351198100994003e-06, "loss": 0.6546, "step": 1271 }, { "epoch": 1.7010033444816055, "grad_norm": 1.5683208417930234, "learning_rate": 8.347733307735127e-06, "loss": 0.7435, "step": 1272 }, { "epoch": 1.7023411371237458, "grad_norm": 1.1254002716213, "learning_rate": 8.34426559824242e-06, "loss": 0.7069, "step": 1273 }, { "epoch": 1.7036789297658863, "grad_norm": 1.0314883394188856, "learning_rate": 8.340794975536627e-06, "loss": 0.6438, "step": 1274 }, { "epoch": 1.7050167224080268, "grad_norm": 0.8541745665352457, "learning_rate": 8.337321442641036e-06, "loss": 0.6161, "step": 1275 }, { "epoch": 1.706354515050167, "grad_norm": 1.1377476251554504, "learning_rate": 8.33384500258146e-06, "loss": 0.6498, "step": 1276 }, { "epoch": 1.7076923076923078, "grad_norm": 0.8447270881655976, "learning_rate": 8.330365658386252e-06, "loss": 0.6618, "step": 1277 }, { "epoch": 1.7090301003344481, "grad_norm": 1.2146377827489538, "learning_rate": 8.326883413086295e-06, "loss": 0.6026, "step": 1278 }, { "epoch": 1.7103678929765886, "grad_norm": 1.005593199888882, "learning_rate": 8.323398269714994e-06, "loss": 0.6745, "step": 1279 }, { "epoch": 1.7117056856187292, "grad_norm": 1.1191080844886758, "learning_rate": 8.319910231308285e-06, "loss": 0.6493, "step": 1280 }, { "epoch": 1.7130434782608694, "grad_norm": 0.9813679953787331, "learning_rate": 8.316419300904622e-06, "loss": 0.715, "step": 1281 }, { "epoch": 1.7143812709030102, "grad_norm": 1.161464747083413, "learning_rate": 8.312925481544976e-06, "loss": 0.6203, "step": 1282 }, { "epoch": 1.7157190635451505, "grad_norm": 0.9862780078341299, "learning_rate": 8.309428776272838e-06, "loss": 0.6307, "step": 1283 }, { "epoch": 1.717056856187291, "grad_norm": 1.2135506904427869, "learning_rate": 8.305929188134216e-06, "loss": 0.882, "step": 1284 }, { "epoch": 1.7183946488294315, "grad_norm": 1.1653257486704534, "learning_rate": 8.302426720177624e-06, "loss": 0.688, "step": 1285 }, { "epoch": 1.7197324414715718, "grad_norm": 1.1983070076280327, "learning_rate": 8.298921375454083e-06, "loss": 0.6087, "step": 1286 }, { "epoch": 1.7210702341137125, "grad_norm": 1.210999686078487, "learning_rate": 8.295413157017127e-06, "loss": 0.6732, "step": 1287 }, { "epoch": 1.7224080267558528, "grad_norm": 1.1323873380541223, "learning_rate": 8.291902067922791e-06, "loss": 0.694, "step": 1288 }, { "epoch": 1.7237458193979933, "grad_norm": 1.0280023942069392, "learning_rate": 8.288388111229601e-06, "loss": 0.7019, "step": 1289 }, { "epoch": 1.7250836120401338, "grad_norm": 0.9500558132847621, "learning_rate": 8.284871289998599e-06, "loss": 0.7084, "step": 1290 }, { "epoch": 1.7264214046822741, "grad_norm": 0.8811788337898476, "learning_rate": 8.281351607293307e-06, "loss": 0.6599, "step": 1291 }, { "epoch": 1.7277591973244149, "grad_norm": 1.2303190105397437, "learning_rate": 8.277829066179746e-06, "loss": 0.7285, "step": 1292 }, { "epoch": 1.7290969899665551, "grad_norm": 1.5265971962372145, "learning_rate": 8.274303669726427e-06, "loss": 0.8655, "step": 1293 }, { "epoch": 1.7304347826086957, "grad_norm": 1.0166737221069138, "learning_rate": 8.270775421004345e-06, "loss": 0.6909, "step": 1294 }, { "epoch": 1.7317725752508362, "grad_norm": 1.017656614587151, "learning_rate": 8.267244323086985e-06, "loss": 0.6471, "step": 1295 }, { "epoch": 1.7331103678929765, "grad_norm": 1.0635522711250112, "learning_rate": 8.263710379050311e-06, "loss": 0.6797, "step": 1296 }, { "epoch": 1.7344481605351172, "grad_norm": 0.9083946761644286, "learning_rate": 8.260173591972765e-06, "loss": 0.6653, "step": 1297 }, { "epoch": 1.7357859531772575, "grad_norm": 1.0037628614404328, "learning_rate": 8.256633964935268e-06, "loss": 0.5732, "step": 1298 }, { "epoch": 1.737123745819398, "grad_norm": 0.938211321792516, "learning_rate": 8.25309150102121e-06, "loss": 0.6331, "step": 1299 }, { "epoch": 1.7384615384615385, "grad_norm": 1.1880950186429209, "learning_rate": 8.249546203316461e-06, "loss": 0.6965, "step": 1300 }, { "epoch": 1.7397993311036788, "grad_norm": 1.0645623359157614, "learning_rate": 8.245998074909354e-06, "loss": 0.6573, "step": 1301 }, { "epoch": 1.7411371237458195, "grad_norm": 1.115956567353716, "learning_rate": 8.242447118890686e-06, "loss": 0.6196, "step": 1302 }, { "epoch": 1.7424749163879598, "grad_norm": 0.9197682284932021, "learning_rate": 8.23889333835372e-06, "loss": 0.7524, "step": 1303 }, { "epoch": 1.7438127090301003, "grad_norm": 1.2216153080177594, "learning_rate": 8.235336736394179e-06, "loss": 0.6396, "step": 1304 }, { "epoch": 1.7451505016722408, "grad_norm": 0.9390027304502073, "learning_rate": 8.231777316110245e-06, "loss": 0.6799, "step": 1305 }, { "epoch": 1.7464882943143811, "grad_norm": 0.9096990620174733, "learning_rate": 8.228215080602554e-06, "loss": 0.5502, "step": 1306 }, { "epoch": 1.7478260869565219, "grad_norm": 0.7285025433487885, "learning_rate": 8.22465003297419e-06, "loss": 0.6306, "step": 1307 }, { "epoch": 1.7491638795986622, "grad_norm": 1.0522758674454233, "learning_rate": 8.221082176330697e-06, "loss": 0.6751, "step": 1308 }, { "epoch": 1.7505016722408027, "grad_norm": 1.2337856918209806, "learning_rate": 8.217511513780056e-06, "loss": 0.7012, "step": 1309 }, { "epoch": 1.7518394648829432, "grad_norm": 0.6971770630725438, "learning_rate": 8.213938048432697e-06, "loss": 0.6017, "step": 1310 }, { "epoch": 1.7531772575250835, "grad_norm": 1.2113809895686838, "learning_rate": 8.210361783401491e-06, "loss": 0.8216, "step": 1311 }, { "epoch": 1.7545150501672242, "grad_norm": 1.222057870486896, "learning_rate": 8.206782721801747e-06, "loss": 0.7042, "step": 1312 }, { "epoch": 1.7558528428093645, "grad_norm": 1.3202174718165067, "learning_rate": 8.203200866751212e-06, "loss": 0.6803, "step": 1313 }, { "epoch": 1.757190635451505, "grad_norm": 1.1812844162686669, "learning_rate": 8.19961622137006e-06, "loss": 0.6911, "step": 1314 }, { "epoch": 1.7585284280936455, "grad_norm": 1.0083438081839693, "learning_rate": 8.196028788780905e-06, "loss": 0.691, "step": 1315 }, { "epoch": 1.7598662207357858, "grad_norm": 0.9658787539534048, "learning_rate": 8.192438572108786e-06, "loss": 0.6812, "step": 1316 }, { "epoch": 1.7612040133779265, "grad_norm": 1.2392576949805363, "learning_rate": 8.188845574481162e-06, "loss": 0.5375, "step": 1317 }, { "epoch": 1.7625418060200668, "grad_norm": 0.8024182378041277, "learning_rate": 8.185249799027919e-06, "loss": 0.6065, "step": 1318 }, { "epoch": 1.7638795986622073, "grad_norm": 0.8734586359462746, "learning_rate": 8.181651248881364e-06, "loss": 0.5588, "step": 1319 }, { "epoch": 1.7652173913043478, "grad_norm": 1.1553401878057792, "learning_rate": 8.178049927176217e-06, "loss": 0.7469, "step": 1320 }, { "epoch": 1.7665551839464881, "grad_norm": 1.1972592194517722, "learning_rate": 8.174445837049614e-06, "loss": 0.6503, "step": 1321 }, { "epoch": 1.7678929765886289, "grad_norm": 1.0918952180271462, "learning_rate": 8.170838981641108e-06, "loss": 0.6722, "step": 1322 }, { "epoch": 1.7692307692307692, "grad_norm": 1.1349403058355023, "learning_rate": 8.167229364092648e-06, "loss": 0.5835, "step": 1323 }, { "epoch": 1.7705685618729097, "grad_norm": 0.9865467190904511, "learning_rate": 8.163616987548605e-06, "loss": 0.5532, "step": 1324 }, { "epoch": 1.7719063545150502, "grad_norm": 1.1585172051625774, "learning_rate": 8.16000185515574e-06, "loss": 0.6793, "step": 1325 }, { "epoch": 1.7732441471571905, "grad_norm": 1.125890230847332, "learning_rate": 8.15638397006322e-06, "loss": 0.8068, "step": 1326 }, { "epoch": 1.7745819397993312, "grad_norm": 1.1895839350426822, "learning_rate": 8.152763335422612e-06, "loss": 0.6918, "step": 1327 }, { "epoch": 1.7759197324414715, "grad_norm": 1.0447054288129605, "learning_rate": 8.14913995438788e-06, "loss": 0.5773, "step": 1328 }, { "epoch": 1.777257525083612, "grad_norm": 0.8169352309821328, "learning_rate": 8.145513830115367e-06, "loss": 0.6024, "step": 1329 }, { "epoch": 1.7785953177257525, "grad_norm": 0.8912981218816205, "learning_rate": 8.141884965763822e-06, "loss": 0.6753, "step": 1330 }, { "epoch": 1.7799331103678928, "grad_norm": 0.9466013531223565, "learning_rate": 8.138253364494374e-06, "loss": 0.7129, "step": 1331 }, { "epoch": 1.7812709030100335, "grad_norm": 1.1408355354562925, "learning_rate": 8.134619029470535e-06, "loss": 0.6817, "step": 1332 }, { "epoch": 1.7826086956521738, "grad_norm": 1.063259670146752, "learning_rate": 8.130981963858195e-06, "loss": 0.6536, "step": 1333 }, { "epoch": 1.7839464882943143, "grad_norm": 1.0091966661458134, "learning_rate": 8.127342170825635e-06, "loss": 0.6737, "step": 1334 }, { "epoch": 1.7852842809364549, "grad_norm": 1.0560136436459717, "learning_rate": 8.1236996535435e-06, "loss": 0.694, "step": 1335 }, { "epoch": 1.7866220735785954, "grad_norm": 1.1900623497813172, "learning_rate": 8.120054415184811e-06, "loss": 0.6657, "step": 1336 }, { "epoch": 1.7879598662207359, "grad_norm": 0.8708284000788681, "learning_rate": 8.116406458924964e-06, "loss": 0.6576, "step": 1337 }, { "epoch": 1.7892976588628762, "grad_norm": 1.0881391731915537, "learning_rate": 8.112755787941718e-06, "loss": 0.6762, "step": 1338 }, { "epoch": 1.7906354515050167, "grad_norm": 0.9270768650166734, "learning_rate": 8.109102405415195e-06, "loss": 0.7513, "step": 1339 }, { "epoch": 1.7919732441471572, "grad_norm": 1.4231560041294915, "learning_rate": 8.105446314527885e-06, "loss": 0.5858, "step": 1340 }, { "epoch": 1.7933110367892977, "grad_norm": 1.4350313371704313, "learning_rate": 8.101787518464634e-06, "loss": 0.7569, "step": 1341 }, { "epoch": 1.7946488294314382, "grad_norm": 1.3334554905378033, "learning_rate": 8.098126020412644e-06, "loss": 0.6022, "step": 1342 }, { "epoch": 1.7959866220735785, "grad_norm": 1.2285250266361707, "learning_rate": 8.094461823561473e-06, "loss": 0.7759, "step": 1343 }, { "epoch": 1.797324414715719, "grad_norm": 1.2116708126136229, "learning_rate": 8.090794931103026e-06, "loss": 0.586, "step": 1344 }, { "epoch": 1.7986622073578595, "grad_norm": 1.1747528330571644, "learning_rate": 8.087125346231562e-06, "loss": 0.7826, "step": 1345 }, { "epoch": 1.8, "grad_norm": 1.7189606097136798, "learning_rate": 8.083453072143678e-06, "loss": 0.6958, "step": 1346 }, { "epoch": 1.8013377926421406, "grad_norm": 1.1596022602938065, "learning_rate": 8.079778112038318e-06, "loss": 0.5607, "step": 1347 }, { "epoch": 1.8026755852842808, "grad_norm": 0.9660930092660649, "learning_rate": 8.07610046911677e-06, "loss": 0.7424, "step": 1348 }, { "epoch": 1.8040133779264214, "grad_norm": 0.9046079931977208, "learning_rate": 8.072420146582649e-06, "loss": 0.512, "step": 1349 }, { "epoch": 1.8053511705685619, "grad_norm": 1.5830105963210865, "learning_rate": 8.068737147641913e-06, "loss": 0.7568, "step": 1350 }, { "epoch": 1.8066889632107024, "grad_norm": 0.9326609594498927, "learning_rate": 8.065051475502847e-06, "loss": 0.6928, "step": 1351 }, { "epoch": 1.808026755852843, "grad_norm": 1.5581586309833364, "learning_rate": 8.061363133376065e-06, "loss": 0.8867, "step": 1352 }, { "epoch": 1.8093645484949832, "grad_norm": 1.0238289975319834, "learning_rate": 8.057672124474508e-06, "loss": 0.7015, "step": 1353 }, { "epoch": 1.8107023411371237, "grad_norm": 0.8368308702688353, "learning_rate": 8.05397845201344e-06, "loss": 0.6706, "step": 1354 }, { "epoch": 1.8120401337792642, "grad_norm": 0.9764333839303804, "learning_rate": 8.050282119210443e-06, "loss": 0.6092, "step": 1355 }, { "epoch": 1.8133779264214047, "grad_norm": 1.041132682242268, "learning_rate": 8.046583129285422e-06, "loss": 0.685, "step": 1356 }, { "epoch": 1.8147157190635452, "grad_norm": 1.3020134298741621, "learning_rate": 8.042881485460591e-06, "loss": 0.701, "step": 1357 }, { "epoch": 1.8160535117056855, "grad_norm": 1.4804126443023284, "learning_rate": 8.039177190960476e-06, "loss": 0.6651, "step": 1358 }, { "epoch": 1.8173913043478263, "grad_norm": 1.3613974961561806, "learning_rate": 8.035470249011916e-06, "loss": 0.6187, "step": 1359 }, { "epoch": 1.8187290969899665, "grad_norm": 0.9289420530408454, "learning_rate": 8.031760662844053e-06, "loss": 0.7077, "step": 1360 }, { "epoch": 1.820066889632107, "grad_norm": 1.2078955658321582, "learning_rate": 8.028048435688333e-06, "loss": 0.706, "step": 1361 }, { "epoch": 1.8214046822742476, "grad_norm": 1.4157488965758152, "learning_rate": 8.024333570778507e-06, "loss": 0.6818, "step": 1362 }, { "epoch": 1.8227424749163879, "grad_norm": 1.14313569819241, "learning_rate": 8.020616071350613e-06, "loss": 0.5984, "step": 1363 }, { "epoch": 1.8240802675585286, "grad_norm": 0.8106201347732511, "learning_rate": 8.016895940642994e-06, "loss": 0.5942, "step": 1364 }, { "epoch": 1.8254180602006689, "grad_norm": 1.6470961663911678, "learning_rate": 8.013173181896283e-06, "loss": 0.6032, "step": 1365 }, { "epoch": 1.8267558528428094, "grad_norm": 1.0039659502750315, "learning_rate": 8.0094477983534e-06, "loss": 0.6131, "step": 1366 }, { "epoch": 1.82809364548495, "grad_norm": 0.9411027518881977, "learning_rate": 8.005719793259552e-06, "loss": 0.7715, "step": 1367 }, { "epoch": 1.8294314381270902, "grad_norm": 1.172524930437293, "learning_rate": 8.00198916986223e-06, "loss": 0.7296, "step": 1368 }, { "epoch": 1.830769230769231, "grad_norm": 0.9739323694197936, "learning_rate": 7.998255931411208e-06, "loss": 0.7088, "step": 1369 }, { "epoch": 1.8321070234113712, "grad_norm": 1.0465782711863374, "learning_rate": 7.994520081158534e-06, "loss": 0.6081, "step": 1370 }, { "epoch": 1.8334448160535117, "grad_norm": 1.0918748431368048, "learning_rate": 7.990781622358535e-06, "loss": 0.5383, "step": 1371 }, { "epoch": 1.8347826086956522, "grad_norm": 0.8582212059923242, "learning_rate": 7.987040558267807e-06, "loss": 0.6092, "step": 1372 }, { "epoch": 1.8361204013377925, "grad_norm": 1.1906803594760935, "learning_rate": 7.983296892145218e-06, "loss": 0.7482, "step": 1373 }, { "epoch": 1.8374581939799333, "grad_norm": 0.9943860458290198, "learning_rate": 7.979550627251901e-06, "loss": 0.7417, "step": 1374 }, { "epoch": 1.8387959866220736, "grad_norm": 1.2357366272416488, "learning_rate": 7.975801766851255e-06, "loss": 0.6794, "step": 1375 }, { "epoch": 1.840133779264214, "grad_norm": 0.900877460419508, "learning_rate": 7.972050314208934e-06, "loss": 0.6574, "step": 1376 }, { "epoch": 1.8414715719063546, "grad_norm": 1.053788644009072, "learning_rate": 7.968296272592862e-06, "loss": 0.6452, "step": 1377 }, { "epoch": 1.8428093645484949, "grad_norm": 1.334506306837904, "learning_rate": 7.964539645273204e-06, "loss": 0.6012, "step": 1378 }, { "epoch": 1.8441471571906356, "grad_norm": 1.0019290307525572, "learning_rate": 7.960780435522387e-06, "loss": 0.8678, "step": 1379 }, { "epoch": 1.845484949832776, "grad_norm": 1.5117605916325572, "learning_rate": 7.957018646615085e-06, "loss": 0.7437, "step": 1380 }, { "epoch": 1.8468227424749164, "grad_norm": 1.0628127121214863, "learning_rate": 7.953254281828217e-06, "loss": 0.5736, "step": 1381 }, { "epoch": 1.848160535117057, "grad_norm": 0.9996711857265214, "learning_rate": 7.94948734444095e-06, "loss": 0.7965, "step": 1382 }, { "epoch": 1.8494983277591972, "grad_norm": 1.285334567308556, "learning_rate": 7.945717837734688e-06, "loss": 0.6748, "step": 1383 }, { "epoch": 1.850836120401338, "grad_norm": 0.9346636646044831, "learning_rate": 7.941945764993074e-06, "loss": 0.5904, "step": 1384 }, { "epoch": 1.8521739130434782, "grad_norm": 1.0524714326634597, "learning_rate": 7.938171129501988e-06, "loss": 0.699, "step": 1385 }, { "epoch": 1.8535117056856187, "grad_norm": 1.2644524434255733, "learning_rate": 7.934393934549542e-06, "loss": 0.8016, "step": 1386 }, { "epoch": 1.8548494983277592, "grad_norm": 1.1074708380194944, "learning_rate": 7.930614183426074e-06, "loss": 0.6581, "step": 1387 }, { "epoch": 1.8561872909698995, "grad_norm": 0.7939649877833284, "learning_rate": 7.926831879424154e-06, "loss": 0.64, "step": 1388 }, { "epoch": 1.8575250836120403, "grad_norm": 1.2932630773235134, "learning_rate": 7.923047025838573e-06, "loss": 0.7292, "step": 1389 }, { "epoch": 1.8588628762541806, "grad_norm": 1.55321272835908, "learning_rate": 7.919259625966342e-06, "loss": 0.654, "step": 1390 }, { "epoch": 1.860200668896321, "grad_norm": 1.0674907975142576, "learning_rate": 7.915469683106694e-06, "loss": 0.6516, "step": 1391 }, { "epoch": 1.8615384615384616, "grad_norm": 1.0952732959804954, "learning_rate": 7.91167720056107e-06, "loss": 0.7235, "step": 1392 }, { "epoch": 1.8628762541806019, "grad_norm": 1.7313249208382926, "learning_rate": 7.907882181633134e-06, "loss": 0.6918, "step": 1393 }, { "epoch": 1.8642140468227426, "grad_norm": 0.9628846997865974, "learning_rate": 7.90408462962875e-06, "loss": 0.6997, "step": 1394 }, { "epoch": 1.865551839464883, "grad_norm": 1.4209253658667766, "learning_rate": 7.900284547855992e-06, "loss": 0.8249, "step": 1395 }, { "epoch": 1.8668896321070234, "grad_norm": 1.1129246263483963, "learning_rate": 7.896481939625139e-06, "loss": 0.6615, "step": 1396 }, { "epoch": 1.868227424749164, "grad_norm": 0.8456325496255075, "learning_rate": 7.892676808248666e-06, "loss": 0.6377, "step": 1397 }, { "epoch": 1.8695652173913042, "grad_norm": 0.9076063811505003, "learning_rate": 7.888869157041257e-06, "loss": 0.6405, "step": 1398 }, { "epoch": 1.870903010033445, "grad_norm": 1.067072526645741, "learning_rate": 7.885058989319776e-06, "loss": 0.6732, "step": 1399 }, { "epoch": 1.8722408026755852, "grad_norm": 0.9189464844222892, "learning_rate": 7.88124630840329e-06, "loss": 0.7163, "step": 1400 }, { "epoch": 1.8735785953177257, "grad_norm": 0.9478541191816333, "learning_rate": 7.87743111761305e-06, "loss": 0.5396, "step": 1401 }, { "epoch": 1.8749163879598663, "grad_norm": 0.8583301135881621, "learning_rate": 7.8736134202725e-06, "loss": 0.6772, "step": 1402 }, { "epoch": 1.8762541806020065, "grad_norm": 1.0480428503731638, "learning_rate": 7.869793219707258e-06, "loss": 0.7069, "step": 1403 }, { "epoch": 1.8775919732441473, "grad_norm": 0.9353699273621536, "learning_rate": 7.865970519245129e-06, "loss": 0.6319, "step": 1404 }, { "epoch": 1.8789297658862876, "grad_norm": 0.6250881283553946, "learning_rate": 7.862145322216092e-06, "loss": 0.6204, "step": 1405 }, { "epoch": 1.880267558528428, "grad_norm": 0.9435097029197781, "learning_rate": 7.858317631952307e-06, "loss": 0.6457, "step": 1406 }, { "epoch": 1.8816053511705686, "grad_norm": 1.1332950353225657, "learning_rate": 7.8544874517881e-06, "loss": 0.7218, "step": 1407 }, { "epoch": 1.8829431438127089, "grad_norm": 1.1564229407631395, "learning_rate": 7.850654785059966e-06, "loss": 0.6065, "step": 1408 }, { "epoch": 1.8842809364548496, "grad_norm": 1.1101590778043937, "learning_rate": 7.846819635106569e-06, "loss": 0.673, "step": 1409 }, { "epoch": 1.88561872909699, "grad_norm": 0.8919150899584872, "learning_rate": 7.842982005268733e-06, "loss": 0.577, "step": 1410 }, { "epoch": 1.8869565217391304, "grad_norm": 1.3350083425111947, "learning_rate": 7.83914189888945e-06, "loss": 0.7626, "step": 1411 }, { "epoch": 1.888294314381271, "grad_norm": 0.8707286453412452, "learning_rate": 7.835299319313854e-06, "loss": 0.6975, "step": 1412 }, { "epoch": 1.8896321070234112, "grad_norm": 0.7988152550669145, "learning_rate": 7.831454269889251e-06, "loss": 0.63, "step": 1413 }, { "epoch": 1.890969899665552, "grad_norm": 0.9350185012249522, "learning_rate": 7.827606753965086e-06, "loss": 0.6214, "step": 1414 }, { "epoch": 1.8923076923076922, "grad_norm": 1.080632717081062, "learning_rate": 7.823756774892961e-06, "loss": 0.6314, "step": 1415 }, { "epoch": 1.8936454849498328, "grad_norm": 1.0070056967977856, "learning_rate": 7.819904336026615e-06, "loss": 0.7311, "step": 1416 }, { "epoch": 1.8949832775919733, "grad_norm": 1.494361056778783, "learning_rate": 7.816049440721937e-06, "loss": 0.6373, "step": 1417 }, { "epoch": 1.8963210702341136, "grad_norm": 1.2558163480255398, "learning_rate": 7.812192092336951e-06, "loss": 0.6882, "step": 1418 }, { "epoch": 1.8976588628762543, "grad_norm": 1.072521174107362, "learning_rate": 7.808332294231824e-06, "loss": 0.6326, "step": 1419 }, { "epoch": 1.8989966555183946, "grad_norm": 1.03440609917902, "learning_rate": 7.80447004976885e-06, "loss": 0.777, "step": 1420 }, { "epoch": 1.900334448160535, "grad_norm": 1.1288422889254945, "learning_rate": 7.800605362312456e-06, "loss": 0.553, "step": 1421 }, { "epoch": 1.9016722408026756, "grad_norm": 0.8852400117232754, "learning_rate": 7.796738235229203e-06, "loss": 0.5812, "step": 1422 }, { "epoch": 1.903010033444816, "grad_norm": 1.0533622808341494, "learning_rate": 7.792868671887768e-06, "loss": 0.6621, "step": 1423 }, { "epoch": 1.9043478260869566, "grad_norm": 1.7523286895325931, "learning_rate": 7.788996675658955e-06, "loss": 0.8085, "step": 1424 }, { "epoch": 1.905685618729097, "grad_norm": 1.3620575874829979, "learning_rate": 7.785122249915688e-06, "loss": 0.7357, "step": 1425 }, { "epoch": 1.9070234113712374, "grad_norm": 1.2141427133657878, "learning_rate": 7.781245398033009e-06, "loss": 0.7507, "step": 1426 }, { "epoch": 1.908361204013378, "grad_norm": 1.1751725476980255, "learning_rate": 7.777366123388065e-06, "loss": 0.7017, "step": 1427 }, { "epoch": 1.9096989966555182, "grad_norm": 0.9423263983039508, "learning_rate": 7.773484429360122e-06, "loss": 0.6772, "step": 1428 }, { "epoch": 1.911036789297659, "grad_norm": 1.2634886361012672, "learning_rate": 7.769600319330553e-06, "loss": 0.6951, "step": 1429 }, { "epoch": 1.9123745819397993, "grad_norm": 0.9760165025491423, "learning_rate": 7.765713796682829e-06, "loss": 0.6659, "step": 1430 }, { "epoch": 1.9137123745819398, "grad_norm": 1.1837640559334706, "learning_rate": 7.76182486480253e-06, "loss": 0.6684, "step": 1431 }, { "epoch": 1.9150501672240803, "grad_norm": 1.2652947059895274, "learning_rate": 7.75793352707733e-06, "loss": 0.7889, "step": 1432 }, { "epoch": 1.9163879598662206, "grad_norm": 0.7223187608495987, "learning_rate": 7.754039786897004e-06, "loss": 0.6623, "step": 1433 }, { "epoch": 1.9177257525083613, "grad_norm": 1.409299306809432, "learning_rate": 7.750143647653409e-06, "loss": 0.6606, "step": 1434 }, { "epoch": 1.9190635451505016, "grad_norm": 1.3437388332638438, "learning_rate": 7.746245112740507e-06, "loss": 0.7083, "step": 1435 }, { "epoch": 1.920401337792642, "grad_norm": 1.103829150260525, "learning_rate": 7.742344185554335e-06, "loss": 0.6504, "step": 1436 }, { "epoch": 1.9217391304347826, "grad_norm": 1.16672429370036, "learning_rate": 7.738440869493018e-06, "loss": 0.7122, "step": 1437 }, { "epoch": 1.9230769230769231, "grad_norm": 1.4066255566951995, "learning_rate": 7.734535167956761e-06, "loss": 0.7781, "step": 1438 }, { "epoch": 1.9244147157190636, "grad_norm": 1.2659279424091157, "learning_rate": 7.73062708434785e-06, "loss": 0.6917, "step": 1439 }, { "epoch": 1.925752508361204, "grad_norm": 0.8935132983181482, "learning_rate": 7.726716622070643e-06, "loss": 0.6318, "step": 1440 }, { "epoch": 1.9270903010033444, "grad_norm": 1.012743732810749, "learning_rate": 7.722803784531572e-06, "loss": 0.6143, "step": 1441 }, { "epoch": 1.928428093645485, "grad_norm": 1.0527688026439388, "learning_rate": 7.718888575139134e-06, "loss": 0.5879, "step": 1442 }, { "epoch": 1.9297658862876255, "grad_norm": 0.7781208406683162, "learning_rate": 7.714970997303898e-06, "loss": 0.6094, "step": 1443 }, { "epoch": 1.931103678929766, "grad_norm": 0.7529006881647116, "learning_rate": 7.711051054438491e-06, "loss": 0.6859, "step": 1444 }, { "epoch": 1.9324414715719063, "grad_norm": 0.8732938756322679, "learning_rate": 7.707128749957606e-06, "loss": 0.6845, "step": 1445 }, { "epoch": 1.9337792642140468, "grad_norm": 0.9534611093057073, "learning_rate": 7.703204087277989e-06, "loss": 0.6192, "step": 1446 }, { "epoch": 1.9351170568561873, "grad_norm": 1.0698283864886995, "learning_rate": 7.699277069818439e-06, "loss": 0.7163, "step": 1447 }, { "epoch": 1.9364548494983278, "grad_norm": 1.1727984673681453, "learning_rate": 7.69534770099981e-06, "loss": 0.7059, "step": 1448 }, { "epoch": 1.9377926421404683, "grad_norm": 1.055328335428356, "learning_rate": 7.691415984244998e-06, "loss": 0.6996, "step": 1449 }, { "epoch": 1.9391304347826086, "grad_norm": 0.7087061808046973, "learning_rate": 7.687481922978955e-06, "loss": 0.623, "step": 1450 }, { "epoch": 1.9404682274247491, "grad_norm": 1.112930859135998, "learning_rate": 7.683545520628667e-06, "loss": 0.6422, "step": 1451 }, { "epoch": 1.9418060200668896, "grad_norm": 0.8683439827318398, "learning_rate": 7.679606780623162e-06, "loss": 0.6697, "step": 1452 }, { "epoch": 1.9431438127090301, "grad_norm": 1.0787560588076397, "learning_rate": 7.675665706393502e-06, "loss": 0.6852, "step": 1453 }, { "epoch": 1.9444816053511706, "grad_norm": 0.969730458986582, "learning_rate": 7.671722301372788e-06, "loss": 0.594, "step": 1454 }, { "epoch": 1.945819397993311, "grad_norm": 0.8463374707736763, "learning_rate": 7.667776568996143e-06, "loss": 0.6628, "step": 1455 }, { "epoch": 1.9471571906354515, "grad_norm": 0.9610821763802976, "learning_rate": 7.663828512700724e-06, "loss": 0.6431, "step": 1456 }, { "epoch": 1.948494983277592, "grad_norm": 1.0753496873915218, "learning_rate": 7.65987813592571e-06, "loss": 0.5873, "step": 1457 }, { "epoch": 1.9498327759197325, "grad_norm": 1.2252120321077997, "learning_rate": 7.655925442112303e-06, "loss": 0.7246, "step": 1458 }, { "epoch": 1.951170568561873, "grad_norm": 1.7273712469720797, "learning_rate": 7.651970434703724e-06, "loss": 0.7161, "step": 1459 }, { "epoch": 1.9525083612040133, "grad_norm": 1.156937393618107, "learning_rate": 7.648013117145203e-06, "loss": 0.7451, "step": 1460 }, { "epoch": 1.953846153846154, "grad_norm": 0.9390914797125197, "learning_rate": 7.64405349288399e-06, "loss": 0.6158, "step": 1461 }, { "epoch": 1.9551839464882943, "grad_norm": 0.7476845811632348, "learning_rate": 7.640091565369339e-06, "loss": 0.5478, "step": 1462 }, { "epoch": 1.9565217391304348, "grad_norm": 0.9716283822116364, "learning_rate": 7.636127338052513e-06, "loss": 0.6758, "step": 1463 }, { "epoch": 1.9578595317725753, "grad_norm": 0.7860118792652475, "learning_rate": 7.63216081438678e-06, "loss": 0.5891, "step": 1464 }, { "epoch": 1.9591973244147156, "grad_norm": 0.8381917414416669, "learning_rate": 7.628191997827405e-06, "loss": 0.5523, "step": 1465 }, { "epoch": 1.9605351170568563, "grad_norm": 1.166616765529657, "learning_rate": 7.624220891831653e-06, "loss": 0.7222, "step": 1466 }, { "epoch": 1.9618729096989966, "grad_norm": 0.8419175342549502, "learning_rate": 7.62024749985878e-06, "loss": 0.6039, "step": 1467 }, { "epoch": 1.9632107023411371, "grad_norm": 1.060896270842127, "learning_rate": 7.616271825370037e-06, "loss": 0.7362, "step": 1468 }, { "epoch": 1.9645484949832777, "grad_norm": 0.8316803817380303, "learning_rate": 7.612293871828662e-06, "loss": 0.6129, "step": 1469 }, { "epoch": 1.965886287625418, "grad_norm": 1.1215056392951497, "learning_rate": 7.6083136426998786e-06, "loss": 0.6016, "step": 1470 }, { "epoch": 1.9672240802675587, "grad_norm": 0.7785496300569228, "learning_rate": 7.604331141450889e-06, "loss": 0.6384, "step": 1471 }, { "epoch": 1.968561872909699, "grad_norm": 0.8361903943265091, "learning_rate": 7.600346371550882e-06, "loss": 0.6317, "step": 1472 }, { "epoch": 1.9698996655518395, "grad_norm": 0.7413846968452461, "learning_rate": 7.596359336471015e-06, "loss": 0.7436, "step": 1473 }, { "epoch": 1.97123745819398, "grad_norm": 0.8930263263916729, "learning_rate": 7.592370039684424e-06, "loss": 0.6652, "step": 1474 }, { "epoch": 1.9725752508361203, "grad_norm": 0.8509219961254768, "learning_rate": 7.588378484666214e-06, "loss": 0.704, "step": 1475 }, { "epoch": 1.973913043478261, "grad_norm": 1.086643025696789, "learning_rate": 7.584384674893454e-06, "loss": 0.6845, "step": 1476 }, { "epoch": 1.9752508361204013, "grad_norm": 0.7581956017530059, "learning_rate": 7.58038861384518e-06, "loss": 0.7724, "step": 1477 }, { "epoch": 1.9765886287625418, "grad_norm": 1.106280434530241, "learning_rate": 7.576390305002389e-06, "loss": 0.752, "step": 1478 }, { "epoch": 1.9779264214046823, "grad_norm": 1.0256749050098255, "learning_rate": 7.572389751848037e-06, "loss": 0.6075, "step": 1479 }, { "epoch": 1.9792642140468226, "grad_norm": 1.1124133284991604, "learning_rate": 7.568386957867033e-06, "loss": 0.7619, "step": 1480 }, { "epoch": 1.9806020066889634, "grad_norm": 1.00969033748569, "learning_rate": 7.564381926546238e-06, "loss": 0.7117, "step": 1481 }, { "epoch": 1.9819397993311036, "grad_norm": 1.2875715758412214, "learning_rate": 7.560374661374463e-06, "loss": 0.724, "step": 1482 }, { "epoch": 1.9832775919732442, "grad_norm": 1.2486450502275517, "learning_rate": 7.556365165842466e-06, "loss": 0.7488, "step": 1483 }, { "epoch": 1.9846153846153847, "grad_norm": 1.267671534017887, "learning_rate": 7.552353443442944e-06, "loss": 0.6743, "step": 1484 }, { "epoch": 1.985953177257525, "grad_norm": 0.9524294046074969, "learning_rate": 7.548339497670538e-06, "loss": 0.6642, "step": 1485 }, { "epoch": 1.9872909698996657, "grad_norm": 1.0359292699753262, "learning_rate": 7.544323332021826e-06, "loss": 0.6528, "step": 1486 }, { "epoch": 1.988628762541806, "grad_norm": 1.3492221246993537, "learning_rate": 7.540304949995314e-06, "loss": 0.7182, "step": 1487 }, { "epoch": 1.9899665551839465, "grad_norm": 1.6001960800491086, "learning_rate": 7.536284355091443e-06, "loss": 0.7338, "step": 1488 }, { "epoch": 1.991304347826087, "grad_norm": 1.3042784686697, "learning_rate": 7.532261550812585e-06, "loss": 0.6164, "step": 1489 }, { "epoch": 1.9926421404682273, "grad_norm": 0.8771048672324426, "learning_rate": 7.528236540663031e-06, "loss": 0.6301, "step": 1490 }, { "epoch": 1.993979933110368, "grad_norm": 1.1969891948475768, "learning_rate": 7.524209328148995e-06, "loss": 0.776, "step": 1491 }, { "epoch": 1.9953177257525083, "grad_norm": 1.4246382308531766, "learning_rate": 7.520179916778608e-06, "loss": 0.7189, "step": 1492 }, { "epoch": 1.9966555183946488, "grad_norm": 1.0036400645326073, "learning_rate": 7.516148310061921e-06, "loss": 0.6898, "step": 1493 }, { "epoch": 1.9979933110367893, "grad_norm": 0.9771780466831826, "learning_rate": 7.512114511510893e-06, "loss": 0.5638, "step": 1494 }, { "epoch": 1.9993311036789296, "grad_norm": 0.9375569492499622, "learning_rate": 7.508078524639397e-06, "loss": 0.7128, "step": 1495 }, { "epoch": 2.0, "grad_norm": 1.890171917332043, "learning_rate": 7.504040352963206e-06, "loss": 0.7768, "step": 1496 }, { "epoch": 2.0013377926421403, "grad_norm": 1.3201514811381077, "learning_rate": 7.500000000000001e-06, "loss": 0.5511, "step": 1497 }, { "epoch": 2.002675585284281, "grad_norm": 1.324047496413525, "learning_rate": 7.495957469269361e-06, "loss": 0.6152, "step": 1498 }, { "epoch": 2.0040133779264213, "grad_norm": 0.7924293852627166, "learning_rate": 7.491912764292764e-06, "loss": 0.5801, "step": 1499 }, { "epoch": 2.005351170568562, "grad_norm": 0.9139283616318002, "learning_rate": 7.487865888593579e-06, "loss": 0.5192, "step": 1500 }, { "epoch": 2.0066889632107023, "grad_norm": 1.049987832622251, "learning_rate": 7.483816845697069e-06, "loss": 0.5392, "step": 1501 }, { "epoch": 2.0080267558528426, "grad_norm": 0.9631382430493614, "learning_rate": 7.479765639130384e-06, "loss": 0.5397, "step": 1502 }, { "epoch": 2.0093645484949834, "grad_norm": 0.962319951777525, "learning_rate": 7.4757122724225575e-06, "loss": 0.6094, "step": 1503 }, { "epoch": 2.0107023411371236, "grad_norm": 0.9590720171190331, "learning_rate": 7.471656749104503e-06, "loss": 0.5776, "step": 1504 }, { "epoch": 2.0120401337792644, "grad_norm": 1.4145883761803857, "learning_rate": 7.467599072709019e-06, "loss": 0.5054, "step": 1505 }, { "epoch": 2.0133779264214047, "grad_norm": 1.0705962296641358, "learning_rate": 7.463539246770775e-06, "loss": 0.5967, "step": 1506 }, { "epoch": 2.014715719063545, "grad_norm": 1.1621350843491547, "learning_rate": 7.459477274826312e-06, "loss": 0.5972, "step": 1507 }, { "epoch": 2.0160535117056857, "grad_norm": 1.1603586086737459, "learning_rate": 7.4554131604140425e-06, "loss": 0.7248, "step": 1508 }, { "epoch": 2.017391304347826, "grad_norm": 1.2370808537695654, "learning_rate": 7.451346907074245e-06, "loss": 0.544, "step": 1509 }, { "epoch": 2.0187290969899667, "grad_norm": 1.1365920170126182, "learning_rate": 7.447278518349062e-06, "loss": 0.6608, "step": 1510 }, { "epoch": 2.020066889632107, "grad_norm": 1.0358484829384536, "learning_rate": 7.443207997782495e-06, "loss": 0.466, "step": 1511 }, { "epoch": 2.0214046822742473, "grad_norm": 1.1312788200390527, "learning_rate": 7.439135348920403e-06, "loss": 0.6615, "step": 1512 }, { "epoch": 2.022742474916388, "grad_norm": 1.0508661848110417, "learning_rate": 7.435060575310498e-06, "loss": 0.6031, "step": 1513 }, { "epoch": 2.0240802675585283, "grad_norm": 1.0849902313762032, "learning_rate": 7.430983680502344e-06, "loss": 0.6027, "step": 1514 }, { "epoch": 2.025418060200669, "grad_norm": 1.0108909680855571, "learning_rate": 7.426904668047352e-06, "loss": 0.59, "step": 1515 }, { "epoch": 2.0267558528428093, "grad_norm": 0.9919174756112618, "learning_rate": 7.4228235414987805e-06, "loss": 0.5038, "step": 1516 }, { "epoch": 2.0280936454849496, "grad_norm": 0.9538504427473871, "learning_rate": 7.418740304411725e-06, "loss": 0.6052, "step": 1517 }, { "epoch": 2.0294314381270904, "grad_norm": 1.2273994944822753, "learning_rate": 7.4146549603431225e-06, "loss": 0.6136, "step": 1518 }, { "epoch": 2.0307692307692307, "grad_norm": 1.0944437286363446, "learning_rate": 7.4105675128517456e-06, "loss": 0.5432, "step": 1519 }, { "epoch": 2.0321070234113714, "grad_norm": 1.1503526361086043, "learning_rate": 7.4064779654981966e-06, "loss": 0.5398, "step": 1520 }, { "epoch": 2.0334448160535117, "grad_norm": 1.1262696216271244, "learning_rate": 7.40238632184491e-06, "loss": 0.5201, "step": 1521 }, { "epoch": 2.034782608695652, "grad_norm": 1.0951566356492122, "learning_rate": 7.398292585456144e-06, "loss": 0.5898, "step": 1522 }, { "epoch": 2.0361204013377927, "grad_norm": 1.1998340277009922, "learning_rate": 7.39419675989798e-06, "loss": 0.5337, "step": 1523 }, { "epoch": 2.037458193979933, "grad_norm": 1.0047730383435611, "learning_rate": 7.390098848738324e-06, "loss": 0.5414, "step": 1524 }, { "epoch": 2.0387959866220737, "grad_norm": 1.1305902733470086, "learning_rate": 7.385998855546892e-06, "loss": 0.7158, "step": 1525 }, { "epoch": 2.040133779264214, "grad_norm": 0.7469600042279781, "learning_rate": 7.381896783895217e-06, "loss": 0.5355, "step": 1526 }, { "epoch": 2.0414715719063543, "grad_norm": 1.105810953076822, "learning_rate": 7.377792637356644e-06, "loss": 0.5429, "step": 1527 }, { "epoch": 2.042809364548495, "grad_norm": 1.0470494239075099, "learning_rate": 7.373686419506321e-06, "loss": 0.6368, "step": 1528 }, { "epoch": 2.0441471571906353, "grad_norm": 1.379620957332175, "learning_rate": 7.369578133921205e-06, "loss": 0.6052, "step": 1529 }, { "epoch": 2.045484949832776, "grad_norm": 1.0418242191465417, "learning_rate": 7.365467784180051e-06, "loss": 0.5084, "step": 1530 }, { "epoch": 2.0468227424749164, "grad_norm": 1.0157346350992933, "learning_rate": 7.361355373863415e-06, "loss": 0.6038, "step": 1531 }, { "epoch": 2.0481605351170566, "grad_norm": 1.530714367629932, "learning_rate": 7.357240906553644e-06, "loss": 0.5939, "step": 1532 }, { "epoch": 2.0494983277591974, "grad_norm": 0.9430088248168239, "learning_rate": 7.35312438583488e-06, "loss": 0.7076, "step": 1533 }, { "epoch": 2.0508361204013377, "grad_norm": 1.3638216498775717, "learning_rate": 7.349005815293055e-06, "loss": 0.4738, "step": 1534 }, { "epoch": 2.0521739130434784, "grad_norm": 0.9085671789856159, "learning_rate": 7.344885198515881e-06, "loss": 0.5975, "step": 1535 }, { "epoch": 2.0535117056856187, "grad_norm": 1.4775098415934886, "learning_rate": 7.340762539092858e-06, "loss": 0.6388, "step": 1536 }, { "epoch": 2.054849498327759, "grad_norm": 1.3058095493716146, "learning_rate": 7.336637840615265e-06, "loss": 0.5893, "step": 1537 }, { "epoch": 2.0561872909698997, "grad_norm": 1.3131868439242769, "learning_rate": 7.332511106676151e-06, "loss": 0.6028, "step": 1538 }, { "epoch": 2.05752508361204, "grad_norm": 1.0842160650335668, "learning_rate": 7.3283823408703466e-06, "loss": 0.6466, "step": 1539 }, { "epoch": 2.0588628762541807, "grad_norm": 0.8382337290363214, "learning_rate": 7.324251546794449e-06, "loss": 0.541, "step": 1540 }, { "epoch": 2.060200668896321, "grad_norm": 1.2305892493247927, "learning_rate": 7.320118728046818e-06, "loss": 0.5748, "step": 1541 }, { "epoch": 2.0615384615384613, "grad_norm": 0.9548964371873004, "learning_rate": 7.315983888227583e-06, "loss": 0.5648, "step": 1542 }, { "epoch": 2.062876254180602, "grad_norm": 1.1843627661539091, "learning_rate": 7.3118470309386325e-06, "loss": 0.5498, "step": 1543 }, { "epoch": 2.0642140468227423, "grad_norm": 1.2551789114645362, "learning_rate": 7.3077081597836105e-06, "loss": 0.5678, "step": 1544 }, { "epoch": 2.065551839464883, "grad_norm": 0.8541546357326075, "learning_rate": 7.303567278367918e-06, "loss": 0.5595, "step": 1545 }, { "epoch": 2.0668896321070234, "grad_norm": 1.333894386537982, "learning_rate": 7.299424390298704e-06, "loss": 0.5662, "step": 1546 }, { "epoch": 2.068227424749164, "grad_norm": 1.0147925040042165, "learning_rate": 7.295279499184867e-06, "loss": 0.5231, "step": 1547 }, { "epoch": 2.0695652173913044, "grad_norm": 0.908990852193761, "learning_rate": 7.291132608637053e-06, "loss": 0.6023, "step": 1548 }, { "epoch": 2.0709030100334447, "grad_norm": 0.8882234514753622, "learning_rate": 7.2869837222676445e-06, "loss": 0.5794, "step": 1549 }, { "epoch": 2.0722408026755854, "grad_norm": 1.072362809941407, "learning_rate": 7.282832843690768e-06, "loss": 0.4957, "step": 1550 }, { "epoch": 2.0735785953177257, "grad_norm": 1.0837003784554298, "learning_rate": 7.278679976522279e-06, "loss": 0.5523, "step": 1551 }, { "epoch": 2.074916387959866, "grad_norm": 1.2925294768990336, "learning_rate": 7.274525124379773e-06, "loss": 0.5868, "step": 1552 }, { "epoch": 2.0762541806020067, "grad_norm": 1.2992642316482552, "learning_rate": 7.2703682908825675e-06, "loss": 0.5782, "step": 1553 }, { "epoch": 2.077591973244147, "grad_norm": 0.8881580733158005, "learning_rate": 7.266209479651712e-06, "loss": 0.6229, "step": 1554 }, { "epoch": 2.0789297658862878, "grad_norm": 1.2061021894228467, "learning_rate": 7.262048694309976e-06, "loss": 0.6021, "step": 1555 }, { "epoch": 2.080267558528428, "grad_norm": 1.3173321235390822, "learning_rate": 7.257885938481845e-06, "loss": 0.5611, "step": 1556 }, { "epoch": 2.0816053511705688, "grad_norm": 1.0252723395540992, "learning_rate": 7.253721215793528e-06, "loss": 0.5145, "step": 1557 }, { "epoch": 2.082943143812709, "grad_norm": 0.9570931462163691, "learning_rate": 7.249554529872941e-06, "loss": 0.573, "step": 1558 }, { "epoch": 2.0842809364548494, "grad_norm": 1.3952168079433747, "learning_rate": 7.245385884349716e-06, "loss": 0.682, "step": 1559 }, { "epoch": 2.08561872909699, "grad_norm": 1.000176423770473, "learning_rate": 7.241215282855189e-06, "loss": 0.4609, "step": 1560 }, { "epoch": 2.0869565217391304, "grad_norm": 0.921517592750968, "learning_rate": 7.2370427290224e-06, "loss": 0.5923, "step": 1561 }, { "epoch": 2.088294314381271, "grad_norm": 0.9795698175394821, "learning_rate": 7.232868226486087e-06, "loss": 0.6067, "step": 1562 }, { "epoch": 2.0896321070234114, "grad_norm": 0.974184852788483, "learning_rate": 7.2286917788826926e-06, "loss": 0.5899, "step": 1563 }, { "epoch": 2.0909698996655517, "grad_norm": 0.9475143911764958, "learning_rate": 7.224513389850345e-06, "loss": 0.6449, "step": 1564 }, { "epoch": 2.0923076923076924, "grad_norm": 1.1077982700717919, "learning_rate": 7.2203330630288714e-06, "loss": 0.7419, "step": 1565 }, { "epoch": 2.0936454849498327, "grad_norm": 1.4094655732278485, "learning_rate": 7.216150802059782e-06, "loss": 0.5921, "step": 1566 }, { "epoch": 2.0949832775919734, "grad_norm": 1.0417201064688573, "learning_rate": 7.211966610586274e-06, "loss": 0.615, "step": 1567 }, { "epoch": 2.0963210702341137, "grad_norm": 0.9572131965674757, "learning_rate": 7.2077804922532245e-06, "loss": 0.6032, "step": 1568 }, { "epoch": 2.097658862876254, "grad_norm": 1.413058163317007, "learning_rate": 7.203592450707193e-06, "loss": 0.5525, "step": 1569 }, { "epoch": 2.0989966555183948, "grad_norm": 1.1403375734642367, "learning_rate": 7.1994024895964095e-06, "loss": 0.5031, "step": 1570 }, { "epoch": 2.100334448160535, "grad_norm": 1.0009127866132872, "learning_rate": 7.195210612570781e-06, "loss": 0.5801, "step": 1571 }, { "epoch": 2.101672240802676, "grad_norm": 1.603872399608476, "learning_rate": 7.1910168232818765e-06, "loss": 0.5833, "step": 1572 }, { "epoch": 2.103010033444816, "grad_norm": 1.169796158443241, "learning_rate": 7.1868211253829375e-06, "loss": 0.482, "step": 1573 }, { "epoch": 2.1043478260869564, "grad_norm": 1.440948011745842, "learning_rate": 7.182623522528866e-06, "loss": 0.7261, "step": 1574 }, { "epoch": 2.105685618729097, "grad_norm": 1.2323179878985704, "learning_rate": 7.178424018376224e-06, "loss": 0.546, "step": 1575 }, { "epoch": 2.1070234113712374, "grad_norm": 0.8785714593752211, "learning_rate": 7.174222616583228e-06, "loss": 0.5512, "step": 1576 }, { "epoch": 2.108361204013378, "grad_norm": 1.0268490158316874, "learning_rate": 7.170019320809747e-06, "loss": 0.6416, "step": 1577 }, { "epoch": 2.1096989966555184, "grad_norm": 1.077644344453767, "learning_rate": 7.165814134717303e-06, "loss": 0.6018, "step": 1578 }, { "epoch": 2.1110367892976587, "grad_norm": 0.8503936682085763, "learning_rate": 7.161607061969061e-06, "loss": 0.4836, "step": 1579 }, { "epoch": 2.1123745819397994, "grad_norm": 1.066834600142531, "learning_rate": 7.157398106229834e-06, "loss": 0.6389, "step": 1580 }, { "epoch": 2.1137123745819397, "grad_norm": 1.3854421746949275, "learning_rate": 7.153187271166071e-06, "loss": 0.6471, "step": 1581 }, { "epoch": 2.1150501672240805, "grad_norm": 1.6476987348925654, "learning_rate": 7.148974560445859e-06, "loss": 0.6177, "step": 1582 }, { "epoch": 2.1163879598662207, "grad_norm": 1.1031032409152517, "learning_rate": 7.144759977738921e-06, "loss": 0.5098, "step": 1583 }, { "epoch": 2.117725752508361, "grad_norm": 1.5152183411047333, "learning_rate": 7.14054352671661e-06, "loss": 0.6589, "step": 1584 }, { "epoch": 2.1190635451505018, "grad_norm": 1.1770562517823384, "learning_rate": 7.136325211051905e-06, "loss": 0.6077, "step": 1585 }, { "epoch": 2.120401337792642, "grad_norm": 1.149093636322085, "learning_rate": 7.132105034419411e-06, "loss": 0.5801, "step": 1586 }, { "epoch": 2.121739130434783, "grad_norm": 1.1736998841889452, "learning_rate": 7.127883000495353e-06, "loss": 0.6257, "step": 1587 }, { "epoch": 2.123076923076923, "grad_norm": 0.9609860253982004, "learning_rate": 7.123659112957571e-06, "loss": 0.6183, "step": 1588 }, { "epoch": 2.1244147157190634, "grad_norm": 1.3750252773811755, "learning_rate": 7.119433375485527e-06, "loss": 0.6332, "step": 1589 }, { "epoch": 2.125752508361204, "grad_norm": 1.6402992393423488, "learning_rate": 7.1152057917602904e-06, "loss": 0.6054, "step": 1590 }, { "epoch": 2.1270903010033444, "grad_norm": 1.2178952830326188, "learning_rate": 7.110976365464537e-06, "loss": 0.661, "step": 1591 }, { "epoch": 2.128428093645485, "grad_norm": 1.02695208185528, "learning_rate": 7.10674510028255e-06, "loss": 0.584, "step": 1592 }, { "epoch": 2.1297658862876254, "grad_norm": 1.3546791666097435, "learning_rate": 7.102511999900213e-06, "loss": 0.5618, "step": 1593 }, { "epoch": 2.1311036789297657, "grad_norm": 1.04700907928337, "learning_rate": 7.098277068005012e-06, "loss": 0.5974, "step": 1594 }, { "epoch": 2.1324414715719064, "grad_norm": 1.1332562651686593, "learning_rate": 7.094040308286023e-06, "loss": 0.5531, "step": 1595 }, { "epoch": 2.1337792642140467, "grad_norm": 1.0428051785893067, "learning_rate": 7.089801724433918e-06, "loss": 0.664, "step": 1596 }, { "epoch": 2.1351170568561875, "grad_norm": 1.0590243531880745, "learning_rate": 7.085561320140958e-06, "loss": 0.6804, "step": 1597 }, { "epoch": 2.1364548494983278, "grad_norm": 1.2959784309129223, "learning_rate": 7.081319099100986e-06, "loss": 0.5775, "step": 1598 }, { "epoch": 2.137792642140468, "grad_norm": 1.2731436042600581, "learning_rate": 7.0770750650094335e-06, "loss": 0.4705, "step": 1599 }, { "epoch": 2.139130434782609, "grad_norm": 0.9272836314843774, "learning_rate": 7.072829221563305e-06, "loss": 0.6217, "step": 1600 }, { "epoch": 2.140468227424749, "grad_norm": 0.770547297173025, "learning_rate": 7.068581572461188e-06, "loss": 0.4932, "step": 1601 }, { "epoch": 2.14180602006689, "grad_norm": 1.1987161859760012, "learning_rate": 7.064332121403237e-06, "loss": 0.592, "step": 1602 }, { "epoch": 2.14314381270903, "grad_norm": 1.1904386769906306, "learning_rate": 7.060080872091178e-06, "loss": 0.502, "step": 1603 }, { "epoch": 2.1444816053511704, "grad_norm": 0.8561678468119798, "learning_rate": 7.055827828228304e-06, "loss": 0.6188, "step": 1604 }, { "epoch": 2.145819397993311, "grad_norm": 1.253862112810335, "learning_rate": 7.051572993519474e-06, "loss": 0.6493, "step": 1605 }, { "epoch": 2.1471571906354514, "grad_norm": 0.8900495992228069, "learning_rate": 7.0473163716711004e-06, "loss": 0.6685, "step": 1606 }, { "epoch": 2.148494983277592, "grad_norm": 1.0431951891801348, "learning_rate": 7.043057966391158e-06, "loss": 0.5526, "step": 1607 }, { "epoch": 2.1498327759197324, "grad_norm": 0.704140132067067, "learning_rate": 7.038797781389174e-06, "loss": 0.5515, "step": 1608 }, { "epoch": 2.1511705685618727, "grad_norm": 0.8805024247727402, "learning_rate": 7.034535820376225e-06, "loss": 0.6133, "step": 1609 }, { "epoch": 2.1525083612040135, "grad_norm": 1.0621462561722013, "learning_rate": 7.030272087064933e-06, "loss": 0.5886, "step": 1610 }, { "epoch": 2.1538461538461537, "grad_norm": 1.0124709455252954, "learning_rate": 7.026006585169467e-06, "loss": 0.4602, "step": 1611 }, { "epoch": 2.1551839464882945, "grad_norm": 1.4845723338883616, "learning_rate": 7.021739318405537e-06, "loss": 0.5206, "step": 1612 }, { "epoch": 2.1565217391304348, "grad_norm": 1.2779236427171707, "learning_rate": 7.017470290490386e-06, "loss": 0.6187, "step": 1613 }, { "epoch": 2.157859531772575, "grad_norm": 1.097869192885128, "learning_rate": 7.013199505142796e-06, "loss": 0.6447, "step": 1614 }, { "epoch": 2.159197324414716, "grad_norm": 1.1430717532091046, "learning_rate": 7.008926966083078e-06, "loss": 0.6388, "step": 1615 }, { "epoch": 2.160535117056856, "grad_norm": 1.1132910781821215, "learning_rate": 7.004652677033069e-06, "loss": 0.5767, "step": 1616 }, { "epoch": 2.161872909698997, "grad_norm": 1.2141683481850791, "learning_rate": 7.0003766417161335e-06, "loss": 0.5278, "step": 1617 }, { "epoch": 2.163210702341137, "grad_norm": 1.115820388951022, "learning_rate": 6.996098863857155e-06, "loss": 0.6062, "step": 1618 }, { "epoch": 2.1645484949832774, "grad_norm": 1.0602172339021523, "learning_rate": 6.991819347182536e-06, "loss": 0.561, "step": 1619 }, { "epoch": 2.165886287625418, "grad_norm": 1.2406865322361804, "learning_rate": 6.987538095420193e-06, "loss": 0.6027, "step": 1620 }, { "epoch": 2.1672240802675584, "grad_norm": 1.1196887858378608, "learning_rate": 6.983255112299554e-06, "loss": 0.623, "step": 1621 }, { "epoch": 2.168561872909699, "grad_norm": 1.137488588532883, "learning_rate": 6.978970401551557e-06, "loss": 0.6457, "step": 1622 }, { "epoch": 2.1698996655518394, "grad_norm": 1.0159114837441172, "learning_rate": 6.974683966908642e-06, "loss": 0.6191, "step": 1623 }, { "epoch": 2.1712374581939797, "grad_norm": 1.427866576118672, "learning_rate": 6.970395812104751e-06, "loss": 0.553, "step": 1624 }, { "epoch": 2.1725752508361205, "grad_norm": 1.4723432597810684, "learning_rate": 6.966105940875328e-06, "loss": 0.5221, "step": 1625 }, { "epoch": 2.1739130434782608, "grad_norm": 1.113503416397594, "learning_rate": 6.961814356957308e-06, "loss": 0.5951, "step": 1626 }, { "epoch": 2.1752508361204015, "grad_norm": 1.0270668916719712, "learning_rate": 6.9575210640891215e-06, "loss": 0.5475, "step": 1627 }, { "epoch": 2.1765886287625418, "grad_norm": 0.905840238930314, "learning_rate": 6.953226066010683e-06, "loss": 0.6144, "step": 1628 }, { "epoch": 2.177926421404682, "grad_norm": 1.088948446710535, "learning_rate": 6.948929366463397e-06, "loss": 0.569, "step": 1629 }, { "epoch": 2.179264214046823, "grad_norm": 0.9604562238280926, "learning_rate": 6.944630969190149e-06, "loss": 0.6721, "step": 1630 }, { "epoch": 2.180602006688963, "grad_norm": 1.3159565867284913, "learning_rate": 6.940330877935304e-06, "loss": 0.6046, "step": 1631 }, { "epoch": 2.181939799331104, "grad_norm": 1.2359312731921177, "learning_rate": 6.936029096444697e-06, "loss": 0.5678, "step": 1632 }, { "epoch": 2.183277591973244, "grad_norm": 1.5854544005475586, "learning_rate": 6.931725628465643e-06, "loss": 0.6055, "step": 1633 }, { "epoch": 2.184615384615385, "grad_norm": 1.1059733764774138, "learning_rate": 6.927420477746923e-06, "loss": 0.5584, "step": 1634 }, { "epoch": 2.185953177257525, "grad_norm": 0.8724993550107463, "learning_rate": 6.923113648038784e-06, "loss": 0.6236, "step": 1635 }, { "epoch": 2.1872909698996654, "grad_norm": 1.1738496244695324, "learning_rate": 6.918805143092935e-06, "loss": 0.6097, "step": 1636 }, { "epoch": 2.188628762541806, "grad_norm": 1.2451274536776424, "learning_rate": 6.9144949666625434e-06, "loss": 0.6635, "step": 1637 }, { "epoch": 2.1899665551839465, "grad_norm": 1.3387850292627614, "learning_rate": 6.910183122502236e-06, "loss": 0.5707, "step": 1638 }, { "epoch": 2.1913043478260867, "grad_norm": 0.9338844023864721, "learning_rate": 6.9058696143680895e-06, "loss": 0.5983, "step": 1639 }, { "epoch": 2.1926421404682275, "grad_norm": 1.187427269659303, "learning_rate": 6.9015544460176296e-06, "loss": 0.4829, "step": 1640 }, { "epoch": 2.1939799331103678, "grad_norm": 1.2629948055470415, "learning_rate": 6.897237621209831e-06, "loss": 0.5557, "step": 1641 }, { "epoch": 2.1953177257525085, "grad_norm": 0.9360156110270624, "learning_rate": 6.89291914370511e-06, "loss": 0.5936, "step": 1642 }, { "epoch": 2.196655518394649, "grad_norm": 1.169829433906408, "learning_rate": 6.888599017265321e-06, "loss": 0.661, "step": 1643 }, { "epoch": 2.1979933110367895, "grad_norm": 1.0144679415496176, "learning_rate": 6.884277245653758e-06, "loss": 0.525, "step": 1644 }, { "epoch": 2.19933110367893, "grad_norm": 0.8917162300280114, "learning_rate": 6.8799538326351455e-06, "loss": 0.6741, "step": 1645 }, { "epoch": 2.20066889632107, "grad_norm": 1.0214045895439396, "learning_rate": 6.87562878197564e-06, "loss": 0.6032, "step": 1646 }, { "epoch": 2.202006688963211, "grad_norm": 1.4293621659551827, "learning_rate": 6.87130209744282e-06, "loss": 0.531, "step": 1647 }, { "epoch": 2.203344481605351, "grad_norm": 1.0219156734528776, "learning_rate": 6.866973782805694e-06, "loss": 0.6184, "step": 1648 }, { "epoch": 2.2046822742474914, "grad_norm": 1.0043248905156936, "learning_rate": 6.862643841834686e-06, "loss": 0.5368, "step": 1649 }, { "epoch": 2.206020066889632, "grad_norm": 1.1064228120977235, "learning_rate": 6.858312278301638e-06, "loss": 0.5223, "step": 1650 }, { "epoch": 2.2073578595317724, "grad_norm": 0.9485620973824272, "learning_rate": 6.8539790959798045e-06, "loss": 0.6247, "step": 1651 }, { "epoch": 2.208695652173913, "grad_norm": 1.2271622857831208, "learning_rate": 6.849644298643852e-06, "loss": 0.5938, "step": 1652 }, { "epoch": 2.2100334448160535, "grad_norm": 1.0576855020182843, "learning_rate": 6.845307890069851e-06, "loss": 0.5498, "step": 1653 }, { "epoch": 2.211371237458194, "grad_norm": 0.9969234927791145, "learning_rate": 6.840969874035278e-06, "loss": 0.5739, "step": 1654 }, { "epoch": 2.2127090301003345, "grad_norm": 0.8745266591130838, "learning_rate": 6.83663025431901e-06, "loss": 0.6403, "step": 1655 }, { "epoch": 2.2140468227424748, "grad_norm": 1.2322192171139081, "learning_rate": 6.832289034701318e-06, "loss": 0.6088, "step": 1656 }, { "epoch": 2.2153846153846155, "grad_norm": 1.416446949184762, "learning_rate": 6.82794621896387e-06, "loss": 0.5881, "step": 1657 }, { "epoch": 2.216722408026756, "grad_norm": 1.2104792238110196, "learning_rate": 6.823601810889723e-06, "loss": 0.5143, "step": 1658 }, { "epoch": 2.218060200668896, "grad_norm": 0.9465334645267002, "learning_rate": 6.8192558142633215e-06, "loss": 0.5729, "step": 1659 }, { "epoch": 2.219397993311037, "grad_norm": 0.9006555102080663, "learning_rate": 6.814908232870493e-06, "loss": 0.5941, "step": 1660 }, { "epoch": 2.220735785953177, "grad_norm": 0.8058548533966078, "learning_rate": 6.810559070498446e-06, "loss": 0.5621, "step": 1661 }, { "epoch": 2.222073578595318, "grad_norm": 1.193725502364377, "learning_rate": 6.806208330935766e-06, "loss": 0.5445, "step": 1662 }, { "epoch": 2.223411371237458, "grad_norm": 1.3427448431135436, "learning_rate": 6.801856017972412e-06, "loss": 0.7029, "step": 1663 }, { "epoch": 2.224749163879599, "grad_norm": 1.2523827389762885, "learning_rate": 6.797502135399716e-06, "loss": 0.5684, "step": 1664 }, { "epoch": 2.226086956521739, "grad_norm": 1.1423308425561345, "learning_rate": 6.7931466870103735e-06, "loss": 0.5797, "step": 1665 }, { "epoch": 2.2274247491638794, "grad_norm": 0.9287015411816323, "learning_rate": 6.788789676598449e-06, "loss": 0.5657, "step": 1666 }, { "epoch": 2.22876254180602, "grad_norm": 1.2420918961592167, "learning_rate": 6.78443110795936e-06, "loss": 0.6455, "step": 1667 }, { "epoch": 2.2301003344481605, "grad_norm": 0.9321732567911217, "learning_rate": 6.78007098488989e-06, "loss": 0.5606, "step": 1668 }, { "epoch": 2.231438127090301, "grad_norm": 1.1190710947189184, "learning_rate": 6.77570931118817e-06, "loss": 0.5672, "step": 1669 }, { "epoch": 2.2327759197324415, "grad_norm": 1.0915372765245035, "learning_rate": 6.771346090653687e-06, "loss": 0.5751, "step": 1670 }, { "epoch": 2.234113712374582, "grad_norm": 1.1295260845699078, "learning_rate": 6.766981327087271e-06, "loss": 0.5322, "step": 1671 }, { "epoch": 2.2354515050167225, "grad_norm": 1.2044786804049408, "learning_rate": 6.762615024291098e-06, "loss": 0.6396, "step": 1672 }, { "epoch": 2.236789297658863, "grad_norm": 1.1386661168863759, "learning_rate": 6.758247186068684e-06, "loss": 0.5211, "step": 1673 }, { "epoch": 2.2381270903010035, "grad_norm": 1.31172983105501, "learning_rate": 6.753877816224886e-06, "loss": 0.6624, "step": 1674 }, { "epoch": 2.239464882943144, "grad_norm": 0.9369560846315564, "learning_rate": 6.749506918565891e-06, "loss": 0.5798, "step": 1675 }, { "epoch": 2.240802675585284, "grad_norm": 1.0079344180983574, "learning_rate": 6.7451344968992184e-06, "loss": 0.6525, "step": 1676 }, { "epoch": 2.242140468227425, "grad_norm": 1.0019478941193634, "learning_rate": 6.740760555033715e-06, "loss": 0.5172, "step": 1677 }, { "epoch": 2.243478260869565, "grad_norm": 0.8705913909290498, "learning_rate": 6.736385096779552e-06, "loss": 0.5769, "step": 1678 }, { "epoch": 2.244816053511706, "grad_norm": 1.4377315528056787, "learning_rate": 6.732008125948223e-06, "loss": 0.6381, "step": 1679 }, { "epoch": 2.246153846153846, "grad_norm": 1.147927313086236, "learning_rate": 6.727629646352536e-06, "loss": 0.5968, "step": 1680 }, { "epoch": 2.2474916387959865, "grad_norm": 1.486663063340762, "learning_rate": 6.723249661806617e-06, "loss": 0.6067, "step": 1681 }, { "epoch": 2.248829431438127, "grad_norm": 1.0721473353574729, "learning_rate": 6.718868176125899e-06, "loss": 0.6356, "step": 1682 }, { "epoch": 2.2501672240802675, "grad_norm": 0.9048611733639308, "learning_rate": 6.714485193127126e-06, "loss": 0.473, "step": 1683 }, { "epoch": 2.251505016722408, "grad_norm": 1.0919278119302276, "learning_rate": 6.710100716628345e-06, "loss": 0.552, "step": 1684 }, { "epoch": 2.2528428093645485, "grad_norm": 1.034752817217826, "learning_rate": 6.705714750448904e-06, "loss": 0.5112, "step": 1685 }, { "epoch": 2.254180602006689, "grad_norm": 0.9609187087737471, "learning_rate": 6.701327298409448e-06, "loss": 0.6766, "step": 1686 }, { "epoch": 2.2555183946488295, "grad_norm": 1.1505790277176209, "learning_rate": 6.6969383643319175e-06, "loss": 0.4954, "step": 1687 }, { "epoch": 2.25685618729097, "grad_norm": 0.8267711329547183, "learning_rate": 6.692547952039543e-06, "loss": 0.5927, "step": 1688 }, { "epoch": 2.2581939799331106, "grad_norm": 0.9377823300162552, "learning_rate": 6.688156065356845e-06, "loss": 0.6714, "step": 1689 }, { "epoch": 2.259531772575251, "grad_norm": 0.8965329657965315, "learning_rate": 6.683762708109625e-06, "loss": 0.5829, "step": 1690 }, { "epoch": 2.260869565217391, "grad_norm": 0.9407755109272019, "learning_rate": 6.679367884124968e-06, "loss": 0.5987, "step": 1691 }, { "epoch": 2.262207357859532, "grad_norm": 0.9633417673191073, "learning_rate": 6.674971597231236e-06, "loss": 0.6278, "step": 1692 }, { "epoch": 2.263545150501672, "grad_norm": 0.7719247579347337, "learning_rate": 6.670573851258063e-06, "loss": 0.4594, "step": 1693 }, { "epoch": 2.264882943143813, "grad_norm": 1.146624046911661, "learning_rate": 6.66617465003636e-06, "loss": 0.4469, "step": 1694 }, { "epoch": 2.266220735785953, "grad_norm": 0.8595384891668091, "learning_rate": 6.6617739973982985e-06, "loss": 0.5622, "step": 1695 }, { "epoch": 2.2675585284280935, "grad_norm": 1.1953341996468516, "learning_rate": 6.6573718971773204e-06, "loss": 0.695, "step": 1696 }, { "epoch": 2.268896321070234, "grad_norm": 1.3645324560627285, "learning_rate": 6.652968353208122e-06, "loss": 0.6523, "step": 1697 }, { "epoch": 2.2702341137123745, "grad_norm": 1.2042621756155676, "learning_rate": 6.648563369326666e-06, "loss": 0.6519, "step": 1698 }, { "epoch": 2.2715719063545152, "grad_norm": 1.2926955909911617, "learning_rate": 6.644156949370162e-06, "loss": 0.6147, "step": 1699 }, { "epoch": 2.2729096989966555, "grad_norm": 0.8362007952497624, "learning_rate": 6.639749097177073e-06, "loss": 0.5555, "step": 1700 }, { "epoch": 2.274247491638796, "grad_norm": 1.0489973175006595, "learning_rate": 6.635339816587109e-06, "loss": 0.6796, "step": 1701 }, { "epoch": 2.2755852842809365, "grad_norm": 0.8561839972175969, "learning_rate": 6.630929111441227e-06, "loss": 0.6209, "step": 1702 }, { "epoch": 2.276923076923077, "grad_norm": 0.9473503232816961, "learning_rate": 6.626516985581621e-06, "loss": 0.5718, "step": 1703 }, { "epoch": 2.2782608695652176, "grad_norm": 1.1632720978884055, "learning_rate": 6.622103442851728e-06, "loss": 0.5902, "step": 1704 }, { "epoch": 2.279598662207358, "grad_norm": 1.2193015601312545, "learning_rate": 6.617688487096213e-06, "loss": 0.5931, "step": 1705 }, { "epoch": 2.280936454849498, "grad_norm": 0.8697511689379307, "learning_rate": 6.613272122160975e-06, "loss": 0.5269, "step": 1706 }, { "epoch": 2.282274247491639, "grad_norm": 1.8254279660697736, "learning_rate": 6.60885435189314e-06, "loss": 0.5409, "step": 1707 }, { "epoch": 2.283612040133779, "grad_norm": 0.7469298133304694, "learning_rate": 6.60443518014106e-06, "loss": 0.5379, "step": 1708 }, { "epoch": 2.28494983277592, "grad_norm": 0.8036345559261026, "learning_rate": 6.600014610754306e-06, "loss": 0.5907, "step": 1709 }, { "epoch": 2.28628762541806, "grad_norm": 0.8612288668005912, "learning_rate": 6.595592647583666e-06, "loss": 0.542, "step": 1710 }, { "epoch": 2.2876254180602005, "grad_norm": 0.9154128755660317, "learning_rate": 6.591169294481143e-06, "loss": 0.5361, "step": 1711 }, { "epoch": 2.288963210702341, "grad_norm": 1.5821077264928352, "learning_rate": 6.586744555299953e-06, "loss": 0.5131, "step": 1712 }, { "epoch": 2.2903010033444815, "grad_norm": 1.2315774542501323, "learning_rate": 6.582318433894513e-06, "loss": 0.5586, "step": 1713 }, { "epoch": 2.2916387959866222, "grad_norm": 0.8984520789302661, "learning_rate": 6.577890934120451e-06, "loss": 0.62, "step": 1714 }, { "epoch": 2.2929765886287625, "grad_norm": 0.9096369733454998, "learning_rate": 6.573462059834593e-06, "loss": 0.6542, "step": 1715 }, { "epoch": 2.294314381270903, "grad_norm": 0.8276233646744702, "learning_rate": 6.569031814894962e-06, "loss": 0.6125, "step": 1716 }, { "epoch": 2.2956521739130435, "grad_norm": 1.0268590510497786, "learning_rate": 6.5646002031607726e-06, "loss": 0.5671, "step": 1717 }, { "epoch": 2.296989966555184, "grad_norm": 0.7808547519016136, "learning_rate": 6.560167228492436e-06, "loss": 0.5082, "step": 1718 }, { "epoch": 2.2983277591973246, "grad_norm": 0.9943067414464263, "learning_rate": 6.555732894751548e-06, "loss": 0.5259, "step": 1719 }, { "epoch": 2.299665551839465, "grad_norm": 0.8532905408792996, "learning_rate": 6.551297205800884e-06, "loss": 0.6206, "step": 1720 }, { "epoch": 2.3010033444816056, "grad_norm": 0.9692092684346195, "learning_rate": 6.546860165504406e-06, "loss": 0.6545, "step": 1721 }, { "epoch": 2.302341137123746, "grad_norm": 1.159332523696649, "learning_rate": 6.5424217777272506e-06, "loss": 0.5424, "step": 1722 }, { "epoch": 2.303678929765886, "grad_norm": 1.3230113531422598, "learning_rate": 6.537982046335727e-06, "loss": 0.6516, "step": 1723 }, { "epoch": 2.305016722408027, "grad_norm": 1.4138495972365441, "learning_rate": 6.533540975197319e-06, "loss": 0.5635, "step": 1724 }, { "epoch": 2.306354515050167, "grad_norm": 0.9831274113763486, "learning_rate": 6.529098568180672e-06, "loss": 0.5252, "step": 1725 }, { "epoch": 2.3076923076923075, "grad_norm": 0.9121105304161671, "learning_rate": 6.524654829155599e-06, "loss": 0.5332, "step": 1726 }, { "epoch": 2.309030100334448, "grad_norm": 1.3237271072845638, "learning_rate": 6.520209761993072e-06, "loss": 0.5992, "step": 1727 }, { "epoch": 2.3103678929765885, "grad_norm": 1.0597717161364273, "learning_rate": 6.515763370565218e-06, "loss": 0.6026, "step": 1728 }, { "epoch": 2.3117056856187292, "grad_norm": 1.143650618835052, "learning_rate": 6.511315658745323e-06, "loss": 0.6284, "step": 1729 }, { "epoch": 2.3130434782608695, "grad_norm": 1.2494763964806246, "learning_rate": 6.506866630407817e-06, "loss": 0.7158, "step": 1730 }, { "epoch": 2.3143812709030103, "grad_norm": 1.6695641462467643, "learning_rate": 6.502416289428282e-06, "loss": 0.6708, "step": 1731 }, { "epoch": 2.3157190635451506, "grad_norm": 0.8767803345237668, "learning_rate": 6.4979646396834375e-06, "loss": 0.661, "step": 1732 }, { "epoch": 2.317056856187291, "grad_norm": 1.1355027115751832, "learning_rate": 6.4935116850511495e-06, "loss": 0.5916, "step": 1733 }, { "epoch": 2.3183946488294316, "grad_norm": 1.0751319500317713, "learning_rate": 6.489057429410418e-06, "loss": 0.5259, "step": 1734 }, { "epoch": 2.319732441471572, "grad_norm": 0.9257231392647008, "learning_rate": 6.484601876641375e-06, "loss": 0.5246, "step": 1735 }, { "epoch": 2.321070234113712, "grad_norm": 1.2421430564815281, "learning_rate": 6.480145030625284e-06, "loss": 0.5707, "step": 1736 }, { "epoch": 2.322408026755853, "grad_norm": 0.7435775633201882, "learning_rate": 6.475686895244534e-06, "loss": 0.5139, "step": 1737 }, { "epoch": 2.323745819397993, "grad_norm": 0.838188622612959, "learning_rate": 6.471227474382639e-06, "loss": 0.578, "step": 1738 }, { "epoch": 2.325083612040134, "grad_norm": 1.05774589360239, "learning_rate": 6.466766771924231e-06, "loss": 0.6686, "step": 1739 }, { "epoch": 2.326421404682274, "grad_norm": 0.9046530608673252, "learning_rate": 6.462304791755059e-06, "loss": 0.659, "step": 1740 }, { "epoch": 2.327759197324415, "grad_norm": 1.0562480549212385, "learning_rate": 6.457841537761985e-06, "loss": 0.5149, "step": 1741 }, { "epoch": 2.3290969899665552, "grad_norm": 1.1724519879082809, "learning_rate": 6.453377013832981e-06, "loss": 0.583, "step": 1742 }, { "epoch": 2.3304347826086955, "grad_norm": 0.9780506359676674, "learning_rate": 6.448911223857124e-06, "loss": 0.6206, "step": 1743 }, { "epoch": 2.3317725752508363, "grad_norm": 0.947029647517796, "learning_rate": 6.444444171724595e-06, "loss": 0.5883, "step": 1744 }, { "epoch": 2.3331103678929765, "grad_norm": 1.2402766730163919, "learning_rate": 6.4399758613266775e-06, "loss": 0.6112, "step": 1745 }, { "epoch": 2.334448160535117, "grad_norm": 1.0944989881952623, "learning_rate": 6.435506296555742e-06, "loss": 0.7059, "step": 1746 }, { "epoch": 2.3357859531772576, "grad_norm": 0.9875740730240993, "learning_rate": 6.431035481305261e-06, "loss": 0.5178, "step": 1747 }, { "epoch": 2.337123745819398, "grad_norm": 1.4008531695458977, "learning_rate": 6.426563419469793e-06, "loss": 0.7119, "step": 1748 }, { "epoch": 2.3384615384615386, "grad_norm": 1.479054295472955, "learning_rate": 6.422090114944982e-06, "loss": 0.5642, "step": 1749 }, { "epoch": 2.339799331103679, "grad_norm": 0.7840584610752732, "learning_rate": 6.417615571627555e-06, "loss": 0.5333, "step": 1750 }, { "epoch": 2.3411371237458196, "grad_norm": 1.4369955895319932, "learning_rate": 6.4131397934153175e-06, "loss": 0.5715, "step": 1751 }, { "epoch": 2.34247491638796, "grad_norm": 1.1114418948591451, "learning_rate": 6.408662784207149e-06, "loss": 0.6594, "step": 1752 }, { "epoch": 2.3438127090301, "grad_norm": 0.9310657494997828, "learning_rate": 6.404184547903006e-06, "loss": 0.6006, "step": 1753 }, { "epoch": 2.345150501672241, "grad_norm": 0.9901798815770166, "learning_rate": 6.399705088403912e-06, "loss": 0.4874, "step": 1754 }, { "epoch": 2.346488294314381, "grad_norm": 1.3677047195950602, "learning_rate": 6.3952244096119535e-06, "loss": 0.6086, "step": 1755 }, { "epoch": 2.3478260869565215, "grad_norm": 1.2111026403615792, "learning_rate": 6.3907425154302815e-06, "loss": 0.5166, "step": 1756 }, { "epoch": 2.3491638795986622, "grad_norm": 0.8483308592493961, "learning_rate": 6.386259409763107e-06, "loss": 0.5595, "step": 1757 }, { "epoch": 2.3505016722408025, "grad_norm": 0.9735147782695415, "learning_rate": 6.381775096515692e-06, "loss": 0.558, "step": 1758 }, { "epoch": 2.3518394648829433, "grad_norm": 1.067851210381359, "learning_rate": 6.377289579594355e-06, "loss": 0.6714, "step": 1759 }, { "epoch": 2.3531772575250836, "grad_norm": 1.0018456350490583, "learning_rate": 6.372802862906459e-06, "loss": 0.4852, "step": 1760 }, { "epoch": 2.3545150501672243, "grad_norm": 0.838868449607585, "learning_rate": 6.368314950360416e-06, "loss": 0.5096, "step": 1761 }, { "epoch": 2.3558528428093646, "grad_norm": 0.9804732674063376, "learning_rate": 6.3638258458656766e-06, "loss": 0.4857, "step": 1762 }, { "epoch": 2.357190635451505, "grad_norm": 1.2619243850176682, "learning_rate": 6.3593355533327314e-06, "loss": 0.636, "step": 1763 }, { "epoch": 2.3585284280936456, "grad_norm": 1.0653126398051582, "learning_rate": 6.354844076673108e-06, "loss": 0.4965, "step": 1764 }, { "epoch": 2.359866220735786, "grad_norm": 1.04392555243826, "learning_rate": 6.35035141979936e-06, "loss": 0.5982, "step": 1765 }, { "epoch": 2.361204013377926, "grad_norm": 1.6219006751821534, "learning_rate": 6.345857586625073e-06, "loss": 0.5562, "step": 1766 }, { "epoch": 2.362541806020067, "grad_norm": 0.9330736144168149, "learning_rate": 6.341362581064856e-06, "loss": 0.609, "step": 1767 }, { "epoch": 2.363879598662207, "grad_norm": 1.0100607804583157, "learning_rate": 6.336866407034341e-06, "loss": 0.4291, "step": 1768 }, { "epoch": 2.365217391304348, "grad_norm": 0.9878018032074792, "learning_rate": 6.332369068450175e-06, "loss": 0.5685, "step": 1769 }, { "epoch": 2.3665551839464882, "grad_norm": 0.9477827155247335, "learning_rate": 6.327870569230022e-06, "loss": 0.6722, "step": 1770 }, { "epoch": 2.367892976588629, "grad_norm": 1.0300492407658548, "learning_rate": 6.323370913292557e-06, "loss": 0.6516, "step": 1771 }, { "epoch": 2.3692307692307693, "grad_norm": 1.2014003301869054, "learning_rate": 6.318870104557459e-06, "loss": 0.6907, "step": 1772 }, { "epoch": 2.3705685618729095, "grad_norm": 0.962410984041785, "learning_rate": 6.314368146945418e-06, "loss": 0.5215, "step": 1773 }, { "epoch": 2.3719063545150503, "grad_norm": 1.1257536069899152, "learning_rate": 6.309865044378115e-06, "loss": 0.6053, "step": 1774 }, { "epoch": 2.3732441471571906, "grad_norm": 1.0200503341954998, "learning_rate": 6.3053608007782385e-06, "loss": 0.5479, "step": 1775 }, { "epoch": 2.374581939799331, "grad_norm": 1.279687194558699, "learning_rate": 6.300855420069465e-06, "loss": 0.6136, "step": 1776 }, { "epoch": 2.3759197324414716, "grad_norm": 1.1398008934891797, "learning_rate": 6.296348906176462e-06, "loss": 0.4922, "step": 1777 }, { "epoch": 2.377257525083612, "grad_norm": 1.2732256724056366, "learning_rate": 6.2918412630248874e-06, "loss": 0.671, "step": 1778 }, { "epoch": 2.3785953177257526, "grad_norm": 0.9333083690764877, "learning_rate": 6.28733249454138e-06, "loss": 0.5573, "step": 1779 }, { "epoch": 2.379933110367893, "grad_norm": 1.3767268207734986, "learning_rate": 6.2828226046535575e-06, "loss": 0.5277, "step": 1780 }, { "epoch": 2.3812709030100336, "grad_norm": 0.9504543142309846, "learning_rate": 6.278311597290019e-06, "loss": 0.6833, "step": 1781 }, { "epoch": 2.382608695652174, "grad_norm": 1.1609604003545388, "learning_rate": 6.273799476380332e-06, "loss": 0.565, "step": 1782 }, { "epoch": 2.383946488294314, "grad_norm": 1.1011180680551342, "learning_rate": 6.269286245855039e-06, "loss": 0.5163, "step": 1783 }, { "epoch": 2.385284280936455, "grad_norm": 1.247245588034396, "learning_rate": 6.264771909645646e-06, "loss": 0.6422, "step": 1784 }, { "epoch": 2.3866220735785952, "grad_norm": 0.9862404148810454, "learning_rate": 6.260256471684622e-06, "loss": 0.5383, "step": 1785 }, { "epoch": 2.387959866220736, "grad_norm": 0.8754604632938257, "learning_rate": 6.255739935905396e-06, "loss": 0.4811, "step": 1786 }, { "epoch": 2.3892976588628763, "grad_norm": 1.0380017257438632, "learning_rate": 6.2512223062423545e-06, "loss": 0.585, "step": 1787 }, { "epoch": 2.3906354515050166, "grad_norm": 0.8478796488202274, "learning_rate": 6.246703586630838e-06, "loss": 0.4838, "step": 1788 }, { "epoch": 2.3919732441471573, "grad_norm": 0.8103137332493882, "learning_rate": 6.242183781007132e-06, "loss": 0.5061, "step": 1789 }, { "epoch": 2.3933110367892976, "grad_norm": 1.2044409572935026, "learning_rate": 6.237662893308471e-06, "loss": 0.7013, "step": 1790 }, { "epoch": 2.3946488294314383, "grad_norm": 1.36909623051235, "learning_rate": 6.233140927473033e-06, "loss": 0.558, "step": 1791 }, { "epoch": 2.3959866220735786, "grad_norm": 0.9107327853202718, "learning_rate": 6.228617887439931e-06, "loss": 0.5795, "step": 1792 }, { "epoch": 2.397324414715719, "grad_norm": 1.2513547186332017, "learning_rate": 6.224093777149222e-06, "loss": 0.5173, "step": 1793 }, { "epoch": 2.3986622073578596, "grad_norm": 0.9255076795470839, "learning_rate": 6.219568600541886e-06, "loss": 0.5723, "step": 1794 }, { "epoch": 2.4, "grad_norm": 1.0555488379781663, "learning_rate": 6.2150423615598376e-06, "loss": 0.5774, "step": 1795 }, { "epoch": 2.4013377926421406, "grad_norm": 1.1804150012653385, "learning_rate": 6.210515064145915e-06, "loss": 0.6606, "step": 1796 }, { "epoch": 2.402675585284281, "grad_norm": 1.2946603673743597, "learning_rate": 6.205986712243876e-06, "loss": 0.5504, "step": 1797 }, { "epoch": 2.4040133779264212, "grad_norm": 1.0532374324456413, "learning_rate": 6.201457309798403e-06, "loss": 0.5551, "step": 1798 }, { "epoch": 2.405351170568562, "grad_norm": 1.239437067993436, "learning_rate": 6.196926860755088e-06, "loss": 0.6958, "step": 1799 }, { "epoch": 2.4066889632107022, "grad_norm": 1.071534152966649, "learning_rate": 6.192395369060439e-06, "loss": 0.6265, "step": 1800 }, { "epoch": 2.408026755852843, "grad_norm": 1.2858055324588769, "learning_rate": 6.187862838661869e-06, "loss": 0.4892, "step": 1801 }, { "epoch": 2.4093645484949833, "grad_norm": 0.9331546026617844, "learning_rate": 6.183329273507693e-06, "loss": 0.6544, "step": 1802 }, { "epoch": 2.4107023411371236, "grad_norm": 0.8800997627796171, "learning_rate": 6.178794677547138e-06, "loss": 0.5604, "step": 1803 }, { "epoch": 2.4120401337792643, "grad_norm": 1.0139477019471568, "learning_rate": 6.174259054730316e-06, "loss": 0.5733, "step": 1804 }, { "epoch": 2.4133779264214046, "grad_norm": 1.426922003643528, "learning_rate": 6.169722409008244e-06, "loss": 0.6443, "step": 1805 }, { "epoch": 2.4147157190635453, "grad_norm": 1.139366698714853, "learning_rate": 6.165184744332824e-06, "loss": 0.6012, "step": 1806 }, { "epoch": 2.4160535117056856, "grad_norm": 1.3742465219626048, "learning_rate": 6.160646064656845e-06, "loss": 0.6342, "step": 1807 }, { "epoch": 2.417391304347826, "grad_norm": 1.010266822706577, "learning_rate": 6.156106373933988e-06, "loss": 0.6043, "step": 1808 }, { "epoch": 2.4187290969899666, "grad_norm": 1.0978943494117872, "learning_rate": 6.151565676118805e-06, "loss": 0.523, "step": 1809 }, { "epoch": 2.420066889632107, "grad_norm": 1.5244634156793198, "learning_rate": 6.147023975166731e-06, "loss": 0.6157, "step": 1810 }, { "epoch": 2.4214046822742477, "grad_norm": 1.4345605587771129, "learning_rate": 6.142481275034072e-06, "loss": 0.604, "step": 1811 }, { "epoch": 2.422742474916388, "grad_norm": 1.5706281984947534, "learning_rate": 6.137937579678007e-06, "loss": 0.6075, "step": 1812 }, { "epoch": 2.4240802675585282, "grad_norm": 1.163998936537645, "learning_rate": 6.133392893056583e-06, "loss": 0.5744, "step": 1813 }, { "epoch": 2.425418060200669, "grad_norm": 1.4425760710740207, "learning_rate": 6.128847219128703e-06, "loss": 0.5675, "step": 1814 }, { "epoch": 2.4267558528428093, "grad_norm": 0.9599283392959892, "learning_rate": 6.124300561854139e-06, "loss": 0.5591, "step": 1815 }, { "epoch": 2.42809364548495, "grad_norm": 0.8804827128922097, "learning_rate": 6.119752925193516e-06, "loss": 0.5982, "step": 1816 }, { "epoch": 2.4294314381270903, "grad_norm": 1.0445370838701382, "learning_rate": 6.1152043131083095e-06, "loss": 0.6544, "step": 1817 }, { "epoch": 2.430769230769231, "grad_norm": 1.275841329017059, "learning_rate": 6.1106547295608495e-06, "loss": 0.5816, "step": 1818 }, { "epoch": 2.4321070234113713, "grad_norm": 1.5395633011082495, "learning_rate": 6.106104178514309e-06, "loss": 0.6576, "step": 1819 }, { "epoch": 2.4334448160535116, "grad_norm": 1.0347629094552597, "learning_rate": 6.101552663932704e-06, "loss": 0.6061, "step": 1820 }, { "epoch": 2.4347826086956523, "grad_norm": 1.4162970347606847, "learning_rate": 6.097000189780893e-06, "loss": 0.6482, "step": 1821 }, { "epoch": 2.4361204013377926, "grad_norm": 0.7477559988417648, "learning_rate": 6.092446760024564e-06, "loss": 0.5659, "step": 1822 }, { "epoch": 2.437458193979933, "grad_norm": 1.1886264633392354, "learning_rate": 6.087892378630245e-06, "loss": 0.6165, "step": 1823 }, { "epoch": 2.4387959866220736, "grad_norm": 1.0116893956567585, "learning_rate": 6.0833370495652885e-06, "loss": 0.5997, "step": 1824 }, { "epoch": 2.440133779264214, "grad_norm": 1.2609217716550385, "learning_rate": 6.0787807767978736e-06, "loss": 0.5552, "step": 1825 }, { "epoch": 2.4414715719063547, "grad_norm": 1.1499883527720263, "learning_rate": 6.074223564296999e-06, "loss": 0.6014, "step": 1826 }, { "epoch": 2.442809364548495, "grad_norm": 0.9146545278494881, "learning_rate": 6.0696654160324875e-06, "loss": 0.5707, "step": 1827 }, { "epoch": 2.4441471571906357, "grad_norm": 1.000678044907977, "learning_rate": 6.065106335974972e-06, "loss": 0.5432, "step": 1828 }, { "epoch": 2.445484949832776, "grad_norm": 1.0535433832503818, "learning_rate": 6.0605463280958995e-06, "loss": 0.564, "step": 1829 }, { "epoch": 2.4468227424749163, "grad_norm": 0.8339238716456927, "learning_rate": 6.055985396367526e-06, "loss": 0.5684, "step": 1830 }, { "epoch": 2.448160535117057, "grad_norm": 1.0881376089037993, "learning_rate": 6.051423544762909e-06, "loss": 0.5017, "step": 1831 }, { "epoch": 2.4494983277591973, "grad_norm": 1.5441203692809025, "learning_rate": 6.046860777255907e-06, "loss": 0.6097, "step": 1832 }, { "epoch": 2.4508361204013376, "grad_norm": 0.9150291802714964, "learning_rate": 6.042297097821184e-06, "loss": 0.5833, "step": 1833 }, { "epoch": 2.4521739130434783, "grad_norm": 1.2739500391637884, "learning_rate": 6.0377325104341885e-06, "loss": 0.6045, "step": 1834 }, { "epoch": 2.4535117056856186, "grad_norm": 1.2749910485260112, "learning_rate": 6.033167019071168e-06, "loss": 0.6519, "step": 1835 }, { "epoch": 2.4548494983277593, "grad_norm": 1.4702078363816924, "learning_rate": 6.028600627709151e-06, "loss": 0.5061, "step": 1836 }, { "epoch": 2.4561872909698996, "grad_norm": 0.8972998603830517, "learning_rate": 6.024033340325954e-06, "loss": 0.591, "step": 1837 }, { "epoch": 2.4575250836120404, "grad_norm": 1.1675602782630523, "learning_rate": 6.019465160900173e-06, "loss": 0.593, "step": 1838 }, { "epoch": 2.4588628762541807, "grad_norm": 1.0268786564327037, "learning_rate": 6.014896093411181e-06, "loss": 0.5538, "step": 1839 }, { "epoch": 2.460200668896321, "grad_norm": 1.4713153033148152, "learning_rate": 6.010326141839125e-06, "loss": 0.6852, "step": 1840 }, { "epoch": 2.4615384615384617, "grad_norm": 1.1519712636405925, "learning_rate": 6.005755310164919e-06, "loss": 0.6604, "step": 1841 }, { "epoch": 2.462876254180602, "grad_norm": 0.8684590010976131, "learning_rate": 6.001183602370249e-06, "loss": 0.5024, "step": 1842 }, { "epoch": 2.4642140468227423, "grad_norm": 1.0126913232842254, "learning_rate": 5.996611022437562e-06, "loss": 0.6056, "step": 1843 }, { "epoch": 2.465551839464883, "grad_norm": 1.6121008335275337, "learning_rate": 5.992037574350062e-06, "loss": 0.5716, "step": 1844 }, { "epoch": 2.4668896321070233, "grad_norm": 1.256483154925823, "learning_rate": 5.987463262091715e-06, "loss": 0.5306, "step": 1845 }, { "epoch": 2.468227424749164, "grad_norm": 1.0499996594925447, "learning_rate": 5.982888089647232e-06, "loss": 0.5856, "step": 1846 }, { "epoch": 2.4695652173913043, "grad_norm": 1.248649722025155, "learning_rate": 5.978312061002078e-06, "loss": 0.6409, "step": 1847 }, { "epoch": 2.470903010033445, "grad_norm": 0.8702968504108612, "learning_rate": 5.973735180142468e-06, "loss": 0.6437, "step": 1848 }, { "epoch": 2.4722408026755853, "grad_norm": 0.9642248289372902, "learning_rate": 5.9691574510553505e-06, "loss": 0.5591, "step": 1849 }, { "epoch": 2.4735785953177256, "grad_norm": 0.9060601328069838, "learning_rate": 5.9645788777284195e-06, "loss": 0.6963, "step": 1850 }, { "epoch": 2.4749163879598663, "grad_norm": 0.9282157994488034, "learning_rate": 5.959999464150101e-06, "loss": 0.6204, "step": 1851 }, { "epoch": 2.4762541806020066, "grad_norm": 0.946557773597313, "learning_rate": 5.9554192143095535e-06, "loss": 0.5742, "step": 1852 }, { "epoch": 2.477591973244147, "grad_norm": 1.37487055136636, "learning_rate": 5.950838132196667e-06, "loss": 0.6246, "step": 1853 }, { "epoch": 2.4789297658862877, "grad_norm": 1.22072023758802, "learning_rate": 5.946256221802052e-06, "loss": 0.5553, "step": 1854 }, { "epoch": 2.480267558528428, "grad_norm": 1.2049814501961686, "learning_rate": 5.941673487117043e-06, "loss": 0.588, "step": 1855 }, { "epoch": 2.4816053511705687, "grad_norm": 1.211520375128029, "learning_rate": 5.937089932133693e-06, "loss": 0.5962, "step": 1856 }, { "epoch": 2.482943143812709, "grad_norm": 1.1072229847091897, "learning_rate": 5.932505560844766e-06, "loss": 0.5524, "step": 1857 }, { "epoch": 2.4842809364548497, "grad_norm": 0.8341188632079856, "learning_rate": 5.927920377243743e-06, "loss": 0.4994, "step": 1858 }, { "epoch": 2.48561872909699, "grad_norm": 0.775787977637086, "learning_rate": 5.923334385324809e-06, "loss": 0.5467, "step": 1859 }, { "epoch": 2.4869565217391303, "grad_norm": 1.0143459703773576, "learning_rate": 5.918747589082853e-06, "loss": 0.6139, "step": 1860 }, { "epoch": 2.488294314381271, "grad_norm": 1.0649248936276938, "learning_rate": 5.914159992513464e-06, "loss": 0.5796, "step": 1861 }, { "epoch": 2.4896321070234113, "grad_norm": 0.83652215777209, "learning_rate": 5.90957159961293e-06, "loss": 0.5246, "step": 1862 }, { "epoch": 2.4909698996655516, "grad_norm": 1.5631710254408853, "learning_rate": 5.904982414378233e-06, "loss": 0.6451, "step": 1863 }, { "epoch": 2.4923076923076923, "grad_norm": 1.2159338965395587, "learning_rate": 5.900392440807044e-06, "loss": 0.6777, "step": 1864 }, { "epoch": 2.4936454849498326, "grad_norm": 0.8891470593826795, "learning_rate": 5.895801682897721e-06, "loss": 0.5097, "step": 1865 }, { "epoch": 2.4949832775919734, "grad_norm": 0.799990844661961, "learning_rate": 5.891210144649303e-06, "loss": 0.4551, "step": 1866 }, { "epoch": 2.4963210702341136, "grad_norm": 1.1318716238397997, "learning_rate": 5.886617830061514e-06, "loss": 0.6138, "step": 1867 }, { "epoch": 2.4976588628762544, "grad_norm": 1.3302721879381265, "learning_rate": 5.88202474313475e-06, "loss": 0.5377, "step": 1868 }, { "epoch": 2.4989966555183947, "grad_norm": 1.7576426549294668, "learning_rate": 5.877430887870081e-06, "loss": 0.5815, "step": 1869 }, { "epoch": 2.500334448160535, "grad_norm": 1.3743697208245995, "learning_rate": 5.872836268269246e-06, "loss": 0.5824, "step": 1870 }, { "epoch": 2.5016722408026757, "grad_norm": 1.1374686265429248, "learning_rate": 5.8682408883346535e-06, "loss": 0.6237, "step": 1871 }, { "epoch": 2.503010033444816, "grad_norm": 0.9258833529694784, "learning_rate": 5.863644752069364e-06, "loss": 0.5791, "step": 1872 }, { "epoch": 2.5043478260869563, "grad_norm": 1.4899658404708753, "learning_rate": 5.859047863477112e-06, "loss": 0.5773, "step": 1873 }, { "epoch": 2.505685618729097, "grad_norm": 0.9702838060106603, "learning_rate": 5.854450226562274e-06, "loss": 0.6891, "step": 1874 }, { "epoch": 2.5070234113712373, "grad_norm": 1.0531786896919706, "learning_rate": 5.849851845329884e-06, "loss": 0.6481, "step": 1875 }, { "epoch": 2.508361204013378, "grad_norm": 0.9764124287461108, "learning_rate": 5.845252723785626e-06, "loss": 0.6512, "step": 1876 }, { "epoch": 2.5096989966555183, "grad_norm": 2.083301802318444, "learning_rate": 5.8406528659358234e-06, "loss": 0.6385, "step": 1877 }, { "epoch": 2.511036789297659, "grad_norm": 1.1590602407733872, "learning_rate": 5.836052275787448e-06, "loss": 0.4454, "step": 1878 }, { "epoch": 2.5123745819397993, "grad_norm": 1.0563024231179376, "learning_rate": 5.831450957348106e-06, "loss": 0.6255, "step": 1879 }, { "epoch": 2.5137123745819396, "grad_norm": 0.9626470816557001, "learning_rate": 5.826848914626035e-06, "loss": 0.5748, "step": 1880 }, { "epoch": 2.5150501672240804, "grad_norm": 1.475431345918029, "learning_rate": 5.822246151630109e-06, "loss": 0.6304, "step": 1881 }, { "epoch": 2.5163879598662207, "grad_norm": 0.9083819311461966, "learning_rate": 5.817642672369825e-06, "loss": 0.5061, "step": 1882 }, { "epoch": 2.517725752508361, "grad_norm": 0.8414690931413852, "learning_rate": 5.813038480855308e-06, "loss": 0.4739, "step": 1883 }, { "epoch": 2.5190635451505017, "grad_norm": 1.219296092564244, "learning_rate": 5.808433581097301e-06, "loss": 0.644, "step": 1884 }, { "epoch": 2.5204013377926424, "grad_norm": 0.9803859325763302, "learning_rate": 5.803827977107163e-06, "loss": 0.5659, "step": 1885 }, { "epoch": 2.5217391304347827, "grad_norm": 1.3254208420689444, "learning_rate": 5.799221672896868e-06, "loss": 0.62, "step": 1886 }, { "epoch": 2.523076923076923, "grad_norm": 1.5617562894914532, "learning_rate": 5.794614672479e-06, "loss": 0.5636, "step": 1887 }, { "epoch": 2.5244147157190637, "grad_norm": 1.1152280844504618, "learning_rate": 5.79000697986675e-06, "loss": 0.5922, "step": 1888 }, { "epoch": 2.525752508361204, "grad_norm": 1.2337843346307593, "learning_rate": 5.7853985990739115e-06, "loss": 0.53, "step": 1889 }, { "epoch": 2.5270903010033443, "grad_norm": 0.8848594597473145, "learning_rate": 5.780789534114875e-06, "loss": 0.584, "step": 1890 }, { "epoch": 2.528428093645485, "grad_norm": 0.9669493020075257, "learning_rate": 5.77617978900463e-06, "loss": 0.7038, "step": 1891 }, { "epoch": 2.5297658862876253, "grad_norm": 1.156972117303574, "learning_rate": 5.771569367758757e-06, "loss": 0.6443, "step": 1892 }, { "epoch": 2.5311036789297656, "grad_norm": 0.9241726001250324, "learning_rate": 5.766958274393428e-06, "loss": 0.5181, "step": 1893 }, { "epoch": 2.5324414715719064, "grad_norm": 1.2387551889524546, "learning_rate": 5.762346512925397e-06, "loss": 0.5801, "step": 1894 }, { "epoch": 2.533779264214047, "grad_norm": 1.1658874787845261, "learning_rate": 5.757734087372003e-06, "loss": 0.6106, "step": 1895 }, { "epoch": 2.5351170568561874, "grad_norm": 1.472343686066949, "learning_rate": 5.753121001751161e-06, "loss": 0.6514, "step": 1896 }, { "epoch": 2.5364548494983277, "grad_norm": 0.961377131750557, "learning_rate": 5.748507260081361e-06, "loss": 0.5916, "step": 1897 }, { "epoch": 2.5377926421404684, "grad_norm": 0.9845110995630391, "learning_rate": 5.743892866381668e-06, "loss": 0.653, "step": 1898 }, { "epoch": 2.5391304347826087, "grad_norm": 0.9223775204160666, "learning_rate": 5.739277824671711e-06, "loss": 0.6096, "step": 1899 }, { "epoch": 2.540468227424749, "grad_norm": 1.1112918454403296, "learning_rate": 5.734662138971686e-06, "loss": 0.6061, "step": 1900 }, { "epoch": 2.5418060200668897, "grad_norm": 0.9770418933098254, "learning_rate": 5.730045813302347e-06, "loss": 0.6844, "step": 1901 }, { "epoch": 2.54314381270903, "grad_norm": 0.9697702679093809, "learning_rate": 5.725428851685011e-06, "loss": 0.5607, "step": 1902 }, { "epoch": 2.5444816053511703, "grad_norm": 1.2502855100600674, "learning_rate": 5.720811258141541e-06, "loss": 0.7448, "step": 1903 }, { "epoch": 2.545819397993311, "grad_norm": 0.8639435612251279, "learning_rate": 5.716193036694359e-06, "loss": 0.5852, "step": 1904 }, { "epoch": 2.5471571906354518, "grad_norm": 0.9114794638414533, "learning_rate": 5.711574191366427e-06, "loss": 0.5683, "step": 1905 }, { "epoch": 2.548494983277592, "grad_norm": 0.7614509726859316, "learning_rate": 5.706954726181255e-06, "loss": 0.5535, "step": 1906 }, { "epoch": 2.5498327759197323, "grad_norm": 1.1630777290965921, "learning_rate": 5.70233464516289e-06, "loss": 0.6499, "step": 1907 }, { "epoch": 2.551170568561873, "grad_norm": 1.0134626505410418, "learning_rate": 5.697713952335918e-06, "loss": 0.5521, "step": 1908 }, { "epoch": 2.5525083612040134, "grad_norm": 1.2171974376161683, "learning_rate": 5.693092651725457e-06, "loss": 0.7312, "step": 1909 }, { "epoch": 2.5538461538461537, "grad_norm": 0.8549897881049024, "learning_rate": 5.688470747357153e-06, "loss": 0.555, "step": 1910 }, { "epoch": 2.5551839464882944, "grad_norm": 0.921421424483782, "learning_rate": 5.683848243257181e-06, "loss": 0.5787, "step": 1911 }, { "epoch": 2.5565217391304347, "grad_norm": 0.8200501544367992, "learning_rate": 5.679225143452233e-06, "loss": 0.5109, "step": 1912 }, { "epoch": 2.5578595317725754, "grad_norm": 1.2040330571416884, "learning_rate": 5.674601451969527e-06, "loss": 0.6266, "step": 1913 }, { "epoch": 2.5591973244147157, "grad_norm": 1.1940390758086359, "learning_rate": 5.669977172836791e-06, "loss": 0.6294, "step": 1914 }, { "epoch": 2.5605351170568564, "grad_norm": 0.99501782513288, "learning_rate": 5.66535231008227e-06, "loss": 0.6688, "step": 1915 }, { "epoch": 2.5618729096989967, "grad_norm": 0.9851168969353392, "learning_rate": 5.66072686773471e-06, "loss": 0.5103, "step": 1916 }, { "epoch": 2.563210702341137, "grad_norm": 0.9981603712080578, "learning_rate": 5.656100849823366e-06, "loss": 0.5675, "step": 1917 }, { "epoch": 2.5645484949832777, "grad_norm": 1.5683863975006889, "learning_rate": 5.651474260377998e-06, "loss": 0.6983, "step": 1918 }, { "epoch": 2.565886287625418, "grad_norm": 1.0435936825218302, "learning_rate": 5.646847103428859e-06, "loss": 0.6027, "step": 1919 }, { "epoch": 2.5672240802675583, "grad_norm": 1.031413827820235, "learning_rate": 5.642219383006696e-06, "loss": 0.5398, "step": 1920 }, { "epoch": 2.568561872909699, "grad_norm": 1.5218191119031557, "learning_rate": 5.63759110314275e-06, "loss": 0.6354, "step": 1921 }, { "epoch": 2.5698996655518394, "grad_norm": 0.8689814477272729, "learning_rate": 5.632962267868747e-06, "loss": 0.5612, "step": 1922 }, { "epoch": 2.57123745819398, "grad_norm": 1.3865051189328284, "learning_rate": 5.628332881216899e-06, "loss": 0.578, "step": 1923 }, { "epoch": 2.5725752508361204, "grad_norm": 1.0706828953725631, "learning_rate": 5.623702947219896e-06, "loss": 0.56, "step": 1924 }, { "epoch": 2.573913043478261, "grad_norm": 1.2959636978347653, "learning_rate": 5.619072469910907e-06, "loss": 0.6013, "step": 1925 }, { "epoch": 2.5752508361204014, "grad_norm": 1.0470836404623889, "learning_rate": 5.614441453323571e-06, "loss": 0.5443, "step": 1926 }, { "epoch": 2.5765886287625417, "grad_norm": 1.079733855776514, "learning_rate": 5.609809901492e-06, "loss": 0.574, "step": 1927 }, { "epoch": 2.5779264214046824, "grad_norm": 1.4269721436330771, "learning_rate": 5.605177818450772e-06, "loss": 0.7326, "step": 1928 }, { "epoch": 2.5792642140468227, "grad_norm": 1.1769001638348844, "learning_rate": 5.600545208234927e-06, "loss": 0.711, "step": 1929 }, { "epoch": 2.580602006688963, "grad_norm": 1.1719855207606389, "learning_rate": 5.595912074879961e-06, "loss": 0.6195, "step": 1930 }, { "epoch": 2.5819397993311037, "grad_norm": 1.0263120602537918, "learning_rate": 5.591278422421831e-06, "loss": 0.5581, "step": 1931 }, { "epoch": 2.583277591973244, "grad_norm": 1.1238018158091676, "learning_rate": 5.586644254896945e-06, "loss": 0.6074, "step": 1932 }, { "epoch": 2.5846153846153848, "grad_norm": 1.0377751074322032, "learning_rate": 5.5820095763421565e-06, "loss": 0.6028, "step": 1933 }, { "epoch": 2.585953177257525, "grad_norm": 1.3671772241642977, "learning_rate": 5.5773743907947674e-06, "loss": 0.559, "step": 1934 }, { "epoch": 2.587290969899666, "grad_norm": 1.2114251187019054, "learning_rate": 5.57273870229252e-06, "loss": 0.4906, "step": 1935 }, { "epoch": 2.588628762541806, "grad_norm": 0.9897289093185625, "learning_rate": 5.568102514873595e-06, "loss": 0.5446, "step": 1936 }, { "epoch": 2.5899665551839464, "grad_norm": 1.1092520126602599, "learning_rate": 5.5634658325766066e-06, "loss": 0.6393, "step": 1937 }, { "epoch": 2.591304347826087, "grad_norm": 1.4448247627586193, "learning_rate": 5.558828659440603e-06, "loss": 0.6297, "step": 1938 }, { "epoch": 2.5926421404682274, "grad_norm": 1.0382636886518575, "learning_rate": 5.5541909995050554e-06, "loss": 0.5326, "step": 1939 }, { "epoch": 2.5939799331103677, "grad_norm": 1.6330744348451927, "learning_rate": 5.549552856809865e-06, "loss": 0.5669, "step": 1940 }, { "epoch": 2.5953177257525084, "grad_norm": 1.2532962367632576, "learning_rate": 5.544914235395347e-06, "loss": 0.5903, "step": 1941 }, { "epoch": 2.5966555183946487, "grad_norm": 0.9093725008303252, "learning_rate": 5.540275139302241e-06, "loss": 0.5912, "step": 1942 }, { "epoch": 2.5979933110367894, "grad_norm": 1.1410901440424945, "learning_rate": 5.53563557257169e-06, "loss": 0.5123, "step": 1943 }, { "epoch": 2.5993311036789297, "grad_norm": 1.317342248464324, "learning_rate": 5.5309955392452585e-06, "loss": 0.6139, "step": 1944 }, { "epoch": 2.6006688963210705, "grad_norm": 0.8869967454559858, "learning_rate": 5.526355043364909e-06, "loss": 0.5494, "step": 1945 }, { "epoch": 2.6020066889632107, "grad_norm": 1.2153482522200467, "learning_rate": 5.521714088973012e-06, "loss": 0.6666, "step": 1946 }, { "epoch": 2.603344481605351, "grad_norm": 1.1200610867399363, "learning_rate": 5.517072680112332e-06, "loss": 0.6238, "step": 1947 }, { "epoch": 2.6046822742474918, "grad_norm": 0.9708105581308163, "learning_rate": 5.512430820826035e-06, "loss": 0.6644, "step": 1948 }, { "epoch": 2.606020066889632, "grad_norm": 1.0670577782522321, "learning_rate": 5.507788515157677e-06, "loss": 0.5661, "step": 1949 }, { "epoch": 2.6073578595317723, "grad_norm": 0.7585735929474905, "learning_rate": 5.503145767151201e-06, "loss": 0.5474, "step": 1950 }, { "epoch": 2.608695652173913, "grad_norm": 1.05643068325582, "learning_rate": 5.498502580850938e-06, "loss": 0.4516, "step": 1951 }, { "epoch": 2.6100334448160534, "grad_norm": 1.062596850761061, "learning_rate": 5.493858960301602e-06, "loss": 0.606, "step": 1952 }, { "epoch": 2.611371237458194, "grad_norm": 1.566620765733278, "learning_rate": 5.4892149095482815e-06, "loss": 0.5184, "step": 1953 }, { "epoch": 2.6127090301003344, "grad_norm": 0.8447849987780603, "learning_rate": 5.484570432636441e-06, "loss": 0.5883, "step": 1954 }, { "epoch": 2.614046822742475, "grad_norm": 1.0639837306416156, "learning_rate": 5.479925533611917e-06, "loss": 0.5665, "step": 1955 }, { "epoch": 2.6153846153846154, "grad_norm": 0.7897705261104728, "learning_rate": 5.475280216520913e-06, "loss": 0.4811, "step": 1956 }, { "epoch": 2.6167224080267557, "grad_norm": 0.9402338227431333, "learning_rate": 5.470634485409999e-06, "loss": 0.5483, "step": 1957 }, { "epoch": 2.6180602006688964, "grad_norm": 1.0307825435931932, "learning_rate": 5.465988344326103e-06, "loss": 0.6994, "step": 1958 }, { "epoch": 2.6193979933110367, "grad_norm": 1.3278336549605896, "learning_rate": 5.46134179731651e-06, "loss": 0.5963, "step": 1959 }, { "epoch": 2.620735785953177, "grad_norm": 1.1939277095263958, "learning_rate": 5.456694848428861e-06, "loss": 0.4898, "step": 1960 }, { "epoch": 2.6220735785953178, "grad_norm": 1.0962224100893851, "learning_rate": 5.452047501711144e-06, "loss": 0.5942, "step": 1961 }, { "epoch": 2.623411371237458, "grad_norm": 1.0098816597509446, "learning_rate": 5.4473997612116956e-06, "loss": 0.6006, "step": 1962 }, { "epoch": 2.624749163879599, "grad_norm": 1.1488144623324574, "learning_rate": 5.442751630979195e-06, "loss": 0.5744, "step": 1963 }, { "epoch": 2.626086956521739, "grad_norm": 1.1226845740255225, "learning_rate": 5.438103115062662e-06, "loss": 0.6405, "step": 1964 }, { "epoch": 2.62742474916388, "grad_norm": 1.1068923836820883, "learning_rate": 5.4334542175114495e-06, "loss": 0.5305, "step": 1965 }, { "epoch": 2.62876254180602, "grad_norm": 0.8919112368452808, "learning_rate": 5.428804942375243e-06, "loss": 0.5977, "step": 1966 }, { "epoch": 2.6301003344481604, "grad_norm": 0.9796557736062584, "learning_rate": 5.424155293704063e-06, "loss": 0.5619, "step": 1967 }, { "epoch": 2.631438127090301, "grad_norm": 0.9940003184898891, "learning_rate": 5.419505275548249e-06, "loss": 0.5694, "step": 1968 }, { "epoch": 2.6327759197324414, "grad_norm": 1.1293505672831796, "learning_rate": 5.414854891958464e-06, "loss": 0.4643, "step": 1969 }, { "epoch": 2.6341137123745817, "grad_norm": 1.2205698020409175, "learning_rate": 5.41020414698569e-06, "loss": 0.5752, "step": 1970 }, { "epoch": 2.6354515050167224, "grad_norm": 0.8894404323255728, "learning_rate": 5.40555304468122e-06, "loss": 0.5736, "step": 1971 }, { "epoch": 2.6367892976588627, "grad_norm": 1.217610152398822, "learning_rate": 5.400901589096667e-06, "loss": 0.6471, "step": 1972 }, { "epoch": 2.6381270903010035, "grad_norm": 1.2408126936573538, "learning_rate": 5.396249784283943e-06, "loss": 0.4848, "step": 1973 }, { "epoch": 2.6394648829431437, "grad_norm": 1.0813859228056333, "learning_rate": 5.391597634295269e-06, "loss": 0.5395, "step": 1974 }, { "epoch": 2.6408026755852845, "grad_norm": 1.1864131564861813, "learning_rate": 5.386945143183164e-06, "loss": 0.5806, "step": 1975 }, { "epoch": 2.6421404682274248, "grad_norm": 1.2280923684212108, "learning_rate": 5.382292315000448e-06, "loss": 0.5512, "step": 1976 }, { "epoch": 2.643478260869565, "grad_norm": 1.4446850799805027, "learning_rate": 5.377639153800229e-06, "loss": 0.6204, "step": 1977 }, { "epoch": 2.644816053511706, "grad_norm": 1.8237502175542395, "learning_rate": 5.37298566363591e-06, "loss": 0.5106, "step": 1978 }, { "epoch": 2.646153846153846, "grad_norm": 1.374976395463004, "learning_rate": 5.368331848561178e-06, "loss": 0.6377, "step": 1979 }, { "epoch": 2.6474916387959864, "grad_norm": 1.182240422965734, "learning_rate": 5.363677712630004e-06, "loss": 0.6455, "step": 1980 }, { "epoch": 2.648829431438127, "grad_norm": 1.3659586633486795, "learning_rate": 5.359023259896638e-06, "loss": 0.5777, "step": 1981 }, { "epoch": 2.650167224080268, "grad_norm": 0.9039581208376548, "learning_rate": 5.354368494415607e-06, "loss": 0.5376, "step": 1982 }, { "epoch": 2.651505016722408, "grad_norm": 1.007610764633232, "learning_rate": 5.34971342024171e-06, "loss": 0.5698, "step": 1983 }, { "epoch": 2.6528428093645484, "grad_norm": 0.9614420449759856, "learning_rate": 5.345058041430013e-06, "loss": 0.516, "step": 1984 }, { "epoch": 2.654180602006689, "grad_norm": 1.1755111896866182, "learning_rate": 5.3404023620358494e-06, "loss": 0.496, "step": 1985 }, { "epoch": 2.6555183946488294, "grad_norm": 1.1786906207774452, "learning_rate": 5.335746386114814e-06, "loss": 0.6294, "step": 1986 }, { "epoch": 2.6568561872909697, "grad_norm": 1.200344859573034, "learning_rate": 5.3310901177227615e-06, "loss": 0.6325, "step": 1987 }, { "epoch": 2.6581939799331105, "grad_norm": 0.9207058761476308, "learning_rate": 5.326433560915798e-06, "loss": 0.5474, "step": 1988 }, { "epoch": 2.6595317725752508, "grad_norm": 1.0381436565745867, "learning_rate": 5.321776719750283e-06, "loss": 0.6313, "step": 1989 }, { "epoch": 2.660869565217391, "grad_norm": 0.8393958596502755, "learning_rate": 5.317119598282823e-06, "loss": 0.4866, "step": 1990 }, { "epoch": 2.6622073578595318, "grad_norm": 1.1321523100922568, "learning_rate": 5.31246220057027e-06, "loss": 0.5314, "step": 1991 }, { "epoch": 2.6635451505016725, "grad_norm": 0.9406041834358588, "learning_rate": 5.3078045306697154e-06, "loss": 0.7245, "step": 1992 }, { "epoch": 2.664882943143813, "grad_norm": 0.8492830675522163, "learning_rate": 5.303146592638487e-06, "loss": 0.5982, "step": 1993 }, { "epoch": 2.666220735785953, "grad_norm": 1.0908905972460141, "learning_rate": 5.298488390534148e-06, "loss": 0.5831, "step": 1994 }, { "epoch": 2.667558528428094, "grad_norm": 0.9007009045222826, "learning_rate": 5.29382992841449e-06, "loss": 0.6283, "step": 1995 }, { "epoch": 2.668896321070234, "grad_norm": 0.864692317983929, "learning_rate": 5.289171210337531e-06, "loss": 0.5883, "step": 1996 }, { "epoch": 2.6702341137123744, "grad_norm": 0.7881105944616883, "learning_rate": 5.284512240361516e-06, "loss": 0.5807, "step": 1997 }, { "epoch": 2.671571906354515, "grad_norm": 0.8697030970894958, "learning_rate": 5.279853022544904e-06, "loss": 0.6177, "step": 1998 }, { "epoch": 2.6729096989966554, "grad_norm": 1.5860601427960714, "learning_rate": 5.275193560946372e-06, "loss": 0.5342, "step": 1999 }, { "epoch": 2.6742474916387957, "grad_norm": 0.8999175129949197, "learning_rate": 5.27053385962481e-06, "loss": 0.605, "step": 2000 }, { "epoch": 2.6755852842809364, "grad_norm": 0.8755213324284448, "learning_rate": 5.265873922639315e-06, "loss": 0.5843, "step": 2001 }, { "epoch": 2.676923076923077, "grad_norm": 1.0846690527550535, "learning_rate": 5.261213754049193e-06, "loss": 0.6458, "step": 2002 }, { "epoch": 2.6782608695652175, "grad_norm": 1.454273896089881, "learning_rate": 5.2565533579139484e-06, "loss": 0.6544, "step": 2003 }, { "epoch": 2.6795986622073578, "grad_norm": 0.9085219199866357, "learning_rate": 5.251892738293285e-06, "loss": 0.6016, "step": 2004 }, { "epoch": 2.6809364548494985, "grad_norm": 0.81121410157855, "learning_rate": 5.247231899247099e-06, "loss": 0.6496, "step": 2005 }, { "epoch": 2.682274247491639, "grad_norm": 1.193776395824329, "learning_rate": 5.242570844835484e-06, "loss": 0.6256, "step": 2006 }, { "epoch": 2.683612040133779, "grad_norm": 0.9157047987891033, "learning_rate": 5.237909579118713e-06, "loss": 0.4938, "step": 2007 }, { "epoch": 2.68494983277592, "grad_norm": 0.9226043497715147, "learning_rate": 5.233248106157248e-06, "loss": 0.5415, "step": 2008 }, { "epoch": 2.68628762541806, "grad_norm": 1.8554387671094046, "learning_rate": 5.228586430011732e-06, "loss": 0.6489, "step": 2009 }, { "epoch": 2.687625418060201, "grad_norm": 0.9827728489266289, "learning_rate": 5.223924554742982e-06, "loss": 0.6292, "step": 2010 }, { "epoch": 2.688963210702341, "grad_norm": 0.8423979140548966, "learning_rate": 5.21926248441199e-06, "loss": 0.5353, "step": 2011 }, { "epoch": 2.690301003344482, "grad_norm": 1.0442506637941174, "learning_rate": 5.21460022307992e-06, "loss": 0.5004, "step": 2012 }, { "epoch": 2.691638795986622, "grad_norm": 0.9461502861668688, "learning_rate": 5.209937774808098e-06, "loss": 0.5493, "step": 2013 }, { "epoch": 2.6929765886287624, "grad_norm": 0.8784964809838524, "learning_rate": 5.205275143658018e-06, "loss": 0.5056, "step": 2014 }, { "epoch": 2.694314381270903, "grad_norm": 0.932027474014103, "learning_rate": 5.2006123336913275e-06, "loss": 0.6193, "step": 2015 }, { "epoch": 2.6956521739130435, "grad_norm": 1.53473267894358, "learning_rate": 5.195949348969833e-06, "loss": 0.6269, "step": 2016 }, { "epoch": 2.6969899665551837, "grad_norm": 1.1577168393986497, "learning_rate": 5.191286193555496e-06, "loss": 0.5042, "step": 2017 }, { "epoch": 2.6983277591973245, "grad_norm": 0.8973289895800044, "learning_rate": 5.186622871510421e-06, "loss": 0.6781, "step": 2018 }, { "epoch": 2.6996655518394648, "grad_norm": 0.8002338042207578, "learning_rate": 5.181959386896862e-06, "loss": 0.4705, "step": 2019 }, { "epoch": 2.7010033444816055, "grad_norm": 0.9773805577565832, "learning_rate": 5.177295743777212e-06, "loss": 0.6078, "step": 2020 }, { "epoch": 2.702341137123746, "grad_norm": 1.5388694750303569, "learning_rate": 5.172631946214003e-06, "loss": 0.6829, "step": 2021 }, { "epoch": 2.7036789297658865, "grad_norm": 1.0409826079939015, "learning_rate": 5.167967998269902e-06, "loss": 0.6146, "step": 2022 }, { "epoch": 2.705016722408027, "grad_norm": 1.062507990130598, "learning_rate": 5.1633039040077046e-06, "loss": 0.5668, "step": 2023 }, { "epoch": 2.706354515050167, "grad_norm": 0.9010147705804853, "learning_rate": 5.15863966749034e-06, "loss": 0.649, "step": 2024 }, { "epoch": 2.707692307692308, "grad_norm": 1.2890134079984872, "learning_rate": 5.153975292780852e-06, "loss": 0.6607, "step": 2025 }, { "epoch": 2.709030100334448, "grad_norm": 1.2987674318968079, "learning_rate": 5.149310783942414e-06, "loss": 0.5124, "step": 2026 }, { "epoch": 2.7103678929765884, "grad_norm": 1.3013669834363044, "learning_rate": 5.144646145038311e-06, "loss": 0.5662, "step": 2027 }, { "epoch": 2.711705685618729, "grad_norm": 1.2028064167042873, "learning_rate": 5.139981380131943e-06, "loss": 0.6235, "step": 2028 }, { "epoch": 2.7130434782608694, "grad_norm": 0.9151823295247117, "learning_rate": 5.135316493286818e-06, "loss": 0.5881, "step": 2029 }, { "epoch": 2.71438127090301, "grad_norm": 0.9072613126098197, "learning_rate": 5.1306514885665524e-06, "loss": 0.5963, "step": 2030 }, { "epoch": 2.7157190635451505, "grad_norm": 0.9710028153998705, "learning_rate": 5.125986370034862e-06, "loss": 0.5153, "step": 2031 }, { "epoch": 2.717056856187291, "grad_norm": 0.8605256125706686, "learning_rate": 5.121321141755568e-06, "loss": 0.6791, "step": 2032 }, { "epoch": 2.7183946488294315, "grad_norm": 1.1202196028593971, "learning_rate": 5.116655807792581e-06, "loss": 0.6646, "step": 2033 }, { "epoch": 2.719732441471572, "grad_norm": 1.0134481949339167, "learning_rate": 5.111990372209906e-06, "loss": 0.6041, "step": 2034 }, { "epoch": 2.7210702341137125, "grad_norm": 0.8344662596198028, "learning_rate": 5.107324839071638e-06, "loss": 0.6618, "step": 2035 }, { "epoch": 2.722408026755853, "grad_norm": 0.9021649658639838, "learning_rate": 5.102659212441953e-06, "loss": 0.5051, "step": 2036 }, { "epoch": 2.723745819397993, "grad_norm": 0.8748608117866038, "learning_rate": 5.097993496385112e-06, "loss": 0.5007, "step": 2037 }, { "epoch": 2.725083612040134, "grad_norm": 1.4337729576240683, "learning_rate": 5.093327694965453e-06, "loss": 0.577, "step": 2038 }, { "epoch": 2.726421404682274, "grad_norm": 0.9697575906724798, "learning_rate": 5.088661812247389e-06, "loss": 0.6068, "step": 2039 }, { "epoch": 2.727759197324415, "grad_norm": 1.14987560158658, "learning_rate": 5.083995852295402e-06, "loss": 0.5628, "step": 2040 }, { "epoch": 2.729096989966555, "grad_norm": 0.782821566091427, "learning_rate": 5.07932981917404e-06, "loss": 0.5472, "step": 2041 }, { "epoch": 2.730434782608696, "grad_norm": 0.900033717799136, "learning_rate": 5.0746637169479205e-06, "loss": 0.5393, "step": 2042 }, { "epoch": 2.731772575250836, "grad_norm": 1.0178715951926072, "learning_rate": 5.069997549681718e-06, "loss": 0.4995, "step": 2043 }, { "epoch": 2.7331103678929765, "grad_norm": 0.8807117350400586, "learning_rate": 5.06533132144016e-06, "loss": 0.6397, "step": 2044 }, { "epoch": 2.734448160535117, "grad_norm": 1.2121222961592455, "learning_rate": 5.060665036288034e-06, "loss": 0.6495, "step": 2045 }, { "epoch": 2.7357859531772575, "grad_norm": 0.8066839767608923, "learning_rate": 5.0559986982901695e-06, "loss": 0.5721, "step": 2046 }, { "epoch": 2.7371237458193978, "grad_norm": 1.0638562617833194, "learning_rate": 5.05133231151145e-06, "loss": 0.6064, "step": 2047 }, { "epoch": 2.7384615384615385, "grad_norm": 1.0317802646732233, "learning_rate": 5.046665880016795e-06, "loss": 0.563, "step": 2048 }, { "epoch": 2.739799331103679, "grad_norm": 1.1246998306415048, "learning_rate": 5.041999407871168e-06, "loss": 0.6108, "step": 2049 }, { "epoch": 2.7411371237458195, "grad_norm": 1.075917853280556, "learning_rate": 5.037332899139563e-06, "loss": 0.6249, "step": 2050 }, { "epoch": 2.74247491638796, "grad_norm": 1.1388656981695928, "learning_rate": 5.0326663578870095e-06, "loss": 0.5425, "step": 2051 }, { "epoch": 2.7438127090301005, "grad_norm": 1.0239170027836961, "learning_rate": 5.0279997881785635e-06, "loss": 0.4665, "step": 2052 }, { "epoch": 2.745150501672241, "grad_norm": 0.8307971581122151, "learning_rate": 5.0233331940793074e-06, "loss": 0.5446, "step": 2053 }, { "epoch": 2.746488294314381, "grad_norm": 1.21890193841614, "learning_rate": 5.018666579654342e-06, "loss": 0.6012, "step": 2054 }, { "epoch": 2.747826086956522, "grad_norm": 1.3068537864993321, "learning_rate": 5.01399994896879e-06, "loss": 0.6689, "step": 2055 }, { "epoch": 2.749163879598662, "grad_norm": 0.9380993092418158, "learning_rate": 5.009333306087784e-06, "loss": 0.5478, "step": 2056 }, { "epoch": 2.7505016722408024, "grad_norm": 1.0377963938781782, "learning_rate": 5.00466665507647e-06, "loss": 0.5949, "step": 2057 }, { "epoch": 2.751839464882943, "grad_norm": 0.9437363600457082, "learning_rate": 5e-06, "loss": 0.6172, "step": 2058 }, { "epoch": 2.7531772575250835, "grad_norm": 1.417797510392653, "learning_rate": 4.995333344923531e-06, "loss": 0.5892, "step": 2059 }, { "epoch": 2.754515050167224, "grad_norm": 1.2226122788713263, "learning_rate": 4.990666693912218e-06, "loss": 0.5497, "step": 2060 }, { "epoch": 2.7558528428093645, "grad_norm": 1.089756497437287, "learning_rate": 4.986000051031212e-06, "loss": 0.5027, "step": 2061 }, { "epoch": 2.7571906354515052, "grad_norm": 1.6387661983643826, "learning_rate": 4.9813334203456595e-06, "loss": 0.4958, "step": 2062 }, { "epoch": 2.7585284280936455, "grad_norm": 1.0611816199898967, "learning_rate": 4.976666805920694e-06, "loss": 0.574, "step": 2063 }, { "epoch": 2.759866220735786, "grad_norm": 1.001572515898327, "learning_rate": 4.972000211821438e-06, "loss": 0.5507, "step": 2064 }, { "epoch": 2.7612040133779265, "grad_norm": 1.4057230701482746, "learning_rate": 4.967333642112992e-06, "loss": 0.6625, "step": 2065 }, { "epoch": 2.762541806020067, "grad_norm": 1.3543636860612396, "learning_rate": 4.9626671008604385e-06, "loss": 0.6031, "step": 2066 }, { "epoch": 2.763879598662207, "grad_norm": 0.8387513533413449, "learning_rate": 4.958000592128834e-06, "loss": 0.5884, "step": 2067 }, { "epoch": 2.765217391304348, "grad_norm": 1.2023422469610312, "learning_rate": 4.953334119983206e-06, "loss": 0.5592, "step": 2068 }, { "epoch": 2.766555183946488, "grad_norm": 1.166661143275948, "learning_rate": 4.948667688488552e-06, "loss": 0.6466, "step": 2069 }, { "epoch": 2.767892976588629, "grad_norm": 1.1176032377214966, "learning_rate": 4.944001301709832e-06, "loss": 0.5977, "step": 2070 }, { "epoch": 2.769230769230769, "grad_norm": 1.0720060898339636, "learning_rate": 4.9393349637119695e-06, "loss": 0.5057, "step": 2071 }, { "epoch": 2.77056856187291, "grad_norm": 1.1820334059775977, "learning_rate": 4.934668678559842e-06, "loss": 0.6165, "step": 2072 }, { "epoch": 2.77190635451505, "grad_norm": 0.8915205549475206, "learning_rate": 4.930002450318282e-06, "loss": 0.619, "step": 2073 }, { "epoch": 2.7732441471571905, "grad_norm": 0.9903302662619633, "learning_rate": 4.925336283052079e-06, "loss": 0.5959, "step": 2074 }, { "epoch": 2.774581939799331, "grad_norm": 1.095280640007386, "learning_rate": 4.9206701808259605e-06, "loss": 0.611, "step": 2075 }, { "epoch": 2.7759197324414715, "grad_norm": 1.108190519349061, "learning_rate": 4.9160041477046e-06, "loss": 0.546, "step": 2076 }, { "epoch": 2.777257525083612, "grad_norm": 0.8421677435388946, "learning_rate": 4.911338187752612e-06, "loss": 0.517, "step": 2077 }, { "epoch": 2.7785953177257525, "grad_norm": 0.9937976852910495, "learning_rate": 4.906672305034548e-06, "loss": 0.5521, "step": 2078 }, { "epoch": 2.779933110367893, "grad_norm": 1.2531988790969761, "learning_rate": 4.9020065036148885e-06, "loss": 0.5844, "step": 2079 }, { "epoch": 2.7812709030100335, "grad_norm": 0.9220711989828273, "learning_rate": 4.8973407875580485e-06, "loss": 0.5599, "step": 2080 }, { "epoch": 2.782608695652174, "grad_norm": 0.8319620481005386, "learning_rate": 4.892675160928364e-06, "loss": 0.5808, "step": 2081 }, { "epoch": 2.7839464882943146, "grad_norm": 1.339133661831721, "learning_rate": 4.888009627790095e-06, "loss": 0.581, "step": 2082 }, { "epoch": 2.785284280936455, "grad_norm": 1.2031861792283889, "learning_rate": 4.8833441922074194e-06, "loss": 0.5457, "step": 2083 }, { "epoch": 2.786622073578595, "grad_norm": 1.2725489643756243, "learning_rate": 4.878678858244432e-06, "loss": 0.6418, "step": 2084 }, { "epoch": 2.787959866220736, "grad_norm": 0.8643787462377233, "learning_rate": 4.874013629965138e-06, "loss": 0.5117, "step": 2085 }, { "epoch": 2.789297658862876, "grad_norm": 1.180397939206015, "learning_rate": 4.869348511433449e-06, "loss": 0.6936, "step": 2086 }, { "epoch": 2.7906354515050165, "grad_norm": 1.227574435877477, "learning_rate": 4.864683506713183e-06, "loss": 0.657, "step": 2087 }, { "epoch": 2.791973244147157, "grad_norm": 1.1832095934152427, "learning_rate": 4.860018619868058e-06, "loss": 0.6206, "step": 2088 }, { "epoch": 2.793311036789298, "grad_norm": 1.1421837692812469, "learning_rate": 4.85535385496169e-06, "loss": 0.5469, "step": 2089 }, { "epoch": 2.794648829431438, "grad_norm": 1.07916010708475, "learning_rate": 4.850689216057587e-06, "loss": 0.6382, "step": 2090 }, { "epoch": 2.7959866220735785, "grad_norm": 1.3065895451919691, "learning_rate": 4.846024707219149e-06, "loss": 0.6859, "step": 2091 }, { "epoch": 2.7973244147157192, "grad_norm": 1.0047199062172782, "learning_rate": 4.841360332509663e-06, "loss": 0.608, "step": 2092 }, { "epoch": 2.7986622073578595, "grad_norm": 1.0433946467952522, "learning_rate": 4.836696095992296e-06, "loss": 0.5668, "step": 2093 }, { "epoch": 2.8, "grad_norm": 1.2948355183563725, "learning_rate": 4.8320320017301e-06, "loss": 0.54, "step": 2094 }, { "epoch": 2.8013377926421406, "grad_norm": 1.3910839335886336, "learning_rate": 4.827368053785999e-06, "loss": 0.697, "step": 2095 }, { "epoch": 2.802675585284281, "grad_norm": 1.1850979506357058, "learning_rate": 4.82270425622279e-06, "loss": 0.6252, "step": 2096 }, { "epoch": 2.804013377926421, "grad_norm": 0.8209442420899352, "learning_rate": 4.818040613103139e-06, "loss": 0.5587, "step": 2097 }, { "epoch": 2.805351170568562, "grad_norm": 0.9084240609940615, "learning_rate": 4.81337712848958e-06, "loss": 0.5118, "step": 2098 }, { "epoch": 2.8066889632107026, "grad_norm": 1.0696173987062316, "learning_rate": 4.808713806444506e-06, "loss": 0.6162, "step": 2099 }, { "epoch": 2.808026755852843, "grad_norm": 0.87273237585413, "learning_rate": 4.804050651030168e-06, "loss": 0.6033, "step": 2100 }, { "epoch": 2.809364548494983, "grad_norm": 0.8177888398596578, "learning_rate": 4.799387666308675e-06, "loss": 0.5948, "step": 2101 }, { "epoch": 2.810702341137124, "grad_norm": 1.3951847601899336, "learning_rate": 4.794724856341985e-06, "loss": 0.5097, "step": 2102 }, { "epoch": 2.812040133779264, "grad_norm": 1.0360612565977143, "learning_rate": 4.790062225191902e-06, "loss": 0.6864, "step": 2103 }, { "epoch": 2.8133779264214045, "grad_norm": 1.179478065458777, "learning_rate": 4.785399776920081e-06, "loss": 0.6018, "step": 2104 }, { "epoch": 2.8147157190635452, "grad_norm": 1.2887835277685575, "learning_rate": 4.780737515588011e-06, "loss": 0.4865, "step": 2105 }, { "epoch": 2.8160535117056855, "grad_norm": 0.7928142267262915, "learning_rate": 4.77607544525702e-06, "loss": 0.5578, "step": 2106 }, { "epoch": 2.8173913043478263, "grad_norm": 1.1279862958052451, "learning_rate": 4.77141356998827e-06, "loss": 0.6346, "step": 2107 }, { "epoch": 2.8187290969899665, "grad_norm": 0.9373306389049352, "learning_rate": 4.7667518938427534e-06, "loss": 0.6354, "step": 2108 }, { "epoch": 2.8200668896321073, "grad_norm": 0.8733032502174533, "learning_rate": 4.762090420881289e-06, "loss": 0.5633, "step": 2109 }, { "epoch": 2.8214046822742476, "grad_norm": 0.9196502123006329, "learning_rate": 4.757429155164518e-06, "loss": 0.5062, "step": 2110 }, { "epoch": 2.822742474916388, "grad_norm": 1.2262005401946583, "learning_rate": 4.752768100752902e-06, "loss": 0.5392, "step": 2111 }, { "epoch": 2.8240802675585286, "grad_norm": 1.8033962315275731, "learning_rate": 4.748107261706716e-06, "loss": 0.6076, "step": 2112 }, { "epoch": 2.825418060200669, "grad_norm": 0.8232941811001306, "learning_rate": 4.7434466420860515e-06, "loss": 0.5585, "step": 2113 }, { "epoch": 2.826755852842809, "grad_norm": 0.7681730940698603, "learning_rate": 4.7387862459508074e-06, "loss": 0.4815, "step": 2114 }, { "epoch": 2.82809364548495, "grad_norm": 0.9164496815775697, "learning_rate": 4.734126077360685e-06, "loss": 0.5584, "step": 2115 }, { "epoch": 2.82943143812709, "grad_norm": 0.9501042046858824, "learning_rate": 4.729466140375192e-06, "loss": 0.5848, "step": 2116 }, { "epoch": 2.830769230769231, "grad_norm": 0.998313718344, "learning_rate": 4.724806439053629e-06, "loss": 0.5868, "step": 2117 }, { "epoch": 2.832107023411371, "grad_norm": 0.920322424375464, "learning_rate": 4.720146977455098e-06, "loss": 0.62, "step": 2118 }, { "epoch": 2.833444816053512, "grad_norm": 0.9555316602160594, "learning_rate": 4.715487759638486e-06, "loss": 0.5715, "step": 2119 }, { "epoch": 2.8347826086956522, "grad_norm": 1.166656078451315, "learning_rate": 4.7108287896624695e-06, "loss": 0.6183, "step": 2120 }, { "epoch": 2.8361204013377925, "grad_norm": 0.8302053382438687, "learning_rate": 4.706170071585513e-06, "loss": 0.5247, "step": 2121 }, { "epoch": 2.8374581939799333, "grad_norm": 0.9152876930268828, "learning_rate": 4.7015116094658544e-06, "loss": 0.4215, "step": 2122 }, { "epoch": 2.8387959866220736, "grad_norm": 1.1064316542984916, "learning_rate": 4.6968534073615145e-06, "loss": 0.535, "step": 2123 }, { "epoch": 2.840133779264214, "grad_norm": 0.8623068871930764, "learning_rate": 4.692195469330286e-06, "loss": 0.509, "step": 2124 }, { "epoch": 2.8414715719063546, "grad_norm": 1.3092149324961735, "learning_rate": 4.687537799429731e-06, "loss": 0.6349, "step": 2125 }, { "epoch": 2.842809364548495, "grad_norm": 1.2046744685846054, "learning_rate": 4.682880401717178e-06, "loss": 0.7453, "step": 2126 }, { "epoch": 2.8441471571906356, "grad_norm": 1.05240455473781, "learning_rate": 4.678223280249718e-06, "loss": 0.5775, "step": 2127 }, { "epoch": 2.845484949832776, "grad_norm": 0.8358150415865108, "learning_rate": 4.673566439084204e-06, "loss": 0.5895, "step": 2128 }, { "epoch": 2.8468227424749166, "grad_norm": 0.8790516205033388, "learning_rate": 4.66890988227724e-06, "loss": 0.4581, "step": 2129 }, { "epoch": 2.848160535117057, "grad_norm": 0.7714612805814635, "learning_rate": 4.664253613885187e-06, "loss": 0.6173, "step": 2130 }, { "epoch": 2.849498327759197, "grad_norm": 1.5858681965145343, "learning_rate": 4.659597637964153e-06, "loss": 0.6665, "step": 2131 }, { "epoch": 2.850836120401338, "grad_norm": 1.3627405952194034, "learning_rate": 4.65494195856999e-06, "loss": 0.7479, "step": 2132 }, { "epoch": 2.8521739130434782, "grad_norm": 0.9704832998601701, "learning_rate": 4.650286579758291e-06, "loss": 0.5871, "step": 2133 }, { "epoch": 2.8535117056856185, "grad_norm": 0.8163247293146708, "learning_rate": 4.645631505584393e-06, "loss": 0.5322, "step": 2134 }, { "epoch": 2.8548494983277592, "grad_norm": 1.2209387220363292, "learning_rate": 4.640976740103363e-06, "loss": 0.7056, "step": 2135 }, { "epoch": 2.8561872909698995, "grad_norm": 0.9357350688472174, "learning_rate": 4.636322287369997e-06, "loss": 0.5131, "step": 2136 }, { "epoch": 2.8575250836120403, "grad_norm": 1.0081632395064035, "learning_rate": 4.6316681514388235e-06, "loss": 0.5331, "step": 2137 }, { "epoch": 2.8588628762541806, "grad_norm": 1.0634717973508458, "learning_rate": 4.6270143363640914e-06, "loss": 0.5761, "step": 2138 }, { "epoch": 2.8602006688963213, "grad_norm": 1.156367604273604, "learning_rate": 4.622360846199772e-06, "loss": 0.6899, "step": 2139 }, { "epoch": 2.8615384615384616, "grad_norm": 0.9853242465729533, "learning_rate": 4.617707684999554e-06, "loss": 0.5452, "step": 2140 }, { "epoch": 2.862876254180602, "grad_norm": 1.048297664662054, "learning_rate": 4.613054856816837e-06, "loss": 0.4972, "step": 2141 }, { "epoch": 2.8642140468227426, "grad_norm": 1.1398388411582254, "learning_rate": 4.608402365704734e-06, "loss": 0.4816, "step": 2142 }, { "epoch": 2.865551839464883, "grad_norm": 1.406893159885874, "learning_rate": 4.603750215716057e-06, "loss": 0.5989, "step": 2143 }, { "epoch": 2.866889632107023, "grad_norm": 1.4206849973764781, "learning_rate": 4.599098410903334e-06, "loss": 0.6814, "step": 2144 }, { "epoch": 2.868227424749164, "grad_norm": 1.4718344946892945, "learning_rate": 4.594446955318781e-06, "loss": 0.541, "step": 2145 }, { "epoch": 2.869565217391304, "grad_norm": 1.0747024393829354, "learning_rate": 4.589795853014313e-06, "loss": 0.5979, "step": 2146 }, { "epoch": 2.870903010033445, "grad_norm": 0.8699995316040057, "learning_rate": 4.585145108041538e-06, "loss": 0.5897, "step": 2147 }, { "epoch": 2.8722408026755852, "grad_norm": 1.034298854233283, "learning_rate": 4.580494724451752e-06, "loss": 0.5518, "step": 2148 }, { "epoch": 2.873578595317726, "grad_norm": 1.5716370057195108, "learning_rate": 4.575844706295938e-06, "loss": 0.6178, "step": 2149 }, { "epoch": 2.8749163879598663, "grad_norm": 1.5020513974564376, "learning_rate": 4.5711950576247585e-06, "loss": 0.629, "step": 2150 }, { "epoch": 2.8762541806020065, "grad_norm": 1.1490530749917591, "learning_rate": 4.566545782488554e-06, "loss": 0.6247, "step": 2151 }, { "epoch": 2.8775919732441473, "grad_norm": 1.0966051615122723, "learning_rate": 4.5618968849373415e-06, "loss": 0.5413, "step": 2152 }, { "epoch": 2.8789297658862876, "grad_norm": 1.408342869088005, "learning_rate": 4.557248369020806e-06, "loss": 0.622, "step": 2153 }, { "epoch": 2.880267558528428, "grad_norm": 1.062223462007811, "learning_rate": 4.552600238788306e-06, "loss": 0.6069, "step": 2154 }, { "epoch": 2.8816053511705686, "grad_norm": 1.0394842472411006, "learning_rate": 4.5479524982888575e-06, "loss": 0.5729, "step": 2155 }, { "epoch": 2.882943143812709, "grad_norm": 0.9275795319121191, "learning_rate": 4.543305151571141e-06, "loss": 0.5909, "step": 2156 }, { "epoch": 2.8842809364548496, "grad_norm": 0.958078611874037, "learning_rate": 4.53865820268349e-06, "loss": 0.4882, "step": 2157 }, { "epoch": 2.88561872909699, "grad_norm": 0.9354590235473943, "learning_rate": 4.534011655673898e-06, "loss": 0.5814, "step": 2158 }, { "epoch": 2.8869565217391306, "grad_norm": 1.2663120255935394, "learning_rate": 4.529365514590002e-06, "loss": 0.6023, "step": 2159 }, { "epoch": 2.888294314381271, "grad_norm": 0.8324597491796409, "learning_rate": 4.524719783479088e-06, "loss": 0.6044, "step": 2160 }, { "epoch": 2.8896321070234112, "grad_norm": 0.8856291462547768, "learning_rate": 4.5200744663880856e-06, "loss": 0.6166, "step": 2161 }, { "epoch": 2.890969899665552, "grad_norm": 1.1147454774509022, "learning_rate": 4.515429567363562e-06, "loss": 0.6138, "step": 2162 }, { "epoch": 2.8923076923076922, "grad_norm": 1.237654980571989, "learning_rate": 4.510785090451719e-06, "loss": 0.5001, "step": 2163 }, { "epoch": 2.8936454849498325, "grad_norm": 1.2144393027182658, "learning_rate": 4.506141039698398e-06, "loss": 0.5716, "step": 2164 }, { "epoch": 2.8949832775919733, "grad_norm": 1.1577226616446807, "learning_rate": 4.501497419149062e-06, "loss": 0.5708, "step": 2165 }, { "epoch": 2.8963210702341136, "grad_norm": 0.9679404641939412, "learning_rate": 4.4968542328488e-06, "loss": 0.5728, "step": 2166 }, { "epoch": 2.8976588628762543, "grad_norm": 0.9679444101318684, "learning_rate": 4.492211484842324e-06, "loss": 0.6142, "step": 2167 }, { "epoch": 2.8989966555183946, "grad_norm": 1.1693183291559908, "learning_rate": 4.4875691791739655e-06, "loss": 0.5117, "step": 2168 }, { "epoch": 2.9003344481605353, "grad_norm": 0.9920366068700693, "learning_rate": 4.482927319887669e-06, "loss": 0.6262, "step": 2169 }, { "epoch": 2.9016722408026756, "grad_norm": 0.879027258612733, "learning_rate": 4.478285911026989e-06, "loss": 0.4378, "step": 2170 }, { "epoch": 2.903010033444816, "grad_norm": 0.953524815094448, "learning_rate": 4.4736449566350924e-06, "loss": 0.611, "step": 2171 }, { "epoch": 2.9043478260869566, "grad_norm": 1.1929444534227307, "learning_rate": 4.469004460754743e-06, "loss": 0.5696, "step": 2172 }, { "epoch": 2.905685618729097, "grad_norm": 1.1836735586344362, "learning_rate": 4.46436442742831e-06, "loss": 0.5668, "step": 2173 }, { "epoch": 2.907023411371237, "grad_norm": 0.9609667685008744, "learning_rate": 4.45972486069776e-06, "loss": 0.6111, "step": 2174 }, { "epoch": 2.908361204013378, "grad_norm": 1.035819001075089, "learning_rate": 4.455085764604653e-06, "loss": 0.5902, "step": 2175 }, { "epoch": 2.9096989966555182, "grad_norm": 1.033387671745112, "learning_rate": 4.450447143190136e-06, "loss": 0.6227, "step": 2176 }, { "epoch": 2.911036789297659, "grad_norm": 1.4411231048097455, "learning_rate": 4.445809000494945e-06, "loss": 0.5689, "step": 2177 }, { "epoch": 2.9123745819397993, "grad_norm": 0.9155502916289948, "learning_rate": 4.441171340559399e-06, "loss": 0.5816, "step": 2178 }, { "epoch": 2.91371237458194, "grad_norm": 0.8598864542763828, "learning_rate": 4.436534167423395e-06, "loss": 0.6075, "step": 2179 }, { "epoch": 2.9150501672240803, "grad_norm": 0.9461660152897475, "learning_rate": 4.431897485126408e-06, "loss": 0.5779, "step": 2180 }, { "epoch": 2.9163879598662206, "grad_norm": 0.7819771968461311, "learning_rate": 4.427261297707482e-06, "loss": 0.4997, "step": 2181 }, { "epoch": 2.9177257525083613, "grad_norm": 0.9764699422499212, "learning_rate": 4.422625609205235e-06, "loss": 0.5678, "step": 2182 }, { "epoch": 2.9190635451505016, "grad_norm": 0.8593459328465577, "learning_rate": 4.417990423657845e-06, "loss": 0.6349, "step": 2183 }, { "epoch": 2.920401337792642, "grad_norm": 1.3243224181000994, "learning_rate": 4.413355745103057e-06, "loss": 0.5146, "step": 2184 }, { "epoch": 2.9217391304347826, "grad_norm": 1.5159694707730809, "learning_rate": 4.40872157757817e-06, "loss": 0.684, "step": 2185 }, { "epoch": 2.9230769230769234, "grad_norm": 0.8515993974081016, "learning_rate": 4.404087925120041e-06, "loss": 0.6429, "step": 2186 }, { "epoch": 2.9244147157190636, "grad_norm": 1.4667991003647527, "learning_rate": 4.399454791765076e-06, "loss": 0.6858, "step": 2187 }, { "epoch": 2.925752508361204, "grad_norm": 1.0302793289747565, "learning_rate": 4.3948221815492294e-06, "loss": 0.583, "step": 2188 }, { "epoch": 2.9270903010033447, "grad_norm": 0.8955462409395388, "learning_rate": 4.390190098508001e-06, "loss": 0.5079, "step": 2189 }, { "epoch": 2.928428093645485, "grad_norm": 0.8593208115816321, "learning_rate": 4.3855585466764305e-06, "loss": 0.4622, "step": 2190 }, { "epoch": 2.9297658862876252, "grad_norm": 0.785425859621842, "learning_rate": 4.3809275300890956e-06, "loss": 0.5126, "step": 2191 }, { "epoch": 2.931103678929766, "grad_norm": 0.9962707932172681, "learning_rate": 4.376297052780106e-06, "loss": 0.6293, "step": 2192 }, { "epoch": 2.9324414715719063, "grad_norm": 1.068737306705739, "learning_rate": 4.371667118783101e-06, "loss": 0.5586, "step": 2193 }, { "epoch": 2.9337792642140466, "grad_norm": 0.9128359838275646, "learning_rate": 4.367037732131254e-06, "loss": 0.5255, "step": 2194 }, { "epoch": 2.9351170568561873, "grad_norm": 0.8645269164041491, "learning_rate": 4.362408896857251e-06, "loss": 0.5383, "step": 2195 }, { "epoch": 2.936454849498328, "grad_norm": 0.8580795098254259, "learning_rate": 4.357780616993305e-06, "loss": 0.5451, "step": 2196 }, { "epoch": 2.9377926421404683, "grad_norm": 0.9110769263943378, "learning_rate": 4.353152896571143e-06, "loss": 0.6402, "step": 2197 }, { "epoch": 2.9391304347826086, "grad_norm": 0.9924642406241925, "learning_rate": 4.348525739622003e-06, "loss": 0.6178, "step": 2198 }, { "epoch": 2.9404682274247493, "grad_norm": 1.1912050990644882, "learning_rate": 4.343899150176635e-06, "loss": 0.5706, "step": 2199 }, { "epoch": 2.9418060200668896, "grad_norm": 1.2391335518758408, "learning_rate": 4.339273132265294e-06, "loss": 0.6121, "step": 2200 }, { "epoch": 2.94314381270903, "grad_norm": 1.8660977723606385, "learning_rate": 4.334647689917734e-06, "loss": 0.5278, "step": 2201 }, { "epoch": 2.9444816053511706, "grad_norm": 1.2171331598947714, "learning_rate": 4.3300228271632105e-06, "loss": 0.595, "step": 2202 }, { "epoch": 2.945819397993311, "grad_norm": 0.7864183676996067, "learning_rate": 4.325398548030473e-06, "loss": 0.555, "step": 2203 }, { "epoch": 2.9471571906354512, "grad_norm": 0.9792378161733277, "learning_rate": 4.320774856547767e-06, "loss": 0.6006, "step": 2204 }, { "epoch": 2.948494983277592, "grad_norm": 1.3591253223822592, "learning_rate": 4.316151756742821e-06, "loss": 0.5744, "step": 2205 }, { "epoch": 2.9498327759197327, "grad_norm": 1.000891732838553, "learning_rate": 4.311529252642848e-06, "loss": 0.6823, "step": 2206 }, { "epoch": 2.951170568561873, "grad_norm": 0.857785951635024, "learning_rate": 4.306907348274545e-06, "loss": 0.6377, "step": 2207 }, { "epoch": 2.9525083612040133, "grad_norm": 1.0298887247213802, "learning_rate": 4.302286047664083e-06, "loss": 0.5786, "step": 2208 }, { "epoch": 2.953846153846154, "grad_norm": 0.900458667932252, "learning_rate": 4.2976653548371115e-06, "loss": 0.5793, "step": 2209 }, { "epoch": 2.9551839464882943, "grad_norm": 0.8437226632785992, "learning_rate": 4.293045273818748e-06, "loss": 0.6196, "step": 2210 }, { "epoch": 2.9565217391304346, "grad_norm": 0.8051199589714974, "learning_rate": 4.2884258086335755e-06, "loss": 0.5178, "step": 2211 }, { "epoch": 2.9578595317725753, "grad_norm": 1.1675599458985138, "learning_rate": 4.283806963305644e-06, "loss": 0.429, "step": 2212 }, { "epoch": 2.9591973244147156, "grad_norm": 1.5627643628857142, "learning_rate": 4.27918874185846e-06, "loss": 0.6097, "step": 2213 }, { "epoch": 2.9605351170568563, "grad_norm": 0.961642941527808, "learning_rate": 4.274571148314991e-06, "loss": 0.6272, "step": 2214 }, { "epoch": 2.9618729096989966, "grad_norm": 1.1589673837920837, "learning_rate": 4.269954186697654e-06, "loss": 0.63, "step": 2215 }, { "epoch": 2.9632107023411374, "grad_norm": 1.0110549624740601, "learning_rate": 4.265337861028316e-06, "loss": 0.4968, "step": 2216 }, { "epoch": 2.9645484949832777, "grad_norm": 0.8894326231083023, "learning_rate": 4.26072217532829e-06, "loss": 0.5976, "step": 2217 }, { "epoch": 2.965886287625418, "grad_norm": 1.1003647894642683, "learning_rate": 4.256107133618333e-06, "loss": 0.5607, "step": 2218 }, { "epoch": 2.9672240802675587, "grad_norm": 1.6036267968392366, "learning_rate": 4.251492739918641e-06, "loss": 0.5079, "step": 2219 }, { "epoch": 2.968561872909699, "grad_norm": 1.352299286937226, "learning_rate": 4.2468789982488415e-06, "loss": 0.5304, "step": 2220 }, { "epoch": 2.9698996655518393, "grad_norm": 1.5178049182260713, "learning_rate": 4.242265912628e-06, "loss": 0.5158, "step": 2221 }, { "epoch": 2.97123745819398, "grad_norm": 1.1617104839959755, "learning_rate": 4.2376534870746054e-06, "loss": 0.6272, "step": 2222 }, { "epoch": 2.9725752508361203, "grad_norm": 1.3233337049233789, "learning_rate": 4.233041725606573e-06, "loss": 0.6079, "step": 2223 }, { "epoch": 2.973913043478261, "grad_norm": 1.5296029423521458, "learning_rate": 4.228430632241244e-06, "loss": 0.6022, "step": 2224 }, { "epoch": 2.9752508361204013, "grad_norm": 0.7562260805763037, "learning_rate": 4.223820210995372e-06, "loss": 0.5344, "step": 2225 }, { "epoch": 2.976588628762542, "grad_norm": 1.0083402778511645, "learning_rate": 4.219210465885127e-06, "loss": 0.54, "step": 2226 }, { "epoch": 2.9779264214046823, "grad_norm": 1.033318841438474, "learning_rate": 4.21460140092609e-06, "loss": 0.5908, "step": 2227 }, { "epoch": 2.9792642140468226, "grad_norm": 1.1466372005436745, "learning_rate": 4.209993020133251e-06, "loss": 0.6415, "step": 2228 }, { "epoch": 2.9806020066889634, "grad_norm": 0.9917355652593777, "learning_rate": 4.205385327521002e-06, "loss": 0.563, "step": 2229 }, { "epoch": 2.9819397993311036, "grad_norm": 1.0926528379595715, "learning_rate": 4.200778327103134e-06, "loss": 0.6386, "step": 2230 }, { "epoch": 2.983277591973244, "grad_norm": 1.1106176405036248, "learning_rate": 4.19617202289284e-06, "loss": 0.5606, "step": 2231 }, { "epoch": 2.9846153846153847, "grad_norm": 1.3590909279727088, "learning_rate": 4.191566418902701e-06, "loss": 0.6675, "step": 2232 }, { "epoch": 2.985953177257525, "grad_norm": 1.2973673628478086, "learning_rate": 4.1869615191446925e-06, "loss": 0.6315, "step": 2233 }, { "epoch": 2.9872909698996657, "grad_norm": 1.0979860245941127, "learning_rate": 4.182357327630175e-06, "loss": 0.5763, "step": 2234 }, { "epoch": 2.988628762541806, "grad_norm": 1.0717348651224665, "learning_rate": 4.177753848369892e-06, "loss": 0.7248, "step": 2235 }, { "epoch": 2.9899665551839467, "grad_norm": 1.17275780258963, "learning_rate": 4.173151085373966e-06, "loss": 0.5995, "step": 2236 }, { "epoch": 2.991304347826087, "grad_norm": 0.9144109258354429, "learning_rate": 4.168549042651896e-06, "loss": 0.6678, "step": 2237 }, { "epoch": 2.9926421404682273, "grad_norm": 0.9857333766388504, "learning_rate": 4.163947724212553e-06, "loss": 0.6281, "step": 2238 }, { "epoch": 2.993979933110368, "grad_norm": 0.948152285873318, "learning_rate": 4.159347134064177e-06, "loss": 0.6088, "step": 2239 }, { "epoch": 2.9953177257525083, "grad_norm": 1.1483335982110643, "learning_rate": 4.154747276214377e-06, "loss": 0.5954, "step": 2240 }, { "epoch": 2.9966555183946486, "grad_norm": 0.9859136601100781, "learning_rate": 4.1501481546701185e-06, "loss": 0.6024, "step": 2241 }, { "epoch": 2.9979933110367893, "grad_norm": 1.0200072924925925, "learning_rate": 4.145549773437728e-06, "loss": 0.6773, "step": 2242 }, { "epoch": 2.9993311036789296, "grad_norm": 0.9973825819157945, "learning_rate": 4.140952136522889e-06, "loss": 0.5354, "step": 2243 }, { "epoch": 3.0, "grad_norm": 0.9973825819157945, "learning_rate": 4.136355247930636e-06, "loss": 0.5757, "step": 2244 }, { "epoch": 3.0013377926421403, "grad_norm": 1.7055135251167246, "learning_rate": 4.131759111665349e-06, "loss": 0.5457, "step": 2245 }, { "epoch": 3.002675585284281, "grad_norm": 1.3041362665776413, "learning_rate": 4.127163731730755e-06, "loss": 0.5293, "step": 2246 }, { "epoch": 3.0040133779264213, "grad_norm": 0.9461431562194924, "learning_rate": 4.12256911212992e-06, "loss": 0.4981, "step": 2247 }, { "epoch": 3.005351170568562, "grad_norm": 1.1279051356473437, "learning_rate": 4.117975256865252e-06, "loss": 0.4748, "step": 2248 }, { "epoch": 3.0066889632107023, "grad_norm": 1.1211916022410717, "learning_rate": 4.113382169938488e-06, "loss": 0.4894, "step": 2249 }, { "epoch": 3.0080267558528426, "grad_norm": 0.7739077561708169, "learning_rate": 4.108789855350699e-06, "loss": 0.5697, "step": 2250 }, { "epoch": 3.0093645484949834, "grad_norm": 1.2968730987171284, "learning_rate": 4.104198317102283e-06, "loss": 0.5343, "step": 2251 }, { "epoch": 3.0107023411371236, "grad_norm": 1.368384597998694, "learning_rate": 4.099607559192959e-06, "loss": 0.4777, "step": 2252 }, { "epoch": 3.0120401337792644, "grad_norm": 1.108631984701316, "learning_rate": 4.095017585621767e-06, "loss": 0.436, "step": 2253 }, { "epoch": 3.0133779264214047, "grad_norm": 1.2206151049186817, "learning_rate": 4.090428400387071e-06, "loss": 0.5401, "step": 2254 }, { "epoch": 3.014715719063545, "grad_norm": 0.9740650894661878, "learning_rate": 4.0858400074865364e-06, "loss": 0.526, "step": 2255 }, { "epoch": 3.0160535117056857, "grad_norm": 0.9937867631062434, "learning_rate": 4.081252410917148e-06, "loss": 0.5482, "step": 2256 }, { "epoch": 3.017391304347826, "grad_norm": 1.429700196732197, "learning_rate": 4.076665614675191e-06, "loss": 0.4912, "step": 2257 }, { "epoch": 3.0187290969899667, "grad_norm": 1.1957879026203113, "learning_rate": 4.0720796227562585e-06, "loss": 0.5898, "step": 2258 }, { "epoch": 3.020066889632107, "grad_norm": 0.9640032717459104, "learning_rate": 4.067494439155236e-06, "loss": 0.4862, "step": 2259 }, { "epoch": 3.0214046822742473, "grad_norm": 1.141361443410556, "learning_rate": 4.0629100678663104e-06, "loss": 0.519, "step": 2260 }, { "epoch": 3.022742474916388, "grad_norm": 1.2455938583095336, "learning_rate": 4.05832651288296e-06, "loss": 0.4266, "step": 2261 }, { "epoch": 3.0240802675585283, "grad_norm": 1.4076865312698437, "learning_rate": 4.053743778197951e-06, "loss": 0.5665, "step": 2262 }, { "epoch": 3.025418060200669, "grad_norm": 1.0978566917481045, "learning_rate": 4.049161867803334e-06, "loss": 0.5732, "step": 2263 }, { "epoch": 3.0267558528428093, "grad_norm": 0.8076918279530908, "learning_rate": 4.0445807856904465e-06, "loss": 0.4483, "step": 2264 }, { "epoch": 3.0280936454849496, "grad_norm": 0.8726285850695492, "learning_rate": 4.0400005358499e-06, "loss": 0.5356, "step": 2265 }, { "epoch": 3.0294314381270904, "grad_norm": 1.2738670107168621, "learning_rate": 4.035421122271581e-06, "loss": 0.4514, "step": 2266 }, { "epoch": 3.0307692307692307, "grad_norm": 1.3237349486734922, "learning_rate": 4.03084254894465e-06, "loss": 0.5752, "step": 2267 }, { "epoch": 3.0321070234113714, "grad_norm": 1.0501653210591295, "learning_rate": 4.026264819857533e-06, "loss": 0.597, "step": 2268 }, { "epoch": 3.0334448160535117, "grad_norm": 1.1117271828689053, "learning_rate": 4.021687938997923e-06, "loss": 0.4876, "step": 2269 }, { "epoch": 3.034782608695652, "grad_norm": 1.2782895674354073, "learning_rate": 4.017111910352771e-06, "loss": 0.4652, "step": 2270 }, { "epoch": 3.0361204013377927, "grad_norm": 1.0988605342172912, "learning_rate": 4.012536737908288e-06, "loss": 0.4853, "step": 2271 }, { "epoch": 3.037458193979933, "grad_norm": 1.1261112481635018, "learning_rate": 4.007962425649939e-06, "loss": 0.461, "step": 2272 }, { "epoch": 3.0387959866220737, "grad_norm": 1.1212253092592355, "learning_rate": 4.003388977562439e-06, "loss": 0.477, "step": 2273 }, { "epoch": 3.040133779264214, "grad_norm": 1.1803271358559388, "learning_rate": 3.998816397629752e-06, "loss": 0.5114, "step": 2274 }, { "epoch": 3.0414715719063543, "grad_norm": 1.40316663428868, "learning_rate": 3.994244689835083e-06, "loss": 0.4827, "step": 2275 }, { "epoch": 3.042809364548495, "grad_norm": 1.155657925620721, "learning_rate": 3.989673858160878e-06, "loss": 0.4752, "step": 2276 }, { "epoch": 3.0441471571906353, "grad_norm": 1.1235540602221072, "learning_rate": 3.985103906588821e-06, "loss": 0.4937, "step": 2277 }, { "epoch": 3.045484949832776, "grad_norm": 1.3302686138382736, "learning_rate": 3.980534839099829e-06, "loss": 0.5017, "step": 2278 }, { "epoch": 3.0468227424749164, "grad_norm": 1.323801086815484, "learning_rate": 3.975966659674048e-06, "loss": 0.4328, "step": 2279 }, { "epoch": 3.0481605351170566, "grad_norm": 1.5449359855469549, "learning_rate": 3.971399372290851e-06, "loss": 0.4714, "step": 2280 }, { "epoch": 3.0494983277591974, "grad_norm": 1.0774718581511202, "learning_rate": 3.966832980928834e-06, "loss": 0.5094, "step": 2281 }, { "epoch": 3.0508361204013377, "grad_norm": 1.164040365807423, "learning_rate": 3.962267489565813e-06, "loss": 0.5722, "step": 2282 }, { "epoch": 3.0521739130434784, "grad_norm": 0.9436726380149637, "learning_rate": 3.957702902178816e-06, "loss": 0.4165, "step": 2283 }, { "epoch": 3.0535117056856187, "grad_norm": 0.9886301526434996, "learning_rate": 3.953139222744093e-06, "loss": 0.4375, "step": 2284 }, { "epoch": 3.054849498327759, "grad_norm": 1.0164557169866864, "learning_rate": 3.9485764552370934e-06, "loss": 0.5159, "step": 2285 }, { "epoch": 3.0561872909698997, "grad_norm": 1.0245165846246038, "learning_rate": 3.944014603632476e-06, "loss": 0.4047, "step": 2286 }, { "epoch": 3.05752508361204, "grad_norm": 0.9958911824940702, "learning_rate": 3.939453671904101e-06, "loss": 0.4916, "step": 2287 }, { "epoch": 3.0588628762541807, "grad_norm": 0.9175014031168927, "learning_rate": 3.93489366402503e-06, "loss": 0.4533, "step": 2288 }, { "epoch": 3.060200668896321, "grad_norm": 1.404913875375316, "learning_rate": 3.930334583967514e-06, "loss": 0.6593, "step": 2289 }, { "epoch": 3.0615384615384613, "grad_norm": 0.8418524233461836, "learning_rate": 3.9257764357030025e-06, "loss": 0.4827, "step": 2290 }, { "epoch": 3.062876254180602, "grad_norm": 1.056834581694591, "learning_rate": 3.92121922320213e-06, "loss": 0.5302, "step": 2291 }, { "epoch": 3.0642140468227423, "grad_norm": 0.9105727390050901, "learning_rate": 3.916662950434714e-06, "loss": 0.4721, "step": 2292 }, { "epoch": 3.065551839464883, "grad_norm": 1.1517905266014812, "learning_rate": 3.912107621369755e-06, "loss": 0.4655, "step": 2293 }, { "epoch": 3.0668896321070234, "grad_norm": 0.6578286977701681, "learning_rate": 3.907553239975437e-06, "loss": 0.5491, "step": 2294 }, { "epoch": 3.068227424749164, "grad_norm": 1.1603548351673283, "learning_rate": 3.902999810219109e-06, "loss": 0.4799, "step": 2295 }, { "epoch": 3.0695652173913044, "grad_norm": 1.2840147346618715, "learning_rate": 3.898447336067297e-06, "loss": 0.5253, "step": 2296 }, { "epoch": 3.0709030100334447, "grad_norm": 1.0208969840076825, "learning_rate": 3.893895821485692e-06, "loss": 0.63, "step": 2297 }, { "epoch": 3.0722408026755854, "grad_norm": 1.1267000232996671, "learning_rate": 3.889345270439152e-06, "loss": 0.58, "step": 2298 }, { "epoch": 3.0735785953177257, "grad_norm": 1.2454879994027046, "learning_rate": 3.884795686891692e-06, "loss": 0.4831, "step": 2299 }, { "epoch": 3.074916387959866, "grad_norm": 1.0030957004118777, "learning_rate": 3.8802470748064855e-06, "loss": 0.5626, "step": 2300 }, { "epoch": 3.0762541806020067, "grad_norm": 1.2552865750754223, "learning_rate": 3.875699438145862e-06, "loss": 0.4369, "step": 2301 }, { "epoch": 3.077591973244147, "grad_norm": 1.1465682206874368, "learning_rate": 3.871152780871298e-06, "loss": 0.4202, "step": 2302 }, { "epoch": 3.0789297658862878, "grad_norm": 0.9953967226034943, "learning_rate": 3.866607106943418e-06, "loss": 0.4908, "step": 2303 }, { "epoch": 3.080267558528428, "grad_norm": 0.9342481397524975, "learning_rate": 3.862062420321993e-06, "loss": 0.6331, "step": 2304 }, { "epoch": 3.0816053511705688, "grad_norm": 1.0314943272377812, "learning_rate": 3.857518724965929e-06, "loss": 0.5359, "step": 2305 }, { "epoch": 3.082943143812709, "grad_norm": 0.8536884181194236, "learning_rate": 3.852976024833271e-06, "loss": 0.5454, "step": 2306 }, { "epoch": 3.0842809364548494, "grad_norm": 0.8051283182037996, "learning_rate": 3.8484343238811976e-06, "loss": 0.5204, "step": 2307 }, { "epoch": 3.08561872909699, "grad_norm": 1.151231706243847, "learning_rate": 3.8438936260660145e-06, "loss": 0.5299, "step": 2308 }, { "epoch": 3.0869565217391304, "grad_norm": 1.2030631911182652, "learning_rate": 3.839353935343156e-06, "loss": 0.5192, "step": 2309 }, { "epoch": 3.088294314381271, "grad_norm": 1.0488755558929603, "learning_rate": 3.834815255667179e-06, "loss": 0.4337, "step": 2310 }, { "epoch": 3.0896321070234114, "grad_norm": 1.417183808184807, "learning_rate": 3.8302775909917585e-06, "loss": 0.6415, "step": 2311 }, { "epoch": 3.0909698996655517, "grad_norm": 1.7863755702941828, "learning_rate": 3.8257409452696845e-06, "loss": 0.6154, "step": 2312 }, { "epoch": 3.0923076923076924, "grad_norm": 0.9059446129341986, "learning_rate": 3.821205322452863e-06, "loss": 0.5064, "step": 2313 }, { "epoch": 3.0936454849498327, "grad_norm": 0.9074243202948926, "learning_rate": 3.816670726492307e-06, "loss": 0.5224, "step": 2314 }, { "epoch": 3.0949832775919734, "grad_norm": 0.9027749991716942, "learning_rate": 3.812137161338133e-06, "loss": 0.466, "step": 2315 }, { "epoch": 3.0963210702341137, "grad_norm": 0.8182558588137828, "learning_rate": 3.8076046309395627e-06, "loss": 0.4869, "step": 2316 }, { "epoch": 3.097658862876254, "grad_norm": 0.9192373694003678, "learning_rate": 3.803073139244913e-06, "loss": 0.4367, "step": 2317 }, { "epoch": 3.0989966555183948, "grad_norm": 1.0964548456366514, "learning_rate": 3.7985426902015987e-06, "loss": 0.5562, "step": 2318 }, { "epoch": 3.100334448160535, "grad_norm": 1.6418754616967455, "learning_rate": 3.794013287756125e-06, "loss": 0.5359, "step": 2319 }, { "epoch": 3.101672240802676, "grad_norm": 1.1020874419874929, "learning_rate": 3.789484935854088e-06, "loss": 0.479, "step": 2320 }, { "epoch": 3.103010033444816, "grad_norm": 1.3763473483210034, "learning_rate": 3.784957638440165e-06, "loss": 0.5032, "step": 2321 }, { "epoch": 3.1043478260869564, "grad_norm": 1.3946214111181392, "learning_rate": 3.7804313994581143e-06, "loss": 0.4822, "step": 2322 }, { "epoch": 3.105685618729097, "grad_norm": 0.9822855902705062, "learning_rate": 3.775906222850778e-06, "loss": 0.4845, "step": 2323 }, { "epoch": 3.1070234113712374, "grad_norm": 1.0876551583847962, "learning_rate": 3.7713821125600687e-06, "loss": 0.4381, "step": 2324 }, { "epoch": 3.108361204013378, "grad_norm": 1.0402352607226781, "learning_rate": 3.766859072526969e-06, "loss": 0.5137, "step": 2325 }, { "epoch": 3.1096989966555184, "grad_norm": 1.1858534856726999, "learning_rate": 3.7623371066915305e-06, "loss": 0.5486, "step": 2326 }, { "epoch": 3.1110367892976587, "grad_norm": 1.0641098971408716, "learning_rate": 3.7578162189928696e-06, "loss": 0.4162, "step": 2327 }, { "epoch": 3.1123745819397994, "grad_norm": 1.133153150194634, "learning_rate": 3.7532964133691634e-06, "loss": 0.458, "step": 2328 }, { "epoch": 3.1137123745819397, "grad_norm": 1.3982867997556598, "learning_rate": 3.748777693757646e-06, "loss": 0.4893, "step": 2329 }, { "epoch": 3.1150501672240805, "grad_norm": 1.1607492587689656, "learning_rate": 3.7442600640946045e-06, "loss": 0.5568, "step": 2330 }, { "epoch": 3.1163879598662207, "grad_norm": 1.3528503157021599, "learning_rate": 3.7397435283153795e-06, "loss": 0.5408, "step": 2331 }, { "epoch": 3.117725752508361, "grad_norm": 0.8770303720003707, "learning_rate": 3.735228090354354e-06, "loss": 0.5065, "step": 2332 }, { "epoch": 3.1190635451505018, "grad_norm": 0.9518857063615636, "learning_rate": 3.730713754144961e-06, "loss": 0.6438, "step": 2333 }, { "epoch": 3.120401337792642, "grad_norm": 1.1119797294136144, "learning_rate": 3.726200523619668e-06, "loss": 0.5443, "step": 2334 }, { "epoch": 3.121739130434783, "grad_norm": 1.186236577228261, "learning_rate": 3.721688402709982e-06, "loss": 0.5721, "step": 2335 }, { "epoch": 3.123076923076923, "grad_norm": 1.0137306300820035, "learning_rate": 3.7171773953464437e-06, "loss": 0.4593, "step": 2336 }, { "epoch": 3.1244147157190634, "grad_norm": 0.870365497947411, "learning_rate": 3.712667505458622e-06, "loss": 0.4359, "step": 2337 }, { "epoch": 3.125752508361204, "grad_norm": 1.3728958433816454, "learning_rate": 3.708158736975114e-06, "loss": 0.5113, "step": 2338 }, { "epoch": 3.1270903010033444, "grad_norm": 1.0272707484350307, "learning_rate": 3.7036510938235394e-06, "loss": 0.5979, "step": 2339 }, { "epoch": 3.128428093645485, "grad_norm": 1.321598801679843, "learning_rate": 3.6991445799305376e-06, "loss": 0.4233, "step": 2340 }, { "epoch": 3.1297658862876254, "grad_norm": 1.5106100132816382, "learning_rate": 3.694639199221764e-06, "loss": 0.4999, "step": 2341 }, { "epoch": 3.1311036789297657, "grad_norm": 1.2722964556741962, "learning_rate": 3.690134955621885e-06, "loss": 0.4969, "step": 2342 }, { "epoch": 3.1324414715719064, "grad_norm": 1.3721645197713326, "learning_rate": 3.685631853054583e-06, "loss": 0.5796, "step": 2343 }, { "epoch": 3.1337792642140467, "grad_norm": 1.0631869967438012, "learning_rate": 3.68112989544254e-06, "loss": 0.6221, "step": 2344 }, { "epoch": 3.1351170568561875, "grad_norm": 0.9221173915320613, "learning_rate": 3.6766290867074444e-06, "loss": 0.6317, "step": 2345 }, { "epoch": 3.1364548494983278, "grad_norm": 0.9948329268622355, "learning_rate": 3.6721294307699786e-06, "loss": 0.4867, "step": 2346 }, { "epoch": 3.137792642140468, "grad_norm": 0.8770007051427254, "learning_rate": 3.667630931549826e-06, "loss": 0.4955, "step": 2347 }, { "epoch": 3.139130434782609, "grad_norm": 0.8936048025088398, "learning_rate": 3.6631335929656608e-06, "loss": 0.5452, "step": 2348 }, { "epoch": 3.140468227424749, "grad_norm": 0.7634243513053727, "learning_rate": 3.658637418935146e-06, "loss": 0.5253, "step": 2349 }, { "epoch": 3.14180602006689, "grad_norm": 1.0674039241378135, "learning_rate": 3.6541424133749293e-06, "loss": 0.4752, "step": 2350 }, { "epoch": 3.14314381270903, "grad_norm": 1.1038814733671294, "learning_rate": 3.6496485802006433e-06, "loss": 0.4785, "step": 2351 }, { "epoch": 3.1444816053511704, "grad_norm": 1.4038575717502972, "learning_rate": 3.645155923326893e-06, "loss": 0.4896, "step": 2352 }, { "epoch": 3.145819397993311, "grad_norm": 1.2003859259402336, "learning_rate": 3.640664446667268e-06, "loss": 0.5079, "step": 2353 }, { "epoch": 3.1471571906354514, "grad_norm": 1.014495452421962, "learning_rate": 3.6361741541343242e-06, "loss": 0.5543, "step": 2354 }, { "epoch": 3.148494983277592, "grad_norm": 1.2160583941273233, "learning_rate": 3.6316850496395863e-06, "loss": 0.6093, "step": 2355 }, { "epoch": 3.1498327759197324, "grad_norm": 1.4196931307060237, "learning_rate": 3.6271971370935432e-06, "loss": 0.5038, "step": 2356 }, { "epoch": 3.1511705685618727, "grad_norm": 1.130400677819672, "learning_rate": 3.622710420405647e-06, "loss": 0.428, "step": 2357 }, { "epoch": 3.1525083612040135, "grad_norm": 1.0363653433481355, "learning_rate": 3.61822490348431e-06, "loss": 0.526, "step": 2358 }, { "epoch": 3.1538461538461537, "grad_norm": 0.8069529697117542, "learning_rate": 3.613740590236895e-06, "loss": 0.4828, "step": 2359 }, { "epoch": 3.1551839464882945, "grad_norm": 1.2253663243239596, "learning_rate": 3.6092574845697193e-06, "loss": 0.5422, "step": 2360 }, { "epoch": 3.1565217391304348, "grad_norm": 1.2672941835684224, "learning_rate": 3.6047755903880478e-06, "loss": 0.5018, "step": 2361 }, { "epoch": 3.157859531772575, "grad_norm": 1.3231531385537145, "learning_rate": 3.6002949115960884e-06, "loss": 0.4312, "step": 2362 }, { "epoch": 3.159197324414716, "grad_norm": 1.2453793207700141, "learning_rate": 3.595815452096994e-06, "loss": 0.4827, "step": 2363 }, { "epoch": 3.160535117056856, "grad_norm": 0.9699381829374236, "learning_rate": 3.5913372157928515e-06, "loss": 0.5289, "step": 2364 }, { "epoch": 3.161872909698997, "grad_norm": 0.9721293955332588, "learning_rate": 3.5868602065846846e-06, "loss": 0.4559, "step": 2365 }, { "epoch": 3.163210702341137, "grad_norm": 1.0603052081396973, "learning_rate": 3.5823844283724464e-06, "loss": 0.5929, "step": 2366 }, { "epoch": 3.1645484949832774, "grad_norm": 1.2114190797746398, "learning_rate": 3.577909885055019e-06, "loss": 0.591, "step": 2367 }, { "epoch": 3.165886287625418, "grad_norm": 1.3100494910839957, "learning_rate": 3.573436580530208e-06, "loss": 0.484, "step": 2368 }, { "epoch": 3.1672240802675584, "grad_norm": 0.926279090363526, "learning_rate": 3.56896451869474e-06, "loss": 0.4259, "step": 2369 }, { "epoch": 3.168561872909699, "grad_norm": 1.144943891097799, "learning_rate": 3.56449370344426e-06, "loss": 0.4873, "step": 2370 }, { "epoch": 3.1698996655518394, "grad_norm": 1.376490451142732, "learning_rate": 3.560024138673326e-06, "loss": 0.435, "step": 2371 }, { "epoch": 3.1712374581939797, "grad_norm": 1.328765702551829, "learning_rate": 3.5555558282754045e-06, "loss": 0.5015, "step": 2372 }, { "epoch": 3.1725752508361205, "grad_norm": 1.789340208288602, "learning_rate": 3.5510887761428764e-06, "loss": 0.4288, "step": 2373 }, { "epoch": 3.1739130434782608, "grad_norm": 1.2292295548620988, "learning_rate": 3.546622986167021e-06, "loss": 0.4347, "step": 2374 }, { "epoch": 3.1752508361204015, "grad_norm": 1.071781795734667, "learning_rate": 3.5421584622380167e-06, "loss": 0.4111, "step": 2375 }, { "epoch": 3.1765886287625418, "grad_norm": 0.9486167432312852, "learning_rate": 3.5376952082449425e-06, "loss": 0.4388, "step": 2376 }, { "epoch": 3.177926421404682, "grad_norm": 1.0084271557002014, "learning_rate": 3.5332332280757706e-06, "loss": 0.5167, "step": 2377 }, { "epoch": 3.179264214046823, "grad_norm": 1.7825620491530383, "learning_rate": 3.5287725256173627e-06, "loss": 0.4929, "step": 2378 }, { "epoch": 3.180602006688963, "grad_norm": 1.5300839337717522, "learning_rate": 3.524313104755468e-06, "loss": 0.5763, "step": 2379 }, { "epoch": 3.181939799331104, "grad_norm": 0.9896450513744701, "learning_rate": 3.5198549693747185e-06, "loss": 0.4961, "step": 2380 }, { "epoch": 3.183277591973244, "grad_norm": 1.3892354558227347, "learning_rate": 3.5153981233586277e-06, "loss": 0.494, "step": 2381 }, { "epoch": 3.184615384615385, "grad_norm": 0.9920920194263009, "learning_rate": 3.510942570589583e-06, "loss": 0.5953, "step": 2382 }, { "epoch": 3.185953177257525, "grad_norm": 1.1151748555094638, "learning_rate": 3.5064883149488505e-06, "loss": 0.4377, "step": 2383 }, { "epoch": 3.1872909698996654, "grad_norm": 0.9800372130507976, "learning_rate": 3.5020353603165634e-06, "loss": 0.6299, "step": 2384 }, { "epoch": 3.188628762541806, "grad_norm": 0.9180931061930658, "learning_rate": 3.4975837105717203e-06, "loss": 0.5312, "step": 2385 }, { "epoch": 3.1899665551839465, "grad_norm": 1.0176255115310615, "learning_rate": 3.4931333695921843e-06, "loss": 0.4713, "step": 2386 }, { "epoch": 3.1913043478260867, "grad_norm": 1.1103328810339517, "learning_rate": 3.488684341254679e-06, "loss": 0.4927, "step": 2387 }, { "epoch": 3.1926421404682275, "grad_norm": 0.956885640243644, "learning_rate": 3.484236629434783e-06, "loss": 0.5411, "step": 2388 }, { "epoch": 3.1939799331103678, "grad_norm": 0.9848102991072653, "learning_rate": 3.4797902380069305e-06, "loss": 0.5492, "step": 2389 }, { "epoch": 3.1953177257525085, "grad_norm": 0.9685481040382105, "learning_rate": 3.475345170844403e-06, "loss": 0.455, "step": 2390 }, { "epoch": 3.196655518394649, "grad_norm": 1.0966063758483904, "learning_rate": 3.4709014318193298e-06, "loss": 0.5243, "step": 2391 }, { "epoch": 3.1979933110367895, "grad_norm": 1.2547803518009095, "learning_rate": 3.466459024802682e-06, "loss": 0.5597, "step": 2392 }, { "epoch": 3.19933110367893, "grad_norm": 1.0204199002597922, "learning_rate": 3.4620179536642727e-06, "loss": 0.4698, "step": 2393 }, { "epoch": 3.20066889632107, "grad_norm": 1.0212315406467702, "learning_rate": 3.4575782222727507e-06, "loss": 0.5927, "step": 2394 }, { "epoch": 3.202006688963211, "grad_norm": 1.1863048126829183, "learning_rate": 3.453139834495596e-06, "loss": 0.5161, "step": 2395 }, { "epoch": 3.203344481605351, "grad_norm": 1.0470677754609963, "learning_rate": 3.448702794199118e-06, "loss": 0.5449, "step": 2396 }, { "epoch": 3.2046822742474914, "grad_norm": 1.464470625726345, "learning_rate": 3.4442671052484545e-06, "loss": 0.5658, "step": 2397 }, { "epoch": 3.206020066889632, "grad_norm": 1.023734076931003, "learning_rate": 3.439832771507565e-06, "loss": 0.4836, "step": 2398 }, { "epoch": 3.2073578595317724, "grad_norm": 1.1750437852989748, "learning_rate": 3.4353997968392295e-06, "loss": 0.4831, "step": 2399 }, { "epoch": 3.208695652173913, "grad_norm": 1.469281146012999, "learning_rate": 3.4309681851050414e-06, "loss": 0.564, "step": 2400 }, { "epoch": 3.2100334448160535, "grad_norm": 1.5049318100632865, "learning_rate": 3.4265379401654096e-06, "loss": 0.4003, "step": 2401 }, { "epoch": 3.211371237458194, "grad_norm": 1.9226764613312535, "learning_rate": 3.4221090658795484e-06, "loss": 0.5208, "step": 2402 }, { "epoch": 3.2127090301003345, "grad_norm": 1.5461075090838408, "learning_rate": 3.4176815661054884e-06, "loss": 0.4433, "step": 2403 }, { "epoch": 3.2140468227424748, "grad_norm": 1.511043496908007, "learning_rate": 3.4132554447000487e-06, "loss": 0.4869, "step": 2404 }, { "epoch": 3.2153846153846155, "grad_norm": 1.2240267208658537, "learning_rate": 3.4088307055188574e-06, "loss": 0.5949, "step": 2405 }, { "epoch": 3.216722408026756, "grad_norm": 1.1167113564669, "learning_rate": 3.4044073524163344e-06, "loss": 0.5659, "step": 2406 }, { "epoch": 3.218060200668896, "grad_norm": 1.225283710968047, "learning_rate": 3.3999853892456945e-06, "loss": 0.4526, "step": 2407 }, { "epoch": 3.219397993311037, "grad_norm": 1.4686519264561801, "learning_rate": 3.3955648198589407e-06, "loss": 0.5798, "step": 2408 }, { "epoch": 3.220735785953177, "grad_norm": 0.9432424858569339, "learning_rate": 3.3911456481068613e-06, "loss": 0.5028, "step": 2409 }, { "epoch": 3.222073578595318, "grad_norm": 0.9451988074302297, "learning_rate": 3.386727877839027e-06, "loss": 0.5895, "step": 2410 }, { "epoch": 3.223411371237458, "grad_norm": 1.3376213889169557, "learning_rate": 3.3823115129037897e-06, "loss": 0.4631, "step": 2411 }, { "epoch": 3.224749163879599, "grad_norm": 1.2880606745190073, "learning_rate": 3.3778965571482723e-06, "loss": 0.4687, "step": 2412 }, { "epoch": 3.226086956521739, "grad_norm": 1.4049634477965887, "learning_rate": 3.3734830144183783e-06, "loss": 0.4604, "step": 2413 }, { "epoch": 3.2274247491638794, "grad_norm": 0.8622828707215513, "learning_rate": 3.369070888558774e-06, "loss": 0.5291, "step": 2414 }, { "epoch": 3.22876254180602, "grad_norm": 1.0477940750461772, "learning_rate": 3.3646601834128924e-06, "loss": 0.5304, "step": 2415 }, { "epoch": 3.2301003344481605, "grad_norm": 1.1754356837245463, "learning_rate": 3.360250902822929e-06, "loss": 0.4781, "step": 2416 }, { "epoch": 3.231438127090301, "grad_norm": 0.9682764613294642, "learning_rate": 3.35584305062984e-06, "loss": 0.451, "step": 2417 }, { "epoch": 3.2327759197324415, "grad_norm": 1.2433290284078147, "learning_rate": 3.3514366306733348e-06, "loss": 0.5895, "step": 2418 }, { "epoch": 3.234113712374582, "grad_norm": 0.9845429039056761, "learning_rate": 3.3470316467918785e-06, "loss": 0.5136, "step": 2419 }, { "epoch": 3.2354515050167225, "grad_norm": 1.0176135599816807, "learning_rate": 3.3426281028226817e-06, "loss": 0.5559, "step": 2420 }, { "epoch": 3.236789297658863, "grad_norm": 1.6804117091889557, "learning_rate": 3.3382260026017027e-06, "loss": 0.5485, "step": 2421 }, { "epoch": 3.2381270903010035, "grad_norm": 1.6443994230691845, "learning_rate": 3.3338253499636407e-06, "loss": 0.6014, "step": 2422 }, { "epoch": 3.239464882943144, "grad_norm": 1.1219777996333582, "learning_rate": 3.329426148741937e-06, "loss": 0.5106, "step": 2423 }, { "epoch": 3.240802675585284, "grad_norm": 0.8907867566387624, "learning_rate": 3.3250284027687652e-06, "loss": 0.5059, "step": 2424 }, { "epoch": 3.242140468227425, "grad_norm": 1.0359437281865835, "learning_rate": 3.320632115875033e-06, "loss": 0.4488, "step": 2425 }, { "epoch": 3.243478260869565, "grad_norm": 1.4809403782665453, "learning_rate": 3.3162372918903764e-06, "loss": 0.4439, "step": 2426 }, { "epoch": 3.244816053511706, "grad_norm": 1.3679874828685754, "learning_rate": 3.311843934643157e-06, "loss": 0.5657, "step": 2427 }, { "epoch": 3.246153846153846, "grad_norm": 1.5209190943769357, "learning_rate": 3.307452047960459e-06, "loss": 0.5063, "step": 2428 }, { "epoch": 3.2474916387959865, "grad_norm": 1.0076055623074038, "learning_rate": 3.3030616356680854e-06, "loss": 0.4813, "step": 2429 }, { "epoch": 3.248829431438127, "grad_norm": 1.1683658189711004, "learning_rate": 3.298672701590555e-06, "loss": 0.4772, "step": 2430 }, { "epoch": 3.2501672240802675, "grad_norm": 0.9500616277176434, "learning_rate": 3.2942852495510992e-06, "loss": 0.4823, "step": 2431 }, { "epoch": 3.251505016722408, "grad_norm": 0.7568205851828648, "learning_rate": 3.289899283371657e-06, "loss": 0.4829, "step": 2432 }, { "epoch": 3.2528428093645485, "grad_norm": 1.0015065717262066, "learning_rate": 3.2855148068728753e-06, "loss": 0.55, "step": 2433 }, { "epoch": 3.254180602006689, "grad_norm": 1.0860418385536363, "learning_rate": 3.2811318238741026e-06, "loss": 0.4461, "step": 2434 }, { "epoch": 3.2555183946488295, "grad_norm": 1.0731785816652584, "learning_rate": 3.276750338193385e-06, "loss": 0.5775, "step": 2435 }, { "epoch": 3.25685618729097, "grad_norm": 1.2839490429424194, "learning_rate": 3.272370353647465e-06, "loss": 0.5295, "step": 2436 }, { "epoch": 3.2581939799331106, "grad_norm": 1.3157950824694777, "learning_rate": 3.2679918740517785e-06, "loss": 0.5489, "step": 2437 }, { "epoch": 3.259531772575251, "grad_norm": 1.0502222050044967, "learning_rate": 3.263614903220449e-06, "loss": 0.4859, "step": 2438 }, { "epoch": 3.260869565217391, "grad_norm": 1.1743764396084586, "learning_rate": 3.2592394449662867e-06, "loss": 0.4855, "step": 2439 }, { "epoch": 3.262207357859532, "grad_norm": 1.5688012715537043, "learning_rate": 3.2548655031007837e-06, "loss": 0.5031, "step": 2440 }, { "epoch": 3.263545150501672, "grad_norm": 1.8198719348435066, "learning_rate": 3.250493081434112e-06, "loss": 0.5225, "step": 2441 }, { "epoch": 3.264882943143813, "grad_norm": 0.9452605288359279, "learning_rate": 3.2461221837751146e-06, "loss": 0.5514, "step": 2442 }, { "epoch": 3.266220735785953, "grad_norm": 1.100398954325693, "learning_rate": 3.241752813931316e-06, "loss": 0.4163, "step": 2443 }, { "epoch": 3.2675585284280935, "grad_norm": 1.4887743016818666, "learning_rate": 3.237384975708904e-06, "loss": 0.5223, "step": 2444 }, { "epoch": 3.268896321070234, "grad_norm": 0.924259711844089, "learning_rate": 3.233018672912731e-06, "loss": 0.572, "step": 2445 }, { "epoch": 3.2702341137123745, "grad_norm": 1.0591367036618933, "learning_rate": 3.228653909346314e-06, "loss": 0.4644, "step": 2446 }, { "epoch": 3.2715719063545152, "grad_norm": 0.9620379068963868, "learning_rate": 3.224290688811831e-06, "loss": 0.5477, "step": 2447 }, { "epoch": 3.2729096989966555, "grad_norm": 1.0379745288994855, "learning_rate": 3.2199290151101115e-06, "loss": 0.5987, "step": 2448 }, { "epoch": 3.274247491638796, "grad_norm": 0.7739187276589057, "learning_rate": 3.2155688920406415e-06, "loss": 0.597, "step": 2449 }, { "epoch": 3.2755852842809365, "grad_norm": 1.0515265718302809, "learning_rate": 3.2112103234015535e-06, "loss": 0.4601, "step": 2450 }, { "epoch": 3.276923076923077, "grad_norm": 1.2793038126592162, "learning_rate": 3.2068533129896273e-06, "loss": 0.518, "step": 2451 }, { "epoch": 3.2782608695652176, "grad_norm": 1.1719248988760322, "learning_rate": 3.2024978646002848e-06, "loss": 0.5589, "step": 2452 }, { "epoch": 3.279598662207358, "grad_norm": 0.8529593921580922, "learning_rate": 3.1981439820275883e-06, "loss": 0.4579, "step": 2453 }, { "epoch": 3.280936454849498, "grad_norm": 1.0246778782471055, "learning_rate": 3.1937916690642356e-06, "loss": 0.4579, "step": 2454 }, { "epoch": 3.282274247491639, "grad_norm": 1.0702353064909274, "learning_rate": 3.189440929501556e-06, "loss": 0.5896, "step": 2455 }, { "epoch": 3.283612040133779, "grad_norm": 1.027301013763465, "learning_rate": 3.185091767129509e-06, "loss": 0.5571, "step": 2456 }, { "epoch": 3.28494983277592, "grad_norm": 1.0048463984994984, "learning_rate": 3.1807441857366798e-06, "loss": 0.4942, "step": 2457 }, { "epoch": 3.28628762541806, "grad_norm": 1.0691629993722263, "learning_rate": 3.1763981891102785e-06, "loss": 0.4894, "step": 2458 }, { "epoch": 3.2876254180602005, "grad_norm": 0.9071771144835132, "learning_rate": 3.172053781036132e-06, "loss": 0.451, "step": 2459 }, { "epoch": 3.288963210702341, "grad_norm": 1.3186715444356079, "learning_rate": 3.167710965298684e-06, "loss": 0.4863, "step": 2460 }, { "epoch": 3.2903010033444815, "grad_norm": 1.0789389728966206, "learning_rate": 3.1633697456809932e-06, "loss": 0.5899, "step": 2461 }, { "epoch": 3.2916387959866222, "grad_norm": 1.338325783015408, "learning_rate": 3.159030125964723e-06, "loss": 0.5028, "step": 2462 }, { "epoch": 3.2929765886287625, "grad_norm": 1.1355583269110847, "learning_rate": 3.1546921099301507e-06, "loss": 0.4976, "step": 2463 }, { "epoch": 3.294314381270903, "grad_norm": 1.0468071041597566, "learning_rate": 3.15035570135615e-06, "loss": 0.4372, "step": 2464 }, { "epoch": 3.2956521739130435, "grad_norm": 1.312019218004905, "learning_rate": 3.1460209040201967e-06, "loss": 0.4884, "step": 2465 }, { "epoch": 3.296989966555184, "grad_norm": 1.1207972692507673, "learning_rate": 3.141687721698363e-06, "loss": 0.4861, "step": 2466 }, { "epoch": 3.2983277591973246, "grad_norm": 0.993244082316038, "learning_rate": 3.1373561581653152e-06, "loss": 0.4809, "step": 2467 }, { "epoch": 3.299665551839465, "grad_norm": 1.5084645784413429, "learning_rate": 3.1330262171943073e-06, "loss": 0.5518, "step": 2468 }, { "epoch": 3.3010033444816056, "grad_norm": 0.8701946297398219, "learning_rate": 3.1286979025571817e-06, "loss": 0.4803, "step": 2469 }, { "epoch": 3.302341137123746, "grad_norm": 0.9699260955432821, "learning_rate": 3.1243712180243633e-06, "loss": 0.5698, "step": 2470 }, { "epoch": 3.303678929765886, "grad_norm": 1.3888973827226003, "learning_rate": 3.120046167364857e-06, "loss": 0.5751, "step": 2471 }, { "epoch": 3.305016722408027, "grad_norm": 1.2881850802483423, "learning_rate": 3.1157227543462428e-06, "loss": 0.4673, "step": 2472 }, { "epoch": 3.306354515050167, "grad_norm": 1.3957631537500557, "learning_rate": 3.11140098273468e-06, "loss": 0.4758, "step": 2473 }, { "epoch": 3.3076923076923075, "grad_norm": 1.0366288454183958, "learning_rate": 3.107080856294892e-06, "loss": 0.4956, "step": 2474 }, { "epoch": 3.309030100334448, "grad_norm": 0.921765594537208, "learning_rate": 3.1027623787901706e-06, "loss": 0.5251, "step": 2475 }, { "epoch": 3.3103678929765885, "grad_norm": 1.0183111248816241, "learning_rate": 3.098445553982372e-06, "loss": 0.5824, "step": 2476 }, { "epoch": 3.3117056856187292, "grad_norm": 1.1011230029319024, "learning_rate": 3.0941303856319126e-06, "loss": 0.5533, "step": 2477 }, { "epoch": 3.3130434782608695, "grad_norm": 1.0984447063632412, "learning_rate": 3.0898168774977654e-06, "loss": 0.473, "step": 2478 }, { "epoch": 3.3143812709030103, "grad_norm": 1.1095321050935083, "learning_rate": 3.0855050333374574e-06, "loss": 0.5569, "step": 2479 }, { "epoch": 3.3157190635451506, "grad_norm": 1.12687782906763, "learning_rate": 3.0811948569070666e-06, "loss": 0.4837, "step": 2480 }, { "epoch": 3.317056856187291, "grad_norm": 0.8968728236953819, "learning_rate": 3.076886351961217e-06, "loss": 0.445, "step": 2481 }, { "epoch": 3.3183946488294316, "grad_norm": 0.9593133195036992, "learning_rate": 3.072579522253076e-06, "loss": 0.5896, "step": 2482 }, { "epoch": 3.319732441471572, "grad_norm": 0.9038252054445406, "learning_rate": 3.0682743715343565e-06, "loss": 0.5143, "step": 2483 }, { "epoch": 3.321070234113712, "grad_norm": 1.3901083728391808, "learning_rate": 3.063970903555304e-06, "loss": 0.5695, "step": 2484 }, { "epoch": 3.322408026755853, "grad_norm": 1.402893208822377, "learning_rate": 3.0596691220646978e-06, "loss": 0.59, "step": 2485 }, { "epoch": 3.323745819397993, "grad_norm": 1.0445221150876478, "learning_rate": 3.0553690308098517e-06, "loss": 0.5309, "step": 2486 }, { "epoch": 3.325083612040134, "grad_norm": 0.9223829233402651, "learning_rate": 3.0510706335366034e-06, "loss": 0.5227, "step": 2487 }, { "epoch": 3.326421404682274, "grad_norm": 0.9231616380511983, "learning_rate": 3.046773933989319e-06, "loss": 0.4678, "step": 2488 }, { "epoch": 3.327759197324415, "grad_norm": 0.8959459947178644, "learning_rate": 3.042478935910881e-06, "loss": 0.5205, "step": 2489 }, { "epoch": 3.3290969899665552, "grad_norm": 0.8254720283634702, "learning_rate": 3.0381856430426935e-06, "loss": 0.5498, "step": 2490 }, { "epoch": 3.3304347826086955, "grad_norm": 1.102979778847579, "learning_rate": 3.033894059124675e-06, "loss": 0.5674, "step": 2491 }, { "epoch": 3.3317725752508363, "grad_norm": 0.8425558556235139, "learning_rate": 3.0296041878952497e-06, "loss": 0.5071, "step": 2492 }, { "epoch": 3.3331103678929765, "grad_norm": 1.0751688089755143, "learning_rate": 3.02531603309136e-06, "loss": 0.5149, "step": 2493 }, { "epoch": 3.334448160535117, "grad_norm": 1.1886136805115932, "learning_rate": 3.0210295984484446e-06, "loss": 0.4626, "step": 2494 }, { "epoch": 3.3357859531772576, "grad_norm": 0.8212893621087618, "learning_rate": 3.016744887700447e-06, "loss": 0.5635, "step": 2495 }, { "epoch": 3.337123745819398, "grad_norm": 1.1574050803625786, "learning_rate": 3.0124619045798087e-06, "loss": 0.4343, "step": 2496 }, { "epoch": 3.3384615384615386, "grad_norm": 1.1774431555386515, "learning_rate": 3.0081806528174655e-06, "loss": 0.4878, "step": 2497 }, { "epoch": 3.339799331103679, "grad_norm": 0.9906293704326787, "learning_rate": 3.0039011361428466e-06, "loss": 0.538, "step": 2498 }, { "epoch": 3.3411371237458196, "grad_norm": 1.515206505989614, "learning_rate": 2.9996233582838686e-06, "loss": 0.4681, "step": 2499 }, { "epoch": 3.34247491638796, "grad_norm": 1.3176899386475605, "learning_rate": 2.995347322966933e-06, "loss": 0.4856, "step": 2500 }, { "epoch": 3.3438127090301, "grad_norm": 1.076027224815296, "learning_rate": 2.9910730339169245e-06, "loss": 0.6088, "step": 2501 }, { "epoch": 3.345150501672241, "grad_norm": 1.0389341058977715, "learning_rate": 2.9868004948572044e-06, "loss": 0.5158, "step": 2502 }, { "epoch": 3.346488294314381, "grad_norm": 0.9222692968000336, "learning_rate": 2.982529709509615e-06, "loss": 0.4883, "step": 2503 }, { "epoch": 3.3478260869565215, "grad_norm": 1.1253521273392304, "learning_rate": 2.978260681594465e-06, "loss": 0.5387, "step": 2504 }, { "epoch": 3.3491638795986622, "grad_norm": 1.1981667315814561, "learning_rate": 2.973993414830534e-06, "loss": 0.3936, "step": 2505 }, { "epoch": 3.3505016722408025, "grad_norm": 1.4685085181234832, "learning_rate": 2.9697279129350686e-06, "loss": 0.514, "step": 2506 }, { "epoch": 3.3518394648829433, "grad_norm": 1.124800332047825, "learning_rate": 2.965464179623777e-06, "loss": 0.476, "step": 2507 }, { "epoch": 3.3531772575250836, "grad_norm": 1.3305854093099239, "learning_rate": 2.9612022186108267e-06, "loss": 0.5026, "step": 2508 }, { "epoch": 3.3545150501672243, "grad_norm": 1.3480018683379698, "learning_rate": 2.956942033608843e-06, "loss": 0.5678, "step": 2509 }, { "epoch": 3.3558528428093646, "grad_norm": 0.9709675161095185, "learning_rate": 2.952683628328901e-06, "loss": 0.5341, "step": 2510 }, { "epoch": 3.357190635451505, "grad_norm": 1.2058231961925956, "learning_rate": 2.948427006480528e-06, "loss": 0.5594, "step": 2511 }, { "epoch": 3.3585284280936456, "grad_norm": 1.084336973423129, "learning_rate": 2.9441721717716966e-06, "loss": 0.4853, "step": 2512 }, { "epoch": 3.359866220735786, "grad_norm": 0.8604918071623089, "learning_rate": 2.9399191279088236e-06, "loss": 0.5652, "step": 2513 }, { "epoch": 3.361204013377926, "grad_norm": 0.7602300867032677, "learning_rate": 2.9356678785967646e-06, "loss": 0.4762, "step": 2514 }, { "epoch": 3.362541806020067, "grad_norm": 1.114485037205909, "learning_rate": 2.9314184275388134e-06, "loss": 0.514, "step": 2515 }, { "epoch": 3.363879598662207, "grad_norm": 1.02685644245962, "learning_rate": 2.9271707784366952e-06, "loss": 0.5997, "step": 2516 }, { "epoch": 3.365217391304348, "grad_norm": 0.8994525142491521, "learning_rate": 2.9229249349905686e-06, "loss": 0.5675, "step": 2517 }, { "epoch": 3.3665551839464882, "grad_norm": 1.0220489662550656, "learning_rate": 2.918680900899017e-06, "loss": 0.5712, "step": 2518 }, { "epoch": 3.367892976588629, "grad_norm": 1.1653672239722705, "learning_rate": 2.914438679859046e-06, "loss": 0.4444, "step": 2519 }, { "epoch": 3.3692307692307693, "grad_norm": 0.7722703281790737, "learning_rate": 2.910198275566085e-06, "loss": 0.4307, "step": 2520 }, { "epoch": 3.3705685618729095, "grad_norm": 1.1617397232045177, "learning_rate": 2.9059596917139804e-06, "loss": 0.5251, "step": 2521 }, { "epoch": 3.3719063545150503, "grad_norm": 1.4195882129002002, "learning_rate": 2.9017229319949897e-06, "loss": 0.5551, "step": 2522 }, { "epoch": 3.3732441471571906, "grad_norm": 1.338451076673123, "learning_rate": 2.897488000099788e-06, "loss": 0.5426, "step": 2523 }, { "epoch": 3.374581939799331, "grad_norm": 0.985155959477231, "learning_rate": 2.893254899717452e-06, "loss": 0.5628, "step": 2524 }, { "epoch": 3.3759197324414716, "grad_norm": 1.3132224526840923, "learning_rate": 2.8890236345354648e-06, "loss": 0.5082, "step": 2525 }, { "epoch": 3.377257525083612, "grad_norm": 1.070321138290375, "learning_rate": 2.8847942082397112e-06, "loss": 0.5678, "step": 2526 }, { "epoch": 3.3785953177257526, "grad_norm": 0.9617458768360516, "learning_rate": 2.8805666245144735e-06, "loss": 0.5906, "step": 2527 }, { "epoch": 3.379933110367893, "grad_norm": 0.9680666368556813, "learning_rate": 2.8763408870424305e-06, "loss": 0.4846, "step": 2528 }, { "epoch": 3.3812709030100336, "grad_norm": 1.3816049016600023, "learning_rate": 2.8721169995046503e-06, "loss": 0.5364, "step": 2529 }, { "epoch": 3.382608695652174, "grad_norm": 0.9453620897616645, "learning_rate": 2.8678949655805915e-06, "loss": 0.5046, "step": 2530 }, { "epoch": 3.383946488294314, "grad_norm": 0.9685144063560904, "learning_rate": 2.863674788948097e-06, "loss": 0.5597, "step": 2531 }, { "epoch": 3.385284280936455, "grad_norm": 1.2709165815084915, "learning_rate": 2.85945647328339e-06, "loss": 0.5134, "step": 2532 }, { "epoch": 3.3866220735785952, "grad_norm": 1.0963175151411704, "learning_rate": 2.8552400222610788e-06, "loss": 0.4389, "step": 2533 }, { "epoch": 3.387959866220736, "grad_norm": 1.1995685069879753, "learning_rate": 2.851025439554142e-06, "loss": 0.436, "step": 2534 }, { "epoch": 3.3892976588628763, "grad_norm": 1.1603285191694062, "learning_rate": 2.846812728833931e-06, "loss": 0.5287, "step": 2535 }, { "epoch": 3.3906354515050166, "grad_norm": 1.3162310422619334, "learning_rate": 2.8426018937701678e-06, "loss": 0.5214, "step": 2536 }, { "epoch": 3.3919732441471573, "grad_norm": 1.1158877631992963, "learning_rate": 2.8383929380309406e-06, "loss": 0.5085, "step": 2537 }, { "epoch": 3.3933110367892976, "grad_norm": 1.2136909118226789, "learning_rate": 2.834185865282699e-06, "loss": 0.5603, "step": 2538 }, { "epoch": 3.3946488294314383, "grad_norm": 0.9112003262405725, "learning_rate": 2.829980679190254e-06, "loss": 0.511, "step": 2539 }, { "epoch": 3.3959866220735786, "grad_norm": 1.048221556541953, "learning_rate": 2.8257773834167736e-06, "loss": 0.5374, "step": 2540 }, { "epoch": 3.397324414715719, "grad_norm": 0.9089999888243845, "learning_rate": 2.8215759816237748e-06, "loss": 0.452, "step": 2541 }, { "epoch": 3.3986622073578596, "grad_norm": 1.2939855186190328, "learning_rate": 2.817376477471132e-06, "loss": 0.4814, "step": 2542 }, { "epoch": 3.4, "grad_norm": 1.0055961270192524, "learning_rate": 2.8131788746170612e-06, "loss": 0.5616, "step": 2543 }, { "epoch": 3.4013377926421406, "grad_norm": 0.8630853409058367, "learning_rate": 2.808983176718125e-06, "loss": 0.5481, "step": 2544 }, { "epoch": 3.402675585284281, "grad_norm": 0.9170931540049544, "learning_rate": 2.804789387429222e-06, "loss": 0.4882, "step": 2545 }, { "epoch": 3.4040133779264212, "grad_norm": 1.1163886715628464, "learning_rate": 2.800597510403592e-06, "loss": 0.5603, "step": 2546 }, { "epoch": 3.405351170568562, "grad_norm": 1.010797628224914, "learning_rate": 2.796407549292809e-06, "loss": 0.5021, "step": 2547 }, { "epoch": 3.4066889632107022, "grad_norm": 1.1332029917723028, "learning_rate": 2.792219507746777e-06, "loss": 0.4415, "step": 2548 }, { "epoch": 3.408026755852843, "grad_norm": 1.1793632177911295, "learning_rate": 2.788033389413729e-06, "loss": 0.535, "step": 2549 }, { "epoch": 3.4093645484949833, "grad_norm": 1.0646797444017875, "learning_rate": 2.7838491979402205e-06, "loss": 0.5444, "step": 2550 }, { "epoch": 3.4107023411371236, "grad_norm": 1.1988459649026655, "learning_rate": 2.7796669369711294e-06, "loss": 0.4674, "step": 2551 }, { "epoch": 3.4120401337792643, "grad_norm": 1.2193558395097301, "learning_rate": 2.7754866101496558e-06, "loss": 0.4832, "step": 2552 }, { "epoch": 3.4133779264214046, "grad_norm": 0.907551433529783, "learning_rate": 2.771308221117309e-06, "loss": 0.5021, "step": 2553 }, { "epoch": 3.4147157190635453, "grad_norm": 1.2383881347983468, "learning_rate": 2.7671317735139136e-06, "loss": 0.6432, "step": 2554 }, { "epoch": 3.4160535117056856, "grad_norm": 1.0594898424122663, "learning_rate": 2.762957270977602e-06, "loss": 0.5845, "step": 2555 }, { "epoch": 3.417391304347826, "grad_norm": 0.8263181580813037, "learning_rate": 2.758784717144812e-06, "loss": 0.5872, "step": 2556 }, { "epoch": 3.4187290969899666, "grad_norm": 1.433430120686991, "learning_rate": 2.754614115650285e-06, "loss": 0.4868, "step": 2557 }, { "epoch": 3.420066889632107, "grad_norm": 1.2806468944594982, "learning_rate": 2.7504454701270604e-06, "loss": 0.565, "step": 2558 }, { "epoch": 3.4214046822742477, "grad_norm": 1.03117943016534, "learning_rate": 2.7462787842064753e-06, "loss": 0.5568, "step": 2559 }, { "epoch": 3.422742474916388, "grad_norm": 0.8843038414540948, "learning_rate": 2.742114061518157e-06, "loss": 0.5955, "step": 2560 }, { "epoch": 3.4240802675585282, "grad_norm": 0.8359764943057167, "learning_rate": 2.7379513056900254e-06, "loss": 0.6247, "step": 2561 }, { "epoch": 3.425418060200669, "grad_norm": 0.8375536735548752, "learning_rate": 2.7337905203482884e-06, "loss": 0.559, "step": 2562 }, { "epoch": 3.4267558528428093, "grad_norm": 1.0470481921229116, "learning_rate": 2.7296317091174325e-06, "loss": 0.4916, "step": 2563 }, { "epoch": 3.42809364548495, "grad_norm": 0.9355094240283514, "learning_rate": 2.725474875620228e-06, "loss": 0.5658, "step": 2564 }, { "epoch": 3.4294314381270903, "grad_norm": 0.991415656783458, "learning_rate": 2.7213200234777215e-06, "loss": 0.4228, "step": 2565 }, { "epoch": 3.430769230769231, "grad_norm": 0.8620395979715874, "learning_rate": 2.717167156309234e-06, "loss": 0.5484, "step": 2566 }, { "epoch": 3.4321070234113713, "grad_norm": 0.759585809078403, "learning_rate": 2.7130162777323567e-06, "loss": 0.4149, "step": 2567 }, { "epoch": 3.4334448160535116, "grad_norm": 1.3172841690044754, "learning_rate": 2.708867391362948e-06, "loss": 0.4584, "step": 2568 }, { "epoch": 3.4347826086956523, "grad_norm": 1.1469216225440242, "learning_rate": 2.7047205008151332e-06, "loss": 0.55, "step": 2569 }, { "epoch": 3.4361204013377926, "grad_norm": 0.7302949043540341, "learning_rate": 2.700575609701298e-06, "loss": 0.4719, "step": 2570 }, { "epoch": 3.437458193979933, "grad_norm": 1.2691226318302369, "learning_rate": 2.696432721632082e-06, "loss": 0.4029, "step": 2571 }, { "epoch": 3.4387959866220736, "grad_norm": 0.9565117484458081, "learning_rate": 2.692291840216389e-06, "loss": 0.5072, "step": 2572 }, { "epoch": 3.440133779264214, "grad_norm": 1.017561316785687, "learning_rate": 2.6881529690613687e-06, "loss": 0.5138, "step": 2573 }, { "epoch": 3.4414715719063547, "grad_norm": 1.147032903368273, "learning_rate": 2.6840161117724184e-06, "loss": 0.4276, "step": 2574 }, { "epoch": 3.442809364548495, "grad_norm": 0.8549669766612613, "learning_rate": 2.6798812719531843e-06, "loss": 0.4808, "step": 2575 }, { "epoch": 3.4441471571906357, "grad_norm": 0.9183241603235439, "learning_rate": 2.6757484532055537e-06, "loss": 0.4437, "step": 2576 }, { "epoch": 3.445484949832776, "grad_norm": 0.9618114712131166, "learning_rate": 2.671617659129655e-06, "loss": 0.5278, "step": 2577 }, { "epoch": 3.4468227424749163, "grad_norm": 0.861245015413977, "learning_rate": 2.667488893323851e-06, "loss": 0.5468, "step": 2578 }, { "epoch": 3.448160535117057, "grad_norm": 1.3110219487917576, "learning_rate": 2.6633621593847387e-06, "loss": 0.4448, "step": 2579 }, { "epoch": 3.4494983277591973, "grad_norm": 1.69687418217743, "learning_rate": 2.6592374609071446e-06, "loss": 0.5278, "step": 2580 }, { "epoch": 3.4508361204013376, "grad_norm": 1.346699394048028, "learning_rate": 2.65511480148412e-06, "loss": 0.4976, "step": 2581 }, { "epoch": 3.4521739130434783, "grad_norm": 0.8760652298149489, "learning_rate": 2.6509941847069466e-06, "loss": 0.4907, "step": 2582 }, { "epoch": 3.4535117056856186, "grad_norm": 0.898736772477795, "learning_rate": 2.646875614165121e-06, "loss": 0.4709, "step": 2583 }, { "epoch": 3.4548494983277593, "grad_norm": 0.9775153399343717, "learning_rate": 2.6427590934463576e-06, "loss": 0.5367, "step": 2584 }, { "epoch": 3.4561872909698996, "grad_norm": 0.8022880154506884, "learning_rate": 2.6386446261365874e-06, "loss": 0.4573, "step": 2585 }, { "epoch": 3.4575250836120404, "grad_norm": 1.2443440495857376, "learning_rate": 2.6345322158199503e-06, "loss": 0.5058, "step": 2586 }, { "epoch": 3.4588628762541807, "grad_norm": 1.3431046409635268, "learning_rate": 2.630421866078797e-06, "loss": 0.3945, "step": 2587 }, { "epoch": 3.460200668896321, "grad_norm": 1.3154177011097226, "learning_rate": 2.626313580493681e-06, "loss": 0.5482, "step": 2588 }, { "epoch": 3.4615384615384617, "grad_norm": 1.0928644850445377, "learning_rate": 2.6222073626433587e-06, "loss": 0.5571, "step": 2589 }, { "epoch": 3.462876254180602, "grad_norm": 0.9023246379094588, "learning_rate": 2.618103216104785e-06, "loss": 0.552, "step": 2590 }, { "epoch": 3.4642140468227423, "grad_norm": 1.0073071571281749, "learning_rate": 2.6140011444531086e-06, "loss": 0.584, "step": 2591 }, { "epoch": 3.465551839464883, "grad_norm": 1.2752917539733526, "learning_rate": 2.6099011512616767e-06, "loss": 0.4927, "step": 2592 }, { "epoch": 3.4668896321070233, "grad_norm": 1.3405521301865406, "learning_rate": 2.60580324010202e-06, "loss": 0.5695, "step": 2593 }, { "epoch": 3.468227424749164, "grad_norm": 0.9795462329717695, "learning_rate": 2.6017074145438583e-06, "loss": 0.4481, "step": 2594 }, { "epoch": 3.4695652173913043, "grad_norm": 1.4075995068624443, "learning_rate": 2.597613678155092e-06, "loss": 0.4875, "step": 2595 }, { "epoch": 3.470903010033445, "grad_norm": 0.8728136873294873, "learning_rate": 2.593522034501805e-06, "loss": 0.3813, "step": 2596 }, { "epoch": 3.4722408026755853, "grad_norm": 1.2605715867902336, "learning_rate": 2.5894324871482557e-06, "loss": 0.4327, "step": 2597 }, { "epoch": 3.4735785953177256, "grad_norm": 1.3980031483040687, "learning_rate": 2.585345039656878e-06, "loss": 0.4541, "step": 2598 }, { "epoch": 3.4749163879598663, "grad_norm": 1.232115823434106, "learning_rate": 2.5812596955882756e-06, "loss": 0.5601, "step": 2599 }, { "epoch": 3.4762541806020066, "grad_norm": 1.0435931402265213, "learning_rate": 2.5771764585012203e-06, "loss": 0.4375, "step": 2600 }, { "epoch": 3.477591973244147, "grad_norm": 1.208611750228433, "learning_rate": 2.573095331952646e-06, "loss": 0.4848, "step": 2601 }, { "epoch": 3.4789297658862877, "grad_norm": 1.7952602976893424, "learning_rate": 2.5690163194976576e-06, "loss": 0.4401, "step": 2602 }, { "epoch": 3.480267558528428, "grad_norm": 1.060465565194676, "learning_rate": 2.5649394246895044e-06, "loss": 0.5092, "step": 2603 }, { "epoch": 3.4816053511705687, "grad_norm": 0.9242419415554505, "learning_rate": 2.560864651079599e-06, "loss": 0.4762, "step": 2604 }, { "epoch": 3.482943143812709, "grad_norm": 1.0954440563293404, "learning_rate": 2.556792002217507e-06, "loss": 0.6304, "step": 2605 }, { "epoch": 3.4842809364548497, "grad_norm": 1.5665099278264254, "learning_rate": 2.5527214816509398e-06, "loss": 0.5073, "step": 2606 }, { "epoch": 3.48561872909699, "grad_norm": 1.1752268635118888, "learning_rate": 2.5486530929257574e-06, "loss": 0.5236, "step": 2607 }, { "epoch": 3.4869565217391303, "grad_norm": 1.120611102350562, "learning_rate": 2.544586839585961e-06, "loss": 0.5327, "step": 2608 }, { "epoch": 3.488294314381271, "grad_norm": 0.9406637963796328, "learning_rate": 2.540522725173692e-06, "loss": 0.4789, "step": 2609 }, { "epoch": 3.4896321070234113, "grad_norm": 1.344504894638657, "learning_rate": 2.5364607532292283e-06, "loss": 0.5293, "step": 2610 }, { "epoch": 3.4909698996655516, "grad_norm": 1.193933421570872, "learning_rate": 2.532400927290982e-06, "loss": 0.5471, "step": 2611 }, { "epoch": 3.4923076923076923, "grad_norm": 1.0881385726553154, "learning_rate": 2.5283432508954976e-06, "loss": 0.5626, "step": 2612 }, { "epoch": 3.4936454849498326, "grad_norm": 0.8411743675997282, "learning_rate": 2.5242877275774446e-06, "loss": 0.4851, "step": 2613 }, { "epoch": 3.4949832775919734, "grad_norm": 1.1071976060496227, "learning_rate": 2.520234360869617e-06, "loss": 0.5331, "step": 2614 }, { "epoch": 3.4963210702341136, "grad_norm": 1.2357265526698025, "learning_rate": 2.5161831543029314e-06, "loss": 0.5032, "step": 2615 }, { "epoch": 3.4976588628762544, "grad_norm": 1.1921963510581166, "learning_rate": 2.512134111406422e-06, "loss": 0.5419, "step": 2616 }, { "epoch": 3.4989966555183947, "grad_norm": 0.9022003164545739, "learning_rate": 2.508087235707237e-06, "loss": 0.4322, "step": 2617 }, { "epoch": 3.500334448160535, "grad_norm": 1.021403939771115, "learning_rate": 2.5040425307306404e-06, "loss": 0.4664, "step": 2618 }, { "epoch": 3.5016722408026757, "grad_norm": 0.9278377911699145, "learning_rate": 2.5000000000000015e-06, "loss": 0.533, "step": 2619 }, { "epoch": 3.503010033444816, "grad_norm": 0.7320597500615058, "learning_rate": 2.4959596470367965e-06, "loss": 0.5994, "step": 2620 }, { "epoch": 3.5043478260869563, "grad_norm": 1.439440596207838, "learning_rate": 2.4919214753606043e-06, "loss": 0.5715, "step": 2621 }, { "epoch": 3.505685618729097, "grad_norm": 1.273486938402825, "learning_rate": 2.4878854884891067e-06, "loss": 0.3725, "step": 2622 }, { "epoch": 3.5070234113712373, "grad_norm": 1.3078232542614525, "learning_rate": 2.4838516899380806e-06, "loss": 0.5162, "step": 2623 }, { "epoch": 3.508361204013378, "grad_norm": 1.282221712403309, "learning_rate": 2.4798200832213933e-06, "loss": 0.5825, "step": 2624 }, { "epoch": 3.5096989966555183, "grad_norm": 0.9502424537898283, "learning_rate": 2.475790671851007e-06, "loss": 0.5197, "step": 2625 }, { "epoch": 3.511036789297659, "grad_norm": 1.1421205829116603, "learning_rate": 2.4717634593369704e-06, "loss": 0.4148, "step": 2626 }, { "epoch": 3.5123745819397993, "grad_norm": 0.6936684877956593, "learning_rate": 2.4677384491874155e-06, "loss": 0.5038, "step": 2627 }, { "epoch": 3.5137123745819396, "grad_norm": 0.9862711821679901, "learning_rate": 2.463715644908557e-06, "loss": 0.5317, "step": 2628 }, { "epoch": 3.5150501672240804, "grad_norm": 1.3831847893467781, "learning_rate": 2.459695050004688e-06, "loss": 0.4395, "step": 2629 }, { "epoch": 3.5163879598662207, "grad_norm": 0.9424475962949549, "learning_rate": 2.4556766679781763e-06, "loss": 0.4812, "step": 2630 }, { "epoch": 3.517725752508361, "grad_norm": 0.9442040423948193, "learning_rate": 2.4516605023294626e-06, "loss": 0.5985, "step": 2631 }, { "epoch": 3.5190635451505017, "grad_norm": 1.1581864613783885, "learning_rate": 2.447646556557057e-06, "loss": 0.5127, "step": 2632 }, { "epoch": 3.5204013377926424, "grad_norm": 0.8311545321735617, "learning_rate": 2.443634834157536e-06, "loss": 0.508, "step": 2633 }, { "epoch": 3.5217391304347827, "grad_norm": 0.799322366620878, "learning_rate": 2.4396253386255386e-06, "loss": 0.5595, "step": 2634 }, { "epoch": 3.523076923076923, "grad_norm": 0.7579520898926658, "learning_rate": 2.4356180734537643e-06, "loss": 0.5461, "step": 2635 }, { "epoch": 3.5244147157190637, "grad_norm": 0.9213113943155367, "learning_rate": 2.4316130421329696e-06, "loss": 0.541, "step": 2636 }, { "epoch": 3.525752508361204, "grad_norm": 1.1239412710031604, "learning_rate": 2.4276102481519655e-06, "loss": 0.5, "step": 2637 }, { "epoch": 3.5270903010033443, "grad_norm": 1.0478137204933347, "learning_rate": 2.4236096949976136e-06, "loss": 0.5312, "step": 2638 }, { "epoch": 3.528428093645485, "grad_norm": 1.21495440916014, "learning_rate": 2.4196113861548233e-06, "loss": 0.4463, "step": 2639 }, { "epoch": 3.5297658862876253, "grad_norm": 1.6227673472892665, "learning_rate": 2.41561532510655e-06, "loss": 0.5425, "step": 2640 }, { "epoch": 3.5311036789297656, "grad_norm": 1.611959845477178, "learning_rate": 2.411621515333788e-06, "loss": 0.3627, "step": 2641 }, { "epoch": 3.5324414715719064, "grad_norm": 1.1748682887230444, "learning_rate": 2.407629960315577e-06, "loss": 0.5326, "step": 2642 }, { "epoch": 3.533779264214047, "grad_norm": 1.0198020155142595, "learning_rate": 2.403640663528986e-06, "loss": 0.4819, "step": 2643 }, { "epoch": 3.5351170568561874, "grad_norm": 1.243465159713057, "learning_rate": 2.3996536284491197e-06, "loss": 0.528, "step": 2644 }, { "epoch": 3.5364548494983277, "grad_norm": 1.1901735269251772, "learning_rate": 2.3956688585491117e-06, "loss": 0.4471, "step": 2645 }, { "epoch": 3.5377926421404684, "grad_norm": 1.029318772935589, "learning_rate": 2.391686357300123e-06, "loss": 0.6183, "step": 2646 }, { "epoch": 3.5391304347826087, "grad_norm": 1.016875272718641, "learning_rate": 2.3877061281713393e-06, "loss": 0.5067, "step": 2647 }, { "epoch": 3.540468227424749, "grad_norm": 1.39468902609096, "learning_rate": 2.383728174629964e-06, "loss": 0.4757, "step": 2648 }, { "epoch": 3.5418060200668897, "grad_norm": 0.8265561273880109, "learning_rate": 2.379752500141222e-06, "loss": 0.4975, "step": 2649 }, { "epoch": 3.54314381270903, "grad_norm": 0.9411405016392146, "learning_rate": 2.3757791081683497e-06, "loss": 0.4278, "step": 2650 }, { "epoch": 3.5444816053511703, "grad_norm": 1.2590046969612736, "learning_rate": 2.371808002172595e-06, "loss": 0.4893, "step": 2651 }, { "epoch": 3.545819397993311, "grad_norm": 0.8617630911844332, "learning_rate": 2.3678391856132203e-06, "loss": 0.4843, "step": 2652 }, { "epoch": 3.5471571906354518, "grad_norm": 1.3506903683989597, "learning_rate": 2.363872661947488e-06, "loss": 0.4734, "step": 2653 }, { "epoch": 3.548494983277592, "grad_norm": 1.1278349207225289, "learning_rate": 2.3599084346306626e-06, "loss": 0.5731, "step": 2654 }, { "epoch": 3.5498327759197323, "grad_norm": 0.8593928918496622, "learning_rate": 2.355946507116012e-06, "loss": 0.5592, "step": 2655 }, { "epoch": 3.551170568561873, "grad_norm": 1.4911020078220958, "learning_rate": 2.3519868828547974e-06, "loss": 0.3568, "step": 2656 }, { "epoch": 3.5525083612040134, "grad_norm": 1.0559471284766486, "learning_rate": 2.348029565296277e-06, "loss": 0.5114, "step": 2657 }, { "epoch": 3.5538461538461537, "grad_norm": 1.2937264409912383, "learning_rate": 2.344074557887696e-06, "loss": 0.5232, "step": 2658 }, { "epoch": 3.5551839464882944, "grad_norm": 0.9863432393212216, "learning_rate": 2.3401218640742894e-06, "loss": 0.5208, "step": 2659 }, { "epoch": 3.5565217391304347, "grad_norm": 0.8556447156277889, "learning_rate": 2.336171487299277e-06, "loss": 0.5356, "step": 2660 }, { "epoch": 3.5578595317725754, "grad_norm": 1.173192822246916, "learning_rate": 2.332223431003859e-06, "loss": 0.5139, "step": 2661 }, { "epoch": 3.5591973244147157, "grad_norm": 1.4052696118388408, "learning_rate": 2.3282776986272143e-06, "loss": 0.5347, "step": 2662 }, { "epoch": 3.5605351170568564, "grad_norm": 1.151358934303147, "learning_rate": 2.324334293606499e-06, "loss": 0.4537, "step": 2663 }, { "epoch": 3.5618729096989967, "grad_norm": 1.8101892247562488, "learning_rate": 2.3203932193768398e-06, "loss": 0.5011, "step": 2664 }, { "epoch": 3.563210702341137, "grad_norm": 1.2975886532539427, "learning_rate": 2.3164544793713345e-06, "loss": 0.6226, "step": 2665 }, { "epoch": 3.5645484949832777, "grad_norm": 0.7602824420585205, "learning_rate": 2.3125180770210464e-06, "loss": 0.5167, "step": 2666 }, { "epoch": 3.565886287625418, "grad_norm": 1.0552652534006313, "learning_rate": 2.3085840157550036e-06, "loss": 0.5489, "step": 2667 }, { "epoch": 3.5672240802675583, "grad_norm": 0.8403290964820144, "learning_rate": 2.3046522990001944e-06, "loss": 0.4372, "step": 2668 }, { "epoch": 3.568561872909699, "grad_norm": 0.8951823782397517, "learning_rate": 2.3007229301815643e-06, "loss": 0.564, "step": 2669 }, { "epoch": 3.5698996655518394, "grad_norm": 1.1100649565088827, "learning_rate": 2.296795912722014e-06, "loss": 0.5338, "step": 2670 }, { "epoch": 3.57123745819398, "grad_norm": 1.0400351330995043, "learning_rate": 2.2928712500423938e-06, "loss": 0.5471, "step": 2671 }, { "epoch": 3.5725752508361204, "grad_norm": 1.1374906566502991, "learning_rate": 2.288948945561509e-06, "loss": 0.4309, "step": 2672 }, { "epoch": 3.573913043478261, "grad_norm": 0.9755146624975823, "learning_rate": 2.2850290026961032e-06, "loss": 0.5052, "step": 2673 }, { "epoch": 3.5752508361204014, "grad_norm": 1.25972550599605, "learning_rate": 2.2811114248608675e-06, "loss": 0.5273, "step": 2674 }, { "epoch": 3.5765886287625417, "grad_norm": 1.216059805504098, "learning_rate": 2.2771962154684303e-06, "loss": 0.583, "step": 2675 }, { "epoch": 3.5779264214046824, "grad_norm": 1.0718987463747893, "learning_rate": 2.2732833779293583e-06, "loss": 0.5007, "step": 2676 }, { "epoch": 3.5792642140468227, "grad_norm": 1.0432111624060738, "learning_rate": 2.2693729156521518e-06, "loss": 0.5795, "step": 2677 }, { "epoch": 3.580602006688963, "grad_norm": 1.436700489444686, "learning_rate": 2.2654648320432403e-06, "loss": 0.5165, "step": 2678 }, { "epoch": 3.5819397993311037, "grad_norm": 1.112002284670176, "learning_rate": 2.2615591305069846e-06, "loss": 0.4702, "step": 2679 }, { "epoch": 3.583277591973244, "grad_norm": 1.0921492805872555, "learning_rate": 2.2576558144456677e-06, "loss": 0.5387, "step": 2680 }, { "epoch": 3.5846153846153848, "grad_norm": 1.2939689005161168, "learning_rate": 2.2537548872594935e-06, "loss": 0.485, "step": 2681 }, { "epoch": 3.585953177257525, "grad_norm": 0.9735640125839026, "learning_rate": 2.2498563523465905e-06, "loss": 0.4837, "step": 2682 }, { "epoch": 3.587290969899666, "grad_norm": 0.9416091672168715, "learning_rate": 2.2459602131029977e-06, "loss": 0.5585, "step": 2683 }, { "epoch": 3.588628762541806, "grad_norm": 0.9460807270561047, "learning_rate": 2.24206647292267e-06, "loss": 0.5233, "step": 2684 }, { "epoch": 3.5899665551839464, "grad_norm": 1.312359780194078, "learning_rate": 2.238175135197471e-06, "loss": 0.4775, "step": 2685 }, { "epoch": 3.591304347826087, "grad_norm": 0.9491545442070325, "learning_rate": 2.234286203317172e-06, "loss": 0.5297, "step": 2686 }, { "epoch": 3.5926421404682274, "grad_norm": 0.9090780495282376, "learning_rate": 2.230399680669449e-06, "loss": 0.4737, "step": 2687 }, { "epoch": 3.5939799331103677, "grad_norm": 1.1652894981062887, "learning_rate": 2.226515570639879e-06, "loss": 0.6075, "step": 2688 }, { "epoch": 3.5953177257525084, "grad_norm": 1.627666925298476, "learning_rate": 2.2226338766119366e-06, "loss": 0.5449, "step": 2689 }, { "epoch": 3.5966555183946487, "grad_norm": 1.2339532328330818, "learning_rate": 2.2187546019669938e-06, "loss": 0.5089, "step": 2690 }, { "epoch": 3.5979933110367894, "grad_norm": 1.2610126367208359, "learning_rate": 2.2148777500843125e-06, "loss": 0.38, "step": 2691 }, { "epoch": 3.5993311036789297, "grad_norm": 1.2163543439390399, "learning_rate": 2.2110033243410462e-06, "loss": 0.5186, "step": 2692 }, { "epoch": 3.6006688963210705, "grad_norm": 0.947930914917415, "learning_rate": 2.207131328112234e-06, "loss": 0.5729, "step": 2693 }, { "epoch": 3.6020066889632107, "grad_norm": 0.9827080731983278, "learning_rate": 2.2032617647707995e-06, "loss": 0.4299, "step": 2694 }, { "epoch": 3.603344481605351, "grad_norm": 1.025341892069626, "learning_rate": 2.1993946376875447e-06, "loss": 0.4603, "step": 2695 }, { "epoch": 3.6046822742474918, "grad_norm": 0.8182009984153075, "learning_rate": 2.1955299502311523e-06, "loss": 0.5376, "step": 2696 }, { "epoch": 3.606020066889632, "grad_norm": 1.1293340267189274, "learning_rate": 2.1916677057681786e-06, "loss": 0.4695, "step": 2697 }, { "epoch": 3.6073578595317723, "grad_norm": 1.2225392830227102, "learning_rate": 2.1878079076630502e-06, "loss": 0.5555, "step": 2698 }, { "epoch": 3.608695652173913, "grad_norm": 1.2526859007449387, "learning_rate": 2.1839505592780658e-06, "loss": 0.4598, "step": 2699 }, { "epoch": 3.6100334448160534, "grad_norm": 0.9933186926419117, "learning_rate": 2.180095663973388e-06, "loss": 0.4704, "step": 2700 }, { "epoch": 3.611371237458194, "grad_norm": 1.178417451438559, "learning_rate": 2.1762432251070404e-06, "loss": 0.5235, "step": 2701 }, { "epoch": 3.6127090301003344, "grad_norm": 0.8806710103803168, "learning_rate": 2.172393246034914e-06, "loss": 0.5593, "step": 2702 }, { "epoch": 3.614046822742475, "grad_norm": 1.336637413385952, "learning_rate": 2.1685457301107506e-06, "loss": 0.4976, "step": 2703 }, { "epoch": 3.6153846153846154, "grad_norm": 1.161953074111897, "learning_rate": 2.1647006806861472e-06, "loss": 0.5237, "step": 2704 }, { "epoch": 3.6167224080267557, "grad_norm": 1.0413687361506643, "learning_rate": 2.1608581011105533e-06, "loss": 0.461, "step": 2705 }, { "epoch": 3.6180602006688964, "grad_norm": 1.1559040469415849, "learning_rate": 2.1570179947312674e-06, "loss": 0.3883, "step": 2706 }, { "epoch": 3.6193979933110367, "grad_norm": 1.3924130284668674, "learning_rate": 2.1531803648934333e-06, "loss": 0.4931, "step": 2707 }, { "epoch": 3.620735785953177, "grad_norm": 1.3636883107158655, "learning_rate": 2.149345214940036e-06, "loss": 0.5037, "step": 2708 }, { "epoch": 3.6220735785953178, "grad_norm": 1.1120133779238095, "learning_rate": 2.145512548211902e-06, "loss": 0.4764, "step": 2709 }, { "epoch": 3.623411371237458, "grad_norm": 0.9419715102379564, "learning_rate": 2.1416823680476945e-06, "loss": 0.5388, "step": 2710 }, { "epoch": 3.624749163879599, "grad_norm": 1.015148924767564, "learning_rate": 2.137854677783907e-06, "loss": 0.4811, "step": 2711 }, { "epoch": 3.626086956521739, "grad_norm": 1.091292975588568, "learning_rate": 2.1340294807548716e-06, "loss": 0.5436, "step": 2712 }, { "epoch": 3.62742474916388, "grad_norm": 1.136904406877191, "learning_rate": 2.130206780292743e-06, "loss": 0.5762, "step": 2713 }, { "epoch": 3.62876254180602, "grad_norm": 1.326347898090345, "learning_rate": 2.1263865797275007e-06, "loss": 0.5322, "step": 2714 }, { "epoch": 3.6301003344481604, "grad_norm": 1.318922292337717, "learning_rate": 2.1225688823869494e-06, "loss": 0.5422, "step": 2715 }, { "epoch": 3.631438127090301, "grad_norm": 1.0373512939307044, "learning_rate": 2.118753691596711e-06, "loss": 0.5405, "step": 2716 }, { "epoch": 3.6327759197324414, "grad_norm": 0.9627059746204993, "learning_rate": 2.1149410106802252e-06, "loss": 0.5667, "step": 2717 }, { "epoch": 3.6341137123745817, "grad_norm": 1.0590757880774533, "learning_rate": 2.1111308429587446e-06, "loss": 0.538, "step": 2718 }, { "epoch": 3.6354515050167224, "grad_norm": 1.2940419612911969, "learning_rate": 2.1073231917513336e-06, "loss": 0.4466, "step": 2719 }, { "epoch": 3.6367892976588627, "grad_norm": 0.9582449334420948, "learning_rate": 2.1035180603748635e-06, "loss": 0.6366, "step": 2720 }, { "epoch": 3.6381270903010035, "grad_norm": 1.2460815196437545, "learning_rate": 2.09971545214401e-06, "loss": 0.5071, "step": 2721 }, { "epoch": 3.6394648829431437, "grad_norm": 0.9654190761054391, "learning_rate": 2.095915370371252e-06, "loss": 0.5479, "step": 2722 }, { "epoch": 3.6408026755852845, "grad_norm": 1.1046251333274495, "learning_rate": 2.0921178183668676e-06, "loss": 0.5567, "step": 2723 }, { "epoch": 3.6421404682274248, "grad_norm": 0.8197329719525103, "learning_rate": 2.088322799438931e-06, "loss": 0.528, "step": 2724 }, { "epoch": 3.643478260869565, "grad_norm": 1.2235132485483484, "learning_rate": 2.084530316893309e-06, "loss": 0.4821, "step": 2725 }, { "epoch": 3.644816053511706, "grad_norm": 0.872894665370101, "learning_rate": 2.08074037403366e-06, "loss": 0.5371, "step": 2726 }, { "epoch": 3.646153846153846, "grad_norm": 0.7465385291901185, "learning_rate": 2.0769529741614297e-06, "loss": 0.4656, "step": 2727 }, { "epoch": 3.6474916387959864, "grad_norm": 1.0772491015527126, "learning_rate": 2.0731681205758485e-06, "loss": 0.4636, "step": 2728 }, { "epoch": 3.648829431438127, "grad_norm": 1.3254179089285165, "learning_rate": 2.069385816573928e-06, "loss": 0.5479, "step": 2729 }, { "epoch": 3.650167224080268, "grad_norm": 1.0861454982976337, "learning_rate": 2.065606065450461e-06, "loss": 0.4301, "step": 2730 }, { "epoch": 3.651505016722408, "grad_norm": 1.2192667285053784, "learning_rate": 2.061828870498012e-06, "loss": 0.5391, "step": 2731 }, { "epoch": 3.6528428093645484, "grad_norm": 1.466934295522067, "learning_rate": 2.0580542350069266e-06, "loss": 0.453, "step": 2732 }, { "epoch": 3.654180602006689, "grad_norm": 1.63903520772004, "learning_rate": 2.054282162265313e-06, "loss": 0.5951, "step": 2733 }, { "epoch": 3.6555183946488294, "grad_norm": 1.2987383663277896, "learning_rate": 2.050512655559051e-06, "loss": 0.5382, "step": 2734 }, { "epoch": 3.6568561872909697, "grad_norm": 1.280171204189432, "learning_rate": 2.046745718171784e-06, "loss": 0.5135, "step": 2735 }, { "epoch": 3.6581939799331105, "grad_norm": 1.1286062307168334, "learning_rate": 2.0429813533849174e-06, "loss": 0.4622, "step": 2736 }, { "epoch": 3.6595317725752508, "grad_norm": 1.407793296904178, "learning_rate": 2.0392195644776153e-06, "loss": 0.4889, "step": 2737 }, { "epoch": 3.660869565217391, "grad_norm": 1.2357317120714642, "learning_rate": 2.0354603547267985e-06, "loss": 0.4448, "step": 2738 }, { "epoch": 3.6622073578595318, "grad_norm": 1.4958679375700499, "learning_rate": 2.0317037274071412e-06, "loss": 0.5059, "step": 2739 }, { "epoch": 3.6635451505016725, "grad_norm": 1.089846549689562, "learning_rate": 2.0279496857910667e-06, "loss": 0.5602, "step": 2740 }, { "epoch": 3.664882943143813, "grad_norm": 1.0669049412627443, "learning_rate": 2.0241982331487465e-06, "loss": 0.4841, "step": 2741 }, { "epoch": 3.666220735785953, "grad_norm": 1.1107212511240356, "learning_rate": 2.0204493727480996e-06, "loss": 0.4356, "step": 2742 }, { "epoch": 3.667558528428094, "grad_norm": 1.0238153940199455, "learning_rate": 2.016703107854783e-06, "loss": 0.5552, "step": 2743 }, { "epoch": 3.668896321070234, "grad_norm": 1.080619716794963, "learning_rate": 2.0129594417321937e-06, "loss": 0.5011, "step": 2744 }, { "epoch": 3.6702341137123744, "grad_norm": 0.8521521541557172, "learning_rate": 2.009218377641466e-06, "loss": 0.6152, "step": 2745 }, { "epoch": 3.671571906354515, "grad_norm": 0.9188051540628714, "learning_rate": 2.0054799188414666e-06, "loss": 0.5107, "step": 2746 }, { "epoch": 3.6729096989966554, "grad_norm": 0.8603633985601806, "learning_rate": 2.0017440685887934e-06, "loss": 0.454, "step": 2747 }, { "epoch": 3.6742474916387957, "grad_norm": 1.0310988610403165, "learning_rate": 1.998010830137771e-06, "loss": 0.5, "step": 2748 }, { "epoch": 3.6755852842809364, "grad_norm": 0.8417086766779424, "learning_rate": 1.99428020674045e-06, "loss": 0.4455, "step": 2749 }, { "epoch": 3.676923076923077, "grad_norm": 0.9532421464423769, "learning_rate": 1.9905522016466023e-06, "loss": 0.5921, "step": 2750 }, { "epoch": 3.6782608695652175, "grad_norm": 1.03248743385432, "learning_rate": 1.9868268181037186e-06, "loss": 0.5228, "step": 2751 }, { "epoch": 3.6795986622073578, "grad_norm": 0.9932662829581713, "learning_rate": 1.9831040593570076e-06, "loss": 0.5697, "step": 2752 }, { "epoch": 3.6809364548494985, "grad_norm": 0.950347902907883, "learning_rate": 1.9793839286493894e-06, "loss": 0.4693, "step": 2753 }, { "epoch": 3.682274247491639, "grad_norm": 1.2617034689664222, "learning_rate": 1.9756664292214962e-06, "loss": 0.573, "step": 2754 }, { "epoch": 3.683612040133779, "grad_norm": 1.553406557221663, "learning_rate": 1.971951564311668e-06, "loss": 0.4524, "step": 2755 }, { "epoch": 3.68494983277592, "grad_norm": 0.8044393600511706, "learning_rate": 1.968239337155949e-06, "loss": 0.5587, "step": 2756 }, { "epoch": 3.68628762541806, "grad_norm": 1.361418053577176, "learning_rate": 1.964529750988086e-06, "loss": 0.5425, "step": 2757 }, { "epoch": 3.687625418060201, "grad_norm": 0.9873384862603606, "learning_rate": 1.960822809039526e-06, "loss": 0.37, "step": 2758 }, { "epoch": 3.688963210702341, "grad_norm": 0.8521128119644632, "learning_rate": 1.9571185145394117e-06, "loss": 0.6293, "step": 2759 }, { "epoch": 3.690301003344482, "grad_norm": 0.9307586971098618, "learning_rate": 1.95341687071458e-06, "loss": 0.6178, "step": 2760 }, { "epoch": 3.691638795986622, "grad_norm": 0.9652151257378236, "learning_rate": 1.949717880789557e-06, "loss": 0.4658, "step": 2761 }, { "epoch": 3.6929765886287624, "grad_norm": 1.2164299382852324, "learning_rate": 1.9460215479865613e-06, "loss": 0.4898, "step": 2762 }, { "epoch": 3.694314381270903, "grad_norm": 1.2877735203922525, "learning_rate": 1.9423278755254933e-06, "loss": 0.5125, "step": 2763 }, { "epoch": 3.6956521739130435, "grad_norm": 1.0751189331295994, "learning_rate": 1.9386368666239364e-06, "loss": 0.6379, "step": 2764 }, { "epoch": 3.6969899665551837, "grad_norm": 1.008365672978253, "learning_rate": 1.9349485244971543e-06, "loss": 0.5166, "step": 2765 }, { "epoch": 3.6983277591973245, "grad_norm": 1.0028104259400559, "learning_rate": 1.9312628523580882e-06, "loss": 0.5966, "step": 2766 }, { "epoch": 3.6996655518394648, "grad_norm": 1.0639953092087757, "learning_rate": 1.927579853417352e-06, "loss": 0.5463, "step": 2767 }, { "epoch": 3.7010033444816055, "grad_norm": 0.9366529696985507, "learning_rate": 1.923899530883232e-06, "loss": 0.551, "step": 2768 }, { "epoch": 3.702341137123746, "grad_norm": 1.0569124812979278, "learning_rate": 1.9202218879616824e-06, "loss": 0.5557, "step": 2769 }, { "epoch": 3.7036789297658865, "grad_norm": 1.156895839482212, "learning_rate": 1.9165469278563243e-06, "loss": 0.5426, "step": 2770 }, { "epoch": 3.705016722408027, "grad_norm": 1.4604738052865445, "learning_rate": 1.912874653768439e-06, "loss": 0.4889, "step": 2771 }, { "epoch": 3.706354515050167, "grad_norm": 1.3296115803063908, "learning_rate": 1.9092050688969736e-06, "loss": 0.4706, "step": 2772 }, { "epoch": 3.707692307692308, "grad_norm": 1.125995003177514, "learning_rate": 1.9055381764385272e-06, "loss": 0.4995, "step": 2773 }, { "epoch": 3.709030100334448, "grad_norm": 1.2951137649556828, "learning_rate": 1.9018739795873558e-06, "loss": 0.4801, "step": 2774 }, { "epoch": 3.7103678929765884, "grad_norm": 1.1705143032644105, "learning_rate": 1.8982124815353665e-06, "loss": 0.4439, "step": 2775 }, { "epoch": 3.711705685618729, "grad_norm": 1.0396800273448454, "learning_rate": 1.8945536854721153e-06, "loss": 0.5106, "step": 2776 }, { "epoch": 3.7130434782608694, "grad_norm": 0.8748715186694537, "learning_rate": 1.8908975945848063e-06, "loss": 0.399, "step": 2777 }, { "epoch": 3.71438127090301, "grad_norm": 0.9743869678260434, "learning_rate": 1.8872442120582845e-06, "loss": 0.4384, "step": 2778 }, { "epoch": 3.7157190635451505, "grad_norm": 1.0383492785348578, "learning_rate": 1.8835935410750372e-06, "loss": 0.4785, "step": 2779 }, { "epoch": 3.717056856187291, "grad_norm": 0.7707971844194171, "learning_rate": 1.8799455848151898e-06, "loss": 0.4869, "step": 2780 }, { "epoch": 3.7183946488294315, "grad_norm": 0.8301149982099637, "learning_rate": 1.8763003464565022e-06, "loss": 0.4827, "step": 2781 }, { "epoch": 3.719732441471572, "grad_norm": 0.8194126248384069, "learning_rate": 1.872657829174367e-06, "loss": 0.5249, "step": 2782 }, { "epoch": 3.7210702341137125, "grad_norm": 1.1796158484813102, "learning_rate": 1.8690180361418058e-06, "loss": 0.4247, "step": 2783 }, { "epoch": 3.722408026755853, "grad_norm": 1.226263510174642, "learning_rate": 1.865380970529469e-06, "loss": 0.4949, "step": 2784 }, { "epoch": 3.723745819397993, "grad_norm": 1.9112734093435215, "learning_rate": 1.8617466355056285e-06, "loss": 0.5113, "step": 2785 }, { "epoch": 3.725083612040134, "grad_norm": 1.2275789299914464, "learning_rate": 1.8581150342361792e-06, "loss": 0.4597, "step": 2786 }, { "epoch": 3.726421404682274, "grad_norm": 1.220824083472637, "learning_rate": 1.854486169884635e-06, "loss": 0.4862, "step": 2787 }, { "epoch": 3.727759197324415, "grad_norm": 0.8838370653150611, "learning_rate": 1.850860045612124e-06, "loss": 0.3953, "step": 2788 }, { "epoch": 3.729096989966555, "grad_norm": 1.0849674106502578, "learning_rate": 1.8472366645773892e-06, "loss": 0.513, "step": 2789 }, { "epoch": 3.730434782608696, "grad_norm": 1.0603855884956943, "learning_rate": 1.8436160299367806e-06, "loss": 0.476, "step": 2790 }, { "epoch": 3.731772575250836, "grad_norm": 1.0988791172503012, "learning_rate": 1.8399981448442623e-06, "loss": 0.5566, "step": 2791 }, { "epoch": 3.7331103678929765, "grad_norm": 1.6264595943920528, "learning_rate": 1.8363830124513975e-06, "loss": 0.4945, "step": 2792 }, { "epoch": 3.734448160535117, "grad_norm": 0.900913610279689, "learning_rate": 1.8327706359073526e-06, "loss": 0.4874, "step": 2793 }, { "epoch": 3.7357859531772575, "grad_norm": 1.029498110156358, "learning_rate": 1.8291610183588949e-06, "loss": 0.4167, "step": 2794 }, { "epoch": 3.7371237458193978, "grad_norm": 1.4311711223976802, "learning_rate": 1.8255541629503865e-06, "loss": 0.5222, "step": 2795 }, { "epoch": 3.7384615384615385, "grad_norm": 1.0850322359644924, "learning_rate": 1.8219500728237849e-06, "loss": 0.4567, "step": 2796 }, { "epoch": 3.739799331103679, "grad_norm": 0.9754746190720558, "learning_rate": 1.8183487511186381e-06, "loss": 0.5943, "step": 2797 }, { "epoch": 3.7411371237458195, "grad_norm": 0.9945333479211921, "learning_rate": 1.8147502009720825e-06, "loss": 0.4204, "step": 2798 }, { "epoch": 3.74247491638796, "grad_norm": 1.1689658016111386, "learning_rate": 1.8111544255188402e-06, "loss": 0.5772, "step": 2799 }, { "epoch": 3.7438127090301005, "grad_norm": 1.3757253400640348, "learning_rate": 1.807561427891214e-06, "loss": 0.5396, "step": 2800 }, { "epoch": 3.745150501672241, "grad_norm": 0.9639230426630965, "learning_rate": 1.8039712112190938e-06, "loss": 0.5727, "step": 2801 }, { "epoch": 3.746488294314381, "grad_norm": 1.7098705735564574, "learning_rate": 1.8003837786299399e-06, "loss": 0.5496, "step": 2802 }, { "epoch": 3.747826086956522, "grad_norm": 1.156534292324238, "learning_rate": 1.79679913324879e-06, "loss": 0.5477, "step": 2803 }, { "epoch": 3.749163879598662, "grad_norm": 1.0831474682991937, "learning_rate": 1.7932172781982532e-06, "loss": 0.4458, "step": 2804 }, { "epoch": 3.7505016722408024, "grad_norm": 1.1741554029196848, "learning_rate": 1.7896382165985094e-06, "loss": 0.5036, "step": 2805 }, { "epoch": 3.751839464882943, "grad_norm": 1.2420546143181406, "learning_rate": 1.7860619515673034e-06, "loss": 0.549, "step": 2806 }, { "epoch": 3.7531772575250835, "grad_norm": 1.3539580180961843, "learning_rate": 1.7824884862199448e-06, "loss": 0.5441, "step": 2807 }, { "epoch": 3.754515050167224, "grad_norm": 0.9258337011749838, "learning_rate": 1.7789178236693045e-06, "loss": 0.5812, "step": 2808 }, { "epoch": 3.7558528428093645, "grad_norm": 1.2404347895678334, "learning_rate": 1.7753499670258106e-06, "loss": 0.4407, "step": 2809 }, { "epoch": 3.7571906354515052, "grad_norm": 0.8889900951333851, "learning_rate": 1.771784919397449e-06, "loss": 0.626, "step": 2810 }, { "epoch": 3.7585284280936455, "grad_norm": 1.4302441151359566, "learning_rate": 1.768222683889757e-06, "loss": 0.5396, "step": 2811 }, { "epoch": 3.759866220735786, "grad_norm": 1.1087226470313356, "learning_rate": 1.764663263605823e-06, "loss": 0.5166, "step": 2812 }, { "epoch": 3.7612040133779265, "grad_norm": 0.8601295940807466, "learning_rate": 1.7611066616462824e-06, "loss": 0.4893, "step": 2813 }, { "epoch": 3.762541806020067, "grad_norm": 0.99694525311689, "learning_rate": 1.7575528811093168e-06, "loss": 0.481, "step": 2814 }, { "epoch": 3.763879598662207, "grad_norm": 0.9742406898247958, "learning_rate": 1.7540019250906481e-06, "loss": 0.551, "step": 2815 }, { "epoch": 3.765217391304348, "grad_norm": 1.0732879732225773, "learning_rate": 1.75045379668354e-06, "loss": 0.4619, "step": 2816 }, { "epoch": 3.766555183946488, "grad_norm": 1.1948869262795818, "learning_rate": 1.746908498978791e-06, "loss": 0.5606, "step": 2817 }, { "epoch": 3.767892976588629, "grad_norm": 1.3774278622012308, "learning_rate": 1.7433660350647347e-06, "loss": 0.4886, "step": 2818 }, { "epoch": 3.769230769230769, "grad_norm": 1.2892585921075699, "learning_rate": 1.7398264080272371e-06, "loss": 0.4831, "step": 2819 }, { "epoch": 3.77056856187291, "grad_norm": 1.126633613077557, "learning_rate": 1.7362896209496894e-06, "loss": 0.546, "step": 2820 }, { "epoch": 3.77190635451505, "grad_norm": 0.9552024092855439, "learning_rate": 1.732755676913015e-06, "loss": 0.4903, "step": 2821 }, { "epoch": 3.7732441471571905, "grad_norm": 1.5509766686738324, "learning_rate": 1.7292245789956552e-06, "loss": 0.5297, "step": 2822 }, { "epoch": 3.774581939799331, "grad_norm": 1.399656607386835, "learning_rate": 1.7256963302735752e-06, "loss": 0.5478, "step": 2823 }, { "epoch": 3.7759197324414715, "grad_norm": 1.4140216308260158, "learning_rate": 1.7221709338202558e-06, "loss": 0.4568, "step": 2824 }, { "epoch": 3.777257525083612, "grad_norm": 1.4276609745219748, "learning_rate": 1.718648392706695e-06, "loss": 0.4437, "step": 2825 }, { "epoch": 3.7785953177257525, "grad_norm": 1.2065888483222593, "learning_rate": 1.715128710001403e-06, "loss": 0.5946, "step": 2826 }, { "epoch": 3.779933110367893, "grad_norm": 1.2492631792265043, "learning_rate": 1.7116118887703997e-06, "loss": 0.5324, "step": 2827 }, { "epoch": 3.7812709030100335, "grad_norm": 1.5240599490418074, "learning_rate": 1.708097932077213e-06, "loss": 0.4473, "step": 2828 }, { "epoch": 3.782608695652174, "grad_norm": 1.0245288826957182, "learning_rate": 1.7045868429828745e-06, "loss": 0.5884, "step": 2829 }, { "epoch": 3.7839464882943146, "grad_norm": 1.176750300422999, "learning_rate": 1.7010786245459166e-06, "loss": 0.4649, "step": 2830 }, { "epoch": 3.785284280936455, "grad_norm": 1.2476419333698683, "learning_rate": 1.697573279822377e-06, "loss": 0.5118, "step": 2831 }, { "epoch": 3.786622073578595, "grad_norm": 1.0512848246100788, "learning_rate": 1.6940708118657838e-06, "loss": 0.4864, "step": 2832 }, { "epoch": 3.787959866220736, "grad_norm": 1.5406592228664642, "learning_rate": 1.6905712237271616e-06, "loss": 0.4587, "step": 2833 }, { "epoch": 3.789297658862876, "grad_norm": 1.413405603431978, "learning_rate": 1.6870745184550257e-06, "loss": 0.5209, "step": 2834 }, { "epoch": 3.7906354515050165, "grad_norm": 1.138886669819129, "learning_rate": 1.6835806990953802e-06, "loss": 0.509, "step": 2835 }, { "epoch": 3.791973244147157, "grad_norm": 0.908271206653858, "learning_rate": 1.680089768691716e-06, "loss": 0.4779, "step": 2836 }, { "epoch": 3.793311036789298, "grad_norm": 1.0521016375539298, "learning_rate": 1.6766017302850068e-06, "loss": 0.4938, "step": 2837 }, { "epoch": 3.794648829431438, "grad_norm": 0.9109512435173277, "learning_rate": 1.6731165869137073e-06, "loss": 0.4416, "step": 2838 }, { "epoch": 3.7959866220735785, "grad_norm": 1.4728365060159911, "learning_rate": 1.6696343416137495e-06, "loss": 0.4475, "step": 2839 }, { "epoch": 3.7973244147157192, "grad_norm": 1.2345625023635316, "learning_rate": 1.6661549974185426e-06, "loss": 0.4964, "step": 2840 }, { "epoch": 3.7986622073578595, "grad_norm": 0.9357722067752265, "learning_rate": 1.6626785573589667e-06, "loss": 0.5594, "step": 2841 }, { "epoch": 3.8, "grad_norm": 1.4760013836448023, "learning_rate": 1.6592050244633733e-06, "loss": 0.53, "step": 2842 }, { "epoch": 3.8013377926421406, "grad_norm": 1.288497198257437, "learning_rate": 1.6557344017575817e-06, "loss": 0.507, "step": 2843 }, { "epoch": 3.802675585284281, "grad_norm": 0.8107464410780175, "learning_rate": 1.6522666922648745e-06, "loss": 0.5719, "step": 2844 }, { "epoch": 3.804013377926421, "grad_norm": 0.9512263349794913, "learning_rate": 1.6488018990059985e-06, "loss": 0.4872, "step": 2845 }, { "epoch": 3.805351170568562, "grad_norm": 1.068295968664807, "learning_rate": 1.6453400249991587e-06, "loss": 0.514, "step": 2846 }, { "epoch": 3.8066889632107026, "grad_norm": 1.0369997464326408, "learning_rate": 1.6418810732600177e-06, "loss": 0.5094, "step": 2847 }, { "epoch": 3.808026755852843, "grad_norm": 1.2476667997074127, "learning_rate": 1.6384250468016932e-06, "loss": 0.5468, "step": 2848 }, { "epoch": 3.809364548494983, "grad_norm": 1.0670836597186626, "learning_rate": 1.6349719486347533e-06, "loss": 0.5523, "step": 2849 }, { "epoch": 3.810702341137124, "grad_norm": 0.8480796181790052, "learning_rate": 1.6315217817672142e-06, "loss": 0.4838, "step": 2850 }, { "epoch": 3.812040133779264, "grad_norm": 1.0278033899014458, "learning_rate": 1.6280745492045435e-06, "loss": 0.4883, "step": 2851 }, { "epoch": 3.8133779264214045, "grad_norm": 1.009216836318743, "learning_rate": 1.6246302539496483e-06, "loss": 0.5599, "step": 2852 }, { "epoch": 3.8147157190635452, "grad_norm": 0.9679529939222131, "learning_rate": 1.6211888990028785e-06, "loss": 0.5071, "step": 2853 }, { "epoch": 3.8160535117056855, "grad_norm": 1.003131147959612, "learning_rate": 1.617750487362022e-06, "loss": 0.4993, "step": 2854 }, { "epoch": 3.8173913043478263, "grad_norm": 0.7799824335634222, "learning_rate": 1.614315022022303e-06, "loss": 0.5136, "step": 2855 }, { "epoch": 3.8187290969899665, "grad_norm": 1.147014157063089, "learning_rate": 1.6108825059763794e-06, "loss": 0.4686, "step": 2856 }, { "epoch": 3.8200668896321073, "grad_norm": 0.9815557732778651, "learning_rate": 1.6074529422143398e-06, "loss": 0.5014, "step": 2857 }, { "epoch": 3.8214046822742476, "grad_norm": 1.108592428293899, "learning_rate": 1.6040263337237017e-06, "loss": 0.5009, "step": 2858 }, { "epoch": 3.822742474916388, "grad_norm": 1.222583435169044, "learning_rate": 1.6006026834894068e-06, "loss": 0.557, "step": 2859 }, { "epoch": 3.8240802675585286, "grad_norm": 0.7115082053163928, "learning_rate": 1.5971819944938194e-06, "loss": 0.4954, "step": 2860 }, { "epoch": 3.825418060200669, "grad_norm": 1.2824273827813206, "learning_rate": 1.5937642697167288e-06, "loss": 0.5507, "step": 2861 }, { "epoch": 3.826755852842809, "grad_norm": 1.069615007594308, "learning_rate": 1.5903495121353373e-06, "loss": 0.5319, "step": 2862 }, { "epoch": 3.82809364548495, "grad_norm": 1.0649656532586111, "learning_rate": 1.5869377247242645e-06, "loss": 0.5001, "step": 2863 }, { "epoch": 3.82943143812709, "grad_norm": 0.9843900424382506, "learning_rate": 1.5835289104555417e-06, "loss": 0.5672, "step": 2864 }, { "epoch": 3.830769230769231, "grad_norm": 0.9364499874003267, "learning_rate": 1.5801230722986104e-06, "loss": 0.506, "step": 2865 }, { "epoch": 3.832107023411371, "grad_norm": 1.2738024360775924, "learning_rate": 1.5767202132203207e-06, "loss": 0.6448, "step": 2866 }, { "epoch": 3.833444816053512, "grad_norm": 1.1443699532685905, "learning_rate": 1.5733203361849265e-06, "loss": 0.4767, "step": 2867 }, { "epoch": 3.8347826086956522, "grad_norm": 0.8723628051338509, "learning_rate": 1.5699234441540845e-06, "loss": 0.6658, "step": 2868 }, { "epoch": 3.8361204013377925, "grad_norm": 0.9947779518106578, "learning_rate": 1.5665295400868513e-06, "loss": 0.5491, "step": 2869 }, { "epoch": 3.8374581939799333, "grad_norm": 0.9039422073156427, "learning_rate": 1.5631386269396798e-06, "loss": 0.577, "step": 2870 }, { "epoch": 3.8387959866220736, "grad_norm": 0.800626974016713, "learning_rate": 1.5597507076664187e-06, "loss": 0.6195, "step": 2871 }, { "epoch": 3.840133779264214, "grad_norm": 0.7719911675662327, "learning_rate": 1.5563657852183072e-06, "loss": 0.5433, "step": 2872 }, { "epoch": 3.8414715719063546, "grad_norm": 0.7483106753831306, "learning_rate": 1.5529838625439763e-06, "loss": 0.5935, "step": 2873 }, { "epoch": 3.842809364548495, "grad_norm": 1.0497281494194306, "learning_rate": 1.549604942589441e-06, "loss": 0.4871, "step": 2874 }, { "epoch": 3.8441471571906356, "grad_norm": 1.0823163143841443, "learning_rate": 1.546229028298103e-06, "loss": 0.5683, "step": 2875 }, { "epoch": 3.845484949832776, "grad_norm": 0.9879834359166172, "learning_rate": 1.5428561226107442e-06, "loss": 0.5415, "step": 2876 }, { "epoch": 3.8468227424749166, "grad_norm": 1.4626535740127997, "learning_rate": 1.5394862284655266e-06, "loss": 0.4621, "step": 2877 }, { "epoch": 3.848160535117057, "grad_norm": 1.427684717454617, "learning_rate": 1.5361193487979881e-06, "loss": 0.5608, "step": 2878 }, { "epoch": 3.849498327759197, "grad_norm": 1.118021461176974, "learning_rate": 1.5327554865410415e-06, "loss": 0.5635, "step": 2879 }, { "epoch": 3.850836120401338, "grad_norm": 1.1385473456811828, "learning_rate": 1.5293946446249686e-06, "loss": 0.4324, "step": 2880 }, { "epoch": 3.8521739130434782, "grad_norm": 1.1395029604265214, "learning_rate": 1.526036825977426e-06, "loss": 0.4644, "step": 2881 }, { "epoch": 3.8535117056856185, "grad_norm": 1.4030875472182247, "learning_rate": 1.5226820335234316e-06, "loss": 0.5029, "step": 2882 }, { "epoch": 3.8548494983277592, "grad_norm": 1.3722537956517142, "learning_rate": 1.5193302701853674e-06, "loss": 0.5693, "step": 2883 }, { "epoch": 3.8561872909698995, "grad_norm": 1.2733490029784256, "learning_rate": 1.5159815388829784e-06, "loss": 0.5581, "step": 2884 }, { "epoch": 3.8575250836120403, "grad_norm": 1.1978080734014718, "learning_rate": 1.5126358425333677e-06, "loss": 0.5584, "step": 2885 }, { "epoch": 3.8588628762541806, "grad_norm": 1.279766200330289, "learning_rate": 1.509293184050995e-06, "loss": 0.6092, "step": 2886 }, { "epoch": 3.8602006688963213, "grad_norm": 1.1078376637454932, "learning_rate": 1.5059535663476731e-06, "loss": 0.5062, "step": 2887 }, { "epoch": 3.8615384615384616, "grad_norm": 1.4458937411848025, "learning_rate": 1.5026169923325668e-06, "loss": 0.5288, "step": 2888 }, { "epoch": 3.862876254180602, "grad_norm": 1.3924707564182521, "learning_rate": 1.499283464912188e-06, "loss": 0.5156, "step": 2889 }, { "epoch": 3.8642140468227426, "grad_norm": 0.9704299615563957, "learning_rate": 1.4959529869903948e-06, "loss": 0.4202, "step": 2890 }, { "epoch": 3.865551839464883, "grad_norm": 1.0815170364352924, "learning_rate": 1.4926255614683931e-06, "loss": 0.5536, "step": 2891 }, { "epoch": 3.866889632107023, "grad_norm": 1.036858232070385, "learning_rate": 1.4893011912447248e-06, "loss": 0.5399, "step": 2892 }, { "epoch": 3.868227424749164, "grad_norm": 1.0042277173961887, "learning_rate": 1.4859798792152713e-06, "loss": 0.541, "step": 2893 }, { "epoch": 3.869565217391304, "grad_norm": 1.1154398476095593, "learning_rate": 1.4826616282732509e-06, "loss": 0.5803, "step": 2894 }, { "epoch": 3.870903010033445, "grad_norm": 1.0034627205689937, "learning_rate": 1.4793464413092161e-06, "loss": 0.3557, "step": 2895 }, { "epoch": 3.8722408026755852, "grad_norm": 1.1925792508544344, "learning_rate": 1.4760343212110484e-06, "loss": 0.5102, "step": 2896 }, { "epoch": 3.873578595317726, "grad_norm": 1.6095972331812292, "learning_rate": 1.4727252708639589e-06, "loss": 0.525, "step": 2897 }, { "epoch": 3.8749163879598663, "grad_norm": 1.2414805706525023, "learning_rate": 1.4694192931504842e-06, "loss": 0.4299, "step": 2898 }, { "epoch": 3.8762541806020065, "grad_norm": 1.3577554885978997, "learning_rate": 1.4661163909504855e-06, "loss": 0.398, "step": 2899 }, { "epoch": 3.8775919732441473, "grad_norm": 1.196515984256643, "learning_rate": 1.4628165671411426e-06, "loss": 0.531, "step": 2900 }, { "epoch": 3.8789297658862876, "grad_norm": 1.2248442627558749, "learning_rate": 1.459519824596956e-06, "loss": 0.5414, "step": 2901 }, { "epoch": 3.880267558528428, "grad_norm": 1.0857181880583269, "learning_rate": 1.4562261661897415e-06, "loss": 0.4857, "step": 2902 }, { "epoch": 3.8816053511705686, "grad_norm": 0.947014431867944, "learning_rate": 1.4529355947886265e-06, "loss": 0.5054, "step": 2903 }, { "epoch": 3.882943143812709, "grad_norm": 1.3167441807551157, "learning_rate": 1.4496481132600516e-06, "loss": 0.4372, "step": 2904 }, { "epoch": 3.8842809364548496, "grad_norm": 1.0689555564772688, "learning_rate": 1.4463637244677648e-06, "loss": 0.538, "step": 2905 }, { "epoch": 3.88561872909699, "grad_norm": 0.9861566810687188, "learning_rate": 1.4430824312728197e-06, "loss": 0.4819, "step": 2906 }, { "epoch": 3.8869565217391306, "grad_norm": 1.1672958768849708, "learning_rate": 1.4398042365335745e-06, "loss": 0.5026, "step": 2907 }, { "epoch": 3.888294314381271, "grad_norm": 1.0619227340195545, "learning_rate": 1.4365291431056871e-06, "loss": 0.5952, "step": 2908 }, { "epoch": 3.8896321070234112, "grad_norm": 0.9010647271584393, "learning_rate": 1.4332571538421136e-06, "loss": 0.4731, "step": 2909 }, { "epoch": 3.890969899665552, "grad_norm": 1.1835720281766922, "learning_rate": 1.4299882715931062e-06, "loss": 0.4583, "step": 2910 }, { "epoch": 3.8923076923076922, "grad_norm": 1.4141236145247007, "learning_rate": 1.4267224992062134e-06, "loss": 0.412, "step": 2911 }, { "epoch": 3.8936454849498325, "grad_norm": 1.3348921163337462, "learning_rate": 1.4234598395262706e-06, "loss": 0.5635, "step": 2912 }, { "epoch": 3.8949832775919733, "grad_norm": 0.8803193220928044, "learning_rate": 1.4202002953954042e-06, "loss": 0.56, "step": 2913 }, { "epoch": 3.8963210702341136, "grad_norm": 1.1197636093129515, "learning_rate": 1.4169438696530246e-06, "loss": 0.6002, "step": 2914 }, { "epoch": 3.8976588628762543, "grad_norm": 0.8934463912125851, "learning_rate": 1.4136905651358284e-06, "loss": 0.5522, "step": 2915 }, { "epoch": 3.8989966555183946, "grad_norm": 1.0455034100767207, "learning_rate": 1.410440384677791e-06, "loss": 0.5031, "step": 2916 }, { "epoch": 3.9003344481605353, "grad_norm": 0.7348162565469404, "learning_rate": 1.4071933311101675e-06, "loss": 0.4278, "step": 2917 }, { "epoch": 3.9016722408026756, "grad_norm": 0.8705343334988522, "learning_rate": 1.4039494072614884e-06, "loss": 0.4367, "step": 2918 }, { "epoch": 3.903010033444816, "grad_norm": 0.8030445852512118, "learning_rate": 1.4007086159575595e-06, "loss": 0.5217, "step": 2919 }, { "epoch": 3.9043478260869566, "grad_norm": 0.9441471804650895, "learning_rate": 1.3974709600214541e-06, "loss": 0.5218, "step": 2920 }, { "epoch": 3.905685618729097, "grad_norm": 0.7460023971017935, "learning_rate": 1.3942364422735205e-06, "loss": 0.4559, "step": 2921 }, { "epoch": 3.907023411371237, "grad_norm": 0.834309555704509, "learning_rate": 1.3910050655313679e-06, "loss": 0.4883, "step": 2922 }, { "epoch": 3.908361204013378, "grad_norm": 0.9152473992573408, "learning_rate": 1.3877768326098712e-06, "loss": 0.5791, "step": 2923 }, { "epoch": 3.9096989966555182, "grad_norm": 0.8540895912789865, "learning_rate": 1.3845517463211667e-06, "loss": 0.5684, "step": 2924 }, { "epoch": 3.911036789297659, "grad_norm": 0.949487705439836, "learning_rate": 1.3813298094746491e-06, "loss": 0.5812, "step": 2925 }, { "epoch": 3.9123745819397993, "grad_norm": 0.9335457408634036, "learning_rate": 1.3781110248769709e-06, "loss": 0.4809, "step": 2926 }, { "epoch": 3.91371237458194, "grad_norm": 1.0645538862106634, "learning_rate": 1.374895395332037e-06, "loss": 0.4971, "step": 2927 }, { "epoch": 3.9150501672240803, "grad_norm": 1.1652208442947645, "learning_rate": 1.371682923641005e-06, "loss": 0.5436, "step": 2928 }, { "epoch": 3.9163879598662206, "grad_norm": 0.8473220726969214, "learning_rate": 1.3684736126022812e-06, "loss": 0.592, "step": 2929 }, { "epoch": 3.9177257525083613, "grad_norm": 1.2275523832673212, "learning_rate": 1.3652674650115193e-06, "loss": 0.6142, "step": 2930 }, { "epoch": 3.9190635451505016, "grad_norm": 1.3518943805940118, "learning_rate": 1.362064483661617e-06, "loss": 0.4861, "step": 2931 }, { "epoch": 3.920401337792642, "grad_norm": 1.2632860660571075, "learning_rate": 1.3588646713427128e-06, "loss": 0.5058, "step": 2932 }, { "epoch": 3.9217391304347826, "grad_norm": 1.1762919334708515, "learning_rate": 1.3556680308421865e-06, "loss": 0.5123, "step": 2933 }, { "epoch": 3.9230769230769234, "grad_norm": 1.1277205322322474, "learning_rate": 1.352474564944653e-06, "loss": 0.5169, "step": 2934 }, { "epoch": 3.9244147157190636, "grad_norm": 2.025006045995579, "learning_rate": 1.349284276431963e-06, "loss": 0.5369, "step": 2935 }, { "epoch": 3.925752508361204, "grad_norm": 0.9614924091495232, "learning_rate": 1.3460971680831996e-06, "loss": 0.5666, "step": 2936 }, { "epoch": 3.9270903010033447, "grad_norm": 1.0880781903077943, "learning_rate": 1.3429132426746743e-06, "loss": 0.5241, "step": 2937 }, { "epoch": 3.928428093645485, "grad_norm": 1.07781087735515, "learning_rate": 1.339732502979928e-06, "loss": 0.4634, "step": 2938 }, { "epoch": 3.9297658862876252, "grad_norm": 1.0762222524184322, "learning_rate": 1.3365549517697234e-06, "loss": 0.555, "step": 2939 }, { "epoch": 3.931103678929766, "grad_norm": 1.2539202468617592, "learning_rate": 1.3333805918120473e-06, "loss": 0.49, "step": 2940 }, { "epoch": 3.9324414715719063, "grad_norm": 1.024181833989656, "learning_rate": 1.33020942587211e-06, "loss": 0.4479, "step": 2941 }, { "epoch": 3.9337792642140466, "grad_norm": 1.4660061310551966, "learning_rate": 1.3270414567123342e-06, "loss": 0.4652, "step": 2942 }, { "epoch": 3.9351170568561873, "grad_norm": 1.1438379357668604, "learning_rate": 1.3238766870923592e-06, "loss": 0.5145, "step": 2943 }, { "epoch": 3.936454849498328, "grad_norm": 1.2046168797202659, "learning_rate": 1.3207151197690392e-06, "loss": 0.4673, "step": 2944 }, { "epoch": 3.9377926421404683, "grad_norm": 0.8919031365196534, "learning_rate": 1.3175567574964372e-06, "loss": 0.5174, "step": 2945 }, { "epoch": 3.9391304347826086, "grad_norm": 0.9229897230911328, "learning_rate": 1.3144016030258244e-06, "loss": 0.4863, "step": 2946 }, { "epoch": 3.9404682274247493, "grad_norm": 1.3462801605640877, "learning_rate": 1.3112496591056778e-06, "loss": 0.513, "step": 2947 }, { "epoch": 3.9418060200668896, "grad_norm": 1.1527755887109314, "learning_rate": 1.3081009284816776e-06, "loss": 0.5046, "step": 2948 }, { "epoch": 3.94314381270903, "grad_norm": 1.0635551096282063, "learning_rate": 1.3049554138967052e-06, "loss": 0.4076, "step": 2949 }, { "epoch": 3.9444816053511706, "grad_norm": 1.3281130711314482, "learning_rate": 1.301813118090839e-06, "loss": 0.4629, "step": 2950 }, { "epoch": 3.945819397993311, "grad_norm": 0.9573670995734417, "learning_rate": 1.2986740438013579e-06, "loss": 0.374, "step": 2951 }, { "epoch": 3.9471571906354512, "grad_norm": 0.9556276545920446, "learning_rate": 1.2955381937627293e-06, "loss": 0.5726, "step": 2952 }, { "epoch": 3.948494983277592, "grad_norm": 0.9683903072684524, "learning_rate": 1.2924055707066141e-06, "loss": 0.4746, "step": 2953 }, { "epoch": 3.9498327759197327, "grad_norm": 1.2765792410220775, "learning_rate": 1.2892761773618628e-06, "loss": 0.6309, "step": 2954 }, { "epoch": 3.951170568561873, "grad_norm": 1.2285929649218201, "learning_rate": 1.286150016454511e-06, "loss": 0.5188, "step": 2955 }, { "epoch": 3.9525083612040133, "grad_norm": 1.231380933719544, "learning_rate": 1.2830270907077797e-06, "loss": 0.5101, "step": 2956 }, { "epoch": 3.953846153846154, "grad_norm": 1.0413016221918954, "learning_rate": 1.279907402842071e-06, "loss": 0.447, "step": 2957 }, { "epoch": 3.9551839464882943, "grad_norm": 0.9870946469986497, "learning_rate": 1.2767909555749676e-06, "loss": 0.4438, "step": 2958 }, { "epoch": 3.9565217391304346, "grad_norm": 1.24804966173006, "learning_rate": 1.2736777516212267e-06, "loss": 0.4615, "step": 2959 }, { "epoch": 3.9578595317725753, "grad_norm": 0.9098410591363367, "learning_rate": 1.2705677936927841e-06, "loss": 0.5295, "step": 2960 }, { "epoch": 3.9591973244147156, "grad_norm": 1.2474627345604283, "learning_rate": 1.267461084498744e-06, "loss": 0.524, "step": 2961 }, { "epoch": 3.9605351170568563, "grad_norm": 1.17452828573256, "learning_rate": 1.2643576267453832e-06, "loss": 0.5418, "step": 2962 }, { "epoch": 3.9618729096989966, "grad_norm": 1.208058460681235, "learning_rate": 1.2612574231361463e-06, "loss": 0.4603, "step": 2963 }, { "epoch": 3.9632107023411374, "grad_norm": 1.4781233102921125, "learning_rate": 1.2581604763716404e-06, "loss": 0.4694, "step": 2964 }, { "epoch": 3.9645484949832777, "grad_norm": 1.1750649749963906, "learning_rate": 1.2550667891496394e-06, "loss": 0.5077, "step": 2965 }, { "epoch": 3.965886287625418, "grad_norm": 1.1472971262376233, "learning_rate": 1.2519763641650739e-06, "loss": 0.5292, "step": 2966 }, { "epoch": 3.9672240802675587, "grad_norm": 1.091846447737041, "learning_rate": 1.2488892041100364e-06, "loss": 0.4847, "step": 2967 }, { "epoch": 3.968561872909699, "grad_norm": 0.8888111990494002, "learning_rate": 1.2458053116737722e-06, "loss": 0.5488, "step": 2968 }, { "epoch": 3.9698996655518393, "grad_norm": 1.0796020783339275, "learning_rate": 1.2427246895426826e-06, "loss": 0.5716, "step": 2969 }, { "epoch": 3.97123745819398, "grad_norm": 0.8033198207154273, "learning_rate": 1.2396473404003162e-06, "loss": 0.529, "step": 2970 }, { "epoch": 3.9725752508361203, "grad_norm": 0.9821051096695236, "learning_rate": 1.2365732669273778e-06, "loss": 0.5206, "step": 2971 }, { "epoch": 3.973913043478261, "grad_norm": 1.3658461734101113, "learning_rate": 1.233502471801712e-06, "loss": 0.4263, "step": 2972 }, { "epoch": 3.9752508361204013, "grad_norm": 1.4094130984743534, "learning_rate": 1.2304349576983094e-06, "loss": 0.5028, "step": 2973 }, { "epoch": 3.976588628762542, "grad_norm": 1.1136001269560205, "learning_rate": 1.2273707272893038e-06, "loss": 0.5276, "step": 2974 }, { "epoch": 3.9779264214046823, "grad_norm": 1.2336978530085114, "learning_rate": 1.2243097832439672e-06, "loss": 0.5257, "step": 2975 }, { "epoch": 3.9792642140468226, "grad_norm": 1.5966230787586975, "learning_rate": 1.2212521282287093e-06, "loss": 0.5323, "step": 2976 }, { "epoch": 3.9806020066889634, "grad_norm": 1.1829977243783487, "learning_rate": 1.2181977649070749e-06, "loss": 0.5209, "step": 2977 }, { "epoch": 3.9819397993311036, "grad_norm": 1.1839968416246205, "learning_rate": 1.2151466959397406e-06, "loss": 0.6068, "step": 2978 }, { "epoch": 3.983277591973244, "grad_norm": 0.8034635134715501, "learning_rate": 1.2120989239845149e-06, "loss": 0.5272, "step": 2979 }, { "epoch": 3.9846153846153847, "grad_norm": 1.1637077313900999, "learning_rate": 1.209054451696331e-06, "loss": 0.4983, "step": 2980 }, { "epoch": 3.985953177257525, "grad_norm": 0.8584074187369783, "learning_rate": 1.206013281727253e-06, "loss": 0.5446, "step": 2981 }, { "epoch": 3.9872909698996657, "grad_norm": 0.8435729937788333, "learning_rate": 1.202975416726464e-06, "loss": 0.4471, "step": 2982 }, { "epoch": 3.988628762541806, "grad_norm": 0.897760104405312, "learning_rate": 1.1999408593402688e-06, "loss": 0.5282, "step": 2983 }, { "epoch": 3.9899665551839467, "grad_norm": 1.067933687805809, "learning_rate": 1.1969096122120927e-06, "loss": 0.5076, "step": 2984 }, { "epoch": 3.991304347826087, "grad_norm": 1.1448647125710358, "learning_rate": 1.1938816779824753e-06, "loss": 0.4645, "step": 2985 }, { "epoch": 3.9926421404682273, "grad_norm": 0.9893404400128809, "learning_rate": 1.190857059289071e-06, "loss": 0.5193, "step": 2986 }, { "epoch": 3.993979933110368, "grad_norm": 0.95936718698179, "learning_rate": 1.1878357587666468e-06, "loss": 0.4185, "step": 2987 }, { "epoch": 3.9953177257525083, "grad_norm": 1.3381046301861708, "learning_rate": 1.1848177790470784e-06, "loss": 0.5281, "step": 2988 }, { "epoch": 3.9966555183946486, "grad_norm": 1.2558826175679252, "learning_rate": 1.1818031227593491e-06, "loss": 0.5404, "step": 2989 }, { "epoch": 3.9979933110367893, "grad_norm": 1.1534161971710364, "learning_rate": 1.1787917925295467e-06, "loss": 0.4849, "step": 2990 }, { "epoch": 3.9993311036789296, "grad_norm": 1.2112677889018963, "learning_rate": 1.1757837909808628e-06, "loss": 0.476, "step": 2991 }, { "epoch": 4.0, "grad_norm": 2.078243963667432, "learning_rate": 1.1727791207335876e-06, "loss": 0.5048, "step": 2992 }, { "epoch": 4.001337792642141, "grad_norm": 1.3697549732705172, "learning_rate": 1.1697777844051105e-06, "loss": 0.4347, "step": 2993 }, { "epoch": 4.002675585284281, "grad_norm": 1.2513060256992716, "learning_rate": 1.1667797846099172e-06, "loss": 0.5886, "step": 2994 }, { "epoch": 4.004013377926421, "grad_norm": 1.3503146436639473, "learning_rate": 1.163785123959585e-06, "loss": 0.407, "step": 2995 }, { "epoch": 4.005351170568562, "grad_norm": 0.8201979125332866, "learning_rate": 1.1607938050627849e-06, "loss": 0.498, "step": 2996 }, { "epoch": 4.006688963210703, "grad_norm": 0.9871894037107743, "learning_rate": 1.157805830525275e-06, "loss": 0.5178, "step": 2997 }, { "epoch": 4.008026755852843, "grad_norm": 0.7472505977503369, "learning_rate": 1.1548212029499006e-06, "loss": 0.4833, "step": 2998 }, { "epoch": 4.009364548494983, "grad_norm": 0.8635772967373119, "learning_rate": 1.1518399249365924e-06, "loss": 0.5655, "step": 2999 }, { "epoch": 4.010702341137124, "grad_norm": 1.1291851729279987, "learning_rate": 1.1488619990823602e-06, "loss": 0.4692, "step": 3000 }, { "epoch": 4.012040133779264, "grad_norm": 0.825039183343269, "learning_rate": 1.1458874279812992e-06, "loss": 0.5094, "step": 3001 }, { "epoch": 4.013377926421405, "grad_norm": 1.3179251364837818, "learning_rate": 1.1429162142245775e-06, "loss": 0.4747, "step": 3002 }, { "epoch": 4.014715719063545, "grad_norm": 1.0602542015736942, "learning_rate": 1.1399483604004403e-06, "loss": 0.5782, "step": 3003 }, { "epoch": 4.016053511705685, "grad_norm": 1.130958141504054, "learning_rate": 1.1369838690942059e-06, "loss": 0.51, "step": 3004 }, { "epoch": 4.017391304347826, "grad_norm": 1.0359694482736432, "learning_rate": 1.1340227428882627e-06, "loss": 0.4347, "step": 3005 }, { "epoch": 4.018729096989967, "grad_norm": 0.827490745582384, "learning_rate": 1.1310649843620686e-06, "loss": 0.3361, "step": 3006 }, { "epoch": 4.0200668896321075, "grad_norm": 1.012503038834436, "learning_rate": 1.1281105960921484e-06, "loss": 0.5053, "step": 3007 }, { "epoch": 4.021404682274247, "grad_norm": 1.0591126606763575, "learning_rate": 1.1251595806520893e-06, "loss": 0.5525, "step": 3008 }, { "epoch": 4.022742474916388, "grad_norm": 1.3841852243379218, "learning_rate": 1.1222119406125426e-06, "loss": 0.3582, "step": 3009 }, { "epoch": 4.024080267558529, "grad_norm": 0.9488628119244944, "learning_rate": 1.1192676785412154e-06, "loss": 0.4476, "step": 3010 }, { "epoch": 4.025418060200669, "grad_norm": 1.24303312001541, "learning_rate": 1.1163267970028786e-06, "loss": 0.4393, "step": 3011 }, { "epoch": 4.026755852842809, "grad_norm": 0.9762735154639965, "learning_rate": 1.1133892985593532e-06, "loss": 0.4332, "step": 3012 }, { "epoch": 4.02809364548495, "grad_norm": 1.1167956000574266, "learning_rate": 1.1104551857695133e-06, "loss": 0.4365, "step": 3013 }, { "epoch": 4.02943143812709, "grad_norm": 0.9728360105178839, "learning_rate": 1.1075244611892872e-06, "loss": 0.4916, "step": 3014 }, { "epoch": 4.030769230769231, "grad_norm": 0.9198856238633072, "learning_rate": 1.1045971273716476e-06, "loss": 0.439, "step": 3015 }, { "epoch": 4.032107023411371, "grad_norm": 1.1360539749651555, "learning_rate": 1.1016731868666169e-06, "loss": 0.5205, "step": 3016 }, { "epoch": 4.033444816053512, "grad_norm": 1.4894157937139842, "learning_rate": 1.0987526422212585e-06, "loss": 0.4031, "step": 3017 }, { "epoch": 4.034782608695652, "grad_norm": 0.9935822396812682, "learning_rate": 1.0958354959796807e-06, "loss": 0.5126, "step": 3018 }, { "epoch": 4.036120401337793, "grad_norm": 1.0612014677979393, "learning_rate": 1.0929217506830292e-06, "loss": 0.4962, "step": 3019 }, { "epoch": 4.037458193979933, "grad_norm": 1.2902509342670203, "learning_rate": 1.0900114088694874e-06, "loss": 0.4346, "step": 3020 }, { "epoch": 4.038795986622073, "grad_norm": 1.2684793447025438, "learning_rate": 1.0871044730742752e-06, "loss": 0.3928, "step": 3021 }, { "epoch": 4.040133779264214, "grad_norm": 0.902685388255474, "learning_rate": 1.084200945829645e-06, "loss": 0.5391, "step": 3022 }, { "epoch": 4.041471571906355, "grad_norm": 1.2667629626849035, "learning_rate": 1.081300829664878e-06, "loss": 0.4161, "step": 3023 }, { "epoch": 4.042809364548495, "grad_norm": 1.0976033510461283, "learning_rate": 1.0784041271062867e-06, "loss": 0.471, "step": 3024 }, { "epoch": 4.044147157190635, "grad_norm": 0.854971541490454, "learning_rate": 1.075510840677209e-06, "loss": 0.4548, "step": 3025 }, { "epoch": 4.045484949832776, "grad_norm": 1.114872894236533, "learning_rate": 1.072620972898007e-06, "loss": 0.5665, "step": 3026 }, { "epoch": 4.046822742474917, "grad_norm": 0.804175232760974, "learning_rate": 1.0697345262860638e-06, "loss": 0.5198, "step": 3027 }, { "epoch": 4.048160535117057, "grad_norm": 1.2849369599087466, "learning_rate": 1.0668515033557835e-06, "loss": 0.4315, "step": 3028 }, { "epoch": 4.049498327759197, "grad_norm": 0.895486338446638, "learning_rate": 1.0639719066185867e-06, "loss": 0.5185, "step": 3029 }, { "epoch": 4.050836120401338, "grad_norm": 0.8715200666486368, "learning_rate": 1.061095738582913e-06, "loss": 0.508, "step": 3030 }, { "epoch": 4.052173913043478, "grad_norm": 1.2022089921872878, "learning_rate": 1.05822300175421e-06, "loss": 0.4785, "step": 3031 }, { "epoch": 4.053511705685619, "grad_norm": 1.4025399622662853, "learning_rate": 1.0553536986349393e-06, "loss": 0.5114, "step": 3032 }, { "epoch": 4.054849498327759, "grad_norm": 1.2901714227384646, "learning_rate": 1.0524878317245713e-06, "loss": 0.4995, "step": 3033 }, { "epoch": 4.056187290969899, "grad_norm": 1.1473405354236086, "learning_rate": 1.0496254035195819e-06, "loss": 0.4906, "step": 3034 }, { "epoch": 4.05752508361204, "grad_norm": 0.95530681755914, "learning_rate": 1.0467664165134534e-06, "loss": 0.5209, "step": 3035 }, { "epoch": 4.058862876254181, "grad_norm": 1.1241922633632018, "learning_rate": 1.043910873196668e-06, "loss": 0.4571, "step": 3036 }, { "epoch": 4.0602006688963215, "grad_norm": 1.2887665878170234, "learning_rate": 1.0410587760567104e-06, "loss": 0.4468, "step": 3037 }, { "epoch": 4.061538461538461, "grad_norm": 0.8650544628466936, "learning_rate": 1.0382101275780615e-06, "loss": 0.4318, "step": 3038 }, { "epoch": 4.062876254180602, "grad_norm": 1.2442616114768392, "learning_rate": 1.0353649302421982e-06, "loss": 0.4973, "step": 3039 }, { "epoch": 4.064214046822743, "grad_norm": 0.9155394396810448, "learning_rate": 1.0325231865275936e-06, "loss": 0.5225, "step": 3040 }, { "epoch": 4.065551839464883, "grad_norm": 1.3707351422989955, "learning_rate": 1.0296848989097103e-06, "loss": 0.3961, "step": 3041 }, { "epoch": 4.066889632107023, "grad_norm": 1.181096544541009, "learning_rate": 1.0268500698609996e-06, "loss": 0.4517, "step": 3042 }, { "epoch": 4.068227424749164, "grad_norm": 0.8978160085888359, "learning_rate": 1.0240187018509012e-06, "loss": 0.4514, "step": 3043 }, { "epoch": 4.069565217391304, "grad_norm": 1.0906902959909268, "learning_rate": 1.0211907973458391e-06, "loss": 0.4824, "step": 3044 }, { "epoch": 4.070903010033445, "grad_norm": 0.8299403554285416, "learning_rate": 1.0183663588092214e-06, "loss": 0.444, "step": 3045 }, { "epoch": 4.072240802675585, "grad_norm": 1.1035314401998586, "learning_rate": 1.015545388701435e-06, "loss": 0.5891, "step": 3046 }, { "epoch": 4.073578595317726, "grad_norm": 0.7787447885122997, "learning_rate": 1.012727889479848e-06, "loss": 0.4499, "step": 3047 }, { "epoch": 4.074916387959866, "grad_norm": 0.9257508138359074, "learning_rate": 1.0099138635988026e-06, "loss": 0.484, "step": 3048 }, { "epoch": 4.076254180602007, "grad_norm": 1.0610124735748436, "learning_rate": 1.007103313509617e-06, "loss": 0.4625, "step": 3049 }, { "epoch": 4.0775919732441475, "grad_norm": 1.392653624565195, "learning_rate": 1.0042962416605805e-06, "loss": 0.5287, "step": 3050 }, { "epoch": 4.078929765886287, "grad_norm": 0.8530007900141767, "learning_rate": 1.0014926504969535e-06, "loss": 0.5016, "step": 3051 }, { "epoch": 4.080267558528428, "grad_norm": 1.0493221577007992, "learning_rate": 9.986925424609633e-07, "loss": 0.4856, "step": 3052 }, { "epoch": 4.081605351170569, "grad_norm": 0.9700549162172354, "learning_rate": 9.95895919991804e-07, "loss": 0.5699, "step": 3053 }, { "epoch": 4.082943143812709, "grad_norm": 0.9101893068948885, "learning_rate": 9.93102785525632e-07, "loss": 0.5418, "step": 3054 }, { "epoch": 4.084280936454849, "grad_norm": 1.0944431927435652, "learning_rate": 9.903131414955674e-07, "loss": 0.4144, "step": 3055 }, { "epoch": 4.08561872909699, "grad_norm": 0.9415690293319794, "learning_rate": 9.87526990331688e-07, "loss": 0.5347, "step": 3056 }, { "epoch": 4.086956521739131, "grad_norm": 1.132979142633828, "learning_rate": 9.847443344610296e-07, "loss": 0.4514, "step": 3057 }, { "epoch": 4.088294314381271, "grad_norm": 1.0778101135104898, "learning_rate": 9.819651763075833e-07, "loss": 0.4307, "step": 3058 }, { "epoch": 4.089632107023411, "grad_norm": 0.9806882496516924, "learning_rate": 9.791895182922911e-07, "loss": 0.4883, "step": 3059 }, { "epoch": 4.090969899665552, "grad_norm": 1.3570961195470788, "learning_rate": 9.764173628330514e-07, "loss": 0.4233, "step": 3060 }, { "epoch": 4.092307692307692, "grad_norm": 1.1499700489252427, "learning_rate": 9.73648712344707e-07, "loss": 0.5265, "step": 3061 }, { "epoch": 4.093645484949833, "grad_norm": 1.0208995910783192, "learning_rate": 9.708835692390483e-07, "loss": 0.4768, "step": 3062 }, { "epoch": 4.0949832775919734, "grad_norm": 1.237420806345756, "learning_rate": 9.681219359248106e-07, "loss": 0.452, "step": 3063 }, { "epoch": 4.096321070234113, "grad_norm": 1.154109325908116, "learning_rate": 9.65363814807672e-07, "loss": 0.4345, "step": 3064 }, { "epoch": 4.097658862876254, "grad_norm": 1.1341108633330539, "learning_rate": 9.626092082902511e-07, "loss": 0.4738, "step": 3065 }, { "epoch": 4.098996655518395, "grad_norm": 1.6428978211019252, "learning_rate": 9.59858118772105e-07, "loss": 0.4035, "step": 3066 }, { "epoch": 4.1003344481605355, "grad_norm": 1.2420233754202705, "learning_rate": 9.571105486497268e-07, "loss": 0.5496, "step": 3067 }, { "epoch": 4.101672240802675, "grad_norm": 1.3254243590378376, "learning_rate": 9.543665003165442e-07, "loss": 0.454, "step": 3068 }, { "epoch": 4.103010033444816, "grad_norm": 1.462743001277944, "learning_rate": 9.516259761629148e-07, "loss": 0.5476, "step": 3069 }, { "epoch": 4.104347826086957, "grad_norm": 1.0390758779067584, "learning_rate": 9.488889785761324e-07, "loss": 0.4125, "step": 3070 }, { "epoch": 4.105685618729097, "grad_norm": 0.9994260877771252, "learning_rate": 9.461555099404119e-07, "loss": 0.3571, "step": 3071 }, { "epoch": 4.107023411371237, "grad_norm": 1.0071065051167483, "learning_rate": 9.434255726368974e-07, "loss": 0.4377, "step": 3072 }, { "epoch": 4.108361204013378, "grad_norm": 1.4663619558797754, "learning_rate": 9.406991690436567e-07, "loss": 0.4087, "step": 3073 }, { "epoch": 4.109698996655518, "grad_norm": 1.3911029170453824, "learning_rate": 9.379763015356785e-07, "loss": 0.3874, "step": 3074 }, { "epoch": 4.111036789297659, "grad_norm": 1.0220437645443745, "learning_rate": 9.352569724848715e-07, "loss": 0.4928, "step": 3075 }, { "epoch": 4.112374581939799, "grad_norm": 1.337089142104088, "learning_rate": 9.325411842600629e-07, "loss": 0.407, "step": 3076 }, { "epoch": 4.11371237458194, "grad_norm": 0.8028582401702052, "learning_rate": 9.298289392269944e-07, "loss": 0.5221, "step": 3077 }, { "epoch": 4.11505016722408, "grad_norm": 0.9877202190141725, "learning_rate": 9.271202397483214e-07, "loss": 0.4376, "step": 3078 }, { "epoch": 4.116387959866221, "grad_norm": 0.9583276123696195, "learning_rate": 9.244150881836117e-07, "loss": 0.4688, "step": 3079 }, { "epoch": 4.1177257525083615, "grad_norm": 1.0128132943788963, "learning_rate": 9.217134868893401e-07, "loss": 0.4679, "step": 3080 }, { "epoch": 4.119063545150501, "grad_norm": 0.9278901743267607, "learning_rate": 9.190154382188921e-07, "loss": 0.5645, "step": 3081 }, { "epoch": 4.120401337792642, "grad_norm": 1.518819269418991, "learning_rate": 9.163209445225557e-07, "loss": 0.3985, "step": 3082 }, { "epoch": 4.121739130434783, "grad_norm": 0.7998406809812477, "learning_rate": 9.13630008147523e-07, "loss": 0.502, "step": 3083 }, { "epoch": 4.123076923076923, "grad_norm": 1.0731779303615951, "learning_rate": 9.109426314378878e-07, "loss": 0.4673, "step": 3084 }, { "epoch": 4.124414715719063, "grad_norm": 1.4421625702641023, "learning_rate": 9.082588167346428e-07, "loss": 0.4345, "step": 3085 }, { "epoch": 4.125752508361204, "grad_norm": 1.1303320162738462, "learning_rate": 9.055785663756778e-07, "loss": 0.4653, "step": 3086 }, { "epoch": 4.127090301003345, "grad_norm": 0.7884669527662823, "learning_rate": 9.029018826957775e-07, "loss": 0.5098, "step": 3087 }, { "epoch": 4.128428093645485, "grad_norm": 1.1668181026636673, "learning_rate": 9.002287680266192e-07, "loss": 0.434, "step": 3088 }, { "epoch": 4.129765886287625, "grad_norm": 0.8046854614006589, "learning_rate": 8.975592246967713e-07, "loss": 0.424, "step": 3089 }, { "epoch": 4.131103678929766, "grad_norm": 0.8704172120300789, "learning_rate": 8.948932550316935e-07, "loss": 0.4283, "step": 3090 }, { "epoch": 4.132441471571906, "grad_norm": 1.2388164141362807, "learning_rate": 8.922308613537295e-07, "loss": 0.4057, "step": 3091 }, { "epoch": 4.133779264214047, "grad_norm": 1.1181216514643986, "learning_rate": 8.895720459821089e-07, "loss": 0.3946, "step": 3092 }, { "epoch": 4.1351170568561875, "grad_norm": 1.2093056456972409, "learning_rate": 8.86916811232944e-07, "loss": 0.3848, "step": 3093 }, { "epoch": 4.136454849498328, "grad_norm": 0.8138379826361167, "learning_rate": 8.842651594192292e-07, "loss": 0.4786, "step": 3094 }, { "epoch": 4.137792642140468, "grad_norm": 1.293370120091419, "learning_rate": 8.816170928508367e-07, "loss": 0.3253, "step": 3095 }, { "epoch": 4.139130434782609, "grad_norm": 1.1774084311216904, "learning_rate": 8.78972613834515e-07, "loss": 0.4854, "step": 3096 }, { "epoch": 4.1404682274247495, "grad_norm": 0.8995739915494196, "learning_rate": 8.763317246738889e-07, "loss": 0.4968, "step": 3097 }, { "epoch": 4.141806020066889, "grad_norm": 1.2459554627653595, "learning_rate": 8.736944276694548e-07, "loss": 0.3797, "step": 3098 }, { "epoch": 4.14314381270903, "grad_norm": 1.0266493711278928, "learning_rate": 8.710607251185799e-07, "loss": 0.495, "step": 3099 }, { "epoch": 4.144481605351171, "grad_norm": 1.2834069376182036, "learning_rate": 8.684306193155034e-07, "loss": 0.543, "step": 3100 }, { "epoch": 4.145819397993311, "grad_norm": 0.9622982504022566, "learning_rate": 8.658041125513267e-07, "loss": 0.4807, "step": 3101 }, { "epoch": 4.147157190635451, "grad_norm": 0.9592511347395818, "learning_rate": 8.631812071140189e-07, "loss": 0.4607, "step": 3102 }, { "epoch": 4.148494983277592, "grad_norm": 0.8238672543729427, "learning_rate": 8.605619052884106e-07, "loss": 0.6361, "step": 3103 }, { "epoch": 4.149832775919732, "grad_norm": 1.4857577403457642, "learning_rate": 8.579462093561947e-07, "loss": 0.5455, "step": 3104 }, { "epoch": 4.151170568561873, "grad_norm": 1.012012348372533, "learning_rate": 8.553341215959215e-07, "loss": 0.4356, "step": 3105 }, { "epoch": 4.1525083612040135, "grad_norm": 1.1786411849624836, "learning_rate": 8.527256442829995e-07, "loss": 0.4739, "step": 3106 }, { "epoch": 4.153846153846154, "grad_norm": 1.0221999486707551, "learning_rate": 8.50120779689691e-07, "loss": 0.5083, "step": 3107 }, { "epoch": 4.155183946488294, "grad_norm": 0.8355231034857724, "learning_rate": 8.475195300851113e-07, "loss": 0.4781, "step": 3108 }, { "epoch": 4.156521739130435, "grad_norm": 1.3397252876797998, "learning_rate": 8.449218977352281e-07, "loss": 0.3762, "step": 3109 }, { "epoch": 4.1578595317725755, "grad_norm": 1.250275089496493, "learning_rate": 8.423278849028565e-07, "loss": 0.4314, "step": 3110 }, { "epoch": 4.159197324414715, "grad_norm": 1.2620587014891522, "learning_rate": 8.397374938476594e-07, "loss": 0.4693, "step": 3111 }, { "epoch": 4.160535117056856, "grad_norm": 1.0838043377632864, "learning_rate": 8.371507268261436e-07, "loss": 0.4224, "step": 3112 }, { "epoch": 4.161872909698997, "grad_norm": 1.1283337702204839, "learning_rate": 8.345675860916613e-07, "loss": 0.4993, "step": 3113 }, { "epoch": 4.1632107023411375, "grad_norm": 0.8077608620506175, "learning_rate": 8.31988073894403e-07, "loss": 0.5557, "step": 3114 }, { "epoch": 4.164548494983277, "grad_norm": 1.0506765936757276, "learning_rate": 8.294121924814014e-07, "loss": 0.359, "step": 3115 }, { "epoch": 4.165886287625418, "grad_norm": 0.9086717780505424, "learning_rate": 8.26839944096523e-07, "loss": 0.4015, "step": 3116 }, { "epoch": 4.167224080267559, "grad_norm": 0.9156184600253692, "learning_rate": 8.242713309804729e-07, "loss": 0.5114, "step": 3117 }, { "epoch": 4.168561872909699, "grad_norm": 0.9448987292643327, "learning_rate": 8.217063553707865e-07, "loss": 0.5556, "step": 3118 }, { "epoch": 4.169899665551839, "grad_norm": 0.950041624170516, "learning_rate": 8.191450195018313e-07, "loss": 0.509, "step": 3119 }, { "epoch": 4.17123745819398, "grad_norm": 1.4434113660705137, "learning_rate": 8.165873256048079e-07, "loss": 0.4954, "step": 3120 }, { "epoch": 4.17257525083612, "grad_norm": 1.0714283516365721, "learning_rate": 8.140332759077397e-07, "loss": 0.4398, "step": 3121 }, { "epoch": 4.173913043478261, "grad_norm": 0.9217798671550184, "learning_rate": 8.114828726354762e-07, "loss": 0.5752, "step": 3122 }, { "epoch": 4.1752508361204015, "grad_norm": 0.9049243516106964, "learning_rate": 8.089361180096927e-07, "loss": 0.4836, "step": 3123 }, { "epoch": 4.176588628762542, "grad_norm": 0.8536762614900795, "learning_rate": 8.063930142488846e-07, "loss": 0.5238, "step": 3124 }, { "epoch": 4.177926421404682, "grad_norm": 0.8056687072364774, "learning_rate": 8.03853563568367e-07, "loss": 0.5584, "step": 3125 }, { "epoch": 4.179264214046823, "grad_norm": 0.9440957076412794, "learning_rate": 8.013177681802736e-07, "loss": 0.5093, "step": 3126 }, { "epoch": 4.1806020066889635, "grad_norm": 1.1328267387427677, "learning_rate": 7.987856302935532e-07, "loss": 0.4458, "step": 3127 }, { "epoch": 4.181939799331103, "grad_norm": 1.376799411850255, "learning_rate": 7.962571521139684e-07, "loss": 0.3852, "step": 3128 }, { "epoch": 4.183277591973244, "grad_norm": 0.9695570466095437, "learning_rate": 7.937323358440935e-07, "loss": 0.5101, "step": 3129 }, { "epoch": 4.184615384615385, "grad_norm": 1.188179953342662, "learning_rate": 7.912111836833158e-07, "loss": 0.4451, "step": 3130 }, { "epoch": 4.185953177257525, "grad_norm": 1.3246081375463237, "learning_rate": 7.886936978278276e-07, "loss": 0.4334, "step": 3131 }, { "epoch": 4.187290969899665, "grad_norm": 1.6184170545291612, "learning_rate": 7.861798804706278e-07, "loss": 0.5012, "step": 3132 }, { "epoch": 4.188628762541806, "grad_norm": 1.0702923659194061, "learning_rate": 7.836697338015203e-07, "loss": 0.4733, "step": 3133 }, { "epoch": 4.189966555183947, "grad_norm": 0.911067963292655, "learning_rate": 7.811632600071117e-07, "loss": 0.5589, "step": 3134 }, { "epoch": 4.191304347826087, "grad_norm": 0.983764813910833, "learning_rate": 7.786604612708093e-07, "loss": 0.5233, "step": 3135 }, { "epoch": 4.1926421404682275, "grad_norm": 1.487222638831298, "learning_rate": 7.761613397728174e-07, "loss": 0.3808, "step": 3136 }, { "epoch": 4.193979933110368, "grad_norm": 0.9446219912415125, "learning_rate": 7.73665897690139e-07, "loss": 0.49, "step": 3137 }, { "epoch": 4.195317725752508, "grad_norm": 0.8662738765006587, "learning_rate": 7.711741371965703e-07, "loss": 0.5102, "step": 3138 }, { "epoch": 4.196655518394649, "grad_norm": 1.142956718223513, "learning_rate": 7.686860604627022e-07, "loss": 0.4031, "step": 3139 }, { "epoch": 4.1979933110367895, "grad_norm": 1.2740104113626058, "learning_rate": 7.662016696559149e-07, "loss": 0.4374, "step": 3140 }, { "epoch": 4.199331103678929, "grad_norm": 1.252610394918203, "learning_rate": 7.637209669403789e-07, "loss": 0.4848, "step": 3141 }, { "epoch": 4.20066889632107, "grad_norm": 1.7950258105874612, "learning_rate": 7.612439544770517e-07, "loss": 0.364, "step": 3142 }, { "epoch": 4.202006688963211, "grad_norm": 1.4897246165654723, "learning_rate": 7.587706344236762e-07, "loss": 0.5109, "step": 3143 }, { "epoch": 4.203344481605352, "grad_norm": 1.0418508518325853, "learning_rate": 7.563010089347789e-07, "loss": 0.4573, "step": 3144 }, { "epoch": 4.204682274247491, "grad_norm": 1.237931088117047, "learning_rate": 7.538350801616673e-07, "loss": 0.5257, "step": 3145 }, { "epoch": 4.206020066889632, "grad_norm": 0.9182130111171042, "learning_rate": 7.513728502524286e-07, "loss": 0.4887, "step": 3146 }, { "epoch": 4.207357859531773, "grad_norm": 0.8520318136818658, "learning_rate": 7.489143213519301e-07, "loss": 0.5162, "step": 3147 }, { "epoch": 4.208695652173913, "grad_norm": 0.965150294995534, "learning_rate": 7.464594956018124e-07, "loss": 0.4323, "step": 3148 }, { "epoch": 4.2100334448160535, "grad_norm": 0.8816852031286756, "learning_rate": 7.440083751404902e-07, "loss": 0.4729, "step": 3149 }, { "epoch": 4.211371237458194, "grad_norm": 0.8751695158874055, "learning_rate": 7.415609621031539e-07, "loss": 0.478, "step": 3150 }, { "epoch": 4.212709030100334, "grad_norm": 1.2576093027761093, "learning_rate": 7.39117258621761e-07, "loss": 0.4657, "step": 3151 }, { "epoch": 4.214046822742475, "grad_norm": 1.4464131574598047, "learning_rate": 7.366772668250394e-07, "loss": 0.4591, "step": 3152 }, { "epoch": 4.2153846153846155, "grad_norm": 0.8942061198990298, "learning_rate": 7.342409888384816e-07, "loss": 0.551, "step": 3153 }, { "epoch": 4.216722408026756, "grad_norm": 0.8635310602519851, "learning_rate": 7.318084267843473e-07, "loss": 0.5121, "step": 3154 }, { "epoch": 4.218060200668896, "grad_norm": 1.6728536072209703, "learning_rate": 7.29379582781658e-07, "loss": 0.4164, "step": 3155 }, { "epoch": 4.219397993311037, "grad_norm": 0.859967176058069, "learning_rate": 7.269544589461968e-07, "loss": 0.5177, "step": 3156 }, { "epoch": 4.2207357859531776, "grad_norm": 0.9805734188817121, "learning_rate": 7.245330573905058e-07, "loss": 0.4978, "step": 3157 }, { "epoch": 4.222073578595317, "grad_norm": 1.7158822304068608, "learning_rate": 7.221153802238845e-07, "loss": 0.4568, "step": 3158 }, { "epoch": 4.223411371237458, "grad_norm": 1.145354243375727, "learning_rate": 7.197014295523879e-07, "loss": 0.5289, "step": 3159 }, { "epoch": 4.224749163879599, "grad_norm": 0.9914763127815794, "learning_rate": 7.172912074788274e-07, "loss": 0.4746, "step": 3160 }, { "epoch": 4.226086956521739, "grad_norm": 0.8012730608285832, "learning_rate": 7.148847161027622e-07, "loss": 0.5163, "step": 3161 }, { "epoch": 4.2274247491638794, "grad_norm": 1.1697322776227443, "learning_rate": 7.12481957520505e-07, "loss": 0.4646, "step": 3162 }, { "epoch": 4.22876254180602, "grad_norm": 1.1568447647105415, "learning_rate": 7.100829338251147e-07, "loss": 0.4334, "step": 3163 }, { "epoch": 4.230100334448161, "grad_norm": 1.628176738798179, "learning_rate": 7.076876471063976e-07, "loss": 0.3841, "step": 3164 }, { "epoch": 4.231438127090301, "grad_norm": 1.1090091662925292, "learning_rate": 7.052960994509056e-07, "loss": 0.4708, "step": 3165 }, { "epoch": 4.2327759197324415, "grad_norm": 1.2216429596122844, "learning_rate": 7.029082929419312e-07, "loss": 0.4478, "step": 3166 }, { "epoch": 4.234113712374582, "grad_norm": 0.9512289020122877, "learning_rate": 7.005242296595099e-07, "loss": 0.3941, "step": 3167 }, { "epoch": 4.235451505016722, "grad_norm": 1.2184760385667233, "learning_rate": 6.981439116804161e-07, "loss": 0.4367, "step": 3168 }, { "epoch": 4.236789297658863, "grad_norm": 1.0622140897803773, "learning_rate": 6.957673410781617e-07, "loss": 0.5188, "step": 3169 }, { "epoch": 4.2381270903010035, "grad_norm": 1.0884329935847066, "learning_rate": 6.93394519922993e-07, "loss": 0.4337, "step": 3170 }, { "epoch": 4.239464882943143, "grad_norm": 0.7780274815014081, "learning_rate": 6.910254502818914e-07, "loss": 0.5019, "step": 3171 }, { "epoch": 4.240802675585284, "grad_norm": 1.158415882223768, "learning_rate": 6.886601342185701e-07, "loss": 0.4816, "step": 3172 }, { "epoch": 4.242140468227425, "grad_norm": 0.966620836992532, "learning_rate": 6.862985737934724e-07, "loss": 0.5217, "step": 3173 }, { "epoch": 4.243478260869566, "grad_norm": 1.164214804032502, "learning_rate": 6.839407710637696e-07, "loss": 0.4959, "step": 3174 }, { "epoch": 4.244816053511705, "grad_norm": 0.9991361809056873, "learning_rate": 6.815867280833611e-07, "loss": 0.5469, "step": 3175 }, { "epoch": 4.246153846153846, "grad_norm": 1.2621783579173382, "learning_rate": 6.792364469028695e-07, "loss": 0.4746, "step": 3176 }, { "epoch": 4.247491638795987, "grad_norm": 1.158610087504897, "learning_rate": 6.768899295696413e-07, "loss": 0.4645, "step": 3177 }, { "epoch": 4.248829431438127, "grad_norm": 1.3278179527568879, "learning_rate": 6.745471781277435e-07, "loss": 0.4813, "step": 3178 }, { "epoch": 4.2501672240802675, "grad_norm": 1.0934333657064705, "learning_rate": 6.722081946179631e-07, "loss": 0.4593, "step": 3179 }, { "epoch": 4.251505016722408, "grad_norm": 0.9262404948014443, "learning_rate": 6.698729810778065e-07, "loss": 0.4222, "step": 3180 }, { "epoch": 4.252842809364548, "grad_norm": 1.0716875269471047, "learning_rate": 6.675415395414942e-07, "loss": 0.5128, "step": 3181 }, { "epoch": 4.254180602006689, "grad_norm": 1.0205455187124168, "learning_rate": 6.652138720399598e-07, "loss": 0.3904, "step": 3182 }, { "epoch": 4.2555183946488295, "grad_norm": 0.8634944956410509, "learning_rate": 6.628899806008515e-07, "loss": 0.4684, "step": 3183 }, { "epoch": 4.25685618729097, "grad_norm": 1.0648782360155975, "learning_rate": 6.605698672485278e-07, "loss": 0.5374, "step": 3184 }, { "epoch": 4.25819397993311, "grad_norm": 0.9071216668017661, "learning_rate": 6.582535340040547e-07, "loss": 0.4921, "step": 3185 }, { "epoch": 4.259531772575251, "grad_norm": 0.9242911992359197, "learning_rate": 6.55940982885207e-07, "loss": 0.5214, "step": 3186 }, { "epoch": 4.260869565217392, "grad_norm": 1.111686327948174, "learning_rate": 6.536322159064634e-07, "loss": 0.5347, "step": 3187 }, { "epoch": 4.262207357859531, "grad_norm": 1.1859843607399865, "learning_rate": 6.513272350790079e-07, "loss": 0.4653, "step": 3188 }, { "epoch": 4.263545150501672, "grad_norm": 0.943779171854495, "learning_rate": 6.490260424107231e-07, "loss": 0.5, "step": 3189 }, { "epoch": 4.264882943143813, "grad_norm": 1.3642528708936033, "learning_rate": 6.467286399061967e-07, "loss": 0.375, "step": 3190 }, { "epoch": 4.266220735785954, "grad_norm": 0.7990322825027136, "learning_rate": 6.444350295667112e-07, "loss": 0.513, "step": 3191 }, { "epoch": 4.2675585284280935, "grad_norm": 0.7807952163399474, "learning_rate": 6.421452133902467e-07, "loss": 0.5166, "step": 3192 }, { "epoch": 4.268896321070234, "grad_norm": 0.9371136375588349, "learning_rate": 6.398591933714771e-07, "loss": 0.4386, "step": 3193 }, { "epoch": 4.270234113712375, "grad_norm": 1.139899256289576, "learning_rate": 6.375769715017716e-07, "loss": 0.4606, "step": 3194 }, { "epoch": 4.271571906354515, "grad_norm": 0.9107733394271479, "learning_rate": 6.352985497691883e-07, "loss": 0.545, "step": 3195 }, { "epoch": 4.2729096989966555, "grad_norm": 1.1190151718289207, "learning_rate": 6.330239301584773e-07, "loss": 0.4429, "step": 3196 }, { "epoch": 4.274247491638796, "grad_norm": 1.3678446649319347, "learning_rate": 6.307531146510754e-07, "loss": 0.4321, "step": 3197 }, { "epoch": 4.275585284280936, "grad_norm": 0.9330636442814773, "learning_rate": 6.284861052251062e-07, "loss": 0.4612, "step": 3198 }, { "epoch": 4.276923076923077, "grad_norm": 1.028907225436757, "learning_rate": 6.262229038553752e-07, "loss": 0.4409, "step": 3199 }, { "epoch": 4.278260869565218, "grad_norm": 1.2633191400060255, "learning_rate": 6.239635125133753e-07, "loss": 0.4439, "step": 3200 }, { "epoch": 4.279598662207357, "grad_norm": 1.1409810168882033, "learning_rate": 6.217079331672777e-07, "loss": 0.4696, "step": 3201 }, { "epoch": 4.280936454849498, "grad_norm": 0.8435313962479654, "learning_rate": 6.194561677819327e-07, "loss": 0.4257, "step": 3202 }, { "epoch": 4.282274247491639, "grad_norm": 1.052949207512104, "learning_rate": 6.172082183188688e-07, "loss": 0.4187, "step": 3203 }, { "epoch": 4.28361204013378, "grad_norm": 1.0655359481558868, "learning_rate": 6.14964086736291e-07, "loss": 0.4073, "step": 3204 }, { "epoch": 4.2849498327759195, "grad_norm": 1.813432755326487, "learning_rate": 6.12723774989078e-07, "loss": 0.3286, "step": 3205 }, { "epoch": 4.28628762541806, "grad_norm": 1.4439982780764242, "learning_rate": 6.104872850287802e-07, "loss": 0.3155, "step": 3206 }, { "epoch": 4.287625418060201, "grad_norm": 1.1222588780743776, "learning_rate": 6.082546188036204e-07, "loss": 0.5114, "step": 3207 }, { "epoch": 4.288963210702341, "grad_norm": 0.9229884104588526, "learning_rate": 6.060257782584889e-07, "loss": 0.4869, "step": 3208 }, { "epoch": 4.2903010033444815, "grad_norm": 0.9408894600320609, "learning_rate": 6.038007653349437e-07, "loss": 0.4009, "step": 3209 }, { "epoch": 4.291638795986622, "grad_norm": 1.4471457024452632, "learning_rate": 6.015795819712117e-07, "loss": 0.4272, "step": 3210 }, { "epoch": 4.292976588628763, "grad_norm": 0.9498721848422018, "learning_rate": 5.99362230102179e-07, "loss": 0.5272, "step": 3211 }, { "epoch": 4.294314381270903, "grad_norm": 1.0264635571680822, "learning_rate": 5.971487116593977e-07, "loss": 0.5298, "step": 3212 }, { "epoch": 4.2956521739130435, "grad_norm": 1.0334605000438322, "learning_rate": 5.949390285710777e-07, "loss": 0.5164, "step": 3213 }, { "epoch": 4.296989966555184, "grad_norm": 0.9939543501544829, "learning_rate": 5.927331827620902e-07, "loss": 0.4377, "step": 3214 }, { "epoch": 4.298327759197324, "grad_norm": 1.4250361606587558, "learning_rate": 5.905311761539622e-07, "loss": 0.4623, "step": 3215 }, { "epoch": 4.299665551839465, "grad_norm": 1.1056330134504069, "learning_rate": 5.883330106648782e-07, "loss": 0.4457, "step": 3216 }, { "epoch": 4.301003344481606, "grad_norm": 0.8835916050285643, "learning_rate": 5.861386882096743e-07, "loss": 0.4787, "step": 3217 }, { "epoch": 4.302341137123745, "grad_norm": 1.418106810215898, "learning_rate": 5.839482106998406e-07, "loss": 0.4623, "step": 3218 }, { "epoch": 4.303678929765886, "grad_norm": 0.9719349944697664, "learning_rate": 5.817615800435167e-07, "loss": 0.4606, "step": 3219 }, { "epoch": 4.305016722408027, "grad_norm": 1.0303826448545224, "learning_rate": 5.795787981454931e-07, "loss": 0.4577, "step": 3220 }, { "epoch": 4.306354515050167, "grad_norm": 0.9488250712545659, "learning_rate": 5.773998669072057e-07, "loss": 0.4286, "step": 3221 }, { "epoch": 4.3076923076923075, "grad_norm": 0.8348721597934287, "learning_rate": 5.752247882267365e-07, "loss": 0.5049, "step": 3222 }, { "epoch": 4.309030100334448, "grad_norm": 0.9205445089928788, "learning_rate": 5.730535639988122e-07, "loss": 0.5829, "step": 3223 }, { "epoch": 4.310367892976589, "grad_norm": 1.1234367355096753, "learning_rate": 5.708861961148004e-07, "loss": 0.4344, "step": 3224 }, { "epoch": 4.311705685618729, "grad_norm": 0.9653790154527245, "learning_rate": 5.687226864627115e-07, "loss": 0.5257, "step": 3225 }, { "epoch": 4.3130434782608695, "grad_norm": 1.6471806013464267, "learning_rate": 5.665630369271935e-07, "loss": 0.4321, "step": 3226 }, { "epoch": 4.31438127090301, "grad_norm": 1.0304244626790728, "learning_rate": 5.644072493895325e-07, "loss": 0.4725, "step": 3227 }, { "epoch": 4.31571906354515, "grad_norm": 0.9778688038767697, "learning_rate": 5.622553257276487e-07, "loss": 0.4576, "step": 3228 }, { "epoch": 4.317056856187291, "grad_norm": 1.4637173910870687, "learning_rate": 5.60107267816098e-07, "loss": 0.4444, "step": 3229 }, { "epoch": 4.318394648829432, "grad_norm": 1.2061468397299875, "learning_rate": 5.579630775260697e-07, "loss": 0.5277, "step": 3230 }, { "epoch": 4.319732441471572, "grad_norm": 0.9848361351914418, "learning_rate": 5.558227567253832e-07, "loss": 0.414, "step": 3231 }, { "epoch": 4.321070234113712, "grad_norm": 1.271708513675022, "learning_rate": 5.53686307278486e-07, "loss": 0.5009, "step": 3232 }, { "epoch": 4.322408026755853, "grad_norm": 0.8760842210182109, "learning_rate": 5.515537310464536e-07, "loss": 0.4714, "step": 3233 }, { "epoch": 4.323745819397994, "grad_norm": 0.9331479207665204, "learning_rate": 5.494250298869896e-07, "loss": 0.5084, "step": 3234 }, { "epoch": 4.3250836120401335, "grad_norm": 1.3555775024844492, "learning_rate": 5.473002056544191e-07, "loss": 0.3025, "step": 3235 }, { "epoch": 4.326421404682274, "grad_norm": 1.397842030681251, "learning_rate": 5.45179260199692e-07, "loss": 0.4761, "step": 3236 }, { "epoch": 4.327759197324415, "grad_norm": 1.4747722746156324, "learning_rate": 5.430621953703785e-07, "loss": 0.3664, "step": 3237 }, { "epoch": 4.329096989966555, "grad_norm": 1.092011491830421, "learning_rate": 5.409490130106682e-07, "loss": 0.5636, "step": 3238 }, { "epoch": 4.3304347826086955, "grad_norm": 0.7742377407158727, "learning_rate": 5.388397149613683e-07, "loss": 0.5091, "step": 3239 }, { "epoch": 4.331772575250836, "grad_norm": 0.859742131550986, "learning_rate": 5.367343030599054e-07, "loss": 0.4702, "step": 3240 }, { "epoch": 4.333110367892977, "grad_norm": 1.1322586531914083, "learning_rate": 5.346327791403167e-07, "loss": 0.5196, "step": 3241 }, { "epoch": 4.334448160535117, "grad_norm": 0.8771999863713216, "learning_rate": 5.32535145033255e-07, "loss": 0.5033, "step": 3242 }, { "epoch": 4.335785953177258, "grad_norm": 1.3407946976284453, "learning_rate": 5.304414025659832e-07, "loss": 0.4895, "step": 3243 }, { "epoch": 4.337123745819398, "grad_norm": 0.9136893699387041, "learning_rate": 5.283515535623762e-07, "loss": 0.5238, "step": 3244 }, { "epoch": 4.338461538461538, "grad_norm": 1.253685959891146, "learning_rate": 5.262655998429151e-07, "loss": 0.5172, "step": 3245 }, { "epoch": 4.339799331103679, "grad_norm": 0.9755935859088938, "learning_rate": 5.241835432246888e-07, "loss": 0.4733, "step": 3246 }, { "epoch": 4.34113712374582, "grad_norm": 1.0671038372258859, "learning_rate": 5.221053855213914e-07, "loss": 0.5016, "step": 3247 }, { "epoch": 4.3424749163879595, "grad_norm": 1.3101689257759508, "learning_rate": 5.200311285433213e-07, "loss": 0.3711, "step": 3248 }, { "epoch": 4.3438127090301, "grad_norm": 0.8293206947156121, "learning_rate": 5.179607740973764e-07, "loss": 0.4774, "step": 3249 }, { "epoch": 4.345150501672241, "grad_norm": 1.3807880845794176, "learning_rate": 5.158943239870585e-07, "loss": 0.4273, "step": 3250 }, { "epoch": 4.346488294314382, "grad_norm": 1.511050053523607, "learning_rate": 5.13831780012467e-07, "loss": 0.3639, "step": 3251 }, { "epoch": 4.3478260869565215, "grad_norm": 1.0182433479989839, "learning_rate": 5.117731439702972e-07, "loss": 0.4845, "step": 3252 }, { "epoch": 4.349163879598662, "grad_norm": 0.8775463221604161, "learning_rate": 5.097184176538423e-07, "loss": 0.4461, "step": 3253 }, { "epoch": 4.350501672240803, "grad_norm": 1.4966227982236893, "learning_rate": 5.076676028529875e-07, "loss": 0.4158, "step": 3254 }, { "epoch": 4.351839464882943, "grad_norm": 1.835563626207298, "learning_rate": 5.056207013542131e-07, "loss": 0.5737, "step": 3255 }, { "epoch": 4.3531772575250836, "grad_norm": 0.9799274189694309, "learning_rate": 5.035777149405891e-07, "loss": 0.5026, "step": 3256 }, { "epoch": 4.354515050167224, "grad_norm": 1.2120590472341395, "learning_rate": 5.015386453917742e-07, "loss": 0.4022, "step": 3257 }, { "epoch": 4.355852842809364, "grad_norm": 1.00757426022634, "learning_rate": 4.995034944840171e-07, "loss": 0.4582, "step": 3258 }, { "epoch": 4.357190635451505, "grad_norm": 0.889453034419105, "learning_rate": 4.974722639901503e-07, "loss": 0.5515, "step": 3259 }, { "epoch": 4.358528428093646, "grad_norm": 0.9228792453674785, "learning_rate": 4.954449556795948e-07, "loss": 0.4162, "step": 3260 }, { "epoch": 4.359866220735786, "grad_norm": 1.1088959779607501, "learning_rate": 4.934215713183527e-07, "loss": 0.517, "step": 3261 }, { "epoch": 4.361204013377926, "grad_norm": 1.4021888546542172, "learning_rate": 4.914021126690083e-07, "loss": 0.5191, "step": 3262 }, { "epoch": 4.362541806020067, "grad_norm": 1.0298029392369514, "learning_rate": 4.89386581490725e-07, "loss": 0.4329, "step": 3263 }, { "epoch": 4.363879598662208, "grad_norm": 1.323282064874689, "learning_rate": 4.873749795392469e-07, "loss": 0.4094, "step": 3264 }, { "epoch": 4.3652173913043475, "grad_norm": 0.9673978130976909, "learning_rate": 4.853673085668947e-07, "loss": 0.4914, "step": 3265 }, { "epoch": 4.366555183946488, "grad_norm": 0.99655170775391, "learning_rate": 4.833635703225637e-07, "loss": 0.4954, "step": 3266 }, { "epoch": 4.367892976588629, "grad_norm": 0.8642169278625351, "learning_rate": 4.813637665517251e-07, "loss": 0.3918, "step": 3267 }, { "epoch": 4.36923076923077, "grad_norm": 1.466292986281621, "learning_rate": 4.793678989964207e-07, "loss": 0.4993, "step": 3268 }, { "epoch": 4.3705685618729095, "grad_norm": 0.9011641479683327, "learning_rate": 4.773759693952662e-07, "loss": 0.4921, "step": 3269 }, { "epoch": 4.37190635451505, "grad_norm": 0.863750902415326, "learning_rate": 4.7538797948344485e-07, "loss": 0.5199, "step": 3270 }, { "epoch": 4.373244147157191, "grad_norm": 1.269602762679063, "learning_rate": 4.7340393099270854e-07, "loss": 0.3719, "step": 3271 }, { "epoch": 4.374581939799331, "grad_norm": 1.2424599802973029, "learning_rate": 4.7142382565137535e-07, "loss": 0.3608, "step": 3272 }, { "epoch": 4.375919732441472, "grad_norm": 1.0061097634305554, "learning_rate": 4.6944766518432936e-07, "loss": 0.3917, "step": 3273 }, { "epoch": 4.377257525083612, "grad_norm": 1.2967690759464179, "learning_rate": 4.6747545131301755e-07, "loss": 0.4813, "step": 3274 }, { "epoch": 4.378595317725752, "grad_norm": 1.3628792734174158, "learning_rate": 4.6550718575544883e-07, "loss": 0.3764, "step": 3275 }, { "epoch": 4.379933110367893, "grad_norm": 1.1849743509806985, "learning_rate": 4.635428702261929e-07, "loss": 0.4594, "step": 3276 }, { "epoch": 4.381270903010034, "grad_norm": 0.960109927991432, "learning_rate": 4.615825064363799e-07, "loss": 0.4874, "step": 3277 }, { "epoch": 4.3826086956521735, "grad_norm": 1.5732401243120342, "learning_rate": 4.5962609609369436e-07, "loss": 0.4136, "step": 3278 }, { "epoch": 4.383946488294314, "grad_norm": 0.900008021488621, "learning_rate": 4.576736409023813e-07, "loss": 0.4135, "step": 3279 }, { "epoch": 4.385284280936455, "grad_norm": 0.954260444511845, "learning_rate": 4.5572514256323697e-07, "loss": 0.4646, "step": 3280 }, { "epoch": 4.386622073578596, "grad_norm": 0.8018245694700193, "learning_rate": 4.537806027736114e-07, "loss": 0.5528, "step": 3281 }, { "epoch": 4.3879598662207355, "grad_norm": 1.1705441502265683, "learning_rate": 4.5184002322740784e-07, "loss": 0.4441, "step": 3282 }, { "epoch": 4.389297658862876, "grad_norm": 1.038411302264413, "learning_rate": 4.4990340561507805e-07, "loss": 0.3692, "step": 3283 }, { "epoch": 4.390635451505017, "grad_norm": 1.3892613682775408, "learning_rate": 4.479707516236231e-07, "loss": 0.5013, "step": 3284 }, { "epoch": 4.391973244147157, "grad_norm": 0.9829976459455018, "learning_rate": 4.460420629365919e-07, "loss": 0.4233, "step": 3285 }, { "epoch": 4.393311036789298, "grad_norm": 1.4729595326005622, "learning_rate": 4.441173412340777e-07, "loss": 0.4737, "step": 3286 }, { "epoch": 4.394648829431438, "grad_norm": 1.0746203252594242, "learning_rate": 4.4219658819271925e-07, "loss": 0.5656, "step": 3287 }, { "epoch": 4.395986622073579, "grad_norm": 1.0236328272773139, "learning_rate": 4.402798054856977e-07, "loss": 0.4678, "step": 3288 }, { "epoch": 4.397324414715719, "grad_norm": 1.1250192736403084, "learning_rate": 4.383669947827368e-07, "loss": 0.5922, "step": 3289 }, { "epoch": 4.39866220735786, "grad_norm": 1.2928522715969053, "learning_rate": 4.364581577500987e-07, "loss": 0.5631, "step": 3290 }, { "epoch": 4.4, "grad_norm": 0.9540783128110831, "learning_rate": 4.3455329605058436e-07, "loss": 0.5336, "step": 3291 }, { "epoch": 4.40133779264214, "grad_norm": 1.247903016008191, "learning_rate": 4.3265241134353265e-07, "loss": 0.455, "step": 3292 }, { "epoch": 4.402675585284281, "grad_norm": 1.0086901350669644, "learning_rate": 4.307555052848178e-07, "loss": 0.53, "step": 3293 }, { "epoch": 4.404013377926422, "grad_norm": 1.0818782130979707, "learning_rate": 4.288625795268464e-07, "loss": 0.4907, "step": 3294 }, { "epoch": 4.4053511705685615, "grad_norm": 0.8289671318446515, "learning_rate": 4.269736357185611e-07, "loss": 0.4309, "step": 3295 }, { "epoch": 4.406688963210702, "grad_norm": 1.6570645432131488, "learning_rate": 4.250886755054329e-07, "loss": 0.3717, "step": 3296 }, { "epoch": 4.408026755852843, "grad_norm": 1.010933658085852, "learning_rate": 4.232077005294638e-07, "loss": 0.4038, "step": 3297 }, { "epoch": 4.409364548494983, "grad_norm": 0.8203299736796151, "learning_rate": 4.213307124291838e-07, "loss": 0.4146, "step": 3298 }, { "epoch": 4.410702341137124, "grad_norm": 1.4380230864422672, "learning_rate": 4.194577128396521e-07, "loss": 0.4405, "step": 3299 }, { "epoch": 4.412040133779264, "grad_norm": 1.2249199430496367, "learning_rate": 4.175887033924503e-07, "loss": 0.4786, "step": 3300 }, { "epoch": 4.413377926421405, "grad_norm": 0.9752909638654799, "learning_rate": 4.15723685715686e-07, "loss": 0.3313, "step": 3301 }, { "epoch": 4.414715719063545, "grad_norm": 1.2893510998472428, "learning_rate": 4.1386266143398855e-07, "loss": 0.4718, "step": 3302 }, { "epoch": 4.416053511705686, "grad_norm": 0.7720525635374004, "learning_rate": 4.120056321685101e-07, "loss": 0.5151, "step": 3303 }, { "epoch": 4.417391304347826, "grad_norm": 1.229762019542367, "learning_rate": 4.10152599536921e-07, "loss": 0.3862, "step": 3304 }, { "epoch": 4.418729096989966, "grad_norm": 1.1142118316526524, "learning_rate": 4.0830356515341173e-07, "loss": 0.4356, "step": 3305 }, { "epoch": 4.420066889632107, "grad_norm": 1.085642719154446, "learning_rate": 4.064585306286878e-07, "loss": 0.5629, "step": 3306 }, { "epoch": 4.421404682274248, "grad_norm": 1.2925240761209174, "learning_rate": 4.046174975699729e-07, "loss": 0.4409, "step": 3307 }, { "epoch": 4.422742474916388, "grad_norm": 1.0537089487906393, "learning_rate": 4.027804675810021e-07, "loss": 0.509, "step": 3308 }, { "epoch": 4.424080267558528, "grad_norm": 0.7369965707786599, "learning_rate": 4.009474422620269e-07, "loss": 0.5175, "step": 3309 }, { "epoch": 4.425418060200669, "grad_norm": 1.1446511849627172, "learning_rate": 3.9911842320980777e-07, "loss": 0.4157, "step": 3310 }, { "epoch": 4.42675585284281, "grad_norm": 0.9842156216165036, "learning_rate": 3.972934120176164e-07, "loss": 0.5366, "step": 3311 }, { "epoch": 4.4280936454849495, "grad_norm": 0.8695119799504638, "learning_rate": 3.9547241027523164e-07, "loss": 0.4001, "step": 3312 }, { "epoch": 4.42943143812709, "grad_norm": 1.4048316090613528, "learning_rate": 3.936554195689418e-07, "loss": 0.3929, "step": 3313 }, { "epoch": 4.430769230769231, "grad_norm": 0.8752594614863212, "learning_rate": 3.9184244148154025e-07, "loss": 0.5289, "step": 3314 }, { "epoch": 4.432107023411371, "grad_norm": 1.0377982539005208, "learning_rate": 3.900334775923237e-07, "loss": 0.3948, "step": 3315 }, { "epoch": 4.433444816053512, "grad_norm": 1.2170449643250312, "learning_rate": 3.882285294770938e-07, "loss": 0.4165, "step": 3316 }, { "epoch": 4.434782608695652, "grad_norm": 1.2466779049802246, "learning_rate": 3.864275987081539e-07, "loss": 0.5351, "step": 3317 }, { "epoch": 4.436120401337792, "grad_norm": 1.5209523635182183, "learning_rate": 3.846306868543054e-07, "loss": 0.4537, "step": 3318 }, { "epoch": 4.437458193979933, "grad_norm": 1.5387124986334517, "learning_rate": 3.828377954808538e-07, "loss": 0.4719, "step": 3319 }, { "epoch": 4.438795986622074, "grad_norm": 0.8138855602605148, "learning_rate": 3.8104892614959757e-07, "loss": 0.63, "step": 3320 }, { "epoch": 4.440133779264214, "grad_norm": 1.2773535675669898, "learning_rate": 3.7926408041883355e-07, "loss": 0.463, "step": 3321 }, { "epoch": 4.441471571906354, "grad_norm": 1.102008820724797, "learning_rate": 3.774832598433531e-07, "loss": 0.4845, "step": 3322 }, { "epoch": 4.442809364548495, "grad_norm": 1.1365732425374977, "learning_rate": 3.7570646597444196e-07, "loss": 0.4841, "step": 3323 }, { "epoch": 4.444147157190636, "grad_norm": 1.233139953730608, "learning_rate": 3.7393370035987697e-07, "loss": 0.4074, "step": 3324 }, { "epoch": 4.4454849498327755, "grad_norm": 1.67234941968457, "learning_rate": 3.721649645439268e-07, "loss": 0.4788, "step": 3325 }, { "epoch": 4.446822742474916, "grad_norm": 0.8516989828252811, "learning_rate": 3.704002600673501e-07, "loss": 0.5232, "step": 3326 }, { "epoch": 4.448160535117057, "grad_norm": 0.9689074567720265, "learning_rate": 3.6863958846739213e-07, "loss": 0.5156, "step": 3327 }, { "epoch": 4.449498327759198, "grad_norm": 1.1567367740910983, "learning_rate": 3.668829512777866e-07, "loss": 0.5296, "step": 3328 }, { "epoch": 4.450836120401338, "grad_norm": 0.992388773251267, "learning_rate": 3.651303500287534e-07, "loss": 0.4801, "step": 3329 }, { "epoch": 4.452173913043478, "grad_norm": 1.3886902490420097, "learning_rate": 3.6338178624699516e-07, "loss": 0.4994, "step": 3330 }, { "epoch": 4.453511705685619, "grad_norm": 1.0318525493703603, "learning_rate": 3.6163726145569787e-07, "loss": 0.4288, "step": 3331 }, { "epoch": 4.454849498327759, "grad_norm": 1.0579583499056397, "learning_rate": 3.5989677717452933e-07, "loss": 0.4748, "step": 3332 }, { "epoch": 4.4561872909699, "grad_norm": 0.9247895543976058, "learning_rate": 3.581603349196372e-07, "loss": 0.5087, "step": 3333 }, { "epoch": 4.45752508361204, "grad_norm": 0.9806258996894206, "learning_rate": 3.564279362036488e-07, "loss": 0.5368, "step": 3334 }, { "epoch": 4.45886287625418, "grad_norm": 1.2114326408577858, "learning_rate": 3.5469958253566807e-07, "loss": 0.4321, "step": 3335 }, { "epoch": 4.460200668896321, "grad_norm": 1.2354042119041546, "learning_rate": 3.5297527542127675e-07, "loss": 0.4879, "step": 3336 }, { "epoch": 4.461538461538462, "grad_norm": 0.8248713283113905, "learning_rate": 3.512550163625311e-07, "loss": 0.4279, "step": 3337 }, { "epoch": 4.462876254180602, "grad_norm": 1.0563040552459169, "learning_rate": 3.495388068579586e-07, "loss": 0.3855, "step": 3338 }, { "epoch": 4.464214046822742, "grad_norm": 1.130970649891742, "learning_rate": 3.4782664840256387e-07, "loss": 0.3917, "step": 3339 }, { "epoch": 4.465551839464883, "grad_norm": 0.9788809190522574, "learning_rate": 3.461185424878194e-07, "loss": 0.4977, "step": 3340 }, { "epoch": 4.466889632107024, "grad_norm": 1.0481008773697902, "learning_rate": 3.4441449060166776e-07, "loss": 0.569, "step": 3341 }, { "epoch": 4.468227424749164, "grad_norm": 1.041421545379994, "learning_rate": 3.427144942285215e-07, "loss": 0.4803, "step": 3342 }, { "epoch": 4.469565217391304, "grad_norm": 1.114070029040969, "learning_rate": 3.4101855484925727e-07, "loss": 0.4301, "step": 3343 }, { "epoch": 4.470903010033445, "grad_norm": 1.0087811393330046, "learning_rate": 3.39326673941221e-07, "loss": 0.5175, "step": 3344 }, { "epoch": 4.472240802675585, "grad_norm": 0.7197454301828417, "learning_rate": 3.3763885297822153e-07, "loss": 0.4388, "step": 3345 }, { "epoch": 4.473578595317726, "grad_norm": 0.9401469426340787, "learning_rate": 3.359550934305322e-07, "loss": 0.4345, "step": 3346 }, { "epoch": 4.474916387959866, "grad_norm": 1.0430606814567516, "learning_rate": 3.342753967648865e-07, "loss": 0.5142, "step": 3347 }, { "epoch": 4.476254180602007, "grad_norm": 0.8770058251287898, "learning_rate": 3.3259976444448005e-07, "loss": 0.4641, "step": 3348 }, { "epoch": 4.477591973244147, "grad_norm": 0.7741810402881004, "learning_rate": 3.3092819792896913e-07, "loss": 0.4324, "step": 3349 }, { "epoch": 4.478929765886288, "grad_norm": 1.0903417949787884, "learning_rate": 3.2926069867446673e-07, "loss": 0.4607, "step": 3350 }, { "epoch": 4.480267558528428, "grad_norm": 1.4240352767972742, "learning_rate": 3.275972681335421e-07, "loss": 0.46, "step": 3351 }, { "epoch": 4.481605351170568, "grad_norm": 1.0652255657424958, "learning_rate": 3.259379077552216e-07, "loss": 0.4041, "step": 3352 }, { "epoch": 4.482943143812709, "grad_norm": 1.1647276427444746, "learning_rate": 3.2428261898498625e-07, "loss": 0.4957, "step": 3353 }, { "epoch": 4.48428093645485, "grad_norm": 1.3109021566766035, "learning_rate": 3.226314032647687e-07, "loss": 0.4789, "step": 3354 }, { "epoch": 4.4856187290969896, "grad_norm": 1.4166596630190527, "learning_rate": 3.209842620329545e-07, "loss": 0.4189, "step": 3355 }, { "epoch": 4.48695652173913, "grad_norm": 1.1120531116024979, "learning_rate": 3.1934119672438093e-07, "loss": 0.4171, "step": 3356 }, { "epoch": 4.488294314381271, "grad_norm": 0.848223088067373, "learning_rate": 3.1770220877033243e-07, "loss": 0.4057, "step": 3357 }, { "epoch": 4.489632107023412, "grad_norm": 0.9224056982324865, "learning_rate": 3.160672995985431e-07, "loss": 0.4268, "step": 3358 }, { "epoch": 4.490969899665552, "grad_norm": 1.192778312171769, "learning_rate": 3.1443647063319425e-07, "loss": 0.4785, "step": 3359 }, { "epoch": 4.492307692307692, "grad_norm": 1.2303347390357424, "learning_rate": 3.128097232949123e-07, "loss": 0.4002, "step": 3360 }, { "epoch": 4.493645484949833, "grad_norm": 1.4862639297250086, "learning_rate": 3.111870590007682e-07, "loss": 0.4514, "step": 3361 }, { "epoch": 4.494983277591973, "grad_norm": 1.0289670868640333, "learning_rate": 3.0956847916427556e-07, "loss": 0.5045, "step": 3362 }, { "epoch": 4.496321070234114, "grad_norm": 1.42855899129393, "learning_rate": 3.0795398519539113e-07, "loss": 0.4361, "step": 3363 }, { "epoch": 4.497658862876254, "grad_norm": 1.009008391422468, "learning_rate": 3.0634357850051144e-07, "loss": 0.4633, "step": 3364 }, { "epoch": 4.498996655518395, "grad_norm": 0.9929208849844635, "learning_rate": 3.0473726048247386e-07, "loss": 0.5017, "step": 3365 }, { "epoch": 4.500334448160535, "grad_norm": 0.8661902364119445, "learning_rate": 3.031350325405519e-07, "loss": 0.5296, "step": 3366 }, { "epoch": 4.501672240802676, "grad_norm": 0.78890936162738, "learning_rate": 3.015368960704584e-07, "loss": 0.5606, "step": 3367 }, { "epoch": 4.503010033444816, "grad_norm": 0.8074510976525764, "learning_rate": 2.9994285246433996e-07, "loss": 0.5446, "step": 3368 }, { "epoch": 4.504347826086956, "grad_norm": 0.988387122814096, "learning_rate": 2.9835290311078123e-07, "loss": 0.4074, "step": 3369 }, { "epoch": 4.505685618729097, "grad_norm": 0.8863125262739829, "learning_rate": 2.967670493947966e-07, "loss": 0.5338, "step": 3370 }, { "epoch": 4.507023411371238, "grad_norm": 1.0124782532126824, "learning_rate": 2.9518529269783604e-07, "loss": 0.5128, "step": 3371 }, { "epoch": 4.508361204013378, "grad_norm": 1.206828010297311, "learning_rate": 2.936076343977762e-07, "loss": 0.4263, "step": 3372 }, { "epoch": 4.509698996655518, "grad_norm": 0.8607167519224018, "learning_rate": 2.9203407586892776e-07, "loss": 0.4216, "step": 3373 }, { "epoch": 4.511036789297659, "grad_norm": 1.1183181268951161, "learning_rate": 2.9046461848202865e-07, "loss": 0.4624, "step": 3374 }, { "epoch": 4.512374581939799, "grad_norm": 1.1123703440682153, "learning_rate": 2.888992636042437e-07, "loss": 0.3844, "step": 3375 }, { "epoch": 4.51371237458194, "grad_norm": 1.1444211967997757, "learning_rate": 2.873380125991643e-07, "loss": 0.6006, "step": 3376 }, { "epoch": 4.51505016722408, "grad_norm": 1.7862764839681424, "learning_rate": 2.857808668268075e-07, "loss": 0.3975, "step": 3377 }, { "epoch": 4.516387959866221, "grad_norm": 0.8206429495771951, "learning_rate": 2.842278276436128e-07, "loss": 0.5095, "step": 3378 }, { "epoch": 4.517725752508361, "grad_norm": 1.2502526564994432, "learning_rate": 2.8267889640244516e-07, "loss": 0.4918, "step": 3379 }, { "epoch": 4.519063545150502, "grad_norm": 0.9332411494201116, "learning_rate": 2.811340744525887e-07, "loss": 0.4716, "step": 3380 }, { "epoch": 4.520401337792642, "grad_norm": 0.9231243906007442, "learning_rate": 2.7959336313974847e-07, "loss": 0.568, "step": 3381 }, { "epoch": 4.521739130434782, "grad_norm": 1.1333494976464036, "learning_rate": 2.7805676380604883e-07, "loss": 0.4043, "step": 3382 }, { "epoch": 4.523076923076923, "grad_norm": 1.3076007672982803, "learning_rate": 2.7652427779003234e-07, "loss": 0.5215, "step": 3383 }, { "epoch": 4.524414715719064, "grad_norm": 0.9009972910819862, "learning_rate": 2.7499590642665773e-07, "loss": 0.5045, "step": 3384 }, { "epoch": 4.5257525083612045, "grad_norm": 1.549971009942612, "learning_rate": 2.734716510473007e-07, "loss": 0.4903, "step": 3385 }, { "epoch": 4.527090301003344, "grad_norm": 0.8146309458926431, "learning_rate": 2.7195151297975065e-07, "loss": 0.5142, "step": 3386 }, { "epoch": 4.528428093645485, "grad_norm": 0.9198284202569553, "learning_rate": 2.704354935482095e-07, "loss": 0.4787, "step": 3387 }, { "epoch": 4.529765886287626, "grad_norm": 1.2230318966797273, "learning_rate": 2.689235940732926e-07, "loss": 0.3672, "step": 3388 }, { "epoch": 4.531103678929766, "grad_norm": 1.1103630538377391, "learning_rate": 2.6741581587202747e-07, "loss": 0.412, "step": 3389 }, { "epoch": 4.532441471571906, "grad_norm": 0.9571024883705711, "learning_rate": 2.6591216025784904e-07, "loss": 0.4565, "step": 3390 }, { "epoch": 4.533779264214047, "grad_norm": 1.369612357871183, "learning_rate": 2.644126285406029e-07, "loss": 0.4911, "step": 3391 }, { "epoch": 4.535117056856187, "grad_norm": 1.0873372677076856, "learning_rate": 2.629172220265408e-07, "loss": 0.5159, "step": 3392 }, { "epoch": 4.536454849498328, "grad_norm": 1.3660556148423437, "learning_rate": 2.6142594201832183e-07, "loss": 0.525, "step": 3393 }, { "epoch": 4.537792642140468, "grad_norm": 1.0021911637077623, "learning_rate": 2.5993878981501133e-07, "loss": 0.4613, "step": 3394 }, { "epoch": 4.539130434782608, "grad_norm": 1.5100656772583594, "learning_rate": 2.584557667120768e-07, "loss": 0.4757, "step": 3395 }, { "epoch": 4.540468227424749, "grad_norm": 1.0773451444295343, "learning_rate": 2.5697687400139115e-07, "loss": 0.4928, "step": 3396 }, { "epoch": 4.54180602006689, "grad_norm": 1.1666985695254617, "learning_rate": 2.5550211297122705e-07, "loss": 0.5083, "step": 3397 }, { "epoch": 4.5431438127090304, "grad_norm": 0.9969113901369439, "learning_rate": 2.540314849062592e-07, "loss": 0.4429, "step": 3398 }, { "epoch": 4.54448160535117, "grad_norm": 1.1606309067650973, "learning_rate": 2.525649910875627e-07, "loss": 0.4629, "step": 3399 }, { "epoch": 4.545819397993311, "grad_norm": 1.0140643953646926, "learning_rate": 2.511026327926114e-07, "loss": 0.422, "step": 3400 }, { "epoch": 4.547157190635452, "grad_norm": 0.8410122519938985, "learning_rate": 2.4964441129527337e-07, "loss": 0.3407, "step": 3401 }, { "epoch": 4.548494983277592, "grad_norm": 1.1479601925558067, "learning_rate": 2.481903278658171e-07, "loss": 0.4067, "step": 3402 }, { "epoch": 4.549832775919732, "grad_norm": 0.9293909487664767, "learning_rate": 2.4674038377090423e-07, "loss": 0.4454, "step": 3403 }, { "epoch": 4.551170568561873, "grad_norm": 1.0828735893562091, "learning_rate": 2.452945802735918e-07, "loss": 0.4793, "step": 3404 }, { "epoch": 4.552508361204014, "grad_norm": 0.9395929463406091, "learning_rate": 2.438529186333288e-07, "loss": 0.3792, "step": 3405 }, { "epoch": 4.553846153846154, "grad_norm": 1.6867771507652225, "learning_rate": 2.42415400105957e-07, "loss": 0.4539, "step": 3406 }, { "epoch": 4.555183946488294, "grad_norm": 0.9192628994374274, "learning_rate": 2.4098202594370844e-07, "loss": 0.5147, "step": 3407 }, { "epoch": 4.556521739130435, "grad_norm": 0.8655347579317354, "learning_rate": 2.3955279739520496e-07, "loss": 0.4715, "step": 3408 }, { "epoch": 4.557859531772575, "grad_norm": 1.1639432283779327, "learning_rate": 2.3812771570545846e-07, "loss": 0.4882, "step": 3409 }, { "epoch": 4.559197324414716, "grad_norm": 1.362733744804304, "learning_rate": 2.3670678211586805e-07, "loss": 0.4612, "step": 3410 }, { "epoch": 4.560535117056856, "grad_norm": 1.3827856738730246, "learning_rate": 2.3528999786421758e-07, "loss": 0.4819, "step": 3411 }, { "epoch": 4.561872909698996, "grad_norm": 1.0613224305936937, "learning_rate": 2.338773641846781e-07, "loss": 0.437, "step": 3412 }, { "epoch": 4.563210702341137, "grad_norm": 0.8218173923748198, "learning_rate": 2.3246888230780474e-07, "loss": 0.427, "step": 3413 }, { "epoch": 4.564548494983278, "grad_norm": 1.448555127966684, "learning_rate": 2.3106455346053603e-07, "loss": 0.3164, "step": 3414 }, { "epoch": 4.565886287625418, "grad_norm": 1.0925184121010614, "learning_rate": 2.2966437886619286e-07, "loss": 0.5678, "step": 3415 }, { "epoch": 4.567224080267558, "grad_norm": 1.3020784011152982, "learning_rate": 2.2826835974447626e-07, "loss": 0.4304, "step": 3416 }, { "epoch": 4.568561872909699, "grad_norm": 1.4253916604491808, "learning_rate": 2.2687649731146844e-07, "loss": 0.5399, "step": 3417 }, { "epoch": 4.56989966555184, "grad_norm": 1.1792352933867303, "learning_rate": 2.2548879277963065e-07, "loss": 0.5216, "step": 3418 }, { "epoch": 4.57123745819398, "grad_norm": 1.5897126908784003, "learning_rate": 2.2410524735780205e-07, "loss": 0.3443, "step": 3419 }, { "epoch": 4.57257525083612, "grad_norm": 0.7939312392712602, "learning_rate": 2.227258622511991e-07, "loss": 0.513, "step": 3420 }, { "epoch": 4.573913043478261, "grad_norm": 0.9820758508304628, "learning_rate": 2.2135063866141337e-07, "loss": 0.3766, "step": 3421 }, { "epoch": 4.575250836120401, "grad_norm": 1.4184715468695337, "learning_rate": 2.1997957778641166e-07, "loss": 0.3817, "step": 3422 }, { "epoch": 4.576588628762542, "grad_norm": 1.0617593871570294, "learning_rate": 2.1861268082053466e-07, "loss": 0.5058, "step": 3423 }, { "epoch": 4.577926421404682, "grad_norm": 1.209134268119244, "learning_rate": 2.1724994895449603e-07, "loss": 0.45, "step": 3424 }, { "epoch": 4.579264214046823, "grad_norm": 1.1163369003381134, "learning_rate": 2.1589138337538062e-07, "loss": 0.4788, "step": 3425 }, { "epoch": 4.580602006688963, "grad_norm": 0.9300292267674497, "learning_rate": 2.1453698526664513e-07, "loss": 0.5019, "step": 3426 }, { "epoch": 4.581939799331104, "grad_norm": 1.019011504188845, "learning_rate": 2.1318675580811409e-07, "loss": 0.4901, "step": 3427 }, { "epoch": 4.5832775919732445, "grad_norm": 0.8937106132027909, "learning_rate": 2.1184069617598225e-07, "loss": 0.531, "step": 3428 }, { "epoch": 4.584615384615384, "grad_norm": 1.2995654900368687, "learning_rate": 2.104988075428127e-07, "loss": 0.3549, "step": 3429 }, { "epoch": 4.585953177257525, "grad_norm": 1.0828992779898174, "learning_rate": 2.0916109107753267e-07, "loss": 0.5343, "step": 3430 }, { "epoch": 4.587290969899666, "grad_norm": 0.8454300470368665, "learning_rate": 2.0782754794543668e-07, "loss": 0.4652, "step": 3431 }, { "epoch": 4.588628762541806, "grad_norm": 0.9094076343740034, "learning_rate": 2.0649817930818326e-07, "loss": 0.6085, "step": 3432 }, { "epoch": 4.589966555183946, "grad_norm": 0.9961893492478562, "learning_rate": 2.0517298632379445e-07, "loss": 0.4906, "step": 3433 }, { "epoch": 4.591304347826087, "grad_norm": 1.1644359436672875, "learning_rate": 2.038519701466557e-07, "loss": 0.4846, "step": 3434 }, { "epoch": 4.592642140468227, "grad_norm": 1.175929993636675, "learning_rate": 2.0253513192751374e-07, "loss": 0.5633, "step": 3435 }, { "epoch": 4.593979933110368, "grad_norm": 0.7740770888240189, "learning_rate": 2.012224728134743e-07, "loss": 0.4753, "step": 3436 }, { "epoch": 4.595317725752508, "grad_norm": 1.031838121555999, "learning_rate": 1.999139939480049e-07, "loss": 0.4015, "step": 3437 }, { "epoch": 4.596655518394649, "grad_norm": 1.4647752221289938, "learning_rate": 1.9860969647092998e-07, "loss": 0.3808, "step": 3438 }, { "epoch": 4.597993311036789, "grad_norm": 0.70151903632623, "learning_rate": 1.9730958151843282e-07, "loss": 0.5459, "step": 3439 }, { "epoch": 4.59933110367893, "grad_norm": 0.8532618786405513, "learning_rate": 1.9601365022305196e-07, "loss": 0.4575, "step": 3440 }, { "epoch": 4.6006688963210705, "grad_norm": 1.1857662858071927, "learning_rate": 1.947219037136827e-07, "loss": 0.5241, "step": 3441 }, { "epoch": 4.602006688963211, "grad_norm": 0.8368163967173479, "learning_rate": 1.934343431155744e-07, "loss": 0.4989, "step": 3442 }, { "epoch": 4.603344481605351, "grad_norm": 1.2154866156476314, "learning_rate": 1.9215096955032986e-07, "loss": 0.5407, "step": 3443 }, { "epoch": 4.604682274247492, "grad_norm": 0.9930423407680444, "learning_rate": 1.908717841359048e-07, "loss": 0.4769, "step": 3444 }, { "epoch": 4.6060200668896325, "grad_norm": 1.4896888088622469, "learning_rate": 1.8959678798660674e-07, "loss": 0.47, "step": 3445 }, { "epoch": 4.607357859531772, "grad_norm": 0.7649268143752417, "learning_rate": 1.883259822130934e-07, "loss": 0.4942, "step": 3446 }, { "epoch": 4.608695652173913, "grad_norm": 1.4112059783919952, "learning_rate": 1.8705936792237255e-07, "loss": 0.445, "step": 3447 }, { "epoch": 4.610033444816054, "grad_norm": 1.5699174250667878, "learning_rate": 1.8579694621780054e-07, "loss": 0.5204, "step": 3448 }, { "epoch": 4.611371237458194, "grad_norm": 0.8369570600096555, "learning_rate": 1.845387181990821e-07, "loss": 0.5809, "step": 3449 }, { "epoch": 4.612709030100334, "grad_norm": 1.0857320535155326, "learning_rate": 1.8328468496226882e-07, "loss": 0.417, "step": 3450 }, { "epoch": 4.614046822742475, "grad_norm": 1.2259822396807138, "learning_rate": 1.8203484759975743e-07, "loss": 0.4782, "step": 3451 }, { "epoch": 4.615384615384615, "grad_norm": 1.0299544497455226, "learning_rate": 1.807892072002898e-07, "loss": 0.5608, "step": 3452 }, { "epoch": 4.616722408026756, "grad_norm": 0.8890996170693454, "learning_rate": 1.7954776484895188e-07, "loss": 0.4376, "step": 3453 }, { "epoch": 4.618060200668896, "grad_norm": 0.9121963222214676, "learning_rate": 1.78310521627173e-07, "loss": 0.5378, "step": 3454 }, { "epoch": 4.619397993311037, "grad_norm": 0.9872526632390404, "learning_rate": 1.770774786127244e-07, "loss": 0.3931, "step": 3455 }, { "epoch": 4.620735785953177, "grad_norm": 1.0563114295789904, "learning_rate": 1.7584863687971852e-07, "loss": 0.4526, "step": 3456 }, { "epoch": 4.622073578595318, "grad_norm": 0.8518646773906522, "learning_rate": 1.7462399749860748e-07, "loss": 0.4916, "step": 3457 }, { "epoch": 4.6234113712374585, "grad_norm": 0.8150345649621075, "learning_rate": 1.7340356153618343e-07, "loss": 0.4375, "step": 3458 }, { "epoch": 4.624749163879598, "grad_norm": 1.1484869236493054, "learning_rate": 1.7218733005557707e-07, "loss": 0.413, "step": 3459 }, { "epoch": 4.626086956521739, "grad_norm": 1.1379331087452433, "learning_rate": 1.7097530411625596e-07, "loss": 0.4339, "step": 3460 }, { "epoch": 4.62742474916388, "grad_norm": 1.053233746769486, "learning_rate": 1.6976748477402384e-07, "loss": 0.4254, "step": 3461 }, { "epoch": 4.6287625418060205, "grad_norm": 1.5968619855444552, "learning_rate": 1.6856387308102073e-07, "loss": 0.3805, "step": 3462 }, { "epoch": 4.63010033444816, "grad_norm": 1.022265716394393, "learning_rate": 1.6736447008572132e-07, "loss": 0.4581, "step": 3463 }, { "epoch": 4.631438127090301, "grad_norm": 1.5223708607794255, "learning_rate": 1.6616927683293427e-07, "loss": 0.5885, "step": 3464 }, { "epoch": 4.632775919732442, "grad_norm": 0.9929063176365922, "learning_rate": 1.6497829436380009e-07, "loss": 0.4436, "step": 3465 }, { "epoch": 4.634113712374582, "grad_norm": 0.8104751443355006, "learning_rate": 1.6379152371579277e-07, "loss": 0.4203, "step": 3466 }, { "epoch": 4.635451505016722, "grad_norm": 0.8779281241555225, "learning_rate": 1.6260896592271534e-07, "loss": 0.5044, "step": 3467 }, { "epoch": 4.636789297658863, "grad_norm": 0.7764967410119191, "learning_rate": 1.614306220147027e-07, "loss": 0.4256, "step": 3468 }, { "epoch": 4.638127090301003, "grad_norm": 0.917102860324449, "learning_rate": 1.6025649301821877e-07, "loss": 0.5496, "step": 3469 }, { "epoch": 4.639464882943144, "grad_norm": 1.1294835824435028, "learning_rate": 1.5908657995605536e-07, "loss": 0.4777, "step": 3470 }, { "epoch": 4.6408026755852845, "grad_norm": 1.610268595664006, "learning_rate": 1.5792088384733174e-07, "loss": 0.4412, "step": 3471 }, { "epoch": 4.642140468227424, "grad_norm": 1.1192631669864146, "learning_rate": 1.5675940570749393e-07, "loss": 0.4802, "step": 3472 }, { "epoch": 4.643478260869565, "grad_norm": 1.2498496619497859, "learning_rate": 1.5560214654831375e-07, "loss": 0.5453, "step": 3473 }, { "epoch": 4.644816053511706, "grad_norm": 1.2426535359428799, "learning_rate": 1.5444910737788755e-07, "loss": 0.4985, "step": 3474 }, { "epoch": 4.6461538461538465, "grad_norm": 1.0408828372631143, "learning_rate": 1.5330028920063634e-07, "loss": 0.4309, "step": 3475 }, { "epoch": 4.647491638795986, "grad_norm": 0.8962194468360171, "learning_rate": 1.5215569301730293e-07, "loss": 0.4187, "step": 3476 }, { "epoch": 4.648829431438127, "grad_norm": 1.2490113709399042, "learning_rate": 1.510153198249531e-07, "loss": 0.5527, "step": 3477 }, { "epoch": 4.650167224080268, "grad_norm": 1.2902971318492462, "learning_rate": 1.4987917061697387e-07, "loss": 0.507, "step": 3478 }, { "epoch": 4.651505016722408, "grad_norm": 1.0415141248952995, "learning_rate": 1.4874724638307303e-07, "loss": 0.569, "step": 3479 }, { "epoch": 4.652842809364548, "grad_norm": 1.2577679918005553, "learning_rate": 1.4761954810927791e-07, "loss": 0.4811, "step": 3480 }, { "epoch": 4.654180602006689, "grad_norm": 1.550111504841104, "learning_rate": 1.4649607677793388e-07, "loss": 0.4301, "step": 3481 }, { "epoch": 4.65551839464883, "grad_norm": 1.4303555571966804, "learning_rate": 1.4537683336770526e-07, "loss": 0.4634, "step": 3482 }, { "epoch": 4.65685618729097, "grad_norm": 1.0487024689164899, "learning_rate": 1.4426181885357215e-07, "loss": 0.5843, "step": 3483 }, { "epoch": 4.6581939799331105, "grad_norm": 1.3826641332544611, "learning_rate": 1.4315103420683152e-07, "loss": 0.5279, "step": 3484 }, { "epoch": 4.659531772575251, "grad_norm": 1.0464289993638973, "learning_rate": 1.42044480395096e-07, "loss": 0.3775, "step": 3485 }, { "epoch": 4.660869565217391, "grad_norm": 0.8095943710231003, "learning_rate": 1.4094215838229176e-07, "loss": 0.5808, "step": 3486 }, { "epoch": 4.662207357859532, "grad_norm": 1.7615442619463253, "learning_rate": 1.3984406912865954e-07, "loss": 0.3914, "step": 3487 }, { "epoch": 4.6635451505016725, "grad_norm": 1.0352730709042297, "learning_rate": 1.3875021359075257e-07, "loss": 0.4257, "step": 3488 }, { "epoch": 4.664882943143812, "grad_norm": 1.030130601297211, "learning_rate": 1.376605927214364e-07, "loss": 0.5251, "step": 3489 }, { "epoch": 4.666220735785953, "grad_norm": 1.219897201921289, "learning_rate": 1.3657520746988674e-07, "loss": 0.4734, "step": 3490 }, { "epoch": 4.667558528428094, "grad_norm": 1.293006725125626, "learning_rate": 1.354940587815906e-07, "loss": 0.3887, "step": 3491 }, { "epoch": 4.668896321070234, "grad_norm": 0.935257723997435, "learning_rate": 1.3441714759834358e-07, "loss": 0.4203, "step": 3492 }, { "epoch": 4.670234113712374, "grad_norm": 0.8552226988239104, "learning_rate": 1.333444748582513e-07, "loss": 0.4982, "step": 3493 }, { "epoch": 4.671571906354515, "grad_norm": 0.9733505521659842, "learning_rate": 1.3227604149572638e-07, "loss": 0.4309, "step": 3494 }, { "epoch": 4.672909698996656, "grad_norm": 0.9310324790684023, "learning_rate": 1.312118484414876e-07, "loss": 0.4495, "step": 3495 }, { "epoch": 4.674247491638796, "grad_norm": 1.01173868153164, "learning_rate": 1.3015189662256234e-07, "loss": 0.5162, "step": 3496 }, { "epoch": 4.6755852842809364, "grad_norm": 0.8729228404905082, "learning_rate": 1.2909618696228088e-07, "loss": 0.4435, "step": 3497 }, { "epoch": 4.676923076923077, "grad_norm": 1.1528516515269758, "learning_rate": 1.2804472038027983e-07, "loss": 0.4101, "step": 3498 }, { "epoch": 4.678260869565217, "grad_norm": 1.2036927762428609, "learning_rate": 1.2699749779249926e-07, "loss": 0.478, "step": 3499 }, { "epoch": 4.679598662207358, "grad_norm": 1.0443871798113273, "learning_rate": 1.259545201111817e-07, "loss": 0.4937, "step": 3500 }, { "epoch": 4.6809364548494985, "grad_norm": 1.2630937486870588, "learning_rate": 1.2491578824487204e-07, "loss": 0.5067, "step": 3501 }, { "epoch": 4.682274247491639, "grad_norm": 1.2965429408872209, "learning_rate": 1.2388130309841762e-07, "loss": 0.3319, "step": 3502 }, { "epoch": 4.683612040133779, "grad_norm": 1.3376451117026649, "learning_rate": 1.2285106557296479e-07, "loss": 0.5587, "step": 3503 }, { "epoch": 4.68494983277592, "grad_norm": 0.9249065071539939, "learning_rate": 1.2182507656596177e-07, "loss": 0.3865, "step": 3504 }, { "epoch": 4.6862876254180605, "grad_norm": 0.9861449631583165, "learning_rate": 1.2080333697115366e-07, "loss": 0.5489, "step": 3505 }, { "epoch": 4.6876254180602, "grad_norm": 0.9507170125238661, "learning_rate": 1.1978584767858513e-07, "loss": 0.4267, "step": 3506 }, { "epoch": 4.688963210702341, "grad_norm": 0.9491259267520535, "learning_rate": 1.1877260957459835e-07, "loss": 0.435, "step": 3507 }, { "epoch": 4.690301003344482, "grad_norm": 1.1822309916921363, "learning_rate": 1.1776362354183224e-07, "loss": 0.4266, "step": 3508 }, { "epoch": 4.691638795986622, "grad_norm": 1.268462902999853, "learning_rate": 1.1675889045922151e-07, "loss": 0.4386, "step": 3509 }, { "epoch": 4.692976588628762, "grad_norm": 0.9090614591847508, "learning_rate": 1.157584112019966e-07, "loss": 0.5498, "step": 3510 }, { "epoch": 4.694314381270903, "grad_norm": 0.8760967572263472, "learning_rate": 1.1476218664168093e-07, "loss": 0.4609, "step": 3511 }, { "epoch": 4.695652173913043, "grad_norm": 0.956472910798691, "learning_rate": 1.1377021764609364e-07, "loss": 0.5149, "step": 3512 }, { "epoch": 4.696989966555184, "grad_norm": 1.1361171449090461, "learning_rate": 1.1278250507934518e-07, "loss": 0.4104, "step": 3513 }, { "epoch": 4.6983277591973245, "grad_norm": 0.9870178802566003, "learning_rate": 1.1179904980183897e-07, "loss": 0.3926, "step": 3514 }, { "epoch": 4.699665551839465, "grad_norm": 0.924752052969308, "learning_rate": 1.1081985267027029e-07, "loss": 0.4346, "step": 3515 }, { "epoch": 4.701003344481605, "grad_norm": 0.9909476848806354, "learning_rate": 1.0984491453762402e-07, "loss": 0.5766, "step": 3516 }, { "epoch": 4.702341137123746, "grad_norm": 0.7233563009224249, "learning_rate": 1.0887423625317584e-07, "loss": 0.4936, "step": 3517 }, { "epoch": 4.7036789297658865, "grad_norm": 1.4397728337307067, "learning_rate": 1.079078186624899e-07, "loss": 0.3873, "step": 3518 }, { "epoch": 4.705016722408026, "grad_norm": 1.334405195346611, "learning_rate": 1.0694566260742001e-07, "loss": 0.489, "step": 3519 }, { "epoch": 4.706354515050167, "grad_norm": 0.7577573166359305, "learning_rate": 1.0598776892610685e-07, "loss": 0.401, "step": 3520 }, { "epoch": 4.707692307692308, "grad_norm": 0.914207849052185, "learning_rate": 1.0503413845297739e-07, "loss": 0.5133, "step": 3521 }, { "epoch": 4.709030100334449, "grad_norm": 0.8864037066928491, "learning_rate": 1.0408477201874712e-07, "loss": 0.4109, "step": 3522 }, { "epoch": 4.710367892976588, "grad_norm": 1.0660149936035124, "learning_rate": 1.0313967045041507e-07, "loss": 0.5305, "step": 3523 }, { "epoch": 4.711705685618729, "grad_norm": 0.757502785492887, "learning_rate": 1.02198834571266e-07, "loss": 0.4504, "step": 3524 }, { "epoch": 4.71304347826087, "grad_norm": 0.9096818150276608, "learning_rate": 1.0126226520086823e-07, "loss": 0.5703, "step": 3525 }, { "epoch": 4.71438127090301, "grad_norm": 1.120538254742979, "learning_rate": 1.0032996315507415e-07, "loss": 0.4335, "step": 3526 }, { "epoch": 4.7157190635451505, "grad_norm": 1.3481317256742231, "learning_rate": 9.940192924601855e-08, "loss": 0.5482, "step": 3527 }, { "epoch": 4.717056856187291, "grad_norm": 1.3278267236061478, "learning_rate": 9.847816428211809e-08, "loss": 0.3924, "step": 3528 }, { "epoch": 4.718394648829431, "grad_norm": 1.4348365820041533, "learning_rate": 9.755866906807188e-08, "loss": 0.3872, "step": 3529 }, { "epoch": 4.719732441471572, "grad_norm": 0.8308410801194482, "learning_rate": 9.664344440485696e-08, "loss": 0.4626, "step": 3530 }, { "epoch": 4.7210702341137125, "grad_norm": 1.422345080887038, "learning_rate": 9.573249108973281e-08, "loss": 0.4787, "step": 3531 }, { "epoch": 4.722408026755852, "grad_norm": 1.232783117729546, "learning_rate": 9.482580991623747e-08, "loss": 0.5116, "step": 3532 }, { "epoch": 4.723745819397993, "grad_norm": 1.139535915719715, "learning_rate": 9.39234016741869e-08, "loss": 0.5608, "step": 3533 }, { "epoch": 4.725083612040134, "grad_norm": 0.8205962582729791, "learning_rate": 9.302526714967508e-08, "loss": 0.5042, "step": 3534 }, { "epoch": 4.726421404682275, "grad_norm": 0.8560126456853151, "learning_rate": 9.21314071250734e-08, "loss": 0.4092, "step": 3535 }, { "epoch": 4.727759197324414, "grad_norm": 0.9985172490497566, "learning_rate": 9.124182237902957e-08, "loss": 0.3123, "step": 3536 }, { "epoch": 4.729096989966555, "grad_norm": 0.9482290745146092, "learning_rate": 9.035651368646647e-08, "loss": 0.4831, "step": 3537 }, { "epoch": 4.730434782608696, "grad_norm": 1.2503273903861873, "learning_rate": 8.94754818185839e-08, "loss": 0.4083, "step": 3538 }, { "epoch": 4.731772575250837, "grad_norm": 1.0041555334043855, "learning_rate": 8.859872754285403e-08, "loss": 0.5397, "step": 3539 }, { "epoch": 4.7331103678929765, "grad_norm": 0.8990547077894566, "learning_rate": 8.772625162302373e-08, "loss": 0.4791, "step": 3540 }, { "epoch": 4.734448160535117, "grad_norm": 0.8087989941993287, "learning_rate": 8.68580548191128e-08, "loss": 0.4816, "step": 3541 }, { "epoch": 4.735785953177258, "grad_norm": 1.0110924033329443, "learning_rate": 8.599413788741407e-08, "loss": 0.5686, "step": 3542 }, { "epoch": 4.737123745819398, "grad_norm": 1.1984993305229337, "learning_rate": 8.513450158049109e-08, "loss": 0.5117, "step": 3543 }, { "epoch": 4.7384615384615385, "grad_norm": 1.2554022350360183, "learning_rate": 8.427914664717929e-08, "loss": 0.5392, "step": 3544 }, { "epoch": 4.739799331103679, "grad_norm": 1.7948709928312163, "learning_rate": 8.342807383258378e-08, "loss": 0.3783, "step": 3545 }, { "epoch": 4.741137123745819, "grad_norm": 1.0835679381222743, "learning_rate": 8.258128387808095e-08, "loss": 0.4179, "step": 3546 }, { "epoch": 4.74247491638796, "grad_norm": 0.931377155415696, "learning_rate": 8.173877752131465e-08, "loss": 0.5251, "step": 3547 }, { "epoch": 4.7438127090301005, "grad_norm": 1.1922506149513612, "learning_rate": 8.090055549619835e-08, "loss": 0.3599, "step": 3548 }, { "epoch": 4.74515050167224, "grad_norm": 1.0647821939980173, "learning_rate": 8.006661853291298e-08, "loss": 0.4972, "step": 3549 }, { "epoch": 4.746488294314381, "grad_norm": 1.0158394202107401, "learning_rate": 7.923696735790687e-08, "loss": 0.4591, "step": 3550 }, { "epoch": 4.747826086956522, "grad_norm": 1.0661532538418723, "learning_rate": 7.84116026938947e-08, "loss": 0.433, "step": 3551 }, { "epoch": 4.749163879598662, "grad_norm": 0.895565518284015, "learning_rate": 7.7590525259858e-08, "loss": 0.52, "step": 3552 }, { "epoch": 4.750501672240802, "grad_norm": 0.9435649493433801, "learning_rate": 7.677373577104296e-08, "loss": 0.5622, "step": 3553 }, { "epoch": 4.751839464882943, "grad_norm": 0.822567290768673, "learning_rate": 7.59612349389599e-08, "loss": 0.513, "step": 3554 }, { "epoch": 4.753177257525084, "grad_norm": 0.8293343993169779, "learning_rate": 7.515302347138486e-08, "loss": 0.5827, "step": 3555 }, { "epoch": 4.754515050167224, "grad_norm": 0.8731969003643429, "learning_rate": 7.434910207235579e-08, "loss": 0.5109, "step": 3556 }, { "epoch": 4.7558528428093645, "grad_norm": 1.032379402454578, "learning_rate": 7.354947144217417e-08, "loss": 0.4904, "step": 3557 }, { "epoch": 4.757190635451505, "grad_norm": 2.0723720510908303, "learning_rate": 7.275413227740446e-08, "loss": 0.4777, "step": 3558 }, { "epoch": 4.758528428093646, "grad_norm": 1.1793387512372546, "learning_rate": 7.196308527087192e-08, "loss": 0.3608, "step": 3559 }, { "epoch": 4.759866220735786, "grad_norm": 1.0561911727057307, "learning_rate": 7.117633111166311e-08, "loss": 0.467, "step": 3560 }, { "epoch": 4.7612040133779265, "grad_norm": 1.35616234811456, "learning_rate": 7.03938704851248e-08, "loss": 0.4469, "step": 3561 }, { "epoch": 4.762541806020067, "grad_norm": 1.148808678902818, "learning_rate": 6.9615704072864e-08, "loss": 0.5221, "step": 3562 }, { "epoch": 4.763879598662207, "grad_norm": 1.20908269569906, "learning_rate": 6.884183255274734e-08, "loss": 0.5662, "step": 3563 }, { "epoch": 4.765217391304348, "grad_norm": 1.360340971586677, "learning_rate": 6.807225659889894e-08, "loss": 0.4377, "step": 3564 }, { "epoch": 4.766555183946489, "grad_norm": 0.7643451839859062, "learning_rate": 6.730697688170251e-08, "loss": 0.4684, "step": 3565 }, { "epoch": 4.767892976588628, "grad_norm": 1.3431529688423753, "learning_rate": 6.654599406779816e-08, "loss": 0.5413, "step": 3566 }, { "epoch": 4.769230769230769, "grad_norm": 0.8869389042985754, "learning_rate": 6.578930882008283e-08, "loss": 0.4434, "step": 3567 }, { "epoch": 4.77056856187291, "grad_norm": 1.1712363289584975, "learning_rate": 6.503692179771148e-08, "loss": 0.5156, "step": 3568 }, { "epoch": 4.77190635451505, "grad_norm": 1.0963977935402232, "learning_rate": 6.428883365609261e-08, "loss": 0.4429, "step": 3569 }, { "epoch": 4.7732441471571905, "grad_norm": 0.8531869523333835, "learning_rate": 6.354504504689219e-08, "loss": 0.4894, "step": 3570 }, { "epoch": 4.774581939799331, "grad_norm": 0.8491938935634903, "learning_rate": 6.280555661802857e-08, "loss": 0.395, "step": 3571 }, { "epoch": 4.775919732441472, "grad_norm": 1.1161590506575474, "learning_rate": 6.207036901367536e-08, "loss": 0.4566, "step": 3572 }, { "epoch": 4.777257525083612, "grad_norm": 0.7890781367711301, "learning_rate": 6.133948287426028e-08, "loss": 0.4317, "step": 3573 }, { "epoch": 4.7785953177257525, "grad_norm": 0.8829723658256405, "learning_rate": 6.061289883646293e-08, "loss": 0.4298, "step": 3574 }, { "epoch": 4.779933110367893, "grad_norm": 1.535523128013936, "learning_rate": 5.98906175332159e-08, "loss": 0.4882, "step": 3575 }, { "epoch": 4.781270903010033, "grad_norm": 0.9177267626656782, "learning_rate": 5.917263959370312e-08, "loss": 0.5133, "step": 3576 }, { "epoch": 4.782608695652174, "grad_norm": 0.9842482594582719, "learning_rate": 5.84589656433604e-08, "loss": 0.352, "step": 3577 }, { "epoch": 4.783946488294315, "grad_norm": 1.1588511988514658, "learning_rate": 5.7749596303874335e-08, "loss": 0.4084, "step": 3578 }, { "epoch": 4.785284280936455, "grad_norm": 1.6189928229143453, "learning_rate": 5.704453219318118e-08, "loss": 0.3674, "step": 3579 }, { "epoch": 4.786622073578595, "grad_norm": 0.8614527164879181, "learning_rate": 5.634377392546741e-08, "loss": 0.5717, "step": 3580 }, { "epoch": 4.787959866220736, "grad_norm": 0.7769536930776102, "learning_rate": 5.564732211116808e-08, "loss": 0.473, "step": 3581 }, { "epoch": 4.789297658862877, "grad_norm": 0.8457286592063431, "learning_rate": 5.495517735696732e-08, "loss": 0.4717, "step": 3582 }, { "epoch": 4.7906354515050165, "grad_norm": 1.1363210691600059, "learning_rate": 5.426734026579783e-08, "loss": 0.4215, "step": 3583 }, { "epoch": 4.791973244147157, "grad_norm": 1.2098366280406296, "learning_rate": 5.358381143683866e-08, "loss": 0.5115, "step": 3584 }, { "epoch": 4.793311036789298, "grad_norm": 1.0547491270007767, "learning_rate": 5.2904591465516855e-08, "loss": 0.4265, "step": 3585 }, { "epoch": 4.794648829431438, "grad_norm": 0.9334530567103171, "learning_rate": 5.2229680943505225e-08, "loss": 0.4463, "step": 3586 }, { "epoch": 4.7959866220735785, "grad_norm": 1.3853104260049947, "learning_rate": 5.155908045872349e-08, "loss": 0.466, "step": 3587 }, { "epoch": 4.797324414715719, "grad_norm": 0.9039898001427306, "learning_rate": 5.089279059533658e-08, "loss": 0.4151, "step": 3588 }, { "epoch": 4.798662207357859, "grad_norm": 0.8405272594190454, "learning_rate": 5.023081193375357e-08, "loss": 0.4335, "step": 3589 }, { "epoch": 4.8, "grad_norm": 0.878662638785315, "learning_rate": 4.957314505062927e-08, "loss": 0.4482, "step": 3590 }, { "epoch": 4.801337792642141, "grad_norm": 1.1497571609627133, "learning_rate": 4.891979051886153e-08, "loss": 0.4636, "step": 3591 }, { "epoch": 4.802675585284281, "grad_norm": 1.0255746845938178, "learning_rate": 4.827074890759231e-08, "loss": 0.5271, "step": 3592 }, { "epoch": 4.804013377926421, "grad_norm": 1.5340878567277523, "learning_rate": 4.762602078220657e-08, "loss": 0.455, "step": 3593 }, { "epoch": 4.805351170568562, "grad_norm": 1.2270359747178172, "learning_rate": 4.698560670433061e-08, "loss": 0.4282, "step": 3594 }, { "epoch": 4.806688963210703, "grad_norm": 1.4489188771820523, "learning_rate": 4.634950723183429e-08, "loss": 0.5131, "step": 3595 }, { "epoch": 4.8080267558528424, "grad_norm": 0.7808055285993606, "learning_rate": 4.57177229188277e-08, "loss": 0.4888, "step": 3596 }, { "epoch": 4.809364548494983, "grad_norm": 1.5192431820883407, "learning_rate": 4.509025431566283e-08, "loss": 0.4358, "step": 3597 }, { "epoch": 4.810702341137124, "grad_norm": 0.8920876820366279, "learning_rate": 4.446710196893245e-08, "loss": 0.4865, "step": 3598 }, { "epoch": 4.812040133779265, "grad_norm": 1.052034962505248, "learning_rate": 4.384826642146844e-08, "loss": 0.3783, "step": 3599 }, { "epoch": 4.8133779264214045, "grad_norm": 0.9310519253854528, "learning_rate": 4.323374821234294e-08, "loss": 0.3167, "step": 3600 }, { "epoch": 4.814715719063545, "grad_norm": 1.0990022758667224, "learning_rate": 4.262354787686718e-08, "loss": 0.3876, "step": 3601 }, { "epoch": 4.816053511705686, "grad_norm": 0.8922113013842999, "learning_rate": 4.20176659465904e-08, "loss": 0.4396, "step": 3602 }, { "epoch": 4.817391304347826, "grad_norm": 0.9824943923384394, "learning_rate": 4.141610294930043e-08, "loss": 0.5549, "step": 3603 }, { "epoch": 4.8187290969899665, "grad_norm": 1.1295958478318935, "learning_rate": 4.081885940902419e-08, "loss": 0.4116, "step": 3604 }, { "epoch": 4.820066889632107, "grad_norm": 1.384225177995793, "learning_rate": 4.02259358460233e-08, "loss": 0.4343, "step": 3605 }, { "epoch": 4.821404682274247, "grad_norm": 1.0278092872659936, "learning_rate": 3.963733277679904e-08, "loss": 0.4783, "step": 3606 }, { "epoch": 4.822742474916388, "grad_norm": 1.4476129685131829, "learning_rate": 3.905305071408627e-08, "loss": 0.4659, "step": 3607 }, { "epoch": 4.824080267558529, "grad_norm": 1.3251606909265086, "learning_rate": 3.847309016685785e-08, "loss": 0.4806, "step": 3608 }, { "epoch": 4.825418060200668, "grad_norm": 1.4172789465766582, "learning_rate": 3.7897451640321326e-08, "loss": 0.4984, "step": 3609 }, { "epoch": 4.826755852842809, "grad_norm": 0.922753620890225, "learning_rate": 3.732613563591947e-08, "loss": 0.4803, "step": 3610 }, { "epoch": 4.82809364548495, "grad_norm": 1.349676735276013, "learning_rate": 3.675914265132974e-08, "loss": 0.5152, "step": 3611 }, { "epoch": 4.829431438127091, "grad_norm": 1.2762588802171546, "learning_rate": 3.619647318046371e-08, "loss": 0.4848, "step": 3612 }, { "epoch": 4.8307692307692305, "grad_norm": 1.3004937728954225, "learning_rate": 3.563812771346598e-08, "loss": 0.4803, "step": 3613 }, { "epoch": 4.832107023411371, "grad_norm": 0.8630729518339606, "learning_rate": 3.508410673671636e-08, "loss": 0.4475, "step": 3614 }, { "epoch": 4.833444816053512, "grad_norm": 1.019674704458286, "learning_rate": 3.4534410732825485e-08, "loss": 0.4322, "step": 3615 }, { "epoch": 4.834782608695652, "grad_norm": 1.027731886568004, "learning_rate": 3.398904018063809e-08, "loss": 0.5203, "step": 3616 }, { "epoch": 4.8361204013377925, "grad_norm": 1.0468685468586363, "learning_rate": 3.344799555522915e-08, "loss": 0.3929, "step": 3617 }, { "epoch": 4.837458193979933, "grad_norm": 0.8907157651924662, "learning_rate": 3.291127732790722e-08, "loss": 0.4999, "step": 3618 }, { "epoch": 4.838795986622074, "grad_norm": 1.0065768376018165, "learning_rate": 3.2378885966211636e-08, "loss": 0.5187, "step": 3619 }, { "epoch": 4.840133779264214, "grad_norm": 0.9356054151665152, "learning_rate": 3.185082193391143e-08, "loss": 0.5265, "step": 3620 }, { "epoch": 4.841471571906355, "grad_norm": 1.0362536395794364, "learning_rate": 3.1327085691006954e-08, "loss": 0.4459, "step": 3621 }, { "epoch": 4.842809364548495, "grad_norm": 0.774751143857253, "learning_rate": 3.080767769372939e-08, "loss": 0.474, "step": 3622 }, { "epoch": 4.844147157190635, "grad_norm": 0.9403404206942145, "learning_rate": 3.029259839453791e-08, "loss": 0.4636, "step": 3623 }, { "epoch": 4.845484949832776, "grad_norm": 1.0117422193497003, "learning_rate": 2.978184824212138e-08, "loss": 0.4024, "step": 3624 }, { "epoch": 4.846822742474917, "grad_norm": 1.2896493985992399, "learning_rate": 2.927542768139835e-08, "loss": 0.5472, "step": 3625 }, { "epoch": 4.8481605351170565, "grad_norm": 1.682034188263944, "learning_rate": 2.877333715351538e-08, "loss": 0.5225, "step": 3626 }, { "epoch": 4.849498327759197, "grad_norm": 0.7862425350458854, "learning_rate": 2.8275577095846495e-08, "loss": 0.586, "step": 3627 }, { "epoch": 4.850836120401338, "grad_norm": 0.9900127243073041, "learning_rate": 2.7782147941994298e-08, "loss": 0.5582, "step": 3628 }, { "epoch": 4.852173913043478, "grad_norm": 1.4539266240718656, "learning_rate": 2.7293050121788843e-08, "loss": 0.429, "step": 3629 }, { "epoch": 4.8535117056856185, "grad_norm": 1.183033483635926, "learning_rate": 2.6808284061285996e-08, "loss": 0.5465, "step": 3630 }, { "epoch": 4.854849498327759, "grad_norm": 1.1237236767713423, "learning_rate": 2.6327850182769065e-08, "loss": 0.5171, "step": 3631 }, { "epoch": 4.8561872909699, "grad_norm": 1.2267502217978847, "learning_rate": 2.5851748904747176e-08, "loss": 0.5455, "step": 3632 }, { "epoch": 4.85752508361204, "grad_norm": 1.2270885069672077, "learning_rate": 2.5379980641955792e-08, "loss": 0.4738, "step": 3633 }, { "epoch": 4.858862876254181, "grad_norm": 1.1706449190047765, "learning_rate": 2.491254580535507e-08, "loss": 0.5612, "step": 3634 }, { "epoch": 4.860200668896321, "grad_norm": 1.2895868775812396, "learning_rate": 2.4449444802130962e-08, "loss": 0.4262, "step": 3635 }, { "epoch": 4.861538461538462, "grad_norm": 0.9655808973624888, "learning_rate": 2.399067803569466e-08, "loss": 0.4692, "step": 3636 }, { "epoch": 4.862876254180602, "grad_norm": 0.9386940056046742, "learning_rate": 2.3536245905679823e-08, "loss": 0.4889, "step": 3637 }, { "epoch": 4.864214046822743, "grad_norm": 1.0629225433943446, "learning_rate": 2.3086148807946463e-08, "loss": 0.5039, "step": 3638 }, { "epoch": 4.865551839464883, "grad_norm": 1.1370031605353876, "learning_rate": 2.264038713457706e-08, "loss": 0.4428, "step": 3639 }, { "epoch": 4.866889632107023, "grad_norm": 1.0250528714706684, "learning_rate": 2.219896127387766e-08, "loss": 0.5376, "step": 3640 }, { "epoch": 4.868227424749164, "grad_norm": 1.207274963693322, "learning_rate": 2.1761871610376794e-08, "loss": 0.3722, "step": 3641 }, { "epoch": 4.869565217391305, "grad_norm": 0.9588657514624445, "learning_rate": 2.1329118524827662e-08, "loss": 0.4788, "step": 3642 }, { "epoch": 4.8709030100334445, "grad_norm": 1.032562556805596, "learning_rate": 2.0900702394203165e-08, "loss": 0.4036, "step": 3643 }, { "epoch": 4.872240802675585, "grad_norm": 0.8725415424820947, "learning_rate": 2.047662359170033e-08, "loss": 0.5283, "step": 3644 }, { "epoch": 4.873578595317726, "grad_norm": 1.55342249529053, "learning_rate": 2.0056882486736982e-08, "loss": 0.4429, "step": 3645 }, { "epoch": 4.874916387959866, "grad_norm": 0.7963022680151106, "learning_rate": 1.9641479444952317e-08, "loss": 0.5105, "step": 3646 }, { "epoch": 4.8762541806020065, "grad_norm": 2.168495318341705, "learning_rate": 1.9230414828206866e-08, "loss": 0.4862, "step": 3647 }, { "epoch": 4.877591973244147, "grad_norm": 0.9134093697252217, "learning_rate": 1.8823688994582533e-08, "loss": 0.4876, "step": 3648 }, { "epoch": 4.878929765886287, "grad_norm": 0.798506214305895, "learning_rate": 1.842130229838035e-08, "loss": 0.5332, "step": 3649 }, { "epoch": 4.880267558528428, "grad_norm": 1.1019368575599575, "learning_rate": 1.802325509012215e-08, "loss": 0.3876, "step": 3650 }, { "epoch": 4.881605351170569, "grad_norm": 1.5406450594319232, "learning_rate": 1.762954771655001e-08, "loss": 0.4885, "step": 3651 }, { "epoch": 4.882943143812709, "grad_norm": 1.1711670591943966, "learning_rate": 1.724018052062515e-08, "loss": 0.5115, "step": 3652 }, { "epoch": 4.884280936454849, "grad_norm": 0.875451707861931, "learning_rate": 1.6855153841527915e-08, "loss": 0.4348, "step": 3653 }, { "epoch": 4.88561872909699, "grad_norm": 1.1336765228048538, "learning_rate": 1.647446801465724e-08, "loss": 0.4951, "step": 3654 }, { "epoch": 4.886956521739131, "grad_norm": 1.2840720880495524, "learning_rate": 1.60981233716323e-08, "loss": 0.483, "step": 3655 }, { "epoch": 4.888294314381271, "grad_norm": 1.2076824279883571, "learning_rate": 1.5726120240288632e-08, "loss": 0.4329, "step": 3656 }, { "epoch": 4.889632107023411, "grad_norm": 0.9869106894339983, "learning_rate": 1.5358458944680356e-08, "loss": 0.5189, "step": 3657 }, { "epoch": 4.890969899665552, "grad_norm": 0.9694791092425411, "learning_rate": 1.4995139805081272e-08, "loss": 0.5396, "step": 3658 }, { "epoch": 4.892307692307693, "grad_norm": 1.2985150351645747, "learning_rate": 1.4636163137980441e-08, "loss": 0.3861, "step": 3659 }, { "epoch": 4.8936454849498325, "grad_norm": 0.9934214044973675, "learning_rate": 1.4281529256084393e-08, "loss": 0.3459, "step": 3660 }, { "epoch": 4.894983277591973, "grad_norm": 1.15656131733994, "learning_rate": 1.393123846831823e-08, "loss": 0.4804, "step": 3661 }, { "epoch": 4.896321070234114, "grad_norm": 1.0444584318378431, "learning_rate": 1.358529107982176e-08, "loss": 0.4456, "step": 3662 }, { "epoch": 4.897658862876254, "grad_norm": 1.178265850182016, "learning_rate": 1.3243687391952809e-08, "loss": 0.419, "step": 3663 }, { "epoch": 4.898996655518395, "grad_norm": 0.9686179560752256, "learning_rate": 1.2906427702284452e-08, "loss": 0.4908, "step": 3664 }, { "epoch": 4.900334448160535, "grad_norm": 1.501442439180621, "learning_rate": 1.2573512304605574e-08, "loss": 0.5295, "step": 3665 }, { "epoch": 4.901672240802675, "grad_norm": 0.765741938563229, "learning_rate": 1.2244941488921968e-08, "loss": 0.5303, "step": 3666 }, { "epoch": 4.903010033444816, "grad_norm": 1.2597093065923712, "learning_rate": 1.1920715541453576e-08, "loss": 0.3825, "step": 3667 }, { "epoch": 4.904347826086957, "grad_norm": 0.9153545353723309, "learning_rate": 1.160083474463558e-08, "loss": 0.4757, "step": 3668 }, { "epoch": 4.905685618729097, "grad_norm": 1.1440798449022551, "learning_rate": 1.1285299377118974e-08, "loss": 0.4659, "step": 3669 }, { "epoch": 4.907023411371237, "grad_norm": 1.1041534217062356, "learning_rate": 1.0974109713768333e-08, "loss": 0.4905, "step": 3670 }, { "epoch": 4.908361204013378, "grad_norm": 1.1609672822045185, "learning_rate": 1.0667266025662925e-08, "loss": 0.4881, "step": 3671 }, { "epoch": 4.909698996655519, "grad_norm": 1.2290699129764575, "learning_rate": 1.0364768580097273e-08, "loss": 0.3911, "step": 3672 }, { "epoch": 4.9110367892976585, "grad_norm": 1.0376081019161267, "learning_rate": 1.006661764057837e-08, "loss": 0.4829, "step": 3673 }, { "epoch": 4.912374581939799, "grad_norm": 1.0431370256331065, "learning_rate": 9.772813466827347e-09, "loss": 0.5359, "step": 3674 }, { "epoch": 4.91371237458194, "grad_norm": 0.9574686555139993, "learning_rate": 9.48335631477948e-09, "loss": 0.4726, "step": 3675 }, { "epoch": 4.915050167224081, "grad_norm": 0.9143344745015016, "learning_rate": 9.19824643658307e-09, "loss": 0.5289, "step": 3676 }, { "epoch": 4.916387959866221, "grad_norm": 1.6689951367807259, "learning_rate": 8.917484080599448e-09, "loss": 0.4052, "step": 3677 }, { "epoch": 4.917725752508361, "grad_norm": 0.8540681091351705, "learning_rate": 8.641069491401866e-09, "loss": 0.4834, "step": 3678 }, { "epoch": 4.919063545150502, "grad_norm": 1.0940205655978583, "learning_rate": 8.369002909777713e-09, "loss": 0.439, "step": 3679 }, { "epoch": 4.920401337792642, "grad_norm": 1.1466589670312626, "learning_rate": 8.101284572725743e-09, "loss": 0.4629, "step": 3680 }, { "epoch": 4.921739130434783, "grad_norm": 0.8661096450243121, "learning_rate": 7.837914713457184e-09, "loss": 0.5096, "step": 3681 }, { "epoch": 4.923076923076923, "grad_norm": 1.0715755526026904, "learning_rate": 7.57889356139574e-09, "loss": 0.5125, "step": 3682 }, { "epoch": 4.924414715719063, "grad_norm": 1.1580297452376842, "learning_rate": 7.324221342176474e-09, "loss": 0.4928, "step": 3683 }, { "epoch": 4.925752508361204, "grad_norm": 1.1453202779100922, "learning_rate": 7.073898277645819e-09, "loss": 0.5602, "step": 3684 }, { "epoch": 4.927090301003345, "grad_norm": 1.0142211314224912, "learning_rate": 6.8279245858621226e-09, "loss": 0.4426, "step": 3685 }, { "epoch": 4.9284280936454845, "grad_norm": 1.6504396200128244, "learning_rate": 6.586300481095098e-09, "loss": 0.4945, "step": 3686 }, { "epoch": 4.929765886287625, "grad_norm": 1.0101080620684302, "learning_rate": 6.349026173824713e-09, "loss": 0.4383, "step": 3687 }, { "epoch": 4.931103678929766, "grad_norm": 1.1179393117927772, "learning_rate": 6.116101870742297e-09, "loss": 0.4491, "step": 3688 }, { "epoch": 4.932441471571907, "grad_norm": 1.0020234768309715, "learning_rate": 5.88752777474999e-09, "loss": 0.4244, "step": 3689 }, { "epoch": 4.933779264214047, "grad_norm": 1.0801712577982474, "learning_rate": 5.6633040849601865e-09, "loss": 0.4048, "step": 3690 }, { "epoch": 4.935117056856187, "grad_norm": 1.1547860757162107, "learning_rate": 5.443430996695531e-09, "loss": 0.4149, "step": 3691 }, { "epoch": 4.936454849498328, "grad_norm": 1.2557813542687946, "learning_rate": 5.227908701490036e-09, "loss": 0.41, "step": 3692 }, { "epoch": 4.937792642140468, "grad_norm": 0.8962285928780369, "learning_rate": 5.016737387085191e-09, "loss": 0.4638, "step": 3693 }, { "epoch": 4.939130434782609, "grad_norm": 0.97199843603162, "learning_rate": 4.8099172374349576e-09, "loss": 0.5791, "step": 3694 }, { "epoch": 4.940468227424749, "grad_norm": 1.0593609376509765, "learning_rate": 4.607448432701333e-09, "loss": 0.4177, "step": 3695 }, { "epoch": 4.94180602006689, "grad_norm": 0.9534932018698223, "learning_rate": 4.409331149256013e-09, "loss": 0.5054, "step": 3696 }, { "epoch": 4.94314381270903, "grad_norm": 1.6037537984906454, "learning_rate": 4.2155655596809455e-09, "loss": 0.5077, "step": 3697 }, { "epoch": 4.944481605351171, "grad_norm": 0.9229200442809204, "learning_rate": 4.026151832766112e-09, "loss": 0.3966, "step": 3698 }, { "epoch": 4.945819397993311, "grad_norm": 0.7395923180554113, "learning_rate": 3.841090133511749e-09, "loss": 0.5262, "step": 3699 }, { "epoch": 4.947157190635451, "grad_norm": 0.8547871391766757, "learning_rate": 3.66038062312557e-09, "loss": 0.4446, "step": 3700 }, { "epoch": 4.948494983277592, "grad_norm": 1.1886970795206147, "learning_rate": 3.484023459026098e-09, "loss": 0.4951, "step": 3701 }, { "epoch": 4.949832775919733, "grad_norm": 0.915782812529582, "learning_rate": 3.3120187948382233e-09, "loss": 0.4188, "step": 3702 }, { "epoch": 4.9511705685618725, "grad_norm": 1.2428359791804464, "learning_rate": 3.144366780396535e-09, "loss": 0.4604, "step": 3703 }, { "epoch": 4.952508361204013, "grad_norm": 1.2313164438314141, "learning_rate": 2.981067561744211e-09, "loss": 0.5305, "step": 3704 }, { "epoch": 4.953846153846154, "grad_norm": 1.1404765126297232, "learning_rate": 2.8221212811324616e-09, "loss": 0.5397, "step": 3705 }, { "epoch": 4.955183946488294, "grad_norm": 1.194822996264021, "learning_rate": 2.6675280770199763e-09, "loss": 0.4352, "step": 3706 }, { "epoch": 4.956521739130435, "grad_norm": 0.8208434596155946, "learning_rate": 2.5172880840745873e-09, "loss": 0.5, "step": 3707 }, { "epoch": 4.957859531772575, "grad_norm": 1.2481457540462395, "learning_rate": 2.371401433170495e-09, "loss": 0.5399, "step": 3708 }, { "epoch": 4.959197324414716, "grad_norm": 1.1519355902130641, "learning_rate": 2.229868251391598e-09, "loss": 0.4204, "step": 3709 }, { "epoch": 4.960535117056856, "grad_norm": 1.3422136447236213, "learning_rate": 2.0926886620281637e-09, "loss": 0.4332, "step": 3710 }, { "epoch": 4.961872909698997, "grad_norm": 1.4159410369423047, "learning_rate": 1.959862784577937e-09, "loss": 0.401, "step": 3711 }, { "epoch": 4.963210702341137, "grad_norm": 1.0238691074182942, "learning_rate": 1.8313907347466963e-09, "loss": 0.5286, "step": 3712 }, { "epoch": 4.964548494983277, "grad_norm": 0.9969053706673487, "learning_rate": 1.7072726244471427e-09, "loss": 0.4435, "step": 3713 }, { "epoch": 4.965886287625418, "grad_norm": 0.8706477663659908, "learning_rate": 1.5875085617994558e-09, "loss": 0.4999, "step": 3714 }, { "epoch": 4.967224080267559, "grad_norm": 0.9348982275571174, "learning_rate": 1.4720986511312928e-09, "loss": 0.5692, "step": 3715 }, { "epoch": 4.968561872909699, "grad_norm": 0.9996104679332959, "learning_rate": 1.36104299297668e-09, "loss": 0.3941, "step": 3716 }, { "epoch": 4.969899665551839, "grad_norm": 0.9745573201005439, "learning_rate": 1.2543416840771206e-09, "loss": 0.5074, "step": 3717 }, { "epoch": 4.97123745819398, "grad_norm": 0.6843089928704155, "learning_rate": 1.1519948173810414e-09, "loss": 0.4244, "step": 3718 }, { "epoch": 4.972575250836121, "grad_norm": 1.1321396131211807, "learning_rate": 1.054002482043237e-09, "loss": 0.6065, "step": 3719 }, { "epoch": 4.973913043478261, "grad_norm": 1.0788783084192568, "learning_rate": 9.603647634259806e-10, "loss": 0.5072, "step": 3720 }, { "epoch": 4.975250836120401, "grad_norm": 1.1438203837825573, "learning_rate": 8.710817430968021e-10, "loss": 0.4796, "step": 3721 }, { "epoch": 4.976588628762542, "grad_norm": 1.792711126595091, "learning_rate": 7.861534988312658e-10, "loss": 0.4176, "step": 3722 }, { "epoch": 4.977926421404682, "grad_norm": 0.8882560343151729, "learning_rate": 7.055801046113031e-10, "loss": 0.5738, "step": 3723 }, { "epoch": 4.979264214046823, "grad_norm": 1.013225927282279, "learning_rate": 6.293616306246586e-10, "loss": 0.4675, "step": 3724 }, { "epoch": 4.980602006688963, "grad_norm": 1.1703433999290014, "learning_rate": 5.574981432659998e-10, "loss": 0.5099, "step": 3725 }, { "epoch": 4.981939799331103, "grad_norm": 0.8246675552115681, "learning_rate": 4.899897051358071e-10, "loss": 0.35, "step": 3726 }, { "epoch": 4.983277591973244, "grad_norm": 0.9752082181290284, "learning_rate": 4.2683637504092877e-10, "loss": 0.5044, "step": 3727 }, { "epoch": 4.984615384615385, "grad_norm": 1.4881373216631975, "learning_rate": 3.6803820799513613e-10, "loss": 0.3697, "step": 3728 }, { "epoch": 4.985953177257525, "grad_norm": 1.5813109181475977, "learning_rate": 3.1359525521801326e-10, "loss": 0.4265, "step": 3729 }, { "epoch": 4.987290969899665, "grad_norm": 0.8356544780545926, "learning_rate": 2.6350756413440203e-10, "loss": 0.4633, "step": 3730 }, { "epoch": 4.988628762541806, "grad_norm": 0.8454497464342929, "learning_rate": 2.1777517837717755e-10, "loss": 0.5305, "step": 3731 }, { "epoch": 4.989966555183947, "grad_norm": 1.265235840067891, "learning_rate": 1.7639813778336233e-10, "loss": 0.4733, "step": 3732 }, { "epoch": 4.9913043478260875, "grad_norm": 1.1472448667068613, "learning_rate": 1.3937647839690205e-10, "loss": 0.4482, "step": 3733 }, { "epoch": 4.992642140468227, "grad_norm": 1.3662482835058298, "learning_rate": 1.0671023246755507e-10, "loss": 0.442, "step": 3734 }, { "epoch": 4.993979933110368, "grad_norm": 1.3911267847013122, "learning_rate": 7.839942845144777e-11, "loss": 0.598, "step": 3735 }, { "epoch": 4.995317725752509, "grad_norm": 0.9390041805200215, "learning_rate": 5.444409100996417e-11, "loss": 0.4807, "step": 3736 }, { "epoch": 4.996655518394649, "grad_norm": 1.0665672393149743, "learning_rate": 3.4844241011411375e-11, "loss": 0.5382, "step": 3737 }, { "epoch": 4.997993311036789, "grad_norm": 0.9094266224950984, "learning_rate": 1.959989552824393e-11, "loss": 0.5051, "step": 3738 }, { "epoch": 4.99933110367893, "grad_norm": 0.8979030217848702, "learning_rate": 8.711067840949661e-12, "loss": 0.5539, "step": 3739 }, { "epoch": 5.0, "grad_norm": 0.8979030217848702, "learning_rate": 2.1777674347189805e-12, "loss": 0.4355, "step": 3740 }, { "epoch": 5.0, "step": 3740, "total_flos": 352949920137216.0, "train_loss": 0.6102628104747297, "train_runtime": 74998.1208, "train_samples_per_second": 0.399, "train_steps_per_second": 0.05 } ], "logging_steps": 1, "max_steps": 3740, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 352949920137216.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }