{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9983300862788758, "eval_steps": 500, "global_step": 3591, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008349568605622043, "grad_norm": 6.048336505889893, "learning_rate": 2.777777777777778e-08, "loss": 0.8898, "step": 1 }, { "epoch": 0.0016699137211244085, "grad_norm": 5.939916610717773, "learning_rate": 5.555555555555556e-08, "loss": 0.8651, "step": 2 }, { "epoch": 0.0025048705816866127, "grad_norm": 5.965843200683594, "learning_rate": 8.333333333333334e-08, "loss": 0.8716, "step": 3 }, { "epoch": 0.003339827442248817, "grad_norm": 5.992764949798584, "learning_rate": 1.1111111111111112e-07, "loss": 0.9094, "step": 4 }, { "epoch": 0.004174784302811021, "grad_norm": 6.084325313568115, "learning_rate": 1.3888888888888888e-07, "loss": 0.8475, "step": 5 }, { "epoch": 0.005009741163373225, "grad_norm": 5.956256866455078, "learning_rate": 1.6666666666666668e-07, "loss": 0.8713, "step": 6 }, { "epoch": 0.00584469802393543, "grad_norm": 5.941786766052246, "learning_rate": 1.9444444444444447e-07, "loss": 0.8593, "step": 7 }, { "epoch": 0.006679654884497634, "grad_norm": 5.776935577392578, "learning_rate": 2.2222222222222224e-07, "loss": 0.8537, "step": 8 }, { "epoch": 0.0075146117450598385, "grad_norm": 6.158945560455322, "learning_rate": 2.5000000000000004e-07, "loss": 0.882, "step": 9 }, { "epoch": 0.008349568605622042, "grad_norm": 5.651492118835449, "learning_rate": 2.7777777777777776e-07, "loss": 0.8395, "step": 10 }, { "epoch": 0.009184525466184246, "grad_norm": 5.715651988983154, "learning_rate": 3.055555555555556e-07, "loss": 0.8735, "step": 11 }, { "epoch": 0.01001948232674645, "grad_norm": 5.7973785400390625, "learning_rate": 3.3333333333333335e-07, "loss": 0.8717, "step": 12 }, { "epoch": 0.010854439187308655, "grad_norm": 5.795179843902588, "learning_rate": 3.611111111111111e-07, "loss": 0.8531, "step": 13 }, { "epoch": 0.01168939604787086, "grad_norm": 5.316883563995361, "learning_rate": 3.8888888888888895e-07, "loss": 0.8408, "step": 14 }, { "epoch": 0.012524352908433064, "grad_norm": 5.346518039703369, "learning_rate": 4.1666666666666667e-07, "loss": 0.849, "step": 15 }, { "epoch": 0.013359309768995268, "grad_norm": 5.274306297302246, "learning_rate": 4.444444444444445e-07, "loss": 0.8582, "step": 16 }, { "epoch": 0.014194266629557473, "grad_norm": 5.312873840332031, "learning_rate": 4.7222222222222226e-07, "loss": 0.8468, "step": 17 }, { "epoch": 0.015029223490119677, "grad_norm": 5.305679798126221, "learning_rate": 5.000000000000001e-07, "loss": 0.8635, "step": 18 }, { "epoch": 0.01586418035068188, "grad_norm": 4.49119758605957, "learning_rate": 5.277777777777779e-07, "loss": 0.836, "step": 19 }, { "epoch": 0.016699137211244084, "grad_norm": 4.378904819488525, "learning_rate": 5.555555555555555e-07, "loss": 0.8028, "step": 20 }, { "epoch": 0.01753409407180629, "grad_norm": 4.3371381759643555, "learning_rate": 5.833333333333334e-07, "loss": 0.8207, "step": 21 }, { "epoch": 0.018369050932368493, "grad_norm": 4.2040863037109375, "learning_rate": 6.111111111111112e-07, "loss": 0.8185, "step": 22 }, { "epoch": 0.0192040077929307, "grad_norm": 4.220301628112793, "learning_rate": 6.388888888888889e-07, "loss": 0.8523, "step": 23 }, { "epoch": 0.0200389646534929, "grad_norm": 4.0004496574401855, "learning_rate": 6.666666666666667e-07, "loss": 0.7966, "step": 24 }, { "epoch": 0.020873921514055108, "grad_norm": 3.1486830711364746, "learning_rate": 6.944444444444446e-07, "loss": 0.7497, "step": 25 }, { "epoch": 0.02170887837461731, "grad_norm": 2.354543447494507, "learning_rate": 7.222222222222222e-07, "loss": 0.7664, "step": 26 }, { "epoch": 0.022543835235179516, "grad_norm": 2.284486770629883, "learning_rate": 7.5e-07, "loss": 0.7819, "step": 27 }, { "epoch": 0.02337879209574172, "grad_norm": 2.245793342590332, "learning_rate": 7.777777777777779e-07, "loss": 0.7603, "step": 28 }, { "epoch": 0.024213748956303925, "grad_norm": 2.081785202026367, "learning_rate": 8.055555555555557e-07, "loss": 0.7664, "step": 29 }, { "epoch": 0.025048705816866128, "grad_norm": 2.0900156497955322, "learning_rate": 8.333333333333333e-07, "loss": 0.7951, "step": 30 }, { "epoch": 0.025883662677428334, "grad_norm": 2.0226805210113525, "learning_rate": 8.611111111111112e-07, "loss": 0.7493, "step": 31 }, { "epoch": 0.026718619537990537, "grad_norm": 1.8227089643478394, "learning_rate": 8.88888888888889e-07, "loss": 0.7003, "step": 32 }, { "epoch": 0.027553576398552743, "grad_norm": 1.7422497272491455, "learning_rate": 9.166666666666666e-07, "loss": 0.7343, "step": 33 }, { "epoch": 0.028388533259114945, "grad_norm": 1.436092495918274, "learning_rate": 9.444444444444445e-07, "loss": 0.7549, "step": 34 }, { "epoch": 0.02922349011967715, "grad_norm": 1.599900484085083, "learning_rate": 9.722222222222224e-07, "loss": 0.7468, "step": 35 }, { "epoch": 0.030058446980239354, "grad_norm": 2.012633800506592, "learning_rate": 1.0000000000000002e-06, "loss": 0.7165, "step": 36 }, { "epoch": 0.03089340384080156, "grad_norm": 2.3142333030700684, "learning_rate": 1.0277777777777777e-06, "loss": 0.7498, "step": 37 }, { "epoch": 0.03172836070136376, "grad_norm": 2.2568912506103516, "learning_rate": 1.0555555555555557e-06, "loss": 0.7046, "step": 38 }, { "epoch": 0.032563317561925965, "grad_norm": 2.077139377593994, "learning_rate": 1.0833333333333335e-06, "loss": 0.6983, "step": 39 }, { "epoch": 0.03339827442248817, "grad_norm": 2.059664249420166, "learning_rate": 1.111111111111111e-06, "loss": 0.7215, "step": 40 }, { "epoch": 0.03423323128305038, "grad_norm": 1.7747143507003784, "learning_rate": 1.138888888888889e-06, "loss": 0.6838, "step": 41 }, { "epoch": 0.03506818814361258, "grad_norm": 1.6833782196044922, "learning_rate": 1.1666666666666668e-06, "loss": 0.6981, "step": 42 }, { "epoch": 0.03590314500417478, "grad_norm": 1.425864577293396, "learning_rate": 1.1944444444444446e-06, "loss": 0.6898, "step": 43 }, { "epoch": 0.036738101864736986, "grad_norm": 1.238877296447754, "learning_rate": 1.2222222222222223e-06, "loss": 0.6851, "step": 44 }, { "epoch": 0.037573058725299195, "grad_norm": 0.9459758996963501, "learning_rate": 1.25e-06, "loss": 0.68, "step": 45 }, { "epoch": 0.0384080155858614, "grad_norm": 0.9148854613304138, "learning_rate": 1.2777777777777779e-06, "loss": 0.6803, "step": 46 }, { "epoch": 0.0392429724464236, "grad_norm": 0.8972862362861633, "learning_rate": 1.3055555555555556e-06, "loss": 0.6676, "step": 47 }, { "epoch": 0.0400779293069858, "grad_norm": 1.0838708877563477, "learning_rate": 1.3333333333333334e-06, "loss": 0.6771, "step": 48 }, { "epoch": 0.04091288616754801, "grad_norm": 0.9867311716079712, "learning_rate": 1.3611111111111112e-06, "loss": 0.6827, "step": 49 }, { "epoch": 0.041747843028110215, "grad_norm": 1.0661039352416992, "learning_rate": 1.3888888888888892e-06, "loss": 0.6648, "step": 50 }, { "epoch": 0.04258279988867242, "grad_norm": 0.8806614875793457, "learning_rate": 1.4166666666666667e-06, "loss": 0.6723, "step": 51 }, { "epoch": 0.04341775674923462, "grad_norm": 0.774612307548523, "learning_rate": 1.4444444444444445e-06, "loss": 0.6216, "step": 52 }, { "epoch": 0.04425271360979683, "grad_norm": 0.7356958985328674, "learning_rate": 1.4722222222222225e-06, "loss": 0.6364, "step": 53 }, { "epoch": 0.04508767047035903, "grad_norm": 0.716899037361145, "learning_rate": 1.5e-06, "loss": 0.6302, "step": 54 }, { "epoch": 0.045922627330921235, "grad_norm": 0.8069896697998047, "learning_rate": 1.527777777777778e-06, "loss": 0.6598, "step": 55 }, { "epoch": 0.04675758419148344, "grad_norm": 0.8955972194671631, "learning_rate": 1.5555555555555558e-06, "loss": 0.6691, "step": 56 }, { "epoch": 0.04759254105204565, "grad_norm": 0.6668679714202881, "learning_rate": 1.5833333333333333e-06, "loss": 0.6441, "step": 57 }, { "epoch": 0.04842749791260785, "grad_norm": 0.6103956699371338, "learning_rate": 1.6111111111111113e-06, "loss": 0.6397, "step": 58 }, { "epoch": 0.04926245477317005, "grad_norm": 0.6347241401672363, "learning_rate": 1.638888888888889e-06, "loss": 0.661, "step": 59 }, { "epoch": 0.050097411633732256, "grad_norm": 0.6119781136512756, "learning_rate": 1.6666666666666667e-06, "loss": 0.6139, "step": 60 }, { "epoch": 0.05093236849429446, "grad_norm": 0.552765965461731, "learning_rate": 1.6944444444444446e-06, "loss": 0.5947, "step": 61 }, { "epoch": 0.05176732535485667, "grad_norm": 0.6343043446540833, "learning_rate": 1.7222222222222224e-06, "loss": 0.6102, "step": 62 }, { "epoch": 0.05260228221541887, "grad_norm": 0.5587901473045349, "learning_rate": 1.75e-06, "loss": 0.6128, "step": 63 }, { "epoch": 0.05343723907598107, "grad_norm": 0.5557250380516052, "learning_rate": 1.777777777777778e-06, "loss": 0.6123, "step": 64 }, { "epoch": 0.054272195936543276, "grad_norm": 0.5134753584861755, "learning_rate": 1.8055555555555557e-06, "loss": 0.6323, "step": 65 }, { "epoch": 0.055107152797105485, "grad_norm": 0.5023328065872192, "learning_rate": 1.8333333333333333e-06, "loss": 0.6054, "step": 66 }, { "epoch": 0.05594210965766769, "grad_norm": 0.4781493842601776, "learning_rate": 1.8611111111111113e-06, "loss": 0.6169, "step": 67 }, { "epoch": 0.05677706651822989, "grad_norm": 0.46679824590682983, "learning_rate": 1.888888888888889e-06, "loss": 0.6007, "step": 68 }, { "epoch": 0.05761202337879209, "grad_norm": 0.46856430172920227, "learning_rate": 1.916666666666667e-06, "loss": 0.6251, "step": 69 }, { "epoch": 0.0584469802393543, "grad_norm": 0.45072585344314575, "learning_rate": 1.944444444444445e-06, "loss": 0.6034, "step": 70 }, { "epoch": 0.059281937099916505, "grad_norm": 0.4675133228302002, "learning_rate": 1.9722222222222224e-06, "loss": 0.596, "step": 71 }, { "epoch": 0.06011689396047871, "grad_norm": 0.44951313734054565, "learning_rate": 2.0000000000000003e-06, "loss": 0.5998, "step": 72 }, { "epoch": 0.06095185082104091, "grad_norm": 0.4363986551761627, "learning_rate": 2.027777777777778e-06, "loss": 0.6083, "step": 73 }, { "epoch": 0.06178680768160312, "grad_norm": 0.4347071349620819, "learning_rate": 2.0555555555555555e-06, "loss": 0.6137, "step": 74 }, { "epoch": 0.06262176454216532, "grad_norm": 0.4121546149253845, "learning_rate": 2.0833333333333334e-06, "loss": 0.5816, "step": 75 }, { "epoch": 0.06345672140272753, "grad_norm": 0.4275442957878113, "learning_rate": 2.1111111111111114e-06, "loss": 0.5687, "step": 76 }, { "epoch": 0.06429167826328973, "grad_norm": 0.4596114754676819, "learning_rate": 2.138888888888889e-06, "loss": 0.6325, "step": 77 }, { "epoch": 0.06512663512385193, "grad_norm": 0.4543353021144867, "learning_rate": 2.166666666666667e-06, "loss": 0.6093, "step": 78 }, { "epoch": 0.06596159198441413, "grad_norm": 0.4401535093784332, "learning_rate": 2.1944444444444445e-06, "loss": 0.6381, "step": 79 }, { "epoch": 0.06679654884497634, "grad_norm": 0.49544909596443176, "learning_rate": 2.222222222222222e-06, "loss": 0.5563, "step": 80 }, { "epoch": 0.06763150570553855, "grad_norm": 0.4526262879371643, "learning_rate": 2.25e-06, "loss": 0.5732, "step": 81 }, { "epoch": 0.06846646256610076, "grad_norm": 0.4491852819919586, "learning_rate": 2.277777777777778e-06, "loss": 0.5756, "step": 82 }, { "epoch": 0.06930141942666296, "grad_norm": 0.5036222338676453, "learning_rate": 2.305555555555556e-06, "loss": 0.5952, "step": 83 }, { "epoch": 0.07013637628722516, "grad_norm": 0.4521656632423401, "learning_rate": 2.3333333333333336e-06, "loss": 0.5644, "step": 84 }, { "epoch": 0.07097133314778736, "grad_norm": 0.4592026472091675, "learning_rate": 2.361111111111111e-06, "loss": 0.5734, "step": 85 }, { "epoch": 0.07180629000834957, "grad_norm": 0.4048601984977722, "learning_rate": 2.388888888888889e-06, "loss": 0.5923, "step": 86 }, { "epoch": 0.07264124686891177, "grad_norm": 0.42403921484947205, "learning_rate": 2.4166666666666667e-06, "loss": 0.571, "step": 87 }, { "epoch": 0.07347620372947397, "grad_norm": 0.44150951504707336, "learning_rate": 2.4444444444444447e-06, "loss": 0.5971, "step": 88 }, { "epoch": 0.07431116059003619, "grad_norm": 0.3851671516895294, "learning_rate": 2.4722222222222226e-06, "loss": 0.5697, "step": 89 }, { "epoch": 0.07514611745059839, "grad_norm": 0.4117358922958374, "learning_rate": 2.5e-06, "loss": 0.5675, "step": 90 }, { "epoch": 0.07598107431116059, "grad_norm": 0.4395599067211151, "learning_rate": 2.5277777777777778e-06, "loss": 0.5804, "step": 91 }, { "epoch": 0.0768160311717228, "grad_norm": 0.4465152621269226, "learning_rate": 2.5555555555555557e-06, "loss": 0.5988, "step": 92 }, { "epoch": 0.077650988032285, "grad_norm": 0.4439001679420471, "learning_rate": 2.5833333333333337e-06, "loss": 0.6168, "step": 93 }, { "epoch": 0.0784859448928472, "grad_norm": 0.483646959066391, "learning_rate": 2.6111111111111113e-06, "loss": 0.5617, "step": 94 }, { "epoch": 0.0793209017534094, "grad_norm": 0.45608407258987427, "learning_rate": 2.6388888888888893e-06, "loss": 0.5969, "step": 95 }, { "epoch": 0.0801558586139716, "grad_norm": 0.39999932050704956, "learning_rate": 2.666666666666667e-06, "loss": 0.5551, "step": 96 }, { "epoch": 0.08099081547453382, "grad_norm": 0.4671604037284851, "learning_rate": 2.6944444444444444e-06, "loss": 0.5569, "step": 97 }, { "epoch": 0.08182577233509603, "grad_norm": 0.5091117024421692, "learning_rate": 2.7222222222222224e-06, "loss": 0.5331, "step": 98 }, { "epoch": 0.08266072919565823, "grad_norm": 0.4887247681617737, "learning_rate": 2.7500000000000004e-06, "loss": 0.5547, "step": 99 }, { "epoch": 0.08349568605622043, "grad_norm": 0.4014512002468109, "learning_rate": 2.7777777777777783e-06, "loss": 0.5436, "step": 100 }, { "epoch": 0.08433064291678263, "grad_norm": 0.4114540219306946, "learning_rate": 2.805555555555556e-06, "loss": 0.5871, "step": 101 }, { "epoch": 0.08516559977734484, "grad_norm": 0.4427196681499481, "learning_rate": 2.8333333333333335e-06, "loss": 0.5606, "step": 102 }, { "epoch": 0.08600055663790704, "grad_norm": 0.4531497061252594, "learning_rate": 2.861111111111111e-06, "loss": 0.5793, "step": 103 }, { "epoch": 0.08683551349846924, "grad_norm": 0.43082141876220703, "learning_rate": 2.888888888888889e-06, "loss": 0.5514, "step": 104 }, { "epoch": 0.08767047035903144, "grad_norm": 0.4344906806945801, "learning_rate": 2.916666666666667e-06, "loss": 0.5694, "step": 105 }, { "epoch": 0.08850542721959366, "grad_norm": 0.484298974275589, "learning_rate": 2.944444444444445e-06, "loss": 0.5256, "step": 106 }, { "epoch": 0.08934038408015586, "grad_norm": 0.39441534876823425, "learning_rate": 2.9722222222222225e-06, "loss": 0.5725, "step": 107 }, { "epoch": 0.09017534094071807, "grad_norm": 0.4050918221473694, "learning_rate": 3e-06, "loss": 0.5452, "step": 108 }, { "epoch": 0.09101029780128027, "grad_norm": 0.4048078954219818, "learning_rate": 3.0277777777777776e-06, "loss": 0.541, "step": 109 }, { "epoch": 0.09184525466184247, "grad_norm": 0.4126303493976593, "learning_rate": 3.055555555555556e-06, "loss": 0.5643, "step": 110 }, { "epoch": 0.09268021152240467, "grad_norm": 0.4516768753528595, "learning_rate": 3.0833333333333336e-06, "loss": 0.5451, "step": 111 }, { "epoch": 0.09351516838296688, "grad_norm": 0.3806609511375427, "learning_rate": 3.1111111111111116e-06, "loss": 0.552, "step": 112 }, { "epoch": 0.09435012524352908, "grad_norm": 0.4316946566104889, "learning_rate": 3.138888888888889e-06, "loss": 0.5328, "step": 113 }, { "epoch": 0.0951850821040913, "grad_norm": 0.41516250371932983, "learning_rate": 3.1666666666666667e-06, "loss": 0.5504, "step": 114 }, { "epoch": 0.0960200389646535, "grad_norm": 0.4454858601093292, "learning_rate": 3.1944444444444443e-06, "loss": 0.5526, "step": 115 }, { "epoch": 0.0968549958252157, "grad_norm": 0.3927322328090668, "learning_rate": 3.2222222222222227e-06, "loss": 0.5535, "step": 116 }, { "epoch": 0.0976899526857779, "grad_norm": 0.44316431879997253, "learning_rate": 3.2500000000000002e-06, "loss": 0.5242, "step": 117 }, { "epoch": 0.0985249095463401, "grad_norm": 0.41941332817077637, "learning_rate": 3.277777777777778e-06, "loss": 0.5398, "step": 118 }, { "epoch": 0.09935986640690231, "grad_norm": 0.37856918573379517, "learning_rate": 3.3055555555555558e-06, "loss": 0.53, "step": 119 }, { "epoch": 0.10019482326746451, "grad_norm": 0.41399744153022766, "learning_rate": 3.3333333333333333e-06, "loss": 0.5782, "step": 120 }, { "epoch": 0.10102978012802671, "grad_norm": 0.38598787784576416, "learning_rate": 3.3611111111111117e-06, "loss": 0.5673, "step": 121 }, { "epoch": 0.10186473698858892, "grad_norm": 0.47129160165786743, "learning_rate": 3.3888888888888893e-06, "loss": 0.5673, "step": 122 }, { "epoch": 0.10269969384915113, "grad_norm": 0.38395658135414124, "learning_rate": 3.416666666666667e-06, "loss": 0.5376, "step": 123 }, { "epoch": 0.10353465070971334, "grad_norm": 0.4213396906852722, "learning_rate": 3.444444444444445e-06, "loss": 0.5758, "step": 124 }, { "epoch": 0.10436960757027554, "grad_norm": 0.3776800036430359, "learning_rate": 3.4722222222222224e-06, "loss": 0.5764, "step": 125 }, { "epoch": 0.10520456443083774, "grad_norm": 0.39729151129722595, "learning_rate": 3.5e-06, "loss": 0.5126, "step": 126 }, { "epoch": 0.10603952129139994, "grad_norm": 0.3930918872356415, "learning_rate": 3.5277777777777784e-06, "loss": 0.5617, "step": 127 }, { "epoch": 0.10687447815196215, "grad_norm": 0.4167371988296509, "learning_rate": 3.555555555555556e-06, "loss": 0.5612, "step": 128 }, { "epoch": 0.10770943501252435, "grad_norm": 0.4182722866535187, "learning_rate": 3.5833333333333335e-06, "loss": 0.5706, "step": 129 }, { "epoch": 0.10854439187308655, "grad_norm": 0.4178430736064911, "learning_rate": 3.6111111111111115e-06, "loss": 0.5628, "step": 130 }, { "epoch": 0.10937934873364877, "grad_norm": 0.3785693645477295, "learning_rate": 3.638888888888889e-06, "loss": 0.5378, "step": 131 }, { "epoch": 0.11021430559421097, "grad_norm": 0.4298681318759918, "learning_rate": 3.6666666666666666e-06, "loss": 0.5368, "step": 132 }, { "epoch": 0.11104926245477317, "grad_norm": 0.4162171483039856, "learning_rate": 3.694444444444445e-06, "loss": 0.5603, "step": 133 }, { "epoch": 0.11188421931533538, "grad_norm": 0.42918750643730164, "learning_rate": 3.7222222222222225e-06, "loss": 0.5559, "step": 134 }, { "epoch": 0.11271917617589758, "grad_norm": 0.3955279588699341, "learning_rate": 3.7500000000000005e-06, "loss": 0.5544, "step": 135 }, { "epoch": 0.11355413303645978, "grad_norm": 0.4331086277961731, "learning_rate": 3.777777777777778e-06, "loss": 0.5413, "step": 136 }, { "epoch": 0.11438908989702198, "grad_norm": 0.3958194851875305, "learning_rate": 3.8055555555555556e-06, "loss": 0.5561, "step": 137 }, { "epoch": 0.11522404675758419, "grad_norm": 0.41914674639701843, "learning_rate": 3.833333333333334e-06, "loss": 0.5615, "step": 138 }, { "epoch": 0.11605900361814639, "grad_norm": 0.44733357429504395, "learning_rate": 3.861111111111112e-06, "loss": 0.5485, "step": 139 }, { "epoch": 0.1168939604787086, "grad_norm": 0.4353850781917572, "learning_rate": 3.88888888888889e-06, "loss": 0.5584, "step": 140 }, { "epoch": 0.11772891733927081, "grad_norm": 0.47591444849967957, "learning_rate": 3.916666666666667e-06, "loss": 0.5445, "step": 141 }, { "epoch": 0.11856387419983301, "grad_norm": 0.4129173755645752, "learning_rate": 3.944444444444445e-06, "loss": 0.5048, "step": 142 }, { "epoch": 0.11939883106039521, "grad_norm": 0.5143245458602905, "learning_rate": 3.972222222222223e-06, "loss": 0.5517, "step": 143 }, { "epoch": 0.12023378792095742, "grad_norm": 0.45406827330589294, "learning_rate": 4.000000000000001e-06, "loss": 0.5082, "step": 144 }, { "epoch": 0.12106874478151962, "grad_norm": 0.49156394600868225, "learning_rate": 4.027777777777779e-06, "loss": 0.5578, "step": 145 }, { "epoch": 0.12190370164208182, "grad_norm": 0.46687865257263184, "learning_rate": 4.055555555555556e-06, "loss": 0.5569, "step": 146 }, { "epoch": 0.12273865850264402, "grad_norm": 0.4573095440864563, "learning_rate": 4.083333333333334e-06, "loss": 0.5365, "step": 147 }, { "epoch": 0.12357361536320624, "grad_norm": 0.43507668375968933, "learning_rate": 4.111111111111111e-06, "loss": 0.5352, "step": 148 }, { "epoch": 0.12440857222376844, "grad_norm": 0.4185396432876587, "learning_rate": 4.138888888888889e-06, "loss": 0.5461, "step": 149 }, { "epoch": 0.12524352908433065, "grad_norm": 0.44131702184677124, "learning_rate": 4.166666666666667e-06, "loss": 0.5508, "step": 150 }, { "epoch": 0.12607848594489285, "grad_norm": 0.43332409858703613, "learning_rate": 4.194444444444445e-06, "loss": 0.5322, "step": 151 }, { "epoch": 0.12691344280545505, "grad_norm": 0.37552493810653687, "learning_rate": 4.222222222222223e-06, "loss": 0.4829, "step": 152 }, { "epoch": 0.12774839966601725, "grad_norm": 0.46336209774017334, "learning_rate": 4.25e-06, "loss": 0.5506, "step": 153 }, { "epoch": 0.12858335652657946, "grad_norm": 0.4451075792312622, "learning_rate": 4.277777777777778e-06, "loss": 0.5484, "step": 154 }, { "epoch": 0.12941831338714166, "grad_norm": 0.5538905262947083, "learning_rate": 4.305555555555556e-06, "loss": 0.5523, "step": 155 }, { "epoch": 0.13025327024770386, "grad_norm": 0.47765395045280457, "learning_rate": 4.333333333333334e-06, "loss": 0.5205, "step": 156 }, { "epoch": 0.13108822710826606, "grad_norm": 0.40014973282814026, "learning_rate": 4.361111111111112e-06, "loss": 0.5239, "step": 157 }, { "epoch": 0.13192318396882827, "grad_norm": 0.4720593988895416, "learning_rate": 4.388888888888889e-06, "loss": 0.5745, "step": 158 }, { "epoch": 0.13275814082939047, "grad_norm": 0.43209192156791687, "learning_rate": 4.416666666666667e-06, "loss": 0.5411, "step": 159 }, { "epoch": 0.13359309768995267, "grad_norm": 0.4444791376590729, "learning_rate": 4.444444444444444e-06, "loss": 0.5358, "step": 160 }, { "epoch": 0.1344280545505149, "grad_norm": 0.40642106533050537, "learning_rate": 4.472222222222223e-06, "loss": 0.5299, "step": 161 }, { "epoch": 0.1352630114110771, "grad_norm": 0.4458726942539215, "learning_rate": 4.5e-06, "loss": 0.5175, "step": 162 }, { "epoch": 0.1360979682716393, "grad_norm": 0.41670772433280945, "learning_rate": 4.527777777777778e-06, "loss": 0.5373, "step": 163 }, { "epoch": 0.1369329251322015, "grad_norm": 0.39907047152519226, "learning_rate": 4.555555555555556e-06, "loss": 0.5526, "step": 164 }, { "epoch": 0.1377678819927637, "grad_norm": 0.43985533714294434, "learning_rate": 4.583333333333333e-06, "loss": 0.5233, "step": 165 }, { "epoch": 0.13860283885332592, "grad_norm": 0.4676657021045685, "learning_rate": 4.611111111111112e-06, "loss": 0.5403, "step": 166 }, { "epoch": 0.13943779571388812, "grad_norm": 0.43863973021507263, "learning_rate": 4.638888888888889e-06, "loss": 0.584, "step": 167 }, { "epoch": 0.14027275257445032, "grad_norm": 0.4357122480869293, "learning_rate": 4.666666666666667e-06, "loss": 0.5071, "step": 168 }, { "epoch": 0.14110770943501252, "grad_norm": 0.43971046805381775, "learning_rate": 4.694444444444445e-06, "loss": 0.4807, "step": 169 }, { "epoch": 0.14194266629557473, "grad_norm": 0.42740535736083984, "learning_rate": 4.722222222222222e-06, "loss": 0.5189, "step": 170 }, { "epoch": 0.14277762315613693, "grad_norm": 0.44121846556663513, "learning_rate": 4.75e-06, "loss": 0.5217, "step": 171 }, { "epoch": 0.14361258001669913, "grad_norm": 0.5266684293746948, "learning_rate": 4.777777777777778e-06, "loss": 0.5478, "step": 172 }, { "epoch": 0.14444753687726133, "grad_norm": 0.3933464288711548, "learning_rate": 4.805555555555556e-06, "loss": 0.5379, "step": 173 }, { "epoch": 0.14528249373782354, "grad_norm": 0.41686132550239563, "learning_rate": 4.833333333333333e-06, "loss": 0.5424, "step": 174 }, { "epoch": 0.14611745059838574, "grad_norm": 0.44143441319465637, "learning_rate": 4.861111111111111e-06, "loss": 0.5431, "step": 175 }, { "epoch": 0.14695240745894794, "grad_norm": 0.39843302965164185, "learning_rate": 4.888888888888889e-06, "loss": 0.5492, "step": 176 }, { "epoch": 0.14778736431951014, "grad_norm": 0.42543309926986694, "learning_rate": 4.9166666666666665e-06, "loss": 0.5331, "step": 177 }, { "epoch": 0.14862232118007238, "grad_norm": 0.45944976806640625, "learning_rate": 4.944444444444445e-06, "loss": 0.5188, "step": 178 }, { "epoch": 0.14945727804063458, "grad_norm": 0.41110265254974365, "learning_rate": 4.9722222222222224e-06, "loss": 0.5304, "step": 179 }, { "epoch": 0.15029223490119678, "grad_norm": 0.460624098777771, "learning_rate": 5e-06, "loss": 0.546, "step": 180 }, { "epoch": 0.15112719176175898, "grad_norm": 0.45279353857040405, "learning_rate": 5.027777777777778e-06, "loss": 0.5042, "step": 181 }, { "epoch": 0.15196214862232119, "grad_norm": 0.4067543148994446, "learning_rate": 5.0555555555555555e-06, "loss": 0.5405, "step": 182 }, { "epoch": 0.1527971054828834, "grad_norm": 0.45868903398513794, "learning_rate": 5.0833333333333335e-06, "loss": 0.4941, "step": 183 }, { "epoch": 0.1536320623434456, "grad_norm": 0.48420512676239014, "learning_rate": 5.1111111111111115e-06, "loss": 0.5223, "step": 184 }, { "epoch": 0.1544670192040078, "grad_norm": 0.40509432554244995, "learning_rate": 5.138888888888889e-06, "loss": 0.5448, "step": 185 }, { "epoch": 0.15530197606457, "grad_norm": 0.482471227645874, "learning_rate": 5.1666666666666675e-06, "loss": 0.5392, "step": 186 }, { "epoch": 0.1561369329251322, "grad_norm": 0.44855406880378723, "learning_rate": 5.1944444444444454e-06, "loss": 0.5433, "step": 187 }, { "epoch": 0.1569718897856944, "grad_norm": 0.4413621723651886, "learning_rate": 5.2222222222222226e-06, "loss": 0.5266, "step": 188 }, { "epoch": 0.1578068466462566, "grad_norm": 0.5143046975135803, "learning_rate": 5.2500000000000006e-06, "loss": 0.5482, "step": 189 }, { "epoch": 0.1586418035068188, "grad_norm": 0.4866412580013275, "learning_rate": 5.2777777777777785e-06, "loss": 0.5241, "step": 190 }, { "epoch": 0.159476760367381, "grad_norm": 0.4413106441497803, "learning_rate": 5.305555555555556e-06, "loss": 0.5427, "step": 191 }, { "epoch": 0.1603117172279432, "grad_norm": 0.4760594069957733, "learning_rate": 5.333333333333334e-06, "loss": 0.5397, "step": 192 }, { "epoch": 0.16114667408850541, "grad_norm": 0.464786171913147, "learning_rate": 5.361111111111112e-06, "loss": 0.5199, "step": 193 }, { "epoch": 0.16198163094906765, "grad_norm": 0.49218153953552246, "learning_rate": 5.388888888888889e-06, "loss": 0.5612, "step": 194 }, { "epoch": 0.16281658780962985, "grad_norm": 0.4292621910572052, "learning_rate": 5.416666666666667e-06, "loss": 0.5117, "step": 195 }, { "epoch": 0.16365154467019205, "grad_norm": 0.41128841042518616, "learning_rate": 5.444444444444445e-06, "loss": 0.5492, "step": 196 }, { "epoch": 0.16448650153075425, "grad_norm": 0.47268083691596985, "learning_rate": 5.4722222222222236e-06, "loss": 0.5385, "step": 197 }, { "epoch": 0.16532145839131646, "grad_norm": 0.4391952157020569, "learning_rate": 5.500000000000001e-06, "loss": 0.5561, "step": 198 }, { "epoch": 0.16615641525187866, "grad_norm": 0.42468196153640747, "learning_rate": 5.527777777777779e-06, "loss": 0.5158, "step": 199 }, { "epoch": 0.16699137211244086, "grad_norm": 0.4502491056919098, "learning_rate": 5.555555555555557e-06, "loss": 0.5666, "step": 200 }, { "epoch": 0.16782632897300306, "grad_norm": 0.4275619089603424, "learning_rate": 5.583333333333334e-06, "loss": 0.5014, "step": 201 }, { "epoch": 0.16866128583356527, "grad_norm": 0.414630264043808, "learning_rate": 5.611111111111112e-06, "loss": 0.5075, "step": 202 }, { "epoch": 0.16949624269412747, "grad_norm": 0.4465274214744568, "learning_rate": 5.638888888888889e-06, "loss": 0.5254, "step": 203 }, { "epoch": 0.17033119955468967, "grad_norm": 0.49801576137542725, "learning_rate": 5.666666666666667e-06, "loss": 0.5314, "step": 204 }, { "epoch": 0.17116615641525187, "grad_norm": 0.48471230268478394, "learning_rate": 5.694444444444445e-06, "loss": 0.5509, "step": 205 }, { "epoch": 0.17200111327581408, "grad_norm": 0.43026813864707947, "learning_rate": 5.722222222222222e-06, "loss": 0.5266, "step": 206 }, { "epoch": 0.17283607013637628, "grad_norm": 0.4679989218711853, "learning_rate": 5.75e-06, "loss": 0.5339, "step": 207 }, { "epoch": 0.17367102699693848, "grad_norm": 0.5423908233642578, "learning_rate": 5.777777777777778e-06, "loss": 0.5414, "step": 208 }, { "epoch": 0.17450598385750068, "grad_norm": 0.47275540232658386, "learning_rate": 5.805555555555557e-06, "loss": 0.4994, "step": 209 }, { "epoch": 0.1753409407180629, "grad_norm": 0.44594377279281616, "learning_rate": 5.833333333333334e-06, "loss": 0.5234, "step": 210 }, { "epoch": 0.17617589757862512, "grad_norm": 0.4378286600112915, "learning_rate": 5.861111111111112e-06, "loss": 0.5253, "step": 211 }, { "epoch": 0.17701085443918732, "grad_norm": 0.4837423861026764, "learning_rate": 5.88888888888889e-06, "loss": 0.5089, "step": 212 }, { "epoch": 0.17784581129974952, "grad_norm": 0.48563292622566223, "learning_rate": 5.916666666666667e-06, "loss": 0.5224, "step": 213 }, { "epoch": 0.17868076816031173, "grad_norm": 0.4208638370037079, "learning_rate": 5.944444444444445e-06, "loss": 0.5252, "step": 214 }, { "epoch": 0.17951572502087393, "grad_norm": 0.5566235780715942, "learning_rate": 5.972222222222222e-06, "loss": 0.5327, "step": 215 }, { "epoch": 0.18035068188143613, "grad_norm": 0.4654887616634369, "learning_rate": 6e-06, "loss": 0.5377, "step": 216 }, { "epoch": 0.18118563874199833, "grad_norm": 0.4192138910293579, "learning_rate": 6.027777777777778e-06, "loss": 0.5438, "step": 217 }, { "epoch": 0.18202059560256054, "grad_norm": 0.4804072380065918, "learning_rate": 6.055555555555555e-06, "loss": 0.5471, "step": 218 }, { "epoch": 0.18285555246312274, "grad_norm": 0.49558112025260925, "learning_rate": 6.083333333333333e-06, "loss": 0.4784, "step": 219 }, { "epoch": 0.18369050932368494, "grad_norm": 0.43617480993270874, "learning_rate": 6.111111111111112e-06, "loss": 0.4967, "step": 220 }, { "epoch": 0.18452546618424714, "grad_norm": 0.3807932138442993, "learning_rate": 6.13888888888889e-06, "loss": 0.5039, "step": 221 }, { "epoch": 0.18536042304480935, "grad_norm": 0.42479848861694336, "learning_rate": 6.166666666666667e-06, "loss": 0.4861, "step": 222 }, { "epoch": 0.18619537990537155, "grad_norm": 0.4746038317680359, "learning_rate": 6.194444444444445e-06, "loss": 0.5292, "step": 223 }, { "epoch": 0.18703033676593375, "grad_norm": 0.4689016044139862, "learning_rate": 6.222222222222223e-06, "loss": 0.5084, "step": 224 }, { "epoch": 0.18786529362649595, "grad_norm": 0.45676934719085693, "learning_rate": 6.25e-06, "loss": 0.5264, "step": 225 }, { "epoch": 0.18870025048705816, "grad_norm": 0.4710977077484131, "learning_rate": 6.277777777777778e-06, "loss": 0.5273, "step": 226 }, { "epoch": 0.18953520734762036, "grad_norm": 0.4449271857738495, "learning_rate": 6.305555555555556e-06, "loss": 0.4903, "step": 227 }, { "epoch": 0.1903701642081826, "grad_norm": 0.5413058996200562, "learning_rate": 6.333333333333333e-06, "loss": 0.5019, "step": 228 }, { "epoch": 0.1912051210687448, "grad_norm": 0.4666474461555481, "learning_rate": 6.361111111111111e-06, "loss": 0.5235, "step": 229 }, { "epoch": 0.192040077929307, "grad_norm": 0.40310418605804443, "learning_rate": 6.3888888888888885e-06, "loss": 0.5075, "step": 230 }, { "epoch": 0.1928750347898692, "grad_norm": 0.46066367626190186, "learning_rate": 6.416666666666667e-06, "loss": 0.5111, "step": 231 }, { "epoch": 0.1937099916504314, "grad_norm": 0.4196142554283142, "learning_rate": 6.444444444444445e-06, "loss": 0.5064, "step": 232 }, { "epoch": 0.1945449485109936, "grad_norm": 0.4757693409919739, "learning_rate": 6.472222222222223e-06, "loss": 0.5197, "step": 233 }, { "epoch": 0.1953799053715558, "grad_norm": 0.4227370321750641, "learning_rate": 6.5000000000000004e-06, "loss": 0.5402, "step": 234 }, { "epoch": 0.196214862232118, "grad_norm": 0.4552716612815857, "learning_rate": 6.5277777777777784e-06, "loss": 0.5215, "step": 235 }, { "epoch": 0.1970498190926802, "grad_norm": 0.4754336178302765, "learning_rate": 6.555555555555556e-06, "loss": 0.5215, "step": 236 }, { "epoch": 0.19788477595324241, "grad_norm": 0.531810998916626, "learning_rate": 6.5833333333333335e-06, "loss": 0.5137, "step": 237 }, { "epoch": 0.19871973281380462, "grad_norm": 0.5141936540603638, "learning_rate": 6.6111111111111115e-06, "loss": 0.5269, "step": 238 }, { "epoch": 0.19955468967436682, "grad_norm": 0.4406936466693878, "learning_rate": 6.6388888888888895e-06, "loss": 0.5088, "step": 239 }, { "epoch": 0.20038964653492902, "grad_norm": 0.5425930619239807, "learning_rate": 6.666666666666667e-06, "loss": 0.5416, "step": 240 }, { "epoch": 0.20122460339549122, "grad_norm": 0.49088504910469055, "learning_rate": 6.694444444444445e-06, "loss": 0.5024, "step": 241 }, { "epoch": 0.20205956025605343, "grad_norm": 0.5182738304138184, "learning_rate": 6.7222222222222235e-06, "loss": 0.5057, "step": 242 }, { "epoch": 0.20289451711661563, "grad_norm": 0.5724807381629944, "learning_rate": 6.750000000000001e-06, "loss": 0.5418, "step": 243 }, { "epoch": 0.20372947397717783, "grad_norm": 0.4523678421974182, "learning_rate": 6.777777777777779e-06, "loss": 0.5185, "step": 244 }, { "epoch": 0.20456443083774006, "grad_norm": 0.4837048351764679, "learning_rate": 6.8055555555555566e-06, "loss": 0.5291, "step": 245 }, { "epoch": 0.20539938769830227, "grad_norm": 0.5890618562698364, "learning_rate": 6.833333333333334e-06, "loss": 0.5024, "step": 246 }, { "epoch": 0.20623434455886447, "grad_norm": 0.4577277600765228, "learning_rate": 6.861111111111112e-06, "loss": 0.5316, "step": 247 }, { "epoch": 0.20706930141942667, "grad_norm": 0.5863877534866333, "learning_rate": 6.88888888888889e-06, "loss": 0.5155, "step": 248 }, { "epoch": 0.20790425827998887, "grad_norm": 0.578341007232666, "learning_rate": 6.916666666666667e-06, "loss": 0.4843, "step": 249 }, { "epoch": 0.20873921514055108, "grad_norm": 0.512344241142273, "learning_rate": 6.944444444444445e-06, "loss": 0.5352, "step": 250 }, { "epoch": 0.20957417200111328, "grad_norm": 0.5055384039878845, "learning_rate": 6.972222222222223e-06, "loss": 0.5295, "step": 251 }, { "epoch": 0.21040912886167548, "grad_norm": 0.4628811180591583, "learning_rate": 7e-06, "loss": 0.4891, "step": 252 }, { "epoch": 0.21124408572223768, "grad_norm": 0.6266967058181763, "learning_rate": 7.027777777777778e-06, "loss": 0.5218, "step": 253 }, { "epoch": 0.2120790425827999, "grad_norm": 0.44172239303588867, "learning_rate": 7.055555555555557e-06, "loss": 0.5204, "step": 254 }, { "epoch": 0.2129139994433621, "grad_norm": 0.47828781604766846, "learning_rate": 7.083333333333335e-06, "loss": 0.5139, "step": 255 }, { "epoch": 0.2137489563039243, "grad_norm": 0.6140791773796082, "learning_rate": 7.111111111111112e-06, "loss": 0.5525, "step": 256 }, { "epoch": 0.2145839131644865, "grad_norm": 0.47673821449279785, "learning_rate": 7.13888888888889e-06, "loss": 0.5119, "step": 257 }, { "epoch": 0.2154188700250487, "grad_norm": 0.4983305335044861, "learning_rate": 7.166666666666667e-06, "loss": 0.5202, "step": 258 }, { "epoch": 0.2162538268856109, "grad_norm": 0.4204765856266022, "learning_rate": 7.194444444444445e-06, "loss": 0.5309, "step": 259 }, { "epoch": 0.2170887837461731, "grad_norm": 0.5400534272193909, "learning_rate": 7.222222222222223e-06, "loss": 0.519, "step": 260 }, { "epoch": 0.2179237406067353, "grad_norm": 0.3989415466785431, "learning_rate": 7.25e-06, "loss": 0.507, "step": 261 }, { "epoch": 0.21875869746729754, "grad_norm": 0.4987964630126953, "learning_rate": 7.277777777777778e-06, "loss": 0.5277, "step": 262 }, { "epoch": 0.21959365432785974, "grad_norm": 0.48711666464805603, "learning_rate": 7.305555555555556e-06, "loss": 0.4944, "step": 263 }, { "epoch": 0.22042861118842194, "grad_norm": 0.47452041506767273, "learning_rate": 7.333333333333333e-06, "loss": 0.4973, "step": 264 }, { "epoch": 0.22126356804898414, "grad_norm": 0.4913634657859802, "learning_rate": 7.361111111111112e-06, "loss": 0.493, "step": 265 }, { "epoch": 0.22209852490954635, "grad_norm": 0.4823426902294159, "learning_rate": 7.38888888888889e-06, "loss": 0.5345, "step": 266 }, { "epoch": 0.22293348177010855, "grad_norm": 0.5708329081535339, "learning_rate": 7.416666666666668e-06, "loss": 0.5399, "step": 267 }, { "epoch": 0.22376843863067075, "grad_norm": 0.46696579456329346, "learning_rate": 7.444444444444445e-06, "loss": 0.541, "step": 268 }, { "epoch": 0.22460339549123295, "grad_norm": 0.4574521780014038, "learning_rate": 7.472222222222223e-06, "loss": 0.4671, "step": 269 }, { "epoch": 0.22543835235179516, "grad_norm": 0.45693835616111755, "learning_rate": 7.500000000000001e-06, "loss": 0.5158, "step": 270 }, { "epoch": 0.22627330921235736, "grad_norm": 0.4894069731235504, "learning_rate": 7.527777777777778e-06, "loss": 0.5006, "step": 271 }, { "epoch": 0.22710826607291956, "grad_norm": 0.5281304121017456, "learning_rate": 7.555555555555556e-06, "loss": 0.5008, "step": 272 }, { "epoch": 0.22794322293348177, "grad_norm": 0.43535110354423523, "learning_rate": 7.583333333333333e-06, "loss": 0.484, "step": 273 }, { "epoch": 0.22877817979404397, "grad_norm": 0.5313926339149475, "learning_rate": 7.611111111111111e-06, "loss": 0.4954, "step": 274 }, { "epoch": 0.22961313665460617, "grad_norm": 0.4995577335357666, "learning_rate": 7.638888888888888e-06, "loss": 0.4953, "step": 275 }, { "epoch": 0.23044809351516837, "grad_norm": 0.44422873854637146, "learning_rate": 7.666666666666667e-06, "loss": 0.5059, "step": 276 }, { "epoch": 0.23128305037573058, "grad_norm": 0.5328710675239563, "learning_rate": 7.694444444444446e-06, "loss": 0.5125, "step": 277 }, { "epoch": 0.23211800723629278, "grad_norm": 0.5435702800750732, "learning_rate": 7.722222222222223e-06, "loss": 0.5027, "step": 278 }, { "epoch": 0.232952964096855, "grad_norm": 0.4949301779270172, "learning_rate": 7.75e-06, "loss": 0.513, "step": 279 }, { "epoch": 0.2337879209574172, "grad_norm": 0.47400036454200745, "learning_rate": 7.77777777777778e-06, "loss": 0.5024, "step": 280 }, { "epoch": 0.23462287781797941, "grad_norm": 0.4942109286785126, "learning_rate": 7.805555555555556e-06, "loss": 0.4909, "step": 281 }, { "epoch": 0.23545783467854162, "grad_norm": 0.5459997653961182, "learning_rate": 7.833333333333333e-06, "loss": 0.502, "step": 282 }, { "epoch": 0.23629279153910382, "grad_norm": 0.4731834828853607, "learning_rate": 7.861111111111112e-06, "loss": 0.5155, "step": 283 }, { "epoch": 0.23712774839966602, "grad_norm": 0.5495770573616028, "learning_rate": 7.88888888888889e-06, "loss": 0.5135, "step": 284 }, { "epoch": 0.23796270526022822, "grad_norm": 0.5098472237586975, "learning_rate": 7.916666666666667e-06, "loss": 0.4951, "step": 285 }, { "epoch": 0.23879766212079043, "grad_norm": 0.48444467782974243, "learning_rate": 7.944444444444445e-06, "loss": 0.4929, "step": 286 }, { "epoch": 0.23963261898135263, "grad_norm": 0.44312694668769836, "learning_rate": 7.972222222222224e-06, "loss": 0.566, "step": 287 }, { "epoch": 0.24046757584191483, "grad_norm": 0.4822857081890106, "learning_rate": 8.000000000000001e-06, "loss": 0.5126, "step": 288 }, { "epoch": 0.24130253270247704, "grad_norm": 0.4108067452907562, "learning_rate": 8.027777777777778e-06, "loss": 0.5033, "step": 289 }, { "epoch": 0.24213748956303924, "grad_norm": 0.4569111168384552, "learning_rate": 8.055555555555557e-06, "loss": 0.5157, "step": 290 }, { "epoch": 0.24297244642360144, "grad_norm": 0.4571804702281952, "learning_rate": 8.083333333333334e-06, "loss": 0.5097, "step": 291 }, { "epoch": 0.24380740328416364, "grad_norm": 0.3941827118396759, "learning_rate": 8.111111111111112e-06, "loss": 0.4893, "step": 292 }, { "epoch": 0.24464236014472585, "grad_norm": 0.406877338886261, "learning_rate": 8.138888888888889e-06, "loss": 0.4822, "step": 293 }, { "epoch": 0.24547731700528805, "grad_norm": 0.48010483384132385, "learning_rate": 8.166666666666668e-06, "loss": 0.5019, "step": 294 }, { "epoch": 0.24631227386585025, "grad_norm": 0.4172366261482239, "learning_rate": 8.194444444444445e-06, "loss": 0.5005, "step": 295 }, { "epoch": 0.24714723072641248, "grad_norm": 0.5547234416007996, "learning_rate": 8.222222222222222e-06, "loss": 0.4724, "step": 296 }, { "epoch": 0.24798218758697468, "grad_norm": 0.3816433250904083, "learning_rate": 8.25e-06, "loss": 0.4987, "step": 297 }, { "epoch": 0.2488171444475369, "grad_norm": 0.46241000294685364, "learning_rate": 8.277777777777778e-06, "loss": 0.4838, "step": 298 }, { "epoch": 0.2496521013080991, "grad_norm": 0.42766863107681274, "learning_rate": 8.305555555555557e-06, "loss": 0.5054, "step": 299 }, { "epoch": 0.2504870581686613, "grad_norm": 0.45693880319595337, "learning_rate": 8.333333333333334e-06, "loss": 0.4937, "step": 300 }, { "epoch": 0.25132201502922347, "grad_norm": 0.4040062725543976, "learning_rate": 8.361111111111113e-06, "loss": 0.5272, "step": 301 }, { "epoch": 0.2521569718897857, "grad_norm": 0.5394395589828491, "learning_rate": 8.38888888888889e-06, "loss": 0.4956, "step": 302 }, { "epoch": 0.25299192875034787, "grad_norm": 0.4593656659126282, "learning_rate": 8.416666666666667e-06, "loss": 0.4946, "step": 303 }, { "epoch": 0.2538268856109101, "grad_norm": 0.4603549838066101, "learning_rate": 8.444444444444446e-06, "loss": 0.4948, "step": 304 }, { "epoch": 0.25466184247147233, "grad_norm": 0.48843467235565186, "learning_rate": 8.472222222222223e-06, "loss": 0.5075, "step": 305 }, { "epoch": 0.2554967993320345, "grad_norm": 0.4073157012462616, "learning_rate": 8.5e-06, "loss": 0.4708, "step": 306 }, { "epoch": 0.25633175619259674, "grad_norm": 0.46339452266693115, "learning_rate": 8.527777777777779e-06, "loss": 0.5113, "step": 307 }, { "epoch": 0.2571667130531589, "grad_norm": 0.5022232532501221, "learning_rate": 8.555555555555556e-06, "loss": 0.4832, "step": 308 }, { "epoch": 0.25800166991372114, "grad_norm": 0.4883805811405182, "learning_rate": 8.583333333333333e-06, "loss": 0.4919, "step": 309 }, { "epoch": 0.2588366267742833, "grad_norm": 0.4853518009185791, "learning_rate": 8.611111111111112e-06, "loss": 0.4897, "step": 310 }, { "epoch": 0.25967158363484555, "grad_norm": 0.5298082232475281, "learning_rate": 8.63888888888889e-06, "loss": 0.4995, "step": 311 }, { "epoch": 0.2605065404954077, "grad_norm": 0.4197370707988739, "learning_rate": 8.666666666666668e-06, "loss": 0.494, "step": 312 }, { "epoch": 0.26134149735596995, "grad_norm": 0.5578119158744812, "learning_rate": 8.694444444444445e-06, "loss": 0.5095, "step": 313 }, { "epoch": 0.26217645421653213, "grad_norm": 0.5282012224197388, "learning_rate": 8.722222222222224e-06, "loss": 0.5118, "step": 314 }, { "epoch": 0.26301141107709436, "grad_norm": 0.4312247633934021, "learning_rate": 8.750000000000001e-06, "loss": 0.5155, "step": 315 }, { "epoch": 0.26384636793765653, "grad_norm": 0.5846942663192749, "learning_rate": 8.777777777777778e-06, "loss": 0.5085, "step": 316 }, { "epoch": 0.26468132479821876, "grad_norm": 0.45834508538246155, "learning_rate": 8.805555555555557e-06, "loss": 0.5021, "step": 317 }, { "epoch": 0.26551628165878094, "grad_norm": 0.5178083777427673, "learning_rate": 8.833333333333334e-06, "loss": 0.5189, "step": 318 }, { "epoch": 0.26635123851934317, "grad_norm": 0.4949430525302887, "learning_rate": 8.861111111111111e-06, "loss": 0.4767, "step": 319 }, { "epoch": 0.26718619537990534, "grad_norm": 0.45216700434684753, "learning_rate": 8.888888888888888e-06, "loss": 0.5012, "step": 320 }, { "epoch": 0.2680211522404676, "grad_norm": 0.4801683723926544, "learning_rate": 8.916666666666667e-06, "loss": 0.5097, "step": 321 }, { "epoch": 0.2688561091010298, "grad_norm": 0.46790096163749695, "learning_rate": 8.944444444444446e-06, "loss": 0.497, "step": 322 }, { "epoch": 0.269691065961592, "grad_norm": 0.42254921793937683, "learning_rate": 8.972222222222223e-06, "loss": 0.4859, "step": 323 }, { "epoch": 0.2705260228221542, "grad_norm": 0.505977988243103, "learning_rate": 9e-06, "loss": 0.4979, "step": 324 }, { "epoch": 0.2713609796827164, "grad_norm": 0.49116232991218567, "learning_rate": 9.027777777777779e-06, "loss": 0.4947, "step": 325 }, { "epoch": 0.2721959365432786, "grad_norm": 0.4451379179954529, "learning_rate": 9.055555555555556e-06, "loss": 0.5141, "step": 326 }, { "epoch": 0.2730308934038408, "grad_norm": 0.5327444672584534, "learning_rate": 9.083333333333333e-06, "loss": 0.5169, "step": 327 }, { "epoch": 0.273865850264403, "grad_norm": 0.5464723110198975, "learning_rate": 9.111111111111112e-06, "loss": 0.5125, "step": 328 }, { "epoch": 0.2747008071249652, "grad_norm": 0.4518400728702545, "learning_rate": 9.13888888888889e-06, "loss": 0.5013, "step": 329 }, { "epoch": 0.2755357639855274, "grad_norm": 0.48419830203056335, "learning_rate": 9.166666666666666e-06, "loss": 0.4941, "step": 330 }, { "epoch": 0.2763707208460896, "grad_norm": 0.4095574617385864, "learning_rate": 9.194444444444445e-06, "loss": 0.5026, "step": 331 }, { "epoch": 0.27720567770665183, "grad_norm": 0.41777655482292175, "learning_rate": 9.222222222222224e-06, "loss": 0.4701, "step": 332 }, { "epoch": 0.278040634567214, "grad_norm": 0.4430456757545471, "learning_rate": 9.250000000000001e-06, "loss": 0.4813, "step": 333 }, { "epoch": 0.27887559142777624, "grad_norm": 0.46150413155555725, "learning_rate": 9.277777777777778e-06, "loss": 0.5005, "step": 334 }, { "epoch": 0.2797105482883384, "grad_norm": 0.48795756697654724, "learning_rate": 9.305555555555557e-06, "loss": 0.5086, "step": 335 }, { "epoch": 0.28054550514890064, "grad_norm": 0.4607313573360443, "learning_rate": 9.333333333333334e-06, "loss": 0.5071, "step": 336 }, { "epoch": 0.2813804620094628, "grad_norm": 0.49418002367019653, "learning_rate": 9.361111111111111e-06, "loss": 0.4863, "step": 337 }, { "epoch": 0.28221541887002505, "grad_norm": 0.5128498673439026, "learning_rate": 9.38888888888889e-06, "loss": 0.4876, "step": 338 }, { "epoch": 0.2830503757305873, "grad_norm": 0.43854820728302, "learning_rate": 9.416666666666667e-06, "loss": 0.504, "step": 339 }, { "epoch": 0.28388533259114945, "grad_norm": 0.46312806010246277, "learning_rate": 9.444444444444445e-06, "loss": 0.493, "step": 340 }, { "epoch": 0.2847202894517117, "grad_norm": 0.452012300491333, "learning_rate": 9.472222222222223e-06, "loss": 0.502, "step": 341 }, { "epoch": 0.28555524631227386, "grad_norm": 0.4609115421772003, "learning_rate": 9.5e-06, "loss": 0.4861, "step": 342 }, { "epoch": 0.2863902031728361, "grad_norm": 0.4475090205669403, "learning_rate": 9.527777777777778e-06, "loss": 0.4999, "step": 343 }, { "epoch": 0.28722516003339826, "grad_norm": 0.4116894006729126, "learning_rate": 9.555555555555556e-06, "loss": 0.4712, "step": 344 }, { "epoch": 0.2880601168939605, "grad_norm": 0.518109917640686, "learning_rate": 9.583333333333335e-06, "loss": 0.5199, "step": 345 }, { "epoch": 0.28889507375452267, "grad_norm": 0.4435453712940216, "learning_rate": 9.611111111111112e-06, "loss": 0.5127, "step": 346 }, { "epoch": 0.2897300306150849, "grad_norm": 0.5352389812469482, "learning_rate": 9.63888888888889e-06, "loss": 0.4863, "step": 347 }, { "epoch": 0.2905649874756471, "grad_norm": 0.5209349989891052, "learning_rate": 9.666666666666667e-06, "loss": 0.4983, "step": 348 }, { "epoch": 0.2913999443362093, "grad_norm": 0.4539010226726532, "learning_rate": 9.694444444444446e-06, "loss": 0.4647, "step": 349 }, { "epoch": 0.2922349011967715, "grad_norm": 0.6009034514427185, "learning_rate": 9.722222222222223e-06, "loss": 0.4745, "step": 350 }, { "epoch": 0.2930698580573337, "grad_norm": 0.4442564845085144, "learning_rate": 9.75e-06, "loss": 0.5121, "step": 351 }, { "epoch": 0.2939048149178959, "grad_norm": 0.521914541721344, "learning_rate": 9.777777777777779e-06, "loss": 0.4942, "step": 352 }, { "epoch": 0.2947397717784581, "grad_norm": 0.514156699180603, "learning_rate": 9.805555555555556e-06, "loss": 0.4919, "step": 353 }, { "epoch": 0.2955747286390203, "grad_norm": 0.4947082996368408, "learning_rate": 9.833333333333333e-06, "loss": 0.5187, "step": 354 }, { "epoch": 0.2964096854995825, "grad_norm": 0.5489581227302551, "learning_rate": 9.861111111111112e-06, "loss": 0.5043, "step": 355 }, { "epoch": 0.29724464236014475, "grad_norm": 0.4507294297218323, "learning_rate": 9.88888888888889e-06, "loss": 0.4794, "step": 356 }, { "epoch": 0.2980795992207069, "grad_norm": 0.5011706948280334, "learning_rate": 9.916666666666668e-06, "loss": 0.5187, "step": 357 }, { "epoch": 0.29891455608126916, "grad_norm": 0.4234829246997833, "learning_rate": 9.944444444444445e-06, "loss": 0.4849, "step": 358 }, { "epoch": 0.29974951294183133, "grad_norm": 0.47561073303222656, "learning_rate": 9.972222222222224e-06, "loss": 0.5026, "step": 359 }, { "epoch": 0.30058446980239356, "grad_norm": 0.4531523883342743, "learning_rate": 1e-05, "loss": 0.5072, "step": 360 }, { "epoch": 0.30141942666295574, "grad_norm": 0.48534873127937317, "learning_rate": 9.999997636444505e-06, "loss": 0.4913, "step": 361 }, { "epoch": 0.30225438352351797, "grad_norm": 0.5420751571655273, "learning_rate": 9.999990545780254e-06, "loss": 0.5032, "step": 362 }, { "epoch": 0.30308934038408014, "grad_norm": 0.5708768963813782, "learning_rate": 9.99997872801395e-06, "loss": 0.4973, "step": 363 }, { "epoch": 0.30392429724464237, "grad_norm": 0.598380446434021, "learning_rate": 9.999962183156767e-06, "loss": 0.4982, "step": 364 }, { "epoch": 0.30475925410520455, "grad_norm": 0.5070905685424805, "learning_rate": 9.999940911224346e-06, "loss": 0.5036, "step": 365 }, { "epoch": 0.3055942109657668, "grad_norm": 0.46123605966567993, "learning_rate": 9.999914912236799e-06, "loss": 0.4999, "step": 366 }, { "epoch": 0.30642916782632895, "grad_norm": 0.4437405467033386, "learning_rate": 9.999884186218705e-06, "loss": 0.502, "step": 367 }, { "epoch": 0.3072641246868912, "grad_norm": 0.44871068000793457, "learning_rate": 9.999848733199114e-06, "loss": 0.4685, "step": 368 }, { "epoch": 0.30809908154745336, "grad_norm": 0.4776581823825836, "learning_rate": 9.99980855321154e-06, "loss": 0.4932, "step": 369 }, { "epoch": 0.3089340384080156, "grad_norm": 0.4816185235977173, "learning_rate": 9.999763646293978e-06, "loss": 0.4996, "step": 370 }, { "epoch": 0.30976899526857776, "grad_norm": 0.44531261920928955, "learning_rate": 9.999714012488878e-06, "loss": 0.4926, "step": 371 }, { "epoch": 0.31060395212914, "grad_norm": 0.4348558783531189, "learning_rate": 9.999659651843164e-06, "loss": 0.4864, "step": 372 }, { "epoch": 0.3114389089897022, "grad_norm": 0.46784207224845886, "learning_rate": 9.999600564408234e-06, "loss": 0.4757, "step": 373 }, { "epoch": 0.3122738658502644, "grad_norm": 0.4526772201061249, "learning_rate": 9.99953675023995e-06, "loss": 0.5079, "step": 374 }, { "epoch": 0.31310882271082663, "grad_norm": 0.5078807473182678, "learning_rate": 9.999468209398639e-06, "loss": 0.4733, "step": 375 }, { "epoch": 0.3139437795713888, "grad_norm": 0.4586752951145172, "learning_rate": 9.999394941949108e-06, "loss": 0.4906, "step": 376 }, { "epoch": 0.31477873643195103, "grad_norm": 0.5451922416687012, "learning_rate": 9.999316947960617e-06, "loss": 0.5013, "step": 377 }, { "epoch": 0.3156136932925132, "grad_norm": 0.5713186860084534, "learning_rate": 9.999234227506912e-06, "loss": 0.5453, "step": 378 }, { "epoch": 0.31644865015307544, "grad_norm": 0.5117863416671753, "learning_rate": 9.999146780666193e-06, "loss": 0.455, "step": 379 }, { "epoch": 0.3172836070136376, "grad_norm": 0.5107588171958923, "learning_rate": 9.999054607521137e-06, "loss": 0.4928, "step": 380 }, { "epoch": 0.31811856387419984, "grad_norm": 0.46639686822891235, "learning_rate": 9.998957708158885e-06, "loss": 0.5013, "step": 381 }, { "epoch": 0.318953520734762, "grad_norm": 0.5144351124763489, "learning_rate": 9.998856082671047e-06, "loss": 0.488, "step": 382 }, { "epoch": 0.31978847759532425, "grad_norm": 0.43867960572242737, "learning_rate": 9.998749731153706e-06, "loss": 0.5097, "step": 383 }, { "epoch": 0.3206234344558864, "grad_norm": 0.5470992922782898, "learning_rate": 9.998638653707403e-06, "loss": 0.4936, "step": 384 }, { "epoch": 0.32145839131644866, "grad_norm": 0.49311527609825134, "learning_rate": 9.998522850437159e-06, "loss": 0.4948, "step": 385 }, { "epoch": 0.32229334817701083, "grad_norm": 0.45356062054634094, "learning_rate": 9.998402321452452e-06, "loss": 0.505, "step": 386 }, { "epoch": 0.32312830503757306, "grad_norm": 0.5330253839492798, "learning_rate": 9.998277066867236e-06, "loss": 0.5329, "step": 387 }, { "epoch": 0.3239632618981353, "grad_norm": 0.4821120500564575, "learning_rate": 9.99814708679993e-06, "loss": 0.4882, "step": 388 }, { "epoch": 0.32479821875869747, "grad_norm": 0.5846246480941772, "learning_rate": 9.99801238137342e-06, "loss": 0.5224, "step": 389 }, { "epoch": 0.3256331756192597, "grad_norm": 0.5383700728416443, "learning_rate": 9.997872950715055e-06, "loss": 0.512, "step": 390 }, { "epoch": 0.32646813247982187, "grad_norm": 0.4937242567539215, "learning_rate": 9.997728794956661e-06, "loss": 0.4767, "step": 391 }, { "epoch": 0.3273030893403841, "grad_norm": 0.5264062285423279, "learning_rate": 9.997579914234524e-06, "loss": 0.5078, "step": 392 }, { "epoch": 0.3281380462009463, "grad_norm": 0.438060998916626, "learning_rate": 9.997426308689397e-06, "loss": 0.4818, "step": 393 }, { "epoch": 0.3289730030615085, "grad_norm": 0.5369731783866882, "learning_rate": 9.997267978466507e-06, "loss": 0.5291, "step": 394 }, { "epoch": 0.3298079599220707, "grad_norm": 0.4996815621852875, "learning_rate": 9.99710492371554e-06, "loss": 0.4797, "step": 395 }, { "epoch": 0.3306429167826329, "grad_norm": 0.41392531991004944, "learning_rate": 9.99693714459065e-06, "loss": 0.4982, "step": 396 }, { "epoch": 0.3314778736431951, "grad_norm": 0.4865576922893524, "learning_rate": 9.996764641250462e-06, "loss": 0.5014, "step": 397 }, { "epoch": 0.3323128305037573, "grad_norm": 0.4550047814846039, "learning_rate": 9.996587413858063e-06, "loss": 0.4956, "step": 398 }, { "epoch": 0.3331477873643195, "grad_norm": 0.44766369462013245, "learning_rate": 9.99640546258101e-06, "loss": 0.5188, "step": 399 }, { "epoch": 0.3339827442248817, "grad_norm": 0.4223597049713135, "learning_rate": 9.99621878759132e-06, "loss": 0.4845, "step": 400 }, { "epoch": 0.3348177010854439, "grad_norm": 0.46855005621910095, "learning_rate": 9.996027389065481e-06, "loss": 0.4862, "step": 401 }, { "epoch": 0.3356526579460061, "grad_norm": 0.46499738097190857, "learning_rate": 9.995831267184447e-06, "loss": 0.4598, "step": 402 }, { "epoch": 0.3364876148065683, "grad_norm": 0.5148522257804871, "learning_rate": 9.995630422133635e-06, "loss": 0.5232, "step": 403 }, { "epoch": 0.33732257166713053, "grad_norm": 0.48951977491378784, "learning_rate": 9.995424854102928e-06, "loss": 0.4896, "step": 404 }, { "epoch": 0.33815752852769276, "grad_norm": 0.46950992941856384, "learning_rate": 9.995214563286677e-06, "loss": 0.5015, "step": 405 }, { "epoch": 0.33899248538825494, "grad_norm": 0.5045955777168274, "learning_rate": 9.99499954988369e-06, "loss": 0.5, "step": 406 }, { "epoch": 0.33982744224881717, "grad_norm": 0.5319128632545471, "learning_rate": 9.994779814097252e-06, "loss": 0.5047, "step": 407 }, { "epoch": 0.34066239910937934, "grad_norm": 0.49241411685943604, "learning_rate": 9.994555356135102e-06, "loss": 0.4869, "step": 408 }, { "epoch": 0.3414973559699416, "grad_norm": 0.5800851583480835, "learning_rate": 9.994326176209448e-06, "loss": 0.4853, "step": 409 }, { "epoch": 0.34233231283050375, "grad_norm": 0.5173487067222595, "learning_rate": 9.994092274536962e-06, "loss": 0.5442, "step": 410 }, { "epoch": 0.343167269691066, "grad_norm": 0.5806189179420471, "learning_rate": 9.993853651338783e-06, "loss": 0.5235, "step": 411 }, { "epoch": 0.34400222655162815, "grad_norm": 0.5173759460449219, "learning_rate": 9.993610306840505e-06, "loss": 0.5033, "step": 412 }, { "epoch": 0.3448371834121904, "grad_norm": 0.46281683444976807, "learning_rate": 9.993362241272195e-06, "loss": 0.4983, "step": 413 }, { "epoch": 0.34567214027275256, "grad_norm": 0.5043473839759827, "learning_rate": 9.993109454868379e-06, "loss": 0.4813, "step": 414 }, { "epoch": 0.3465070971333148, "grad_norm": 0.49548980593681335, "learning_rate": 9.992851947868047e-06, "loss": 0.4877, "step": 415 }, { "epoch": 0.34734205399387696, "grad_norm": 0.4956231117248535, "learning_rate": 9.992589720514651e-06, "loss": 0.5027, "step": 416 }, { "epoch": 0.3481770108544392, "grad_norm": 0.5516596436500549, "learning_rate": 9.992322773056108e-06, "loss": 0.4726, "step": 417 }, { "epoch": 0.34901196771500137, "grad_norm": 0.4959591031074524, "learning_rate": 9.992051105744796e-06, "loss": 0.4779, "step": 418 }, { "epoch": 0.3498469245755636, "grad_norm": 0.47233039140701294, "learning_rate": 9.991774718837552e-06, "loss": 0.4811, "step": 419 }, { "epoch": 0.3506818814361258, "grad_norm": 0.5268328189849854, "learning_rate": 9.991493612595681e-06, "loss": 0.5053, "step": 420 }, { "epoch": 0.351516838296688, "grad_norm": 0.4974232017993927, "learning_rate": 9.991207787284948e-06, "loss": 0.4965, "step": 421 }, { "epoch": 0.35235179515725024, "grad_norm": 0.5046818852424622, "learning_rate": 9.99091724317558e-06, "loss": 0.4518, "step": 422 }, { "epoch": 0.3531867520178124, "grad_norm": 0.5212334990501404, "learning_rate": 9.990621980542258e-06, "loss": 0.4504, "step": 423 }, { "epoch": 0.35402170887837464, "grad_norm": 0.4295724928379059, "learning_rate": 9.990321999664135e-06, "loss": 0.5139, "step": 424 }, { "epoch": 0.3548566657389368, "grad_norm": 0.5695523619651794, "learning_rate": 9.990017300824816e-06, "loss": 0.4873, "step": 425 }, { "epoch": 0.35569162259949905, "grad_norm": 0.46114957332611084, "learning_rate": 9.989707884312371e-06, "loss": 0.4982, "step": 426 }, { "epoch": 0.3565265794600612, "grad_norm": 0.528984785079956, "learning_rate": 9.989393750419335e-06, "loss": 0.467, "step": 427 }, { "epoch": 0.35736153632062345, "grad_norm": 0.5104154944419861, "learning_rate": 9.989074899442689e-06, "loss": 0.496, "step": 428 }, { "epoch": 0.3581964931811856, "grad_norm": 0.540079653263092, "learning_rate": 9.988751331683883e-06, "loss": 0.5188, "step": 429 }, { "epoch": 0.35903145004174786, "grad_norm": 0.44818776845932007, "learning_rate": 9.98842304744883e-06, "loss": 0.478, "step": 430 }, { "epoch": 0.35986640690231003, "grad_norm": 0.5266302824020386, "learning_rate": 9.988090047047894e-06, "loss": 0.4811, "step": 431 }, { "epoch": 0.36070136376287226, "grad_norm": 0.4991501271724701, "learning_rate": 9.9877523307959e-06, "loss": 0.4959, "step": 432 }, { "epoch": 0.36153632062343444, "grad_norm": 0.5260468125343323, "learning_rate": 9.987409899012135e-06, "loss": 0.5258, "step": 433 }, { "epoch": 0.36237127748399667, "grad_norm": 0.5266975164413452, "learning_rate": 9.98706275202034e-06, "loss": 0.5076, "step": 434 }, { "epoch": 0.36320623434455884, "grad_norm": 0.47813817858695984, "learning_rate": 9.986710890148716e-06, "loss": 0.4847, "step": 435 }, { "epoch": 0.3640411912051211, "grad_norm": 0.4764397442340851, "learning_rate": 9.986354313729922e-06, "loss": 0.4995, "step": 436 }, { "epoch": 0.36487614806568325, "grad_norm": 0.5174660086631775, "learning_rate": 9.98599302310107e-06, "loss": 0.462, "step": 437 }, { "epoch": 0.3657111049262455, "grad_norm": 0.4482344388961792, "learning_rate": 9.985627018603736e-06, "loss": 0.5135, "step": 438 }, { "epoch": 0.3665460617868077, "grad_norm": 0.49103280901908875, "learning_rate": 9.985256300583945e-06, "loss": 0.5199, "step": 439 }, { "epoch": 0.3673810186473699, "grad_norm": 0.4996368885040283, "learning_rate": 9.984880869392186e-06, "loss": 0.4926, "step": 440 }, { "epoch": 0.3682159755079321, "grad_norm": 0.48347556591033936, "learning_rate": 9.984500725383397e-06, "loss": 0.5045, "step": 441 }, { "epoch": 0.3690509323684943, "grad_norm": 0.5158573985099792, "learning_rate": 9.984115868916978e-06, "loss": 0.4753, "step": 442 }, { "epoch": 0.3698858892290565, "grad_norm": 0.46691784262657166, "learning_rate": 9.983726300356777e-06, "loss": 0.4824, "step": 443 }, { "epoch": 0.3707208460896187, "grad_norm": 0.5213005542755127, "learning_rate": 9.983332020071102e-06, "loss": 0.4701, "step": 444 }, { "epoch": 0.3715558029501809, "grad_norm": 0.5229199528694153, "learning_rate": 9.982933028432715e-06, "loss": 0.4931, "step": 445 }, { "epoch": 0.3723907598107431, "grad_norm": 0.4349936544895172, "learning_rate": 9.98252932581883e-06, "loss": 0.4781, "step": 446 }, { "epoch": 0.37322571667130533, "grad_norm": 0.5408176183700562, "learning_rate": 9.98212091261112e-06, "loss": 0.4777, "step": 447 }, { "epoch": 0.3740606735318675, "grad_norm": 0.4630272686481476, "learning_rate": 9.981707789195705e-06, "loss": 0.5102, "step": 448 }, { "epoch": 0.37489563039242974, "grad_norm": 0.4851641356945038, "learning_rate": 9.98128995596316e-06, "loss": 0.4953, "step": 449 }, { "epoch": 0.3757305872529919, "grad_norm": 0.49445992708206177, "learning_rate": 9.980867413308516e-06, "loss": 0.4882, "step": 450 }, { "epoch": 0.37656554411355414, "grad_norm": 0.5252190232276917, "learning_rate": 9.980440161631254e-06, "loss": 0.4627, "step": 451 }, { "epoch": 0.3774005009741163, "grad_norm": 0.4619700610637665, "learning_rate": 9.980008201335308e-06, "loss": 0.4658, "step": 452 }, { "epoch": 0.37823545783467855, "grad_norm": 0.47174569964408875, "learning_rate": 9.97957153282906e-06, "loss": 0.4819, "step": 453 }, { "epoch": 0.3790704146952407, "grad_norm": 0.4755402207374573, "learning_rate": 9.979130156525347e-06, "loss": 0.4744, "step": 454 }, { "epoch": 0.37990537155580295, "grad_norm": 0.4508528411388397, "learning_rate": 9.978684072841458e-06, "loss": 0.4646, "step": 455 }, { "epoch": 0.3807403284163652, "grad_norm": 0.5219740271568298, "learning_rate": 9.97823328219913e-06, "loss": 0.4922, "step": 456 }, { "epoch": 0.38157528527692736, "grad_norm": 0.4440976083278656, "learning_rate": 9.977777785024548e-06, "loss": 0.4724, "step": 457 }, { "epoch": 0.3824102421374896, "grad_norm": 0.5598084926605225, "learning_rate": 9.977317581748352e-06, "loss": 0.4736, "step": 458 }, { "epoch": 0.38324519899805176, "grad_norm": 0.632515549659729, "learning_rate": 9.976852672805625e-06, "loss": 0.4992, "step": 459 }, { "epoch": 0.384080155858614, "grad_norm": 0.43448320031166077, "learning_rate": 9.976383058635908e-06, "loss": 0.4902, "step": 460 }, { "epoch": 0.38491511271917617, "grad_norm": 0.6023032665252686, "learning_rate": 9.975908739683177e-06, "loss": 0.4688, "step": 461 }, { "epoch": 0.3857500695797384, "grad_norm": 0.44533196091651917, "learning_rate": 9.975429716395871e-06, "loss": 0.4855, "step": 462 }, { "epoch": 0.38658502644030057, "grad_norm": 0.4033283591270447, "learning_rate": 9.974945989226865e-06, "loss": 0.4913, "step": 463 }, { "epoch": 0.3874199833008628, "grad_norm": 0.49886035919189453, "learning_rate": 9.974457558633485e-06, "loss": 0.478, "step": 464 }, { "epoch": 0.388254940161425, "grad_norm": 0.42858532071113586, "learning_rate": 9.973964425077507e-06, "loss": 0.4637, "step": 465 }, { "epoch": 0.3890898970219872, "grad_norm": 0.5300591588020325, "learning_rate": 9.973466589025149e-06, "loss": 0.5507, "step": 466 }, { "epoch": 0.3899248538825494, "grad_norm": 0.4628649950027466, "learning_rate": 9.972964050947076e-06, "loss": 0.5129, "step": 467 }, { "epoch": 0.3907598107431116, "grad_norm": 0.5064137578010559, "learning_rate": 9.972456811318399e-06, "loss": 0.5048, "step": 468 }, { "epoch": 0.3915947676036738, "grad_norm": 0.46139097213745117, "learning_rate": 9.971944870618673e-06, "loss": 0.4949, "step": 469 }, { "epoch": 0.392429724464236, "grad_norm": 0.44774797558784485, "learning_rate": 9.971428229331898e-06, "loss": 0.495, "step": 470 }, { "epoch": 0.3932646813247982, "grad_norm": 0.4579482972621918, "learning_rate": 9.970906887946518e-06, "loss": 0.456, "step": 471 }, { "epoch": 0.3940996381853604, "grad_norm": 0.49749135971069336, "learning_rate": 9.970380846955422e-06, "loss": 0.5294, "step": 472 }, { "epoch": 0.39493459504592265, "grad_norm": 0.4411174952983856, "learning_rate": 9.969850106855943e-06, "loss": 0.5078, "step": 473 }, { "epoch": 0.39576955190648483, "grad_norm": 0.5637856721878052, "learning_rate": 9.96931466814985e-06, "loss": 0.4942, "step": 474 }, { "epoch": 0.39660450876704706, "grad_norm": 0.4946988821029663, "learning_rate": 9.968774531343359e-06, "loss": 0.4701, "step": 475 }, { "epoch": 0.39743946562760923, "grad_norm": 0.47854432463645935, "learning_rate": 9.96822969694713e-06, "loss": 0.4815, "step": 476 }, { "epoch": 0.39827442248817146, "grad_norm": 0.5748329162597656, "learning_rate": 9.96768016547626e-06, "loss": 0.5259, "step": 477 }, { "epoch": 0.39910937934873364, "grad_norm": 0.43782663345336914, "learning_rate": 9.967125937450285e-06, "loss": 0.484, "step": 478 }, { "epoch": 0.39994433620929587, "grad_norm": 0.593534529209137, "learning_rate": 9.966567013393192e-06, "loss": 0.4897, "step": 479 }, { "epoch": 0.40077929306985804, "grad_norm": 0.5430638194084167, "learning_rate": 9.966003393833396e-06, "loss": 0.4704, "step": 480 }, { "epoch": 0.4016142499304203, "grad_norm": 0.46568360924720764, "learning_rate": 9.965435079303753e-06, "loss": 0.4628, "step": 481 }, { "epoch": 0.40244920679098245, "grad_norm": 0.5038943886756897, "learning_rate": 9.964862070341566e-06, "loss": 0.4991, "step": 482 }, { "epoch": 0.4032841636515447, "grad_norm": 0.46381857991218567, "learning_rate": 9.964284367488565e-06, "loss": 0.4795, "step": 483 }, { "epoch": 0.40411912051210686, "grad_norm": 0.5240722298622131, "learning_rate": 9.963701971290926e-06, "loss": 0.4752, "step": 484 }, { "epoch": 0.4049540773726691, "grad_norm": 0.44154661893844604, "learning_rate": 9.963114882299258e-06, "loss": 0.4916, "step": 485 }, { "epoch": 0.40578903423323126, "grad_norm": 0.4818189740180969, "learning_rate": 9.962523101068608e-06, "loss": 0.5048, "step": 486 }, { "epoch": 0.4066239910937935, "grad_norm": 0.4071812927722931, "learning_rate": 9.961926628158461e-06, "loss": 0.4583, "step": 487 }, { "epoch": 0.40745894795435567, "grad_norm": 0.5234472155570984, "learning_rate": 9.961325464132734e-06, "loss": 0.4952, "step": 488 }, { "epoch": 0.4082939048149179, "grad_norm": 0.4324767291545868, "learning_rate": 9.960719609559781e-06, "loss": 0.5013, "step": 489 }, { "epoch": 0.4091288616754801, "grad_norm": 0.4801684021949768, "learning_rate": 9.96010906501239e-06, "loss": 0.5107, "step": 490 }, { "epoch": 0.4099638185360423, "grad_norm": 0.513319194316864, "learning_rate": 9.959493831067783e-06, "loss": 0.4992, "step": 491 }, { "epoch": 0.41079877539660453, "grad_norm": 0.40581899881362915, "learning_rate": 9.958873908307619e-06, "loss": 0.4631, "step": 492 }, { "epoch": 0.4116337322571667, "grad_norm": 0.5468835830688477, "learning_rate": 9.958249297317983e-06, "loss": 0.4823, "step": 493 }, { "epoch": 0.41246868911772894, "grad_norm": 0.5478339791297913, "learning_rate": 9.957619998689399e-06, "loss": 0.5033, "step": 494 }, { "epoch": 0.4133036459782911, "grad_norm": 0.4510931670665741, "learning_rate": 9.956986013016816e-06, "loss": 0.522, "step": 495 }, { "epoch": 0.41413860283885334, "grad_norm": 0.6107034087181091, "learning_rate": 9.956347340899623e-06, "loss": 0.4854, "step": 496 }, { "epoch": 0.4149735596994155, "grad_norm": 0.5170411467552185, "learning_rate": 9.955703982941632e-06, "loss": 0.4676, "step": 497 }, { "epoch": 0.41580851655997775, "grad_norm": 0.4971524178981781, "learning_rate": 9.955055939751085e-06, "loss": 0.5115, "step": 498 }, { "epoch": 0.4166434734205399, "grad_norm": 0.42795321345329285, "learning_rate": 9.954403211940665e-06, "loss": 0.4667, "step": 499 }, { "epoch": 0.41747843028110215, "grad_norm": 0.47338247299194336, "learning_rate": 9.953745800127465e-06, "loss": 0.4714, "step": 500 }, { "epoch": 0.41831338714166433, "grad_norm": 0.4468337595462799, "learning_rate": 9.953083704933024e-06, "loss": 0.4975, "step": 501 }, { "epoch": 0.41914834400222656, "grad_norm": 0.4382770359516144, "learning_rate": 9.9524169269833e-06, "loss": 0.5179, "step": 502 }, { "epoch": 0.41998330086278873, "grad_norm": 0.5566189289093018, "learning_rate": 9.951745466908677e-06, "loss": 0.4869, "step": 503 }, { "epoch": 0.42081825772335096, "grad_norm": 0.44027578830718994, "learning_rate": 9.951069325343972e-06, "loss": 0.4936, "step": 504 }, { "epoch": 0.42165321458391314, "grad_norm": 0.5000719428062439, "learning_rate": 9.950388502928422e-06, "loss": 0.4927, "step": 505 }, { "epoch": 0.42248817144447537, "grad_norm": 0.5281404852867126, "learning_rate": 9.949703000305691e-06, "loss": 0.4653, "step": 506 }, { "epoch": 0.4233231283050376, "grad_norm": 0.46946752071380615, "learning_rate": 9.94901281812387e-06, "loss": 0.484, "step": 507 }, { "epoch": 0.4241580851655998, "grad_norm": 0.5213874578475952, "learning_rate": 9.948317957035474e-06, "loss": 0.4733, "step": 508 }, { "epoch": 0.424993042026162, "grad_norm": 0.5515614748001099, "learning_rate": 9.947618417697436e-06, "loss": 0.4621, "step": 509 }, { "epoch": 0.4258279988867242, "grad_norm": 0.4805491864681244, "learning_rate": 9.946914200771118e-06, "loss": 0.468, "step": 510 }, { "epoch": 0.4266629557472864, "grad_norm": 0.564431369304657, "learning_rate": 9.946205306922304e-06, "loss": 0.4969, "step": 511 }, { "epoch": 0.4274979126078486, "grad_norm": 0.5754250884056091, "learning_rate": 9.945491736821193e-06, "loss": 0.4973, "step": 512 }, { "epoch": 0.4283328694684108, "grad_norm": 0.5643982291221619, "learning_rate": 9.944773491142416e-06, "loss": 0.4785, "step": 513 }, { "epoch": 0.429167826328973, "grad_norm": 0.496324360370636, "learning_rate": 9.944050570565015e-06, "loss": 0.5063, "step": 514 }, { "epoch": 0.4300027831895352, "grad_norm": 0.5531513690948486, "learning_rate": 9.943322975772459e-06, "loss": 0.4673, "step": 515 }, { "epoch": 0.4308377400500974, "grad_norm": 0.4897029995918274, "learning_rate": 9.942590707452627e-06, "loss": 0.4893, "step": 516 }, { "epoch": 0.4316726969106596, "grad_norm": 0.48453277349472046, "learning_rate": 9.941853766297823e-06, "loss": 0.5254, "step": 517 }, { "epoch": 0.4325076537712218, "grad_norm": 0.4741343557834625, "learning_rate": 9.941112153004769e-06, "loss": 0.4503, "step": 518 }, { "epoch": 0.43334261063178403, "grad_norm": 0.5057637095451355, "learning_rate": 9.940365868274602e-06, "loss": 0.4768, "step": 519 }, { "epoch": 0.4341775674923462, "grad_norm": 0.4422067701816559, "learning_rate": 9.939614912812878e-06, "loss": 0.4685, "step": 520 }, { "epoch": 0.43501252435290844, "grad_norm": 0.39381030201911926, "learning_rate": 9.938859287329563e-06, "loss": 0.4609, "step": 521 }, { "epoch": 0.4358474812134706, "grad_norm": 0.5361443758010864, "learning_rate": 9.938098992539045e-06, "loss": 0.4769, "step": 522 }, { "epoch": 0.43668243807403284, "grad_norm": 0.4798068106174469, "learning_rate": 9.937334029160124e-06, "loss": 0.476, "step": 523 }, { "epoch": 0.43751739493459507, "grad_norm": 0.45051512122154236, "learning_rate": 9.936564397916013e-06, "loss": 0.4754, "step": 524 }, { "epoch": 0.43835235179515725, "grad_norm": 0.45725250244140625, "learning_rate": 9.935790099534337e-06, "loss": 0.4807, "step": 525 }, { "epoch": 0.4391873086557195, "grad_norm": 0.41065359115600586, "learning_rate": 9.935011134747135e-06, "loss": 0.4695, "step": 526 }, { "epoch": 0.44002226551628165, "grad_norm": 0.4493233561515808, "learning_rate": 9.934227504290858e-06, "loss": 0.4939, "step": 527 }, { "epoch": 0.4408572223768439, "grad_norm": 0.4703843593597412, "learning_rate": 9.933439208906369e-06, "loss": 0.4987, "step": 528 }, { "epoch": 0.44169217923740606, "grad_norm": 0.4435077905654907, "learning_rate": 9.932646249338937e-06, "loss": 0.4604, "step": 529 }, { "epoch": 0.4425271360979683, "grad_norm": 0.43542513251304626, "learning_rate": 9.931848626338247e-06, "loss": 0.4788, "step": 530 }, { "epoch": 0.44336209295853046, "grad_norm": 0.47570765018463135, "learning_rate": 9.931046340658387e-06, "loss": 0.4457, "step": 531 }, { "epoch": 0.4441970498190927, "grad_norm": 0.4203772246837616, "learning_rate": 9.930239393057859e-06, "loss": 0.4922, "step": 532 }, { "epoch": 0.44503200667965487, "grad_norm": 0.458988219499588, "learning_rate": 9.929427784299565e-06, "loss": 0.5146, "step": 533 }, { "epoch": 0.4458669635402171, "grad_norm": 0.4255950450897217, "learning_rate": 9.92861151515082e-06, "loss": 0.4801, "step": 534 }, { "epoch": 0.4467019204007793, "grad_norm": 0.46640437841415405, "learning_rate": 9.927790586383343e-06, "loss": 0.5141, "step": 535 }, { "epoch": 0.4475368772613415, "grad_norm": 0.41648024320602417, "learning_rate": 9.926964998773257e-06, "loss": 0.4917, "step": 536 }, { "epoch": 0.4483718341219037, "grad_norm": 0.42028307914733887, "learning_rate": 9.926134753101092e-06, "loss": 0.5001, "step": 537 }, { "epoch": 0.4492067909824659, "grad_norm": 0.4498051702976227, "learning_rate": 9.925299850151782e-06, "loss": 0.4909, "step": 538 }, { "epoch": 0.4500417478430281, "grad_norm": 0.43699824810028076, "learning_rate": 9.924460290714659e-06, "loss": 0.4794, "step": 539 }, { "epoch": 0.4508767047035903, "grad_norm": 0.47550198435783386, "learning_rate": 9.923616075583465e-06, "loss": 0.4887, "step": 540 }, { "epoch": 0.45171166156415254, "grad_norm": 0.5020195245742798, "learning_rate": 9.922767205556338e-06, "loss": 0.4851, "step": 541 }, { "epoch": 0.4525466184247147, "grad_norm": 0.4481446146965027, "learning_rate": 9.92191368143582e-06, "loss": 0.4742, "step": 542 }, { "epoch": 0.45338157528527695, "grad_norm": 0.44970929622650146, "learning_rate": 9.921055504028847e-06, "loss": 0.4912, "step": 543 }, { "epoch": 0.4542165321458391, "grad_norm": 0.4207783043384552, "learning_rate": 9.920192674146765e-06, "loss": 0.5001, "step": 544 }, { "epoch": 0.45505148900640136, "grad_norm": 0.4809519350528717, "learning_rate": 9.91932519260531e-06, "loss": 0.4861, "step": 545 }, { "epoch": 0.45588644586696353, "grad_norm": 0.428415447473526, "learning_rate": 9.918453060224617e-06, "loss": 0.4949, "step": 546 }, { "epoch": 0.45672140272752576, "grad_norm": 0.4120035767555237, "learning_rate": 9.91757627782922e-06, "loss": 0.4612, "step": 547 }, { "epoch": 0.45755635958808794, "grad_norm": 0.403005450963974, "learning_rate": 9.916694846248049e-06, "loss": 0.4996, "step": 548 }, { "epoch": 0.45839131644865017, "grad_norm": 0.4218731224536896, "learning_rate": 9.91580876631443e-06, "loss": 0.4979, "step": 549 }, { "epoch": 0.45922627330921234, "grad_norm": 0.4288797080516815, "learning_rate": 9.914918038866083e-06, "loss": 0.4767, "step": 550 }, { "epoch": 0.46006123016977457, "grad_norm": 0.4345029890537262, "learning_rate": 9.914022664745118e-06, "loss": 0.481, "step": 551 }, { "epoch": 0.46089618703033675, "grad_norm": 0.46054255962371826, "learning_rate": 9.913122644798046e-06, "loss": 0.4557, "step": 552 }, { "epoch": 0.461731143890899, "grad_norm": 0.4818127155303955, "learning_rate": 9.91221797987576e-06, "loss": 0.4821, "step": 553 }, { "epoch": 0.46256610075146115, "grad_norm": 0.5487921833992004, "learning_rate": 9.911308670833556e-06, "loss": 0.5049, "step": 554 }, { "epoch": 0.4634010576120234, "grad_norm": 0.44889089465141296, "learning_rate": 9.910394718531113e-06, "loss": 0.4917, "step": 555 }, { "epoch": 0.46423601447258556, "grad_norm": 0.4664883017539978, "learning_rate": 9.909476123832502e-06, "loss": 0.4875, "step": 556 }, { "epoch": 0.4650709713331478, "grad_norm": 0.4119715988636017, "learning_rate": 9.90855288760618e-06, "loss": 0.4879, "step": 557 }, { "epoch": 0.46590592819371, "grad_norm": 0.48207446932792664, "learning_rate": 9.907625010724999e-06, "loss": 0.4749, "step": 558 }, { "epoch": 0.4667408850542722, "grad_norm": 0.40504103899002075, "learning_rate": 9.906692494066194e-06, "loss": 0.4962, "step": 559 }, { "epoch": 0.4675758419148344, "grad_norm": 0.5064045190811157, "learning_rate": 9.905755338511384e-06, "loss": 0.4974, "step": 560 }, { "epoch": 0.4684107987753966, "grad_norm": 0.4480912983417511, "learning_rate": 9.90481354494658e-06, "loss": 0.4909, "step": 561 }, { "epoch": 0.46924575563595883, "grad_norm": 0.5194793939590454, "learning_rate": 9.903867114262173e-06, "loss": 0.4937, "step": 562 }, { "epoch": 0.470080712496521, "grad_norm": 0.4762931168079376, "learning_rate": 9.902916047352937e-06, "loss": 0.4643, "step": 563 }, { "epoch": 0.47091566935708323, "grad_norm": 0.45576316118240356, "learning_rate": 9.901960345118036e-06, "loss": 0.4767, "step": 564 }, { "epoch": 0.4717506262176454, "grad_norm": 0.37840503454208374, "learning_rate": 9.901000008461013e-06, "loss": 0.4804, "step": 565 }, { "epoch": 0.47258558307820764, "grad_norm": 0.5387411117553711, "learning_rate": 9.900035038289786e-06, "loss": 0.4681, "step": 566 }, { "epoch": 0.4734205399387698, "grad_norm": 0.4974973797798157, "learning_rate": 9.899065435516661e-06, "loss": 0.4822, "step": 567 }, { "epoch": 0.47425549679933204, "grad_norm": 0.4118989408016205, "learning_rate": 9.898091201058327e-06, "loss": 0.4809, "step": 568 }, { "epoch": 0.4750904536598942, "grad_norm": 0.54311603307724, "learning_rate": 9.897112335835841e-06, "loss": 0.4714, "step": 569 }, { "epoch": 0.47592541052045645, "grad_norm": 0.5266126394271851, "learning_rate": 9.896128840774646e-06, "loss": 0.4957, "step": 570 }, { "epoch": 0.4767603673810186, "grad_norm": 0.5119727253913879, "learning_rate": 9.895140716804561e-06, "loss": 0.5102, "step": 571 }, { "epoch": 0.47759532424158085, "grad_norm": 0.47271928191185, "learning_rate": 9.894147964859778e-06, "loss": 0.4699, "step": 572 }, { "epoch": 0.47843028110214303, "grad_norm": 0.5108550786972046, "learning_rate": 9.89315058587887e-06, "loss": 0.5021, "step": 573 }, { "epoch": 0.47926523796270526, "grad_norm": 0.48614248633384705, "learning_rate": 9.892148580804777e-06, "loss": 0.4979, "step": 574 }, { "epoch": 0.4801001948232675, "grad_norm": 0.46254661679267883, "learning_rate": 9.891141950584822e-06, "loss": 0.4648, "step": 575 }, { "epoch": 0.48093515168382966, "grad_norm": 0.5451806783676147, "learning_rate": 9.890130696170691e-06, "loss": 0.4777, "step": 576 }, { "epoch": 0.4817701085443919, "grad_norm": 0.4138660728931427, "learning_rate": 9.889114818518449e-06, "loss": 0.4782, "step": 577 }, { "epoch": 0.48260506540495407, "grad_norm": 0.5796020030975342, "learning_rate": 9.888094318588529e-06, "loss": 0.5101, "step": 578 }, { "epoch": 0.4834400222655163, "grad_norm": 0.44987165927886963, "learning_rate": 9.887069197345731e-06, "loss": 0.472, "step": 579 }, { "epoch": 0.4842749791260785, "grad_norm": 0.4997946619987488, "learning_rate": 9.886039455759233e-06, "loss": 0.4732, "step": 580 }, { "epoch": 0.4851099359866407, "grad_norm": 0.5143769383430481, "learning_rate": 9.885005094802572e-06, "loss": 0.4679, "step": 581 }, { "epoch": 0.4859448928472029, "grad_norm": 0.39419740438461304, "learning_rate": 9.883966115453656e-06, "loss": 0.4909, "step": 582 }, { "epoch": 0.4867798497077651, "grad_norm": 0.5588893294334412, "learning_rate": 9.882922518694759e-06, "loss": 0.4845, "step": 583 }, { "epoch": 0.4876148065683273, "grad_norm": 0.43491268157958984, "learning_rate": 9.881874305512522e-06, "loss": 0.4768, "step": 584 }, { "epoch": 0.4884497634288895, "grad_norm": 0.4910503327846527, "learning_rate": 9.880821476897948e-06, "loss": 0.4894, "step": 585 }, { "epoch": 0.4892847202894517, "grad_norm": 0.40759846568107605, "learning_rate": 9.879764033846406e-06, "loss": 0.4822, "step": 586 }, { "epoch": 0.4901196771500139, "grad_norm": 0.4334510564804077, "learning_rate": 9.878701977357623e-06, "loss": 0.4888, "step": 587 }, { "epoch": 0.4909546340105761, "grad_norm": 0.4337920844554901, "learning_rate": 9.877635308435693e-06, "loss": 0.4559, "step": 588 }, { "epoch": 0.4917895908711383, "grad_norm": 0.4275764226913452, "learning_rate": 9.876564028089069e-06, "loss": 0.4771, "step": 589 }, { "epoch": 0.4926245477317005, "grad_norm": 0.4049099385738373, "learning_rate": 9.875488137330562e-06, "loss": 0.4495, "step": 590 }, { "epoch": 0.49345950459226273, "grad_norm": 0.4253717362880707, "learning_rate": 9.874407637177344e-06, "loss": 0.4726, "step": 591 }, { "epoch": 0.49429446145282496, "grad_norm": 0.4935168921947479, "learning_rate": 9.873322528650941e-06, "loss": 0.5091, "step": 592 }, { "epoch": 0.49512941831338714, "grad_norm": 0.4399954378604889, "learning_rate": 9.872232812777243e-06, "loss": 0.4776, "step": 593 }, { "epoch": 0.49596437517394937, "grad_norm": 0.4223841428756714, "learning_rate": 9.871138490586489e-06, "loss": 0.4815, "step": 594 }, { "epoch": 0.49679933203451154, "grad_norm": 0.5275794863700867, "learning_rate": 9.870039563113274e-06, "loss": 0.4711, "step": 595 }, { "epoch": 0.4976342888950738, "grad_norm": 0.4948101341724396, "learning_rate": 9.868936031396552e-06, "loss": 0.4799, "step": 596 }, { "epoch": 0.49846924575563595, "grad_norm": 0.4956100285053253, "learning_rate": 9.867827896479625e-06, "loss": 0.4874, "step": 597 }, { "epoch": 0.4993042026161982, "grad_norm": 0.4871603846549988, "learning_rate": 9.866715159410148e-06, "loss": 0.4737, "step": 598 }, { "epoch": 0.5001391594767604, "grad_norm": 0.48131489753723145, "learning_rate": 9.865597821240128e-06, "loss": 0.4756, "step": 599 }, { "epoch": 0.5009741163373226, "grad_norm": 0.4610348641872406, "learning_rate": 9.864475883025918e-06, "loss": 0.5005, "step": 600 }, { "epoch": 0.5018090731978848, "grad_norm": 0.5485129356384277, "learning_rate": 9.863349345828228e-06, "loss": 0.4831, "step": 601 }, { "epoch": 0.5026440300584469, "grad_norm": 0.4362499713897705, "learning_rate": 9.862218210712108e-06, "loss": 0.4684, "step": 602 }, { "epoch": 0.5034789869190092, "grad_norm": 0.4320901036262512, "learning_rate": 9.861082478746962e-06, "loss": 0.4873, "step": 603 }, { "epoch": 0.5043139437795714, "grad_norm": 0.5048307180404663, "learning_rate": 9.859942151006532e-06, "loss": 0.4855, "step": 604 }, { "epoch": 0.5051489006401336, "grad_norm": 0.45247015357017517, "learning_rate": 9.85879722856891e-06, "loss": 0.4609, "step": 605 }, { "epoch": 0.5059838575006957, "grad_norm": 0.44942599534988403, "learning_rate": 9.857647712516533e-06, "loss": 0.497, "step": 606 }, { "epoch": 0.506818814361258, "grad_norm": 0.5658351182937622, "learning_rate": 9.856493603936179e-06, "loss": 0.4892, "step": 607 }, { "epoch": 0.5076537712218202, "grad_norm": 0.5142644643783569, "learning_rate": 9.855334903918967e-06, "loss": 0.5149, "step": 608 }, { "epoch": 0.5084887280823824, "grad_norm": 0.46926912665367126, "learning_rate": 9.854171613560355e-06, "loss": 0.495, "step": 609 }, { "epoch": 0.5093236849429447, "grad_norm": 0.6163734793663025, "learning_rate": 9.853003733960148e-06, "loss": 0.4995, "step": 610 }, { "epoch": 0.5101586418035068, "grad_norm": 0.5189292430877686, "learning_rate": 9.851831266222482e-06, "loss": 0.4906, "step": 611 }, { "epoch": 0.510993598664069, "grad_norm": 0.5015621781349182, "learning_rate": 9.850654211455837e-06, "loss": 0.4941, "step": 612 }, { "epoch": 0.5118285555246312, "grad_norm": 0.5007957220077515, "learning_rate": 9.849472570773024e-06, "loss": 0.4826, "step": 613 }, { "epoch": 0.5126635123851935, "grad_norm": 0.5850078463554382, "learning_rate": 9.848286345291196e-06, "loss": 0.4862, "step": 614 }, { "epoch": 0.5134984692457556, "grad_norm": 0.4284389019012451, "learning_rate": 9.847095536131832e-06, "loss": 0.4585, "step": 615 }, { "epoch": 0.5143334261063178, "grad_norm": 0.5294229388237, "learning_rate": 9.84590014442075e-06, "loss": 0.5071, "step": 616 }, { "epoch": 0.5151683829668801, "grad_norm": 0.5171497464179993, "learning_rate": 9.844700171288105e-06, "loss": 0.4926, "step": 617 }, { "epoch": 0.5160033398274423, "grad_norm": 0.427854061126709, "learning_rate": 9.843495617868374e-06, "loss": 0.4359, "step": 618 }, { "epoch": 0.5168382966880044, "grad_norm": 0.5055775046348572, "learning_rate": 9.84228648530037e-06, "loss": 0.503, "step": 619 }, { "epoch": 0.5176732535485666, "grad_norm": 0.5210117101669312, "learning_rate": 9.841072774727231e-06, "loss": 0.4778, "step": 620 }, { "epoch": 0.5185082104091289, "grad_norm": 0.44244384765625, "learning_rate": 9.83985448729643e-06, "loss": 0.4869, "step": 621 }, { "epoch": 0.5193431672696911, "grad_norm": 0.5137484073638916, "learning_rate": 9.83863162415976e-06, "loss": 0.4625, "step": 622 }, { "epoch": 0.5201781241302532, "grad_norm": 0.45714500546455383, "learning_rate": 9.837404186473345e-06, "loss": 0.4678, "step": 623 }, { "epoch": 0.5210130809908154, "grad_norm": 0.5547538995742798, "learning_rate": 9.836172175397629e-06, "loss": 0.4624, "step": 624 }, { "epoch": 0.5218480378513777, "grad_norm": 0.43517276644706726, "learning_rate": 9.834935592097386e-06, "loss": 0.4508, "step": 625 }, { "epoch": 0.5226829947119399, "grad_norm": 0.47286728024482727, "learning_rate": 9.833694437741708e-06, "loss": 0.4589, "step": 626 }, { "epoch": 0.5235179515725021, "grad_norm": 0.4390261769294739, "learning_rate": 9.832448713504008e-06, "loss": 0.4607, "step": 627 }, { "epoch": 0.5243529084330643, "grad_norm": 0.45968690514564514, "learning_rate": 9.831198420562026e-06, "loss": 0.464, "step": 628 }, { "epoch": 0.5251878652936265, "grad_norm": 0.4834216237068176, "learning_rate": 9.82994356009781e-06, "loss": 0.4718, "step": 629 }, { "epoch": 0.5260228221541887, "grad_norm": 0.4443848133087158, "learning_rate": 9.828684133297738e-06, "loss": 0.4538, "step": 630 }, { "epoch": 0.526857779014751, "grad_norm": 0.43862414360046387, "learning_rate": 9.827420141352502e-06, "loss": 0.4694, "step": 631 }, { "epoch": 0.5276927358753131, "grad_norm": 0.4450027048587799, "learning_rate": 9.826151585457101e-06, "loss": 0.4605, "step": 632 }, { "epoch": 0.5285276927358753, "grad_norm": 0.47420427203178406, "learning_rate": 9.824878466810861e-06, "loss": 0.4814, "step": 633 }, { "epoch": 0.5293626495964375, "grad_norm": 0.4832122027873993, "learning_rate": 9.823600786617415e-06, "loss": 0.4468, "step": 634 }, { "epoch": 0.5301976064569998, "grad_norm": 0.45137566328048706, "learning_rate": 9.822318546084711e-06, "loss": 0.4925, "step": 635 }, { "epoch": 0.5310325633175619, "grad_norm": 0.45269283652305603, "learning_rate": 9.821031746425004e-06, "loss": 0.4726, "step": 636 }, { "epoch": 0.5318675201781241, "grad_norm": 0.5275819897651672, "learning_rate": 9.81974038885487e-06, "loss": 0.4524, "step": 637 }, { "epoch": 0.5327024770386863, "grad_norm": 0.47871747612953186, "learning_rate": 9.818444474595182e-06, "loss": 0.4949, "step": 638 }, { "epoch": 0.5335374338992486, "grad_norm": 0.4849627912044525, "learning_rate": 9.817144004871127e-06, "loss": 0.4586, "step": 639 }, { "epoch": 0.5343723907598107, "grad_norm": 0.4828311800956726, "learning_rate": 9.8158389809122e-06, "loss": 0.5042, "step": 640 }, { "epoch": 0.5352073476203729, "grad_norm": 0.5203263163566589, "learning_rate": 9.814529403952196e-06, "loss": 0.5014, "step": 641 }, { "epoch": 0.5360423044809352, "grad_norm": 0.4808894395828247, "learning_rate": 9.81321527522922e-06, "loss": 0.4738, "step": 642 }, { "epoch": 0.5368772613414974, "grad_norm": 0.41775837540626526, "learning_rate": 9.81189659598568e-06, "loss": 0.4751, "step": 643 }, { "epoch": 0.5377122182020596, "grad_norm": 0.39123183488845825, "learning_rate": 9.810573367468282e-06, "loss": 0.4786, "step": 644 }, { "epoch": 0.5385471750626217, "grad_norm": 0.4636596143245697, "learning_rate": 9.809245590928035e-06, "loss": 0.4573, "step": 645 }, { "epoch": 0.539382131923184, "grad_norm": 0.46198153495788574, "learning_rate": 9.807913267620253e-06, "loss": 0.4785, "step": 646 }, { "epoch": 0.5402170887837462, "grad_norm": 0.48326998949050903, "learning_rate": 9.806576398804541e-06, "loss": 0.4828, "step": 647 }, { "epoch": 0.5410520456443084, "grad_norm": 0.4998714029788971, "learning_rate": 9.805234985744804e-06, "loss": 0.4865, "step": 648 }, { "epoch": 0.5418870025048705, "grad_norm": 0.43289849162101746, "learning_rate": 9.803889029709243e-06, "loss": 0.4455, "step": 649 }, { "epoch": 0.5427219593654328, "grad_norm": 0.5598911046981812, "learning_rate": 9.802538531970357e-06, "loss": 0.4894, "step": 650 }, { "epoch": 0.543556916225995, "grad_norm": 0.3949972093105316, "learning_rate": 9.801183493804938e-06, "loss": 0.4884, "step": 651 }, { "epoch": 0.5443918730865572, "grad_norm": 0.5172412395477295, "learning_rate": 9.799823916494064e-06, "loss": 0.5044, "step": 652 }, { "epoch": 0.5452268299471194, "grad_norm": 0.5023715496063232, "learning_rate": 9.798459801323113e-06, "loss": 0.5085, "step": 653 }, { "epoch": 0.5460617868076816, "grad_norm": 0.42741382122039795, "learning_rate": 9.79709114958175e-06, "loss": 0.4793, "step": 654 }, { "epoch": 0.5468967436682438, "grad_norm": 0.42247870564460754, "learning_rate": 9.795717962563926e-06, "loss": 0.478, "step": 655 }, { "epoch": 0.547731700528806, "grad_norm": 0.45813509821891785, "learning_rate": 9.794340241567886e-06, "loss": 0.4596, "step": 656 }, { "epoch": 0.5485666573893682, "grad_norm": 0.4962068498134613, "learning_rate": 9.792957987896154e-06, "loss": 0.4759, "step": 657 }, { "epoch": 0.5494016142499304, "grad_norm": 0.38685324788093567, "learning_rate": 9.791571202855548e-06, "loss": 0.4898, "step": 658 }, { "epoch": 0.5502365711104926, "grad_norm": 0.5227807760238647, "learning_rate": 9.790179887757162e-06, "loss": 0.4913, "step": 659 }, { "epoch": 0.5510715279710549, "grad_norm": 0.4249117970466614, "learning_rate": 9.788784043916378e-06, "loss": 0.4871, "step": 660 }, { "epoch": 0.5519064848316171, "grad_norm": 0.353244811296463, "learning_rate": 9.787383672652855e-06, "loss": 0.4599, "step": 661 }, { "epoch": 0.5527414416921792, "grad_norm": 0.4063340723514557, "learning_rate": 9.78597877529054e-06, "loss": 0.4715, "step": 662 }, { "epoch": 0.5535763985527414, "grad_norm": 0.3959580957889557, "learning_rate": 9.784569353157648e-06, "loss": 0.4624, "step": 663 }, { "epoch": 0.5544113554133037, "grad_norm": 0.4124193787574768, "learning_rate": 9.783155407586682e-06, "loss": 0.5021, "step": 664 }, { "epoch": 0.5552463122738659, "grad_norm": 0.5122234225273132, "learning_rate": 9.781736939914416e-06, "loss": 0.4821, "step": 665 }, { "epoch": 0.556081269134428, "grad_norm": 0.4252382218837738, "learning_rate": 9.780313951481904e-06, "loss": 0.4917, "step": 666 }, { "epoch": 0.5569162259949902, "grad_norm": 0.3832370638847351, "learning_rate": 9.778886443634467e-06, "loss": 0.464, "step": 667 }, { "epoch": 0.5577511828555525, "grad_norm": 0.5233418941497803, "learning_rate": 9.777454417721703e-06, "loss": 0.4713, "step": 668 }, { "epoch": 0.5585861397161147, "grad_norm": 0.45024731755256653, "learning_rate": 9.776017875097484e-06, "loss": 0.4658, "step": 669 }, { "epoch": 0.5594210965766768, "grad_norm": 0.4184330701828003, "learning_rate": 9.774576817119945e-06, "loss": 0.4602, "step": 670 }, { "epoch": 0.560256053437239, "grad_norm": 0.42919185757637024, "learning_rate": 9.773131245151499e-06, "loss": 0.4858, "step": 671 }, { "epoch": 0.5610910102978013, "grad_norm": 0.4704931974411011, "learning_rate": 9.771681160558817e-06, "loss": 0.5109, "step": 672 }, { "epoch": 0.5619259671583635, "grad_norm": 0.40483012795448303, "learning_rate": 9.770226564712845e-06, "loss": 0.4545, "step": 673 }, { "epoch": 0.5627609240189256, "grad_norm": 0.4038953483104706, "learning_rate": 9.768767458988789e-06, "loss": 0.4668, "step": 674 }, { "epoch": 0.5635958808794879, "grad_norm": 0.36788299679756165, "learning_rate": 9.767303844766118e-06, "loss": 0.4808, "step": 675 }, { "epoch": 0.5644308377400501, "grad_norm": 0.44056788086891174, "learning_rate": 9.765835723428568e-06, "loss": 0.4594, "step": 676 }, { "epoch": 0.5652657946006123, "grad_norm": 0.5195746421813965, "learning_rate": 9.764363096364131e-06, "loss": 0.4872, "step": 677 }, { "epoch": 0.5661007514611746, "grad_norm": 0.41490909457206726, "learning_rate": 9.762885964965065e-06, "loss": 0.4813, "step": 678 }, { "epoch": 0.5669357083217367, "grad_norm": 0.4786645770072937, "learning_rate": 9.76140433062788e-06, "loss": 0.4555, "step": 679 }, { "epoch": 0.5677706651822989, "grad_norm": 0.398271381855011, "learning_rate": 9.759918194753346e-06, "loss": 0.4538, "step": 680 }, { "epoch": 0.5686056220428611, "grad_norm": 0.44136863946914673, "learning_rate": 9.75842755874649e-06, "loss": 0.4627, "step": 681 }, { "epoch": 0.5694405789034234, "grad_norm": 0.45471829175949097, "learning_rate": 9.756932424016591e-06, "loss": 0.4593, "step": 682 }, { "epoch": 0.5702755357639855, "grad_norm": 0.4336991608142853, "learning_rate": 9.755432791977184e-06, "loss": 0.4869, "step": 683 }, { "epoch": 0.5711104926245477, "grad_norm": 0.49794191122055054, "learning_rate": 9.753928664046055e-06, "loss": 0.4702, "step": 684 }, { "epoch": 0.57194544948511, "grad_norm": 0.4351523518562317, "learning_rate": 9.752420041645237e-06, "loss": 0.5101, "step": 685 }, { "epoch": 0.5727804063456722, "grad_norm": 0.5267941355705261, "learning_rate": 9.750906926201019e-06, "loss": 0.4781, "step": 686 }, { "epoch": 0.5736153632062343, "grad_norm": 0.38915926218032837, "learning_rate": 9.749389319143929e-06, "loss": 0.4694, "step": 687 }, { "epoch": 0.5744503200667965, "grad_norm": 0.46890291571617126, "learning_rate": 9.747867221908751e-06, "loss": 0.4991, "step": 688 }, { "epoch": 0.5752852769273588, "grad_norm": 0.42377325892448425, "learning_rate": 9.746340635934506e-06, "loss": 0.4587, "step": 689 }, { "epoch": 0.576120233787921, "grad_norm": 0.4047020375728607, "learning_rate": 9.744809562664466e-06, "loss": 0.4785, "step": 690 }, { "epoch": 0.5769551906484831, "grad_norm": 0.41083022952079773, "learning_rate": 9.74327400354614e-06, "loss": 0.4868, "step": 691 }, { "epoch": 0.5777901475090453, "grad_norm": 0.41189607977867126, "learning_rate": 9.741733960031278e-06, "loss": 0.4518, "step": 692 }, { "epoch": 0.5786251043696076, "grad_norm": 0.40783554315567017, "learning_rate": 9.740189433575873e-06, "loss": 0.4573, "step": 693 }, { "epoch": 0.5794600612301698, "grad_norm": 0.4201790690422058, "learning_rate": 9.738640425640154e-06, "loss": 0.4632, "step": 694 }, { "epoch": 0.580295018090732, "grad_norm": 0.39082175493240356, "learning_rate": 9.737086937688587e-06, "loss": 0.4979, "step": 695 }, { "epoch": 0.5811299749512941, "grad_norm": 0.42445895075798035, "learning_rate": 9.735528971189876e-06, "loss": 0.4823, "step": 696 }, { "epoch": 0.5819649318118564, "grad_norm": 0.3648286759853363, "learning_rate": 9.733966527616956e-06, "loss": 0.4605, "step": 697 }, { "epoch": 0.5827998886724186, "grad_norm": 0.42091384530067444, "learning_rate": 9.732399608446994e-06, "loss": 0.4723, "step": 698 }, { "epoch": 0.5836348455329808, "grad_norm": 0.41288307309150696, "learning_rate": 9.730828215161395e-06, "loss": 0.4927, "step": 699 }, { "epoch": 0.584469802393543, "grad_norm": 0.4217223525047302, "learning_rate": 9.729252349245783e-06, "loss": 0.4506, "step": 700 }, { "epoch": 0.5853047592541052, "grad_norm": 0.4116039574146271, "learning_rate": 9.727672012190019e-06, "loss": 0.4715, "step": 701 }, { "epoch": 0.5861397161146674, "grad_norm": 0.47954490780830383, "learning_rate": 9.726087205488192e-06, "loss": 0.4538, "step": 702 }, { "epoch": 0.5869746729752296, "grad_norm": 0.4491751492023468, "learning_rate": 9.72449793063861e-06, "loss": 0.4789, "step": 703 }, { "epoch": 0.5878096298357918, "grad_norm": 0.4636964201927185, "learning_rate": 9.722904189143806e-06, "loss": 0.4663, "step": 704 }, { "epoch": 0.588644586696354, "grad_norm": 0.45410990715026855, "learning_rate": 9.721305982510548e-06, "loss": 0.4605, "step": 705 }, { "epoch": 0.5894795435569162, "grad_norm": 0.44210556149482727, "learning_rate": 9.719703312249807e-06, "loss": 0.4682, "step": 706 }, { "epoch": 0.5903145004174785, "grad_norm": 0.4192987382411957, "learning_rate": 9.718096179876784e-06, "loss": 0.4677, "step": 707 }, { "epoch": 0.5911494572780406, "grad_norm": 0.44725504517555237, "learning_rate": 9.716484586910902e-06, "loss": 0.4648, "step": 708 }, { "epoch": 0.5919844141386028, "grad_norm": 0.4102238416671753, "learning_rate": 9.714868534875793e-06, "loss": 0.4315, "step": 709 }, { "epoch": 0.592819370999165, "grad_norm": 0.42504993081092834, "learning_rate": 9.713248025299314e-06, "loss": 0.4997, "step": 710 }, { "epoch": 0.5936543278597273, "grad_norm": 0.4431855380535126, "learning_rate": 9.711623059713522e-06, "loss": 0.453, "step": 711 }, { "epoch": 0.5944892847202895, "grad_norm": 0.42662084102630615, "learning_rate": 9.709993639654702e-06, "loss": 0.489, "step": 712 }, { "epoch": 0.5953242415808516, "grad_norm": 0.4536081850528717, "learning_rate": 9.708359766663342e-06, "loss": 0.4571, "step": 713 }, { "epoch": 0.5961591984414139, "grad_norm": 0.39782166481018066, "learning_rate": 9.706721442284143e-06, "loss": 0.474, "step": 714 }, { "epoch": 0.5969941553019761, "grad_norm": 0.4004703760147095, "learning_rate": 9.70507866806601e-06, "loss": 0.4692, "step": 715 }, { "epoch": 0.5978291121625383, "grad_norm": 0.4198451340198517, "learning_rate": 9.703431445562062e-06, "loss": 0.4794, "step": 716 }, { "epoch": 0.5986640690231004, "grad_norm": 0.46544215083122253, "learning_rate": 9.701779776329618e-06, "loss": 0.4849, "step": 717 }, { "epoch": 0.5994990258836627, "grad_norm": 0.43542200326919556, "learning_rate": 9.700123661930204e-06, "loss": 0.4607, "step": 718 }, { "epoch": 0.6003339827442249, "grad_norm": 0.4188228249549866, "learning_rate": 9.698463103929542e-06, "loss": 0.4575, "step": 719 }, { "epoch": 0.6011689396047871, "grad_norm": 0.44560056924819946, "learning_rate": 9.696798103897567e-06, "loss": 0.4873, "step": 720 }, { "epoch": 0.6020038964653492, "grad_norm": 0.4753422737121582, "learning_rate": 9.695128663408405e-06, "loss": 0.4746, "step": 721 }, { "epoch": 0.6028388533259115, "grad_norm": 0.4154667258262634, "learning_rate": 9.693454784040381e-06, "loss": 0.4766, "step": 722 }, { "epoch": 0.6036738101864737, "grad_norm": 0.4081837832927704, "learning_rate": 9.69177646737602e-06, "loss": 0.4886, "step": 723 }, { "epoch": 0.6045087670470359, "grad_norm": 0.4469962418079376, "learning_rate": 9.690093715002035e-06, "loss": 0.4807, "step": 724 }, { "epoch": 0.605343723907598, "grad_norm": 0.3963901996612549, "learning_rate": 9.688406528509343e-06, "loss": 0.4459, "step": 725 }, { "epoch": 0.6061786807681603, "grad_norm": 0.4649125635623932, "learning_rate": 9.686714909493045e-06, "loss": 0.4899, "step": 726 }, { "epoch": 0.6070136376287225, "grad_norm": 0.4490525722503662, "learning_rate": 9.685018859552434e-06, "loss": 0.4788, "step": 727 }, { "epoch": 0.6078485944892847, "grad_norm": 0.4339929521083832, "learning_rate": 9.683318380290995e-06, "loss": 0.4612, "step": 728 }, { "epoch": 0.608683551349847, "grad_norm": 0.43845656514167786, "learning_rate": 9.6816134733164e-06, "loss": 0.4555, "step": 729 }, { "epoch": 0.6095185082104091, "grad_norm": 0.47418034076690674, "learning_rate": 9.679904140240504e-06, "loss": 0.4741, "step": 730 }, { "epoch": 0.6103534650709713, "grad_norm": 0.4950982332229614, "learning_rate": 9.678190382679346e-06, "loss": 0.4839, "step": 731 }, { "epoch": 0.6111884219315336, "grad_norm": 0.37014228105545044, "learning_rate": 9.676472202253155e-06, "loss": 0.4726, "step": 732 }, { "epoch": 0.6120233787920958, "grad_norm": 0.44101622700691223, "learning_rate": 9.674749600586336e-06, "loss": 0.4772, "step": 733 }, { "epoch": 0.6128583356526579, "grad_norm": 0.4927801787853241, "learning_rate": 9.673022579307473e-06, "loss": 0.4937, "step": 734 }, { "epoch": 0.6136932925132201, "grad_norm": 0.4501895308494568, "learning_rate": 9.671291140049333e-06, "loss": 0.4891, "step": 735 }, { "epoch": 0.6145282493737824, "grad_norm": 0.3939850926399231, "learning_rate": 9.669555284448853e-06, "loss": 0.4625, "step": 736 }, { "epoch": 0.6153632062343446, "grad_norm": 0.44443944096565247, "learning_rate": 9.667815014147153e-06, "loss": 0.466, "step": 737 }, { "epoch": 0.6161981630949067, "grad_norm": 0.4040965139865875, "learning_rate": 9.66607033078952e-06, "loss": 0.4767, "step": 738 }, { "epoch": 0.617033119955469, "grad_norm": 0.4182543456554413, "learning_rate": 9.664321236025421e-06, "loss": 0.47, "step": 739 }, { "epoch": 0.6178680768160312, "grad_norm": 0.4285540282726288, "learning_rate": 9.662567731508486e-06, "loss": 0.4583, "step": 740 }, { "epoch": 0.6187030336765934, "grad_norm": 0.3961468040943146, "learning_rate": 9.660809818896515e-06, "loss": 0.4542, "step": 741 }, { "epoch": 0.6195379905371555, "grad_norm": 0.40714964270591736, "learning_rate": 9.659047499851483e-06, "loss": 0.4713, "step": 742 }, { "epoch": 0.6203729473977178, "grad_norm": 0.46544966101646423, "learning_rate": 9.657280776039521e-06, "loss": 0.4791, "step": 743 }, { "epoch": 0.62120790425828, "grad_norm": 0.3874083161354065, "learning_rate": 9.655509649130932e-06, "loss": 0.4809, "step": 744 }, { "epoch": 0.6220428611188422, "grad_norm": 0.3869558870792389, "learning_rate": 9.653734120800175e-06, "loss": 0.4452, "step": 745 }, { "epoch": 0.6228778179794044, "grad_norm": 0.3953353464603424, "learning_rate": 9.651954192725877e-06, "loss": 0.4779, "step": 746 }, { "epoch": 0.6237127748399666, "grad_norm": 0.42242637276649475, "learning_rate": 9.65016986659082e-06, "loss": 0.4734, "step": 747 }, { "epoch": 0.6245477317005288, "grad_norm": 0.4446393549442291, "learning_rate": 9.648381144081948e-06, "loss": 0.4594, "step": 748 }, { "epoch": 0.625382688561091, "grad_norm": 0.4006747603416443, "learning_rate": 9.646588026890356e-06, "loss": 0.468, "step": 749 }, { "epoch": 0.6262176454216533, "grad_norm": 0.40216684341430664, "learning_rate": 9.644790516711298e-06, "loss": 0.4793, "step": 750 }, { "epoch": 0.6270526022822154, "grad_norm": 0.4407646656036377, "learning_rate": 9.64298861524418e-06, "loss": 0.476, "step": 751 }, { "epoch": 0.6278875591427776, "grad_norm": 0.370244562625885, "learning_rate": 9.641182324192558e-06, "loss": 0.4883, "step": 752 }, { "epoch": 0.6287225160033398, "grad_norm": 0.4204142391681671, "learning_rate": 9.639371645264143e-06, "loss": 0.4599, "step": 753 }, { "epoch": 0.6295574728639021, "grad_norm": 0.4152199923992157, "learning_rate": 9.637556580170788e-06, "loss": 0.4724, "step": 754 }, { "epoch": 0.6303924297244642, "grad_norm": 0.3838709890842438, "learning_rate": 9.635737130628499e-06, "loss": 0.4482, "step": 755 }, { "epoch": 0.6312273865850264, "grad_norm": 0.4278474450111389, "learning_rate": 9.63391329835742e-06, "loss": 0.4733, "step": 756 }, { "epoch": 0.6320623434455886, "grad_norm": 0.4092269837856293, "learning_rate": 9.632085085081847e-06, "loss": 0.4727, "step": 757 }, { "epoch": 0.6328973003061509, "grad_norm": 0.4550930857658386, "learning_rate": 9.63025249253021e-06, "loss": 0.4659, "step": 758 }, { "epoch": 0.633732257166713, "grad_norm": 0.43368205428123474, "learning_rate": 9.628415522435082e-06, "loss": 0.4628, "step": 759 }, { "epoch": 0.6345672140272752, "grad_norm": 0.5090607404708862, "learning_rate": 9.626574176533178e-06, "loss": 0.481, "step": 760 }, { "epoch": 0.6354021708878375, "grad_norm": 0.3853616714477539, "learning_rate": 9.624728456565347e-06, "loss": 0.4595, "step": 761 }, { "epoch": 0.6362371277483997, "grad_norm": 0.44478297233581543, "learning_rate": 9.622878364276572e-06, "loss": 0.4952, "step": 762 }, { "epoch": 0.6370720846089619, "grad_norm": 0.48612120747566223, "learning_rate": 9.621023901415974e-06, "loss": 0.4763, "step": 763 }, { "epoch": 0.637907041469524, "grad_norm": 0.4346878230571747, "learning_rate": 9.619165069736799e-06, "loss": 0.4974, "step": 764 }, { "epoch": 0.6387419983300863, "grad_norm": 0.46708202362060547, "learning_rate": 9.617301870996432e-06, "loss": 0.4856, "step": 765 }, { "epoch": 0.6395769551906485, "grad_norm": 0.45261701941490173, "learning_rate": 9.615434306956379e-06, "loss": 0.4705, "step": 766 }, { "epoch": 0.6404119120512107, "grad_norm": 0.46454328298568726, "learning_rate": 9.613562379382281e-06, "loss": 0.4608, "step": 767 }, { "epoch": 0.6412468689117728, "grad_norm": 0.4159564971923828, "learning_rate": 9.611686090043895e-06, "loss": 0.4454, "step": 768 }, { "epoch": 0.6420818257723351, "grad_norm": 0.3893854022026062, "learning_rate": 9.60980544071511e-06, "loss": 0.4298, "step": 769 }, { "epoch": 0.6429167826328973, "grad_norm": 0.393304705619812, "learning_rate": 9.607920433173931e-06, "loss": 0.4679, "step": 770 }, { "epoch": 0.6437517394934595, "grad_norm": 0.39543086290359497, "learning_rate": 9.606031069202489e-06, "loss": 0.4719, "step": 771 }, { "epoch": 0.6445866963540217, "grad_norm": 0.3634507954120636, "learning_rate": 9.604137350587028e-06, "loss": 0.4742, "step": 772 }, { "epoch": 0.6454216532145839, "grad_norm": 0.3816275894641876, "learning_rate": 9.602239279117913e-06, "loss": 0.4614, "step": 773 }, { "epoch": 0.6462566100751461, "grad_norm": 0.3874513804912567, "learning_rate": 9.600336856589622e-06, "loss": 0.424, "step": 774 }, { "epoch": 0.6470915669357084, "grad_norm": 0.3658056855201721, "learning_rate": 9.598430084800749e-06, "loss": 0.4597, "step": 775 }, { "epoch": 0.6479265237962706, "grad_norm": 0.3814997375011444, "learning_rate": 9.596518965553996e-06, "loss": 0.4549, "step": 776 }, { "epoch": 0.6487614806568327, "grad_norm": 0.38070234656333923, "learning_rate": 9.59460350065618e-06, "loss": 0.4558, "step": 777 }, { "epoch": 0.6495964375173949, "grad_norm": 0.3997623324394226, "learning_rate": 9.592683691918223e-06, "loss": 0.4695, "step": 778 }, { "epoch": 0.6504313943779572, "grad_norm": 0.4008244574069977, "learning_rate": 9.590759541155155e-06, "loss": 0.473, "step": 779 }, { "epoch": 0.6512663512385194, "grad_norm": 0.4273325204849243, "learning_rate": 9.58883105018611e-06, "loss": 0.4824, "step": 780 }, { "epoch": 0.6521013080990815, "grad_norm": 0.39132124185562134, "learning_rate": 9.586898220834326e-06, "loss": 0.4775, "step": 781 }, { "epoch": 0.6529362649596437, "grad_norm": 0.4021581709384918, "learning_rate": 9.584961054927145e-06, "loss": 0.493, "step": 782 }, { "epoch": 0.653771221820206, "grad_norm": 0.3909318149089813, "learning_rate": 9.583019554296004e-06, "loss": 0.484, "step": 783 }, { "epoch": 0.6546061786807682, "grad_norm": 0.4251841604709625, "learning_rate": 9.581073720776442e-06, "loss": 0.4611, "step": 784 }, { "epoch": 0.6554411355413303, "grad_norm": 0.4117283523082733, "learning_rate": 9.579123556208094e-06, "loss": 0.494, "step": 785 }, { "epoch": 0.6562760924018926, "grad_norm": 0.4113926291465759, "learning_rate": 9.577169062434687e-06, "loss": 0.4445, "step": 786 }, { "epoch": 0.6571110492624548, "grad_norm": 0.3839029371738434, "learning_rate": 9.575210241304045e-06, "loss": 0.491, "step": 787 }, { "epoch": 0.657946006123017, "grad_norm": 0.4759558439254761, "learning_rate": 9.573247094668079e-06, "loss": 0.481, "step": 788 }, { "epoch": 0.6587809629835791, "grad_norm": 0.5296850204467773, "learning_rate": 9.571279624382794e-06, "loss": 0.4812, "step": 789 }, { "epoch": 0.6596159198441414, "grad_norm": 0.41367217898368835, "learning_rate": 9.569307832308276e-06, "loss": 0.4557, "step": 790 }, { "epoch": 0.6604508767047036, "grad_norm": 0.4981672167778015, "learning_rate": 9.567331720308704e-06, "loss": 0.4753, "step": 791 }, { "epoch": 0.6612858335652658, "grad_norm": 0.40773704648017883, "learning_rate": 9.565351290252339e-06, "loss": 0.4718, "step": 792 }, { "epoch": 0.662120790425828, "grad_norm": 0.5292509198188782, "learning_rate": 9.56336654401152e-06, "loss": 0.4696, "step": 793 }, { "epoch": 0.6629557472863902, "grad_norm": 0.43054771423339844, "learning_rate": 9.561377483462672e-06, "loss": 0.5009, "step": 794 }, { "epoch": 0.6637907041469524, "grad_norm": 0.4144614338874817, "learning_rate": 9.559384110486298e-06, "loss": 0.4739, "step": 795 }, { "epoch": 0.6646256610075146, "grad_norm": 0.47468647360801697, "learning_rate": 9.557386426966977e-06, "loss": 0.4683, "step": 796 }, { "epoch": 0.6654606178680769, "grad_norm": 0.4198920428752899, "learning_rate": 9.555384434793362e-06, "loss": 0.4667, "step": 797 }, { "epoch": 0.666295574728639, "grad_norm": 0.4371638000011444, "learning_rate": 9.553378135858181e-06, "loss": 0.4648, "step": 798 }, { "epoch": 0.6671305315892012, "grad_norm": 0.44433271884918213, "learning_rate": 9.551367532058234e-06, "loss": 0.4831, "step": 799 }, { "epoch": 0.6679654884497634, "grad_norm": 0.4292042553424835, "learning_rate": 9.549352625294391e-06, "loss": 0.4337, "step": 800 }, { "epoch": 0.6688004453103257, "grad_norm": 0.4677937626838684, "learning_rate": 9.547333417471589e-06, "loss": 0.4861, "step": 801 }, { "epoch": 0.6696354021708878, "grad_norm": 0.43571165204048157, "learning_rate": 9.545309910498832e-06, "loss": 0.4653, "step": 802 }, { "epoch": 0.67047035903145, "grad_norm": 0.46817219257354736, "learning_rate": 9.543282106289187e-06, "loss": 0.4821, "step": 803 }, { "epoch": 0.6713053158920123, "grad_norm": 0.45261621475219727, "learning_rate": 9.541250006759788e-06, "loss": 0.4574, "step": 804 }, { "epoch": 0.6721402727525745, "grad_norm": 0.45064249634742737, "learning_rate": 9.539213613831823e-06, "loss": 0.4735, "step": 805 }, { "epoch": 0.6729752296131366, "grad_norm": 0.45948299765586853, "learning_rate": 9.53717292943055e-06, "loss": 0.4718, "step": 806 }, { "epoch": 0.6738101864736988, "grad_norm": 0.4406224489212036, "learning_rate": 9.535127955485269e-06, "loss": 0.452, "step": 807 }, { "epoch": 0.6746451433342611, "grad_norm": 0.4829123318195343, "learning_rate": 9.533078693929348e-06, "loss": 0.4715, "step": 808 }, { "epoch": 0.6754801001948233, "grad_norm": 0.5536662936210632, "learning_rate": 9.531025146700205e-06, "loss": 0.4684, "step": 809 }, { "epoch": 0.6763150570553855, "grad_norm": 0.49620160460472107, "learning_rate": 9.528967315739308e-06, "loss": 0.452, "step": 810 }, { "epoch": 0.6771500139159476, "grad_norm": 0.5576233863830566, "learning_rate": 9.526905202992177e-06, "loss": 0.4872, "step": 811 }, { "epoch": 0.6779849707765099, "grad_norm": 0.6092559099197388, "learning_rate": 9.524838810408377e-06, "loss": 0.4802, "step": 812 }, { "epoch": 0.6788199276370721, "grad_norm": 0.43291541934013367, "learning_rate": 9.522768139941522e-06, "loss": 0.4945, "step": 813 }, { "epoch": 0.6796548844976343, "grad_norm": 0.5091139674186707, "learning_rate": 9.520693193549272e-06, "loss": 0.4809, "step": 814 }, { "epoch": 0.6804898413581965, "grad_norm": 0.44682493805885315, "learning_rate": 9.518613973193326e-06, "loss": 0.4768, "step": 815 }, { "epoch": 0.6813247982187587, "grad_norm": 0.4085545539855957, "learning_rate": 9.516530480839423e-06, "loss": 0.4665, "step": 816 }, { "epoch": 0.6821597550793209, "grad_norm": 0.5111335515975952, "learning_rate": 9.514442718457347e-06, "loss": 0.4535, "step": 817 }, { "epoch": 0.6829947119398831, "grad_norm": 0.4013584852218628, "learning_rate": 9.512350688020913e-06, "loss": 0.4611, "step": 818 }, { "epoch": 0.6838296688004453, "grad_norm": 0.42560169100761414, "learning_rate": 9.510254391507971e-06, "loss": 0.4792, "step": 819 }, { "epoch": 0.6846646256610075, "grad_norm": 0.4721892774105072, "learning_rate": 9.50815383090041e-06, "loss": 0.5055, "step": 820 }, { "epoch": 0.6854995825215697, "grad_norm": 0.4325023591518402, "learning_rate": 9.506049008184145e-06, "loss": 0.4611, "step": 821 }, { "epoch": 0.686334539382132, "grad_norm": 0.4085875451564789, "learning_rate": 9.503939925349121e-06, "loss": 0.4841, "step": 822 }, { "epoch": 0.6871694962426941, "grad_norm": 0.3837839663028717, "learning_rate": 9.501826584389313e-06, "loss": 0.5014, "step": 823 }, { "epoch": 0.6880044531032563, "grad_norm": 0.4190601706504822, "learning_rate": 9.49970898730272e-06, "loss": 0.4784, "step": 824 }, { "epoch": 0.6888394099638185, "grad_norm": 0.437589555978775, "learning_rate": 9.497587136091364e-06, "loss": 0.4905, "step": 825 }, { "epoch": 0.6896743668243808, "grad_norm": 0.43172556161880493, "learning_rate": 9.495461032761292e-06, "loss": 0.4725, "step": 826 }, { "epoch": 0.690509323684943, "grad_norm": 0.4341433644294739, "learning_rate": 9.493330679322572e-06, "loss": 0.4825, "step": 827 }, { "epoch": 0.6913442805455051, "grad_norm": 0.42435115575790405, "learning_rate": 9.49119607778928e-06, "loss": 0.455, "step": 828 }, { "epoch": 0.6921792374060673, "grad_norm": 0.46395447850227356, "learning_rate": 9.489057230179522e-06, "loss": 0.4929, "step": 829 }, { "epoch": 0.6930141942666296, "grad_norm": 0.4252876043319702, "learning_rate": 9.48691413851541e-06, "loss": 0.484, "step": 830 }, { "epoch": 0.6938491511271918, "grad_norm": 0.4238281548023224, "learning_rate": 9.484766804823071e-06, "loss": 0.4836, "step": 831 }, { "epoch": 0.6946841079877539, "grad_norm": 0.4382442831993103, "learning_rate": 9.48261523113264e-06, "loss": 0.4667, "step": 832 }, { "epoch": 0.6955190648483162, "grad_norm": 0.4379183053970337, "learning_rate": 9.480459419478266e-06, "loss": 0.5063, "step": 833 }, { "epoch": 0.6963540217088784, "grad_norm": 0.4334677755832672, "learning_rate": 9.478299371898097e-06, "loss": 0.4577, "step": 834 }, { "epoch": 0.6971889785694406, "grad_norm": 0.43183115124702454, "learning_rate": 9.476135090434292e-06, "loss": 0.445, "step": 835 }, { "epoch": 0.6980239354300027, "grad_norm": 0.40232813358306885, "learning_rate": 9.473966577133012e-06, "loss": 0.4854, "step": 836 }, { "epoch": 0.698858892290565, "grad_norm": 0.4307063817977905, "learning_rate": 9.471793834044416e-06, "loss": 0.4727, "step": 837 }, { "epoch": 0.6996938491511272, "grad_norm": 0.4347226321697235, "learning_rate": 9.469616863222664e-06, "loss": 0.4572, "step": 838 }, { "epoch": 0.7005288060116894, "grad_norm": 0.4483940899372101, "learning_rate": 9.467435666725912e-06, "loss": 0.4622, "step": 839 }, { "epoch": 0.7013637628722516, "grad_norm": 0.48568394780158997, "learning_rate": 9.465250246616311e-06, "loss": 0.4776, "step": 840 }, { "epoch": 0.7021987197328138, "grad_norm": 0.401965856552124, "learning_rate": 9.463060604960008e-06, "loss": 0.4608, "step": 841 }, { "epoch": 0.703033676593376, "grad_norm": 0.5018912553787231, "learning_rate": 9.460866743827137e-06, "loss": 0.4863, "step": 842 }, { "epoch": 0.7038686334539382, "grad_norm": 0.3868286907672882, "learning_rate": 9.458668665291826e-06, "loss": 0.4751, "step": 843 }, { "epoch": 0.7047035903145005, "grad_norm": 0.40315359830856323, "learning_rate": 9.456466371432183e-06, "loss": 0.4699, "step": 844 }, { "epoch": 0.7055385471750626, "grad_norm": 0.4154112637042999, "learning_rate": 9.454259864330307e-06, "loss": 0.4798, "step": 845 }, { "epoch": 0.7063735040356248, "grad_norm": 0.4051370918750763, "learning_rate": 9.452049146072278e-06, "loss": 0.46, "step": 846 }, { "epoch": 0.707208460896187, "grad_norm": 0.38432568311691284, "learning_rate": 9.449834218748161e-06, "loss": 0.4525, "step": 847 }, { "epoch": 0.7080434177567493, "grad_norm": 0.5194817185401917, "learning_rate": 9.447615084451995e-06, "loss": 0.48, "step": 848 }, { "epoch": 0.7088783746173114, "grad_norm": 0.3982996940612793, "learning_rate": 9.4453917452818e-06, "loss": 0.4532, "step": 849 }, { "epoch": 0.7097133314778736, "grad_norm": 0.43118464946746826, "learning_rate": 9.443164203339569e-06, "loss": 0.4528, "step": 850 }, { "epoch": 0.7105482883384359, "grad_norm": 0.46074217557907104, "learning_rate": 9.44093246073127e-06, "loss": 0.4688, "step": 851 }, { "epoch": 0.7113832451989981, "grad_norm": 0.44956204295158386, "learning_rate": 9.438696519566843e-06, "loss": 0.481, "step": 852 }, { "epoch": 0.7122182020595602, "grad_norm": 0.4756675660610199, "learning_rate": 9.436456381960195e-06, "loss": 0.4865, "step": 853 }, { "epoch": 0.7130531589201224, "grad_norm": 0.4341013729572296, "learning_rate": 9.434212050029203e-06, "loss": 0.4545, "step": 854 }, { "epoch": 0.7138881157806847, "grad_norm": 0.3966464698314667, "learning_rate": 9.431963525895709e-06, "loss": 0.4421, "step": 855 }, { "epoch": 0.7147230726412469, "grad_norm": 0.4386259913444519, "learning_rate": 9.429710811685515e-06, "loss": 0.4948, "step": 856 }, { "epoch": 0.715558029501809, "grad_norm": 0.44995275139808655, "learning_rate": 9.427453909528389e-06, "loss": 0.4582, "step": 857 }, { "epoch": 0.7163929863623713, "grad_norm": 0.4163367450237274, "learning_rate": 9.425192821558056e-06, "loss": 0.4763, "step": 858 }, { "epoch": 0.7172279432229335, "grad_norm": 0.4039832651615143, "learning_rate": 9.422927549912197e-06, "loss": 0.4639, "step": 859 }, { "epoch": 0.7180629000834957, "grad_norm": 0.4252856969833374, "learning_rate": 9.420658096732453e-06, "loss": 0.4897, "step": 860 }, { "epoch": 0.718897856944058, "grad_norm": 0.41462573409080505, "learning_rate": 9.418384464164413e-06, "loss": 0.4664, "step": 861 }, { "epoch": 0.7197328138046201, "grad_norm": 0.36578169465065, "learning_rate": 9.416106654357623e-06, "loss": 0.4639, "step": 862 }, { "epoch": 0.7205677706651823, "grad_norm": 0.4316444396972656, "learning_rate": 9.413824669465572e-06, "loss": 0.4482, "step": 863 }, { "epoch": 0.7214027275257445, "grad_norm": 0.4237957000732422, "learning_rate": 9.4115385116457e-06, "loss": 0.4589, "step": 864 }, { "epoch": 0.7222376843863068, "grad_norm": 0.3983316719532013, "learning_rate": 9.40924818305939e-06, "loss": 0.4879, "step": 865 }, { "epoch": 0.7230726412468689, "grad_norm": 0.49997004866600037, "learning_rate": 9.40695368587197e-06, "loss": 0.4883, "step": 866 }, { "epoch": 0.7239075981074311, "grad_norm": 0.4744362533092499, "learning_rate": 9.404655022252711e-06, "loss": 0.4466, "step": 867 }, { "epoch": 0.7247425549679933, "grad_norm": 0.41913872957229614, "learning_rate": 9.402352194374819e-06, "loss": 0.465, "step": 868 }, { "epoch": 0.7255775118285556, "grad_norm": 0.4491331875324249, "learning_rate": 9.400045204415438e-06, "loss": 0.488, "step": 869 }, { "epoch": 0.7264124686891177, "grad_norm": 0.4791271686553955, "learning_rate": 9.397734054555647e-06, "loss": 0.4895, "step": 870 }, { "epoch": 0.7272474255496799, "grad_norm": 0.41590768098831177, "learning_rate": 9.395418746980461e-06, "loss": 0.487, "step": 871 }, { "epoch": 0.7280823824102421, "grad_norm": 0.47427603602409363, "learning_rate": 9.393099283878822e-06, "loss": 0.4674, "step": 872 }, { "epoch": 0.7289173392708044, "grad_norm": 0.42695558071136475, "learning_rate": 9.390775667443602e-06, "loss": 0.4683, "step": 873 }, { "epoch": 0.7297522961313665, "grad_norm": 0.39778396487236023, "learning_rate": 9.388447899871596e-06, "loss": 0.4555, "step": 874 }, { "epoch": 0.7305872529919287, "grad_norm": 0.5000306963920593, "learning_rate": 9.386115983363533e-06, "loss": 0.4594, "step": 875 }, { "epoch": 0.731422209852491, "grad_norm": 0.3871552050113678, "learning_rate": 9.383779920124055e-06, "loss": 0.4306, "step": 876 }, { "epoch": 0.7322571667130532, "grad_norm": 0.44250211119651794, "learning_rate": 9.381439712361729e-06, "loss": 0.4789, "step": 877 }, { "epoch": 0.7330921235736154, "grad_norm": 0.3736979067325592, "learning_rate": 9.379095362289037e-06, "loss": 0.4345, "step": 878 }, { "epoch": 0.7339270804341775, "grad_norm": 0.4524669647216797, "learning_rate": 9.376746872122384e-06, "loss": 0.468, "step": 879 }, { "epoch": 0.7347620372947398, "grad_norm": 0.40606340765953064, "learning_rate": 9.374394244082083e-06, "loss": 0.433, "step": 880 }, { "epoch": 0.735596994155302, "grad_norm": 0.40946364402770996, "learning_rate": 9.37203748039236e-06, "loss": 0.455, "step": 881 }, { "epoch": 0.7364319510158642, "grad_norm": 0.42627713084220886, "learning_rate": 9.36967658328135e-06, "loss": 0.4668, "step": 882 }, { "epoch": 0.7372669078764263, "grad_norm": 0.4324961304664612, "learning_rate": 9.367311554981102e-06, "loss": 0.4841, "step": 883 }, { "epoch": 0.7381018647369886, "grad_norm": 0.3595356047153473, "learning_rate": 9.364942397727563e-06, "loss": 0.4705, "step": 884 }, { "epoch": 0.7389368215975508, "grad_norm": 0.48531681299209595, "learning_rate": 9.362569113760588e-06, "loss": 0.4709, "step": 885 }, { "epoch": 0.739771778458113, "grad_norm": 0.3733944892883301, "learning_rate": 9.360191705323933e-06, "loss": 0.476, "step": 886 }, { "epoch": 0.7406067353186752, "grad_norm": 0.3969453275203705, "learning_rate": 9.357810174665249e-06, "loss": 0.4706, "step": 887 }, { "epoch": 0.7414416921792374, "grad_norm": 0.41313809156417847, "learning_rate": 9.355424524036094e-06, "loss": 0.4533, "step": 888 }, { "epoch": 0.7422766490397996, "grad_norm": 0.4675268232822418, "learning_rate": 9.353034755691909e-06, "loss": 0.4649, "step": 889 }, { "epoch": 0.7431116059003618, "grad_norm": 0.4099506735801697, "learning_rate": 9.350640871892039e-06, "loss": 0.4818, "step": 890 }, { "epoch": 0.743946562760924, "grad_norm": 0.46830853819847107, "learning_rate": 9.34824287489971e-06, "loss": 0.4662, "step": 891 }, { "epoch": 0.7447815196214862, "grad_norm": 0.3746400475502014, "learning_rate": 9.345840766982048e-06, "loss": 0.4979, "step": 892 }, { "epoch": 0.7456164764820484, "grad_norm": 0.43918663263320923, "learning_rate": 9.343434550410053e-06, "loss": 0.4614, "step": 893 }, { "epoch": 0.7464514333426107, "grad_norm": 0.44036760926246643, "learning_rate": 9.341024227458617e-06, "loss": 0.4628, "step": 894 }, { "epoch": 0.7472863902031729, "grad_norm": 0.40808165073394775, "learning_rate": 9.338609800406516e-06, "loss": 0.4545, "step": 895 }, { "epoch": 0.748121347063735, "grad_norm": 0.41240692138671875, "learning_rate": 9.3361912715364e-06, "loss": 0.4626, "step": 896 }, { "epoch": 0.7489563039242972, "grad_norm": 0.36688148975372314, "learning_rate": 9.333768643134801e-06, "loss": 0.4495, "step": 897 }, { "epoch": 0.7497912607848595, "grad_norm": 0.39690157771110535, "learning_rate": 9.331341917492125e-06, "loss": 0.4834, "step": 898 }, { "epoch": 0.7506262176454217, "grad_norm": 0.4003102779388428, "learning_rate": 9.328911096902653e-06, "loss": 0.4556, "step": 899 }, { "epoch": 0.7514611745059838, "grad_norm": 0.38743120431900024, "learning_rate": 9.326476183664535e-06, "loss": 0.4708, "step": 900 }, { "epoch": 0.752296131366546, "grad_norm": 0.4151012599468231, "learning_rate": 9.324037180079795e-06, "loss": 0.464, "step": 901 }, { "epoch": 0.7531310882271083, "grad_norm": 0.4199081063270569, "learning_rate": 9.321594088454318e-06, "loss": 0.4559, "step": 902 }, { "epoch": 0.7539660450876705, "grad_norm": 0.4058009088039398, "learning_rate": 9.319146911097858e-06, "loss": 0.4439, "step": 903 }, { "epoch": 0.7548010019482326, "grad_norm": 0.4924733340740204, "learning_rate": 9.316695650324034e-06, "loss": 0.4482, "step": 904 }, { "epoch": 0.7556359588087949, "grad_norm": 0.48145240545272827, "learning_rate": 9.314240308450318e-06, "loss": 0.4726, "step": 905 }, { "epoch": 0.7564709156693571, "grad_norm": 0.38966473937034607, "learning_rate": 9.311780887798046e-06, "loss": 0.4455, "step": 906 }, { "epoch": 0.7573058725299193, "grad_norm": 0.4745093286037445, "learning_rate": 9.30931739069241e-06, "loss": 0.4407, "step": 907 }, { "epoch": 0.7581408293904814, "grad_norm": 0.494422048330307, "learning_rate": 9.306849819462453e-06, "loss": 0.4543, "step": 908 }, { "epoch": 0.7589757862510437, "grad_norm": 0.3481920659542084, "learning_rate": 9.304378176441076e-06, "loss": 0.4342, "step": 909 }, { "epoch": 0.7598107431116059, "grad_norm": 0.46605372428894043, "learning_rate": 9.301902463965018e-06, "loss": 0.4763, "step": 910 }, { "epoch": 0.7606456999721681, "grad_norm": 0.4311828017234802, "learning_rate": 9.299422684374879e-06, "loss": 0.47, "step": 911 }, { "epoch": 0.7614806568327304, "grad_norm": 0.41734200716018677, "learning_rate": 9.296938840015094e-06, "loss": 0.4641, "step": 912 }, { "epoch": 0.7623156136932925, "grad_norm": 0.44150346517562866, "learning_rate": 9.294450933233946e-06, "loss": 0.489, "step": 913 }, { "epoch": 0.7631505705538547, "grad_norm": 0.38190528750419617, "learning_rate": 9.291958966383556e-06, "loss": 0.4546, "step": 914 }, { "epoch": 0.7639855274144169, "grad_norm": 0.43538495898246765, "learning_rate": 9.289462941819887e-06, "loss": 0.4856, "step": 915 }, { "epoch": 0.7648204842749792, "grad_norm": 0.4032411277294159, "learning_rate": 9.286962861902735e-06, "loss": 0.483, "step": 916 }, { "epoch": 0.7656554411355413, "grad_norm": 0.44320106506347656, "learning_rate": 9.28445872899573e-06, "loss": 0.4816, "step": 917 }, { "epoch": 0.7664903979961035, "grad_norm": 0.37622153759002686, "learning_rate": 9.281950545466336e-06, "loss": 0.4606, "step": 918 }, { "epoch": 0.7673253548566658, "grad_norm": 0.4994167983531952, "learning_rate": 9.279438313685847e-06, "loss": 0.4493, "step": 919 }, { "epoch": 0.768160311717228, "grad_norm": 0.40796032547950745, "learning_rate": 9.27692203602938e-06, "loss": 0.4467, "step": 920 }, { "epoch": 0.7689952685777901, "grad_norm": 0.44303077459335327, "learning_rate": 9.274401714875878e-06, "loss": 0.4529, "step": 921 }, { "epoch": 0.7698302254383523, "grad_norm": 0.4783404469490051, "learning_rate": 9.271877352608112e-06, "loss": 0.4608, "step": 922 }, { "epoch": 0.7706651822989146, "grad_norm": 0.40851864218711853, "learning_rate": 9.26934895161267e-06, "loss": 0.4906, "step": 923 }, { "epoch": 0.7715001391594768, "grad_norm": 0.4493561387062073, "learning_rate": 9.266816514279958e-06, "loss": 0.4535, "step": 924 }, { "epoch": 0.7723350960200389, "grad_norm": 0.3898598551750183, "learning_rate": 9.264280043004197e-06, "loss": 0.4755, "step": 925 }, { "epoch": 0.7731700528806011, "grad_norm": 0.4364062249660492, "learning_rate": 9.261739540183425e-06, "loss": 0.4444, "step": 926 }, { "epoch": 0.7740050097411634, "grad_norm": 0.39593759179115295, "learning_rate": 9.25919500821949e-06, "loss": 0.4711, "step": 927 }, { "epoch": 0.7748399666017256, "grad_norm": 0.3943444490432739, "learning_rate": 9.256646449518047e-06, "loss": 0.4299, "step": 928 }, { "epoch": 0.7756749234622878, "grad_norm": 0.4089711308479309, "learning_rate": 9.254093866488563e-06, "loss": 0.4964, "step": 929 }, { "epoch": 0.77650988032285, "grad_norm": 0.3707490563392639, "learning_rate": 9.251537261544301e-06, "loss": 0.4326, "step": 930 }, { "epoch": 0.7773448371834122, "grad_norm": 0.44992759823799133, "learning_rate": 9.24897663710234e-06, "loss": 0.4936, "step": 931 }, { "epoch": 0.7781797940439744, "grad_norm": 0.4186168611049652, "learning_rate": 9.246411995583544e-06, "loss": 0.5139, "step": 932 }, { "epoch": 0.7790147509045366, "grad_norm": 0.38849788904190063, "learning_rate": 9.243843339412586e-06, "loss": 0.4793, "step": 933 }, { "epoch": 0.7798497077650988, "grad_norm": 0.431294322013855, "learning_rate": 9.24127067101793e-06, "loss": 0.4817, "step": 934 }, { "epoch": 0.780684664625661, "grad_norm": 0.4369443655014038, "learning_rate": 9.238693992831833e-06, "loss": 0.4874, "step": 935 }, { "epoch": 0.7815196214862232, "grad_norm": 0.36805614829063416, "learning_rate": 9.236113307290345e-06, "loss": 0.4419, "step": 936 }, { "epoch": 0.7823545783467855, "grad_norm": 0.3909826874732971, "learning_rate": 9.233528616833303e-06, "loss": 0.4822, "step": 937 }, { "epoch": 0.7831895352073476, "grad_norm": 0.37111538648605347, "learning_rate": 9.230939923904328e-06, "loss": 0.463, "step": 938 }, { "epoch": 0.7840244920679098, "grad_norm": 0.39355406165122986, "learning_rate": 9.22834723095083e-06, "loss": 0.4527, "step": 939 }, { "epoch": 0.784859448928472, "grad_norm": 0.4034828841686249, "learning_rate": 9.225750540424e-06, "loss": 0.4704, "step": 940 }, { "epoch": 0.7856944057890343, "grad_norm": 0.348165363073349, "learning_rate": 9.223149854778805e-06, "loss": 0.4377, "step": 941 }, { "epoch": 0.7865293626495964, "grad_norm": 0.41694581508636475, "learning_rate": 9.220545176473992e-06, "loss": 0.4602, "step": 942 }, { "epoch": 0.7873643195101586, "grad_norm": 0.446853369474411, "learning_rate": 9.217936507972081e-06, "loss": 0.4712, "step": 943 }, { "epoch": 0.7881992763707208, "grad_norm": 0.4323854148387909, "learning_rate": 9.215323851739365e-06, "loss": 0.4825, "step": 944 }, { "epoch": 0.7890342332312831, "grad_norm": 0.4398422837257385, "learning_rate": 9.212707210245908e-06, "loss": 0.451, "step": 945 }, { "epoch": 0.7898691900918453, "grad_norm": 0.40688851475715637, "learning_rate": 9.210086585965538e-06, "loss": 0.5033, "step": 946 }, { "epoch": 0.7907041469524074, "grad_norm": 0.40553659200668335, "learning_rate": 9.207461981375855e-06, "loss": 0.4561, "step": 947 }, { "epoch": 0.7915391038129697, "grad_norm": 0.47492164373397827, "learning_rate": 9.204833398958218e-06, "loss": 0.4882, "step": 948 }, { "epoch": 0.7923740606735319, "grad_norm": 0.3909182846546173, "learning_rate": 9.202200841197745e-06, "loss": 0.4719, "step": 949 }, { "epoch": 0.7932090175340941, "grad_norm": 0.3728988468647003, "learning_rate": 9.199564310583315e-06, "loss": 0.466, "step": 950 }, { "epoch": 0.7940439743946562, "grad_norm": 0.39567092061042786, "learning_rate": 9.196923809607565e-06, "loss": 0.4805, "step": 951 }, { "epoch": 0.7948789312552185, "grad_norm": 0.3854317367076874, "learning_rate": 9.194279340766881e-06, "loss": 0.4439, "step": 952 }, { "epoch": 0.7957138881157807, "grad_norm": 0.3909844756126404, "learning_rate": 9.191630906561404e-06, "loss": 0.4729, "step": 953 }, { "epoch": 0.7965488449763429, "grad_norm": 0.3471393287181854, "learning_rate": 9.188978509495022e-06, "loss": 0.4324, "step": 954 }, { "epoch": 0.797383801836905, "grad_norm": 0.407484233379364, "learning_rate": 9.186322152075368e-06, "loss": 0.4692, "step": 955 }, { "epoch": 0.7982187586974673, "grad_norm": 0.4027973711490631, "learning_rate": 9.183661836813823e-06, "loss": 0.4589, "step": 956 }, { "epoch": 0.7990537155580295, "grad_norm": 0.41243547201156616, "learning_rate": 9.180997566225509e-06, "loss": 0.4679, "step": 957 }, { "epoch": 0.7998886724185917, "grad_norm": 0.36790233850479126, "learning_rate": 9.178329342829285e-06, "loss": 0.4689, "step": 958 }, { "epoch": 0.8007236292791539, "grad_norm": 0.49646028876304626, "learning_rate": 9.17565716914775e-06, "loss": 0.4884, "step": 959 }, { "epoch": 0.8015585861397161, "grad_norm": 0.4162334203720093, "learning_rate": 9.172981047707235e-06, "loss": 0.4691, "step": 960 }, { "epoch": 0.8023935430002783, "grad_norm": 0.3919675350189209, "learning_rate": 9.170300981037806e-06, "loss": 0.46, "step": 961 }, { "epoch": 0.8032284998608406, "grad_norm": 0.43607059121131897, "learning_rate": 9.167616971673255e-06, "loss": 0.4666, "step": 962 }, { "epoch": 0.8040634567214028, "grad_norm": 0.41449257731437683, "learning_rate": 9.164929022151106e-06, "loss": 0.4563, "step": 963 }, { "epoch": 0.8048984135819649, "grad_norm": 0.44065386056900024, "learning_rate": 9.162237135012608e-06, "loss": 0.4638, "step": 964 }, { "epoch": 0.8057333704425271, "grad_norm": 0.4006085991859436, "learning_rate": 9.159541312802726e-06, "loss": 0.5024, "step": 965 }, { "epoch": 0.8065683273030894, "grad_norm": 0.4124537408351898, "learning_rate": 9.156841558070155e-06, "loss": 0.4857, "step": 966 }, { "epoch": 0.8074032841636516, "grad_norm": 0.4191204905509949, "learning_rate": 9.154137873367302e-06, "loss": 0.4292, "step": 967 }, { "epoch": 0.8082382410242137, "grad_norm": 0.44017213582992554, "learning_rate": 9.151430261250288e-06, "loss": 0.4791, "step": 968 }, { "epoch": 0.8090731978847759, "grad_norm": 0.4162890613079071, "learning_rate": 9.148718724278949e-06, "loss": 0.4393, "step": 969 }, { "epoch": 0.8099081547453382, "grad_norm": 0.4231550693511963, "learning_rate": 9.146003265016836e-06, "loss": 0.4519, "step": 970 }, { "epoch": 0.8107431116059004, "grad_norm": 0.41570845246315, "learning_rate": 9.143283886031204e-06, "loss": 0.4694, "step": 971 }, { "epoch": 0.8115780684664625, "grad_norm": 0.44144362211227417, "learning_rate": 9.140560589893012e-06, "loss": 0.4501, "step": 972 }, { "epoch": 0.8124130253270248, "grad_norm": 0.4171493649482727, "learning_rate": 9.137833379176926e-06, "loss": 0.468, "step": 973 }, { "epoch": 0.813247982187587, "grad_norm": 0.44854456186294556, "learning_rate": 9.13510225646131e-06, "loss": 0.4566, "step": 974 }, { "epoch": 0.8140829390481492, "grad_norm": 0.4269168972969055, "learning_rate": 9.132367224328232e-06, "loss": 0.4609, "step": 975 }, { "epoch": 0.8149178959087113, "grad_norm": 0.46378380060195923, "learning_rate": 9.129628285363446e-06, "loss": 0.465, "step": 976 }, { "epoch": 0.8157528527692736, "grad_norm": 0.4351177215576172, "learning_rate": 9.12688544215641e-06, "loss": 0.4769, "step": 977 }, { "epoch": 0.8165878096298358, "grad_norm": 0.40437689423561096, "learning_rate": 9.12413869730027e-06, "loss": 0.4331, "step": 978 }, { "epoch": 0.817422766490398, "grad_norm": 0.5273181796073914, "learning_rate": 9.121388053391857e-06, "loss": 0.4553, "step": 979 }, { "epoch": 0.8182577233509603, "grad_norm": 0.4019090235233307, "learning_rate": 9.11863351303169e-06, "loss": 0.4654, "step": 980 }, { "epoch": 0.8190926802115224, "grad_norm": 0.44505634903907776, "learning_rate": 9.115875078823975e-06, "loss": 0.4573, "step": 981 }, { "epoch": 0.8199276370720846, "grad_norm": 0.3893716037273407, "learning_rate": 9.113112753376594e-06, "loss": 0.4704, "step": 982 }, { "epoch": 0.8207625939326468, "grad_norm": 0.43743714690208435, "learning_rate": 9.110346539301114e-06, "loss": 0.456, "step": 983 }, { "epoch": 0.8215975507932091, "grad_norm": 0.47634443640708923, "learning_rate": 9.107576439212773e-06, "loss": 0.4634, "step": 984 }, { "epoch": 0.8224325076537712, "grad_norm": 0.3957846760749817, "learning_rate": 9.104802455730487e-06, "loss": 0.4677, "step": 985 }, { "epoch": 0.8232674645143334, "grad_norm": 0.4874296486377716, "learning_rate": 9.10202459147684e-06, "loss": 0.4873, "step": 986 }, { "epoch": 0.8241024213748956, "grad_norm": 0.42559853196144104, "learning_rate": 9.099242849078087e-06, "loss": 0.4612, "step": 987 }, { "epoch": 0.8249373782354579, "grad_norm": 0.4107365906238556, "learning_rate": 9.096457231164149e-06, "loss": 0.4557, "step": 988 }, { "epoch": 0.82577233509602, "grad_norm": 0.41832423210144043, "learning_rate": 9.093667740368611e-06, "loss": 0.4592, "step": 989 }, { "epoch": 0.8266072919565822, "grad_norm": 0.4000125825405121, "learning_rate": 9.09087437932872e-06, "loss": 0.472, "step": 990 }, { "epoch": 0.8274422488171445, "grad_norm": 0.36851930618286133, "learning_rate": 9.08807715068538e-06, "loss": 0.468, "step": 991 }, { "epoch": 0.8282772056777067, "grad_norm": 0.4152893126010895, "learning_rate": 9.085276057083155e-06, "loss": 0.4736, "step": 992 }, { "epoch": 0.8291121625382688, "grad_norm": 0.37865686416625977, "learning_rate": 9.082471101170261e-06, "loss": 0.4775, "step": 993 }, { "epoch": 0.829947119398831, "grad_norm": 0.4074101448059082, "learning_rate": 9.079662285598563e-06, "loss": 0.4478, "step": 994 }, { "epoch": 0.8307820762593933, "grad_norm": 0.41535744071006775, "learning_rate": 9.07684961302358e-06, "loss": 0.4856, "step": 995 }, { "epoch": 0.8316170331199555, "grad_norm": 0.36426132917404175, "learning_rate": 9.074033086104475e-06, "loss": 0.4527, "step": 996 }, { "epoch": 0.8324519899805177, "grad_norm": 0.36238130927085876, "learning_rate": 9.071212707504054e-06, "loss": 0.4617, "step": 997 }, { "epoch": 0.8332869468410798, "grad_norm": 0.3880484998226166, "learning_rate": 9.068388479888768e-06, "loss": 0.4363, "step": 998 }, { "epoch": 0.8341219037016421, "grad_norm": 0.411693811416626, "learning_rate": 9.065560405928699e-06, "loss": 0.4437, "step": 999 }, { "epoch": 0.8349568605622043, "grad_norm": 0.3899157643318176, "learning_rate": 9.062728488297578e-06, "loss": 0.4617, "step": 1000 }, { "epoch": 0.8357918174227665, "grad_norm": 0.3666572570800781, "learning_rate": 9.059892729672757e-06, "loss": 0.4752, "step": 1001 }, { "epoch": 0.8366267742833287, "grad_norm": 0.4257703423500061, "learning_rate": 9.057053132735228e-06, "loss": 0.4678, "step": 1002 }, { "epoch": 0.8374617311438909, "grad_norm": 0.401671439409256, "learning_rate": 9.05420970016961e-06, "loss": 0.4347, "step": 1003 }, { "epoch": 0.8382966880044531, "grad_norm": 0.4108037054538727, "learning_rate": 9.051362434664143e-06, "loss": 0.4574, "step": 1004 }, { "epoch": 0.8391316448650153, "grad_norm": 0.42155301570892334, "learning_rate": 9.0485113389107e-06, "loss": 0.4744, "step": 1005 }, { "epoch": 0.8399666017255775, "grad_norm": 0.3973124027252197, "learning_rate": 9.045656415604766e-06, "loss": 0.4569, "step": 1006 }, { "epoch": 0.8408015585861397, "grad_norm": 0.3551585078239441, "learning_rate": 9.042797667445451e-06, "loss": 0.4539, "step": 1007 }, { "epoch": 0.8416365154467019, "grad_norm": 0.4569454491138458, "learning_rate": 9.039935097135479e-06, "loss": 0.4841, "step": 1008 }, { "epoch": 0.8424714723072642, "grad_norm": 0.40217700600624084, "learning_rate": 9.037068707381186e-06, "loss": 0.4886, "step": 1009 }, { "epoch": 0.8433064291678263, "grad_norm": 0.3971141576766968, "learning_rate": 9.034198500892525e-06, "loss": 0.4661, "step": 1010 }, { "epoch": 0.8441413860283885, "grad_norm": 0.39705443382263184, "learning_rate": 9.031324480383049e-06, "loss": 0.4618, "step": 1011 }, { "epoch": 0.8449763428889507, "grad_norm": 0.3396252989768982, "learning_rate": 9.028446648569918e-06, "loss": 0.4601, "step": 1012 }, { "epoch": 0.845811299749513, "grad_norm": 0.4263751804828644, "learning_rate": 9.025565008173905e-06, "loss": 0.4612, "step": 1013 }, { "epoch": 0.8466462566100752, "grad_norm": 0.3965504467487335, "learning_rate": 9.022679561919372e-06, "loss": 0.4785, "step": 1014 }, { "epoch": 0.8474812134706373, "grad_norm": 0.39851677417755127, "learning_rate": 9.019790312534284e-06, "loss": 0.4725, "step": 1015 }, { "epoch": 0.8483161703311995, "grad_norm": 0.3874804377555847, "learning_rate": 9.016897262750205e-06, "loss": 0.4405, "step": 1016 }, { "epoch": 0.8491511271917618, "grad_norm": 0.414986252784729, "learning_rate": 9.014000415302286e-06, "loss": 0.495, "step": 1017 }, { "epoch": 0.849986084052324, "grad_norm": 0.38721713423728943, "learning_rate": 9.01109977292927e-06, "loss": 0.4723, "step": 1018 }, { "epoch": 0.8508210409128861, "grad_norm": 0.41110092401504517, "learning_rate": 9.008195338373492e-06, "loss": 0.4812, "step": 1019 }, { "epoch": 0.8516559977734484, "grad_norm": 0.3770105540752411, "learning_rate": 9.005287114380862e-06, "loss": 0.4868, "step": 1020 }, { "epoch": 0.8524909546340106, "grad_norm": 0.40086254477500916, "learning_rate": 9.002375103700889e-06, "loss": 0.4723, "step": 1021 }, { "epoch": 0.8533259114945728, "grad_norm": 0.41888847947120667, "learning_rate": 8.999459309086646e-06, "loss": 0.4437, "step": 1022 }, { "epoch": 0.8541608683551349, "grad_norm": 0.43451961874961853, "learning_rate": 8.99653973329479e-06, "loss": 0.4553, "step": 1023 }, { "epoch": 0.8549958252156972, "grad_norm": 0.40402188897132874, "learning_rate": 8.993616379085559e-06, "loss": 0.4457, "step": 1024 }, { "epoch": 0.8558307820762594, "grad_norm": 0.48918417096138, "learning_rate": 8.99068924922275e-06, "loss": 0.4672, "step": 1025 }, { "epoch": 0.8566657389368216, "grad_norm": 0.4336961805820465, "learning_rate": 8.987758346473739e-06, "loss": 0.48, "step": 1026 }, { "epoch": 0.8575006957973837, "grad_norm": 0.5528713464736938, "learning_rate": 8.984823673609465e-06, "loss": 0.4612, "step": 1027 }, { "epoch": 0.858335652657946, "grad_norm": 0.4374152719974518, "learning_rate": 8.981885233404437e-06, "loss": 0.4613, "step": 1028 }, { "epoch": 0.8591706095185082, "grad_norm": 0.3768051266670227, "learning_rate": 8.978943028636718e-06, "loss": 0.4584, "step": 1029 }, { "epoch": 0.8600055663790704, "grad_norm": 0.5078704357147217, "learning_rate": 8.975997062087934e-06, "loss": 0.4824, "step": 1030 }, { "epoch": 0.8608405232396327, "grad_norm": 0.44574174284935, "learning_rate": 8.973047336543268e-06, "loss": 0.4442, "step": 1031 }, { "epoch": 0.8616754801001948, "grad_norm": 0.42980003356933594, "learning_rate": 8.970093854791456e-06, "loss": 0.4319, "step": 1032 }, { "epoch": 0.862510436960757, "grad_norm": 0.40309667587280273, "learning_rate": 8.967136619624785e-06, "loss": 0.479, "step": 1033 }, { "epoch": 0.8633453938213193, "grad_norm": 0.5169181227684021, "learning_rate": 8.964175633839093e-06, "loss": 0.4466, "step": 1034 }, { "epoch": 0.8641803506818815, "grad_norm": 0.45964986085891724, "learning_rate": 8.961210900233757e-06, "loss": 0.4733, "step": 1035 }, { "epoch": 0.8650153075424436, "grad_norm": 0.4117373824119568, "learning_rate": 8.958242421611705e-06, "loss": 0.4528, "step": 1036 }, { "epoch": 0.8658502644030058, "grad_norm": 0.4803687036037445, "learning_rate": 8.955270200779402e-06, "loss": 0.4524, "step": 1037 }, { "epoch": 0.8666852212635681, "grad_norm": 0.39435717463493347, "learning_rate": 8.952294240546853e-06, "loss": 0.4264, "step": 1038 }, { "epoch": 0.8675201781241303, "grad_norm": 0.414730429649353, "learning_rate": 8.949314543727594e-06, "loss": 0.4526, "step": 1039 }, { "epoch": 0.8683551349846924, "grad_norm": 0.37305697798728943, "learning_rate": 8.946331113138698e-06, "loss": 0.4527, "step": 1040 }, { "epoch": 0.8691900918452546, "grad_norm": 0.40117746591567993, "learning_rate": 8.943343951600768e-06, "loss": 0.4246, "step": 1041 }, { "epoch": 0.8700250487058169, "grad_norm": 0.3810097575187683, "learning_rate": 8.94035306193793e-06, "loss": 0.4693, "step": 1042 }, { "epoch": 0.8708600055663791, "grad_norm": 0.418437659740448, "learning_rate": 8.93735844697784e-06, "loss": 0.4692, "step": 1043 }, { "epoch": 0.8716949624269412, "grad_norm": 0.37509140372276306, "learning_rate": 8.934360109551671e-06, "loss": 0.4484, "step": 1044 }, { "epoch": 0.8725299192875035, "grad_norm": 0.4125625491142273, "learning_rate": 8.931358052494119e-06, "loss": 0.4966, "step": 1045 }, { "epoch": 0.8733648761480657, "grad_norm": 0.42215314507484436, "learning_rate": 8.928352278643397e-06, "loss": 0.4651, "step": 1046 }, { "epoch": 0.8741998330086279, "grad_norm": 0.40050846338272095, "learning_rate": 8.925342790841228e-06, "loss": 0.4633, "step": 1047 }, { "epoch": 0.8750347898691901, "grad_norm": 0.34847772121429443, "learning_rate": 8.92232959193285e-06, "loss": 0.4467, "step": 1048 }, { "epoch": 0.8758697467297523, "grad_norm": 0.3692387044429779, "learning_rate": 8.919312684767005e-06, "loss": 0.4659, "step": 1049 }, { "epoch": 0.8767047035903145, "grad_norm": 0.36542755365371704, "learning_rate": 8.91629207219595e-06, "loss": 0.4762, "step": 1050 }, { "epoch": 0.8775396604508767, "grad_norm": 0.442965567111969, "learning_rate": 8.913267757075432e-06, "loss": 0.4718, "step": 1051 }, { "epoch": 0.878374617311439, "grad_norm": 0.36119112372398376, "learning_rate": 8.910239742264712e-06, "loss": 0.4883, "step": 1052 }, { "epoch": 0.8792095741720011, "grad_norm": 0.37131625413894653, "learning_rate": 8.907208030626538e-06, "loss": 0.4303, "step": 1053 }, { "epoch": 0.8800445310325633, "grad_norm": 0.3986499011516571, "learning_rate": 8.90417262502716e-06, "loss": 0.4432, "step": 1054 }, { "epoch": 0.8808794878931255, "grad_norm": 0.3974599838256836, "learning_rate": 8.901133528336319e-06, "loss": 0.4895, "step": 1055 }, { "epoch": 0.8817144447536878, "grad_norm": 0.41775351762771606, "learning_rate": 8.89809074342724e-06, "loss": 0.4509, "step": 1056 }, { "epoch": 0.8825494016142499, "grad_norm": 0.43113431334495544, "learning_rate": 8.895044273176642e-06, "loss": 0.4765, "step": 1057 }, { "epoch": 0.8833843584748121, "grad_norm": 0.41361457109451294, "learning_rate": 8.891994120464727e-06, "loss": 0.4443, "step": 1058 }, { "epoch": 0.8842193153353743, "grad_norm": 0.3974114656448364, "learning_rate": 8.888940288175174e-06, "loss": 0.4513, "step": 1059 }, { "epoch": 0.8850542721959366, "grad_norm": 0.38580232858657837, "learning_rate": 8.885882779195146e-06, "loss": 0.4713, "step": 1060 }, { "epoch": 0.8858892290564987, "grad_norm": 0.3727777600288391, "learning_rate": 8.882821596415278e-06, "loss": 0.4444, "step": 1061 }, { "epoch": 0.8867241859170609, "grad_norm": 0.395480751991272, "learning_rate": 8.879756742729683e-06, "loss": 0.4722, "step": 1062 }, { "epoch": 0.8875591427776232, "grad_norm": 0.37964779138565063, "learning_rate": 8.876688221035941e-06, "loss": 0.4643, "step": 1063 }, { "epoch": 0.8883940996381854, "grad_norm": 0.4140765964984894, "learning_rate": 8.873616034235098e-06, "loss": 0.4916, "step": 1064 }, { "epoch": 0.8892290564987476, "grad_norm": 0.3914014399051666, "learning_rate": 8.87054018523167e-06, "loss": 0.4633, "step": 1065 }, { "epoch": 0.8900640133593097, "grad_norm": 0.4014011025428772, "learning_rate": 8.867460676933632e-06, "loss": 0.4702, "step": 1066 }, { "epoch": 0.890898970219872, "grad_norm": 0.3577623665332794, "learning_rate": 8.86437751225242e-06, "loss": 0.4416, "step": 1067 }, { "epoch": 0.8917339270804342, "grad_norm": 0.42382901906967163, "learning_rate": 8.861290694102926e-06, "loss": 0.4677, "step": 1068 }, { "epoch": 0.8925688839409964, "grad_norm": 0.3839154541492462, "learning_rate": 8.858200225403496e-06, "loss": 0.4579, "step": 1069 }, { "epoch": 0.8934038408015585, "grad_norm": 0.388854444026947, "learning_rate": 8.855106109075928e-06, "loss": 0.4682, "step": 1070 }, { "epoch": 0.8942387976621208, "grad_norm": 0.4101439416408539, "learning_rate": 8.852008348045468e-06, "loss": 0.4633, "step": 1071 }, { "epoch": 0.895073754522683, "grad_norm": 0.3486945331096649, "learning_rate": 8.848906945240809e-06, "loss": 0.473, "step": 1072 }, { "epoch": 0.8959087113832452, "grad_norm": 0.37250789999961853, "learning_rate": 8.845801903594086e-06, "loss": 0.4439, "step": 1073 }, { "epoch": 0.8967436682438074, "grad_norm": 0.39098939299583435, "learning_rate": 8.842693226040874e-06, "loss": 0.4803, "step": 1074 }, { "epoch": 0.8975786251043696, "grad_norm": 0.38163793087005615, "learning_rate": 8.839580915520185e-06, "loss": 0.4615, "step": 1075 }, { "epoch": 0.8984135819649318, "grad_norm": 0.4190160632133484, "learning_rate": 8.836464974974468e-06, "loss": 0.4595, "step": 1076 }, { "epoch": 0.899248538825494, "grad_norm": 0.354496568441391, "learning_rate": 8.8333454073496e-06, "loss": 0.4636, "step": 1077 }, { "epoch": 0.9000834956860562, "grad_norm": 0.36859792470932007, "learning_rate": 8.83022221559489e-06, "loss": 0.4493, "step": 1078 }, { "epoch": 0.9009184525466184, "grad_norm": 0.3574811816215515, "learning_rate": 8.827095402663077e-06, "loss": 0.4204, "step": 1079 }, { "epoch": 0.9017534094071806, "grad_norm": 0.4095105528831482, "learning_rate": 8.823964971510313e-06, "loss": 0.4365, "step": 1080 }, { "epoch": 0.9025883662677429, "grad_norm": 0.3416433036327362, "learning_rate": 8.82083092509618e-06, "loss": 0.4857, "step": 1081 }, { "epoch": 0.9034233231283051, "grad_norm": 0.38848286867141724, "learning_rate": 8.817693266383677e-06, "loss": 0.4644, "step": 1082 }, { "epoch": 0.9042582799888672, "grad_norm": 0.3751954436302185, "learning_rate": 8.814551998339213e-06, "loss": 0.4602, "step": 1083 }, { "epoch": 0.9050932368494294, "grad_norm": 0.38503170013427734, "learning_rate": 8.811407123932615e-06, "loss": 0.476, "step": 1084 }, { "epoch": 0.9059281937099917, "grad_norm": 0.37285757064819336, "learning_rate": 8.808258646137115e-06, "loss": 0.4657, "step": 1085 }, { "epoch": 0.9067631505705539, "grad_norm": 0.40411680936813354, "learning_rate": 8.805106567929356e-06, "loss": 0.4733, "step": 1086 }, { "epoch": 0.907598107431116, "grad_norm": 0.36742836236953735, "learning_rate": 8.801950892289382e-06, "loss": 0.4637, "step": 1087 }, { "epoch": 0.9084330642916782, "grad_norm": 0.4139085114002228, "learning_rate": 8.798791622200637e-06, "loss": 0.4386, "step": 1088 }, { "epoch": 0.9092680211522405, "grad_norm": 0.38090282678604126, "learning_rate": 8.795628760649965e-06, "loss": 0.449, "step": 1089 }, { "epoch": 0.9101029780128027, "grad_norm": 0.3954095244407654, "learning_rate": 8.792462310627609e-06, "loss": 0.4345, "step": 1090 }, { "epoch": 0.9109379348733648, "grad_norm": 0.397135466337204, "learning_rate": 8.7892922751272e-06, "loss": 0.4389, "step": 1091 }, { "epoch": 0.9117728917339271, "grad_norm": 0.38652503490448, "learning_rate": 8.786118657145756e-06, "loss": 0.5058, "step": 1092 }, { "epoch": 0.9126078485944893, "grad_norm": 0.34283602237701416, "learning_rate": 8.782941459683692e-06, "loss": 0.4623, "step": 1093 }, { "epoch": 0.9134428054550515, "grad_norm": 0.38074058294296265, "learning_rate": 8.779760685744795e-06, "loss": 0.4453, "step": 1094 }, { "epoch": 0.9142777623156136, "grad_norm": 0.40289872884750366, "learning_rate": 8.776576338336243e-06, "loss": 0.4675, "step": 1095 }, { "epoch": 0.9151127191761759, "grad_norm": 0.42308345437049866, "learning_rate": 8.773388420468588e-06, "loss": 0.4386, "step": 1096 }, { "epoch": 0.9159476760367381, "grad_norm": 0.4788864850997925, "learning_rate": 8.77019693515576e-06, "loss": 0.4476, "step": 1097 }, { "epoch": 0.9167826328973003, "grad_norm": 0.3730630874633789, "learning_rate": 8.767001885415055e-06, "loss": 0.4481, "step": 1098 }, { "epoch": 0.9176175897578626, "grad_norm": 0.4375440180301666, "learning_rate": 8.763803274267148e-06, "loss": 0.4705, "step": 1099 }, { "epoch": 0.9184525466184247, "grad_norm": 0.42336300015449524, "learning_rate": 8.760601104736078e-06, "loss": 0.4509, "step": 1100 }, { "epoch": 0.9192875034789869, "grad_norm": 0.3606290817260742, "learning_rate": 8.757395379849243e-06, "loss": 0.4692, "step": 1101 }, { "epoch": 0.9201224603395491, "grad_norm": 0.38672885298728943, "learning_rate": 8.754186102637412e-06, "loss": 0.4288, "step": 1102 }, { "epoch": 0.9209574172001114, "grad_norm": 0.3852262496948242, "learning_rate": 8.750973276134702e-06, "loss": 0.4451, "step": 1103 }, { "epoch": 0.9217923740606735, "grad_norm": 0.394916296005249, "learning_rate": 8.747756903378591e-06, "loss": 0.4672, "step": 1104 }, { "epoch": 0.9226273309212357, "grad_norm": 0.3692956268787384, "learning_rate": 8.744536987409911e-06, "loss": 0.4398, "step": 1105 }, { "epoch": 0.923462287781798, "grad_norm": 0.4405381679534912, "learning_rate": 8.741313531272842e-06, "loss": 0.4429, "step": 1106 }, { "epoch": 0.9242972446423602, "grad_norm": 0.35014188289642334, "learning_rate": 8.73808653801491e-06, "loss": 0.4131, "step": 1107 }, { "epoch": 0.9251322015029223, "grad_norm": 0.37025776505470276, "learning_rate": 8.734856010686987e-06, "loss": 0.4629, "step": 1108 }, { "epoch": 0.9259671583634845, "grad_norm": 0.38558414578437805, "learning_rate": 8.731621952343285e-06, "loss": 0.4448, "step": 1109 }, { "epoch": 0.9268021152240468, "grad_norm": 0.41601499915122986, "learning_rate": 8.728384366041353e-06, "loss": 0.474, "step": 1110 }, { "epoch": 0.927637072084609, "grad_norm": 0.42466601729393005, "learning_rate": 8.72514325484208e-06, "loss": 0.4631, "step": 1111 }, { "epoch": 0.9284720289451711, "grad_norm": 0.38206836581230164, "learning_rate": 8.721898621809682e-06, "loss": 0.4546, "step": 1112 }, { "epoch": 0.9293069858057333, "grad_norm": 0.3705179691314697, "learning_rate": 8.718650470011706e-06, "loss": 0.4453, "step": 1113 }, { "epoch": 0.9301419426662956, "grad_norm": 0.4192202389240265, "learning_rate": 8.71539880251903e-06, "loss": 0.4596, "step": 1114 }, { "epoch": 0.9309768995268578, "grad_norm": 0.41892483830451965, "learning_rate": 8.712143622405852e-06, "loss": 0.4705, "step": 1115 }, { "epoch": 0.93181185638742, "grad_norm": 0.4109576940536499, "learning_rate": 8.70888493274969e-06, "loss": 0.4591, "step": 1116 }, { "epoch": 0.9326468132479822, "grad_norm": 0.4406455457210541, "learning_rate": 8.705622736631385e-06, "loss": 0.4894, "step": 1117 }, { "epoch": 0.9334817701085444, "grad_norm": 0.4680812954902649, "learning_rate": 8.702357037135084e-06, "loss": 0.4714, "step": 1118 }, { "epoch": 0.9343167269691066, "grad_norm": 0.3870704174041748, "learning_rate": 8.699087837348254e-06, "loss": 0.4524, "step": 1119 }, { "epoch": 0.9351516838296688, "grad_norm": 0.4348798096179962, "learning_rate": 8.695815140361672e-06, "loss": 0.4615, "step": 1120 }, { "epoch": 0.935986640690231, "grad_norm": 0.3693825304508209, "learning_rate": 8.692538949269415e-06, "loss": 0.4656, "step": 1121 }, { "epoch": 0.9368215975507932, "grad_norm": 0.43549972772598267, "learning_rate": 8.689259267168868e-06, "loss": 0.4448, "step": 1122 }, { "epoch": 0.9376565544113554, "grad_norm": 0.39185985922813416, "learning_rate": 8.685976097160716e-06, "loss": 0.4747, "step": 1123 }, { "epoch": 0.9384915112719177, "grad_norm": 0.4264979958534241, "learning_rate": 8.682689442348939e-06, "loss": 0.4676, "step": 1124 }, { "epoch": 0.9393264681324798, "grad_norm": 0.4401000738143921, "learning_rate": 8.679399305840815e-06, "loss": 0.4548, "step": 1125 }, { "epoch": 0.940161424993042, "grad_norm": 0.3857555389404297, "learning_rate": 8.676105690746912e-06, "loss": 0.4401, "step": 1126 }, { "epoch": 0.9409963818536042, "grad_norm": 0.45259594917297363, "learning_rate": 8.672808600181086e-06, "loss": 0.4537, "step": 1127 }, { "epoch": 0.9418313387141665, "grad_norm": 0.35687336325645447, "learning_rate": 8.66950803726048e-06, "loss": 0.4276, "step": 1128 }, { "epoch": 0.9426662955747286, "grad_norm": 0.4568375051021576, "learning_rate": 8.66620400510552e-06, "loss": 0.4523, "step": 1129 }, { "epoch": 0.9435012524352908, "grad_norm": 0.42135703563690186, "learning_rate": 8.66289650683991e-06, "loss": 0.4678, "step": 1130 }, { "epoch": 0.944336209295853, "grad_norm": 0.4041791260242462, "learning_rate": 8.659585545590633e-06, "loss": 0.4631, "step": 1131 }, { "epoch": 0.9451711661564153, "grad_norm": 0.446113258600235, "learning_rate": 8.656271124487946e-06, "loss": 0.4275, "step": 1132 }, { "epoch": 0.9460061230169775, "grad_norm": 0.40095254778862, "learning_rate": 8.652953246665374e-06, "loss": 0.4604, "step": 1133 }, { "epoch": 0.9468410798775396, "grad_norm": 0.5515506267547607, "learning_rate": 8.649631915259716e-06, "loss": 0.4642, "step": 1134 }, { "epoch": 0.9476760367381019, "grad_norm": 0.4058224856853485, "learning_rate": 8.646307133411028e-06, "loss": 0.476, "step": 1135 }, { "epoch": 0.9485109935986641, "grad_norm": 0.40712857246398926, "learning_rate": 8.642978904262637e-06, "loss": 0.4571, "step": 1136 }, { "epoch": 0.9493459504592263, "grad_norm": 0.49635809659957886, "learning_rate": 8.63964723096112e-06, "loss": 0.4566, "step": 1137 }, { "epoch": 0.9501809073197884, "grad_norm": 0.40184515714645386, "learning_rate": 8.63631211665632e-06, "loss": 0.4618, "step": 1138 }, { "epoch": 0.9510158641803507, "grad_norm": 0.3993614614009857, "learning_rate": 8.632973564501326e-06, "loss": 0.4156, "step": 1139 }, { "epoch": 0.9518508210409129, "grad_norm": 0.4305977523326874, "learning_rate": 8.629631577652478e-06, "loss": 0.4699, "step": 1140 }, { "epoch": 0.9526857779014751, "grad_norm": 0.40751221776008606, "learning_rate": 8.626286159269366e-06, "loss": 0.4605, "step": 1141 }, { "epoch": 0.9535207347620372, "grad_norm": 0.4151630103588104, "learning_rate": 8.622937312514821e-06, "loss": 0.4859, "step": 1142 }, { "epoch": 0.9543556916225995, "grad_norm": 0.43912795186042786, "learning_rate": 8.61958504055492e-06, "loss": 0.4501, "step": 1143 }, { "epoch": 0.9551906484831617, "grad_norm": 0.4349617660045624, "learning_rate": 8.616229346558973e-06, "loss": 0.4696, "step": 1144 }, { "epoch": 0.9560256053437239, "grad_norm": 0.4230968952178955, "learning_rate": 8.612870233699529e-06, "loss": 0.4803, "step": 1145 }, { "epoch": 0.9568605622042861, "grad_norm": 0.3758566975593567, "learning_rate": 8.609507705152366e-06, "loss": 0.4769, "step": 1146 }, { "epoch": 0.9576955190648483, "grad_norm": 0.43698954582214355, "learning_rate": 8.606141764096493e-06, "loss": 0.4481, "step": 1147 }, { "epoch": 0.9585304759254105, "grad_norm": 0.4195076823234558, "learning_rate": 8.60277241371415e-06, "loss": 0.4594, "step": 1148 }, { "epoch": 0.9593654327859727, "grad_norm": 0.4567149877548218, "learning_rate": 8.59939965719079e-06, "loss": 0.4758, "step": 1149 }, { "epoch": 0.960200389646535, "grad_norm": 0.43232616782188416, "learning_rate": 8.596023497715096e-06, "loss": 0.437, "step": 1150 }, { "epoch": 0.9610353465070971, "grad_norm": 0.40478935837745667, "learning_rate": 8.59264393847896e-06, "loss": 0.4626, "step": 1151 }, { "epoch": 0.9618703033676593, "grad_norm": 0.4077705442905426, "learning_rate": 8.589260982677496e-06, "loss": 0.4535, "step": 1152 }, { "epoch": 0.9627052602282216, "grad_norm": 0.41757848858833313, "learning_rate": 8.585874633509024e-06, "loss": 0.4697, "step": 1153 }, { "epoch": 0.9635402170887838, "grad_norm": 0.3955010175704956, "learning_rate": 8.582484894175075e-06, "loss": 0.433, "step": 1154 }, { "epoch": 0.9643751739493459, "grad_norm": 0.43250808119773865, "learning_rate": 8.57909176788038e-06, "loss": 0.4374, "step": 1155 }, { "epoch": 0.9652101308099081, "grad_norm": 0.4099995493888855, "learning_rate": 8.575695257832882e-06, "loss": 0.4612, "step": 1156 }, { "epoch": 0.9660450876704704, "grad_norm": 0.4488849937915802, "learning_rate": 8.57229536724371e-06, "loss": 0.4696, "step": 1157 }, { "epoch": 0.9668800445310326, "grad_norm": 0.4524378776550293, "learning_rate": 8.568892099327202e-06, "loss": 0.4631, "step": 1158 }, { "epoch": 0.9677150013915947, "grad_norm": 0.3733980059623718, "learning_rate": 8.56548545730088e-06, "loss": 0.452, "step": 1159 }, { "epoch": 0.968549958252157, "grad_norm": 0.44900304079055786, "learning_rate": 8.56207544438546e-06, "loss": 0.4989, "step": 1160 }, { "epoch": 0.9693849151127192, "grad_norm": 0.4003332257270813, "learning_rate": 8.558662063804843e-06, "loss": 0.4536, "step": 1161 }, { "epoch": 0.9702198719732814, "grad_norm": 0.39989808201789856, "learning_rate": 8.555245318786115e-06, "loss": 0.4764, "step": 1162 }, { "epoch": 0.9710548288338435, "grad_norm": 0.33255255222320557, "learning_rate": 8.551825212559544e-06, "loss": 0.4605, "step": 1163 }, { "epoch": 0.9718897856944058, "grad_norm": 0.3808225095272064, "learning_rate": 8.548401748358573e-06, "loss": 0.4499, "step": 1164 }, { "epoch": 0.972724742554968, "grad_norm": 0.37286409735679626, "learning_rate": 8.54497492941982e-06, "loss": 0.4326, "step": 1165 }, { "epoch": 0.9735596994155302, "grad_norm": 0.40604642033576965, "learning_rate": 8.541544758983078e-06, "loss": 0.4639, "step": 1166 }, { "epoch": 0.9743946562760925, "grad_norm": 0.38667207956314087, "learning_rate": 8.538111240291305e-06, "loss": 0.4744, "step": 1167 }, { "epoch": 0.9752296131366546, "grad_norm": 0.42653945088386536, "learning_rate": 8.534674376590625e-06, "loss": 0.4736, "step": 1168 }, { "epoch": 0.9760645699972168, "grad_norm": 0.4048736095428467, "learning_rate": 8.531234171130327e-06, "loss": 0.4636, "step": 1169 }, { "epoch": 0.976899526857779, "grad_norm": 0.3762049674987793, "learning_rate": 8.527790627162858e-06, "loss": 0.455, "step": 1170 }, { "epoch": 0.9777344837183413, "grad_norm": 0.3832264542579651, "learning_rate": 8.524343747943818e-06, "loss": 0.4567, "step": 1171 }, { "epoch": 0.9785694405789034, "grad_norm": 0.37372177839279175, "learning_rate": 8.520893536731967e-06, "loss": 0.4653, "step": 1172 }, { "epoch": 0.9794043974394656, "grad_norm": 0.3452583849430084, "learning_rate": 8.517439996789207e-06, "loss": 0.4597, "step": 1173 }, { "epoch": 0.9802393543000278, "grad_norm": 0.3868357837200165, "learning_rate": 8.513983131380594e-06, "loss": 0.478, "step": 1174 }, { "epoch": 0.9810743111605901, "grad_norm": 0.35661017894744873, "learning_rate": 8.510522943774326e-06, "loss": 0.4658, "step": 1175 }, { "epoch": 0.9819092680211522, "grad_norm": 0.367988258600235, "learning_rate": 8.507059437241741e-06, "loss": 0.4472, "step": 1176 }, { "epoch": 0.9827442248817144, "grad_norm": 0.3845326602458954, "learning_rate": 8.50359261505731e-06, "loss": 0.4479, "step": 1177 }, { "epoch": 0.9835791817422767, "grad_norm": 0.36346471309661865, "learning_rate": 8.50012248049865e-06, "loss": 0.4429, "step": 1178 }, { "epoch": 0.9844141386028389, "grad_norm": 0.38512083888053894, "learning_rate": 8.496649036846502e-06, "loss": 0.4471, "step": 1179 }, { "epoch": 0.985249095463401, "grad_norm": 0.37246620655059814, "learning_rate": 8.493172287384734e-06, "loss": 0.4578, "step": 1180 }, { "epoch": 0.9860840523239632, "grad_norm": 0.38701072335243225, "learning_rate": 8.489692235400343e-06, "loss": 0.4838, "step": 1181 }, { "epoch": 0.9869190091845255, "grad_norm": 0.37742719054222107, "learning_rate": 8.486208884183448e-06, "loss": 0.4464, "step": 1182 }, { "epoch": 0.9877539660450877, "grad_norm": 0.40770673751831055, "learning_rate": 8.482722237027289e-06, "loss": 0.4652, "step": 1183 }, { "epoch": 0.9885889229056499, "grad_norm": 0.3841376304626465, "learning_rate": 8.479232297228216e-06, "loss": 0.476, "step": 1184 }, { "epoch": 0.989423879766212, "grad_norm": 0.40949708223342896, "learning_rate": 8.475739068085697e-06, "loss": 0.4614, "step": 1185 }, { "epoch": 0.9902588366267743, "grad_norm": 0.37597325444221497, "learning_rate": 8.472242552902308e-06, "loss": 0.4751, "step": 1186 }, { "epoch": 0.9910937934873365, "grad_norm": 0.3560693860054016, "learning_rate": 8.468742754983731e-06, "loss": 0.494, "step": 1187 }, { "epoch": 0.9919287503478987, "grad_norm": 0.45301541686058044, "learning_rate": 8.465239677638755e-06, "loss": 0.4642, "step": 1188 }, { "epoch": 0.9927637072084609, "grad_norm": 0.3771940767765045, "learning_rate": 8.461733324179266e-06, "loss": 0.4506, "step": 1189 }, { "epoch": 0.9935986640690231, "grad_norm": 0.4015979468822479, "learning_rate": 8.458223697920248e-06, "loss": 0.4517, "step": 1190 }, { "epoch": 0.9944336209295853, "grad_norm": 0.38234806060791016, "learning_rate": 8.45471080217978e-06, "loss": 0.4706, "step": 1191 }, { "epoch": 0.9952685777901475, "grad_norm": 0.3993140757083893, "learning_rate": 8.451194640279034e-06, "loss": 0.4765, "step": 1192 }, { "epoch": 0.9961035346507097, "grad_norm": 0.3769570291042328, "learning_rate": 8.447675215542262e-06, "loss": 0.4903, "step": 1193 }, { "epoch": 0.9969384915112719, "grad_norm": 0.34079232811927795, "learning_rate": 8.44415253129681e-06, "loss": 0.4705, "step": 1194 }, { "epoch": 0.9977734483718341, "grad_norm": 0.3592251241207123, "learning_rate": 8.440626590873102e-06, "loss": 0.4695, "step": 1195 }, { "epoch": 0.9986084052323964, "grad_norm": 0.4212978482246399, "learning_rate": 8.437097397604639e-06, "loss": 0.4753, "step": 1196 }, { "epoch": 0.9994433620929585, "grad_norm": 0.4179081320762634, "learning_rate": 8.433564954828e-06, "loss": 0.4883, "step": 1197 }, { "epoch": 1.0002783189535207, "grad_norm": 0.5453283190727234, "learning_rate": 8.430029265882832e-06, "loss": 0.5759, "step": 1198 }, { "epoch": 1.001113275814083, "grad_norm": 0.4444243311882019, "learning_rate": 8.426490334111857e-06, "loss": 0.4748, "step": 1199 }, { "epoch": 1.0019482326746452, "grad_norm": 0.3727686405181885, "learning_rate": 8.42294816286086e-06, "loss": 0.3959, "step": 1200 }, { "epoch": 1.0027831895352073, "grad_norm": 0.4036638140678406, "learning_rate": 8.419402755478686e-06, "loss": 0.4226, "step": 1201 }, { "epoch": 1.0036181463957696, "grad_norm": 0.42375990748405457, "learning_rate": 8.415854115317242e-06, "loss": 0.4473, "step": 1202 }, { "epoch": 1.0044531032563317, "grad_norm": 0.37710559368133545, "learning_rate": 8.412302245731491e-06, "loss": 0.4305, "step": 1203 }, { "epoch": 1.0052880601168939, "grad_norm": 0.5333516001701355, "learning_rate": 8.408747150079452e-06, "loss": 0.4546, "step": 1204 }, { "epoch": 1.0061230169774562, "grad_norm": 0.4106464087963104, "learning_rate": 8.405188831722189e-06, "loss": 0.4273, "step": 1205 }, { "epoch": 1.0069579738380183, "grad_norm": 0.3667198419570923, "learning_rate": 8.401627294023815e-06, "loss": 0.3973, "step": 1206 }, { "epoch": 1.0077929306985807, "grad_norm": 0.4031447172164917, "learning_rate": 8.398062540351488e-06, "loss": 0.4898, "step": 1207 }, { "epoch": 1.0086278875591428, "grad_norm": 0.45503509044647217, "learning_rate": 8.394494574075405e-06, "loss": 0.4426, "step": 1208 }, { "epoch": 1.009462844419705, "grad_norm": 0.3898186683654785, "learning_rate": 8.3909233985688e-06, "loss": 0.4233, "step": 1209 }, { "epoch": 1.0102978012802672, "grad_norm": 0.42415669560432434, "learning_rate": 8.387349017207943e-06, "loss": 0.4302, "step": 1210 }, { "epoch": 1.0111327581408294, "grad_norm": 0.39149269461631775, "learning_rate": 8.38377143337213e-06, "loss": 0.4233, "step": 1211 }, { "epoch": 1.0119677150013915, "grad_norm": 0.38561955094337463, "learning_rate": 8.380190650443694e-06, "loss": 0.4031, "step": 1212 }, { "epoch": 1.0128026718619538, "grad_norm": 0.4610874652862549, "learning_rate": 8.37660667180798e-06, "loss": 0.4511, "step": 1213 }, { "epoch": 1.013637628722516, "grad_norm": 0.4715440571308136, "learning_rate": 8.373019500853365e-06, "loss": 0.4536, "step": 1214 }, { "epoch": 1.0144725855830783, "grad_norm": 0.4345378875732422, "learning_rate": 8.369429140971239e-06, "loss": 0.433, "step": 1215 }, { "epoch": 1.0153075424436404, "grad_norm": 0.38793835043907166, "learning_rate": 8.365835595556009e-06, "loss": 0.3752, "step": 1216 }, { "epoch": 1.0161424993042025, "grad_norm": 0.46928998827934265, "learning_rate": 8.362238868005093e-06, "loss": 0.4262, "step": 1217 }, { "epoch": 1.0169774561647649, "grad_norm": 0.4265890419483185, "learning_rate": 8.358638961718913e-06, "loss": 0.4355, "step": 1218 }, { "epoch": 1.017812413025327, "grad_norm": 0.41653627157211304, "learning_rate": 8.355035880100906e-06, "loss": 0.4588, "step": 1219 }, { "epoch": 1.0186473698858893, "grad_norm": 0.42124757170677185, "learning_rate": 8.3514296265575e-06, "loss": 0.4406, "step": 1220 }, { "epoch": 1.0194823267464515, "grad_norm": 0.4201187491416931, "learning_rate": 8.347820204498132e-06, "loss": 0.4425, "step": 1221 }, { "epoch": 1.0203172836070136, "grad_norm": 0.3318859934806824, "learning_rate": 8.344207617335225e-06, "loss": 0.423, "step": 1222 }, { "epoch": 1.021152240467576, "grad_norm": 0.41304415464401245, "learning_rate": 8.340591868484202e-06, "loss": 0.455, "step": 1223 }, { "epoch": 1.021987197328138, "grad_norm": 0.415715754032135, "learning_rate": 8.336972961363472e-06, "loss": 0.4141, "step": 1224 }, { "epoch": 1.0228221541887002, "grad_norm": 0.4331899583339691, "learning_rate": 8.33335089939443e-06, "loss": 0.4911, "step": 1225 }, { "epoch": 1.0236571110492625, "grad_norm": 0.3713350296020508, "learning_rate": 8.329725686001455e-06, "loss": 0.4004, "step": 1226 }, { "epoch": 1.0244920679098246, "grad_norm": 0.4068467915058136, "learning_rate": 8.3260973246119e-06, "loss": 0.4505, "step": 1227 }, { "epoch": 1.025327024770387, "grad_norm": 0.45081421732902527, "learning_rate": 8.322465818656103e-06, "loss": 0.4723, "step": 1228 }, { "epoch": 1.026161981630949, "grad_norm": 0.34469497203826904, "learning_rate": 8.31883117156737e-06, "loss": 0.3819, "step": 1229 }, { "epoch": 1.0269969384915112, "grad_norm": 0.40568721294403076, "learning_rate": 8.315193386781971e-06, "loss": 0.4115, "step": 1230 }, { "epoch": 1.0278318953520735, "grad_norm": 0.4055285155773163, "learning_rate": 8.311552467739158e-06, "loss": 0.4631, "step": 1231 }, { "epoch": 1.0286668522126357, "grad_norm": 0.4403301775455475, "learning_rate": 8.30790841788113e-06, "loss": 0.462, "step": 1232 }, { "epoch": 1.029501809073198, "grad_norm": 0.3618435263633728, "learning_rate": 8.304261240653054e-06, "loss": 0.4034, "step": 1233 }, { "epoch": 1.0303367659337601, "grad_norm": 0.3962664306163788, "learning_rate": 8.300610939503053e-06, "loss": 0.4236, "step": 1234 }, { "epoch": 1.0311717227943222, "grad_norm": 0.44025084376335144, "learning_rate": 8.296957517882203e-06, "loss": 0.4356, "step": 1235 }, { "epoch": 1.0320066796548846, "grad_norm": 0.36214208602905273, "learning_rate": 8.293300979244527e-06, "loss": 0.4288, "step": 1236 }, { "epoch": 1.0328416365154467, "grad_norm": 0.3808722198009491, "learning_rate": 8.289641327047003e-06, "loss": 0.398, "step": 1237 }, { "epoch": 1.0336765933760088, "grad_norm": 0.45300549268722534, "learning_rate": 8.285978564749544e-06, "loss": 0.4408, "step": 1238 }, { "epoch": 1.0345115502365712, "grad_norm": 0.42406371235847473, "learning_rate": 8.282312695815006e-06, "loss": 0.4736, "step": 1239 }, { "epoch": 1.0353465070971333, "grad_norm": 0.36899974942207336, "learning_rate": 8.278643723709185e-06, "loss": 0.4206, "step": 1240 }, { "epoch": 1.0361814639576956, "grad_norm": 0.42341160774230957, "learning_rate": 8.274971651900809e-06, "loss": 0.4489, "step": 1241 }, { "epoch": 1.0370164208182577, "grad_norm": 0.40383002161979675, "learning_rate": 8.271296483861532e-06, "loss": 0.4242, "step": 1242 }, { "epoch": 1.0378513776788199, "grad_norm": 0.3978999853134155, "learning_rate": 8.267618223065947e-06, "loss": 0.4668, "step": 1243 }, { "epoch": 1.0386863345393822, "grad_norm": 0.40718016028404236, "learning_rate": 8.263936872991555e-06, "loss": 0.4455, "step": 1244 }, { "epoch": 1.0395212913999443, "grad_norm": 0.36508607864379883, "learning_rate": 8.260252437118793e-06, "loss": 0.3856, "step": 1245 }, { "epoch": 1.0403562482605064, "grad_norm": 0.46424198150634766, "learning_rate": 8.256564918931004e-06, "loss": 0.4762, "step": 1246 }, { "epoch": 1.0411912051210688, "grad_norm": 0.38435083627700806, "learning_rate": 8.25287432191445e-06, "loss": 0.4375, "step": 1247 }, { "epoch": 1.042026161981631, "grad_norm": 0.38027897477149963, "learning_rate": 8.249180649558306e-06, "loss": 0.4171, "step": 1248 }, { "epoch": 1.0428611188421932, "grad_norm": 0.38001540303230286, "learning_rate": 8.24548390535465e-06, "loss": 0.4743, "step": 1249 }, { "epoch": 1.0436960757027554, "grad_norm": 0.3744577467441559, "learning_rate": 8.241784092798466e-06, "loss": 0.4353, "step": 1250 }, { "epoch": 1.0445310325633175, "grad_norm": 0.36065223813056946, "learning_rate": 8.238081215387639e-06, "loss": 0.4146, "step": 1251 }, { "epoch": 1.0453659894238798, "grad_norm": 0.3580431640148163, "learning_rate": 8.234375276622953e-06, "loss": 0.4356, "step": 1252 }, { "epoch": 1.046200946284442, "grad_norm": 0.38571667671203613, "learning_rate": 8.230666280008082e-06, "loss": 0.429, "step": 1253 }, { "epoch": 1.0470359031450043, "grad_norm": 0.34768784046173096, "learning_rate": 8.226954229049595e-06, "loss": 0.4313, "step": 1254 }, { "epoch": 1.0478708600055664, "grad_norm": 0.31778484582901, "learning_rate": 8.223239127256947e-06, "loss": 0.4294, "step": 1255 }, { "epoch": 1.0487058168661285, "grad_norm": 0.3948061764240265, "learning_rate": 8.219520978142481e-06, "loss": 0.4566, "step": 1256 }, { "epoch": 1.0495407737266909, "grad_norm": 0.42613685131073, "learning_rate": 8.215799785221412e-06, "loss": 0.448, "step": 1257 }, { "epoch": 1.050375730587253, "grad_norm": 0.3526837229728699, "learning_rate": 8.212075552011845e-06, "loss": 0.4558, "step": 1258 }, { "epoch": 1.051210687447815, "grad_norm": 0.4181871712207794, "learning_rate": 8.208348282034745e-06, "loss": 0.4557, "step": 1259 }, { "epoch": 1.0520456443083774, "grad_norm": 0.3557935953140259, "learning_rate": 8.204617978813963e-06, "loss": 0.4256, "step": 1260 }, { "epoch": 1.0528806011689396, "grad_norm": 0.37664347887039185, "learning_rate": 8.200884645876206e-06, "loss": 0.4063, "step": 1261 }, { "epoch": 1.053715558029502, "grad_norm": 0.3904355466365814, "learning_rate": 8.19714828675105e-06, "loss": 0.4398, "step": 1262 }, { "epoch": 1.054550514890064, "grad_norm": 0.3303653597831726, "learning_rate": 8.193408904970935e-06, "loss": 0.4324, "step": 1263 }, { "epoch": 1.0553854717506261, "grad_norm": 0.49074405431747437, "learning_rate": 8.189666504071153e-06, "loss": 0.5002, "step": 1264 }, { "epoch": 1.0562204286111885, "grad_norm": 0.341898113489151, "learning_rate": 8.185921087589852e-06, "loss": 0.3942, "step": 1265 }, { "epoch": 1.0570553854717506, "grad_norm": 0.4133915901184082, "learning_rate": 8.182172659068036e-06, "loss": 0.4153, "step": 1266 }, { "epoch": 1.0578903423323127, "grad_norm": 0.39131951332092285, "learning_rate": 8.178421222049548e-06, "loss": 0.4066, "step": 1267 }, { "epoch": 1.058725299192875, "grad_norm": 0.38009709119796753, "learning_rate": 8.174666780081083e-06, "loss": 0.3912, "step": 1268 }, { "epoch": 1.0595602560534372, "grad_norm": 0.4117136001586914, "learning_rate": 8.170909336712171e-06, "loss": 0.4629, "step": 1269 }, { "epoch": 1.0603952129139995, "grad_norm": 0.43188008666038513, "learning_rate": 8.167148895495185e-06, "loss": 0.4343, "step": 1270 }, { "epoch": 1.0612301697745616, "grad_norm": 0.44647467136383057, "learning_rate": 8.16338545998533e-06, "loss": 0.4252, "step": 1271 }, { "epoch": 1.0620651266351238, "grad_norm": 0.4601932168006897, "learning_rate": 8.159619033740637e-06, "loss": 0.4123, "step": 1272 }, { "epoch": 1.062900083495686, "grad_norm": 0.39546871185302734, "learning_rate": 8.155849620321973e-06, "loss": 0.4049, "step": 1273 }, { "epoch": 1.0637350403562482, "grad_norm": 0.4147133231163025, "learning_rate": 8.152077223293023e-06, "loss": 0.4138, "step": 1274 }, { "epoch": 1.0645699972168106, "grad_norm": 0.44405868649482727, "learning_rate": 8.148301846220298e-06, "loss": 0.4475, "step": 1275 }, { "epoch": 1.0654049540773727, "grad_norm": 0.43125927448272705, "learning_rate": 8.14452349267312e-06, "loss": 0.43, "step": 1276 }, { "epoch": 1.0662399109379348, "grad_norm": 0.42497190833091736, "learning_rate": 8.140742166223629e-06, "loss": 0.4399, "step": 1277 }, { "epoch": 1.0670748677984971, "grad_norm": 0.43800365924835205, "learning_rate": 8.136957870446779e-06, "loss": 0.4308, "step": 1278 }, { "epoch": 1.0679098246590593, "grad_norm": 0.37959644198417664, "learning_rate": 8.13317060892032e-06, "loss": 0.4301, "step": 1279 }, { "epoch": 1.0687447815196216, "grad_norm": 0.40193378925323486, "learning_rate": 8.129380385224817e-06, "loss": 0.423, "step": 1280 }, { "epoch": 1.0695797383801837, "grad_norm": 0.413592666387558, "learning_rate": 8.125587202943633e-06, "loss": 0.4278, "step": 1281 }, { "epoch": 1.0704146952407458, "grad_norm": 0.3684830963611603, "learning_rate": 8.121791065662925e-06, "loss": 0.4111, "step": 1282 }, { "epoch": 1.0712496521013082, "grad_norm": 0.4362364411354065, "learning_rate": 8.117991976971645e-06, "loss": 0.4843, "step": 1283 }, { "epoch": 1.0720846089618703, "grad_norm": 0.33870431780815125, "learning_rate": 8.114189940461536e-06, "loss": 0.3828, "step": 1284 }, { "epoch": 1.0729195658224324, "grad_norm": 0.37360304594039917, "learning_rate": 8.110384959727129e-06, "loss": 0.3913, "step": 1285 }, { "epoch": 1.0737545226829948, "grad_norm": 0.35021284222602844, "learning_rate": 8.106577038365736e-06, "loss": 0.407, "step": 1286 }, { "epoch": 1.0745894795435569, "grad_norm": 0.4205673635005951, "learning_rate": 8.102766179977452e-06, "loss": 0.4145, "step": 1287 }, { "epoch": 1.0754244364041192, "grad_norm": 0.3723352551460266, "learning_rate": 8.098952388165144e-06, "loss": 0.4387, "step": 1288 }, { "epoch": 1.0762593932646813, "grad_norm": 0.4171176552772522, "learning_rate": 8.09513566653446e-06, "loss": 0.4376, "step": 1289 }, { "epoch": 1.0770943501252435, "grad_norm": 0.44888463616371155, "learning_rate": 8.091316018693807e-06, "loss": 0.4786, "step": 1290 }, { "epoch": 1.0779293069858058, "grad_norm": 0.37876078486442566, "learning_rate": 8.087493448254372e-06, "loss": 0.3939, "step": 1291 }, { "epoch": 1.078764263846368, "grad_norm": 0.40357109904289246, "learning_rate": 8.083667958830092e-06, "loss": 0.422, "step": 1292 }, { "epoch": 1.07959922070693, "grad_norm": 0.3791777491569519, "learning_rate": 8.079839554037673e-06, "loss": 0.4533, "step": 1293 }, { "epoch": 1.0804341775674924, "grad_norm": 0.40309950709342957, "learning_rate": 8.076008237496573e-06, "loss": 0.4465, "step": 1294 }, { "epoch": 1.0812691344280545, "grad_norm": 0.3638635575771332, "learning_rate": 8.072174012829004e-06, "loss": 0.4011, "step": 1295 }, { "epoch": 1.0821040912886168, "grad_norm": 0.48886871337890625, "learning_rate": 8.068336883659926e-06, "loss": 0.4846, "step": 1296 }, { "epoch": 1.082939048149179, "grad_norm": 0.39815959334373474, "learning_rate": 8.064496853617047e-06, "loss": 0.3821, "step": 1297 }, { "epoch": 1.083774005009741, "grad_norm": 0.44155022501945496, "learning_rate": 8.060653926330817e-06, "loss": 0.4825, "step": 1298 }, { "epoch": 1.0846089618703034, "grad_norm": 0.36891528964042664, "learning_rate": 8.056808105434425e-06, "loss": 0.3905, "step": 1299 }, { "epoch": 1.0854439187308655, "grad_norm": 0.3900775611400604, "learning_rate": 8.052959394563793e-06, "loss": 0.4742, "step": 1300 }, { "epoch": 1.0862788755914279, "grad_norm": 0.36817318201065063, "learning_rate": 8.049107797357582e-06, "loss": 0.4481, "step": 1301 }, { "epoch": 1.08711383245199, "grad_norm": 0.3819807171821594, "learning_rate": 8.045253317457173e-06, "loss": 0.4185, "step": 1302 }, { "epoch": 1.0879487893125521, "grad_norm": 0.3877802789211273, "learning_rate": 8.04139595850668e-06, "loss": 0.4485, "step": 1303 }, { "epoch": 1.0887837461731145, "grad_norm": 0.37927189469337463, "learning_rate": 8.037535724152934e-06, "loss": 0.4373, "step": 1304 }, { "epoch": 1.0896187030336766, "grad_norm": 0.39462512731552124, "learning_rate": 8.033672618045485e-06, "loss": 0.4492, "step": 1305 }, { "epoch": 1.0904536598942387, "grad_norm": 0.39152246713638306, "learning_rate": 8.029806643836603e-06, "loss": 0.4262, "step": 1306 }, { "epoch": 1.091288616754801, "grad_norm": 0.3869673013687134, "learning_rate": 8.025937805181263e-06, "loss": 0.3871, "step": 1307 }, { "epoch": 1.0921235736153632, "grad_norm": 0.374639630317688, "learning_rate": 8.022066105737153e-06, "loss": 0.487, "step": 1308 }, { "epoch": 1.0929585304759255, "grad_norm": 0.3994144797325134, "learning_rate": 8.018191549164663e-06, "loss": 0.4628, "step": 1309 }, { "epoch": 1.0937934873364876, "grad_norm": 0.35111263394355774, "learning_rate": 8.014314139126882e-06, "loss": 0.4371, "step": 1310 }, { "epoch": 1.0946284441970497, "grad_norm": 0.35921454429626465, "learning_rate": 8.010433879289602e-06, "loss": 0.412, "step": 1311 }, { "epoch": 1.095463401057612, "grad_norm": 0.3948197662830353, "learning_rate": 8.006550773321308e-06, "loss": 0.4134, "step": 1312 }, { "epoch": 1.0962983579181742, "grad_norm": 0.3823160231113434, "learning_rate": 8.002664824893173e-06, "loss": 0.463, "step": 1313 }, { "epoch": 1.0971333147787363, "grad_norm": 0.4031068980693817, "learning_rate": 7.998776037679061e-06, "loss": 0.4591, "step": 1314 }, { "epoch": 1.0979682716392987, "grad_norm": 0.3840579688549042, "learning_rate": 7.994884415355513e-06, "loss": 0.4111, "step": 1315 }, { "epoch": 1.0988032284998608, "grad_norm": 0.39118051528930664, "learning_rate": 7.990989961601758e-06, "loss": 0.4078, "step": 1316 }, { "epoch": 1.0996381853604231, "grad_norm": 0.4263252913951874, "learning_rate": 7.987092680099699e-06, "loss": 0.4737, "step": 1317 }, { "epoch": 1.1004731422209852, "grad_norm": 0.4168325960636139, "learning_rate": 7.983192574533913e-06, "loss": 0.3847, "step": 1318 }, { "epoch": 1.1013080990815474, "grad_norm": 0.42871788144111633, "learning_rate": 7.979289648591645e-06, "loss": 0.4871, "step": 1319 }, { "epoch": 1.1021430559421097, "grad_norm": 0.3897936940193176, "learning_rate": 7.975383905962812e-06, "loss": 0.4238, "step": 1320 }, { "epoch": 1.1029780128026718, "grad_norm": 0.42333969473838806, "learning_rate": 7.971475350339982e-06, "loss": 0.4207, "step": 1321 }, { "epoch": 1.1038129696632342, "grad_norm": 0.3542926013469696, "learning_rate": 7.967563985418397e-06, "loss": 0.4113, "step": 1322 }, { "epoch": 1.1046479265237963, "grad_norm": 0.36245787143707275, "learning_rate": 7.963649814895945e-06, "loss": 0.4225, "step": 1323 }, { "epoch": 1.1054828833843584, "grad_norm": 0.36288517713546753, "learning_rate": 7.959732842473171e-06, "loss": 0.3856, "step": 1324 }, { "epoch": 1.1063178402449207, "grad_norm": 0.4459775686264038, "learning_rate": 7.955813071853266e-06, "loss": 0.4437, "step": 1325 }, { "epoch": 1.1071527971054829, "grad_norm": 0.3226439356803894, "learning_rate": 7.951890506742072e-06, "loss": 0.4062, "step": 1326 }, { "epoch": 1.107987753966045, "grad_norm": 0.41109272837638855, "learning_rate": 7.947965150848065e-06, "loss": 0.4725, "step": 1327 }, { "epoch": 1.1088227108266073, "grad_norm": 0.37764567136764526, "learning_rate": 7.944037007882365e-06, "loss": 0.3927, "step": 1328 }, { "epoch": 1.1096576676871694, "grad_norm": 0.3916904926300049, "learning_rate": 7.940106081558726e-06, "loss": 0.4055, "step": 1329 }, { "epoch": 1.1104926245477318, "grad_norm": 0.37729427218437195, "learning_rate": 7.936172375593532e-06, "loss": 0.4398, "step": 1330 }, { "epoch": 1.111327581408294, "grad_norm": 0.32690486311912537, "learning_rate": 7.932235893705797e-06, "loss": 0.3824, "step": 1331 }, { "epoch": 1.112162538268856, "grad_norm": 0.3848656415939331, "learning_rate": 7.92829663961716e-06, "loss": 0.4329, "step": 1332 }, { "epoch": 1.1129974951294184, "grad_norm": 0.3693627119064331, "learning_rate": 7.924354617051873e-06, "loss": 0.4255, "step": 1333 }, { "epoch": 1.1138324519899805, "grad_norm": 0.3526134788990021, "learning_rate": 7.92040982973682e-06, "loss": 0.4468, "step": 1334 }, { "epoch": 1.1146674088505426, "grad_norm": 0.32349884510040283, "learning_rate": 7.916462281401485e-06, "loss": 0.4197, "step": 1335 }, { "epoch": 1.115502365711105, "grad_norm": 0.39520129561424255, "learning_rate": 7.912511975777968e-06, "loss": 0.454, "step": 1336 }, { "epoch": 1.116337322571667, "grad_norm": 0.35747939348220825, "learning_rate": 7.908558916600977e-06, "loss": 0.4206, "step": 1337 }, { "epoch": 1.1171722794322294, "grad_norm": 0.3532836437225342, "learning_rate": 7.904603107607822e-06, "loss": 0.4005, "step": 1338 }, { "epoch": 1.1180072362927915, "grad_norm": 0.36486977338790894, "learning_rate": 7.900644552538413e-06, "loss": 0.4397, "step": 1339 }, { "epoch": 1.1188421931533536, "grad_norm": 0.34692564606666565, "learning_rate": 7.896683255135255e-06, "loss": 0.4235, "step": 1340 }, { "epoch": 1.119677150013916, "grad_norm": 0.3880660831928253, "learning_rate": 7.892719219143446e-06, "loss": 0.4289, "step": 1341 }, { "epoch": 1.120512106874478, "grad_norm": 0.42362526059150696, "learning_rate": 7.888752448310676e-06, "loss": 0.4617, "step": 1342 }, { "epoch": 1.1213470637350405, "grad_norm": 0.3752431869506836, "learning_rate": 7.884782946387215e-06, "loss": 0.4475, "step": 1343 }, { "epoch": 1.1221820205956026, "grad_norm": 0.37660324573516846, "learning_rate": 7.880810717125921e-06, "loss": 0.4339, "step": 1344 }, { "epoch": 1.1230169774561647, "grad_norm": 0.4151862859725952, "learning_rate": 7.876835764282226e-06, "loss": 0.4094, "step": 1345 }, { "epoch": 1.123851934316727, "grad_norm": 0.3605170249938965, "learning_rate": 7.872858091614137e-06, "loss": 0.432, "step": 1346 }, { "epoch": 1.1246868911772891, "grad_norm": 0.38572126626968384, "learning_rate": 7.868877702882238e-06, "loss": 0.4125, "step": 1347 }, { "epoch": 1.1255218480378515, "grad_norm": 0.40583887696266174, "learning_rate": 7.864894601849676e-06, "loss": 0.4442, "step": 1348 }, { "epoch": 1.1263568048984136, "grad_norm": 0.32703784108161926, "learning_rate": 7.860908792282162e-06, "loss": 0.3827, "step": 1349 }, { "epoch": 1.1271917617589757, "grad_norm": 0.4338562786579132, "learning_rate": 7.856920277947969e-06, "loss": 0.4601, "step": 1350 }, { "epoch": 1.128026718619538, "grad_norm": 0.3768455684185028, "learning_rate": 7.852929062617924e-06, "loss": 0.408, "step": 1351 }, { "epoch": 1.1288616754801002, "grad_norm": 0.3811922073364258, "learning_rate": 7.848935150065415e-06, "loss": 0.4339, "step": 1352 }, { "epoch": 1.1296966323406623, "grad_norm": 0.4405190050601959, "learning_rate": 7.844938544066376e-06, "loss": 0.4405, "step": 1353 }, { "epoch": 1.1305315892012247, "grad_norm": 0.38890495896339417, "learning_rate": 7.840939248399284e-06, "loss": 0.3871, "step": 1354 }, { "epoch": 1.1313665460617868, "grad_norm": 0.39828523993492126, "learning_rate": 7.836937266845164e-06, "loss": 0.4427, "step": 1355 }, { "epoch": 1.132201502922349, "grad_norm": 0.361687570810318, "learning_rate": 7.832932603187574e-06, "loss": 0.4244, "step": 1356 }, { "epoch": 1.1330364597829112, "grad_norm": 0.4024803638458252, "learning_rate": 7.828925261212619e-06, "loss": 0.4135, "step": 1357 }, { "epoch": 1.1338714166434734, "grad_norm": 0.4047793447971344, "learning_rate": 7.824915244708924e-06, "loss": 0.4285, "step": 1358 }, { "epoch": 1.1347063735040357, "grad_norm": 0.4542415142059326, "learning_rate": 7.820902557467648e-06, "loss": 0.4374, "step": 1359 }, { "epoch": 1.1355413303645978, "grad_norm": 0.3767944872379303, "learning_rate": 7.816887203282477e-06, "loss": 0.4084, "step": 1360 }, { "epoch": 1.13637628722516, "grad_norm": 0.39648690819740295, "learning_rate": 7.812869185949613e-06, "loss": 0.4225, "step": 1361 }, { "epoch": 1.1372112440857223, "grad_norm": 0.4014459550380707, "learning_rate": 7.808848509267782e-06, "loss": 0.4074, "step": 1362 }, { "epoch": 1.1380462009462844, "grad_norm": 0.43346646428108215, "learning_rate": 7.804825177038218e-06, "loss": 0.4196, "step": 1363 }, { "epoch": 1.1388811578068467, "grad_norm": 0.4019095003604889, "learning_rate": 7.800799193064669e-06, "loss": 0.4629, "step": 1364 }, { "epoch": 1.1397161146674089, "grad_norm": 0.3485857844352722, "learning_rate": 7.796770561153392e-06, "loss": 0.4063, "step": 1365 }, { "epoch": 1.140551071527971, "grad_norm": 0.46879488229751587, "learning_rate": 7.792739285113142e-06, "loss": 0.433, "step": 1366 }, { "epoch": 1.1413860283885333, "grad_norm": 0.4011640250682831, "learning_rate": 7.788705368755178e-06, "loss": 0.4297, "step": 1367 }, { "epoch": 1.1422209852490954, "grad_norm": 0.39857369661331177, "learning_rate": 7.784668815893256e-06, "loss": 0.4483, "step": 1368 }, { "epoch": 1.1430559421096578, "grad_norm": 0.4166755676269531, "learning_rate": 7.78062963034362e-06, "loss": 0.4033, "step": 1369 }, { "epoch": 1.14389089897022, "grad_norm": 0.37119999527931213, "learning_rate": 7.776587815925007e-06, "loss": 0.4436, "step": 1370 }, { "epoch": 1.144725855830782, "grad_norm": 0.39067912101745605, "learning_rate": 7.772543376458638e-06, "loss": 0.4087, "step": 1371 }, { "epoch": 1.1455608126913444, "grad_norm": 0.39946749806404114, "learning_rate": 7.768496315768217e-06, "loss": 0.4694, "step": 1372 }, { "epoch": 1.1463957695519065, "grad_norm": 0.3958077132701874, "learning_rate": 7.76444663767992e-06, "loss": 0.4352, "step": 1373 }, { "epoch": 1.1472307264124686, "grad_norm": 0.39232560992240906, "learning_rate": 7.760394346022412e-06, "loss": 0.4368, "step": 1374 }, { "epoch": 1.148065683273031, "grad_norm": 0.4417002499103546, "learning_rate": 7.756339444626809e-06, "loss": 0.4717, "step": 1375 }, { "epoch": 1.148900640133593, "grad_norm": 0.38365525007247925, "learning_rate": 7.75228193732671e-06, "loss": 0.4422, "step": 1376 }, { "epoch": 1.1497355969941554, "grad_norm": 0.3846856951713562, "learning_rate": 7.748221827958174e-06, "loss": 0.4014, "step": 1377 }, { "epoch": 1.1505705538547175, "grad_norm": 0.40068158507347107, "learning_rate": 7.744159120359715e-06, "loss": 0.4299, "step": 1378 }, { "epoch": 1.1514055107152796, "grad_norm": 0.3794923722743988, "learning_rate": 7.74009381837231e-06, "loss": 0.4649, "step": 1379 }, { "epoch": 1.152240467575842, "grad_norm": 0.35061946511268616, "learning_rate": 7.736025925839382e-06, "loss": 0.4004, "step": 1380 }, { "epoch": 1.153075424436404, "grad_norm": 0.37009939551353455, "learning_rate": 7.731955446606809e-06, "loss": 0.4065, "step": 1381 }, { "epoch": 1.1539103812969662, "grad_norm": 0.376628577709198, "learning_rate": 7.727882384522914e-06, "loss": 0.4315, "step": 1382 }, { "epoch": 1.1547453381575286, "grad_norm": 0.46997928619384766, "learning_rate": 7.723806743438458e-06, "loss": 0.4762, "step": 1383 }, { "epoch": 1.1555802950180907, "grad_norm": 0.35374686121940613, "learning_rate": 7.719728527206645e-06, "loss": 0.3963, "step": 1384 }, { "epoch": 1.156415251878653, "grad_norm": 0.38668686151504517, "learning_rate": 7.715647739683108e-06, "loss": 0.4269, "step": 1385 }, { "epoch": 1.1572502087392151, "grad_norm": 0.4225122332572937, "learning_rate": 7.711564384725916e-06, "loss": 0.4115, "step": 1386 }, { "epoch": 1.1580851655997773, "grad_norm": 0.3539647161960602, "learning_rate": 7.707478466195562e-06, "loss": 0.4171, "step": 1387 }, { "epoch": 1.1589201224603396, "grad_norm": 0.44125136733055115, "learning_rate": 7.703389987954967e-06, "loss": 0.5022, "step": 1388 }, { "epoch": 1.1597550793209017, "grad_norm": 0.34770938754081726, "learning_rate": 7.699298953869466e-06, "loss": 0.444, "step": 1389 }, { "epoch": 1.160590036181464, "grad_norm": 0.3278290331363678, "learning_rate": 7.695205367806816e-06, "loss": 0.4003, "step": 1390 }, { "epoch": 1.1614249930420262, "grad_norm": 0.4300927519798279, "learning_rate": 7.691109233637182e-06, "loss": 0.437, "step": 1391 }, { "epoch": 1.1622599499025883, "grad_norm": 0.3792922794818878, "learning_rate": 7.687010555233141e-06, "loss": 0.4262, "step": 1392 }, { "epoch": 1.1630949067631506, "grad_norm": 0.37229329347610474, "learning_rate": 7.682909336469674e-06, "loss": 0.4278, "step": 1393 }, { "epoch": 1.1639298636237128, "grad_norm": 0.42937904596328735, "learning_rate": 7.678805581224165e-06, "loss": 0.4196, "step": 1394 }, { "epoch": 1.164764820484275, "grad_norm": 0.3927656412124634, "learning_rate": 7.674699293376397e-06, "loss": 0.4257, "step": 1395 }, { "epoch": 1.1655997773448372, "grad_norm": 0.36888307332992554, "learning_rate": 7.670590476808541e-06, "loss": 0.4137, "step": 1396 }, { "epoch": 1.1664347342053993, "grad_norm": 0.36243802309036255, "learning_rate": 7.666479135405167e-06, "loss": 0.4649, "step": 1397 }, { "epoch": 1.1672696910659617, "grad_norm": 0.35102760791778564, "learning_rate": 7.662365273053228e-06, "loss": 0.4061, "step": 1398 }, { "epoch": 1.1681046479265238, "grad_norm": 0.36719024181365967, "learning_rate": 7.65824889364206e-06, "loss": 0.4157, "step": 1399 }, { "epoch": 1.168939604787086, "grad_norm": 0.4746408462524414, "learning_rate": 7.654130001063381e-06, "loss": 0.4755, "step": 1400 }, { "epoch": 1.1697745616476483, "grad_norm": 0.37389397621154785, "learning_rate": 7.65000859921128e-06, "loss": 0.4283, "step": 1401 }, { "epoch": 1.1706095185082104, "grad_norm": 0.3759618103504181, "learning_rate": 7.645884691982227e-06, "loss": 0.4272, "step": 1402 }, { "epoch": 1.1714444753687725, "grad_norm": 0.41614875197410583, "learning_rate": 7.64175828327505e-06, "loss": 0.4395, "step": 1403 }, { "epoch": 1.1722794322293348, "grad_norm": 0.4279758930206299, "learning_rate": 7.63762937699095e-06, "loss": 0.4375, "step": 1404 }, { "epoch": 1.173114389089897, "grad_norm": 0.38250187039375305, "learning_rate": 7.633497977033488e-06, "loss": 0.4362, "step": 1405 }, { "epoch": 1.1739493459504593, "grad_norm": 0.3615141808986664, "learning_rate": 7.629364087308579e-06, "loss": 0.4116, "step": 1406 }, { "epoch": 1.1747843028110214, "grad_norm": 0.39515697956085205, "learning_rate": 7.625227711724497e-06, "loss": 0.4512, "step": 1407 }, { "epoch": 1.1756192596715835, "grad_norm": 0.38575851917266846, "learning_rate": 7.621088854191858e-06, "loss": 0.3974, "step": 1408 }, { "epoch": 1.1764542165321459, "grad_norm": 0.39303335547447205, "learning_rate": 7.6169475186236344e-06, "loss": 0.4217, "step": 1409 }, { "epoch": 1.177289173392708, "grad_norm": 0.355754554271698, "learning_rate": 7.612803708935136e-06, "loss": 0.439, "step": 1410 }, { "epoch": 1.1781241302532703, "grad_norm": 0.34459832310676575, "learning_rate": 7.608657429044012e-06, "loss": 0.4225, "step": 1411 }, { "epoch": 1.1789590871138325, "grad_norm": 0.3896268606185913, "learning_rate": 7.6045086828702465e-06, "loss": 0.4107, "step": 1412 }, { "epoch": 1.1797940439743946, "grad_norm": 0.3907484710216522, "learning_rate": 7.600357474336157e-06, "loss": 0.4359, "step": 1413 }, { "epoch": 1.180629000834957, "grad_norm": 0.37272027134895325, "learning_rate": 7.596203807366389e-06, "loss": 0.4368, "step": 1414 }, { "epoch": 1.181463957695519, "grad_norm": 0.3808143734931946, "learning_rate": 7.59204768588791e-06, "loss": 0.4102, "step": 1415 }, { "epoch": 1.1822989145560814, "grad_norm": 0.40654709935188293, "learning_rate": 7.587889113830009e-06, "loss": 0.4614, "step": 1416 }, { "epoch": 1.1831338714166435, "grad_norm": 0.43900173902511597, "learning_rate": 7.583728095124296e-06, "loss": 0.4222, "step": 1417 }, { "epoch": 1.1839688282772056, "grad_norm": 0.3915312588214874, "learning_rate": 7.579564633704687e-06, "loss": 0.4381, "step": 1418 }, { "epoch": 1.184803785137768, "grad_norm": 0.4529954195022583, "learning_rate": 7.575398733507411e-06, "loss": 0.4365, "step": 1419 }, { "epoch": 1.18563874199833, "grad_norm": 0.4138186275959015, "learning_rate": 7.5712303984710045e-06, "loss": 0.3935, "step": 1420 }, { "epoch": 1.1864736988588922, "grad_norm": 0.37797433137893677, "learning_rate": 7.567059632536303e-06, "loss": 0.4253, "step": 1421 }, { "epoch": 1.1873086557194545, "grad_norm": 0.46281054615974426, "learning_rate": 7.56288643964644e-06, "loss": 0.4024, "step": 1422 }, { "epoch": 1.1881436125800167, "grad_norm": 0.43259483575820923, "learning_rate": 7.558710823746847e-06, "loss": 0.4683, "step": 1423 }, { "epoch": 1.1889785694405788, "grad_norm": 0.37563300132751465, "learning_rate": 7.55453278878524e-06, "loss": 0.4153, "step": 1424 }, { "epoch": 1.1898135263011411, "grad_norm": 0.40450602769851685, "learning_rate": 7.550352338711632e-06, "loss": 0.3758, "step": 1425 }, { "epoch": 1.1906484831617032, "grad_norm": 0.4231198728084564, "learning_rate": 7.546169477478308e-06, "loss": 0.3907, "step": 1426 }, { "epoch": 1.1914834400222656, "grad_norm": 0.39251431822776794, "learning_rate": 7.541984209039841e-06, "loss": 0.4505, "step": 1427 }, { "epoch": 1.1923183968828277, "grad_norm": 0.5088784694671631, "learning_rate": 7.537796537353075e-06, "loss": 0.4718, "step": 1428 }, { "epoch": 1.1931533537433898, "grad_norm": 0.35804685950279236, "learning_rate": 7.533606466377127e-06, "loss": 0.4234, "step": 1429 }, { "epoch": 1.1939883106039522, "grad_norm": 0.4156755208969116, "learning_rate": 7.5294140000733855e-06, "loss": 0.402, "step": 1430 }, { "epoch": 1.1948232674645143, "grad_norm": 0.4277632236480713, "learning_rate": 7.525219142405501e-06, "loss": 0.4224, "step": 1431 }, { "epoch": 1.1956582243250766, "grad_norm": 0.39706358313560486, "learning_rate": 7.521021897339382e-06, "loss": 0.4421, "step": 1432 }, { "epoch": 1.1964931811856387, "grad_norm": 0.3444989025592804, "learning_rate": 7.516822268843201e-06, "loss": 0.3654, "step": 1433 }, { "epoch": 1.1973281380462009, "grad_norm": 0.41061943769454956, "learning_rate": 7.512620260887378e-06, "loss": 0.4378, "step": 1434 }, { "epoch": 1.1981630949067632, "grad_norm": 0.39634764194488525, "learning_rate": 7.508415877444586e-06, "loss": 0.4825, "step": 1435 }, { "epoch": 1.1989980517673253, "grad_norm": 0.3691011369228363, "learning_rate": 7.504209122489739e-06, "loss": 0.3951, "step": 1436 }, { "epoch": 1.1998330086278877, "grad_norm": 0.43228209018707275, "learning_rate": 7.500000000000001e-06, "loss": 0.4901, "step": 1437 }, { "epoch": 1.2006679654884498, "grad_norm": 0.35838520526885986, "learning_rate": 7.495788513954767e-06, "loss": 0.4007, "step": 1438 }, { "epoch": 1.201502922349012, "grad_norm": 0.37316378951072693, "learning_rate": 7.491574668335669e-06, "loss": 0.3865, "step": 1439 }, { "epoch": 1.2023378792095742, "grad_norm": 0.4492852985858917, "learning_rate": 7.487358467126573e-06, "loss": 0.4366, "step": 1440 }, { "epoch": 1.2031728360701364, "grad_norm": 0.37076810002326965, "learning_rate": 7.483139914313567e-06, "loss": 0.4232, "step": 1441 }, { "epoch": 1.2040077929306985, "grad_norm": 0.34958863258361816, "learning_rate": 7.478919013884965e-06, "loss": 0.4253, "step": 1442 }, { "epoch": 1.2048427497912608, "grad_norm": 0.5043339133262634, "learning_rate": 7.4746957698312985e-06, "loss": 0.4458, "step": 1443 }, { "epoch": 1.205677706651823, "grad_norm": 0.32360202074050903, "learning_rate": 7.47047018614532e-06, "loss": 0.3807, "step": 1444 }, { "epoch": 1.2065126635123853, "grad_norm": 0.4221777617931366, "learning_rate": 7.466242266821986e-06, "loss": 0.4508, "step": 1445 }, { "epoch": 1.2073476203729474, "grad_norm": 0.3798501491546631, "learning_rate": 7.462012015858468e-06, "loss": 0.4365, "step": 1446 }, { "epoch": 1.2081825772335095, "grad_norm": 0.33938702940940857, "learning_rate": 7.457779437254138e-06, "loss": 0.3897, "step": 1447 }, { "epoch": 1.2090175340940719, "grad_norm": 0.4006573557853699, "learning_rate": 7.45354453501057e-06, "loss": 0.4472, "step": 1448 }, { "epoch": 1.209852490954634, "grad_norm": 0.426800400018692, "learning_rate": 7.449307313131533e-06, "loss": 0.4715, "step": 1449 }, { "epoch": 1.210687447815196, "grad_norm": 0.38048478960990906, "learning_rate": 7.445067775622994e-06, "loss": 0.4227, "step": 1450 }, { "epoch": 1.2115224046757584, "grad_norm": 0.3431170582771301, "learning_rate": 7.440825926493104e-06, "loss": 0.3914, "step": 1451 }, { "epoch": 1.2123573615363206, "grad_norm": 0.3694617748260498, "learning_rate": 7.4365817697522e-06, "loss": 0.4364, "step": 1452 }, { "epoch": 1.213192318396883, "grad_norm": 0.3663000464439392, "learning_rate": 7.432335309412805e-06, "loss": 0.4274, "step": 1453 }, { "epoch": 1.214027275257445, "grad_norm": 0.4469339847564697, "learning_rate": 7.428086549489615e-06, "loss": 0.4304, "step": 1454 }, { "epoch": 1.2148622321180071, "grad_norm": 0.41497451066970825, "learning_rate": 7.423835493999501e-06, "loss": 0.4551, "step": 1455 }, { "epoch": 1.2156971889785695, "grad_norm": 0.4033588767051697, "learning_rate": 7.419582146961507e-06, "loss": 0.4166, "step": 1456 }, { "epoch": 1.2165321458391316, "grad_norm": 0.43954309821128845, "learning_rate": 7.415326512396841e-06, "loss": 0.403, "step": 1457 }, { "epoch": 1.217367102699694, "grad_norm": 0.4046994149684906, "learning_rate": 7.411068594328876e-06, "loss": 0.441, "step": 1458 }, { "epoch": 1.218202059560256, "grad_norm": 0.33518078923225403, "learning_rate": 7.406808396783137e-06, "loss": 0.4381, "step": 1459 }, { "epoch": 1.2190370164208182, "grad_norm": 0.4122202694416046, "learning_rate": 7.402545923787317e-06, "loss": 0.4225, "step": 1460 }, { "epoch": 1.2198719732813805, "grad_norm": 0.36203134059906006, "learning_rate": 7.398281179371247e-06, "loss": 0.4351, "step": 1461 }, { "epoch": 1.2207069301419426, "grad_norm": 0.3898944556713104, "learning_rate": 7.394014167566912e-06, "loss": 0.4205, "step": 1462 }, { "epoch": 1.221541887002505, "grad_norm": 0.4227590262889862, "learning_rate": 7.389744892408442e-06, "loss": 0.4183, "step": 1463 }, { "epoch": 1.222376843863067, "grad_norm": 0.3240181505680084, "learning_rate": 7.385473357932102e-06, "loss": 0.3937, "step": 1464 }, { "epoch": 1.2232118007236292, "grad_norm": 0.41274821758270264, "learning_rate": 7.381199568176297e-06, "loss": 0.4642, "step": 1465 }, { "epoch": 1.2240467575841916, "grad_norm": 0.3221842050552368, "learning_rate": 7.376923527181561e-06, "loss": 0.3878, "step": 1466 }, { "epoch": 1.2248817144447537, "grad_norm": 0.43379315733909607, "learning_rate": 7.37264523899056e-06, "loss": 0.4708, "step": 1467 }, { "epoch": 1.2257166713053158, "grad_norm": 0.3406892716884613, "learning_rate": 7.368364707648082e-06, "loss": 0.3966, "step": 1468 }, { "epoch": 1.2265516281658781, "grad_norm": 0.3840421736240387, "learning_rate": 7.364081937201035e-06, "loss": 0.4186, "step": 1469 }, { "epoch": 1.2273865850264403, "grad_norm": 0.3700205981731415, "learning_rate": 7.3597969316984475e-06, "loss": 0.4126, "step": 1470 }, { "epoch": 1.2282215418870024, "grad_norm": 0.37639909982681274, "learning_rate": 7.3555096951914585e-06, "loss": 0.4481, "step": 1471 }, { "epoch": 1.2290564987475647, "grad_norm": 0.43710729479789734, "learning_rate": 7.351220231733314e-06, "loss": 0.4224, "step": 1472 }, { "epoch": 1.2298914556081268, "grad_norm": 0.4142252802848816, "learning_rate": 7.34692854537937e-06, "loss": 0.3994, "step": 1473 }, { "epoch": 1.2307264124686892, "grad_norm": 0.37587112188339233, "learning_rate": 7.3426346401870826e-06, "loss": 0.3912, "step": 1474 }, { "epoch": 1.2315613693292513, "grad_norm": 0.5761707425117493, "learning_rate": 7.338338520216004e-06, "loss": 0.5218, "step": 1475 }, { "epoch": 1.2323963261898134, "grad_norm": 0.37092897295951843, "learning_rate": 7.3340401895277816e-06, "loss": 0.4164, "step": 1476 }, { "epoch": 1.2332312830503758, "grad_norm": 0.43144506216049194, "learning_rate": 7.329739652186153e-06, "loss": 0.3918, "step": 1477 }, { "epoch": 1.234066239910938, "grad_norm": 0.4012158215045929, "learning_rate": 7.325436912256943e-06, "loss": 0.4317, "step": 1478 }, { "epoch": 1.2349011967715002, "grad_norm": 0.4313543438911438, "learning_rate": 7.321131973808052e-06, "loss": 0.4626, "step": 1479 }, { "epoch": 1.2357361536320624, "grad_norm": 0.3709089159965515, "learning_rate": 7.31682484090947e-06, "loss": 0.441, "step": 1480 }, { "epoch": 1.2365711104926245, "grad_norm": 0.3243822455406189, "learning_rate": 7.3125155176332555e-06, "loss": 0.3841, "step": 1481 }, { "epoch": 1.2374060673531868, "grad_norm": 0.41153180599212646, "learning_rate": 7.308204008053536e-06, "loss": 0.493, "step": 1482 }, { "epoch": 1.238241024213749, "grad_norm": 0.3562588095664978, "learning_rate": 7.303890316246509e-06, "loss": 0.4295, "step": 1483 }, { "epoch": 1.2390759810743113, "grad_norm": 0.3693108856678009, "learning_rate": 7.299574446290436e-06, "loss": 0.4142, "step": 1484 }, { "epoch": 1.2399109379348734, "grad_norm": 0.3670476973056793, "learning_rate": 7.295256402265636e-06, "loss": 0.4149, "step": 1485 }, { "epoch": 1.2407458947954355, "grad_norm": 0.37798240780830383, "learning_rate": 7.29093618825448e-06, "loss": 0.4315, "step": 1486 }, { "epoch": 1.2415808516559979, "grad_norm": 0.36163365840911865, "learning_rate": 7.286613808341401e-06, "loss": 0.425, "step": 1487 }, { "epoch": 1.24241580851656, "grad_norm": 0.3699703514575958, "learning_rate": 7.282289266612868e-06, "loss": 0.4247, "step": 1488 }, { "epoch": 1.243250765377122, "grad_norm": 0.3782484233379364, "learning_rate": 7.277962567157397e-06, "loss": 0.3994, "step": 1489 }, { "epoch": 1.2440857222376844, "grad_norm": 0.37684112787246704, "learning_rate": 7.273633714065551e-06, "loss": 0.4303, "step": 1490 }, { "epoch": 1.2449206790982466, "grad_norm": 0.48724839091300964, "learning_rate": 7.269302711429921e-06, "loss": 0.4787, "step": 1491 }, { "epoch": 1.2457556359588087, "grad_norm": 0.4013158082962036, "learning_rate": 7.264969563345133e-06, "loss": 0.4294, "step": 1492 }, { "epoch": 1.246590592819371, "grad_norm": 0.4213993549346924, "learning_rate": 7.260634273907843e-06, "loss": 0.4037, "step": 1493 }, { "epoch": 1.2474255496799331, "grad_norm": 0.44399935007095337, "learning_rate": 7.256296847216727e-06, "loss": 0.4194, "step": 1494 }, { "epoch": 1.2482605065404955, "grad_norm": 0.37258341908454895, "learning_rate": 7.251957287372486e-06, "loss": 0.4297, "step": 1495 }, { "epoch": 1.2490954634010576, "grad_norm": 0.3962920010089874, "learning_rate": 7.2476155984778374e-06, "loss": 0.4265, "step": 1496 }, { "epoch": 1.2499304202616197, "grad_norm": 0.3927966356277466, "learning_rate": 7.243271784637509e-06, "loss": 0.4258, "step": 1497 }, { "epoch": 1.250765377122182, "grad_norm": 0.36338546872138977, "learning_rate": 7.23892584995824e-06, "loss": 0.4502, "step": 1498 }, { "epoch": 1.2516003339827442, "grad_norm": 0.3555108606815338, "learning_rate": 7.234577798548771e-06, "loss": 0.423, "step": 1499 }, { "epoch": 1.2524352908433065, "grad_norm": 0.3596709668636322, "learning_rate": 7.230227634519848e-06, "loss": 0.403, "step": 1500 }, { "epoch": 1.2532702477038686, "grad_norm": 0.39901095628738403, "learning_rate": 7.225875361984213e-06, "loss": 0.4226, "step": 1501 }, { "epoch": 1.2541052045644308, "grad_norm": 0.39038336277008057, "learning_rate": 7.2215209850566e-06, "loss": 0.4137, "step": 1502 }, { "epoch": 1.254940161424993, "grad_norm": 0.3622414171695709, "learning_rate": 7.217164507853734e-06, "loss": 0.4222, "step": 1503 }, { "epoch": 1.2557751182855552, "grad_norm": 0.38064002990722656, "learning_rate": 7.212805934494327e-06, "loss": 0.4231, "step": 1504 }, { "epoch": 1.2566100751461176, "grad_norm": 0.360566109418869, "learning_rate": 7.208445269099067e-06, "loss": 0.4615, "step": 1505 }, { "epoch": 1.2574450320066797, "grad_norm": 0.3850138187408447, "learning_rate": 7.204082515790627e-06, "loss": 0.3887, "step": 1506 }, { "epoch": 1.2582799888672418, "grad_norm": 0.3767158091068268, "learning_rate": 7.199717678693652e-06, "loss": 0.4249, "step": 1507 }, { "epoch": 1.2591149457278041, "grad_norm": 0.36841699481010437, "learning_rate": 7.195350761934753e-06, "loss": 0.4444, "step": 1508 }, { "epoch": 1.2599499025883663, "grad_norm": 0.4083330035209656, "learning_rate": 7.19098176964251e-06, "loss": 0.4673, "step": 1509 }, { "epoch": 1.2607848594489286, "grad_norm": 0.3505357801914215, "learning_rate": 7.186610705947467e-06, "loss": 0.429, "step": 1510 }, { "epoch": 1.2616198163094907, "grad_norm": 0.40056201815605164, "learning_rate": 7.1822375749821234e-06, "loss": 0.4407, "step": 1511 }, { "epoch": 1.2624547731700528, "grad_norm": 0.418160080909729, "learning_rate": 7.177862380880935e-06, "loss": 0.456, "step": 1512 }, { "epoch": 1.263289730030615, "grad_norm": 0.36625027656555176, "learning_rate": 7.173485127780309e-06, "loss": 0.4733, "step": 1513 }, { "epoch": 1.2641246868911773, "grad_norm": 0.353231281042099, "learning_rate": 7.1691058198185935e-06, "loss": 0.4231, "step": 1514 }, { "epoch": 1.2649596437517394, "grad_norm": 0.3577207624912262, "learning_rate": 7.164724461136088e-06, "loss": 0.4017, "step": 1515 }, { "epoch": 1.2657946006123018, "grad_norm": 0.39048415422439575, "learning_rate": 7.160341055875022e-06, "loss": 0.4278, "step": 1516 }, { "epoch": 1.2666295574728639, "grad_norm": 0.3802052140235901, "learning_rate": 7.155955608179568e-06, "loss": 0.4154, "step": 1517 }, { "epoch": 1.267464514333426, "grad_norm": 0.3769386112689972, "learning_rate": 7.1515681221958235e-06, "loss": 0.4525, "step": 1518 }, { "epoch": 1.2682994711939883, "grad_norm": 0.357439249753952, "learning_rate": 7.147178602071815e-06, "loss": 0.4095, "step": 1519 }, { "epoch": 1.2691344280545505, "grad_norm": 0.38668394088745117, "learning_rate": 7.142787051957493e-06, "loss": 0.466, "step": 1520 }, { "epoch": 1.2699693849151128, "grad_norm": 0.31812748312950134, "learning_rate": 7.138393476004725e-06, "loss": 0.3986, "step": 1521 }, { "epoch": 1.270804341775675, "grad_norm": 0.40282902121543884, "learning_rate": 7.133997878367299e-06, "loss": 0.4777, "step": 1522 }, { "epoch": 1.271639298636237, "grad_norm": 0.39419421553611755, "learning_rate": 7.129600263200906e-06, "loss": 0.4246, "step": 1523 }, { "epoch": 1.2724742554967994, "grad_norm": 0.3512858748435974, "learning_rate": 7.125200634663153e-06, "loss": 0.4352, "step": 1524 }, { "epoch": 1.2733092123573615, "grad_norm": 0.3634817600250244, "learning_rate": 7.1207989969135435e-06, "loss": 0.4484, "step": 1525 }, { "epoch": 1.2741441692179238, "grad_norm": 0.36478474736213684, "learning_rate": 7.116395354113483e-06, "loss": 0.4382, "step": 1526 }, { "epoch": 1.274979126078486, "grad_norm": 0.37887340784072876, "learning_rate": 7.111989710426278e-06, "loss": 0.432, "step": 1527 }, { "epoch": 1.275814082939048, "grad_norm": 0.3799281120300293, "learning_rate": 7.107582070017118e-06, "loss": 0.4546, "step": 1528 }, { "epoch": 1.2766490397996104, "grad_norm": 0.42650091648101807, "learning_rate": 7.103172437053082e-06, "loss": 0.4413, "step": 1529 }, { "epoch": 1.2774839966601725, "grad_norm": 0.36384156346321106, "learning_rate": 7.098760815703139e-06, "loss": 0.3992, "step": 1530 }, { "epoch": 1.2783189535207349, "grad_norm": 0.34352535009384155, "learning_rate": 7.094347210138132e-06, "loss": 0.4591, "step": 1531 }, { "epoch": 1.279153910381297, "grad_norm": 0.3610605299472809, "learning_rate": 7.089931624530784e-06, "loss": 0.4156, "step": 1532 }, { "epoch": 1.2799888672418591, "grad_norm": 0.36012107133865356, "learning_rate": 7.085514063055683e-06, "loss": 0.4573, "step": 1533 }, { "epoch": 1.2808238241024212, "grad_norm": 0.3132774531841278, "learning_rate": 7.081094529889294e-06, "loss": 0.3857, "step": 1534 }, { "epoch": 1.2816587809629836, "grad_norm": 0.36206915974617004, "learning_rate": 7.0766730292099395e-06, "loss": 0.4572, "step": 1535 }, { "epoch": 1.282493737823546, "grad_norm": 0.3264034688472748, "learning_rate": 7.072249565197804e-06, "loss": 0.3759, "step": 1536 }, { "epoch": 1.283328694684108, "grad_norm": 0.3580075800418854, "learning_rate": 7.067824142034932e-06, "loss": 0.4235, "step": 1537 }, { "epoch": 1.2841636515446702, "grad_norm": 0.335429847240448, "learning_rate": 7.0633967639052125e-06, "loss": 0.4376, "step": 1538 }, { "epoch": 1.2849986084052323, "grad_norm": 0.3632265329360962, "learning_rate": 7.058967434994388e-06, "loss": 0.4899, "step": 1539 }, { "epoch": 1.2858335652657946, "grad_norm": 0.3417853116989136, "learning_rate": 7.054536159490046e-06, "loss": 0.4265, "step": 1540 }, { "epoch": 1.2866685221263567, "grad_norm": 0.3532632291316986, "learning_rate": 7.0501029415816135e-06, "loss": 0.4177, "step": 1541 }, { "epoch": 1.287503478986919, "grad_norm": 0.3698546290397644, "learning_rate": 7.04566778546035e-06, "loss": 0.4507, "step": 1542 }, { "epoch": 1.2883384358474812, "grad_norm": 0.3447801172733307, "learning_rate": 7.041230695319352e-06, "loss": 0.4275, "step": 1543 }, { "epoch": 1.2891733927080433, "grad_norm": 0.35934150218963623, "learning_rate": 7.0367916753535444e-06, "loss": 0.415, "step": 1544 }, { "epoch": 1.2900083495686057, "grad_norm": 0.41682201623916626, "learning_rate": 7.0323507297596735e-06, "loss": 0.4445, "step": 1545 }, { "epoch": 1.2908433064291678, "grad_norm": 0.3368593454360962, "learning_rate": 7.027907862736307e-06, "loss": 0.4212, "step": 1546 }, { "epoch": 1.2916782632897301, "grad_norm": 0.32976293563842773, "learning_rate": 7.023463078483834e-06, "loss": 0.4003, "step": 1547 }, { "epoch": 1.2925132201502922, "grad_norm": 0.4028090834617615, "learning_rate": 7.019016381204448e-06, "loss": 0.425, "step": 1548 }, { "epoch": 1.2933481770108544, "grad_norm": 0.4227384328842163, "learning_rate": 7.014567775102157e-06, "loss": 0.4567, "step": 1549 }, { "epoch": 1.2941831338714167, "grad_norm": 0.36035484075546265, "learning_rate": 7.010117264382772e-06, "loss": 0.4555, "step": 1550 }, { "epoch": 1.2950180907319788, "grad_norm": 0.33929184079170227, "learning_rate": 7.005664853253904e-06, "loss": 0.3999, "step": 1551 }, { "epoch": 1.2958530475925412, "grad_norm": 0.39322808384895325, "learning_rate": 7.001210545924962e-06, "loss": 0.4369, "step": 1552 }, { "epoch": 1.2966880044531033, "grad_norm": 0.36770233511924744, "learning_rate": 6.996754346607146e-06, "loss": 0.428, "step": 1553 }, { "epoch": 1.2975229613136654, "grad_norm": 0.33572500944137573, "learning_rate": 6.992296259513449e-06, "loss": 0.4291, "step": 1554 }, { "epoch": 1.2983579181742277, "grad_norm": 0.38223642110824585, "learning_rate": 6.987836288858641e-06, "loss": 0.4137, "step": 1555 }, { "epoch": 1.2991928750347899, "grad_norm": 0.3611829876899719, "learning_rate": 6.983374438859278e-06, "loss": 0.3985, "step": 1556 }, { "epoch": 1.3000278318953522, "grad_norm": 0.3854582905769348, "learning_rate": 6.978910713733696e-06, "loss": 0.4248, "step": 1557 }, { "epoch": 1.3008627887559143, "grad_norm": 0.3910920023918152, "learning_rate": 6.974445117701997e-06, "loss": 0.4515, "step": 1558 }, { "epoch": 1.3016977456164764, "grad_norm": 0.37637966871261597, "learning_rate": 6.969977654986053e-06, "loss": 0.4741, "step": 1559 }, { "epoch": 1.3025327024770386, "grad_norm": 0.3504483997821808, "learning_rate": 6.965508329809506e-06, "loss": 0.4271, "step": 1560 }, { "epoch": 1.303367659337601, "grad_norm": 0.3417828381061554, "learning_rate": 6.961037146397752e-06, "loss": 0.4372, "step": 1561 }, { "epoch": 1.304202616198163, "grad_norm": 0.35443708300590515, "learning_rate": 6.95656410897795e-06, "loss": 0.4334, "step": 1562 }, { "epoch": 1.3050375730587254, "grad_norm": 0.35186752676963806, "learning_rate": 6.952089221779006e-06, "loss": 0.4063, "step": 1563 }, { "epoch": 1.3058725299192875, "grad_norm": 0.36395788192749023, "learning_rate": 6.947612489031579e-06, "loss": 0.4554, "step": 1564 }, { "epoch": 1.3067074867798496, "grad_norm": 0.3479922413825989, "learning_rate": 6.943133914968072e-06, "loss": 0.4348, "step": 1565 }, { "epoch": 1.307542443640412, "grad_norm": 0.3531920313835144, "learning_rate": 6.938653503822628e-06, "loss": 0.4464, "step": 1566 }, { "epoch": 1.308377400500974, "grad_norm": 0.33208513259887695, "learning_rate": 6.934171259831126e-06, "loss": 0.4128, "step": 1567 }, { "epoch": 1.3092123573615364, "grad_norm": 0.3635927438735962, "learning_rate": 6.9296871872311825e-06, "loss": 0.4513, "step": 1568 }, { "epoch": 1.3100473142220985, "grad_norm": 0.3896380662918091, "learning_rate": 6.925201290262133e-06, "loss": 0.4579, "step": 1569 }, { "epoch": 1.3108822710826606, "grad_norm": 0.35925042629241943, "learning_rate": 6.9207135731650525e-06, "loss": 0.4533, "step": 1570 }, { "epoch": 1.311717227943223, "grad_norm": 0.3577198088169098, "learning_rate": 6.916224040182719e-06, "loss": 0.4429, "step": 1571 }, { "epoch": 1.312552184803785, "grad_norm": 0.3389522433280945, "learning_rate": 6.911732695559646e-06, "loss": 0.372, "step": 1572 }, { "epoch": 1.3133871416643474, "grad_norm": 0.4572816491127014, "learning_rate": 6.9072395435420415e-06, "loss": 0.4543, "step": 1573 }, { "epoch": 1.3142220985249096, "grad_norm": 0.35703933238983154, "learning_rate": 6.902744588377838e-06, "loss": 0.4597, "step": 1574 }, { "epoch": 1.3150570553854717, "grad_norm": 0.35128161311149597, "learning_rate": 6.898247834316662e-06, "loss": 0.449, "step": 1575 }, { "epoch": 1.315892012246034, "grad_norm": 0.36850157380104065, "learning_rate": 6.8937492856098465e-06, "loss": 0.4343, "step": 1576 }, { "epoch": 1.3167269691065961, "grad_norm": 0.40262097120285034, "learning_rate": 6.889248946510418e-06, "loss": 0.5009, "step": 1577 }, { "epoch": 1.3175619259671585, "grad_norm": 0.3233373463153839, "learning_rate": 6.884746821273099e-06, "loss": 0.4202, "step": 1578 }, { "epoch": 1.3183968828277206, "grad_norm": 0.3974681496620178, "learning_rate": 6.880242914154297e-06, "loss": 0.4297, "step": 1579 }, { "epoch": 1.3192318396882827, "grad_norm": 0.3215453326702118, "learning_rate": 6.8757372294121064e-06, "loss": 0.3867, "step": 1580 }, { "epoch": 1.3200667965488448, "grad_norm": 0.4182925820350647, "learning_rate": 6.871229771306301e-06, "loss": 0.4703, "step": 1581 }, { "epoch": 1.3209017534094072, "grad_norm": 0.33349665999412537, "learning_rate": 6.866720544098331e-06, "loss": 0.3876, "step": 1582 }, { "epoch": 1.3217367102699693, "grad_norm": 0.37202712893486023, "learning_rate": 6.862209552051322e-06, "loss": 0.4818, "step": 1583 }, { "epoch": 1.3225716671305316, "grad_norm": 0.41039517521858215, "learning_rate": 6.857696799430064e-06, "loss": 0.4827, "step": 1584 }, { "epoch": 1.3234066239910938, "grad_norm": 0.38652655482292175, "learning_rate": 6.8531822905010156e-06, "loss": 0.4072, "step": 1585 }, { "epoch": 1.3242415808516559, "grad_norm": 0.37456753849983215, "learning_rate": 6.848666029532293e-06, "loss": 0.4435, "step": 1586 }, { "epoch": 1.3250765377122182, "grad_norm": 0.3268979489803314, "learning_rate": 6.844148020793667e-06, "loss": 0.3727, "step": 1587 }, { "epoch": 1.3259114945727803, "grad_norm": 0.4280584752559662, "learning_rate": 6.839628268556568e-06, "loss": 0.4928, "step": 1588 }, { "epoch": 1.3267464514333427, "grad_norm": 0.33215412497520447, "learning_rate": 6.835106777094065e-06, "loss": 0.3806, "step": 1589 }, { "epoch": 1.3275814082939048, "grad_norm": 0.3771249055862427, "learning_rate": 6.830583550680882e-06, "loss": 0.4361, "step": 1590 }, { "epoch": 1.328416365154467, "grad_norm": 0.3658102750778198, "learning_rate": 6.8260585935933735e-06, "loss": 0.4088, "step": 1591 }, { "epoch": 1.3292513220150293, "grad_norm": 0.3412573039531708, "learning_rate": 6.821531910109535e-06, "loss": 0.391, "step": 1592 }, { "epoch": 1.3300862788755914, "grad_norm": 0.35657981038093567, "learning_rate": 6.817003504508993e-06, "loss": 0.4078, "step": 1593 }, { "epoch": 1.3309212357361537, "grad_norm": 0.3604491949081421, "learning_rate": 6.812473381073006e-06, "loss": 0.4703, "step": 1594 }, { "epoch": 1.3317561925967158, "grad_norm": 0.3614696264266968, "learning_rate": 6.807941544084451e-06, "loss": 0.4066, "step": 1595 }, { "epoch": 1.332591149457278, "grad_norm": 0.3208848834037781, "learning_rate": 6.803407997827826e-06, "loss": 0.4163, "step": 1596 }, { "epoch": 1.3334261063178403, "grad_norm": 0.35581958293914795, "learning_rate": 6.798872746589248e-06, "loss": 0.4142, "step": 1597 }, { "epoch": 1.3342610631784024, "grad_norm": 0.37472474575042725, "learning_rate": 6.794335794656443e-06, "loss": 0.4143, "step": 1598 }, { "epoch": 1.3350960200389648, "grad_norm": 0.3626549243927002, "learning_rate": 6.789797146318748e-06, "loss": 0.4418, "step": 1599 }, { "epoch": 1.335930976899527, "grad_norm": 0.36464202404022217, "learning_rate": 6.785256805867102e-06, "loss": 0.4444, "step": 1600 }, { "epoch": 1.336765933760089, "grad_norm": 0.347439706325531, "learning_rate": 6.780714777594041e-06, "loss": 0.4378, "step": 1601 }, { "epoch": 1.3376008906206511, "grad_norm": 0.4036608040332794, "learning_rate": 6.7761710657936995e-06, "loss": 0.4232, "step": 1602 }, { "epoch": 1.3384358474812135, "grad_norm": 0.3871137499809265, "learning_rate": 6.771625674761806e-06, "loss": 0.4035, "step": 1603 }, { "epoch": 1.3392708043417758, "grad_norm": 0.34587007761001587, "learning_rate": 6.767078608795673e-06, "loss": 0.435, "step": 1604 }, { "epoch": 1.340105761202338, "grad_norm": 0.3684059977531433, "learning_rate": 6.7625298721941975e-06, "loss": 0.4302, "step": 1605 }, { "epoch": 1.3409407180629, "grad_norm": 0.35404708981513977, "learning_rate": 6.757979469257856e-06, "loss": 0.4166, "step": 1606 }, { "epoch": 1.3417756749234622, "grad_norm": 0.3771103024482727, "learning_rate": 6.7534274042887e-06, "loss": 0.4452, "step": 1607 }, { "epoch": 1.3426106317840245, "grad_norm": 0.3611823320388794, "learning_rate": 6.748873681590354e-06, "loss": 0.4194, "step": 1608 }, { "epoch": 1.3434455886445866, "grad_norm": 0.3621721565723419, "learning_rate": 6.744318305468006e-06, "loss": 0.4297, "step": 1609 }, { "epoch": 1.344280545505149, "grad_norm": 0.41272252798080444, "learning_rate": 6.739761280228413e-06, "loss": 0.4513, "step": 1610 }, { "epoch": 1.345115502365711, "grad_norm": 0.3359913229942322, "learning_rate": 6.735202610179886e-06, "loss": 0.4035, "step": 1611 }, { "epoch": 1.3459504592262732, "grad_norm": 0.40779590606689453, "learning_rate": 6.730642299632293e-06, "loss": 0.4096, "step": 1612 }, { "epoch": 1.3467854160868356, "grad_norm": 0.45575159788131714, "learning_rate": 6.726080352897052e-06, "loss": 0.4456, "step": 1613 }, { "epoch": 1.3476203729473977, "grad_norm": 0.36697620153427124, "learning_rate": 6.7215167742871315e-06, "loss": 0.4153, "step": 1614 }, { "epoch": 1.34845532980796, "grad_norm": 0.3346090316772461, "learning_rate": 6.716951568117037e-06, "loss": 0.3958, "step": 1615 }, { "epoch": 1.3492902866685221, "grad_norm": 0.44971325993537903, "learning_rate": 6.712384738702818e-06, "loss": 0.3959, "step": 1616 }, { "epoch": 1.3501252435290843, "grad_norm": 0.3653394877910614, "learning_rate": 6.707816290362053e-06, "loss": 0.4545, "step": 1617 }, { "epoch": 1.3509602003896466, "grad_norm": 0.35742172598838806, "learning_rate": 6.703246227413859e-06, "loss": 0.4351, "step": 1618 }, { "epoch": 1.3517951572502087, "grad_norm": 0.37057173252105713, "learning_rate": 6.698674554178871e-06, "loss": 0.4603, "step": 1619 }, { "epoch": 1.352630114110771, "grad_norm": 0.34722641110420227, "learning_rate": 6.694101274979253e-06, "loss": 0.3848, "step": 1620 }, { "epoch": 1.3534650709713332, "grad_norm": 0.37834399938583374, "learning_rate": 6.689526394138685e-06, "loss": 0.4563, "step": 1621 }, { "epoch": 1.3543000278318953, "grad_norm": 0.41601327061653137, "learning_rate": 6.684949915982358e-06, "loss": 0.5014, "step": 1622 }, { "epoch": 1.3551349846924576, "grad_norm": 0.3371627926826477, "learning_rate": 6.680371844836977e-06, "loss": 0.4153, "step": 1623 }, { "epoch": 1.3559699415530198, "grad_norm": 0.3799642026424408, "learning_rate": 6.675792185030755e-06, "loss": 0.4452, "step": 1624 }, { "epoch": 1.356804898413582, "grad_norm": 0.35518789291381836, "learning_rate": 6.671210940893402e-06, "loss": 0.4066, "step": 1625 }, { "epoch": 1.3576398552741442, "grad_norm": 0.37642350792884827, "learning_rate": 6.666628116756127e-06, "loss": 0.4325, "step": 1626 }, { "epoch": 1.3584748121347063, "grad_norm": 0.3331868350505829, "learning_rate": 6.6620437169516336e-06, "loss": 0.433, "step": 1627 }, { "epoch": 1.3593097689952685, "grad_norm": 0.36125320196151733, "learning_rate": 6.657457745814115e-06, "loss": 0.3944, "step": 1628 }, { "epoch": 1.3601447258558308, "grad_norm": 0.39642176032066345, "learning_rate": 6.652870207679253e-06, "loss": 0.4351, "step": 1629 }, { "epoch": 1.360979682716393, "grad_norm": 0.37639153003692627, "learning_rate": 6.648281106884206e-06, "loss": 0.4421, "step": 1630 }, { "epoch": 1.3618146395769553, "grad_norm": 0.4086742103099823, "learning_rate": 6.643690447767612e-06, "loss": 0.4476, "step": 1631 }, { "epoch": 1.3626495964375174, "grad_norm": 0.3961566686630249, "learning_rate": 6.639098234669581e-06, "loss": 0.4342, "step": 1632 }, { "epoch": 1.3634845532980795, "grad_norm": 0.3696999251842499, "learning_rate": 6.6345044719316934e-06, "loss": 0.3981, "step": 1633 }, { "epoch": 1.3643195101586418, "grad_norm": 0.44050732254981995, "learning_rate": 6.629909163896997e-06, "loss": 0.4554, "step": 1634 }, { "epoch": 1.365154467019204, "grad_norm": 0.3667236864566803, "learning_rate": 6.625312314909995e-06, "loss": 0.4147, "step": 1635 }, { "epoch": 1.3659894238797663, "grad_norm": 0.36997756361961365, "learning_rate": 6.620713929316651e-06, "loss": 0.4259, "step": 1636 }, { "epoch": 1.3668243807403284, "grad_norm": 0.37932249903678894, "learning_rate": 6.616114011464383e-06, "loss": 0.3962, "step": 1637 }, { "epoch": 1.3676593376008905, "grad_norm": 0.44316762685775757, "learning_rate": 6.611512565702053e-06, "loss": 0.4529, "step": 1638 }, { "epoch": 1.3684942944614529, "grad_norm": 0.37817174196243286, "learning_rate": 6.60690959637997e-06, "loss": 0.4234, "step": 1639 }, { "epoch": 1.369329251322015, "grad_norm": 0.3503683805465698, "learning_rate": 6.602305107849886e-06, "loss": 0.3848, "step": 1640 }, { "epoch": 1.3701642081825773, "grad_norm": 0.4415924847126007, "learning_rate": 6.597699104464984e-06, "loss": 0.4393, "step": 1641 }, { "epoch": 1.3709991650431395, "grad_norm": 0.3682541847229004, "learning_rate": 6.593091590579883e-06, "loss": 0.4295, "step": 1642 }, { "epoch": 1.3718341219037016, "grad_norm": 0.3709803819656372, "learning_rate": 6.588482570550627e-06, "loss": 0.4068, "step": 1643 }, { "epoch": 1.372669078764264, "grad_norm": 0.3905399441719055, "learning_rate": 6.583872048734689e-06, "loss": 0.4231, "step": 1644 }, { "epoch": 1.373504035624826, "grad_norm": 0.3914911448955536, "learning_rate": 6.579260029490959e-06, "loss": 0.4386, "step": 1645 }, { "epoch": 1.3743389924853884, "grad_norm": 0.4069015085697174, "learning_rate": 6.574646517179738e-06, "loss": 0.415, "step": 1646 }, { "epoch": 1.3751739493459505, "grad_norm": 0.3890778124332428, "learning_rate": 6.570031516162746e-06, "loss": 0.4201, "step": 1647 }, { "epoch": 1.3760089062065126, "grad_norm": 0.36210671067237854, "learning_rate": 6.565415030803106e-06, "loss": 0.4072, "step": 1648 }, { "epoch": 1.3768438630670747, "grad_norm": 0.35179662704467773, "learning_rate": 6.560797065465348e-06, "loss": 0.3913, "step": 1649 }, { "epoch": 1.377678819927637, "grad_norm": 0.38587573170661926, "learning_rate": 6.556177624515398e-06, "loss": 0.4623, "step": 1650 }, { "epoch": 1.3785137767881992, "grad_norm": 0.3420597314834595, "learning_rate": 6.551556712320577e-06, "loss": 0.4046, "step": 1651 }, { "epoch": 1.3793487336487615, "grad_norm": 0.38271042704582214, "learning_rate": 6.546934333249597e-06, "loss": 0.4527, "step": 1652 }, { "epoch": 1.3801836905093237, "grad_norm": 0.3798007369041443, "learning_rate": 6.542310491672561e-06, "loss": 0.4402, "step": 1653 }, { "epoch": 1.3810186473698858, "grad_norm": 0.31484296917915344, "learning_rate": 6.53768519196095e-06, "loss": 0.4077, "step": 1654 }, { "epoch": 1.3818536042304481, "grad_norm": 0.3635611832141876, "learning_rate": 6.533058438487624e-06, "loss": 0.447, "step": 1655 }, { "epoch": 1.3826885610910102, "grad_norm": 0.3892528712749481, "learning_rate": 6.528430235626819e-06, "loss": 0.4356, "step": 1656 }, { "epoch": 1.3835235179515726, "grad_norm": 0.3488113284111023, "learning_rate": 6.523800587754142e-06, "loss": 0.4544, "step": 1657 }, { "epoch": 1.3843584748121347, "grad_norm": 0.361992746591568, "learning_rate": 6.5191694992465634e-06, "loss": 0.4345, "step": 1658 }, { "epoch": 1.3851934316726968, "grad_norm": 0.3579844832420349, "learning_rate": 6.514536974482416e-06, "loss": 0.4688, "step": 1659 }, { "epoch": 1.3860283885332592, "grad_norm": 0.35155045986175537, "learning_rate": 6.509903017841395e-06, "loss": 0.3985, "step": 1660 }, { "epoch": 1.3868633453938213, "grad_norm": 0.37238311767578125, "learning_rate": 6.505267633704544e-06, "loss": 0.3969, "step": 1661 }, { "epoch": 1.3876983022543836, "grad_norm": 0.3625251054763794, "learning_rate": 6.500630826454257e-06, "loss": 0.4698, "step": 1662 }, { "epoch": 1.3885332591149457, "grad_norm": 0.3395136594772339, "learning_rate": 6.495992600474277e-06, "loss": 0.4041, "step": 1663 }, { "epoch": 1.3893682159755079, "grad_norm": 0.35295355319976807, "learning_rate": 6.4913529601496816e-06, "loss": 0.4031, "step": 1664 }, { "epoch": 1.3902031728360702, "grad_norm": 0.3337438702583313, "learning_rate": 6.486711909866895e-06, "loss": 0.4447, "step": 1665 }, { "epoch": 1.3910381296966323, "grad_norm": 0.3471190929412842, "learning_rate": 6.482069454013665e-06, "loss": 0.4348, "step": 1666 }, { "epoch": 1.3918730865571947, "grad_norm": 0.3089011013507843, "learning_rate": 6.4774255969790745e-06, "loss": 0.3926, "step": 1667 }, { "epoch": 1.3927080434177568, "grad_norm": 0.34705850481987, "learning_rate": 6.4727803431535294e-06, "loss": 0.3974, "step": 1668 }, { "epoch": 1.393543000278319, "grad_norm": 0.33933475613594055, "learning_rate": 6.468133696928753e-06, "loss": 0.4167, "step": 1669 }, { "epoch": 1.3943779571388812, "grad_norm": 0.3453034460544586, "learning_rate": 6.4634856626977916e-06, "loss": 0.4367, "step": 1670 }, { "epoch": 1.3952129139994434, "grad_norm": 0.382854700088501, "learning_rate": 6.458836244854998e-06, "loss": 0.4136, "step": 1671 }, { "epoch": 1.3960478708600057, "grad_norm": 0.35509100556373596, "learning_rate": 6.454185447796034e-06, "loss": 0.4235, "step": 1672 }, { "epoch": 1.3968828277205678, "grad_norm": 0.35633385181427, "learning_rate": 6.449533275917869e-06, "loss": 0.4004, "step": 1673 }, { "epoch": 1.39771778458113, "grad_norm": 0.3578953444957733, "learning_rate": 6.444879733618766e-06, "loss": 0.4276, "step": 1674 }, { "epoch": 1.398552741441692, "grad_norm": 0.33710813522338867, "learning_rate": 6.4402248252982915e-06, "loss": 0.4401, "step": 1675 }, { "epoch": 1.3993876983022544, "grad_norm": 0.39233213663101196, "learning_rate": 6.435568555357295e-06, "loss": 0.4159, "step": 1676 }, { "epoch": 1.4002226551628165, "grad_norm": 0.31103724241256714, "learning_rate": 6.43091092819792e-06, "loss": 0.3736, "step": 1677 }, { "epoch": 1.4010576120233789, "grad_norm": 0.3592934012413025, "learning_rate": 6.426251948223591e-06, "loss": 0.4395, "step": 1678 }, { "epoch": 1.401892568883941, "grad_norm": 0.3536584973335266, "learning_rate": 6.421591619839008e-06, "loss": 0.458, "step": 1679 }, { "epoch": 1.402727525744503, "grad_norm": 0.35465434193611145, "learning_rate": 6.416929947450152e-06, "loss": 0.421, "step": 1680 }, { "epoch": 1.4035624826050654, "grad_norm": 0.37178295850753784, "learning_rate": 6.412266935464271e-06, "loss": 0.4382, "step": 1681 }, { "epoch": 1.4043974394656276, "grad_norm": 0.34702274203300476, "learning_rate": 6.407602588289878e-06, "loss": 0.3952, "step": 1682 }, { "epoch": 1.40523239632619, "grad_norm": 0.38152334094047546, "learning_rate": 6.4029369103367545e-06, "loss": 0.4134, "step": 1683 }, { "epoch": 1.406067353186752, "grad_norm": 0.3456413745880127, "learning_rate": 6.39826990601593e-06, "loss": 0.383, "step": 1684 }, { "epoch": 1.4069023100473141, "grad_norm": 0.3555692732334137, "learning_rate": 6.393601579739701e-06, "loss": 0.4504, "step": 1685 }, { "epoch": 1.4077372669078765, "grad_norm": 0.3433886766433716, "learning_rate": 6.388931935921601e-06, "loss": 0.3782, "step": 1686 }, { "epoch": 1.4085722237684386, "grad_norm": 0.39105120301246643, "learning_rate": 6.384260978976418e-06, "loss": 0.4764, "step": 1687 }, { "epoch": 1.409407180629001, "grad_norm": 0.34286120533943176, "learning_rate": 6.3795887133201775e-06, "loss": 0.4233, "step": 1688 }, { "epoch": 1.410242137489563, "grad_norm": 0.34833794832229614, "learning_rate": 6.374915143370142e-06, "loss": 0.442, "step": 1689 }, { "epoch": 1.4110770943501252, "grad_norm": 0.3347507119178772, "learning_rate": 6.37024027354481e-06, "loss": 0.4114, "step": 1690 }, { "epoch": 1.4119120512106875, "grad_norm": 0.3715648949146271, "learning_rate": 6.365564108263906e-06, "loss": 0.4572, "step": 1691 }, { "epoch": 1.4127470080712496, "grad_norm": 0.33461320400238037, "learning_rate": 6.3608866519483825e-06, "loss": 0.4365, "step": 1692 }, { "epoch": 1.413581964931812, "grad_norm": 0.36092180013656616, "learning_rate": 6.3562079090204084e-06, "loss": 0.4636, "step": 1693 }, { "epoch": 1.414416921792374, "grad_norm": 0.3681912124156952, "learning_rate": 6.351527883903372e-06, "loss": 0.4116, "step": 1694 }, { "epoch": 1.4152518786529362, "grad_norm": 0.35722851753234863, "learning_rate": 6.346846581021874e-06, "loss": 0.4048, "step": 1695 }, { "epoch": 1.4160868355134983, "grad_norm": 0.3897838890552521, "learning_rate": 6.342164004801719e-06, "loss": 0.438, "step": 1696 }, { "epoch": 1.4169217923740607, "grad_norm": 0.35565775632858276, "learning_rate": 6.337480159669921e-06, "loss": 0.4474, "step": 1697 }, { "epoch": 1.4177567492346228, "grad_norm": 0.34460094571113586, "learning_rate": 6.332795050054691e-06, "loss": 0.4182, "step": 1698 }, { "epoch": 1.4185917060951851, "grad_norm": 0.31700295209884644, "learning_rate": 6.3281086803854345e-06, "loss": 0.3885, "step": 1699 }, { "epoch": 1.4194266629557473, "grad_norm": 0.35856470465660095, "learning_rate": 6.3234210550927485e-06, "loss": 0.465, "step": 1700 }, { "epoch": 1.4202616198163094, "grad_norm": 0.3240307569503784, "learning_rate": 6.3187321786084236e-06, "loss": 0.4244, "step": 1701 }, { "epoch": 1.4210965766768717, "grad_norm": 0.335997074842453, "learning_rate": 6.314042055365422e-06, "loss": 0.4288, "step": 1702 }, { "epoch": 1.4219315335374338, "grad_norm": 0.3788629174232483, "learning_rate": 6.309350689797891e-06, "loss": 0.451, "step": 1703 }, { "epoch": 1.4227664903979962, "grad_norm": 0.3278177082538605, "learning_rate": 6.304658086341153e-06, "loss": 0.414, "step": 1704 }, { "epoch": 1.4236014472585583, "grad_norm": 0.3505568504333496, "learning_rate": 6.299964249431698e-06, "loss": 0.4535, "step": 1705 }, { "epoch": 1.4244364041191204, "grad_norm": 0.3340985178947449, "learning_rate": 6.295269183507187e-06, "loss": 0.4004, "step": 1706 }, { "epoch": 1.4252713609796828, "grad_norm": 0.36492228507995605, "learning_rate": 6.290572893006437e-06, "loss": 0.4134, "step": 1707 }, { "epoch": 1.4261063178402449, "grad_norm": 0.37419557571411133, "learning_rate": 6.285875382369425e-06, "loss": 0.4624, "step": 1708 }, { "epoch": 1.4269412747008072, "grad_norm": 0.3434616029262543, "learning_rate": 6.281176656037282e-06, "loss": 0.429, "step": 1709 }, { "epoch": 1.4277762315613693, "grad_norm": 0.3661850690841675, "learning_rate": 6.276476718452289e-06, "loss": 0.4474, "step": 1710 }, { "epoch": 1.4286111884219315, "grad_norm": 0.3666943311691284, "learning_rate": 6.271775574057872e-06, "loss": 0.4337, "step": 1711 }, { "epoch": 1.4294461452824938, "grad_norm": 0.3455953896045685, "learning_rate": 6.267073227298597e-06, "loss": 0.3802, "step": 1712 }, { "epoch": 1.430281102143056, "grad_norm": 0.371152400970459, "learning_rate": 6.262369682620164e-06, "loss": 0.4208, "step": 1713 }, { "epoch": 1.4311160590036183, "grad_norm": 0.38929659128189087, "learning_rate": 6.257664944469412e-06, "loss": 0.4219, "step": 1714 }, { "epoch": 1.4319510158641804, "grad_norm": 0.3584350049495697, "learning_rate": 6.252959017294303e-06, "loss": 0.4181, "step": 1715 }, { "epoch": 1.4327859727247425, "grad_norm": 0.36737513542175293, "learning_rate": 6.248251905543926e-06, "loss": 0.4062, "step": 1716 }, { "epoch": 1.4336209295853046, "grad_norm": 0.3520453870296478, "learning_rate": 6.243543613668489e-06, "loss": 0.4045, "step": 1717 }, { "epoch": 1.434455886445867, "grad_norm": 0.389718621969223, "learning_rate": 6.238834146119316e-06, "loss": 0.452, "step": 1718 }, { "epoch": 1.435290843306429, "grad_norm": 0.34871089458465576, "learning_rate": 6.23412350734884e-06, "loss": 0.4121, "step": 1719 }, { "epoch": 1.4361258001669914, "grad_norm": 0.3547801673412323, "learning_rate": 6.229411701810605e-06, "loss": 0.4125, "step": 1720 }, { "epoch": 1.4369607570275535, "grad_norm": 0.34466683864593506, "learning_rate": 6.224698733959258e-06, "loss": 0.4098, "step": 1721 }, { "epoch": 1.4377957138881157, "grad_norm": 0.3579025864601135, "learning_rate": 6.219984608250541e-06, "loss": 0.4344, "step": 1722 }, { "epoch": 1.438630670748678, "grad_norm": 0.41402703523635864, "learning_rate": 6.215269329141293e-06, "loss": 0.4197, "step": 1723 }, { "epoch": 1.4394656276092401, "grad_norm": 0.3642241954803467, "learning_rate": 6.210552901089447e-06, "loss": 0.4679, "step": 1724 }, { "epoch": 1.4403005844698025, "grad_norm": 0.39851462841033936, "learning_rate": 6.205835328554014e-06, "loss": 0.4128, "step": 1725 }, { "epoch": 1.4411355413303646, "grad_norm": 0.34661874175071716, "learning_rate": 6.201116615995096e-06, "loss": 0.3905, "step": 1726 }, { "epoch": 1.4419704981909267, "grad_norm": 0.35139894485473633, "learning_rate": 6.196396767873866e-06, "loss": 0.427, "step": 1727 }, { "epoch": 1.442805455051489, "grad_norm": 0.3480257987976074, "learning_rate": 6.191675788652574e-06, "loss": 0.4249, "step": 1728 }, { "epoch": 1.4436404119120512, "grad_norm": 0.3825027346611023, "learning_rate": 6.186953682794536e-06, "loss": 0.3997, "step": 1729 }, { "epoch": 1.4444753687726135, "grad_norm": 0.34643417596817017, "learning_rate": 6.182230454764141e-06, "loss": 0.4449, "step": 1730 }, { "epoch": 1.4453103256331756, "grad_norm": 0.3708588182926178, "learning_rate": 6.17750610902683e-06, "loss": 0.4649, "step": 1731 }, { "epoch": 1.4461452824937377, "grad_norm": 0.3373171091079712, "learning_rate": 6.172780650049106e-06, "loss": 0.3987, "step": 1732 }, { "epoch": 1.4469802393543, "grad_norm": 0.39861467480659485, "learning_rate": 6.16805408229852e-06, "loss": 0.41, "step": 1733 }, { "epoch": 1.4478151962148622, "grad_norm": 0.36511892080307007, "learning_rate": 6.1633264102436785e-06, "loss": 0.4311, "step": 1734 }, { "epoch": 1.4486501530754246, "grad_norm": 0.37147894501686096, "learning_rate": 6.158597638354222e-06, "loss": 0.4, "step": 1735 }, { "epoch": 1.4494851099359867, "grad_norm": 0.375443696975708, "learning_rate": 6.153867771100841e-06, "loss": 0.4364, "step": 1736 }, { "epoch": 1.4503200667965488, "grad_norm": 0.34852543473243713, "learning_rate": 6.149136812955256e-06, "loss": 0.3858, "step": 1737 }, { "epoch": 1.4511550236571111, "grad_norm": 0.4413578510284424, "learning_rate": 6.14440476839022e-06, "loss": 0.476, "step": 1738 }, { "epoch": 1.4519899805176733, "grad_norm": 0.3335571587085724, "learning_rate": 6.139671641879511e-06, "loss": 0.4199, "step": 1739 }, { "epoch": 1.4528249373782356, "grad_norm": 0.3746272623538971, "learning_rate": 6.134937437897933e-06, "loss": 0.4175, "step": 1740 }, { "epoch": 1.4536598942387977, "grad_norm": 0.3569086790084839, "learning_rate": 6.130202160921309e-06, "loss": 0.391, "step": 1741 }, { "epoch": 1.4544948510993598, "grad_norm": 0.42714935541152954, "learning_rate": 6.125465815426474e-06, "loss": 0.4461, "step": 1742 }, { "epoch": 1.455329807959922, "grad_norm": 0.3477902114391327, "learning_rate": 6.120728405891272e-06, "loss": 0.421, "step": 1743 }, { "epoch": 1.4561647648204843, "grad_norm": 0.36235061287879944, "learning_rate": 6.115989936794559e-06, "loss": 0.4034, "step": 1744 }, { "epoch": 1.4569997216810464, "grad_norm": 0.39577367901802063, "learning_rate": 6.1112504126161855e-06, "loss": 0.4622, "step": 1745 }, { "epoch": 1.4578346785416088, "grad_norm": 0.3344385027885437, "learning_rate": 6.106509837837004e-06, "loss": 0.4009, "step": 1746 }, { "epoch": 1.4586696354021709, "grad_norm": 0.33754661679267883, "learning_rate": 6.10176821693886e-06, "loss": 0.4147, "step": 1747 }, { "epoch": 1.459504592262733, "grad_norm": 0.3883925974369049, "learning_rate": 6.097025554404587e-06, "loss": 0.4672, "step": 1748 }, { "epoch": 1.4603395491232953, "grad_norm": 0.3668109178543091, "learning_rate": 6.092281854718002e-06, "loss": 0.4575, "step": 1749 }, { "epoch": 1.4611745059838575, "grad_norm": 0.33129364252090454, "learning_rate": 6.087537122363906e-06, "loss": 0.4178, "step": 1750 }, { "epoch": 1.4620094628444198, "grad_norm": 0.35363858938217163, "learning_rate": 6.082791361828072e-06, "loss": 0.4232, "step": 1751 }, { "epoch": 1.462844419704982, "grad_norm": 0.39587000012397766, "learning_rate": 6.07804457759725e-06, "loss": 0.4326, "step": 1752 }, { "epoch": 1.463679376565544, "grad_norm": 0.36366626620292664, "learning_rate": 6.073296774159152e-06, "loss": 0.4452, "step": 1753 }, { "epoch": 1.4645143334261064, "grad_norm": 0.36236506700515747, "learning_rate": 6.068547956002461e-06, "loss": 0.4253, "step": 1754 }, { "epoch": 1.4653492902866685, "grad_norm": 0.35525521636009216, "learning_rate": 6.063798127616811e-06, "loss": 0.4131, "step": 1755 }, { "epoch": 1.4661842471472308, "grad_norm": 0.3758260905742645, "learning_rate": 6.0590472934927956e-06, "loss": 0.4086, "step": 1756 }, { "epoch": 1.467019204007793, "grad_norm": 0.3596494793891907, "learning_rate": 6.054295458121963e-06, "loss": 0.4361, "step": 1757 }, { "epoch": 1.467854160868355, "grad_norm": 0.34519338607788086, "learning_rate": 6.0495426259968e-06, "loss": 0.3952, "step": 1758 }, { "epoch": 1.4686891177289174, "grad_norm": 0.35746482014656067, "learning_rate": 6.044788801610739e-06, "loss": 0.4196, "step": 1759 }, { "epoch": 1.4695240745894795, "grad_norm": 0.3974684774875641, "learning_rate": 6.0400339894581535e-06, "loss": 0.4306, "step": 1760 }, { "epoch": 1.4703590314500419, "grad_norm": 0.39126625657081604, "learning_rate": 6.035278194034347e-06, "loss": 0.4734, "step": 1761 }, { "epoch": 1.471193988310604, "grad_norm": 0.36271193623542786, "learning_rate": 6.0305214198355565e-06, "loss": 0.4361, "step": 1762 }, { "epoch": 1.4720289451711661, "grad_norm": 0.36475226283073425, "learning_rate": 6.025763671358939e-06, "loss": 0.4016, "step": 1763 }, { "epoch": 1.4728639020317282, "grad_norm": 0.3650916516780853, "learning_rate": 6.021004953102576e-06, "loss": 0.3855, "step": 1764 }, { "epoch": 1.4736988588922906, "grad_norm": 0.348285436630249, "learning_rate": 6.016245269565467e-06, "loss": 0.4129, "step": 1765 }, { "epoch": 1.4745338157528527, "grad_norm": 0.39251261949539185, "learning_rate": 6.01148462524752e-06, "loss": 0.4658, "step": 1766 }, { "epoch": 1.475368772613415, "grad_norm": 0.3543471693992615, "learning_rate": 6.006723024649557e-06, "loss": 0.3937, "step": 1767 }, { "epoch": 1.4762037294739772, "grad_norm": 0.3776741623878479, "learning_rate": 6.001960472273299e-06, "loss": 0.3937, "step": 1768 }, { "epoch": 1.4770386863345393, "grad_norm": 0.3445014953613281, "learning_rate": 5.997196972621366e-06, "loss": 0.4072, "step": 1769 }, { "epoch": 1.4778736431951016, "grad_norm": 0.3446212112903595, "learning_rate": 5.992432530197283e-06, "loss": 0.4543, "step": 1770 }, { "epoch": 1.4787086000556637, "grad_norm": 0.37848761677742004, "learning_rate": 5.987667149505453e-06, "loss": 0.4128, "step": 1771 }, { "epoch": 1.479543556916226, "grad_norm": 0.3462899327278137, "learning_rate": 5.982900835051177e-06, "loss": 0.3874, "step": 1772 }, { "epoch": 1.4803785137767882, "grad_norm": 0.34473076462745667, "learning_rate": 5.978133591340633e-06, "loss": 0.4134, "step": 1773 }, { "epoch": 1.4812134706373503, "grad_norm": 0.3737209141254425, "learning_rate": 5.973365422880879e-06, "loss": 0.4289, "step": 1774 }, { "epoch": 1.4820484274979127, "grad_norm": 0.36363986134529114, "learning_rate": 5.968596334179847e-06, "loss": 0.4274, "step": 1775 }, { "epoch": 1.4828833843584748, "grad_norm": 0.37568023800849915, "learning_rate": 5.963826329746338e-06, "loss": 0.444, "step": 1776 }, { "epoch": 1.4837183412190371, "grad_norm": 0.36207306385040283, "learning_rate": 5.959055414090025e-06, "loss": 0.4033, "step": 1777 }, { "epoch": 1.4845532980795992, "grad_norm": 0.38753455877304077, "learning_rate": 5.9542835917214326e-06, "loss": 0.4515, "step": 1778 }, { "epoch": 1.4853882549401614, "grad_norm": 0.36988383531570435, "learning_rate": 5.9495108671519485e-06, "loss": 0.4397, "step": 1779 }, { "epoch": 1.4862232118007237, "grad_norm": 0.33960258960723877, "learning_rate": 5.944737244893815e-06, "loss": 0.4145, "step": 1780 }, { "epoch": 1.4870581686612858, "grad_norm": 0.34904566407203674, "learning_rate": 5.939962729460117e-06, "loss": 0.3767, "step": 1781 }, { "epoch": 1.4878931255218482, "grad_norm": 0.41382449865341187, "learning_rate": 5.935187325364791e-06, "loss": 0.4672, "step": 1782 }, { "epoch": 1.4887280823824103, "grad_norm": 0.32113683223724365, "learning_rate": 5.9304110371226065e-06, "loss": 0.4317, "step": 1783 }, { "epoch": 1.4895630392429724, "grad_norm": 0.3690735697746277, "learning_rate": 5.9256338692491755e-06, "loss": 0.461, "step": 1784 }, { "epoch": 1.4903979961035345, "grad_norm": 0.34115198254585266, "learning_rate": 5.920855826260937e-06, "loss": 0.4017, "step": 1785 }, { "epoch": 1.4912329529640969, "grad_norm": 0.372600793838501, "learning_rate": 5.916076912675158e-06, "loss": 0.4161, "step": 1786 }, { "epoch": 1.492067909824659, "grad_norm": 0.34937945008277893, "learning_rate": 5.911297133009931e-06, "loss": 0.4679, "step": 1787 }, { "epoch": 1.4929028666852213, "grad_norm": 0.32612162828445435, "learning_rate": 5.906516491784167e-06, "loss": 0.3859, "step": 1788 }, { "epoch": 1.4937378235457834, "grad_norm": 0.39035043120384216, "learning_rate": 5.901734993517587e-06, "loss": 0.4128, "step": 1789 }, { "epoch": 1.4945727804063456, "grad_norm": 0.37927237153053284, "learning_rate": 5.896952642730728e-06, "loss": 0.4224, "step": 1790 }, { "epoch": 1.495407737266908, "grad_norm": 0.38825687766075134, "learning_rate": 5.892169443944929e-06, "loss": 0.4423, "step": 1791 }, { "epoch": 1.49624269412747, "grad_norm": 0.3519742488861084, "learning_rate": 5.887385401682334e-06, "loss": 0.4039, "step": 1792 }, { "epoch": 1.4970776509880324, "grad_norm": 0.4160196781158447, "learning_rate": 5.882600520465882e-06, "loss": 0.4557, "step": 1793 }, { "epoch": 1.4979126078485945, "grad_norm": 0.3882049024105072, "learning_rate": 5.877814804819306e-06, "loss": 0.4637, "step": 1794 }, { "epoch": 1.4987475647091566, "grad_norm": 0.32332122325897217, "learning_rate": 5.8730282592671276e-06, "loss": 0.4057, "step": 1795 }, { "epoch": 1.499582521569719, "grad_norm": 0.3710378408432007, "learning_rate": 5.8682408883346535e-06, "loss": 0.4306, "step": 1796 }, { "epoch": 1.500417478430281, "grad_norm": 0.33565661311149597, "learning_rate": 5.863452696547969e-06, "loss": 0.3936, "step": 1797 }, { "epoch": 1.5012524352908434, "grad_norm": 0.34477075934410095, "learning_rate": 5.858663688433939e-06, "loss": 0.4428, "step": 1798 }, { "epoch": 1.5020873921514055, "grad_norm": 0.3508036434650421, "learning_rate": 5.8538738685201964e-06, "loss": 0.4163, "step": 1799 }, { "epoch": 1.5029223490119676, "grad_norm": 0.41541966795921326, "learning_rate": 5.8490832413351465e-06, "loss": 0.412, "step": 1800 }, { "epoch": 1.5037573058725298, "grad_norm": 0.3558414578437805, "learning_rate": 5.844291811407951e-06, "loss": 0.4506, "step": 1801 }, { "epoch": 1.504592262733092, "grad_norm": 0.3178742825984955, "learning_rate": 5.8394995832685345e-06, "loss": 0.396, "step": 1802 }, { "epoch": 1.5054272195936544, "grad_norm": 0.3453831672668457, "learning_rate": 5.834706561447576e-06, "loss": 0.4441, "step": 1803 }, { "epoch": 1.5062621764542166, "grad_norm": 0.35177621245384216, "learning_rate": 5.8299127504765074e-06, "loss": 0.4367, "step": 1804 }, { "epoch": 1.5070971333147787, "grad_norm": 0.3537365198135376, "learning_rate": 5.8251181548875016e-06, "loss": 0.4589, "step": 1805 }, { "epoch": 1.5079320901753408, "grad_norm": 0.30545759201049805, "learning_rate": 5.820322779213476e-06, "loss": 0.3709, "step": 1806 }, { "epoch": 1.5087670470359031, "grad_norm": 0.3415234088897705, "learning_rate": 5.815526627988083e-06, "loss": 0.4281, "step": 1807 }, { "epoch": 1.5096020038964655, "grad_norm": 0.334505558013916, "learning_rate": 5.810729705745716e-06, "loss": 0.4213, "step": 1808 }, { "epoch": 1.5104369607570276, "grad_norm": 0.3561946749687195, "learning_rate": 5.805932017021486e-06, "loss": 0.4237, "step": 1809 }, { "epoch": 1.5112719176175897, "grad_norm": 0.34346452355384827, "learning_rate": 5.801133566351239e-06, "loss": 0.4609, "step": 1810 }, { "epoch": 1.5121068744781518, "grad_norm": 0.3773595690727234, "learning_rate": 5.796334358271534e-06, "loss": 0.4407, "step": 1811 }, { "epoch": 1.5129418313387142, "grad_norm": 0.38373705744743347, "learning_rate": 5.79153439731965e-06, "loss": 0.3993, "step": 1812 }, { "epoch": 1.5137767881992765, "grad_norm": 0.3783143162727356, "learning_rate": 5.786733688033574e-06, "loss": 0.4344, "step": 1813 }, { "epoch": 1.5146117450598386, "grad_norm": 0.3355046510696411, "learning_rate": 5.781932234952008e-06, "loss": 0.4239, "step": 1814 }, { "epoch": 1.5154467019204008, "grad_norm": 0.3455119729042053, "learning_rate": 5.777130042614349e-06, "loss": 0.3839, "step": 1815 }, { "epoch": 1.5162816587809629, "grad_norm": 0.386928915977478, "learning_rate": 5.772327115560696e-06, "loss": 0.4417, "step": 1816 }, { "epoch": 1.5171166156415252, "grad_norm": 0.31704840064048767, "learning_rate": 5.767523458331844e-06, "loss": 0.414, "step": 1817 }, { "epoch": 1.5179515725020873, "grad_norm": 0.35672444105148315, "learning_rate": 5.762719075469277e-06, "loss": 0.4458, "step": 1818 }, { "epoch": 1.5187865293626497, "grad_norm": 0.3519691526889801, "learning_rate": 5.757913971515166e-06, "loss": 0.4659, "step": 1819 }, { "epoch": 1.5196214862232118, "grad_norm": 0.31299376487731934, "learning_rate": 5.753108151012361e-06, "loss": 0.4308, "step": 1820 }, { "epoch": 1.520456443083774, "grad_norm": 0.3142243027687073, "learning_rate": 5.748301618504394e-06, "loss": 0.3948, "step": 1821 }, { "epoch": 1.5212913999443363, "grad_norm": 0.3367689251899719, "learning_rate": 5.743494378535464e-06, "loss": 0.3992, "step": 1822 }, { "epoch": 1.5221263568048984, "grad_norm": 0.3666345477104187, "learning_rate": 5.738686435650445e-06, "loss": 0.4875, "step": 1823 }, { "epoch": 1.5229613136654607, "grad_norm": 0.31503745913505554, "learning_rate": 5.733877794394873e-06, "loss": 0.401, "step": 1824 }, { "epoch": 1.5237962705260228, "grad_norm": 0.3313986659049988, "learning_rate": 5.729068459314944e-06, "loss": 0.4123, "step": 1825 }, { "epoch": 1.524631227386585, "grad_norm": 0.36270931363105774, "learning_rate": 5.724258434957507e-06, "loss": 0.4223, "step": 1826 }, { "epoch": 1.525466184247147, "grad_norm": 0.34861207008361816, "learning_rate": 5.719447725870071e-06, "loss": 0.4253, "step": 1827 }, { "epoch": 1.5263011411077094, "grad_norm": 0.3714761734008789, "learning_rate": 5.714636336600783e-06, "loss": 0.4686, "step": 1828 }, { "epoch": 1.5271360979682718, "grad_norm": 0.3421355187892914, "learning_rate": 5.709824271698438e-06, "loss": 0.4022, "step": 1829 }, { "epoch": 1.5279710548288339, "grad_norm": 0.35986921191215515, "learning_rate": 5.705011535712472e-06, "loss": 0.4539, "step": 1830 }, { "epoch": 1.528806011689396, "grad_norm": 0.33042848110198975, "learning_rate": 5.70019813319295e-06, "loss": 0.3935, "step": 1831 }, { "epoch": 1.5296409685499581, "grad_norm": 0.3587636649608612, "learning_rate": 5.695384068690569e-06, "loss": 0.4411, "step": 1832 }, { "epoch": 1.5304759254105205, "grad_norm": 0.35935425758361816, "learning_rate": 5.690569346756653e-06, "loss": 0.4431, "step": 1833 }, { "epoch": 1.5313108822710828, "grad_norm": 0.3460308909416199, "learning_rate": 5.685753971943149e-06, "loss": 0.4463, "step": 1834 }, { "epoch": 1.532145839131645, "grad_norm": 0.359637051820755, "learning_rate": 5.680937948802616e-06, "loss": 0.4138, "step": 1835 }, { "epoch": 1.532980795992207, "grad_norm": 0.34961411356925964, "learning_rate": 5.67612128188823e-06, "loss": 0.405, "step": 1836 }, { "epoch": 1.5338157528527692, "grad_norm": 0.328326016664505, "learning_rate": 5.671303975753777e-06, "loss": 0.4456, "step": 1837 }, { "epoch": 1.5346507097133315, "grad_norm": 0.37824636697769165, "learning_rate": 5.666486034953642e-06, "loss": 0.4461, "step": 1838 }, { "epoch": 1.5354856665738938, "grad_norm": 0.4088161289691925, "learning_rate": 5.661667464042816e-06, "loss": 0.4282, "step": 1839 }, { "epoch": 1.536320623434456, "grad_norm": 0.3903249502182007, "learning_rate": 5.6568482675768824e-06, "loss": 0.4577, "step": 1840 }, { "epoch": 1.537155580295018, "grad_norm": 0.3384934961795807, "learning_rate": 5.652028450112015e-06, "loss": 0.4163, "step": 1841 }, { "epoch": 1.5379905371555802, "grad_norm": 0.3373096287250519, "learning_rate": 5.647208016204976e-06, "loss": 0.3988, "step": 1842 }, { "epoch": 1.5388254940161425, "grad_norm": 0.3802594840526581, "learning_rate": 5.642386970413112e-06, "loss": 0.4261, "step": 1843 }, { "epoch": 1.5396604508767047, "grad_norm": 0.368760883808136, "learning_rate": 5.637565317294348e-06, "loss": 0.4522, "step": 1844 }, { "epoch": 1.540495407737267, "grad_norm": 0.3810878098011017, "learning_rate": 5.6327430614071794e-06, "loss": 0.4632, "step": 1845 }, { "epoch": 1.5413303645978291, "grad_norm": 0.3652700185775757, "learning_rate": 5.627920207310674e-06, "loss": 0.4423, "step": 1846 }, { "epoch": 1.5421653214583912, "grad_norm": 0.3675464987754822, "learning_rate": 5.623096759564468e-06, "loss": 0.4651, "step": 1847 }, { "epoch": 1.5430002783189534, "grad_norm": 0.34783023595809937, "learning_rate": 5.618272722728752e-06, "loss": 0.4241, "step": 1848 }, { "epoch": 1.5438352351795157, "grad_norm": 0.3227523863315582, "learning_rate": 5.6134481013642794e-06, "loss": 0.4044, "step": 1849 }, { "epoch": 1.544670192040078, "grad_norm": 0.36279383301734924, "learning_rate": 5.608622900032355e-06, "loss": 0.4318, "step": 1850 }, { "epoch": 1.5455051489006402, "grad_norm": 0.47879743576049805, "learning_rate": 5.603797123294832e-06, "loss": 0.4444, "step": 1851 }, { "epoch": 1.5463401057612023, "grad_norm": 0.32762229442596436, "learning_rate": 5.598970775714104e-06, "loss": 0.4076, "step": 1852 }, { "epoch": 1.5471750626217644, "grad_norm": 0.4166167378425598, "learning_rate": 5.594143861853109e-06, "loss": 0.4532, "step": 1853 }, { "epoch": 1.5480100194823267, "grad_norm": 0.36160513758659363, "learning_rate": 5.589316386275318e-06, "loss": 0.4088, "step": 1854 }, { "epoch": 1.548844976342889, "grad_norm": 0.4185416102409363, "learning_rate": 5.584488353544734e-06, "loss": 0.4484, "step": 1855 }, { "epoch": 1.5496799332034512, "grad_norm": 0.3967064917087555, "learning_rate": 5.579659768225885e-06, "loss": 0.387, "step": 1856 }, { "epoch": 1.5505148900640133, "grad_norm": 0.4083962142467499, "learning_rate": 5.574830634883826e-06, "loss": 0.4692, "step": 1857 }, { "epoch": 1.5513498469245754, "grad_norm": 0.33574599027633667, "learning_rate": 5.570000958084122e-06, "loss": 0.4169, "step": 1858 }, { "epoch": 1.5521848037851378, "grad_norm": 0.41693150997161865, "learning_rate": 5.5651707423928594e-06, "loss": 0.4252, "step": 1859 }, { "epoch": 1.5530197606457001, "grad_norm": 0.37447866797447205, "learning_rate": 5.560339992376633e-06, "loss": 0.4257, "step": 1860 }, { "epoch": 1.5538547175062623, "grad_norm": 0.3311242461204529, "learning_rate": 5.555508712602538e-06, "loss": 0.4316, "step": 1861 }, { "epoch": 1.5546896743668244, "grad_norm": 0.37261858582496643, "learning_rate": 5.550676907638176e-06, "loss": 0.4296, "step": 1862 }, { "epoch": 1.5555246312273865, "grad_norm": 0.3934102952480316, "learning_rate": 5.545844582051641e-06, "loss": 0.3868, "step": 1863 }, { "epoch": 1.5563595880879488, "grad_norm": 0.40219613909721375, "learning_rate": 5.5410117404115214e-06, "loss": 0.5454, "step": 1864 }, { "epoch": 1.557194544948511, "grad_norm": 0.3156687617301941, "learning_rate": 5.5361783872868935e-06, "loss": 0.3912, "step": 1865 }, { "epoch": 1.5580295018090733, "grad_norm": 0.41097497940063477, "learning_rate": 5.531344527247314e-06, "loss": 0.4606, "step": 1866 }, { "epoch": 1.5588644586696354, "grad_norm": 0.396454393863678, "learning_rate": 5.526510164862826e-06, "loss": 0.4121, "step": 1867 }, { "epoch": 1.5596994155301975, "grad_norm": 0.36050164699554443, "learning_rate": 5.52167530470394e-06, "loss": 0.4576, "step": 1868 }, { "epoch": 1.5605343723907596, "grad_norm": 0.38064834475517273, "learning_rate": 5.516839951341641e-06, "loss": 0.4224, "step": 1869 }, { "epoch": 1.561369329251322, "grad_norm": 0.38706690073013306, "learning_rate": 5.512004109347378e-06, "loss": 0.4334, "step": 1870 }, { "epoch": 1.5622042861118843, "grad_norm": 0.3633352816104889, "learning_rate": 5.507167783293065e-06, "loss": 0.3794, "step": 1871 }, { "epoch": 1.5630392429724465, "grad_norm": 0.4461795687675476, "learning_rate": 5.502330977751072e-06, "loss": 0.4589, "step": 1872 }, { "epoch": 1.5638741998330086, "grad_norm": 0.36286893486976624, "learning_rate": 5.497493697294221e-06, "loss": 0.4161, "step": 1873 }, { "epoch": 1.5647091566935707, "grad_norm": 0.40584665536880493, "learning_rate": 5.492655946495785e-06, "loss": 0.491, "step": 1874 }, { "epoch": 1.565544113554133, "grad_norm": 0.3504277467727661, "learning_rate": 5.487817729929482e-06, "loss": 0.3706, "step": 1875 }, { "epoch": 1.5663790704146954, "grad_norm": 0.38554561138153076, "learning_rate": 5.482979052169468e-06, "loss": 0.4028, "step": 1876 }, { "epoch": 1.5672140272752575, "grad_norm": 0.4630626440048218, "learning_rate": 5.478139917790337e-06, "loss": 0.4166, "step": 1877 }, { "epoch": 1.5680489841358196, "grad_norm": 0.34224557876586914, "learning_rate": 5.473300331367115e-06, "loss": 0.4225, "step": 1878 }, { "epoch": 1.5688839409963817, "grad_norm": 0.39339420199394226, "learning_rate": 5.468460297475252e-06, "loss": 0.4266, "step": 1879 }, { "epoch": 1.569718897856944, "grad_norm": 0.3753494620323181, "learning_rate": 5.463619820690624e-06, "loss": 0.4002, "step": 1880 }, { "epoch": 1.5705538547175064, "grad_norm": 0.37577491998672485, "learning_rate": 5.458778905589528e-06, "loss": 0.413, "step": 1881 }, { "epoch": 1.5713888115780685, "grad_norm": 0.3539081811904907, "learning_rate": 5.453937556748669e-06, "loss": 0.4093, "step": 1882 }, { "epoch": 1.5722237684386307, "grad_norm": 0.37287676334381104, "learning_rate": 5.449095778745169e-06, "loss": 0.4467, "step": 1883 }, { "epoch": 1.5730587252991928, "grad_norm": 0.3814004957675934, "learning_rate": 5.444253576156549e-06, "loss": 0.4392, "step": 1884 }, { "epoch": 1.5738936821597551, "grad_norm": 0.33997416496276855, "learning_rate": 5.439410953560738e-06, "loss": 0.3864, "step": 1885 }, { "epoch": 1.5747286390203172, "grad_norm": 0.3459224998950958, "learning_rate": 5.434567915536057e-06, "loss": 0.4267, "step": 1886 }, { "epoch": 1.5755635958808796, "grad_norm": 0.3969341814517975, "learning_rate": 5.429724466661221e-06, "loss": 0.4527, "step": 1887 }, { "epoch": 1.5763985527414417, "grad_norm": 0.3159419596195221, "learning_rate": 5.424880611515338e-06, "loss": 0.4133, "step": 1888 }, { "epoch": 1.5772335096020038, "grad_norm": 0.35443925857543945, "learning_rate": 5.42003635467789e-06, "loss": 0.4126, "step": 1889 }, { "epoch": 1.5780684664625662, "grad_norm": 0.30871450901031494, "learning_rate": 5.415191700728749e-06, "loss": 0.3865, "step": 1890 }, { "epoch": 1.5789034233231283, "grad_norm": 0.3449341952800751, "learning_rate": 5.410346654248158e-06, "loss": 0.445, "step": 1891 }, { "epoch": 1.5797383801836906, "grad_norm": 0.34389790892601013, "learning_rate": 5.4055012198167325e-06, "loss": 0.3976, "step": 1892 }, { "epoch": 1.5805733370442527, "grad_norm": 0.34610041975975037, "learning_rate": 5.400655402015451e-06, "loss": 0.446, "step": 1893 }, { "epoch": 1.5814082939048149, "grad_norm": 0.3780929744243622, "learning_rate": 5.395809205425658e-06, "loss": 0.4616, "step": 1894 }, { "epoch": 1.582243250765377, "grad_norm": 0.37403228878974915, "learning_rate": 5.390962634629058e-06, "loss": 0.3996, "step": 1895 }, { "epoch": 1.5830782076259393, "grad_norm": 0.3381528854370117, "learning_rate": 5.386115694207702e-06, "loss": 0.4223, "step": 1896 }, { "epoch": 1.5839131644865017, "grad_norm": 0.36906176805496216, "learning_rate": 5.381268388744e-06, "loss": 0.4134, "step": 1897 }, { "epoch": 1.5847481213470638, "grad_norm": 0.46011972427368164, "learning_rate": 5.3764207228207e-06, "loss": 0.4364, "step": 1898 }, { "epoch": 1.585583078207626, "grad_norm": 0.38621464371681213, "learning_rate": 5.371572701020891e-06, "loss": 0.4757, "step": 1899 }, { "epoch": 1.586418035068188, "grad_norm": 0.3454957902431488, "learning_rate": 5.366724327928002e-06, "loss": 0.4107, "step": 1900 }, { "epoch": 1.5872529919287504, "grad_norm": 0.3929802477359772, "learning_rate": 5.361875608125795e-06, "loss": 0.4456, "step": 1901 }, { "epoch": 1.5880879487893127, "grad_norm": 0.4066632390022278, "learning_rate": 5.357026546198354e-06, "loss": 0.4476, "step": 1902 }, { "epoch": 1.5889229056498748, "grad_norm": 0.3057642877101898, "learning_rate": 5.352177146730091e-06, "loss": 0.3943, "step": 1903 }, { "epoch": 1.589757862510437, "grad_norm": 0.30766838788986206, "learning_rate": 5.347327414305734e-06, "loss": 0.448, "step": 1904 }, { "epoch": 1.590592819370999, "grad_norm": 0.32481852173805237, "learning_rate": 5.342477353510333e-06, "loss": 0.4302, "step": 1905 }, { "epoch": 1.5914277762315614, "grad_norm": 0.35401418805122375, "learning_rate": 5.3376269689292374e-06, "loss": 0.4271, "step": 1906 }, { "epoch": 1.5922627330921237, "grad_norm": 0.3198167681694031, "learning_rate": 5.332776265148111e-06, "loss": 0.4357, "step": 1907 }, { "epoch": 1.5930976899526859, "grad_norm": 0.3053881824016571, "learning_rate": 5.327925246752917e-06, "loss": 0.4084, "step": 1908 }, { "epoch": 1.593932646813248, "grad_norm": 0.29355645179748535, "learning_rate": 5.323073918329914e-06, "loss": 0.4331, "step": 1909 }, { "epoch": 1.59476760367381, "grad_norm": 0.3260944187641144, "learning_rate": 5.318222284465658e-06, "loss": 0.4303, "step": 1910 }, { "epoch": 1.5956025605343724, "grad_norm": 0.3504318594932556, "learning_rate": 5.313370349746991e-06, "loss": 0.4171, "step": 1911 }, { "epoch": 1.5964375173949346, "grad_norm": 0.3131704330444336, "learning_rate": 5.308518118761039e-06, "loss": 0.4369, "step": 1912 }, { "epoch": 1.597272474255497, "grad_norm": 0.35853832960128784, "learning_rate": 5.303665596095208e-06, "loss": 0.4333, "step": 1913 }, { "epoch": 1.598107431116059, "grad_norm": 0.3414697051048279, "learning_rate": 5.298812786337182e-06, "loss": 0.4345, "step": 1914 }, { "epoch": 1.5989423879766211, "grad_norm": 0.3398708403110504, "learning_rate": 5.293959694074915e-06, "loss": 0.4155, "step": 1915 }, { "epoch": 1.5997773448371833, "grad_norm": 0.33270251750946045, "learning_rate": 5.289106323896627e-06, "loss": 0.4448, "step": 1916 }, { "epoch": 1.6006123016977456, "grad_norm": 0.34090009331703186, "learning_rate": 5.284252680390803e-06, "loss": 0.4292, "step": 1917 }, { "epoch": 1.601447258558308, "grad_norm": 0.3545594811439514, "learning_rate": 5.2793987681461864e-06, "loss": 0.4117, "step": 1918 }, { "epoch": 1.60228221541887, "grad_norm": 0.33120858669281006, "learning_rate": 5.274544591751771e-06, "loss": 0.4238, "step": 1919 }, { "epoch": 1.6031171722794322, "grad_norm": 0.36359503865242004, "learning_rate": 5.269690155796805e-06, "loss": 0.4447, "step": 1920 }, { "epoch": 1.6039521291399943, "grad_norm": 0.31687071919441223, "learning_rate": 5.2648354648707785e-06, "loss": 0.4237, "step": 1921 }, { "epoch": 1.6047870860005566, "grad_norm": 0.32106196880340576, "learning_rate": 5.259980523563426e-06, "loss": 0.4222, "step": 1922 }, { "epoch": 1.605622042861119, "grad_norm": 0.32490116357803345, "learning_rate": 5.255125336464714e-06, "loss": 0.3776, "step": 1923 }, { "epoch": 1.606456999721681, "grad_norm": 0.3727554678916931, "learning_rate": 5.250269908164845e-06, "loss": 0.4101, "step": 1924 }, { "epoch": 1.6072919565822432, "grad_norm": 0.37710893154144287, "learning_rate": 5.245414243254251e-06, "loss": 0.4458, "step": 1925 }, { "epoch": 1.6081269134428053, "grad_norm": 0.33087584376335144, "learning_rate": 5.240558346323582e-06, "loss": 0.4006, "step": 1926 }, { "epoch": 1.6089618703033677, "grad_norm": 0.4362911283969879, "learning_rate": 5.235702221963714e-06, "loss": 0.4961, "step": 1927 }, { "epoch": 1.60979682716393, "grad_norm": 0.34910181164741516, "learning_rate": 5.230845874765733e-06, "loss": 0.4001, "step": 1928 }, { "epoch": 1.6106317840244921, "grad_norm": 0.38714462518692017, "learning_rate": 5.225989309320937e-06, "loss": 0.4262, "step": 1929 }, { "epoch": 1.6114667408850543, "grad_norm": 0.353591650724411, "learning_rate": 5.221132530220831e-06, "loss": 0.4089, "step": 1930 }, { "epoch": 1.6123016977456164, "grad_norm": 0.38091251254081726, "learning_rate": 5.216275542057123e-06, "loss": 0.4674, "step": 1931 }, { "epoch": 1.6131366546061787, "grad_norm": 0.3758563697338104, "learning_rate": 5.2114183494217175e-06, "loss": 0.4304, "step": 1932 }, { "epoch": 1.6139716114667408, "grad_norm": 0.38357627391815186, "learning_rate": 5.20656095690671e-06, "loss": 0.4212, "step": 1933 }, { "epoch": 1.6148065683273032, "grad_norm": 0.284772664308548, "learning_rate": 5.2017033691043895e-06, "loss": 0.359, "step": 1934 }, { "epoch": 1.6156415251878653, "grad_norm": 0.3330126702785492, "learning_rate": 5.196845590607225e-06, "loss": 0.4315, "step": 1935 }, { "epoch": 1.6164764820484274, "grad_norm": 0.340209424495697, "learning_rate": 5.191987626007869e-06, "loss": 0.4315, "step": 1936 }, { "epoch": 1.6173114389089895, "grad_norm": 0.34216001629829407, "learning_rate": 5.187129479899152e-06, "loss": 0.4371, "step": 1937 }, { "epoch": 1.6181463957695519, "grad_norm": 0.3907829225063324, "learning_rate": 5.1822711568740706e-06, "loss": 0.4312, "step": 1938 }, { "epoch": 1.6189813526301142, "grad_norm": 0.3343060612678528, "learning_rate": 5.1774126615257905e-06, "loss": 0.4093, "step": 1939 }, { "epoch": 1.6198163094906763, "grad_norm": 0.37176546454429626, "learning_rate": 5.172553998447641e-06, "loss": 0.433, "step": 1940 }, { "epoch": 1.6206512663512385, "grad_norm": 0.3429142236709595, "learning_rate": 5.167695172233112e-06, "loss": 0.4157, "step": 1941 }, { "epoch": 1.6214862232118006, "grad_norm": 0.35078883171081543, "learning_rate": 5.162836187475846e-06, "loss": 0.4107, "step": 1942 }, { "epoch": 1.622321180072363, "grad_norm": 0.3706420660018921, "learning_rate": 5.157977048769632e-06, "loss": 0.46, "step": 1943 }, { "epoch": 1.6231561369329253, "grad_norm": 0.39558786153793335, "learning_rate": 5.153117760708411e-06, "loss": 0.4269, "step": 1944 }, { "epoch": 1.6239910937934874, "grad_norm": 0.31738004088401794, "learning_rate": 5.148258327886258e-06, "loss": 0.3731, "step": 1945 }, { "epoch": 1.6248260506540495, "grad_norm": 0.3525658845901489, "learning_rate": 5.143398754897391e-06, "loss": 0.4281, "step": 1946 }, { "epoch": 1.6256610075146116, "grad_norm": 0.35576876997947693, "learning_rate": 5.138539046336158e-06, "loss": 0.4285, "step": 1947 }, { "epoch": 1.626495964375174, "grad_norm": 0.3395101726055145, "learning_rate": 5.133679206797037e-06, "loss": 0.4044, "step": 1948 }, { "epoch": 1.6273309212357363, "grad_norm": 0.33635228872299194, "learning_rate": 5.128819240874625e-06, "loss": 0.41, "step": 1949 }, { "epoch": 1.6281658780962984, "grad_norm": 0.34870168566703796, "learning_rate": 5.1239591531636425e-06, "loss": 0.4596, "step": 1950 }, { "epoch": 1.6290008349568605, "grad_norm": 0.34783926606178284, "learning_rate": 5.119098948258926e-06, "loss": 0.4164, "step": 1951 }, { "epoch": 1.6298357918174227, "grad_norm": 0.33956190943717957, "learning_rate": 5.11423863075542e-06, "loss": 0.3999, "step": 1952 }, { "epoch": 1.630670748677985, "grad_norm": 0.34020376205444336, "learning_rate": 5.109378205248177e-06, "loss": 0.402, "step": 1953 }, { "epoch": 1.6315057055385471, "grad_norm": 0.36644262075424194, "learning_rate": 5.1045176763323505e-06, "loss": 0.3967, "step": 1954 }, { "epoch": 1.6323406623991095, "grad_norm": 0.32299256324768066, "learning_rate": 5.099657048603193e-06, "loss": 0.4431, "step": 1955 }, { "epoch": 1.6331756192596716, "grad_norm": 0.3148636817932129, "learning_rate": 5.094796326656048e-06, "loss": 0.4139, "step": 1956 }, { "epoch": 1.6340105761202337, "grad_norm": 0.32491156458854675, "learning_rate": 5.089935515086353e-06, "loss": 0.4223, "step": 1957 }, { "epoch": 1.634845532980796, "grad_norm": 0.3649137318134308, "learning_rate": 5.085074618489625e-06, "loss": 0.4381, "step": 1958 }, { "epoch": 1.6356804898413582, "grad_norm": 0.3737083375453949, "learning_rate": 5.080213641461464e-06, "loss": 0.4268, "step": 1959 }, { "epoch": 1.6365154467019205, "grad_norm": 0.3530406057834625, "learning_rate": 5.075352588597546e-06, "loss": 0.4595, "step": 1960 }, { "epoch": 1.6373504035624826, "grad_norm": 0.3585589826107025, "learning_rate": 5.070491464493619e-06, "loss": 0.4188, "step": 1961 }, { "epoch": 1.6381853604230447, "grad_norm": 0.34577885270118713, "learning_rate": 5.065630273745495e-06, "loss": 0.4164, "step": 1962 }, { "epoch": 1.6390203172836069, "grad_norm": 0.38934212923049927, "learning_rate": 5.0607690209490544e-06, "loss": 0.4634, "step": 1963 }, { "epoch": 1.6398552741441692, "grad_norm": 0.329761266708374, "learning_rate": 5.055907710700233e-06, "loss": 0.3976, "step": 1964 }, { "epoch": 1.6406902310047315, "grad_norm": 0.41027137637138367, "learning_rate": 5.05104634759502e-06, "loss": 0.4345, "step": 1965 }, { "epoch": 1.6415251878652937, "grad_norm": 0.34364086389541626, "learning_rate": 5.046184936229456e-06, "loss": 0.4126, "step": 1966 }, { "epoch": 1.6423601447258558, "grad_norm": 0.35932907462120056, "learning_rate": 5.041323481199629e-06, "loss": 0.4491, "step": 1967 }, { "epoch": 1.643195101586418, "grad_norm": 0.3351580798625946, "learning_rate": 5.036461987101666e-06, "loss": 0.3844, "step": 1968 }, { "epoch": 1.6440300584469802, "grad_norm": 0.3834809958934784, "learning_rate": 5.03160045853173e-06, "loss": 0.419, "step": 1969 }, { "epoch": 1.6448650153075426, "grad_norm": 0.37965503334999084, "learning_rate": 5.02673890008602e-06, "loss": 0.4109, "step": 1970 }, { "epoch": 1.6456999721681047, "grad_norm": 0.3291836380958557, "learning_rate": 5.021877316360759e-06, "loss": 0.4351, "step": 1971 }, { "epoch": 1.6465349290286668, "grad_norm": 0.37122640013694763, "learning_rate": 5.0170157119522e-06, "loss": 0.4429, "step": 1972 }, { "epoch": 1.647369885889229, "grad_norm": 0.3883429765701294, "learning_rate": 5.012154091456607e-06, "loss": 0.4069, "step": 1973 }, { "epoch": 1.6482048427497913, "grad_norm": 0.3664863407611847, "learning_rate": 5.007292459470267e-06, "loss": 0.4225, "step": 1974 }, { "epoch": 1.6490397996103536, "grad_norm": 0.3596246838569641, "learning_rate": 5.002430820589474e-06, "loss": 0.4102, "step": 1975 }, { "epoch": 1.6498747564709157, "grad_norm": 0.3610316216945648, "learning_rate": 4.9975691794105276e-06, "loss": 0.4001, "step": 1976 }, { "epoch": 1.6507097133314779, "grad_norm": 0.48287826776504517, "learning_rate": 4.9927075405297356e-06, "loss": 0.4837, "step": 1977 }, { "epoch": 1.65154467019204, "grad_norm": 0.32239797711372375, "learning_rate": 4.987845908543395e-06, "loss": 0.3753, "step": 1978 }, { "epoch": 1.6523796270526023, "grad_norm": 0.35475897789001465, "learning_rate": 4.982984288047802e-06, "loss": 0.4131, "step": 1979 }, { "epoch": 1.6532145839131644, "grad_norm": 0.4339171350002289, "learning_rate": 4.978122683639241e-06, "loss": 0.4282, "step": 1980 }, { "epoch": 1.6540495407737268, "grad_norm": 0.36862677335739136, "learning_rate": 4.9732610999139815e-06, "loss": 0.3958, "step": 1981 }, { "epoch": 1.654884497634289, "grad_norm": 0.3392747640609741, "learning_rate": 4.968399541468271e-06, "loss": 0.4149, "step": 1982 }, { "epoch": 1.655719454494851, "grad_norm": 0.3631640672683716, "learning_rate": 4.9635380128983354e-06, "loss": 0.4302, "step": 1983 }, { "epoch": 1.6565544113554131, "grad_norm": 0.3904920816421509, "learning_rate": 4.958676518800372e-06, "loss": 0.3852, "step": 1984 }, { "epoch": 1.6573893682159755, "grad_norm": 0.3725334107875824, "learning_rate": 4.953815063770546e-06, "loss": 0.4591, "step": 1985 }, { "epoch": 1.6582243250765378, "grad_norm": 0.3575487732887268, "learning_rate": 4.948953652404981e-06, "loss": 0.4441, "step": 1986 }, { "epoch": 1.6590592819371, "grad_norm": 0.3323996663093567, "learning_rate": 4.94409228929977e-06, "loss": 0.4027, "step": 1987 }, { "epoch": 1.659894238797662, "grad_norm": 0.3714267313480377, "learning_rate": 4.939230979050947e-06, "loss": 0.4312, "step": 1988 }, { "epoch": 1.6607291956582242, "grad_norm": 0.3395485579967499, "learning_rate": 4.934369726254506e-06, "loss": 0.4016, "step": 1989 }, { "epoch": 1.6615641525187865, "grad_norm": 0.34637805819511414, "learning_rate": 4.9295085355063835e-06, "loss": 0.4629, "step": 1990 }, { "epoch": 1.6623991093793489, "grad_norm": 0.3552546203136444, "learning_rate": 4.924647411402455e-06, "loss": 0.439, "step": 1991 }, { "epoch": 1.663234066239911, "grad_norm": 0.37970343232154846, "learning_rate": 4.919786358538537e-06, "loss": 0.4711, "step": 1992 }, { "epoch": 1.664069023100473, "grad_norm": 0.33893993496894836, "learning_rate": 4.914925381510376e-06, "loss": 0.4542, "step": 1993 }, { "epoch": 1.6649039799610352, "grad_norm": 0.3268590569496155, "learning_rate": 4.910064484913648e-06, "loss": 0.3903, "step": 1994 }, { "epoch": 1.6657389368215976, "grad_norm": 0.379006028175354, "learning_rate": 4.905203673343954e-06, "loss": 0.4115, "step": 1995 }, { "epoch": 1.66657389368216, "grad_norm": 0.425054132938385, "learning_rate": 4.900342951396808e-06, "loss": 0.4461, "step": 1996 }, { "epoch": 1.667408850542722, "grad_norm": 0.3604053854942322, "learning_rate": 4.895482323667652e-06, "loss": 0.4293, "step": 1997 }, { "epoch": 1.6682438074032842, "grad_norm": 0.4345487654209137, "learning_rate": 4.890621794751825e-06, "loss": 0.4379, "step": 1998 }, { "epoch": 1.6690787642638463, "grad_norm": 0.31838148832321167, "learning_rate": 4.885761369244581e-06, "loss": 0.3869, "step": 1999 }, { "epoch": 1.6699137211244086, "grad_norm": 0.3523746430873871, "learning_rate": 4.880901051741075e-06, "loss": 0.4456, "step": 2000 }, { "epoch": 1.6707486779849707, "grad_norm": 0.36456334590911865, "learning_rate": 4.8760408468363575e-06, "loss": 0.3868, "step": 2001 }, { "epoch": 1.671583634845533, "grad_norm": 0.38105544447898865, "learning_rate": 4.871180759125377e-06, "loss": 0.4575, "step": 2002 }, { "epoch": 1.6724185917060952, "grad_norm": 0.3561810851097107, "learning_rate": 4.866320793202964e-06, "loss": 0.4017, "step": 2003 }, { "epoch": 1.6732535485666573, "grad_norm": 0.3730800151824951, "learning_rate": 4.861460953663842e-06, "loss": 0.4205, "step": 2004 }, { "epoch": 1.6740885054272194, "grad_norm": 0.37747299671173096, "learning_rate": 4.856601245102611e-06, "loss": 0.4511, "step": 2005 }, { "epoch": 1.6749234622877818, "grad_norm": 0.3362569212913513, "learning_rate": 4.851741672113743e-06, "loss": 0.3981, "step": 2006 }, { "epoch": 1.6757584191483441, "grad_norm": 0.37621358036994934, "learning_rate": 4.8468822392915925e-06, "loss": 0.4123, "step": 2007 }, { "epoch": 1.6765933760089062, "grad_norm": 0.39770787954330444, "learning_rate": 4.8420229512303696e-06, "loss": 0.4156, "step": 2008 }, { "epoch": 1.6774283328694684, "grad_norm": 0.34706565737724304, "learning_rate": 4.8371638125241555e-06, "loss": 0.4123, "step": 2009 }, { "epoch": 1.6782632897300305, "grad_norm": 0.3517758250236511, "learning_rate": 4.8323048277668885e-06, "loss": 0.4057, "step": 2010 }, { "epoch": 1.6790982465905928, "grad_norm": 0.3772435486316681, "learning_rate": 4.8274460015523595e-06, "loss": 0.4101, "step": 2011 }, { "epoch": 1.6799332034511552, "grad_norm": 0.3791804909706116, "learning_rate": 4.822587338474211e-06, "loss": 0.413, "step": 2012 }, { "epoch": 1.6807681603117173, "grad_norm": 0.32566654682159424, "learning_rate": 4.81772884312593e-06, "loss": 0.3888, "step": 2013 }, { "epoch": 1.6816031171722794, "grad_norm": 0.3415806293487549, "learning_rate": 4.812870520100848e-06, "loss": 0.4257, "step": 2014 }, { "epoch": 1.6824380740328415, "grad_norm": 0.312272846698761, "learning_rate": 4.808012373992132e-06, "loss": 0.3832, "step": 2015 }, { "epoch": 1.6832730308934039, "grad_norm": 0.35398000478744507, "learning_rate": 4.803154409392776e-06, "loss": 0.4726, "step": 2016 }, { "epoch": 1.6841079877539662, "grad_norm": 0.3544261157512665, "learning_rate": 4.798296630895614e-06, "loss": 0.4257, "step": 2017 }, { "epoch": 1.6849429446145283, "grad_norm": 0.349118173122406, "learning_rate": 4.793439043093292e-06, "loss": 0.4057, "step": 2018 }, { "epoch": 1.6857779014750904, "grad_norm": 0.3374156653881073, "learning_rate": 4.788581650578285e-06, "loss": 0.4018, "step": 2019 }, { "epoch": 1.6866128583356526, "grad_norm": 0.3941425681114197, "learning_rate": 4.783724457942878e-06, "loss": 0.49, "step": 2020 }, { "epoch": 1.687447815196215, "grad_norm": 0.3208763301372528, "learning_rate": 4.778867469779169e-06, "loss": 0.3497, "step": 2021 }, { "epoch": 1.688282772056777, "grad_norm": 0.33282706141471863, "learning_rate": 4.774010690679065e-06, "loss": 0.4124, "step": 2022 }, { "epoch": 1.6891177289173394, "grad_norm": 0.3525705635547638, "learning_rate": 4.7691541252342685e-06, "loss": 0.4333, "step": 2023 }, { "epoch": 1.6899526857779015, "grad_norm": 0.337832510471344, "learning_rate": 4.764297778036289e-06, "loss": 0.4169, "step": 2024 }, { "epoch": 1.6907876426384636, "grad_norm": 0.3368406891822815, "learning_rate": 4.759441653676419e-06, "loss": 0.4402, "step": 2025 }, { "epoch": 1.691622599499026, "grad_norm": 0.365346223115921, "learning_rate": 4.75458575674575e-06, "loss": 0.3954, "step": 2026 }, { "epoch": 1.692457556359588, "grad_norm": 0.42089319229125977, "learning_rate": 4.749730091835156e-06, "loss": 0.4659, "step": 2027 }, { "epoch": 1.6932925132201504, "grad_norm": 0.3225705325603485, "learning_rate": 4.744874663535289e-06, "loss": 0.3573, "step": 2028 }, { "epoch": 1.6941274700807125, "grad_norm": 0.3151475489139557, "learning_rate": 4.740019476436577e-06, "loss": 0.3947, "step": 2029 }, { "epoch": 1.6949624269412746, "grad_norm": 0.35334470868110657, "learning_rate": 4.735164535129222e-06, "loss": 0.4814, "step": 2030 }, { "epoch": 1.6957973838018368, "grad_norm": 0.3108939230442047, "learning_rate": 4.7303098442031956e-06, "loss": 0.4115, "step": 2031 }, { "epoch": 1.696632340662399, "grad_norm": 0.3492731750011444, "learning_rate": 4.72545540824823e-06, "loss": 0.4497, "step": 2032 }, { "epoch": 1.6974672975229614, "grad_norm": 0.33415913581848145, "learning_rate": 4.720601231853814e-06, "loss": 0.4082, "step": 2033 }, { "epoch": 1.6983022543835236, "grad_norm": 0.32531625032424927, "learning_rate": 4.715747319609199e-06, "loss": 0.4181, "step": 2034 }, { "epoch": 1.6991372112440857, "grad_norm": 0.3147023320198059, "learning_rate": 4.710893676103375e-06, "loss": 0.4444, "step": 2035 }, { "epoch": 1.6999721681046478, "grad_norm": 0.32169079780578613, "learning_rate": 4.706040305925087e-06, "loss": 0.3577, "step": 2036 }, { "epoch": 1.7008071249652101, "grad_norm": 0.34920260310173035, "learning_rate": 4.70118721366282e-06, "loss": 0.4769, "step": 2037 }, { "epoch": 1.7016420818257725, "grad_norm": 0.3368724286556244, "learning_rate": 4.696334403904794e-06, "loss": 0.4007, "step": 2038 }, { "epoch": 1.7024770386863346, "grad_norm": 0.32136303186416626, "learning_rate": 4.691481881238964e-06, "loss": 0.4265, "step": 2039 }, { "epoch": 1.7033119955468967, "grad_norm": 0.3516751825809479, "learning_rate": 4.68662965025301e-06, "loss": 0.4082, "step": 2040 }, { "epoch": 1.7041469524074588, "grad_norm": 0.3660966157913208, "learning_rate": 4.681777715534343e-06, "loss": 0.4967, "step": 2041 }, { "epoch": 1.7049819092680212, "grad_norm": 0.32211339473724365, "learning_rate": 4.676926081670087e-06, "loss": 0.4112, "step": 2042 }, { "epoch": 1.7058168661285835, "grad_norm": 0.33570241928100586, "learning_rate": 4.6720747532470845e-06, "loss": 0.4742, "step": 2043 }, { "epoch": 1.7066518229891456, "grad_norm": 0.3409712016582489, "learning_rate": 4.667223734851891e-06, "loss": 0.4005, "step": 2044 }, { "epoch": 1.7074867798497078, "grad_norm": 0.3431800603866577, "learning_rate": 4.662373031070765e-06, "loss": 0.4218, "step": 2045 }, { "epoch": 1.7083217367102699, "grad_norm": 0.3443627953529358, "learning_rate": 4.657522646489669e-06, "loss": 0.4643, "step": 2046 }, { "epoch": 1.7091566935708322, "grad_norm": 0.32822251319885254, "learning_rate": 4.652672585694267e-06, "loss": 0.3962, "step": 2047 }, { "epoch": 1.7099916504313943, "grad_norm": 0.35139337182044983, "learning_rate": 4.647822853269911e-06, "loss": 0.4395, "step": 2048 }, { "epoch": 1.7108266072919567, "grad_norm": 0.326706200838089, "learning_rate": 4.6429734538016485e-06, "loss": 0.4242, "step": 2049 }, { "epoch": 1.7116615641525188, "grad_norm": 0.34919556975364685, "learning_rate": 4.638124391874207e-06, "loss": 0.4202, "step": 2050 }, { "epoch": 1.712496521013081, "grad_norm": 0.30027157068252563, "learning_rate": 4.633275672071998e-06, "loss": 0.4219, "step": 2051 }, { "epoch": 1.713331477873643, "grad_norm": 0.35033535957336426, "learning_rate": 4.628427298979111e-06, "loss": 0.4381, "step": 2052 }, { "epoch": 1.7141664347342054, "grad_norm": 0.36598849296569824, "learning_rate": 4.623579277179302e-06, "loss": 0.4128, "step": 2053 }, { "epoch": 1.7150013915947677, "grad_norm": 0.3485713303089142, "learning_rate": 4.618731611256003e-06, "loss": 0.4432, "step": 2054 }, { "epoch": 1.7158363484553298, "grad_norm": 0.29074057936668396, "learning_rate": 4.613884305792299e-06, "loss": 0.3842, "step": 2055 }, { "epoch": 1.716671305315892, "grad_norm": 0.353759229183197, "learning_rate": 4.609037365370944e-06, "loss": 0.4587, "step": 2056 }, { "epoch": 1.717506262176454, "grad_norm": 0.3331258296966553, "learning_rate": 4.6041907945743436e-06, "loss": 0.4559, "step": 2057 }, { "epoch": 1.7183412190370164, "grad_norm": 0.30563151836395264, "learning_rate": 4.5993445979845505e-06, "loss": 0.4286, "step": 2058 }, { "epoch": 1.7191761758975788, "grad_norm": 0.32027071714401245, "learning_rate": 4.59449878018327e-06, "loss": 0.4289, "step": 2059 }, { "epoch": 1.7200111327581409, "grad_norm": 0.3304319679737091, "learning_rate": 4.5896533457518425e-06, "loss": 0.3991, "step": 2060 }, { "epoch": 1.720846089618703, "grad_norm": 0.3221267759799957, "learning_rate": 4.5848082992712516e-06, "loss": 0.451, "step": 2061 }, { "epoch": 1.7216810464792651, "grad_norm": 0.31960198283195496, "learning_rate": 4.579963645322112e-06, "loss": 0.4373, "step": 2062 }, { "epoch": 1.7225160033398275, "grad_norm": 0.3057258725166321, "learning_rate": 4.575119388484665e-06, "loss": 0.427, "step": 2063 }, { "epoch": 1.7233509602003898, "grad_norm": 0.3274897336959839, "learning_rate": 4.570275533338781e-06, "loss": 0.3978, "step": 2064 }, { "epoch": 1.724185917060952, "grad_norm": 0.3548074960708618, "learning_rate": 4.565432084463946e-06, "loss": 0.4674, "step": 2065 }, { "epoch": 1.725020873921514, "grad_norm": 0.35200071334838867, "learning_rate": 4.560589046439263e-06, "loss": 0.4558, "step": 2066 }, { "epoch": 1.7258558307820762, "grad_norm": 0.31321144104003906, "learning_rate": 4.555746423843452e-06, "loss": 0.4087, "step": 2067 }, { "epoch": 1.7266907876426385, "grad_norm": 0.31088799238204956, "learning_rate": 4.550904221254832e-06, "loss": 0.4129, "step": 2068 }, { "epoch": 1.7275257445032006, "grad_norm": 0.3436810076236725, "learning_rate": 4.546062443251332e-06, "loss": 0.4584, "step": 2069 }, { "epoch": 1.728360701363763, "grad_norm": 0.3265104293823242, "learning_rate": 4.541221094410473e-06, "loss": 0.4451, "step": 2070 }, { "epoch": 1.729195658224325, "grad_norm": 0.3359051048755646, "learning_rate": 4.536380179309376e-06, "loss": 0.429, "step": 2071 }, { "epoch": 1.7300306150848872, "grad_norm": 0.33693617582321167, "learning_rate": 4.53153970252475e-06, "loss": 0.4023, "step": 2072 }, { "epoch": 1.7308655719454493, "grad_norm": 0.30480071902275085, "learning_rate": 4.526699668632887e-06, "loss": 0.4209, "step": 2073 }, { "epoch": 1.7317005288060117, "grad_norm": 0.3390927314758301, "learning_rate": 4.521860082209666e-06, "loss": 0.4679, "step": 2074 }, { "epoch": 1.732535485666574, "grad_norm": 0.3228178918361664, "learning_rate": 4.5170209478305336e-06, "loss": 0.3468, "step": 2075 }, { "epoch": 1.7333704425271361, "grad_norm": 0.32701677083969116, "learning_rate": 4.512182270070519e-06, "loss": 0.4174, "step": 2076 }, { "epoch": 1.7342053993876982, "grad_norm": 0.3326520323753357, "learning_rate": 4.507344053504217e-06, "loss": 0.4019, "step": 2077 }, { "epoch": 1.7350403562482604, "grad_norm": 0.30837902426719666, "learning_rate": 4.502506302705781e-06, "loss": 0.4071, "step": 2078 }, { "epoch": 1.7358753131088227, "grad_norm": 0.3535727858543396, "learning_rate": 4.497669022248931e-06, "loss": 0.4535, "step": 2079 }, { "epoch": 1.736710269969385, "grad_norm": 0.32382434606552124, "learning_rate": 4.492832216706937e-06, "loss": 0.4157, "step": 2080 }, { "epoch": 1.7375452268299472, "grad_norm": 0.3226712644100189, "learning_rate": 4.487995890652623e-06, "loss": 0.3979, "step": 2081 }, { "epoch": 1.7383801836905093, "grad_norm": 0.3595203161239624, "learning_rate": 4.483160048658361e-06, "loss": 0.3989, "step": 2082 }, { "epoch": 1.7392151405510714, "grad_norm": 0.328548401594162, "learning_rate": 4.478324695296061e-06, "loss": 0.4232, "step": 2083 }, { "epoch": 1.7400500974116337, "grad_norm": 0.3627433776855469, "learning_rate": 4.473489835137176e-06, "loss": 0.4565, "step": 2084 }, { "epoch": 1.740885054272196, "grad_norm": 0.324698269367218, "learning_rate": 4.468655472752687e-06, "loss": 0.4052, "step": 2085 }, { "epoch": 1.7417200111327582, "grad_norm": 0.38234850764274597, "learning_rate": 4.463821612713108e-06, "loss": 0.4486, "step": 2086 }, { "epoch": 1.7425549679933203, "grad_norm": 0.34574222564697266, "learning_rate": 4.45898825958848e-06, "loss": 0.4094, "step": 2087 }, { "epoch": 1.7433899248538824, "grad_norm": 0.38574638962745667, "learning_rate": 4.45415541794836e-06, "loss": 0.4311, "step": 2088 }, { "epoch": 1.7442248817144448, "grad_norm": 0.36159300804138184, "learning_rate": 4.449323092361826e-06, "loss": 0.4157, "step": 2089 }, { "epoch": 1.745059838575007, "grad_norm": 0.3228329122066498, "learning_rate": 4.444491287397462e-06, "loss": 0.4008, "step": 2090 }, { "epoch": 1.7458947954355692, "grad_norm": 0.36192587018013, "learning_rate": 4.4396600076233674e-06, "loss": 0.4305, "step": 2091 }, { "epoch": 1.7467297522961314, "grad_norm": 0.38864821195602417, "learning_rate": 4.434829257607141e-06, "loss": 0.4238, "step": 2092 }, { "epoch": 1.7475647091566935, "grad_norm": 0.3545505702495575, "learning_rate": 4.429999041915879e-06, "loss": 0.4262, "step": 2093 }, { "epoch": 1.7483996660172558, "grad_norm": 0.3726899325847626, "learning_rate": 4.4251693651161775e-06, "loss": 0.4095, "step": 2094 }, { "epoch": 1.749234622877818, "grad_norm": 0.3759346008300781, "learning_rate": 4.420340231774116e-06, "loss": 0.4958, "step": 2095 }, { "epoch": 1.7500695797383803, "grad_norm": 0.31748688220977783, "learning_rate": 4.415511646455267e-06, "loss": 0.4075, "step": 2096 }, { "epoch": 1.7509045365989424, "grad_norm": 0.36711546778678894, "learning_rate": 4.410683613724684e-06, "loss": 0.4406, "step": 2097 }, { "epoch": 1.7517394934595045, "grad_norm": 0.35758402943611145, "learning_rate": 4.405856138146892e-06, "loss": 0.4106, "step": 2098 }, { "epoch": 1.7525744503200666, "grad_norm": 0.32199642062187195, "learning_rate": 4.401029224285897e-06, "loss": 0.4095, "step": 2099 }, { "epoch": 1.753409407180629, "grad_norm": 0.3315465450286865, "learning_rate": 4.396202876705169e-06, "loss": 0.4183, "step": 2100 }, { "epoch": 1.7542443640411913, "grad_norm": 0.4063730537891388, "learning_rate": 4.3913770999676446e-06, "loss": 0.4672, "step": 2101 }, { "epoch": 1.7550793209017534, "grad_norm": 0.41117537021636963, "learning_rate": 4.386551898635722e-06, "loss": 0.437, "step": 2102 }, { "epoch": 1.7559142777623156, "grad_norm": 0.3241084814071655, "learning_rate": 4.381727277271248e-06, "loss": 0.3997, "step": 2103 }, { "epoch": 1.7567492346228777, "grad_norm": 0.3372277021408081, "learning_rate": 4.376903240435535e-06, "loss": 0.4079, "step": 2104 }, { "epoch": 1.75758419148344, "grad_norm": 0.3993167579174042, "learning_rate": 4.372079792689327e-06, "loss": 0.4754, "step": 2105 }, { "epoch": 1.7584191483440024, "grad_norm": 0.32958129048347473, "learning_rate": 4.367256938592822e-06, "loss": 0.393, "step": 2106 }, { "epoch": 1.7592541052045645, "grad_norm": 0.3147376477718353, "learning_rate": 4.362434682705653e-06, "loss": 0.4413, "step": 2107 }, { "epoch": 1.7600890620651266, "grad_norm": 0.3799014687538147, "learning_rate": 4.357613029586888e-06, "loss": 0.4322, "step": 2108 }, { "epoch": 1.7609240189256887, "grad_norm": 0.3386380970478058, "learning_rate": 4.352791983795025e-06, "loss": 0.3883, "step": 2109 }, { "epoch": 1.761758975786251, "grad_norm": 0.3292320966720581, "learning_rate": 4.347971549887987e-06, "loss": 0.3993, "step": 2110 }, { "epoch": 1.7625939326468134, "grad_norm": 0.37684500217437744, "learning_rate": 4.343151732423121e-06, "loss": 0.4194, "step": 2111 }, { "epoch": 1.7634288895073755, "grad_norm": 0.3338208496570587, "learning_rate": 4.338332535957186e-06, "loss": 0.4228, "step": 2112 }, { "epoch": 1.7642638463679376, "grad_norm": 0.36370983719825745, "learning_rate": 4.333513965046358e-06, "loss": 0.4611, "step": 2113 }, { "epoch": 1.7650988032284998, "grad_norm": 0.34623032808303833, "learning_rate": 4.328696024246225e-06, "loss": 0.4215, "step": 2114 }, { "epoch": 1.765933760089062, "grad_norm": 0.3758486211299896, "learning_rate": 4.323878718111771e-06, "loss": 0.3733, "step": 2115 }, { "epoch": 1.7667687169496242, "grad_norm": 0.38890960812568665, "learning_rate": 4.319062051197387e-06, "loss": 0.4438, "step": 2116 }, { "epoch": 1.7676036738101866, "grad_norm": 0.3685013949871063, "learning_rate": 4.3142460280568535e-06, "loss": 0.4301, "step": 2117 }, { "epoch": 1.7684386306707487, "grad_norm": 0.3823639750480652, "learning_rate": 4.309430653243347e-06, "loss": 0.4291, "step": 2118 }, { "epoch": 1.7692735875313108, "grad_norm": 0.323375940322876, "learning_rate": 4.304615931309433e-06, "loss": 0.3417, "step": 2119 }, { "epoch": 1.770108544391873, "grad_norm": 0.350719690322876, "learning_rate": 4.299801866807051e-06, "loss": 0.4653, "step": 2120 }, { "epoch": 1.7709435012524353, "grad_norm": 0.3659028708934784, "learning_rate": 4.29498846428753e-06, "loss": 0.4395, "step": 2121 }, { "epoch": 1.7717784581129976, "grad_norm": 0.3819199502468109, "learning_rate": 4.2901757283015624e-06, "loss": 0.4365, "step": 2122 }, { "epoch": 1.7726134149735597, "grad_norm": 0.35544919967651367, "learning_rate": 4.285363663399217e-06, "loss": 0.4346, "step": 2123 }, { "epoch": 1.7734483718341219, "grad_norm": 0.3255147635936737, "learning_rate": 4.280552274129932e-06, "loss": 0.4137, "step": 2124 }, { "epoch": 1.774283328694684, "grad_norm": 0.3001202940940857, "learning_rate": 4.275741565042493e-06, "loss": 0.3906, "step": 2125 }, { "epoch": 1.7751182855552463, "grad_norm": 0.3474373519420624, "learning_rate": 4.27093154068506e-06, "loss": 0.4259, "step": 2126 }, { "epoch": 1.7759532424158087, "grad_norm": 0.3313716650009155, "learning_rate": 4.2661222056051285e-06, "loss": 0.4616, "step": 2127 }, { "epoch": 1.7767881992763708, "grad_norm": 0.3201616108417511, "learning_rate": 4.261313564349556e-06, "loss": 0.4144, "step": 2128 }, { "epoch": 1.777623156136933, "grad_norm": 0.3398187458515167, "learning_rate": 4.256505621464537e-06, "loss": 0.4346, "step": 2129 }, { "epoch": 1.778458112997495, "grad_norm": 0.36959484219551086, "learning_rate": 4.251698381495608e-06, "loss": 0.4349, "step": 2130 }, { "epoch": 1.7792930698580574, "grad_norm": 0.36238500475883484, "learning_rate": 4.246891848987641e-06, "loss": 0.4323, "step": 2131 }, { "epoch": 1.7801280267186197, "grad_norm": 0.3427729606628418, "learning_rate": 4.242086028484836e-06, "loss": 0.3973, "step": 2132 }, { "epoch": 1.7809629835791818, "grad_norm": 0.3551260232925415, "learning_rate": 4.237280924530723e-06, "loss": 0.4338, "step": 2133 }, { "epoch": 1.781797940439744, "grad_norm": 0.3406520187854767, "learning_rate": 4.2324765416681574e-06, "loss": 0.3935, "step": 2134 }, { "epoch": 1.782632897300306, "grad_norm": 0.34763282537460327, "learning_rate": 4.227672884439306e-06, "loss": 0.4151, "step": 2135 }, { "epoch": 1.7834678541608684, "grad_norm": 0.37360435724258423, "learning_rate": 4.222869957385653e-06, "loss": 0.4269, "step": 2136 }, { "epoch": 1.7843028110214305, "grad_norm": 0.30012276768684387, "learning_rate": 4.218067765047993e-06, "loss": 0.4167, "step": 2137 }, { "epoch": 1.7851377678819929, "grad_norm": 0.3347305655479431, "learning_rate": 4.213266311966426e-06, "loss": 0.4852, "step": 2138 }, { "epoch": 1.785972724742555, "grad_norm": 0.3613339364528656, "learning_rate": 4.208465602680352e-06, "loss": 0.4078, "step": 2139 }, { "epoch": 1.786807681603117, "grad_norm": 0.358439564704895, "learning_rate": 4.203665641728467e-06, "loss": 0.393, "step": 2140 }, { "epoch": 1.7876426384636792, "grad_norm": 0.3340969383716583, "learning_rate": 4.198866433648763e-06, "loss": 0.4167, "step": 2141 }, { "epoch": 1.7884775953242416, "grad_norm": 0.3320463299751282, "learning_rate": 4.194067982978516e-06, "loss": 0.4087, "step": 2142 }, { "epoch": 1.789312552184804, "grad_norm": 0.3395244777202606, "learning_rate": 4.189270294254284e-06, "loss": 0.4174, "step": 2143 }, { "epoch": 1.790147509045366, "grad_norm": 0.3697870969772339, "learning_rate": 4.184473372011918e-06, "loss": 0.4467, "step": 2144 }, { "epoch": 1.7909824659059281, "grad_norm": 0.3578076660633087, "learning_rate": 4.179677220786527e-06, "loss": 0.427, "step": 2145 }, { "epoch": 1.7918174227664903, "grad_norm": 0.3110730051994324, "learning_rate": 4.174881845112501e-06, "loss": 0.3817, "step": 2146 }, { "epoch": 1.7926523796270526, "grad_norm": 0.38355302810668945, "learning_rate": 4.170087249523494e-06, "loss": 0.4418, "step": 2147 }, { "epoch": 1.793487336487615, "grad_norm": 0.32311394810676575, "learning_rate": 4.165293438552425e-06, "loss": 0.3833, "step": 2148 }, { "epoch": 1.794322293348177, "grad_norm": 0.3056078851222992, "learning_rate": 4.160500416731468e-06, "loss": 0.425, "step": 2149 }, { "epoch": 1.7951572502087392, "grad_norm": 0.37316659092903137, "learning_rate": 4.155708188592051e-06, "loss": 0.4572, "step": 2150 }, { "epoch": 1.7959922070693013, "grad_norm": 0.32566145062446594, "learning_rate": 4.150916758664857e-06, "loss": 0.4228, "step": 2151 }, { "epoch": 1.7968271639298636, "grad_norm": 0.3004627525806427, "learning_rate": 4.146126131479804e-06, "loss": 0.4199, "step": 2152 }, { "epoch": 1.797662120790426, "grad_norm": 0.33122968673706055, "learning_rate": 4.141336311566062e-06, "loss": 0.4184, "step": 2153 }, { "epoch": 1.798497077650988, "grad_norm": 0.3077215850353241, "learning_rate": 4.136547303452033e-06, "loss": 0.4177, "step": 2154 }, { "epoch": 1.7993320345115502, "grad_norm": 0.32298514246940613, "learning_rate": 4.131759111665349e-06, "loss": 0.4493, "step": 2155 }, { "epoch": 1.8001669913721123, "grad_norm": 0.3282623887062073, "learning_rate": 4.126971740732875e-06, "loss": 0.4126, "step": 2156 }, { "epoch": 1.8010019482326747, "grad_norm": 0.32042574882507324, "learning_rate": 4.122185195180696e-06, "loss": 0.4052, "step": 2157 }, { "epoch": 1.8018369050932368, "grad_norm": 0.33264338970184326, "learning_rate": 4.117399479534119e-06, "loss": 0.4172, "step": 2158 }, { "epoch": 1.8026718619537991, "grad_norm": 0.3318585157394409, "learning_rate": 4.112614598317667e-06, "loss": 0.4248, "step": 2159 }, { "epoch": 1.8035068188143613, "grad_norm": 0.3430902063846588, "learning_rate": 4.107830556055072e-06, "loss": 0.438, "step": 2160 }, { "epoch": 1.8043417756749234, "grad_norm": 0.30758097767829895, "learning_rate": 4.103047357269274e-06, "loss": 0.4013, "step": 2161 }, { "epoch": 1.8051767325354857, "grad_norm": 0.3420552611351013, "learning_rate": 4.098265006482415e-06, "loss": 0.4515, "step": 2162 }, { "epoch": 1.8060116893960478, "grad_norm": 0.3245738744735718, "learning_rate": 4.093483508215834e-06, "loss": 0.4037, "step": 2163 }, { "epoch": 1.8068466462566102, "grad_norm": 0.3123556673526764, "learning_rate": 4.08870286699007e-06, "loss": 0.4085, "step": 2164 }, { "epoch": 1.8076816031171723, "grad_norm": 0.33183637261390686, "learning_rate": 4.083923087324843e-06, "loss": 0.4318, "step": 2165 }, { "epoch": 1.8085165599777344, "grad_norm": 0.3193284571170807, "learning_rate": 4.079144173739065e-06, "loss": 0.4418, "step": 2166 }, { "epoch": 1.8093515168382965, "grad_norm": 0.3377665877342224, "learning_rate": 4.074366130750825e-06, "loss": 0.4739, "step": 2167 }, { "epoch": 1.8101864736988589, "grad_norm": 0.32067328691482544, "learning_rate": 4.0695889628773935e-06, "loss": 0.4161, "step": 2168 }, { "epoch": 1.8110214305594212, "grad_norm": 0.3145309388637543, "learning_rate": 4.06481267463521e-06, "loss": 0.4165, "step": 2169 }, { "epoch": 1.8118563874199833, "grad_norm": 0.3170306086540222, "learning_rate": 4.060037270539883e-06, "loss": 0.4296, "step": 2170 }, { "epoch": 1.8126913442805455, "grad_norm": 0.3248929977416992, "learning_rate": 4.055262755106187e-06, "loss": 0.3674, "step": 2171 }, { "epoch": 1.8135263011411076, "grad_norm": 0.3332226276397705, "learning_rate": 4.050489132848053e-06, "loss": 0.4544, "step": 2172 }, { "epoch": 1.81436125800167, "grad_norm": 0.32795003056526184, "learning_rate": 4.045716408278567e-06, "loss": 0.442, "step": 2173 }, { "epoch": 1.8151962148622323, "grad_norm": 0.3286290168762207, "learning_rate": 4.040944585909977e-06, "loss": 0.434, "step": 2174 }, { "epoch": 1.8160311717227944, "grad_norm": 0.31550920009613037, "learning_rate": 4.036173670253662e-06, "loss": 0.4035, "step": 2175 }, { "epoch": 1.8168661285833565, "grad_norm": 0.36194270849227905, "learning_rate": 4.031403665820155e-06, "loss": 0.4163, "step": 2176 }, { "epoch": 1.8177010854439186, "grad_norm": 0.3391203284263611, "learning_rate": 4.0266345771191226e-06, "loss": 0.4224, "step": 2177 }, { "epoch": 1.818536042304481, "grad_norm": 0.3624163866043091, "learning_rate": 4.021866408659368e-06, "loss": 0.4701, "step": 2178 }, { "epoch": 1.8193709991650433, "grad_norm": 0.3216494619846344, "learning_rate": 4.017099164948824e-06, "loss": 0.4169, "step": 2179 }, { "epoch": 1.8202059560256054, "grad_norm": 0.3187766671180725, "learning_rate": 4.012332850494547e-06, "loss": 0.4427, "step": 2180 }, { "epoch": 1.8210409128861675, "grad_norm": 0.34153714776039124, "learning_rate": 4.0075674698027204e-06, "loss": 0.4437, "step": 2181 }, { "epoch": 1.8218758697467297, "grad_norm": 0.31207507848739624, "learning_rate": 4.002803027378635e-06, "loss": 0.3743, "step": 2182 }, { "epoch": 1.822710826607292, "grad_norm": 0.3470672369003296, "learning_rate": 3.998039527726702e-06, "loss": 0.4215, "step": 2183 }, { "epoch": 1.8235457834678541, "grad_norm": 0.30278652906417847, "learning_rate": 3.993276975350445e-06, "loss": 0.4045, "step": 2184 }, { "epoch": 1.8243807403284165, "grad_norm": 0.357191264629364, "learning_rate": 3.9885153747524804e-06, "loss": 0.4511, "step": 2185 }, { "epoch": 1.8252156971889786, "grad_norm": 0.32466569542884827, "learning_rate": 3.9837547304345344e-06, "loss": 0.3869, "step": 2186 }, { "epoch": 1.8260506540495407, "grad_norm": 0.32495784759521484, "learning_rate": 3.978995046897425e-06, "loss": 0.441, "step": 2187 }, { "epoch": 1.8268856109101028, "grad_norm": 0.4004955589771271, "learning_rate": 3.974236328641062e-06, "loss": 0.4484, "step": 2188 }, { "epoch": 1.8277205677706652, "grad_norm": 0.379339337348938, "learning_rate": 3.969478580164445e-06, "loss": 0.4479, "step": 2189 }, { "epoch": 1.8285555246312275, "grad_norm": 0.33759304881095886, "learning_rate": 3.964721805965653e-06, "loss": 0.4182, "step": 2190 }, { "epoch": 1.8293904814917896, "grad_norm": 0.31323251128196716, "learning_rate": 3.959966010541848e-06, "loss": 0.3861, "step": 2191 }, { "epoch": 1.8302254383523517, "grad_norm": 0.38530784845352173, "learning_rate": 3.9552111983892625e-06, "loss": 0.4289, "step": 2192 }, { "epoch": 1.8310603952129139, "grad_norm": 0.30853381752967834, "learning_rate": 3.950457374003203e-06, "loss": 0.4199, "step": 2193 }, { "epoch": 1.8318953520734762, "grad_norm": 0.3520318865776062, "learning_rate": 3.945704541878039e-06, "loss": 0.4414, "step": 2194 }, { "epoch": 1.8327303089340385, "grad_norm": 0.3504634201526642, "learning_rate": 3.940952706507204e-06, "loss": 0.4273, "step": 2195 }, { "epoch": 1.8335652657946007, "grad_norm": 0.33467522263526917, "learning_rate": 3.9362018723831915e-06, "loss": 0.3765, "step": 2196 }, { "epoch": 1.8344002226551628, "grad_norm": 0.38635948300361633, "learning_rate": 3.9314520439975416e-06, "loss": 0.5102, "step": 2197 }, { "epoch": 1.835235179515725, "grad_norm": 0.3249858021736145, "learning_rate": 3.92670322584085e-06, "loss": 0.3875, "step": 2198 }, { "epoch": 1.8360701363762872, "grad_norm": 0.3169037401676178, "learning_rate": 3.921955422402752e-06, "loss": 0.4487, "step": 2199 }, { "epoch": 1.8369050932368496, "grad_norm": 0.3339771330356598, "learning_rate": 3.917208638171928e-06, "loss": 0.4189, "step": 2200 }, { "epoch": 1.8377400500974117, "grad_norm": 0.321972519159317, "learning_rate": 3.9124628776360965e-06, "loss": 0.4264, "step": 2201 }, { "epoch": 1.8385750069579738, "grad_norm": 0.31870219111442566, "learning_rate": 3.907718145281999e-06, "loss": 0.4315, "step": 2202 }, { "epoch": 1.839409963818536, "grad_norm": 0.3208278715610504, "learning_rate": 3.902974445595415e-06, "loss": 0.4321, "step": 2203 }, { "epoch": 1.8402449206790983, "grad_norm": 0.32876765727996826, "learning_rate": 3.898231783061141e-06, "loss": 0.4097, "step": 2204 }, { "epoch": 1.8410798775396604, "grad_norm": 0.3446105420589447, "learning_rate": 3.893490162162997e-06, "loss": 0.4072, "step": 2205 }, { "epoch": 1.8419148344002227, "grad_norm": 0.3228136897087097, "learning_rate": 3.888749587383816e-06, "loss": 0.4058, "step": 2206 }, { "epoch": 1.8427497912607849, "grad_norm": 0.34928783774375916, "learning_rate": 3.884010063205443e-06, "loss": 0.4346, "step": 2207 }, { "epoch": 1.843584748121347, "grad_norm": 0.3505009412765503, "learning_rate": 3.879271594108731e-06, "loss": 0.4351, "step": 2208 }, { "epoch": 1.844419704981909, "grad_norm": 0.31555888056755066, "learning_rate": 3.874534184573528e-06, "loss": 0.4102, "step": 2209 }, { "epoch": 1.8452546618424714, "grad_norm": 0.33475685119628906, "learning_rate": 3.869797839078691e-06, "loss": 0.4591, "step": 2210 }, { "epoch": 1.8460896187030338, "grad_norm": 0.32045644521713257, "learning_rate": 3.865062562102068e-06, "loss": 0.3637, "step": 2211 }, { "epoch": 1.846924575563596, "grad_norm": 0.3396688997745514, "learning_rate": 3.86032835812049e-06, "loss": 0.4077, "step": 2212 }, { "epoch": 1.847759532424158, "grad_norm": 0.31990504264831543, "learning_rate": 3.8555952316097826e-06, "loss": 0.3899, "step": 2213 }, { "epoch": 1.8485944892847201, "grad_norm": 0.315897673368454, "learning_rate": 3.850863187044745e-06, "loss": 0.4042, "step": 2214 }, { "epoch": 1.8494294461452825, "grad_norm": 0.35471466183662415, "learning_rate": 3.8461322288991584e-06, "loss": 0.4419, "step": 2215 }, { "epoch": 1.8502644030058448, "grad_norm": 0.30807116627693176, "learning_rate": 3.8414023616457785e-06, "loss": 0.3709, "step": 2216 }, { "epoch": 1.851099359866407, "grad_norm": 0.36268168687820435, "learning_rate": 3.836673589756323e-06, "loss": 0.4358, "step": 2217 }, { "epoch": 1.851934316726969, "grad_norm": 0.38062819838523865, "learning_rate": 3.8319459177014815e-06, "loss": 0.4586, "step": 2218 }, { "epoch": 1.8527692735875312, "grad_norm": 0.32442593574523926, "learning_rate": 3.8272193499508954e-06, "loss": 0.3872, "step": 2219 }, { "epoch": 1.8536042304480935, "grad_norm": 0.3994154930114746, "learning_rate": 3.8224938909731704e-06, "loss": 0.4628, "step": 2220 }, { "epoch": 1.8544391873086559, "grad_norm": 0.3180074691772461, "learning_rate": 3.817769545235861e-06, "loss": 0.4254, "step": 2221 }, { "epoch": 1.855274144169218, "grad_norm": 0.3268428444862366, "learning_rate": 3.8130463172054643e-06, "loss": 0.4071, "step": 2222 }, { "epoch": 1.85610910102978, "grad_norm": 0.3129817545413971, "learning_rate": 3.808324211347429e-06, "loss": 0.44, "step": 2223 }, { "epoch": 1.8569440578903422, "grad_norm": 0.34740233421325684, "learning_rate": 3.803603232126136e-06, "loss": 0.4626, "step": 2224 }, { "epoch": 1.8577790147509046, "grad_norm": 0.3346748650074005, "learning_rate": 3.7988833840049056e-06, "loss": 0.397, "step": 2225 }, { "epoch": 1.8586139716114667, "grad_norm": 0.31975001096725464, "learning_rate": 3.794164671445987e-06, "loss": 0.4261, "step": 2226 }, { "epoch": 1.859448928472029, "grad_norm": 0.33082422614097595, "learning_rate": 3.789447098910554e-06, "loss": 0.4113, "step": 2227 }, { "epoch": 1.8602838853325911, "grad_norm": 0.34539011120796204, "learning_rate": 3.7847306708587085e-06, "loss": 0.417, "step": 2228 }, { "epoch": 1.8611188421931533, "grad_norm": 0.33914047479629517, "learning_rate": 3.7800153917494597e-06, "loss": 0.3775, "step": 2229 }, { "epoch": 1.8619537990537156, "grad_norm": 0.30748307704925537, "learning_rate": 3.7753012660407425e-06, "loss": 0.4021, "step": 2230 }, { "epoch": 1.8627887559142777, "grad_norm": 0.38296791911125183, "learning_rate": 3.7705882981893958e-06, "loss": 0.4745, "step": 2231 }, { "epoch": 1.86362371277484, "grad_norm": 0.3492138385772705, "learning_rate": 3.7658764926511613e-06, "loss": 0.399, "step": 2232 }, { "epoch": 1.8644586696354022, "grad_norm": 0.3456253111362457, "learning_rate": 3.7611658538806866e-06, "loss": 0.4195, "step": 2233 }, { "epoch": 1.8652936264959643, "grad_norm": 0.2946562170982361, "learning_rate": 3.7564563863315125e-06, "loss": 0.3919, "step": 2234 }, { "epoch": 1.8661285833565264, "grad_norm": 0.3254052996635437, "learning_rate": 3.7517480944560745e-06, "loss": 0.4511, "step": 2235 }, { "epoch": 1.8669635402170888, "grad_norm": 0.32421788573265076, "learning_rate": 3.747040982705699e-06, "loss": 0.4164, "step": 2236 }, { "epoch": 1.867798497077651, "grad_norm": 0.32078564167022705, "learning_rate": 3.7423350555305894e-06, "loss": 0.4399, "step": 2237 }, { "epoch": 1.8686334539382132, "grad_norm": 0.3263188600540161, "learning_rate": 3.737630317379839e-06, "loss": 0.4249, "step": 2238 }, { "epoch": 1.8694684107987753, "grad_norm": 0.35829946398735046, "learning_rate": 3.732926772701405e-06, "loss": 0.4183, "step": 2239 }, { "epoch": 1.8703033676593375, "grad_norm": 0.3117098808288574, "learning_rate": 3.7282244259421286e-06, "loss": 0.4029, "step": 2240 }, { "epoch": 1.8711383245198998, "grad_norm": 0.3320132791996002, "learning_rate": 3.7235232815477123e-06, "loss": 0.4118, "step": 2241 }, { "epoch": 1.8719732813804622, "grad_norm": 0.3038805425167084, "learning_rate": 3.718823343962719e-06, "loss": 0.3894, "step": 2242 }, { "epoch": 1.8728082382410243, "grad_norm": 0.3008613884449005, "learning_rate": 3.7141246176305775e-06, "loss": 0.3943, "step": 2243 }, { "epoch": 1.8736431951015864, "grad_norm": 0.3228885531425476, "learning_rate": 3.709427106993565e-06, "loss": 0.4153, "step": 2244 }, { "epoch": 1.8744781519621485, "grad_norm": 0.3826487362384796, "learning_rate": 3.704730816492814e-06, "loss": 0.4724, "step": 2245 }, { "epoch": 1.8753131088227109, "grad_norm": 0.2753260135650635, "learning_rate": 3.700035750568303e-06, "loss": 0.3627, "step": 2246 }, { "epoch": 1.8761480656832732, "grad_norm": 0.41555410623550415, "learning_rate": 3.695341913658849e-06, "loss": 0.4446, "step": 2247 }, { "epoch": 1.8769830225438353, "grad_norm": 0.34255316853523254, "learning_rate": 3.6906493102021125e-06, "loss": 0.3949, "step": 2248 }, { "epoch": 1.8778179794043974, "grad_norm": 0.30416983366012573, "learning_rate": 3.6859579446345804e-06, "loss": 0.3955, "step": 2249 }, { "epoch": 1.8786529362649595, "grad_norm": 0.3269352316856384, "learning_rate": 3.6812678213915777e-06, "loss": 0.4281, "step": 2250 }, { "epoch": 1.879487893125522, "grad_norm": 0.2951759994029999, "learning_rate": 3.676578944907252e-06, "loss": 0.4012, "step": 2251 }, { "epoch": 1.880322849986084, "grad_norm": 0.32929226756095886, "learning_rate": 3.6718913196145668e-06, "loss": 0.4236, "step": 2252 }, { "epoch": 1.8811578068466464, "grad_norm": 0.3171452581882477, "learning_rate": 3.667204949945311e-06, "loss": 0.4128, "step": 2253 }, { "epoch": 1.8819927637072085, "grad_norm": 0.37224075198173523, "learning_rate": 3.66251984033008e-06, "loss": 0.4384, "step": 2254 }, { "epoch": 1.8828277205677706, "grad_norm": 0.3460980951786041, "learning_rate": 3.6578359951982817e-06, "loss": 0.4084, "step": 2255 }, { "epoch": 1.8836626774283327, "grad_norm": 0.30917152762413025, "learning_rate": 3.653153418978128e-06, "loss": 0.4219, "step": 2256 }, { "epoch": 1.884497634288895, "grad_norm": 0.33649230003356934, "learning_rate": 3.6484721160966286e-06, "loss": 0.4219, "step": 2257 }, { "epoch": 1.8853325911494574, "grad_norm": 0.39923763275146484, "learning_rate": 3.6437920909795932e-06, "loss": 0.4182, "step": 2258 }, { "epoch": 1.8861675480100195, "grad_norm": 0.3323121964931488, "learning_rate": 3.6391133480516196e-06, "loss": 0.4223, "step": 2259 }, { "epoch": 1.8870025048705816, "grad_norm": 0.3180949091911316, "learning_rate": 3.6344358917360934e-06, "loss": 0.4145, "step": 2260 }, { "epoch": 1.8878374617311438, "grad_norm": 0.3350938856601715, "learning_rate": 3.629759726455192e-06, "loss": 0.4081, "step": 2261 }, { "epoch": 1.888672418591706, "grad_norm": 0.3248671591281891, "learning_rate": 3.625084856629859e-06, "loss": 0.4007, "step": 2262 }, { "epoch": 1.8895073754522684, "grad_norm": 0.3545745313167572, "learning_rate": 3.6204112866798254e-06, "loss": 0.4148, "step": 2263 }, { "epoch": 1.8903423323128306, "grad_norm": 0.33417025208473206, "learning_rate": 3.615739021023584e-06, "loss": 0.4322, "step": 2264 }, { "epoch": 1.8911772891733927, "grad_norm": 0.3307390809059143, "learning_rate": 3.6110680640784003e-06, "loss": 0.4273, "step": 2265 }, { "epoch": 1.8920122460339548, "grad_norm": 0.34062907099723816, "learning_rate": 3.606398420260301e-06, "loss": 0.4511, "step": 2266 }, { "epoch": 1.8928472028945171, "grad_norm": 0.3260069191455841, "learning_rate": 3.6017300939840693e-06, "loss": 0.4004, "step": 2267 }, { "epoch": 1.8936821597550795, "grad_norm": 0.33698317408561707, "learning_rate": 3.5970630896632485e-06, "loss": 0.4138, "step": 2268 }, { "epoch": 1.8945171166156416, "grad_norm": 0.31869566440582275, "learning_rate": 3.5923974117101233e-06, "loss": 0.3991, "step": 2269 }, { "epoch": 1.8953520734762037, "grad_norm": 0.31226846575737, "learning_rate": 3.58773306453573e-06, "loss": 0.4329, "step": 2270 }, { "epoch": 1.8961870303367658, "grad_norm": 0.3491813540458679, "learning_rate": 3.5830700525498496e-06, "loss": 0.3876, "step": 2271 }, { "epoch": 1.8970219871973282, "grad_norm": 0.3509092926979065, "learning_rate": 3.5784083801609932e-06, "loss": 0.4081, "step": 2272 }, { "epoch": 1.8978569440578903, "grad_norm": 0.34484291076660156, "learning_rate": 3.5737480517764117e-06, "loss": 0.4054, "step": 2273 }, { "epoch": 1.8986919009184526, "grad_norm": 0.34711503982543945, "learning_rate": 3.569089071802081e-06, "loss": 0.4126, "step": 2274 }, { "epoch": 1.8995268577790148, "grad_norm": 0.31426405906677246, "learning_rate": 3.564431444642707e-06, "loss": 0.438, "step": 2275 }, { "epoch": 1.9003618146395769, "grad_norm": 0.33892616629600525, "learning_rate": 3.5597751747017106e-06, "loss": 0.4164, "step": 2276 }, { "epoch": 1.901196771500139, "grad_norm": 0.3502761423587799, "learning_rate": 3.5551202663812344e-06, "loss": 0.4327, "step": 2277 }, { "epoch": 1.9020317283607013, "grad_norm": 0.34002435207366943, "learning_rate": 3.550466724082134e-06, "loss": 0.4069, "step": 2278 }, { "epoch": 1.9028666852212637, "grad_norm": 0.3723599910736084, "learning_rate": 3.545814552203968e-06, "loss": 0.4631, "step": 2279 }, { "epoch": 1.9037016420818258, "grad_norm": 0.3209521472454071, "learning_rate": 3.5411637551450044e-06, "loss": 0.4047, "step": 2280 }, { "epoch": 1.904536598942388, "grad_norm": 0.349687784910202, "learning_rate": 3.5365143373022097e-06, "loss": 0.4165, "step": 2281 }, { "epoch": 1.90537155580295, "grad_norm": 0.33358100056648254, "learning_rate": 3.5318663030712476e-06, "loss": 0.4222, "step": 2282 }, { "epoch": 1.9062065126635124, "grad_norm": 0.3230152726173401, "learning_rate": 3.5272196568464722e-06, "loss": 0.4245, "step": 2283 }, { "epoch": 1.9070414695240747, "grad_norm": 0.30074387788772583, "learning_rate": 3.5225744030209263e-06, "loss": 0.3691, "step": 2284 }, { "epoch": 1.9078764263846368, "grad_norm": 0.41433870792388916, "learning_rate": 3.517930545986337e-06, "loss": 0.4106, "step": 2285 }, { "epoch": 1.908711383245199, "grad_norm": 0.3772372305393219, "learning_rate": 3.5132880901331067e-06, "loss": 0.4253, "step": 2286 }, { "epoch": 1.909546340105761, "grad_norm": 0.31344419717788696, "learning_rate": 3.508647039850319e-06, "loss": 0.398, "step": 2287 }, { "epoch": 1.9103812969663234, "grad_norm": 0.31509897112846375, "learning_rate": 3.5040073995257263e-06, "loss": 0.4129, "step": 2288 }, { "epoch": 1.9112162538268858, "grad_norm": 0.3268350660800934, "learning_rate": 3.499369173545745e-06, "loss": 0.4102, "step": 2289 }, { "epoch": 1.9120512106874479, "grad_norm": 0.34718289971351624, "learning_rate": 3.4947323662954587e-06, "loss": 0.4346, "step": 2290 }, { "epoch": 1.91288616754801, "grad_norm": 0.3241332769393921, "learning_rate": 3.490096982158606e-06, "loss": 0.4133, "step": 2291 }, { "epoch": 1.9137211244085721, "grad_norm": 0.34839150309562683, "learning_rate": 3.485463025517584e-06, "loss": 0.4456, "step": 2292 }, { "epoch": 1.9145560812691345, "grad_norm": 0.30497562885284424, "learning_rate": 3.4808305007534386e-06, "loss": 0.3853, "step": 2293 }, { "epoch": 1.9153910381296966, "grad_norm": 0.31341835856437683, "learning_rate": 3.4761994122458596e-06, "loss": 0.4162, "step": 2294 }, { "epoch": 1.916225994990259, "grad_norm": 0.31433895230293274, "learning_rate": 3.4715697643731828e-06, "loss": 0.4108, "step": 2295 }, { "epoch": 1.917060951850821, "grad_norm": 0.32720112800598145, "learning_rate": 3.466941561512377e-06, "loss": 0.4421, "step": 2296 }, { "epoch": 1.9178959087113832, "grad_norm": 0.3012842833995819, "learning_rate": 3.462314808039051e-06, "loss": 0.4058, "step": 2297 }, { "epoch": 1.9187308655719455, "grad_norm": 0.2863467037677765, "learning_rate": 3.457689508327441e-06, "loss": 0.3949, "step": 2298 }, { "epoch": 1.9195658224325076, "grad_norm": 0.3273066580295563, "learning_rate": 3.4530656667504036e-06, "loss": 0.4451, "step": 2299 }, { "epoch": 1.92040077929307, "grad_norm": 0.29081961512565613, "learning_rate": 3.448443287679426e-06, "loss": 0.3853, "step": 2300 }, { "epoch": 1.921235736153632, "grad_norm": 0.3215504288673401, "learning_rate": 3.443822375484604e-06, "loss": 0.4127, "step": 2301 }, { "epoch": 1.9220706930141942, "grad_norm": 0.3323954641819, "learning_rate": 3.439202934534652e-06, "loss": 0.4332, "step": 2302 }, { "epoch": 1.9229056498747563, "grad_norm": 0.3025503158569336, "learning_rate": 3.4345849691968946e-06, "loss": 0.3819, "step": 2303 }, { "epoch": 1.9237406067353187, "grad_norm": 0.342759370803833, "learning_rate": 3.4299684838372547e-06, "loss": 0.4319, "step": 2304 }, { "epoch": 1.924575563595881, "grad_norm": 0.31191763281822205, "learning_rate": 3.425353482820264e-06, "loss": 0.4261, "step": 2305 }, { "epoch": 1.9254105204564431, "grad_norm": 0.3438170254230499, "learning_rate": 3.4207399705090427e-06, "loss": 0.4747, "step": 2306 }, { "epoch": 1.9262454773170052, "grad_norm": 0.32834839820861816, "learning_rate": 3.41612795126531e-06, "loss": 0.4279, "step": 2307 }, { "epoch": 1.9270804341775674, "grad_norm": 0.32156580686569214, "learning_rate": 3.4115174294493737e-06, "loss": 0.4092, "step": 2308 }, { "epoch": 1.9279153910381297, "grad_norm": 0.3522515594959259, "learning_rate": 3.406908409420119e-06, "loss": 0.463, "step": 2309 }, { "epoch": 1.928750347898692, "grad_norm": 0.3041593134403229, "learning_rate": 3.4023008955350178e-06, "loss": 0.41, "step": 2310 }, { "epoch": 1.9295853047592542, "grad_norm": 0.3322739899158478, "learning_rate": 3.3976948921501156e-06, "loss": 0.3987, "step": 2311 }, { "epoch": 1.9304202616198163, "grad_norm": 0.357148677110672, "learning_rate": 3.39309040362003e-06, "loss": 0.4722, "step": 2312 }, { "epoch": 1.9312552184803784, "grad_norm": 0.3387586772441864, "learning_rate": 3.388487434297949e-06, "loss": 0.3912, "step": 2313 }, { "epoch": 1.9320901753409407, "grad_norm": 0.3075653910636902, "learning_rate": 3.3838859885356186e-06, "loss": 0.4327, "step": 2314 }, { "epoch": 1.932925132201503, "grad_norm": 0.3115623891353607, "learning_rate": 3.3792860706833507e-06, "loss": 0.3931, "step": 2315 }, { "epoch": 1.9337600890620652, "grad_norm": 0.32555437088012695, "learning_rate": 3.3746876850900067e-06, "loss": 0.4242, "step": 2316 }, { "epoch": 1.9345950459226273, "grad_norm": 0.3374783992767334, "learning_rate": 3.370090836103004e-06, "loss": 0.4097, "step": 2317 }, { "epoch": 1.9354300027831894, "grad_norm": 0.37309926748275757, "learning_rate": 3.3654955280683082e-06, "loss": 0.4606, "step": 2318 }, { "epoch": 1.9362649596437518, "grad_norm": 0.3294207751750946, "learning_rate": 3.360901765330421e-06, "loss": 0.405, "step": 2319 }, { "epoch": 1.937099916504314, "grad_norm": 0.31718575954437256, "learning_rate": 3.3563095522323906e-06, "loss": 0.4038, "step": 2320 }, { "epoch": 1.9379348733648762, "grad_norm": 0.3287491500377655, "learning_rate": 3.3517188931157956e-06, "loss": 0.4518, "step": 2321 }, { "epoch": 1.9387698302254384, "grad_norm": 0.32968470454216003, "learning_rate": 3.347129792320748e-06, "loss": 0.4203, "step": 2322 }, { "epoch": 1.9396047870860005, "grad_norm": 0.30896440148353577, "learning_rate": 3.3425422541858856e-06, "loss": 0.4198, "step": 2323 }, { "epoch": 1.9404397439465626, "grad_norm": 0.36340051889419556, "learning_rate": 3.337956283048368e-06, "loss": 0.4083, "step": 2324 }, { "epoch": 1.941274700807125, "grad_norm": 0.3143473267555237, "learning_rate": 3.3333718832438768e-06, "loss": 0.3973, "step": 2325 }, { "epoch": 1.9421096576676873, "grad_norm": 0.32260772585868835, "learning_rate": 3.3287890591065995e-06, "loss": 0.4372, "step": 2326 }, { "epoch": 1.9429446145282494, "grad_norm": 0.32285934686660767, "learning_rate": 3.3242078149692446e-06, "loss": 0.4587, "step": 2327 }, { "epoch": 1.9437795713888115, "grad_norm": 0.35692206025123596, "learning_rate": 3.319628155163024e-06, "loss": 0.4232, "step": 2328 }, { "epoch": 1.9446145282493736, "grad_norm": 0.33645099401474, "learning_rate": 3.3150500840176437e-06, "loss": 0.4671, "step": 2329 }, { "epoch": 1.945449485109936, "grad_norm": 0.2889529764652252, "learning_rate": 3.3104736058613175e-06, "loss": 0.3639, "step": 2330 }, { "epoch": 1.9462844419704983, "grad_norm": 0.3121025264263153, "learning_rate": 3.3058987250207476e-06, "loss": 0.4316, "step": 2331 }, { "epoch": 1.9471193988310604, "grad_norm": 0.3395814299583435, "learning_rate": 3.3013254458211298e-06, "loss": 0.4707, "step": 2332 }, { "epoch": 1.9479543556916226, "grad_norm": 0.3150932192802429, "learning_rate": 3.2967537725861434e-06, "loss": 0.3822, "step": 2333 }, { "epoch": 1.9487893125521847, "grad_norm": 0.3161650598049164, "learning_rate": 3.292183709637947e-06, "loss": 0.4118, "step": 2334 }, { "epoch": 1.949624269412747, "grad_norm": 0.333248108625412, "learning_rate": 3.2876152612971856e-06, "loss": 0.4444, "step": 2335 }, { "epoch": 1.9504592262733094, "grad_norm": 0.33139315247535706, "learning_rate": 3.2830484318829634e-06, "loss": 0.3888, "step": 2336 }, { "epoch": 1.9512941831338715, "grad_norm": 0.3255458474159241, "learning_rate": 3.278483225712869e-06, "loss": 0.4448, "step": 2337 }, { "epoch": 1.9521291399944336, "grad_norm": 0.32508721947669983, "learning_rate": 3.2739196471029487e-06, "loss": 0.4409, "step": 2338 }, { "epoch": 1.9529640968549957, "grad_norm": 0.30902284383773804, "learning_rate": 3.2693577003677083e-06, "loss": 0.3669, "step": 2339 }, { "epoch": 1.953799053715558, "grad_norm": 0.33131176233291626, "learning_rate": 3.2647973898201157e-06, "loss": 0.4141, "step": 2340 }, { "epoch": 1.9546340105761202, "grad_norm": 0.3309462070465088, "learning_rate": 3.260238719771588e-06, "loss": 0.4503, "step": 2341 }, { "epoch": 1.9554689674366825, "grad_norm": 0.33764711022377014, "learning_rate": 3.2556816945319946e-06, "loss": 0.3984, "step": 2342 }, { "epoch": 1.9563039242972446, "grad_norm": 0.3439929485321045, "learning_rate": 3.2511263184096476e-06, "loss": 0.3988, "step": 2343 }, { "epoch": 1.9571388811578068, "grad_norm": 0.36586782336235046, "learning_rate": 3.2465725957113008e-06, "loss": 0.4443, "step": 2344 }, { "epoch": 1.957973838018369, "grad_norm": 0.3069925904273987, "learning_rate": 3.2420205307421463e-06, "loss": 0.3911, "step": 2345 }, { "epoch": 1.9588087948789312, "grad_norm": 0.34247416257858276, "learning_rate": 3.237470127805803e-06, "loss": 0.4452, "step": 2346 }, { "epoch": 1.9596437517394936, "grad_norm": 0.29508718848228455, "learning_rate": 3.2329213912043266e-06, "loss": 0.356, "step": 2347 }, { "epoch": 1.9604787086000557, "grad_norm": 0.3579123318195343, "learning_rate": 3.2283743252381948e-06, "loss": 0.4354, "step": 2348 }, { "epoch": 1.9613136654606178, "grad_norm": 0.34025096893310547, "learning_rate": 3.2238289342063013e-06, "loss": 0.4475, "step": 2349 }, { "epoch": 1.96214862232118, "grad_norm": 0.3239527940750122, "learning_rate": 3.219285222405961e-06, "loss": 0.4475, "step": 2350 }, { "epoch": 1.9629835791817423, "grad_norm": 0.35295799374580383, "learning_rate": 3.2147431941329e-06, "loss": 0.3937, "step": 2351 }, { "epoch": 1.9638185360423046, "grad_norm": 0.38921526074409485, "learning_rate": 3.2102028536812524e-06, "loss": 0.4576, "step": 2352 }, { "epoch": 1.9646534929028667, "grad_norm": 0.34124472737312317, "learning_rate": 3.205664205343557e-06, "loss": 0.4451, "step": 2353 }, { "epoch": 1.9654884497634288, "grad_norm": 0.3133748471736908, "learning_rate": 3.2011272534107528e-06, "loss": 0.4099, "step": 2354 }, { "epoch": 1.966323406623991, "grad_norm": 0.34300529956817627, "learning_rate": 3.1965920021721764e-06, "loss": 0.4022, "step": 2355 }, { "epoch": 1.9671583634845533, "grad_norm": 0.4082027077674866, "learning_rate": 3.1920584559155504e-06, "loss": 0.4599, "step": 2356 }, { "epoch": 1.9679933203451156, "grad_norm": 0.33623167872428894, "learning_rate": 3.1875266189269943e-06, "loss": 0.4458, "step": 2357 }, { "epoch": 1.9688282772056778, "grad_norm": 0.34384477138519287, "learning_rate": 3.1829964954910076e-06, "loss": 0.3876, "step": 2358 }, { "epoch": 1.9696632340662399, "grad_norm": 0.36178091168403625, "learning_rate": 3.178468089890467e-06, "loss": 0.4159, "step": 2359 }, { "epoch": 1.970498190926802, "grad_norm": 0.34379351139068604, "learning_rate": 3.1739414064066286e-06, "loss": 0.4353, "step": 2360 }, { "epoch": 1.9713331477873643, "grad_norm": 0.34701573848724365, "learning_rate": 3.16941644931912e-06, "loss": 0.4318, "step": 2361 }, { "epoch": 1.9721681046479267, "grad_norm": 0.31482455134391785, "learning_rate": 3.1648932229059358e-06, "loss": 0.4232, "step": 2362 }, { "epoch": 1.9730030615084888, "grad_norm": 0.3182859420776367, "learning_rate": 3.1603717314434335e-06, "loss": 0.46, "step": 2363 }, { "epoch": 1.973838018369051, "grad_norm": 0.3008085787296295, "learning_rate": 3.1558519792063335e-06, "loss": 0.3954, "step": 2364 }, { "epoch": 1.974672975229613, "grad_norm": 0.295727401971817, "learning_rate": 3.1513339704677104e-06, "loss": 0.3737, "step": 2365 }, { "epoch": 1.9755079320901754, "grad_norm": 0.34602493047714233, "learning_rate": 3.146817709498985e-06, "loss": 0.486, "step": 2366 }, { "epoch": 1.9763428889507375, "grad_norm": 0.3222402334213257, "learning_rate": 3.1423032005699377e-06, "loss": 0.3943, "step": 2367 }, { "epoch": 1.9771778458112998, "grad_norm": 0.3615516722202301, "learning_rate": 3.13779044794868e-06, "loss": 0.4799, "step": 2368 }, { "epoch": 1.978012802671862, "grad_norm": 0.3049153983592987, "learning_rate": 3.13327945590167e-06, "loss": 0.4125, "step": 2369 }, { "epoch": 1.978847759532424, "grad_norm": 0.3229999244213104, "learning_rate": 3.1287702286937018e-06, "loss": 0.4044, "step": 2370 }, { "epoch": 1.9796827163929862, "grad_norm": 0.3417566418647766, "learning_rate": 3.1242627705878952e-06, "loss": 0.4003, "step": 2371 }, { "epoch": 1.9805176732535485, "grad_norm": 0.3098723590373993, "learning_rate": 3.119757085845705e-06, "loss": 0.4076, "step": 2372 }, { "epoch": 1.981352630114111, "grad_norm": 0.31395208835601807, "learning_rate": 3.115253178726902e-06, "loss": 0.4135, "step": 2373 }, { "epoch": 1.982187586974673, "grad_norm": 0.3622572124004364, "learning_rate": 3.110751053489582e-06, "loss": 0.4087, "step": 2374 }, { "epoch": 1.9830225438352351, "grad_norm": 0.3456003963947296, "learning_rate": 3.1062507143901556e-06, "loss": 0.4551, "step": 2375 }, { "epoch": 1.9838575006957972, "grad_norm": 0.32022395730018616, "learning_rate": 3.1017521656833384e-06, "loss": 0.4258, "step": 2376 }, { "epoch": 1.9846924575563596, "grad_norm": 0.31135162711143494, "learning_rate": 3.0972554116221645e-06, "loss": 0.4177, "step": 2377 }, { "epoch": 1.985527414416922, "grad_norm": 0.3231390118598938, "learning_rate": 3.0927604564579593e-06, "loss": 0.4489, "step": 2378 }, { "epoch": 1.986362371277484, "grad_norm": 0.3336527943611145, "learning_rate": 3.0882673044403565e-06, "loss": 0.3748, "step": 2379 }, { "epoch": 1.9871973281380462, "grad_norm": 0.3058059513568878, "learning_rate": 3.083775959817282e-06, "loss": 0.4091, "step": 2380 }, { "epoch": 1.9880322849986083, "grad_norm": 0.3307937681674957, "learning_rate": 3.07928642683495e-06, "loss": 0.4583, "step": 2381 }, { "epoch": 1.9888672418591706, "grad_norm": 0.300340861082077, "learning_rate": 3.0747987097378674e-06, "loss": 0.4123, "step": 2382 }, { "epoch": 1.989702198719733, "grad_norm": 0.3413597345352173, "learning_rate": 3.0703128127688196e-06, "loss": 0.4427, "step": 2383 }, { "epoch": 1.990537155580295, "grad_norm": 0.2992326319217682, "learning_rate": 3.0658287401688734e-06, "loss": 0.4028, "step": 2384 }, { "epoch": 1.9913721124408572, "grad_norm": 0.3137476444244385, "learning_rate": 3.061346496177374e-06, "loss": 0.3902, "step": 2385 }, { "epoch": 1.9922070693014193, "grad_norm": 0.3481084108352661, "learning_rate": 3.056866085031929e-06, "loss": 0.4265, "step": 2386 }, { "epoch": 1.9930420261619817, "grad_norm": 0.3090895116329193, "learning_rate": 3.052387510968423e-06, "loss": 0.4498, "step": 2387 }, { "epoch": 1.9938769830225438, "grad_norm": 0.2816275358200073, "learning_rate": 3.047910778220996e-06, "loss": 0.3832, "step": 2388 }, { "epoch": 1.9947119398831061, "grad_norm": 0.35447055101394653, "learning_rate": 3.043435891022052e-06, "loss": 0.4436, "step": 2389 }, { "epoch": 1.9955468967436683, "grad_norm": 0.32242536544799805, "learning_rate": 3.0389628536022496e-06, "loss": 0.3925, "step": 2390 }, { "epoch": 1.9963818536042304, "grad_norm": 0.3104311227798462, "learning_rate": 3.034491670190495e-06, "loss": 0.3707, "step": 2391 }, { "epoch": 1.9972168104647925, "grad_norm": 0.3575473129749298, "learning_rate": 3.0300223450139475e-06, "loss": 0.4325, "step": 2392 }, { "epoch": 1.9980517673253548, "grad_norm": 0.310964971780777, "learning_rate": 3.025554882298004e-06, "loss": 0.3816, "step": 2393 }, { "epoch": 1.9988867241859172, "grad_norm": 0.363938570022583, "learning_rate": 3.0210892862663043e-06, "loss": 0.418, "step": 2394 }, { "epoch": 1.9997216810464793, "grad_norm": 0.38355928659439087, "learning_rate": 3.0166255611407226e-06, "loss": 0.4727, "step": 2395 }, { "epoch": 2.0005566379070414, "grad_norm": 0.38111406564712524, "learning_rate": 3.01216371114136e-06, "loss": 0.4627, "step": 2396 }, { "epoch": 2.0013915947676035, "grad_norm": 0.34292083978652954, "learning_rate": 3.007703740486554e-06, "loss": 0.3974, "step": 2397 }, { "epoch": 2.002226551628166, "grad_norm": 0.31928274035453796, "learning_rate": 3.0032456533928545e-06, "loss": 0.3737, "step": 2398 }, { "epoch": 2.003061508488728, "grad_norm": 0.32584306597709656, "learning_rate": 2.998789454075039e-06, "loss": 0.3721, "step": 2399 }, { "epoch": 2.0038964653492903, "grad_norm": 0.34921684861183167, "learning_rate": 2.994335146746097e-06, "loss": 0.3865, "step": 2400 }, { "epoch": 2.0047314222098525, "grad_norm": 0.3236585557460785, "learning_rate": 2.9898827356172288e-06, "loss": 0.4199, "step": 2401 }, { "epoch": 2.0055663790704146, "grad_norm": 0.3193267285823822, "learning_rate": 2.9854322248978446e-06, "loss": 0.3586, "step": 2402 }, { "epoch": 2.0064013359309767, "grad_norm": 0.38189736008644104, "learning_rate": 2.9809836187955532e-06, "loss": 0.4238, "step": 2403 }, { "epoch": 2.0072362927915393, "grad_norm": 0.34032684564590454, "learning_rate": 2.9765369215161664e-06, "loss": 0.387, "step": 2404 }, { "epoch": 2.0080712496521014, "grad_norm": 0.3441116511821747, "learning_rate": 2.9720921372636937e-06, "loss": 0.4017, "step": 2405 }, { "epoch": 2.0089062065126635, "grad_norm": 0.3282027840614319, "learning_rate": 2.967649270240327e-06, "loss": 0.4061, "step": 2406 }, { "epoch": 2.0097411633732256, "grad_norm": 0.3229796886444092, "learning_rate": 2.9632083246464572e-06, "loss": 0.3939, "step": 2407 }, { "epoch": 2.0105761202337877, "grad_norm": 0.3527272343635559, "learning_rate": 2.9587693046806486e-06, "loss": 0.4016, "step": 2408 }, { "epoch": 2.0114110770943503, "grad_norm": 0.33957377076148987, "learning_rate": 2.9543322145396515e-06, "loss": 0.4225, "step": 2409 }, { "epoch": 2.0122460339549124, "grad_norm": 0.3664098381996155, "learning_rate": 2.949897058418388e-06, "loss": 0.4348, "step": 2410 }, { "epoch": 2.0130809908154745, "grad_norm": 0.33471494913101196, "learning_rate": 2.9454638405099546e-06, "loss": 0.3525, "step": 2411 }, { "epoch": 2.0139159476760367, "grad_norm": 0.3181726038455963, "learning_rate": 2.941032565005613e-06, "loss": 0.4094, "step": 2412 }, { "epoch": 2.0147509045365988, "grad_norm": 0.3327322006225586, "learning_rate": 2.936603236094789e-06, "loss": 0.3844, "step": 2413 }, { "epoch": 2.0155858613971613, "grad_norm": 0.3203180134296417, "learning_rate": 2.9321758579650696e-06, "loss": 0.3648, "step": 2414 }, { "epoch": 2.0164208182577235, "grad_norm": 0.34910398721694946, "learning_rate": 2.927750434802198e-06, "loss": 0.464, "step": 2415 }, { "epoch": 2.0172557751182856, "grad_norm": 0.2863149344921112, "learning_rate": 2.9233269707900613e-06, "loss": 0.3499, "step": 2416 }, { "epoch": 2.0180907319788477, "grad_norm": 0.3191135823726654, "learning_rate": 2.9189054701107083e-06, "loss": 0.3834, "step": 2417 }, { "epoch": 2.01892568883941, "grad_norm": 0.31380343437194824, "learning_rate": 2.914485936944317e-06, "loss": 0.3641, "step": 2418 }, { "epoch": 2.0197606456999724, "grad_norm": 0.33051228523254395, "learning_rate": 2.9100683754692178e-06, "loss": 0.3718, "step": 2419 }, { "epoch": 2.0205956025605345, "grad_norm": 0.3065633475780487, "learning_rate": 2.9056527898618684e-06, "loss": 0.4168, "step": 2420 }, { "epoch": 2.0214305594210966, "grad_norm": 0.3251338005065918, "learning_rate": 2.90123918429686e-06, "loss": 0.4194, "step": 2421 }, { "epoch": 2.0222655162816587, "grad_norm": 0.31333810091018677, "learning_rate": 2.89682756294692e-06, "loss": 0.361, "step": 2422 }, { "epoch": 2.023100473142221, "grad_norm": 0.31975293159484863, "learning_rate": 2.892417929982884e-06, "loss": 0.4042, "step": 2423 }, { "epoch": 2.023935430002783, "grad_norm": 0.3137202262878418, "learning_rate": 2.888010289573724e-06, "loss": 0.3878, "step": 2424 }, { "epoch": 2.0247703868633455, "grad_norm": 0.3166247606277466, "learning_rate": 2.8836046458865174e-06, "loss": 0.3892, "step": 2425 }, { "epoch": 2.0256053437239077, "grad_norm": 0.3553958237171173, "learning_rate": 2.879201003086457e-06, "loss": 0.4082, "step": 2426 }, { "epoch": 2.02644030058447, "grad_norm": 0.3311893343925476, "learning_rate": 2.8747993653368478e-06, "loss": 0.3891, "step": 2427 }, { "epoch": 2.027275257445032, "grad_norm": 0.31031566858291626, "learning_rate": 2.8703997367990942e-06, "loss": 0.3735, "step": 2428 }, { "epoch": 2.028110214305594, "grad_norm": 0.33895328640937805, "learning_rate": 2.8660021216327007e-06, "loss": 0.3817, "step": 2429 }, { "epoch": 2.0289451711661566, "grad_norm": 0.3778069019317627, "learning_rate": 2.8616065239952763e-06, "loss": 0.4104, "step": 2430 }, { "epoch": 2.0297801280267187, "grad_norm": 0.30845382809638977, "learning_rate": 2.8572129480425082e-06, "loss": 0.3827, "step": 2431 }, { "epoch": 2.030615084887281, "grad_norm": 0.3147810697555542, "learning_rate": 2.8528213979281868e-06, "loss": 0.3854, "step": 2432 }, { "epoch": 2.031450041747843, "grad_norm": 0.3320753872394562, "learning_rate": 2.8484318778041786e-06, "loss": 0.4542, "step": 2433 }, { "epoch": 2.032284998608405, "grad_norm": 0.2961820065975189, "learning_rate": 2.844044391820433e-06, "loss": 0.3911, "step": 2434 }, { "epoch": 2.0331199554689676, "grad_norm": 0.33709076046943665, "learning_rate": 2.8396589441249787e-06, "loss": 0.361, "step": 2435 }, { "epoch": 2.0339549123295297, "grad_norm": 0.3338479697704315, "learning_rate": 2.835275538863914e-06, "loss": 0.3584, "step": 2436 }, { "epoch": 2.034789869190092, "grad_norm": 0.3542519509792328, "learning_rate": 2.830894180181408e-06, "loss": 0.4491, "step": 2437 }, { "epoch": 2.035624826050654, "grad_norm": 0.345567911863327, "learning_rate": 2.826514872219692e-06, "loss": 0.4019, "step": 2438 }, { "epoch": 2.036459782911216, "grad_norm": 0.29389357566833496, "learning_rate": 2.822137619119065e-06, "loss": 0.3619, "step": 2439 }, { "epoch": 2.0372947397717787, "grad_norm": 0.3266390562057495, "learning_rate": 2.8177624250178782e-06, "loss": 0.4367, "step": 2440 }, { "epoch": 2.038129696632341, "grad_norm": 0.32060959935188293, "learning_rate": 2.8133892940525355e-06, "loss": 0.3842, "step": 2441 }, { "epoch": 2.038964653492903, "grad_norm": 0.34633857011795044, "learning_rate": 2.8090182303574923e-06, "loss": 0.4053, "step": 2442 }, { "epoch": 2.039799610353465, "grad_norm": 0.3151768147945404, "learning_rate": 2.804649238065249e-06, "loss": 0.417, "step": 2443 }, { "epoch": 2.040634567214027, "grad_norm": 0.3349325656890869, "learning_rate": 2.80028232130635e-06, "loss": 0.4052, "step": 2444 }, { "epoch": 2.0414695240745893, "grad_norm": 0.34696337580680847, "learning_rate": 2.795917484209374e-06, "loss": 0.3883, "step": 2445 }, { "epoch": 2.042304480935152, "grad_norm": 0.34113654494285583, "learning_rate": 2.7915547309009326e-06, "loss": 0.3961, "step": 2446 }, { "epoch": 2.043139437795714, "grad_norm": 0.3226465582847595, "learning_rate": 2.787194065505675e-06, "loss": 0.382, "step": 2447 }, { "epoch": 2.043974394656276, "grad_norm": 0.31859728693962097, "learning_rate": 2.7828354921462668e-06, "loss": 0.3647, "step": 2448 }, { "epoch": 2.044809351516838, "grad_norm": 0.35405170917510986, "learning_rate": 2.7784790149434016e-06, "loss": 0.4003, "step": 2449 }, { "epoch": 2.0456443083774003, "grad_norm": 0.3403962552547455, "learning_rate": 2.774124638015789e-06, "loss": 0.3936, "step": 2450 }, { "epoch": 2.046479265237963, "grad_norm": 0.31808608770370483, "learning_rate": 2.7697723654801527e-06, "loss": 0.3898, "step": 2451 }, { "epoch": 2.047314222098525, "grad_norm": 0.32287436723709106, "learning_rate": 2.765422201451231e-06, "loss": 0.4253, "step": 2452 }, { "epoch": 2.048149178959087, "grad_norm": 0.31630271673202515, "learning_rate": 2.7610741500417627e-06, "loss": 0.3951, "step": 2453 }, { "epoch": 2.048984135819649, "grad_norm": 0.3273588716983795, "learning_rate": 2.756728215362493e-06, "loss": 0.3738, "step": 2454 }, { "epoch": 2.0498190926802113, "grad_norm": 0.3312573730945587, "learning_rate": 2.752384401522163e-06, "loss": 0.3711, "step": 2455 }, { "epoch": 2.050654049540774, "grad_norm": 0.3435448408126831, "learning_rate": 2.7480427126275143e-06, "loss": 0.4505, "step": 2456 }, { "epoch": 2.051489006401336, "grad_norm": 0.30860602855682373, "learning_rate": 2.7437031527832747e-06, "loss": 0.3838, "step": 2457 }, { "epoch": 2.052323963261898, "grad_norm": 0.31860944628715515, "learning_rate": 2.739365726092158e-06, "loss": 0.4111, "step": 2458 }, { "epoch": 2.0531589201224603, "grad_norm": 0.3091747760772705, "learning_rate": 2.7350304366548695e-06, "loss": 0.4032, "step": 2459 }, { "epoch": 2.0539938769830224, "grad_norm": 0.3383176922798157, "learning_rate": 2.73069728857008e-06, "loss": 0.4092, "step": 2460 }, { "epoch": 2.054828833843585, "grad_norm": 0.31087443232536316, "learning_rate": 2.726366285934451e-06, "loss": 0.3865, "step": 2461 }, { "epoch": 2.055663790704147, "grad_norm": 0.3068051040172577, "learning_rate": 2.722037432842605e-06, "loss": 0.3966, "step": 2462 }, { "epoch": 2.056498747564709, "grad_norm": 0.31685683131217957, "learning_rate": 2.7177107333871344e-06, "loss": 0.3828, "step": 2463 }, { "epoch": 2.0573337044252713, "grad_norm": 0.33679866790771484, "learning_rate": 2.713386191658601e-06, "loss": 0.3878, "step": 2464 }, { "epoch": 2.0581686612858334, "grad_norm": 0.3295711874961853, "learning_rate": 2.7090638117455204e-06, "loss": 0.4101, "step": 2465 }, { "epoch": 2.059003618146396, "grad_norm": 0.3123067021369934, "learning_rate": 2.704743597734365e-06, "loss": 0.4156, "step": 2466 }, { "epoch": 2.059838575006958, "grad_norm": 0.3049241602420807, "learning_rate": 2.700425553709565e-06, "loss": 0.369, "step": 2467 }, { "epoch": 2.0606735318675202, "grad_norm": 0.3122977018356323, "learning_rate": 2.6961096837534917e-06, "loss": 0.4223, "step": 2468 }, { "epoch": 2.0615084887280823, "grad_norm": 0.3244844973087311, "learning_rate": 2.691795991946466e-06, "loss": 0.4573, "step": 2469 }, { "epoch": 2.0623434455886445, "grad_norm": 0.30229607224464417, "learning_rate": 2.6874844823667466e-06, "loss": 0.37, "step": 2470 }, { "epoch": 2.0631784024492066, "grad_norm": 0.32495832443237305, "learning_rate": 2.68317515909053e-06, "loss": 0.4164, "step": 2471 }, { "epoch": 2.064013359309769, "grad_norm": 0.32275786995887756, "learning_rate": 2.678868026191949e-06, "loss": 0.3932, "step": 2472 }, { "epoch": 2.0648483161703313, "grad_norm": 0.31412404775619507, "learning_rate": 2.6745630877430607e-06, "loss": 0.3757, "step": 2473 }, { "epoch": 2.0656832730308934, "grad_norm": 0.3163294196128845, "learning_rate": 2.670260347813849e-06, "loss": 0.3649, "step": 2474 }, { "epoch": 2.0665182298914555, "grad_norm": 0.3082287609577179, "learning_rate": 2.665959810472219e-06, "loss": 0.4368, "step": 2475 }, { "epoch": 2.0673531867520176, "grad_norm": 0.34525951743125916, "learning_rate": 2.6616614797839967e-06, "loss": 0.4248, "step": 2476 }, { "epoch": 2.06818814361258, "grad_norm": 0.32075488567352295, "learning_rate": 2.657365359812919e-06, "loss": 0.3887, "step": 2477 }, { "epoch": 2.0690231004731423, "grad_norm": 0.2928124964237213, "learning_rate": 2.65307145462063e-06, "loss": 0.3911, "step": 2478 }, { "epoch": 2.0698580573337044, "grad_norm": 0.32739749550819397, "learning_rate": 2.648779768266689e-06, "loss": 0.4163, "step": 2479 }, { "epoch": 2.0706930141942665, "grad_norm": 0.31427001953125, "learning_rate": 2.6444903048085436e-06, "loss": 0.3533, "step": 2480 }, { "epoch": 2.0715279710548287, "grad_norm": 0.3297843039035797, "learning_rate": 2.6402030683015537e-06, "loss": 0.4077, "step": 2481 }, { "epoch": 2.0723629279153912, "grad_norm": 0.33433812856674194, "learning_rate": 2.6359180627989665e-06, "loss": 0.4071, "step": 2482 }, { "epoch": 2.0731978847759533, "grad_norm": 0.3189489543437958, "learning_rate": 2.631635292351919e-06, "loss": 0.3979, "step": 2483 }, { "epoch": 2.0740328416365155, "grad_norm": 0.2988552153110504, "learning_rate": 2.6273547610094408e-06, "loss": 0.3783, "step": 2484 }, { "epoch": 2.0748677984970776, "grad_norm": 0.3544017970561981, "learning_rate": 2.6230764728184406e-06, "loss": 0.3897, "step": 2485 }, { "epoch": 2.0757027553576397, "grad_norm": 0.2787191867828369, "learning_rate": 2.6188004318237038e-06, "loss": 0.3759, "step": 2486 }, { "epoch": 2.0765377122182023, "grad_norm": 0.3391836881637573, "learning_rate": 2.6145266420679007e-06, "loss": 0.4323, "step": 2487 }, { "epoch": 2.0773726690787644, "grad_norm": 0.3260510563850403, "learning_rate": 2.6102551075915595e-06, "loss": 0.4044, "step": 2488 }, { "epoch": 2.0782076259393265, "grad_norm": 0.30583691596984863, "learning_rate": 2.6059858324330888e-06, "loss": 0.3813, "step": 2489 }, { "epoch": 2.0790425827998886, "grad_norm": 0.29547491669654846, "learning_rate": 2.601718820628755e-06, "loss": 0.3731, "step": 2490 }, { "epoch": 2.0798775396604507, "grad_norm": 0.32247743010520935, "learning_rate": 2.5974540762126844e-06, "loss": 0.4189, "step": 2491 }, { "epoch": 2.080712496521013, "grad_norm": 0.31234055757522583, "learning_rate": 2.5931916032168626e-06, "loss": 0.3611, "step": 2492 }, { "epoch": 2.0815474533815754, "grad_norm": 0.34039685130119324, "learning_rate": 2.588931405671127e-06, "loss": 0.392, "step": 2493 }, { "epoch": 2.0823824102421375, "grad_norm": 0.31955286860466003, "learning_rate": 2.584673487603161e-06, "loss": 0.3681, "step": 2494 }, { "epoch": 2.0832173671026997, "grad_norm": 0.3391245901584625, "learning_rate": 2.5804178530384936e-06, "loss": 0.4235, "step": 2495 }, { "epoch": 2.084052323963262, "grad_norm": 0.3124650716781616, "learning_rate": 2.5761645060005004e-06, "loss": 0.3725, "step": 2496 }, { "epoch": 2.084887280823824, "grad_norm": 0.29285430908203125, "learning_rate": 2.571913450510387e-06, "loss": 0.3796, "step": 2497 }, { "epoch": 2.0857222376843865, "grad_norm": 0.31977054476737976, "learning_rate": 2.567664690587195e-06, "loss": 0.4056, "step": 2498 }, { "epoch": 2.0865571945449486, "grad_norm": 0.3436834514141083, "learning_rate": 2.5634182302478018e-06, "loss": 0.4109, "step": 2499 }, { "epoch": 2.0873921514055107, "grad_norm": 0.29610559344291687, "learning_rate": 2.559174073506897e-06, "loss": 0.3713, "step": 2500 }, { "epoch": 2.088227108266073, "grad_norm": 0.29370155930519104, "learning_rate": 2.554932224377007e-06, "loss": 0.3951, "step": 2501 }, { "epoch": 2.089062065126635, "grad_norm": 0.3160233795642853, "learning_rate": 2.5506926868684683e-06, "loss": 0.4068, "step": 2502 }, { "epoch": 2.0898970219871975, "grad_norm": 0.33544859290122986, "learning_rate": 2.5464554649894314e-06, "loss": 0.3949, "step": 2503 }, { "epoch": 2.0907319788477596, "grad_norm": 0.30473172664642334, "learning_rate": 2.5422205627458636e-06, "loss": 0.382, "step": 2504 }, { "epoch": 2.0915669357083218, "grad_norm": 0.2883310616016388, "learning_rate": 2.5379879841415334e-06, "loss": 0.3914, "step": 2505 }, { "epoch": 2.092401892568884, "grad_norm": 0.29687970876693726, "learning_rate": 2.533757733178014e-06, "loss": 0.3451, "step": 2506 }, { "epoch": 2.093236849429446, "grad_norm": 0.33005326986312866, "learning_rate": 2.529529813854683e-06, "loss": 0.4287, "step": 2507 }, { "epoch": 2.0940718062900086, "grad_norm": 0.32902100682258606, "learning_rate": 2.5253042301687015e-06, "loss": 0.4205, "step": 2508 }, { "epoch": 2.0949067631505707, "grad_norm": 0.3418387174606323, "learning_rate": 2.521080986115037e-06, "loss": 0.4247, "step": 2509 }, { "epoch": 2.095741720011133, "grad_norm": 0.29723918437957764, "learning_rate": 2.5168600856864354e-06, "loss": 0.3815, "step": 2510 }, { "epoch": 2.096576676871695, "grad_norm": 0.2958418130874634, "learning_rate": 2.5126415328734275e-06, "loss": 0.4128, "step": 2511 }, { "epoch": 2.097411633732257, "grad_norm": 0.3647449016571045, "learning_rate": 2.5084253316643314e-06, "loss": 0.4346, "step": 2512 }, { "epoch": 2.0982465905928196, "grad_norm": 0.3141944110393524, "learning_rate": 2.504211486045235e-06, "loss": 0.3621, "step": 2513 }, { "epoch": 2.0990815474533817, "grad_norm": 0.3234497904777527, "learning_rate": 2.5000000000000015e-06, "loss": 0.4319, "step": 2514 }, { "epoch": 2.099916504313944, "grad_norm": 0.31074216961860657, "learning_rate": 2.495790877510261e-06, "loss": 0.3672, "step": 2515 }, { "epoch": 2.100751461174506, "grad_norm": 0.30130934715270996, "learning_rate": 2.4915841225554154e-06, "loss": 0.3947, "step": 2516 }, { "epoch": 2.101586418035068, "grad_norm": 0.2979569137096405, "learning_rate": 2.487379739112623e-06, "loss": 0.3602, "step": 2517 }, { "epoch": 2.10242137489563, "grad_norm": 0.33304959535598755, "learning_rate": 2.4831777311567985e-06, "loss": 0.4004, "step": 2518 }, { "epoch": 2.1032563317561928, "grad_norm": 0.30456751585006714, "learning_rate": 2.4789781026606195e-06, "loss": 0.4087, "step": 2519 }, { "epoch": 2.104091288616755, "grad_norm": 0.30711039900779724, "learning_rate": 2.4747808575945006e-06, "loss": 0.4088, "step": 2520 }, { "epoch": 2.104926245477317, "grad_norm": 0.29603272676467896, "learning_rate": 2.4705859999266158e-06, "loss": 0.4023, "step": 2521 }, { "epoch": 2.105761202337879, "grad_norm": 0.2935551106929779, "learning_rate": 2.4663935336228745e-06, "loss": 0.3649, "step": 2522 }, { "epoch": 2.1065961591984412, "grad_norm": 0.3035936951637268, "learning_rate": 2.4622034626469265e-06, "loss": 0.3864, "step": 2523 }, { "epoch": 2.107431116059004, "grad_norm": 0.3174712061882019, "learning_rate": 2.4580157909601605e-06, "loss": 0.4428, "step": 2524 }, { "epoch": 2.108266072919566, "grad_norm": 0.2934063673019409, "learning_rate": 2.4538305225216936e-06, "loss": 0.3379, "step": 2525 }, { "epoch": 2.109101029780128, "grad_norm": 0.3555375039577484, "learning_rate": 2.4496476612883684e-06, "loss": 0.4126, "step": 2526 }, { "epoch": 2.10993598664069, "grad_norm": 0.3133206367492676, "learning_rate": 2.4454672112147617e-06, "loss": 0.3813, "step": 2527 }, { "epoch": 2.1107709435012523, "grad_norm": 0.30911704897880554, "learning_rate": 2.441289176253155e-06, "loss": 0.3958, "step": 2528 }, { "epoch": 2.111605900361815, "grad_norm": 0.3128792643547058, "learning_rate": 2.4371135603535613e-06, "loss": 0.3964, "step": 2529 }, { "epoch": 2.112440857222377, "grad_norm": 0.3104906976222992, "learning_rate": 2.4329403674636993e-06, "loss": 0.3764, "step": 2530 }, { "epoch": 2.113275814082939, "grad_norm": 0.33622875809669495, "learning_rate": 2.4287696015289976e-06, "loss": 0.4657, "step": 2531 }, { "epoch": 2.114110770943501, "grad_norm": 0.2916196882724762, "learning_rate": 2.4246012664925892e-06, "loss": 0.358, "step": 2532 }, { "epoch": 2.1149457278040633, "grad_norm": 0.36524254083633423, "learning_rate": 2.4204353662953146e-06, "loss": 0.4372, "step": 2533 }, { "epoch": 2.1157806846646254, "grad_norm": 0.3241313397884369, "learning_rate": 2.416271904875706e-06, "loss": 0.3882, "step": 2534 }, { "epoch": 2.116615641525188, "grad_norm": 0.3228190243244171, "learning_rate": 2.4121108861699903e-06, "loss": 0.3952, "step": 2535 }, { "epoch": 2.11745059838575, "grad_norm": 0.31358692049980164, "learning_rate": 2.407952314112093e-06, "loss": 0.3954, "step": 2536 }, { "epoch": 2.1182855552463122, "grad_norm": 0.3108291029930115, "learning_rate": 2.4037961926336127e-06, "loss": 0.3977, "step": 2537 }, { "epoch": 2.1191205121068744, "grad_norm": 0.29989275336265564, "learning_rate": 2.399642525663843e-06, "loss": 0.3756, "step": 2538 }, { "epoch": 2.1199554689674365, "grad_norm": 0.3399176597595215, "learning_rate": 2.395491317129756e-06, "loss": 0.4296, "step": 2539 }, { "epoch": 2.120790425827999, "grad_norm": 0.3138573169708252, "learning_rate": 2.3913425709559897e-06, "loss": 0.3902, "step": 2540 }, { "epoch": 2.121625382688561, "grad_norm": 0.3270610570907593, "learning_rate": 2.3871962910648653e-06, "loss": 0.4037, "step": 2541 }, { "epoch": 2.1224603395491233, "grad_norm": 0.35955485701560974, "learning_rate": 2.383052481376367e-06, "loss": 0.428, "step": 2542 }, { "epoch": 2.1232952964096854, "grad_norm": 0.3009285628795624, "learning_rate": 2.378911145808142e-06, "loss": 0.3763, "step": 2543 }, { "epoch": 2.1241302532702475, "grad_norm": 0.3552929759025574, "learning_rate": 2.3747722882755055e-06, "loss": 0.4387, "step": 2544 }, { "epoch": 2.12496521013081, "grad_norm": 0.29427656531333923, "learning_rate": 2.370635912691422e-06, "loss": 0.3589, "step": 2545 }, { "epoch": 2.125800166991372, "grad_norm": 0.33603182435035706, "learning_rate": 2.3665020229665138e-06, "loss": 0.4074, "step": 2546 }, { "epoch": 2.1266351238519343, "grad_norm": 0.30732905864715576, "learning_rate": 2.3623706230090517e-06, "loss": 0.4147, "step": 2547 }, { "epoch": 2.1274700807124964, "grad_norm": 0.3328792452812195, "learning_rate": 2.358241716724951e-06, "loss": 0.4078, "step": 2548 }, { "epoch": 2.1283050375730586, "grad_norm": 0.3135516047477722, "learning_rate": 2.354115308017776e-06, "loss": 0.3788, "step": 2549 }, { "epoch": 2.129139994433621, "grad_norm": 0.30739137530326843, "learning_rate": 2.3499914007887216e-06, "loss": 0.3928, "step": 2550 }, { "epoch": 2.1299749512941832, "grad_norm": 0.3278030455112457, "learning_rate": 2.3458699989366224e-06, "loss": 0.4036, "step": 2551 }, { "epoch": 2.1308099081547454, "grad_norm": 0.34515318274497986, "learning_rate": 2.3417511063579405e-06, "loss": 0.3886, "step": 2552 }, { "epoch": 2.1316448650153075, "grad_norm": 0.33474427461624146, "learning_rate": 2.3376347269467735e-06, "loss": 0.399, "step": 2553 }, { "epoch": 2.1324798218758696, "grad_norm": 0.32421231269836426, "learning_rate": 2.333520864594835e-06, "loss": 0.4037, "step": 2554 }, { "epoch": 2.133314778736432, "grad_norm": 0.29802343249320984, "learning_rate": 2.32940952319146e-06, "loss": 0.3829, "step": 2555 }, { "epoch": 2.1341497355969943, "grad_norm": 0.31839242577552795, "learning_rate": 2.325300706623607e-06, "loss": 0.3653, "step": 2556 }, { "epoch": 2.1349846924575564, "grad_norm": 0.30766016244888306, "learning_rate": 2.3211944187758358e-06, "loss": 0.3909, "step": 2557 }, { "epoch": 2.1358196493181185, "grad_norm": 0.35762009024620056, "learning_rate": 2.317090663530326e-06, "loss": 0.4301, "step": 2558 }, { "epoch": 2.1366546061786806, "grad_norm": 0.2998912036418915, "learning_rate": 2.3129894447668617e-06, "loss": 0.3585, "step": 2559 }, { "epoch": 2.137489563039243, "grad_norm": 0.32431697845458984, "learning_rate": 2.308890766362819e-06, "loss": 0.413, "step": 2560 }, { "epoch": 2.1383245198998053, "grad_norm": 0.29566043615341187, "learning_rate": 2.3047946321931853e-06, "loss": 0.3832, "step": 2561 }, { "epoch": 2.1391594767603674, "grad_norm": 0.2912890315055847, "learning_rate": 2.300701046130535e-06, "loss": 0.4203, "step": 2562 }, { "epoch": 2.1399944336209296, "grad_norm": 0.30018648505210876, "learning_rate": 2.296610012045033e-06, "loss": 0.396, "step": 2563 }, { "epoch": 2.1408293904814917, "grad_norm": 0.31132420897483826, "learning_rate": 2.2925215338044375e-06, "loss": 0.4142, "step": 2564 }, { "epoch": 2.141664347342054, "grad_norm": 0.32919567823410034, "learning_rate": 2.288435615274085e-06, "loss": 0.3906, "step": 2565 }, { "epoch": 2.1424993042026164, "grad_norm": 0.2913869321346283, "learning_rate": 2.284352260316894e-06, "loss": 0.3539, "step": 2566 }, { "epoch": 2.1433342610631785, "grad_norm": 0.3337686061859131, "learning_rate": 2.280271472793357e-06, "loss": 0.424, "step": 2567 }, { "epoch": 2.1441692179237406, "grad_norm": 0.32497507333755493, "learning_rate": 2.2761932565615418e-06, "loss": 0.4128, "step": 2568 }, { "epoch": 2.1450041747843027, "grad_norm": 0.2928534746170044, "learning_rate": 2.272117615477087e-06, "loss": 0.3667, "step": 2569 }, { "epoch": 2.145839131644865, "grad_norm": 0.31517931818962097, "learning_rate": 2.268044553393192e-06, "loss": 0.4162, "step": 2570 }, { "epoch": 2.1466740885054274, "grad_norm": 0.3097784221172333, "learning_rate": 2.2639740741606203e-06, "loss": 0.401, "step": 2571 }, { "epoch": 2.1475090453659895, "grad_norm": 0.3367779552936554, "learning_rate": 2.2599061816276917e-06, "loss": 0.3763, "step": 2572 }, { "epoch": 2.1483440022265516, "grad_norm": 0.32167086005210876, "learning_rate": 2.2558408796402864e-06, "loss": 0.3778, "step": 2573 }, { "epoch": 2.1491789590871138, "grad_norm": 0.38052982091903687, "learning_rate": 2.251778172041828e-06, "loss": 0.4124, "step": 2574 }, { "epoch": 2.150013915947676, "grad_norm": 0.3244275748729706, "learning_rate": 2.2477180626732897e-06, "loss": 0.3729, "step": 2575 }, { "epoch": 2.1508488728082384, "grad_norm": 0.3151280879974365, "learning_rate": 2.2436605553731938e-06, "loss": 0.4316, "step": 2576 }, { "epoch": 2.1516838296688006, "grad_norm": 0.30084335803985596, "learning_rate": 2.2396056539775906e-06, "loss": 0.4058, "step": 2577 }, { "epoch": 2.1525187865293627, "grad_norm": 0.307856023311615, "learning_rate": 2.2355533623200775e-06, "loss": 0.3836, "step": 2578 }, { "epoch": 2.153353743389925, "grad_norm": 0.3266623914241791, "learning_rate": 2.2315036842317854e-06, "loss": 0.3557, "step": 2579 }, { "epoch": 2.154188700250487, "grad_norm": 0.33636683225631714, "learning_rate": 2.2274566235413618e-06, "loss": 0.4194, "step": 2580 }, { "epoch": 2.155023657111049, "grad_norm": 0.30781587958335876, "learning_rate": 2.2234121840749932e-06, "loss": 0.3678, "step": 2581 }, { "epoch": 2.1558586139716116, "grad_norm": 0.3244486153125763, "learning_rate": 2.2193703696563805e-06, "loss": 0.4238, "step": 2582 }, { "epoch": 2.1566935708321737, "grad_norm": 0.3471379578113556, "learning_rate": 2.2153311841067438e-06, "loss": 0.4226, "step": 2583 }, { "epoch": 2.157528527692736, "grad_norm": 0.3004728853702545, "learning_rate": 2.2112946312448218e-06, "loss": 0.3299, "step": 2584 }, { "epoch": 2.158363484553298, "grad_norm": 0.3533618450164795, "learning_rate": 2.207260714886859e-06, "loss": 0.4047, "step": 2585 }, { "epoch": 2.15919844141386, "grad_norm": 0.3524574935436249, "learning_rate": 2.20322943884661e-06, "loss": 0.445, "step": 2586 }, { "epoch": 2.1600333982744226, "grad_norm": 0.32805442810058594, "learning_rate": 2.1992008069353325e-06, "loss": 0.4066, "step": 2587 }, { "epoch": 2.1608683551349848, "grad_norm": 0.2837461233139038, "learning_rate": 2.195174822961783e-06, "loss": 0.3293, "step": 2588 }, { "epoch": 2.161703311995547, "grad_norm": 0.33706167340278625, "learning_rate": 2.1911514907322196e-06, "loss": 0.4078, "step": 2589 }, { "epoch": 2.162538268856109, "grad_norm": 0.30405813455581665, "learning_rate": 2.1871308140503883e-06, "loss": 0.3878, "step": 2590 }, { "epoch": 2.163373225716671, "grad_norm": 0.3093661665916443, "learning_rate": 2.183112796717525e-06, "loss": 0.3727, "step": 2591 }, { "epoch": 2.1642081825772337, "grad_norm": 0.3256365656852722, "learning_rate": 2.179097442532352e-06, "loss": 0.3858, "step": 2592 }, { "epoch": 2.165043139437796, "grad_norm": 0.3368619978427887, "learning_rate": 2.175084755291077e-06, "loss": 0.4192, "step": 2593 }, { "epoch": 2.165878096298358, "grad_norm": 0.2937866151332855, "learning_rate": 2.1710747387873822e-06, "loss": 0.3692, "step": 2594 }, { "epoch": 2.16671305315892, "grad_norm": 0.33173251152038574, "learning_rate": 2.1670673968124243e-06, "loss": 0.411, "step": 2595 }, { "epoch": 2.167548010019482, "grad_norm": 0.3224278688430786, "learning_rate": 2.1630627331548392e-06, "loss": 0.3476, "step": 2596 }, { "epoch": 2.1683829668800447, "grad_norm": 0.3324386179447174, "learning_rate": 2.159060751600717e-06, "loss": 0.3956, "step": 2597 }, { "epoch": 2.169217923740607, "grad_norm": 0.32707643508911133, "learning_rate": 2.155061455933625e-06, "loss": 0.3938, "step": 2598 }, { "epoch": 2.170052880601169, "grad_norm": 0.324494332075119, "learning_rate": 2.1510648499345856e-06, "loss": 0.4117, "step": 2599 }, { "epoch": 2.170887837461731, "grad_norm": 0.32096657156944275, "learning_rate": 2.147070937382076e-06, "loss": 0.4054, "step": 2600 }, { "epoch": 2.171722794322293, "grad_norm": 0.3229570686817169, "learning_rate": 2.143079722052034e-06, "loss": 0.3553, "step": 2601 }, { "epoch": 2.1725577511828558, "grad_norm": 0.30216896533966064, "learning_rate": 2.1390912077178405e-06, "loss": 0.3647, "step": 2602 }, { "epoch": 2.173392708043418, "grad_norm": 0.34084397554397583, "learning_rate": 2.1351053981503238e-06, "loss": 0.4252, "step": 2603 }, { "epoch": 2.17422766490398, "grad_norm": 0.30552107095718384, "learning_rate": 2.1311222971177614e-06, "loss": 0.398, "step": 2604 }, { "epoch": 2.175062621764542, "grad_norm": 0.3078589141368866, "learning_rate": 2.127141908385863e-06, "loss": 0.3707, "step": 2605 }, { "epoch": 2.1758975786251042, "grad_norm": 0.3321577310562134, "learning_rate": 2.1231642357177764e-06, "loss": 0.4606, "step": 2606 }, { "epoch": 2.176732535485667, "grad_norm": 0.3208577632904053, "learning_rate": 2.119189282874081e-06, "loss": 0.3784, "step": 2607 }, { "epoch": 2.177567492346229, "grad_norm": 0.3212796151638031, "learning_rate": 2.115217053612785e-06, "loss": 0.423, "step": 2608 }, { "epoch": 2.178402449206791, "grad_norm": 0.2974821627140045, "learning_rate": 2.1112475516893247e-06, "loss": 0.3598, "step": 2609 }, { "epoch": 2.179237406067353, "grad_norm": 0.31853047013282776, "learning_rate": 2.1072807808565547e-06, "loss": 0.39, "step": 2610 }, { "epoch": 2.1800723629279153, "grad_norm": 0.31891778111457825, "learning_rate": 2.103316744864747e-06, "loss": 0.3789, "step": 2611 }, { "epoch": 2.1809073197884774, "grad_norm": 0.3078772723674774, "learning_rate": 2.0993554474615872e-06, "loss": 0.4181, "step": 2612 }, { "epoch": 2.18174227664904, "grad_norm": 0.3259728252887726, "learning_rate": 2.0953968923921784e-06, "loss": 0.4337, "step": 2613 }, { "epoch": 2.182577233509602, "grad_norm": 0.3181164860725403, "learning_rate": 2.0914410833990246e-06, "loss": 0.3963, "step": 2614 }, { "epoch": 2.183412190370164, "grad_norm": 0.2905791401863098, "learning_rate": 2.0874880242220324e-06, "loss": 0.389, "step": 2615 }, { "epoch": 2.1842471472307263, "grad_norm": 0.33251023292541504, "learning_rate": 2.083537718598519e-06, "loss": 0.4108, "step": 2616 }, { "epoch": 2.1850821040912884, "grad_norm": 0.30680111050605774, "learning_rate": 2.079590170263182e-06, "loss": 0.4011, "step": 2617 }, { "epoch": 2.185917060951851, "grad_norm": 0.3164617121219635, "learning_rate": 2.0756453829481278e-06, "loss": 0.3908, "step": 2618 }, { "epoch": 2.186752017812413, "grad_norm": 0.2916562259197235, "learning_rate": 2.0717033603828436e-06, "loss": 0.396, "step": 2619 }, { "epoch": 2.1875869746729752, "grad_norm": 0.3289824426174164, "learning_rate": 2.0677641062942033e-06, "loss": 0.4212, "step": 2620 }, { "epoch": 2.1884219315335374, "grad_norm": 0.28643471002578735, "learning_rate": 2.0638276244064685e-06, "loss": 0.3711, "step": 2621 }, { "epoch": 2.1892568883940995, "grad_norm": 0.31038281321525574, "learning_rate": 2.059893918441276e-06, "loss": 0.3938, "step": 2622 }, { "epoch": 2.1900918452546616, "grad_norm": 0.31465259194374084, "learning_rate": 2.055962992117637e-06, "loss": 0.3684, "step": 2623 }, { "epoch": 2.190926802115224, "grad_norm": 0.3215372562408447, "learning_rate": 2.052034849151936e-06, "loss": 0.3836, "step": 2624 }, { "epoch": 2.1917617589757863, "grad_norm": 0.3086021840572357, "learning_rate": 2.048109493257929e-06, "loss": 0.3948, "step": 2625 }, { "epoch": 2.1925967158363484, "grad_norm": 0.3032572865486145, "learning_rate": 2.0441869281467343e-06, "loss": 0.4145, "step": 2626 }, { "epoch": 2.1934316726969105, "grad_norm": 0.29899391531944275, "learning_rate": 2.0402671575268307e-06, "loss": 0.3753, "step": 2627 }, { "epoch": 2.1942666295574726, "grad_norm": 0.322461873292923, "learning_rate": 2.0363501851040573e-06, "loss": 0.4222, "step": 2628 }, { "epoch": 2.195101586418035, "grad_norm": 0.28658974170684814, "learning_rate": 2.032436014581604e-06, "loss": 0.3673, "step": 2629 }, { "epoch": 2.1959365432785973, "grad_norm": 0.29876959323883057, "learning_rate": 2.0285246496600188e-06, "loss": 0.3981, "step": 2630 }, { "epoch": 2.1967715001391594, "grad_norm": 0.3165908455848694, "learning_rate": 2.0246160940371913e-06, "loss": 0.3722, "step": 2631 }, { "epoch": 2.1976064569997216, "grad_norm": 0.31636154651641846, "learning_rate": 2.020710351408354e-06, "loss": 0.4281, "step": 2632 }, { "epoch": 2.1984414138602837, "grad_norm": 0.3169400691986084, "learning_rate": 2.0168074254660892e-06, "loss": 0.4153, "step": 2633 }, { "epoch": 2.1992763707208463, "grad_norm": 0.28989097476005554, "learning_rate": 2.012907319900302e-06, "loss": 0.3724, "step": 2634 }, { "epoch": 2.2001113275814084, "grad_norm": 0.2951287627220154, "learning_rate": 2.009010038398242e-06, "loss": 0.4086, "step": 2635 }, { "epoch": 2.2009462844419705, "grad_norm": 0.329906702041626, "learning_rate": 2.00511558464449e-06, "loss": 0.3903, "step": 2636 }, { "epoch": 2.2017812413025326, "grad_norm": 0.3198779821395874, "learning_rate": 2.001223962320941e-06, "loss": 0.4324, "step": 2637 }, { "epoch": 2.2026161981630947, "grad_norm": 0.3047266900539398, "learning_rate": 1.997335175106827e-06, "loss": 0.3914, "step": 2638 }, { "epoch": 2.2034511550236573, "grad_norm": 0.32029902935028076, "learning_rate": 1.9934492266786927e-06, "loss": 0.4457, "step": 2639 }, { "epoch": 2.2042861118842194, "grad_norm": 0.333283394575119, "learning_rate": 1.9895661207103967e-06, "loss": 0.4262, "step": 2640 }, { "epoch": 2.2051210687447815, "grad_norm": 0.31652265787124634, "learning_rate": 1.9856858608731184e-06, "loss": 0.4105, "step": 2641 }, { "epoch": 2.2059560256053437, "grad_norm": 0.2994423508644104, "learning_rate": 1.981808450835339e-06, "loss": 0.386, "step": 2642 }, { "epoch": 2.2067909824659058, "grad_norm": 0.3136173188686371, "learning_rate": 1.9779338942628484e-06, "loss": 0.4054, "step": 2643 }, { "epoch": 2.2076259393264683, "grad_norm": 0.3074556589126587, "learning_rate": 1.9740621948187384e-06, "loss": 0.4049, "step": 2644 }, { "epoch": 2.2084608961870305, "grad_norm": 0.28614309430122375, "learning_rate": 1.970193356163398e-06, "loss": 0.4116, "step": 2645 }, { "epoch": 2.2092958530475926, "grad_norm": 0.29773280024528503, "learning_rate": 1.9663273819545157e-06, "loss": 0.401, "step": 2646 }, { "epoch": 2.2101308099081547, "grad_norm": 0.298500657081604, "learning_rate": 1.962464275847069e-06, "loss": 0.3802, "step": 2647 }, { "epoch": 2.210965766768717, "grad_norm": 0.3222505450248718, "learning_rate": 1.9586040414933233e-06, "loss": 0.3825, "step": 2648 }, { "epoch": 2.2118007236292794, "grad_norm": 0.30687645077705383, "learning_rate": 1.954746682542828e-06, "loss": 0.4045, "step": 2649 }, { "epoch": 2.2126356804898415, "grad_norm": 0.30725476145744324, "learning_rate": 1.95089220264242e-06, "loss": 0.3802, "step": 2650 }, { "epoch": 2.2134706373504036, "grad_norm": 0.2782883048057556, "learning_rate": 1.947040605436208e-06, "loss": 0.3684, "step": 2651 }, { "epoch": 2.2143055942109657, "grad_norm": 0.3233407437801361, "learning_rate": 1.943191894565576e-06, "loss": 0.4205, "step": 2652 }, { "epoch": 2.215140551071528, "grad_norm": 0.30732694268226624, "learning_rate": 1.9393460736691848e-06, "loss": 0.3934, "step": 2653 }, { "epoch": 2.21597550793209, "grad_norm": 0.3063552975654602, "learning_rate": 1.935503146382954e-06, "loss": 0.339, "step": 2654 }, { "epoch": 2.2168104647926525, "grad_norm": 0.3660931885242462, "learning_rate": 1.931663116340074e-06, "loss": 0.4102, "step": 2655 }, { "epoch": 2.2176454216532147, "grad_norm": 0.3476693630218506, "learning_rate": 1.9278259871709985e-06, "loss": 0.4382, "step": 2656 }, { "epoch": 2.2184803785137768, "grad_norm": 0.31445521116256714, "learning_rate": 1.923991762503428e-06, "loss": 0.3898, "step": 2657 }, { "epoch": 2.219315335374339, "grad_norm": 0.32259172201156616, "learning_rate": 1.9201604459623284e-06, "loss": 0.3953, "step": 2658 }, { "epoch": 2.220150292234901, "grad_norm": 0.32930395007133484, "learning_rate": 1.91633204116991e-06, "loss": 0.377, "step": 2659 }, { "epoch": 2.2209852490954636, "grad_norm": 0.3378429710865021, "learning_rate": 1.91250655174563e-06, "loss": 0.4002, "step": 2660 }, { "epoch": 2.2218202059560257, "grad_norm": 0.307620644569397, "learning_rate": 1.9086839813061938e-06, "loss": 0.3588, "step": 2661 }, { "epoch": 2.222655162816588, "grad_norm": 0.3339415192604065, "learning_rate": 1.9048643334655431e-06, "loss": 0.4158, "step": 2662 }, { "epoch": 2.22349011967715, "grad_norm": 0.3300795555114746, "learning_rate": 1.9010476118348576e-06, "loss": 0.3843, "step": 2663 }, { "epoch": 2.224325076537712, "grad_norm": 0.32692480087280273, "learning_rate": 1.8972338200225509e-06, "loss": 0.412, "step": 2664 }, { "epoch": 2.2251600333982746, "grad_norm": 0.350928395986557, "learning_rate": 1.8934229616342647e-06, "loss": 0.4019, "step": 2665 }, { "epoch": 2.2259949902588367, "grad_norm": 0.3247718811035156, "learning_rate": 1.8896150402728725e-06, "loss": 0.3936, "step": 2666 }, { "epoch": 2.226829947119399, "grad_norm": 0.3096902072429657, "learning_rate": 1.8858100595384654e-06, "loss": 0.4197, "step": 2667 }, { "epoch": 2.227664903979961, "grad_norm": 0.3093835413455963, "learning_rate": 1.8820080230283577e-06, "loss": 0.3927, "step": 2668 }, { "epoch": 2.228499860840523, "grad_norm": 0.3220185339450836, "learning_rate": 1.8782089343370762e-06, "loss": 0.4275, "step": 2669 }, { "epoch": 2.229334817701085, "grad_norm": 0.323256254196167, "learning_rate": 1.8744127970563685e-06, "loss": 0.386, "step": 2670 }, { "epoch": 2.2301697745616478, "grad_norm": 0.31182682514190674, "learning_rate": 1.8706196147751844e-06, "loss": 0.3841, "step": 2671 }, { "epoch": 2.23100473142221, "grad_norm": 0.3332628607749939, "learning_rate": 1.8668293910796809e-06, "loss": 0.401, "step": 2672 }, { "epoch": 2.231839688282772, "grad_norm": 0.31989091634750366, "learning_rate": 1.8630421295532252e-06, "loss": 0.3755, "step": 2673 }, { "epoch": 2.232674645143334, "grad_norm": 0.3338984251022339, "learning_rate": 1.859257833776371e-06, "loss": 0.4221, "step": 2674 }, { "epoch": 2.2335096020038963, "grad_norm": 0.30847156047821045, "learning_rate": 1.8554765073268794e-06, "loss": 0.3857, "step": 2675 }, { "epoch": 2.234344558864459, "grad_norm": 0.3104780614376068, "learning_rate": 1.8516981537797042e-06, "loss": 0.4191, "step": 2676 }, { "epoch": 2.235179515725021, "grad_norm": 0.3017040491104126, "learning_rate": 1.8479227767069768e-06, "loss": 0.3685, "step": 2677 }, { "epoch": 2.236014472585583, "grad_norm": 0.33131253719329834, "learning_rate": 1.8441503796780287e-06, "loss": 0.3936, "step": 2678 }, { "epoch": 2.236849429446145, "grad_norm": 0.3160119950771332, "learning_rate": 1.8403809662593646e-06, "loss": 0.4047, "step": 2679 }, { "epoch": 2.2376843863067073, "grad_norm": 0.2979438304901123, "learning_rate": 1.8366145400146718e-06, "loss": 0.364, "step": 2680 }, { "epoch": 2.23851934316727, "grad_norm": 0.2909319996833801, "learning_rate": 1.8328511045048153e-06, "loss": 0.4052, "step": 2681 }, { "epoch": 2.239354300027832, "grad_norm": 0.30689340829849243, "learning_rate": 1.8290906632878297e-06, "loss": 0.4123, "step": 2682 }, { "epoch": 2.240189256888394, "grad_norm": 0.2819206118583679, "learning_rate": 1.825333219918919e-06, "loss": 0.366, "step": 2683 }, { "epoch": 2.241024213748956, "grad_norm": 0.3317341208457947, "learning_rate": 1.8215787779504534e-06, "loss": 0.4305, "step": 2684 }, { "epoch": 2.2418591706095183, "grad_norm": 0.31437286734580994, "learning_rate": 1.8178273409319646e-06, "loss": 0.4263, "step": 2685 }, { "epoch": 2.242694127470081, "grad_norm": 0.32102739810943604, "learning_rate": 1.8140789124101483e-06, "loss": 0.3618, "step": 2686 }, { "epoch": 2.243529084330643, "grad_norm": 0.30961787700653076, "learning_rate": 1.8103334959288488e-06, "loss": 0.38, "step": 2687 }, { "epoch": 2.244364041191205, "grad_norm": 0.3202042877674103, "learning_rate": 1.8065910950290672e-06, "loss": 0.3814, "step": 2688 }, { "epoch": 2.2451989980517673, "grad_norm": 0.309676855802536, "learning_rate": 1.8028517132489504e-06, "loss": 0.4139, "step": 2689 }, { "epoch": 2.2460339549123294, "grad_norm": 0.2906240224838257, "learning_rate": 1.799115354123796e-06, "loss": 0.3591, "step": 2690 }, { "epoch": 2.246868911772892, "grad_norm": 0.30472031235694885, "learning_rate": 1.7953820211860395e-06, "loss": 0.4036, "step": 2691 }, { "epoch": 2.247703868633454, "grad_norm": 0.3219873309135437, "learning_rate": 1.7916517179652548e-06, "loss": 0.4397, "step": 2692 }, { "epoch": 2.248538825494016, "grad_norm": 0.3230821490287781, "learning_rate": 1.7879244479881585e-06, "loss": 0.3956, "step": 2693 }, { "epoch": 2.2493737823545783, "grad_norm": 0.28681400418281555, "learning_rate": 1.7842002147785882e-06, "loss": 0.3663, "step": 2694 }, { "epoch": 2.2502087392151404, "grad_norm": 0.32560089230537415, "learning_rate": 1.7804790218575196e-06, "loss": 0.4139, "step": 2695 }, { "epoch": 2.251043696075703, "grad_norm": 0.29812395572662354, "learning_rate": 1.7767608727430542e-06, "loss": 0.3473, "step": 2696 }, { "epoch": 2.251878652936265, "grad_norm": 0.3004838824272156, "learning_rate": 1.7730457709504057e-06, "loss": 0.4144, "step": 2697 }, { "epoch": 2.252713609796827, "grad_norm": 0.3022960424423218, "learning_rate": 1.7693337199919198e-06, "loss": 0.3972, "step": 2698 }, { "epoch": 2.2535485666573893, "grad_norm": 0.2923767566680908, "learning_rate": 1.7656247233770496e-06, "loss": 0.3523, "step": 2699 }, { "epoch": 2.2543835235179515, "grad_norm": 0.3135433793067932, "learning_rate": 1.7619187846123624e-06, "loss": 0.4336, "step": 2700 }, { "epoch": 2.255218480378514, "grad_norm": 0.3025234043598175, "learning_rate": 1.7582159072015347e-06, "loss": 0.3661, "step": 2701 }, { "epoch": 2.256053437239076, "grad_norm": 0.3264736533164978, "learning_rate": 1.754516094645351e-06, "loss": 0.4465, "step": 2702 }, { "epoch": 2.2568883940996383, "grad_norm": 0.31405046582221985, "learning_rate": 1.7508193504416954e-06, "loss": 0.3605, "step": 2703 }, { "epoch": 2.2577233509602004, "grad_norm": 0.2765679657459259, "learning_rate": 1.7471256780855518e-06, "loss": 0.3324, "step": 2704 }, { "epoch": 2.2585583078207625, "grad_norm": 0.32217633724212646, "learning_rate": 1.7434350810689993e-06, "loss": 0.3932, "step": 2705 }, { "epoch": 2.2593932646813246, "grad_norm": 0.29159262776374817, "learning_rate": 1.7397475628812088e-06, "loss": 0.3885, "step": 2706 }, { "epoch": 2.260228221541887, "grad_norm": 0.3388075828552246, "learning_rate": 1.736063127008446e-06, "loss": 0.4406, "step": 2707 }, { "epoch": 2.2610631784024493, "grad_norm": 0.31369054317474365, "learning_rate": 1.7323817769340561e-06, "loss": 0.4232, "step": 2708 }, { "epoch": 2.2618981352630114, "grad_norm": 0.29342690110206604, "learning_rate": 1.7287035161384673e-06, "loss": 0.3806, "step": 2709 }, { "epoch": 2.2627330921235735, "grad_norm": 0.33399075269699097, "learning_rate": 1.7250283480991947e-06, "loss": 0.4229, "step": 2710 }, { "epoch": 2.2635680489841357, "grad_norm": 0.3300558030605316, "learning_rate": 1.7213562762908164e-06, "loss": 0.3839, "step": 2711 }, { "epoch": 2.264403005844698, "grad_norm": 0.3156859576702118, "learning_rate": 1.7176873041849939e-06, "loss": 0.3978, "step": 2712 }, { "epoch": 2.2652379627052603, "grad_norm": 0.30910879373550415, "learning_rate": 1.7140214352504591e-06, "loss": 0.3923, "step": 2713 }, { "epoch": 2.2660729195658225, "grad_norm": 0.29003867506980896, "learning_rate": 1.710358672952998e-06, "loss": 0.3544, "step": 2714 }, { "epoch": 2.2669078764263846, "grad_norm": 0.30642569065093994, "learning_rate": 1.7066990207554735e-06, "loss": 0.4019, "step": 2715 }, { "epoch": 2.2677428332869467, "grad_norm": 0.30524909496307373, "learning_rate": 1.7030424821177998e-06, "loss": 0.4133, "step": 2716 }, { "epoch": 2.268577790147509, "grad_norm": 0.2884988784790039, "learning_rate": 1.6993890604969476e-06, "loss": 0.3448, "step": 2717 }, { "epoch": 2.2694127470080714, "grad_norm": 0.30721575021743774, "learning_rate": 1.695738759346947e-06, "loss": 0.4207, "step": 2718 }, { "epoch": 2.2702477038686335, "grad_norm": 0.30576497316360474, "learning_rate": 1.6920915821188716e-06, "loss": 0.3901, "step": 2719 }, { "epoch": 2.2710826607291956, "grad_norm": 0.315360426902771, "learning_rate": 1.688447532260844e-06, "loss": 0.4172, "step": 2720 }, { "epoch": 2.2719176175897577, "grad_norm": 0.31128066778182983, "learning_rate": 1.6848066132180279e-06, "loss": 0.3615, "step": 2721 }, { "epoch": 2.27275257445032, "grad_norm": 0.29212939739227295, "learning_rate": 1.6811688284326321e-06, "loss": 0.3562, "step": 2722 }, { "epoch": 2.2735875313108824, "grad_norm": 0.3063744008541107, "learning_rate": 1.6775341813438977e-06, "loss": 0.3724, "step": 2723 }, { "epoch": 2.2744224881714445, "grad_norm": 0.31816935539245605, "learning_rate": 1.673902675388101e-06, "loss": 0.4121, "step": 2724 }, { "epoch": 2.2752574450320067, "grad_norm": 0.2824072539806366, "learning_rate": 1.6702743139985477e-06, "loss": 0.3616, "step": 2725 }, { "epoch": 2.276092401892569, "grad_norm": 0.30112892389297485, "learning_rate": 1.6666491006055702e-06, "loss": 0.3862, "step": 2726 }, { "epoch": 2.276927358753131, "grad_norm": 0.29776522517204285, "learning_rate": 1.6630270386365288e-06, "loss": 0.3872, "step": 2727 }, { "epoch": 2.2777623156136935, "grad_norm": 0.3379971385002136, "learning_rate": 1.6594081315157996e-06, "loss": 0.4381, "step": 2728 }, { "epoch": 2.2785972724742556, "grad_norm": 0.3062693476676941, "learning_rate": 1.6557923826647754e-06, "loss": 0.3676, "step": 2729 }, { "epoch": 2.2794322293348177, "grad_norm": 0.3045791685581207, "learning_rate": 1.6521797955018714e-06, "loss": 0.4121, "step": 2730 }, { "epoch": 2.28026718619538, "grad_norm": 0.32432639598846436, "learning_rate": 1.6485703734425012e-06, "loss": 0.4295, "step": 2731 }, { "epoch": 2.281102143055942, "grad_norm": 0.298481822013855, "learning_rate": 1.644964119899095e-06, "loss": 0.3336, "step": 2732 }, { "epoch": 2.2819370999165045, "grad_norm": 0.34202298521995544, "learning_rate": 1.6413610382810885e-06, "loss": 0.4256, "step": 2733 }, { "epoch": 2.2827720567770666, "grad_norm": 0.319119930267334, "learning_rate": 1.6377611319949087e-06, "loss": 0.3876, "step": 2734 }, { "epoch": 2.2836070136376287, "grad_norm": 0.3322019577026367, "learning_rate": 1.6341644044439914e-06, "loss": 0.4348, "step": 2735 }, { "epoch": 2.284441970498191, "grad_norm": 0.30938324332237244, "learning_rate": 1.6305708590287616e-06, "loss": 0.3719, "step": 2736 }, { "epoch": 2.285276927358753, "grad_norm": 0.29136449098587036, "learning_rate": 1.6269804991466353e-06, "loss": 0.347, "step": 2737 }, { "epoch": 2.2861118842193155, "grad_norm": 0.31038206815719604, "learning_rate": 1.623393328192021e-06, "loss": 0.4412, "step": 2738 }, { "epoch": 2.2869468410798777, "grad_norm": 0.2983740568161011, "learning_rate": 1.6198093495563089e-06, "loss": 0.3883, "step": 2739 }, { "epoch": 2.28778179794044, "grad_norm": 0.3343299627304077, "learning_rate": 1.616228566627871e-06, "loss": 0.4259, "step": 2740 }, { "epoch": 2.288616754801002, "grad_norm": 0.2916144132614136, "learning_rate": 1.6126509827920583e-06, "loss": 0.3659, "step": 2741 }, { "epoch": 2.289451711661564, "grad_norm": 0.28083813190460205, "learning_rate": 1.6090766014312009e-06, "loss": 0.3793, "step": 2742 }, { "epoch": 2.2902866685221266, "grad_norm": 0.29068684577941895, "learning_rate": 1.6055054259245966e-06, "loss": 0.3757, "step": 2743 }, { "epoch": 2.2911216253826887, "grad_norm": 0.3483028709888458, "learning_rate": 1.6019374596485137e-06, "loss": 0.4349, "step": 2744 }, { "epoch": 2.291956582243251, "grad_norm": 0.3043074607849121, "learning_rate": 1.5983727059761873e-06, "loss": 0.3731, "step": 2745 }, { "epoch": 2.292791539103813, "grad_norm": 0.3103722929954529, "learning_rate": 1.5948111682778129e-06, "loss": 0.4123, "step": 2746 }, { "epoch": 2.293626495964375, "grad_norm": 0.30872997641563416, "learning_rate": 1.5912528499205498e-06, "loss": 0.4129, "step": 2747 }, { "epoch": 2.294461452824937, "grad_norm": 0.27706190943717957, "learning_rate": 1.5876977542685107e-06, "loss": 0.3714, "step": 2748 }, { "epoch": 2.2952964096854997, "grad_norm": 0.3263702988624573, "learning_rate": 1.5841458846827596e-06, "loss": 0.4256, "step": 2749 }, { "epoch": 2.296131366546062, "grad_norm": 0.27311259508132935, "learning_rate": 1.580597244521318e-06, "loss": 0.3802, "step": 2750 }, { "epoch": 2.296966323406624, "grad_norm": 0.3354285657405853, "learning_rate": 1.5770518371391425e-06, "loss": 0.4573, "step": 2751 }, { "epoch": 2.297801280267186, "grad_norm": 0.32990843057632446, "learning_rate": 1.5735096658881427e-06, "loss": 0.4456, "step": 2752 }, { "epoch": 2.2986362371277482, "grad_norm": 0.2928880751132965, "learning_rate": 1.56997073411717e-06, "loss": 0.3793, "step": 2753 }, { "epoch": 2.299471193988311, "grad_norm": 0.3008490204811096, "learning_rate": 1.5664350451720022e-06, "loss": 0.4096, "step": 2754 }, { "epoch": 2.300306150848873, "grad_norm": 0.3253270387649536, "learning_rate": 1.5629026023953626e-06, "loss": 0.3964, "step": 2755 }, { "epoch": 2.301141107709435, "grad_norm": 0.35446926951408386, "learning_rate": 1.5593734091269002e-06, "loss": 0.4307, "step": 2756 }, { "epoch": 2.301976064569997, "grad_norm": 0.32058390974998474, "learning_rate": 1.5558474687031905e-06, "loss": 0.367, "step": 2757 }, { "epoch": 2.3028110214305593, "grad_norm": 0.3309679329395294, "learning_rate": 1.5523247844577394e-06, "loss": 0.3815, "step": 2758 }, { "epoch": 2.3036459782911214, "grad_norm": 0.2991712987422943, "learning_rate": 1.5488053597209684e-06, "loss": 0.3982, "step": 2759 }, { "epoch": 2.304480935151684, "grad_norm": 0.3020462095737457, "learning_rate": 1.5452891978202205e-06, "loss": 0.39, "step": 2760 }, { "epoch": 2.305315892012246, "grad_norm": 0.30919405817985535, "learning_rate": 1.5417763020797516e-06, "loss": 0.385, "step": 2761 }, { "epoch": 2.306150848872808, "grad_norm": 0.3161711096763611, "learning_rate": 1.5382666758207344e-06, "loss": 0.3947, "step": 2762 }, { "epoch": 2.3069858057333703, "grad_norm": 0.30172014236450195, "learning_rate": 1.5347603223612462e-06, "loss": 0.3828, "step": 2763 }, { "epoch": 2.3078207625939324, "grad_norm": 0.3083163797855377, "learning_rate": 1.5312572450162705e-06, "loss": 0.3881, "step": 2764 }, { "epoch": 2.308655719454495, "grad_norm": 0.2900938093662262, "learning_rate": 1.5277574470976948e-06, "loss": 0.4061, "step": 2765 }, { "epoch": 2.309490676315057, "grad_norm": 0.3038583993911743, "learning_rate": 1.5242609319143043e-06, "loss": 0.3993, "step": 2766 }, { "epoch": 2.3103256331756192, "grad_norm": 0.2955843508243561, "learning_rate": 1.5207677027717854e-06, "loss": 0.3627, "step": 2767 }, { "epoch": 2.3111605900361814, "grad_norm": 0.3058319091796875, "learning_rate": 1.5172777629727126e-06, "loss": 0.4005, "step": 2768 }, { "epoch": 2.3119955468967435, "grad_norm": 0.33200281858444214, "learning_rate": 1.5137911158165509e-06, "loss": 0.3898, "step": 2769 }, { "epoch": 2.312830503757306, "grad_norm": 0.2989174723625183, "learning_rate": 1.5103077645996588e-06, "loss": 0.3836, "step": 2770 }, { "epoch": 2.313665460617868, "grad_norm": 0.32909590005874634, "learning_rate": 1.5068277126152674e-06, "loss": 0.4142, "step": 2771 }, { "epoch": 2.3145004174784303, "grad_norm": 0.2980722486972809, "learning_rate": 1.5033509631534986e-06, "loss": 0.3762, "step": 2772 }, { "epoch": 2.3153353743389924, "grad_norm": 0.31902480125427246, "learning_rate": 1.4998775195013515e-06, "loss": 0.436, "step": 2773 }, { "epoch": 2.3161703311995545, "grad_norm": 0.30820226669311523, "learning_rate": 1.4964073849426902e-06, "loss": 0.3714, "step": 2774 }, { "epoch": 2.317005288060117, "grad_norm": 0.3078463673591614, "learning_rate": 1.4929405627582617e-06, "loss": 0.4052, "step": 2775 }, { "epoch": 2.317840244920679, "grad_norm": 0.2863188683986664, "learning_rate": 1.4894770562256755e-06, "loss": 0.4015, "step": 2776 }, { "epoch": 2.3186752017812413, "grad_norm": 0.30638736486434937, "learning_rate": 1.4860168686194054e-06, "loss": 0.3728, "step": 2777 }, { "epoch": 2.3195101586418034, "grad_norm": 0.30632713437080383, "learning_rate": 1.4825600032107933e-06, "loss": 0.3946, "step": 2778 }, { "epoch": 2.3203451155023656, "grad_norm": 0.3047685921192169, "learning_rate": 1.4791064632680347e-06, "loss": 0.3881, "step": 2779 }, { "epoch": 2.321180072362928, "grad_norm": 0.33069589734077454, "learning_rate": 1.4756562520561829e-06, "loss": 0.4449, "step": 2780 }, { "epoch": 2.3220150292234902, "grad_norm": 0.2984223961830139, "learning_rate": 1.4722093728371427e-06, "loss": 0.4102, "step": 2781 }, { "epoch": 2.3228499860840524, "grad_norm": 0.2947639226913452, "learning_rate": 1.4687658288696732e-06, "loss": 0.3814, "step": 2782 }, { "epoch": 2.3236849429446145, "grad_norm": 0.30261725187301636, "learning_rate": 1.465325623409376e-06, "loss": 0.3651, "step": 2783 }, { "epoch": 2.3245198998051766, "grad_norm": 0.33153390884399414, "learning_rate": 1.4618887597086973e-06, "loss": 0.4229, "step": 2784 }, { "epoch": 2.325354856665739, "grad_norm": 0.2866438329219818, "learning_rate": 1.458455241016925e-06, "loss": 0.3728, "step": 2785 }, { "epoch": 2.3261898135263013, "grad_norm": 0.30132436752319336, "learning_rate": 1.455025070580181e-06, "loss": 0.4123, "step": 2786 }, { "epoch": 2.3270247703868634, "grad_norm": 0.30972573161125183, "learning_rate": 1.4515982516414296e-06, "loss": 0.3959, "step": 2787 }, { "epoch": 2.3278597272474255, "grad_norm": 0.31864526867866516, "learning_rate": 1.4481747874404583e-06, "loss": 0.4161, "step": 2788 }, { "epoch": 2.3286946841079876, "grad_norm": 0.2987009286880493, "learning_rate": 1.444754681213885e-06, "loss": 0.3788, "step": 2789 }, { "epoch": 2.32952964096855, "grad_norm": 0.3653404712677002, "learning_rate": 1.4413379361951596e-06, "loss": 0.4521, "step": 2790 }, { "epoch": 2.3303645978291123, "grad_norm": 0.29595381021499634, "learning_rate": 1.4379245556145415e-06, "loss": 0.3959, "step": 2791 }, { "epoch": 2.3311995546896744, "grad_norm": 0.2861098349094391, "learning_rate": 1.4345145426991213e-06, "loss": 0.366, "step": 2792 }, { "epoch": 2.3320345115502366, "grad_norm": 0.3040812611579895, "learning_rate": 1.4311079006727996e-06, "loss": 0.4098, "step": 2793 }, { "epoch": 2.3328694684107987, "grad_norm": 0.3115420639514923, "learning_rate": 1.4277046327562899e-06, "loss": 0.416, "step": 2794 }, { "epoch": 2.333704425271361, "grad_norm": 0.27989116311073303, "learning_rate": 1.4243047421671197e-06, "loss": 0.3554, "step": 2795 }, { "epoch": 2.3345393821319234, "grad_norm": 0.29660242795944214, "learning_rate": 1.4209082321196206e-06, "loss": 0.4038, "step": 2796 }, { "epoch": 2.3353743389924855, "grad_norm": 0.3176216185092926, "learning_rate": 1.4175151058249275e-06, "loss": 0.3764, "step": 2797 }, { "epoch": 2.3362092958530476, "grad_norm": 0.3404591977596283, "learning_rate": 1.414125366490976e-06, "loss": 0.4113, "step": 2798 }, { "epoch": 2.3370442527136097, "grad_norm": 0.2796476185321808, "learning_rate": 1.4107390173225045e-06, "loss": 0.3499, "step": 2799 }, { "epoch": 2.337879209574172, "grad_norm": 0.2999184727668762, "learning_rate": 1.4073560615210407e-06, "loss": 0.3628, "step": 2800 }, { "epoch": 2.3387141664347344, "grad_norm": 0.3117730915546417, "learning_rate": 1.403976502284905e-06, "loss": 0.3991, "step": 2801 }, { "epoch": 2.3395491232952965, "grad_norm": 0.31002041697502136, "learning_rate": 1.4006003428092118e-06, "loss": 0.3978, "step": 2802 }, { "epoch": 2.3403840801558586, "grad_norm": 0.329157292842865, "learning_rate": 1.3972275862858514e-06, "loss": 0.4608, "step": 2803 }, { "epoch": 2.3412190370164208, "grad_norm": 0.30172446370124817, "learning_rate": 1.3938582359035075e-06, "loss": 0.3536, "step": 2804 }, { "epoch": 2.342053993876983, "grad_norm": 0.32586851716041565, "learning_rate": 1.3904922948476363e-06, "loss": 0.396, "step": 2805 }, { "epoch": 2.342888950737545, "grad_norm": 0.3148805499076843, "learning_rate": 1.3871297663004724e-06, "loss": 0.4399, "step": 2806 }, { "epoch": 2.3437239075981076, "grad_norm": 0.3277604579925537, "learning_rate": 1.3837706534410284e-06, "loss": 0.3975, "step": 2807 }, { "epoch": 2.3445588644586697, "grad_norm": 0.3153846859931946, "learning_rate": 1.3804149594450816e-06, "loss": 0.3825, "step": 2808 }, { "epoch": 2.345393821319232, "grad_norm": 0.2919238805770874, "learning_rate": 1.377062687485179e-06, "loss": 0.3897, "step": 2809 }, { "epoch": 2.346228778179794, "grad_norm": 0.3113614022731781, "learning_rate": 1.3737138407306365e-06, "loss": 0.3678, "step": 2810 }, { "epoch": 2.347063735040356, "grad_norm": 0.29660752415657043, "learning_rate": 1.3703684223475228e-06, "loss": 0.3879, "step": 2811 }, { "epoch": 2.3478986919009186, "grad_norm": 0.30313175916671753, "learning_rate": 1.3670264354986757e-06, "loss": 0.3787, "step": 2812 }, { "epoch": 2.3487336487614807, "grad_norm": 0.3259551525115967, "learning_rate": 1.3636878833436812e-06, "loss": 0.4107, "step": 2813 }, { "epoch": 2.349568605622043, "grad_norm": 0.31628286838531494, "learning_rate": 1.360352769038879e-06, "loss": 0.4202, "step": 2814 }, { "epoch": 2.350403562482605, "grad_norm": 0.2961546778678894, "learning_rate": 1.3570210957373647e-06, "loss": 0.3812, "step": 2815 }, { "epoch": 2.351238519343167, "grad_norm": 0.32461079955101013, "learning_rate": 1.3536928665889732e-06, "loss": 0.4097, "step": 2816 }, { "epoch": 2.3520734762037296, "grad_norm": 0.387960821390152, "learning_rate": 1.3503680847402868e-06, "loss": 0.4004, "step": 2817 }, { "epoch": 2.3529084330642918, "grad_norm": 0.27886658906936646, "learning_rate": 1.3470467533346266e-06, "loss": 0.354, "step": 2818 }, { "epoch": 2.353743389924854, "grad_norm": 0.29526010155677795, "learning_rate": 1.343728875512056e-06, "loss": 0.3853, "step": 2819 }, { "epoch": 2.354578346785416, "grad_norm": 0.32935190200805664, "learning_rate": 1.340414454409369e-06, "loss": 0.4134, "step": 2820 }, { "epoch": 2.355413303645978, "grad_norm": 0.26104769110679626, "learning_rate": 1.3371034931600923e-06, "loss": 0.3075, "step": 2821 }, { "epoch": 2.3562482605065407, "grad_norm": 0.3164222836494446, "learning_rate": 1.3337959948944828e-06, "loss": 0.4494, "step": 2822 }, { "epoch": 2.357083217367103, "grad_norm": 0.32610034942626953, "learning_rate": 1.3304919627395208e-06, "loss": 0.4102, "step": 2823 }, { "epoch": 2.357918174227665, "grad_norm": 0.31042617559432983, "learning_rate": 1.327191399818915e-06, "loss": 0.3879, "step": 2824 }, { "epoch": 2.358753131088227, "grad_norm": 0.33995479345321655, "learning_rate": 1.3238943092530894e-06, "loss": 0.4167, "step": 2825 }, { "epoch": 2.359588087948789, "grad_norm": 0.3161577880382538, "learning_rate": 1.320600694159185e-06, "loss": 0.414, "step": 2826 }, { "epoch": 2.3604230448093517, "grad_norm": 0.3032337427139282, "learning_rate": 1.3173105576510614e-06, "loss": 0.3862, "step": 2827 }, { "epoch": 2.361258001669914, "grad_norm": 0.2847922742366791, "learning_rate": 1.3140239028392855e-06, "loss": 0.403, "step": 2828 }, { "epoch": 2.362092958530476, "grad_norm": 0.31149670481681824, "learning_rate": 1.3107407328311318e-06, "loss": 0.4225, "step": 2829 }, { "epoch": 2.362927915391038, "grad_norm": 0.3153827488422394, "learning_rate": 1.3074610507305868e-06, "loss": 0.3855, "step": 2830 }, { "epoch": 2.3637628722516, "grad_norm": 0.3098379969596863, "learning_rate": 1.3041848596383289e-06, "loss": 0.3958, "step": 2831 }, { "epoch": 2.3645978291121628, "grad_norm": 0.2886215150356293, "learning_rate": 1.3009121626517463e-06, "loss": 0.3892, "step": 2832 }, { "epoch": 2.365432785972725, "grad_norm": 0.2809971272945404, "learning_rate": 1.2976429628649183e-06, "loss": 0.3458, "step": 2833 }, { "epoch": 2.366267742833287, "grad_norm": 0.3189564645290375, "learning_rate": 1.294377263368617e-06, "loss": 0.4543, "step": 2834 }, { "epoch": 2.367102699693849, "grad_norm": 0.293738454580307, "learning_rate": 1.2911150672503098e-06, "loss": 0.3572, "step": 2835 }, { "epoch": 2.3679376565544112, "grad_norm": 0.3204452097415924, "learning_rate": 1.2878563775941483e-06, "loss": 0.3922, "step": 2836 }, { "epoch": 2.368772613414974, "grad_norm": 0.3117440342903137, "learning_rate": 1.2846011974809702e-06, "loss": 0.3704, "step": 2837 }, { "epoch": 2.369607570275536, "grad_norm": 0.2835433781147003, "learning_rate": 1.2813495299882934e-06, "loss": 0.3785, "step": 2838 }, { "epoch": 2.370442527136098, "grad_norm": 0.29475733637809753, "learning_rate": 1.2781013781903196e-06, "loss": 0.402, "step": 2839 }, { "epoch": 2.37127748399666, "grad_norm": 0.30970123410224915, "learning_rate": 1.2748567451579214e-06, "loss": 0.3847, "step": 2840 }, { "epoch": 2.3721124408572223, "grad_norm": 0.3052416443824768, "learning_rate": 1.2716156339586476e-06, "loss": 0.3843, "step": 2841 }, { "epoch": 2.3729473977177844, "grad_norm": 0.3235357403755188, "learning_rate": 1.268378047656717e-06, "loss": 0.4617, "step": 2842 }, { "epoch": 2.373782354578347, "grad_norm": 0.2852126955986023, "learning_rate": 1.2651439893130136e-06, "loss": 0.3491, "step": 2843 }, { "epoch": 2.374617311438909, "grad_norm": 0.2948692739009857, "learning_rate": 1.2619134619850908e-06, "loss": 0.413, "step": 2844 }, { "epoch": 2.375452268299471, "grad_norm": 0.3051479756832123, "learning_rate": 1.2586864687271593e-06, "loss": 0.4263, "step": 2845 }, { "epoch": 2.3762872251600333, "grad_norm": 0.30440762639045715, "learning_rate": 1.255463012590089e-06, "loss": 0.3779, "step": 2846 }, { "epoch": 2.3771221820205954, "grad_norm": 0.30702295899391174, "learning_rate": 1.25224309662141e-06, "loss": 0.3913, "step": 2847 }, { "epoch": 2.3779571388811576, "grad_norm": 0.3141738176345825, "learning_rate": 1.2490267238653004e-06, "loss": 0.4206, "step": 2848 }, { "epoch": 2.37879209574172, "grad_norm": 0.30832162499427795, "learning_rate": 1.2458138973625889e-06, "loss": 0.3416, "step": 2849 }, { "epoch": 2.3796270526022822, "grad_norm": 0.3114174008369446, "learning_rate": 1.242604620150758e-06, "loss": 0.3932, "step": 2850 }, { "epoch": 2.3804620094628444, "grad_norm": 0.34702059626579285, "learning_rate": 1.239398895263923e-06, "loss": 0.4158, "step": 2851 }, { "epoch": 2.3812969663234065, "grad_norm": 0.3250351846218109, "learning_rate": 1.2361967257328523e-06, "loss": 0.4163, "step": 2852 }, { "epoch": 2.3821319231839686, "grad_norm": 0.33402109146118164, "learning_rate": 1.2329981145849468e-06, "loss": 0.4056, "step": 2853 }, { "epoch": 2.382966880044531, "grad_norm": 0.30024853348731995, "learning_rate": 1.2298030648442421e-06, "loss": 0.3709, "step": 2854 }, { "epoch": 2.3838018369050933, "grad_norm": 0.3007797300815582, "learning_rate": 1.2266115795314126e-06, "loss": 0.4009, "step": 2855 }, { "epoch": 2.3846367937656554, "grad_norm": 0.30976927280426025, "learning_rate": 1.2234236616637579e-06, "loss": 0.3894, "step": 2856 }, { "epoch": 2.3854717506262175, "grad_norm": 0.34556296467781067, "learning_rate": 1.2202393142552067e-06, "loss": 0.4234, "step": 2857 }, { "epoch": 2.3863067074867796, "grad_norm": 0.30048125982284546, "learning_rate": 1.21705854031631e-06, "loss": 0.3569, "step": 2858 }, { "epoch": 2.387141664347342, "grad_norm": 0.321746826171875, "learning_rate": 1.2138813428542446e-06, "loss": 0.4269, "step": 2859 }, { "epoch": 2.3879766212079043, "grad_norm": 0.3024010956287384, "learning_rate": 1.2107077248728022e-06, "loss": 0.3828, "step": 2860 }, { "epoch": 2.3888115780684664, "grad_norm": 0.29861995577812195, "learning_rate": 1.2075376893723916e-06, "loss": 0.3749, "step": 2861 }, { "epoch": 2.3896465349290286, "grad_norm": 0.30189046263694763, "learning_rate": 1.2043712393500355e-06, "loss": 0.4279, "step": 2862 }, { "epoch": 2.3904814917895907, "grad_norm": 0.28200748562812805, "learning_rate": 1.2012083777993643e-06, "loss": 0.3742, "step": 2863 }, { "epoch": 2.3913164486501532, "grad_norm": 0.2823050916194916, "learning_rate": 1.19804910771062e-06, "loss": 0.3677, "step": 2864 }, { "epoch": 2.3921514055107154, "grad_norm": 0.2969598174095154, "learning_rate": 1.1948934320706452e-06, "loss": 0.4111, "step": 2865 }, { "epoch": 2.3929863623712775, "grad_norm": 0.3102186620235443, "learning_rate": 1.191741353862884e-06, "loss": 0.4027, "step": 2866 }, { "epoch": 2.3938213192318396, "grad_norm": 0.31072190403938293, "learning_rate": 1.1885928760673865e-06, "loss": 0.3784, "step": 2867 }, { "epoch": 2.3946562760924017, "grad_norm": 0.3006197214126587, "learning_rate": 1.1854480016607872e-06, "loss": 0.4085, "step": 2868 }, { "epoch": 2.3954912329529643, "grad_norm": 0.28356748819351196, "learning_rate": 1.1823067336163241e-06, "loss": 0.3834, "step": 2869 }, { "epoch": 2.3963261898135264, "grad_norm": 0.28761690855026245, "learning_rate": 1.179169074903821e-06, "loss": 0.3825, "step": 2870 }, { "epoch": 2.3971611466740885, "grad_norm": 0.3137459456920624, "learning_rate": 1.1760350284896876e-06, "loss": 0.4377, "step": 2871 }, { "epoch": 2.3979961035346506, "grad_norm": 0.2970288395881653, "learning_rate": 1.1729045973369252e-06, "loss": 0.393, "step": 2872 }, { "epoch": 2.3988310603952128, "grad_norm": 0.31480664014816284, "learning_rate": 1.1697777844051105e-06, "loss": 0.4054, "step": 2873 }, { "epoch": 2.3996660172557753, "grad_norm": 0.29799455404281616, "learning_rate": 1.1666545926504019e-06, "loss": 0.392, "step": 2874 }, { "epoch": 2.4005009741163374, "grad_norm": 0.30847689509391785, "learning_rate": 1.1635350250255334e-06, "loss": 0.3927, "step": 2875 }, { "epoch": 2.4013359309768996, "grad_norm": 0.31834766268730164, "learning_rate": 1.1604190844798157e-06, "loss": 0.3775, "step": 2876 }, { "epoch": 2.4021708878374617, "grad_norm": 0.312133252620697, "learning_rate": 1.1573067739591276e-06, "loss": 0.4388, "step": 2877 }, { "epoch": 2.403005844698024, "grad_norm": 0.30370232462882996, "learning_rate": 1.1541980964059136e-06, "loss": 0.4084, "step": 2878 }, { "epoch": 2.4038408015585864, "grad_norm": 0.29716306924819946, "learning_rate": 1.151093054759192e-06, "loss": 0.3649, "step": 2879 }, { "epoch": 2.4046757584191485, "grad_norm": 0.300133615732193, "learning_rate": 1.1479916519545326e-06, "loss": 0.388, "step": 2880 }, { "epoch": 2.4055107152797106, "grad_norm": 0.293889582157135, "learning_rate": 1.1448938909240737e-06, "loss": 0.3642, "step": 2881 }, { "epoch": 2.4063456721402727, "grad_norm": 0.3058689832687378, "learning_rate": 1.1417997745965058e-06, "loss": 0.4236, "step": 2882 }, { "epoch": 2.407180629000835, "grad_norm": 0.29965677857398987, "learning_rate": 1.1387093058970754e-06, "loss": 0.3776, "step": 2883 }, { "epoch": 2.408015585861397, "grad_norm": 0.30838724970817566, "learning_rate": 1.1356224877475812e-06, "loss": 0.3867, "step": 2884 }, { "epoch": 2.4088505427219595, "grad_norm": 0.3107610046863556, "learning_rate": 1.1325393230663695e-06, "loss": 0.434, "step": 2885 }, { "epoch": 2.4096854995825217, "grad_norm": 0.28837957978248596, "learning_rate": 1.1294598147683305e-06, "loss": 0.3617, "step": 2886 }, { "epoch": 2.4105204564430838, "grad_norm": 0.2946268618106842, "learning_rate": 1.1263839657649039e-06, "loss": 0.3567, "step": 2887 }, { "epoch": 2.411355413303646, "grad_norm": 0.3234250247478485, "learning_rate": 1.1233117789640608e-06, "loss": 0.4343, "step": 2888 }, { "epoch": 2.412190370164208, "grad_norm": 0.2825683355331421, "learning_rate": 1.1202432572703176e-06, "loss": 0.391, "step": 2889 }, { "epoch": 2.4130253270247706, "grad_norm": 0.3116297423839569, "learning_rate": 1.1171784035847227e-06, "loss": 0.4284, "step": 2890 }, { "epoch": 2.4138602838853327, "grad_norm": 0.26898545026779175, "learning_rate": 1.114117220804855e-06, "loss": 0.3746, "step": 2891 }, { "epoch": 2.414695240745895, "grad_norm": 0.3136826455593109, "learning_rate": 1.1110597118248272e-06, "loss": 0.4134, "step": 2892 }, { "epoch": 2.415530197606457, "grad_norm": 0.2987555265426636, "learning_rate": 1.1080058795352755e-06, "loss": 0.3748, "step": 2893 }, { "epoch": 2.416365154467019, "grad_norm": 0.3011055588722229, "learning_rate": 1.1049557268233596e-06, "loss": 0.4068, "step": 2894 }, { "epoch": 2.417200111327581, "grad_norm": 0.3023163974285126, "learning_rate": 1.1019092565727612e-06, "loss": 0.4307, "step": 2895 }, { "epoch": 2.4180350681881437, "grad_norm": 0.2725626230239868, "learning_rate": 1.0988664716636831e-06, "loss": 0.3888, "step": 2896 }, { "epoch": 2.418870025048706, "grad_norm": 0.29970067739486694, "learning_rate": 1.095827374972841e-06, "loss": 0.4118, "step": 2897 }, { "epoch": 2.419704981909268, "grad_norm": 0.3151586651802063, "learning_rate": 1.0927919693734618e-06, "loss": 0.4098, "step": 2898 }, { "epoch": 2.42053993876983, "grad_norm": 0.292091965675354, "learning_rate": 1.08976025773529e-06, "loss": 0.3936, "step": 2899 }, { "epoch": 2.421374895630392, "grad_norm": 0.3067646920681, "learning_rate": 1.0867322429245686e-06, "loss": 0.3929, "step": 2900 }, { "epoch": 2.4222098524909548, "grad_norm": 0.29804515838623047, "learning_rate": 1.0837079278040523e-06, "loss": 0.3885, "step": 2901 }, { "epoch": 2.423044809351517, "grad_norm": 0.2788119316101074, "learning_rate": 1.080687315232996e-06, "loss": 0.3791, "step": 2902 }, { "epoch": 2.423879766212079, "grad_norm": 0.3377365171909332, "learning_rate": 1.0776704080671518e-06, "loss": 0.4453, "step": 2903 }, { "epoch": 2.424714723072641, "grad_norm": 0.27486950159072876, "learning_rate": 1.074657209158773e-06, "loss": 0.3818, "step": 2904 }, { "epoch": 2.4255496799332033, "grad_norm": 0.3138290047645569, "learning_rate": 1.0716477213566045e-06, "loss": 0.4038, "step": 2905 }, { "epoch": 2.426384636793766, "grad_norm": 0.2708354592323303, "learning_rate": 1.0686419475058807e-06, "loss": 0.354, "step": 2906 }, { "epoch": 2.427219593654328, "grad_norm": 0.31075921654701233, "learning_rate": 1.0656398904483312e-06, "loss": 0.453, "step": 2907 }, { "epoch": 2.42805455051489, "grad_norm": 0.2818854749202728, "learning_rate": 1.0626415530221618e-06, "loss": 0.3873, "step": 2908 }, { "epoch": 2.428889507375452, "grad_norm": 0.29288166761398315, "learning_rate": 1.059646938062071e-06, "loss": 0.3625, "step": 2909 }, { "epoch": 2.4297244642360143, "grad_norm": 0.29079559445381165, "learning_rate": 1.056656048399234e-06, "loss": 0.3635, "step": 2910 }, { "epoch": 2.430559421096577, "grad_norm": 0.30136638879776, "learning_rate": 1.053668886861302e-06, "loss": 0.4253, "step": 2911 }, { "epoch": 2.431394377957139, "grad_norm": 0.28941524028778076, "learning_rate": 1.0506854562724072e-06, "loss": 0.3947, "step": 2912 }, { "epoch": 2.432229334817701, "grad_norm": 0.29610347747802734, "learning_rate": 1.0477057594531493e-06, "loss": 0.4046, "step": 2913 }, { "epoch": 2.433064291678263, "grad_norm": 0.3083399832248688, "learning_rate": 1.0447297992205995e-06, "loss": 0.4241, "step": 2914 }, { "epoch": 2.4338992485388253, "grad_norm": 0.3069673478603363, "learning_rate": 1.0417575783882956e-06, "loss": 0.3929, "step": 2915 }, { "epoch": 2.434734205399388, "grad_norm": 0.32460132241249084, "learning_rate": 1.0387890997662443e-06, "loss": 0.4424, "step": 2916 }, { "epoch": 2.43556916225995, "grad_norm": 0.2738597095012665, "learning_rate": 1.0358243661609096e-06, "loss": 0.3787, "step": 2917 }, { "epoch": 2.436404119120512, "grad_norm": 0.30594244599342346, "learning_rate": 1.0328633803752147e-06, "loss": 0.4277, "step": 2918 }, { "epoch": 2.4372390759810743, "grad_norm": 0.3099663555622101, "learning_rate": 1.0299061452085457e-06, "loss": 0.3774, "step": 2919 }, { "epoch": 2.4380740328416364, "grad_norm": 0.2913631200790405, "learning_rate": 1.0269526634567329e-06, "loss": 0.3886, "step": 2920 }, { "epoch": 2.438908989702199, "grad_norm": 0.2975578010082245, "learning_rate": 1.0240029379120675e-06, "loss": 0.3904, "step": 2921 }, { "epoch": 2.439743946562761, "grad_norm": 0.27398884296417236, "learning_rate": 1.0210569713632845e-06, "loss": 0.3679, "step": 2922 }, { "epoch": 2.440578903423323, "grad_norm": 0.3182361125946045, "learning_rate": 1.0181147665955644e-06, "loss": 0.4272, "step": 2923 }, { "epoch": 2.4414138602838853, "grad_norm": 0.2860560119152069, "learning_rate": 1.0151763263905357e-06, "loss": 0.3311, "step": 2924 }, { "epoch": 2.4422488171444474, "grad_norm": 0.3192550539970398, "learning_rate": 1.012241653526263e-06, "loss": 0.412, "step": 2925 }, { "epoch": 2.44308377400501, "grad_norm": 0.3067375719547272, "learning_rate": 1.009310750777251e-06, "loss": 0.3927, "step": 2926 }, { "epoch": 2.443918730865572, "grad_norm": 0.30615538358688354, "learning_rate": 1.0063836209144433e-06, "loss": 0.4193, "step": 2927 }, { "epoch": 2.444753687726134, "grad_norm": 0.2940407693386078, "learning_rate": 1.003460266705209e-06, "loss": 0.3864, "step": 2928 }, { "epoch": 2.4455886445866963, "grad_norm": 0.3067699074745178, "learning_rate": 1.0005406909133552e-06, "loss": 0.4121, "step": 2929 }, { "epoch": 2.4464236014472585, "grad_norm": 0.29432669281959534, "learning_rate": 9.976248962991126e-07, "loss": 0.4205, "step": 2930 }, { "epoch": 2.4472585583078206, "grad_norm": 0.2810162305831909, "learning_rate": 9.947128856191369e-07, "loss": 0.3677, "step": 2931 }, { "epoch": 2.448093515168383, "grad_norm": 0.28385332226753235, "learning_rate": 9.918046616265103e-07, "loss": 0.3795, "step": 2932 }, { "epoch": 2.4489284720289453, "grad_norm": 0.30447542667388916, "learning_rate": 9.88900227070731e-07, "loss": 0.3971, "step": 2933 }, { "epoch": 2.4497634288895074, "grad_norm": 0.3142227828502655, "learning_rate": 9.85999584697716e-07, "loss": 0.4036, "step": 2934 }, { "epoch": 2.4505983857500695, "grad_norm": 0.2743682861328125, "learning_rate": 9.831027372497953e-07, "loss": 0.3992, "step": 2935 }, { "epoch": 2.4514333426106316, "grad_norm": 0.27365007996559143, "learning_rate": 9.802096874657157e-07, "loss": 0.3735, "step": 2936 }, { "epoch": 2.452268299471194, "grad_norm": 0.2802085280418396, "learning_rate": 9.773204380806295e-07, "loss": 0.361, "step": 2937 }, { "epoch": 2.4531032563317563, "grad_norm": 0.29994115233421326, "learning_rate": 9.74434991826096e-07, "loss": 0.4193, "step": 2938 }, { "epoch": 2.4539382131923184, "grad_norm": 0.2958828806877136, "learning_rate": 9.715533514300835e-07, "loss": 0.384, "step": 2939 }, { "epoch": 2.4547731700528805, "grad_norm": 0.3102821707725525, "learning_rate": 9.686755196169533e-07, "loss": 0.3921, "step": 2940 }, { "epoch": 2.4556081269134427, "grad_norm": 0.31599003076553345, "learning_rate": 9.658014991074766e-07, "loss": 0.4045, "step": 2941 }, { "epoch": 2.4564430837740048, "grad_norm": 0.2850222885608673, "learning_rate": 9.629312926188138e-07, "loss": 0.3766, "step": 2942 }, { "epoch": 2.4572780406345673, "grad_norm": 0.2879562973976135, "learning_rate": 9.600649028645215e-07, "loss": 0.3855, "step": 2943 }, { "epoch": 2.4581129974951295, "grad_norm": 0.3168971538543701, "learning_rate": 9.572023325545498e-07, "loss": 0.3969, "step": 2944 }, { "epoch": 2.4589479543556916, "grad_norm": 0.31329959630966187, "learning_rate": 9.54343584395236e-07, "loss": 0.3878, "step": 2945 }, { "epoch": 2.4597829112162537, "grad_norm": 0.29564759135246277, "learning_rate": 9.514886610893015e-07, "loss": 0.3713, "step": 2946 }, { "epoch": 2.460617868076816, "grad_norm": 0.30860424041748047, "learning_rate": 9.486375653358587e-07, "loss": 0.3892, "step": 2947 }, { "epoch": 2.4614528249373784, "grad_norm": 0.29255712032318115, "learning_rate": 9.457902998303919e-07, "loss": 0.3946, "step": 2948 }, { "epoch": 2.4622877817979405, "grad_norm": 0.3173207938671112, "learning_rate": 9.429468672647729e-07, "loss": 0.4013, "step": 2949 }, { "epoch": 2.4631227386585026, "grad_norm": 0.2830371558666229, "learning_rate": 9.401072703272446e-07, "loss": 0.3802, "step": 2950 }, { "epoch": 2.4639576955190647, "grad_norm": 0.2921260595321655, "learning_rate": 9.372715117024228e-07, "loss": 0.3632, "step": 2951 }, { "epoch": 2.464792652379627, "grad_norm": 0.28205475211143494, "learning_rate": 9.344395940713009e-07, "loss": 0.3332, "step": 2952 }, { "epoch": 2.4656276092401894, "grad_norm": 0.3009733557701111, "learning_rate": 9.316115201112341e-07, "loss": 0.3896, "step": 2953 }, { "epoch": 2.4664625661007515, "grad_norm": 0.34482717514038086, "learning_rate": 9.287872924959468e-07, "loss": 0.4261, "step": 2954 }, { "epoch": 2.4672975229613137, "grad_norm": 0.3021727204322815, "learning_rate": 9.259669138955252e-07, "loss": 0.3642, "step": 2955 }, { "epoch": 2.468132479821876, "grad_norm": 0.3065039813518524, "learning_rate": 9.231503869764214e-07, "loss": 0.3485, "step": 2956 }, { "epoch": 2.468967436682438, "grad_norm": 0.3210582137107849, "learning_rate": 9.20337714401438e-07, "loss": 0.3933, "step": 2957 }, { "epoch": 2.4698023935430005, "grad_norm": 0.2967453598976135, "learning_rate": 9.175288988297398e-07, "loss": 0.4112, "step": 2958 }, { "epoch": 2.4706373504035626, "grad_norm": 0.2953278124332428, "learning_rate": 9.147239429168464e-07, "loss": 0.4008, "step": 2959 }, { "epoch": 2.4714723072641247, "grad_norm": 0.28054293990135193, "learning_rate": 9.119228493146204e-07, "loss": 0.3796, "step": 2960 }, { "epoch": 2.472307264124687, "grad_norm": 0.3246673047542572, "learning_rate": 9.091256206712812e-07, "loss": 0.4151, "step": 2961 }, { "epoch": 2.473142220985249, "grad_norm": 0.2826891839504242, "learning_rate": 9.0633225963139e-07, "loss": 0.3439, "step": 2962 }, { "epoch": 2.4739771778458115, "grad_norm": 0.3052365183830261, "learning_rate": 9.035427688358517e-07, "loss": 0.3628, "step": 2963 }, { "epoch": 2.4748121347063736, "grad_norm": 0.30282583832740784, "learning_rate": 9.007571509219137e-07, "loss": 0.3978, "step": 2964 }, { "epoch": 2.4756470915669357, "grad_norm": 0.29415205121040344, "learning_rate": 8.979754085231612e-07, "loss": 0.4083, "step": 2965 }, { "epoch": 2.476482048427498, "grad_norm": 0.3077705502510071, "learning_rate": 8.951975442695143e-07, "loss": 0.4081, "step": 2966 }, { "epoch": 2.47731700528806, "grad_norm": 0.2911575734615326, "learning_rate": 8.924235607872283e-07, "loss": 0.4035, "step": 2967 }, { "epoch": 2.4781519621486225, "grad_norm": 0.2988198399543762, "learning_rate": 8.896534606988865e-07, "loss": 0.3872, "step": 2968 }, { "epoch": 2.4789869190091847, "grad_norm": 0.29932233691215515, "learning_rate": 8.868872466234069e-07, "loss": 0.4305, "step": 2969 }, { "epoch": 2.479821875869747, "grad_norm": 0.3145415186882019, "learning_rate": 8.841249211760272e-07, "loss": 0.3791, "step": 2970 }, { "epoch": 2.480656832730309, "grad_norm": 0.30341264605522156, "learning_rate": 8.81366486968312e-07, "loss": 0.3838, "step": 2971 }, { "epoch": 2.481491789590871, "grad_norm": 0.3142925798892975, "learning_rate": 8.786119466081444e-07, "loss": 0.4365, "step": 2972 }, { "epoch": 2.4823267464514336, "grad_norm": 0.2898375391960144, "learning_rate": 8.758613026997309e-07, "loss": 0.3889, "step": 2973 }, { "epoch": 2.4831617033119957, "grad_norm": 0.31424620747566223, "learning_rate": 8.731145578435901e-07, "loss": 0.3843, "step": 2974 }, { "epoch": 2.483996660172558, "grad_norm": 0.35206350684165955, "learning_rate": 8.703717146365537e-07, "loss": 0.4169, "step": 2975 }, { "epoch": 2.48483161703312, "grad_norm": 0.2954707145690918, "learning_rate": 8.676327756717712e-07, "loss": 0.414, "step": 2976 }, { "epoch": 2.485666573893682, "grad_norm": 0.31046462059020996, "learning_rate": 8.648977435386901e-07, "loss": 0.4177, "step": 2977 }, { "epoch": 2.486501530754244, "grad_norm": 0.30487290024757385, "learning_rate": 8.621666208230744e-07, "loss": 0.3946, "step": 2978 }, { "epoch": 2.4873364876148067, "grad_norm": 0.3195943534374237, "learning_rate": 8.594394101069897e-07, "loss": 0.4406, "step": 2979 }, { "epoch": 2.488171444475369, "grad_norm": 0.29255056381225586, "learning_rate": 8.567161139687968e-07, "loss": 0.3597, "step": 2980 }, { "epoch": 2.489006401335931, "grad_norm": 0.2828947901725769, "learning_rate": 8.539967349831646e-07, "loss": 0.3931, "step": 2981 }, { "epoch": 2.489841358196493, "grad_norm": 0.30642440915107727, "learning_rate": 8.51281275721052e-07, "loss": 0.4083, "step": 2982 }, { "epoch": 2.4906763150570552, "grad_norm": 0.286907434463501, "learning_rate": 8.485697387497139e-07, "loss": 0.3969, "step": 2983 }, { "epoch": 2.4915112719176173, "grad_norm": 0.2941506505012512, "learning_rate": 8.458621266326999e-07, "loss": 0.349, "step": 2984 }, { "epoch": 2.49234622877818, "grad_norm": 0.35683107376098633, "learning_rate": 8.43158441929846e-07, "loss": 0.4241, "step": 2985 }, { "epoch": 2.493181185638742, "grad_norm": 0.3041556775569916, "learning_rate": 8.404586871972747e-07, "loss": 0.3911, "step": 2986 }, { "epoch": 2.494016142499304, "grad_norm": 0.2892906963825226, "learning_rate": 8.377628649873943e-07, "loss": 0.3855, "step": 2987 }, { "epoch": 2.4948510993598663, "grad_norm": 0.2853808104991913, "learning_rate": 8.350709778488941e-07, "loss": 0.4124, "step": 2988 }, { "epoch": 2.4956860562204284, "grad_norm": 0.2803613543510437, "learning_rate": 8.323830283267459e-07, "loss": 0.3925, "step": 2989 }, { "epoch": 2.496521013080991, "grad_norm": 0.2989208996295929, "learning_rate": 8.296990189621967e-07, "loss": 0.418, "step": 2990 }, { "epoch": 2.497355969941553, "grad_norm": 0.34074288606643677, "learning_rate": 8.270189522927669e-07, "loss": 0.3881, "step": 2991 }, { "epoch": 2.498190926802115, "grad_norm": 0.2925005257129669, "learning_rate": 8.243428308522511e-07, "loss": 0.3581, "step": 2992 }, { "epoch": 2.4990258836626773, "grad_norm": 0.3254374861717224, "learning_rate": 8.216706571707162e-07, "loss": 0.4459, "step": 2993 }, { "epoch": 2.4998608405232394, "grad_norm": 0.3069193661212921, "learning_rate": 8.190024337744929e-07, "loss": 0.3762, "step": 2994 }, { "epoch": 2.500695797383802, "grad_norm": 0.32276207208633423, "learning_rate": 8.163381631861777e-07, "loss": 0.4357, "step": 2995 }, { "epoch": 2.501530754244364, "grad_norm": 0.28361377120018005, "learning_rate": 8.136778479246349e-07, "loss": 0.3824, "step": 2996 }, { "epoch": 2.5023657111049262, "grad_norm": 0.29309868812561035, "learning_rate": 8.110214905049802e-07, "loss": 0.3963, "step": 2997 }, { "epoch": 2.5032006679654883, "grad_norm": 0.3234407305717468, "learning_rate": 8.083690934385973e-07, "loss": 0.4115, "step": 2998 }, { "epoch": 2.5040356248260505, "grad_norm": 0.2866528034210205, "learning_rate": 8.057206592331201e-07, "loss": 0.3947, "step": 2999 }, { "epoch": 2.504870581686613, "grad_norm": 0.2881591022014618, "learning_rate": 8.030761903924356e-07, "loss": 0.4008, "step": 3000 }, { "epoch": 2.505705538547175, "grad_norm": 0.3109893500804901, "learning_rate": 8.004356894166853e-07, "loss": 0.4365, "step": 3001 }, { "epoch": 2.5065404954077373, "grad_norm": 0.2800523042678833, "learning_rate": 7.977991588022566e-07, "loss": 0.3673, "step": 3002 }, { "epoch": 2.5073754522682994, "grad_norm": 0.3036733567714691, "learning_rate": 7.95166601041783e-07, "loss": 0.4158, "step": 3003 }, { "epoch": 2.5082104091288615, "grad_norm": 0.3134639263153076, "learning_rate": 7.925380186241455e-07, "loss": 0.3661, "step": 3004 }, { "epoch": 2.509045365989424, "grad_norm": 0.28379198908805847, "learning_rate": 7.899134140344627e-07, "loss": 0.3757, "step": 3005 }, { "epoch": 2.509880322849986, "grad_norm": 0.30568650364875793, "learning_rate": 7.872927897540944e-07, "loss": 0.4058, "step": 3006 }, { "epoch": 2.5107152797105483, "grad_norm": 0.31631386280059814, "learning_rate": 7.846761482606363e-07, "loss": 0.425, "step": 3007 }, { "epoch": 2.5115502365711104, "grad_norm": 0.3148733079433441, "learning_rate": 7.820634920279191e-07, "loss": 0.3949, "step": 3008 }, { "epoch": 2.5123851934316725, "grad_norm": 0.30840444564819336, "learning_rate": 7.79454823526008e-07, "loss": 0.3723, "step": 3009 }, { "epoch": 2.513220150292235, "grad_norm": 0.29061320424079895, "learning_rate": 7.768501452211952e-07, "loss": 0.3868, "step": 3010 }, { "epoch": 2.5140551071527972, "grad_norm": 0.28318333625793457, "learning_rate": 7.742494595760003e-07, "loss": 0.4111, "step": 3011 }, { "epoch": 2.5148900640133593, "grad_norm": 0.2775039076805115, "learning_rate": 7.716527690491694e-07, "loss": 0.3825, "step": 3012 }, { "epoch": 2.5157250208739215, "grad_norm": 0.2936003506183624, "learning_rate": 7.690600760956729e-07, "loss": 0.3975, "step": 3013 }, { "epoch": 2.5165599777344836, "grad_norm": 0.2672482430934906, "learning_rate": 7.664713831666997e-07, "loss": 0.3641, "step": 3014 }, { "epoch": 2.517394934595046, "grad_norm": 0.322564959526062, "learning_rate": 7.638866927096555e-07, "loss": 0.4367, "step": 3015 }, { "epoch": 2.5182298914556083, "grad_norm": 0.2999371588230133, "learning_rate": 7.613060071681682e-07, "loss": 0.3836, "step": 3016 }, { "epoch": 2.5190648483161704, "grad_norm": 0.3006301820278168, "learning_rate": 7.587293289820708e-07, "loss": 0.3972, "step": 3017 }, { "epoch": 2.5198998051767325, "grad_norm": 0.29348450899124146, "learning_rate": 7.561566605874149e-07, "loss": 0.3858, "step": 3018 }, { "epoch": 2.5207347620372946, "grad_norm": 0.28736817836761475, "learning_rate": 7.535880044164573e-07, "loss": 0.3868, "step": 3019 }, { "epoch": 2.521569718897857, "grad_norm": 0.28677189350128174, "learning_rate": 7.510233628976615e-07, "loss": 0.3879, "step": 3020 }, { "epoch": 2.5224046757584193, "grad_norm": 0.2953810691833496, "learning_rate": 7.484627384556986e-07, "loss": 0.4027, "step": 3021 }, { "epoch": 2.5232396326189814, "grad_norm": 0.29715582728385925, "learning_rate": 7.459061335114393e-07, "loss": 0.3736, "step": 3022 }, { "epoch": 2.5240745894795436, "grad_norm": 0.2916484773159027, "learning_rate": 7.433535504819528e-07, "loss": 0.4216, "step": 3023 }, { "epoch": 2.5249095463401057, "grad_norm": 0.2844480276107788, "learning_rate": 7.408049917805104e-07, "loss": 0.3702, "step": 3024 }, { "epoch": 2.5257445032006682, "grad_norm": 0.32517382502555847, "learning_rate": 7.382604598165749e-07, "loss": 0.451, "step": 3025 }, { "epoch": 2.52657946006123, "grad_norm": 0.2933471202850342, "learning_rate": 7.357199569958035e-07, "loss": 0.3783, "step": 3026 }, { "epoch": 2.5274144169217925, "grad_norm": 0.2898200452327728, "learning_rate": 7.331834857200437e-07, "loss": 0.4051, "step": 3027 }, { "epoch": 2.5282493737823546, "grad_norm": 0.3082069158554077, "learning_rate": 7.306510483873302e-07, "loss": 0.4166, "step": 3028 }, { "epoch": 2.5290843306429167, "grad_norm": 0.2815815508365631, "learning_rate": 7.281226473918884e-07, "loss": 0.3485, "step": 3029 }, { "epoch": 2.529919287503479, "grad_norm": 0.28009504079818726, "learning_rate": 7.255982851241234e-07, "loss": 0.3604, "step": 3030 }, { "epoch": 2.530754244364041, "grad_norm": 0.2796679139137268, "learning_rate": 7.230779639706231e-07, "loss": 0.401, "step": 3031 }, { "epoch": 2.5315892012246035, "grad_norm": 0.3264714777469635, "learning_rate": 7.205616863141545e-07, "loss": 0.4054, "step": 3032 }, { "epoch": 2.5324241580851656, "grad_norm": 0.2952086925506592, "learning_rate": 7.180494545336642e-07, "loss": 0.3447, "step": 3033 }, { "epoch": 2.5332591149457278, "grad_norm": 0.2931217849254608, "learning_rate": 7.155412710042708e-07, "loss": 0.4004, "step": 3034 }, { "epoch": 2.53409407180629, "grad_norm": 0.30095401406288147, "learning_rate": 7.13037138097265e-07, "loss": 0.4031, "step": 3035 }, { "epoch": 2.534929028666852, "grad_norm": 0.30051660537719727, "learning_rate": 7.105370581801141e-07, "loss": 0.374, "step": 3036 }, { "epoch": 2.5357639855274146, "grad_norm": 0.31603333353996277, "learning_rate": 7.080410336164439e-07, "loss": 0.3811, "step": 3037 }, { "epoch": 2.5365989423879767, "grad_norm": 0.2832060754299164, "learning_rate": 7.055490667660553e-07, "loss": 0.3872, "step": 3038 }, { "epoch": 2.537433899248539, "grad_norm": 0.3065125346183777, "learning_rate": 7.030611599849074e-07, "loss": 0.4592, "step": 3039 }, { "epoch": 2.538268856109101, "grad_norm": 0.293983519077301, "learning_rate": 7.005773156251222e-07, "loss": 0.3596, "step": 3040 }, { "epoch": 2.539103812969663, "grad_norm": 0.3113046884536743, "learning_rate": 6.980975360349823e-07, "loss": 0.3812, "step": 3041 }, { "epoch": 2.5399387698302256, "grad_norm": 0.3280436396598816, "learning_rate": 6.956218235589263e-07, "loss": 0.4023, "step": 3042 }, { "epoch": 2.5407737266907877, "grad_norm": 0.2917190492153168, "learning_rate": 6.931501805375473e-07, "loss": 0.3886, "step": 3043 }, { "epoch": 2.54160868355135, "grad_norm": 0.29566076397895813, "learning_rate": 6.906826093075919e-07, "loss": 0.4067, "step": 3044 }, { "epoch": 2.542443640411912, "grad_norm": 0.315456748008728, "learning_rate": 6.882191122019549e-07, "loss": 0.4239, "step": 3045 }, { "epoch": 2.543278597272474, "grad_norm": 0.31048232316970825, "learning_rate": 6.857596915496839e-07, "loss": 0.4191, "step": 3046 }, { "epoch": 2.5441135541330366, "grad_norm": 0.27918314933776855, "learning_rate": 6.833043496759684e-07, "loss": 0.3479, "step": 3047 }, { "epoch": 2.5449485109935988, "grad_norm": 0.32949408888816833, "learning_rate": 6.808530889021431e-07, "loss": 0.4101, "step": 3048 }, { "epoch": 2.545783467854161, "grad_norm": 0.28128066658973694, "learning_rate": 6.784059115456832e-07, "loss": 0.3982, "step": 3049 }, { "epoch": 2.546618424714723, "grad_norm": 0.27655255794525146, "learning_rate": 6.759628199202073e-07, "loss": 0.3419, "step": 3050 }, { "epoch": 2.547453381575285, "grad_norm": 0.3108208179473877, "learning_rate": 6.735238163354669e-07, "loss": 0.4311, "step": 3051 }, { "epoch": 2.5482883384358477, "grad_norm": 0.3010253310203552, "learning_rate": 6.710889030973489e-07, "loss": 0.4277, "step": 3052 }, { "epoch": 2.54912329529641, "grad_norm": 0.27942752838134766, "learning_rate": 6.686580825078776e-07, "loss": 0.3477, "step": 3053 }, { "epoch": 2.549958252156972, "grad_norm": 0.3232133984565735, "learning_rate": 6.662313568652006e-07, "loss": 0.4275, "step": 3054 }, { "epoch": 2.550793209017534, "grad_norm": 0.2807627022266388, "learning_rate": 6.638087284636002e-07, "loss": 0.3836, "step": 3055 }, { "epoch": 2.551628165878096, "grad_norm": 0.27812862396240234, "learning_rate": 6.613901995934858e-07, "loss": 0.3645, "step": 3056 }, { "epoch": 2.5524631227386587, "grad_norm": 0.3061511516571045, "learning_rate": 6.589757725413831e-07, "loss": 0.4209, "step": 3057 }, { "epoch": 2.553298079599221, "grad_norm": 0.2961060404777527, "learning_rate": 6.565654495899487e-07, "loss": 0.4076, "step": 3058 }, { "epoch": 2.554133036459783, "grad_norm": 0.290490061044693, "learning_rate": 6.541592330179542e-07, "loss": 0.3585, "step": 3059 }, { "epoch": 2.554967993320345, "grad_norm": 0.2970578372478485, "learning_rate": 6.517571251002896e-07, "loss": 0.4105, "step": 3060 }, { "epoch": 2.555802950180907, "grad_norm": 0.28861021995544434, "learning_rate": 6.493591281079626e-07, "loss": 0.3862, "step": 3061 }, { "epoch": 2.5566379070414698, "grad_norm": 0.3181731104850769, "learning_rate": 6.469652443080915e-07, "loss": 0.4377, "step": 3062 }, { "epoch": 2.557472863902032, "grad_norm": 0.271915078163147, "learning_rate": 6.445754759639078e-07, "loss": 0.3607, "step": 3063 }, { "epoch": 2.558307820762594, "grad_norm": 0.2593531012535095, "learning_rate": 6.42189825334752e-07, "loss": 0.347, "step": 3064 }, { "epoch": 2.559142777623156, "grad_norm": 0.3050023913383484, "learning_rate": 6.398082946760687e-07, "loss": 0.4221, "step": 3065 }, { "epoch": 2.5599777344837182, "grad_norm": 0.3453284800052643, "learning_rate": 6.37430886239413e-07, "loss": 0.4161, "step": 3066 }, { "epoch": 2.560812691344281, "grad_norm": 0.317165344953537, "learning_rate": 6.350576022724381e-07, "loss": 0.4239, "step": 3067 }, { "epoch": 2.5616476482048425, "grad_norm": 0.2994896173477173, "learning_rate": 6.326884450188991e-07, "loss": 0.4112, "step": 3068 }, { "epoch": 2.562482605065405, "grad_norm": 0.31460121273994446, "learning_rate": 6.3032341671865e-07, "loss": 0.3872, "step": 3069 }, { "epoch": 2.563317561925967, "grad_norm": 0.2864668071269989, "learning_rate": 6.279625196076417e-07, "loss": 0.3659, "step": 3070 }, { "epoch": 2.5641525187865293, "grad_norm": 0.28092479705810547, "learning_rate": 6.256057559179185e-07, "loss": 0.4144, "step": 3071 }, { "epoch": 2.564987475647092, "grad_norm": 0.29539379477500916, "learning_rate": 6.232531278776161e-07, "loss": 0.363, "step": 3072 }, { "epoch": 2.5658224325076535, "grad_norm": 0.29897746443748474, "learning_rate": 6.209046377109646e-07, "loss": 0.4108, "step": 3073 }, { "epoch": 2.566657389368216, "grad_norm": 0.3037329912185669, "learning_rate": 6.185602876382735e-07, "loss": 0.4388, "step": 3074 }, { "epoch": 2.567492346228778, "grad_norm": 0.27571776509284973, "learning_rate": 6.162200798759465e-07, "loss": 0.3826, "step": 3075 }, { "epoch": 2.5683273030893403, "grad_norm": 0.31346753239631653, "learning_rate": 6.138840166364696e-07, "loss": 0.3921, "step": 3076 }, { "epoch": 2.5691622599499024, "grad_norm": 0.2852889597415924, "learning_rate": 6.115521001284047e-07, "loss": 0.3939, "step": 3077 }, { "epoch": 2.5699972168104646, "grad_norm": 0.2944137752056122, "learning_rate": 6.092243325564007e-07, "loss": 0.4076, "step": 3078 }, { "epoch": 2.570832173671027, "grad_norm": 0.3040968179702759, "learning_rate": 6.069007161211793e-07, "loss": 0.3983, "step": 3079 }, { "epoch": 2.5716671305315892, "grad_norm": 0.27055996656417847, "learning_rate": 6.045812530195388e-07, "loss": 0.3792, "step": 3080 }, { "epoch": 2.5725020873921514, "grad_norm": 0.3189413845539093, "learning_rate": 6.02265945444353e-07, "loss": 0.3974, "step": 3081 }, { "epoch": 2.5733370442527135, "grad_norm": 0.3024892210960388, "learning_rate": 5.999547955845631e-07, "loss": 0.3863, "step": 3082 }, { "epoch": 2.5741720011132756, "grad_norm": 0.31292983889579773, "learning_rate": 5.976478056251827e-07, "loss": 0.4216, "step": 3083 }, { "epoch": 2.575006957973838, "grad_norm": 0.28819048404693604, "learning_rate": 5.953449777472903e-07, "loss": 0.3631, "step": 3084 }, { "epoch": 2.5758419148344003, "grad_norm": 0.30028507113456726, "learning_rate": 5.930463141280301e-07, "loss": 0.3983, "step": 3085 }, { "epoch": 2.5766768716949624, "grad_norm": 0.31267279386520386, "learning_rate": 5.907518169406118e-07, "loss": 0.396, "step": 3086 }, { "epoch": 2.5775118285555245, "grad_norm": 0.30855342745780945, "learning_rate": 5.884614883543027e-07, "loss": 0.401, "step": 3087 }, { "epoch": 2.5783467854160866, "grad_norm": 0.301296591758728, "learning_rate": 5.861753305344303e-07, "loss": 0.3984, "step": 3088 }, { "epoch": 2.579181742276649, "grad_norm": 0.2671685814857483, "learning_rate": 5.838933456423779e-07, "loss": 0.378, "step": 3089 }, { "epoch": 2.5800166991372113, "grad_norm": 0.28570717573165894, "learning_rate": 5.816155358355869e-07, "loss": 0.385, "step": 3090 }, { "epoch": 2.5808516559977734, "grad_norm": 0.3162669539451599, "learning_rate": 5.793419032675479e-07, "loss": 0.4335, "step": 3091 }, { "epoch": 2.5816866128583356, "grad_norm": 0.31362971663475037, "learning_rate": 5.770724500878028e-07, "loss": 0.3892, "step": 3092 }, { "epoch": 2.5825215697188977, "grad_norm": 0.2874835133552551, "learning_rate": 5.748071784419462e-07, "loss": 0.3486, "step": 3093 }, { "epoch": 2.5833565265794602, "grad_norm": 0.3122391104698181, "learning_rate": 5.725460904716123e-07, "loss": 0.4435, "step": 3094 }, { "epoch": 2.5841914834400224, "grad_norm": 0.30143582820892334, "learning_rate": 5.702891883144851e-07, "loss": 0.4118, "step": 3095 }, { "epoch": 2.5850264403005845, "grad_norm": 0.2792045474052429, "learning_rate": 5.680364741042926e-07, "loss": 0.3726, "step": 3096 }, { "epoch": 2.5858613971611466, "grad_norm": 0.31508302688598633, "learning_rate": 5.657879499707969e-07, "loss": 0.393, "step": 3097 }, { "epoch": 2.5866963540217087, "grad_norm": 0.30456310510635376, "learning_rate": 5.635436180398052e-07, "loss": 0.405, "step": 3098 }, { "epoch": 2.5875313108822713, "grad_norm": 0.28556835651397705, "learning_rate": 5.61303480433158e-07, "loss": 0.3497, "step": 3099 }, { "epoch": 2.5883662677428334, "grad_norm": 0.31672224402427673, "learning_rate": 5.5906753926873e-07, "loss": 0.4382, "step": 3100 }, { "epoch": 2.5892012246033955, "grad_norm": 0.2870565950870514, "learning_rate": 5.568357966604321e-07, "loss": 0.3831, "step": 3101 }, { "epoch": 2.5900361814639576, "grad_norm": 0.2997681796550751, "learning_rate": 5.546082547182019e-07, "loss": 0.3972, "step": 3102 }, { "epoch": 2.5908711383245198, "grad_norm": 0.29724112153053284, "learning_rate": 5.523849155480065e-07, "loss": 0.3988, "step": 3103 }, { "epoch": 2.5917060951850823, "grad_norm": 0.29456159472465515, "learning_rate": 5.501657812518407e-07, "loss": 0.4075, "step": 3104 }, { "epoch": 2.5925410520456444, "grad_norm": 0.2968977093696594, "learning_rate": 5.479508539277229e-07, "loss": 0.4091, "step": 3105 }, { "epoch": 2.5933760089062066, "grad_norm": 0.3013855814933777, "learning_rate": 5.457401356696951e-07, "loss": 0.3659, "step": 3106 }, { "epoch": 2.5942109657667687, "grad_norm": 0.315599650144577, "learning_rate": 5.435336285678194e-07, "loss": 0.4141, "step": 3107 }, { "epoch": 2.595045922627331, "grad_norm": 0.2845761179924011, "learning_rate": 5.413313347081767e-07, "loss": 0.3778, "step": 3108 }, { "epoch": 2.5958808794878934, "grad_norm": 0.2987121641635895, "learning_rate": 5.391332561728629e-07, "loss": 0.3681, "step": 3109 }, { "epoch": 2.5967158363484555, "grad_norm": 0.3123331069946289, "learning_rate": 5.369393950399926e-07, "loss": 0.425, "step": 3110 }, { "epoch": 2.5975507932090176, "grad_norm": 0.2938949167728424, "learning_rate": 5.347497533836898e-07, "loss": 0.4016, "step": 3111 }, { "epoch": 2.5983857500695797, "grad_norm": 0.30743834376335144, "learning_rate": 5.325643332740887e-07, "loss": 0.3714, "step": 3112 }, { "epoch": 2.599220706930142, "grad_norm": 0.3293297290802002, "learning_rate": 5.303831367773382e-07, "loss": 0.4058, "step": 3113 }, { "epoch": 2.6000556637907044, "grad_norm": 0.2823231816291809, "learning_rate": 5.282061659555854e-07, "loss": 0.4053, "step": 3114 }, { "epoch": 2.600890620651266, "grad_norm": 0.2923983037471771, "learning_rate": 5.260334228669883e-07, "loss": 0.3815, "step": 3115 }, { "epoch": 2.6017255775118286, "grad_norm": 0.29587870836257935, "learning_rate": 5.238649095657089e-07, "loss": 0.4085, "step": 3116 }, { "epoch": 2.6025605343723908, "grad_norm": 0.26880016922950745, "learning_rate": 5.217006281019039e-07, "loss": 0.4014, "step": 3117 }, { "epoch": 2.603395491232953, "grad_norm": 0.28177380561828613, "learning_rate": 5.195405805217358e-07, "loss": 0.3761, "step": 3118 }, { "epoch": 2.604230448093515, "grad_norm": 0.2936265766620636, "learning_rate": 5.173847688673606e-07, "loss": 0.4501, "step": 3119 }, { "epoch": 2.605065404954077, "grad_norm": 0.3112708330154419, "learning_rate": 5.152331951769296e-07, "loss": 0.4039, "step": 3120 }, { "epoch": 2.6059003618146397, "grad_norm": 0.30504611134529114, "learning_rate": 5.130858614845902e-07, "loss": 0.4479, "step": 3121 }, { "epoch": 2.606735318675202, "grad_norm": 0.2952655553817749, "learning_rate": 5.109427698204783e-07, "loss": 0.3438, "step": 3122 }, { "epoch": 2.607570275535764, "grad_norm": 0.30684375762939453, "learning_rate": 5.088039222107205e-07, "loss": 0.4247, "step": 3123 }, { "epoch": 2.608405232396326, "grad_norm": 0.2863970100879669, "learning_rate": 5.066693206774303e-07, "loss": 0.4075, "step": 3124 }, { "epoch": 2.609240189256888, "grad_norm": 0.288350373506546, "learning_rate": 5.045389672387085e-07, "loss": 0.3731, "step": 3125 }, { "epoch": 2.6100751461174507, "grad_norm": 0.3229912519454956, "learning_rate": 5.024128639086362e-07, "loss": 0.4154, "step": 3126 }, { "epoch": 2.610910102978013, "grad_norm": 0.2972664535045624, "learning_rate": 5.002910126972816e-07, "loss": 0.3968, "step": 3127 }, { "epoch": 2.611745059838575, "grad_norm": 0.2984808683395386, "learning_rate": 4.981734156106888e-07, "loss": 0.4036, "step": 3128 }, { "epoch": 2.612580016699137, "grad_norm": 0.30468496680259705, "learning_rate": 4.960600746508798e-07, "loss": 0.4151, "step": 3129 }, { "epoch": 2.613414973559699, "grad_norm": 0.26710349321365356, "learning_rate": 4.939509918158569e-07, "loss": 0.3483, "step": 3130 }, { "epoch": 2.6142499304202618, "grad_norm": 0.319543719291687, "learning_rate": 4.918461690995907e-07, "loss": 0.4008, "step": 3131 }, { "epoch": 2.615084887280824, "grad_norm": 0.2854347229003906, "learning_rate": 4.897456084920282e-07, "loss": 0.348, "step": 3132 }, { "epoch": 2.615919844141386, "grad_norm": 0.3234369456768036, "learning_rate": 4.876493119790887e-07, "loss": 0.4607, "step": 3133 }, { "epoch": 2.616754801001948, "grad_norm": 0.2809482514858246, "learning_rate": 4.855572815426535e-07, "loss": 0.3893, "step": 3134 }, { "epoch": 2.6175897578625102, "grad_norm": 0.2958315312862396, "learning_rate": 4.834695191605771e-07, "loss": 0.3881, "step": 3135 }, { "epoch": 2.618424714723073, "grad_norm": 0.2989239990711212, "learning_rate": 4.813860268066762e-07, "loss": 0.4155, "step": 3136 }, { "epoch": 2.619259671583635, "grad_norm": 0.2967926561832428, "learning_rate": 4.793068064507289e-07, "loss": 0.3706, "step": 3137 }, { "epoch": 2.620094628444197, "grad_norm": 0.29635003209114075, "learning_rate": 4.772318600584791e-07, "loss": 0.3973, "step": 3138 }, { "epoch": 2.620929585304759, "grad_norm": 0.2900838255882263, "learning_rate": 4.7516118959162525e-07, "loss": 0.3703, "step": 3139 }, { "epoch": 2.6217645421653213, "grad_norm": 0.2888679802417755, "learning_rate": 4.730947970078259e-07, "loss": 0.398, "step": 3140 }, { "epoch": 2.622599499025884, "grad_norm": 0.2639780640602112, "learning_rate": 4.710326842606927e-07, "loss": 0.3888, "step": 3141 }, { "epoch": 2.623434455886446, "grad_norm": 0.2794172465801239, "learning_rate": 4.6897485329979564e-07, "loss": 0.3949, "step": 3142 }, { "epoch": 2.624269412747008, "grad_norm": 0.3142341077327728, "learning_rate": 4.669213060706529e-07, "loss": 0.3919, "step": 3143 }, { "epoch": 2.62510436960757, "grad_norm": 0.3190731406211853, "learning_rate": 4.6487204451473256e-07, "loss": 0.4075, "step": 3144 }, { "epoch": 2.6259393264681323, "grad_norm": 0.32923221588134766, "learning_rate": 4.628270705694532e-07, "loss": 0.3868, "step": 3145 }, { "epoch": 2.626774283328695, "grad_norm": 0.2945149838924408, "learning_rate": 4.607863861681766e-07, "loss": 0.4033, "step": 3146 }, { "epoch": 2.627609240189257, "grad_norm": 0.29678893089294434, "learning_rate": 4.587499932402134e-07, "loss": 0.3955, "step": 3147 }, { "epoch": 2.628444197049819, "grad_norm": 0.3016296625137329, "learning_rate": 4.567178937108141e-07, "loss": 0.3991, "step": 3148 }, { "epoch": 2.6292791539103813, "grad_norm": 0.28719398379325867, "learning_rate": 4.5469008950116913e-07, "loss": 0.397, "step": 3149 }, { "epoch": 2.6301141107709434, "grad_norm": 0.30168670415878296, "learning_rate": 4.526665825284132e-07, "loss": 0.4005, "step": 3150 }, { "epoch": 2.630949067631506, "grad_norm": 0.3102165460586548, "learning_rate": 4.5064737470561003e-07, "loss": 0.3908, "step": 3151 }, { "epoch": 2.631784024492068, "grad_norm": 0.3179287016391754, "learning_rate": 4.4863246794176587e-07, "loss": 0.4219, "step": 3152 }, { "epoch": 2.63261898135263, "grad_norm": 0.2786749303340912, "learning_rate": 4.466218641418202e-07, "loss": 0.3775, "step": 3153 }, { "epoch": 2.6334539382131923, "grad_norm": 0.30495280027389526, "learning_rate": 4.4461556520663917e-07, "loss": 0.3531, "step": 3154 }, { "epoch": 2.6342888950737544, "grad_norm": 0.3067608177661896, "learning_rate": 4.4261357303302386e-07, "loss": 0.4063, "step": 3155 }, { "epoch": 2.635123851934317, "grad_norm": 0.2966153621673584, "learning_rate": 4.406158895137025e-07, "loss": 0.3926, "step": 3156 }, { "epoch": 2.635958808794879, "grad_norm": 0.3182118833065033, "learning_rate": 4.386225165373281e-07, "loss": 0.4148, "step": 3157 }, { "epoch": 2.636793765655441, "grad_norm": 0.2885640561580658, "learning_rate": 4.3663345598848104e-07, "loss": 0.3819, "step": 3158 }, { "epoch": 2.6376287225160033, "grad_norm": 0.3093430995941162, "learning_rate": 4.3464870974766314e-07, "loss": 0.4058, "step": 3159 }, { "epoch": 2.6384636793765655, "grad_norm": 0.27058374881744385, "learning_rate": 4.3266827969129734e-07, "loss": 0.3262, "step": 3160 }, { "epoch": 2.639298636237128, "grad_norm": 0.3041297495365143, "learning_rate": 4.306921676917247e-07, "loss": 0.4002, "step": 3161 }, { "epoch": 2.6401335930976897, "grad_norm": 0.3077486753463745, "learning_rate": 4.287203756172076e-07, "loss": 0.42, "step": 3162 }, { "epoch": 2.6409685499582523, "grad_norm": 0.30166515707969666, "learning_rate": 4.267529053319214e-07, "loss": 0.3831, "step": 3163 }, { "epoch": 2.6418035068188144, "grad_norm": 0.281514048576355, "learning_rate": 4.2478975869595594e-07, "loss": 0.3712, "step": 3164 }, { "epoch": 2.6426384636793765, "grad_norm": 0.30450087785720825, "learning_rate": 4.228309375653139e-07, "loss": 0.4462, "step": 3165 }, { "epoch": 2.6434734205399386, "grad_norm": 0.2857242524623871, "learning_rate": 4.2087644379190715e-07, "loss": 0.3599, "step": 3166 }, { "epoch": 2.6443083774005007, "grad_norm": 0.28509145975112915, "learning_rate": 4.189262792235588e-07, "loss": 0.4135, "step": 3167 }, { "epoch": 2.6451433342610633, "grad_norm": 0.2985331118106842, "learning_rate": 4.169804457039972e-07, "loss": 0.3936, "step": 3168 }, { "epoch": 2.6459782911216254, "grad_norm": 0.28128811717033386, "learning_rate": 4.150389450728559e-07, "loss": 0.3463, "step": 3169 }, { "epoch": 2.6468132479821875, "grad_norm": 0.276187926530838, "learning_rate": 4.131017791656755e-07, "loss": 0.3779, "step": 3170 }, { "epoch": 2.6476482048427497, "grad_norm": 0.286660760641098, "learning_rate": 4.1116894981389156e-07, "loss": 0.388, "step": 3171 }, { "epoch": 2.6484831617033118, "grad_norm": 0.2998400926589966, "learning_rate": 4.0924045884484564e-07, "loss": 0.4051, "step": 3172 }, { "epoch": 2.6493181185638743, "grad_norm": 0.29918837547302246, "learning_rate": 4.0731630808177825e-07, "loss": 0.3801, "step": 3173 }, { "epoch": 2.6501530754244365, "grad_norm": 0.3004518151283264, "learning_rate": 4.053964993438203e-07, "loss": 0.3624, "step": 3174 }, { "epoch": 2.6509880322849986, "grad_norm": 0.30570143461227417, "learning_rate": 4.034810344460038e-07, "loss": 0.39, "step": 3175 }, { "epoch": 2.6518229891455607, "grad_norm": 0.323946088552475, "learning_rate": 4.015699151992519e-07, "loss": 0.4649, "step": 3176 }, { "epoch": 2.652657946006123, "grad_norm": 0.2721903622150421, "learning_rate": 3.996631434103776e-07, "loss": 0.3305, "step": 3177 }, { "epoch": 2.6534929028666854, "grad_norm": 0.32831501960754395, "learning_rate": 3.977607208820872e-07, "loss": 0.3954, "step": 3178 }, { "epoch": 2.6543278597272475, "grad_norm": 0.2861386835575104, "learning_rate": 3.958626494129725e-07, "loss": 0.3845, "step": 3179 }, { "epoch": 2.6551628165878096, "grad_norm": 0.28329145908355713, "learning_rate": 3.9396893079751197e-07, "loss": 0.4034, "step": 3180 }, { "epoch": 2.6559977734483717, "grad_norm": 0.2848740220069885, "learning_rate": 3.920795668260685e-07, "loss": 0.362, "step": 3181 }, { "epoch": 2.656832730308934, "grad_norm": 0.29888755083084106, "learning_rate": 3.901945592848905e-07, "loss": 0.4009, "step": 3182 }, { "epoch": 2.6576676871694964, "grad_norm": 0.29035407304763794, "learning_rate": 3.8831390995610507e-07, "loss": 0.4071, "step": 3183 }, { "epoch": 2.6585026440300585, "grad_norm": 0.26028668880462646, "learning_rate": 3.864376206177206e-07, "loss": 0.3487, "step": 3184 }, { "epoch": 2.6593376008906207, "grad_norm": 0.2737889885902405, "learning_rate": 3.8456569304362146e-07, "loss": 0.3818, "step": 3185 }, { "epoch": 2.6601725577511828, "grad_norm": 0.29555177688598633, "learning_rate": 3.826981290035692e-07, "loss": 0.4361, "step": 3186 }, { "epoch": 2.661007514611745, "grad_norm": 0.2926006317138672, "learning_rate": 3.8083493026320193e-07, "loss": 0.3969, "step": 3187 }, { "epoch": 2.6618424714723075, "grad_norm": 0.3330947160720825, "learning_rate": 3.7897609858402853e-07, "loss": 0.3992, "step": 3188 }, { "epoch": 2.6626774283328696, "grad_norm": 0.2921561300754547, "learning_rate": 3.7712163572342864e-07, "loss": 0.4103, "step": 3189 }, { "epoch": 2.6635123851934317, "grad_norm": 0.31599071621894836, "learning_rate": 3.752715434346549e-07, "loss": 0.4492, "step": 3190 }, { "epoch": 2.664347342053994, "grad_norm": 0.27568385004997253, "learning_rate": 3.734258234668231e-07, "loss": 0.3426, "step": 3191 }, { "epoch": 2.665182298914556, "grad_norm": 0.3078334927558899, "learning_rate": 3.7158447756491855e-07, "loss": 0.4049, "step": 3192 }, { "epoch": 2.6660172557751185, "grad_norm": 0.29565152525901794, "learning_rate": 3.6974750746979316e-07, "loss": 0.3931, "step": 3193 }, { "epoch": 2.6668522126356806, "grad_norm": 0.30603206157684326, "learning_rate": 3.679149149181549e-07, "loss": 0.3929, "step": 3194 }, { "epoch": 2.6676871694962427, "grad_norm": 0.28209349513053894, "learning_rate": 3.6608670164258065e-07, "loss": 0.3614, "step": 3195 }, { "epoch": 2.668522126356805, "grad_norm": 0.2805164158344269, "learning_rate": 3.642628693715028e-07, "loss": 0.3848, "step": 3196 }, { "epoch": 2.669357083217367, "grad_norm": 0.3302933871746063, "learning_rate": 3.6244341982921203e-07, "loss": 0.4093, "step": 3197 }, { "epoch": 2.6701920400779295, "grad_norm": 0.2979625463485718, "learning_rate": 3.60628354735858e-07, "loss": 0.4224, "step": 3198 }, { "epoch": 2.6710269969384917, "grad_norm": 0.3004210293292999, "learning_rate": 3.5881767580744243e-07, "loss": 0.4011, "step": 3199 }, { "epoch": 2.671861953799054, "grad_norm": 0.29716500639915466, "learning_rate": 3.570113847558221e-07, "loss": 0.3337, "step": 3200 }, { "epoch": 2.672696910659616, "grad_norm": 0.28661277890205383, "learning_rate": 3.552094832887032e-07, "loss": 0.4312, "step": 3201 }, { "epoch": 2.673531867520178, "grad_norm": 0.2932738661766052, "learning_rate": 3.5341197310964524e-07, "loss": 0.3837, "step": 3202 }, { "epoch": 2.6743668243807406, "grad_norm": 0.27754685282707214, "learning_rate": 3.516188559180533e-07, "loss": 0.3905, "step": 3203 }, { "epoch": 2.6752017812413023, "grad_norm": 0.32276299595832825, "learning_rate": 3.4983013340918024e-07, "loss": 0.4283, "step": 3204 }, { "epoch": 2.676036738101865, "grad_norm": 0.26605263352394104, "learning_rate": 3.480458072741244e-07, "loss": 0.3533, "step": 3205 }, { "epoch": 2.676871694962427, "grad_norm": 0.29764091968536377, "learning_rate": 3.462658791998258e-07, "loss": 0.4107, "step": 3206 }, { "epoch": 2.677706651822989, "grad_norm": 0.27201640605926514, "learning_rate": 3.4449035086907003e-07, "loss": 0.3948, "step": 3207 }, { "epoch": 2.6785416086835516, "grad_norm": 0.2941596210002899, "learning_rate": 3.4271922396047986e-07, "loss": 0.384, "step": 3208 }, { "epoch": 2.6793765655441133, "grad_norm": 0.2913140654563904, "learning_rate": 3.4095250014851755e-07, "loss": 0.4025, "step": 3209 }, { "epoch": 2.680211522404676, "grad_norm": 0.2973935008049011, "learning_rate": 3.3919018110348533e-07, "loss": 0.394, "step": 3210 }, { "epoch": 2.681046479265238, "grad_norm": 0.2821689248085022, "learning_rate": 3.374322684915154e-07, "loss": 0.3541, "step": 3211 }, { "epoch": 2.6818814361258, "grad_norm": 0.29130056500434875, "learning_rate": 3.3567876397457955e-07, "loss": 0.3976, "step": 3212 }, { "epoch": 2.682716392986362, "grad_norm": 0.29645460844039917, "learning_rate": 3.3392966921047984e-07, "loss": 0.396, "step": 3213 }, { "epoch": 2.6835513498469243, "grad_norm": 0.28302252292633057, "learning_rate": 3.3218498585284807e-07, "loss": 0.365, "step": 3214 }, { "epoch": 2.684386306707487, "grad_norm": 0.2960718870162964, "learning_rate": 3.304447155511481e-07, "loss": 0.3755, "step": 3215 }, { "epoch": 2.685221263568049, "grad_norm": 0.29418283700942993, "learning_rate": 3.2870885995066883e-07, "loss": 0.3944, "step": 3216 }, { "epoch": 2.686056220428611, "grad_norm": 0.29898902773857117, "learning_rate": 3.2697742069252746e-07, "loss": 0.3899, "step": 3217 }, { "epoch": 2.6868911772891733, "grad_norm": 0.28057029843330383, "learning_rate": 3.2525039941366463e-07, "loss": 0.377, "step": 3218 }, { "epoch": 2.6877261341497354, "grad_norm": 0.3145003318786621, "learning_rate": 3.235277977468448e-07, "loss": 0.4517, "step": 3219 }, { "epoch": 2.688561091010298, "grad_norm": 0.29449066519737244, "learning_rate": 3.218096173206542e-07, "loss": 0.4008, "step": 3220 }, { "epoch": 2.68939604787086, "grad_norm": 0.2705121636390686, "learning_rate": 3.200958597594983e-07, "loss": 0.4026, "step": 3221 }, { "epoch": 2.690231004731422, "grad_norm": 0.29345571994781494, "learning_rate": 3.1838652668360173e-07, "loss": 0.33, "step": 3222 }, { "epoch": 2.6910659615919843, "grad_norm": 0.3228539228439331, "learning_rate": 3.16681619709005e-07, "loss": 0.4077, "step": 3223 }, { "epoch": 2.6919009184525464, "grad_norm": 0.2970106303691864, "learning_rate": 3.149811404475667e-07, "loss": 0.4049, "step": 3224 }, { "epoch": 2.692735875313109, "grad_norm": 0.27612966299057007, "learning_rate": 3.132850905069568e-07, "loss": 0.3462, "step": 3225 }, { "epoch": 2.693570832173671, "grad_norm": 0.3278336822986603, "learning_rate": 3.1159347149065764e-07, "loss": 0.4123, "step": 3226 }, { "epoch": 2.6944057890342332, "grad_norm": 0.30061855912208557, "learning_rate": 3.0990628499796503e-07, "loss": 0.3913, "step": 3227 }, { "epoch": 2.6952407458947953, "grad_norm": 0.3105267286300659, "learning_rate": 3.0822353262398185e-07, "loss": 0.411, "step": 3228 }, { "epoch": 2.6960757027553575, "grad_norm": 0.28079622983932495, "learning_rate": 3.0654521595961883e-07, "loss": 0.3379, "step": 3229 }, { "epoch": 2.69691065961592, "grad_norm": 0.3088369071483612, "learning_rate": 3.0487133659159616e-07, "loss": 0.3956, "step": 3230 }, { "epoch": 2.697745616476482, "grad_norm": 0.2792402505874634, "learning_rate": 3.0320189610243303e-07, "loss": 0.3824, "step": 3231 }, { "epoch": 2.6985805733370443, "grad_norm": 0.27303266525268555, "learning_rate": 3.015368960704584e-07, "loss": 0.3758, "step": 3232 }, { "epoch": 2.6994155301976064, "grad_norm": 0.3063524663448334, "learning_rate": 2.9987633806979886e-07, "loss": 0.3989, "step": 3233 }, { "epoch": 2.7002504870581685, "grad_norm": 0.27675357460975647, "learning_rate": 2.9822022367038295e-07, "loss": 0.3632, "step": 3234 }, { "epoch": 2.701085443918731, "grad_norm": 0.2891473174095154, "learning_rate": 2.9656855443793876e-07, "loss": 0.4225, "step": 3235 }, { "epoch": 2.701920400779293, "grad_norm": 0.2821783125400543, "learning_rate": 2.949213319339905e-07, "loss": 0.3614, "step": 3236 }, { "epoch": 2.7027553576398553, "grad_norm": 0.31188368797302246, "learning_rate": 2.9327855771585867e-07, "loss": 0.4152, "step": 3237 }, { "epoch": 2.7035903145004174, "grad_norm": 0.28410130739212036, "learning_rate": 2.9164023333665804e-07, "loss": 0.3982, "step": 3238 }, { "epoch": 2.7044252713609795, "grad_norm": 0.28793710470199585, "learning_rate": 2.900063603452985e-07, "loss": 0.4039, "step": 3239 }, { "epoch": 2.705260228221542, "grad_norm": 0.2789645195007324, "learning_rate": 2.883769402864789e-07, "loss": 0.3789, "step": 3240 }, { "epoch": 2.7060951850821042, "grad_norm": 0.2970862090587616, "learning_rate": 2.867519747006886e-07, "loss": 0.3808, "step": 3241 }, { "epoch": 2.7069301419426663, "grad_norm": 0.2991337478160858, "learning_rate": 2.851314651242071e-07, "loss": 0.3882, "step": 3242 }, { "epoch": 2.7077650988032285, "grad_norm": 0.3134364187717438, "learning_rate": 2.835154130890988e-07, "loss": 0.4227, "step": 3243 }, { "epoch": 2.7086000556637906, "grad_norm": 0.2957904636859894, "learning_rate": 2.819038201232166e-07, "loss": 0.392, "step": 3244 }, { "epoch": 2.709435012524353, "grad_norm": 0.30339735746383667, "learning_rate": 2.802966877501956e-07, "loss": 0.4051, "step": 3245 }, { "epoch": 2.7102699693849153, "grad_norm": 0.2877518832683563, "learning_rate": 2.7869401748945426e-07, "loss": 0.3658, "step": 3246 }, { "epoch": 2.7111049262454774, "grad_norm": 0.3251330256462097, "learning_rate": 2.770958108561933e-07, "loss": 0.3998, "step": 3247 }, { "epoch": 2.7119398831060395, "grad_norm": 0.29799577593803406, "learning_rate": 2.755020693613919e-07, "loss": 0.3851, "step": 3248 }, { "epoch": 2.7127748399666016, "grad_norm": 0.31254372000694275, "learning_rate": 2.739127945118092e-07, "loss": 0.4287, "step": 3249 }, { "epoch": 2.713609796827164, "grad_norm": 0.3040291368961334, "learning_rate": 2.723279878099816e-07, "loss": 0.4134, "step": 3250 }, { "epoch": 2.714444753687726, "grad_norm": 0.29454585909843445, "learning_rate": 2.7074765075421837e-07, "loss": 0.4196, "step": 3251 }, { "epoch": 2.7152797105482884, "grad_norm": 0.2901422381401062, "learning_rate": 2.6917178483860776e-07, "loss": 0.4239, "step": 3252 }, { "epoch": 2.7161146674088505, "grad_norm": 0.2930153012275696, "learning_rate": 2.6760039155300677e-07, "loss": 0.3864, "step": 3253 }, { "epoch": 2.7169496242694127, "grad_norm": 0.3152342438697815, "learning_rate": 2.6603347238304545e-07, "loss": 0.4208, "step": 3254 }, { "epoch": 2.717784581129975, "grad_norm": 0.30063188076019287, "learning_rate": 2.6447102881012487e-07, "loss": 0.3992, "step": 3255 }, { "epoch": 2.718619537990537, "grad_norm": 0.2745400369167328, "learning_rate": 2.6291306231141347e-07, "loss": 0.3587, "step": 3256 }, { "epoch": 2.7194544948510995, "grad_norm": 0.2757989764213562, "learning_rate": 2.613595743598474e-07, "loss": 0.3982, "step": 3257 }, { "epoch": 2.7202894517116616, "grad_norm": 0.2783917486667633, "learning_rate": 2.5981056642412796e-07, "loss": 0.4034, "step": 3258 }, { "epoch": 2.7211244085722237, "grad_norm": 0.2636207044124603, "learning_rate": 2.5826603996872313e-07, "loss": 0.3758, "step": 3259 }, { "epoch": 2.721959365432786, "grad_norm": 0.2862760126590729, "learning_rate": 2.567259964538615e-07, "loss": 0.3876, "step": 3260 }, { "epoch": 2.722794322293348, "grad_norm": 0.29196932911872864, "learning_rate": 2.551904373355346e-07, "loss": 0.3806, "step": 3261 }, { "epoch": 2.7236292791539105, "grad_norm": 0.29830729961395264, "learning_rate": 2.536593640654944e-07, "loss": 0.431, "step": 3262 }, { "epoch": 2.7244642360144726, "grad_norm": 0.260487824678421, "learning_rate": 2.5213277809125026e-07, "loss": 0.3303, "step": 3263 }, { "epoch": 2.7252991928750347, "grad_norm": 0.30351221561431885, "learning_rate": 2.506106808560721e-07, "loss": 0.4394, "step": 3264 }, { "epoch": 2.726134149735597, "grad_norm": 0.29640403389930725, "learning_rate": 2.490930737989833e-07, "loss": 0.3985, "step": 3265 }, { "epoch": 2.726969106596159, "grad_norm": 0.2904702425003052, "learning_rate": 2.4757995835476334e-07, "loss": 0.3757, "step": 3266 }, { "epoch": 2.7278040634567216, "grad_norm": 0.2905628979206085, "learning_rate": 2.460713359539474e-07, "loss": 0.42, "step": 3267 }, { "epoch": 2.7286390203172837, "grad_norm": 0.2765868306159973, "learning_rate": 2.4456720802281675e-07, "loss": 0.3957, "step": 3268 }, { "epoch": 2.729473977177846, "grad_norm": 0.276748389005661, "learning_rate": 2.4306757598340944e-07, "loss": 0.3758, "step": 3269 }, { "epoch": 2.730308934038408, "grad_norm": 0.2965172529220581, "learning_rate": 2.4157244125351196e-07, "loss": 0.3839, "step": 3270 }, { "epoch": 2.73114389089897, "grad_norm": 0.2780552804470062, "learning_rate": 2.4008180524665525e-07, "loss": 0.3833, "step": 3271 }, { "epoch": 2.7319788477595326, "grad_norm": 0.3195589482784271, "learning_rate": 2.385956693721214e-07, "loss": 0.447, "step": 3272 }, { "epoch": 2.7328138046200947, "grad_norm": 0.26233604550361633, "learning_rate": 2.3711403503493657e-07, "loss": 0.339, "step": 3273 }, { "epoch": 2.733648761480657, "grad_norm": 0.29531994462013245, "learning_rate": 2.3563690363586912e-07, "loss": 0.4636, "step": 3274 }, { "epoch": 2.734483718341219, "grad_norm": 0.2805086374282837, "learning_rate": 2.341642765714336e-07, "loss": 0.4113, "step": 3275 }, { "epoch": 2.735318675201781, "grad_norm": 0.2880486845970154, "learning_rate": 2.3269615523388355e-07, "loss": 0.3737, "step": 3276 }, { "epoch": 2.7361536320623436, "grad_norm": 0.2878006100654602, "learning_rate": 2.3123254101121373e-07, "loss": 0.3882, "step": 3277 }, { "epoch": 2.7369885889229058, "grad_norm": 0.29284629225730896, "learning_rate": 2.2977343528715613e-07, "loss": 0.371, "step": 3278 }, { "epoch": 2.737823545783468, "grad_norm": 0.29953068494796753, "learning_rate": 2.2831883944118393e-07, "loss": 0.4174, "step": 3279 }, { "epoch": 2.73865850264403, "grad_norm": 0.2816229462623596, "learning_rate": 2.2686875484850268e-07, "loss": 0.3903, "step": 3280 }, { "epoch": 2.739493459504592, "grad_norm": 0.3020474910736084, "learning_rate": 2.254231828800557e-07, "loss": 0.3985, "step": 3281 }, { "epoch": 2.7403284163651547, "grad_norm": 0.30186331272125244, "learning_rate": 2.2398212490251758e-07, "loss": 0.3766, "step": 3282 }, { "epoch": 2.741163373225717, "grad_norm": 0.2894587516784668, "learning_rate": 2.2254558227829737e-07, "loss": 0.3808, "step": 3283 }, { "epoch": 2.741998330086279, "grad_norm": 0.31551793217658997, "learning_rate": 2.2111355636553422e-07, "loss": 0.4469, "step": 3284 }, { "epoch": 2.742833286946841, "grad_norm": 0.3084787130355835, "learning_rate": 2.1968604851809738e-07, "loss": 0.3853, "step": 3285 }, { "epoch": 2.743668243807403, "grad_norm": 0.29735687375068665, "learning_rate": 2.182630600855834e-07, "loss": 0.3982, "step": 3286 }, { "epoch": 2.7445032006679657, "grad_norm": 0.3036224842071533, "learning_rate": 2.168445924133189e-07, "loss": 0.357, "step": 3287 }, { "epoch": 2.745338157528528, "grad_norm": 0.2884233891963959, "learning_rate": 2.1543064684235283e-07, "loss": 0.4031, "step": 3288 }, { "epoch": 2.74617311438909, "grad_norm": 0.30598336458206177, "learning_rate": 2.1402122470946196e-07, "loss": 0.3991, "step": 3289 }, { "epoch": 2.747008071249652, "grad_norm": 0.2911061644554138, "learning_rate": 2.12616327347146e-07, "loss": 0.3889, "step": 3290 }, { "epoch": 2.747843028110214, "grad_norm": 0.28520235419273376, "learning_rate": 2.1121595608362354e-07, "loss": 0.361, "step": 3291 }, { "epoch": 2.7486779849707768, "grad_norm": 0.29023638367652893, "learning_rate": 2.0982011224283894e-07, "loss": 0.3727, "step": 3292 }, { "epoch": 2.749512941831339, "grad_norm": 0.2908984124660492, "learning_rate": 2.0842879714445318e-07, "loss": 0.3895, "step": 3293 }, { "epoch": 2.750347898691901, "grad_norm": 0.30383676290512085, "learning_rate": 2.0704201210384634e-07, "loss": 0.411, "step": 3294 }, { "epoch": 2.751182855552463, "grad_norm": 0.2967575490474701, "learning_rate": 2.0565975843211515e-07, "loss": 0.416, "step": 3295 }, { "epoch": 2.7520178124130252, "grad_norm": 0.2865573465824127, "learning_rate": 2.0428203743607488e-07, "loss": 0.3796, "step": 3296 }, { "epoch": 2.752852769273588, "grad_norm": 0.29819202423095703, "learning_rate": 2.0290885041825192e-07, "loss": 0.3919, "step": 3297 }, { "epoch": 2.7536877261341495, "grad_norm": 0.3099372684955597, "learning_rate": 2.0154019867688724e-07, "loss": 0.397, "step": 3298 }, { "epoch": 2.754522682994712, "grad_norm": 0.2752944529056549, "learning_rate": 2.0017608350593687e-07, "loss": 0.376, "step": 3299 }, { "epoch": 2.755357639855274, "grad_norm": 0.32046544551849365, "learning_rate": 1.988165061950631e-07, "loss": 0.4321, "step": 3300 }, { "epoch": 2.7561925967158363, "grad_norm": 0.285174161195755, "learning_rate": 1.9746146802964216e-07, "loss": 0.4048, "step": 3301 }, { "epoch": 2.7570275535763984, "grad_norm": 0.28515589237213135, "learning_rate": 1.961109702907571e-07, "loss": 0.3575, "step": 3302 }, { "epoch": 2.7578625104369605, "grad_norm": 0.28896623849868774, "learning_rate": 1.9476501425519656e-07, "loss": 0.4005, "step": 3303 }, { "epoch": 2.758697467297523, "grad_norm": 0.2859111726284027, "learning_rate": 1.9342360119545987e-07, "loss": 0.3725, "step": 3304 }, { "epoch": 2.759532424158085, "grad_norm": 0.2787877321243286, "learning_rate": 1.9208673237974708e-07, "loss": 0.3964, "step": 3305 }, { "epoch": 2.7603673810186473, "grad_norm": 0.2871955633163452, "learning_rate": 1.9075440907196375e-07, "loss": 0.3964, "step": 3306 }, { "epoch": 2.7612023378792094, "grad_norm": 0.301570326089859, "learning_rate": 1.8942663253171957e-07, "loss": 0.4038, "step": 3307 }, { "epoch": 2.7620372947397716, "grad_norm": 0.3053889870643616, "learning_rate": 1.8810340401432148e-07, "loss": 0.4008, "step": 3308 }, { "epoch": 2.762872251600334, "grad_norm": 0.30205580592155457, "learning_rate": 1.8678472477078047e-07, "loss": 0.3741, "step": 3309 }, { "epoch": 2.7637072084608962, "grad_norm": 0.3049730658531189, "learning_rate": 1.8547059604780537e-07, "loss": 0.4191, "step": 3310 }, { "epoch": 2.7645421653214584, "grad_norm": 0.28498002886772156, "learning_rate": 1.8416101908780126e-07, "loss": 0.4253, "step": 3311 }, { "epoch": 2.7653771221820205, "grad_norm": 0.26134607195854187, "learning_rate": 1.828559951288733e-07, "loss": 0.3636, "step": 3312 }, { "epoch": 2.7662120790425826, "grad_norm": 0.29621556401252747, "learning_rate": 1.8155552540481847e-07, "loss": 0.4253, "step": 3313 }, { "epoch": 2.767047035903145, "grad_norm": 0.2809688448905945, "learning_rate": 1.8025961114513101e-07, "loss": 0.3793, "step": 3314 }, { "epoch": 2.7678819927637073, "grad_norm": 0.31910303235054016, "learning_rate": 1.789682535749948e-07, "loss": 0.4256, "step": 3315 }, { "epoch": 2.7687169496242694, "grad_norm": 0.29313430190086365, "learning_rate": 1.77681453915291e-07, "loss": 0.4073, "step": 3316 }, { "epoch": 2.7695519064848315, "grad_norm": 0.27084222435951233, "learning_rate": 1.763992133825865e-07, "loss": 0.342, "step": 3317 }, { "epoch": 2.7703868633453936, "grad_norm": 0.2874617576599121, "learning_rate": 1.7512153318913983e-07, "loss": 0.4131, "step": 3318 }, { "epoch": 2.771221820205956, "grad_norm": 0.2984589636325836, "learning_rate": 1.7384841454290034e-07, "loss": 0.4062, "step": 3319 }, { "epoch": 2.7720567770665183, "grad_norm": 0.2846328020095825, "learning_rate": 1.7257985864750027e-07, "loss": 0.3959, "step": 3320 }, { "epoch": 2.7728917339270804, "grad_norm": 0.29717233777046204, "learning_rate": 1.713158667022613e-07, "loss": 0.3971, "step": 3321 }, { "epoch": 2.7737266907876426, "grad_norm": 0.27909910678863525, "learning_rate": 1.7005643990219033e-07, "loss": 0.3956, "step": 3322 }, { "epoch": 2.7745616476482047, "grad_norm": 0.2773790955543518, "learning_rate": 1.6880157943797547e-07, "loss": 0.3744, "step": 3323 }, { "epoch": 2.7753966045087672, "grad_norm": 0.29043105244636536, "learning_rate": 1.6755128649599218e-07, "loss": 0.4291, "step": 3324 }, { "epoch": 2.7762315613693294, "grad_norm": 0.2938615679740906, "learning_rate": 1.663055622582932e-07, "loss": 0.3547, "step": 3325 }, { "epoch": 2.7770665182298915, "grad_norm": 0.3015250265598297, "learning_rate": 1.6506440790261424e-07, "loss": 0.4009, "step": 3326 }, { "epoch": 2.7779014750904536, "grad_norm": 0.304777592420578, "learning_rate": 1.638278246023717e-07, "loss": 0.4522, "step": 3327 }, { "epoch": 2.7787364319510157, "grad_norm": 0.28244906663894653, "learning_rate": 1.62595813526657e-07, "loss": 0.3869, "step": 3328 }, { "epoch": 2.7795713888115783, "grad_norm": 0.29905304312705994, "learning_rate": 1.6136837584024123e-07, "loss": 0.3822, "step": 3329 }, { "epoch": 2.7804063456721404, "grad_norm": 0.30201178789138794, "learning_rate": 1.601455127035717e-07, "loss": 0.4005, "step": 3330 }, { "epoch": 2.7812413025327025, "grad_norm": 0.2852819561958313, "learning_rate": 1.589272252727697e-07, "loss": 0.3654, "step": 3331 }, { "epoch": 2.7820762593932646, "grad_norm": 0.29843905568122864, "learning_rate": 1.5771351469963226e-07, "loss": 0.3786, "step": 3332 }, { "epoch": 2.7829112162538268, "grad_norm": 0.2945028841495514, "learning_rate": 1.56504382131627e-07, "loss": 0.4244, "step": 3333 }, { "epoch": 2.7837461731143893, "grad_norm": 0.26812922954559326, "learning_rate": 1.5529982871189565e-07, "loss": 0.3703, "step": 3334 }, { "epoch": 2.7845811299749514, "grad_norm": 0.2844063341617584, "learning_rate": 1.5409985557924945e-07, "loss": 0.4059, "step": 3335 }, { "epoch": 2.7854160868355136, "grad_norm": 0.29433467984199524, "learning_rate": 1.5290446386816927e-07, "loss": 0.3945, "step": 3336 }, { "epoch": 2.7862510436960757, "grad_norm": 0.28736263513565063, "learning_rate": 1.5171365470880606e-07, "loss": 0.3733, "step": 3337 }, { "epoch": 2.787086000556638, "grad_norm": 0.2973058223724365, "learning_rate": 1.505274292269754e-07, "loss": 0.4184, "step": 3338 }, { "epoch": 2.7879209574172004, "grad_norm": 0.2599060833454132, "learning_rate": 1.4934578854416403e-07, "loss": 0.3624, "step": 3339 }, { "epoch": 2.7887559142777625, "grad_norm": 0.28850245475769043, "learning_rate": 1.4816873377751785e-07, "loss": 0.4098, "step": 3340 }, { "epoch": 2.7895908711383246, "grad_norm": 0.2744338810443878, "learning_rate": 1.4699626603985273e-07, "loss": 0.4105, "step": 3341 }, { "epoch": 2.7904258279988867, "grad_norm": 0.28122982382774353, "learning_rate": 1.458283864396459e-07, "loss": 0.3733, "step": 3342 }, { "epoch": 2.791260784859449, "grad_norm": 0.2926989197731018, "learning_rate": 1.4466509608103475e-07, "loss": 0.4279, "step": 3343 }, { "epoch": 2.7920957417200114, "grad_norm": 0.29984942078590393, "learning_rate": 1.4350639606382165e-07, "loss": 0.3734, "step": 3344 }, { "epoch": 2.792930698580573, "grad_norm": 0.31706562638282776, "learning_rate": 1.4235228748346707e-07, "loss": 0.413, "step": 3345 }, { "epoch": 2.7937656554411356, "grad_norm": 0.2874121367931366, "learning_rate": 1.4120277143108984e-07, "loss": 0.3842, "step": 3346 }, { "epoch": 2.7946006123016978, "grad_norm": 0.30415087938308716, "learning_rate": 1.400578489934695e-07, "loss": 0.421, "step": 3347 }, { "epoch": 2.79543556916226, "grad_norm": 0.2772625982761383, "learning_rate": 1.389175212530397e-07, "loss": 0.3642, "step": 3348 }, { "epoch": 2.796270526022822, "grad_norm": 0.29067105054855347, "learning_rate": 1.3778178928789243e-07, "loss": 0.3952, "step": 3349 }, { "epoch": 2.797105482883384, "grad_norm": 0.29435861110687256, "learning_rate": 1.3665065417177327e-07, "loss": 0.4039, "step": 3350 }, { "epoch": 2.7979404397439467, "grad_norm": 0.27262815833091736, "learning_rate": 1.3552411697408285e-07, "loss": 0.4005, "step": 3351 }, { "epoch": 2.798775396604509, "grad_norm": 0.29471784830093384, "learning_rate": 1.3440217875987426e-07, "loss": 0.4056, "step": 3352 }, { "epoch": 2.799610353465071, "grad_norm": 0.2666226923465729, "learning_rate": 1.3328484058985346e-07, "loss": 0.3559, "step": 3353 }, { "epoch": 2.800445310325633, "grad_norm": 0.2976098656654358, "learning_rate": 1.3217210352037657e-07, "loss": 0.402, "step": 3354 }, { "epoch": 2.801280267186195, "grad_norm": 0.28497108817100525, "learning_rate": 1.3106396860344872e-07, "loss": 0.3854, "step": 3355 }, { "epoch": 2.8021152240467577, "grad_norm": 0.2872101366519928, "learning_rate": 1.2996043688672631e-07, "loss": 0.4123, "step": 3356 }, { "epoch": 2.80295018090732, "grad_norm": 0.3012046813964844, "learning_rate": 1.2886150941351317e-07, "loss": 0.4175, "step": 3357 }, { "epoch": 2.803785137767882, "grad_norm": 0.3253025412559509, "learning_rate": 1.2776718722275815e-07, "loss": 0.4344, "step": 3358 }, { "epoch": 2.804620094628444, "grad_norm": 0.26788151264190674, "learning_rate": 1.2667747134905982e-07, "loss": 0.3898, "step": 3359 }, { "epoch": 2.805455051489006, "grad_norm": 0.2716664671897888, "learning_rate": 1.2559236282265795e-07, "loss": 0.367, "step": 3360 }, { "epoch": 2.8062900083495688, "grad_norm": 0.27713924646377563, "learning_rate": 1.2451186266943915e-07, "loss": 0.4414, "step": 3361 }, { "epoch": 2.807124965210131, "grad_norm": 0.2651258409023285, "learning_rate": 1.234359719109318e-07, "loss": 0.3433, "step": 3362 }, { "epoch": 2.807959922070693, "grad_norm": 0.2850373089313507, "learning_rate": 1.2236469156430676e-07, "loss": 0.4163, "step": 3363 }, { "epoch": 2.808794878931255, "grad_norm": 0.2885113060474396, "learning_rate": 1.2129802264237767e-07, "loss": 0.3878, "step": 3364 }, { "epoch": 2.8096298357918172, "grad_norm": 0.29131293296813965, "learning_rate": 1.202359661535951e-07, "loss": 0.3794, "step": 3365 }, { "epoch": 2.81046479265238, "grad_norm": 0.3041238784790039, "learning_rate": 1.1917852310205147e-07, "loss": 0.4104, "step": 3366 }, { "epoch": 2.811299749512942, "grad_norm": 0.27986183762550354, "learning_rate": 1.1812569448747868e-07, "loss": 0.388, "step": 3367 }, { "epoch": 2.812134706373504, "grad_norm": 0.2719592750072479, "learning_rate": 1.170774813052411e-07, "loss": 0.3626, "step": 3368 }, { "epoch": 2.812969663234066, "grad_norm": 0.30129119753837585, "learning_rate": 1.160338845463449e-07, "loss": 0.4113, "step": 3369 }, { "epoch": 2.8138046200946283, "grad_norm": 0.293770432472229, "learning_rate": 1.1499490519742917e-07, "loss": 0.3462, "step": 3370 }, { "epoch": 2.814639576955191, "grad_norm": 0.29673004150390625, "learning_rate": 1.1396054424076763e-07, "loss": 0.4226, "step": 3371 }, { "epoch": 2.815474533815753, "grad_norm": 0.27803727984428406, "learning_rate": 1.129308026542686e-07, "loss": 0.3697, "step": 3372 }, { "epoch": 2.816309490676315, "grad_norm": 0.2794508635997772, "learning_rate": 1.1190568141147273e-07, "loss": 0.3848, "step": 3373 }, { "epoch": 2.817144447536877, "grad_norm": 0.28974127769470215, "learning_rate": 1.1088518148155203e-07, "loss": 0.4171, "step": 3374 }, { "epoch": 2.8179794043974393, "grad_norm": 0.2876083552837372, "learning_rate": 1.0986930382930916e-07, "loss": 0.3836, "step": 3375 }, { "epoch": 2.818814361258002, "grad_norm": 0.29317477345466614, "learning_rate": 1.088580494151792e-07, "loss": 0.3781, "step": 3376 }, { "epoch": 2.819649318118564, "grad_norm": 0.2951151132583618, "learning_rate": 1.0785141919522291e-07, "loss": 0.403, "step": 3377 }, { "epoch": 2.820484274979126, "grad_norm": 0.26237940788269043, "learning_rate": 1.0684941412113126e-07, "loss": 0.348, "step": 3378 }, { "epoch": 2.8213192318396882, "grad_norm": 0.3246859014034271, "learning_rate": 1.0585203514022258e-07, "loss": 0.414, "step": 3379 }, { "epoch": 2.8221541887002504, "grad_norm": 0.28489863872528076, "learning_rate": 1.0485928319544037e-07, "loss": 0.3934, "step": 3380 }, { "epoch": 2.822989145560813, "grad_norm": 0.2977903485298157, "learning_rate": 1.0387115922535495e-07, "loss": 0.4204, "step": 3381 }, { "epoch": 2.823824102421375, "grad_norm": 0.28447917103767395, "learning_rate": 1.0288766416416019e-07, "loss": 0.3683, "step": 3382 }, { "epoch": 2.824659059281937, "grad_norm": 0.2935720384120941, "learning_rate": 1.0190879894167449e-07, "loss": 0.3841, "step": 3383 }, { "epoch": 2.8254940161424993, "grad_norm": 0.28905153274536133, "learning_rate": 1.0093456448333872e-07, "loss": 0.3944, "step": 3384 }, { "epoch": 2.8263289730030614, "grad_norm": 0.28321319818496704, "learning_rate": 9.996496171021553e-08, "loss": 0.366, "step": 3385 }, { "epoch": 2.827163929863624, "grad_norm": 0.32788318395614624, "learning_rate": 9.899999153898942e-08, "loss": 0.4635, "step": 3386 }, { "epoch": 2.8279988867241856, "grad_norm": 0.29552337527275085, "learning_rate": 9.803965488196399e-08, "loss": 0.3831, "step": 3387 }, { "epoch": 2.828833843584748, "grad_norm": 0.26795414090156555, "learning_rate": 9.708395264706294e-08, "loss": 0.3519, "step": 3388 }, { "epoch": 2.8296688004453103, "grad_norm": 0.30434679985046387, "learning_rate": 9.613288573782853e-08, "loss": 0.3853, "step": 3389 }, { "epoch": 2.8305037573058724, "grad_norm": 0.30728521943092346, "learning_rate": 9.518645505342095e-08, "loss": 0.3892, "step": 3390 }, { "epoch": 2.831338714166435, "grad_norm": 0.2933547794818878, "learning_rate": 9.424466148861667e-08, "loss": 0.4193, "step": 3391 }, { "epoch": 2.8321736710269967, "grad_norm": 0.29101845622062683, "learning_rate": 9.330750593380677e-08, "loss": 0.3853, "step": 3392 }, { "epoch": 2.8330086278875592, "grad_norm": 0.2803562879562378, "learning_rate": 9.237498927500088e-08, "loss": 0.3429, "step": 3393 }, { "epoch": 2.8338435847481214, "grad_norm": 0.29199689626693726, "learning_rate": 9.144711239381988e-08, "loss": 0.3709, "step": 3394 }, { "epoch": 2.8346785416086835, "grad_norm": 0.28099071979522705, "learning_rate": 9.052387616749924e-08, "loss": 0.3681, "step": 3395 }, { "epoch": 2.8355134984692456, "grad_norm": 0.2774576246738434, "learning_rate": 8.960528146888803e-08, "loss": 0.3947, "step": 3396 }, { "epoch": 2.8363484553298077, "grad_norm": 0.3029448688030243, "learning_rate": 8.869132916644429e-08, "loss": 0.4079, "step": 3397 }, { "epoch": 2.8371834121903703, "grad_norm": 0.274876207113266, "learning_rate": 8.77820201242402e-08, "loss": 0.3526, "step": 3398 }, { "epoch": 2.8380183690509324, "grad_norm": 0.29027608036994934, "learning_rate": 8.687735520195639e-08, "loss": 0.3699, "step": 3399 }, { "epoch": 2.8388533259114945, "grad_norm": 0.3153742253780365, "learning_rate": 8.59773352548826e-08, "loss": 0.4335, "step": 3400 }, { "epoch": 2.8396882827720566, "grad_norm": 0.30852624773979187, "learning_rate": 8.50819611339182e-08, "loss": 0.4011, "step": 3401 }, { "epoch": 2.8405232396326188, "grad_norm": 0.28618481755256653, "learning_rate": 8.419123368556991e-08, "loss": 0.3694, "step": 3402 }, { "epoch": 2.8413581964931813, "grad_norm": 0.28088459372520447, "learning_rate": 8.330515375195025e-08, "loss": 0.3961, "step": 3403 }, { "epoch": 2.8421931533537435, "grad_norm": 0.29609403014183044, "learning_rate": 8.242372217078021e-08, "loss": 0.4231, "step": 3404 }, { "epoch": 2.8430281102143056, "grad_norm": 0.29469186067581177, "learning_rate": 8.154693977538431e-08, "loss": 0.3643, "step": 3405 }, { "epoch": 2.8438630670748677, "grad_norm": 0.2921200692653656, "learning_rate": 8.067480739469169e-08, "loss": 0.3883, "step": 3406 }, { "epoch": 2.84469802393543, "grad_norm": 0.2875145673751831, "learning_rate": 7.980732585323614e-08, "loss": 0.4005, "step": 3407 }, { "epoch": 2.8455329807959924, "grad_norm": 0.26742425560951233, "learning_rate": 7.894449597115273e-08, "loss": 0.3701, "step": 3408 }, { "epoch": 2.8463679376565545, "grad_norm": 0.301778107881546, "learning_rate": 7.808631856418225e-08, "loss": 0.3986, "step": 3409 }, { "epoch": 2.8472028945171166, "grad_norm": 0.2757822275161743, "learning_rate": 7.723279444366294e-08, "loss": 0.3818, "step": 3410 }, { "epoch": 2.8480378513776787, "grad_norm": 0.29620563983917236, "learning_rate": 7.638392441653542e-08, "loss": 0.4132, "step": 3411 }, { "epoch": 2.848872808238241, "grad_norm": 0.28786996006965637, "learning_rate": 7.553970928534105e-08, "loss": 0.3881, "step": 3412 }, { "epoch": 2.8497077650988034, "grad_norm": 0.31541258096694946, "learning_rate": 7.470014984821916e-08, "loss": 0.3938, "step": 3413 }, { "epoch": 2.8505427219593655, "grad_norm": 0.29366928339004517, "learning_rate": 7.386524689890817e-08, "loss": 0.4182, "step": 3414 }, { "epoch": 2.8513776788199277, "grad_norm": 0.2814786732196808, "learning_rate": 7.303500122674334e-08, "loss": 0.3334, "step": 3415 }, { "epoch": 2.8522126356804898, "grad_norm": 0.3042774498462677, "learning_rate": 7.220941361665846e-08, "loss": 0.4412, "step": 3416 }, { "epoch": 2.853047592541052, "grad_norm": 0.27089419960975647, "learning_rate": 7.138848484918082e-08, "loss": 0.3476, "step": 3417 }, { "epoch": 2.8538825494016145, "grad_norm": 0.2887814939022064, "learning_rate": 7.057221570043571e-08, "loss": 0.4236, "step": 3418 }, { "epoch": 2.8547175062621766, "grad_norm": 0.2724640965461731, "learning_rate": 6.976060694214193e-08, "loss": 0.356, "step": 3419 }, { "epoch": 2.8555524631227387, "grad_norm": 0.2890106439590454, "learning_rate": 6.895365934161236e-08, "loss": 0.4163, "step": 3420 }, { "epoch": 2.856387419983301, "grad_norm": 0.2925017476081848, "learning_rate": 6.815137366175284e-08, "loss": 0.4337, "step": 3421 }, { "epoch": 2.857222376843863, "grad_norm": 0.2861907482147217, "learning_rate": 6.735375066106275e-08, "loss": 0.3678, "step": 3422 }, { "epoch": 2.8580573337044255, "grad_norm": 0.29688796401023865, "learning_rate": 6.656079109363223e-08, "loss": 0.3901, "step": 3423 }, { "epoch": 2.8588922905649876, "grad_norm": 0.2932673394680023, "learning_rate": 6.577249570914323e-08, "loss": 0.4124, "step": 3424 }, { "epoch": 2.8597272474255497, "grad_norm": 0.2887389063835144, "learning_rate": 6.49888652528663e-08, "loss": 0.419, "step": 3425 }, { "epoch": 2.860562204286112, "grad_norm": 0.2791891098022461, "learning_rate": 6.420990046566488e-08, "loss": 0.3676, "step": 3426 }, { "epoch": 2.861397161146674, "grad_norm": 0.2936583459377289, "learning_rate": 6.34356020839888e-08, "loss": 0.4079, "step": 3427 }, { "epoch": 2.8622321180072365, "grad_norm": 0.27548015117645264, "learning_rate": 6.266597083987635e-08, "loss": 0.3776, "step": 3428 }, { "epoch": 2.8630670748677987, "grad_norm": 0.2925541400909424, "learning_rate": 6.190100746095495e-08, "loss": 0.4167, "step": 3429 }, { "epoch": 2.8639020317283608, "grad_norm": 0.2942295968532562, "learning_rate": 6.11407126704372e-08, "loss": 0.411, "step": 3430 }, { "epoch": 2.864736988588923, "grad_norm": 0.2764345109462738, "learning_rate": 6.038508718712365e-08, "loss": 0.3622, "step": 3431 }, { "epoch": 2.865571945449485, "grad_norm": 0.3110189735889435, "learning_rate": 5.963413172539845e-08, "loss": 0.4436, "step": 3432 }, { "epoch": 2.8664069023100476, "grad_norm": 0.27175599336624146, "learning_rate": 5.888784699523198e-08, "loss": 0.3781, "step": 3433 }, { "epoch": 2.8672418591706093, "grad_norm": 0.2900940179824829, "learning_rate": 5.81462337021782e-08, "loss": 0.4314, "step": 3434 }, { "epoch": 2.868076816031172, "grad_norm": 0.28719109296798706, "learning_rate": 5.7409292547374596e-08, "loss": 0.3879, "step": 3435 }, { "epoch": 2.868911772891734, "grad_norm": 0.3011268973350525, "learning_rate": 5.6677024227543284e-08, "loss": 0.403, "step": 3436 }, { "epoch": 2.869746729752296, "grad_norm": 0.2937992215156555, "learning_rate": 5.594942943498438e-08, "loss": 0.4542, "step": 3437 }, { "epoch": 2.870581686612858, "grad_norm": 0.27604350447654724, "learning_rate": 5.522650885758374e-08, "loss": 0.3737, "step": 3438 }, { "epoch": 2.8714166434734203, "grad_norm": 0.30241847038269043, "learning_rate": 5.4508263178806864e-08, "loss": 0.415, "step": 3439 }, { "epoch": 2.872251600333983, "grad_norm": 0.28235167264938354, "learning_rate": 5.37946930776978e-08, "loss": 0.3806, "step": 3440 }, { "epoch": 2.873086557194545, "grad_norm": 0.25233161449432373, "learning_rate": 5.3085799228883015e-08, "loss": 0.3425, "step": 3441 }, { "epoch": 2.873921514055107, "grad_norm": 0.2866743206977844, "learning_rate": 5.2381582302565295e-08, "loss": 0.4028, "step": 3442 }, { "epoch": 2.874756470915669, "grad_norm": 0.2690749168395996, "learning_rate": 5.168204296452761e-08, "loss": 0.4004, "step": 3443 }, { "epoch": 2.8755914277762313, "grad_norm": 0.27216580510139465, "learning_rate": 5.098718187612983e-08, "loss": 0.3826, "step": 3444 }, { "epoch": 2.876426384636794, "grad_norm": 0.2847534716129303, "learning_rate": 5.0296999694309234e-08, "loss": 0.4384, "step": 3445 }, { "epoch": 2.877261341497356, "grad_norm": 0.2972407042980194, "learning_rate": 4.961149707157886e-08, "loss": 0.3846, "step": 3446 }, { "epoch": 2.878096298357918, "grad_norm": 0.2741158902645111, "learning_rate": 4.893067465602863e-08, "loss": 0.3432, "step": 3447 }, { "epoch": 2.8789312552184803, "grad_norm": 0.28921744227409363, "learning_rate": 4.8254533091323106e-08, "loss": 0.3846, "step": 3448 }, { "epoch": 2.8797662120790424, "grad_norm": 0.2990506887435913, "learning_rate": 4.758307301670095e-08, "loss": 0.4255, "step": 3449 }, { "epoch": 2.880601168939605, "grad_norm": 0.2847556471824646, "learning_rate": 4.691629506697659e-08, "loss": 0.3823, "step": 3450 }, { "epoch": 2.881436125800167, "grad_norm": 0.30590713024139404, "learning_rate": 4.625419987253521e-08, "loss": 0.4089, "step": 3451 }, { "epoch": 2.882271082660729, "grad_norm": 0.30736905336380005, "learning_rate": 4.5596788059337206e-08, "loss": 0.3988, "step": 3452 }, { "epoch": 2.8831060395212913, "grad_norm": 0.2756861448287964, "learning_rate": 4.494406024891485e-08, "loss": 0.3631, "step": 3453 }, { "epoch": 2.8839409963818534, "grad_norm": 0.31702613830566406, "learning_rate": 4.429601705837006e-08, "loss": 0.4047, "step": 3454 }, { "epoch": 2.884775953242416, "grad_norm": 0.2880480885505676, "learning_rate": 4.36526591003783e-08, "loss": 0.3873, "step": 3455 }, { "epoch": 2.885610910102978, "grad_norm": 0.2878727912902832, "learning_rate": 4.3013986983184705e-08, "loss": 0.4018, "step": 3456 }, { "epoch": 2.88644586696354, "grad_norm": 0.29486212134361267, "learning_rate": 4.238000131060238e-08, "loss": 0.3943, "step": 3457 }, { "epoch": 2.8872808238241023, "grad_norm": 0.31439831852912903, "learning_rate": 4.175070268201742e-08, "loss": 0.3685, "step": 3458 }, { "epoch": 2.8881157806846645, "grad_norm": 0.283173531293869, "learning_rate": 4.112609169238224e-08, "loss": 0.3905, "step": 3459 }, { "epoch": 2.888950737545227, "grad_norm": 0.28908953070640564, "learning_rate": 4.050616893221671e-08, "loss": 0.3708, "step": 3460 }, { "epoch": 2.889785694405789, "grad_norm": 0.32054564356803894, "learning_rate": 3.989093498761087e-08, "loss": 0.4306, "step": 3461 }, { "epoch": 2.8906206512663513, "grad_norm": 0.30166295170783997, "learning_rate": 3.928039044022058e-08, "loss": 0.3744, "step": 3462 }, { "epoch": 2.8914556081269134, "grad_norm": 0.30642169713974, "learning_rate": 3.8674535867266885e-08, "loss": 0.4076, "step": 3463 }, { "epoch": 2.8922905649874755, "grad_norm": 0.28494322299957275, "learning_rate": 3.807337184153992e-08, "loss": 0.374, "step": 3464 }, { "epoch": 2.893125521848038, "grad_norm": 0.2918509244918823, "learning_rate": 3.747689893139228e-08, "loss": 0.4149, "step": 3465 }, { "epoch": 2.8939604787086, "grad_norm": 0.27941611409187317, "learning_rate": 3.6885117700742876e-08, "loss": 0.374, "step": 3466 }, { "epoch": 2.8947954355691623, "grad_norm": 0.30276960134506226, "learning_rate": 3.629802870907528e-08, "loss": 0.4227, "step": 3467 }, { "epoch": 2.8956303924297244, "grad_norm": 0.2976827919483185, "learning_rate": 3.571563251143606e-08, "loss": 0.4148, "step": 3468 }, { "epoch": 2.8964653492902865, "grad_norm": 0.2865069508552551, "learning_rate": 3.513792965843532e-08, "loss": 0.3864, "step": 3469 }, { "epoch": 2.897300306150849, "grad_norm": 0.2629542350769043, "learning_rate": 3.456492069624673e-08, "loss": 0.366, "step": 3470 }, { "epoch": 2.8981352630114112, "grad_norm": 0.3238036334514618, "learning_rate": 3.3996606166605825e-08, "loss": 0.4327, "step": 3471 }, { "epoch": 2.8989702198719733, "grad_norm": 0.2888815402984619, "learning_rate": 3.343298660680838e-08, "loss": 0.3741, "step": 3472 }, { "epoch": 2.8998051767325355, "grad_norm": 0.27096354961395264, "learning_rate": 3.2874062549714814e-08, "loss": 0.3636, "step": 3473 }, { "epoch": 2.9006401335930976, "grad_norm": 0.29330193996429443, "learning_rate": 3.2319834523742435e-08, "loss": 0.4023, "step": 3474 }, { "epoch": 2.90147509045366, "grad_norm": 0.2873504161834717, "learning_rate": 3.17703030528721e-08, "loss": 0.3942, "step": 3475 }, { "epoch": 2.9023100473142223, "grad_norm": 0.29630234837532043, "learning_rate": 3.1225468656642665e-08, "loss": 0.3987, "step": 3476 }, { "epoch": 2.9031450041747844, "grad_norm": 0.3041030764579773, "learning_rate": 3.06853318501521e-08, "loss": 0.3876, "step": 3477 }, { "epoch": 2.9039799610353465, "grad_norm": 0.2777038514614105, "learning_rate": 3.0149893144058584e-08, "loss": 0.3941, "step": 3478 }, { "epoch": 2.9048149178959086, "grad_norm": 0.27403882145881653, "learning_rate": 2.9619153044577188e-08, "loss": 0.4062, "step": 3479 }, { "epoch": 2.905649874756471, "grad_norm": 0.30988460779190063, "learning_rate": 2.9093112053481532e-08, "loss": 0.4479, "step": 3480 }, { "epoch": 2.906484831617033, "grad_norm": 0.2693648040294647, "learning_rate": 2.8571770668103238e-08, "loss": 0.3757, "step": 3481 }, { "epoch": 2.9073197884775954, "grad_norm": 0.2853405773639679, "learning_rate": 2.8055129381328595e-08, "loss": 0.3759, "step": 3482 }, { "epoch": 2.9081547453381575, "grad_norm": 0.29102352261543274, "learning_rate": 2.754318868160244e-08, "loss": 0.3763, "step": 3483 }, { "epoch": 2.9089897021987197, "grad_norm": 0.2807844579219818, "learning_rate": 2.7035949052924837e-08, "loss": 0.3751, "step": 3484 }, { "epoch": 2.909824659059282, "grad_norm": 0.29253479838371277, "learning_rate": 2.6533410974851627e-08, "loss": 0.3974, "step": 3485 }, { "epoch": 2.910659615919844, "grad_norm": 0.3076030910015106, "learning_rate": 2.603557492249331e-08, "loss": 0.4148, "step": 3486 }, { "epoch": 2.9114945727804065, "grad_norm": 0.28164657950401306, "learning_rate": 2.554244136651507e-08, "loss": 0.3801, "step": 3487 }, { "epoch": 2.9123295296409686, "grad_norm": 0.30726391077041626, "learning_rate": 2.5054010773136185e-08, "loss": 0.4139, "step": 3488 }, { "epoch": 2.9131644865015307, "grad_norm": 0.2918972671031952, "learning_rate": 2.4570283604129496e-08, "loss": 0.366, "step": 3489 }, { "epoch": 2.913999443362093, "grad_norm": 0.25710833072662354, "learning_rate": 2.4091260316822517e-08, "loss": 0.3525, "step": 3490 }, { "epoch": 2.914834400222655, "grad_norm": 0.2862894833087921, "learning_rate": 2.361694136409354e-08, "loss": 0.393, "step": 3491 }, { "epoch": 2.9156693570832175, "grad_norm": 0.29985955357551575, "learning_rate": 2.31473271943744e-08, "loss": 0.417, "step": 3492 }, { "epoch": 2.9165043139437796, "grad_norm": 0.25922656059265137, "learning_rate": 2.2682418251649407e-08, "loss": 0.3405, "step": 3493 }, { "epoch": 2.9173392708043417, "grad_norm": 0.2937055230140686, "learning_rate": 2.2222214975452516e-08, "loss": 0.4583, "step": 3494 }, { "epoch": 2.918174227664904, "grad_norm": 0.2918699383735657, "learning_rate": 2.176671780087125e-08, "loss": 0.3721, "step": 3495 }, { "epoch": 2.919009184525466, "grad_norm": 0.3060970604419708, "learning_rate": 2.1315927158542247e-08, "loss": 0.4034, "step": 3496 }, { "epoch": 2.9198441413860285, "grad_norm": 0.28730636835098267, "learning_rate": 2.0869843474653484e-08, "loss": 0.3933, "step": 3497 }, { "epoch": 2.9206790982465907, "grad_norm": 0.2991330325603485, "learning_rate": 2.0428467170941492e-08, "loss": 0.3996, "step": 3498 }, { "epoch": 2.921514055107153, "grad_norm": 0.300920695066452, "learning_rate": 1.999179866469414e-08, "loss": 0.4399, "step": 3499 }, { "epoch": 2.922349011967715, "grad_norm": 0.2944263219833374, "learning_rate": 1.9559838368746753e-08, "loss": 0.3924, "step": 3500 }, { "epoch": 2.923183968828277, "grad_norm": 0.28971073031425476, "learning_rate": 1.9132586691484323e-08, "loss": 0.4115, "step": 3501 }, { "epoch": 2.9240189256888396, "grad_norm": 0.35841259360313416, "learning_rate": 1.8710044036840958e-08, "loss": 0.36, "step": 3502 }, { "epoch": 2.9248538825494017, "grad_norm": 0.28224772214889526, "learning_rate": 1.8292210804297104e-08, "loss": 0.4261, "step": 3503 }, { "epoch": 2.925688839409964, "grad_norm": 0.2775766849517822, "learning_rate": 1.7879087388881222e-08, "loss": 0.3709, "step": 3504 }, { "epoch": 2.926523796270526, "grad_norm": 0.30862781405448914, "learning_rate": 1.7470674181169766e-08, "loss": 0.4, "step": 3505 }, { "epoch": 2.927358753131088, "grad_norm": 0.2931574285030365, "learning_rate": 1.7066971567286096e-08, "loss": 0.4118, "step": 3506 }, { "epoch": 2.9281937099916506, "grad_norm": 0.2879885137081146, "learning_rate": 1.6667979928899347e-08, "loss": 0.3776, "step": 3507 }, { "epoch": 2.9290286668522127, "grad_norm": 0.3090651333332062, "learning_rate": 1.627369964322445e-08, "loss": 0.415, "step": 3508 }, { "epoch": 2.929863623712775, "grad_norm": 0.26129069924354553, "learning_rate": 1.5884131083023225e-08, "loss": 0.3227, "step": 3509 }, { "epoch": 2.930698580573337, "grad_norm": 0.30643168091773987, "learning_rate": 1.5499274616602723e-08, "loss": 0.3944, "step": 3510 }, { "epoch": 2.931533537433899, "grad_norm": 0.3006287217140198, "learning_rate": 1.511913060781467e-08, "loss": 0.411, "step": 3511 }, { "epoch": 2.9323684942944617, "grad_norm": 0.27079546451568604, "learning_rate": 1.4743699416054914e-08, "loss": 0.374, "step": 3512 }, { "epoch": 2.933203451155024, "grad_norm": 0.2685757577419281, "learning_rate": 1.437298139626564e-08, "loss": 0.3752, "step": 3513 }, { "epoch": 2.934038408015586, "grad_norm": 0.29062625765800476, "learning_rate": 1.4006976898930935e-08, "loss": 0.4106, "step": 3514 }, { "epoch": 2.934873364876148, "grad_norm": 0.2832629382610321, "learning_rate": 1.3645686270079562e-08, "loss": 0.4056, "step": 3515 }, { "epoch": 2.93570832173671, "grad_norm": 0.27785560488700867, "learning_rate": 1.3289109851284954e-08, "loss": 0.3438, "step": 3516 }, { "epoch": 2.9365432785972727, "grad_norm": 0.3005867302417755, "learning_rate": 1.2937247979660783e-08, "loss": 0.3795, "step": 3517 }, { "epoch": 2.937378235457835, "grad_norm": 0.2976515293121338, "learning_rate": 1.259010098786595e-08, "loss": 0.4444, "step": 3518 }, { "epoch": 2.938213192318397, "grad_norm": 0.2739114463329315, "learning_rate": 1.2247669204100699e-08, "loss": 0.3635, "step": 3519 }, { "epoch": 2.939048149178959, "grad_norm": 0.29662999510765076, "learning_rate": 1.1909952952107173e-08, "loss": 0.4246, "step": 3520 }, { "epoch": 2.939883106039521, "grad_norm": 0.2689160704612732, "learning_rate": 1.1576952551171083e-08, "loss": 0.3471, "step": 3521 }, { "epoch": 2.9407180629000838, "grad_norm": 0.292636901140213, "learning_rate": 1.1248668316117262e-08, "loss": 0.4182, "step": 3522 }, { "epoch": 2.9415530197606454, "grad_norm": 0.27746787667274475, "learning_rate": 1.092510055731355e-08, "loss": 0.3634, "step": 3523 }, { "epoch": 2.942387976621208, "grad_norm": 0.28325894474983215, "learning_rate": 1.0606249580667472e-08, "loss": 0.3828, "step": 3524 }, { "epoch": 2.94322293348177, "grad_norm": 0.296983540058136, "learning_rate": 1.0292115687628446e-08, "loss": 0.4205, "step": 3525 }, { "epoch": 2.9440578903423322, "grad_norm": 0.30603763461112976, "learning_rate": 9.982699175185018e-09, "loss": 0.3802, "step": 3526 }, { "epoch": 2.944892847202895, "grad_norm": 0.30387812852859497, "learning_rate": 9.678000335867077e-09, "loss": 0.4055, "step": 3527 }, { "epoch": 2.9457278040634565, "grad_norm": 0.27657824754714966, "learning_rate": 9.378019457743082e-09, "loss": 0.3715, "step": 3528 }, { "epoch": 2.946562760924019, "grad_norm": 0.2915497422218323, "learning_rate": 9.082756824421723e-09, "loss": 0.3987, "step": 3529 }, { "epoch": 2.947397717784581, "grad_norm": 0.2842751443386078, "learning_rate": 8.792212715051374e-09, "loss": 0.4002, "step": 3530 }, { "epoch": 2.9482326746451433, "grad_norm": 0.29941344261169434, "learning_rate": 8.506387404318417e-09, "loss": 0.412, "step": 3531 }, { "epoch": 2.9490676315057054, "grad_norm": 0.27809420228004456, "learning_rate": 8.225281162448916e-09, "loss": 0.3867, "step": 3532 }, { "epoch": 2.9499025883662675, "grad_norm": 0.2853020429611206, "learning_rate": 7.948894255206396e-09, "loss": 0.3937, "step": 3533 }, { "epoch": 2.95073754522683, "grad_norm": 0.2858470678329468, "learning_rate": 7.677226943892946e-09, "loss": 0.4182, "step": 3534 }, { "epoch": 2.951572502087392, "grad_norm": 0.2974978983402252, "learning_rate": 7.4102794853492256e-09, "loss": 0.3971, "step": 3535 }, { "epoch": 2.9524074589479543, "grad_norm": 0.27727949619293213, "learning_rate": 7.148052131953909e-09, "loss": 0.3793, "step": 3536 }, { "epoch": 2.9532424158085164, "grad_norm": 0.3024390637874603, "learning_rate": 6.890545131621462e-09, "loss": 0.4079, "step": 3537 }, { "epoch": 2.9540773726690785, "grad_norm": 0.29189592599868774, "learning_rate": 6.637758727806032e-09, "loss": 0.3918, "step": 3538 }, { "epoch": 2.954912329529641, "grad_norm": 0.27983078360557556, "learning_rate": 6.3896931594958914e-09, "loss": 0.3843, "step": 3539 }, { "epoch": 2.9557472863902032, "grad_norm": 0.2704346776008606, "learning_rate": 6.146348661218992e-09, "loss": 0.3518, "step": 3540 }, { "epoch": 2.9565822432507654, "grad_norm": 0.30433225631713867, "learning_rate": 5.907725463037972e-09, "loss": 0.4435, "step": 3541 }, { "epoch": 2.9574172001113275, "grad_norm": 0.2815457582473755, "learning_rate": 5.673823790552923e-09, "loss": 0.4087, "step": 3542 }, { "epoch": 2.9582521569718896, "grad_norm": 0.26824143528938293, "learning_rate": 5.44464386489918e-09, "loss": 0.3515, "step": 3543 }, { "epoch": 2.959087113832452, "grad_norm": 0.2911878824234009, "learning_rate": 5.2201859027495304e-09, "loss": 0.4423, "step": 3544 }, { "epoch": 2.9599220706930143, "grad_norm": 0.28846120834350586, "learning_rate": 5.000450116309785e-09, "loss": 0.3862, "step": 3545 }, { "epoch": 2.9607570275535764, "grad_norm": 0.2897917330265045, "learning_rate": 4.785436713324876e-09, "loss": 0.432, "step": 3546 }, { "epoch": 2.9615919844141385, "grad_norm": 0.2929547429084778, "learning_rate": 4.575145897072197e-09, "loss": 0.3863, "step": 3547 }, { "epoch": 2.9624269412747006, "grad_norm": 0.27531951665878296, "learning_rate": 4.369577866365493e-09, "loss": 0.3712, "step": 3548 }, { "epoch": 2.963261898135263, "grad_norm": 0.2830955386161804, "learning_rate": 4.168732815553744e-09, "loss": 0.3972, "step": 3549 }, { "epoch": 2.9640968549958253, "grad_norm": 0.29908081889152527, "learning_rate": 3.972610934519505e-09, "loss": 0.4301, "step": 3550 }, { "epoch": 2.9649318118563874, "grad_norm": 0.28037235140800476, "learning_rate": 3.781212408681678e-09, "loss": 0.3613, "step": 3551 }, { "epoch": 2.9657667687169496, "grad_norm": 0.27963826060295105, "learning_rate": 3.5945374189921834e-09, "loss": 0.4092, "step": 3552 }, { "epoch": 2.9666017255775117, "grad_norm": 0.29781022667884827, "learning_rate": 3.4125861419376237e-09, "loss": 0.3901, "step": 3553 }, { "epoch": 2.9674366824380742, "grad_norm": 0.28534606099128723, "learning_rate": 3.2353587495387305e-09, "loss": 0.4297, "step": 3554 }, { "epoch": 2.9682716392986364, "grad_norm": 0.27236440777778625, "learning_rate": 3.062855409350918e-09, "loss": 0.388, "step": 3555 }, { "epoch": 2.9691065961591985, "grad_norm": 0.2716915011405945, "learning_rate": 2.8950762844620615e-09, "loss": 0.3871, "step": 3556 }, { "epoch": 2.9699415530197606, "grad_norm": 0.27316680550575256, "learning_rate": 2.732021533494167e-09, "loss": 0.3802, "step": 3557 }, { "epoch": 2.9707765098803227, "grad_norm": 0.2943369746208191, "learning_rate": 2.5736913106033657e-09, "loss": 0.3879, "step": 3558 }, { "epoch": 2.9716114667408853, "grad_norm": 0.2820945680141449, "learning_rate": 2.4200857654782528e-09, "loss": 0.3906, "step": 3559 }, { "epoch": 2.9724464236014474, "grad_norm": 0.2676917612552643, "learning_rate": 2.271205043340441e-09, "loss": 0.3751, "step": 3560 }, { "epoch": 2.9732813804620095, "grad_norm": 0.2716110050678253, "learning_rate": 2.1270492849456704e-09, "loss": 0.4213, "step": 3561 }, { "epoch": 2.9741163373225716, "grad_norm": 0.287484347820282, "learning_rate": 1.987618626582144e-09, "loss": 0.4148, "step": 3562 }, { "epoch": 2.9749512941831338, "grad_norm": 0.29078513383865356, "learning_rate": 1.8529132000699724e-09, "loss": 0.3876, "step": 3563 }, { "epoch": 2.9757862510436963, "grad_norm": 0.29917746782302856, "learning_rate": 1.7229331327633935e-09, "loss": 0.3999, "step": 3564 }, { "epoch": 2.9766212079042584, "grad_norm": 0.2968723177909851, "learning_rate": 1.5976785475479984e-09, "loss": 0.3547, "step": 3565 }, { "epoch": 2.9774561647648206, "grad_norm": 0.286594957113266, "learning_rate": 1.477149562842395e-09, "loss": 0.401, "step": 3566 }, { "epoch": 2.9782911216253827, "grad_norm": 0.3024488687515259, "learning_rate": 1.3613462925976539e-09, "loss": 0.4061, "step": 3567 }, { "epoch": 2.979126078485945, "grad_norm": 0.30017709732055664, "learning_rate": 1.2502688462961986e-09, "loss": 0.4316, "step": 3568 }, { "epoch": 2.9799610353465074, "grad_norm": 0.27157965302467346, "learning_rate": 1.14391732895347e-09, "loss": 0.3828, "step": 3569 }, { "epoch": 2.980795992207069, "grad_norm": 0.2673535943031311, "learning_rate": 1.0422918411168159e-09, "loss": 0.3981, "step": 3570 }, { "epoch": 2.9816309490676316, "grad_norm": 0.25993114709854126, "learning_rate": 9.45392478864382e-10, "loss": 0.338, "step": 3571 }, { "epoch": 2.9824659059281937, "grad_norm": 0.2882920205593109, "learning_rate": 8.532193338073313e-10, "loss": 0.4014, "step": 3572 }, { "epoch": 2.983300862788756, "grad_norm": 0.31772467494010925, "learning_rate": 7.657724930887344e-10, "loss": 0.4518, "step": 3573 }, { "epoch": 2.984135819649318, "grad_norm": 0.2731872797012329, "learning_rate": 6.830520393824591e-10, "loss": 0.3488, "step": 3574 }, { "epoch": 2.98497077650988, "grad_norm": 0.2973591685295105, "learning_rate": 6.050580508937254e-10, "loss": 0.3942, "step": 3575 }, { "epoch": 2.9858057333704426, "grad_norm": 0.3103412389755249, "learning_rate": 5.31790601360771e-10, "loss": 0.3989, "step": 3576 }, { "epoch": 2.9866406902310048, "grad_norm": 0.29644739627838135, "learning_rate": 4.632497600515207e-10, "loss": 0.3644, "step": 3577 }, { "epoch": 2.987475647091567, "grad_norm": 0.2749888002872467, "learning_rate": 3.994355917663617e-10, "loss": 0.3972, "step": 3578 }, { "epoch": 2.988310603952129, "grad_norm": 0.2862166166305542, "learning_rate": 3.403481568364786e-10, "loss": 0.3266, "step": 3579 }, { "epoch": 2.989145560812691, "grad_norm": 0.2968607544898987, "learning_rate": 2.8598751112385306e-10, "loss": 0.4437, "step": 3580 }, { "epoch": 2.9899805176732537, "grad_norm": 0.29956144094467163, "learning_rate": 2.363537060234844e-10, "loss": 0.3934, "step": 3581 }, { "epoch": 2.990815474533816, "grad_norm": 0.29467248916625977, "learning_rate": 1.9144678845950393e-10, "loss": 0.4048, "step": 3582 }, { "epoch": 2.991650431394378, "grad_norm": 0.27305465936660767, "learning_rate": 1.5126680088795031e-10, "loss": 0.3452, "step": 3583 }, { "epoch": 2.99248538825494, "grad_norm": 0.29849836230278015, "learning_rate": 1.1581378129621457e-10, "loss": 0.4389, "step": 3584 }, { "epoch": 2.993320345115502, "grad_norm": 0.2878829836845398, "learning_rate": 8.50877632019298e-11, "loss": 0.378, "step": 3585 }, { "epoch": 2.9941553019760647, "grad_norm": 0.2793327867984772, "learning_rate": 5.908877565408145e-11, "loss": 0.376, "step": 3586 }, { "epoch": 2.994990258836627, "grad_norm": 0.3075646460056305, "learning_rate": 3.7816843233562387e-11, "loss": 0.4275, "step": 3587 }, { "epoch": 2.995825215697189, "grad_norm": 0.2971005439758301, "learning_rate": 2.1271986050397375e-11, "loss": 0.4004, "step": 3588 }, { "epoch": 2.996660172557751, "grad_norm": 0.3145476281642914, "learning_rate": 9.454219746518612e-12, "loss": 0.3703, "step": 3589 }, { "epoch": 2.997495129418313, "grad_norm": 0.2904053330421448, "learning_rate": 2.363555495210612e-12, "loss": 0.4336, "step": 3590 }, { "epoch": 2.9983300862788758, "grad_norm": 0.2960866093635559, "learning_rate": 0.0, "loss": 0.3992, "step": 3591 }, { "epoch": 2.9983300862788758, "step": 3591, "total_flos": 4676861506813952.0, "train_loss": 0.4409126893818727, "train_runtime": 71717.2839, "train_samples_per_second": 4.809, "train_steps_per_second": 0.05 } ], "logging_steps": 1.0, "max_steps": 3591, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4676861506813952.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }