{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6887693108835272, "eval_steps": 2951, "global_step": 7440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00010168155877829607, "grad_norm": 3.390625, "learning_rate": 0.00011299435028248587, "loss": 3.2031, "step": 1 }, { "epoch": 0.00020336311755659214, "grad_norm": 3.359375, "learning_rate": 0.00022598870056497175, "loss": 3.0147, "step": 2 }, { "epoch": 0.0003050446763348882, "grad_norm": 3.171875, "learning_rate": 0.00033898305084745765, "loss": 3.1702, "step": 3 }, { "epoch": 0.0004067262351131843, "grad_norm": 3.421875, "learning_rate": 0.0004519774011299435, "loss": 3.1697, "step": 4 }, { "epoch": 0.0005084077938914803, "grad_norm": 3.515625, "learning_rate": 0.0005649717514124294, "loss": 3.099, "step": 5 }, { "epoch": 0.0006100893526697764, "grad_norm": 3.484375, "learning_rate": 0.0006779661016949153, "loss": 3.1276, "step": 6 }, { "epoch": 0.0007117709114480725, "grad_norm": 3.21875, "learning_rate": 0.0007909604519774012, "loss": 3.1475, "step": 7 }, { "epoch": 0.0008134524702263686, "grad_norm": 3.375, "learning_rate": 0.000903954802259887, "loss": 3.0928, "step": 8 }, { "epoch": 0.0009151340290046646, "grad_norm": 3.09375, "learning_rate": 0.001016949152542373, "loss": 3.028, "step": 9 }, { "epoch": 0.0010168155877829607, "grad_norm": 3.21875, "learning_rate": 0.0011299435028248588, "loss": 3.0019, "step": 10 }, { "epoch": 0.0011184971465612568, "grad_norm": 3.359375, "learning_rate": 0.0012429378531073447, "loss": 2.999, "step": 11 }, { "epoch": 0.0012201787053395528, "grad_norm": 3.171875, "learning_rate": 0.0013559322033898306, "loss": 3.0901, "step": 12 }, { "epoch": 0.001321860264117849, "grad_norm": 3.046875, "learning_rate": 0.0014689265536723165, "loss": 2.9991, "step": 13 }, { "epoch": 0.001423541822896145, "grad_norm": 3.1875, "learning_rate": 0.0015819209039548024, "loss": 2.9065, "step": 14 }, { "epoch": 0.001525223381674441, "grad_norm": 2.546875, "learning_rate": 0.001694915254237288, "loss": 2.8828, "step": 15 }, { "epoch": 0.0016269049404527372, "grad_norm": 3.03125, "learning_rate": 0.001807909604519774, "loss": 2.8672, "step": 16 }, { "epoch": 0.0017285864992310333, "grad_norm": 3.3125, "learning_rate": 0.0019209039548022599, "loss": 2.8512, "step": 17 }, { "epoch": 0.0018302680580093292, "grad_norm": 3.390625, "learning_rate": 0.002033898305084746, "loss": 2.8261, "step": 18 }, { "epoch": 0.0019319496167876254, "grad_norm": 2.96875, "learning_rate": 0.0021468926553672315, "loss": 2.7482, "step": 19 }, { "epoch": 0.0020336311755659213, "grad_norm": 2.328125, "learning_rate": 0.0022598870056497176, "loss": 2.5005, "step": 20 }, { "epoch": 0.0021353127343442177, "grad_norm": 2.265625, "learning_rate": 0.0023728813559322037, "loss": 2.6218, "step": 21 }, { "epoch": 0.0022369942931225136, "grad_norm": 1.6171875, "learning_rate": 0.0024858757062146894, "loss": 2.4375, "step": 22 }, { "epoch": 0.0023386758519008096, "grad_norm": 1.109375, "learning_rate": 0.002598870056497175, "loss": 2.3563, "step": 23 }, { "epoch": 0.0024403574106791055, "grad_norm": 1.09375, "learning_rate": 0.002711864406779661, "loss": 2.4473, "step": 24 }, { "epoch": 0.002542038969457402, "grad_norm": 0.953125, "learning_rate": 0.002824858757062147, "loss": 2.4006, "step": 25 }, { "epoch": 0.002643720528235698, "grad_norm": 0.76171875, "learning_rate": 0.002937853107344633, "loss": 2.4106, "step": 26 }, { "epoch": 0.0027454020870139938, "grad_norm": 0.60546875, "learning_rate": 0.003050847457627119, "loss": 2.3258, "step": 27 }, { "epoch": 0.00284708364579229, "grad_norm": 0.3671875, "learning_rate": 0.003163841807909605, "loss": 2.2928, "step": 28 }, { "epoch": 0.002948765204570586, "grad_norm": 0.40625, "learning_rate": 0.00327683615819209, "loss": 2.3009, "step": 29 }, { "epoch": 0.003050446763348882, "grad_norm": 0.267578125, "learning_rate": 0.003389830508474576, "loss": 2.2469, "step": 30 }, { "epoch": 0.0031521283221271784, "grad_norm": 0.21484375, "learning_rate": 0.0035028248587570623, "loss": 2.2102, "step": 31 }, { "epoch": 0.0032538098809054743, "grad_norm": 0.1923828125, "learning_rate": 0.003615819209039548, "loss": 2.2146, "step": 32 }, { "epoch": 0.0033554914396837702, "grad_norm": 0.1787109375, "learning_rate": 0.003728813559322034, "loss": 2.2294, "step": 33 }, { "epoch": 0.0034571729984620666, "grad_norm": 0.162109375, "learning_rate": 0.0038418079096045198, "loss": 2.2151, "step": 34 }, { "epoch": 0.0035588545572403625, "grad_norm": 0.146484375, "learning_rate": 0.003954802259887006, "loss": 2.2166, "step": 35 }, { "epoch": 0.0036605361160186585, "grad_norm": 0.1298828125, "learning_rate": 0.004067796610169492, "loss": 2.2415, "step": 36 }, { "epoch": 0.003762217674796955, "grad_norm": 0.1337890625, "learning_rate": 0.004180790960451978, "loss": 2.1685, "step": 37 }, { "epoch": 0.003863899233575251, "grad_norm": 0.150390625, "learning_rate": 0.004293785310734463, "loss": 2.1851, "step": 38 }, { "epoch": 0.003965580792353547, "grad_norm": 0.193359375, "learning_rate": 0.004406779661016949, "loss": 2.165, "step": 39 }, { "epoch": 0.004067262351131843, "grad_norm": 0.1923828125, "learning_rate": 0.004519774011299435, "loss": 2.177, "step": 40 }, { "epoch": 0.004168943909910139, "grad_norm": 0.19921875, "learning_rate": 0.004632768361581921, "loss": 2.1863, "step": 41 }, { "epoch": 0.004270625468688435, "grad_norm": 0.1650390625, "learning_rate": 0.004745762711864407, "loss": 2.1063, "step": 42 }, { "epoch": 0.004372307027466731, "grad_norm": 0.1650390625, "learning_rate": 0.004858757062146893, "loss": 2.1679, "step": 43 }, { "epoch": 0.004473988586245027, "grad_norm": 0.1904296875, "learning_rate": 0.004971751412429379, "loss": 2.151, "step": 44 }, { "epoch": 0.004575670145023323, "grad_norm": 0.201171875, "learning_rate": 0.005084745762711864, "loss": 2.1011, "step": 45 }, { "epoch": 0.004677351703801619, "grad_norm": 0.1826171875, "learning_rate": 0.00519774011299435, "loss": 2.1333, "step": 46 }, { "epoch": 0.0047790332625799155, "grad_norm": 0.1806640625, "learning_rate": 0.005310734463276836, "loss": 2.0644, "step": 47 }, { "epoch": 0.004880714821358211, "grad_norm": 0.16796875, "learning_rate": 0.005423728813559322, "loss": 2.1194, "step": 48 }, { "epoch": 0.004982396380136507, "grad_norm": 0.1845703125, "learning_rate": 0.005536723163841808, "loss": 2.1437, "step": 49 }, { "epoch": 0.005084077938914804, "grad_norm": 0.166015625, "learning_rate": 0.005649717514124294, "loss": 2.1265, "step": 50 }, { "epoch": 0.005185759497693099, "grad_norm": 0.154296875, "learning_rate": 0.00576271186440678, "loss": 2.1426, "step": 51 }, { "epoch": 0.005287441056471396, "grad_norm": 0.1650390625, "learning_rate": 0.005875706214689266, "loss": 2.1135, "step": 52 }, { "epoch": 0.005389122615249692, "grad_norm": 0.19140625, "learning_rate": 0.005988700564971752, "loss": 2.0959, "step": 53 }, { "epoch": 0.0054908041740279875, "grad_norm": 0.1904296875, "learning_rate": 0.006101694915254238, "loss": 2.1376, "step": 54 }, { "epoch": 0.005592485732806284, "grad_norm": 0.1884765625, "learning_rate": 0.0062146892655367235, "loss": 2.0737, "step": 55 }, { "epoch": 0.00569416729158458, "grad_norm": 0.1708984375, "learning_rate": 0.00632768361581921, "loss": 2.1113, "step": 56 }, { "epoch": 0.005795848850362876, "grad_norm": 0.19140625, "learning_rate": 0.006440677966101695, "loss": 2.1405, "step": 57 }, { "epoch": 0.005897530409141172, "grad_norm": 0.1923828125, "learning_rate": 0.00655367231638418, "loss": 2.1187, "step": 58 }, { "epoch": 0.0059992119679194685, "grad_norm": 0.1953125, "learning_rate": 0.006666666666666666, "loss": 2.1115, "step": 59 }, { "epoch": 0.006100893526697764, "grad_norm": 0.2001953125, "learning_rate": 0.006779661016949152, "loss": 2.138, "step": 60 }, { "epoch": 0.00620257508547606, "grad_norm": 0.21484375, "learning_rate": 0.0068926553672316385, "loss": 2.1772, "step": 61 }, { "epoch": 0.006304256644254357, "grad_norm": 0.22265625, "learning_rate": 0.007005649717514125, "loss": 2.1571, "step": 62 }, { "epoch": 0.006405938203032652, "grad_norm": 0.2080078125, "learning_rate": 0.00711864406779661, "loss": 2.1095, "step": 63 }, { "epoch": 0.006507619761810949, "grad_norm": 0.1904296875, "learning_rate": 0.007231638418079096, "loss": 2.0307, "step": 64 }, { "epoch": 0.006609301320589245, "grad_norm": 0.205078125, "learning_rate": 0.007344632768361582, "loss": 2.0889, "step": 65 }, { "epoch": 0.0067109828793675405, "grad_norm": 0.1953125, "learning_rate": 0.007457627118644068, "loss": 2.0393, "step": 66 }, { "epoch": 0.006812664438145837, "grad_norm": 0.2138671875, "learning_rate": 0.007570621468926554, "loss": 2.067, "step": 67 }, { "epoch": 0.006914345996924133, "grad_norm": 0.2060546875, "learning_rate": 0.0076836158192090396, "loss": 2.1042, "step": 68 }, { "epoch": 0.007016027555702429, "grad_norm": 0.201171875, "learning_rate": 0.007796610169491526, "loss": 2.081, "step": 69 }, { "epoch": 0.007117709114480725, "grad_norm": 0.205078125, "learning_rate": 0.007909604519774013, "loss": 2.0768, "step": 70 }, { "epoch": 0.0072193906732590215, "grad_norm": 0.23828125, "learning_rate": 0.008022598870056498, "loss": 2.1325, "step": 71 }, { "epoch": 0.007321072232037317, "grad_norm": 0.220703125, "learning_rate": 0.008135593220338983, "loss": 2.1196, "step": 72 }, { "epoch": 0.007422753790815613, "grad_norm": 0.2314453125, "learning_rate": 0.008248587570621468, "loss": 2.1501, "step": 73 }, { "epoch": 0.00752443534959391, "grad_norm": 0.1953125, "learning_rate": 0.008361581920903955, "loss": 2.0384, "step": 74 }, { "epoch": 0.007626116908372205, "grad_norm": 0.2109375, "learning_rate": 0.00847457627118644, "loss": 2.0667, "step": 75 }, { "epoch": 0.007727798467150502, "grad_norm": 0.203125, "learning_rate": 0.008587570621468926, "loss": 2.1045, "step": 76 }, { "epoch": 0.007829480025928797, "grad_norm": 0.2080078125, "learning_rate": 0.008700564971751413, "loss": 2.077, "step": 77 }, { "epoch": 0.007931161584707094, "grad_norm": 0.212890625, "learning_rate": 0.008813559322033898, "loss": 2.0602, "step": 78 }, { "epoch": 0.00803284314348539, "grad_norm": 0.22265625, "learning_rate": 0.008926553672316385, "loss": 2.0987, "step": 79 }, { "epoch": 0.008134524702263685, "grad_norm": 0.2294921875, "learning_rate": 0.00903954802259887, "loss": 2.1236, "step": 80 }, { "epoch": 0.008236206261041983, "grad_norm": 0.21875, "learning_rate": 0.009152542372881356, "loss": 2.0861, "step": 81 }, { "epoch": 0.008337887819820278, "grad_norm": 0.19921875, "learning_rate": 0.009265536723163843, "loss": 2.0189, "step": 82 }, { "epoch": 0.008439569378598574, "grad_norm": 0.2421875, "learning_rate": 0.009378531073446328, "loss": 2.0509, "step": 83 }, { "epoch": 0.00854125093737687, "grad_norm": 0.2177734375, "learning_rate": 0.009491525423728815, "loss": 2.0923, "step": 84 }, { "epoch": 0.008642932496155166, "grad_norm": 0.2177734375, "learning_rate": 0.0096045197740113, "loss": 2.0488, "step": 85 }, { "epoch": 0.008744614054933462, "grad_norm": 0.2236328125, "learning_rate": 0.009717514124293785, "loss": 2.0658, "step": 86 }, { "epoch": 0.008846295613711759, "grad_norm": 0.26171875, "learning_rate": 0.009830508474576272, "loss": 2.032, "step": 87 }, { "epoch": 0.008947977172490055, "grad_norm": 0.232421875, "learning_rate": 0.009943502824858758, "loss": 2.0085, "step": 88 }, { "epoch": 0.00904965873126835, "grad_norm": 0.259765625, "learning_rate": 0.010056497175141245, "loss": 2.0155, "step": 89 }, { "epoch": 0.009151340290046646, "grad_norm": 0.2412109375, "learning_rate": 0.010169491525423728, "loss": 2.047, "step": 90 }, { "epoch": 0.009253021848824943, "grad_norm": 0.2431640625, "learning_rate": 0.010282485875706215, "loss": 2.0138, "step": 91 }, { "epoch": 0.009354703407603238, "grad_norm": 0.224609375, "learning_rate": 0.0103954802259887, "loss": 2.0701, "step": 92 }, { "epoch": 0.009456384966381534, "grad_norm": 0.23046875, "learning_rate": 0.010508474576271187, "loss": 1.9933, "step": 93 }, { "epoch": 0.009558066525159831, "grad_norm": 0.23046875, "learning_rate": 0.010621468926553673, "loss": 2.0354, "step": 94 }, { "epoch": 0.009659748083938127, "grad_norm": 0.2353515625, "learning_rate": 0.01073446327683616, "loss": 2.0852, "step": 95 }, { "epoch": 0.009761429642716422, "grad_norm": 0.25, "learning_rate": 0.010847457627118645, "loss": 2.0171, "step": 96 }, { "epoch": 0.00986311120149472, "grad_norm": 0.240234375, "learning_rate": 0.010960451977401128, "loss": 1.9798, "step": 97 }, { "epoch": 0.009964792760273015, "grad_norm": 0.2158203125, "learning_rate": 0.011073446327683615, "loss": 2.0219, "step": 98 }, { "epoch": 0.01006647431905131, "grad_norm": 0.2421875, "learning_rate": 0.0111864406779661, "loss": 2.0537, "step": 99 }, { "epoch": 0.010168155877829608, "grad_norm": 0.2392578125, "learning_rate": 0.011299435028248588, "loss": 2.124, "step": 100 }, { "epoch": 0.010269837436607903, "grad_norm": 0.259765625, "learning_rate": 0.011412429378531073, "loss": 2.0611, "step": 101 }, { "epoch": 0.010371518995386199, "grad_norm": 0.2294921875, "learning_rate": 0.01152542372881356, "loss": 2.0089, "step": 102 }, { "epoch": 0.010473200554164496, "grad_norm": 0.2275390625, "learning_rate": 0.011638418079096045, "loss": 2.0065, "step": 103 }, { "epoch": 0.010574882112942791, "grad_norm": 0.2412109375, "learning_rate": 0.011751412429378532, "loss": 1.9934, "step": 104 }, { "epoch": 0.010676563671721087, "grad_norm": 0.248046875, "learning_rate": 0.011864406779661017, "loss": 2.0497, "step": 105 }, { "epoch": 0.010778245230499384, "grad_norm": 0.23828125, "learning_rate": 0.011977401129943504, "loss": 2.0126, "step": 106 }, { "epoch": 0.01087992678927768, "grad_norm": 0.27734375, "learning_rate": 0.012090395480225988, "loss": 2.0156, "step": 107 }, { "epoch": 0.010981608348055975, "grad_norm": 0.2294921875, "learning_rate": 0.012203389830508476, "loss": 2.0018, "step": 108 }, { "epoch": 0.011083289906834272, "grad_norm": 0.294921875, "learning_rate": 0.01231638418079096, "loss": 1.99, "step": 109 }, { "epoch": 0.011184971465612568, "grad_norm": 0.296875, "learning_rate": 0.012429378531073447, "loss": 2.0445, "step": 110 }, { "epoch": 0.011286653024390863, "grad_norm": 0.32421875, "learning_rate": 0.012542372881355932, "loss": 2.0848, "step": 111 }, { "epoch": 0.01138833458316916, "grad_norm": 0.318359375, "learning_rate": 0.01265536723163842, "loss": 2.0249, "step": 112 }, { "epoch": 0.011490016141947456, "grad_norm": 0.318359375, "learning_rate": 0.012768361581920904, "loss": 2.0644, "step": 113 }, { "epoch": 0.011591697700725751, "grad_norm": 0.306640625, "learning_rate": 0.01288135593220339, "loss": 2.0365, "step": 114 }, { "epoch": 0.011693379259504049, "grad_norm": 0.3046875, "learning_rate": 0.012994350282485877, "loss": 2.0184, "step": 115 }, { "epoch": 0.011795060818282344, "grad_norm": 0.296875, "learning_rate": 0.01310734463276836, "loss": 2.0587, "step": 116 }, { "epoch": 0.01189674237706064, "grad_norm": 0.265625, "learning_rate": 0.013220338983050847, "loss": 2.0332, "step": 117 }, { "epoch": 0.011998423935838937, "grad_norm": 0.267578125, "learning_rate": 0.013333333333333332, "loss": 1.9925, "step": 118 }, { "epoch": 0.012100105494617232, "grad_norm": 0.29296875, "learning_rate": 0.01344632768361582, "loss": 2.008, "step": 119 }, { "epoch": 0.012201787053395528, "grad_norm": 0.279296875, "learning_rate": 0.013559322033898305, "loss": 2.0001, "step": 120 }, { "epoch": 0.012303468612173825, "grad_norm": 0.271484375, "learning_rate": 0.013672316384180792, "loss": 2.0134, "step": 121 }, { "epoch": 0.01240515017095212, "grad_norm": 0.28125, "learning_rate": 0.013785310734463277, "loss": 2.0145, "step": 122 }, { "epoch": 0.012506831729730416, "grad_norm": 0.306640625, "learning_rate": 0.013898305084745764, "loss": 2.0826, "step": 123 }, { "epoch": 0.012608513288508713, "grad_norm": 0.30859375, "learning_rate": 0.01401129943502825, "loss": 2.0491, "step": 124 }, { "epoch": 0.012710194847287009, "grad_norm": 0.306640625, "learning_rate": 0.014124293785310736, "loss": 2.0546, "step": 125 }, { "epoch": 0.012811876406065304, "grad_norm": 0.3046875, "learning_rate": 0.01423728813559322, "loss": 1.9846, "step": 126 }, { "epoch": 0.012913557964843602, "grad_norm": 0.2890625, "learning_rate": 0.014350282485875707, "loss": 2.0048, "step": 127 }, { "epoch": 0.013015239523621897, "grad_norm": 0.2734375, "learning_rate": 0.014463276836158192, "loss": 2.0125, "step": 128 }, { "epoch": 0.013116921082400193, "grad_norm": 0.294921875, "learning_rate": 0.014576271186440677, "loss": 2.0268, "step": 129 }, { "epoch": 0.01321860264117849, "grad_norm": 0.279296875, "learning_rate": 0.014689265536723164, "loss": 2.0574, "step": 130 }, { "epoch": 0.013320284199956785, "grad_norm": 0.3046875, "learning_rate": 0.01480225988700565, "loss": 2.0416, "step": 131 }, { "epoch": 0.013421965758735081, "grad_norm": 0.294921875, "learning_rate": 0.014915254237288136, "loss": 2.0474, "step": 132 }, { "epoch": 0.013523647317513378, "grad_norm": 0.3125, "learning_rate": 0.015028248587570622, "loss": 2.0576, "step": 133 }, { "epoch": 0.013625328876291674, "grad_norm": 0.30078125, "learning_rate": 0.015141242937853109, "loss": 1.9987, "step": 134 }, { "epoch": 0.01372701043506997, "grad_norm": 0.30859375, "learning_rate": 0.015254237288135592, "loss": 1.9953, "step": 135 }, { "epoch": 0.013828691993848266, "grad_norm": 0.29296875, "learning_rate": 0.015367231638418079, "loss": 2.0199, "step": 136 }, { "epoch": 0.013930373552626562, "grad_norm": 0.3203125, "learning_rate": 0.015480225988700564, "loss": 2.0736, "step": 137 }, { "epoch": 0.014032055111404857, "grad_norm": 0.337890625, "learning_rate": 0.015593220338983051, "loss": 1.9911, "step": 138 }, { "epoch": 0.014133736670183155, "grad_norm": 0.34375, "learning_rate": 0.015706214689265537, "loss": 2.0036, "step": 139 }, { "epoch": 0.01423541822896145, "grad_norm": 0.31640625, "learning_rate": 0.015819209039548025, "loss": 2.0224, "step": 140 }, { "epoch": 0.014337099787739746, "grad_norm": 0.30078125, "learning_rate": 0.015932203389830507, "loss": 1.9657, "step": 141 }, { "epoch": 0.014438781346518043, "grad_norm": 0.314453125, "learning_rate": 0.016045197740112996, "loss": 2.039, "step": 142 }, { "epoch": 0.014540462905296338, "grad_norm": 0.30859375, "learning_rate": 0.01615819209039548, "loss": 2.0156, "step": 143 }, { "epoch": 0.014642144464074634, "grad_norm": 0.31640625, "learning_rate": 0.016271186440677966, "loss": 2.0155, "step": 144 }, { "epoch": 0.014743826022852931, "grad_norm": 0.296875, "learning_rate": 0.01638418079096045, "loss": 1.9882, "step": 145 }, { "epoch": 0.014845507581631227, "grad_norm": 0.310546875, "learning_rate": 0.016497175141242937, "loss": 1.96, "step": 146 }, { "epoch": 0.014947189140409522, "grad_norm": 0.296875, "learning_rate": 0.016610169491525426, "loss": 2.0671, "step": 147 }, { "epoch": 0.01504887069918782, "grad_norm": 0.294921875, "learning_rate": 0.01672316384180791, "loss": 2.0444, "step": 148 }, { "epoch": 0.015150552257966115, "grad_norm": 0.267578125, "learning_rate": 0.016836158192090396, "loss": 2.0075, "step": 149 }, { "epoch": 0.01525223381674441, "grad_norm": 0.279296875, "learning_rate": 0.01694915254237288, "loss": 1.958, "step": 150 }, { "epoch": 0.015353915375522706, "grad_norm": 0.271484375, "learning_rate": 0.017062146892655367, "loss": 1.97, "step": 151 }, { "epoch": 0.015455596934301003, "grad_norm": 0.3125, "learning_rate": 0.017175141242937852, "loss": 1.9909, "step": 152 }, { "epoch": 0.015557278493079299, "grad_norm": 0.26953125, "learning_rate": 0.01728813559322034, "loss": 1.9762, "step": 153 }, { "epoch": 0.015658960051857594, "grad_norm": 0.287109375, "learning_rate": 0.017401129943502826, "loss": 1.9928, "step": 154 }, { "epoch": 0.01576064161063589, "grad_norm": 0.271484375, "learning_rate": 0.01751412429378531, "loss": 1.9856, "step": 155 }, { "epoch": 0.01586232316941419, "grad_norm": 0.302734375, "learning_rate": 0.017627118644067796, "loss": 2.0208, "step": 156 }, { "epoch": 0.015964004728192482, "grad_norm": 0.30078125, "learning_rate": 0.017740112994350285, "loss": 2.064, "step": 157 }, { "epoch": 0.01606568628697078, "grad_norm": 0.3046875, "learning_rate": 0.01785310734463277, "loss": 1.9849, "step": 158 }, { "epoch": 0.016167367845749077, "grad_norm": 0.2734375, "learning_rate": 0.017966101694915255, "loss": 1.9804, "step": 159 }, { "epoch": 0.01626904940452737, "grad_norm": 0.27734375, "learning_rate": 0.01807909604519774, "loss": 1.9864, "step": 160 }, { "epoch": 0.016370730963305668, "grad_norm": 0.296875, "learning_rate": 0.018192090395480226, "loss": 2.0279, "step": 161 }, { "epoch": 0.016472412522083965, "grad_norm": 0.326171875, "learning_rate": 0.01830508474576271, "loss": 2.0376, "step": 162 }, { "epoch": 0.01657409408086226, "grad_norm": 0.291015625, "learning_rate": 0.018418079096045196, "loss": 2.0915, "step": 163 }, { "epoch": 0.016675775639640556, "grad_norm": 0.2890625, "learning_rate": 0.018531073446327685, "loss": 2.0137, "step": 164 }, { "epoch": 0.016777457198418853, "grad_norm": 0.2890625, "learning_rate": 0.01864406779661017, "loss": 1.9784, "step": 165 }, { "epoch": 0.016879138757197147, "grad_norm": 0.28515625, "learning_rate": 0.018757062146892656, "loss": 2.0182, "step": 166 }, { "epoch": 0.016980820315975444, "grad_norm": 0.275390625, "learning_rate": 0.01887005649717514, "loss": 1.9851, "step": 167 }, { "epoch": 0.01708250187475374, "grad_norm": 0.302734375, "learning_rate": 0.01898305084745763, "loss": 2.0214, "step": 168 }, { "epoch": 0.017184183433532035, "grad_norm": 0.294921875, "learning_rate": 0.01909604519774011, "loss": 2.0199, "step": 169 }, { "epoch": 0.017285864992310333, "grad_norm": 0.310546875, "learning_rate": 0.0192090395480226, "loss": 2.0289, "step": 170 }, { "epoch": 0.01738754655108863, "grad_norm": 0.298828125, "learning_rate": 0.019322033898305085, "loss": 1.9159, "step": 171 }, { "epoch": 0.017489228109866924, "grad_norm": 0.34765625, "learning_rate": 0.01943502824858757, "loss": 2.0473, "step": 172 }, { "epoch": 0.01759090966864522, "grad_norm": 0.33984375, "learning_rate": 0.019548022598870056, "loss": 2.0882, "step": 173 }, { "epoch": 0.017692591227423518, "grad_norm": 0.349609375, "learning_rate": 0.019661016949152545, "loss": 1.9956, "step": 174 }, { "epoch": 0.017794272786201812, "grad_norm": 0.318359375, "learning_rate": 0.01977401129943503, "loss": 1.9571, "step": 175 }, { "epoch": 0.01789595434498011, "grad_norm": 0.369140625, "learning_rate": 0.019887005649717515, "loss": 2.0089, "step": 176 }, { "epoch": 0.017997635903758406, "grad_norm": 0.32421875, "learning_rate": 0.02, "loss": 2.0011, "step": 177 }, { "epoch": 0.0180993174625367, "grad_norm": 0.330078125, "learning_rate": 0.02, "loss": 2.007, "step": 178 }, { "epoch": 0.018200999021314997, "grad_norm": 0.306640625, "learning_rate": 0.02, "loss": 1.9815, "step": 179 }, { "epoch": 0.01830268058009329, "grad_norm": 0.345703125, "learning_rate": 0.02, "loss": 2.062, "step": 180 }, { "epoch": 0.01840436213887159, "grad_norm": 0.310546875, "learning_rate": 0.02, "loss": 1.9555, "step": 181 }, { "epoch": 0.018506043697649886, "grad_norm": 0.32421875, "learning_rate": 0.02, "loss": 2.0017, "step": 182 }, { "epoch": 0.01860772525642818, "grad_norm": 0.32421875, "learning_rate": 0.02, "loss": 2.0067, "step": 183 }, { "epoch": 0.018709406815206477, "grad_norm": 0.3359375, "learning_rate": 0.02, "loss": 2.0034, "step": 184 }, { "epoch": 0.018811088373984774, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 1.9166, "step": 185 }, { "epoch": 0.018912769932763068, "grad_norm": 0.310546875, "learning_rate": 0.02, "loss": 2.0066, "step": 186 }, { "epoch": 0.019014451491541365, "grad_norm": 0.30078125, "learning_rate": 0.02, "loss": 1.9807, "step": 187 }, { "epoch": 0.019116133050319662, "grad_norm": 0.294921875, "learning_rate": 0.02, "loss": 1.9777, "step": 188 }, { "epoch": 0.019217814609097956, "grad_norm": 0.302734375, "learning_rate": 0.02, "loss": 2.005, "step": 189 }, { "epoch": 0.019319496167876253, "grad_norm": 0.3046875, "learning_rate": 0.02, "loss": 2.0068, "step": 190 }, { "epoch": 0.01942117772665455, "grad_norm": 0.29296875, "learning_rate": 0.02, "loss": 2.0115, "step": 191 }, { "epoch": 0.019522859285432844, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 2.0295, "step": 192 }, { "epoch": 0.01962454084421114, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 1.9956, "step": 193 }, { "epoch": 0.01972622240298944, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.9922, "step": 194 }, { "epoch": 0.019827903961767732, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 2.0226, "step": 195 }, { "epoch": 0.01992958552054603, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 2.0312, "step": 196 }, { "epoch": 0.020031267079324327, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.971, "step": 197 }, { "epoch": 0.02013294863810262, "grad_norm": 0.29296875, "learning_rate": 0.02, "loss": 1.9968, "step": 198 }, { "epoch": 0.020234630196880918, "grad_norm": 0.3203125, "learning_rate": 0.02, "loss": 2.0127, "step": 199 }, { "epoch": 0.020336311755659215, "grad_norm": 0.34765625, "learning_rate": 0.02, "loss": 1.97, "step": 200 }, { "epoch": 0.02043799331443751, "grad_norm": 0.345703125, "learning_rate": 0.02, "loss": 2.0109, "step": 201 }, { "epoch": 0.020539674873215806, "grad_norm": 0.365234375, "learning_rate": 0.02, "loss": 2.034, "step": 202 }, { "epoch": 0.020641356431994103, "grad_norm": 0.38671875, "learning_rate": 0.02, "loss": 2.1007, "step": 203 }, { "epoch": 0.020743037990772397, "grad_norm": 0.32421875, "learning_rate": 0.02, "loss": 1.9621, "step": 204 }, { "epoch": 0.020844719549550694, "grad_norm": 0.3046875, "learning_rate": 0.02, "loss": 1.98, "step": 205 }, { "epoch": 0.02094640110832899, "grad_norm": 0.283203125, "learning_rate": 0.02, "loss": 1.9535, "step": 206 }, { "epoch": 0.021048082667107285, "grad_norm": 0.3046875, "learning_rate": 0.02, "loss": 1.982, "step": 207 }, { "epoch": 0.021149764225885583, "grad_norm": 0.28515625, "learning_rate": 0.02, "loss": 1.9799, "step": 208 }, { "epoch": 0.02125144578466388, "grad_norm": 0.3203125, "learning_rate": 0.02, "loss": 1.9682, "step": 209 }, { "epoch": 0.021353127343442174, "grad_norm": 0.298828125, "learning_rate": 0.02, "loss": 2.0116, "step": 210 }, { "epoch": 0.02145480890222047, "grad_norm": 0.29296875, "learning_rate": 0.02, "loss": 1.982, "step": 211 }, { "epoch": 0.021556490460998768, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.9805, "step": 212 }, { "epoch": 0.021658172019777062, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.9555, "step": 213 }, { "epoch": 0.02175985357855536, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.9305, "step": 214 }, { "epoch": 0.021861535137333656, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.9542, "step": 215 }, { "epoch": 0.02196321669611195, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.931, "step": 216 }, { "epoch": 0.022064898254890247, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 2.0024, "step": 217 }, { "epoch": 0.022166579813668544, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 1.9948, "step": 218 }, { "epoch": 0.022268261372446838, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 2.0353, "step": 219 }, { "epoch": 0.022369942931225135, "grad_norm": 0.283203125, "learning_rate": 0.02, "loss": 1.9739, "step": 220 }, { "epoch": 0.022471624490003433, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 2.0091, "step": 221 }, { "epoch": 0.022573306048781727, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 2.0047, "step": 222 }, { "epoch": 0.022674987607560024, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.9813, "step": 223 }, { "epoch": 0.02277666916633832, "grad_norm": 0.30078125, "learning_rate": 0.02, "loss": 1.9593, "step": 224 }, { "epoch": 0.022878350725116615, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.9584, "step": 225 }, { "epoch": 0.022980032283894912, "grad_norm": 0.29296875, "learning_rate": 0.02, "loss": 1.9614, "step": 226 }, { "epoch": 0.02308171384267321, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.9228, "step": 227 }, { "epoch": 0.023183395401451503, "grad_norm": 0.291015625, "learning_rate": 0.02, "loss": 1.9252, "step": 228 }, { "epoch": 0.0232850769602298, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.955, "step": 229 }, { "epoch": 0.023386758519008097, "grad_norm": 0.302734375, "learning_rate": 0.02, "loss": 1.9955, "step": 230 }, { "epoch": 0.02348844007778639, "grad_norm": 0.279296875, "learning_rate": 0.02, "loss": 2.006, "step": 231 }, { "epoch": 0.02359012163656469, "grad_norm": 0.306640625, "learning_rate": 0.02, "loss": 2.0029, "step": 232 }, { "epoch": 0.023691803195342986, "grad_norm": 0.29296875, "learning_rate": 0.02, "loss": 2.0084, "step": 233 }, { "epoch": 0.02379348475412128, "grad_norm": 0.30078125, "learning_rate": 0.02, "loss": 2.0177, "step": 234 }, { "epoch": 0.023895166312899577, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 2.0138, "step": 235 }, { "epoch": 0.023996847871677874, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.9433, "step": 236 }, { "epoch": 0.024098529430456168, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.9131, "step": 237 }, { "epoch": 0.024200210989234465, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.9632, "step": 238 }, { "epoch": 0.024301892548012762, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 1.9729, "step": 239 }, { "epoch": 0.024403574106791056, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.9081, "step": 240 }, { "epoch": 0.024505255665569353, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.938, "step": 241 }, { "epoch": 0.02460693722434765, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.9033, "step": 242 }, { "epoch": 0.024708618783125944, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.9676, "step": 243 }, { "epoch": 0.02481030034190424, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.9472, "step": 244 }, { "epoch": 0.02491198190068254, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.9551, "step": 245 }, { "epoch": 0.025013663459460832, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.9475, "step": 246 }, { "epoch": 0.02511534501823913, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.9453, "step": 247 }, { "epoch": 0.025217026577017427, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.9622, "step": 248 }, { "epoch": 0.02531870813579572, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.952, "step": 249 }, { "epoch": 0.025420389694574018, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.9272, "step": 250 }, { "epoch": 0.025522071253352315, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.9224, "step": 251 }, { "epoch": 0.02562375281213061, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.9779, "step": 252 }, { "epoch": 0.025725434370908906, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 1.981, "step": 253 }, { "epoch": 0.025827115929687203, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.9122, "step": 254 }, { "epoch": 0.025928797488465497, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.9942, "step": 255 }, { "epoch": 0.026030479047243794, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 1.965, "step": 256 }, { "epoch": 0.02613216060602209, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.9614, "step": 257 }, { "epoch": 0.026233842164800385, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.9624, "step": 258 }, { "epoch": 0.026335523723578683, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.9564, "step": 259 }, { "epoch": 0.02643720528235698, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.9381, "step": 260 }, { "epoch": 0.026538886841135274, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.9782, "step": 261 }, { "epoch": 0.02664056839991357, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 2.0137, "step": 262 }, { "epoch": 0.026742249958691868, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.9617, "step": 263 }, { "epoch": 0.026843931517470162, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.9304, "step": 264 }, { "epoch": 0.02694561307624846, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.9319, "step": 265 }, { "epoch": 0.027047294635026756, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 2.0122, "step": 266 }, { "epoch": 0.02714897619380505, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.9253, "step": 267 }, { "epoch": 0.027250657752583347, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 2.0081, "step": 268 }, { "epoch": 0.027352339311361645, "grad_norm": 0.2890625, "learning_rate": 0.02, "loss": 1.997, "step": 269 }, { "epoch": 0.02745402087013994, "grad_norm": 0.279296875, "learning_rate": 0.02, "loss": 1.883, "step": 270 }, { "epoch": 0.027555702428918236, "grad_norm": 0.291015625, "learning_rate": 0.02, "loss": 1.9693, "step": 271 }, { "epoch": 0.027657383987696533, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 1.9459, "step": 272 }, { "epoch": 0.027759065546474827, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.9048, "step": 273 }, { "epoch": 0.027860747105253124, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 1.9607, "step": 274 }, { "epoch": 0.02796242866403142, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.9306, "step": 275 }, { "epoch": 0.028064110222809715, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.9706, "step": 276 }, { "epoch": 0.028165791781588012, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.9179, "step": 277 }, { "epoch": 0.02826747334036631, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.9612, "step": 278 }, { "epoch": 0.028369154899144603, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.9409, "step": 279 }, { "epoch": 0.0284708364579229, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.9908, "step": 280 }, { "epoch": 0.028572518016701198, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.8841, "step": 281 }, { "epoch": 0.02867419957547949, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.8824, "step": 282 }, { "epoch": 0.02877588113425779, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.9361, "step": 283 }, { "epoch": 0.028877562693036086, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.92, "step": 284 }, { "epoch": 0.02897924425181438, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.9297, "step": 285 }, { "epoch": 0.029080925810592677, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.8725, "step": 286 }, { "epoch": 0.029182607369370974, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.9288, "step": 287 }, { "epoch": 0.029284288928149268, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.9688, "step": 288 }, { "epoch": 0.029385970486927565, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.9505, "step": 289 }, { "epoch": 0.029487652045705862, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.9474, "step": 290 }, { "epoch": 0.029589333604484156, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.8796, "step": 291 }, { "epoch": 0.029691015163262453, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 2.0346, "step": 292 }, { "epoch": 0.02979269672204075, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.9779, "step": 293 }, { "epoch": 0.029894378280819044, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.9338, "step": 294 }, { "epoch": 0.02999605983959734, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.9454, "step": 295 }, { "epoch": 0.03009774139837564, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.9089, "step": 296 }, { "epoch": 0.030199422957153933, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.9519, "step": 297 }, { "epoch": 0.03030110451593223, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.9262, "step": 298 }, { "epoch": 0.030402786074710524, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.9226, "step": 299 }, { "epoch": 0.03050446763348882, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.8828, "step": 300 }, { "epoch": 0.030606149192267118, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.9003, "step": 301 }, { "epoch": 0.030707830751045412, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.9077, "step": 302 }, { "epoch": 0.03080951230982371, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 2.0387, "step": 303 }, { "epoch": 0.030911193868602006, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.8985, "step": 304 }, { "epoch": 0.0310128754273803, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.9086, "step": 305 }, { "epoch": 0.031114556986158597, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 2.0058, "step": 306 }, { "epoch": 0.031216238544936895, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 2.0162, "step": 307 }, { "epoch": 0.03131792010371519, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.9123, "step": 308 }, { "epoch": 0.03141960166249349, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.9021, "step": 309 }, { "epoch": 0.03152128322127178, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.899, "step": 310 }, { "epoch": 0.03162296478005008, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.8917, "step": 311 }, { "epoch": 0.03172464633882838, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 2.0207, "step": 312 }, { "epoch": 0.03182632789760667, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.9945, "step": 313 }, { "epoch": 0.031928009456384965, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.9405, "step": 314 }, { "epoch": 0.032029691015163265, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.879, "step": 315 }, { "epoch": 0.03213137257394156, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.9376, "step": 316 }, { "epoch": 0.03223305413271985, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.9242, "step": 317 }, { "epoch": 0.032334735691498154, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.9547, "step": 318 }, { "epoch": 0.03243641725027645, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.881, "step": 319 }, { "epoch": 0.03253809880905474, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 2.008, "step": 320 }, { "epoch": 0.03263978036783304, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.9909, "step": 321 }, { "epoch": 0.032741461926611336, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.9824, "step": 322 }, { "epoch": 0.03284314348538963, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.9, "step": 323 }, { "epoch": 0.03294482504416793, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.9193, "step": 324 }, { "epoch": 0.033046506602946224, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.9114, "step": 325 }, { "epoch": 0.03314818816172452, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.8746, "step": 326 }, { "epoch": 0.03324986972050282, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.9326, "step": 327 }, { "epoch": 0.03335155127928111, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.885, "step": 328 }, { "epoch": 0.033453232838059406, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.863, "step": 329 }, { "epoch": 0.03355491439683771, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.9164, "step": 330 }, { "epoch": 0.033656595955616, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.8594, "step": 331 }, { "epoch": 0.033758277514394294, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.8836, "step": 332 }, { "epoch": 0.033859959073172595, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.9283, "step": 333 }, { "epoch": 0.03396164063195089, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.8882, "step": 334 }, { "epoch": 0.03406332219072918, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.8417, "step": 335 }, { "epoch": 0.03416500374950748, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.8984, "step": 336 }, { "epoch": 0.03426668530828578, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.867, "step": 337 }, { "epoch": 0.03436836686706407, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.9648, "step": 338 }, { "epoch": 0.03447004842584237, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.8923, "step": 339 }, { "epoch": 0.034571729984620665, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.8713, "step": 340 }, { "epoch": 0.03467341154339896, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.9215, "step": 341 }, { "epoch": 0.03477509310217726, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.9153, "step": 342 }, { "epoch": 0.03487677466095555, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.8511, "step": 343 }, { "epoch": 0.03497845621973385, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.9254, "step": 344 }, { "epoch": 0.03508013777851215, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.9455, "step": 345 }, { "epoch": 0.03518181933729044, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.8929, "step": 346 }, { "epoch": 0.035283500896068735, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.9297, "step": 347 }, { "epoch": 0.035385182454847036, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.9091, "step": 348 }, { "epoch": 0.03548686401362533, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.9613, "step": 349 }, { "epoch": 0.035588545572403624, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.9152, "step": 350 }, { "epoch": 0.035690227131181924, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.9256, "step": 351 }, { "epoch": 0.03579190868996022, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.8975, "step": 352 }, { "epoch": 0.03589359024873851, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.9908, "step": 353 }, { "epoch": 0.03599527180751681, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.8537, "step": 354 }, { "epoch": 0.036096953366295106, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.9156, "step": 355 }, { "epoch": 0.0361986349250734, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.91, "step": 356 }, { "epoch": 0.0363003164838517, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.9222, "step": 357 }, { "epoch": 0.036401998042629995, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.8976, "step": 358 }, { "epoch": 0.03650367960140829, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.898, "step": 359 }, { "epoch": 0.03660536116018658, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.9171, "step": 360 }, { "epoch": 0.03670704271896488, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.9262, "step": 361 }, { "epoch": 0.03680872427774318, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.9066, "step": 362 }, { "epoch": 0.03691040583652147, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.8847, "step": 363 }, { "epoch": 0.03701208739529977, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.9028, "step": 364 }, { "epoch": 0.037113768954078065, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.8746, "step": 365 }, { "epoch": 0.03721545051285636, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.8823, "step": 366 }, { "epoch": 0.03731713207163466, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.9134, "step": 367 }, { "epoch": 0.03741881363041295, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.9073, "step": 368 }, { "epoch": 0.03752049518919125, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.8681, "step": 369 }, { "epoch": 0.03762217674796955, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.9754, "step": 370 }, { "epoch": 0.03772385830674784, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.9868, "step": 371 }, { "epoch": 0.037825539865526135, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.9113, "step": 372 }, { "epoch": 0.037927221424304436, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.8791, "step": 373 }, { "epoch": 0.03802890298308273, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.9005, "step": 374 }, { "epoch": 0.03813058454186102, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.8517, "step": 375 }, { "epoch": 0.038232266100639324, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.8787, "step": 376 }, { "epoch": 0.03833394765941762, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.8634, "step": 377 }, { "epoch": 0.03843562921819591, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.8804, "step": 378 }, { "epoch": 0.03853731077697421, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.8802, "step": 379 }, { "epoch": 0.038638992335752506, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.8983, "step": 380 }, { "epoch": 0.0387406738945308, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.8915, "step": 381 }, { "epoch": 0.0388423554533091, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.9942, "step": 382 }, { "epoch": 0.038944037012087394, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.852, "step": 383 }, { "epoch": 0.03904571857086569, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.8505, "step": 384 }, { "epoch": 0.03914740012964399, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.8551, "step": 385 }, { "epoch": 0.03924908168842228, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.864, "step": 386 }, { "epoch": 0.039350763247200576, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.8806, "step": 387 }, { "epoch": 0.03945244480597888, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.8932, "step": 388 }, { "epoch": 0.03955412636475717, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.9155, "step": 389 }, { "epoch": 0.039655807923535465, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.9133, "step": 390 }, { "epoch": 0.039757489482313765, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.9024, "step": 391 }, { "epoch": 0.03985917104109206, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.9514, "step": 392 }, { "epoch": 0.03996085259987035, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.8982, "step": 393 }, { "epoch": 0.040062534158648654, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.914, "step": 394 }, { "epoch": 0.04016421571742695, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.9033, "step": 395 }, { "epoch": 0.04026589727620524, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.8651, "step": 396 }, { "epoch": 0.04036757883498354, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.9016, "step": 397 }, { "epoch": 0.040469260393761836, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.9074, "step": 398 }, { "epoch": 0.04057094195254013, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.9165, "step": 399 }, { "epoch": 0.04067262351131843, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.8182, "step": 400 }, { "epoch": 0.040774305070096724, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.8673, "step": 401 }, { "epoch": 0.04087598662887502, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.8879, "step": 402 }, { "epoch": 0.04097766818765332, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.8446, "step": 403 }, { "epoch": 0.04107934974643161, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.8926, "step": 404 }, { "epoch": 0.041181031305209906, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.8921, "step": 405 }, { "epoch": 0.04128271286398821, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.8637, "step": 406 }, { "epoch": 0.0413843944227665, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.937, "step": 407 }, { "epoch": 0.041486075981544794, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.9151, "step": 408 }, { "epoch": 0.041587757540323095, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.8858, "step": 409 }, { "epoch": 0.04168943909910139, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.8709, "step": 410 }, { "epoch": 0.04179112065787968, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.8554, "step": 411 }, { "epoch": 0.04189280221665798, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 2.001, "step": 412 }, { "epoch": 0.04199448377543628, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.8804, "step": 413 }, { "epoch": 0.04209616533421457, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.7878, "step": 414 }, { "epoch": 0.04219784689299287, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.9061, "step": 415 }, { "epoch": 0.042299528451771165, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.9463, "step": 416 }, { "epoch": 0.04240121001054946, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.8504, "step": 417 }, { "epoch": 0.04250289156932776, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.8812, "step": 418 }, { "epoch": 0.04260457312810605, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.8442, "step": 419 }, { "epoch": 0.04270625468688435, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.9442, "step": 420 }, { "epoch": 0.04280793624566265, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.9365, "step": 421 }, { "epoch": 0.04290961780444094, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.8515, "step": 422 }, { "epoch": 0.043011299363219235, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.887, "step": 423 }, { "epoch": 0.043112980921997536, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.934, "step": 424 }, { "epoch": 0.04321466248077583, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.8694, "step": 425 }, { "epoch": 0.043316344039554124, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.8941, "step": 426 }, { "epoch": 0.043418025598332424, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.8909, "step": 427 }, { "epoch": 0.04351970715711072, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.9057, "step": 428 }, { "epoch": 0.04362138871588901, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.8465, "step": 429 }, { "epoch": 0.04372307027466731, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.8173, "step": 430 }, { "epoch": 0.043824751833445606, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.9444, "step": 431 }, { "epoch": 0.0439264333922239, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.8878, "step": 432 }, { "epoch": 0.0440281149510022, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.9324, "step": 433 }, { "epoch": 0.044129796509780495, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.8873, "step": 434 }, { "epoch": 0.04423147806855879, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.9502, "step": 435 }, { "epoch": 0.04433315962733709, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.9115, "step": 436 }, { "epoch": 0.04443484118611538, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.845, "step": 437 }, { "epoch": 0.044536522744893677, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.956, "step": 438 }, { "epoch": 0.04463820430367198, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.8409, "step": 439 }, { "epoch": 0.04473988586245027, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.9024, "step": 440 }, { "epoch": 0.044841567421228565, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.8784, "step": 441 }, { "epoch": 0.044943248980006865, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.8811, "step": 442 }, { "epoch": 0.04504493053878516, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.9148, "step": 443 }, { "epoch": 0.04514661209756345, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.8644, "step": 444 }, { "epoch": 0.045248293656341754, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.9102, "step": 445 }, { "epoch": 0.04534997521512005, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.8952, "step": 446 }, { "epoch": 0.04545165677389834, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.9325, "step": 447 }, { "epoch": 0.04555333833267664, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.868, "step": 448 }, { "epoch": 0.045655019891454936, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.9058, "step": 449 }, { "epoch": 0.04575670145023323, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.9316, "step": 450 }, { "epoch": 0.04585838300901153, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.8801, "step": 451 }, { "epoch": 0.045960064567789824, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.8815, "step": 452 }, { "epoch": 0.04606174612656812, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.8847, "step": 453 }, { "epoch": 0.04616342768534642, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.9624, "step": 454 }, { "epoch": 0.04626510924412471, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.9427, "step": 455 }, { "epoch": 0.046366790802903006, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.8908, "step": 456 }, { "epoch": 0.04646847236168131, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.9354, "step": 457 }, { "epoch": 0.0465701539204596, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.8774, "step": 458 }, { "epoch": 0.046671835479237894, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.8669, "step": 459 }, { "epoch": 0.046773517038016195, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.9547, "step": 460 }, { "epoch": 0.04687519859679449, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.8196, "step": 461 }, { "epoch": 0.04697688015557278, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.9181, "step": 462 }, { "epoch": 0.04707856171435108, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.8848, "step": 463 }, { "epoch": 0.04718024327312938, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.8992, "step": 464 }, { "epoch": 0.04728192483190767, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.8566, "step": 465 }, { "epoch": 0.04738360639068597, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.8819, "step": 466 }, { "epoch": 0.047485287949464265, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8654, "step": 467 }, { "epoch": 0.04758696950824256, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.9501, "step": 468 }, { "epoch": 0.04768865106702086, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.885, "step": 469 }, { "epoch": 0.04779033262579915, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.8242, "step": 470 }, { "epoch": 0.04789201418457745, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.9175, "step": 471 }, { "epoch": 0.04799369574335575, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.9177, "step": 472 }, { "epoch": 0.04809537730213404, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.8493, "step": 473 }, { "epoch": 0.048197058860912335, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.8796, "step": 474 }, { "epoch": 0.048298740419690636, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.8145, "step": 475 }, { "epoch": 0.04840042197846893, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.8888, "step": 476 }, { "epoch": 0.048502103537247224, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.9088, "step": 477 }, { "epoch": 0.048603785096025524, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.9166, "step": 478 }, { "epoch": 0.04870546665480382, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8738, "step": 479 }, { "epoch": 0.04880714821358211, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.8741, "step": 480 }, { "epoch": 0.04890882977236041, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.9206, "step": 481 }, { "epoch": 0.049010511331138706, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.898, "step": 482 }, { "epoch": 0.049112192889917, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.9377, "step": 483 }, { "epoch": 0.0492138744486953, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.8967, "step": 484 }, { "epoch": 0.049315556007473595, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.8982, "step": 485 }, { "epoch": 0.04941723756625189, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.8574, "step": 486 }, { "epoch": 0.04951891912503019, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.85, "step": 487 }, { "epoch": 0.04962060068380848, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.8345, "step": 488 }, { "epoch": 0.04972228224258678, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.8557, "step": 489 }, { "epoch": 0.04982396380136508, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.8801, "step": 490 }, { "epoch": 0.04992564536014337, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.8832, "step": 491 }, { "epoch": 0.050027326918921665, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.9192, "step": 492 }, { "epoch": 0.050129008477699966, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.9003, "step": 493 }, { "epoch": 0.05023069003647826, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.8599, "step": 494 }, { "epoch": 0.05033237159525655, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.8821, "step": 495 }, { "epoch": 0.050434053154034854, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.8741, "step": 496 }, { "epoch": 0.05053573471281315, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.8974, "step": 497 }, { "epoch": 0.05063741627159144, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.8582, "step": 498 }, { "epoch": 0.05073909783036974, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.8548, "step": 499 }, { "epoch": 0.050840779389148036, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.8275, "step": 500 }, { "epoch": 0.05094246094792633, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.8852, "step": 501 }, { "epoch": 0.05104414250670463, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.8189, "step": 502 }, { "epoch": 0.051145824065482924, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.8342, "step": 503 }, { "epoch": 0.05124750562426122, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.8366, "step": 504 }, { "epoch": 0.05134918718303952, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.8983, "step": 505 }, { "epoch": 0.05145086874181781, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.8734, "step": 506 }, { "epoch": 0.051552550300596106, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.8972, "step": 507 }, { "epoch": 0.05165423185937441, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.9273, "step": 508 }, { "epoch": 0.0517559134181527, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.8707, "step": 509 }, { "epoch": 0.051857594976930994, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.96, "step": 510 }, { "epoch": 0.051959276535709295, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.8575, "step": 511 }, { "epoch": 0.05206095809448759, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.8271, "step": 512 }, { "epoch": 0.05216263965326588, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.8723, "step": 513 }, { "epoch": 0.05226432121204418, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.8493, "step": 514 }, { "epoch": 0.05236600277082248, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.8842, "step": 515 }, { "epoch": 0.05246768432960077, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.8515, "step": 516 }, { "epoch": 0.05256936588837907, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.8979, "step": 517 }, { "epoch": 0.052671047447157365, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.9194, "step": 518 }, { "epoch": 0.05277272900593566, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.9144, "step": 519 }, { "epoch": 0.05287441056471396, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.8921, "step": 520 }, { "epoch": 0.052976092123492254, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.8319, "step": 521 }, { "epoch": 0.05307777368227055, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.8916, "step": 522 }, { "epoch": 0.05317945524104885, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.8827, "step": 523 }, { "epoch": 0.05328113679982714, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.8601, "step": 524 }, { "epoch": 0.053382818358605436, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.8542, "step": 525 }, { "epoch": 0.053484499917383736, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.8244, "step": 526 }, { "epoch": 0.05358618147616203, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.8316, "step": 527 }, { "epoch": 0.053687863034940324, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.8592, "step": 528 }, { "epoch": 0.053789544593718625, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.8283, "step": 529 }, { "epoch": 0.05389122615249692, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.926, "step": 530 }, { "epoch": 0.05399290771127521, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.8904, "step": 531 }, { "epoch": 0.05409458927005351, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.9216, "step": 532 }, { "epoch": 0.054196270828831807, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.8909, "step": 533 }, { "epoch": 0.0542979523876101, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.8765, "step": 534 }, { "epoch": 0.0543996339463884, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.8119, "step": 535 }, { "epoch": 0.054501315505166695, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.8851, "step": 536 }, { "epoch": 0.05460299706394499, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.8522, "step": 537 }, { "epoch": 0.05470467862272329, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.8735, "step": 538 }, { "epoch": 0.05480636018150158, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.8601, "step": 539 }, { "epoch": 0.05490804174027988, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.8188, "step": 540 }, { "epoch": 0.05500972329905818, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.8671, "step": 541 }, { "epoch": 0.05511140485783647, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.9311, "step": 542 }, { "epoch": 0.055213086416614765, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.9234, "step": 543 }, { "epoch": 0.055314767975393066, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.864, "step": 544 }, { "epoch": 0.05541644953417136, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.9139, "step": 545 }, { "epoch": 0.05551813109294965, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.859, "step": 546 }, { "epoch": 0.055619812651727954, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.8353, "step": 547 }, { "epoch": 0.05572149421050625, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.9553, "step": 548 }, { "epoch": 0.05582317576928454, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.8461, "step": 549 }, { "epoch": 0.05592485732806284, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.84, "step": 550 }, { "epoch": 0.056026538886841136, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.8416, "step": 551 }, { "epoch": 0.05612822044561943, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.8286, "step": 552 }, { "epoch": 0.05622990200439773, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.854, "step": 553 }, { "epoch": 0.056331583563176024, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.8725, "step": 554 }, { "epoch": 0.05643326512195432, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.8643, "step": 555 }, { "epoch": 0.05653494668073262, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.8885, "step": 556 }, { "epoch": 0.05663662823951091, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8842, "step": 557 }, { "epoch": 0.056738309798289206, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.8473, "step": 558 }, { "epoch": 0.05683999135706751, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.7754, "step": 559 }, { "epoch": 0.0569416729158458, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.8843, "step": 560 }, { "epoch": 0.057043354474624094, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8368, "step": 561 }, { "epoch": 0.057145036033402395, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.8479, "step": 562 }, { "epoch": 0.05724671759218069, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.9351, "step": 563 }, { "epoch": 0.05734839915095898, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.8939, "step": 564 }, { "epoch": 0.05745008070973728, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.8028, "step": 565 }, { "epoch": 0.05755176226851558, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.8764, "step": 566 }, { "epoch": 0.05765344382729387, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.8186, "step": 567 }, { "epoch": 0.05775512538607217, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.882, "step": 568 }, { "epoch": 0.057856806944850465, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.8457, "step": 569 }, { "epoch": 0.05795848850362876, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.8292, "step": 570 }, { "epoch": 0.05806017006240706, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8868, "step": 571 }, { "epoch": 0.058161851621185354, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.851, "step": 572 }, { "epoch": 0.05826353317996365, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.8639, "step": 573 }, { "epoch": 0.05836521473874195, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.7678, "step": 574 }, { "epoch": 0.05846689629752024, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.8709, "step": 575 }, { "epoch": 0.058568577856298536, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.8733, "step": 576 }, { "epoch": 0.058670259415076836, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.8186, "step": 577 }, { "epoch": 0.05877194097385513, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.9111, "step": 578 }, { "epoch": 0.058873622532633424, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.8715, "step": 579 }, { "epoch": 0.058975304091411725, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.8842, "step": 580 }, { "epoch": 0.05907698565019002, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.8512, "step": 581 }, { "epoch": 0.05917866720896831, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.9205, "step": 582 }, { "epoch": 0.05928034876774661, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.8521, "step": 583 }, { "epoch": 0.05938203032652491, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.819, "step": 584 }, { "epoch": 0.0594837118853032, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.8738, "step": 585 }, { "epoch": 0.0595853934440815, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.8838, "step": 586 }, { "epoch": 0.059687075002859795, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.8294, "step": 587 }, { "epoch": 0.05978875656163809, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.8372, "step": 588 }, { "epoch": 0.05989043812041639, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.8074, "step": 589 }, { "epoch": 0.05999211967919468, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.7526, "step": 590 }, { "epoch": 0.06009380123797298, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.8414, "step": 591 }, { "epoch": 0.06019548279675128, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.8939, "step": 592 }, { "epoch": 0.06029716435552957, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.8106, "step": 593 }, { "epoch": 0.060398845914307865, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.8324, "step": 594 }, { "epoch": 0.060500527473086166, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.854, "step": 595 }, { "epoch": 0.06060220903186446, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.8131, "step": 596 }, { "epoch": 0.06070389059064275, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.8018, "step": 597 }, { "epoch": 0.06080557214942105, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.8308, "step": 598 }, { "epoch": 0.06090725370819935, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.7933, "step": 599 }, { "epoch": 0.06100893526697764, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.808, "step": 600 }, { "epoch": 0.061110616825755935, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.8487, "step": 601 }, { "epoch": 0.061212298384534236, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.9196, "step": 602 }, { "epoch": 0.06131397994331253, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.8699, "step": 603 }, { "epoch": 0.061415661502090824, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8149, "step": 604 }, { "epoch": 0.061517343060869124, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.8576, "step": 605 }, { "epoch": 0.06161902461964742, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.9418, "step": 606 }, { "epoch": 0.06172070617842571, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.8505, "step": 607 }, { "epoch": 0.06182238773720401, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.798, "step": 608 }, { "epoch": 0.061924069295982306, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8397, "step": 609 }, { "epoch": 0.0620257508547606, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.8976, "step": 610 }, { "epoch": 0.0621274324135389, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.8797, "step": 611 }, { "epoch": 0.062229113972317195, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.8658, "step": 612 }, { "epoch": 0.06233079553109549, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.8707, "step": 613 }, { "epoch": 0.06243247708987379, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.8478, "step": 614 }, { "epoch": 0.06253415864865208, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.8331, "step": 615 }, { "epoch": 0.06263584020743038, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.8719, "step": 616 }, { "epoch": 0.06273752176620867, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.9068, "step": 617 }, { "epoch": 0.06283920332498698, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.8235, "step": 618 }, { "epoch": 0.06294088488376527, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.8041, "step": 619 }, { "epoch": 0.06304256644254357, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.8535, "step": 620 }, { "epoch": 0.06314424800132186, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7843, "step": 621 }, { "epoch": 0.06324592956010015, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.8573, "step": 622 }, { "epoch": 0.06334761111887845, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.8991, "step": 623 }, { "epoch": 0.06344929267765675, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.8153, "step": 624 }, { "epoch": 0.06355097423643505, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.7881, "step": 625 }, { "epoch": 0.06365265579521334, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.8484, "step": 626 }, { "epoch": 0.06375433735399164, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.8426, "step": 627 }, { "epoch": 0.06385601891276993, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.8266, "step": 628 }, { "epoch": 0.06395770047154822, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.8731, "step": 629 }, { "epoch": 0.06405938203032653, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.8761, "step": 630 }, { "epoch": 0.06416106358910482, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.8348, "step": 631 }, { "epoch": 0.06426274514788312, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8366, "step": 632 }, { "epoch": 0.06436442670666141, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.8757, "step": 633 }, { "epoch": 0.0644661082654397, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8282, "step": 634 }, { "epoch": 0.064567789824218, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.8026, "step": 635 }, { "epoch": 0.06466947138299631, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.8798, "step": 636 }, { "epoch": 0.0647711529417746, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.763, "step": 637 }, { "epoch": 0.0648728345005529, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.7795, "step": 638 }, { "epoch": 0.06497451605933119, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.8312, "step": 639 }, { "epoch": 0.06507619761810948, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.8387, "step": 640 }, { "epoch": 0.06517787917688778, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.8017, "step": 641 }, { "epoch": 0.06527956073566608, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.8566, "step": 642 }, { "epoch": 0.06538124229444438, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.7853, "step": 643 }, { "epoch": 0.06548292385322267, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8474, "step": 644 }, { "epoch": 0.06558460541200097, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.8743, "step": 645 }, { "epoch": 0.06568628697077926, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8229, "step": 646 }, { "epoch": 0.06578796852955755, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.8764, "step": 647 }, { "epoch": 0.06588965008833586, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.8945, "step": 648 }, { "epoch": 0.06599133164711415, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.824, "step": 649 }, { "epoch": 0.06609301320589245, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.7852, "step": 650 }, { "epoch": 0.06619469476467074, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.8277, "step": 651 }, { "epoch": 0.06629637632344904, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.8476, "step": 652 }, { "epoch": 0.06639805788222733, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.8818, "step": 653 }, { "epoch": 0.06649973944100564, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.8788, "step": 654 }, { "epoch": 0.06660142099978393, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.8708, "step": 655 }, { "epoch": 0.06670310255856222, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.8532, "step": 656 }, { "epoch": 0.06680478411734052, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.8254, "step": 657 }, { "epoch": 0.06690646567611881, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.8561, "step": 658 }, { "epoch": 0.0670081472348971, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.8459, "step": 659 }, { "epoch": 0.06710982879367541, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.7987, "step": 660 }, { "epoch": 0.06721151035245371, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8576, "step": 661 }, { "epoch": 0.067313191911232, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.8265, "step": 662 }, { "epoch": 0.0674148734700103, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.8151, "step": 663 }, { "epoch": 0.06751655502878859, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.8283, "step": 664 }, { "epoch": 0.06761823658756688, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.8181, "step": 665 }, { "epoch": 0.06771991814634519, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.8193, "step": 666 }, { "epoch": 0.06782159970512348, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.8483, "step": 667 }, { "epoch": 0.06792328126390178, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.8552, "step": 668 }, { "epoch": 0.06802496282268007, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.8108, "step": 669 }, { "epoch": 0.06812664438145836, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.8038, "step": 670 }, { "epoch": 0.06822832594023666, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.8213, "step": 671 }, { "epoch": 0.06833000749901497, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.8612, "step": 672 }, { "epoch": 0.06843168905779326, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.8215, "step": 673 }, { "epoch": 0.06853337061657155, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.8086, "step": 674 }, { "epoch": 0.06863505217534985, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.8234, "step": 675 }, { "epoch": 0.06873673373412814, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.7992, "step": 676 }, { "epoch": 0.06883841529290644, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.8377, "step": 677 }, { "epoch": 0.06894009685168474, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.8476, "step": 678 }, { "epoch": 0.06904177841046304, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.7821, "step": 679 }, { "epoch": 0.06914345996924133, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.8701, "step": 680 }, { "epoch": 0.06924514152801962, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.8449, "step": 681 }, { "epoch": 0.06934682308679792, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.9272, "step": 682 }, { "epoch": 0.06944850464557621, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.7936, "step": 683 }, { "epoch": 0.06955018620435452, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8335, "step": 684 }, { "epoch": 0.06965186776313281, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.8328, "step": 685 }, { "epoch": 0.0697535493219111, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.8895, "step": 686 }, { "epoch": 0.0698552308806894, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7895, "step": 687 }, { "epoch": 0.0699569124394677, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.8795, "step": 688 }, { "epoch": 0.07005859399824599, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.8543, "step": 689 }, { "epoch": 0.0701602755570243, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.733, "step": 690 }, { "epoch": 0.07026195711580259, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.8614, "step": 691 }, { "epoch": 0.07036363867458088, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7814, "step": 692 }, { "epoch": 0.07046532023335918, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.8387, "step": 693 }, { "epoch": 0.07056700179213747, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.8363, "step": 694 }, { "epoch": 0.07066868335091576, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.9104, "step": 695 }, { "epoch": 0.07077036490969407, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.7387, "step": 696 }, { "epoch": 0.07087204646847237, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.9323, "step": 697 }, { "epoch": 0.07097372802725066, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.8411, "step": 698 }, { "epoch": 0.07107540958602895, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.8653, "step": 699 }, { "epoch": 0.07117709114480725, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.7791, "step": 700 }, { "epoch": 0.07127877270358554, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.7871, "step": 701 }, { "epoch": 0.07138045426236385, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.8244, "step": 702 }, { "epoch": 0.07148213582114214, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.7784, "step": 703 }, { "epoch": 0.07158381737992044, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.8928, "step": 704 }, { "epoch": 0.07168549893869873, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.8151, "step": 705 }, { "epoch": 0.07178718049747702, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8431, "step": 706 }, { "epoch": 0.07188886205625532, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.8198, "step": 707 }, { "epoch": 0.07199054361503363, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8371, "step": 708 }, { "epoch": 0.07209222517381192, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.8308, "step": 709 }, { "epoch": 0.07219390673259021, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.835, "step": 710 }, { "epoch": 0.0722955882913685, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.8718, "step": 711 }, { "epoch": 0.0723972698501468, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.7849, "step": 712 }, { "epoch": 0.0724989514089251, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.7968, "step": 713 }, { "epoch": 0.0726006329677034, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.7529, "step": 714 }, { "epoch": 0.0727023145264817, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.8578, "step": 715 }, { "epoch": 0.07280399608525999, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.8665, "step": 716 }, { "epoch": 0.07290567764403828, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.8932, "step": 717 }, { "epoch": 0.07300735920281658, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.7948, "step": 718 }, { "epoch": 0.07310904076159487, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.8575, "step": 719 }, { "epoch": 0.07321072232037316, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.808, "step": 720 }, { "epoch": 0.07331240387915147, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.8696, "step": 721 }, { "epoch": 0.07341408543792977, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.8024, "step": 722 }, { "epoch": 0.07351576699670806, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.7953, "step": 723 }, { "epoch": 0.07361744855548635, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.7472, "step": 724 }, { "epoch": 0.07371913011426465, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.8354, "step": 725 }, { "epoch": 0.07382081167304294, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.7947, "step": 726 }, { "epoch": 0.07392249323182125, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8359, "step": 727 }, { "epoch": 0.07402417479059954, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7698, "step": 728 }, { "epoch": 0.07412585634937784, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.8036, "step": 729 }, { "epoch": 0.07422753790815613, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.8761, "step": 730 }, { "epoch": 0.07432921946693442, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.8475, "step": 731 }, { "epoch": 0.07443090102571272, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.819, "step": 732 }, { "epoch": 0.07453258258449103, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8036, "step": 733 }, { "epoch": 0.07463426414326932, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.8351, "step": 734 }, { "epoch": 0.07473594570204761, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.775, "step": 735 }, { "epoch": 0.0748376272608259, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.8744, "step": 736 }, { "epoch": 0.0749393088196042, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.8185, "step": 737 }, { "epoch": 0.0750409903783825, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.8726, "step": 738 }, { "epoch": 0.0751426719371608, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7476, "step": 739 }, { "epoch": 0.0752443534959391, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.8592, "step": 740 }, { "epoch": 0.07534603505471739, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.8928, "step": 741 }, { "epoch": 0.07544771661349568, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.8444, "step": 742 }, { "epoch": 0.07554939817227398, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7664, "step": 743 }, { "epoch": 0.07565107973105227, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.8221, "step": 744 }, { "epoch": 0.07575276128983058, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.8262, "step": 745 }, { "epoch": 0.07585444284860887, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.896, "step": 746 }, { "epoch": 0.07595612440738717, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7998, "step": 747 }, { "epoch": 0.07605780596616546, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.8514, "step": 748 }, { "epoch": 0.07615948752494375, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7885, "step": 749 }, { "epoch": 0.07626116908372205, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.8944, "step": 750 }, { "epoch": 0.07636285064250035, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.8745, "step": 751 }, { "epoch": 0.07646453220127865, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.8889, "step": 752 }, { "epoch": 0.07656621376005694, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7657, "step": 753 }, { "epoch": 0.07666789531883524, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.783, "step": 754 }, { "epoch": 0.07676957687761353, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.8282, "step": 755 }, { "epoch": 0.07687125843639182, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.8033, "step": 756 }, { "epoch": 0.07697293999517013, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7795, "step": 757 }, { "epoch": 0.07707462155394842, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8305, "step": 758 }, { "epoch": 0.07717630311272672, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.8464, "step": 759 }, { "epoch": 0.07727798467150501, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.8401, "step": 760 }, { "epoch": 0.0773796662302833, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.8621, "step": 761 }, { "epoch": 0.0774813477890616, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.7909, "step": 762 }, { "epoch": 0.07758302934783991, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.8341, "step": 763 }, { "epoch": 0.0776847109066182, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.871, "step": 764 }, { "epoch": 0.0777863924653965, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.8292, "step": 765 }, { "epoch": 0.07788807402417479, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.8056, "step": 766 }, { "epoch": 0.07798975558295308, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.7988, "step": 767 }, { "epoch": 0.07809143714173138, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.7788, "step": 768 }, { "epoch": 0.07819311870050968, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.8213, "step": 769 }, { "epoch": 0.07829480025928798, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.8004, "step": 770 }, { "epoch": 0.07839648181806627, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7865, "step": 771 }, { "epoch": 0.07849816337684457, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.8506, "step": 772 }, { "epoch": 0.07859984493562286, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.8121, "step": 773 }, { "epoch": 0.07870152649440115, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.7919, "step": 774 }, { "epoch": 0.07880320805317946, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.8866, "step": 775 }, { "epoch": 0.07890488961195775, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8767, "step": 776 }, { "epoch": 0.07900657117073605, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.8389, "step": 777 }, { "epoch": 0.07910825272951434, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.86, "step": 778 }, { "epoch": 0.07920993428829264, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7923, "step": 779 }, { "epoch": 0.07931161584707093, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7526, "step": 780 }, { "epoch": 0.07941329740584924, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.7722, "step": 781 }, { "epoch": 0.07951497896462753, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.8182, "step": 782 }, { "epoch": 0.07961666052340582, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.852, "step": 783 }, { "epoch": 0.07971834208218412, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.8158, "step": 784 }, { "epoch": 0.07982002364096241, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.8744, "step": 785 }, { "epoch": 0.0799217051997407, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.8585, "step": 786 }, { "epoch": 0.08002338675851901, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.8676, "step": 787 }, { "epoch": 0.08012506831729731, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.8289, "step": 788 }, { "epoch": 0.0802267498760756, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.8452, "step": 789 }, { "epoch": 0.0803284314348539, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.8626, "step": 790 }, { "epoch": 0.08043011299363219, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.793, "step": 791 }, { "epoch": 0.08053179455241048, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.8057, "step": 792 }, { "epoch": 0.08063347611118879, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.8584, "step": 793 }, { "epoch": 0.08073515766996708, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.8268, "step": 794 }, { "epoch": 0.08083683922874538, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.773, "step": 795 }, { "epoch": 0.08093852078752367, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.7506, "step": 796 }, { "epoch": 0.08104020234630196, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.7988, "step": 797 }, { "epoch": 0.08114188390508026, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.9268, "step": 798 }, { "epoch": 0.08124356546385857, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.8278, "step": 799 }, { "epoch": 0.08134524702263686, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.8372, "step": 800 }, { "epoch": 0.08144692858141515, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.8037, "step": 801 }, { "epoch": 0.08154861014019345, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.8436, "step": 802 }, { "epoch": 0.08165029169897174, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.8158, "step": 803 }, { "epoch": 0.08175197325775004, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.7991, "step": 804 }, { "epoch": 0.08185365481652834, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.8109, "step": 805 }, { "epoch": 0.08195533637530664, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.8185, "step": 806 }, { "epoch": 0.08205701793408493, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.7887, "step": 807 }, { "epoch": 0.08215869949286322, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.8596, "step": 808 }, { "epoch": 0.08226038105164152, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.8081, "step": 809 }, { "epoch": 0.08236206261041981, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.7933, "step": 810 }, { "epoch": 0.08246374416919812, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.8441, "step": 811 }, { "epoch": 0.08256542572797641, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.7854, "step": 812 }, { "epoch": 0.0826671072867547, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.7749, "step": 813 }, { "epoch": 0.082768788845533, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.7733, "step": 814 }, { "epoch": 0.0828704704043113, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.8147, "step": 815 }, { "epoch": 0.08297215196308959, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.8334, "step": 816 }, { "epoch": 0.0830738335218679, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.8312, "step": 817 }, { "epoch": 0.08317551508064619, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.7844, "step": 818 }, { "epoch": 0.08327719663942448, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.8148, "step": 819 }, { "epoch": 0.08337887819820278, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.8021, "step": 820 }, { "epoch": 0.08348055975698107, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.8545, "step": 821 }, { "epoch": 0.08358224131575936, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.8369, "step": 822 }, { "epoch": 0.08368392287453767, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.7756, "step": 823 }, { "epoch": 0.08378560443331597, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.8681, "step": 824 }, { "epoch": 0.08388728599209426, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.8223, "step": 825 }, { "epoch": 0.08398896755087255, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.8387, "step": 826 }, { "epoch": 0.08409064910965085, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.7933, "step": 827 }, { "epoch": 0.08419233066842914, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.7993, "step": 828 }, { "epoch": 0.08429401222720745, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.8489, "step": 829 }, { "epoch": 0.08439569378598574, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.8037, "step": 830 }, { "epoch": 0.08449737534476404, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.8211, "step": 831 }, { "epoch": 0.08459905690354233, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.7969, "step": 832 }, { "epoch": 0.08470073846232062, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.8219, "step": 833 }, { "epoch": 0.08480242002109892, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.7981, "step": 834 }, { "epoch": 0.08490410157987723, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.8286, "step": 835 }, { "epoch": 0.08500578313865552, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8327, "step": 836 }, { "epoch": 0.08510746469743381, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.7936, "step": 837 }, { "epoch": 0.0852091462562121, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.8004, "step": 838 }, { "epoch": 0.0853108278149904, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.7659, "step": 839 }, { "epoch": 0.0854125093737687, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.834, "step": 840 }, { "epoch": 0.085514190932547, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.8353, "step": 841 }, { "epoch": 0.0856158724913253, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.8302, "step": 842 }, { "epoch": 0.08571755405010359, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.797, "step": 843 }, { "epoch": 0.08581923560888188, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.787, "step": 844 }, { "epoch": 0.08592091716766018, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.7846, "step": 845 }, { "epoch": 0.08602259872643847, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.7675, "step": 846 }, { "epoch": 0.08612428028521678, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7912, "step": 847 }, { "epoch": 0.08622596184399507, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.8381, "step": 848 }, { "epoch": 0.08632764340277337, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.8145, "step": 849 }, { "epoch": 0.08642932496155166, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.8315, "step": 850 }, { "epoch": 0.08653100652032995, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.844, "step": 851 }, { "epoch": 0.08663268807910825, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.764, "step": 852 }, { "epoch": 0.08673436963788655, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.8211, "step": 853 }, { "epoch": 0.08683605119666485, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.8561, "step": 854 }, { "epoch": 0.08693773275544314, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7917, "step": 855 }, { "epoch": 0.08703941431422144, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.7871, "step": 856 }, { "epoch": 0.08714109587299973, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.8257, "step": 857 }, { "epoch": 0.08724277743177802, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.8424, "step": 858 }, { "epoch": 0.08734445899055633, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 1.819, "step": 859 }, { "epoch": 0.08744614054933462, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.8355, "step": 860 }, { "epoch": 0.08754782210811292, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.8782, "step": 861 }, { "epoch": 0.08764950366689121, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.8362, "step": 862 }, { "epoch": 0.0877511852256695, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.819, "step": 863 }, { "epoch": 0.0878528667844478, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.8046, "step": 864 }, { "epoch": 0.08795454834322611, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.8154, "step": 865 }, { "epoch": 0.0880562299020044, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7993, "step": 866 }, { "epoch": 0.0881579114607827, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.8413, "step": 867 }, { "epoch": 0.08825959301956099, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7567, "step": 868 }, { "epoch": 0.08836127457833928, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.8311, "step": 869 }, { "epoch": 0.08846295613711758, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7413, "step": 870 }, { "epoch": 0.08856463769589588, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7816, "step": 871 }, { "epoch": 0.08866631925467418, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.8152, "step": 872 }, { "epoch": 0.08876800081345247, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.8257, "step": 873 }, { "epoch": 0.08886968237223077, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7654, "step": 874 }, { "epoch": 0.08897136393100906, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.8536, "step": 875 }, { "epoch": 0.08907304548978735, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.8041, "step": 876 }, { "epoch": 0.08917472704856566, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7858, "step": 877 }, { "epoch": 0.08927640860734395, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7797, "step": 878 }, { "epoch": 0.08937809016612225, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.8342, "step": 879 }, { "epoch": 0.08947977172490054, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.8047, "step": 880 }, { "epoch": 0.08958145328367884, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7864, "step": 881 }, { "epoch": 0.08968313484245713, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.845, "step": 882 }, { "epoch": 0.08978481640123544, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7736, "step": 883 }, { "epoch": 0.08988649796001373, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7616, "step": 884 }, { "epoch": 0.08998817951879202, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.8677, "step": 885 }, { "epoch": 0.09008986107757032, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.8485, "step": 886 }, { "epoch": 0.09019154263634861, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.849, "step": 887 }, { "epoch": 0.0902932241951269, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.7924, "step": 888 }, { "epoch": 0.09039490575390521, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.8337, "step": 889 }, { "epoch": 0.09049658731268351, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.8768, "step": 890 }, { "epoch": 0.0905982688714618, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7679, "step": 891 }, { "epoch": 0.0906999504302401, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.8219, "step": 892 }, { "epoch": 0.09080163198901839, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.7588, "step": 893 }, { "epoch": 0.09090331354779668, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8212, "step": 894 }, { "epoch": 0.09100499510657499, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.8164, "step": 895 }, { "epoch": 0.09110667666535328, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.7975, "step": 896 }, { "epoch": 0.09120835822413158, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.7958, "step": 897 }, { "epoch": 0.09131003978290987, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.7718, "step": 898 }, { "epoch": 0.09141172134168817, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7032, "step": 899 }, { "epoch": 0.09151340290046646, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.8431, "step": 900 }, { "epoch": 0.09161508445924477, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.8235, "step": 901 }, { "epoch": 0.09171676601802306, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.8196, "step": 902 }, { "epoch": 0.09181844757680135, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7137, "step": 903 }, { "epoch": 0.09192012913557965, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7716, "step": 904 }, { "epoch": 0.09202181069435794, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.8181, "step": 905 }, { "epoch": 0.09212349225313624, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8386, "step": 906 }, { "epoch": 0.09222517381191454, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.7804, "step": 907 }, { "epoch": 0.09232685537069284, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7931, "step": 908 }, { "epoch": 0.09242853692947113, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7825, "step": 909 }, { "epoch": 0.09253021848824942, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7829, "step": 910 }, { "epoch": 0.09263190004702772, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7802, "step": 911 }, { "epoch": 0.09273358160580601, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.788, "step": 912 }, { "epoch": 0.09283526316458432, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7972, "step": 913 }, { "epoch": 0.09293694472336261, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.8285, "step": 914 }, { "epoch": 0.09303862628214091, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7813, "step": 915 }, { "epoch": 0.0931403078409192, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.7964, "step": 916 }, { "epoch": 0.0932419893996975, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.85, "step": 917 }, { "epoch": 0.09334367095847579, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.8029, "step": 918 }, { "epoch": 0.0934453525172541, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7945, "step": 919 }, { "epoch": 0.09354703407603239, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7884, "step": 920 }, { "epoch": 0.09364871563481068, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.8814, "step": 921 }, { "epoch": 0.09375039719358898, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.8192, "step": 922 }, { "epoch": 0.09385207875236727, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.8482, "step": 923 }, { "epoch": 0.09395376031114556, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.8093, "step": 924 }, { "epoch": 0.09405544186992387, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.7632, "step": 925 }, { "epoch": 0.09415712342870217, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.798, "step": 926 }, { "epoch": 0.09425880498748046, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.7879, "step": 927 }, { "epoch": 0.09436048654625875, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.761, "step": 928 }, { "epoch": 0.09446216810503705, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.7625, "step": 929 }, { "epoch": 0.09456384966381534, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.8176, "step": 930 }, { "epoch": 0.09466553122259365, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7582, "step": 931 }, { "epoch": 0.09476721278137194, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7841, "step": 932 }, { "epoch": 0.09486889434015024, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.786, "step": 933 }, { "epoch": 0.09497057589892853, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7297, "step": 934 }, { "epoch": 0.09507225745770682, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.86, "step": 935 }, { "epoch": 0.09517393901648512, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.8217, "step": 936 }, { "epoch": 0.09527562057526343, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7516, "step": 937 }, { "epoch": 0.09537730213404172, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.8442, "step": 938 }, { "epoch": 0.09547898369282001, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.8773, "step": 939 }, { "epoch": 0.0955806652515983, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7955, "step": 940 }, { "epoch": 0.0956823468103766, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.8266, "step": 941 }, { "epoch": 0.0957840283691549, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.831, "step": 942 }, { "epoch": 0.0958857099279332, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7877, "step": 943 }, { "epoch": 0.0959873914867115, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7276, "step": 944 }, { "epoch": 0.09608907304548979, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.8482, "step": 945 }, { "epoch": 0.09619075460426808, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7766, "step": 946 }, { "epoch": 0.09629243616304638, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7882, "step": 947 }, { "epoch": 0.09639411772182467, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.755, "step": 948 }, { "epoch": 0.09649579928060298, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.7958, "step": 949 }, { "epoch": 0.09659748083938127, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.7857, "step": 950 }, { "epoch": 0.09669916239815957, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7638, "step": 951 }, { "epoch": 0.09680084395693786, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.789, "step": 952 }, { "epoch": 0.09690252551571615, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.8508, "step": 953 }, { "epoch": 0.09700420707449445, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.793, "step": 954 }, { "epoch": 0.09710588863327275, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7845, "step": 955 }, { "epoch": 0.09720757019205105, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.7691, "step": 956 }, { "epoch": 0.09730925175082934, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7955, "step": 957 }, { "epoch": 0.09741093330960764, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.8115, "step": 958 }, { "epoch": 0.09751261486838593, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.803, "step": 959 }, { "epoch": 0.09761429642716422, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.8425, "step": 960 }, { "epoch": 0.09771597798594252, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7651, "step": 961 }, { "epoch": 0.09781765954472083, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.8492, "step": 962 }, { "epoch": 0.09791934110349912, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.7465, "step": 963 }, { "epoch": 0.09802102266227741, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.7946, "step": 964 }, { "epoch": 0.0981227042210557, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.8125, "step": 965 }, { "epoch": 0.098224385779834, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.7573, "step": 966 }, { "epoch": 0.0983260673386123, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.8205, "step": 967 }, { "epoch": 0.0984277488973906, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.7724, "step": 968 }, { "epoch": 0.0985294304561689, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.8132, "step": 969 }, { "epoch": 0.09863111201494719, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.837, "step": 970 }, { "epoch": 0.09873279357372548, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.7598, "step": 971 }, { "epoch": 0.09883447513250378, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.8047, "step": 972 }, { "epoch": 0.09893615669128207, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.8728, "step": 973 }, { "epoch": 0.09903783825006038, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8184, "step": 974 }, { "epoch": 0.09913951980883867, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.8127, "step": 975 }, { "epoch": 0.09924120136761697, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.7804, "step": 976 }, { "epoch": 0.09934288292639526, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.8445, "step": 977 }, { "epoch": 0.09944456448517355, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.8376, "step": 978 }, { "epoch": 0.09954624604395185, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7915, "step": 979 }, { "epoch": 0.09964792760273015, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.8519, "step": 980 }, { "epoch": 0.09974960916150845, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7622, "step": 981 }, { "epoch": 0.09985129072028674, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.83, "step": 982 }, { "epoch": 0.09995297227906504, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7567, "step": 983 }, { "epoch": 0.10005465383784333, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.8012, "step": 984 }, { "epoch": 0.10015633539662162, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7905, "step": 985 }, { "epoch": 0.10025801695539993, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7949, "step": 986 }, { "epoch": 0.10035969851417822, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.7828, "step": 987 }, { "epoch": 0.10046138007295652, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.8657, "step": 988 }, { "epoch": 0.10056306163173481, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.8223, "step": 989 }, { "epoch": 0.1006647431905131, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.7826, "step": 990 }, { "epoch": 0.1007664247492914, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.7462, "step": 991 }, { "epoch": 0.10086810630806971, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.8202, "step": 992 }, { "epoch": 0.100969787866848, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.7925, "step": 993 }, { "epoch": 0.1010714694256263, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.8566, "step": 994 }, { "epoch": 0.10117315098440459, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.8353, "step": 995 }, { "epoch": 0.10127483254318288, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.8391, "step": 996 }, { "epoch": 0.10137651410196118, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.8983, "step": 997 }, { "epoch": 0.10147819566073948, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.8108, "step": 998 }, { "epoch": 0.10157987721951778, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7916, "step": 999 }, { "epoch": 0.10168155877829607, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7679, "step": 1000 }, { "epoch": 0.10178324033707437, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.8105, "step": 1001 }, { "epoch": 0.10188492189585266, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.847, "step": 1002 }, { "epoch": 0.10198660345463095, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7991, "step": 1003 }, { "epoch": 0.10208828501340926, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7445, "step": 1004 }, { "epoch": 0.10218996657218755, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7129, "step": 1005 }, { "epoch": 0.10229164813096585, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.8329, "step": 1006 }, { "epoch": 0.10239332968974414, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.7607, "step": 1007 }, { "epoch": 0.10249501124852244, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.8429, "step": 1008 }, { "epoch": 0.10259669280730073, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7746, "step": 1009 }, { "epoch": 0.10269837436607904, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.8397, "step": 1010 }, { "epoch": 0.10280005592485733, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7599, "step": 1011 }, { "epoch": 0.10290173748363562, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.7751, "step": 1012 }, { "epoch": 0.10300341904241392, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.772, "step": 1013 }, { "epoch": 0.10310510060119221, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7809, "step": 1014 }, { "epoch": 0.1032067821599705, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7961, "step": 1015 }, { "epoch": 0.10330846371874881, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.781, "step": 1016 }, { "epoch": 0.10341014527752711, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7537, "step": 1017 }, { "epoch": 0.1035118268363054, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7569, "step": 1018 }, { "epoch": 0.1036135083950837, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7635, "step": 1019 }, { "epoch": 0.10371518995386199, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.79, "step": 1020 }, { "epoch": 0.10381687151264028, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7823, "step": 1021 }, { "epoch": 0.10391855307141859, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7923, "step": 1022 }, { "epoch": 0.10402023463019688, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7438, "step": 1023 }, { "epoch": 0.10412191618897518, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.814, "step": 1024 }, { "epoch": 0.10422359774775347, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.8191, "step": 1025 }, { "epoch": 0.10432527930653177, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.8547, "step": 1026 }, { "epoch": 0.10442696086531006, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.7693, "step": 1027 }, { "epoch": 0.10452864242408837, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.7325, "step": 1028 }, { "epoch": 0.10463032398286666, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.7932, "step": 1029 }, { "epoch": 0.10473200554164495, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7675, "step": 1030 }, { "epoch": 0.10483368710042325, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.8031, "step": 1031 }, { "epoch": 0.10493536865920154, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7804, "step": 1032 }, { "epoch": 0.10503705021797984, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7204, "step": 1033 }, { "epoch": 0.10513873177675814, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7715, "step": 1034 }, { "epoch": 0.10524041333553644, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.816, "step": 1035 }, { "epoch": 0.10534209489431473, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.764, "step": 1036 }, { "epoch": 0.10544377645309302, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7055, "step": 1037 }, { "epoch": 0.10554545801187132, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.7523, "step": 1038 }, { "epoch": 0.10564713957064961, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.8196, "step": 1039 }, { "epoch": 0.10574882112942792, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.83, "step": 1040 }, { "epoch": 0.10585050268820621, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.8039, "step": 1041 }, { "epoch": 0.10595218424698451, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7534, "step": 1042 }, { "epoch": 0.1060538658057628, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7747, "step": 1043 }, { "epoch": 0.1061555473645411, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7919, "step": 1044 }, { "epoch": 0.10625722892331939, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7642, "step": 1045 }, { "epoch": 0.1063589104820977, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7742, "step": 1046 }, { "epoch": 0.10646059204087599, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7643, "step": 1047 }, { "epoch": 0.10656227359965428, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.724, "step": 1048 }, { "epoch": 0.10666395515843258, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7091, "step": 1049 }, { "epoch": 0.10676563671721087, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.8569, "step": 1050 }, { "epoch": 0.10686731827598916, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7687, "step": 1051 }, { "epoch": 0.10696899983476747, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7952, "step": 1052 }, { "epoch": 0.10707068139354577, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7378, "step": 1053 }, { "epoch": 0.10717236295232406, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.769, "step": 1054 }, { "epoch": 0.10727404451110235, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.774, "step": 1055 }, { "epoch": 0.10737572606988065, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.8556, "step": 1056 }, { "epoch": 0.10747740762865894, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7691, "step": 1057 }, { "epoch": 0.10757908918743725, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7571, "step": 1058 }, { "epoch": 0.10768077074621554, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7878, "step": 1059 }, { "epoch": 0.10778245230499384, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7837, "step": 1060 }, { "epoch": 0.10788413386377213, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.8254, "step": 1061 }, { "epoch": 0.10798581542255042, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7935, "step": 1062 }, { "epoch": 0.10808749698132872, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.8086, "step": 1063 }, { "epoch": 0.10818917854010703, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.749, "step": 1064 }, { "epoch": 0.10829086009888532, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7908, "step": 1065 }, { "epoch": 0.10839254165766361, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7457, "step": 1066 }, { "epoch": 0.1084942232164419, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.8241, "step": 1067 }, { "epoch": 0.1085959047752202, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6965, "step": 1068 }, { "epoch": 0.1086975863339985, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.72, "step": 1069 }, { "epoch": 0.1087992678927768, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.8007, "step": 1070 }, { "epoch": 0.1089009494515551, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.8191, "step": 1071 }, { "epoch": 0.10900263101033339, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7594, "step": 1072 }, { "epoch": 0.10910431256911168, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7726, "step": 1073 }, { "epoch": 0.10920599412788998, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7647, "step": 1074 }, { "epoch": 0.10930767568666827, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7941, "step": 1075 }, { "epoch": 0.10940935724544658, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.8022, "step": 1076 }, { "epoch": 0.10951103880422487, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7575, "step": 1077 }, { "epoch": 0.10961272036300317, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7482, "step": 1078 }, { "epoch": 0.10971440192178146, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.8132, "step": 1079 }, { "epoch": 0.10981608348055975, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7811, "step": 1080 }, { "epoch": 0.10991776503933805, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7758, "step": 1081 }, { "epoch": 0.11001944659811635, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.8112, "step": 1082 }, { "epoch": 0.11012112815689465, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.8042, "step": 1083 }, { "epoch": 0.11022280971567294, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.7298, "step": 1084 }, { "epoch": 0.11032449127445124, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7872, "step": 1085 }, { "epoch": 0.11042617283322953, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7755, "step": 1086 }, { "epoch": 0.11052785439200782, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7783, "step": 1087 }, { "epoch": 0.11062953595078613, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.8241, "step": 1088 }, { "epoch": 0.11073121750956443, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.7515, "step": 1089 }, { "epoch": 0.11083289906834272, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.7002, "step": 1090 }, { "epoch": 0.11093458062712101, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.7775, "step": 1091 }, { "epoch": 0.1110362621858993, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.8097, "step": 1092 }, { "epoch": 0.1111379437446776, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.7664, "step": 1093 }, { "epoch": 0.11123962530345591, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7309, "step": 1094 }, { "epoch": 0.1113413068622342, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7601, "step": 1095 }, { "epoch": 0.1114429884210125, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.7835, "step": 1096 }, { "epoch": 0.11154466997979079, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.7941, "step": 1097 }, { "epoch": 0.11164635153856908, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.7781, "step": 1098 }, { "epoch": 0.11174803309734738, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.7912, "step": 1099 }, { "epoch": 0.11184971465612568, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.8193, "step": 1100 }, { "epoch": 0.11195139621490398, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.7886, "step": 1101 }, { "epoch": 0.11205307777368227, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.6915, "step": 1102 }, { "epoch": 0.11215475933246057, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7768, "step": 1103 }, { "epoch": 0.11225644089123886, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7633, "step": 1104 }, { "epoch": 0.11235812245001715, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.8038, "step": 1105 }, { "epoch": 0.11245980400879546, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.8038, "step": 1106 }, { "epoch": 0.11256148556757375, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7862, "step": 1107 }, { "epoch": 0.11266316712635205, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.8266, "step": 1108 }, { "epoch": 0.11276484868513034, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7586, "step": 1109 }, { "epoch": 0.11286653024390864, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.8271, "step": 1110 }, { "epoch": 0.11296821180268693, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.8184, "step": 1111 }, { "epoch": 0.11306989336146524, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.8082, "step": 1112 }, { "epoch": 0.11317157492024353, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8117, "step": 1113 }, { "epoch": 0.11327325647902182, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.8365, "step": 1114 }, { "epoch": 0.11337493803780012, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.795, "step": 1115 }, { "epoch": 0.11347661959657841, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.7282, "step": 1116 }, { "epoch": 0.1135783011553567, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.774, "step": 1117 }, { "epoch": 0.11367998271413501, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7359, "step": 1118 }, { "epoch": 0.11378166427291331, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.8277, "step": 1119 }, { "epoch": 0.1138833458316916, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.8568, "step": 1120 }, { "epoch": 0.1139850273904699, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.8186, "step": 1121 }, { "epoch": 0.11408670894924819, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.764, "step": 1122 }, { "epoch": 0.11418839050802648, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.772, "step": 1123 }, { "epoch": 0.11429007206680479, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.8044, "step": 1124 }, { "epoch": 0.11439175362558308, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.8177, "step": 1125 }, { "epoch": 0.11449343518436138, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.8175, "step": 1126 }, { "epoch": 0.11459511674313967, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.8023, "step": 1127 }, { "epoch": 0.11469679830191797, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.7754, "step": 1128 }, { "epoch": 0.11479847986069626, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.7804, "step": 1129 }, { "epoch": 0.11490016141947457, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.8124, "step": 1130 }, { "epoch": 0.11500184297825286, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.7606, "step": 1131 }, { "epoch": 0.11510352453703115, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.8351, "step": 1132 }, { "epoch": 0.11520520609580945, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.7849, "step": 1133 }, { "epoch": 0.11530688765458774, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7514, "step": 1134 }, { "epoch": 0.11540856921336604, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7363, "step": 1135 }, { "epoch": 0.11551025077214434, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7347, "step": 1136 }, { "epoch": 0.11561193233092264, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7926, "step": 1137 }, { "epoch": 0.11571361388970093, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.7948, "step": 1138 }, { "epoch": 0.11581529544847922, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7986, "step": 1139 }, { "epoch": 0.11591697700725752, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.7657, "step": 1140 }, { "epoch": 0.11601865856603581, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.752, "step": 1141 }, { "epoch": 0.11612034012481412, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7653, "step": 1142 }, { "epoch": 0.11622202168359241, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.801, "step": 1143 }, { "epoch": 0.11632370324237071, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.8259, "step": 1144 }, { "epoch": 0.116425384801149, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6915, "step": 1145 }, { "epoch": 0.1165270663599273, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7973, "step": 1146 }, { "epoch": 0.11662874791870559, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7581, "step": 1147 }, { "epoch": 0.1167304294774839, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7106, "step": 1148 }, { "epoch": 0.11683211103626219, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.8247, "step": 1149 }, { "epoch": 0.11693379259504048, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7923, "step": 1150 }, { "epoch": 0.11703547415381878, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7563, "step": 1151 }, { "epoch": 0.11713715571259707, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.8211, "step": 1152 }, { "epoch": 0.11723883727137537, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.7994, "step": 1153 }, { "epoch": 0.11734051883015367, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7964, "step": 1154 }, { "epoch": 0.11744220038893197, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7692, "step": 1155 }, { "epoch": 0.11754388194771026, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7474, "step": 1156 }, { "epoch": 0.11764556350648855, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8117, "step": 1157 }, { "epoch": 0.11774724506526685, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7315, "step": 1158 }, { "epoch": 0.11784892662404514, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7405, "step": 1159 }, { "epoch": 0.11795060818282345, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7342, "step": 1160 }, { "epoch": 0.11805228974160174, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8057, "step": 1161 }, { "epoch": 0.11815397130038004, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.727, "step": 1162 }, { "epoch": 0.11825565285915833, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7222, "step": 1163 }, { "epoch": 0.11835733441793662, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.8001, "step": 1164 }, { "epoch": 0.11845901597671492, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.719, "step": 1165 }, { "epoch": 0.11856069753549323, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7512, "step": 1166 }, { "epoch": 0.11866237909427152, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.765, "step": 1167 }, { "epoch": 0.11876406065304981, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6882, "step": 1168 }, { "epoch": 0.11886574221182811, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.8208, "step": 1169 }, { "epoch": 0.1189674237706064, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.8008, "step": 1170 }, { "epoch": 0.1190691053293847, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7341, "step": 1171 }, { "epoch": 0.119170786888163, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.802, "step": 1172 }, { "epoch": 0.1192724684469413, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.74, "step": 1173 }, { "epoch": 0.11937415000571959, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.8017, "step": 1174 }, { "epoch": 0.11947583156449788, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7054, "step": 1175 }, { "epoch": 0.11957751312327618, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.765, "step": 1176 }, { "epoch": 0.11967919468205447, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7601, "step": 1177 }, { "epoch": 0.11978087624083278, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7667, "step": 1178 }, { "epoch": 0.11988255779961107, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7817, "step": 1179 }, { "epoch": 0.11998423935838937, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7647, "step": 1180 }, { "epoch": 0.12008592091716766, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7701, "step": 1181 }, { "epoch": 0.12018760247594595, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7251, "step": 1182 }, { "epoch": 0.12028928403472425, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7622, "step": 1183 }, { "epoch": 0.12039096559350256, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.8365, "step": 1184 }, { "epoch": 0.12049264715228085, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.812, "step": 1185 }, { "epoch": 0.12059432871105914, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.7464, "step": 1186 }, { "epoch": 0.12069601026983744, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.7302, "step": 1187 }, { "epoch": 0.12079769182861573, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.733, "step": 1188 }, { "epoch": 0.12089937338739402, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.7314, "step": 1189 }, { "epoch": 0.12100105494617233, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6839, "step": 1190 }, { "epoch": 0.12110273650495063, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6929, "step": 1191 }, { "epoch": 0.12120441806372892, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.7416, "step": 1192 }, { "epoch": 0.12130609962250721, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6873, "step": 1193 }, { "epoch": 0.1214077811812855, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7268, "step": 1194 }, { "epoch": 0.1215094627400638, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7338, "step": 1195 }, { "epoch": 0.1216111442988421, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.836, "step": 1196 }, { "epoch": 0.1217128258576204, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7663, "step": 1197 }, { "epoch": 0.1218145074163987, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.807, "step": 1198 }, { "epoch": 0.12191618897517699, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7463, "step": 1199 }, { "epoch": 0.12201787053395528, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.8054, "step": 1200 }, { "epoch": 0.12211955209273358, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7266, "step": 1201 }, { "epoch": 0.12222123365151187, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7968, "step": 1202 }, { "epoch": 0.12232291521029018, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7131, "step": 1203 }, { "epoch": 0.12242459676906847, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.8452, "step": 1204 }, { "epoch": 0.12252627832784677, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.8087, "step": 1205 }, { "epoch": 0.12262795988662506, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7691, "step": 1206 }, { "epoch": 0.12272964144540335, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7694, "step": 1207 }, { "epoch": 0.12283132300418165, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7805, "step": 1208 }, { "epoch": 0.12293300456295995, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7803, "step": 1209 }, { "epoch": 0.12303468612173825, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7658, "step": 1210 }, { "epoch": 0.12313636768051654, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7674, "step": 1211 }, { "epoch": 0.12323804923929484, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7158, "step": 1212 }, { "epoch": 0.12333973079807313, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7606, "step": 1213 }, { "epoch": 0.12344141235685142, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7539, "step": 1214 }, { "epoch": 0.12354309391562973, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7732, "step": 1215 }, { "epoch": 0.12364477547440803, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7843, "step": 1216 }, { "epoch": 0.12374645703318632, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.8134, "step": 1217 }, { "epoch": 0.12384813859196461, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7009, "step": 1218 }, { "epoch": 0.1239498201507429, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.8168, "step": 1219 }, { "epoch": 0.1240515017095212, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7382, "step": 1220 }, { "epoch": 0.12415318326829951, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7442, "step": 1221 }, { "epoch": 0.1242548648270778, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.8461, "step": 1222 }, { "epoch": 0.1243565463858561, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7661, "step": 1223 }, { "epoch": 0.12445822794463439, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.762, "step": 1224 }, { "epoch": 0.12455990950341268, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7302, "step": 1225 }, { "epoch": 0.12466159106219098, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.7769, "step": 1226 }, { "epoch": 0.12476327262096928, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7469, "step": 1227 }, { "epoch": 0.12486495417974758, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7486, "step": 1228 }, { "epoch": 0.12496663573852587, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7501, "step": 1229 }, { "epoch": 0.12506831729730417, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7341, "step": 1230 }, { "epoch": 0.12516999885608246, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7453, "step": 1231 }, { "epoch": 0.12527168041486075, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7882, "step": 1232 }, { "epoch": 0.12537336197363905, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7361, "step": 1233 }, { "epoch": 0.12547504353241734, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7391, "step": 1234 }, { "epoch": 0.12557672509119563, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7482, "step": 1235 }, { "epoch": 0.12567840664997396, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7512, "step": 1236 }, { "epoch": 0.12578008820875225, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7942, "step": 1237 }, { "epoch": 0.12588176976753054, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.8066, "step": 1238 }, { "epoch": 0.12598345132630884, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7552, "step": 1239 }, { "epoch": 0.12608513288508713, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.6601, "step": 1240 }, { "epoch": 0.12618681444386542, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.8199, "step": 1241 }, { "epoch": 0.12628849600264372, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.8033, "step": 1242 }, { "epoch": 0.126390177561422, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.708, "step": 1243 }, { "epoch": 0.1264918591202003, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7213, "step": 1244 }, { "epoch": 0.1265935406789786, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7336, "step": 1245 }, { "epoch": 0.1266952222377569, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7938, "step": 1246 }, { "epoch": 0.1267969037965352, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7526, "step": 1247 }, { "epoch": 0.1268985853553135, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6914, "step": 1248 }, { "epoch": 0.1270002669140918, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.7773, "step": 1249 }, { "epoch": 0.1271019484728701, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.7379, "step": 1250 }, { "epoch": 0.1272036300316484, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.756, "step": 1251 }, { "epoch": 0.12730531159042668, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7439, "step": 1252 }, { "epoch": 0.12740699314920498, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7325, "step": 1253 }, { "epoch": 0.12750867470798327, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.8304, "step": 1254 }, { "epoch": 0.12761035626676157, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.8108, "step": 1255 }, { "epoch": 0.12771203782553986, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7706, "step": 1256 }, { "epoch": 0.12781371938431815, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.7721, "step": 1257 }, { "epoch": 0.12791540094309645, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.716, "step": 1258 }, { "epoch": 0.12801708250187474, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.8283, "step": 1259 }, { "epoch": 0.12811876406065306, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7438, "step": 1260 }, { "epoch": 0.12822044561943136, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7609, "step": 1261 }, { "epoch": 0.12832212717820965, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7445, "step": 1262 }, { "epoch": 0.12842380873698794, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7293, "step": 1263 }, { "epoch": 0.12852549029576624, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.7813, "step": 1264 }, { "epoch": 0.12862717185454453, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7922, "step": 1265 }, { "epoch": 0.12872885341332282, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7412, "step": 1266 }, { "epoch": 0.12883053497210112, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7378, "step": 1267 }, { "epoch": 0.1289322165308794, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7551, "step": 1268 }, { "epoch": 0.1290338980896577, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.779, "step": 1269 }, { "epoch": 0.129135579648436, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.7811, "step": 1270 }, { "epoch": 0.1292372612072143, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7487, "step": 1271 }, { "epoch": 0.12933894276599261, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.6837, "step": 1272 }, { "epoch": 0.1294406243247709, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.7726, "step": 1273 }, { "epoch": 0.1295423058835492, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.7636, "step": 1274 }, { "epoch": 0.1296439874423275, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7639, "step": 1275 }, { "epoch": 0.1297456690011058, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.817, "step": 1276 }, { "epoch": 0.12984735055988408, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.763, "step": 1277 }, { "epoch": 0.12994903211866238, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7636, "step": 1278 }, { "epoch": 0.13005071367744067, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7346, "step": 1279 }, { "epoch": 0.13015239523621897, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7621, "step": 1280 }, { "epoch": 0.13025407679499726, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7249, "step": 1281 }, { "epoch": 0.13035575835377555, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7408, "step": 1282 }, { "epoch": 0.13045743991255385, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7338, "step": 1283 }, { "epoch": 0.13055912147133217, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7501, "step": 1284 }, { "epoch": 0.13066080303011046, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7303, "step": 1285 }, { "epoch": 0.13076248458888876, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7866, "step": 1286 }, { "epoch": 0.13086416614766705, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7747, "step": 1287 }, { "epoch": 0.13096584770644534, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.8164, "step": 1288 }, { "epoch": 0.13106752926522364, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7105, "step": 1289 }, { "epoch": 0.13116921082400193, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7104, "step": 1290 }, { "epoch": 0.13127089238278022, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7551, "step": 1291 }, { "epoch": 0.13137257394155852, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.8149, "step": 1292 }, { "epoch": 0.1314742555003368, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7947, "step": 1293 }, { "epoch": 0.1315759370591151, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.8442, "step": 1294 }, { "epoch": 0.1316776186178934, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7244, "step": 1295 }, { "epoch": 0.13177930017667172, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7775, "step": 1296 }, { "epoch": 0.13188098173545001, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7756, "step": 1297 }, { "epoch": 0.1319826632942283, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7467, "step": 1298 }, { "epoch": 0.1320843448530066, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7429, "step": 1299 }, { "epoch": 0.1321860264117849, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7654, "step": 1300 }, { "epoch": 0.1322877079705632, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7014, "step": 1301 }, { "epoch": 0.13238938952934148, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7339, "step": 1302 }, { "epoch": 0.13249107108811978, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7738, "step": 1303 }, { "epoch": 0.13259275264689807, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7056, "step": 1304 }, { "epoch": 0.13269443420567636, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7472, "step": 1305 }, { "epoch": 0.13279611576445466, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7511, "step": 1306 }, { "epoch": 0.13289779732323295, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.8341, "step": 1307 }, { "epoch": 0.13299947888201127, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.7422, "step": 1308 }, { "epoch": 0.13310116044078957, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.745, "step": 1309 }, { "epoch": 0.13320284199956786, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7862, "step": 1310 }, { "epoch": 0.13330452355834616, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.7328, "step": 1311 }, { "epoch": 0.13340620511712445, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7351, "step": 1312 }, { "epoch": 0.13350788667590274, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.7622, "step": 1313 }, { "epoch": 0.13360956823468104, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7401, "step": 1314 }, { "epoch": 0.13371124979345933, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7696, "step": 1315 }, { "epoch": 0.13381293135223762, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7736, "step": 1316 }, { "epoch": 0.13391461291101592, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.7818, "step": 1317 }, { "epoch": 0.1340162944697942, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7763, "step": 1318 }, { "epoch": 0.1341179760285725, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.7045, "step": 1319 }, { "epoch": 0.13421965758735083, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.7131, "step": 1320 }, { "epoch": 0.13432133914612912, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7626, "step": 1321 }, { "epoch": 0.13442302070490741, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7848, "step": 1322 }, { "epoch": 0.1345247022636857, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7532, "step": 1323 }, { "epoch": 0.134626383822464, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7635, "step": 1324 }, { "epoch": 0.1347280653812423, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7558, "step": 1325 }, { "epoch": 0.1348297469400206, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.8283, "step": 1326 }, { "epoch": 0.13493142849879888, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.7598, "step": 1327 }, { "epoch": 0.13503311005757718, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7181, "step": 1328 }, { "epoch": 0.13513479161635547, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7498, "step": 1329 }, { "epoch": 0.13523647317513376, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7518, "step": 1330 }, { "epoch": 0.13533815473391206, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7221, "step": 1331 }, { "epoch": 0.13543983629269038, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7365, "step": 1332 }, { "epoch": 0.13554151785146867, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.7975, "step": 1333 }, { "epoch": 0.13564319941024697, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.7172, "step": 1334 }, { "epoch": 0.13574488096902526, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7796, "step": 1335 }, { "epoch": 0.13584656252780355, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.753, "step": 1336 }, { "epoch": 0.13594824408658185, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7518, "step": 1337 }, { "epoch": 0.13604992564536014, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.7312, "step": 1338 }, { "epoch": 0.13615160720413844, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7118, "step": 1339 }, { "epoch": 0.13625328876291673, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7147, "step": 1340 }, { "epoch": 0.13635497032169502, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7535, "step": 1341 }, { "epoch": 0.13645665188047332, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7651, "step": 1342 }, { "epoch": 0.1365583334392516, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7713, "step": 1343 }, { "epoch": 0.13666001499802993, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7759, "step": 1344 }, { "epoch": 0.13676169655680823, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7579, "step": 1345 }, { "epoch": 0.13686337811558652, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7188, "step": 1346 }, { "epoch": 0.13696505967436481, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7917, "step": 1347 }, { "epoch": 0.1370667412331431, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7419, "step": 1348 }, { "epoch": 0.1371684227919214, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7777, "step": 1349 }, { "epoch": 0.1372701043506997, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7106, "step": 1350 }, { "epoch": 0.137371785909478, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7276, "step": 1351 }, { "epoch": 0.13747346746825628, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.6742, "step": 1352 }, { "epoch": 0.13757514902703458, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7615, "step": 1353 }, { "epoch": 0.13767683058581287, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7383, "step": 1354 }, { "epoch": 0.13777851214459116, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6922, "step": 1355 }, { "epoch": 0.13788019370336949, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7494, "step": 1356 }, { "epoch": 0.13798187526214778, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7874, "step": 1357 }, { "epoch": 0.13808355682092607, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7756, "step": 1358 }, { "epoch": 0.13818523837970437, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7477, "step": 1359 }, { "epoch": 0.13828691993848266, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7848, "step": 1360 }, { "epoch": 0.13838860149726095, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.782, "step": 1361 }, { "epoch": 0.13849028305603925, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.8124, "step": 1362 }, { "epoch": 0.13859196461481754, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.737, "step": 1363 }, { "epoch": 0.13869364617359584, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7795, "step": 1364 }, { "epoch": 0.13879532773237413, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.801, "step": 1365 }, { "epoch": 0.13889700929115242, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.7043, "step": 1366 }, { "epoch": 0.13899869084993072, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.8057, "step": 1367 }, { "epoch": 0.13910037240870904, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.7576, "step": 1368 }, { "epoch": 0.13920205396748733, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7519, "step": 1369 }, { "epoch": 0.13930373552626563, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7847, "step": 1370 }, { "epoch": 0.13940541708504392, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7554, "step": 1371 }, { "epoch": 0.1395070986438222, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7547, "step": 1372 }, { "epoch": 0.1396087802026005, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.8331, "step": 1373 }, { "epoch": 0.1397104617613788, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7868, "step": 1374 }, { "epoch": 0.1398121433201571, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7622, "step": 1375 }, { "epoch": 0.1399138248789354, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7615, "step": 1376 }, { "epoch": 0.14001550643771368, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.768, "step": 1377 }, { "epoch": 0.14011718799649198, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7781, "step": 1378 }, { "epoch": 0.14021886955527027, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6745, "step": 1379 }, { "epoch": 0.1403205511140486, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7431, "step": 1380 }, { "epoch": 0.14042223267282689, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7573, "step": 1381 }, { "epoch": 0.14052391423160518, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7759, "step": 1382 }, { "epoch": 0.14062559579038347, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7539, "step": 1383 }, { "epoch": 0.14072727734916177, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7877, "step": 1384 }, { "epoch": 0.14082895890794006, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6706, "step": 1385 }, { "epoch": 0.14093064046671835, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7903, "step": 1386 }, { "epoch": 0.14103232202549665, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7173, "step": 1387 }, { "epoch": 0.14113400358427494, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7371, "step": 1388 }, { "epoch": 0.14123568514305324, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7518, "step": 1389 }, { "epoch": 0.14133736670183153, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.718, "step": 1390 }, { "epoch": 0.14143904826060982, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.7683, "step": 1391 }, { "epoch": 0.14154072981938814, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7493, "step": 1392 }, { "epoch": 0.14164241137816644, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.7558, "step": 1393 }, { "epoch": 0.14174409293694473, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7258, "step": 1394 }, { "epoch": 0.14184577449572303, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7299, "step": 1395 }, { "epoch": 0.14194745605450132, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7058, "step": 1396 }, { "epoch": 0.1420491376132796, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7874, "step": 1397 }, { "epoch": 0.1421508191720579, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7452, "step": 1398 }, { "epoch": 0.1422525007308362, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.7673, "step": 1399 }, { "epoch": 0.1423541822896145, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.7199, "step": 1400 }, { "epoch": 0.1424558638483928, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7077, "step": 1401 }, { "epoch": 0.14255754540717108, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7797, "step": 1402 }, { "epoch": 0.14265922696594938, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.7858, "step": 1403 }, { "epoch": 0.1427609085247277, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.7234, "step": 1404 }, { "epoch": 0.142862590083506, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.736, "step": 1405 }, { "epoch": 0.14296427164228429, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7419, "step": 1406 }, { "epoch": 0.14306595320106258, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7145, "step": 1407 }, { "epoch": 0.14316763475984087, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7836, "step": 1408 }, { "epoch": 0.14326931631861917, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.7112, "step": 1409 }, { "epoch": 0.14337099787739746, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.7446, "step": 1410 }, { "epoch": 0.14347267943617575, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.7952, "step": 1411 }, { "epoch": 0.14357436099495405, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.7623, "step": 1412 }, { "epoch": 0.14367604255373234, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7319, "step": 1413 }, { "epoch": 0.14377772411251064, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.793, "step": 1414 }, { "epoch": 0.14387940567128893, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7762, "step": 1415 }, { "epoch": 0.14398108723006725, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.8109, "step": 1416 }, { "epoch": 0.14408276878884554, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7015, "step": 1417 }, { "epoch": 0.14418445034762384, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7708, "step": 1418 }, { "epoch": 0.14428613190640213, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.713, "step": 1419 }, { "epoch": 0.14438781346518043, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7238, "step": 1420 }, { "epoch": 0.14448949502395872, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.77, "step": 1421 }, { "epoch": 0.144591176582737, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.7768, "step": 1422 }, { "epoch": 0.1446928581415153, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.7715, "step": 1423 }, { "epoch": 0.1447945397002936, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.8237, "step": 1424 }, { "epoch": 0.1448962212590719, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.7804, "step": 1425 }, { "epoch": 0.1449979028178502, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.7198, "step": 1426 }, { "epoch": 0.14509958437662848, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.8006, "step": 1427 }, { "epoch": 0.1452012659354068, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7845, "step": 1428 }, { "epoch": 0.1453029474941851, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.775, "step": 1429 }, { "epoch": 0.1454046290529634, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7629, "step": 1430 }, { "epoch": 0.14550631061174168, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7838, "step": 1431 }, { "epoch": 0.14560799217051998, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7217, "step": 1432 }, { "epoch": 0.14570967372929827, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.8087, "step": 1433 }, { "epoch": 0.14581135528807657, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7876, "step": 1434 }, { "epoch": 0.14591303684685486, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.8058, "step": 1435 }, { "epoch": 0.14601471840563315, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.8048, "step": 1436 }, { "epoch": 0.14611639996441145, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.727, "step": 1437 }, { "epoch": 0.14621808152318974, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.8061, "step": 1438 }, { "epoch": 0.14631976308196804, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7914, "step": 1439 }, { "epoch": 0.14642144464074633, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7213, "step": 1440 }, { "epoch": 0.14652312619952465, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7535, "step": 1441 }, { "epoch": 0.14662480775830294, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7393, "step": 1442 }, { "epoch": 0.14672648931708124, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7575, "step": 1443 }, { "epoch": 0.14682817087585953, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7674, "step": 1444 }, { "epoch": 0.14692985243463783, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.7349, "step": 1445 }, { "epoch": 0.14703153399341612, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.707, "step": 1446 }, { "epoch": 0.1471332155521944, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.8016, "step": 1447 }, { "epoch": 0.1472348971109727, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7914, "step": 1448 }, { "epoch": 0.147336578669751, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.762, "step": 1449 }, { "epoch": 0.1474382602285293, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7791, "step": 1450 }, { "epoch": 0.1475399417873076, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.847, "step": 1451 }, { "epoch": 0.14764162334608588, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.8126, "step": 1452 }, { "epoch": 0.1477433049048642, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7861, "step": 1453 }, { "epoch": 0.1478449864636425, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.708, "step": 1454 }, { "epoch": 0.1479466680224208, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7236, "step": 1455 }, { "epoch": 0.14804834958119908, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6702, "step": 1456 }, { "epoch": 0.14815003113997738, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.835, "step": 1457 }, { "epoch": 0.14825171269875567, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.7007, "step": 1458 }, { "epoch": 0.14835339425753397, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.724, "step": 1459 }, { "epoch": 0.14845507581631226, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6749, "step": 1460 }, { "epoch": 0.14855675737509055, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.7015, "step": 1461 }, { "epoch": 0.14865843893386885, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7596, "step": 1462 }, { "epoch": 0.14876012049264714, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7954, "step": 1463 }, { "epoch": 0.14886180205142543, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7359, "step": 1464 }, { "epoch": 0.14896348361020376, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7783, "step": 1465 }, { "epoch": 0.14906516516898205, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7015, "step": 1466 }, { "epoch": 0.14916684672776034, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.8073, "step": 1467 }, { "epoch": 0.14926852828653864, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7495, "step": 1468 }, { "epoch": 0.14937020984531693, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7353, "step": 1469 }, { "epoch": 0.14947189140409523, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.8099, "step": 1470 }, { "epoch": 0.14957357296287352, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7412, "step": 1471 }, { "epoch": 0.1496752545216518, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.7316, "step": 1472 }, { "epoch": 0.1497769360804301, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7171, "step": 1473 }, { "epoch": 0.1498786176392084, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7919, "step": 1474 }, { "epoch": 0.1499802991979867, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7501, "step": 1475 }, { "epoch": 0.150081980756765, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7415, "step": 1476 }, { "epoch": 0.13673552045362494, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.7336, "step": 1477 }, { "epoch": 0.136828096973905, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7345, "step": 1478 }, { "epoch": 0.13692067349418505, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.7464, "step": 1479 }, { "epoch": 0.13701325001446507, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7494, "step": 1480 }, { "epoch": 0.13710582653474512, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.7619, "step": 1481 }, { "epoch": 0.13719840305502518, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.7448, "step": 1482 }, { "epoch": 0.1372909795753052, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.7474, "step": 1483 }, { "epoch": 0.13738355609558525, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.7293, "step": 1484 }, { "epoch": 0.1374761326158653, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7892, "step": 1485 }, { "epoch": 0.13756870913614536, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.7649, "step": 1486 }, { "epoch": 0.13766128565642538, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7535, "step": 1487 }, { "epoch": 0.13775386217670543, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7285, "step": 1488 }, { "epoch": 0.13784643869698548, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7585, "step": 1489 }, { "epoch": 0.1379390152172655, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7321, "step": 1490 }, { "epoch": 0.13803159173754556, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.702, "step": 1491 }, { "epoch": 0.1381241682578256, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7497, "step": 1492 }, { "epoch": 0.13821674477810567, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.6854, "step": 1493 }, { "epoch": 0.1383093212983857, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6824, "step": 1494 }, { "epoch": 0.13840189781866574, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.7125, "step": 1495 }, { "epoch": 0.1384944743389458, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7885, "step": 1496 }, { "epoch": 0.13858705085922582, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7029, "step": 1497 }, { "epoch": 0.13867962737950587, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7193, "step": 1498 }, { "epoch": 0.13877220389978592, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7591, "step": 1499 }, { "epoch": 0.13886478042006595, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.7179, "step": 1500 }, { "epoch": 0.138957356940346, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.7505, "step": 1501 }, { "epoch": 0.13904993346062605, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.8072, "step": 1502 }, { "epoch": 0.1391425099809061, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.71, "step": 1503 }, { "epoch": 0.13923508650118613, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.7634, "step": 1504 }, { "epoch": 0.13932766302146618, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7296, "step": 1505 }, { "epoch": 0.13942023954174623, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7251, "step": 1506 }, { "epoch": 0.13951281606202626, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.736, "step": 1507 }, { "epoch": 0.1396053925823063, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7416, "step": 1508 }, { "epoch": 0.13969796910258636, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.7921, "step": 1509 }, { "epoch": 0.1397905456228664, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7154, "step": 1510 }, { "epoch": 0.13988312214314644, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.7204, "step": 1511 }, { "epoch": 0.1399756986634265, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.6484, "step": 1512 }, { "epoch": 0.14006827518370654, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.764, "step": 1513 }, { "epoch": 0.14016085170398657, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.7804, "step": 1514 }, { "epoch": 0.14025342822426662, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7415, "step": 1515 }, { "epoch": 0.14034600474454667, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.7211, "step": 1516 }, { "epoch": 0.1404385812648267, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.7944, "step": 1517 }, { "epoch": 0.14053115778510675, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7671, "step": 1518 }, { "epoch": 0.1406237343053868, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7775, "step": 1519 }, { "epoch": 0.14071631082566685, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.7762, "step": 1520 }, { "epoch": 0.14080888734594688, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.7523, "step": 1521 }, { "epoch": 0.14090146386622693, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6953, "step": 1522 }, { "epoch": 0.14099404038650698, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7305, "step": 1523 }, { "epoch": 0.141086616906787, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7339, "step": 1524 }, { "epoch": 0.14117919342706706, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.7074, "step": 1525 }, { "epoch": 0.1412717699473471, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6777, "step": 1526 }, { "epoch": 0.14136434646762716, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7668, "step": 1527 }, { "epoch": 0.14145692298790719, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7179, "step": 1528 }, { "epoch": 0.14154949950818724, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7723, "step": 1529 }, { "epoch": 0.1416420760284673, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7511, "step": 1530 }, { "epoch": 0.14173465254874731, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7889, "step": 1531 }, { "epoch": 0.14182722906902737, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7614, "step": 1532 }, { "epoch": 0.14191980558930742, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6925, "step": 1533 }, { "epoch": 0.14201238210958744, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7316, "step": 1534 }, { "epoch": 0.1421049586298675, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7882, "step": 1535 }, { "epoch": 0.14219753515014755, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7725, "step": 1536 }, { "epoch": 0.1422901116704276, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6861, "step": 1537 }, { "epoch": 0.14238268819070762, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.749, "step": 1538 }, { "epoch": 0.14247526471098768, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7974, "step": 1539 }, { "epoch": 0.14256784123126773, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7325, "step": 1540 }, { "epoch": 0.14266041775154775, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6767, "step": 1541 }, { "epoch": 0.1427529942718278, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7416, "step": 1542 }, { "epoch": 0.14284557079210786, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7867, "step": 1543 }, { "epoch": 0.1429381473123879, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7044, "step": 1544 }, { "epoch": 0.14303072383266793, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.697, "step": 1545 }, { "epoch": 0.14312330035294799, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7754, "step": 1546 }, { "epoch": 0.14321587687322804, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.7834, "step": 1547 }, { "epoch": 0.14330845339350806, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.8335, "step": 1548 }, { "epoch": 0.14340102991378811, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.755, "step": 1549 }, { "epoch": 0.14349360643406817, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7561, "step": 1550 }, { "epoch": 0.1435861829543482, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.7053, "step": 1551 }, { "epoch": 0.14367875947462824, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.7926, "step": 1552 }, { "epoch": 0.1437713359949083, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7589, "step": 1553 }, { "epoch": 0.14386391251518835, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.7877, "step": 1554 }, { "epoch": 0.14395648903546837, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.7595, "step": 1555 }, { "epoch": 0.14404906555574842, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.7671, "step": 1556 }, { "epoch": 0.14414164207602848, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7392, "step": 1557 }, { "epoch": 0.1442342185963085, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.7876, "step": 1558 }, { "epoch": 0.14432679511658855, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.7606, "step": 1559 }, { "epoch": 0.1444193716368686, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.6797, "step": 1560 }, { "epoch": 0.14451194815714866, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6944, "step": 1561 }, { "epoch": 0.14460452467742868, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.7753, "step": 1562 }, { "epoch": 0.14469710119770873, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.7208, "step": 1563 }, { "epoch": 0.14478967771798879, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.7764, "step": 1564 }, { "epoch": 0.1448822542382688, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.77, "step": 1565 }, { "epoch": 0.14497483075854886, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.7693, "step": 1566 }, { "epoch": 0.1450674072788289, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7288, "step": 1567 }, { "epoch": 0.14515998379910894, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.6845, "step": 1568 }, { "epoch": 0.145252560319389, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6983, "step": 1569 }, { "epoch": 0.14534513683966904, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6766, "step": 1570 }, { "epoch": 0.1454377133599491, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7793, "step": 1571 }, { "epoch": 0.14553028988022912, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6795, "step": 1572 }, { "epoch": 0.14562286640050917, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.8063, "step": 1573 }, { "epoch": 0.14571544292078922, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.72, "step": 1574 }, { "epoch": 0.14580801944106925, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.818, "step": 1575 }, { "epoch": 0.1459005959613493, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.724, "step": 1576 }, { "epoch": 0.14599317248162935, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7855, "step": 1577 }, { "epoch": 0.1460857490019094, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7232, "step": 1578 }, { "epoch": 0.14617832552218943, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7863, "step": 1579 }, { "epoch": 0.14627090204246948, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7437, "step": 1580 }, { "epoch": 0.14636347856274953, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.7387, "step": 1581 }, { "epoch": 0.14645605508302956, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7742, "step": 1582 }, { "epoch": 0.1465486316033096, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.7453, "step": 1583 }, { "epoch": 0.14664120812358966, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6541, "step": 1584 }, { "epoch": 0.14673378464386969, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7074, "step": 1585 }, { "epoch": 0.14682636116414974, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7341, "step": 1586 }, { "epoch": 0.1469189376844298, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.787, "step": 1587 }, { "epoch": 0.14701151420470984, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7208, "step": 1588 }, { "epoch": 0.14710409072498987, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.7693, "step": 1589 }, { "epoch": 0.14719666724526992, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7527, "step": 1590 }, { "epoch": 0.14728924376554997, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6955, "step": 1591 }, { "epoch": 0.14738182028583, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6725, "step": 1592 }, { "epoch": 0.14747439680611005, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.722, "step": 1593 }, { "epoch": 0.1475669733263901, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7813, "step": 1594 }, { "epoch": 0.14765954984667015, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7214, "step": 1595 }, { "epoch": 0.14775212636695018, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6779, "step": 1596 }, { "epoch": 0.14784470288723023, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6628, "step": 1597 }, { "epoch": 0.14793727940751028, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.7532, "step": 1598 }, { "epoch": 0.1480298559277903, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7023, "step": 1599 }, { "epoch": 0.14812243244807036, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.7006, "step": 1600 }, { "epoch": 0.1482150089683504, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7344, "step": 1601 }, { "epoch": 0.14830758548863043, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7522, "step": 1602 }, { "epoch": 0.14840016200891049, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6943, "step": 1603 }, { "epoch": 0.14849273852919054, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7198, "step": 1604 }, { "epoch": 0.1485853150494706, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.7337, "step": 1605 }, { "epoch": 0.14867789156975061, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6912, "step": 1606 }, { "epoch": 0.14877046809003067, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.764, "step": 1607 }, { "epoch": 0.14886304461031072, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.8474, "step": 1608 }, { "epoch": 0.14895562113059074, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6748, "step": 1609 }, { "epoch": 0.1490481976508708, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7134, "step": 1610 }, { "epoch": 0.14914077417115085, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6912, "step": 1611 }, { "epoch": 0.1492333506914309, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7664, "step": 1612 }, { "epoch": 0.14932592721171092, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7777, "step": 1613 }, { "epoch": 0.14941850373199098, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7536, "step": 1614 }, { "epoch": 0.14951108025227103, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7571, "step": 1615 }, { "epoch": 0.14960365677255105, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.6507, "step": 1616 }, { "epoch": 0.1496962332928311, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7261, "step": 1617 }, { "epoch": 0.14978880981311116, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7554, "step": 1618 }, { "epoch": 0.14988138633339118, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.6885, "step": 1619 }, { "epoch": 0.14997396285367123, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6819, "step": 1620 }, { "epoch": 0.15006653937395129, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7647, "step": 1621 }, { "epoch": 0.15015911589423134, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.7588, "step": 1622 }, { "epoch": 0.15025169241451136, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.6873, "step": 1623 }, { "epoch": 0.15034426893479141, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6251, "step": 1624 }, { "epoch": 0.15043684545507147, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.7115, "step": 1625 }, { "epoch": 0.1505294219753515, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7399, "step": 1626 }, { "epoch": 0.15062199849563154, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7085, "step": 1627 }, { "epoch": 0.1507145750159116, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.6791, "step": 1628 }, { "epoch": 0.15080715153619165, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6827, "step": 1629 }, { "epoch": 0.15089972805647167, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7669, "step": 1630 }, { "epoch": 0.15099230457675172, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7021, "step": 1631 }, { "epoch": 0.15108488109703178, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.705, "step": 1632 }, { "epoch": 0.1511774576173118, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7375, "step": 1633 }, { "epoch": 0.15127003413759185, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.8, "step": 1634 }, { "epoch": 0.1513626106578719, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7563, "step": 1635 }, { "epoch": 0.15145518717815193, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.79, "step": 1636 }, { "epoch": 0.15154776369843198, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7276, "step": 1637 }, { "epoch": 0.15164034021871203, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7292, "step": 1638 }, { "epoch": 0.15173291673899209, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7643, "step": 1639 }, { "epoch": 0.1518254932592721, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.6778, "step": 1640 }, { "epoch": 0.15191806977955216, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7134, "step": 1641 }, { "epoch": 0.15201064629983221, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7491, "step": 1642 }, { "epoch": 0.15210322282011224, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7118, "step": 1643 }, { "epoch": 0.1521957993403923, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7222, "step": 1644 }, { "epoch": 0.15228837586067234, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.667, "step": 1645 }, { "epoch": 0.1523809523809524, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6846, "step": 1646 }, { "epoch": 0.15247352890123242, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.733, "step": 1647 }, { "epoch": 0.15256610542151247, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6514, "step": 1648 }, { "epoch": 0.15265868194179252, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.7198, "step": 1649 }, { "epoch": 0.15275125846207255, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6607, "step": 1650 }, { "epoch": 0.1528438349823526, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7451, "step": 1651 }, { "epoch": 0.15293641150263265, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7399, "step": 1652 }, { "epoch": 0.15302898802291268, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7182, "step": 1653 }, { "epoch": 0.15312156454319273, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7885, "step": 1654 }, { "epoch": 0.15321414106347278, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7413, "step": 1655 }, { "epoch": 0.15330671758375283, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6885, "step": 1656 }, { "epoch": 0.15339929410403286, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7265, "step": 1657 }, { "epoch": 0.1534918706243129, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7435, "step": 1658 }, { "epoch": 0.15358444714459296, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7849, "step": 1659 }, { "epoch": 0.15367702366487299, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.673, "step": 1660 }, { "epoch": 0.15376960018515304, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7819, "step": 1661 }, { "epoch": 0.1538621767054331, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.767, "step": 1662 }, { "epoch": 0.15395475322571314, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.8164, "step": 1663 }, { "epoch": 0.15404732974599317, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.7146, "step": 1664 }, { "epoch": 0.15413990626627322, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.729, "step": 1665 }, { "epoch": 0.15423248278655327, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6952, "step": 1666 }, { "epoch": 0.1543250593068333, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7781, "step": 1667 }, { "epoch": 0.15441763582711335, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6934, "step": 1668 }, { "epoch": 0.1545102123473934, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.7041, "step": 1669 }, { "epoch": 0.15460278886767342, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7173, "step": 1670 }, { "epoch": 0.15469536538795348, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7129, "step": 1671 }, { "epoch": 0.15478794190823353, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7837, "step": 1672 }, { "epoch": 0.15488051842851358, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6999, "step": 1673 }, { "epoch": 0.1549730949487936, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.6898, "step": 1674 }, { "epoch": 0.15506567146907366, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7481, "step": 1675 }, { "epoch": 0.1551582479893537, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.7452, "step": 1676 }, { "epoch": 0.15525082450963373, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6663, "step": 1677 }, { "epoch": 0.15534340102991379, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7347, "step": 1678 }, { "epoch": 0.15543597755019384, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6629, "step": 1679 }, { "epoch": 0.15552855407047386, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7273, "step": 1680 }, { "epoch": 0.15562113059075391, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.6701, "step": 1681 }, { "epoch": 0.15571370711103397, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6781, "step": 1682 }, { "epoch": 0.15580628363131402, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7793, "step": 1683 }, { "epoch": 0.15589886015159404, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7159, "step": 1684 }, { "epoch": 0.1559914366718741, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7607, "step": 1685 }, { "epoch": 0.15608401319215415, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.77, "step": 1686 }, { "epoch": 0.15617658971243417, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7372, "step": 1687 }, { "epoch": 0.15626916623271422, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.7087, "step": 1688 }, { "epoch": 0.15636174275299428, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6929, "step": 1689 }, { "epoch": 0.15645431927327433, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7713, "step": 1690 }, { "epoch": 0.15654689579355435, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7394, "step": 1691 }, { "epoch": 0.1566394723138344, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6709, "step": 1692 }, { "epoch": 0.15673204883411446, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7308, "step": 1693 }, { "epoch": 0.15682462535439448, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6811, "step": 1694 }, { "epoch": 0.15691720187467453, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7271, "step": 1695 }, { "epoch": 0.15700977839495459, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7058, "step": 1696 }, { "epoch": 0.1571023549152346, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7717, "step": 1697 }, { "epoch": 0.15719493143551466, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.6923, "step": 1698 }, { "epoch": 0.15728750795579471, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7289, "step": 1699 }, { "epoch": 0.15738008447607477, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.73, "step": 1700 }, { "epoch": 0.1574726609963548, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6899, "step": 1701 }, { "epoch": 0.15756523751663484, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.7319, "step": 1702 }, { "epoch": 0.1576578140369149, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7535, "step": 1703 }, { "epoch": 0.15775039055719492, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6829, "step": 1704 }, { "epoch": 0.15784296707747497, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7318, "step": 1705 }, { "epoch": 0.15793554359775502, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.7082, "step": 1706 }, { "epoch": 0.15802812011803508, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6716, "step": 1707 }, { "epoch": 0.1581206966383151, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7782, "step": 1708 }, { "epoch": 0.15821327315859515, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7259, "step": 1709 }, { "epoch": 0.1583058496788752, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7213, "step": 1710 }, { "epoch": 0.15839842619915523, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7482, "step": 1711 }, { "epoch": 0.15849100271943528, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.668, "step": 1712 }, { "epoch": 0.15858357923971533, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7365, "step": 1713 }, { "epoch": 0.15867615575999536, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.6991, "step": 1714 }, { "epoch": 0.1587687322802754, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7288, "step": 1715 }, { "epoch": 0.15886130880055546, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7081, "step": 1716 }, { "epoch": 0.15895388532083551, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7055, "step": 1717 }, { "epoch": 0.15904646184111554, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7039, "step": 1718 }, { "epoch": 0.1591390383613956, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6755, "step": 1719 }, { "epoch": 0.15923161488167564, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7025, "step": 1720 }, { "epoch": 0.15932419140195567, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7182, "step": 1721 }, { "epoch": 0.15941676792223572, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.7508, "step": 1722 }, { "epoch": 0.15950934444251577, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.6867, "step": 1723 }, { "epoch": 0.15960192096279582, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7096, "step": 1724 }, { "epoch": 0.15969449748307585, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7029, "step": 1725 }, { "epoch": 0.1597870740033559, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7493, "step": 1726 }, { "epoch": 0.15987965052363595, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6887, "step": 1727 }, { "epoch": 0.15997222704391598, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7744, "step": 1728 }, { "epoch": 0.16006480356419603, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.7936, "step": 1729 }, { "epoch": 0.16015738008447608, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7217, "step": 1730 }, { "epoch": 0.1602499566047561, "grad_norm": 0.5859375, "learning_rate": 0.02, "loss": 1.6995, "step": 1731 }, { "epoch": 0.16034253312503616, "grad_norm": 0.1298828125, "learning_rate": 0.02, "loss": 1.6918, "step": 1732 }, { "epoch": 0.1604351096453162, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.7681, "step": 1733 }, { "epoch": 0.16052768616559626, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6322, "step": 1734 }, { "epoch": 0.16062026268587629, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6792, "step": 1735 }, { "epoch": 0.16071283920615634, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.7489, "step": 1736 }, { "epoch": 0.1608054157264364, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.7204, "step": 1737 }, { "epoch": 0.16089799224671641, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.6787, "step": 1738 }, { "epoch": 0.16099056876699647, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.6703, "step": 1739 }, { "epoch": 0.16108314528727652, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7162, "step": 1740 }, { "epoch": 0.16117572180755657, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6905, "step": 1741 }, { "epoch": 0.1612682983278366, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.7373, "step": 1742 }, { "epoch": 0.16136087484811665, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6403, "step": 1743 }, { "epoch": 0.1614534513683967, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7284, "step": 1744 }, { "epoch": 0.16154602788867672, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7715, "step": 1745 }, { "epoch": 0.16163860440895678, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7437, "step": 1746 }, { "epoch": 0.16173118092923683, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6859, "step": 1747 }, { "epoch": 0.16182375744951685, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7414, "step": 1748 }, { "epoch": 0.1619163339697969, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6924, "step": 1749 }, { "epoch": 0.16200891049007696, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7354, "step": 1750 }, { "epoch": 0.162101487010357, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7759, "step": 1751 }, { "epoch": 0.16219406353063703, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.68, "step": 1752 }, { "epoch": 0.16228664005091709, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7465, "step": 1753 }, { "epoch": 0.16237921657119714, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7368, "step": 1754 }, { "epoch": 0.16247179309147716, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7238, "step": 1755 }, { "epoch": 0.16256436961175721, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7112, "step": 1756 }, { "epoch": 0.16265694613203727, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7656, "step": 1757 }, { "epoch": 0.16274952265231732, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7135, "step": 1758 }, { "epoch": 0.16284209917259734, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6978, "step": 1759 }, { "epoch": 0.1629346756928774, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6596, "step": 1760 }, { "epoch": 0.16302725221315745, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.7043, "step": 1761 }, { "epoch": 0.16311982873343747, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7538, "step": 1762 }, { "epoch": 0.16321240525371752, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7601, "step": 1763 }, { "epoch": 0.16330498177399758, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7431, "step": 1764 }, { "epoch": 0.1633975582942776, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6536, "step": 1765 }, { "epoch": 0.16349013481455765, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6967, "step": 1766 }, { "epoch": 0.1635827113348377, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.7011, "step": 1767 }, { "epoch": 0.16367528785511776, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7324, "step": 1768 }, { "epoch": 0.16376786437539778, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7582, "step": 1769 }, { "epoch": 0.16386044089567783, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6755, "step": 1770 }, { "epoch": 0.16395301741595789, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.7652, "step": 1771 }, { "epoch": 0.1640455939362379, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6623, "step": 1772 }, { "epoch": 0.16413817045651796, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.717, "step": 1773 }, { "epoch": 0.16423074697679801, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7195, "step": 1774 }, { "epoch": 0.16432332349707807, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6993, "step": 1775 }, { "epoch": 0.1644159000173581, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6974, "step": 1776 }, { "epoch": 0.16450847653763814, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7481, "step": 1777 }, { "epoch": 0.1646010530579182, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.7608, "step": 1778 }, { "epoch": 0.16469362957819822, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7714, "step": 1779 }, { "epoch": 0.16478620609847827, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7118, "step": 1780 }, { "epoch": 0.16487878261875832, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6788, "step": 1781 }, { "epoch": 0.16497135913903835, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.7028, "step": 1782 }, { "epoch": 0.1650639356593184, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7656, "step": 1783 }, { "epoch": 0.16515651217959845, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6312, "step": 1784 }, { "epoch": 0.1652490886998785, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7758, "step": 1785 }, { "epoch": 0.16534166522015853, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6692, "step": 1786 }, { "epoch": 0.16543424174043858, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.762, "step": 1787 }, { "epoch": 0.16552681826071863, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.6304, "step": 1788 }, { "epoch": 0.16561939478099866, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.6829, "step": 1789 }, { "epoch": 0.1657119713012787, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7535, "step": 1790 }, { "epoch": 0.16580454782155876, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6422, "step": 1791 }, { "epoch": 0.16589712434183881, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6806, "step": 1792 }, { "epoch": 0.16598970086211884, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7147, "step": 1793 }, { "epoch": 0.1660822773823989, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6755, "step": 1794 }, { "epoch": 0.16617485390267894, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6934, "step": 1795 }, { "epoch": 0.16626743042295897, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.7774, "step": 1796 }, { "epoch": 0.16636000694323902, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7597, "step": 1797 }, { "epoch": 0.16645258346351907, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.6785, "step": 1798 }, { "epoch": 0.1665451599837991, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7124, "step": 1799 }, { "epoch": 0.16663773650407915, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7191, "step": 1800 }, { "epoch": 0.1667303130243592, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7283, "step": 1801 }, { "epoch": 0.16682288954463925, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7171, "step": 1802 }, { "epoch": 0.16691546606491928, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6692, "step": 1803 }, { "epoch": 0.16700804258519933, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7535, "step": 1804 }, { "epoch": 0.16710061910547938, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7359, "step": 1805 }, { "epoch": 0.1671931956257594, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.672, "step": 1806 }, { "epoch": 0.16728577214603946, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.7069, "step": 1807 }, { "epoch": 0.1673783486663195, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6823, "step": 1808 }, { "epoch": 0.16747092518659956, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7021, "step": 1809 }, { "epoch": 0.1675635017068796, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7331, "step": 1810 }, { "epoch": 0.16765607822715964, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6734, "step": 1811 }, { "epoch": 0.1677486547474397, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6964, "step": 1812 }, { "epoch": 0.16784123126771971, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7706, "step": 1813 }, { "epoch": 0.16793380778799977, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7134, "step": 1814 }, { "epoch": 0.16802638430827982, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6997, "step": 1815 }, { "epoch": 0.16811896082855984, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.745, "step": 1816 }, { "epoch": 0.1682115373488399, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7253, "step": 1817 }, { "epoch": 0.16830411386911995, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6337, "step": 1818 }, { "epoch": 0.1683966903894, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.663, "step": 1819 }, { "epoch": 0.16848926690968002, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.6866, "step": 1820 }, { "epoch": 0.16858184342996008, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7706, "step": 1821 }, { "epoch": 0.16867441995024013, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7335, "step": 1822 }, { "epoch": 0.16876699647052015, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6794, "step": 1823 }, { "epoch": 0.1688595729908002, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6891, "step": 1824 }, { "epoch": 0.16895214951108026, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6981, "step": 1825 }, { "epoch": 0.1690447260313603, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.752, "step": 1826 }, { "epoch": 0.16913730255164033, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7738, "step": 1827 }, { "epoch": 0.16922987907192039, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7045, "step": 1828 }, { "epoch": 0.16932245559220044, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7015, "step": 1829 }, { "epoch": 0.16941503211248046, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7152, "step": 1830 }, { "epoch": 0.16950760863276051, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7155, "step": 1831 }, { "epoch": 0.16960018515304057, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.7375, "step": 1832 }, { "epoch": 0.1696927616733206, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.742, "step": 1833 }, { "epoch": 0.16978533819360064, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7514, "step": 1834 }, { "epoch": 0.1698779147138807, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7095, "step": 1835 }, { "epoch": 0.16997049123416075, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.7184, "step": 1836 }, { "epoch": 0.17006306775444077, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7822, "step": 1837 }, { "epoch": 0.17015564427472082, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7563, "step": 1838 }, { "epoch": 0.17024822079500088, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.787, "step": 1839 }, { "epoch": 0.1703407973152809, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.7484, "step": 1840 }, { "epoch": 0.17043337383556095, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.7808, "step": 1841 }, { "epoch": 0.170525950355841, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7258, "step": 1842 }, { "epoch": 0.17061852687612106, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6565, "step": 1843 }, { "epoch": 0.17071110339640108, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.7133, "step": 1844 }, { "epoch": 0.17080367991668113, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.6565, "step": 1845 }, { "epoch": 0.17089625643696119, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7222, "step": 1846 }, { "epoch": 0.1709888329572412, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6759, "step": 1847 }, { "epoch": 0.17108140947752126, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.702, "step": 1848 }, { "epoch": 0.17117398599780131, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.7478, "step": 1849 }, { "epoch": 0.17126656251808134, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6986, "step": 1850 }, { "epoch": 0.1713591390383614, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7241, "step": 1851 }, { "epoch": 0.17145171555864144, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7057, "step": 1852 }, { "epoch": 0.1715442920789215, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.7188, "step": 1853 }, { "epoch": 0.17163686859920152, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7214, "step": 1854 }, { "epoch": 0.17172944511948157, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7221, "step": 1855 }, { "epoch": 0.17182202163976162, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6769, "step": 1856 }, { "epoch": 0.17191459816004165, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7411, "step": 1857 }, { "epoch": 0.1720071746803217, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7384, "step": 1858 }, { "epoch": 0.17209975120060175, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7282, "step": 1859 }, { "epoch": 0.1721923277208818, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.7134, "step": 1860 }, { "epoch": 0.17228490424116183, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.6696, "step": 1861 }, { "epoch": 0.17237748076144188, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.709, "step": 1862 }, { "epoch": 0.17247005728172193, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7327, "step": 1863 }, { "epoch": 0.17256263380200196, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.6961, "step": 1864 }, { "epoch": 0.172655210322282, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.6818, "step": 1865 }, { "epoch": 0.17274778684256206, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.7705, "step": 1866 }, { "epoch": 0.1728403633628421, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.7219, "step": 1867 }, { "epoch": 0.17293293988312214, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7791, "step": 1868 }, { "epoch": 0.1730255164034022, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7107, "step": 1869 }, { "epoch": 0.17311809292368224, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7457, "step": 1870 }, { "epoch": 0.17321066944396227, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7197, "step": 1871 }, { "epoch": 0.17330324596424232, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7191, "step": 1872 }, { "epoch": 0.17339582248452237, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7699, "step": 1873 }, { "epoch": 0.1734883990048024, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7782, "step": 1874 }, { "epoch": 0.17358097552508245, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7174, "step": 1875 }, { "epoch": 0.1736735520453625, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.7879, "step": 1876 }, { "epoch": 0.17376612856564255, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7067, "step": 1877 }, { "epoch": 0.17385870508592258, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.7108, "step": 1878 }, { "epoch": 0.17395128160620263, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6946, "step": 1879 }, { "epoch": 0.17404385812648268, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7228, "step": 1880 }, { "epoch": 0.1741364346467627, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7238, "step": 1881 }, { "epoch": 0.17422901116704276, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.737, "step": 1882 }, { "epoch": 0.1743215876873228, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.6407, "step": 1883 }, { "epoch": 0.17441416420760283, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.7359, "step": 1884 }, { "epoch": 0.1745067407278829, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7058, "step": 1885 }, { "epoch": 0.17459931724816294, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.8002, "step": 1886 }, { "epoch": 0.174691893768443, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7126, "step": 1887 }, { "epoch": 0.17478447028872302, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.8146, "step": 1888 }, { "epoch": 0.17487704680900307, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.6902, "step": 1889 }, { "epoch": 0.17496962332928312, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.6664, "step": 1890 }, { "epoch": 0.17506219984956314, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7447, "step": 1891 }, { "epoch": 0.1751547763698432, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.6768, "step": 1892 }, { "epoch": 0.17524735289012325, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7119, "step": 1893 }, { "epoch": 0.1753399294104033, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7018, "step": 1894 }, { "epoch": 0.17543250593068332, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7114, "step": 1895 }, { "epoch": 0.17552508245096338, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7443, "step": 1896 }, { "epoch": 0.17561765897124343, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7091, "step": 1897 }, { "epoch": 0.17571023549152345, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7352, "step": 1898 }, { "epoch": 0.1758028120118035, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7409, "step": 1899 }, { "epoch": 0.17589538853208356, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6562, "step": 1900 }, { "epoch": 0.17598796505236358, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7162, "step": 1901 }, { "epoch": 0.17608054157264363, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.731, "step": 1902 }, { "epoch": 0.1761731180929237, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7849, "step": 1903 }, { "epoch": 0.17626569461320374, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.6457, "step": 1904 }, { "epoch": 0.17635827113348376, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.7128, "step": 1905 }, { "epoch": 0.17645084765376381, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7143, "step": 1906 }, { "epoch": 0.17654342417404387, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7145, "step": 1907 }, { "epoch": 0.1766360006943239, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7816, "step": 1908 }, { "epoch": 0.17672857721460394, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6194, "step": 1909 }, { "epoch": 0.176821153734884, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7553, "step": 1910 }, { "epoch": 0.17691373025516405, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.676, "step": 1911 }, { "epoch": 0.17700630677544407, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7514, "step": 1912 }, { "epoch": 0.17709888329572412, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.756, "step": 1913 }, { "epoch": 0.17719145981600418, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.682, "step": 1914 }, { "epoch": 0.1772840363362842, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6527, "step": 1915 }, { "epoch": 0.17737661285656425, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7208, "step": 1916 }, { "epoch": 0.1774691893768443, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7489, "step": 1917 }, { "epoch": 0.17756176589712433, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.7205, "step": 1918 }, { "epoch": 0.17765434241740438, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6896, "step": 1919 }, { "epoch": 0.17774691893768443, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7096, "step": 1920 }, { "epoch": 0.17783949545796449, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6364, "step": 1921 }, { "epoch": 0.1779320719782445, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6905, "step": 1922 }, { "epoch": 0.17802464849852456, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6875, "step": 1923 }, { "epoch": 0.17811722501880461, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7383, "step": 1924 }, { "epoch": 0.17820980153908464, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7046, "step": 1925 }, { "epoch": 0.1783023780593647, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7725, "step": 1926 }, { "epoch": 0.17839495457964474, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7222, "step": 1927 }, { "epoch": 0.17848753109992477, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7845, "step": 1928 }, { "epoch": 0.17858010762020482, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.7074, "step": 1929 }, { "epoch": 0.17867268414048487, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6691, "step": 1930 }, { "epoch": 0.17876526066076492, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7014, "step": 1931 }, { "epoch": 0.17885783718104495, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6983, "step": 1932 }, { "epoch": 0.178950413701325, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6964, "step": 1933 }, { "epoch": 0.17904299022160505, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7239, "step": 1934 }, { "epoch": 0.17913556674188508, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7461, "step": 1935 }, { "epoch": 0.17922814326216513, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7426, "step": 1936 }, { "epoch": 0.17932071978244518, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.6839, "step": 1937 }, { "epoch": 0.17941329630272523, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6973, "step": 1938 }, { "epoch": 0.17950587282300526, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7558, "step": 1939 }, { "epoch": 0.1795984493432853, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6441, "step": 1940 }, { "epoch": 0.17969102586356536, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.6938, "step": 1941 }, { "epoch": 0.1797836023838454, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7175, "step": 1942 }, { "epoch": 0.17987617890412544, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7586, "step": 1943 }, { "epoch": 0.1799687554244055, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7102, "step": 1944 }, { "epoch": 0.18006133194468552, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7301, "step": 1945 }, { "epoch": 0.18015390846496557, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.698, "step": 1946 }, { "epoch": 0.18024648498524562, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7026, "step": 1947 }, { "epoch": 0.18033906150552567, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.72, "step": 1948 }, { "epoch": 0.1804316380258057, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.7046, "step": 1949 }, { "epoch": 0.18052421454608575, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.6986, "step": 1950 }, { "epoch": 0.1806167910663658, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.6592, "step": 1951 }, { "epoch": 0.18070936758664582, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7238, "step": 1952 }, { "epoch": 0.18080194410692588, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.685, "step": 1953 }, { "epoch": 0.18089452062720593, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6963, "step": 1954 }, { "epoch": 0.18098709714748598, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6632, "step": 1955 }, { "epoch": 0.181079673667766, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6768, "step": 1956 }, { "epoch": 0.18117225018804606, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6919, "step": 1957 }, { "epoch": 0.1812648267083261, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7618, "step": 1958 }, { "epoch": 0.18135740322860613, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6658, "step": 1959 }, { "epoch": 0.1814499797488862, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.7078, "step": 1960 }, { "epoch": 0.18154255626916624, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.6669, "step": 1961 }, { "epoch": 0.18163513278944626, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.6942, "step": 1962 }, { "epoch": 0.18172770930972632, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6821, "step": 1963 }, { "epoch": 0.18182028583000637, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6622, "step": 1964 }, { "epoch": 0.18191286235028642, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6843, "step": 1965 }, { "epoch": 0.18200543887056644, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6453, "step": 1966 }, { "epoch": 0.1820980153908465, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6997, "step": 1967 }, { "epoch": 0.18219059191112655, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6407, "step": 1968 }, { "epoch": 0.18228316843140657, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7293, "step": 1969 }, { "epoch": 0.18237574495168662, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7052, "step": 1970 }, { "epoch": 0.18246832147196668, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7328, "step": 1971 }, { "epoch": 0.18256089799224673, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.6089, "step": 1972 }, { "epoch": 0.18265347451252675, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.6946, "step": 1973 }, { "epoch": 0.1827460510328068, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.6757, "step": 1974 }, { "epoch": 0.18283862755308686, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.709, "step": 1975 }, { "epoch": 0.18293120407336688, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7105, "step": 1976 }, { "epoch": 0.18302378059364693, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6856, "step": 1977 }, { "epoch": 0.183116357113927, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6847, "step": 1978 }, { "epoch": 0.183208933634207, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6579, "step": 1979 }, { "epoch": 0.18330151015448706, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7462, "step": 1980 }, { "epoch": 0.18339408667476712, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7133, "step": 1981 }, { "epoch": 0.18348666319504717, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6938, "step": 1982 }, { "epoch": 0.1835792397153272, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.7614, "step": 1983 }, { "epoch": 0.18367181623560724, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6767, "step": 1984 }, { "epoch": 0.1837643927558873, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6718, "step": 1985 }, { "epoch": 0.18385696927616732, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.685, "step": 1986 }, { "epoch": 0.18394954579644737, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7522, "step": 1987 }, { "epoch": 0.18404212231672742, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6803, "step": 1988 }, { "epoch": 0.18413469883700748, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6179, "step": 1989 }, { "epoch": 0.1842272753572875, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6848, "step": 1990 }, { "epoch": 0.18431985187756755, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.7282, "step": 1991 }, { "epoch": 0.1844124283978476, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6806, "step": 1992 }, { "epoch": 0.18450500491812763, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7804, "step": 1993 }, { "epoch": 0.18459758143840768, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7467, "step": 1994 }, { "epoch": 0.18469015795868773, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6922, "step": 1995 }, { "epoch": 0.18478273447896776, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6524, "step": 1996 }, { "epoch": 0.1848753109992478, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6851, "step": 1997 }, { "epoch": 0.18496788751952786, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.713, "step": 1998 }, { "epoch": 0.18506046403980791, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6675, "step": 1999 }, { "epoch": 0.18515304056008794, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7356, "step": 2000 }, { "epoch": 0.185245617080368, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7156, "step": 2001 }, { "epoch": 0.18533819360064804, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6602, "step": 2002 }, { "epoch": 0.18543077012092807, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7262, "step": 2003 }, { "epoch": 0.18552334664120812, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6697, "step": 2004 }, { "epoch": 0.18561592316148817, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.7074, "step": 2005 }, { "epoch": 0.18570849968176822, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7156, "step": 2006 }, { "epoch": 0.18580107620204825, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7394, "step": 2007 }, { "epoch": 0.1858936527223283, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.71, "step": 2008 }, { "epoch": 0.18598622924260835, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7212, "step": 2009 }, { "epoch": 0.18607880576288838, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.6844, "step": 2010 }, { "epoch": 0.18617138228316843, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6101, "step": 2011 }, { "epoch": 0.18626395880344848, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7118, "step": 2012 }, { "epoch": 0.1863565353237285, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7459, "step": 2013 }, { "epoch": 0.18644911184400856, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7813, "step": 2014 }, { "epoch": 0.1865416883642886, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.6308, "step": 2015 }, { "epoch": 0.18663426488456866, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7353, "step": 2016 }, { "epoch": 0.1867268414048487, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7122, "step": 2017 }, { "epoch": 0.18681941792512874, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.6964, "step": 2018 }, { "epoch": 0.1869119944454088, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6373, "step": 2019 }, { "epoch": 0.18700457096568882, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7122, "step": 2020 }, { "epoch": 0.18709714748596887, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.644, "step": 2021 }, { "epoch": 0.18718972400624892, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7326, "step": 2022 }, { "epoch": 0.18728230052652897, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7191, "step": 2023 }, { "epoch": 0.187374877046809, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7629, "step": 2024 }, { "epoch": 0.18746745356708905, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7203, "step": 2025 }, { "epoch": 0.1875600300873691, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7729, "step": 2026 }, { "epoch": 0.18765260660764913, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6184, "step": 2027 }, { "epoch": 0.18774518312792918, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6972, "step": 2028 }, { "epoch": 0.18783775964820923, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.636, "step": 2029 }, { "epoch": 0.18793033616848925, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7116, "step": 2030 }, { "epoch": 0.1880229126887693, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.6986, "step": 2031 }, { "epoch": 0.18811548920904936, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.6926, "step": 2032 }, { "epoch": 0.1882080657293294, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.632, "step": 2033 }, { "epoch": 0.18830064224960943, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7037, "step": 2034 }, { "epoch": 0.1883932187698895, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7188, "step": 2035 }, { "epoch": 0.18848579529016954, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6342, "step": 2036 }, { "epoch": 0.18857837181044956, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7019, "step": 2037 }, { "epoch": 0.18867094833072962, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7128, "step": 2038 }, { "epoch": 0.18876352485100967, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.6879, "step": 2039 }, { "epoch": 0.18885610137128972, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.7407, "step": 2040 }, { "epoch": 0.18894867789156974, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7686, "step": 2041 }, { "epoch": 0.1890412544118498, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7048, "step": 2042 }, { "epoch": 0.18913383093212985, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7504, "step": 2043 }, { "epoch": 0.18922640745240987, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6752, "step": 2044 }, { "epoch": 0.18931898397268992, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6606, "step": 2045 }, { "epoch": 0.18941156049296998, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7154, "step": 2046 }, { "epoch": 0.18950413701325, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6755, "step": 2047 }, { "epoch": 0.18959671353353005, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6865, "step": 2048 }, { "epoch": 0.1896892900538101, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.671, "step": 2049 }, { "epoch": 0.18978186657409016, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7964, "step": 2050 }, { "epoch": 0.18987444309437018, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7116, "step": 2051 }, { "epoch": 0.18996701961465023, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7394, "step": 2052 }, { "epoch": 0.1900595961349303, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7398, "step": 2053 }, { "epoch": 0.1901521726552103, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.72, "step": 2054 }, { "epoch": 0.19024474917549036, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6506, "step": 2055 }, { "epoch": 0.19033732569577042, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6708, "step": 2056 }, { "epoch": 0.19042990221605047, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6983, "step": 2057 }, { "epoch": 0.1905224787363305, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6789, "step": 2058 }, { "epoch": 0.19061505525661054, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7309, "step": 2059 }, { "epoch": 0.1907076317768906, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7047, "step": 2060 }, { "epoch": 0.19080020829717062, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6947, "step": 2061 }, { "epoch": 0.19089278481745067, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.7503, "step": 2062 }, { "epoch": 0.19098536133773072, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7178, "step": 2063 }, { "epoch": 0.19107793785801075, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6914, "step": 2064 }, { "epoch": 0.1911705143782908, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7948, "step": 2065 }, { "epoch": 0.19126309089857085, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7001, "step": 2066 }, { "epoch": 0.1913556674188509, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6825, "step": 2067 }, { "epoch": 0.19144824393913093, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.6968, "step": 2068 }, { "epoch": 0.19154082045941098, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7039, "step": 2069 }, { "epoch": 0.19163339697969103, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6617, "step": 2070 }, { "epoch": 0.19172597349997106, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.7288, "step": 2071 }, { "epoch": 0.1918185500202511, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6488, "step": 2072 }, { "epoch": 0.19191112654053116, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6898, "step": 2073 }, { "epoch": 0.19200370306081121, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7528, "step": 2074 }, { "epoch": 0.19209627958109124, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.672, "step": 2075 }, { "epoch": 0.1921888561013713, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6858, "step": 2076 }, { "epoch": 0.19228143262165134, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6184, "step": 2077 }, { "epoch": 0.19237400914193137, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7549, "step": 2078 }, { "epoch": 0.19246658566221142, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7248, "step": 2079 }, { "epoch": 0.19255916218249147, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7108, "step": 2080 }, { "epoch": 0.1926517387027715, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6804, "step": 2081 }, { "epoch": 0.19274431522305155, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6753, "step": 2082 }, { "epoch": 0.1928368917433316, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7203, "step": 2083 }, { "epoch": 0.19292946826361165, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6809, "step": 2084 }, { "epoch": 0.19302204478389168, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6447, "step": 2085 }, { "epoch": 0.19311462130417173, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7363, "step": 2086 }, { "epoch": 0.19320719782445178, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.7091, "step": 2087 }, { "epoch": 0.1932997743447318, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.7135, "step": 2088 }, { "epoch": 0.19339235086501186, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7697, "step": 2089 }, { "epoch": 0.1934849273852919, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6556, "step": 2090 }, { "epoch": 0.19357750390557196, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6341, "step": 2091 }, { "epoch": 0.193670080425852, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6572, "step": 2092 }, { "epoch": 0.19376265694613204, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.761, "step": 2093 }, { "epoch": 0.1938552334664121, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7115, "step": 2094 }, { "epoch": 0.19394780998669212, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7205, "step": 2095 }, { "epoch": 0.19404038650697217, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6502, "step": 2096 }, { "epoch": 0.19413296302725222, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7324, "step": 2097 }, { "epoch": 0.19422553954753224, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.669, "step": 2098 }, { "epoch": 0.1943181160678123, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7379, "step": 2099 }, { "epoch": 0.19441069258809235, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6664, "step": 2100 }, { "epoch": 0.1945032691083724, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5892, "step": 2101 }, { "epoch": 0.19459584562865243, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.7528, "step": 2102 }, { "epoch": 0.19468842214893248, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6901, "step": 2103 }, { "epoch": 0.19478099866921253, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6666, "step": 2104 }, { "epoch": 0.19487357518949255, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.7132, "step": 2105 }, { "epoch": 0.1949661517097726, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6642, "step": 2106 }, { "epoch": 0.19505872823005266, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7328, "step": 2107 }, { "epoch": 0.1951513047503327, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.744, "step": 2108 }, { "epoch": 0.19524388127061273, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.7135, "step": 2109 }, { "epoch": 0.1953364577908928, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.7284, "step": 2110 }, { "epoch": 0.19542903431117284, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6999, "step": 2111 }, { "epoch": 0.19552161083145286, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7638, "step": 2112 }, { "epoch": 0.19561418735173292, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7332, "step": 2113 }, { "epoch": 0.19570676387201297, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.7082, "step": 2114 }, { "epoch": 0.195799340392293, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7836, "step": 2115 }, { "epoch": 0.19589191691257304, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6755, "step": 2116 }, { "epoch": 0.1959844934328531, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6913, "step": 2117 }, { "epoch": 0.19607706995313315, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.729, "step": 2118 }, { "epoch": 0.19616964647341317, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.6792, "step": 2119 }, { "epoch": 0.19626222299369322, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.7056, "step": 2120 }, { "epoch": 0.19635479951397328, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7487, "step": 2121 }, { "epoch": 0.1964473760342533, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7112, "step": 2122 }, { "epoch": 0.19653995255453335, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.7427, "step": 2123 }, { "epoch": 0.1966325290748134, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6512, "step": 2124 }, { "epoch": 0.19672510559509346, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6859, "step": 2125 }, { "epoch": 0.19681768211537348, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7452, "step": 2126 }, { "epoch": 0.19691025863565353, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7576, "step": 2127 }, { "epoch": 0.1970028351559336, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6885, "step": 2128 }, { "epoch": 0.1970954116762136, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6833, "step": 2129 }, { "epoch": 0.19718798819649366, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7472, "step": 2130 }, { "epoch": 0.19728056471677372, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.6968, "step": 2131 }, { "epoch": 0.19737314123705374, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7243, "step": 2132 }, { "epoch": 0.1974657177573338, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7443, "step": 2133 }, { "epoch": 0.19755829427761384, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.7289, "step": 2134 }, { "epoch": 0.1976508707978939, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6307, "step": 2135 }, { "epoch": 0.19774344731817392, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6646, "step": 2136 }, { "epoch": 0.19783602383845397, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6737, "step": 2137 }, { "epoch": 0.19792860035873402, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6805, "step": 2138 }, { "epoch": 0.19802117687901405, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.66, "step": 2139 }, { "epoch": 0.1981137533992941, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6444, "step": 2140 }, { "epoch": 0.19820632991957415, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7332, "step": 2141 }, { "epoch": 0.1982989064398542, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6902, "step": 2142 }, { "epoch": 0.19839148296013423, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7389, "step": 2143 }, { "epoch": 0.19848405948041428, "grad_norm": 2.71875, "learning_rate": 0.02, "loss": 1.742, "step": 2144 }, { "epoch": 0.19857663600069433, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6547, "step": 2145 }, { "epoch": 0.19866921252097436, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.7349, "step": 2146 }, { "epoch": 0.1987617890412544, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6321, "step": 2147 }, { "epoch": 0.19885436556153446, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.6685, "step": 2148 }, { "epoch": 0.1989469420818145, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.67, "step": 2149 }, { "epoch": 0.19903951860209454, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7364, "step": 2150 }, { "epoch": 0.1991320951223746, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6153, "step": 2151 }, { "epoch": 0.19922467164265464, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6903, "step": 2152 }, { "epoch": 0.19931724816293467, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6518, "step": 2153 }, { "epoch": 0.19940982468321472, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7578, "step": 2154 }, { "epoch": 0.19950240120349477, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6931, "step": 2155 }, { "epoch": 0.1995949777237748, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6951, "step": 2156 }, { "epoch": 0.19968755424405485, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6124, "step": 2157 }, { "epoch": 0.1997801307643349, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6833, "step": 2158 }, { "epoch": 0.19987270728461495, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.7332, "step": 2159 }, { "epoch": 0.19996528380489498, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6938, "step": 2160 }, { "epoch": 0.20005786032517503, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7023, "step": 2161 }, { "epoch": 0.20015043684545508, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.679, "step": 2162 }, { "epoch": 0.2002430133657351, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7224, "step": 2163 }, { "epoch": 0.20033558988601516, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.695, "step": 2164 }, { "epoch": 0.2004281664062952, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6812, "step": 2165 }, { "epoch": 0.20052074292657524, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7512, "step": 2166 }, { "epoch": 0.2006133194468553, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.693, "step": 2167 }, { "epoch": 0.20070589596713534, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7483, "step": 2168 }, { "epoch": 0.2007984724874154, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7088, "step": 2169 }, { "epoch": 0.20089104900769542, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7216, "step": 2170 }, { "epoch": 0.20098362552797547, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6827, "step": 2171 }, { "epoch": 0.20107620204825552, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6509, "step": 2172 }, { "epoch": 0.20116877856853554, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7076, "step": 2173 }, { "epoch": 0.2012613550888156, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.678, "step": 2174 }, { "epoch": 0.20135393160909565, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6815, "step": 2175 }, { "epoch": 0.2014465081293757, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6827, "step": 2176 }, { "epoch": 0.20153908464965573, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6789, "step": 2177 }, { "epoch": 0.20163166116993578, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6442, "step": 2178 }, { "epoch": 0.20172423769021583, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6823, "step": 2179 }, { "epoch": 0.20181681421049585, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6828, "step": 2180 }, { "epoch": 0.2019093907307759, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.657, "step": 2181 }, { "epoch": 0.20200196725105596, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.744, "step": 2182 }, { "epoch": 0.20209454377133598, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6591, "step": 2183 }, { "epoch": 0.20218712029161603, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6668, "step": 2184 }, { "epoch": 0.2022796968118961, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6198, "step": 2185 }, { "epoch": 0.20237227333217614, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6713, "step": 2186 }, { "epoch": 0.20246484985245616, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7345, "step": 2187 }, { "epoch": 0.20255742637273622, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6319, "step": 2188 }, { "epoch": 0.20265000289301627, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6334, "step": 2189 }, { "epoch": 0.2027425794132963, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6899, "step": 2190 }, { "epoch": 0.20283515593357634, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5566, "step": 2191 }, { "epoch": 0.2029277324538564, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.7674, "step": 2192 }, { "epoch": 0.20302030897413642, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7312, "step": 2193 }, { "epoch": 0.20311288549441647, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6664, "step": 2194 }, { "epoch": 0.20320546201469653, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.6446, "step": 2195 }, { "epoch": 0.20329803853497658, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6689, "step": 2196 }, { "epoch": 0.2033906150552566, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6863, "step": 2197 }, { "epoch": 0.20348319157553665, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6405, "step": 2198 }, { "epoch": 0.2035757680958167, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6826, "step": 2199 }, { "epoch": 0.20366834461609673, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7133, "step": 2200 }, { "epoch": 0.20376092113637678, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7858, "step": 2201 }, { "epoch": 0.20385349765665683, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6514, "step": 2202 }, { "epoch": 0.2039460741769369, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6532, "step": 2203 }, { "epoch": 0.2040386506972169, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7098, "step": 2204 }, { "epoch": 0.20413122721749696, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.749, "step": 2205 }, { "epoch": 0.20422380373777702, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7086, "step": 2206 }, { "epoch": 0.20431638025805704, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7345, "step": 2207 }, { "epoch": 0.2044089567783371, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.6735, "step": 2208 }, { "epoch": 0.20450153329861714, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6244, "step": 2209 }, { "epoch": 0.20459410981889717, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6507, "step": 2210 }, { "epoch": 0.20468668633917722, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5844, "step": 2211 }, { "epoch": 0.20477926285945727, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6182, "step": 2212 }, { "epoch": 0.20487183937973732, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.7161, "step": 2213 }, { "epoch": 0.20496441590001735, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7438, "step": 2214 }, { "epoch": 0.2050569924202974, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5888, "step": 2215 }, { "epoch": 0.20514956894057745, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.7291, "step": 2216 }, { "epoch": 0.20524214546085748, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6678, "step": 2217 }, { "epoch": 0.20533472198113753, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6798, "step": 2218 }, { "epoch": 0.20542729850141758, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6693, "step": 2219 }, { "epoch": 0.20551987502169763, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6952, "step": 2220 }, { "epoch": 0.20561245154197766, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6962, "step": 2221 }, { "epoch": 0.2057050280622577, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.7078, "step": 2222 }, { "epoch": 0.20579760458253776, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6842, "step": 2223 }, { "epoch": 0.2058901811028178, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6564, "step": 2224 }, { "epoch": 0.20598275762309784, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6397, "step": 2225 }, { "epoch": 0.2060753341433779, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6697, "step": 2226 }, { "epoch": 0.20616791066365792, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7181, "step": 2227 }, { "epoch": 0.20626048718393797, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7259, "step": 2228 }, { "epoch": 0.20635306370421802, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6679, "step": 2229 }, { "epoch": 0.20644564022449807, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6626, "step": 2230 }, { "epoch": 0.2065382167447781, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6985, "step": 2231 }, { "epoch": 0.20663079326505815, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6018, "step": 2232 }, { "epoch": 0.2067233697853382, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6282, "step": 2233 }, { "epoch": 0.20681594630561823, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6331, "step": 2234 }, { "epoch": 0.20690852282589828, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6835, "step": 2235 }, { "epoch": 0.20700109934617833, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.6722, "step": 2236 }, { "epoch": 0.20709367586645838, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.6795, "step": 2237 }, { "epoch": 0.2071862523867384, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7243, "step": 2238 }, { "epoch": 0.20727882890701846, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6204, "step": 2239 }, { "epoch": 0.2073714054272985, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.676, "step": 2240 }, { "epoch": 0.20746398194757854, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.6498, "step": 2241 }, { "epoch": 0.2075565584678586, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.6584, "step": 2242 }, { "epoch": 0.20764913498813864, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.6253, "step": 2243 }, { "epoch": 0.20774171150841866, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6988, "step": 2244 }, { "epoch": 0.20783428802869872, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6934, "step": 2245 }, { "epoch": 0.20792686454897877, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6959, "step": 2246 }, { "epoch": 0.20801944106925882, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7298, "step": 2247 }, { "epoch": 0.20811201758953884, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7086, "step": 2248 }, { "epoch": 0.2082045941098189, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6933, "step": 2249 }, { "epoch": 0.20829717063009895, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6184, "step": 2250 }, { "epoch": 0.20838974715037897, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6969, "step": 2251 }, { "epoch": 0.20848232367065903, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7096, "step": 2252 }, { "epoch": 0.20857490019093908, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6467, "step": 2253 }, { "epoch": 0.20866747671121913, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6679, "step": 2254 }, { "epoch": 0.20876005323149915, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.742, "step": 2255 }, { "epoch": 0.2088526297517792, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6675, "step": 2256 }, { "epoch": 0.20894520627205926, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7369, "step": 2257 }, { "epoch": 0.20903778279233928, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7149, "step": 2258 }, { "epoch": 0.20913035931261933, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6748, "step": 2259 }, { "epoch": 0.2092229358328994, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6358, "step": 2260 }, { "epoch": 0.2093155123531794, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6819, "step": 2261 }, { "epoch": 0.20940808887345946, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7656, "step": 2262 }, { "epoch": 0.20950066539373952, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7314, "step": 2263 }, { "epoch": 0.20959324191401957, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6883, "step": 2264 }, { "epoch": 0.2096858184342996, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6098, "step": 2265 }, { "epoch": 0.20977839495457964, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6629, "step": 2266 }, { "epoch": 0.2098709714748597, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.6758, "step": 2267 }, { "epoch": 0.20996354799513972, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6963, "step": 2268 }, { "epoch": 0.21005612451541977, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6706, "step": 2269 }, { "epoch": 0.21014870103569983, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.65, "step": 2270 }, { "epoch": 0.21024127755597988, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6881, "step": 2271 }, { "epoch": 0.2103338540762599, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.707, "step": 2272 }, { "epoch": 0.21042643059653995, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.667, "step": 2273 }, { "epoch": 0.21051900711682, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.707, "step": 2274 }, { "epoch": 0.21061158363710003, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6709, "step": 2275 }, { "epoch": 0.21070416015738008, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6751, "step": 2276 }, { "epoch": 0.21079673667766013, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7352, "step": 2277 }, { "epoch": 0.21088931319794016, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7086, "step": 2278 }, { "epoch": 0.2109818897182202, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7229, "step": 2279 }, { "epoch": 0.21107446623850026, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6865, "step": 2280 }, { "epoch": 0.21116704275878032, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7141, "step": 2281 }, { "epoch": 0.21125961927906034, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6777, "step": 2282 }, { "epoch": 0.2113521957993404, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.638, "step": 2283 }, { "epoch": 0.21144477231962044, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.715, "step": 2284 }, { "epoch": 0.21153734883990047, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7129, "step": 2285 }, { "epoch": 0.21162992536018052, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.7031, "step": 2286 }, { "epoch": 0.21172250188046057, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.662, "step": 2287 }, { "epoch": 0.21181507840074063, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.735, "step": 2288 }, { "epoch": 0.21190765492102065, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7036, "step": 2289 }, { "epoch": 0.2120002314413007, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6343, "step": 2290 }, { "epoch": 0.21209280796158075, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6716, "step": 2291 }, { "epoch": 0.21218538448186078, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.6504, "step": 2292 }, { "epoch": 0.21227796100214083, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.7147, "step": 2293 }, { "epoch": 0.21237053752242088, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7561, "step": 2294 }, { "epoch": 0.2124631140427009, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7232, "step": 2295 }, { "epoch": 0.21255569056298096, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.7352, "step": 2296 }, { "epoch": 0.212648267083261, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6457, "step": 2297 }, { "epoch": 0.21274084360354106, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7242, "step": 2298 }, { "epoch": 0.2128334201238211, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7565, "step": 2299 }, { "epoch": 0.21292599664410114, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7135, "step": 2300 }, { "epoch": 0.2130185731643812, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6772, "step": 2301 }, { "epoch": 0.21311114968466122, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7056, "step": 2302 }, { "epoch": 0.21320372620494127, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7291, "step": 2303 }, { "epoch": 0.21329630272522132, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.72, "step": 2304 }, { "epoch": 0.21338887924550137, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6572, "step": 2305 }, { "epoch": 0.2134814557657814, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6714, "step": 2306 }, { "epoch": 0.21357403228606145, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6786, "step": 2307 }, { "epoch": 0.2136666088063415, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.6361, "step": 2308 }, { "epoch": 0.21375918532662153, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.6881, "step": 2309 }, { "epoch": 0.21385176184690158, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.6372, "step": 2310 }, { "epoch": 0.21394433836718163, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.6519, "step": 2311 }, { "epoch": 0.21403691488746165, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6172, "step": 2312 }, { "epoch": 0.2141294914077417, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7293, "step": 2313 }, { "epoch": 0.21422206792802176, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7064, "step": 2314 }, { "epoch": 0.2143146444483018, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6752, "step": 2315 }, { "epoch": 0.21440722096858184, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7071, "step": 2316 }, { "epoch": 0.2144997974888619, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6869, "step": 2317 }, { "epoch": 0.21459237400914194, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6465, "step": 2318 }, { "epoch": 0.21468495052942196, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6479, "step": 2319 }, { "epoch": 0.21477752704970202, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6983, "step": 2320 }, { "epoch": 0.21487010356998207, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6588, "step": 2321 }, { "epoch": 0.21496268009026212, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7237, "step": 2322 }, { "epoch": 0.21505525661054214, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6046, "step": 2323 }, { "epoch": 0.2151478331308222, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6883, "step": 2324 }, { "epoch": 0.21524040965110225, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7044, "step": 2325 }, { "epoch": 0.21533298617138227, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6342, "step": 2326 }, { "epoch": 0.21542556269166233, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6834, "step": 2327 }, { "epoch": 0.21551813921194238, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.687, "step": 2328 }, { "epoch": 0.2156107157322224, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.6894, "step": 2329 }, { "epoch": 0.21570329225250245, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6541, "step": 2330 }, { "epoch": 0.2157958687727825, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.622, "step": 2331 }, { "epoch": 0.21588844529306256, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7154, "step": 2332 }, { "epoch": 0.21598102181334258, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7682, "step": 2333 }, { "epoch": 0.21607359833362264, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7001, "step": 2334 }, { "epoch": 0.2161661748539027, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6272, "step": 2335 }, { "epoch": 0.2162587513741827, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.604, "step": 2336 }, { "epoch": 0.21635132789446276, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7081, "step": 2337 }, { "epoch": 0.21644390441474282, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6456, "step": 2338 }, { "epoch": 0.21653648093502287, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7205, "step": 2339 }, { "epoch": 0.2166290574553029, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6662, "step": 2340 }, { "epoch": 0.21672163397558294, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6588, "step": 2341 }, { "epoch": 0.216814210495863, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6844, "step": 2342 }, { "epoch": 0.21690678701614302, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7136, "step": 2343 }, { "epoch": 0.21699936353642307, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6815, "step": 2344 }, { "epoch": 0.21709194005670313, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.7222, "step": 2345 }, { "epoch": 0.21718451657698315, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6899, "step": 2346 }, { "epoch": 0.2172770930972632, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.7076, "step": 2347 }, { "epoch": 0.21736966961754325, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.7344, "step": 2348 }, { "epoch": 0.2174622461378233, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6791, "step": 2349 }, { "epoch": 0.21755482265810333, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6833, "step": 2350 }, { "epoch": 0.21764739917838338, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.7055, "step": 2351 }, { "epoch": 0.21773997569866343, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.689, "step": 2352 }, { "epoch": 0.21783255221894346, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.641, "step": 2353 }, { "epoch": 0.2179251287392235, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6432, "step": 2354 }, { "epoch": 0.21801770525950356, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.683, "step": 2355 }, { "epoch": 0.21811028177978362, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6819, "step": 2356 }, { "epoch": 0.21820285830006364, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7525, "step": 2357 }, { "epoch": 0.2182954348203437, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6519, "step": 2358 }, { "epoch": 0.21838801134062374, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5901, "step": 2359 }, { "epoch": 0.21848058786090377, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6748, "step": 2360 }, { "epoch": 0.21857316438118382, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.669, "step": 2361 }, { "epoch": 0.21866574090146387, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6975, "step": 2362 }, { "epoch": 0.2187583174217439, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6691, "step": 2363 }, { "epoch": 0.21885089394202395, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.718, "step": 2364 }, { "epoch": 0.218943470462304, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5778, "step": 2365 }, { "epoch": 0.21903604698258405, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6972, "step": 2366 }, { "epoch": 0.21912862350286408, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6269, "step": 2367 }, { "epoch": 0.21922120002314413, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6289, "step": 2368 }, { "epoch": 0.21931377654342418, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5948, "step": 2369 }, { "epoch": 0.2194063530637042, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7188, "step": 2370 }, { "epoch": 0.21949892958398426, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.613, "step": 2371 }, { "epoch": 0.2195915061042643, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6879, "step": 2372 }, { "epoch": 0.21968408262454436, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.675, "step": 2373 }, { "epoch": 0.2197766591448244, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6235, "step": 2374 }, { "epoch": 0.21986923566510444, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6524, "step": 2375 }, { "epoch": 0.2199618121853845, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.6443, "step": 2376 }, { "epoch": 0.22005438870566452, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.6429, "step": 2377 }, { "epoch": 0.22014696522594457, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.6696, "step": 2378 }, { "epoch": 0.22023954174622462, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6902, "step": 2379 }, { "epoch": 0.22033211826650465, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7101, "step": 2380 }, { "epoch": 0.2204246947867847, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7581, "step": 2381 }, { "epoch": 0.22051727130706475, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6344, "step": 2382 }, { "epoch": 0.2206098478273448, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.663, "step": 2383 }, { "epoch": 0.22070242434762483, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6737, "step": 2384 }, { "epoch": 0.22079500086790488, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.7072, "step": 2385 }, { "epoch": 0.22088757738818493, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6284, "step": 2386 }, { "epoch": 0.22098015390846495, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6288, "step": 2387 }, { "epoch": 0.221072730428745, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6524, "step": 2388 }, { "epoch": 0.22116530694902506, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6679, "step": 2389 }, { "epoch": 0.2212578834693051, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6388, "step": 2390 }, { "epoch": 0.22135045998958514, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6882, "step": 2391 }, { "epoch": 0.2214430365098652, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6947, "step": 2392 }, { "epoch": 0.22153561303014524, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6905, "step": 2393 }, { "epoch": 0.22162818955042526, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6473, "step": 2394 }, { "epoch": 0.22172076607070532, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6751, "step": 2395 }, { "epoch": 0.22181334259098537, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6913, "step": 2396 }, { "epoch": 0.2219059191112654, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6832, "step": 2397 }, { "epoch": 0.22199849563154544, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6759, "step": 2398 }, { "epoch": 0.2220910721518255, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.679, "step": 2399 }, { "epoch": 0.22218364867210555, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7043, "step": 2400 }, { "epoch": 0.22227622519238557, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6588, "step": 2401 }, { "epoch": 0.22236880171266563, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6382, "step": 2402 }, { "epoch": 0.22246137823294568, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7331, "step": 2403 }, { "epoch": 0.2225539547532257, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7345, "step": 2404 }, { "epoch": 0.22264653127350575, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.711, "step": 2405 }, { "epoch": 0.2227391077937858, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.723, "step": 2406 }, { "epoch": 0.22283168431406586, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.6605, "step": 2407 }, { "epoch": 0.22292426083434588, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7576, "step": 2408 }, { "epoch": 0.22301683735462594, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6733, "step": 2409 }, { "epoch": 0.223109413874906, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.688, "step": 2410 }, { "epoch": 0.223201990395186, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7182, "step": 2411 }, { "epoch": 0.22329456691546606, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.724, "step": 2412 }, { "epoch": 0.22338714343574612, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.761, "step": 2413 }, { "epoch": 0.22347971995602614, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.6487, "step": 2414 }, { "epoch": 0.2235722964763062, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6773, "step": 2415 }, { "epoch": 0.22366487299658624, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6829, "step": 2416 }, { "epoch": 0.2237574495168663, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6672, "step": 2417 }, { "epoch": 0.22385002603714632, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6825, "step": 2418 }, { "epoch": 0.22394260255742637, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6647, "step": 2419 }, { "epoch": 0.22403517907770643, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.6496, "step": 2420 }, { "epoch": 0.22412775559798645, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.683, "step": 2421 }, { "epoch": 0.2242203321182665, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6988, "step": 2422 }, { "epoch": 0.22431290863854655, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.643, "step": 2423 }, { "epoch": 0.2244054851588266, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7374, "step": 2424 }, { "epoch": 0.22449806167910663, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7249, "step": 2425 }, { "epoch": 0.22459063819938668, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6691, "step": 2426 }, { "epoch": 0.22468321471966674, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6134, "step": 2427 }, { "epoch": 0.22477579123994676, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6605, "step": 2428 }, { "epoch": 0.2248683677602268, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6566, "step": 2429 }, { "epoch": 0.22496094428050686, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.63, "step": 2430 }, { "epoch": 0.2250535208007869, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6774, "step": 2431 }, { "epoch": 0.22514609732106694, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.615, "step": 2432 }, { "epoch": 0.225238673841347, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7399, "step": 2433 }, { "epoch": 0.22533125036162704, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6869, "step": 2434 }, { "epoch": 0.22542382688190707, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6816, "step": 2435 }, { "epoch": 0.22551640340218712, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5897, "step": 2436 }, { "epoch": 0.22560897992246717, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7538, "step": 2437 }, { "epoch": 0.2257015564427472, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7187, "step": 2438 }, { "epoch": 0.22579413296302725, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7201, "step": 2439 }, { "epoch": 0.2258867094833073, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.644, "step": 2440 }, { "epoch": 0.22597928600358733, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6601, "step": 2441 }, { "epoch": 0.22607186252386738, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6402, "step": 2442 }, { "epoch": 0.22616443904414743, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6923, "step": 2443 }, { "epoch": 0.22625701556442748, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.7219, "step": 2444 }, { "epoch": 0.2263495920847075, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6885, "step": 2445 }, { "epoch": 0.22644216860498756, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.714, "step": 2446 }, { "epoch": 0.2265347451252676, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6631, "step": 2447 }, { "epoch": 0.22662732164554764, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6479, "step": 2448 }, { "epoch": 0.2267198981658277, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6514, "step": 2449 }, { "epoch": 0.22681247468610774, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6651, "step": 2450 }, { "epoch": 0.2269050512063878, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6721, "step": 2451 }, { "epoch": 0.22699762772666782, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6544, "step": 2452 }, { "epoch": 0.22709020424694787, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6817, "step": 2453 }, { "epoch": 0.22718278076722792, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6717, "step": 2454 }, { "epoch": 0.22727535728750795, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6491, "step": 2455 }, { "epoch": 0.227367933807788, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6324, "step": 2456 }, { "epoch": 0.22746051032806805, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6846, "step": 2457 }, { "epoch": 0.22755308684834807, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6214, "step": 2458 }, { "epoch": 0.22764566336862813, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6544, "step": 2459 }, { "epoch": 0.22773823988890818, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6362, "step": 2460 }, { "epoch": 0.22783081640918823, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6988, "step": 2461 }, { "epoch": 0.22792339292946825, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.7047, "step": 2462 }, { "epoch": 0.2280159694497483, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7028, "step": 2463 }, { "epoch": 0.22810854597002836, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6961, "step": 2464 }, { "epoch": 0.22820112249030838, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.659, "step": 2465 }, { "epoch": 0.22829369901058844, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6269, "step": 2466 }, { "epoch": 0.2283862755308685, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.68, "step": 2467 }, { "epoch": 0.22847885205114854, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6858, "step": 2468 }, { "epoch": 0.22857142857142856, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6022, "step": 2469 }, { "epoch": 0.22866400509170862, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7036, "step": 2470 }, { "epoch": 0.22875658161198867, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.644, "step": 2471 }, { "epoch": 0.2288491581322687, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6868, "step": 2472 }, { "epoch": 0.22894173465254875, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7016, "step": 2473 }, { "epoch": 0.2290343111728288, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.614, "step": 2474 }, { "epoch": 0.22912688769310882, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6701, "step": 2475 }, { "epoch": 0.22921946421338887, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6503, "step": 2476 }, { "epoch": 0.22931204073366893, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.774, "step": 2477 }, { "epoch": 0.22940461725394898, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6693, "step": 2478 }, { "epoch": 0.229497193774229, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6432, "step": 2479 }, { "epoch": 0.22958977029450905, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7218, "step": 2480 }, { "epoch": 0.2296823468147891, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.628, "step": 2481 }, { "epoch": 0.22977492333506913, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.653, "step": 2482 }, { "epoch": 0.22986749985534918, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6877, "step": 2483 }, { "epoch": 0.22996007637562924, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7259, "step": 2484 }, { "epoch": 0.2300526528959093, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6571, "step": 2485 }, { "epoch": 0.2301452294161893, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6584, "step": 2486 }, { "epoch": 0.23023780593646936, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6268, "step": 2487 }, { "epoch": 0.23033038245674942, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.7213, "step": 2488 }, { "epoch": 0.23042295897702944, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6607, "step": 2489 }, { "epoch": 0.2305155354973095, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6339, "step": 2490 }, { "epoch": 0.23060811201758954, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6952, "step": 2491 }, { "epoch": 0.23070068853786957, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6764, "step": 2492 }, { "epoch": 0.23079326505814962, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.7212, "step": 2493 }, { "epoch": 0.23088584157842967, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6924, "step": 2494 }, { "epoch": 0.23097841809870973, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6027, "step": 2495 }, { "epoch": 0.23107099461898975, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6437, "step": 2496 }, { "epoch": 0.2311635711392698, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6949, "step": 2497 }, { "epoch": 0.23125614765954985, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.676, "step": 2498 }, { "epoch": 0.23134872417982988, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.6361, "step": 2499 }, { "epoch": 0.23144130070010993, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.7479, "step": 2500 }, { "epoch": 0.23153387722038998, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7123, "step": 2501 }, { "epoch": 0.23162645374067004, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6494, "step": 2502 }, { "epoch": 0.23171903026095006, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7023, "step": 2503 }, { "epoch": 0.2318116067812301, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6683, "step": 2504 }, { "epoch": 0.23190418330151016, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.689, "step": 2505 }, { "epoch": 0.2319967598217902, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6452, "step": 2506 }, { "epoch": 0.23208933634207024, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6936, "step": 2507 }, { "epoch": 0.2321819128623503, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7269, "step": 2508 }, { "epoch": 0.23227448938263032, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.7082, "step": 2509 }, { "epoch": 0.23236706590291037, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7202, "step": 2510 }, { "epoch": 0.23245964242319042, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6258, "step": 2511 }, { "epoch": 0.23255221894347047, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.612, "step": 2512 }, { "epoch": 0.2326447954637505, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7478, "step": 2513 }, { "epoch": 0.23273737198403055, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7372, "step": 2514 }, { "epoch": 0.2328299485043106, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6854, "step": 2515 }, { "epoch": 0.23292252502459063, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6367, "step": 2516 }, { "epoch": 0.23301510154487068, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6622, "step": 2517 }, { "epoch": 0.23310767806515073, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6445, "step": 2518 }, { "epoch": 0.23320025458543078, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6398, "step": 2519 }, { "epoch": 0.2332928311057108, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6635, "step": 2520 }, { "epoch": 0.23338540762599086, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6919, "step": 2521 }, { "epoch": 0.2334779841462709, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6623, "step": 2522 }, { "epoch": 0.23357056066655094, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7046, "step": 2523 }, { "epoch": 0.233663137186831, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.689, "step": 2524 }, { "epoch": 0.23375571370711104, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.713, "step": 2525 }, { "epoch": 0.23384829022739106, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6724, "step": 2526 }, { "epoch": 0.23394086674767112, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.701, "step": 2527 }, { "epoch": 0.23403344326795117, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.707, "step": 2528 }, { "epoch": 0.23412601978823122, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.596, "step": 2529 }, { "epoch": 0.23421859630851125, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6777, "step": 2530 }, { "epoch": 0.2343111728287913, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6366, "step": 2531 }, { "epoch": 0.23440374934907135, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6342, "step": 2532 }, { "epoch": 0.23449632586935137, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.611, "step": 2533 }, { "epoch": 0.23458890238963143, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7006, "step": 2534 }, { "epoch": 0.23468147890991148, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.6867, "step": 2535 }, { "epoch": 0.23477405543019153, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.753, "step": 2536 }, { "epoch": 0.23486663195047155, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6279, "step": 2537 }, { "epoch": 0.2349592084707516, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.7052, "step": 2538 }, { "epoch": 0.23505178499103166, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.621, "step": 2539 }, { "epoch": 0.23514436151131168, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6222, "step": 2540 }, { "epoch": 0.23523693803159174, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6234, "step": 2541 }, { "epoch": 0.2353295145518718, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7017, "step": 2542 }, { "epoch": 0.2354220910721518, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.675, "step": 2543 }, { "epoch": 0.23551466759243186, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7356, "step": 2544 }, { "epoch": 0.23560724411271192, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.6933, "step": 2545 }, { "epoch": 0.23569982063299197, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.6727, "step": 2546 }, { "epoch": 0.235792397153272, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.6447, "step": 2547 }, { "epoch": 0.23588497367355205, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6778, "step": 2548 }, { "epoch": 0.2359775501938321, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.6902, "step": 2549 }, { "epoch": 0.23607012671411212, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.6965, "step": 2550 }, { "epoch": 0.23616270323439217, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6387, "step": 2551 }, { "epoch": 0.23625527975467223, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6252, "step": 2552 }, { "epoch": 0.23634785627495228, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5882, "step": 2553 }, { "epoch": 0.2364404327952323, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6347, "step": 2554 }, { "epoch": 0.23653300931551235, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.648, "step": 2555 }, { "epoch": 0.2366255858357924, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6711, "step": 2556 }, { "epoch": 0.23671816235607243, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.653, "step": 2557 }, { "epoch": 0.23681073887635248, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6545, "step": 2558 }, { "epoch": 0.23690331539663254, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6898, "step": 2559 }, { "epoch": 0.23699589191691256, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6893, "step": 2560 }, { "epoch": 0.2370884684371926, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6841, "step": 2561 }, { "epoch": 0.23718104495747266, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.7139, "step": 2562 }, { "epoch": 0.23727362147775272, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6801, "step": 2563 }, { "epoch": 0.23736619799803274, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6869, "step": 2564 }, { "epoch": 0.2374587745183128, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6134, "step": 2565 }, { "epoch": 0.23755135103859285, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6441, "step": 2566 }, { "epoch": 0.23764392755887287, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6278, "step": 2567 }, { "epoch": 0.23773650407915292, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6582, "step": 2568 }, { "epoch": 0.23782908059943297, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7179, "step": 2569 }, { "epoch": 0.23792165711971303, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6585, "step": 2570 }, { "epoch": 0.23801423363999305, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6518, "step": 2571 }, { "epoch": 0.2381068101602731, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6047, "step": 2572 }, { "epoch": 0.23819938668055315, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.7011, "step": 2573 }, { "epoch": 0.23829196320083318, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6548, "step": 2574 }, { "epoch": 0.23838453972111323, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6359, "step": 2575 }, { "epoch": 0.23847711624139328, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.6733, "step": 2576 }, { "epoch": 0.2385696927616733, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6377, "step": 2577 }, { "epoch": 0.23866226928195336, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6935, "step": 2578 }, { "epoch": 0.2387548458022334, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6977, "step": 2579 }, { "epoch": 0.23884742232251346, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5989, "step": 2580 }, { "epoch": 0.2389399988427935, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.7097, "step": 2581 }, { "epoch": 0.23903257536307354, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6746, "step": 2582 }, { "epoch": 0.2391251518833536, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6428, "step": 2583 }, { "epoch": 0.23921772840363362, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6383, "step": 2584 }, { "epoch": 0.23931030492391367, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6793, "step": 2585 }, { "epoch": 0.23940288144419372, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6865, "step": 2586 }, { "epoch": 0.23949545796447377, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6283, "step": 2587 }, { "epoch": 0.2395880344847538, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.7432, "step": 2588 }, { "epoch": 0.23968061100503385, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6889, "step": 2589 }, { "epoch": 0.2397731875253139, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.7378, "step": 2590 }, { "epoch": 0.23986576404559393, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.58, "step": 2591 }, { "epoch": 0.23995834056587398, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6611, "step": 2592 }, { "epoch": 0.24005091708615403, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6326, "step": 2593 }, { "epoch": 0.24014349360643406, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.646, "step": 2594 }, { "epoch": 0.2402360701267141, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.641, "step": 2595 }, { "epoch": 0.24032864664699416, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6926, "step": 2596 }, { "epoch": 0.2404212231672742, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.6752, "step": 2597 }, { "epoch": 0.24051379968755424, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7213, "step": 2598 }, { "epoch": 0.2406063762078343, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7003, "step": 2599 }, { "epoch": 0.24069895272811434, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6978, "step": 2600 }, { "epoch": 0.24079152924839436, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6217, "step": 2601 }, { "epoch": 0.24088410576867442, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6908, "step": 2602 }, { "epoch": 0.24097668228895447, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6978, "step": 2603 }, { "epoch": 0.24106925880923452, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6323, "step": 2604 }, { "epoch": 0.24116183532951455, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.7372, "step": 2605 }, { "epoch": 0.2412544118497946, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6353, "step": 2606 }, { "epoch": 0.24134698837007465, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6863, "step": 2607 }, { "epoch": 0.24143956489035467, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6135, "step": 2608 }, { "epoch": 0.24153214141063473, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6775, "step": 2609 }, { "epoch": 0.24162471793091478, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6705, "step": 2610 }, { "epoch": 0.2417172944511948, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6209, "step": 2611 }, { "epoch": 0.24180987097147486, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.6261, "step": 2612 }, { "epoch": 0.2419024474917549, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.6419, "step": 2613 }, { "epoch": 0.24199502401203496, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6701, "step": 2614 }, { "epoch": 0.24208760053231498, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.687, "step": 2615 }, { "epoch": 0.24218017705259504, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6425, "step": 2616 }, { "epoch": 0.2422727535728751, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6786, "step": 2617 }, { "epoch": 0.2423653300931551, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5871, "step": 2618 }, { "epoch": 0.24245790661343516, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.603, "step": 2619 }, { "epoch": 0.24255048313371522, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6614, "step": 2620 }, { "epoch": 0.24264305965399527, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6613, "step": 2621 }, { "epoch": 0.2427356361742753, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6533, "step": 2622 }, { "epoch": 0.24282821269455535, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6639, "step": 2623 }, { "epoch": 0.2429207892148354, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6339, "step": 2624 }, { "epoch": 0.24301336573511542, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6194, "step": 2625 }, { "epoch": 0.24310594225539547, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6777, "step": 2626 }, { "epoch": 0.24319851877567553, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.694, "step": 2627 }, { "epoch": 0.24329109529595555, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.666, "step": 2628 }, { "epoch": 0.2433836718162356, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6598, "step": 2629 }, { "epoch": 0.24347624833651565, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6678, "step": 2630 }, { "epoch": 0.2435688248567957, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6775, "step": 2631 }, { "epoch": 0.24366140137707573, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6848, "step": 2632 }, { "epoch": 0.24375397789735578, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6924, "step": 2633 }, { "epoch": 0.24384655441763584, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5874, "step": 2634 }, { "epoch": 0.24393913093791586, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6152, "step": 2635 }, { "epoch": 0.2440317074581959, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.6561, "step": 2636 }, { "epoch": 0.24412428397847596, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6867, "step": 2637 }, { "epoch": 0.24421686049875602, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7139, "step": 2638 }, { "epoch": 0.24430943701903604, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6819, "step": 2639 }, { "epoch": 0.2444020135393161, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6436, "step": 2640 }, { "epoch": 0.24449459005959615, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6243, "step": 2641 }, { "epoch": 0.24458716657987617, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6036, "step": 2642 }, { "epoch": 0.24467974310015622, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6353, "step": 2643 }, { "epoch": 0.24477231962043627, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5888, "step": 2644 }, { "epoch": 0.2448648961407163, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6343, "step": 2645 }, { "epoch": 0.24495747266099635, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6279, "step": 2646 }, { "epoch": 0.2450500491812764, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.703, "step": 2647 }, { "epoch": 0.24514262570155645, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6384, "step": 2648 }, { "epoch": 0.24523520222183648, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6152, "step": 2649 }, { "epoch": 0.24532777874211653, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6887, "step": 2650 }, { "epoch": 0.24542035526239658, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6758, "step": 2651 }, { "epoch": 0.2455129317826766, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6623, "step": 2652 }, { "epoch": 0.24560550830295666, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.706, "step": 2653 }, { "epoch": 0.2456980848232367, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7213, "step": 2654 }, { "epoch": 0.24579066134351676, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7687, "step": 2655 }, { "epoch": 0.2458832378637968, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7282, "step": 2656 }, { "epoch": 0.24597581438407684, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7056, "step": 2657 }, { "epoch": 0.2460683909043569, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6426, "step": 2658 }, { "epoch": 0.24616096742463692, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6115, "step": 2659 }, { "epoch": 0.24625354394491697, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6938, "step": 2660 }, { "epoch": 0.24634612046519702, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6401, "step": 2661 }, { "epoch": 0.24643869698547705, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6226, "step": 2662 }, { "epoch": 0.2465312735057571, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6953, "step": 2663 }, { "epoch": 0.24662385002603715, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6921, "step": 2664 }, { "epoch": 0.2467164265463172, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6004, "step": 2665 }, { "epoch": 0.24680900306659723, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6302, "step": 2666 }, { "epoch": 0.24690157958687728, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.604, "step": 2667 }, { "epoch": 0.24699415610715733, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5945, "step": 2668 }, { "epoch": 0.24708673262743736, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.6713, "step": 2669 }, { "epoch": 0.2471793091477174, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6453, "step": 2670 }, { "epoch": 0.24727188566799746, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6212, "step": 2671 }, { "epoch": 0.2473644621882775, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6691, "step": 2672 }, { "epoch": 0.24745703870855754, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6895, "step": 2673 }, { "epoch": 0.2475496152288376, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6548, "step": 2674 }, { "epoch": 0.24764219174911764, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6663, "step": 2675 }, { "epoch": 0.24773476826939766, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6238, "step": 2676 }, { "epoch": 0.24782734478967772, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6859, "step": 2677 }, { "epoch": 0.24791992130995777, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.5579, "step": 2678 }, { "epoch": 0.2480124978302378, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6192, "step": 2679 }, { "epoch": 0.24810507435051785, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6615, "step": 2680 }, { "epoch": 0.2481976508707979, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6472, "step": 2681 }, { "epoch": 0.24829022739107795, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6743, "step": 2682 }, { "epoch": 0.24838280391135797, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6743, "step": 2683 }, { "epoch": 0.24847538043163803, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.676, "step": 2684 }, { "epoch": 0.24856795695191808, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6005, "step": 2685 }, { "epoch": 0.2486605334721981, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.6041, "step": 2686 }, { "epoch": 0.24875310999247816, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6173, "step": 2687 }, { "epoch": 0.2488456865127582, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.6514, "step": 2688 }, { "epoch": 0.24893826303303826, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.6773, "step": 2689 }, { "epoch": 0.24903083955331828, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6496, "step": 2690 }, { "epoch": 0.24912341607359834, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6966, "step": 2691 }, { "epoch": 0.2492159925938784, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6741, "step": 2692 }, { "epoch": 0.2493085691141584, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.6864, "step": 2693 }, { "epoch": 0.24940114563443846, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6526, "step": 2694 }, { "epoch": 0.24949372215471852, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7017, "step": 2695 }, { "epoch": 0.24958629867499854, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6501, "step": 2696 }, { "epoch": 0.2496788751952786, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6995, "step": 2697 }, { "epoch": 0.24977145171555865, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6778, "step": 2698 }, { "epoch": 0.2498640282358387, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6304, "step": 2699 }, { "epoch": 0.24995660475611872, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.7184, "step": 2700 }, { "epoch": 0.25004918127639875, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7369, "step": 2701 }, { "epoch": 0.2501417577966788, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6094, "step": 2702 }, { "epoch": 0.25023433431695885, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6358, "step": 2703 }, { "epoch": 0.2503269108372389, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6927, "step": 2704 }, { "epoch": 0.25041948735751896, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6453, "step": 2705 }, { "epoch": 0.250512063877799, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.6397, "step": 2706 }, { "epoch": 0.25060464039807906, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6373, "step": 2707 }, { "epoch": 0.25069721691835906, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6574, "step": 2708 }, { "epoch": 0.2507897934386391, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.683, "step": 2709 }, { "epoch": 0.25088236995891916, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7074, "step": 2710 }, { "epoch": 0.2509749464791992, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6745, "step": 2711 }, { "epoch": 0.25106752299947926, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6705, "step": 2712 }, { "epoch": 0.2511600995197593, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6188, "step": 2713 }, { "epoch": 0.25125267604003937, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6323, "step": 2714 }, { "epoch": 0.25134525256031937, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6792, "step": 2715 }, { "epoch": 0.2514378290805994, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7093, "step": 2716 }, { "epoch": 0.25153040560087947, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6941, "step": 2717 }, { "epoch": 0.2516229821211595, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6991, "step": 2718 }, { "epoch": 0.2517155586414396, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6604, "step": 2719 }, { "epoch": 0.2518081351617196, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.7023, "step": 2720 }, { "epoch": 0.2519007116819997, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.72, "step": 2721 }, { "epoch": 0.2519932882022797, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6754, "step": 2722 }, { "epoch": 0.2520858647225597, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.713, "step": 2723 }, { "epoch": 0.2521784412428398, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.728, "step": 2724 }, { "epoch": 0.25227101776311983, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6826, "step": 2725 }, { "epoch": 0.2523635942833999, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6465, "step": 2726 }, { "epoch": 0.25245617080367994, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6399, "step": 2727 }, { "epoch": 0.25254874732396, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6052, "step": 2728 }, { "epoch": 0.25264132384424, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5989, "step": 2729 }, { "epoch": 0.25273390036452004, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6454, "step": 2730 }, { "epoch": 0.2528264768848001, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6814, "step": 2731 }, { "epoch": 0.25291905340508014, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.6407, "step": 2732 }, { "epoch": 0.2530116299253602, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6849, "step": 2733 }, { "epoch": 0.25310420644564025, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5967, "step": 2734 }, { "epoch": 0.25319678296592024, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.6596, "step": 2735 }, { "epoch": 0.2532893594862003, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6836, "step": 2736 }, { "epoch": 0.25338193600648035, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.689, "step": 2737 }, { "epoch": 0.2534745125267604, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.7011, "step": 2738 }, { "epoch": 0.25356708904704045, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6348, "step": 2739 }, { "epoch": 0.2536596655673205, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6331, "step": 2740 }, { "epoch": 0.25375224208760055, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.62, "step": 2741 }, { "epoch": 0.25384481860788055, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6445, "step": 2742 }, { "epoch": 0.2539373951281606, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6724, "step": 2743 }, { "epoch": 0.25402997164844066, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6466, "step": 2744 }, { "epoch": 0.2541225481687207, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.689, "step": 2745 }, { "epoch": 0.25421512468900076, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6311, "step": 2746 }, { "epoch": 0.2543077012092808, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6765, "step": 2747 }, { "epoch": 0.25440027772956086, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6515, "step": 2748 }, { "epoch": 0.25449285424984086, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6626, "step": 2749 }, { "epoch": 0.2545854307701209, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7178, "step": 2750 }, { "epoch": 0.25467800729040097, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6839, "step": 2751 }, { "epoch": 0.254770583810681, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6863, "step": 2752 }, { "epoch": 0.25486316033096107, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6898, "step": 2753 }, { "epoch": 0.2549557368512411, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.609, "step": 2754 }, { "epoch": 0.2550483133715212, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.616, "step": 2755 }, { "epoch": 0.25514088989180117, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6102, "step": 2756 }, { "epoch": 0.2552334664120812, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6667, "step": 2757 }, { "epoch": 0.2553260429323613, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7157, "step": 2758 }, { "epoch": 0.2554186194526413, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6555, "step": 2759 }, { "epoch": 0.2555111959729214, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6451, "step": 2760 }, { "epoch": 0.25560377249320143, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5886, "step": 2761 }, { "epoch": 0.2556963490134815, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6771, "step": 2762 }, { "epoch": 0.2557889255337615, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6772, "step": 2763 }, { "epoch": 0.25588150205404153, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6507, "step": 2764 }, { "epoch": 0.2559740785743216, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6707, "step": 2765 }, { "epoch": 0.25606665509460164, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.624, "step": 2766 }, { "epoch": 0.2561592316148817, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6516, "step": 2767 }, { "epoch": 0.25625180813516174, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6816, "step": 2768 }, { "epoch": 0.25634438465544174, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6997, "step": 2769 }, { "epoch": 0.2564369611757218, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6429, "step": 2770 }, { "epoch": 0.25652953769600184, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6529, "step": 2771 }, { "epoch": 0.2566221142162819, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6278, "step": 2772 }, { "epoch": 0.25671469073656195, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6645, "step": 2773 }, { "epoch": 0.256807267256842, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6599, "step": 2774 }, { "epoch": 0.25689984377712205, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6418, "step": 2775 }, { "epoch": 0.25699242029740205, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6788, "step": 2776 }, { "epoch": 0.2570849968176821, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.638, "step": 2777 }, { "epoch": 0.25717757333796215, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6118, "step": 2778 }, { "epoch": 0.2572701498582422, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6631, "step": 2779 }, { "epoch": 0.25736272637852226, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6593, "step": 2780 }, { "epoch": 0.2574553028988023, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7023, "step": 2781 }, { "epoch": 0.25754787941908236, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6811, "step": 2782 }, { "epoch": 0.25764045593936236, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6432, "step": 2783 }, { "epoch": 0.2577330324596424, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.663, "step": 2784 }, { "epoch": 0.25782560897992246, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5793, "step": 2785 }, { "epoch": 0.2579181855002025, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6421, "step": 2786 }, { "epoch": 0.25801076202048256, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6715, "step": 2787 }, { "epoch": 0.2581033385407626, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.7028, "step": 2788 }, { "epoch": 0.25819591506104267, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5634, "step": 2789 }, { "epoch": 0.25828849158132267, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7299, "step": 2790 }, { "epoch": 0.2583810681016027, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6353, "step": 2791 }, { "epoch": 0.25847364462188277, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6583, "step": 2792 }, { "epoch": 0.2585662211421628, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.6632, "step": 2793 }, { "epoch": 0.2586587976624429, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.7041, "step": 2794 }, { "epoch": 0.2587513741827229, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6851, "step": 2795 }, { "epoch": 0.258843950703003, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6089, "step": 2796 }, { "epoch": 0.258936527223283, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6764, "step": 2797 }, { "epoch": 0.259029103743563, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5725, "step": 2798 }, { "epoch": 0.2591216802638431, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6125, "step": 2799 }, { "epoch": 0.25921425678412313, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6711, "step": 2800 }, { "epoch": 0.2593068333044032, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6687, "step": 2801 }, { "epoch": 0.25939940982468324, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6241, "step": 2802 }, { "epoch": 0.25949198634496323, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6478, "step": 2803 }, { "epoch": 0.2595845628652433, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6141, "step": 2804 }, { "epoch": 0.25967713938552334, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6976, "step": 2805 }, { "epoch": 0.2597697159058034, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6385, "step": 2806 }, { "epoch": 0.25986229242608344, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6946, "step": 2807 }, { "epoch": 0.2599548689463635, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6303, "step": 2808 }, { "epoch": 0.26004744546664355, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7219, "step": 2809 }, { "epoch": 0.26014002198692354, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6885, "step": 2810 }, { "epoch": 0.2602325985072036, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6945, "step": 2811 }, { "epoch": 0.26032517502748365, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6883, "step": 2812 }, { "epoch": 0.2604177515477637, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7024, "step": 2813 }, { "epoch": 0.26051032806804375, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7098, "step": 2814 }, { "epoch": 0.2606029045883238, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6763, "step": 2815 }, { "epoch": 0.26069548110860385, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6422, "step": 2816 }, { "epoch": 0.26078805762888385, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6549, "step": 2817 }, { "epoch": 0.2608806341491639, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6597, "step": 2818 }, { "epoch": 0.26097321066944396, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.632, "step": 2819 }, { "epoch": 0.261065787189724, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7053, "step": 2820 }, { "epoch": 0.26115836371000406, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6298, "step": 2821 }, { "epoch": 0.2612509402302841, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6617, "step": 2822 }, { "epoch": 0.26134351675056416, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6142, "step": 2823 }, { "epoch": 0.26143609327084416, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6591, "step": 2824 }, { "epoch": 0.2615286697911242, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6943, "step": 2825 }, { "epoch": 0.26162124631140427, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6459, "step": 2826 }, { "epoch": 0.2617138228316843, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7123, "step": 2827 }, { "epoch": 0.26180639935196437, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7169, "step": 2828 }, { "epoch": 0.2618989758722444, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6423, "step": 2829 }, { "epoch": 0.2619915523925244, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.6434, "step": 2830 }, { "epoch": 0.26208412891280447, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6682, "step": 2831 }, { "epoch": 0.2621767054330845, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6019, "step": 2832 }, { "epoch": 0.2622692819533646, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6233, "step": 2833 }, { "epoch": 0.2623618584736446, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6967, "step": 2834 }, { "epoch": 0.2624544349939247, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6257, "step": 2835 }, { "epoch": 0.26254701151420473, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6696, "step": 2836 }, { "epoch": 0.26263958803448473, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6953, "step": 2837 }, { "epoch": 0.2627321645547648, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6865, "step": 2838 }, { "epoch": 0.26282474107504483, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6295, "step": 2839 }, { "epoch": 0.2629173175953249, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6418, "step": 2840 }, { "epoch": 0.26300989411560494, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6713, "step": 2841 }, { "epoch": 0.263102470635885, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6951, "step": 2842 }, { "epoch": 0.26319504715616504, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6727, "step": 2843 }, { "epoch": 0.26328762367644504, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5912, "step": 2844 }, { "epoch": 0.2633802001967251, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6671, "step": 2845 }, { "epoch": 0.26347277671700514, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6898, "step": 2846 }, { "epoch": 0.2635653532372852, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6778, "step": 2847 }, { "epoch": 0.26365792975756525, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6392, "step": 2848 }, { "epoch": 0.2637505062778453, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6483, "step": 2849 }, { "epoch": 0.26384308279812535, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6028, "step": 2850 }, { "epoch": 0.26393565931840535, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5759, "step": 2851 }, { "epoch": 0.2640282358386854, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6332, "step": 2852 }, { "epoch": 0.26412081235896545, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6672, "step": 2853 }, { "epoch": 0.2642133888792455, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6469, "step": 2854 }, { "epoch": 0.26430596539952556, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.7041, "step": 2855 }, { "epoch": 0.2643985419198056, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6714, "step": 2856 }, { "epoch": 0.26449111844008566, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5934, "step": 2857 }, { "epoch": 0.26458369496036566, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6739, "step": 2858 }, { "epoch": 0.2646762714806457, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6576, "step": 2859 }, { "epoch": 0.26476884800092576, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6805, "step": 2860 }, { "epoch": 0.2648614245212058, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6645, "step": 2861 }, { "epoch": 0.26495400104148586, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6594, "step": 2862 }, { "epoch": 0.2650465775617659, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6264, "step": 2863 }, { "epoch": 0.2651391540820459, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6439, "step": 2864 }, { "epoch": 0.26523173060232597, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6258, "step": 2865 }, { "epoch": 0.265324307122606, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6453, "step": 2866 }, { "epoch": 0.26541688364288607, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.643, "step": 2867 }, { "epoch": 0.2655094601631661, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.7105, "step": 2868 }, { "epoch": 0.2656020366834462, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6835, "step": 2869 }, { "epoch": 0.2656946132037262, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6739, "step": 2870 }, { "epoch": 0.2657871897240062, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6796, "step": 2871 }, { "epoch": 0.2658797662442863, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6223, "step": 2872 }, { "epoch": 0.2659723427645663, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6662, "step": 2873 }, { "epoch": 0.2660649192848464, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6816, "step": 2874 }, { "epoch": 0.26615749580512643, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6126, "step": 2875 }, { "epoch": 0.2662500723254065, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.719, "step": 2876 }, { "epoch": 0.26634264884568654, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.639, "step": 2877 }, { "epoch": 0.26643522536596653, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6367, "step": 2878 }, { "epoch": 0.2665278018862466, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5886, "step": 2879 }, { "epoch": 0.26662037840652664, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5996, "step": 2880 }, { "epoch": 0.2667129549268067, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6856, "step": 2881 }, { "epoch": 0.26680553144708674, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7201, "step": 2882 }, { "epoch": 0.2668981079673668, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6602, "step": 2883 }, { "epoch": 0.26699068448764685, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7035, "step": 2884 }, { "epoch": 0.26708326100792684, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6811, "step": 2885 }, { "epoch": 0.2671758375282069, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6841, "step": 2886 }, { "epoch": 0.26726841404848695, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6108, "step": 2887 }, { "epoch": 0.267360990568767, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7174, "step": 2888 }, { "epoch": 0.26745356708904705, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5924, "step": 2889 }, { "epoch": 0.2675461436093271, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6595, "step": 2890 }, { "epoch": 0.26763872012960715, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6713, "step": 2891 }, { "epoch": 0.26773129664988715, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7279, "step": 2892 }, { "epoch": 0.2678238731701672, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6218, "step": 2893 }, { "epoch": 0.26791644969044726, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6724, "step": 2894 }, { "epoch": 0.2680090262107273, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.7069, "step": 2895 }, { "epoch": 0.26810160273100736, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6208, "step": 2896 }, { "epoch": 0.2681941792512874, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.691, "step": 2897 }, { "epoch": 0.2682867557715674, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7264, "step": 2898 }, { "epoch": 0.26837933229184746, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6605, "step": 2899 }, { "epoch": 0.2684719088121275, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6441, "step": 2900 }, { "epoch": 0.26856448533240757, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6617, "step": 2901 }, { "epoch": 0.2686570618526876, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6914, "step": 2902 }, { "epoch": 0.26874963837296767, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6822, "step": 2903 }, { "epoch": 0.2688422148932477, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5771, "step": 2904 }, { "epoch": 0.2689347914135277, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6515, "step": 2905 }, { "epoch": 0.26902736793380777, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6529, "step": 2906 }, { "epoch": 0.2691199444540878, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.639, "step": 2907 }, { "epoch": 0.2692125209743679, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6697, "step": 2908 }, { "epoch": 0.2693050974946479, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6447, "step": 2909 }, { "epoch": 0.269397674014928, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6552, "step": 2910 }, { "epoch": 0.26949025053520803, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7034, "step": 2911 }, { "epoch": 0.26958282705548803, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.7098, "step": 2912 }, { "epoch": 0.2696754035757681, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7295, "step": 2913 }, { "epoch": 0.26976798009604813, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6588, "step": 2914 }, { "epoch": 0.2698605566163282, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6701, "step": 2915 }, { "epoch": 0.26995313313660824, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6751, "step": 2916 }, { "epoch": 0.2700457096568883, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6817, "step": 2917 }, { "epoch": 0.27013828617716834, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7306, "step": 2918 }, { "epoch": 0.27023086269744834, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6562, "step": 2919 }, { "epoch": 0.2703234392177284, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5626, "step": 2920 }, { "epoch": 0.27041601573800844, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6434, "step": 2921 }, { "epoch": 0.2705085922582885, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6845, "step": 2922 }, { "epoch": 0.27060116877856855, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6769, "step": 2923 }, { "epoch": 0.2706937452988486, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6294, "step": 2924 }, { "epoch": 0.27078632181912865, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6379, "step": 2925 }, { "epoch": 0.27087889833940865, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6709, "step": 2926 }, { "epoch": 0.2709714748596887, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.586, "step": 2927 }, { "epoch": 0.27106405137996875, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5888, "step": 2928 }, { "epoch": 0.2711566279002488, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.587, "step": 2929 }, { "epoch": 0.27124920442052886, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6487, "step": 2930 }, { "epoch": 0.2713417809408089, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6153, "step": 2931 }, { "epoch": 0.2714343574610889, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6689, "step": 2932 }, { "epoch": 0.27152693398136896, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.6482, "step": 2933 }, { "epoch": 0.271619510501649, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.7232, "step": 2934 }, { "epoch": 0.27171208702192906, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6562, "step": 2935 }, { "epoch": 0.2718046635422091, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6476, "step": 2936 }, { "epoch": 0.27189724006248916, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6312, "step": 2937 }, { "epoch": 0.2719898165827692, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7164, "step": 2938 }, { "epoch": 0.2720823931030492, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6385, "step": 2939 }, { "epoch": 0.27217496962332927, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6087, "step": 2940 }, { "epoch": 0.2722675461436093, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6188, "step": 2941 }, { "epoch": 0.27236012266388937, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6092, "step": 2942 }, { "epoch": 0.2724526991841694, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6976, "step": 2943 }, { "epoch": 0.2725452757044495, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6015, "step": 2944 }, { "epoch": 0.2726378522247295, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6333, "step": 2945 }, { "epoch": 0.2727304287450095, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7133, "step": 2946 }, { "epoch": 0.2728230052652896, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6359, "step": 2947 }, { "epoch": 0.2729155817855696, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5624, "step": 2948 }, { "epoch": 0.2730081583058497, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6915, "step": 2949 }, { "epoch": 0.27310073482612973, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.647, "step": 2950 }, { "epoch": 0.2731933113464098, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7084, "step": 2951 }, { "epoch": 0.2731933113464098, "eval_loss": 1.6467796564102173, "eval_runtime": 63.8421, "eval_samples_per_second": 23.871, "eval_steps_per_second": 5.968, "step": 2951 }, { "epoch": 0.27328588786668984, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6319, "step": 2952 }, { "epoch": 0.27337846438696983, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6934, "step": 2953 }, { "epoch": 0.2734710409072499, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6256, "step": 2954 }, { "epoch": 0.27356361742752994, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5788, "step": 2955 }, { "epoch": 0.27365619394781, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6994, "step": 2956 }, { "epoch": 0.27374877046809004, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7362, "step": 2957 }, { "epoch": 0.2738413469883701, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6726, "step": 2958 }, { "epoch": 0.27393392350865015, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6046, "step": 2959 }, { "epoch": 0.27402650002893014, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6937, "step": 2960 }, { "epoch": 0.2741190765492102, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6249, "step": 2961 }, { "epoch": 0.27421165306949025, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6794, "step": 2962 }, { "epoch": 0.2743042295897703, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6063, "step": 2963 }, { "epoch": 0.27439680611005035, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6664, "step": 2964 }, { "epoch": 0.2744893826303304, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6308, "step": 2965 }, { "epoch": 0.2745819591506104, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6903, "step": 2966 }, { "epoch": 0.27467453567089045, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5371, "step": 2967 }, { "epoch": 0.2747671121911705, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7016, "step": 2968 }, { "epoch": 0.27485968871145056, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6229, "step": 2969 }, { "epoch": 0.2749522652317306, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6649, "step": 2970 }, { "epoch": 0.27504484175201066, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.672, "step": 2971 }, { "epoch": 0.2751374182722907, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6233, "step": 2972 }, { "epoch": 0.2752299947925707, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6205, "step": 2973 }, { "epoch": 0.27532257131285076, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.582, "step": 2974 }, { "epoch": 0.2754151478331308, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6705, "step": 2975 }, { "epoch": 0.27550772435341087, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6853, "step": 2976 }, { "epoch": 0.2756003008736909, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6507, "step": 2977 }, { "epoch": 0.27569287739397097, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6084, "step": 2978 }, { "epoch": 0.275785453914251, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.639, "step": 2979 }, { "epoch": 0.275878030434531, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6382, "step": 2980 }, { "epoch": 0.27597060695481107, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5851, "step": 2981 }, { "epoch": 0.2760631834750911, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6638, "step": 2982 }, { "epoch": 0.2761557599953712, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5929, "step": 2983 }, { "epoch": 0.2762483365156512, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6806, "step": 2984 }, { "epoch": 0.2763409130359313, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6414, "step": 2985 }, { "epoch": 0.27643348955621133, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6779, "step": 2986 }, { "epoch": 0.27652606607649133, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6147, "step": 2987 }, { "epoch": 0.2766186425967714, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.6612, "step": 2988 }, { "epoch": 0.27671121911705143, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.6038, "step": 2989 }, { "epoch": 0.2768037956373315, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6406, "step": 2990 }, { "epoch": 0.27689637215761154, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6711, "step": 2991 }, { "epoch": 0.2769889486778916, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6493, "step": 2992 }, { "epoch": 0.27708152519817164, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.598, "step": 2993 }, { "epoch": 0.27717410171845164, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6276, "step": 2994 }, { "epoch": 0.2772666782387317, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6419, "step": 2995 }, { "epoch": 0.27735925475901174, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6313, "step": 2996 }, { "epoch": 0.2774518312792918, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6995, "step": 2997 }, { "epoch": 0.27754440779957185, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6983, "step": 2998 }, { "epoch": 0.2776369843198519, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6808, "step": 2999 }, { "epoch": 0.2777295608401319, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6811, "step": 3000 }, { "epoch": 0.27782213736041195, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6762, "step": 3001 }, { "epoch": 0.277914713880692, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.668, "step": 3002 }, { "epoch": 0.27800729040097205, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6282, "step": 3003 }, { "epoch": 0.2780998669212521, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5678, "step": 3004 }, { "epoch": 0.27819244344153216, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6592, "step": 3005 }, { "epoch": 0.2782850199618122, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6368, "step": 3006 }, { "epoch": 0.2783775964820922, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5944, "step": 3007 }, { "epoch": 0.27847017300237226, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6043, "step": 3008 }, { "epoch": 0.2785627495226523, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6701, "step": 3009 }, { "epoch": 0.27865532604293236, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6936, "step": 3010 }, { "epoch": 0.2787479025632124, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7021, "step": 3011 }, { "epoch": 0.27884047908349247, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6176, "step": 3012 }, { "epoch": 0.2789330556037725, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6846, "step": 3013 }, { "epoch": 0.2790256321240525, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6148, "step": 3014 }, { "epoch": 0.27911820864433257, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.605, "step": 3015 }, { "epoch": 0.2792107851646126, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6793, "step": 3016 }, { "epoch": 0.27930336168489267, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.696, "step": 3017 }, { "epoch": 0.2793959382051727, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6339, "step": 3018 }, { "epoch": 0.2794885147254528, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.67, "step": 3019 }, { "epoch": 0.2795810912457328, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6844, "step": 3020 }, { "epoch": 0.2796736677660128, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6333, "step": 3021 }, { "epoch": 0.2797662442862929, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7006, "step": 3022 }, { "epoch": 0.2798588208065729, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.651, "step": 3023 }, { "epoch": 0.279951397326853, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.647, "step": 3024 }, { "epoch": 0.28004397384713303, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6122, "step": 3025 }, { "epoch": 0.2801365503674131, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6672, "step": 3026 }, { "epoch": 0.28022912688769314, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.661, "step": 3027 }, { "epoch": 0.28032170340797313, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6573, "step": 3028 }, { "epoch": 0.2804142799282532, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6587, "step": 3029 }, { "epoch": 0.28050685644853324, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5925, "step": 3030 }, { "epoch": 0.2805994329688133, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.6513, "step": 3031 }, { "epoch": 0.28069200948909334, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6461, "step": 3032 }, { "epoch": 0.2807845860093734, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6294, "step": 3033 }, { "epoch": 0.2808771625296534, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5613, "step": 3034 }, { "epoch": 0.28096973904993344, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6022, "step": 3035 }, { "epoch": 0.2810623155702135, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6439, "step": 3036 }, { "epoch": 0.28115489209049355, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7109, "step": 3037 }, { "epoch": 0.2812474686107736, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.7053, "step": 3038 }, { "epoch": 0.28134004513105365, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6403, "step": 3039 }, { "epoch": 0.2814326216513337, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6558, "step": 3040 }, { "epoch": 0.2815251981716137, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.661, "step": 3041 }, { "epoch": 0.28161777469189375, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6084, "step": 3042 }, { "epoch": 0.2817103512121738, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6369, "step": 3043 }, { "epoch": 0.28180292773245386, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.7114, "step": 3044 }, { "epoch": 0.2818955042527339, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.7034, "step": 3045 }, { "epoch": 0.28198808077301396, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6974, "step": 3046 }, { "epoch": 0.282080657293294, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5933, "step": 3047 }, { "epoch": 0.282173233813574, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5932, "step": 3048 }, { "epoch": 0.28226581033385406, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6345, "step": 3049 }, { "epoch": 0.2823583868541341, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.607, "step": 3050 }, { "epoch": 0.28245096337441417, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7024, "step": 3051 }, { "epoch": 0.2825435398946942, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.637, "step": 3052 }, { "epoch": 0.28263611641497427, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6633, "step": 3053 }, { "epoch": 0.2827286929352543, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6791, "step": 3054 }, { "epoch": 0.2828212694555343, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6469, "step": 3055 }, { "epoch": 0.28291384597581437, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6856, "step": 3056 }, { "epoch": 0.2830064224960944, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.647, "step": 3057 }, { "epoch": 0.2830989990163745, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6691, "step": 3058 }, { "epoch": 0.2831915755366545, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5618, "step": 3059 }, { "epoch": 0.2832841520569346, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7262, "step": 3060 }, { "epoch": 0.28337672857721463, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.651, "step": 3061 }, { "epoch": 0.28346930509749463, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6853, "step": 3062 }, { "epoch": 0.2835618816177747, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7013, "step": 3063 }, { "epoch": 0.28365445813805473, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6417, "step": 3064 }, { "epoch": 0.2837470346583348, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.645, "step": 3065 }, { "epoch": 0.28383961117861484, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.688, "step": 3066 }, { "epoch": 0.2839321876988949, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6017, "step": 3067 }, { "epoch": 0.2840247642191749, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.7023, "step": 3068 }, { "epoch": 0.28411734073945494, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6343, "step": 3069 }, { "epoch": 0.284209917259735, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7089, "step": 3070 }, { "epoch": 0.28430249378001504, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.608, "step": 3071 }, { "epoch": 0.2843950703002951, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5908, "step": 3072 }, { "epoch": 0.28448764682057515, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6105, "step": 3073 }, { "epoch": 0.2845802233408552, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6281, "step": 3074 }, { "epoch": 0.2846727998611352, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.6848, "step": 3075 }, { "epoch": 0.28476537638141525, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.6712, "step": 3076 }, { "epoch": 0.2848579529016953, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6009, "step": 3077 }, { "epoch": 0.28495052942197535, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.706, "step": 3078 }, { "epoch": 0.2850431059422554, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.639, "step": 3079 }, { "epoch": 0.28513568246253546, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6045, "step": 3080 }, { "epoch": 0.2852282589828155, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5797, "step": 3081 }, { "epoch": 0.2853208355030955, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.666, "step": 3082 }, { "epoch": 0.28541341202337556, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6524, "step": 3083 }, { "epoch": 0.2855059885436556, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6488, "step": 3084 }, { "epoch": 0.28559856506393566, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6292, "step": 3085 }, { "epoch": 0.2856911415842157, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6197, "step": 3086 }, { "epoch": 0.28578371810449577, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6344, "step": 3087 }, { "epoch": 0.2858762946247758, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.6809, "step": 3088 }, { "epoch": 0.2859688711450558, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6324, "step": 3089 }, { "epoch": 0.28606144766533587, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6053, "step": 3090 }, { "epoch": 0.2861540241856159, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6379, "step": 3091 }, { "epoch": 0.28624660070589597, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5912, "step": 3092 }, { "epoch": 0.286339177226176, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6259, "step": 3093 }, { "epoch": 0.2864317537464561, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6026, "step": 3094 }, { "epoch": 0.28652433026673607, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6866, "step": 3095 }, { "epoch": 0.2866169067870161, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.5995, "step": 3096 }, { "epoch": 0.2867094833072962, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6426, "step": 3097 }, { "epoch": 0.28680205982757623, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6621, "step": 3098 }, { "epoch": 0.2868946363478563, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5918, "step": 3099 }, { "epoch": 0.28698721286813633, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.585, "step": 3100 }, { "epoch": 0.2870797893884164, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6172, "step": 3101 }, { "epoch": 0.2871723659086964, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.7021, "step": 3102 }, { "epoch": 0.28726494242897643, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6727, "step": 3103 }, { "epoch": 0.2873575189492565, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5781, "step": 3104 }, { "epoch": 0.28745009546953654, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6349, "step": 3105 }, { "epoch": 0.2875426719898166, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6374, "step": 3106 }, { "epoch": 0.28763524851009664, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5979, "step": 3107 }, { "epoch": 0.2877278250303767, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6611, "step": 3108 }, { "epoch": 0.2878204015506567, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6233, "step": 3109 }, { "epoch": 0.28791297807093674, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.634, "step": 3110 }, { "epoch": 0.2880055545912168, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.658, "step": 3111 }, { "epoch": 0.28809813111149685, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6028, "step": 3112 }, { "epoch": 0.2881907076317769, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5755, "step": 3113 }, { "epoch": 0.28828328415205695, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6498, "step": 3114 }, { "epoch": 0.288375860672337, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6294, "step": 3115 }, { "epoch": 0.288468437192617, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6725, "step": 3116 }, { "epoch": 0.28856101371289705, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5937, "step": 3117 }, { "epoch": 0.2886535902331771, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6173, "step": 3118 }, { "epoch": 0.28874616675345716, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.7205, "step": 3119 }, { "epoch": 0.2888387432737372, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6108, "step": 3120 }, { "epoch": 0.28893131979401726, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5598, "step": 3121 }, { "epoch": 0.2890238963142973, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6216, "step": 3122 }, { "epoch": 0.2891164728345773, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6773, "step": 3123 }, { "epoch": 0.28920904935485736, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.7219, "step": 3124 }, { "epoch": 0.2893016258751374, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.664, "step": 3125 }, { "epoch": 0.28939420239541747, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5667, "step": 3126 }, { "epoch": 0.2894867789156975, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6766, "step": 3127 }, { "epoch": 0.28957935543597757, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.6073, "step": 3128 }, { "epoch": 0.28967193195625757, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6359, "step": 3129 }, { "epoch": 0.2897645084765376, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6336, "step": 3130 }, { "epoch": 0.28985708499681767, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6282, "step": 3131 }, { "epoch": 0.2899496615170977, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6412, "step": 3132 }, { "epoch": 0.2900422380373778, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.5947, "step": 3133 }, { "epoch": 0.2901348145576578, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6327, "step": 3134 }, { "epoch": 0.2902273910779379, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.6913, "step": 3135 }, { "epoch": 0.2903199675982179, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6276, "step": 3136 }, { "epoch": 0.29041254411849793, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6735, "step": 3137 }, { "epoch": 0.290505120638778, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6666, "step": 3138 }, { "epoch": 0.29059769715905803, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6725, "step": 3139 }, { "epoch": 0.2906902736793381, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.655, "step": 3140 }, { "epoch": 0.29078285019961814, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.6524, "step": 3141 }, { "epoch": 0.2908754267198982, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6581, "step": 3142 }, { "epoch": 0.2909680032401782, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6657, "step": 3143 }, { "epoch": 0.29106057976045824, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6058, "step": 3144 }, { "epoch": 0.2911531562807383, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6796, "step": 3145 }, { "epoch": 0.29124573280101834, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6344, "step": 3146 }, { "epoch": 0.2913383093212984, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.65, "step": 3147 }, { "epoch": 0.29143088584157845, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6311, "step": 3148 }, { "epoch": 0.2915234623618585, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6832, "step": 3149 }, { "epoch": 0.2916160388821385, "grad_norm": 0.1318359375, "learning_rate": 0.02, "loss": 1.5998, "step": 3150 }, { "epoch": 0.29170861540241855, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6324, "step": 3151 }, { "epoch": 0.2918011919226986, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.7262, "step": 3152 }, { "epoch": 0.29189376844297865, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6261, "step": 3153 }, { "epoch": 0.2919863449632587, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6936, "step": 3154 }, { "epoch": 0.29207892148353876, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.7054, "step": 3155 }, { "epoch": 0.2921714980038188, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5872, "step": 3156 }, { "epoch": 0.2922640745240988, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6888, "step": 3157 }, { "epoch": 0.29235665104437886, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6674, "step": 3158 }, { "epoch": 0.2924492275646589, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6291, "step": 3159 }, { "epoch": 0.29254180408493896, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5985, "step": 3160 }, { "epoch": 0.292634380605219, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6188, "step": 3161 }, { "epoch": 0.29272695712549907, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6746, "step": 3162 }, { "epoch": 0.29281953364577906, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5703, "step": 3163 }, { "epoch": 0.2929121101660591, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6448, "step": 3164 }, { "epoch": 0.29300468668633917, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6263, "step": 3165 }, { "epoch": 0.2930972632066192, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6676, "step": 3166 }, { "epoch": 0.29318983972689927, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6766, "step": 3167 }, { "epoch": 0.2932824162471793, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.648, "step": 3168 }, { "epoch": 0.2933749927674594, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6572, "step": 3169 }, { "epoch": 0.29346756928773937, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5613, "step": 3170 }, { "epoch": 0.2935601458080194, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6178, "step": 3171 }, { "epoch": 0.2936527223282995, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6233, "step": 3172 }, { "epoch": 0.29374529884857953, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6083, "step": 3173 }, { "epoch": 0.2938378753688596, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6699, "step": 3174 }, { "epoch": 0.29393045188913963, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6405, "step": 3175 }, { "epoch": 0.2940230284094197, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6556, "step": 3176 }, { "epoch": 0.2941156049296997, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6218, "step": 3177 }, { "epoch": 0.29420818144997973, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.6585, "step": 3178 }, { "epoch": 0.2943007579702598, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.624, "step": 3179 }, { "epoch": 0.29439333449053984, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6089, "step": 3180 }, { "epoch": 0.2944859110108199, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.596, "step": 3181 }, { "epoch": 0.29457848753109994, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.641, "step": 3182 }, { "epoch": 0.29467106405138, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6858, "step": 3183 }, { "epoch": 0.29476364057166, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6303, "step": 3184 }, { "epoch": 0.29485621709194004, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6397, "step": 3185 }, { "epoch": 0.2949487936122201, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5597, "step": 3186 }, { "epoch": 0.29504137013250015, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6552, "step": 3187 }, { "epoch": 0.2951339466527802, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6316, "step": 3188 }, { "epoch": 0.29522652317306025, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6607, "step": 3189 }, { "epoch": 0.2953190996933403, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6265, "step": 3190 }, { "epoch": 0.2954116762136203, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6501, "step": 3191 }, { "epoch": 0.29550425273390035, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6192, "step": 3192 }, { "epoch": 0.2955968292541804, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6023, "step": 3193 }, { "epoch": 0.29568940577446046, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5745, "step": 3194 }, { "epoch": 0.2957819822947405, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6576, "step": 3195 }, { "epoch": 0.29587455881502056, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6259, "step": 3196 }, { "epoch": 0.29596713533530056, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.611, "step": 3197 }, { "epoch": 0.2960597118555806, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.6503, "step": 3198 }, { "epoch": 0.29615228837586066, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6313, "step": 3199 }, { "epoch": 0.2962448648961407, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6734, "step": 3200 }, { "epoch": 0.29633744141642077, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.659, "step": 3201 }, { "epoch": 0.2964300179367008, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6526, "step": 3202 }, { "epoch": 0.29652259445698087, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5782, "step": 3203 }, { "epoch": 0.29661517097726087, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6747, "step": 3204 }, { "epoch": 0.2967077474975409, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6502, "step": 3205 }, { "epoch": 0.29680032401782097, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6704, "step": 3206 }, { "epoch": 0.296892900538101, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6721, "step": 3207 }, { "epoch": 0.2969854770583811, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6302, "step": 3208 }, { "epoch": 0.2970780535786611, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6616, "step": 3209 }, { "epoch": 0.2971706300989412, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6429, "step": 3210 }, { "epoch": 0.2972632066192212, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6255, "step": 3211 }, { "epoch": 0.29735578313950123, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6812, "step": 3212 }, { "epoch": 0.2974483596597813, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6323, "step": 3213 }, { "epoch": 0.29754093618006133, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.696, "step": 3214 }, { "epoch": 0.2976335127003414, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6188, "step": 3215 }, { "epoch": 0.29772608922062144, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5786, "step": 3216 }, { "epoch": 0.2978186657409015, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.6151, "step": 3217 }, { "epoch": 0.2979112422611815, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.636, "step": 3218 }, { "epoch": 0.29800381878146154, "grad_norm": 0.1318359375, "learning_rate": 0.02, "loss": 1.628, "step": 3219 }, { "epoch": 0.2980963953017416, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6125, "step": 3220 }, { "epoch": 0.29818897182202164, "grad_norm": 0.1298828125, "learning_rate": 0.02, "loss": 1.639, "step": 3221 }, { "epoch": 0.2982815483423017, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6691, "step": 3222 }, { "epoch": 0.29837412486258175, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.6394, "step": 3223 }, { "epoch": 0.2984667013828618, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6618, "step": 3224 }, { "epoch": 0.2985592779031418, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.611, "step": 3225 }, { "epoch": 0.29865185442342185, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6264, "step": 3226 }, { "epoch": 0.2987444309437019, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6753, "step": 3227 }, { "epoch": 0.29883700746398195, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5888, "step": 3228 }, { "epoch": 0.298929583984262, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6223, "step": 3229 }, { "epoch": 0.29902216050454206, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6428, "step": 3230 }, { "epoch": 0.29911473702482205, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.655, "step": 3231 }, { "epoch": 0.2992073135451021, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6157, "step": 3232 }, { "epoch": 0.29929989006538216, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6688, "step": 3233 }, { "epoch": 0.2993924665856622, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7371, "step": 3234 }, { "epoch": 0.29948504310594226, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6679, "step": 3235 }, { "epoch": 0.2995776196262223, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6308, "step": 3236 }, { "epoch": 0.29967019614650237, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.653, "step": 3237 }, { "epoch": 0.29976277266678236, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6206, "step": 3238 }, { "epoch": 0.2998553491870624, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6326, "step": 3239 }, { "epoch": 0.29994792570734247, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6291, "step": 3240 }, { "epoch": 0.3000405022276225, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6518, "step": 3241 }, { "epoch": 0.30013307874790257, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6397, "step": 3242 }, { "epoch": 0.3002256552681826, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6372, "step": 3243 }, { "epoch": 0.3003182317884627, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5555, "step": 3244 }, { "epoch": 0.30041080830874267, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6611, "step": 3245 }, { "epoch": 0.3005033848290227, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.631, "step": 3246 }, { "epoch": 0.3005959613493028, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6936, "step": 3247 }, { "epoch": 0.30068853786958283, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5454, "step": 3248 }, { "epoch": 0.3007811143898629, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7128, "step": 3249 }, { "epoch": 0.30087369091014293, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6363, "step": 3250 }, { "epoch": 0.300966267430423, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6148, "step": 3251 }, { "epoch": 0.301058843950703, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6271, "step": 3252 }, { "epoch": 0.30115142047098303, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6397, "step": 3253 }, { "epoch": 0.3012439969912631, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6242, "step": 3254 }, { "epoch": 0.30133657351154314, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.5787, "step": 3255 }, { "epoch": 0.3014291500318232, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6559, "step": 3256 }, { "epoch": 0.30152172655210324, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5733, "step": 3257 }, { "epoch": 0.3016143030723833, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6118, "step": 3258 }, { "epoch": 0.3017068795926633, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6765, "step": 3259 }, { "epoch": 0.30179945611294334, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6529, "step": 3260 }, { "epoch": 0.3018920326332234, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5974, "step": 3261 }, { "epoch": 0.30198460915350345, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6656, "step": 3262 }, { "epoch": 0.3020771856737835, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6544, "step": 3263 }, { "epoch": 0.30216976219406355, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6942, "step": 3264 }, { "epoch": 0.30226233871434355, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6032, "step": 3265 }, { "epoch": 0.3023549152346236, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5938, "step": 3266 }, { "epoch": 0.30244749175490365, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6453, "step": 3267 }, { "epoch": 0.3025400682751837, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6159, "step": 3268 }, { "epoch": 0.30263264479546376, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6854, "step": 3269 }, { "epoch": 0.3027252213157438, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6284, "step": 3270 }, { "epoch": 0.30281779783602386, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6025, "step": 3271 }, { "epoch": 0.30291037435630386, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.651, "step": 3272 }, { "epoch": 0.3030029508765839, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6148, "step": 3273 }, { "epoch": 0.30309552739686396, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6254, "step": 3274 }, { "epoch": 0.303188103917144, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6396, "step": 3275 }, { "epoch": 0.30328068043742407, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5783, "step": 3276 }, { "epoch": 0.3033732569577041, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6279, "step": 3277 }, { "epoch": 0.30346583347798417, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6059, "step": 3278 }, { "epoch": 0.30355840999826417, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6968, "step": 3279 }, { "epoch": 0.3036509865185442, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.647, "step": 3280 }, { "epoch": 0.30374356303882427, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6193, "step": 3281 }, { "epoch": 0.3038361395591043, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7335, "step": 3282 }, { "epoch": 0.3039287160793844, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6726, "step": 3283 }, { "epoch": 0.30402129259966443, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.636, "step": 3284 }, { "epoch": 0.3041138691199445, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.6922, "step": 3285 }, { "epoch": 0.3042064456402245, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6413, "step": 3286 }, { "epoch": 0.30429902216050453, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5575, "step": 3287 }, { "epoch": 0.3043915986807846, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6456, "step": 3288 }, { "epoch": 0.30448417520106463, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6726, "step": 3289 }, { "epoch": 0.3045767517213447, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7019, "step": 3290 }, { "epoch": 0.30466932824162474, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.617, "step": 3291 }, { "epoch": 0.3047619047619048, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.7183, "step": 3292 }, { "epoch": 0.3048544812821848, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6508, "step": 3293 }, { "epoch": 0.30494705780246484, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7104, "step": 3294 }, { "epoch": 0.3050396343227449, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6996, "step": 3295 }, { "epoch": 0.30513221084302494, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5764, "step": 3296 }, { "epoch": 0.305224787363305, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6836, "step": 3297 }, { "epoch": 0.30531736388358505, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5789, "step": 3298 }, { "epoch": 0.30540994040386504, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.6352, "step": 3299 }, { "epoch": 0.3055025169241451, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.592, "step": 3300 }, { "epoch": 0.30559509344442515, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6561, "step": 3301 }, { "epoch": 0.3056876699647052, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6149, "step": 3302 }, { "epoch": 0.30578024648498525, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6394, "step": 3303 }, { "epoch": 0.3058728230052653, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6654, "step": 3304 }, { "epoch": 0.30596539952554536, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6923, "step": 3305 }, { "epoch": 0.30605797604582535, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5938, "step": 3306 }, { "epoch": 0.3061505525661054, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5949, "step": 3307 }, { "epoch": 0.30624312908638546, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5991, "step": 3308 }, { "epoch": 0.3063357056066655, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5976, "step": 3309 }, { "epoch": 0.30642828212694556, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6423, "step": 3310 }, { "epoch": 0.3065208586472256, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.6318, "step": 3311 }, { "epoch": 0.30661343516750567, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6725, "step": 3312 }, { "epoch": 0.30670601168778566, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6234, "step": 3313 }, { "epoch": 0.3067985882080657, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.569, "step": 3314 }, { "epoch": 0.30689116472834577, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6449, "step": 3315 }, { "epoch": 0.3069837412486258, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6356, "step": 3316 }, { "epoch": 0.30707631776890587, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6449, "step": 3317 }, { "epoch": 0.3071688942891859, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5654, "step": 3318 }, { "epoch": 0.307261470809466, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.616, "step": 3319 }, { "epoch": 0.30735404732974597, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.623, "step": 3320 }, { "epoch": 0.307446623850026, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.7176, "step": 3321 }, { "epoch": 0.3075392003703061, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6575, "step": 3322 }, { "epoch": 0.30763177689058613, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5491, "step": 3323 }, { "epoch": 0.3077243534108662, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5852, "step": 3324 }, { "epoch": 0.30781692993114623, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6261, "step": 3325 }, { "epoch": 0.3079095064514263, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.622, "step": 3326 }, { "epoch": 0.3080020829717063, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6383, "step": 3327 }, { "epoch": 0.30809465949198633, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6769, "step": 3328 }, { "epoch": 0.3081872360122664, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6652, "step": 3329 }, { "epoch": 0.30827981253254644, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6284, "step": 3330 }, { "epoch": 0.3083723890528265, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6718, "step": 3331 }, { "epoch": 0.30846496557310654, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.669, "step": 3332 }, { "epoch": 0.30855754209338654, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.583, "step": 3333 }, { "epoch": 0.3086501186136666, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6543, "step": 3334 }, { "epoch": 0.30874269513394664, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5555, "step": 3335 }, { "epoch": 0.3088352716542267, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6438, "step": 3336 }, { "epoch": 0.30892784817450675, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5781, "step": 3337 }, { "epoch": 0.3090204246947868, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6306, "step": 3338 }, { "epoch": 0.30911300121506685, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6629, "step": 3339 }, { "epoch": 0.30920557773534685, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6566, "step": 3340 }, { "epoch": 0.3092981542556269, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6189, "step": 3341 }, { "epoch": 0.30939073077590695, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.61, "step": 3342 }, { "epoch": 0.309483307296187, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.6632, "step": 3343 }, { "epoch": 0.30957588381646706, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5994, "step": 3344 }, { "epoch": 0.3096684603367471, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6619, "step": 3345 }, { "epoch": 0.30976103685702716, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6666, "step": 3346 }, { "epoch": 0.30985361337730716, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6267, "step": 3347 }, { "epoch": 0.3099461898975872, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6621, "step": 3348 }, { "epoch": 0.31003876641786726, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5985, "step": 3349 }, { "epoch": 0.3101313429381473, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6961, "step": 3350 }, { "epoch": 0.31022391945842737, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6537, "step": 3351 }, { "epoch": 0.3103164959787074, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6215, "step": 3352 }, { "epoch": 0.31040907249898747, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6499, "step": 3353 }, { "epoch": 0.31050164901926747, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.603, "step": 3354 }, { "epoch": 0.3105942255395475, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.589, "step": 3355 }, { "epoch": 0.31068680205982757, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.64, "step": 3356 }, { "epoch": 0.3107793785801076, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6171, "step": 3357 }, { "epoch": 0.3108719551003877, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5694, "step": 3358 }, { "epoch": 0.31096453162066773, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6401, "step": 3359 }, { "epoch": 0.3110571081409477, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6859, "step": 3360 }, { "epoch": 0.3111496846612278, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.6235, "step": 3361 }, { "epoch": 0.31124226118150783, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6298, "step": 3362 }, { "epoch": 0.3113348377017879, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5688, "step": 3363 }, { "epoch": 0.31142741422206793, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6668, "step": 3364 }, { "epoch": 0.311519990742348, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.6217, "step": 3365 }, { "epoch": 0.31161256726262804, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6841, "step": 3366 }, { "epoch": 0.31170514378290803, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6522, "step": 3367 }, { "epoch": 0.3117977203031881, "grad_norm": 0.1328125, "learning_rate": 0.02, "loss": 1.6177, "step": 3368 }, { "epoch": 0.31189029682346814, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6097, "step": 3369 }, { "epoch": 0.3119828733437482, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6387, "step": 3370 }, { "epoch": 0.31207544986402824, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6277, "step": 3371 }, { "epoch": 0.3121680263843083, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6893, "step": 3372 }, { "epoch": 0.31226060290458835, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5554, "step": 3373 }, { "epoch": 0.31235317942486834, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5794, "step": 3374 }, { "epoch": 0.3124457559451484, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5752, "step": 3375 }, { "epoch": 0.31253833246542845, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6252, "step": 3376 }, { "epoch": 0.3126309089857085, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6201, "step": 3377 }, { "epoch": 0.31272348550598855, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.672, "step": 3378 }, { "epoch": 0.3128160620262686, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6504, "step": 3379 }, { "epoch": 0.31290863854654866, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.5595, "step": 3380 }, { "epoch": 0.31300121506682865, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6157, "step": 3381 }, { "epoch": 0.3130937915871087, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7263, "step": 3382 }, { "epoch": 0.31318636810738876, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.6506, "step": 3383 }, { "epoch": 0.3132789446276688, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.6336, "step": 3384 }, { "epoch": 0.31337152114794886, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6702, "step": 3385 }, { "epoch": 0.3134640976682289, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.69, "step": 3386 }, { "epoch": 0.31355667418850897, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6635, "step": 3387 }, { "epoch": 0.31364925070878896, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.673, "step": 3388 }, { "epoch": 0.313741827229069, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6639, "step": 3389 }, { "epoch": 0.31383440374934907, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5989, "step": 3390 }, { "epoch": 0.3139269802696291, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6657, "step": 3391 }, { "epoch": 0.31401955678990917, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.646, "step": 3392 }, { "epoch": 0.3141121333101892, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6502, "step": 3393 }, { "epoch": 0.3142047098304692, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5719, "step": 3394 }, { "epoch": 0.31429728635074927, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6369, "step": 3395 }, { "epoch": 0.3143898628710293, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5653, "step": 3396 }, { "epoch": 0.3144824393913094, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6197, "step": 3397 }, { "epoch": 0.31457501591158943, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5871, "step": 3398 }, { "epoch": 0.3146675924318695, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.633, "step": 3399 }, { "epoch": 0.31476016895214953, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6056, "step": 3400 }, { "epoch": 0.31485274547242953, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5768, "step": 3401 }, { "epoch": 0.3149453219927096, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6338, "step": 3402 }, { "epoch": 0.31503789851298963, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6571, "step": 3403 }, { "epoch": 0.3151304750332697, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6316, "step": 3404 }, { "epoch": 0.31522305155354974, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6412, "step": 3405 }, { "epoch": 0.3153156280738298, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6724, "step": 3406 }, { "epoch": 0.31540820459410984, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6106, "step": 3407 }, { "epoch": 0.31550078111438984, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.704, "step": 3408 }, { "epoch": 0.3155933576346699, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6396, "step": 3409 }, { "epoch": 0.31568593415494994, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.621, "step": 3410 }, { "epoch": 0.31577851067523, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6133, "step": 3411 }, { "epoch": 0.31587108719551005, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6351, "step": 3412 }, { "epoch": 0.3159636637157901, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6477, "step": 3413 }, { "epoch": 0.31605624023607015, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6309, "step": 3414 }, { "epoch": 0.31614881675635015, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6207, "step": 3415 }, { "epoch": 0.3162413932766302, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.624, "step": 3416 }, { "epoch": 0.31633396979691025, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6112, "step": 3417 }, { "epoch": 0.3164265463171903, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6216, "step": 3418 }, { "epoch": 0.31651912283747036, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6124, "step": 3419 }, { "epoch": 0.3166116993577504, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6625, "step": 3420 }, { "epoch": 0.31670427587803046, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6671, "step": 3421 }, { "epoch": 0.31679685239831046, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5982, "step": 3422 }, { "epoch": 0.3168894289185905, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5931, "step": 3423 }, { "epoch": 0.31698200543887056, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5872, "step": 3424 }, { "epoch": 0.3170745819591506, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6526, "step": 3425 }, { "epoch": 0.31716715847943067, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.577, "step": 3426 }, { "epoch": 0.3172597349997107, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6219, "step": 3427 }, { "epoch": 0.3173523115199907, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.609, "step": 3428 }, { "epoch": 0.31744488804027077, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6502, "step": 3429 }, { "epoch": 0.3175374645605508, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6508, "step": 3430 }, { "epoch": 0.31763004108083087, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6173, "step": 3431 }, { "epoch": 0.3177226176011109, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6465, "step": 3432 }, { "epoch": 0.317815194121391, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5968, "step": 3433 }, { "epoch": 0.31790777064167103, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6194, "step": 3434 }, { "epoch": 0.318000347161951, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6008, "step": 3435 }, { "epoch": 0.3180929236822311, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6365, "step": 3436 }, { "epoch": 0.31818550020251113, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6135, "step": 3437 }, { "epoch": 0.3182780767227912, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6052, "step": 3438 }, { "epoch": 0.31837065324307123, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5808, "step": 3439 }, { "epoch": 0.3184632297633513, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6795, "step": 3440 }, { "epoch": 0.31855580628363134, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6343, "step": 3441 }, { "epoch": 0.31864838280391133, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6497, "step": 3442 }, { "epoch": 0.3187409593241914, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6385, "step": 3443 }, { "epoch": 0.31883353584447144, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5686, "step": 3444 }, { "epoch": 0.3189261123647515, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6257, "step": 3445 }, { "epoch": 0.31901868888503154, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6117, "step": 3446 }, { "epoch": 0.3191112654053116, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6358, "step": 3447 }, { "epoch": 0.31920384192559165, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6852, "step": 3448 }, { "epoch": 0.31929641844587164, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6323, "step": 3449 }, { "epoch": 0.3193889949661517, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6015, "step": 3450 }, { "epoch": 0.31948157148643175, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6646, "step": 3451 }, { "epoch": 0.3195741480067118, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6323, "step": 3452 }, { "epoch": 0.31966672452699185, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6481, "step": 3453 }, { "epoch": 0.3197593010472719, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5606, "step": 3454 }, { "epoch": 0.31985187756755196, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6365, "step": 3455 }, { "epoch": 0.31994445408783195, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5948, "step": 3456 }, { "epoch": 0.320037030608112, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6112, "step": 3457 }, { "epoch": 0.32012960712839206, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6261, "step": 3458 }, { "epoch": 0.3202221836486721, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6163, "step": 3459 }, { "epoch": 0.32031476016895216, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6378, "step": 3460 }, { "epoch": 0.3204073366892322, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6249, "step": 3461 }, { "epoch": 0.3204999132095122, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.616, "step": 3462 }, { "epoch": 0.32059248972979226, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6481, "step": 3463 }, { "epoch": 0.3206850662500723, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.6064, "step": 3464 }, { "epoch": 0.32077764277035237, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6112, "step": 3465 }, { "epoch": 0.3208702192906324, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5945, "step": 3466 }, { "epoch": 0.32096279581091247, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5334, "step": 3467 }, { "epoch": 0.3210553723311925, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5626, "step": 3468 }, { "epoch": 0.3211479488514725, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.6743, "step": 3469 }, { "epoch": 0.32124052537175257, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6488, "step": 3470 }, { "epoch": 0.3213331018920326, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.625, "step": 3471 }, { "epoch": 0.3214256784123127, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6444, "step": 3472 }, { "epoch": 0.32151825493259273, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6116, "step": 3473 }, { "epoch": 0.3216108314528728, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6154, "step": 3474 }, { "epoch": 0.32170340797315283, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6158, "step": 3475 }, { "epoch": 0.32179598449343283, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6474, "step": 3476 }, { "epoch": 0.3218885610137129, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6261, "step": 3477 }, { "epoch": 0.32198113753399293, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.64, "step": 3478 }, { "epoch": 0.322073714054273, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5817, "step": 3479 }, { "epoch": 0.32216629057455304, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6878, "step": 3480 }, { "epoch": 0.3222588670948331, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.7114, "step": 3481 }, { "epoch": 0.32235144361511314, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6221, "step": 3482 }, { "epoch": 0.32244402013539314, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6737, "step": 3483 }, { "epoch": 0.3225365966556732, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6348, "step": 3484 }, { "epoch": 0.32262917317595324, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6341, "step": 3485 }, { "epoch": 0.3227217496962333, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6601, "step": 3486 }, { "epoch": 0.32281432621651335, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.7011, "step": 3487 }, { "epoch": 0.3229069027367934, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6072, "step": 3488 }, { "epoch": 0.32299947925707345, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6238, "step": 3489 }, { "epoch": 0.32309205577735345, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6359, "step": 3490 }, { "epoch": 0.3231846322976335, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6213, "step": 3491 }, { "epoch": 0.32327720881791355, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6555, "step": 3492 }, { "epoch": 0.3233697853381936, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6087, "step": 3493 }, { "epoch": 0.32346236185847366, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6881, "step": 3494 }, { "epoch": 0.3235549383787537, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6306, "step": 3495 }, { "epoch": 0.3236475148990337, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.6663, "step": 3496 }, { "epoch": 0.32374009141931376, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6182, "step": 3497 }, { "epoch": 0.3238326679395938, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6129, "step": 3498 }, { "epoch": 0.32392524445987386, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.532, "step": 3499 }, { "epoch": 0.3240178209801539, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6481, "step": 3500 }, { "epoch": 0.32411039750043397, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.583, "step": 3501 }, { "epoch": 0.324202974020714, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.6264, "step": 3502 }, { "epoch": 0.324295550540994, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.653, "step": 3503 }, { "epoch": 0.32438812706127407, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6324, "step": 3504 }, { "epoch": 0.3244807035815541, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6921, "step": 3505 }, { "epoch": 0.32457328010183417, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6196, "step": 3506 }, { "epoch": 0.3246658566221142, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.6684, "step": 3507 }, { "epoch": 0.3247584331423943, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.516, "step": 3508 }, { "epoch": 0.32485100966267433, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5396, "step": 3509 }, { "epoch": 0.3249435861829543, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.662, "step": 3510 }, { "epoch": 0.3250361627032344, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6509, "step": 3511 }, { "epoch": 0.32512873922351443, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5799, "step": 3512 }, { "epoch": 0.3252213157437945, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6464, "step": 3513 }, { "epoch": 0.32531389226407453, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6565, "step": 3514 }, { "epoch": 0.3254064687843546, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.653, "step": 3515 }, { "epoch": 0.32549904530463464, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6026, "step": 3516 }, { "epoch": 0.32559162182491463, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6132, "step": 3517 }, { "epoch": 0.3256841983451947, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6225, "step": 3518 }, { "epoch": 0.32577677486547474, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6452, "step": 3519 }, { "epoch": 0.3258693513857548, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6392, "step": 3520 }, { "epoch": 0.32596192790603484, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6246, "step": 3521 }, { "epoch": 0.3260545044263149, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6611, "step": 3522 }, { "epoch": 0.32614708094659495, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6143, "step": 3523 }, { "epoch": 0.32623965746687494, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6248, "step": 3524 }, { "epoch": 0.326332233987155, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6398, "step": 3525 }, { "epoch": 0.32642481050743505, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5938, "step": 3526 }, { "epoch": 0.3265173870277151, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5937, "step": 3527 }, { "epoch": 0.32660996354799515, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.611, "step": 3528 }, { "epoch": 0.3267025400682752, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6384, "step": 3529 }, { "epoch": 0.3267951165885552, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.6031, "step": 3530 }, { "epoch": 0.32688769310883525, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6222, "step": 3531 }, { "epoch": 0.3269802696291153, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6251, "step": 3532 }, { "epoch": 0.32707284614939536, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6246, "step": 3533 }, { "epoch": 0.3271654226696754, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6047, "step": 3534 }, { "epoch": 0.32725799918995546, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6059, "step": 3535 }, { "epoch": 0.3273505757102355, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5769, "step": 3536 }, { "epoch": 0.3274431522305155, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6778, "step": 3537 }, { "epoch": 0.32753572875079556, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6636, "step": 3538 }, { "epoch": 0.3276283052710756, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6217, "step": 3539 }, { "epoch": 0.32772088179135567, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6622, "step": 3540 }, { "epoch": 0.3278134583116357, "grad_norm": 0.1298828125, "learning_rate": 0.02, "loss": 1.594, "step": 3541 }, { "epoch": 0.32790603483191577, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5466, "step": 3542 }, { "epoch": 0.3279986113521958, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6406, "step": 3543 }, { "epoch": 0.3280911878724758, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7219, "step": 3544 }, { "epoch": 0.3281837643927559, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5939, "step": 3545 }, { "epoch": 0.3282763409130359, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5639, "step": 3546 }, { "epoch": 0.328368917433316, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5972, "step": 3547 }, { "epoch": 0.32846149395359603, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5744, "step": 3548 }, { "epoch": 0.3285540704738761, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6138, "step": 3549 }, { "epoch": 0.32864664699415613, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6863, "step": 3550 }, { "epoch": 0.32873922351443613, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6147, "step": 3551 }, { "epoch": 0.3288318000347162, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6539, "step": 3552 }, { "epoch": 0.32892437655499623, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6315, "step": 3553 }, { "epoch": 0.3290169530752763, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6432, "step": 3554 }, { "epoch": 0.32910952959555634, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6377, "step": 3555 }, { "epoch": 0.3292021061158364, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6561, "step": 3556 }, { "epoch": 0.32929468263611644, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6354, "step": 3557 }, { "epoch": 0.32938725915639644, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6712, "step": 3558 }, { "epoch": 0.3294798356766765, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6444, "step": 3559 }, { "epoch": 0.32957241219695654, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.6125, "step": 3560 }, { "epoch": 0.3296649887172366, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6423, "step": 3561 }, { "epoch": 0.32975756523751665, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6035, "step": 3562 }, { "epoch": 0.3298501417577967, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6691, "step": 3563 }, { "epoch": 0.3299427182780767, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6464, "step": 3564 }, { "epoch": 0.33003529479835675, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6471, "step": 3565 }, { "epoch": 0.3301278713186368, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.6549, "step": 3566 }, { "epoch": 0.33022044783891685, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5738, "step": 3567 }, { "epoch": 0.3303130243591969, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6771, "step": 3568 }, { "epoch": 0.33040560087947696, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6506, "step": 3569 }, { "epoch": 0.330498177399757, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.575, "step": 3570 }, { "epoch": 0.330590753920037, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.639, "step": 3571 }, { "epoch": 0.33068333044031706, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.631, "step": 3572 }, { "epoch": 0.3307759069605971, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6146, "step": 3573 }, { "epoch": 0.33086848348087716, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6657, "step": 3574 }, { "epoch": 0.3309610600011572, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.664, "step": 3575 }, { "epoch": 0.33105363652143727, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5513, "step": 3576 }, { "epoch": 0.3311462130417173, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.5895, "step": 3577 }, { "epoch": 0.3312387895619973, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.639, "step": 3578 }, { "epoch": 0.33133136608227737, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6016, "step": 3579 }, { "epoch": 0.3314239426025574, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6136, "step": 3580 }, { "epoch": 0.33151651912283747, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.639, "step": 3581 }, { "epoch": 0.3316090956431175, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5776, "step": 3582 }, { "epoch": 0.3317016721633976, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6496, "step": 3583 }, { "epoch": 0.33179424868367763, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.584, "step": 3584 }, { "epoch": 0.3318868252039576, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6763, "step": 3585 }, { "epoch": 0.3319794017242377, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.6906, "step": 3586 }, { "epoch": 0.33207197824451773, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6719, "step": 3587 }, { "epoch": 0.3321645547647978, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6538, "step": 3588 }, { "epoch": 0.33225713128507783, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.7027, "step": 3589 }, { "epoch": 0.3323497078053579, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.6914, "step": 3590 }, { "epoch": 0.3324422843256379, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6118, "step": 3591 }, { "epoch": 0.33253486084591793, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6219, "step": 3592 }, { "epoch": 0.332627437366198, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5402, "step": 3593 }, { "epoch": 0.33272001388647804, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6113, "step": 3594 }, { "epoch": 0.3328125904067581, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6098, "step": 3595 }, { "epoch": 0.33290516692703814, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6616, "step": 3596 }, { "epoch": 0.3329977434473182, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6963, "step": 3597 }, { "epoch": 0.3330903199675982, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5985, "step": 3598 }, { "epoch": 0.33318289648787824, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6088, "step": 3599 }, { "epoch": 0.3332754730081583, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6495, "step": 3600 }, { "epoch": 0.33336804952843835, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5908, "step": 3601 }, { "epoch": 0.3334606260487184, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5552, "step": 3602 }, { "epoch": 0.33355320256899845, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5637, "step": 3603 }, { "epoch": 0.3336457790892785, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5979, "step": 3604 }, { "epoch": 0.3337383556095585, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6072, "step": 3605 }, { "epoch": 0.33383093212983855, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6743, "step": 3606 }, { "epoch": 0.3339235086501186, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6041, "step": 3607 }, { "epoch": 0.33401608517039866, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6683, "step": 3608 }, { "epoch": 0.3341086616906787, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6248, "step": 3609 }, { "epoch": 0.33420123821095876, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5878, "step": 3610 }, { "epoch": 0.3342938147312388, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6758, "step": 3611 }, { "epoch": 0.3343863912515188, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6826, "step": 3612 }, { "epoch": 0.33447896777179886, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6429, "step": 3613 }, { "epoch": 0.3345715442920789, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6361, "step": 3614 }, { "epoch": 0.33466412081235897, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6642, "step": 3615 }, { "epoch": 0.334756697332639, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6747, "step": 3616 }, { "epoch": 0.33484927385291907, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6602, "step": 3617 }, { "epoch": 0.3349418503731991, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.7148, "step": 3618 }, { "epoch": 0.3350344268934791, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6145, "step": 3619 }, { "epoch": 0.3351270034137592, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6358, "step": 3620 }, { "epoch": 0.3352195799340392, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6383, "step": 3621 }, { "epoch": 0.3353121564543193, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6064, "step": 3622 }, { "epoch": 0.33540473297459933, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6287, "step": 3623 }, { "epoch": 0.3354973094948794, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6054, "step": 3624 }, { "epoch": 0.3355898860151594, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6386, "step": 3625 }, { "epoch": 0.33568246253543943, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5893, "step": 3626 }, { "epoch": 0.3357750390557195, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5905, "step": 3627 }, { "epoch": 0.33586761557599953, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5678, "step": 3628 }, { "epoch": 0.3359601920962796, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6292, "step": 3629 }, { "epoch": 0.33605276861655964, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6397, "step": 3630 }, { "epoch": 0.3361453451368397, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6446, "step": 3631 }, { "epoch": 0.3362379216571197, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6185, "step": 3632 }, { "epoch": 0.33633049817739974, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6329, "step": 3633 }, { "epoch": 0.3364230746976798, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.7175, "step": 3634 }, { "epoch": 0.33651565121795984, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5675, "step": 3635 }, { "epoch": 0.3366082277382399, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6986, "step": 3636 }, { "epoch": 0.33670080425851995, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5875, "step": 3637 }, { "epoch": 0.3367933807788, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6244, "step": 3638 }, { "epoch": 0.33688595729908, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.6303, "step": 3639 }, { "epoch": 0.33697853381936005, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.7024, "step": 3640 }, { "epoch": 0.3370711103396401, "grad_norm": 0.1240234375, "learning_rate": 0.02, "loss": 1.5659, "step": 3641 }, { "epoch": 0.33716368685992015, "grad_norm": 0.1298828125, "learning_rate": 0.02, "loss": 1.6211, "step": 3642 }, { "epoch": 0.3372562633802002, "grad_norm": 0.130859375, "learning_rate": 0.02, "loss": 1.6761, "step": 3643 }, { "epoch": 0.33734883990048026, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6796, "step": 3644 }, { "epoch": 0.3374414164207603, "grad_norm": 0.1298828125, "learning_rate": 0.02, "loss": 1.5816, "step": 3645 }, { "epoch": 0.3375339929410403, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5651, "step": 3646 }, { "epoch": 0.33762656946132036, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5543, "step": 3647 }, { "epoch": 0.3377191459816004, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6741, "step": 3648 }, { "epoch": 0.33781172250188046, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.7472, "step": 3649 }, { "epoch": 0.3379042990221605, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.578, "step": 3650 }, { "epoch": 0.33799687554244057, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5945, "step": 3651 }, { "epoch": 0.3380894520627206, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6126, "step": 3652 }, { "epoch": 0.3381820285830006, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6367, "step": 3653 }, { "epoch": 0.33827460510328067, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5793, "step": 3654 }, { "epoch": 0.3383671816235607, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6068, "step": 3655 }, { "epoch": 0.33845975814384077, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6714, "step": 3656 }, { "epoch": 0.3385523346641208, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.588, "step": 3657 }, { "epoch": 0.3386449111844009, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6489, "step": 3658 }, { "epoch": 0.3387374877046809, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6349, "step": 3659 }, { "epoch": 0.3388300642249609, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6639, "step": 3660 }, { "epoch": 0.338922640745241, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.6153, "step": 3661 }, { "epoch": 0.33901521726552103, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6417, "step": 3662 }, { "epoch": 0.3391077937858011, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5968, "step": 3663 }, { "epoch": 0.33920037030608113, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6297, "step": 3664 }, { "epoch": 0.3392929468263612, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6368, "step": 3665 }, { "epoch": 0.3393855233466412, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6357, "step": 3666 }, { "epoch": 0.33947809986692123, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6479, "step": 3667 }, { "epoch": 0.3395706763872013, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.7271, "step": 3668 }, { "epoch": 0.33966325290748134, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5947, "step": 3669 }, { "epoch": 0.3397558294277614, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6121, "step": 3670 }, { "epoch": 0.33984840594804144, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6151, "step": 3671 }, { "epoch": 0.3399409824683215, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6224, "step": 3672 }, { "epoch": 0.3400335589886015, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6478, "step": 3673 }, { "epoch": 0.34012613550888154, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5428, "step": 3674 }, { "epoch": 0.3402187120291616, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6599, "step": 3675 }, { "epoch": 0.34031128854944165, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6393, "step": 3676 }, { "epoch": 0.3404038650697217, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5852, "step": 3677 }, { "epoch": 0.34049644159000175, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5561, "step": 3678 }, { "epoch": 0.3405890181102818, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6326, "step": 3679 }, { "epoch": 0.3406815946305618, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5907, "step": 3680 }, { "epoch": 0.34077417115084185, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6777, "step": 3681 }, { "epoch": 0.3408667476711219, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6489, "step": 3682 }, { "epoch": 0.34095932419140196, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6372, "step": 3683 }, { "epoch": 0.341051900711682, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6049, "step": 3684 }, { "epoch": 0.34114447723196206, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5568, "step": 3685 }, { "epoch": 0.3412370537522421, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.583, "step": 3686 }, { "epoch": 0.3413296302725221, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.6499, "step": 3687 }, { "epoch": 0.34142220679280216, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6281, "step": 3688 }, { "epoch": 0.3415147833130822, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6792, "step": 3689 }, { "epoch": 0.34160735983336227, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6357, "step": 3690 }, { "epoch": 0.3416999363536423, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6354, "step": 3691 }, { "epoch": 0.34179251287392237, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5725, "step": 3692 }, { "epoch": 0.34188508939420237, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6339, "step": 3693 }, { "epoch": 0.3419776659144824, "grad_norm": 0.12890625, "learning_rate": 0.02, "loss": 1.6186, "step": 3694 }, { "epoch": 0.3420702424347625, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6146, "step": 3695 }, { "epoch": 0.3421628189550425, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6581, "step": 3696 }, { "epoch": 0.3422553954753226, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6254, "step": 3697 }, { "epoch": 0.34234797199560263, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6768, "step": 3698 }, { "epoch": 0.3424405485158827, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5825, "step": 3699 }, { "epoch": 0.3425331250361627, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.639, "step": 3700 }, { "epoch": 0.34262570155644273, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5902, "step": 3701 }, { "epoch": 0.3427182780767228, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6316, "step": 3702 }, { "epoch": 0.34281085459700283, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6078, "step": 3703 }, { "epoch": 0.3429034311172829, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6179, "step": 3704 }, { "epoch": 0.34299600763756294, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5844, "step": 3705 }, { "epoch": 0.343088584157843, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6676, "step": 3706 }, { "epoch": 0.343181160678123, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6073, "step": 3707 }, { "epoch": 0.34327373719840304, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6562, "step": 3708 }, { "epoch": 0.3433663137186831, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6269, "step": 3709 }, { "epoch": 0.34345889023896314, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6232, "step": 3710 }, { "epoch": 0.3435514667592432, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6839, "step": 3711 }, { "epoch": 0.34364404327952325, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6347, "step": 3712 }, { "epoch": 0.3437366197998033, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5939, "step": 3713 }, { "epoch": 0.3438291963200833, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5895, "step": 3714 }, { "epoch": 0.34392177284036335, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.7031, "step": 3715 }, { "epoch": 0.3440143493606434, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5668, "step": 3716 }, { "epoch": 0.34410692588092345, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.653, "step": 3717 }, { "epoch": 0.3441995024012035, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6684, "step": 3718 }, { "epoch": 0.34429207892148356, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6562, "step": 3719 }, { "epoch": 0.3443846554417636, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5942, "step": 3720 }, { "epoch": 0.3444772319620436, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6845, "step": 3721 }, { "epoch": 0.34456980848232366, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5711, "step": 3722 }, { "epoch": 0.3446623850026037, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6205, "step": 3723 }, { "epoch": 0.34475496152288376, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5916, "step": 3724 }, { "epoch": 0.3448475380431638, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6276, "step": 3725 }, { "epoch": 0.34494011456344387, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.66, "step": 3726 }, { "epoch": 0.34503269108372386, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5579, "step": 3727 }, { "epoch": 0.3451252676040039, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.6133, "step": 3728 }, { "epoch": 0.34521784412428397, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.6028, "step": 3729 }, { "epoch": 0.345310420644564, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5933, "step": 3730 }, { "epoch": 0.34540299716484407, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6603, "step": 3731 }, { "epoch": 0.3454955736851241, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5888, "step": 3732 }, { "epoch": 0.3455881502054042, "grad_norm": 0.1318359375, "learning_rate": 0.02, "loss": 1.6662, "step": 3733 }, { "epoch": 0.3456807267256842, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5963, "step": 3734 }, { "epoch": 0.3457733032459642, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6505, "step": 3735 }, { "epoch": 0.3458658797662443, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5623, "step": 3736 }, { "epoch": 0.34595845628652433, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6733, "step": 3737 }, { "epoch": 0.3460510328068044, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.7053, "step": 3738 }, { "epoch": 0.34614360932708443, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5926, "step": 3739 }, { "epoch": 0.3462361858473645, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5331, "step": 3740 }, { "epoch": 0.3463287623676445, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6089, "step": 3741 }, { "epoch": 0.34642133888792453, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6237, "step": 3742 }, { "epoch": 0.3465139154082046, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6297, "step": 3743 }, { "epoch": 0.34660649192848464, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6532, "step": 3744 }, { "epoch": 0.3466990684487647, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5554, "step": 3745 }, { "epoch": 0.34679164496904474, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5998, "step": 3746 }, { "epoch": 0.3468842214893248, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5607, "step": 3747 }, { "epoch": 0.3469767980096048, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6011, "step": 3748 }, { "epoch": 0.34706937452988484, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6114, "step": 3749 }, { "epoch": 0.3471619510501649, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6484, "step": 3750 }, { "epoch": 0.34725452757044495, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6572, "step": 3751 }, { "epoch": 0.347347104090725, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6082, "step": 3752 }, { "epoch": 0.34743968061100505, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5558, "step": 3753 }, { "epoch": 0.3475322571312851, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5755, "step": 3754 }, { "epoch": 0.3476248336515651, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6208, "step": 3755 }, { "epoch": 0.34771741017184515, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5578, "step": 3756 }, { "epoch": 0.3478099866921252, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6016, "step": 3757 }, { "epoch": 0.34790256321240526, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6373, "step": 3758 }, { "epoch": 0.3479951397326853, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6537, "step": 3759 }, { "epoch": 0.34808771625296536, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6472, "step": 3760 }, { "epoch": 0.34818029277324536, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6512, "step": 3761 }, { "epoch": 0.3482728692935254, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6488, "step": 3762 }, { "epoch": 0.34836544581380546, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6077, "step": 3763 }, { "epoch": 0.3484580223340855, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.6731, "step": 3764 }, { "epoch": 0.34855059885436557, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6217, "step": 3765 }, { "epoch": 0.3486431753746456, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.6024, "step": 3766 }, { "epoch": 0.34873575189492567, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6348, "step": 3767 }, { "epoch": 0.34882832841520567, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6067, "step": 3768 }, { "epoch": 0.3489209049354857, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6543, "step": 3769 }, { "epoch": 0.3490134814557658, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6383, "step": 3770 }, { "epoch": 0.3491060579760458, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.6562, "step": 3771 }, { "epoch": 0.3491986344963259, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.635, "step": 3772 }, { "epoch": 0.34929121101660593, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6485, "step": 3773 }, { "epoch": 0.349383787536886, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.646, "step": 3774 }, { "epoch": 0.349476364057166, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6126, "step": 3775 }, { "epoch": 0.34956894057744603, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5677, "step": 3776 }, { "epoch": 0.3496615170977261, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6085, "step": 3777 }, { "epoch": 0.34975409361800613, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.614, "step": 3778 }, { "epoch": 0.3498466701382862, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.648, "step": 3779 }, { "epoch": 0.34993924665856624, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6374, "step": 3780 }, { "epoch": 0.3500318231788463, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5644, "step": 3781 }, { "epoch": 0.3501243996991263, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6079, "step": 3782 }, { "epoch": 0.35021697621940634, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6498, "step": 3783 }, { "epoch": 0.3503095527396864, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5614, "step": 3784 }, { "epoch": 0.35040212925996644, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6637, "step": 3785 }, { "epoch": 0.3504947057802465, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.612, "step": 3786 }, { "epoch": 0.35058728230052655, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5839, "step": 3787 }, { "epoch": 0.3506798588208066, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5602, "step": 3788 }, { "epoch": 0.3507724353410866, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5884, "step": 3789 }, { "epoch": 0.35086501186136665, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.6025, "step": 3790 }, { "epoch": 0.3509575883816467, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5644, "step": 3791 }, { "epoch": 0.35105016490192675, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6227, "step": 3792 }, { "epoch": 0.3511427414222068, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6218, "step": 3793 }, { "epoch": 0.35123531794248686, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6603, "step": 3794 }, { "epoch": 0.35132789446276685, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6046, "step": 3795 }, { "epoch": 0.3514204709830469, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6306, "step": 3796 }, { "epoch": 0.35151304750332696, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6375, "step": 3797 }, { "epoch": 0.351605624023607, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6258, "step": 3798 }, { "epoch": 0.35169820054388706, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6587, "step": 3799 }, { "epoch": 0.3517907770641671, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.637, "step": 3800 }, { "epoch": 0.35188335358444717, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6579, "step": 3801 }, { "epoch": 0.35197593010472716, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5959, "step": 3802 }, { "epoch": 0.3520685066250072, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6087, "step": 3803 }, { "epoch": 0.35216108314528727, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6423, "step": 3804 }, { "epoch": 0.3522536596655673, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5693, "step": 3805 }, { "epoch": 0.3523462361858474, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6441, "step": 3806 }, { "epoch": 0.3524388127061274, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6331, "step": 3807 }, { "epoch": 0.3525313892264075, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6329, "step": 3808 }, { "epoch": 0.3526239657466875, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.6701, "step": 3809 }, { "epoch": 0.3527165422669675, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6705, "step": 3810 }, { "epoch": 0.3528091187872476, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6354, "step": 3811 }, { "epoch": 0.35290169530752763, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6349, "step": 3812 }, { "epoch": 0.3529942718278077, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.6589, "step": 3813 }, { "epoch": 0.35308684834808773, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6048, "step": 3814 }, { "epoch": 0.3531794248683678, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.634, "step": 3815 }, { "epoch": 0.3532720013886478, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6434, "step": 3816 }, { "epoch": 0.35336457790892783, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6586, "step": 3817 }, { "epoch": 0.3534571544292079, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6455, "step": 3818 }, { "epoch": 0.35354973094948794, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6226, "step": 3819 }, { "epoch": 0.353642307469768, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6149, "step": 3820 }, { "epoch": 0.35373488399004804, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6497, "step": 3821 }, { "epoch": 0.3538274605103281, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6285, "step": 3822 }, { "epoch": 0.3539200370306081, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6433, "step": 3823 }, { "epoch": 0.35401261355088814, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5357, "step": 3824 }, { "epoch": 0.3541051900711682, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6336, "step": 3825 }, { "epoch": 0.35419776659144825, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.672, "step": 3826 }, { "epoch": 0.3542903431117283, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6421, "step": 3827 }, { "epoch": 0.35438291963200835, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5841, "step": 3828 }, { "epoch": 0.35447549615228835, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6408, "step": 3829 }, { "epoch": 0.3545680726725684, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5927, "step": 3830 }, { "epoch": 0.35466064919284845, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6408, "step": 3831 }, { "epoch": 0.3547532257131285, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6348, "step": 3832 }, { "epoch": 0.35484580223340856, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5923, "step": 3833 }, { "epoch": 0.3549383787536886, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5471, "step": 3834 }, { "epoch": 0.35503095527396866, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5771, "step": 3835 }, { "epoch": 0.35512353179424866, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5083, "step": 3836 }, { "epoch": 0.3552161083145287, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6237, "step": 3837 }, { "epoch": 0.35530868483480876, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6219, "step": 3838 }, { "epoch": 0.3554012613550888, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.666, "step": 3839 }, { "epoch": 0.35549383787536887, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6001, "step": 3840 }, { "epoch": 0.3555864143956489, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6452, "step": 3841 }, { "epoch": 0.35567899091592897, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.583, "step": 3842 }, { "epoch": 0.35577156743620897, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6709, "step": 3843 }, { "epoch": 0.355864143956489, "grad_norm": 0.1328125, "learning_rate": 0.02, "loss": 1.5817, "step": 3844 }, { "epoch": 0.3559567204767691, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5625, "step": 3845 }, { "epoch": 0.3560492969970491, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5845, "step": 3846 }, { "epoch": 0.3561418735173292, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.572, "step": 3847 }, { "epoch": 0.35623445003760923, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6522, "step": 3848 }, { "epoch": 0.3563270265578893, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5321, "step": 3849 }, { "epoch": 0.3564196030781693, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.7081, "step": 3850 }, { "epoch": 0.35651217959844933, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6225, "step": 3851 }, { "epoch": 0.3566047561187294, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.67, "step": 3852 }, { "epoch": 0.35669733263900943, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5372, "step": 3853 }, { "epoch": 0.3567899091592895, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.592, "step": 3854 }, { "epoch": 0.35688248567956954, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.594, "step": 3855 }, { "epoch": 0.35697506219984954, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.627, "step": 3856 }, { "epoch": 0.3570676387201296, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6161, "step": 3857 }, { "epoch": 0.35716021524040964, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6321, "step": 3858 }, { "epoch": 0.3572527917606897, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6147, "step": 3859 }, { "epoch": 0.35734536828096974, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.653, "step": 3860 }, { "epoch": 0.3574379448012498, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5721, "step": 3861 }, { "epoch": 0.35753052132152985, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6282, "step": 3862 }, { "epoch": 0.35762309784180984, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5964, "step": 3863 }, { "epoch": 0.3577156743620899, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5671, "step": 3864 }, { "epoch": 0.35780825088236995, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5996, "step": 3865 }, { "epoch": 0.35790082740265, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.644, "step": 3866 }, { "epoch": 0.35799340392293005, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.7005, "step": 3867 }, { "epoch": 0.3580859804432101, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6199, "step": 3868 }, { "epoch": 0.35817855696349016, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.6139, "step": 3869 }, { "epoch": 0.35827113348377015, "grad_norm": 0.1318359375, "learning_rate": 0.02, "loss": 1.5307, "step": 3870 }, { "epoch": 0.3583637100040502, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.606, "step": 3871 }, { "epoch": 0.35845628652433026, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.648, "step": 3872 }, { "epoch": 0.3585488630446103, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5962, "step": 3873 }, { "epoch": 0.35864143956489036, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6171, "step": 3874 }, { "epoch": 0.3587340160851704, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6289, "step": 3875 }, { "epoch": 0.35882659260545047, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6223, "step": 3876 }, { "epoch": 0.35891916912573046, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6237, "step": 3877 }, { "epoch": 0.3590117456460105, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6947, "step": 3878 }, { "epoch": 0.35910432216629057, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6143, "step": 3879 }, { "epoch": 0.3591968986865706, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.7056, "step": 3880 }, { "epoch": 0.3592894752068507, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6103, "step": 3881 }, { "epoch": 0.3593820517271307, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6573, "step": 3882 }, { "epoch": 0.3594746282474108, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6183, "step": 3883 }, { "epoch": 0.3595672047676908, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.655, "step": 3884 }, { "epoch": 0.3596597812879708, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6329, "step": 3885 }, { "epoch": 0.3597523578082509, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6464, "step": 3886 }, { "epoch": 0.35984493432853093, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.635, "step": 3887 }, { "epoch": 0.359937510848811, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5642, "step": 3888 }, { "epoch": 0.36003008736909103, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5528, "step": 3889 }, { "epoch": 0.36012266388937103, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6013, "step": 3890 }, { "epoch": 0.3602152404096511, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5972, "step": 3891 }, { "epoch": 0.36030781692993114, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6172, "step": 3892 }, { "epoch": 0.3604003934502112, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6589, "step": 3893 }, { "epoch": 0.36049296997049124, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5219, "step": 3894 }, { "epoch": 0.3605855464907713, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6024, "step": 3895 }, { "epoch": 0.36067812301105134, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6213, "step": 3896 }, { "epoch": 0.36077069953133134, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6123, "step": 3897 }, { "epoch": 0.3608632760516114, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6611, "step": 3898 }, { "epoch": 0.36095585257189144, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6032, "step": 3899 }, { "epoch": 0.3610484290921715, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6415, "step": 3900 }, { "epoch": 0.36114100561245155, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.627, "step": 3901 }, { "epoch": 0.3612335821327316, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6804, "step": 3902 }, { "epoch": 0.36132615865301165, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.635, "step": 3903 }, { "epoch": 0.36141873517329165, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6479, "step": 3904 }, { "epoch": 0.3615113116935717, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6098, "step": 3905 }, { "epoch": 0.36160388821385175, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6453, "step": 3906 }, { "epoch": 0.3616964647341318, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6487, "step": 3907 }, { "epoch": 0.36178904125441186, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6017, "step": 3908 }, { "epoch": 0.3618816177746919, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5603, "step": 3909 }, { "epoch": 0.36197419429497196, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.6123, "step": 3910 }, { "epoch": 0.36206677081525196, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6488, "step": 3911 }, { "epoch": 0.362159347335532, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6025, "step": 3912 }, { "epoch": 0.36225192385581206, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6486, "step": 3913 }, { "epoch": 0.3623445003760921, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5401, "step": 3914 }, { "epoch": 0.36243707689637217, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5429, "step": 3915 }, { "epoch": 0.3625296534166522, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6584, "step": 3916 }, { "epoch": 0.36262222993693227, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6181, "step": 3917 }, { "epoch": 0.36271480645721227, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5855, "step": 3918 }, { "epoch": 0.3628073829774923, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6158, "step": 3919 }, { "epoch": 0.3628999594977724, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6084, "step": 3920 }, { "epoch": 0.3629925360180524, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5693, "step": 3921 }, { "epoch": 0.3630851125383325, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.53, "step": 3922 }, { "epoch": 0.36317768905861253, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5726, "step": 3923 }, { "epoch": 0.3632702655788925, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6285, "step": 3924 }, { "epoch": 0.3633628420991726, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.637, "step": 3925 }, { "epoch": 0.36345541861945263, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5843, "step": 3926 }, { "epoch": 0.3635479951397327, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5602, "step": 3927 }, { "epoch": 0.36364057166001273, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6251, "step": 3928 }, { "epoch": 0.3637331481802928, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6077, "step": 3929 }, { "epoch": 0.36382572470057284, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6236, "step": 3930 }, { "epoch": 0.36391830122085284, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6879, "step": 3931 }, { "epoch": 0.3640108777411329, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5462, "step": 3932 }, { "epoch": 0.36410345426141294, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5955, "step": 3933 }, { "epoch": 0.364196030781693, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5524, "step": 3934 }, { "epoch": 0.36428860730197304, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6648, "step": 3935 }, { "epoch": 0.3643811838222531, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6245, "step": 3936 }, { "epoch": 0.36447376034253315, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5954, "step": 3937 }, { "epoch": 0.36456633686281315, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.641, "step": 3938 }, { "epoch": 0.3646589133830932, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5892, "step": 3939 }, { "epoch": 0.36475148990337325, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.609, "step": 3940 }, { "epoch": 0.3648440664236533, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5894, "step": 3941 }, { "epoch": 0.36493664294393335, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5245, "step": 3942 }, { "epoch": 0.3650292194642134, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6495, "step": 3943 }, { "epoch": 0.36512179598449346, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6082, "step": 3944 }, { "epoch": 0.36521437250477345, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5968, "step": 3945 }, { "epoch": 0.3653069490250535, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6733, "step": 3946 }, { "epoch": 0.36539952554533356, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6568, "step": 3947 }, { "epoch": 0.3654921020656136, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6908, "step": 3948 }, { "epoch": 0.36558467858589366, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6909, "step": 3949 }, { "epoch": 0.3656772551061737, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.6146, "step": 3950 }, { "epoch": 0.36576983162645377, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5759, "step": 3951 }, { "epoch": 0.36586240814673376, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6181, "step": 3952 }, { "epoch": 0.3659549846670138, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.587, "step": 3953 }, { "epoch": 0.36604756118729387, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6717, "step": 3954 }, { "epoch": 0.3661401377075739, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6436, "step": 3955 }, { "epoch": 0.366232714227854, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.6008, "step": 3956 }, { "epoch": 0.366325290748134, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6127, "step": 3957 }, { "epoch": 0.366417867268414, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.606, "step": 3958 }, { "epoch": 0.3665104437886941, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6458, "step": 3959 }, { "epoch": 0.3666030203089741, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5833, "step": 3960 }, { "epoch": 0.3666955968292542, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.616, "step": 3961 }, { "epoch": 0.36678817334953423, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.562, "step": 3962 }, { "epoch": 0.3668807498698143, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5867, "step": 3963 }, { "epoch": 0.36697332639009433, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5319, "step": 3964 }, { "epoch": 0.36706590291037433, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5863, "step": 3965 }, { "epoch": 0.3671584794306544, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5301, "step": 3966 }, { "epoch": 0.36725105595093444, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5862, "step": 3967 }, { "epoch": 0.3673436324712145, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6369, "step": 3968 }, { "epoch": 0.36743620899149454, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6175, "step": 3969 }, { "epoch": 0.3675287855117746, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6166, "step": 3970 }, { "epoch": 0.36762136203205464, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5819, "step": 3971 }, { "epoch": 0.36771393855233464, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.6271, "step": 3972 }, { "epoch": 0.3678065150726147, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6427, "step": 3973 }, { "epoch": 0.36789909159289474, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6029, "step": 3974 }, { "epoch": 0.3679916681131748, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6067, "step": 3975 }, { "epoch": 0.36808424463345485, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6145, "step": 3976 }, { "epoch": 0.3681768211537349, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6037, "step": 3977 }, { "epoch": 0.36826939767401495, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6308, "step": 3978 }, { "epoch": 0.36836197419429495, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5922, "step": 3979 }, { "epoch": 0.368454550714575, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5804, "step": 3980 }, { "epoch": 0.36854712723485505, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5504, "step": 3981 }, { "epoch": 0.3686397037551351, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5425, "step": 3982 }, { "epoch": 0.36873228027541516, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5728, "step": 3983 }, { "epoch": 0.3688248567956952, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5863, "step": 3984 }, { "epoch": 0.36891743331597526, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5823, "step": 3985 }, { "epoch": 0.36901000983625526, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6161, "step": 3986 }, { "epoch": 0.3691025863565353, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5945, "step": 3987 }, { "epoch": 0.36919516287681536, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5958, "step": 3988 }, { "epoch": 0.3692877393970954, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5926, "step": 3989 }, { "epoch": 0.36938031591737547, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6403, "step": 3990 }, { "epoch": 0.3694728924376555, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6067, "step": 3991 }, { "epoch": 0.3695654689579355, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.6001, "step": 3992 }, { "epoch": 0.36965804547821557, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5701, "step": 3993 }, { "epoch": 0.3697506219984956, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6214, "step": 3994 }, { "epoch": 0.3698431985187757, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6074, "step": 3995 }, { "epoch": 0.3699357750390557, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5829, "step": 3996 }, { "epoch": 0.3700283515593358, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6616, "step": 3997 }, { "epoch": 0.37012092807961583, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6001, "step": 3998 }, { "epoch": 0.3702135045998958, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5932, "step": 3999 }, { "epoch": 0.3703060811201759, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6599, "step": 4000 }, { "epoch": 0.37039865764045593, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6349, "step": 4001 }, { "epoch": 0.370491234160736, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6165, "step": 4002 }, { "epoch": 0.37058381068101603, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5804, "step": 4003 }, { "epoch": 0.3706763872012961, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5717, "step": 4004 }, { "epoch": 0.37076896372157614, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5796, "step": 4005 }, { "epoch": 0.37086154024185614, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6141, "step": 4006 }, { "epoch": 0.3709541167621362, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6844, "step": 4007 }, { "epoch": 0.37104669328241624, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5828, "step": 4008 }, { "epoch": 0.3711392698026963, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6121, "step": 4009 }, { "epoch": 0.37123184632297634, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6915, "step": 4010 }, { "epoch": 0.3713244228432564, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6615, "step": 4011 }, { "epoch": 0.37141699936353645, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5642, "step": 4012 }, { "epoch": 0.37150957588381645, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5622, "step": 4013 }, { "epoch": 0.3716021524040965, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6465, "step": 4014 }, { "epoch": 0.37169472892437655, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6447, "step": 4015 }, { "epoch": 0.3717873054446566, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.6123, "step": 4016 }, { "epoch": 0.37187988196493665, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6093, "step": 4017 }, { "epoch": 0.3719724584852167, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6216, "step": 4018 }, { "epoch": 0.37206503500549676, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6126, "step": 4019 }, { "epoch": 0.37215761152577675, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5888, "step": 4020 }, { "epoch": 0.3722501880460568, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6132, "step": 4021 }, { "epoch": 0.37234276456633686, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5446, "step": 4022 }, { "epoch": 0.3724353410866169, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6131, "step": 4023 }, { "epoch": 0.37252791760689696, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5855, "step": 4024 }, { "epoch": 0.372620494127177, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6256, "step": 4025 }, { "epoch": 0.372713070647457, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6451, "step": 4026 }, { "epoch": 0.37280564716773706, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6151, "step": 4027 }, { "epoch": 0.3728982236880171, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6087, "step": 4028 }, { "epoch": 0.37299080020829717, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6046, "step": 4029 }, { "epoch": 0.3730833767285772, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6161, "step": 4030 }, { "epoch": 0.3731759532488573, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.6405, "step": 4031 }, { "epoch": 0.3732685297691373, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5586, "step": 4032 }, { "epoch": 0.3733611062894173, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6651, "step": 4033 }, { "epoch": 0.3734536828096974, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5984, "step": 4034 }, { "epoch": 0.3735462593299774, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5842, "step": 4035 }, { "epoch": 0.3736388358502575, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6177, "step": 4036 }, { "epoch": 0.37373141237053753, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6245, "step": 4037 }, { "epoch": 0.3738239888908176, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6941, "step": 4038 }, { "epoch": 0.37391656541109763, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6252, "step": 4039 }, { "epoch": 0.37400914193137763, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.508, "step": 4040 }, { "epoch": 0.3741017184516577, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.646, "step": 4041 }, { "epoch": 0.37419429497193774, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6139, "step": 4042 }, { "epoch": 0.3742868714922178, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6028, "step": 4043 }, { "epoch": 0.37437944801249784, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6157, "step": 4044 }, { "epoch": 0.3744720245327779, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6583, "step": 4045 }, { "epoch": 0.37456460105305794, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5949, "step": 4046 }, { "epoch": 0.37465717757333794, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6439, "step": 4047 }, { "epoch": 0.374749754093618, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6939, "step": 4048 }, { "epoch": 0.37484233061389804, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6058, "step": 4049 }, { "epoch": 0.3749349071341781, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5668, "step": 4050 }, { "epoch": 0.37502748365445815, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6346, "step": 4051 }, { "epoch": 0.3751200601747382, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5838, "step": 4052 }, { "epoch": 0.37521263669501825, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5954, "step": 4053 }, { "epoch": 0.37530521321529825, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5734, "step": 4054 }, { "epoch": 0.3753977897355783, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5643, "step": 4055 }, { "epoch": 0.37549036625585835, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6497, "step": 4056 }, { "epoch": 0.3755829427761384, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.599, "step": 4057 }, { "epoch": 0.37567551929641846, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5963, "step": 4058 }, { "epoch": 0.3757680958166985, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5897, "step": 4059 }, { "epoch": 0.3758606723369785, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5644, "step": 4060 }, { "epoch": 0.37595324885725856, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6111, "step": 4061 }, { "epoch": 0.3760458253775386, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6527, "step": 4062 }, { "epoch": 0.37613840189781866, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.633, "step": 4063 }, { "epoch": 0.3762309784180987, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6249, "step": 4064 }, { "epoch": 0.37632355493837877, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5536, "step": 4065 }, { "epoch": 0.3764161314586588, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5869, "step": 4066 }, { "epoch": 0.3765087079789388, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5885, "step": 4067 }, { "epoch": 0.37660128449921887, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5618, "step": 4068 }, { "epoch": 0.3766938610194989, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5974, "step": 4069 }, { "epoch": 0.376786437539779, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5592, "step": 4070 }, { "epoch": 0.376879014060059, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6602, "step": 4071 }, { "epoch": 0.3769715905803391, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5945, "step": 4072 }, { "epoch": 0.37706416710061913, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.555, "step": 4073 }, { "epoch": 0.3771567436208991, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6416, "step": 4074 }, { "epoch": 0.3772493201411792, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.591, "step": 4075 }, { "epoch": 0.37734189666145923, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6398, "step": 4076 }, { "epoch": 0.3774344731817393, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.6582, "step": 4077 }, { "epoch": 0.37752704970201933, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5782, "step": 4078 }, { "epoch": 0.3776196262222994, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5382, "step": 4079 }, { "epoch": 0.37771220274257944, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5922, "step": 4080 }, { "epoch": 0.37780477926285944, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5861, "step": 4081 }, { "epoch": 0.3778973557831395, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.6978, "step": 4082 }, { "epoch": 0.37798993230341954, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5812, "step": 4083 }, { "epoch": 0.3780825088236996, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6042, "step": 4084 }, { "epoch": 0.37817508534397964, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6433, "step": 4085 }, { "epoch": 0.3782676618642597, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6058, "step": 4086 }, { "epoch": 0.37836023838453975, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5878, "step": 4087 }, { "epoch": 0.37845281490481975, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6515, "step": 4088 }, { "epoch": 0.3785453914250998, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.609, "step": 4089 }, { "epoch": 0.37863796794537985, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6488, "step": 4090 }, { "epoch": 0.3787305444656599, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.561, "step": 4091 }, { "epoch": 0.37882312098593995, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5352, "step": 4092 }, { "epoch": 0.37891569750622, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5793, "step": 4093 }, { "epoch": 0.3790082740265, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6268, "step": 4094 }, { "epoch": 0.37910085054678005, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6011, "step": 4095 }, { "epoch": 0.3791934270670601, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6456, "step": 4096 }, { "epoch": 0.37928600358734016, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6555, "step": 4097 }, { "epoch": 0.3793785801076202, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6045, "step": 4098 }, { "epoch": 0.37947115662790026, "grad_norm": 0.130859375, "learning_rate": 0.02, "loss": 1.5133, "step": 4099 }, { "epoch": 0.3795637331481803, "grad_norm": 0.1318359375, "learning_rate": 0.02, "loss": 1.609, "step": 4100 }, { "epoch": 0.3796563096684603, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5401, "step": 4101 }, { "epoch": 0.37974888618874036, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5756, "step": 4102 }, { "epoch": 0.3798414627090204, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.543, "step": 4103 }, { "epoch": 0.37993403922930047, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6058, "step": 4104 }, { "epoch": 0.3800266157495805, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.6035, "step": 4105 }, { "epoch": 0.3801191922698606, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5653, "step": 4106 }, { "epoch": 0.3802117687901406, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5872, "step": 4107 }, { "epoch": 0.3803043453104206, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.5941, "step": 4108 }, { "epoch": 0.3803969218307007, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6186, "step": 4109 }, { "epoch": 0.3804894983509807, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5742, "step": 4110 }, { "epoch": 0.3805820748712608, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5936, "step": 4111 }, { "epoch": 0.38067465139154083, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.56, "step": 4112 }, { "epoch": 0.3807672279118209, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6227, "step": 4113 }, { "epoch": 0.38085980443210093, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6092, "step": 4114 }, { "epoch": 0.38095238095238093, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6056, "step": 4115 }, { "epoch": 0.381044957472661, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6413, "step": 4116 }, { "epoch": 0.38113753399294104, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5748, "step": 4117 }, { "epoch": 0.3812301105132211, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5694, "step": 4118 }, { "epoch": 0.38132268703350114, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6314, "step": 4119 }, { "epoch": 0.3814152635537812, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5968, "step": 4120 }, { "epoch": 0.3815078400740612, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.647, "step": 4121 }, { "epoch": 0.38160041659434124, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5674, "step": 4122 }, { "epoch": 0.3816929931146213, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.646, "step": 4123 }, { "epoch": 0.38178556963490135, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6189, "step": 4124 }, { "epoch": 0.3818781461551814, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.5681, "step": 4125 }, { "epoch": 0.38197072267546145, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5854, "step": 4126 }, { "epoch": 0.3820632991957415, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.634, "step": 4127 }, { "epoch": 0.3821558757160215, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.712, "step": 4128 }, { "epoch": 0.38224845223630155, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6649, "step": 4129 }, { "epoch": 0.3823410287565816, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6005, "step": 4130 }, { "epoch": 0.38243360527686165, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5706, "step": 4131 }, { "epoch": 0.3825261817971417, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5867, "step": 4132 }, { "epoch": 0.38261875831742176, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5997, "step": 4133 }, { "epoch": 0.3827113348377018, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5512, "step": 4134 }, { "epoch": 0.3828039113579818, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.611, "step": 4135 }, { "epoch": 0.38289648787826186, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5931, "step": 4136 }, { "epoch": 0.3829890643985419, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.561, "step": 4137 }, { "epoch": 0.38308164091882196, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5728, "step": 4138 }, { "epoch": 0.383174217439102, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5945, "step": 4139 }, { "epoch": 0.38326679395938207, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5759, "step": 4140 }, { "epoch": 0.3833593704796621, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6014, "step": 4141 }, { "epoch": 0.3834519469999421, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5781, "step": 4142 }, { "epoch": 0.38354452352022217, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6056, "step": 4143 }, { "epoch": 0.3836371000405022, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.611, "step": 4144 }, { "epoch": 0.3837296765607823, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.514, "step": 4145 }, { "epoch": 0.3838222530810623, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5825, "step": 4146 }, { "epoch": 0.3839148296013424, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6236, "step": 4147 }, { "epoch": 0.38400740612162243, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5875, "step": 4148 }, { "epoch": 0.3840999826419024, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5974, "step": 4149 }, { "epoch": 0.3841925591621825, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5513, "step": 4150 }, { "epoch": 0.38428513568246253, "grad_norm": 0.1328125, "learning_rate": 0.02, "loss": 1.5998, "step": 4151 }, { "epoch": 0.3843777122027426, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.592, "step": 4152 }, { "epoch": 0.38447028872302264, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6064, "step": 4153 }, { "epoch": 0.3845628652433027, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6101, "step": 4154 }, { "epoch": 0.3846554417635827, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.6752, "step": 4155 }, { "epoch": 0.38474801828386274, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5853, "step": 4156 }, { "epoch": 0.3848405948041428, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6166, "step": 4157 }, { "epoch": 0.38493317132442284, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.6698, "step": 4158 }, { "epoch": 0.3850257478447029, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6147, "step": 4159 }, { "epoch": 0.38511832436498294, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5945, "step": 4160 }, { "epoch": 0.385210900885263, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6361, "step": 4161 }, { "epoch": 0.385303477405543, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5931, "step": 4162 }, { "epoch": 0.38539605392582305, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5974, "step": 4163 }, { "epoch": 0.3854886304461031, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6292, "step": 4164 }, { "epoch": 0.38558120696638315, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.577, "step": 4165 }, { "epoch": 0.3856737834866632, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5449, "step": 4166 }, { "epoch": 0.38576636000694325, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5932, "step": 4167 }, { "epoch": 0.3858589365272233, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5603, "step": 4168 }, { "epoch": 0.3859515130475033, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5479, "step": 4169 }, { "epoch": 0.38604408956778336, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6323, "step": 4170 }, { "epoch": 0.3861366660880634, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6388, "step": 4171 }, { "epoch": 0.38622924260834346, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5816, "step": 4172 }, { "epoch": 0.3863218191286235, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6271, "step": 4173 }, { "epoch": 0.38641439564890356, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5756, "step": 4174 }, { "epoch": 0.3865069721691836, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5889, "step": 4175 }, { "epoch": 0.3865995486894636, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6069, "step": 4176 }, { "epoch": 0.38669212520974366, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6513, "step": 4177 }, { "epoch": 0.3867847017300237, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5952, "step": 4178 }, { "epoch": 0.38687727825030377, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6068, "step": 4179 }, { "epoch": 0.3869698547705838, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6604, "step": 4180 }, { "epoch": 0.3870624312908639, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5652, "step": 4181 }, { "epoch": 0.3871550078111439, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5416, "step": 4182 }, { "epoch": 0.3872475843314239, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.6489, "step": 4183 }, { "epoch": 0.387340160851704, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6367, "step": 4184 }, { "epoch": 0.387432737371984, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6441, "step": 4185 }, { "epoch": 0.3875253138922641, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5737, "step": 4186 }, { "epoch": 0.38761789041254413, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6161, "step": 4187 }, { "epoch": 0.3877104669328242, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5753, "step": 4188 }, { "epoch": 0.3878030434531042, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6292, "step": 4189 }, { "epoch": 0.38789561997338423, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5591, "step": 4190 }, { "epoch": 0.3879881964936643, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.6344, "step": 4191 }, { "epoch": 0.38808077301394434, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6642, "step": 4192 }, { "epoch": 0.3881733495342244, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5711, "step": 4193 }, { "epoch": 0.38826592605450444, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5871, "step": 4194 }, { "epoch": 0.3883585025747845, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5651, "step": 4195 }, { "epoch": 0.3884510790950645, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5437, "step": 4196 }, { "epoch": 0.38854365561534454, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6441, "step": 4197 }, { "epoch": 0.3886362321356246, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6114, "step": 4198 }, { "epoch": 0.38872880865590465, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.572, "step": 4199 }, { "epoch": 0.3888213851761847, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5845, "step": 4200 }, { "epoch": 0.38891396169646475, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6258, "step": 4201 }, { "epoch": 0.3890065382167448, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.583, "step": 4202 }, { "epoch": 0.3890991147370248, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6492, "step": 4203 }, { "epoch": 0.38919169125730485, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.558, "step": 4204 }, { "epoch": 0.3892842677775849, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.7135, "step": 4205 }, { "epoch": 0.38937684429786495, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5927, "step": 4206 }, { "epoch": 0.389469420818145, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.577, "step": 4207 }, { "epoch": 0.38956199733842506, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5624, "step": 4208 }, { "epoch": 0.3896545738587051, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6452, "step": 4209 }, { "epoch": 0.3897471503789851, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.6489, "step": 4210 }, { "epoch": 0.38983972689926516, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5723, "step": 4211 }, { "epoch": 0.3899323034195452, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.609, "step": 4212 }, { "epoch": 0.39002487993982526, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6052, "step": 4213 }, { "epoch": 0.3901174564601053, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5764, "step": 4214 }, { "epoch": 0.39021003298038537, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6119, "step": 4215 }, { "epoch": 0.3903026095006654, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5814, "step": 4216 }, { "epoch": 0.3903951860209454, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5487, "step": 4217 }, { "epoch": 0.39048776254122547, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5588, "step": 4218 }, { "epoch": 0.3905803390615055, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6287, "step": 4219 }, { "epoch": 0.3906729155817856, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5852, "step": 4220 }, { "epoch": 0.3907654921020656, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6163, "step": 4221 }, { "epoch": 0.3908580686223457, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5547, "step": 4222 }, { "epoch": 0.3909506451426257, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5735, "step": 4223 }, { "epoch": 0.3910432216629057, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5658, "step": 4224 }, { "epoch": 0.3911357981831858, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5676, "step": 4225 }, { "epoch": 0.39122837470346583, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6048, "step": 4226 }, { "epoch": 0.3913209512237459, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5977, "step": 4227 }, { "epoch": 0.39141352774402594, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6514, "step": 4228 }, { "epoch": 0.391506104264306, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5914, "step": 4229 }, { "epoch": 0.391598680784586, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5828, "step": 4230 }, { "epoch": 0.39169125730486604, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6153, "step": 4231 }, { "epoch": 0.3917838338251461, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5978, "step": 4232 }, { "epoch": 0.39187641034542614, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.588, "step": 4233 }, { "epoch": 0.3919689868657062, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5525, "step": 4234 }, { "epoch": 0.39206156338598624, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6438, "step": 4235 }, { "epoch": 0.3921541399062663, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6077, "step": 4236 }, { "epoch": 0.3922467164265463, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6195, "step": 4237 }, { "epoch": 0.39233929294682635, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6483, "step": 4238 }, { "epoch": 0.3924318694671064, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6536, "step": 4239 }, { "epoch": 0.39252444598738645, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6264, "step": 4240 }, { "epoch": 0.3926170225076665, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6642, "step": 4241 }, { "epoch": 0.39270959902794655, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5683, "step": 4242 }, { "epoch": 0.3928021755482266, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5813, "step": 4243 }, { "epoch": 0.3928947520685066, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5724, "step": 4244 }, { "epoch": 0.39298732858878666, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.594, "step": 4245 }, { "epoch": 0.3930799051090667, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5493, "step": 4246 }, { "epoch": 0.39317248162934676, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.67, "step": 4247 }, { "epoch": 0.3932650581496268, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6045, "step": 4248 }, { "epoch": 0.39335763466990686, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6172, "step": 4249 }, { "epoch": 0.3934502111901869, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5652, "step": 4250 }, { "epoch": 0.3935427877104669, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.582, "step": 4251 }, { "epoch": 0.39363536423074696, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6105, "step": 4252 }, { "epoch": 0.393727940751027, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5323, "step": 4253 }, { "epoch": 0.39382051727130707, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6132, "step": 4254 }, { "epoch": 0.3939130937915871, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5509, "step": 4255 }, { "epoch": 0.3940056703118672, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5851, "step": 4256 }, { "epoch": 0.39409824683214717, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5867, "step": 4257 }, { "epoch": 0.3941908233524272, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6072, "step": 4258 }, { "epoch": 0.3942833998727073, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5707, "step": 4259 }, { "epoch": 0.3943759763929873, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6016, "step": 4260 }, { "epoch": 0.3944685529132674, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5656, "step": 4261 }, { "epoch": 0.39456112943354743, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5968, "step": 4262 }, { "epoch": 0.3946537059538275, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6659, "step": 4263 }, { "epoch": 0.3947462824741075, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6445, "step": 4264 }, { "epoch": 0.39483885899438753, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6074, "step": 4265 }, { "epoch": 0.3949314355146676, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6087, "step": 4266 }, { "epoch": 0.39502401203494764, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5641, "step": 4267 }, { "epoch": 0.3951165885552277, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6064, "step": 4268 }, { "epoch": 0.39520916507550774, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6465, "step": 4269 }, { "epoch": 0.3953017415957878, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5649, "step": 4270 }, { "epoch": 0.3953943181160678, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5933, "step": 4271 }, { "epoch": 0.39548689463634784, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6468, "step": 4272 }, { "epoch": 0.3955794711566279, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6289, "step": 4273 }, { "epoch": 0.39567204767690795, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6267, "step": 4274 }, { "epoch": 0.395764624197188, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5651, "step": 4275 }, { "epoch": 0.39585720071746805, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5928, "step": 4276 }, { "epoch": 0.3959497772377481, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.533, "step": 4277 }, { "epoch": 0.3960423537580281, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5808, "step": 4278 }, { "epoch": 0.39613493027830815, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6344, "step": 4279 }, { "epoch": 0.3962275067985882, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.619, "step": 4280 }, { "epoch": 0.39632008331886825, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.538, "step": 4281 }, { "epoch": 0.3964126598391483, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5859, "step": 4282 }, { "epoch": 0.39650523635942836, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6614, "step": 4283 }, { "epoch": 0.3965978128797084, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5336, "step": 4284 }, { "epoch": 0.3966903893999884, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6162, "step": 4285 }, { "epoch": 0.39678296592026846, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.576, "step": 4286 }, { "epoch": 0.3968755424405485, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6403, "step": 4287 }, { "epoch": 0.39696811896082856, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6411, "step": 4288 }, { "epoch": 0.3970606954811086, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5927, "step": 4289 }, { "epoch": 0.39715327200138867, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5979, "step": 4290 }, { "epoch": 0.39724584852166867, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5287, "step": 4291 }, { "epoch": 0.3973384250419487, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5715, "step": 4292 }, { "epoch": 0.39743100156222877, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6159, "step": 4293 }, { "epoch": 0.3975235780825088, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.655, "step": 4294 }, { "epoch": 0.3976161546027889, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6396, "step": 4295 }, { "epoch": 0.3977087311230689, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6249, "step": 4296 }, { "epoch": 0.397801307643349, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5371, "step": 4297 }, { "epoch": 0.397893884163629, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5571, "step": 4298 }, { "epoch": 0.397986460683909, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.615, "step": 4299 }, { "epoch": 0.3980790372041891, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6152, "step": 4300 }, { "epoch": 0.39817161372446913, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5848, "step": 4301 }, { "epoch": 0.3982641902447492, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5751, "step": 4302 }, { "epoch": 0.39835676676502924, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6026, "step": 4303 }, { "epoch": 0.3984493432853093, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6335, "step": 4304 }, { "epoch": 0.3985419198055893, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5008, "step": 4305 }, { "epoch": 0.39863449632586934, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6145, "step": 4306 }, { "epoch": 0.3987270728461494, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5593, "step": 4307 }, { "epoch": 0.39881964936642944, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5442, "step": 4308 }, { "epoch": 0.3989122258867095, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5972, "step": 4309 }, { "epoch": 0.39900480240698954, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5826, "step": 4310 }, { "epoch": 0.3990973789272696, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6189, "step": 4311 }, { "epoch": 0.3991899554475496, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5986, "step": 4312 }, { "epoch": 0.39928253196782965, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5762, "step": 4313 }, { "epoch": 0.3993751084881097, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.63, "step": 4314 }, { "epoch": 0.39946768500838975, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6487, "step": 4315 }, { "epoch": 0.3995602615286698, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.578, "step": 4316 }, { "epoch": 0.39965283804894985, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6128, "step": 4317 }, { "epoch": 0.3997454145692299, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5804, "step": 4318 }, { "epoch": 0.3998379910895099, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5669, "step": 4319 }, { "epoch": 0.39993056760978996, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5968, "step": 4320 }, { "epoch": 0.40002314413007, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6179, "step": 4321 }, { "epoch": 0.40011572065035006, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5544, "step": 4322 }, { "epoch": 0.4002082971706301, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5982, "step": 4323 }, { "epoch": 0.40030087369091016, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6672, "step": 4324 }, { "epoch": 0.40039345021119016, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.591, "step": 4325 }, { "epoch": 0.4004860267314702, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6131, "step": 4326 }, { "epoch": 0.40057860325175026, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5896, "step": 4327 }, { "epoch": 0.4006711797720303, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6251, "step": 4328 }, { "epoch": 0.40076375629231037, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6239, "step": 4329 }, { "epoch": 0.4008563328125904, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5361, "step": 4330 }, { "epoch": 0.4009489093328705, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5812, "step": 4331 }, { "epoch": 0.40104148585315047, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.543, "step": 4332 }, { "epoch": 0.4011340623734305, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5317, "step": 4333 }, { "epoch": 0.4012266388937106, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5782, "step": 4334 }, { "epoch": 0.4013192154139906, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.509, "step": 4335 }, { "epoch": 0.4014117919342707, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6284, "step": 4336 }, { "epoch": 0.40150436845455073, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5663, "step": 4337 }, { "epoch": 0.4015969449748308, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5685, "step": 4338 }, { "epoch": 0.4016895214951108, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6109, "step": 4339 }, { "epoch": 0.40178209801539083, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5703, "step": 4340 }, { "epoch": 0.4018746745356709, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5928, "step": 4341 }, { "epoch": 0.40196725105595094, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5843, "step": 4342 }, { "epoch": 0.402059827576231, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5141, "step": 4343 }, { "epoch": 0.40215240409651104, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5929, "step": 4344 }, { "epoch": 0.4022449806167911, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.586, "step": 4345 }, { "epoch": 0.4023375571370711, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5676, "step": 4346 }, { "epoch": 0.40243013365735114, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5804, "step": 4347 }, { "epoch": 0.4025227101776312, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6231, "step": 4348 }, { "epoch": 0.40261528669791125, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.6123, "step": 4349 }, { "epoch": 0.4027078632181913, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6297, "step": 4350 }, { "epoch": 0.40280043973847135, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5915, "step": 4351 }, { "epoch": 0.4028930162587514, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.557, "step": 4352 }, { "epoch": 0.4029855927790314, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5881, "step": 4353 }, { "epoch": 0.40307816929931145, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5967, "step": 4354 }, { "epoch": 0.4031707458195915, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6133, "step": 4355 }, { "epoch": 0.40326332233987155, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5454, "step": 4356 }, { "epoch": 0.4033558988601516, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6063, "step": 4357 }, { "epoch": 0.40344847538043166, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6164, "step": 4358 }, { "epoch": 0.40354105190071166, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5722, "step": 4359 }, { "epoch": 0.4036336284209917, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6426, "step": 4360 }, { "epoch": 0.40372620494127176, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6325, "step": 4361 }, { "epoch": 0.4038187814615518, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6269, "step": 4362 }, { "epoch": 0.40391135798183186, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6531, "step": 4363 }, { "epoch": 0.4040039345021119, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5789, "step": 4364 }, { "epoch": 0.40409651102239197, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5905, "step": 4365 }, { "epoch": 0.40418908754267197, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6093, "step": 4366 }, { "epoch": 0.404281664062952, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5237, "step": 4367 }, { "epoch": 0.40437424058323207, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.6448, "step": 4368 }, { "epoch": 0.4044668171035121, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5699, "step": 4369 }, { "epoch": 0.4045593936237922, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5926, "step": 4370 }, { "epoch": 0.4046519701440722, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5787, "step": 4371 }, { "epoch": 0.4047445466643523, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.611, "step": 4372 }, { "epoch": 0.4048371231846323, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5798, "step": 4373 }, { "epoch": 0.4049296997049123, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5685, "step": 4374 }, { "epoch": 0.4050222762251924, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5967, "step": 4375 }, { "epoch": 0.40511485274547243, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6312, "step": 4376 }, { "epoch": 0.4052074292657525, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5789, "step": 4377 }, { "epoch": 0.40530000578603254, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5515, "step": 4378 }, { "epoch": 0.4053925823063126, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5905, "step": 4379 }, { "epoch": 0.4054851588265926, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6472, "step": 4380 }, { "epoch": 0.40557773534687264, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6021, "step": 4381 }, { "epoch": 0.4056703118671527, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.6058, "step": 4382 }, { "epoch": 0.40576288838743274, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6217, "step": 4383 }, { "epoch": 0.4058554649077128, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5953, "step": 4384 }, { "epoch": 0.40594804142799285, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5417, "step": 4385 }, { "epoch": 0.40604061794827284, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5838, "step": 4386 }, { "epoch": 0.4061331944685529, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5173, "step": 4387 }, { "epoch": 0.40622577098883295, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5733, "step": 4388 }, { "epoch": 0.406318347509113, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6026, "step": 4389 }, { "epoch": 0.40641092402939305, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5971, "step": 4390 }, { "epoch": 0.4065035005496731, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6225, "step": 4391 }, { "epoch": 0.40659607706995315, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5724, "step": 4392 }, { "epoch": 0.40668865359023315, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6342, "step": 4393 }, { "epoch": 0.4067812301105132, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.591, "step": 4394 }, { "epoch": 0.40687380663079326, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6436, "step": 4395 }, { "epoch": 0.4069663831510733, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5401, "step": 4396 }, { "epoch": 0.40705895967135336, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5578, "step": 4397 }, { "epoch": 0.4071515361916334, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6371, "step": 4398 }, { "epoch": 0.40724411271191346, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.585, "step": 4399 }, { "epoch": 0.40733668923219346, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5539, "step": 4400 }, { "epoch": 0.4074292657524735, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5983, "step": 4401 }, { "epoch": 0.40752184227275356, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.657, "step": 4402 }, { "epoch": 0.4076144187930336, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5813, "step": 4403 }, { "epoch": 0.40770699531331367, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5999, "step": 4404 }, { "epoch": 0.4077995718335937, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6276, "step": 4405 }, { "epoch": 0.4078921483538738, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6393, "step": 4406 }, { "epoch": 0.40798472487415377, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5902, "step": 4407 }, { "epoch": 0.4080773013944338, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6684, "step": 4408 }, { "epoch": 0.4081698779147139, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5642, "step": 4409 }, { "epoch": 0.4082624544349939, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6028, "step": 4410 }, { "epoch": 0.408355030955274, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6342, "step": 4411 }, { "epoch": 0.40844760747555403, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5859, "step": 4412 }, { "epoch": 0.4085401839958341, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6234, "step": 4413 }, { "epoch": 0.4086327605161141, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.6188, "step": 4414 }, { "epoch": 0.40872533703639413, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6022, "step": 4415 }, { "epoch": 0.4088179135566742, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5897, "step": 4416 }, { "epoch": 0.40891049007695424, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6064, "step": 4417 }, { "epoch": 0.4090030665972343, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5657, "step": 4418 }, { "epoch": 0.40909564311751434, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5803, "step": 4419 }, { "epoch": 0.40918821963779434, "grad_norm": 0.130859375, "learning_rate": 0.02, "loss": 1.5977, "step": 4420 }, { "epoch": 0.4092807961580744, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6341, "step": 4421 }, { "epoch": 0.40937337267835444, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5287, "step": 4422 }, { "epoch": 0.4094659491986345, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6646, "step": 4423 }, { "epoch": 0.40955852571891455, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5776, "step": 4424 }, { "epoch": 0.4096511022391946, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.609, "step": 4425 }, { "epoch": 0.40974367875947465, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5847, "step": 4426 }, { "epoch": 0.40983625527975465, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6153, "step": 4427 }, { "epoch": 0.4099288318000347, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6572, "step": 4428 }, { "epoch": 0.41002140832031475, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5959, "step": 4429 }, { "epoch": 0.4101139848405948, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.629, "step": 4430 }, { "epoch": 0.41020656136087486, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5934, "step": 4431 }, { "epoch": 0.4102991378811549, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6031, "step": 4432 }, { "epoch": 0.41039171440143496, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6148, "step": 4433 }, { "epoch": 0.41048429092171496, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5342, "step": 4434 }, { "epoch": 0.410576867441995, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.593, "step": 4435 }, { "epoch": 0.41066944396227506, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5406, "step": 4436 }, { "epoch": 0.4107620204825551, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5595, "step": 4437 }, { "epoch": 0.41085459700283516, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5677, "step": 4438 }, { "epoch": 0.4109471735231152, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.578, "step": 4439 }, { "epoch": 0.41103975004339527, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5751, "step": 4440 }, { "epoch": 0.41113232656367527, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5438, "step": 4441 }, { "epoch": 0.4112249030839553, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5558, "step": 4442 }, { "epoch": 0.41131747960423537, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5849, "step": 4443 }, { "epoch": 0.4114100561245154, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6392, "step": 4444 }, { "epoch": 0.4115026326447955, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5588, "step": 4445 }, { "epoch": 0.4115952091650755, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6405, "step": 4446 }, { "epoch": 0.4116877856853556, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6484, "step": 4447 }, { "epoch": 0.4117803622056356, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.585, "step": 4448 }, { "epoch": 0.4118729387259156, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5502, "step": 4449 }, { "epoch": 0.4119655152461957, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.591, "step": 4450 }, { "epoch": 0.41205809176647573, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5943, "step": 4451 }, { "epoch": 0.4121506682867558, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6667, "step": 4452 }, { "epoch": 0.41224324480703584, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5859, "step": 4453 }, { "epoch": 0.41233582132731583, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5311, "step": 4454 }, { "epoch": 0.4124283978475959, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5879, "step": 4455 }, { "epoch": 0.41252097436787594, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6047, "step": 4456 }, { "epoch": 0.412613550888156, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.607, "step": 4457 }, { "epoch": 0.41270612740843604, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6122, "step": 4458 }, { "epoch": 0.4127987039287161, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6209, "step": 4459 }, { "epoch": 0.41289128044899615, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6432, "step": 4460 }, { "epoch": 0.41298385696927614, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5889, "step": 4461 }, { "epoch": 0.4130764334895562, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5624, "step": 4462 }, { "epoch": 0.41316901000983625, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5778, "step": 4463 }, { "epoch": 0.4132615865301163, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6137, "step": 4464 }, { "epoch": 0.41335416305039635, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6035, "step": 4465 }, { "epoch": 0.4134467395706764, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5849, "step": 4466 }, { "epoch": 0.41353931609095645, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5956, "step": 4467 }, { "epoch": 0.41363189261123645, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6086, "step": 4468 }, { "epoch": 0.4137244691315165, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5476, "step": 4469 }, { "epoch": 0.41381704565179656, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6172, "step": 4470 }, { "epoch": 0.4139096221720766, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6261, "step": 4471 }, { "epoch": 0.41400219869235666, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6049, "step": 4472 }, { "epoch": 0.4140947752126367, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6064, "step": 4473 }, { "epoch": 0.41418735173291676, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6408, "step": 4474 }, { "epoch": 0.41427992825319676, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5912, "step": 4475 }, { "epoch": 0.4143725047734768, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5729, "step": 4476 }, { "epoch": 0.41446508129375687, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6144, "step": 4477 }, { "epoch": 0.4145576578140369, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5756, "step": 4478 }, { "epoch": 0.41465023433431697, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5629, "step": 4479 }, { "epoch": 0.414742810854597, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6255, "step": 4480 }, { "epoch": 0.4148353873748771, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6016, "step": 4481 }, { "epoch": 0.41492796389515707, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6495, "step": 4482 }, { "epoch": 0.4150205404154371, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5437, "step": 4483 }, { "epoch": 0.4151131169357172, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.6318, "step": 4484 }, { "epoch": 0.4152056934559972, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6394, "step": 4485 }, { "epoch": 0.4152982699762773, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.571, "step": 4486 }, { "epoch": 0.41539084649655733, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6369, "step": 4487 }, { "epoch": 0.4154834230168373, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5549, "step": 4488 }, { "epoch": 0.4155759995371174, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5776, "step": 4489 }, { "epoch": 0.41566857605739743, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5503, "step": 4490 }, { "epoch": 0.4157611525776775, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6125, "step": 4491 }, { "epoch": 0.41585372909795754, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5797, "step": 4492 }, { "epoch": 0.4159463056182376, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5436, "step": 4493 }, { "epoch": 0.41603888213851764, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5828, "step": 4494 }, { "epoch": 0.41613145865879764, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6467, "step": 4495 }, { "epoch": 0.4162240351790777, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6121, "step": 4496 }, { "epoch": 0.41631661169935774, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5894, "step": 4497 }, { "epoch": 0.4164091882196378, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5417, "step": 4498 }, { "epoch": 0.41650176473991785, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6291, "step": 4499 }, { "epoch": 0.4165943412601979, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5477, "step": 4500 }, { "epoch": 0.41668691778047795, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6016, "step": 4501 }, { "epoch": 0.41677949430075795, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.562, "step": 4502 }, { "epoch": 0.416872070821038, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6075, "step": 4503 }, { "epoch": 0.41696464734131805, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.5617, "step": 4504 }, { "epoch": 0.4170572238615981, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5996, "step": 4505 }, { "epoch": 0.41714980038187816, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5851, "step": 4506 }, { "epoch": 0.4172423769021582, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5739, "step": 4507 }, { "epoch": 0.41733495342243826, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5689, "step": 4508 }, { "epoch": 0.41742752994271826, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5432, "step": 4509 }, { "epoch": 0.4175201064629983, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5707, "step": 4510 }, { "epoch": 0.41761268298327836, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6407, "step": 4511 }, { "epoch": 0.4177052595035584, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6059, "step": 4512 }, { "epoch": 0.41779783602383846, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5431, "step": 4513 }, { "epoch": 0.4178904125441185, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6293, "step": 4514 }, { "epoch": 0.41798298906439857, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5335, "step": 4515 }, { "epoch": 0.41807556558467857, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5229, "step": 4516 }, { "epoch": 0.4181681421049586, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6021, "step": 4517 }, { "epoch": 0.41826071862523867, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6042, "step": 4518 }, { "epoch": 0.4183532951455187, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6203, "step": 4519 }, { "epoch": 0.4184458716657988, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6173, "step": 4520 }, { "epoch": 0.4185384481860788, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5846, "step": 4521 }, { "epoch": 0.4186310247063588, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6173, "step": 4522 }, { "epoch": 0.4187236012266389, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5804, "step": 4523 }, { "epoch": 0.4188161777469189, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5647, "step": 4524 }, { "epoch": 0.418908754267199, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5745, "step": 4525 }, { "epoch": 0.41900133078747903, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.6194, "step": 4526 }, { "epoch": 0.4190939073077591, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6075, "step": 4527 }, { "epoch": 0.41918648382803914, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6039, "step": 4528 }, { "epoch": 0.41927906034831913, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.6012, "step": 4529 }, { "epoch": 0.4193716368685992, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5522, "step": 4530 }, { "epoch": 0.41946421338887924, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5983, "step": 4531 }, { "epoch": 0.4195567899091593, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5856, "step": 4532 }, { "epoch": 0.41964936642943934, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6198, "step": 4533 }, { "epoch": 0.4197419429497194, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6228, "step": 4534 }, { "epoch": 0.41983451946999945, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6646, "step": 4535 }, { "epoch": 0.41992709599027944, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6177, "step": 4536 }, { "epoch": 0.4200196725105595, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5565, "step": 4537 }, { "epoch": 0.42011224903083955, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6071, "step": 4538 }, { "epoch": 0.4202048255511196, "grad_norm": 0.1318359375, "learning_rate": 0.02, "loss": 1.5883, "step": 4539 }, { "epoch": 0.42029740207139965, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.539, "step": 4540 }, { "epoch": 0.4203899785916797, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.596, "step": 4541 }, { "epoch": 0.42048255511195975, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6224, "step": 4542 }, { "epoch": 0.42057513163223975, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6061, "step": 4543 }, { "epoch": 0.4206677081525198, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6681, "step": 4544 }, { "epoch": 0.42076028467279986, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5662, "step": 4545 }, { "epoch": 0.4208528611930799, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6055, "step": 4546 }, { "epoch": 0.42094543771335996, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6651, "step": 4547 }, { "epoch": 0.42103801423364, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6726, "step": 4548 }, { "epoch": 0.42113059075392006, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5954, "step": 4549 }, { "epoch": 0.42122316727420006, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5733, "step": 4550 }, { "epoch": 0.4213157437944801, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.616, "step": 4551 }, { "epoch": 0.42140832031476017, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5566, "step": 4552 }, { "epoch": 0.4215008968350402, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.571, "step": 4553 }, { "epoch": 0.42159347335532027, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6187, "step": 4554 }, { "epoch": 0.4216860498756003, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5604, "step": 4555 }, { "epoch": 0.4217786263958803, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5655, "step": 4556 }, { "epoch": 0.42187120291616037, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5874, "step": 4557 }, { "epoch": 0.4219637794364404, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5224, "step": 4558 }, { "epoch": 0.4220563559567205, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6681, "step": 4559 }, { "epoch": 0.4221489324770005, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5736, "step": 4560 }, { "epoch": 0.4222415089972806, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5621, "step": 4561 }, { "epoch": 0.42233408551756063, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6073, "step": 4562 }, { "epoch": 0.42242666203784063, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.5996, "step": 4563 }, { "epoch": 0.4225192385581207, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5678, "step": 4564 }, { "epoch": 0.42261181507840073, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5855, "step": 4565 }, { "epoch": 0.4227043915986808, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5989, "step": 4566 }, { "epoch": 0.42279696811896084, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6074, "step": 4567 }, { "epoch": 0.4228895446392409, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5978, "step": 4568 }, { "epoch": 0.42298212115952094, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6012, "step": 4569 }, { "epoch": 0.42307469767980094, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6151, "step": 4570 }, { "epoch": 0.423167274200081, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5633, "step": 4571 }, { "epoch": 0.42325985072036104, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.4883, "step": 4572 }, { "epoch": 0.4233524272406411, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6219, "step": 4573 }, { "epoch": 0.42344500376092115, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6101, "step": 4574 }, { "epoch": 0.4235375802812012, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.598, "step": 4575 }, { "epoch": 0.42363015680148125, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.554, "step": 4576 }, { "epoch": 0.42372273332176125, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5637, "step": 4577 }, { "epoch": 0.4238153098420413, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6173, "step": 4578 }, { "epoch": 0.42390788636232135, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6129, "step": 4579 }, { "epoch": 0.4240004628826014, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5938, "step": 4580 }, { "epoch": 0.42409303940288146, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5648, "step": 4581 }, { "epoch": 0.4241856159231615, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6303, "step": 4582 }, { "epoch": 0.42427819244344156, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.578, "step": 4583 }, { "epoch": 0.42437076896372156, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5716, "step": 4584 }, { "epoch": 0.4244633454840016, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5285, "step": 4585 }, { "epoch": 0.42455592200428166, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6214, "step": 4586 }, { "epoch": 0.4246484985245617, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6134, "step": 4587 }, { "epoch": 0.42474107504484176, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5909, "step": 4588 }, { "epoch": 0.4248336515651218, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5924, "step": 4589 }, { "epoch": 0.4249262280854018, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5629, "step": 4590 }, { "epoch": 0.42501880460568187, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5216, "step": 4591 }, { "epoch": 0.4251113811259619, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6209, "step": 4592 }, { "epoch": 0.42520395764624197, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6058, "step": 4593 }, { "epoch": 0.425296534166522, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5569, "step": 4594 }, { "epoch": 0.4253891106868021, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.538, "step": 4595 }, { "epoch": 0.4254816872070821, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5408, "step": 4596 }, { "epoch": 0.4255742637273621, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6279, "step": 4597 }, { "epoch": 0.4256668402476422, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5667, "step": 4598 }, { "epoch": 0.4257594167679222, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5523, "step": 4599 }, { "epoch": 0.4258519932882023, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.6297, "step": 4600 }, { "epoch": 0.42594456980848233, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5951, "step": 4601 }, { "epoch": 0.4260371463287624, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.5858, "step": 4602 }, { "epoch": 0.42612972284904244, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6128, "step": 4603 }, { "epoch": 0.42622229936932243, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5569, "step": 4604 }, { "epoch": 0.4263148758896025, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6101, "step": 4605 }, { "epoch": 0.42640745240988254, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6742, "step": 4606 }, { "epoch": 0.4265000289301626, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5509, "step": 4607 }, { "epoch": 0.42659260545044264, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6307, "step": 4608 }, { "epoch": 0.4266851819707227, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5433, "step": 4609 }, { "epoch": 0.42677775849100275, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6117, "step": 4610 }, { "epoch": 0.42687033501128274, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5966, "step": 4611 }, { "epoch": 0.4269629115315628, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6227, "step": 4612 }, { "epoch": 0.42705548805184285, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5412, "step": 4613 }, { "epoch": 0.4271480645721229, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6433, "step": 4614 }, { "epoch": 0.42724064109240295, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5747, "step": 4615 }, { "epoch": 0.427333217612683, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5988, "step": 4616 }, { "epoch": 0.427425794132963, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6299, "step": 4617 }, { "epoch": 0.42751837065324305, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5879, "step": 4618 }, { "epoch": 0.4276109471735231, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5556, "step": 4619 }, { "epoch": 0.42770352369380316, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6339, "step": 4620 }, { "epoch": 0.4277961002140832, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5919, "step": 4621 }, { "epoch": 0.42788867673436326, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6252, "step": 4622 }, { "epoch": 0.4279812532546433, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5898, "step": 4623 }, { "epoch": 0.4280738297749233, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.589, "step": 4624 }, { "epoch": 0.42816640629520336, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.625, "step": 4625 }, { "epoch": 0.4282589828154834, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.618, "step": 4626 }, { "epoch": 0.42835155933576347, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6095, "step": 4627 }, { "epoch": 0.4284441358560435, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6287, "step": 4628 }, { "epoch": 0.42853671237632357, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6039, "step": 4629 }, { "epoch": 0.4286292888966036, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5732, "step": 4630 }, { "epoch": 0.4287218654168836, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6061, "step": 4631 }, { "epoch": 0.42881444193716367, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5666, "step": 4632 }, { "epoch": 0.4289070184574437, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5948, "step": 4633 }, { "epoch": 0.4289995949777238, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5487, "step": 4634 }, { "epoch": 0.4290921714980038, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6123, "step": 4635 }, { "epoch": 0.4291847480182839, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6444, "step": 4636 }, { "epoch": 0.42927732453856393, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5958, "step": 4637 }, { "epoch": 0.42936990105884393, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5434, "step": 4638 }, { "epoch": 0.429462477579124, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6169, "step": 4639 }, { "epoch": 0.42955505409940403, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5716, "step": 4640 }, { "epoch": 0.4296476306196841, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6589, "step": 4641 }, { "epoch": 0.42974020713996414, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5798, "step": 4642 }, { "epoch": 0.4298327836602442, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5836, "step": 4643 }, { "epoch": 0.42992536018052424, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6254, "step": 4644 }, { "epoch": 0.43001793670080424, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5951, "step": 4645 }, { "epoch": 0.4301105132210843, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6165, "step": 4646 }, { "epoch": 0.43020308974136434, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5704, "step": 4647 }, { "epoch": 0.4302956662616444, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5434, "step": 4648 }, { "epoch": 0.43038824278192445, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5505, "step": 4649 }, { "epoch": 0.4304808193022045, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5706, "step": 4650 }, { "epoch": 0.4305733958224845, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5844, "step": 4651 }, { "epoch": 0.43066597234276455, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6047, "step": 4652 }, { "epoch": 0.4307585488630446, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5945, "step": 4653 }, { "epoch": 0.43085112538332465, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5545, "step": 4654 }, { "epoch": 0.4309437019036047, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6208, "step": 4655 }, { "epoch": 0.43103627842388476, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6051, "step": 4656 }, { "epoch": 0.4311288549441648, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5962, "step": 4657 }, { "epoch": 0.4312214314644448, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6331, "step": 4658 }, { "epoch": 0.43131400798472486, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6221, "step": 4659 }, { "epoch": 0.4314065845050049, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5742, "step": 4660 }, { "epoch": 0.43149916102528496, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5714, "step": 4661 }, { "epoch": 0.431591737545565, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5328, "step": 4662 }, { "epoch": 0.43168431406584507, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5598, "step": 4663 }, { "epoch": 0.4317768905861251, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5923, "step": 4664 }, { "epoch": 0.4318694671064051, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5421, "step": 4665 }, { "epoch": 0.43196204362668517, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6316, "step": 4666 }, { "epoch": 0.4320546201469652, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.611, "step": 4667 }, { "epoch": 0.43214719666724527, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5944, "step": 4668 }, { "epoch": 0.4322397731875253, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.572, "step": 4669 }, { "epoch": 0.4323323497078054, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.534, "step": 4670 }, { "epoch": 0.4324249262280854, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5814, "step": 4671 }, { "epoch": 0.4325175027483654, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5709, "step": 4672 }, { "epoch": 0.4326100792686455, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5352, "step": 4673 }, { "epoch": 0.4327026557889255, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5398, "step": 4674 }, { "epoch": 0.4327952323092056, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6126, "step": 4675 }, { "epoch": 0.43288780882948563, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5753, "step": 4676 }, { "epoch": 0.4329803853497657, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5966, "step": 4677 }, { "epoch": 0.43307296187004574, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5827, "step": 4678 }, { "epoch": 0.43316553839032573, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.537, "step": 4679 }, { "epoch": 0.4332581149106058, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5989, "step": 4680 }, { "epoch": 0.43335069143088584, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6216, "step": 4681 }, { "epoch": 0.4334432679511659, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.523, "step": 4682 }, { "epoch": 0.43353584447144594, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6209, "step": 4683 }, { "epoch": 0.433628420991726, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5372, "step": 4684 }, { "epoch": 0.433720997512006, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6042, "step": 4685 }, { "epoch": 0.43381357403228604, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.576, "step": 4686 }, { "epoch": 0.4339061505525661, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5958, "step": 4687 }, { "epoch": 0.43399872707284615, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5918, "step": 4688 }, { "epoch": 0.4340913035931262, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6306, "step": 4689 }, { "epoch": 0.43418388011340625, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6193, "step": 4690 }, { "epoch": 0.4342764566336863, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6097, "step": 4691 }, { "epoch": 0.4343690331539663, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5536, "step": 4692 }, { "epoch": 0.43446160967424635, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.6389, "step": 4693 }, { "epoch": 0.4345541861945264, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5533, "step": 4694 }, { "epoch": 0.43464676271480646, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6027, "step": 4695 }, { "epoch": 0.4347393392350865, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5512, "step": 4696 }, { "epoch": 0.43483191575536656, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6184, "step": 4697 }, { "epoch": 0.4349244922756466, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.6185, "step": 4698 }, { "epoch": 0.4350170687959266, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6005, "step": 4699 }, { "epoch": 0.43510964531620666, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.628, "step": 4700 }, { "epoch": 0.4352022218364867, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5702, "step": 4701 }, { "epoch": 0.43529479835676677, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5505, "step": 4702 }, { "epoch": 0.4353873748770468, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6205, "step": 4703 }, { "epoch": 0.43547995139732687, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5705, "step": 4704 }, { "epoch": 0.4355725279176069, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6085, "step": 4705 }, { "epoch": 0.4356651044378869, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6355, "step": 4706 }, { "epoch": 0.43575768095816697, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6231, "step": 4707 }, { "epoch": 0.435850257478447, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5542, "step": 4708 }, { "epoch": 0.4359428339987271, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6085, "step": 4709 }, { "epoch": 0.4360354105190071, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6192, "step": 4710 }, { "epoch": 0.4361279870392872, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5708, "step": 4711 }, { "epoch": 0.43622056355956723, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6533, "step": 4712 }, { "epoch": 0.43631314007984723, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6195, "step": 4713 }, { "epoch": 0.4364057166001273, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6226, "step": 4714 }, { "epoch": 0.43649829312040733, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5419, "step": 4715 }, { "epoch": 0.4365908696406874, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5818, "step": 4716 }, { "epoch": 0.43668344616096744, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5584, "step": 4717 }, { "epoch": 0.4367760226812475, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5771, "step": 4718 }, { "epoch": 0.4368685992015275, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.5385, "step": 4719 }, { "epoch": 0.43696117572180754, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.6139, "step": 4720 }, { "epoch": 0.4370537522420876, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5875, "step": 4721 }, { "epoch": 0.43714632876236764, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5578, "step": 4722 }, { "epoch": 0.4372389052826477, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5593, "step": 4723 }, { "epoch": 0.43733148180292775, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.5683, "step": 4724 }, { "epoch": 0.4374240583232078, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5324, "step": 4725 }, { "epoch": 0.4375166348434878, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6243, "step": 4726 }, { "epoch": 0.43760921136376785, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5824, "step": 4727 }, { "epoch": 0.4377017878840479, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5923, "step": 4728 }, { "epoch": 0.43779436440432795, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5624, "step": 4729 }, { "epoch": 0.437886940924608, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5118, "step": 4730 }, { "epoch": 0.43797951744488806, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5556, "step": 4731 }, { "epoch": 0.4380720939651681, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6186, "step": 4732 }, { "epoch": 0.4381646704854481, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6069, "step": 4733 }, { "epoch": 0.43825724700572816, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5917, "step": 4734 }, { "epoch": 0.4383498235260082, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6041, "step": 4735 }, { "epoch": 0.43844240004628826, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5238, "step": 4736 }, { "epoch": 0.4385349765665683, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5837, "step": 4737 }, { "epoch": 0.43862755308684837, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5512, "step": 4738 }, { "epoch": 0.4387201296071284, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6138, "step": 4739 }, { "epoch": 0.4388127061274084, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5986, "step": 4740 }, { "epoch": 0.43890528264768847, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5692, "step": 4741 }, { "epoch": 0.4389978591679685, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5533, "step": 4742 }, { "epoch": 0.43909043568824857, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6276, "step": 4743 }, { "epoch": 0.4391830122085286, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6314, "step": 4744 }, { "epoch": 0.4392755887288087, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6274, "step": 4745 }, { "epoch": 0.4393681652490887, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5821, "step": 4746 }, { "epoch": 0.4394607417693687, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5417, "step": 4747 }, { "epoch": 0.4395533182896488, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5658, "step": 4748 }, { "epoch": 0.4396458948099288, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6306, "step": 4749 }, { "epoch": 0.4397384713302089, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5915, "step": 4750 }, { "epoch": 0.43983104785048893, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5479, "step": 4751 }, { "epoch": 0.439923624370769, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6272, "step": 4752 }, { "epoch": 0.440016200891049, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5506, "step": 4753 }, { "epoch": 0.44010877741132903, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5466, "step": 4754 }, { "epoch": 0.4402013539316091, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6237, "step": 4755 }, { "epoch": 0.44029393045188914, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6163, "step": 4756 }, { "epoch": 0.4403865069721692, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5718, "step": 4757 }, { "epoch": 0.44047908349244924, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.6571, "step": 4758 }, { "epoch": 0.4405716600127293, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5623, "step": 4759 }, { "epoch": 0.4406642365330093, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6397, "step": 4760 }, { "epoch": 0.44075681305328934, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.627, "step": 4761 }, { "epoch": 0.4408493895735694, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6514, "step": 4762 }, { "epoch": 0.44094196609384945, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5836, "step": 4763 }, { "epoch": 0.4410345426141295, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5145, "step": 4764 }, { "epoch": 0.44112711913440955, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6188, "step": 4765 }, { "epoch": 0.4412196956546896, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5971, "step": 4766 }, { "epoch": 0.4413122721749696, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.67, "step": 4767 }, { "epoch": 0.44140484869524965, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6274, "step": 4768 }, { "epoch": 0.4414974252155297, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5865, "step": 4769 }, { "epoch": 0.44159000173580976, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.6749, "step": 4770 }, { "epoch": 0.4416825782560898, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5823, "step": 4771 }, { "epoch": 0.44177515477636986, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5746, "step": 4772 }, { "epoch": 0.4418677312966499, "grad_norm": 0.5703125, "learning_rate": 0.02, "loss": 1.6225, "step": 4773 }, { "epoch": 0.4419603078169299, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5649, "step": 4774 }, { "epoch": 0.44205288433720996, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5969, "step": 4775 }, { "epoch": 0.44214546085749, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5714, "step": 4776 }, { "epoch": 0.44223803737777007, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5154, "step": 4777 }, { "epoch": 0.4423306138980501, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5613, "step": 4778 }, { "epoch": 0.44242319041833017, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5313, "step": 4779 }, { "epoch": 0.4425157669386102, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6095, "step": 4780 }, { "epoch": 0.4426083434588902, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5675, "step": 4781 }, { "epoch": 0.44270091997917027, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5443, "step": 4782 }, { "epoch": 0.4427934964994503, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5985, "step": 4783 }, { "epoch": 0.4428860730197304, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5337, "step": 4784 }, { "epoch": 0.4429786495400104, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.623, "step": 4785 }, { "epoch": 0.4430712260602905, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6207, "step": 4786 }, { "epoch": 0.4431638025805705, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6004, "step": 4787 }, { "epoch": 0.44325637910085053, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5711, "step": 4788 }, { "epoch": 0.4433489556211306, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5864, "step": 4789 }, { "epoch": 0.44344153214141063, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5904, "step": 4790 }, { "epoch": 0.4435341086616907, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6189, "step": 4791 }, { "epoch": 0.44362668518197074, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5644, "step": 4792 }, { "epoch": 0.4437192617022508, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5978, "step": 4793 }, { "epoch": 0.4438118382225308, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6133, "step": 4794 }, { "epoch": 0.44390441474281084, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6147, "step": 4795 }, { "epoch": 0.4439969912630909, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5561, "step": 4796 }, { "epoch": 0.44408956778337094, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5985, "step": 4797 }, { "epoch": 0.444182144303651, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5668, "step": 4798 }, { "epoch": 0.44427472082393105, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6194, "step": 4799 }, { "epoch": 0.4443672973442111, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5684, "step": 4800 }, { "epoch": 0.4444598738644911, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5336, "step": 4801 }, { "epoch": 0.44455245038477115, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.619, "step": 4802 }, { "epoch": 0.4446450269050512, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6381, "step": 4803 }, { "epoch": 0.44473760342533125, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5861, "step": 4804 }, { "epoch": 0.4448301799456113, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5925, "step": 4805 }, { "epoch": 0.44492275646589136, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6528, "step": 4806 }, { "epoch": 0.4450153329861714, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5431, "step": 4807 }, { "epoch": 0.4451079095064514, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6005, "step": 4808 }, { "epoch": 0.44520048602673146, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6329, "step": 4809 }, { "epoch": 0.4452930625470115, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5346, "step": 4810 }, { "epoch": 0.44538563906729156, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5869, "step": 4811 }, { "epoch": 0.4454782155875716, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5585, "step": 4812 }, { "epoch": 0.44557079210785167, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5826, "step": 4813 }, { "epoch": 0.4456633686281317, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.512, "step": 4814 }, { "epoch": 0.4457559451484117, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5581, "step": 4815 }, { "epoch": 0.44584852166869177, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5445, "step": 4816 }, { "epoch": 0.4459410981889718, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5623, "step": 4817 }, { "epoch": 0.44603367470925187, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6114, "step": 4818 }, { "epoch": 0.4461262512295319, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5814, "step": 4819 }, { "epoch": 0.446218827749812, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.638, "step": 4820 }, { "epoch": 0.44631140427009197, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6134, "step": 4821 }, { "epoch": 0.446403980790372, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5706, "step": 4822 }, { "epoch": 0.4464965573106521, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5607, "step": 4823 }, { "epoch": 0.44658913383093213, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5669, "step": 4824 }, { "epoch": 0.4466817103512122, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5869, "step": 4825 }, { "epoch": 0.44677428687149223, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6242, "step": 4826 }, { "epoch": 0.4468668633917723, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5901, "step": 4827 }, { "epoch": 0.4469594399120523, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5699, "step": 4828 }, { "epoch": 0.44705201643233233, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6239, "step": 4829 }, { "epoch": 0.4471445929526124, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6225, "step": 4830 }, { "epoch": 0.44723716947289244, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5669, "step": 4831 }, { "epoch": 0.4473297459931725, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5358, "step": 4832 }, { "epoch": 0.44742232251345254, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5854, "step": 4833 }, { "epoch": 0.4475148990337326, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5541, "step": 4834 }, { "epoch": 0.4476074755540126, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.4802, "step": 4835 }, { "epoch": 0.44770005207429264, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6207, "step": 4836 }, { "epoch": 0.4477926285945727, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6119, "step": 4837 }, { "epoch": 0.44788520511485275, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5677, "step": 4838 }, { "epoch": 0.4479777816351328, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.624, "step": 4839 }, { "epoch": 0.44807035815541285, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6127, "step": 4840 }, { "epoch": 0.4481629346756929, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5657, "step": 4841 }, { "epoch": 0.4482555111959729, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5734, "step": 4842 }, { "epoch": 0.44834808771625295, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.4944, "step": 4843 }, { "epoch": 0.448440664236533, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5631, "step": 4844 }, { "epoch": 0.44853324075681306, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6115, "step": 4845 }, { "epoch": 0.4486258172770931, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5945, "step": 4846 }, { "epoch": 0.44871839379737316, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5627, "step": 4847 }, { "epoch": 0.4488109703176532, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.6345, "step": 4848 }, { "epoch": 0.4489035468379332, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.598, "step": 4849 }, { "epoch": 0.44899612335821326, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5791, "step": 4850 }, { "epoch": 0.4490886998784933, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.637, "step": 4851 }, { "epoch": 0.44918127639877337, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5911, "step": 4852 }, { "epoch": 0.4492738529190534, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6176, "step": 4853 }, { "epoch": 0.44936642943933347, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5884, "step": 4854 }, { "epoch": 0.44945900595961347, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5821, "step": 4855 }, { "epoch": 0.4495515824798935, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5949, "step": 4856 }, { "epoch": 0.44964415900017357, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5593, "step": 4857 }, { "epoch": 0.4497367355204536, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.597, "step": 4858 }, { "epoch": 0.4498293120407337, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6091, "step": 4859 }, { "epoch": 0.4499218885610137, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5986, "step": 4860 }, { "epoch": 0.4500144650812938, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6156, "step": 4861 }, { "epoch": 0.4501070416015738, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.521, "step": 4862 }, { "epoch": 0.45019961812185383, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5272, "step": 4863 }, { "epoch": 0.4502921946421339, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5673, "step": 4864 }, { "epoch": 0.45038477116241393, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5691, "step": 4865 }, { "epoch": 0.450477347682694, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5539, "step": 4866 }, { "epoch": 0.45056992420297404, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5198, "step": 4867 }, { "epoch": 0.4506625007232541, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5394, "step": 4868 }, { "epoch": 0.4507550772435341, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5548, "step": 4869 }, { "epoch": 0.45084765376381414, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5975, "step": 4870 }, { "epoch": 0.4509402302840942, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5192, "step": 4871 }, { "epoch": 0.45103280680437424, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5534, "step": 4872 }, { "epoch": 0.4511253833246543, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.581, "step": 4873 }, { "epoch": 0.45121795984493435, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5944, "step": 4874 }, { "epoch": 0.4513105363652144, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6471, "step": 4875 }, { "epoch": 0.4514031128854944, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5724, "step": 4876 }, { "epoch": 0.45149568940577445, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5459, "step": 4877 }, { "epoch": 0.4515882659260545, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5167, "step": 4878 }, { "epoch": 0.45168084244633455, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.6038, "step": 4879 }, { "epoch": 0.4517734189666146, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5275, "step": 4880 }, { "epoch": 0.45186599548689466, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6062, "step": 4881 }, { "epoch": 0.45195857200717465, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6199, "step": 4882 }, { "epoch": 0.4520511485274547, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5715, "step": 4883 }, { "epoch": 0.45214372504773476, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5391, "step": 4884 }, { "epoch": 0.4522363015680148, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.678, "step": 4885 }, { "epoch": 0.45232887808829486, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5174, "step": 4886 }, { "epoch": 0.4524214546085749, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6053, "step": 4887 }, { "epoch": 0.45251403112885497, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6344, "step": 4888 }, { "epoch": 0.45260660764913496, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5213, "step": 4889 }, { "epoch": 0.452699184169415, "grad_norm": 0.1328125, "learning_rate": 0.02, "loss": 1.6058, "step": 4890 }, { "epoch": 0.45279176068969507, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5874, "step": 4891 }, { "epoch": 0.4528843372099751, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5516, "step": 4892 }, { "epoch": 0.45297691373025517, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5429, "step": 4893 }, { "epoch": 0.4530694902505352, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6065, "step": 4894 }, { "epoch": 0.4531620667708153, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6063, "step": 4895 }, { "epoch": 0.45325464329109527, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6133, "step": 4896 }, { "epoch": 0.4533472198113753, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5856, "step": 4897 }, { "epoch": 0.4534397963316554, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5771, "step": 4898 }, { "epoch": 0.45353237285193543, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5586, "step": 4899 }, { "epoch": 0.4536249493722155, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5948, "step": 4900 }, { "epoch": 0.45371752589249553, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.535, "step": 4901 }, { "epoch": 0.4538101024127756, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.6473, "step": 4902 }, { "epoch": 0.4539026789330556, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5765, "step": 4903 }, { "epoch": 0.45399525545333563, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.552, "step": 4904 }, { "epoch": 0.4540878319736157, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5337, "step": 4905 }, { "epoch": 0.45418040849389574, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5063, "step": 4906 }, { "epoch": 0.4542729850141758, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5422, "step": 4907 }, { "epoch": 0.45436556153445584, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5495, "step": 4908 }, { "epoch": 0.4544581380547359, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6485, "step": 4909 }, { "epoch": 0.4545507145750159, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6041, "step": 4910 }, { "epoch": 0.45464329109529594, "grad_norm": 0.1318359375, "learning_rate": 0.02, "loss": 1.5321, "step": 4911 }, { "epoch": 0.454735867615576, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.545, "step": 4912 }, { "epoch": 0.45482844413585605, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6113, "step": 4913 }, { "epoch": 0.4549210206561361, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6505, "step": 4914 }, { "epoch": 0.45501359717641615, "grad_norm": 0.130859375, "learning_rate": 0.02, "loss": 1.589, "step": 4915 }, { "epoch": 0.45510617369669615, "grad_norm": 0.130859375, "learning_rate": 0.02, "loss": 1.4747, "step": 4916 }, { "epoch": 0.4551987502169762, "grad_norm": 0.1298828125, "learning_rate": 0.02, "loss": 1.623, "step": 4917 }, { "epoch": 0.45529132673725625, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5873, "step": 4918 }, { "epoch": 0.4553839032575363, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.605, "step": 4919 }, { "epoch": 0.45547647977781636, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5821, "step": 4920 }, { "epoch": 0.4555690562980964, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6358, "step": 4921 }, { "epoch": 0.45566163281837646, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5615, "step": 4922 }, { "epoch": 0.45575420933865646, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5608, "step": 4923 }, { "epoch": 0.4558467858589365, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5909, "step": 4924 }, { "epoch": 0.45593936237921656, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6073, "step": 4925 }, { "epoch": 0.4560319388994966, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6187, "step": 4926 }, { "epoch": 0.45612451541977667, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6156, "step": 4927 }, { "epoch": 0.4562170919400567, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5838, "step": 4928 }, { "epoch": 0.45630966846033677, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5458, "step": 4929 }, { "epoch": 0.45640224498061677, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6099, "step": 4930 }, { "epoch": 0.4564948215008968, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5513, "step": 4931 }, { "epoch": 0.45658739802117687, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.654, "step": 4932 }, { "epoch": 0.4566799745414569, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5393, "step": 4933 }, { "epoch": 0.456772551061737, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5586, "step": 4934 }, { "epoch": 0.456865127582017, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.604, "step": 4935 }, { "epoch": 0.4569577041022971, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5997, "step": 4936 }, { "epoch": 0.4570502806225771, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5936, "step": 4937 }, { "epoch": 0.45714285714285713, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.529, "step": 4938 }, { "epoch": 0.4572354336631372, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.6017, "step": 4939 }, { "epoch": 0.45732801018341723, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5797, "step": 4940 }, { "epoch": 0.4574205867036973, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6091, "step": 4941 }, { "epoch": 0.45751316322397734, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5932, "step": 4942 }, { "epoch": 0.4576057397442574, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5603, "step": 4943 }, { "epoch": 0.4576983162645374, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5473, "step": 4944 }, { "epoch": 0.45779089278481744, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5749, "step": 4945 }, { "epoch": 0.4578834693050975, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5967, "step": 4946 }, { "epoch": 0.45797604582537754, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6556, "step": 4947 }, { "epoch": 0.4580686223456576, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5833, "step": 4948 }, { "epoch": 0.45816119886593765, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5878, "step": 4949 }, { "epoch": 0.45825377538621764, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6003, "step": 4950 }, { "epoch": 0.4583463519064977, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5656, "step": 4951 }, { "epoch": 0.45843892842677775, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.6149, "step": 4952 }, { "epoch": 0.4585315049470578, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5574, "step": 4953 }, { "epoch": 0.45862408146733785, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5644, "step": 4954 }, { "epoch": 0.4587166579876179, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5955, "step": 4955 }, { "epoch": 0.45880923450789796, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5727, "step": 4956 }, { "epoch": 0.45890181102817795, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.561, "step": 4957 }, { "epoch": 0.458994387548458, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5827, "step": 4958 }, { "epoch": 0.45908696406873806, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6292, "step": 4959 }, { "epoch": 0.4591795405890181, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5529, "step": 4960 }, { "epoch": 0.45927211710929816, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5898, "step": 4961 }, { "epoch": 0.4593646936295782, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.593, "step": 4962 }, { "epoch": 0.45945727014985827, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5675, "step": 4963 }, { "epoch": 0.45954984667013826, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5739, "step": 4964 }, { "epoch": 0.4596424231904183, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6362, "step": 4965 }, { "epoch": 0.45973499971069837, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5963, "step": 4966 }, { "epoch": 0.4598275762309784, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5341, "step": 4967 }, { "epoch": 0.45992015275125847, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5792, "step": 4968 }, { "epoch": 0.4600127292715385, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6138, "step": 4969 }, { "epoch": 0.4601053057918186, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6301, "step": 4970 }, { "epoch": 0.46019788231209857, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5923, "step": 4971 }, { "epoch": 0.4602904588323786, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.593, "step": 4972 }, { "epoch": 0.4603830353526587, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5555, "step": 4973 }, { "epoch": 0.46047561187293873, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6212, "step": 4974 }, { "epoch": 0.4605681883932188, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5977, "step": 4975 }, { "epoch": 0.46066076491349883, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.542, "step": 4976 }, { "epoch": 0.4607533414337789, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5552, "step": 4977 }, { "epoch": 0.4608459179540589, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5959, "step": 4978 }, { "epoch": 0.46093849447433893, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6141, "step": 4979 }, { "epoch": 0.461031070994619, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.6296, "step": 4980 }, { "epoch": 0.46112364751489904, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.647, "step": 4981 }, { "epoch": 0.4612162240351791, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5912, "step": 4982 }, { "epoch": 0.46130880055545914, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5685, "step": 4983 }, { "epoch": 0.46140137707573914, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5693, "step": 4984 }, { "epoch": 0.4614939535960192, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5598, "step": 4985 }, { "epoch": 0.46158653011629924, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5667, "step": 4986 }, { "epoch": 0.4616791066365793, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5461, "step": 4987 }, { "epoch": 0.46177168315685935, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5805, "step": 4988 }, { "epoch": 0.4618642596771394, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6048, "step": 4989 }, { "epoch": 0.46195683619741945, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5919, "step": 4990 }, { "epoch": 0.46204941271769945, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5872, "step": 4991 }, { "epoch": 0.4621419892379795, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5659, "step": 4992 }, { "epoch": 0.46223456575825955, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.594, "step": 4993 }, { "epoch": 0.4623271422785396, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6401, "step": 4994 }, { "epoch": 0.46241971879881966, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6329, "step": 4995 }, { "epoch": 0.4625122953190997, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6339, "step": 4996 }, { "epoch": 0.46260487183937976, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5022, "step": 4997 }, { "epoch": 0.46269744835965976, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.63, "step": 4998 }, { "epoch": 0.4627900248799398, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.537, "step": 4999 }, { "epoch": 0.46288260140021986, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6122, "step": 5000 }, { "epoch": 0.4629751779204999, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6579, "step": 5001 }, { "epoch": 0.46306775444077997, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5961, "step": 5002 }, { "epoch": 0.46316033096106, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5256, "step": 5003 }, { "epoch": 0.46325290748134007, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5616, "step": 5004 }, { "epoch": 0.46334548400162007, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5908, "step": 5005 }, { "epoch": 0.4634380605219001, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6272, "step": 5006 }, { "epoch": 0.46353063704218017, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5789, "step": 5007 }, { "epoch": 0.4636232135624602, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6178, "step": 5008 }, { "epoch": 0.4637157900827403, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6064, "step": 5009 }, { "epoch": 0.46380836660302033, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5885, "step": 5010 }, { "epoch": 0.4639009431233004, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5668, "step": 5011 }, { "epoch": 0.4639935196435804, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5139, "step": 5012 }, { "epoch": 0.46408609616386043, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.603, "step": 5013 }, { "epoch": 0.4641786726841405, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5632, "step": 5014 }, { "epoch": 0.46427124920442053, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5896, "step": 5015 }, { "epoch": 0.4643638257247006, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.6015, "step": 5016 }, { "epoch": 0.46445640224498064, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6165, "step": 5017 }, { "epoch": 0.46454897876526063, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6327, "step": 5018 }, { "epoch": 0.4646415552855407, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.587, "step": 5019 }, { "epoch": 0.46473413180582074, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.626, "step": 5020 }, { "epoch": 0.4648267083261008, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6121, "step": 5021 }, { "epoch": 0.46491928484638084, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5399, "step": 5022 }, { "epoch": 0.4650118613666609, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6237, "step": 5023 }, { "epoch": 0.46510443788694095, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5081, "step": 5024 }, { "epoch": 0.46519701440722094, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5853, "step": 5025 }, { "epoch": 0.465289590927501, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6273, "step": 5026 }, { "epoch": 0.46538216744778105, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6433, "step": 5027 }, { "epoch": 0.4654747439680611, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5992, "step": 5028 }, { "epoch": 0.46556732048834115, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5682, "step": 5029 }, { "epoch": 0.4656598970086212, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5806, "step": 5030 }, { "epoch": 0.46575247352890126, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5322, "step": 5031 }, { "epoch": 0.46584505004918125, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5194, "step": 5032 }, { "epoch": 0.4659376265694613, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6416, "step": 5033 }, { "epoch": 0.46603020308974136, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.613, "step": 5034 }, { "epoch": 0.4661227796100214, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5531, "step": 5035 }, { "epoch": 0.46621535613030146, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.4898, "step": 5036 }, { "epoch": 0.4663079326505815, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6057, "step": 5037 }, { "epoch": 0.46640050917086157, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5574, "step": 5038 }, { "epoch": 0.46649308569114156, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6665, "step": 5039 }, { "epoch": 0.4665856622114216, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5461, "step": 5040 }, { "epoch": 0.46667823873170167, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.614, "step": 5041 }, { "epoch": 0.4667708152519817, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5602, "step": 5042 }, { "epoch": 0.46686339177226177, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6208, "step": 5043 }, { "epoch": 0.4669559682925418, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6067, "step": 5044 }, { "epoch": 0.4670485448128219, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5679, "step": 5045 }, { "epoch": 0.46714112133310187, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.606, "step": 5046 }, { "epoch": 0.4672336978533819, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.607, "step": 5047 }, { "epoch": 0.467326274373662, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.5772, "step": 5048 }, { "epoch": 0.46741885089394203, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.6084, "step": 5049 }, { "epoch": 0.4675114274142221, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5495, "step": 5050 }, { "epoch": 0.46760400393450213, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5244, "step": 5051 }, { "epoch": 0.46769658045478213, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5103, "step": 5052 }, { "epoch": 0.4677891569750622, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.661, "step": 5053 }, { "epoch": 0.46788173349534223, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5923, "step": 5054 }, { "epoch": 0.4679743100156223, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5992, "step": 5055 }, { "epoch": 0.46806688653590234, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5888, "step": 5056 }, { "epoch": 0.4681594630561824, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6233, "step": 5057 }, { "epoch": 0.46825203957646244, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5452, "step": 5058 }, { "epoch": 0.46834461609674244, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6027, "step": 5059 }, { "epoch": 0.4684371926170225, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6228, "step": 5060 }, { "epoch": 0.46852976913730254, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.519, "step": 5061 }, { "epoch": 0.4686223456575826, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5966, "step": 5062 }, { "epoch": 0.46871492217786265, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5847, "step": 5063 }, { "epoch": 0.4688074986981427, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6143, "step": 5064 }, { "epoch": 0.46890007521842275, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5503, "step": 5065 }, { "epoch": 0.46899265173870275, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5431, "step": 5066 }, { "epoch": 0.4690852282589828, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.579, "step": 5067 }, { "epoch": 0.46917780477926285, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5345, "step": 5068 }, { "epoch": 0.4692703812995429, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.561, "step": 5069 }, { "epoch": 0.46936295781982296, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.613, "step": 5070 }, { "epoch": 0.469455534340103, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5814, "step": 5071 }, { "epoch": 0.46954811086038306, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5992, "step": 5072 }, { "epoch": 0.46964068738066306, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5806, "step": 5073 }, { "epoch": 0.4697332639009431, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5307, "step": 5074 }, { "epoch": 0.46982584042122316, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.597, "step": 5075 }, { "epoch": 0.4699184169415032, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5508, "step": 5076 }, { "epoch": 0.47001099346178327, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5679, "step": 5077 }, { "epoch": 0.4701035699820633, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.64, "step": 5078 }, { "epoch": 0.47019614650234337, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.556, "step": 5079 }, { "epoch": 0.47028872302262337, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6323, "step": 5080 }, { "epoch": 0.4703812995429034, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6241, "step": 5081 }, { "epoch": 0.47047387606318347, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.6182, "step": 5082 }, { "epoch": 0.4705664525834635, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5931, "step": 5083 }, { "epoch": 0.4706590291037436, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5805, "step": 5084 }, { "epoch": 0.47075160562402363, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.633, "step": 5085 }, { "epoch": 0.4708441821443036, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.5754, "step": 5086 }, { "epoch": 0.4709367586645837, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5537, "step": 5087 }, { "epoch": 0.47102933518486373, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5757, "step": 5088 }, { "epoch": 0.4711219117051438, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6048, "step": 5089 }, { "epoch": 0.47121448822542383, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5495, "step": 5090 }, { "epoch": 0.4713070647457039, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6546, "step": 5091 }, { "epoch": 0.47139964126598394, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5512, "step": 5092 }, { "epoch": 0.47149221778626393, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5345, "step": 5093 }, { "epoch": 0.471584794306544, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.586, "step": 5094 }, { "epoch": 0.47167737082682404, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5092, "step": 5095 }, { "epoch": 0.4717699473471041, "grad_norm": 0.1298828125, "learning_rate": 0.02, "loss": 1.5619, "step": 5096 }, { "epoch": 0.47186252386738414, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.53, "step": 5097 }, { "epoch": 0.4719551003876642, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5472, "step": 5098 }, { "epoch": 0.47204767690794425, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5747, "step": 5099 }, { "epoch": 0.47214025342822424, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5923, "step": 5100 }, { "epoch": 0.4722328299485043, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5399, "step": 5101 }, { "epoch": 0.47232540646878435, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.506, "step": 5102 }, { "epoch": 0.4724179829890644, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.565, "step": 5103 }, { "epoch": 0.47251055950934445, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5535, "step": 5104 }, { "epoch": 0.4726031360296245, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5861, "step": 5105 }, { "epoch": 0.47269571254990456, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6176, "step": 5106 }, { "epoch": 0.47278828907018455, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.6228, "step": 5107 }, { "epoch": 0.4728808655904646, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.5279, "step": 5108 }, { "epoch": 0.47297344211074466, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5505, "step": 5109 }, { "epoch": 0.4730660186310247, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5624, "step": 5110 }, { "epoch": 0.47315859515130476, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6392, "step": 5111 }, { "epoch": 0.4732511716715848, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5223, "step": 5112 }, { "epoch": 0.47334374819186487, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5969, "step": 5113 }, { "epoch": 0.47343632471214486, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5696, "step": 5114 }, { "epoch": 0.4735289012324249, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5923, "step": 5115 }, { "epoch": 0.47362147775270497, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5744, "step": 5116 }, { "epoch": 0.473714054272985, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5612, "step": 5117 }, { "epoch": 0.47380663079326507, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5646, "step": 5118 }, { "epoch": 0.4738992073135451, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6037, "step": 5119 }, { "epoch": 0.4739917838338251, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5591, "step": 5120 }, { "epoch": 0.47408436035410517, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5739, "step": 5121 }, { "epoch": 0.4741769368743852, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6316, "step": 5122 }, { "epoch": 0.4742695133946653, "grad_norm": 0.12890625, "learning_rate": 0.02, "loss": 1.4958, "step": 5123 }, { "epoch": 0.47436208991494533, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5957, "step": 5124 }, { "epoch": 0.4744546664352254, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5495, "step": 5125 }, { "epoch": 0.47454724295550543, "grad_norm": 0.130859375, "learning_rate": 0.02, "loss": 1.5666, "step": 5126 }, { "epoch": 0.47463981947578543, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.598, "step": 5127 }, { "epoch": 0.4747323959960655, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.6183, "step": 5128 }, { "epoch": 0.47482497251634553, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6022, "step": 5129 }, { "epoch": 0.4749175490366256, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5978, "step": 5130 }, { "epoch": 0.47501012555690564, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5331, "step": 5131 }, { "epoch": 0.4751027020771857, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6638, "step": 5132 }, { "epoch": 0.47519527859746574, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5853, "step": 5133 }, { "epoch": 0.47528785511774574, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5829, "step": 5134 }, { "epoch": 0.4753804316380258, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6233, "step": 5135 }, { "epoch": 0.47547300815830584, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5859, "step": 5136 }, { "epoch": 0.4755655846785859, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5846, "step": 5137 }, { "epoch": 0.47565816119886595, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6065, "step": 5138 }, { "epoch": 0.475750737719146, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6059, "step": 5139 }, { "epoch": 0.47584331423942605, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6168, "step": 5140 }, { "epoch": 0.47593589075970605, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5622, "step": 5141 }, { "epoch": 0.4760284672799861, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5803, "step": 5142 }, { "epoch": 0.47612104380026615, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6031, "step": 5143 }, { "epoch": 0.4762136203205462, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5645, "step": 5144 }, { "epoch": 0.47630619684082626, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6759, "step": 5145 }, { "epoch": 0.4763987733611063, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5746, "step": 5146 }, { "epoch": 0.4764913498813863, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.537, "step": 5147 }, { "epoch": 0.47658392640166636, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5538, "step": 5148 }, { "epoch": 0.4766765029219464, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6009, "step": 5149 }, { "epoch": 0.47676907944222646, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.57, "step": 5150 }, { "epoch": 0.4768616559625065, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5735, "step": 5151 }, { "epoch": 0.47695423248278657, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5598, "step": 5152 }, { "epoch": 0.4770468090030666, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.598, "step": 5153 }, { "epoch": 0.4771393855233466, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5372, "step": 5154 }, { "epoch": 0.47723196204362667, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5614, "step": 5155 }, { "epoch": 0.4773245385639067, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5483, "step": 5156 }, { "epoch": 0.47741711508418677, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5707, "step": 5157 }, { "epoch": 0.4775096916044668, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5155, "step": 5158 }, { "epoch": 0.4776022681247469, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5481, "step": 5159 }, { "epoch": 0.47769484464502693, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.631, "step": 5160 }, { "epoch": 0.4777874211653069, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5817, "step": 5161 }, { "epoch": 0.477879997685587, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5436, "step": 5162 }, { "epoch": 0.47797257420586703, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5785, "step": 5163 }, { "epoch": 0.4780651507261471, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5564, "step": 5164 }, { "epoch": 0.47815772724642713, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.6239, "step": 5165 }, { "epoch": 0.4782503037667072, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5507, "step": 5166 }, { "epoch": 0.47834288028698724, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.629, "step": 5167 }, { "epoch": 0.47843545680726723, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5894, "step": 5168 }, { "epoch": 0.4785280333275473, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5949, "step": 5169 }, { "epoch": 0.47862060984782734, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5474, "step": 5170 }, { "epoch": 0.4787131863681074, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6076, "step": 5171 }, { "epoch": 0.47880576288838744, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5487, "step": 5172 }, { "epoch": 0.4788983394086675, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6559, "step": 5173 }, { "epoch": 0.47899091592894755, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.613, "step": 5174 }, { "epoch": 0.47908349244922754, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5819, "step": 5175 }, { "epoch": 0.4791760689695076, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5748, "step": 5176 }, { "epoch": 0.47926864548978765, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5541, "step": 5177 }, { "epoch": 0.4793612220100677, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.588, "step": 5178 }, { "epoch": 0.47945379853034775, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5873, "step": 5179 }, { "epoch": 0.4795463750506278, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5254, "step": 5180 }, { "epoch": 0.4796389515709078, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5601, "step": 5181 }, { "epoch": 0.47973152809118785, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6135, "step": 5182 }, { "epoch": 0.4798241046114679, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5853, "step": 5183 }, { "epoch": 0.47991668113174796, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6079, "step": 5184 }, { "epoch": 0.480009257652028, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.603, "step": 5185 }, { "epoch": 0.48010183417230806, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6292, "step": 5186 }, { "epoch": 0.4801944106925881, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.4767, "step": 5187 }, { "epoch": 0.4802869872128681, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5956, "step": 5188 }, { "epoch": 0.48037956373314816, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5793, "step": 5189 }, { "epoch": 0.4804721402534282, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6129, "step": 5190 }, { "epoch": 0.48056471677370827, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5421, "step": 5191 }, { "epoch": 0.4806572932939883, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5508, "step": 5192 }, { "epoch": 0.48074986981426837, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5944, "step": 5193 }, { "epoch": 0.4808424463345484, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5731, "step": 5194 }, { "epoch": 0.4809350228548284, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5523, "step": 5195 }, { "epoch": 0.48102759937510847, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5948, "step": 5196 }, { "epoch": 0.4811201758953885, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5689, "step": 5197 }, { "epoch": 0.4812127524156686, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5545, "step": 5198 }, { "epoch": 0.48130532893594863, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6645, "step": 5199 }, { "epoch": 0.4813979054562287, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5953, "step": 5200 }, { "epoch": 0.48149048197650873, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5307, "step": 5201 }, { "epoch": 0.48158305849678873, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5539, "step": 5202 }, { "epoch": 0.4816756350170688, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5936, "step": 5203 }, { "epoch": 0.48176821153734883, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5998, "step": 5204 }, { "epoch": 0.4818607880576289, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5581, "step": 5205 }, { "epoch": 0.48195336457790894, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5773, "step": 5206 }, { "epoch": 0.482045941098189, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6036, "step": 5207 }, { "epoch": 0.48213851761846904, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5793, "step": 5208 }, { "epoch": 0.48223109413874904, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6449, "step": 5209 }, { "epoch": 0.4823236706590291, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5928, "step": 5210 }, { "epoch": 0.48241624717930914, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5432, "step": 5211 }, { "epoch": 0.4825088236995892, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6019, "step": 5212 }, { "epoch": 0.48260140021986925, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5829, "step": 5213 }, { "epoch": 0.4826939767401493, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5677, "step": 5214 }, { "epoch": 0.4827865532604293, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6137, "step": 5215 }, { "epoch": 0.48287912978070935, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5235, "step": 5216 }, { "epoch": 0.4829717063009894, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5868, "step": 5217 }, { "epoch": 0.48306428282126945, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5665, "step": 5218 }, { "epoch": 0.4831568593415495, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5567, "step": 5219 }, { "epoch": 0.48324943586182956, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5403, "step": 5220 }, { "epoch": 0.4833420123821096, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6335, "step": 5221 }, { "epoch": 0.4834345889023896, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5777, "step": 5222 }, { "epoch": 0.48352716542266966, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5708, "step": 5223 }, { "epoch": 0.4836197419429497, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5493, "step": 5224 }, { "epoch": 0.48371231846322976, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6235, "step": 5225 }, { "epoch": 0.4838048949835098, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5845, "step": 5226 }, { "epoch": 0.48389747150378987, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5669, "step": 5227 }, { "epoch": 0.4839900480240699, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5014, "step": 5228 }, { "epoch": 0.4840826245443499, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5104, "step": 5229 }, { "epoch": 0.48417520106462997, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5567, "step": 5230 }, { "epoch": 0.48426777758491, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5419, "step": 5231 }, { "epoch": 0.48436035410519007, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5595, "step": 5232 }, { "epoch": 0.4844529306254701, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5907, "step": 5233 }, { "epoch": 0.4845455071457502, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6157, "step": 5234 }, { "epoch": 0.48463808366603023, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6121, "step": 5235 }, { "epoch": 0.4847306601863102, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5972, "step": 5236 }, { "epoch": 0.4848232367065903, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6024, "step": 5237 }, { "epoch": 0.48491581322687033, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5984, "step": 5238 }, { "epoch": 0.4850083897471504, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6026, "step": 5239 }, { "epoch": 0.48510096626743043, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5711, "step": 5240 }, { "epoch": 0.4851935427877105, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5659, "step": 5241 }, { "epoch": 0.48528611930799054, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5055, "step": 5242 }, { "epoch": 0.48537869582827053, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5643, "step": 5243 }, { "epoch": 0.4854712723485506, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.6087, "step": 5244 }, { "epoch": 0.48556384886883064, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5798, "step": 5245 }, { "epoch": 0.4856564253891107, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5219, "step": 5246 }, { "epoch": 0.48574900190939074, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6027, "step": 5247 }, { "epoch": 0.4858415784296708, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5147, "step": 5248 }, { "epoch": 0.4859341549499508, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5634, "step": 5249 }, { "epoch": 0.48602673147023084, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.617, "step": 5250 }, { "epoch": 0.4861193079905109, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5371, "step": 5251 }, { "epoch": 0.48621188451079095, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5322, "step": 5252 }, { "epoch": 0.486304461031071, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6197, "step": 5253 }, { "epoch": 0.48639703755135105, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6107, "step": 5254 }, { "epoch": 0.4864896140716311, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6126, "step": 5255 }, { "epoch": 0.4865821905919111, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.487, "step": 5256 }, { "epoch": 0.48667476711219115, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.581, "step": 5257 }, { "epoch": 0.4867673436324712, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5984, "step": 5258 }, { "epoch": 0.48685992015275126, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6304, "step": 5259 }, { "epoch": 0.4869524966730313, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5557, "step": 5260 }, { "epoch": 0.48704507319331136, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5914, "step": 5261 }, { "epoch": 0.4871376497135914, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5066, "step": 5262 }, { "epoch": 0.4872302262338714, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5189, "step": 5263 }, { "epoch": 0.48732280275415146, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5536, "step": 5264 }, { "epoch": 0.4874153792744315, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6115, "step": 5265 }, { "epoch": 0.48750795579471157, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5907, "step": 5266 }, { "epoch": 0.4876005323149916, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5773, "step": 5267 }, { "epoch": 0.48769310883527167, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6089, "step": 5268 }, { "epoch": 0.4877856853555517, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5679, "step": 5269 }, { "epoch": 0.4878782618758317, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6113, "step": 5270 }, { "epoch": 0.4879708383961118, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5508, "step": 5271 }, { "epoch": 0.4880634149163918, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5656, "step": 5272 }, { "epoch": 0.4881559914366719, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5553, "step": 5273 }, { "epoch": 0.48824856795695193, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5922, "step": 5274 }, { "epoch": 0.488341144477232, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5054, "step": 5275 }, { "epoch": 0.48843372099751203, "grad_norm": 0.1328125, "learning_rate": 0.02, "loss": 1.5433, "step": 5276 }, { "epoch": 0.48852629751779203, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5845, "step": 5277 }, { "epoch": 0.4886188740380721, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5008, "step": 5278 }, { "epoch": 0.48871145055835213, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5799, "step": 5279 }, { "epoch": 0.4888040270786322, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5477, "step": 5280 }, { "epoch": 0.48889660359891224, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5784, "step": 5281 }, { "epoch": 0.4889891801191923, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5902, "step": 5282 }, { "epoch": 0.4890817566394723, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5798, "step": 5283 }, { "epoch": 0.48917433315975234, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.563, "step": 5284 }, { "epoch": 0.4892669096800324, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5473, "step": 5285 }, { "epoch": 0.48935948620031244, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5577, "step": 5286 }, { "epoch": 0.4894520627205925, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5503, "step": 5287 }, { "epoch": 0.48954463924087255, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5766, "step": 5288 }, { "epoch": 0.4896372157611526, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5315, "step": 5289 }, { "epoch": 0.4897297922814326, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5873, "step": 5290 }, { "epoch": 0.48982236880171265, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6014, "step": 5291 }, { "epoch": 0.4899149453219927, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5605, "step": 5292 }, { "epoch": 0.49000752184227275, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5215, "step": 5293 }, { "epoch": 0.4901000983625528, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5837, "step": 5294 }, { "epoch": 0.49019267488283286, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5939, "step": 5295 }, { "epoch": 0.4902852514031129, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5319, "step": 5296 }, { "epoch": 0.4903778279233929, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.586, "step": 5297 }, { "epoch": 0.49047040444367296, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6067, "step": 5298 }, { "epoch": 0.490562980963953, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5843, "step": 5299 }, { "epoch": 0.49065555748423306, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5874, "step": 5300 }, { "epoch": 0.4907481340045131, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5642, "step": 5301 }, { "epoch": 0.49084071052479317, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.592, "step": 5302 }, { "epoch": 0.4909332870450732, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6081, "step": 5303 }, { "epoch": 0.4910258635653532, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.554, "step": 5304 }, { "epoch": 0.49111844008563327, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5694, "step": 5305 }, { "epoch": 0.4912110166059133, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5816, "step": 5306 }, { "epoch": 0.49130359312619337, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.589, "step": 5307 }, { "epoch": 0.4913961696464734, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5347, "step": 5308 }, { "epoch": 0.4914887461667535, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.486, "step": 5309 }, { "epoch": 0.49158132268703353, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5794, "step": 5310 }, { "epoch": 0.4916738992073135, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.623, "step": 5311 }, { "epoch": 0.4917664757275936, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5892, "step": 5312 }, { "epoch": 0.49185905224787363, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6062, "step": 5313 }, { "epoch": 0.4919516287681537, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5238, "step": 5314 }, { "epoch": 0.49204420528843373, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5722, "step": 5315 }, { "epoch": 0.4921367818087138, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5742, "step": 5316 }, { "epoch": 0.4922293583289938, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5703, "step": 5317 }, { "epoch": 0.49232193484927383, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5686, "step": 5318 }, { "epoch": 0.4924145113695539, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6056, "step": 5319 }, { "epoch": 0.49250708788983394, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5212, "step": 5320 }, { "epoch": 0.492599664410114, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5276, "step": 5321 }, { "epoch": 0.49269224093039404, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5485, "step": 5322 }, { "epoch": 0.4927848174506741, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6596, "step": 5323 }, { "epoch": 0.4928773939709541, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6072, "step": 5324 }, { "epoch": 0.49296997049123414, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5931, "step": 5325 }, { "epoch": 0.4930625470115142, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6059, "step": 5326 }, { "epoch": 0.49315512353179425, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5387, "step": 5327 }, { "epoch": 0.4932477000520743, "grad_norm": 0.1318359375, "learning_rate": 0.02, "loss": 1.5824, "step": 5328 }, { "epoch": 0.49334027657235435, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5485, "step": 5329 }, { "epoch": 0.4934328530926344, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5829, "step": 5330 }, { "epoch": 0.4935254296129144, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6059, "step": 5331 }, { "epoch": 0.49361800613319445, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.525, "step": 5332 }, { "epoch": 0.4937105826534745, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5624, "step": 5333 }, { "epoch": 0.49380315917375456, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5449, "step": 5334 }, { "epoch": 0.4938957356940346, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5593, "step": 5335 }, { "epoch": 0.49398831221431466, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5092, "step": 5336 }, { "epoch": 0.4940808887345947, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5592, "step": 5337 }, { "epoch": 0.4941734652548747, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5401, "step": 5338 }, { "epoch": 0.49426604177515476, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6003, "step": 5339 }, { "epoch": 0.4943586182954348, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5927, "step": 5340 }, { "epoch": 0.49445119481571487, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5788, "step": 5341 }, { "epoch": 0.4945437713359949, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5713, "step": 5342 }, { "epoch": 0.49463634785627497, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.57, "step": 5343 }, { "epoch": 0.494728924376555, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5222, "step": 5344 }, { "epoch": 0.494821500896835, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5395, "step": 5345 }, { "epoch": 0.4949140774171151, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5909, "step": 5346 }, { "epoch": 0.4950066539373951, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5382, "step": 5347 }, { "epoch": 0.4950992304576752, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5869, "step": 5348 }, { "epoch": 0.49519180697795523, "grad_norm": 0.1298828125, "learning_rate": 0.02, "loss": 1.5435, "step": 5349 }, { "epoch": 0.4952843834982353, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5902, "step": 5350 }, { "epoch": 0.4953769600185153, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6712, "step": 5351 }, { "epoch": 0.49546953653879533, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6068, "step": 5352 }, { "epoch": 0.4955621130590754, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5549, "step": 5353 }, { "epoch": 0.49565468957935543, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5191, "step": 5354 }, { "epoch": 0.4957472660996355, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5377, "step": 5355 }, { "epoch": 0.49583984261991554, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5552, "step": 5356 }, { "epoch": 0.4959324191401956, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5378, "step": 5357 }, { "epoch": 0.4960249956604756, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5607, "step": 5358 }, { "epoch": 0.49611757218075564, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.595, "step": 5359 }, { "epoch": 0.4962101487010357, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5896, "step": 5360 }, { "epoch": 0.49630272522131574, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5742, "step": 5361 }, { "epoch": 0.4963953017415958, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5511, "step": 5362 }, { "epoch": 0.49648787826187585, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6353, "step": 5363 }, { "epoch": 0.4965804547821559, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5846, "step": 5364 }, { "epoch": 0.4966730313024359, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5004, "step": 5365 }, { "epoch": 0.49676560782271595, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5388, "step": 5366 }, { "epoch": 0.496858184342996, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6027, "step": 5367 }, { "epoch": 0.49695076086327605, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5493, "step": 5368 }, { "epoch": 0.4970433373835561, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5243, "step": 5369 }, { "epoch": 0.49713591390383616, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5527, "step": 5370 }, { "epoch": 0.4972284904241162, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5678, "step": 5371 }, { "epoch": 0.4973210669443962, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5361, "step": 5372 }, { "epoch": 0.49741364346467626, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5033, "step": 5373 }, { "epoch": 0.4975062199849563, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5937, "step": 5374 }, { "epoch": 0.49759879650523636, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5392, "step": 5375 }, { "epoch": 0.4976913730255164, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5877, "step": 5376 }, { "epoch": 0.49778394954579647, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5377, "step": 5377 }, { "epoch": 0.4978765260660765, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5806, "step": 5378 }, { "epoch": 0.4979691025863565, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5221, "step": 5379 }, { "epoch": 0.49806167910663657, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5749, "step": 5380 }, { "epoch": 0.4981542556269166, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5265, "step": 5381 }, { "epoch": 0.49824683214719667, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6026, "step": 5382 }, { "epoch": 0.4983394086674767, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5605, "step": 5383 }, { "epoch": 0.4984319851877568, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.528, "step": 5384 }, { "epoch": 0.4985245617080368, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5318, "step": 5385 }, { "epoch": 0.4986171382283168, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.5616, "step": 5386 }, { "epoch": 0.4987097147485969, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5163, "step": 5387 }, { "epoch": 0.49880229126887693, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5446, "step": 5388 }, { "epoch": 0.498894867789157, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.563, "step": 5389 }, { "epoch": 0.49898744430943703, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5657, "step": 5390 }, { "epoch": 0.4990800208297171, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5155, "step": 5391 }, { "epoch": 0.4991725973499971, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5253, "step": 5392 }, { "epoch": 0.49926517387027713, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.559, "step": 5393 }, { "epoch": 0.4993577503905572, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6396, "step": 5394 }, { "epoch": 0.49945032691083724, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5363, "step": 5395 }, { "epoch": 0.4995429034311173, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5384, "step": 5396 }, { "epoch": 0.49963547995139734, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.4743, "step": 5397 }, { "epoch": 0.4997280564716774, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6183, "step": 5398 }, { "epoch": 0.4998206329919574, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5957, "step": 5399 }, { "epoch": 0.49991320951223744, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5754, "step": 5400 }, { "epoch": 0.5000057860325176, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5451, "step": 5401 }, { "epoch": 0.5000983625527975, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5401, "step": 5402 }, { "epoch": 0.5001909390730775, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6026, "step": 5403 }, { "epoch": 0.5002835155933576, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5788, "step": 5404 }, { "epoch": 0.5003760921136376, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.4953, "step": 5405 }, { "epoch": 0.5004686686339177, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5448, "step": 5406 }, { "epoch": 0.5005612451541978, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5784, "step": 5407 }, { "epoch": 0.5006538216744778, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5271, "step": 5408 }, { "epoch": 0.5007463981947579, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5274, "step": 5409 }, { "epoch": 0.5008389747150379, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6237, "step": 5410 }, { "epoch": 0.500931551235318, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5826, "step": 5411 }, { "epoch": 0.501024127755598, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5492, "step": 5412 }, { "epoch": 0.5011167042758781, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6137, "step": 5413 }, { "epoch": 0.5012092807961581, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5293, "step": 5414 }, { "epoch": 0.5013018573164382, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5341, "step": 5415 }, { "epoch": 0.5013944338367181, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6153, "step": 5416 }, { "epoch": 0.5014870103569982, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5939, "step": 5417 }, { "epoch": 0.5015795868772782, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5577, "step": 5418 }, { "epoch": 0.5016721633975583, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5556, "step": 5419 }, { "epoch": 0.5017647399178383, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5994, "step": 5420 }, { "epoch": 0.5018573164381184, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5808, "step": 5421 }, { "epoch": 0.5019498929583984, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5485, "step": 5422 }, { "epoch": 0.5020424694786785, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6195, "step": 5423 }, { "epoch": 0.5021350459989585, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6219, "step": 5424 }, { "epoch": 0.5022276225192386, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6012, "step": 5425 }, { "epoch": 0.5023201990395186, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6037, "step": 5426 }, { "epoch": 0.5024127755597987, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5328, "step": 5427 }, { "epoch": 0.5025053520800787, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5461, "step": 5428 }, { "epoch": 0.5025979286003587, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5378, "step": 5429 }, { "epoch": 0.5026905051206387, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5835, "step": 5430 }, { "epoch": 0.5027830816409188, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6232, "step": 5431 }, { "epoch": 0.5028756581611988, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6271, "step": 5432 }, { "epoch": 0.5029682346814789, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6059, "step": 5433 }, { "epoch": 0.5030608112017589, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5599, "step": 5434 }, { "epoch": 0.503153387722039, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5803, "step": 5435 }, { "epoch": 0.503245964242319, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5508, "step": 5436 }, { "epoch": 0.5033385407625991, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6099, "step": 5437 }, { "epoch": 0.5034311172828791, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5877, "step": 5438 }, { "epoch": 0.5035236938031592, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.508, "step": 5439 }, { "epoch": 0.5036162703234393, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5323, "step": 5440 }, { "epoch": 0.5037088468437193, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5173, "step": 5441 }, { "epoch": 0.5038014233639994, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5442, "step": 5442 }, { "epoch": 0.5038939998842793, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6403, "step": 5443 }, { "epoch": 0.5039865764045593, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6548, "step": 5444 }, { "epoch": 0.5040791529248394, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5687, "step": 5445 }, { "epoch": 0.5041717294451195, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5597, "step": 5446 }, { "epoch": 0.5042643059653995, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6117, "step": 5447 }, { "epoch": 0.5043568824856796, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5894, "step": 5448 }, { "epoch": 0.5044494590059596, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5926, "step": 5449 }, { "epoch": 0.5045420355262397, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5914, "step": 5450 }, { "epoch": 0.5046346120465197, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.53, "step": 5451 }, { "epoch": 0.5047271885667998, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6101, "step": 5452 }, { "epoch": 0.5048197650870798, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6035, "step": 5453 }, { "epoch": 0.5049123416073599, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6262, "step": 5454 }, { "epoch": 0.5050049181276399, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5474, "step": 5455 }, { "epoch": 0.50509749464792, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5535, "step": 5456 }, { "epoch": 0.5051900711681999, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5687, "step": 5457 }, { "epoch": 0.50528264768848, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5846, "step": 5458 }, { "epoch": 0.50537522420876, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5515, "step": 5459 }, { "epoch": 0.5054678007290401, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5568, "step": 5460 }, { "epoch": 0.5055603772493201, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.541, "step": 5461 }, { "epoch": 0.5056529537696002, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5909, "step": 5462 }, { "epoch": 0.5057455302898802, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5745, "step": 5463 }, { "epoch": 0.5058381068101603, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5156, "step": 5464 }, { "epoch": 0.5059306833304403, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5531, "step": 5465 }, { "epoch": 0.5060232598507204, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6237, "step": 5466 }, { "epoch": 0.5061158363710004, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5672, "step": 5467 }, { "epoch": 0.5062084128912805, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5292, "step": 5468 }, { "epoch": 0.5063009894115605, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.5306, "step": 5469 }, { "epoch": 0.5063935659318405, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.5699, "step": 5470 }, { "epoch": 0.5064861424521205, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5358, "step": 5471 }, { "epoch": 0.5065787189724006, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.643, "step": 5472 }, { "epoch": 0.5066712954926806, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6485, "step": 5473 }, { "epoch": 0.5067638720129607, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6087, "step": 5474 }, { "epoch": 0.5068564485332407, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6516, "step": 5475 }, { "epoch": 0.5069490250535208, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5645, "step": 5476 }, { "epoch": 0.5070416015738008, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6008, "step": 5477 }, { "epoch": 0.5071341780940809, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5659, "step": 5478 }, { "epoch": 0.507226754614361, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5733, "step": 5479 }, { "epoch": 0.507319331134641, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5633, "step": 5480 }, { "epoch": 0.5074119076549211, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6251, "step": 5481 }, { "epoch": 0.5075044841752011, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5369, "step": 5482 }, { "epoch": 0.5075970606954812, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5577, "step": 5483 }, { "epoch": 0.5076896372157611, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.583, "step": 5484 }, { "epoch": 0.5077822137360412, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5729, "step": 5485 }, { "epoch": 0.5078747902563212, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.619, "step": 5486 }, { "epoch": 0.5079673667766013, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.604, "step": 5487 }, { "epoch": 0.5080599432968813, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5296, "step": 5488 }, { "epoch": 0.5081525198171614, "grad_norm": 0.1318359375, "learning_rate": 0.02, "loss": 1.5624, "step": 5489 }, { "epoch": 0.5082450963374414, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6056, "step": 5490 }, { "epoch": 0.5083376728577215, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5433, "step": 5491 }, { "epoch": 0.5084302493780015, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6075, "step": 5492 }, { "epoch": 0.5085228258982816, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5074, "step": 5493 }, { "epoch": 0.5086154024185616, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6281, "step": 5494 }, { "epoch": 0.5087079789388417, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5236, "step": 5495 }, { "epoch": 0.5088005554591217, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5889, "step": 5496 }, { "epoch": 0.5088931319794017, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5567, "step": 5497 }, { "epoch": 0.5089857084996817, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.446, "step": 5498 }, { "epoch": 0.5090782850199618, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5453, "step": 5499 }, { "epoch": 0.5091708615402418, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.61, "step": 5500 }, { "epoch": 0.5092634380605219, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6106, "step": 5501 }, { "epoch": 0.5093560145808019, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.4881, "step": 5502 }, { "epoch": 0.509448591101082, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5881, "step": 5503 }, { "epoch": 0.509541167621362, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5341, "step": 5504 }, { "epoch": 0.5096337441416421, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5998, "step": 5505 }, { "epoch": 0.5097263206619221, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5046, "step": 5506 }, { "epoch": 0.5098188971822022, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5171, "step": 5507 }, { "epoch": 0.5099114737024822, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5175, "step": 5508 }, { "epoch": 0.5100040502227623, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5656, "step": 5509 }, { "epoch": 0.5100966267430423, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.525, "step": 5510 }, { "epoch": 0.5101892032633223, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5537, "step": 5511 }, { "epoch": 0.5102817797836023, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5825, "step": 5512 }, { "epoch": 0.5103743563038824, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5682, "step": 5513 }, { "epoch": 0.5104669328241624, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6305, "step": 5514 }, { "epoch": 0.5105595093444425, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6275, "step": 5515 }, { "epoch": 0.5106520858647225, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5814, "step": 5516 }, { "epoch": 0.5107446623850026, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5702, "step": 5517 }, { "epoch": 0.5108372389052827, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6146, "step": 5518 }, { "epoch": 0.5109298154255627, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5855, "step": 5519 }, { "epoch": 0.5110223919458428, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6187, "step": 5520 }, { "epoch": 0.5111149684661228, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5857, "step": 5521 }, { "epoch": 0.5112075449864029, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6048, "step": 5522 }, { "epoch": 0.5113001215066829, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5783, "step": 5523 }, { "epoch": 0.511392698026963, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5406, "step": 5524 }, { "epoch": 0.5114852745472429, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5278, "step": 5525 }, { "epoch": 0.511577851067523, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5315, "step": 5526 }, { "epoch": 0.511670427587803, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.4632, "step": 5527 }, { "epoch": 0.5117630041080831, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5531, "step": 5528 }, { "epoch": 0.5118555806283631, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5129, "step": 5529 }, { "epoch": 0.5119481571486432, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.4993, "step": 5530 }, { "epoch": 0.5120407336689232, "grad_norm": 0.1328125, "learning_rate": 0.02, "loss": 1.6121, "step": 5531 }, { "epoch": 0.5121333101892033, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5603, "step": 5532 }, { "epoch": 0.5122258867094833, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.582, "step": 5533 }, { "epoch": 0.5123184632297634, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5415, "step": 5534 }, { "epoch": 0.5124110397500434, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6253, "step": 5535 }, { "epoch": 0.5125036162703235, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6506, "step": 5536 }, { "epoch": 0.5125961927906035, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5405, "step": 5537 }, { "epoch": 0.5126887693108835, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5412, "step": 5538 }, { "epoch": 0.5127813458311635, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5546, "step": 5539 }, { "epoch": 0.5128739223514436, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.535, "step": 5540 }, { "epoch": 0.5129664988717236, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.589, "step": 5541 }, { "epoch": 0.5130590753920037, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5297, "step": 5542 }, { "epoch": 0.5131516519122837, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5781, "step": 5543 }, { "epoch": 0.5132442284325638, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5871, "step": 5544 }, { "epoch": 0.5133368049528438, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5262, "step": 5545 }, { "epoch": 0.5134293814731239, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5194, "step": 5546 }, { "epoch": 0.5135219579934039, "grad_norm": 0.12890625, "learning_rate": 0.02, "loss": 1.5894, "step": 5547 }, { "epoch": 0.513614534513684, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5414, "step": 5548 }, { "epoch": 0.513707111033964, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5235, "step": 5549 }, { "epoch": 0.5137996875542441, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5574, "step": 5550 }, { "epoch": 0.5138922640745242, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.4927, "step": 5551 }, { "epoch": 0.5139848405948041, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5426, "step": 5552 }, { "epoch": 0.5140774171150841, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5162, "step": 5553 }, { "epoch": 0.5141699936353642, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5243, "step": 5554 }, { "epoch": 0.5142625701556442, "grad_norm": 0.1328125, "learning_rate": 0.02, "loss": 1.5388, "step": 5555 }, { "epoch": 0.5143551466759243, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5877, "step": 5556 }, { "epoch": 0.5144477231962044, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5154, "step": 5557 }, { "epoch": 0.5145402997164844, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5702, "step": 5558 }, { "epoch": 0.5146328762367645, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5694, "step": 5559 }, { "epoch": 0.5147254527570445, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6226, "step": 5560 }, { "epoch": 0.5148180292773246, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5816, "step": 5561 }, { "epoch": 0.5149106057976046, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5232, "step": 5562 }, { "epoch": 0.5150031823178847, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6383, "step": 5563 }, { "epoch": 0.5150957588381647, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5337, "step": 5564 }, { "epoch": 0.5151883353584447, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6234, "step": 5565 }, { "epoch": 0.5152809118787247, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5013, "step": 5566 }, { "epoch": 0.5153734883990048, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5613, "step": 5567 }, { "epoch": 0.5154660649192848, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5847, "step": 5568 }, { "epoch": 0.5155586414395649, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5715, "step": 5569 }, { "epoch": 0.5156512179598449, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6177, "step": 5570 }, { "epoch": 0.515743794480125, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.539, "step": 5571 }, { "epoch": 0.515836371000405, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5639, "step": 5572 }, { "epoch": 0.5159289475206851, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5264, "step": 5573 }, { "epoch": 0.5160215240409651, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.564, "step": 5574 }, { "epoch": 0.5161141005612452, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5835, "step": 5575 }, { "epoch": 0.5162066770815252, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5913, "step": 5576 }, { "epoch": 0.5162992536018053, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5821, "step": 5577 }, { "epoch": 0.5163918301220853, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5878, "step": 5578 }, { "epoch": 0.5164844066423653, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5942, "step": 5579 }, { "epoch": 0.5165769831626453, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5124, "step": 5580 }, { "epoch": 0.5166695596829254, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6171, "step": 5581 }, { "epoch": 0.5167621362032054, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5279, "step": 5582 }, { "epoch": 0.5168547127234855, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.4688, "step": 5583 }, { "epoch": 0.5169472892437655, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5892, "step": 5584 }, { "epoch": 0.5170398657640456, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5418, "step": 5585 }, { "epoch": 0.5171324422843256, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.535, "step": 5586 }, { "epoch": 0.5172250188046057, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5462, "step": 5587 }, { "epoch": 0.5173175953248857, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5502, "step": 5588 }, { "epoch": 0.5174101718451658, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5061, "step": 5589 }, { "epoch": 0.5175027483654459, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5245, "step": 5590 }, { "epoch": 0.5175953248857259, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5777, "step": 5591 }, { "epoch": 0.517687901406006, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5995, "step": 5592 }, { "epoch": 0.5177804779262859, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.542, "step": 5593 }, { "epoch": 0.517873054446566, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5977, "step": 5594 }, { "epoch": 0.517965630966846, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5656, "step": 5595 }, { "epoch": 0.518058207487126, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5765, "step": 5596 }, { "epoch": 0.5181507840074061, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6417, "step": 5597 }, { "epoch": 0.5182433605276862, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6526, "step": 5598 }, { "epoch": 0.5183359370479662, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.501, "step": 5599 }, { "epoch": 0.5184285135682463, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5727, "step": 5600 }, { "epoch": 0.5185210900885263, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5447, "step": 5601 }, { "epoch": 0.5186136666088064, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5363, "step": 5602 }, { "epoch": 0.5187062431290864, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6159, "step": 5603 }, { "epoch": 0.5187988196493665, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5919, "step": 5604 }, { "epoch": 0.5188913961696465, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5805, "step": 5605 }, { "epoch": 0.5189839726899265, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6058, "step": 5606 }, { "epoch": 0.5190765492102065, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5873, "step": 5607 }, { "epoch": 0.5191691257304866, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5949, "step": 5608 }, { "epoch": 0.5192617022507666, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5869, "step": 5609 }, { "epoch": 0.5193542787710467, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.621, "step": 5610 }, { "epoch": 0.5194468552913267, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6007, "step": 5611 }, { "epoch": 0.5195394318116068, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5925, "step": 5612 }, { "epoch": 0.5196320083318868, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5582, "step": 5613 }, { "epoch": 0.5197245848521669, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5882, "step": 5614 }, { "epoch": 0.5198171613724469, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.515, "step": 5615 }, { "epoch": 0.519909737892727, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5285, "step": 5616 }, { "epoch": 0.520002314413007, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5238, "step": 5617 }, { "epoch": 0.5200948909332871, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5652, "step": 5618 }, { "epoch": 0.5201874674535671, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5393, "step": 5619 }, { "epoch": 0.5202800439738471, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5613, "step": 5620 }, { "epoch": 0.5203726204941271, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5867, "step": 5621 }, { "epoch": 0.5204651970144072, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5207, "step": 5622 }, { "epoch": 0.5205577735346872, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5611, "step": 5623 }, { "epoch": 0.5206503500549673, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5276, "step": 5624 }, { "epoch": 0.5207429265752473, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5444, "step": 5625 }, { "epoch": 0.5208355030955274, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6065, "step": 5626 }, { "epoch": 0.5209280796158074, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5021, "step": 5627 }, { "epoch": 0.5210206561360875, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5988, "step": 5628 }, { "epoch": 0.5211132326563676, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5045, "step": 5629 }, { "epoch": 0.5212058091766476, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.63, "step": 5630 }, { "epoch": 0.5212983856969277, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.4858, "step": 5631 }, { "epoch": 0.5213909622172077, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6249, "step": 5632 }, { "epoch": 0.5214835387374877, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5701, "step": 5633 }, { "epoch": 0.5215761152577677, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5857, "step": 5634 }, { "epoch": 0.5216686917780478, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5783, "step": 5635 }, { "epoch": 0.5217612682983278, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.587, "step": 5636 }, { "epoch": 0.5218538448186079, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5809, "step": 5637 }, { "epoch": 0.5219464213388879, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5329, "step": 5638 }, { "epoch": 0.522038997859168, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.576, "step": 5639 }, { "epoch": 0.522131574379448, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5481, "step": 5640 }, { "epoch": 0.5222241508997281, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5401, "step": 5641 }, { "epoch": 0.5223167274200081, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5676, "step": 5642 }, { "epoch": 0.5224093039402882, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5298, "step": 5643 }, { "epoch": 0.5225018804605682, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5904, "step": 5644 }, { "epoch": 0.5225944569808483, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5045, "step": 5645 }, { "epoch": 0.5226870335011283, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5447, "step": 5646 }, { "epoch": 0.5227796100214083, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5638, "step": 5647 }, { "epoch": 0.5228721865416883, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6583, "step": 5648 }, { "epoch": 0.5229647630619684, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5409, "step": 5649 }, { "epoch": 0.5230573395822484, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5308, "step": 5650 }, { "epoch": 0.5231499161025285, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5416, "step": 5651 }, { "epoch": 0.5232424926228085, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.54, "step": 5652 }, { "epoch": 0.5233350691430886, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5503, "step": 5653 }, { "epoch": 0.5234276456633686, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5672, "step": 5654 }, { "epoch": 0.5235202221836487, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5595, "step": 5655 }, { "epoch": 0.5236127987039287, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5893, "step": 5656 }, { "epoch": 0.5237053752242088, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6014, "step": 5657 }, { "epoch": 0.5237979517444888, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6032, "step": 5658 }, { "epoch": 0.5238905282647689, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5902, "step": 5659 }, { "epoch": 0.5239831047850488, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.4611, "step": 5660 }, { "epoch": 0.5240756813053289, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6066, "step": 5661 }, { "epoch": 0.5241682578256089, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5992, "step": 5662 }, { "epoch": 0.524260834345889, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.5699, "step": 5663 }, { "epoch": 0.524353410866169, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5554, "step": 5664 }, { "epoch": 0.5244459873864491, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6391, "step": 5665 }, { "epoch": 0.5245385639067291, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5671, "step": 5666 }, { "epoch": 0.5246311404270092, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5848, "step": 5667 }, { "epoch": 0.5247237169472893, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5572, "step": 5668 }, { "epoch": 0.5248162934675693, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5508, "step": 5669 }, { "epoch": 0.5249088699878494, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6037, "step": 5670 }, { "epoch": 0.5250014465081294, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5491, "step": 5671 }, { "epoch": 0.5250940230284095, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5112, "step": 5672 }, { "epoch": 0.5251865995486895, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5568, "step": 5673 }, { "epoch": 0.5252791760689695, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5084, "step": 5674 }, { "epoch": 0.5253717525892495, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.533, "step": 5675 }, { "epoch": 0.5254643291095296, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6157, "step": 5676 }, { "epoch": 0.5255569056298096, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5642, "step": 5677 }, { "epoch": 0.5256494821500897, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6299, "step": 5678 }, { "epoch": 0.5257420586703697, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6029, "step": 5679 }, { "epoch": 0.5258346351906498, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5389, "step": 5680 }, { "epoch": 0.5259272117109298, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6055, "step": 5681 }, { "epoch": 0.5260197882312099, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6091, "step": 5682 }, { "epoch": 0.5261123647514899, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5225, "step": 5683 }, { "epoch": 0.52620494127177, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5242, "step": 5684 }, { "epoch": 0.52629751779205, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5286, "step": 5685 }, { "epoch": 0.5263900943123301, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.4955, "step": 5686 }, { "epoch": 0.5264826708326101, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.525, "step": 5687 }, { "epoch": 0.5265752473528901, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.568, "step": 5688 }, { "epoch": 0.5266678238731701, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6592, "step": 5689 }, { "epoch": 0.5267604003934502, "grad_norm": 0.130859375, "learning_rate": 0.02, "loss": 1.511, "step": 5690 }, { "epoch": 0.5268529769137302, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5322, "step": 5691 }, { "epoch": 0.5269455534340103, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.6176, "step": 5692 }, { "epoch": 0.5270381299542903, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5095, "step": 5693 }, { "epoch": 0.5271307064745704, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5558, "step": 5694 }, { "epoch": 0.5272232829948504, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5321, "step": 5695 }, { "epoch": 0.5273158595151305, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5475, "step": 5696 }, { "epoch": 0.5274084360354105, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.525, "step": 5697 }, { "epoch": 0.5275010125556906, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5447, "step": 5698 }, { "epoch": 0.5275935890759706, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5739, "step": 5699 }, { "epoch": 0.5276861655962507, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5947, "step": 5700 }, { "epoch": 0.5277787421165306, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5814, "step": 5701 }, { "epoch": 0.5278713186368107, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5637, "step": 5702 }, { "epoch": 0.5279638951570907, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.542, "step": 5703 }, { "epoch": 0.5280564716773708, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6247, "step": 5704 }, { "epoch": 0.5281490481976508, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5731, "step": 5705 }, { "epoch": 0.5282416247179309, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5614, "step": 5706 }, { "epoch": 0.528334201238211, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5748, "step": 5707 }, { "epoch": 0.528426777758491, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6907, "step": 5708 }, { "epoch": 0.5285193542787711, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.575, "step": 5709 }, { "epoch": 0.5286119307990511, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5058, "step": 5710 }, { "epoch": 0.5287045073193312, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5229, "step": 5711 }, { "epoch": 0.5287970838396112, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5645, "step": 5712 }, { "epoch": 0.5288896603598913, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5277, "step": 5713 }, { "epoch": 0.5289822368801713, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6384, "step": 5714 }, { "epoch": 0.5290748134004513, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5712, "step": 5715 }, { "epoch": 0.5291673899207313, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5863, "step": 5716 }, { "epoch": 0.5292599664410114, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.4992, "step": 5717 }, { "epoch": 0.5293525429612914, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.589, "step": 5718 }, { "epoch": 0.5294451194815715, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5657, "step": 5719 }, { "epoch": 0.5295376960018515, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6115, "step": 5720 }, { "epoch": 0.5296302725221316, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5699, "step": 5721 }, { "epoch": 0.5297228490424116, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5456, "step": 5722 }, { "epoch": 0.5298154255626917, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5527, "step": 5723 }, { "epoch": 0.5299080020829717, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5857, "step": 5724 }, { "epoch": 0.5300005786032518, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5903, "step": 5725 }, { "epoch": 0.5300931551235318, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5664, "step": 5726 }, { "epoch": 0.5301857316438119, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.542, "step": 5727 }, { "epoch": 0.5302783081640918, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5147, "step": 5728 }, { "epoch": 0.5303708846843719, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.4981, "step": 5729 }, { "epoch": 0.5304634612046519, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5373, "step": 5730 }, { "epoch": 0.530556037724932, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5503, "step": 5731 }, { "epoch": 0.530648614245212, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.552, "step": 5732 }, { "epoch": 0.5307411907654921, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5609, "step": 5733 }, { "epoch": 0.5308337672857721, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5659, "step": 5734 }, { "epoch": 0.5309263438060522, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5689, "step": 5735 }, { "epoch": 0.5310189203263322, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5796, "step": 5736 }, { "epoch": 0.5311114968466123, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.4686, "step": 5737 }, { "epoch": 0.5312040733668923, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5829, "step": 5738 }, { "epoch": 0.5312966498871724, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5861, "step": 5739 }, { "epoch": 0.5313892264074525, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5235, "step": 5740 }, { "epoch": 0.5314818029277325, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5315, "step": 5741 }, { "epoch": 0.5315743794480124, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5576, "step": 5742 }, { "epoch": 0.5316669559682925, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5559, "step": 5743 }, { "epoch": 0.5317595324885726, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5712, "step": 5744 }, { "epoch": 0.5318521090088526, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5565, "step": 5745 }, { "epoch": 0.5319446855291327, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5852, "step": 5746 }, { "epoch": 0.5320372620494127, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5734, "step": 5747 }, { "epoch": 0.5321298385696928, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.5662, "step": 5748 }, { "epoch": 0.5322224150899728, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6231, "step": 5749 }, { "epoch": 0.5323149916102529, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6235, "step": 5750 }, { "epoch": 0.5324075681305329, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5688, "step": 5751 }, { "epoch": 0.532500144650813, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6294, "step": 5752 }, { "epoch": 0.532592721171093, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6234, "step": 5753 }, { "epoch": 0.5326852976913731, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5155, "step": 5754 }, { "epoch": 0.5327778742116531, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5926, "step": 5755 }, { "epoch": 0.5328704507319331, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.4946, "step": 5756 }, { "epoch": 0.5329630272522131, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5543, "step": 5757 }, { "epoch": 0.5330556037724932, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5824, "step": 5758 }, { "epoch": 0.5331481802927732, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5393, "step": 5759 }, { "epoch": 0.5332407568130533, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5693, "step": 5760 }, { "epoch": 0.5333333333333333, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5481, "step": 5761 }, { "epoch": 0.5334259098536134, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5586, "step": 5762 }, { "epoch": 0.5335184863738934, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5419, "step": 5763 }, { "epoch": 0.5336110628941735, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5489, "step": 5764 }, { "epoch": 0.5337036394144535, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5127, "step": 5765 }, { "epoch": 0.5337962159347336, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5612, "step": 5766 }, { "epoch": 0.5338887924550136, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.4899, "step": 5767 }, { "epoch": 0.5339813689752937, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5442, "step": 5768 }, { "epoch": 0.5340739454955736, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5307, "step": 5769 }, { "epoch": 0.5341665220158537, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5282, "step": 5770 }, { "epoch": 0.5342590985361337, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6234, "step": 5771 }, { "epoch": 0.5343516750564138, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5161, "step": 5772 }, { "epoch": 0.5344442515766938, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5619, "step": 5773 }, { "epoch": 0.5345368280969739, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.4989, "step": 5774 }, { "epoch": 0.534629404617254, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5622, "step": 5775 }, { "epoch": 0.534721981137534, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5197, "step": 5776 }, { "epoch": 0.534814557657814, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5258, "step": 5777 }, { "epoch": 0.5349071341780941, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5959, "step": 5778 }, { "epoch": 0.5349997106983742, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5726, "step": 5779 }, { "epoch": 0.5350922872186542, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5748, "step": 5780 }, { "epoch": 0.5351848637389343, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5471, "step": 5781 }, { "epoch": 0.5352774402592143, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5755, "step": 5782 }, { "epoch": 0.5353700167794943, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5694, "step": 5783 }, { "epoch": 0.5354625932997743, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5967, "step": 5784 }, { "epoch": 0.5355551698200544, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.618, "step": 5785 }, { "epoch": 0.5356477463403344, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.622, "step": 5786 }, { "epoch": 0.5357403228606145, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5226, "step": 5787 }, { "epoch": 0.5358328993808945, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5578, "step": 5788 }, { "epoch": 0.5359254759011746, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5921, "step": 5789 }, { "epoch": 0.5360180524214546, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5601, "step": 5790 }, { "epoch": 0.5361106289417347, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5137, "step": 5791 }, { "epoch": 0.5362032054620147, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5625, "step": 5792 }, { "epoch": 0.5362957819822948, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5131, "step": 5793 }, { "epoch": 0.5363883585025748, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5666, "step": 5794 }, { "epoch": 0.5364809350228549, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5945, "step": 5795 }, { "epoch": 0.5365735115431348, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5771, "step": 5796 }, { "epoch": 0.5366660880634149, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5952, "step": 5797 }, { "epoch": 0.5367586645836949, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5912, "step": 5798 }, { "epoch": 0.536851241103975, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5397, "step": 5799 }, { "epoch": 0.536943817624255, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5784, "step": 5800 }, { "epoch": 0.5370363941445351, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5275, "step": 5801 }, { "epoch": 0.5371289706648151, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5367, "step": 5802 }, { "epoch": 0.5372215471850952, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6292, "step": 5803 }, { "epoch": 0.5373141237053752, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.4849, "step": 5804 }, { "epoch": 0.5374067002256553, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5716, "step": 5805 }, { "epoch": 0.5374992767459353, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6047, "step": 5806 }, { "epoch": 0.5375918532662154, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5968, "step": 5807 }, { "epoch": 0.5376844297864954, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5546, "step": 5808 }, { "epoch": 0.5377770063067755, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5526, "step": 5809 }, { "epoch": 0.5378695828270554, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5768, "step": 5810 }, { "epoch": 0.5379621593473355, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6045, "step": 5811 }, { "epoch": 0.5380547358676155, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5763, "step": 5812 }, { "epoch": 0.5381473123878956, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.5734, "step": 5813 }, { "epoch": 0.5382398889081756, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.4989, "step": 5814 }, { "epoch": 0.5383324654284557, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5008, "step": 5815 }, { "epoch": 0.5384250419487357, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5593, "step": 5816 }, { "epoch": 0.5385176184690158, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.5773, "step": 5817 }, { "epoch": 0.5386101949892959, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.551, "step": 5818 }, { "epoch": 0.5387027715095759, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6043, "step": 5819 }, { "epoch": 0.538795348029856, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5852, "step": 5820 }, { "epoch": 0.538887924550136, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.609, "step": 5821 }, { "epoch": 0.5389805010704161, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5678, "step": 5822 }, { "epoch": 0.5390730775906961, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5503, "step": 5823 }, { "epoch": 0.5391656541109761, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.539, "step": 5824 }, { "epoch": 0.5392582306312561, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.533, "step": 5825 }, { "epoch": 0.5393508071515362, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5477, "step": 5826 }, { "epoch": 0.5394433836718162, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5122, "step": 5827 }, { "epoch": 0.5395359601920963, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5745, "step": 5828 }, { "epoch": 0.5396285367123763, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.4951, "step": 5829 }, { "epoch": 0.5397211132326564, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5755, "step": 5830 }, { "epoch": 0.5398136897529364, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.4915, "step": 5831 }, { "epoch": 0.5399062662732165, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6049, "step": 5832 }, { "epoch": 0.5399988427934965, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5282, "step": 5833 }, { "epoch": 0.5400914193137766, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5428, "step": 5834 }, { "epoch": 0.5401839958340566, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6081, "step": 5835 }, { "epoch": 0.5402765723543367, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5754, "step": 5836 }, { "epoch": 0.5403691488746166, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5854, "step": 5837 }, { "epoch": 0.5404617253948967, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5622, "step": 5838 }, { "epoch": 0.5405543019151767, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.537, "step": 5839 }, { "epoch": 0.5406468784354568, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5068, "step": 5840 }, { "epoch": 0.5407394549557368, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.4914, "step": 5841 }, { "epoch": 0.5408320314760169, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5127, "step": 5842 }, { "epoch": 0.5409246079962969, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5356, "step": 5843 }, { "epoch": 0.541017184516577, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5117, "step": 5844 }, { "epoch": 0.541109761036857, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5753, "step": 5845 }, { "epoch": 0.5412023375571371, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5767, "step": 5846 }, { "epoch": 0.5412949140774171, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5817, "step": 5847 }, { "epoch": 0.5413874905976972, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5504, "step": 5848 }, { "epoch": 0.5414800671179772, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5873, "step": 5849 }, { "epoch": 0.5415726436382573, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.609, "step": 5850 }, { "epoch": 0.5416652201585372, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6099, "step": 5851 }, { "epoch": 0.5417577966788173, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5399, "step": 5852 }, { "epoch": 0.5418503731990973, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.4805, "step": 5853 }, { "epoch": 0.5419429497193774, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5661, "step": 5854 }, { "epoch": 0.5420355262396575, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5368, "step": 5855 }, { "epoch": 0.5421281027599375, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5484, "step": 5856 }, { "epoch": 0.5422206792802176, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6101, "step": 5857 }, { "epoch": 0.5423132558004976, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5662, "step": 5858 }, { "epoch": 0.5424058323207777, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5391, "step": 5859 }, { "epoch": 0.5424984088410577, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5583, "step": 5860 }, { "epoch": 0.5425909853613378, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5513, "step": 5861 }, { "epoch": 0.5426835618816178, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5707, "step": 5862 }, { "epoch": 0.5427761384018979, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5909, "step": 5863 }, { "epoch": 0.5428687149221778, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5945, "step": 5864 }, { "epoch": 0.5429612914424579, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.5323, "step": 5865 }, { "epoch": 0.5430538679627379, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5626, "step": 5866 }, { "epoch": 0.543146444483018, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.5511, "step": 5867 }, { "epoch": 0.543239021003298, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5997, "step": 5868 }, { "epoch": 0.5433315975235781, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6159, "step": 5869 }, { "epoch": 0.5434241740438581, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.57, "step": 5870 }, { "epoch": 0.5435167505641382, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.4412, "step": 5871 }, { "epoch": 0.5436093270844182, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5522, "step": 5872 }, { "epoch": 0.5437019036046983, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.4979, "step": 5873 }, { "epoch": 0.5437944801249783, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5915, "step": 5874 }, { "epoch": 0.5438870566452584, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6201, "step": 5875 }, { "epoch": 0.5439796331655384, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5164, "step": 5876 }, { "epoch": 0.5440722096858185, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.572, "step": 5877 }, { "epoch": 0.5441647862060984, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6121, "step": 5878 }, { "epoch": 0.5442573627263785, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5557, "step": 5879 }, { "epoch": 0.5443499392466585, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.4983, "step": 5880 }, { "epoch": 0.5444425157669386, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5493, "step": 5881 }, { "epoch": 0.5445350922872186, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5164, "step": 5882 }, { "epoch": 0.5446276688074987, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.581, "step": 5883 }, { "epoch": 0.5447202453277787, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5421, "step": 5884 }, { "epoch": 0.5448128218480588, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5711, "step": 5885 }, { "epoch": 0.5449053983683388, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5623, "step": 5886 }, { "epoch": 0.5449979748886189, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.4868, "step": 5887 }, { "epoch": 0.545090551408899, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6125, "step": 5888 }, { "epoch": 0.545183127929179, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5816, "step": 5889 }, { "epoch": 0.545275704449459, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5506, "step": 5890 }, { "epoch": 0.5453682809697391, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5072, "step": 5891 }, { "epoch": 0.545460857490019, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5341, "step": 5892 }, { "epoch": 0.5455534340102991, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5754, "step": 5893 }, { "epoch": 0.5456460105305792, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.4926, "step": 5894 }, { "epoch": 0.5457385870508592, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.562, "step": 5895 }, { "epoch": 0.5458311635711393, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.4652, "step": 5896 }, { "epoch": 0.5459237400914193, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5794, "step": 5897 }, { "epoch": 0.5460163166116994, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5151, "step": 5898 }, { "epoch": 0.5461088931319794, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5507, "step": 5899 }, { "epoch": 0.5462014696522595, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5732, "step": 5900 }, { "epoch": 0.5462940461725395, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6071, "step": 5901 }, { "epoch": 0.5463866226928196, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5658, "step": 5902 }, { "epoch": 0.5463866226928196, "eval_loss": 1.5515261888504028, "eval_runtime": 59.9872, "eval_samples_per_second": 25.405, "eval_steps_per_second": 6.351, "step": 5902 }, { "epoch": 0.5464791992130996, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6059, "step": 5903 }, { "epoch": 0.5465717757333797, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.565, "step": 5904 }, { "epoch": 0.5466643522536596, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5874, "step": 5905 }, { "epoch": 0.5467569287739397, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.595, "step": 5906 }, { "epoch": 0.5468495052942197, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5294, "step": 5907 }, { "epoch": 0.5469420818144998, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5086, "step": 5908 }, { "epoch": 0.5470346583347798, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5447, "step": 5909 }, { "epoch": 0.5471272348550599, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5581, "step": 5910 }, { "epoch": 0.5472198113753399, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5498, "step": 5911 }, { "epoch": 0.54731238789562, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6479, "step": 5912 }, { "epoch": 0.5474049644159, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.581, "step": 5913 }, { "epoch": 0.5474975409361801, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.569, "step": 5914 }, { "epoch": 0.5475901174564601, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5129, "step": 5915 }, { "epoch": 0.5476826939767402, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5106, "step": 5916 }, { "epoch": 0.5477752704970202, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.543, "step": 5917 }, { "epoch": 0.5478678470173003, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6299, "step": 5918 }, { "epoch": 0.5479604235375802, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5737, "step": 5919 }, { "epoch": 0.5480530000578603, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5662, "step": 5920 }, { "epoch": 0.5481455765781403, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5858, "step": 5921 }, { "epoch": 0.5482381530984204, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5562, "step": 5922 }, { "epoch": 0.5483307296187004, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.566, "step": 5923 }, { "epoch": 0.5484233061389805, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5291, "step": 5924 }, { "epoch": 0.5485158826592605, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5176, "step": 5925 }, { "epoch": 0.5486084591795406, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5948, "step": 5926 }, { "epoch": 0.5487010356998206, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.562, "step": 5927 }, { "epoch": 0.5487936122201007, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5255, "step": 5928 }, { "epoch": 0.5488861887403808, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.4922, "step": 5929 }, { "epoch": 0.5489787652606608, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5217, "step": 5930 }, { "epoch": 0.5490713417809409, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5319, "step": 5931 }, { "epoch": 0.5491639183012208, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5527, "step": 5932 }, { "epoch": 0.5492564948215009, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5471, "step": 5933 }, { "epoch": 0.5493490713417809, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.545, "step": 5934 }, { "epoch": 0.549441647862061, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5043, "step": 5935 }, { "epoch": 0.549534224382341, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5489, "step": 5936 }, { "epoch": 0.5496268009026211, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5497, "step": 5937 }, { "epoch": 0.5497193774229011, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5397, "step": 5938 }, { "epoch": 0.5498119539431812, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.519, "step": 5939 }, { "epoch": 0.5499045304634612, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.593, "step": 5940 }, { "epoch": 0.5499971069837413, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5034, "step": 5941 }, { "epoch": 0.5500896835040213, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5427, "step": 5942 }, { "epoch": 0.5501822600243014, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5985, "step": 5943 }, { "epoch": 0.5502748365445814, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5973, "step": 5944 }, { "epoch": 0.5503674130648615, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5879, "step": 5945 }, { "epoch": 0.5504599895851414, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5132, "step": 5946 }, { "epoch": 0.5505525661054215, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5282, "step": 5947 }, { "epoch": 0.5506451426257015, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5109, "step": 5948 }, { "epoch": 0.5507377191459816, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5365, "step": 5949 }, { "epoch": 0.5508302956662616, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.5245, "step": 5950 }, { "epoch": 0.5509228721865417, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5004, "step": 5951 }, { "epoch": 0.5510154487068217, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5682, "step": 5952 }, { "epoch": 0.5511080252271018, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5883, "step": 5953 }, { "epoch": 0.5512006017473818, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5506, "step": 5954 }, { "epoch": 0.5512931782676619, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5621, "step": 5955 }, { "epoch": 0.5513857547879419, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5737, "step": 5956 }, { "epoch": 0.551478331308222, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5726, "step": 5957 }, { "epoch": 0.551570907828502, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5612, "step": 5958 }, { "epoch": 0.551663484348782, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.448, "step": 5959 }, { "epoch": 0.551756060869062, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.5272, "step": 5960 }, { "epoch": 0.5518486373893421, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5679, "step": 5961 }, { "epoch": 0.5519412139096221, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.4825, "step": 5962 }, { "epoch": 0.5520337904299022, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5138, "step": 5963 }, { "epoch": 0.5521263669501822, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5615, "step": 5964 }, { "epoch": 0.5522189434704623, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5261, "step": 5965 }, { "epoch": 0.5523115199907424, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5633, "step": 5966 }, { "epoch": 0.5524040965110224, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5021, "step": 5967 }, { "epoch": 0.5524966730313025, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6194, "step": 5968 }, { "epoch": 0.5525892495515825, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5489, "step": 5969 }, { "epoch": 0.5526818260718626, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5773, "step": 5970 }, { "epoch": 0.5527744025921426, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6275, "step": 5971 }, { "epoch": 0.5528669791124227, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5864, "step": 5972 }, { "epoch": 0.5529595556327026, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5235, "step": 5973 }, { "epoch": 0.5530521321529827, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.574, "step": 5974 }, { "epoch": 0.5531447086732627, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5496, "step": 5975 }, { "epoch": 0.5532372851935428, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5547, "step": 5976 }, { "epoch": 0.5533298617138228, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5179, "step": 5977 }, { "epoch": 0.5534224382341029, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6324, "step": 5978 }, { "epoch": 0.5535150147543829, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6043, "step": 5979 }, { "epoch": 0.553607591274663, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5972, "step": 5980 }, { "epoch": 0.553700167794943, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5851, "step": 5981 }, { "epoch": 0.5537927443152231, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5137, "step": 5982 }, { "epoch": 0.5538853208355031, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5073, "step": 5983 }, { "epoch": 0.5539778973557832, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5631, "step": 5984 }, { "epoch": 0.5540704738760632, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5708, "step": 5985 }, { "epoch": 0.5541630503963433, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.505, "step": 5986 }, { "epoch": 0.5542556269166232, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.513, "step": 5987 }, { "epoch": 0.5543482034369033, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5705, "step": 5988 }, { "epoch": 0.5544407799571833, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6026, "step": 5989 }, { "epoch": 0.5545333564774634, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.505, "step": 5990 }, { "epoch": 0.5546259329977434, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5464, "step": 5991 }, { "epoch": 0.5547185095180235, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5789, "step": 5992 }, { "epoch": 0.5548110860383035, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6512, "step": 5993 }, { "epoch": 0.5549036625585836, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6024, "step": 5994 }, { "epoch": 0.5549962390788636, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5744, "step": 5995 }, { "epoch": 0.5550888155991437, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5413, "step": 5996 }, { "epoch": 0.5551813921194237, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5813, "step": 5997 }, { "epoch": 0.5552739686397038, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.55, "step": 5998 }, { "epoch": 0.5553665451599838, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6071, "step": 5999 }, { "epoch": 0.5554591216802638, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.6078, "step": 6000 }, { "epoch": 0.5555516982005438, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5247, "step": 6001 }, { "epoch": 0.5556442747208239, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6026, "step": 6002 }, { "epoch": 0.555736851241104, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5895, "step": 6003 }, { "epoch": 0.555829427761384, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.4875, "step": 6004 }, { "epoch": 0.555922004281664, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5848, "step": 6005 }, { "epoch": 0.5560145808019441, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.523, "step": 6006 }, { "epoch": 0.5561071573222242, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6117, "step": 6007 }, { "epoch": 0.5561997338425042, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5251, "step": 6008 }, { "epoch": 0.5562923103627843, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5458, "step": 6009 }, { "epoch": 0.5563848868830643, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6022, "step": 6010 }, { "epoch": 0.5564774634033444, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5169, "step": 6011 }, { "epoch": 0.5565700399236244, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5442, "step": 6012 }, { "epoch": 0.5566626164439045, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5905, "step": 6013 }, { "epoch": 0.5567551929641844, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5151, "step": 6014 }, { "epoch": 0.5568477694844645, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6318, "step": 6015 }, { "epoch": 0.5569403460047445, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.507, "step": 6016 }, { "epoch": 0.5570329225250246, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5276, "step": 6017 }, { "epoch": 0.5571254990453046, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.552, "step": 6018 }, { "epoch": 0.5572180755655847, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5458, "step": 6019 }, { "epoch": 0.5573106520858647, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6208, "step": 6020 }, { "epoch": 0.5574032286061448, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5786, "step": 6021 }, { "epoch": 0.5574958051264248, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.559, "step": 6022 }, { "epoch": 0.5575883816467049, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5314, "step": 6023 }, { "epoch": 0.5576809581669849, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.526, "step": 6024 }, { "epoch": 0.557773534687265, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6247, "step": 6025 }, { "epoch": 0.557866111207545, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6031, "step": 6026 }, { "epoch": 0.557958687727825, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.521, "step": 6027 }, { "epoch": 0.558051264248105, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5293, "step": 6028 }, { "epoch": 0.5581438407683851, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5642, "step": 6029 }, { "epoch": 0.5582364172886651, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6067, "step": 6030 }, { "epoch": 0.5583289938089452, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6204, "step": 6031 }, { "epoch": 0.5584215703292252, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5232, "step": 6032 }, { "epoch": 0.5585141468495053, "grad_norm": 0.1328125, "learning_rate": 0.02, "loss": 1.6223, "step": 6033 }, { "epoch": 0.5586067233697853, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5342, "step": 6034 }, { "epoch": 0.5586992998900654, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5241, "step": 6035 }, { "epoch": 0.5587918764103454, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5571, "step": 6036 }, { "epoch": 0.5588844529306255, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5282, "step": 6037 }, { "epoch": 0.5589770294509055, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5159, "step": 6038 }, { "epoch": 0.5590696059711856, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5565, "step": 6039 }, { "epoch": 0.5591621824914657, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5704, "step": 6040 }, { "epoch": 0.5592547590117456, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5175, "step": 6041 }, { "epoch": 0.5593473355320256, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.4938, "step": 6042 }, { "epoch": 0.5594399120523057, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5651, "step": 6043 }, { "epoch": 0.5595324885725858, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5619, "step": 6044 }, { "epoch": 0.5596250650928658, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5835, "step": 6045 }, { "epoch": 0.5597176416131459, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6105, "step": 6046 }, { "epoch": 0.5598102181334259, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5772, "step": 6047 }, { "epoch": 0.559902794653706, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5302, "step": 6048 }, { "epoch": 0.559995371173986, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5118, "step": 6049 }, { "epoch": 0.5600879476942661, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5045, "step": 6050 }, { "epoch": 0.5601805242145461, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5051, "step": 6051 }, { "epoch": 0.5602731007348262, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6011, "step": 6052 }, { "epoch": 0.5603656772551062, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5594, "step": 6053 }, { "epoch": 0.5604582537753863, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5485, "step": 6054 }, { "epoch": 0.5605508302956662, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5687, "step": 6055 }, { "epoch": 0.5606434068159463, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5773, "step": 6056 }, { "epoch": 0.5607359833362263, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5983, "step": 6057 }, { "epoch": 0.5608285598565064, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5267, "step": 6058 }, { "epoch": 0.5609211363767864, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6068, "step": 6059 }, { "epoch": 0.5610137128970665, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.4765, "step": 6060 }, { "epoch": 0.5611062894173465, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5653, "step": 6061 }, { "epoch": 0.5611988659376266, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5231, "step": 6062 }, { "epoch": 0.5612914424579066, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5525, "step": 6063 }, { "epoch": 0.5613840189781867, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5029, "step": 6064 }, { "epoch": 0.5614765954984667, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5684, "step": 6065 }, { "epoch": 0.5615691720187468, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.6028, "step": 6066 }, { "epoch": 0.5616617485390268, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5625, "step": 6067 }, { "epoch": 0.5617543250593068, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5099, "step": 6068 }, { "epoch": 0.5618469015795868, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6117, "step": 6069 }, { "epoch": 0.5619394780998669, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.558, "step": 6070 }, { "epoch": 0.5620320546201469, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.5471, "step": 6071 }, { "epoch": 0.562124631140427, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5803, "step": 6072 }, { "epoch": 0.562217207660707, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5761, "step": 6073 }, { "epoch": 0.5623097841809871, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6077, "step": 6074 }, { "epoch": 0.5624023607012671, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5395, "step": 6075 }, { "epoch": 0.5624949372215472, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5342, "step": 6076 }, { "epoch": 0.5625875137418272, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5355, "step": 6077 }, { "epoch": 0.5626800902621073, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.4983, "step": 6078 }, { "epoch": 0.5627726667823874, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5737, "step": 6079 }, { "epoch": 0.5628652433026674, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5429, "step": 6080 }, { "epoch": 0.5629578198229475, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.528, "step": 6081 }, { "epoch": 0.5630503963432274, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.562, "step": 6082 }, { "epoch": 0.5631429728635075, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5798, "step": 6083 }, { "epoch": 0.5632355493837875, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5786, "step": 6084 }, { "epoch": 0.5633281259040676, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5148, "step": 6085 }, { "epoch": 0.5634207024243476, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5807, "step": 6086 }, { "epoch": 0.5635132789446277, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5962, "step": 6087 }, { "epoch": 0.5636058554649077, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.4903, "step": 6088 }, { "epoch": 0.5636984319851878, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5515, "step": 6089 }, { "epoch": 0.5637910085054678, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5646, "step": 6090 }, { "epoch": 0.5638835850257479, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.4961, "step": 6091 }, { "epoch": 0.5639761615460279, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5592, "step": 6092 }, { "epoch": 0.564068738066308, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5387, "step": 6093 }, { "epoch": 0.564161314586588, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5201, "step": 6094 }, { "epoch": 0.564253891106868, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5454, "step": 6095 }, { "epoch": 0.564346467627148, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6085, "step": 6096 }, { "epoch": 0.5644390441474281, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5805, "step": 6097 }, { "epoch": 0.5645316206677081, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5672, "step": 6098 }, { "epoch": 0.5646241971879882, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5656, "step": 6099 }, { "epoch": 0.5647167737082682, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5225, "step": 6100 }, { "epoch": 0.5648093502285483, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5923, "step": 6101 }, { "epoch": 0.5649019267488283, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5151, "step": 6102 }, { "epoch": 0.5649945032691084, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6001, "step": 6103 }, { "epoch": 0.5650870797893884, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6102, "step": 6104 }, { "epoch": 0.5651796563096685, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5519, "step": 6105 }, { "epoch": 0.5652722328299485, "grad_norm": 0.12890625, "learning_rate": 0.02, "loss": 1.5612, "step": 6106 }, { "epoch": 0.5653648093502286, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5942, "step": 6107 }, { "epoch": 0.5654573858705086, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.574, "step": 6108 }, { "epoch": 0.5655499623907886, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5701, "step": 6109 }, { "epoch": 0.5656425389110686, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5858, "step": 6110 }, { "epoch": 0.5657351154313487, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5318, "step": 6111 }, { "epoch": 0.5658276919516287, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5587, "step": 6112 }, { "epoch": 0.5659202684719088, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.571, "step": 6113 }, { "epoch": 0.5660128449921888, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5667, "step": 6114 }, { "epoch": 0.5661054215124689, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5175, "step": 6115 }, { "epoch": 0.566197998032749, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.4651, "step": 6116 }, { "epoch": 0.566290574553029, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5533, "step": 6117 }, { "epoch": 0.566383151073309, "grad_norm": 0.1328125, "learning_rate": 0.02, "loss": 1.6063, "step": 6118 }, { "epoch": 0.5664757275935891, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5644, "step": 6119 }, { "epoch": 0.5665683041138692, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5103, "step": 6120 }, { "epoch": 0.5666608806341492, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5295, "step": 6121 }, { "epoch": 0.5667534571544293, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.4538, "step": 6122 }, { "epoch": 0.5668460336747092, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5419, "step": 6123 }, { "epoch": 0.5669386101949893, "grad_norm": 0.1318359375, "learning_rate": 0.02, "loss": 1.5419, "step": 6124 }, { "epoch": 0.5670311867152693, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6051, "step": 6125 }, { "epoch": 0.5671237632355494, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5779, "step": 6126 }, { "epoch": 0.5672163397558294, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5425, "step": 6127 }, { "epoch": 0.5673089162761095, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6006, "step": 6128 }, { "epoch": 0.5674014927963895, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5269, "step": 6129 }, { "epoch": 0.5674940693166696, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.6145, "step": 6130 }, { "epoch": 0.5675866458369496, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.552, "step": 6131 }, { "epoch": 0.5676792223572297, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.4956, "step": 6132 }, { "epoch": 0.5677717988775097, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5368, "step": 6133 }, { "epoch": 0.5678643753977898, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.552, "step": 6134 }, { "epoch": 0.5679569519180698, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6023, "step": 6135 }, { "epoch": 0.5680495284383498, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5595, "step": 6136 }, { "epoch": 0.5681421049586298, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.621, "step": 6137 }, { "epoch": 0.5682346814789099, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6304, "step": 6138 }, { "epoch": 0.5683272579991899, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5273, "step": 6139 }, { "epoch": 0.56841983451947, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.459, "step": 6140 }, { "epoch": 0.56851241103975, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5311, "step": 6141 }, { "epoch": 0.5686049875600301, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5889, "step": 6142 }, { "epoch": 0.5686975640803101, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5465, "step": 6143 }, { "epoch": 0.5687901406005902, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5221, "step": 6144 }, { "epoch": 0.5688827171208702, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6692, "step": 6145 }, { "epoch": 0.5689752936411503, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5939, "step": 6146 }, { "epoch": 0.5690678701614303, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.6172, "step": 6147 }, { "epoch": 0.5691604466817104, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5864, "step": 6148 }, { "epoch": 0.5692530232019904, "grad_norm": 0.1318359375, "learning_rate": 0.02, "loss": 1.4831, "step": 6149 }, { "epoch": 0.5693455997222704, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5976, "step": 6150 }, { "epoch": 0.5694381762425504, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5434, "step": 6151 }, { "epoch": 0.5695307527628305, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.4685, "step": 6152 }, { "epoch": 0.5696233292831105, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5528, "step": 6153 }, { "epoch": 0.5697159058033906, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5527, "step": 6154 }, { "epoch": 0.5698084823236707, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.656, "step": 6155 }, { "epoch": 0.5699010588439507, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5508, "step": 6156 }, { "epoch": 0.5699936353642308, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5532, "step": 6157 }, { "epoch": 0.5700862118845108, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5807, "step": 6158 }, { "epoch": 0.5701787884047909, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5246, "step": 6159 }, { "epoch": 0.5702713649250709, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5369, "step": 6160 }, { "epoch": 0.570363941445351, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5218, "step": 6161 }, { "epoch": 0.570456517965631, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.535, "step": 6162 }, { "epoch": 0.570549094485911, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5569, "step": 6163 }, { "epoch": 0.570641671006191, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5727, "step": 6164 }, { "epoch": 0.5707342475264711, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5428, "step": 6165 }, { "epoch": 0.5708268240467511, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5897, "step": 6166 }, { "epoch": 0.5709194005670312, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5386, "step": 6167 }, { "epoch": 0.5710119770873112, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5456, "step": 6168 }, { "epoch": 0.5711045536075913, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.54, "step": 6169 }, { "epoch": 0.5711971301278713, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.4929, "step": 6170 }, { "epoch": 0.5712897066481514, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5632, "step": 6171 }, { "epoch": 0.5713822831684314, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6497, "step": 6172 }, { "epoch": 0.5714748596887115, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5095, "step": 6173 }, { "epoch": 0.5715674362089915, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5955, "step": 6174 }, { "epoch": 0.5716600127292716, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.577, "step": 6175 }, { "epoch": 0.5717525892495516, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5497, "step": 6176 }, { "epoch": 0.5718451657698316, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5829, "step": 6177 }, { "epoch": 0.5719377422901116, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6288, "step": 6178 }, { "epoch": 0.5720303188103917, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.527, "step": 6179 }, { "epoch": 0.5721228953306717, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5059, "step": 6180 }, { "epoch": 0.5722154718509518, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5368, "step": 6181 }, { "epoch": 0.5723080483712318, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6413, "step": 6182 }, { "epoch": 0.5724006248915119, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5558, "step": 6183 }, { "epoch": 0.5724932014117919, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.4733, "step": 6184 }, { "epoch": 0.572585777932072, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5474, "step": 6185 }, { "epoch": 0.572678354452352, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5813, "step": 6186 }, { "epoch": 0.5727709309726321, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6214, "step": 6187 }, { "epoch": 0.5728635074929121, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5323, "step": 6188 }, { "epoch": 0.5729560840131922, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5501, "step": 6189 }, { "epoch": 0.5730486605334721, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5082, "step": 6190 }, { "epoch": 0.5731412370537522, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5008, "step": 6191 }, { "epoch": 0.5732338135740322, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5606, "step": 6192 }, { "epoch": 0.5733263900943123, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5407, "step": 6193 }, { "epoch": 0.5734189666145924, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5331, "step": 6194 }, { "epoch": 0.5735115431348724, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.4957, "step": 6195 }, { "epoch": 0.5736041196551525, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5677, "step": 6196 }, { "epoch": 0.5736966961754325, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6088, "step": 6197 }, { "epoch": 0.5737892726957126, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5389, "step": 6198 }, { "epoch": 0.5738818492159926, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5088, "step": 6199 }, { "epoch": 0.5739744257362727, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5406, "step": 6200 }, { "epoch": 0.5740670022565527, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.4772, "step": 6201 }, { "epoch": 0.5741595787768328, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5772, "step": 6202 }, { "epoch": 0.5742521552971128, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.558, "step": 6203 }, { "epoch": 0.5743447318173928, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5736, "step": 6204 }, { "epoch": 0.5744373083376728, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.4973, "step": 6205 }, { "epoch": 0.5745298848579529, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5749, "step": 6206 }, { "epoch": 0.5746224613782329, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6057, "step": 6207 }, { "epoch": 0.574715037898513, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6144, "step": 6208 }, { "epoch": 0.574807614418793, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5222, "step": 6209 }, { "epoch": 0.5749001909390731, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5857, "step": 6210 }, { "epoch": 0.5749927674593531, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6274, "step": 6211 }, { "epoch": 0.5750853439796332, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5724, "step": 6212 }, { "epoch": 0.5751779204999132, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5634, "step": 6213 }, { "epoch": 0.5752704970201933, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5589, "step": 6214 }, { "epoch": 0.5753630735404733, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5267, "step": 6215 }, { "epoch": 0.5754556500607534, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.58, "step": 6216 }, { "epoch": 0.5755482265810334, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5863, "step": 6217 }, { "epoch": 0.5756408031013134, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5335, "step": 6218 }, { "epoch": 0.5757333796215934, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5579, "step": 6219 }, { "epoch": 0.5758259561418735, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5358, "step": 6220 }, { "epoch": 0.5759185326621535, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5837, "step": 6221 }, { "epoch": 0.5760111091824336, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5617, "step": 6222 }, { "epoch": 0.5761036857027136, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6017, "step": 6223 }, { "epoch": 0.5761962622229937, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5398, "step": 6224 }, { "epoch": 0.5762888387432737, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5637, "step": 6225 }, { "epoch": 0.5763814152635538, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5417, "step": 6226 }, { "epoch": 0.5764739917838339, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5244, "step": 6227 }, { "epoch": 0.5765665683041139, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.634, "step": 6228 }, { "epoch": 0.576659144824394, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5735, "step": 6229 }, { "epoch": 0.576751721344674, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5315, "step": 6230 }, { "epoch": 0.576844297864954, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5709, "step": 6231 }, { "epoch": 0.576936874385234, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.627, "step": 6232 }, { "epoch": 0.577029450905514, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5224, "step": 6233 }, { "epoch": 0.5771220274257941, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.527, "step": 6234 }, { "epoch": 0.5772146039460742, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.593, "step": 6235 }, { "epoch": 0.5773071804663542, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5215, "step": 6236 }, { "epoch": 0.5773997569866343, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.489, "step": 6237 }, { "epoch": 0.5774923335069143, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5513, "step": 6238 }, { "epoch": 0.5775849100271944, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5078, "step": 6239 }, { "epoch": 0.5776774865474744, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.4865, "step": 6240 }, { "epoch": 0.5777700630677545, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.529, "step": 6241 }, { "epoch": 0.5778626395880345, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5794, "step": 6242 }, { "epoch": 0.5779552161083146, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5606, "step": 6243 }, { "epoch": 0.5780477926285946, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5319, "step": 6244 }, { "epoch": 0.5781403691488746, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5463, "step": 6245 }, { "epoch": 0.5782329456691546, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.567, "step": 6246 }, { "epoch": 0.5783255221894347, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5152, "step": 6247 }, { "epoch": 0.5784180987097147, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5863, "step": 6248 }, { "epoch": 0.5785106752299948, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5728, "step": 6249 }, { "epoch": 0.5786032517502748, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5268, "step": 6250 }, { "epoch": 0.5786958282705549, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5605, "step": 6251 }, { "epoch": 0.5787884047908349, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5085, "step": 6252 }, { "epoch": 0.578880981311115, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5225, "step": 6253 }, { "epoch": 0.578973557831395, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5207, "step": 6254 }, { "epoch": 0.5790661343516751, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5876, "step": 6255 }, { "epoch": 0.5791587108719551, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6537, "step": 6256 }, { "epoch": 0.5792512873922352, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5429, "step": 6257 }, { "epoch": 0.5793438639125151, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5922, "step": 6258 }, { "epoch": 0.5794364404327952, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5318, "step": 6259 }, { "epoch": 0.5795290169530752, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5117, "step": 6260 }, { "epoch": 0.5796215934733553, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.5841, "step": 6261 }, { "epoch": 0.5797141699936353, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.541, "step": 6262 }, { "epoch": 0.5798067465139154, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5798, "step": 6263 }, { "epoch": 0.5798993230341954, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.551, "step": 6264 }, { "epoch": 0.5799918995544755, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.537, "step": 6265 }, { "epoch": 0.5800844760747556, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5439, "step": 6266 }, { "epoch": 0.5801770525950356, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.4923, "step": 6267 }, { "epoch": 0.5802696291153157, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.4585, "step": 6268 }, { "epoch": 0.5803622056355957, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5752, "step": 6269 }, { "epoch": 0.5804547821558758, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5792, "step": 6270 }, { "epoch": 0.5805473586761558, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5695, "step": 6271 }, { "epoch": 0.5806399351964358, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.538, "step": 6272 }, { "epoch": 0.5807325117167158, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5323, "step": 6273 }, { "epoch": 0.5808250882369959, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5639, "step": 6274 }, { "epoch": 0.5809176647572759, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5445, "step": 6275 }, { "epoch": 0.581010241277556, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5044, "step": 6276 }, { "epoch": 0.581102817797836, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.4582, "step": 6277 }, { "epoch": 0.5811953943181161, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5625, "step": 6278 }, { "epoch": 0.5812879708383961, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.553, "step": 6279 }, { "epoch": 0.5813805473586762, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5976, "step": 6280 }, { "epoch": 0.5814731238789562, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5379, "step": 6281 }, { "epoch": 0.5815657003992363, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5477, "step": 6282 }, { "epoch": 0.5816582769195163, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5659, "step": 6283 }, { "epoch": 0.5817508534397964, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.4831, "step": 6284 }, { "epoch": 0.5818434299600764, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5283, "step": 6285 }, { "epoch": 0.5819360064803564, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.518, "step": 6286 }, { "epoch": 0.5820285830006364, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5323, "step": 6287 }, { "epoch": 0.5821211595209165, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5101, "step": 6288 }, { "epoch": 0.5822137360411965, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5626, "step": 6289 }, { "epoch": 0.5823063125614766, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.548, "step": 6290 }, { "epoch": 0.5823988890817566, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.4608, "step": 6291 }, { "epoch": 0.5824914656020367, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5382, "step": 6292 }, { "epoch": 0.5825840421223167, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5461, "step": 6293 }, { "epoch": 0.5826766186425968, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5345, "step": 6294 }, { "epoch": 0.5827691951628768, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5537, "step": 6295 }, { "epoch": 0.5828617716831569, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5838, "step": 6296 }, { "epoch": 0.582954348203437, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5383, "step": 6297 }, { "epoch": 0.583046924723717, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5583, "step": 6298 }, { "epoch": 0.5831395012439969, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5919, "step": 6299 }, { "epoch": 0.583232077764277, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5756, "step": 6300 }, { "epoch": 0.583324654284557, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.591, "step": 6301 }, { "epoch": 0.5834172308048371, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.4678, "step": 6302 }, { "epoch": 0.5835098073251171, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5689, "step": 6303 }, { "epoch": 0.5836023838453972, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5426, "step": 6304 }, { "epoch": 0.5836949603656773, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5045, "step": 6305 }, { "epoch": 0.5837875368859573, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5852, "step": 6306 }, { "epoch": 0.5838801134062374, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5393, "step": 6307 }, { "epoch": 0.5839726899265174, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5541, "step": 6308 }, { "epoch": 0.5840652664467975, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5501, "step": 6309 }, { "epoch": 0.5841578429670775, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5962, "step": 6310 }, { "epoch": 0.5842504194873576, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5288, "step": 6311 }, { "epoch": 0.5843429960076376, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.4677, "step": 6312 }, { "epoch": 0.5844355725279176, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5353, "step": 6313 }, { "epoch": 0.5845281490481976, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.4701, "step": 6314 }, { "epoch": 0.5846207255684777, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5017, "step": 6315 }, { "epoch": 0.5847133020887577, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.4944, "step": 6316 }, { "epoch": 0.5848058786090378, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5353, "step": 6317 }, { "epoch": 0.5848984551293178, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6036, "step": 6318 }, { "epoch": 0.5849910316495979, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5741, "step": 6319 }, { "epoch": 0.5850836081698779, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5497, "step": 6320 }, { "epoch": 0.585176184690158, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5268, "step": 6321 }, { "epoch": 0.585268761210438, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5545, "step": 6322 }, { "epoch": 0.5853613377307181, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6326, "step": 6323 }, { "epoch": 0.5854539142509981, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.572, "step": 6324 }, { "epoch": 0.5855464907712782, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.553, "step": 6325 }, { "epoch": 0.5856390672915581, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5608, "step": 6326 }, { "epoch": 0.5857316438118382, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.528, "step": 6327 }, { "epoch": 0.5858242203321182, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.5681, "step": 6328 }, { "epoch": 0.5859167968523983, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5855, "step": 6329 }, { "epoch": 0.5860093733726783, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5622, "step": 6330 }, { "epoch": 0.5861019498929584, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5444, "step": 6331 }, { "epoch": 0.5861945264132384, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5229, "step": 6332 }, { "epoch": 0.5862871029335185, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5173, "step": 6333 }, { "epoch": 0.5863796794537985, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5245, "step": 6334 }, { "epoch": 0.5864722559740786, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.4832, "step": 6335 }, { "epoch": 0.5865648324943586, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.543, "step": 6336 }, { "epoch": 0.5866574090146387, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5048, "step": 6337 }, { "epoch": 0.5867499855349187, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5277, "step": 6338 }, { "epoch": 0.5868425620551988, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5372, "step": 6339 }, { "epoch": 0.5869351385754787, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5939, "step": 6340 }, { "epoch": 0.5870277150957588, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5363, "step": 6341 }, { "epoch": 0.5871202916160388, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5968, "step": 6342 }, { "epoch": 0.5872128681363189, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5293, "step": 6343 }, { "epoch": 0.587305444656599, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5396, "step": 6344 }, { "epoch": 0.587398021176879, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5327, "step": 6345 }, { "epoch": 0.5874905976971591, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5035, "step": 6346 }, { "epoch": 0.5875831742174391, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5403, "step": 6347 }, { "epoch": 0.5876757507377192, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5447, "step": 6348 }, { "epoch": 0.5877683272579992, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5663, "step": 6349 }, { "epoch": 0.5878609037782793, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5374, "step": 6350 }, { "epoch": 0.5879534802985593, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.587, "step": 6351 }, { "epoch": 0.5880460568188394, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5679, "step": 6352 }, { "epoch": 0.5881386333391194, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5836, "step": 6353 }, { "epoch": 0.5882312098593994, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6134, "step": 6354 }, { "epoch": 0.5883237863796794, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.4913, "step": 6355 }, { "epoch": 0.5884163628999595, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5116, "step": 6356 }, { "epoch": 0.5885089394202395, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.4725, "step": 6357 }, { "epoch": 0.5886015159405196, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5865, "step": 6358 }, { "epoch": 0.5886940924607996, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.4726, "step": 6359 }, { "epoch": 0.5887866689810797, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6126, "step": 6360 }, { "epoch": 0.5888792455013597, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5546, "step": 6361 }, { "epoch": 0.5889718220216398, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5451, "step": 6362 }, { "epoch": 0.5890643985419198, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5333, "step": 6363 }, { "epoch": 0.5891569750621999, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5465, "step": 6364 }, { "epoch": 0.5892495515824799, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5933, "step": 6365 }, { "epoch": 0.58934212810276, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.4986, "step": 6366 }, { "epoch": 0.5894347046230399, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5465, "step": 6367 }, { "epoch": 0.58952728114332, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5526, "step": 6368 }, { "epoch": 0.5896198576636, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5368, "step": 6369 }, { "epoch": 0.5897124341838801, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5293, "step": 6370 }, { "epoch": 0.5898050107041601, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5985, "step": 6371 }, { "epoch": 0.5898975872244402, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5128, "step": 6372 }, { "epoch": 0.5899901637447202, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5541, "step": 6373 }, { "epoch": 0.5900827402650003, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5625, "step": 6374 }, { "epoch": 0.5901753167852803, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5565, "step": 6375 }, { "epoch": 0.5902678933055604, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.561, "step": 6376 }, { "epoch": 0.5903604698258405, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5212, "step": 6377 }, { "epoch": 0.5904530463461205, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5104, "step": 6378 }, { "epoch": 0.5905456228664006, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5835, "step": 6379 }, { "epoch": 0.5906381993866806, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5764, "step": 6380 }, { "epoch": 0.5907307759069605, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5509, "step": 6381 }, { "epoch": 0.5908233524272406, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5665, "step": 6382 }, { "epoch": 0.5909159289475207, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.4999, "step": 6383 }, { "epoch": 0.5910085054678007, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5412, "step": 6384 }, { "epoch": 0.5911010819880808, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5162, "step": 6385 }, { "epoch": 0.5911936585083608, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.4715, "step": 6386 }, { "epoch": 0.5912862350286409, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5593, "step": 6387 }, { "epoch": 0.5913788115489209, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5979, "step": 6388 }, { "epoch": 0.591471388069201, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5641, "step": 6389 }, { "epoch": 0.591563964589481, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5768, "step": 6390 }, { "epoch": 0.5916565411097611, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5787, "step": 6391 }, { "epoch": 0.5917491176300411, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.543, "step": 6392 }, { "epoch": 0.5918416941503212, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5481, "step": 6393 }, { "epoch": 0.5919342706706011, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5603, "step": 6394 }, { "epoch": 0.5920268471908812, "grad_norm": 0.1328125, "learning_rate": 0.02, "loss": 1.5628, "step": 6395 }, { "epoch": 0.5921194237111612, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5432, "step": 6396 }, { "epoch": 0.5922120002314413, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5468, "step": 6397 }, { "epoch": 0.5923045767517213, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5261, "step": 6398 }, { "epoch": 0.5923971532720014, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5705, "step": 6399 }, { "epoch": 0.5924897297922814, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5513, "step": 6400 }, { "epoch": 0.5925823063125615, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5644, "step": 6401 }, { "epoch": 0.5926748828328415, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5196, "step": 6402 }, { "epoch": 0.5927674593531216, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5728, "step": 6403 }, { "epoch": 0.5928600358734016, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5213, "step": 6404 }, { "epoch": 0.5929526123936817, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5383, "step": 6405 }, { "epoch": 0.5930451889139617, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5566, "step": 6406 }, { "epoch": 0.5931377654342418, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5459, "step": 6407 }, { "epoch": 0.5932303419545217, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.4937, "step": 6408 }, { "epoch": 0.5933229184748018, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6202, "step": 6409 }, { "epoch": 0.5934154949950818, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5716, "step": 6410 }, { "epoch": 0.5935080715153619, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5845, "step": 6411 }, { "epoch": 0.5936006480356419, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5689, "step": 6412 }, { "epoch": 0.593693224555922, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5358, "step": 6413 }, { "epoch": 0.593785801076202, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.6157, "step": 6414 }, { "epoch": 0.5938783775964821, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5641, "step": 6415 }, { "epoch": 0.5939709541167622, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6275, "step": 6416 }, { "epoch": 0.5940635306370422, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5707, "step": 6417 }, { "epoch": 0.5941561071573223, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5931, "step": 6418 }, { "epoch": 0.5942486836776023, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5819, "step": 6419 }, { "epoch": 0.5943412601978824, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5657, "step": 6420 }, { "epoch": 0.5944338367181623, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5176, "step": 6421 }, { "epoch": 0.5945264132384424, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5463, "step": 6422 }, { "epoch": 0.5946189897587224, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5355, "step": 6423 }, { "epoch": 0.5947115662790025, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6074, "step": 6424 }, { "epoch": 0.5948041427992825, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5958, "step": 6425 }, { "epoch": 0.5948967193195626, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5317, "step": 6426 }, { "epoch": 0.5949892958398426, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.5086, "step": 6427 }, { "epoch": 0.5950818723601227, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5121, "step": 6428 }, { "epoch": 0.5951744488804027, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5949, "step": 6429 }, { "epoch": 0.5952670254006828, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5292, "step": 6430 }, { "epoch": 0.5953596019209628, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6059, "step": 6431 }, { "epoch": 0.5954521784412429, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5515, "step": 6432 }, { "epoch": 0.5955447549615229, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.502, "step": 6433 }, { "epoch": 0.595637331481803, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5245, "step": 6434 }, { "epoch": 0.5957299080020829, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6093, "step": 6435 }, { "epoch": 0.595822484522363, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5204, "step": 6436 }, { "epoch": 0.595915061042643, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5549, "step": 6437 }, { "epoch": 0.5960076375629231, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.59, "step": 6438 }, { "epoch": 0.5961002140832031, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5688, "step": 6439 }, { "epoch": 0.5961927906034832, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.5686, "step": 6440 }, { "epoch": 0.5962853671237632, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.5391, "step": 6441 }, { "epoch": 0.5963779436440433, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.5544, "step": 6442 }, { "epoch": 0.5964705201643233, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5486, "step": 6443 }, { "epoch": 0.5965630966846034, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5564, "step": 6444 }, { "epoch": 0.5966556732048834, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5502, "step": 6445 }, { "epoch": 0.5967482497251635, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5162, "step": 6446 }, { "epoch": 0.5968408262454435, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5785, "step": 6447 }, { "epoch": 0.5969334027657236, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5395, "step": 6448 }, { "epoch": 0.5970259792860035, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5465, "step": 6449 }, { "epoch": 0.5971185558062836, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5495, "step": 6450 }, { "epoch": 0.5972111323265636, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5932, "step": 6451 }, { "epoch": 0.5973037088468437, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5477, "step": 6452 }, { "epoch": 0.5973962853671237, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5092, "step": 6453 }, { "epoch": 0.5974888618874038, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5084, "step": 6454 }, { "epoch": 0.5975814384076839, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5462, "step": 6455 }, { "epoch": 0.5976740149279639, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.54, "step": 6456 }, { "epoch": 0.597766591448244, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5101, "step": 6457 }, { "epoch": 0.597859167968524, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5885, "step": 6458 }, { "epoch": 0.5979517444888041, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5022, "step": 6459 }, { "epoch": 0.5980443210090841, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.6019, "step": 6460 }, { "epoch": 0.5981368975293642, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6054, "step": 6461 }, { "epoch": 0.5982294740496441, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5466, "step": 6462 }, { "epoch": 0.5983220505699242, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.4979, "step": 6463 }, { "epoch": 0.5984146270902042, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.4881, "step": 6464 }, { "epoch": 0.5985072036104843, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5608, "step": 6465 }, { "epoch": 0.5985997801307643, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5846, "step": 6466 }, { "epoch": 0.5986923566510444, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.535, "step": 6467 }, { "epoch": 0.5987849331713244, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.4723, "step": 6468 }, { "epoch": 0.5988775096916045, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6076, "step": 6469 }, { "epoch": 0.5989700862118845, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5674, "step": 6470 }, { "epoch": 0.5990626627321646, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.535, "step": 6471 }, { "epoch": 0.5991552392524446, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6081, "step": 6472 }, { "epoch": 0.5992478157727247, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.4759, "step": 6473 }, { "epoch": 0.5993403922930047, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5287, "step": 6474 }, { "epoch": 0.5994329688132848, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.471, "step": 6475 }, { "epoch": 0.5995255453335647, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5687, "step": 6476 }, { "epoch": 0.5996181218538448, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.4771, "step": 6477 }, { "epoch": 0.5997106983741248, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5557, "step": 6478 }, { "epoch": 0.5998032748944049, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5703, "step": 6479 }, { "epoch": 0.5998958514146849, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5532, "step": 6480 }, { "epoch": 0.599988427934965, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.552, "step": 6481 }, { "epoch": 0.600081004455245, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5465, "step": 6482 }, { "epoch": 0.6001735809755251, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5658, "step": 6483 }, { "epoch": 0.6002661574958051, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5777, "step": 6484 }, { "epoch": 0.6003587340160852, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.5446, "step": 6485 }, { "epoch": 0.6004513105363652, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5848, "step": 6486 }, { "epoch": 0.6005438870566453, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5498, "step": 6487 }, { "epoch": 0.6006364635769254, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5528, "step": 6488 }, { "epoch": 0.6007290400972053, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5257, "step": 6489 }, { "epoch": 0.6008216166174853, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5368, "step": 6490 }, { "epoch": 0.6009141931377654, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.4677, "step": 6491 }, { "epoch": 0.6010067696580454, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5882, "step": 6492 }, { "epoch": 0.6010993461783255, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5558, "step": 6493 }, { "epoch": 0.6011919226986056, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.4939, "step": 6494 }, { "epoch": 0.6012844992188856, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5687, "step": 6495 }, { "epoch": 0.6013770757391657, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5171, "step": 6496 }, { "epoch": 0.6014696522594457, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.4823, "step": 6497 }, { "epoch": 0.6015622287797258, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6103, "step": 6498 }, { "epoch": 0.6016548053000058, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5202, "step": 6499 }, { "epoch": 0.6017473818202859, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.519, "step": 6500 }, { "epoch": 0.6018399583405659, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5342, "step": 6501 }, { "epoch": 0.601932534860846, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.515, "step": 6502 }, { "epoch": 0.6020251113811259, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5204, "step": 6503 }, { "epoch": 0.602117687901406, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5829, "step": 6504 }, { "epoch": 0.602210264421686, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.582, "step": 6505 }, { "epoch": 0.6023028409419661, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5545, "step": 6506 }, { "epoch": 0.6023954174622461, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6172, "step": 6507 }, { "epoch": 0.6024879939825262, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5134, "step": 6508 }, { "epoch": 0.6025805705028062, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5258, "step": 6509 }, { "epoch": 0.6026731470230863, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5799, "step": 6510 }, { "epoch": 0.6027657235433663, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5681, "step": 6511 }, { "epoch": 0.6028583000636464, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5795, "step": 6512 }, { "epoch": 0.6029508765839264, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5378, "step": 6513 }, { "epoch": 0.6030434531042065, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5804, "step": 6514 }, { "epoch": 0.6031360296244865, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.4941, "step": 6515 }, { "epoch": 0.6032286061447666, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5601, "step": 6516 }, { "epoch": 0.6033211826650465, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5465, "step": 6517 }, { "epoch": 0.6034137591853266, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5613, "step": 6518 }, { "epoch": 0.6035063357056066, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5093, "step": 6519 }, { "epoch": 0.6035989122258867, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5122, "step": 6520 }, { "epoch": 0.6036914887461667, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6274, "step": 6521 }, { "epoch": 0.6037840652664468, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.555, "step": 6522 }, { "epoch": 0.6038766417867268, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5507, "step": 6523 }, { "epoch": 0.6039692183070069, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5681, "step": 6524 }, { "epoch": 0.604061794827287, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5563, "step": 6525 }, { "epoch": 0.604154371347567, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.5846, "step": 6526 }, { "epoch": 0.604246947867847, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5819, "step": 6527 }, { "epoch": 0.6043395243881271, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5792, "step": 6528 }, { "epoch": 0.6044321009084072, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5261, "step": 6529 }, { "epoch": 0.6045246774286871, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5948, "step": 6530 }, { "epoch": 0.6046172539489671, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5082, "step": 6531 }, { "epoch": 0.6047098304692472, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5443, "step": 6532 }, { "epoch": 0.6048024069895273, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.4872, "step": 6533 }, { "epoch": 0.6048949835098073, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6424, "step": 6534 }, { "epoch": 0.6049875600300874, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.496, "step": 6535 }, { "epoch": 0.6050801365503674, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.4944, "step": 6536 }, { "epoch": 0.6051727130706475, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.507, "step": 6537 }, { "epoch": 0.6052652895909275, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5267, "step": 6538 }, { "epoch": 0.6053578661112076, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5416, "step": 6539 }, { "epoch": 0.6054504426314876, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5959, "step": 6540 }, { "epoch": 0.6055430191517677, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5292, "step": 6541 }, { "epoch": 0.6056355956720477, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5418, "step": 6542 }, { "epoch": 0.6057281721923278, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5231, "step": 6543 }, { "epoch": 0.6058207487126077, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5853, "step": 6544 }, { "epoch": 0.6059133252328878, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5003, "step": 6545 }, { "epoch": 0.6060059017531678, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5482, "step": 6546 }, { "epoch": 0.6060984782734479, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6247, "step": 6547 }, { "epoch": 0.6061910547937279, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.4844, "step": 6548 }, { "epoch": 0.606283631314008, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5107, "step": 6549 }, { "epoch": 0.606376207834288, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5626, "step": 6550 }, { "epoch": 0.6064687843545681, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5299, "step": 6551 }, { "epoch": 0.6065613608748481, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5385, "step": 6552 }, { "epoch": 0.6066539373951282, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.4735, "step": 6553 }, { "epoch": 0.6067465139154082, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5624, "step": 6554 }, { "epoch": 0.6068390904356883, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5498, "step": 6555 }, { "epoch": 0.6069316669559683, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.4612, "step": 6556 }, { "epoch": 0.6070242434762483, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5409, "step": 6557 }, { "epoch": 0.6071168199965283, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5862, "step": 6558 }, { "epoch": 0.6072093965168084, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5758, "step": 6559 }, { "epoch": 0.6073019730370884, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.557, "step": 6560 }, { "epoch": 0.6073945495573685, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5748, "step": 6561 }, { "epoch": 0.6074871260776485, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.565, "step": 6562 }, { "epoch": 0.6075797025979286, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5265, "step": 6563 }, { "epoch": 0.6076722791182086, "grad_norm": 0.1328125, "learning_rate": 0.02, "loss": 1.5407, "step": 6564 }, { "epoch": 0.6077648556384887, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5846, "step": 6565 }, { "epoch": 0.6078574321587688, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5542, "step": 6566 }, { "epoch": 0.6079500086790488, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5203, "step": 6567 }, { "epoch": 0.6080425851993289, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5503, "step": 6568 }, { "epoch": 0.6081351617196089, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5956, "step": 6569 }, { "epoch": 0.608227738239889, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6191, "step": 6570 }, { "epoch": 0.6083203147601689, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5918, "step": 6571 }, { "epoch": 0.608412891280449, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5376, "step": 6572 }, { "epoch": 0.608505467800729, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5655, "step": 6573 }, { "epoch": 0.6085980443210091, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5514, "step": 6574 }, { "epoch": 0.6086906208412891, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.4902, "step": 6575 }, { "epoch": 0.6087831973615692, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.4581, "step": 6576 }, { "epoch": 0.6088757738818492, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5505, "step": 6577 }, { "epoch": 0.6089683504021293, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.4879, "step": 6578 }, { "epoch": 0.6090609269224093, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5769, "step": 6579 }, { "epoch": 0.6091535034426894, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5708, "step": 6580 }, { "epoch": 0.6092460799629694, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.4736, "step": 6581 }, { "epoch": 0.6093386564832495, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5186, "step": 6582 }, { "epoch": 0.6094312330035295, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5759, "step": 6583 }, { "epoch": 0.6095238095238096, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5327, "step": 6584 }, { "epoch": 0.6096163860440895, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5683, "step": 6585 }, { "epoch": 0.6097089625643696, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5488, "step": 6586 }, { "epoch": 0.6098015390846496, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5267, "step": 6587 }, { "epoch": 0.6098941156049297, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5775, "step": 6588 }, { "epoch": 0.6099866921252097, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5455, "step": 6589 }, { "epoch": 0.6100792686454898, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5672, "step": 6590 }, { "epoch": 0.6101718451657698, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5174, "step": 6591 }, { "epoch": 0.6102644216860499, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5988, "step": 6592 }, { "epoch": 0.6103569982063299, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.4693, "step": 6593 }, { "epoch": 0.61044957472661, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5315, "step": 6594 }, { "epoch": 0.61054215124689, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5557, "step": 6595 }, { "epoch": 0.6106347277671701, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5256, "step": 6596 }, { "epoch": 0.6107273042874501, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5602, "step": 6597 }, { "epoch": 0.6108198808077301, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5272, "step": 6598 }, { "epoch": 0.6109124573280101, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5499, "step": 6599 }, { "epoch": 0.6110050338482902, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5372, "step": 6600 }, { "epoch": 0.6110976103685702, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5684, "step": 6601 }, { "epoch": 0.6111901868888503, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.4804, "step": 6602 }, { "epoch": 0.6112827634091303, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.534, "step": 6603 }, { "epoch": 0.6113753399294104, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5451, "step": 6604 }, { "epoch": 0.6114679164496905, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6211, "step": 6605 }, { "epoch": 0.6115604929699705, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5822, "step": 6606 }, { "epoch": 0.6116530694902506, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.547, "step": 6607 }, { "epoch": 0.6117456460105306, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5704, "step": 6608 }, { "epoch": 0.6118382225308107, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5664, "step": 6609 }, { "epoch": 0.6119307990510907, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5201, "step": 6610 }, { "epoch": 0.6120233755713708, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.4944, "step": 6611 }, { "epoch": 0.6121159520916507, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5013, "step": 6612 }, { "epoch": 0.6122085286119308, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.4498, "step": 6613 }, { "epoch": 0.6123011051322108, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5259, "step": 6614 }, { "epoch": 0.6123936816524909, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5289, "step": 6615 }, { "epoch": 0.6124862581727709, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5128, "step": 6616 }, { "epoch": 0.612578834693051, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5374, "step": 6617 }, { "epoch": 0.612671411213331, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5595, "step": 6618 }, { "epoch": 0.6127639877336111, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.4861, "step": 6619 }, { "epoch": 0.6128565642538911, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.482, "step": 6620 }, { "epoch": 0.6129491407741712, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5353, "step": 6621 }, { "epoch": 0.6130417172944512, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5561, "step": 6622 }, { "epoch": 0.6131342938147313, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6124, "step": 6623 }, { "epoch": 0.6132268703350113, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.5961, "step": 6624 }, { "epoch": 0.6133194468552913, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.5311, "step": 6625 }, { "epoch": 0.6134120233755713, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5189, "step": 6626 }, { "epoch": 0.6135045998958514, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5266, "step": 6627 }, { "epoch": 0.6135971764161314, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5757, "step": 6628 }, { "epoch": 0.6136897529364115, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6004, "step": 6629 }, { "epoch": 0.6137823294566915, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5896, "step": 6630 }, { "epoch": 0.6138749059769716, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5633, "step": 6631 }, { "epoch": 0.6139674824972516, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.4798, "step": 6632 }, { "epoch": 0.6140600590175317, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5741, "step": 6633 }, { "epoch": 0.6141526355378117, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5684, "step": 6634 }, { "epoch": 0.6142452120580918, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5334, "step": 6635 }, { "epoch": 0.6143377885783718, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.4836, "step": 6636 }, { "epoch": 0.6144303650986519, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.57, "step": 6637 }, { "epoch": 0.614522941618932, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5456, "step": 6638 }, { "epoch": 0.6146155181392119, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.528, "step": 6639 }, { "epoch": 0.6147080946594919, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5032, "step": 6640 }, { "epoch": 0.614800671179772, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5767, "step": 6641 }, { "epoch": 0.614893247700052, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5037, "step": 6642 }, { "epoch": 0.6149858242203321, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5108, "step": 6643 }, { "epoch": 0.6150784007406122, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5914, "step": 6644 }, { "epoch": 0.6151709772608922, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5933, "step": 6645 }, { "epoch": 0.6152635537811723, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5199, "step": 6646 }, { "epoch": 0.6153561303014523, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5621, "step": 6647 }, { "epoch": 0.6154487068217324, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5604, "step": 6648 }, { "epoch": 0.6155412833420124, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.4952, "step": 6649 }, { "epoch": 0.6156338598622925, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5301, "step": 6650 }, { "epoch": 0.6157264363825725, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5563, "step": 6651 }, { "epoch": 0.6158190129028526, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5388, "step": 6652 }, { "epoch": 0.6159115894231325, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6154, "step": 6653 }, { "epoch": 0.6160041659434126, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5262, "step": 6654 }, { "epoch": 0.6160967424636926, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.5928, "step": 6655 }, { "epoch": 0.6161893189839727, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5204, "step": 6656 }, { "epoch": 0.6162818955042527, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5998, "step": 6657 }, { "epoch": 0.6163744720245328, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5215, "step": 6658 }, { "epoch": 0.6164670485448128, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.4974, "step": 6659 }, { "epoch": 0.6165596250650929, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5237, "step": 6660 }, { "epoch": 0.6166522015853729, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5332, "step": 6661 }, { "epoch": 0.616744778105653, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.582, "step": 6662 }, { "epoch": 0.616837354625933, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5228, "step": 6663 }, { "epoch": 0.6169299311462131, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5022, "step": 6664 }, { "epoch": 0.6170225076664931, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5566, "step": 6665 }, { "epoch": 0.6171150841867731, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5247, "step": 6666 }, { "epoch": 0.6172076607070531, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.546, "step": 6667 }, { "epoch": 0.6173002372273332, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5824, "step": 6668 }, { "epoch": 0.6173928137476132, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5199, "step": 6669 }, { "epoch": 0.6174853902678933, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6074, "step": 6670 }, { "epoch": 0.6175779667881733, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5608, "step": 6671 }, { "epoch": 0.6176705433084534, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5506, "step": 6672 }, { "epoch": 0.6177631198287334, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.4967, "step": 6673 }, { "epoch": 0.6178556963490135, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5419, "step": 6674 }, { "epoch": 0.6179482728692935, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5208, "step": 6675 }, { "epoch": 0.6180408493895736, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.4989, "step": 6676 }, { "epoch": 0.6181334259098537, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5445, "step": 6677 }, { "epoch": 0.6182260024301337, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5214, "step": 6678 }, { "epoch": 0.6183185789504138, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5326, "step": 6679 }, { "epoch": 0.6184111554706937, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5612, "step": 6680 }, { "epoch": 0.6185037319909737, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5966, "step": 6681 }, { "epoch": 0.6185963085112538, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5649, "step": 6682 }, { "epoch": 0.6186888850315339, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5808, "step": 6683 }, { "epoch": 0.6187814615518139, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5585, "step": 6684 }, { "epoch": 0.618874038072094, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5065, "step": 6685 }, { "epoch": 0.618966614592374, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.503, "step": 6686 }, { "epoch": 0.6190591911126541, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.4825, "step": 6687 }, { "epoch": 0.6191517676329341, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5768, "step": 6688 }, { "epoch": 0.6192443441532142, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.5553, "step": 6689 }, { "epoch": 0.6193369206734942, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.6236, "step": 6690 }, { "epoch": 0.6194294971937743, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.4929, "step": 6691 }, { "epoch": 0.6195220737140543, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.4727, "step": 6692 }, { "epoch": 0.6196146502343343, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.4949, "step": 6693 }, { "epoch": 0.6197072267546143, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5192, "step": 6694 }, { "epoch": 0.6197998032748944, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5407, "step": 6695 }, { "epoch": 0.6198923797951744, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.4978, "step": 6696 }, { "epoch": 0.6199849563154545, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5381, "step": 6697 }, { "epoch": 0.6200775328357345, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5135, "step": 6698 }, { "epoch": 0.6201701093560146, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5321, "step": 6699 }, { "epoch": 0.6202626858762946, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5232, "step": 6700 }, { "epoch": 0.6203552623965747, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.495, "step": 6701 }, { "epoch": 0.6204478389168547, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5319, "step": 6702 }, { "epoch": 0.6205404154371348, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5022, "step": 6703 }, { "epoch": 0.6206329919574148, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5856, "step": 6704 }, { "epoch": 0.6207255684776949, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.5209, "step": 6705 }, { "epoch": 0.6208181449979749, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5802, "step": 6706 }, { "epoch": 0.6209107215182549, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.538, "step": 6707 }, { "epoch": 0.6210032980385349, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5421, "step": 6708 }, { "epoch": 0.621095874558815, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.482, "step": 6709 }, { "epoch": 0.621188451079095, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.4862, "step": 6710 }, { "epoch": 0.6212810275993751, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5777, "step": 6711 }, { "epoch": 0.6213736041196551, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.4995, "step": 6712 }, { "epoch": 0.6214661806399352, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6032, "step": 6713 }, { "epoch": 0.6215587571602152, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5751, "step": 6714 }, { "epoch": 0.6216513336804953, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5646, "step": 6715 }, { "epoch": 0.6217439102007754, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5763, "step": 6716 }, { "epoch": 0.6218364867210554, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5474, "step": 6717 }, { "epoch": 0.6219290632413355, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6049, "step": 6718 }, { "epoch": 0.6220216397616155, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5419, "step": 6719 }, { "epoch": 0.6221142162818954, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5154, "step": 6720 }, { "epoch": 0.6222067928021755, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5755, "step": 6721 }, { "epoch": 0.6222993693224556, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5325, "step": 6722 }, { "epoch": 0.6223919458427356, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6207, "step": 6723 }, { "epoch": 0.6224845223630157, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5891, "step": 6724 }, { "epoch": 0.6225770988832957, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5534, "step": 6725 }, { "epoch": 0.6226696754035758, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5288, "step": 6726 }, { "epoch": 0.6227622519238558, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.4771, "step": 6727 }, { "epoch": 0.6228548284441359, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.5371, "step": 6728 }, { "epoch": 0.6229474049644159, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.4744, "step": 6729 }, { "epoch": 0.623039981484696, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.4472, "step": 6730 }, { "epoch": 0.623132558004976, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5206, "step": 6731 }, { "epoch": 0.6232251345252561, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.4971, "step": 6732 }, { "epoch": 0.6233177110455361, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5427, "step": 6733 }, { "epoch": 0.6234102875658161, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5065, "step": 6734 }, { "epoch": 0.6235028640860961, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5552, "step": 6735 }, { "epoch": 0.6235954406063762, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.4611, "step": 6736 }, { "epoch": 0.6236880171266562, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.4739, "step": 6737 }, { "epoch": 0.6237805936469363, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5504, "step": 6738 }, { "epoch": 0.6238731701672163, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.511, "step": 6739 }, { "epoch": 0.6239657466874964, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.4979, "step": 6740 }, { "epoch": 0.6240583232077764, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5451, "step": 6741 }, { "epoch": 0.6241508997280565, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5801, "step": 6742 }, { "epoch": 0.6242434762483365, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5192, "step": 6743 }, { "epoch": 0.6243360527686166, "grad_norm": 0.1328125, "learning_rate": 0.02, "loss": 1.5191, "step": 6744 }, { "epoch": 0.6244286292888966, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5168, "step": 6745 }, { "epoch": 0.6245212058091767, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.531, "step": 6746 }, { "epoch": 0.6246137823294567, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5246, "step": 6747 }, { "epoch": 0.6247063588497367, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6065, "step": 6748 }, { "epoch": 0.6247989353700167, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.4978, "step": 6749 }, { "epoch": 0.6248915118902968, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5058, "step": 6750 }, { "epoch": 0.6249840884105768, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.4946, "step": 6751 }, { "epoch": 0.6250766649308569, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.6439, "step": 6752 }, { "epoch": 0.625169241451137, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5023, "step": 6753 }, { "epoch": 0.625261817971417, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5575, "step": 6754 }, { "epoch": 0.625354394491697, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5247, "step": 6755 }, { "epoch": 0.6254469710119771, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5078, "step": 6756 }, { "epoch": 0.6255395475322572, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5392, "step": 6757 }, { "epoch": 0.6256321240525372, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5691, "step": 6758 }, { "epoch": 0.6257247005728173, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.4953, "step": 6759 }, { "epoch": 0.6258172770930973, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5074, "step": 6760 }, { "epoch": 0.6259098536133773, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5939, "step": 6761 }, { "epoch": 0.6260024301336573, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.557, "step": 6762 }, { "epoch": 0.6260950066539374, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.6182, "step": 6763 }, { "epoch": 0.6261875831742174, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6222, "step": 6764 }, { "epoch": 0.6262801596944975, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5027, "step": 6765 }, { "epoch": 0.6263727362147775, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5019, "step": 6766 }, { "epoch": 0.6264653127350576, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5826, "step": 6767 }, { "epoch": 0.6265578892553376, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5582, "step": 6768 }, { "epoch": 0.6266504657756177, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5937, "step": 6769 }, { "epoch": 0.6267430422958977, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6126, "step": 6770 }, { "epoch": 0.6268356188161778, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.527, "step": 6771 }, { "epoch": 0.6269281953364578, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5487, "step": 6772 }, { "epoch": 0.6270207718567379, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5721, "step": 6773 }, { "epoch": 0.6271133483770179, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5953, "step": 6774 }, { "epoch": 0.6272059248972979, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.494, "step": 6775 }, { "epoch": 0.6272985014175779, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5298, "step": 6776 }, { "epoch": 0.627391077937858, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5538, "step": 6777 }, { "epoch": 0.627483654458138, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5399, "step": 6778 }, { "epoch": 0.6275762309784181, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5315, "step": 6779 }, { "epoch": 0.6276688074986981, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5445, "step": 6780 }, { "epoch": 0.6277613840189782, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5176, "step": 6781 }, { "epoch": 0.6278539605392582, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5561, "step": 6782 }, { "epoch": 0.6279465370595383, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.6013, "step": 6783 }, { "epoch": 0.6280391135798183, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5188, "step": 6784 }, { "epoch": 0.6281316901000984, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5482, "step": 6785 }, { "epoch": 0.6282242666203784, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5199, "step": 6786 }, { "epoch": 0.6283168431406585, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.5347, "step": 6787 }, { "epoch": 0.6284094196609384, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6074, "step": 6788 }, { "epoch": 0.6285019961812185, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5224, "step": 6789 }, { "epoch": 0.6285945727014985, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.566, "step": 6790 }, { "epoch": 0.6286871492217786, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.4844, "step": 6791 }, { "epoch": 0.6287797257420586, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5236, "step": 6792 }, { "epoch": 0.6288723022623387, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5125, "step": 6793 }, { "epoch": 0.6289648787826188, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5127, "step": 6794 }, { "epoch": 0.6290574553028988, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5138, "step": 6795 }, { "epoch": 0.6291500318231789, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.4401, "step": 6796 }, { "epoch": 0.6292426083434589, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5157, "step": 6797 }, { "epoch": 0.629335184863739, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5402, "step": 6798 }, { "epoch": 0.629427761384019, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5397, "step": 6799 }, { "epoch": 0.6295203379042991, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.634, "step": 6800 }, { "epoch": 0.6296129144245791, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5347, "step": 6801 }, { "epoch": 0.6297054909448591, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5449, "step": 6802 }, { "epoch": 0.6297980674651391, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5431, "step": 6803 }, { "epoch": 0.6298906439854192, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5944, "step": 6804 }, { "epoch": 0.6299832205056992, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5464, "step": 6805 }, { "epoch": 0.6300757970259793, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5359, "step": 6806 }, { "epoch": 0.6301683735462593, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5277, "step": 6807 }, { "epoch": 0.6302609500665394, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5286, "step": 6808 }, { "epoch": 0.6303535265868194, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5568, "step": 6809 }, { "epoch": 0.6304461031070995, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.4955, "step": 6810 }, { "epoch": 0.6305386796273795, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6033, "step": 6811 }, { "epoch": 0.6306312561476596, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5297, "step": 6812 }, { "epoch": 0.6307238326679396, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5824, "step": 6813 }, { "epoch": 0.6308164091882197, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.4772, "step": 6814 }, { "epoch": 0.6309089857084997, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.4861, "step": 6815 }, { "epoch": 0.6310015622287797, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5484, "step": 6816 }, { "epoch": 0.6310941387490597, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5725, "step": 6817 }, { "epoch": 0.6311867152693398, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5253, "step": 6818 }, { "epoch": 0.6312792917896198, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5673, "step": 6819 }, { "epoch": 0.6313718683098999, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.518, "step": 6820 }, { "epoch": 0.6314644448301799, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5634, "step": 6821 }, { "epoch": 0.63155702135046, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5053, "step": 6822 }, { "epoch": 0.63164959787074, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6209, "step": 6823 }, { "epoch": 0.6317421743910201, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5405, "step": 6824 }, { "epoch": 0.6318347509113001, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5051, "step": 6825 }, { "epoch": 0.6319273274315802, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6131, "step": 6826 }, { "epoch": 0.6320199039518603, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5352, "step": 6827 }, { "epoch": 0.6321124804721403, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5225, "step": 6828 }, { "epoch": 0.6322050569924202, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5132, "step": 6829 }, { "epoch": 0.6322976335127003, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5845, "step": 6830 }, { "epoch": 0.6323902100329803, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5778, "step": 6831 }, { "epoch": 0.6324827865532604, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5354, "step": 6832 }, { "epoch": 0.6325753630735405, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5108, "step": 6833 }, { "epoch": 0.6326679395938205, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5193, "step": 6834 }, { "epoch": 0.6327605161141006, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5823, "step": 6835 }, { "epoch": 0.6328530926343806, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.4716, "step": 6836 }, { "epoch": 0.6329456691546607, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.531, "step": 6837 }, { "epoch": 0.6330382456749407, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5429, "step": 6838 }, { "epoch": 0.6331308221952208, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5171, "step": 6839 }, { "epoch": 0.6332233987155008, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.4949, "step": 6840 }, { "epoch": 0.6333159752357809, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5696, "step": 6841 }, { "epoch": 0.6334085517560609, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5396, "step": 6842 }, { "epoch": 0.6335011282763409, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5314, "step": 6843 }, { "epoch": 0.6335937047966209, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.6416, "step": 6844 }, { "epoch": 0.633686281316901, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5146, "step": 6845 }, { "epoch": 0.633778857837181, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5109, "step": 6846 }, { "epoch": 0.6338714343574611, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5465, "step": 6847 }, { "epoch": 0.6339640108777411, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.4715, "step": 6848 }, { "epoch": 0.6340565873980212, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5156, "step": 6849 }, { "epoch": 0.6341491639183012, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5612, "step": 6850 }, { "epoch": 0.6342417404385813, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5648, "step": 6851 }, { "epoch": 0.6343343169588613, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5548, "step": 6852 }, { "epoch": 0.6344268934791414, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5017, "step": 6853 }, { "epoch": 0.6345194699994214, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5028, "step": 6854 }, { "epoch": 0.6346120465197015, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5272, "step": 6855 }, { "epoch": 0.6347046230399814, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5115, "step": 6856 }, { "epoch": 0.6347971995602615, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5362, "step": 6857 }, { "epoch": 0.6348897760805415, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5531, "step": 6858 }, { "epoch": 0.6349823526008216, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5135, "step": 6859 }, { "epoch": 0.6350749291211016, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5148, "step": 6860 }, { "epoch": 0.6351675056413817, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5144, "step": 6861 }, { "epoch": 0.6352600821616617, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5273, "step": 6862 }, { "epoch": 0.6353526586819418, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5307, "step": 6863 }, { "epoch": 0.6354452352022218, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5497, "step": 6864 }, { "epoch": 0.6355378117225019, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5532, "step": 6865 }, { "epoch": 0.635630388242782, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5032, "step": 6866 }, { "epoch": 0.635722964763062, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.484, "step": 6867 }, { "epoch": 0.6358155412833421, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5252, "step": 6868 }, { "epoch": 0.6359081178036221, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6132, "step": 6869 }, { "epoch": 0.636000694323902, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5779, "step": 6870 }, { "epoch": 0.6360932708441821, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5395, "step": 6871 }, { "epoch": 0.6361858473644622, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.4735, "step": 6872 }, { "epoch": 0.6362784238847422, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.6134, "step": 6873 }, { "epoch": 0.6363710004050223, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.5436, "step": 6874 }, { "epoch": 0.6364635769253023, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.4479, "step": 6875 }, { "epoch": 0.6365561534455824, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5501, "step": 6876 }, { "epoch": 0.6366487299658624, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5602, "step": 6877 }, { "epoch": 0.6367413064861425, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5308, "step": 6878 }, { "epoch": 0.6368338830064225, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5616, "step": 6879 }, { "epoch": 0.6369264595267026, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.4953, "step": 6880 }, { "epoch": 0.6370190360469826, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5021, "step": 6881 }, { "epoch": 0.6371116125672627, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5274, "step": 6882 }, { "epoch": 0.6372041890875427, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5295, "step": 6883 }, { "epoch": 0.6372967656078227, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5061, "step": 6884 }, { "epoch": 0.6373893421281027, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5102, "step": 6885 }, { "epoch": 0.6374819186483828, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5372, "step": 6886 }, { "epoch": 0.6375744951686628, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5279, "step": 6887 }, { "epoch": 0.6376670716889429, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5596, "step": 6888 }, { "epoch": 0.6377596482092229, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5539, "step": 6889 }, { "epoch": 0.637852224729503, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.5243, "step": 6890 }, { "epoch": 0.637944801249783, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.559, "step": 6891 }, { "epoch": 0.6380373777700631, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.4824, "step": 6892 }, { "epoch": 0.6381299542903431, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5057, "step": 6893 }, { "epoch": 0.6382225308106232, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5187, "step": 6894 }, { "epoch": 0.6383151073309032, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5176, "step": 6895 }, { "epoch": 0.6384076838511833, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5905, "step": 6896 }, { "epoch": 0.6385002603714632, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5079, "step": 6897 }, { "epoch": 0.6385928368917433, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5887, "step": 6898 }, { "epoch": 0.6386854134120233, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5667, "step": 6899 }, { "epoch": 0.6387779899323034, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.4797, "step": 6900 }, { "epoch": 0.6388705664525834, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5768, "step": 6901 }, { "epoch": 0.6389631429728635, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5428, "step": 6902 }, { "epoch": 0.6390557194931435, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5882, "step": 6903 }, { "epoch": 0.6391482960134236, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5279, "step": 6904 }, { "epoch": 0.6392408725337037, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5541, "step": 6905 }, { "epoch": 0.6393334490539837, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5721, "step": 6906 }, { "epoch": 0.6394260255742638, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.4988, "step": 6907 }, { "epoch": 0.6395186020945438, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.5483, "step": 6908 }, { "epoch": 0.6396111786148239, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5715, "step": 6909 }, { "epoch": 0.6397037551351039, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.6095, "step": 6910 }, { "epoch": 0.6397963316553839, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5479, "step": 6911 }, { "epoch": 0.6398889081756639, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5332, "step": 6912 }, { "epoch": 0.639981484695944, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5467, "step": 6913 }, { "epoch": 0.640074061216224, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5142, "step": 6914 }, { "epoch": 0.6401666377365041, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.4879, "step": 6915 }, { "epoch": 0.6402592142567841, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.517, "step": 6916 }, { "epoch": 0.6403517907770642, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5169, "step": 6917 }, { "epoch": 0.6404443672973442, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5689, "step": 6918 }, { "epoch": 0.6405369438176243, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5503, "step": 6919 }, { "epoch": 0.6406295203379043, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5423, "step": 6920 }, { "epoch": 0.6407220968581844, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5662, "step": 6921 }, { "epoch": 0.6408146733784644, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5408, "step": 6922 }, { "epoch": 0.6409072498987445, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5496, "step": 6923 }, { "epoch": 0.6409998264190244, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.4823, "step": 6924 }, { "epoch": 0.6410924029393045, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.4633, "step": 6925 }, { "epoch": 0.6411849794595845, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5403, "step": 6926 }, { "epoch": 0.6412775559798646, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5125, "step": 6927 }, { "epoch": 0.6413701325001446, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5201, "step": 6928 }, { "epoch": 0.6414627090204247, "grad_norm": 0.12890625, "learning_rate": 0.02, "loss": 1.4283, "step": 6929 }, { "epoch": 0.6415552855407047, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5329, "step": 6930 }, { "epoch": 0.6416478620609848, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5394, "step": 6931 }, { "epoch": 0.6417404385812648, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5008, "step": 6932 }, { "epoch": 0.6418330151015449, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5371, "step": 6933 }, { "epoch": 0.6419255916218249, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.565, "step": 6934 }, { "epoch": 0.642018168142105, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5065, "step": 6935 }, { "epoch": 0.642110744662385, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5359, "step": 6936 }, { "epoch": 0.6422033211826651, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5331, "step": 6937 }, { "epoch": 0.642295897702945, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5694, "step": 6938 }, { "epoch": 0.6423884742232251, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5365, "step": 6939 }, { "epoch": 0.6424810507435051, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5305, "step": 6940 }, { "epoch": 0.6425736272637852, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5389, "step": 6941 }, { "epoch": 0.6426662037840652, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.4914, "step": 6942 }, { "epoch": 0.6427587803043453, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5213, "step": 6943 }, { "epoch": 0.6428513568246254, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5706, "step": 6944 }, { "epoch": 0.6429439333449054, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5141, "step": 6945 }, { "epoch": 0.6430365098651855, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.4908, "step": 6946 }, { "epoch": 0.6431290863854655, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5179, "step": 6947 }, { "epoch": 0.6432216629057456, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.509, "step": 6948 }, { "epoch": 0.6433142394260256, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5446, "step": 6949 }, { "epoch": 0.6434068159463057, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5493, "step": 6950 }, { "epoch": 0.6434993924665856, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5626, "step": 6951 }, { "epoch": 0.6435919689868657, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5388, "step": 6952 }, { "epoch": 0.6436845455071457, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5022, "step": 6953 }, { "epoch": 0.6437771220274258, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.532, "step": 6954 }, { "epoch": 0.6438696985477058, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5086, "step": 6955 }, { "epoch": 0.6439622750679859, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5256, "step": 6956 }, { "epoch": 0.6440548515882659, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.545, "step": 6957 }, { "epoch": 0.644147428108546, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5534, "step": 6958 }, { "epoch": 0.644240004628826, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5293, "step": 6959 }, { "epoch": 0.6443325811491061, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.4989, "step": 6960 }, { "epoch": 0.6444251576693861, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5141, "step": 6961 }, { "epoch": 0.6445177341896662, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5295, "step": 6962 }, { "epoch": 0.6446103107099462, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.4915, "step": 6963 }, { "epoch": 0.6447028872302263, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.537, "step": 6964 }, { "epoch": 0.6447954637505062, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6043, "step": 6965 }, { "epoch": 0.6448880402707863, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.513, "step": 6966 }, { "epoch": 0.6449806167910663, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.513, "step": 6967 }, { "epoch": 0.6450731933113464, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5594, "step": 6968 }, { "epoch": 0.6451657698316264, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5712, "step": 6969 }, { "epoch": 0.6452583463519065, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5551, "step": 6970 }, { "epoch": 0.6453509228721865, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5477, "step": 6971 }, { "epoch": 0.6454434993924666, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.4643, "step": 6972 }, { "epoch": 0.6455360759127466, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5483, "step": 6973 }, { "epoch": 0.6456286524330267, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.4795, "step": 6974 }, { "epoch": 0.6457212289533067, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5788, "step": 6975 }, { "epoch": 0.6458138054735868, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5272, "step": 6976 }, { "epoch": 0.6459063819938669, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5188, "step": 6977 }, { "epoch": 0.6459989585141469, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5237, "step": 6978 }, { "epoch": 0.6460915350344268, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5556, "step": 6979 }, { "epoch": 0.6461841115547069, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6078, "step": 6980 }, { "epoch": 0.646276688074987, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5068, "step": 6981 }, { "epoch": 0.646369264595267, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5594, "step": 6982 }, { "epoch": 0.646461841115547, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5739, "step": 6983 }, { "epoch": 0.6465544176358271, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5648, "step": 6984 }, { "epoch": 0.6466469941561072, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5095, "step": 6985 }, { "epoch": 0.6467395706763872, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5381, "step": 6986 }, { "epoch": 0.6468321471966673, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.592, "step": 6987 }, { "epoch": 0.6469247237169473, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5577, "step": 6988 }, { "epoch": 0.6470173002372274, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5469, "step": 6989 }, { "epoch": 0.6471098767575074, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5236, "step": 6990 }, { "epoch": 0.6472024532777875, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.496, "step": 6991 }, { "epoch": 0.6472950297980674, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5682, "step": 6992 }, { "epoch": 0.6473876063183475, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5051, "step": 6993 }, { "epoch": 0.6474801828386275, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.4882, "step": 6994 }, { "epoch": 0.6475727593589076, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5555, "step": 6995 }, { "epoch": 0.6476653358791876, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5144, "step": 6996 }, { "epoch": 0.6477579123994677, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5738, "step": 6997 }, { "epoch": 0.6478504889197477, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5481, "step": 6998 }, { "epoch": 0.6479430654400278, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5341, "step": 6999 }, { "epoch": 0.6480356419603078, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.57, "step": 7000 }, { "epoch": 0.6481282184805879, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5522, "step": 7001 }, { "epoch": 0.6482207950008679, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.4615, "step": 7002 }, { "epoch": 0.648313371521148, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.4901, "step": 7003 }, { "epoch": 0.648405948041428, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5291, "step": 7004 }, { "epoch": 0.6484985245617081, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5162, "step": 7005 }, { "epoch": 0.648591101081988, "grad_norm": 0.1328125, "learning_rate": 0.02, "loss": 1.55, "step": 7006 }, { "epoch": 0.6486836776022681, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.503, "step": 7007 }, { "epoch": 0.6487762541225481, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5728, "step": 7008 }, { "epoch": 0.6488688306428282, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5504, "step": 7009 }, { "epoch": 0.6489614071631082, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5112, "step": 7010 }, { "epoch": 0.6490539836833883, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.498, "step": 7011 }, { "epoch": 0.6491465602036683, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5504, "step": 7012 }, { "epoch": 0.6492391367239484, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5146, "step": 7013 }, { "epoch": 0.6493317132442284, "grad_norm": 0.134765625, "learning_rate": 0.02, "loss": 1.5434, "step": 7014 }, { "epoch": 0.6494242897645085, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.6207, "step": 7015 }, { "epoch": 0.6495168662847886, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5758, "step": 7016 }, { "epoch": 0.6496094428050686, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5382, "step": 7017 }, { "epoch": 0.6497020193253487, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.51, "step": 7018 }, { "epoch": 0.6497945958456286, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5833, "step": 7019 }, { "epoch": 0.6498871723659086, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5017, "step": 7020 }, { "epoch": 0.6499797488861887, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.6002, "step": 7021 }, { "epoch": 0.6500723254064688, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5729, "step": 7022 }, { "epoch": 0.6501649019267488, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5552, "step": 7023 }, { "epoch": 0.6502574784470289, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5883, "step": 7024 }, { "epoch": 0.6503500549673089, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5193, "step": 7025 }, { "epoch": 0.650442631487589, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.5085, "step": 7026 }, { "epoch": 0.650535208007869, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5219, "step": 7027 }, { "epoch": 0.6506277845281491, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5924, "step": 7028 }, { "epoch": 0.6507203610484291, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.5353, "step": 7029 }, { "epoch": 0.6508129375687092, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.498, "step": 7030 }, { "epoch": 0.6509055140889892, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5127, "step": 7031 }, { "epoch": 0.6509980906092693, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5929, "step": 7032 }, { "epoch": 0.6510906671295492, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5752, "step": 7033 }, { "epoch": 0.6511832436498293, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5528, "step": 7034 }, { "epoch": 0.6512758201701093, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5595, "step": 7035 }, { "epoch": 0.6513683966903894, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5401, "step": 7036 }, { "epoch": 0.6514609732106694, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.6001, "step": 7037 }, { "epoch": 0.6515535497309495, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5089, "step": 7038 }, { "epoch": 0.6516461262512295, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5124, "step": 7039 }, { "epoch": 0.6517387027715096, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5675, "step": 7040 }, { "epoch": 0.6518312792917896, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.4882, "step": 7041 }, { "epoch": 0.6519238558120697, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5613, "step": 7042 }, { "epoch": 0.6520164323323497, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5192, "step": 7043 }, { "epoch": 0.6521090088526298, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5245, "step": 7044 }, { "epoch": 0.6522015853729098, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5628, "step": 7045 }, { "epoch": 0.6522941618931899, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5165, "step": 7046 }, { "epoch": 0.6523867384134698, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.4986, "step": 7047 }, { "epoch": 0.6524793149337499, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5836, "step": 7048 }, { "epoch": 0.6525718914540299, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5569, "step": 7049 }, { "epoch": 0.65266446797431, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5142, "step": 7050 }, { "epoch": 0.65275704449459, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6124, "step": 7051 }, { "epoch": 0.6528496210148701, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.4804, "step": 7052 }, { "epoch": 0.6529421975351501, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.534, "step": 7053 }, { "epoch": 0.6530347740554302, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5432, "step": 7054 }, { "epoch": 0.6531273505757103, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.4559, "step": 7055 }, { "epoch": 0.6532199270959903, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5517, "step": 7056 }, { "epoch": 0.6533125036162704, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5942, "step": 7057 }, { "epoch": 0.6534050801365504, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.4725, "step": 7058 }, { "epoch": 0.6534976566568305, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5225, "step": 7059 }, { "epoch": 0.6535902331771104, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5129, "step": 7060 }, { "epoch": 0.6536828096973905, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5477, "step": 7061 }, { "epoch": 0.6537753862176705, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.4667, "step": 7062 }, { "epoch": 0.6538679627379506, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5612, "step": 7063 }, { "epoch": 0.6539605392582306, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.524, "step": 7064 }, { "epoch": 0.6540531157785107, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.5737, "step": 7065 }, { "epoch": 0.6541456922987907, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5636, "step": 7066 }, { "epoch": 0.6542382688190708, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.538, "step": 7067 }, { "epoch": 0.6543308453393508, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.4911, "step": 7068 }, { "epoch": 0.6544234218596309, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5825, "step": 7069 }, { "epoch": 0.6545159983799109, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.489, "step": 7070 }, { "epoch": 0.654608574900191, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.4667, "step": 7071 }, { "epoch": 0.654701151420471, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5188, "step": 7072 }, { "epoch": 0.6547937279407511, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.4967, "step": 7073 }, { "epoch": 0.654886304461031, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.502, "step": 7074 }, { "epoch": 0.6549788809813111, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.538, "step": 7075 }, { "epoch": 0.6550714575015911, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5159, "step": 7076 }, { "epoch": 0.6551640340218712, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5382, "step": 7077 }, { "epoch": 0.6552566105421512, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5602, "step": 7078 }, { "epoch": 0.6553491870624313, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.4543, "step": 7079 }, { "epoch": 0.6554417635827113, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.543, "step": 7080 }, { "epoch": 0.6555343401029914, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5178, "step": 7081 }, { "epoch": 0.6556269166232714, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.5675, "step": 7082 }, { "epoch": 0.6557194931435515, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5709, "step": 7083 }, { "epoch": 0.6558120696638315, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5332, "step": 7084 }, { "epoch": 0.6559046461841116, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.6065, "step": 7085 }, { "epoch": 0.6559972227043916, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5203, "step": 7086 }, { "epoch": 0.6560897992246716, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5032, "step": 7087 }, { "epoch": 0.6561823757449516, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5191, "step": 7088 }, { "epoch": 0.6562749522652317, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5865, "step": 7089 }, { "epoch": 0.6563675287855117, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5691, "step": 7090 }, { "epoch": 0.6564601053057918, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5632, "step": 7091 }, { "epoch": 0.6565526818260718, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5852, "step": 7092 }, { "epoch": 0.6566452583463519, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5442, "step": 7093 }, { "epoch": 0.656737834866632, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5066, "step": 7094 }, { "epoch": 0.656830411386912, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5269, "step": 7095 }, { "epoch": 0.6569229879071921, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5695, "step": 7096 }, { "epoch": 0.6570155644274721, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5025, "step": 7097 }, { "epoch": 0.6571081409477522, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5173, "step": 7098 }, { "epoch": 0.6572007174680322, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5416, "step": 7099 }, { "epoch": 0.6572932939883123, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.513, "step": 7100 }, { "epoch": 0.6573858705085922, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5878, "step": 7101 }, { "epoch": 0.6574784470288723, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5321, "step": 7102 }, { "epoch": 0.6575710235491523, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.4953, "step": 7103 }, { "epoch": 0.6576636000694324, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5587, "step": 7104 }, { "epoch": 0.6577561765897124, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5255, "step": 7105 }, { "epoch": 0.6578487531099925, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5729, "step": 7106 }, { "epoch": 0.6579413296302725, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.4722, "step": 7107 }, { "epoch": 0.6580339061505526, "grad_norm": 0.1396484375, "learning_rate": 0.02, "loss": 1.5141, "step": 7108 }, { "epoch": 0.6581264826708326, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.53, "step": 7109 }, { "epoch": 0.6582190591911127, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5049, "step": 7110 }, { "epoch": 0.6583116357113927, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5695, "step": 7111 }, { "epoch": 0.6584042122316728, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5702, "step": 7112 }, { "epoch": 0.6584967887519528, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5384, "step": 7113 }, { "epoch": 0.6585893652722329, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5777, "step": 7114 }, { "epoch": 0.6586819417925128, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5142, "step": 7115 }, { "epoch": 0.6587745183127929, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5431, "step": 7116 }, { "epoch": 0.6588670948330729, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.4982, "step": 7117 }, { "epoch": 0.658959671353353, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5792, "step": 7118 }, { "epoch": 0.659052247873633, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5506, "step": 7119 }, { "epoch": 0.6591448243939131, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5503, "step": 7120 }, { "epoch": 0.6592374009141931, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5406, "step": 7121 }, { "epoch": 0.6593299774344732, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5276, "step": 7122 }, { "epoch": 0.6594225539547532, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5232, "step": 7123 }, { "epoch": 0.6595151304750333, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.562, "step": 7124 }, { "epoch": 0.6596077069953133, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5392, "step": 7125 }, { "epoch": 0.6597002835155934, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5357, "step": 7126 }, { "epoch": 0.6597928600358735, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5559, "step": 7127 }, { "epoch": 0.6598854365561534, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.4738, "step": 7128 }, { "epoch": 0.6599780130764334, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5174, "step": 7129 }, { "epoch": 0.6600705895967135, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.4874, "step": 7130 }, { "epoch": 0.6601631661169935, "grad_norm": 0.13671875, "learning_rate": 0.02, "loss": 1.5331, "step": 7131 }, { "epoch": 0.6602557426372736, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5166, "step": 7132 }, { "epoch": 0.6603483191575537, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5238, "step": 7133 }, { "epoch": 0.6604408956778337, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5368, "step": 7134 }, { "epoch": 0.6605334721981138, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.4943, "step": 7135 }, { "epoch": 0.6606260487183938, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5634, "step": 7136 }, { "epoch": 0.6607186252386739, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5243, "step": 7137 }, { "epoch": 0.6608112017589539, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.4649, "step": 7138 }, { "epoch": 0.660903778279234, "grad_norm": 0.1376953125, "learning_rate": 0.02, "loss": 1.5548, "step": 7139 }, { "epoch": 0.660996354799514, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.546, "step": 7140 }, { "epoch": 0.6610889313197941, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5231, "step": 7141 }, { "epoch": 0.661181507840074, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.4466, "step": 7142 }, { "epoch": 0.6612740843603541, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.6078, "step": 7143 }, { "epoch": 0.6613666608806341, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5178, "step": 7144 }, { "epoch": 0.6614592374009142, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.5519, "step": 7145 }, { "epoch": 0.6615518139211942, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5726, "step": 7146 }, { "epoch": 0.6616443904414743, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5071, "step": 7147 }, { "epoch": 0.6617369669617543, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5506, "step": 7148 }, { "epoch": 0.6618295434820344, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5557, "step": 7149 }, { "epoch": 0.6619221200023144, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.598, "step": 7150 }, { "epoch": 0.6620146965225945, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5964, "step": 7151 }, { "epoch": 0.6621072730428745, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.5408, "step": 7152 }, { "epoch": 0.6621998495631546, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5969, "step": 7153 }, { "epoch": 0.6622924260834346, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.5948, "step": 7154 }, { "epoch": 0.6623850026037146, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.5563, "step": 7155 }, { "epoch": 0.6624775791239946, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.5378, "step": 7156 }, { "epoch": 0.6625701556442747, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.5333, "step": 7157 }, { "epoch": 0.6626627321645547, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5619, "step": 7158 }, { "epoch": 0.6627553086848348, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5354, "step": 7159 }, { "epoch": 0.6628478852051148, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5298, "step": 7160 }, { "epoch": 0.6629404617253949, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5279, "step": 7161 }, { "epoch": 0.6630330382456749, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5337, "step": 7162 }, { "epoch": 0.663125614765955, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5593, "step": 7163 }, { "epoch": 0.663218191286235, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5441, "step": 7164 }, { "epoch": 0.6633107678065151, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6139, "step": 7165 }, { "epoch": 0.6634033443267952, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.4896, "step": 7166 }, { "epoch": 0.6634959208470752, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6001, "step": 7167 }, { "epoch": 0.6635884973673553, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5397, "step": 7168 }, { "epoch": 0.6636810738876352, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5601, "step": 7169 }, { "epoch": 0.6637736504079152, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5898, "step": 7170 }, { "epoch": 0.6638662269281953, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5099, "step": 7171 }, { "epoch": 0.6639588034484754, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5629, "step": 7172 }, { "epoch": 0.6640513799687554, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.4783, "step": 7173 }, { "epoch": 0.6641439564890355, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.4947, "step": 7174 }, { "epoch": 0.6642365330093155, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5053, "step": 7175 }, { "epoch": 0.6643291095295956, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5149, "step": 7176 }, { "epoch": 0.6644216860498756, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5256, "step": 7177 }, { "epoch": 0.6645142625701557, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5664, "step": 7178 }, { "epoch": 0.6646068390904357, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5148, "step": 7179 }, { "epoch": 0.6646994156107158, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5602, "step": 7180 }, { "epoch": 0.6647919921309958, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5878, "step": 7181 }, { "epoch": 0.6648845686512758, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5171, "step": 7182 }, { "epoch": 0.6649771451715558, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5691, "step": 7183 }, { "epoch": 0.6650697216918359, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5745, "step": 7184 }, { "epoch": 0.6651622982121159, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.4759, "step": 7185 }, { "epoch": 0.665254874732396, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5892, "step": 7186 }, { "epoch": 0.665347451252676, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5543, "step": 7187 }, { "epoch": 0.6654400277729561, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.502, "step": 7188 }, { "epoch": 0.6655326042932361, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.474, "step": 7189 }, { "epoch": 0.6656251808135162, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5121, "step": 7190 }, { "epoch": 0.6657177573337962, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.4957, "step": 7191 }, { "epoch": 0.6658103338540763, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.4405, "step": 7192 }, { "epoch": 0.6659029103743563, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5286, "step": 7193 }, { "epoch": 0.6659954868946364, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5325, "step": 7194 }, { "epoch": 0.6660880634149164, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5783, "step": 7195 }, { "epoch": 0.6661806399351964, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5453, "step": 7196 }, { "epoch": 0.6662732164554764, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5387, "step": 7197 }, { "epoch": 0.6663657929757565, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.571, "step": 7198 }, { "epoch": 0.6664583694960365, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5366, "step": 7199 }, { "epoch": 0.6665509460163166, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5379, "step": 7200 }, { "epoch": 0.6666435225365966, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.4347, "step": 7201 }, { "epoch": 0.6667360990568767, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5078, "step": 7202 }, { "epoch": 0.6668286755771567, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.4877, "step": 7203 }, { "epoch": 0.6669212520974368, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.577, "step": 7204 }, { "epoch": 0.6670138286177169, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5395, "step": 7205 }, { "epoch": 0.6671064051379969, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5582, "step": 7206 }, { "epoch": 0.667198981658277, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.6177, "step": 7207 }, { "epoch": 0.667291558178557, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5635, "step": 7208 }, { "epoch": 0.6673841346988371, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.4763, "step": 7209 }, { "epoch": 0.667476711219117, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.5029, "step": 7210 }, { "epoch": 0.667569287739397, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5508, "step": 7211 }, { "epoch": 0.6676618642596771, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5336, "step": 7212 }, { "epoch": 0.6677544407799572, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.4986, "step": 7213 }, { "epoch": 0.6678470173002372, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.5035, "step": 7214 }, { "epoch": 0.6679395938205173, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.56, "step": 7215 }, { "epoch": 0.6680321703407973, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5499, "step": 7216 }, { "epoch": 0.6681247468610774, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.6123, "step": 7217 }, { "epoch": 0.6682173233813574, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5434, "step": 7218 }, { "epoch": 0.6683098999016375, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.4776, "step": 7219 }, { "epoch": 0.6684024764219175, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.524, "step": 7220 }, { "epoch": 0.6684950529421976, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.525, "step": 7221 }, { "epoch": 0.6685876294624776, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5257, "step": 7222 }, { "epoch": 0.6686802059827576, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5541, "step": 7223 }, { "epoch": 0.6687727825030376, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5456, "step": 7224 }, { "epoch": 0.6688653590233177, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.4785, "step": 7225 }, { "epoch": 0.6689579355435977, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5146, "step": 7226 }, { "epoch": 0.6690505120638778, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.4814, "step": 7227 }, { "epoch": 0.6691430885841578, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.4842, "step": 7228 }, { "epoch": 0.6692356651044379, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.4826, "step": 7229 }, { "epoch": 0.6693282416247179, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.5381, "step": 7230 }, { "epoch": 0.669420818144998, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6165, "step": 7231 }, { "epoch": 0.669513394665278, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.4714, "step": 7232 }, { "epoch": 0.6696059711855581, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5282, "step": 7233 }, { "epoch": 0.6696985477058381, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.6161, "step": 7234 }, { "epoch": 0.6697911242261182, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.498, "step": 7235 }, { "epoch": 0.6698837007463982, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.465, "step": 7236 }, { "epoch": 0.6699762772666782, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5868, "step": 7237 }, { "epoch": 0.6700688537869582, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5231, "step": 7238 }, { "epoch": 0.6701614303072383, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5378, "step": 7239 }, { "epoch": 0.6702540068275183, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5046, "step": 7240 }, { "epoch": 0.6703465833477984, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5005, "step": 7241 }, { "epoch": 0.6704391598680784, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.4253, "step": 7242 }, { "epoch": 0.6705317363883585, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.4826, "step": 7243 }, { "epoch": 0.6706243129086386, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.4633, "step": 7244 }, { "epoch": 0.6707168894289186, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.4599, "step": 7245 }, { "epoch": 0.6708094659491987, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5515, "step": 7246 }, { "epoch": 0.6709020424694787, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5312, "step": 7247 }, { "epoch": 0.6709946189897588, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.488, "step": 7248 }, { "epoch": 0.6710871955100388, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5267, "step": 7249 }, { "epoch": 0.6711797720303188, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.4875, "step": 7250 }, { "epoch": 0.6712723485505988, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5055, "step": 7251 }, { "epoch": 0.6713649250708789, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5251, "step": 7252 }, { "epoch": 0.6714575015911589, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.4931, "step": 7253 }, { "epoch": 0.671550078111439, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5863, "step": 7254 }, { "epoch": 0.671642654631719, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5178, "step": 7255 }, { "epoch": 0.6717352311519991, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5436, "step": 7256 }, { "epoch": 0.6718278076722791, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.4701, "step": 7257 }, { "epoch": 0.6719203841925592, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5031, "step": 7258 }, { "epoch": 0.6720129607128392, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5146, "step": 7259 }, { "epoch": 0.6721055372331193, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5435, "step": 7260 }, { "epoch": 0.6721981137533993, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.4942, "step": 7261 }, { "epoch": 0.6722906902736794, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.4986, "step": 7262 }, { "epoch": 0.6723832667939594, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.4925, "step": 7263 }, { "epoch": 0.6724758433142394, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5009, "step": 7264 }, { "epoch": 0.6725684198345194, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5492, "step": 7265 }, { "epoch": 0.6726609963547995, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.529, "step": 7266 }, { "epoch": 0.6727535728750795, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.4179, "step": 7267 }, { "epoch": 0.6728461493953596, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.4933, "step": 7268 }, { "epoch": 0.6729387259156396, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5538, "step": 7269 }, { "epoch": 0.6730313024359197, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5521, "step": 7270 }, { "epoch": 0.6731238789561997, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5315, "step": 7271 }, { "epoch": 0.6732164554764798, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5084, "step": 7272 }, { "epoch": 0.6733090319967598, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5228, "step": 7273 }, { "epoch": 0.6734016085170399, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5578, "step": 7274 }, { "epoch": 0.67349418503732, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.4632, "step": 7275 }, { "epoch": 0.6735867615576, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5971, "step": 7276 }, { "epoch": 0.67367933807788, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5007, "step": 7277 }, { "epoch": 0.67377191459816, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.5587, "step": 7278 }, { "epoch": 0.67386449111844, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.4848, "step": 7279 }, { "epoch": 0.6739570676387201, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.4997, "step": 7280 }, { "epoch": 0.6740496441590001, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5154, "step": 7281 }, { "epoch": 0.6741422206792802, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5275, "step": 7282 }, { "epoch": 0.6742347971995603, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5133, "step": 7283 }, { "epoch": 0.6743273737198403, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.502, "step": 7284 }, { "epoch": 0.6744199502401204, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5601, "step": 7285 }, { "epoch": 0.6745125267604004, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5227, "step": 7286 }, { "epoch": 0.6746051032806805, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.4801, "step": 7287 }, { "epoch": 0.6746976798009605, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5126, "step": 7288 }, { "epoch": 0.6747902563212406, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5353, "step": 7289 }, { "epoch": 0.6748828328415206, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5646, "step": 7290 }, { "epoch": 0.6749754093618006, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.4658, "step": 7291 }, { "epoch": 0.6750679858820806, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5118, "step": 7292 }, { "epoch": 0.6751605624023607, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.596, "step": 7293 }, { "epoch": 0.6752531389226407, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5276, "step": 7294 }, { "epoch": 0.6753457154429208, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5659, "step": 7295 }, { "epoch": 0.6754382919632008, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5002, "step": 7296 }, { "epoch": 0.6755308684834809, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5878, "step": 7297 }, { "epoch": 0.6756234450037609, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5248, "step": 7298 }, { "epoch": 0.675716021524041, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.4757, "step": 7299 }, { "epoch": 0.675808598044321, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.477, "step": 7300 }, { "epoch": 0.6759011745646011, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5331, "step": 7301 }, { "epoch": 0.6759937510848811, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.4789, "step": 7302 }, { "epoch": 0.6760863276051612, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5093, "step": 7303 }, { "epoch": 0.6761789041254412, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5175, "step": 7304 }, { "epoch": 0.6762714806457212, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.4642, "step": 7305 }, { "epoch": 0.6763640571660012, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5742, "step": 7306 }, { "epoch": 0.6764566336862813, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.6081, "step": 7307 }, { "epoch": 0.6765492102065613, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5655, "step": 7308 }, { "epoch": 0.6766417867268414, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5124, "step": 7309 }, { "epoch": 0.6767343632471214, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.4942, "step": 7310 }, { "epoch": 0.6768269397674015, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5616, "step": 7311 }, { "epoch": 0.6769195162876815, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5361, "step": 7312 }, { "epoch": 0.6770120928079616, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5343, "step": 7313 }, { "epoch": 0.6771046693282416, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5188, "step": 7314 }, { "epoch": 0.6771972458485217, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.56, "step": 7315 }, { "epoch": 0.6772898223688018, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.5331, "step": 7316 }, { "epoch": 0.6773823988890818, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5047, "step": 7317 }, { "epoch": 0.6774749754093617, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.4803, "step": 7318 }, { "epoch": 0.6775675519296418, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5665, "step": 7319 }, { "epoch": 0.6776601284499219, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5727, "step": 7320 }, { "epoch": 0.6777527049702019, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5347, "step": 7321 }, { "epoch": 0.677845281490482, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.4872, "step": 7322 }, { "epoch": 0.677937858010762, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5092, "step": 7323 }, { "epoch": 0.6780304345310421, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.4781, "step": 7324 }, { "epoch": 0.6781230110513221, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5573, "step": 7325 }, { "epoch": 0.6782155875716022, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.4438, "step": 7326 }, { "epoch": 0.6783081640918822, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.552, "step": 7327 }, { "epoch": 0.6784007406121623, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5302, "step": 7328 }, { "epoch": 0.6784933171324423, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5429, "step": 7329 }, { "epoch": 0.6785858936527224, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.4792, "step": 7330 }, { "epoch": 0.6786784701730024, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5632, "step": 7331 }, { "epoch": 0.6787710466932824, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5327, "step": 7332 }, { "epoch": 0.6788636232135624, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5624, "step": 7333 }, { "epoch": 0.6789561997338425, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5891, "step": 7334 }, { "epoch": 0.6790487762541225, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5289, "step": 7335 }, { "epoch": 0.6791413527744026, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.4554, "step": 7336 }, { "epoch": 0.6792339292946826, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.525, "step": 7337 }, { "epoch": 0.6793265058149627, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5914, "step": 7338 }, { "epoch": 0.6794190823352427, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5697, "step": 7339 }, { "epoch": 0.6795116588555228, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.4911, "step": 7340 }, { "epoch": 0.6796042353758028, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5078, "step": 7341 }, { "epoch": 0.6796968118960829, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5713, "step": 7342 }, { "epoch": 0.6797893884163629, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.4377, "step": 7343 }, { "epoch": 0.679881964936643, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.4304, "step": 7344 }, { "epoch": 0.679974541456923, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6018, "step": 7345 }, { "epoch": 0.680067117977203, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.6202, "step": 7346 }, { "epoch": 0.680159694497483, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.489, "step": 7347 }, { "epoch": 0.6802522710177631, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5354, "step": 7348 }, { "epoch": 0.6803448475380431, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.4541, "step": 7349 }, { "epoch": 0.6804374240583232, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5599, "step": 7350 }, { "epoch": 0.6805300005786032, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5078, "step": 7351 }, { "epoch": 0.6806225770988833, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.4506, "step": 7352 }, { "epoch": 0.6807151536191633, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5107, "step": 7353 }, { "epoch": 0.6808077301394434, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5214, "step": 7354 }, { "epoch": 0.6809003066597235, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.4983, "step": 7355 }, { "epoch": 0.6809928831800035, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.4641, "step": 7356 }, { "epoch": 0.6810854597002836, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5502, "step": 7357 }, { "epoch": 0.6811780362205636, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5399, "step": 7358 }, { "epoch": 0.6812706127408436, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5021, "step": 7359 }, { "epoch": 0.6813631892611236, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.469, "step": 7360 }, { "epoch": 0.6814557657814037, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5576, "step": 7361 }, { "epoch": 0.6815483423016837, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5761, "step": 7362 }, { "epoch": 0.6816409188219638, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.488, "step": 7363 }, { "epoch": 0.6817334953422438, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.4215, "step": 7364 }, { "epoch": 0.6818260718625239, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5353, "step": 7365 }, { "epoch": 0.6819186483828039, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5073, "step": 7366 }, { "epoch": 0.682011224903084, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5642, "step": 7367 }, { "epoch": 0.682103801423364, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.4755, "step": 7368 }, { "epoch": 0.6821963779436441, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.5806, "step": 7369 }, { "epoch": 0.6822889544639241, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5347, "step": 7370 }, { "epoch": 0.6823815309842042, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.5303, "step": 7371 }, { "epoch": 0.6824741075044842, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5137, "step": 7372 }, { "epoch": 0.6825666840247642, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5442, "step": 7373 }, { "epoch": 0.6826592605450442, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5166, "step": 7374 }, { "epoch": 0.6827518370653243, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.4808, "step": 7375 }, { "epoch": 0.6828444135856043, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.497, "step": 7376 }, { "epoch": 0.6829369901058844, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.4695, "step": 7377 }, { "epoch": 0.6830295666261644, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.521, "step": 7378 }, { "epoch": 0.6831221431464445, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.4887, "step": 7379 }, { "epoch": 0.6832147196667245, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.4984, "step": 7380 }, { "epoch": 0.6833072961870046, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5001, "step": 7381 }, { "epoch": 0.6833998727072846, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5183, "step": 7382 }, { "epoch": 0.6834924492275647, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.495, "step": 7383 }, { "epoch": 0.6835850257478447, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5766, "step": 7384 }, { "epoch": 0.6836776022681248, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5788, "step": 7385 }, { "epoch": 0.6837701787884047, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.5495, "step": 7386 }, { "epoch": 0.6838627553086848, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.4891, "step": 7387 }, { "epoch": 0.6839553318289648, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5912, "step": 7388 }, { "epoch": 0.6840479083492449, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5436, "step": 7389 }, { "epoch": 0.684140484869525, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5307, "step": 7390 }, { "epoch": 0.684233061389805, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5441, "step": 7391 }, { "epoch": 0.684325637910085, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5218, "step": 7392 }, { "epoch": 0.6844182144303651, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.6006, "step": 7393 }, { "epoch": 0.6845107909506452, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5419, "step": 7394 }, { "epoch": 0.6846033674709252, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.4775, "step": 7395 }, { "epoch": 0.6846959439912053, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.545, "step": 7396 }, { "epoch": 0.6847885205114853, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5628, "step": 7397 }, { "epoch": 0.6848810970317654, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5521, "step": 7398 }, { "epoch": 0.6849736735520454, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.514, "step": 7399 }, { "epoch": 0.6850662500723254, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5556, "step": 7400 }, { "epoch": 0.6851588265926054, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5129, "step": 7401 }, { "epoch": 0.6852514031128855, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.5262, "step": 7402 }, { "epoch": 0.6853439796331655, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5519, "step": 7403 }, { "epoch": 0.6854365561534456, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.5364, "step": 7404 }, { "epoch": 0.6855291326737256, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.4768, "step": 7405 }, { "epoch": 0.6856217091940057, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.5421, "step": 7406 }, { "epoch": 0.6857142857142857, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.5112, "step": 7407 }, { "epoch": 0.6858068622345658, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5195, "step": 7408 }, { "epoch": 0.6858994387548458, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.5407, "step": 7409 }, { "epoch": 0.6859920152751259, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.4589, "step": 7410 }, { "epoch": 0.6860845917954059, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5148, "step": 7411 }, { "epoch": 0.686177168315686, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.4883, "step": 7412 }, { "epoch": 0.686269744835966, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.5213, "step": 7413 }, { "epoch": 0.686362321356246, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.5104, "step": 7414 }, { "epoch": 0.686454897876526, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.548, "step": 7415 }, { "epoch": 0.6865474743968061, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5317, "step": 7416 }, { "epoch": 0.6866400509170861, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.516, "step": 7417 }, { "epoch": 0.6867326274373662, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5046, "step": 7418 }, { "epoch": 0.6868252039576462, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.4812, "step": 7419 }, { "epoch": 0.6869177804779263, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5018, "step": 7420 }, { "epoch": 0.6870103569982063, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.5047, "step": 7421 }, { "epoch": 0.6871029335184864, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5227, "step": 7422 }, { "epoch": 0.6871955100387664, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.5506, "step": 7423 }, { "epoch": 0.6872880865590465, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.4786, "step": 7424 }, { "epoch": 0.6873806630793265, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.4945, "step": 7425 }, { "epoch": 0.6874732395996066, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.4975, "step": 7426 }, { "epoch": 0.6875658161198865, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.6003, "step": 7427 }, { "epoch": 0.6876583926401666, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.4866, "step": 7428 }, { "epoch": 0.6877509691604466, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.4946, "step": 7429 }, { "epoch": 0.6878435456807267, "grad_norm": 0.1337890625, "learning_rate": 0.02, "loss": 1.501, "step": 7430 }, { "epoch": 0.6879361222010068, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5101, "step": 7431 }, { "epoch": 0.6880286987212868, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.4444, "step": 7432 }, { "epoch": 0.6881212752415669, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.5034, "step": 7433 }, { "epoch": 0.6882138517618469, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5182, "step": 7434 }, { "epoch": 0.688306428282127, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.6076, "step": 7435 }, { "epoch": 0.688399004802407, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.4451, "step": 7436 }, { "epoch": 0.6884915813226871, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.513, "step": 7437 }, { "epoch": 0.6885841578429671, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.5914, "step": 7438 }, { "epoch": 0.6886767343632472, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.5765, "step": 7439 }, { "epoch": 0.6887693108835272, "grad_norm": 0.1357421875, "learning_rate": 0.02, "loss": 1.5209, "step": 7440 } ], "logging_steps": 1, "max_steps": 16203, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1476, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.554468380893837e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }