{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.04336384823824889, "eval_steps": 500, "global_step": 1966, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.20568912707268e-05, "grad_norm": 11.1875, "learning_rate": 0.0, "loss": 12.1899, "step": 1 }, { "epoch": 4.41137825414536e-05, "grad_norm": 11.6875, "learning_rate": 7.342143906020558e-07, "loss": 12.1858, "step": 2 }, { "epoch": 6.617067381218041e-05, "grad_norm": 11.375, "learning_rate": 1.4684287812041115e-06, "loss": 12.1846, "step": 3 }, { "epoch": 8.82275650829072e-05, "grad_norm": 11.3125, "learning_rate": 2.2026431718061677e-06, "loss": 12.1856, "step": 4 }, { "epoch": 0.000110284456353634, "grad_norm": 8.9375, "learning_rate": 2.936857562408223e-06, "loss": 12.1877, "step": 5 }, { "epoch": 0.00013234134762436081, "grad_norm": 7.96875, "learning_rate": 3.671071953010279e-06, "loss": 12.1868, "step": 6 }, { "epoch": 0.0001543982388950876, "grad_norm": 9.6875, "learning_rate": 4.4052863436123355e-06, "loss": 12.1744, "step": 7 }, { "epoch": 0.0001764551301658144, "grad_norm": 5.34375, "learning_rate": 5.139500734214391e-06, "loss": 12.1808, "step": 8 }, { "epoch": 0.0001985120214365412, "grad_norm": 4.59375, "learning_rate": 5.873715124816446e-06, "loss": 12.1705, "step": 9 }, { "epoch": 0.000220568912707268, "grad_norm": 7.6875, "learning_rate": 6.607929515418502e-06, "loss": 12.1623, "step": 10 }, { "epoch": 0.0002426258039779948, "grad_norm": 5.375, "learning_rate": 7.342143906020558e-06, "loss": 12.1506, "step": 11 }, { "epoch": 0.00026468269524872163, "grad_norm": 3.484375, "learning_rate": 8.076358296622615e-06, "loss": 12.1461, "step": 12 }, { "epoch": 0.0002867395865194484, "grad_norm": 6.78125, "learning_rate": 8.810572687224671e-06, "loss": 12.1311, "step": 13 }, { "epoch": 0.0003087964777901752, "grad_norm": 3.140625, "learning_rate": 9.544787077826725e-06, "loss": 12.1074, "step": 14 }, { "epoch": 0.000330853369060902, "grad_norm": 3.328125, "learning_rate": 1.0279001468428782e-05, "loss": 12.1089, "step": 15 }, { "epoch": 0.0003529102603316288, "grad_norm": 3.953125, "learning_rate": 1.1013215859030838e-05, "loss": 12.0676, "step": 16 }, { "epoch": 0.0003749671516023556, "grad_norm": 3.90625, "learning_rate": 1.1747430249632892e-05, "loss": 12.041, "step": 17 }, { "epoch": 0.0003970240428730824, "grad_norm": 2.390625, "learning_rate": 1.2481644640234949e-05, "loss": 12.0379, "step": 18 }, { "epoch": 0.0004190809341438092, "grad_norm": 3.15625, "learning_rate": 1.3215859030837005e-05, "loss": 12.0009, "step": 19 }, { "epoch": 0.000441137825414536, "grad_norm": 4.03125, "learning_rate": 1.3950073421439061e-05, "loss": 11.9771, "step": 20 }, { "epoch": 0.0004631947166852628, "grad_norm": 2.421875, "learning_rate": 1.4684287812041115e-05, "loss": 11.989, "step": 21 }, { "epoch": 0.0004852516079559896, "grad_norm": 2.625, "learning_rate": 1.5418502202643173e-05, "loss": 11.9301, "step": 22 }, { "epoch": 0.0005073084992267164, "grad_norm": 1.8359375, "learning_rate": 1.615271659324523e-05, "loss": 11.9088, "step": 23 }, { "epoch": 0.0005293653904974433, "grad_norm": 2.625, "learning_rate": 1.6886930983847282e-05, "loss": 11.8265, "step": 24 }, { "epoch": 0.00055142228176817, "grad_norm": 2.390625, "learning_rate": 1.7621145374449342e-05, "loss": 11.7725, "step": 25 }, { "epoch": 0.0005734791730388968, "grad_norm": 2.15625, "learning_rate": 1.8355359765051395e-05, "loss": 11.7115, "step": 26 }, { "epoch": 0.0005955360643096236, "grad_norm": 1.9453125, "learning_rate": 1.908957415565345e-05, "loss": 11.6759, "step": 27 }, { "epoch": 0.0006175929555803504, "grad_norm": 2.609375, "learning_rate": 1.9823788546255507e-05, "loss": 11.6486, "step": 28 }, { "epoch": 0.0006396498468510772, "grad_norm": 2.171875, "learning_rate": 2.0558002936857563e-05, "loss": 11.6313, "step": 29 }, { "epoch": 0.000661706738121804, "grad_norm": 1.9375, "learning_rate": 2.1292217327459616e-05, "loss": 11.5207, "step": 30 }, { "epoch": 0.0006837636293925308, "grad_norm": 2.015625, "learning_rate": 2.2026431718061676e-05, "loss": 11.4698, "step": 31 }, { "epoch": 0.0007058205206632576, "grad_norm": 2.046875, "learning_rate": 2.2760646108663732e-05, "loss": 11.3403, "step": 32 }, { "epoch": 0.0007278774119339845, "grad_norm": 2.25, "learning_rate": 2.3494860499265785e-05, "loss": 11.2796, "step": 33 }, { "epoch": 0.0007499343032047112, "grad_norm": 2.09375, "learning_rate": 2.4229074889867844e-05, "loss": 11.2097, "step": 34 }, { "epoch": 0.000771991194475438, "grad_norm": 1.9140625, "learning_rate": 2.4963289280469897e-05, "loss": 11.2075, "step": 35 }, { "epoch": 0.0007940480857461648, "grad_norm": 1.8828125, "learning_rate": 2.5697503671071953e-05, "loss": 11.1051, "step": 36 }, { "epoch": 0.0008161049770168917, "grad_norm": 1.8046875, "learning_rate": 2.643171806167401e-05, "loss": 10.9758, "step": 37 }, { "epoch": 0.0008381618682876184, "grad_norm": 1.75, "learning_rate": 2.7165932452276066e-05, "loss": 10.9066, "step": 38 }, { "epoch": 0.0008602187595583452, "grad_norm": 1.6484375, "learning_rate": 2.7900146842878122e-05, "loss": 10.8176, "step": 39 }, { "epoch": 0.000882275650829072, "grad_norm": 1.5546875, "learning_rate": 2.8634361233480178e-05, "loss": 10.7472, "step": 40 }, { "epoch": 0.0009043325420997988, "grad_norm": 1.6640625, "learning_rate": 2.936857562408223e-05, "loss": 10.6545, "step": 41 }, { "epoch": 0.0009263894333705256, "grad_norm": 1.625, "learning_rate": 3.010279001468429e-05, "loss": 10.6001, "step": 42 }, { "epoch": 0.0009484463246412524, "grad_norm": 1.609375, "learning_rate": 3.0837004405286347e-05, "loss": 10.5341, "step": 43 }, { "epoch": 0.0009705032159119792, "grad_norm": 1.53125, "learning_rate": 3.15712187958884e-05, "loss": 10.4656, "step": 44 }, { "epoch": 0.000992560107182706, "grad_norm": 1.859375, "learning_rate": 3.230543318649046e-05, "loss": 10.3747, "step": 45 }, { "epoch": 0.0010146169984534329, "grad_norm": 1.53125, "learning_rate": 3.303964757709251e-05, "loss": 10.3567, "step": 46 }, { "epoch": 0.0010366738897241597, "grad_norm": 1.703125, "learning_rate": 3.3773861967694565e-05, "loss": 10.2778, "step": 47 }, { "epoch": 0.0010587307809948865, "grad_norm": 1.6640625, "learning_rate": 3.450807635829662e-05, "loss": 10.2249, "step": 48 }, { "epoch": 0.0010807876722656131, "grad_norm": 1.5078125, "learning_rate": 3.5242290748898684e-05, "loss": 10.1845, "step": 49 }, { "epoch": 0.00110284456353634, "grad_norm": 1.5546875, "learning_rate": 3.597650513950074e-05, "loss": 10.1419, "step": 50 }, { "epoch": 0.0011249014548070668, "grad_norm": 1.734375, "learning_rate": 3.671071953010279e-05, "loss": 10.0832, "step": 51 }, { "epoch": 0.0011469583460777936, "grad_norm": 2.875, "learning_rate": 3.7444933920704846e-05, "loss": 10.0205, "step": 52 }, { "epoch": 0.0011690152373485204, "grad_norm": 1.46875, "learning_rate": 3.81791483113069e-05, "loss": 9.9769, "step": 53 }, { "epoch": 0.0011910721286192472, "grad_norm": 1.421875, "learning_rate": 3.891336270190896e-05, "loss": 9.895, "step": 54 }, { "epoch": 0.001213129019889974, "grad_norm": 1.4609375, "learning_rate": 3.9647577092511014e-05, "loss": 9.8653, "step": 55 }, { "epoch": 0.001235185911160701, "grad_norm": 1.4296875, "learning_rate": 4.038179148311307e-05, "loss": 9.802, "step": 56 }, { "epoch": 0.0012572428024314277, "grad_norm": 2.15625, "learning_rate": 4.1116005873715127e-05, "loss": 9.7511, "step": 57 }, { "epoch": 0.0012792996937021543, "grad_norm": 1.453125, "learning_rate": 4.185022026431718e-05, "loss": 9.6394, "step": 58 }, { "epoch": 0.0013013565849728812, "grad_norm": 1.296875, "learning_rate": 4.258443465491923e-05, "loss": 9.6142, "step": 59 }, { "epoch": 0.001323413476243608, "grad_norm": 1.296875, "learning_rate": 4.331864904552129e-05, "loss": 9.5419, "step": 60 }, { "epoch": 0.0013454703675143348, "grad_norm": 1.3046875, "learning_rate": 4.405286343612335e-05, "loss": 9.4671, "step": 61 }, { "epoch": 0.0013675272587850616, "grad_norm": 1.3125, "learning_rate": 4.478707782672541e-05, "loss": 9.3619, "step": 62 }, { "epoch": 0.0013895841500557885, "grad_norm": 1.25, "learning_rate": 4.5521292217327464e-05, "loss": 9.2738, "step": 63 }, { "epoch": 0.0014116410413265153, "grad_norm": 1.2578125, "learning_rate": 4.625550660792951e-05, "loss": 9.1672, "step": 64 }, { "epoch": 0.001433697932597242, "grad_norm": 1.21875, "learning_rate": 4.698972099853157e-05, "loss": 9.1043, "step": 65 }, { "epoch": 0.001455754823867969, "grad_norm": 1.1328125, "learning_rate": 4.772393538913363e-05, "loss": 9.0433, "step": 66 }, { "epoch": 0.0014778117151386955, "grad_norm": 1.1875, "learning_rate": 4.845814977973569e-05, "loss": 8.8955, "step": 67 }, { "epoch": 0.0014998686064094224, "grad_norm": 1.1015625, "learning_rate": 4.919236417033774e-05, "loss": 8.8527, "step": 68 }, { "epoch": 0.0015219254976801492, "grad_norm": 1.2265625, "learning_rate": 4.9926578560939794e-05, "loss": 8.7209, "step": 69 }, { "epoch": 0.001543982388950876, "grad_norm": 2.0, "learning_rate": 5.066079295154185e-05, "loss": 8.6532, "step": 70 }, { "epoch": 0.0015660392802216028, "grad_norm": 1.1796875, "learning_rate": 5.1395007342143906e-05, "loss": 8.5749, "step": 71 }, { "epoch": 0.0015880961714923297, "grad_norm": 1.265625, "learning_rate": 5.212922173274596e-05, "loss": 8.484, "step": 72 }, { "epoch": 0.0016101530627630565, "grad_norm": 2.359375, "learning_rate": 5.286343612334802e-05, "loss": 8.3638, "step": 73 }, { "epoch": 0.0016322099540337833, "grad_norm": 1.09375, "learning_rate": 5.3597650513950075e-05, "loss": 8.3516, "step": 74 }, { "epoch": 0.0016542668453045101, "grad_norm": 1.234375, "learning_rate": 5.433186490455213e-05, "loss": 8.2767, "step": 75 }, { "epoch": 0.0016763237365752367, "grad_norm": 0.96484375, "learning_rate": 5.506607929515418e-05, "loss": 8.2027, "step": 76 }, { "epoch": 0.0016983806278459636, "grad_norm": 1.265625, "learning_rate": 5.5800293685756244e-05, "loss": 8.1303, "step": 77 }, { "epoch": 0.0017204375191166904, "grad_norm": 1.8671875, "learning_rate": 5.65345080763583e-05, "loss": 8.0693, "step": 78 }, { "epoch": 0.0017424944103874172, "grad_norm": 1.15625, "learning_rate": 5.7268722466960356e-05, "loss": 7.9826, "step": 79 }, { "epoch": 0.001764551301658144, "grad_norm": 1.2265625, "learning_rate": 5.800293685756241e-05, "loss": 7.9649, "step": 80 }, { "epoch": 0.0017866081929288709, "grad_norm": 1.1328125, "learning_rate": 5.873715124816446e-05, "loss": 7.8775, "step": 81 }, { "epoch": 0.0018086650841995977, "grad_norm": 0.92578125, "learning_rate": 5.947136563876652e-05, "loss": 7.8471, "step": 82 }, { "epoch": 0.0018307219754703245, "grad_norm": 0.91796875, "learning_rate": 6.020558002936858e-05, "loss": 7.7813, "step": 83 }, { "epoch": 0.0018527788667410511, "grad_norm": 1.40625, "learning_rate": 6.093979441997064e-05, "loss": 7.7604, "step": 84 }, { "epoch": 0.001874835758011778, "grad_norm": 0.76171875, "learning_rate": 6.167400881057269e-05, "loss": 7.6922, "step": 85 }, { "epoch": 0.0018968926492825048, "grad_norm": 0.75390625, "learning_rate": 6.240822320117475e-05, "loss": 7.7067, "step": 86 }, { "epoch": 0.0019189495405532316, "grad_norm": 0.77734375, "learning_rate": 6.31424375917768e-05, "loss": 7.652, "step": 87 }, { "epoch": 0.0019410064318239584, "grad_norm": 0.640625, "learning_rate": 6.387665198237885e-05, "loss": 7.5976, "step": 88 }, { "epoch": 0.0019630633230946853, "grad_norm": 0.55078125, "learning_rate": 6.461086637298092e-05, "loss": 7.552, "step": 89 }, { "epoch": 0.001985120214365412, "grad_norm": 0.64453125, "learning_rate": 6.534508076358296e-05, "loss": 7.5718, "step": 90 }, { "epoch": 0.002007177105636139, "grad_norm": 0.609375, "learning_rate": 6.607929515418502e-05, "loss": 7.4999, "step": 91 }, { "epoch": 0.0020292339969068657, "grad_norm": 0.52734375, "learning_rate": 6.681350954478709e-05, "loss": 7.4991, "step": 92 }, { "epoch": 0.0020512908881775926, "grad_norm": 0.6015625, "learning_rate": 6.754772393538913e-05, "loss": 7.4252, "step": 93 }, { "epoch": 0.0020733477794483194, "grad_norm": 0.52734375, "learning_rate": 6.82819383259912e-05, "loss": 7.46, "step": 94 }, { "epoch": 0.002095404670719046, "grad_norm": 0.5078125, "learning_rate": 6.901615271659324e-05, "loss": 7.3857, "step": 95 }, { "epoch": 0.002117461561989773, "grad_norm": 0.4375, "learning_rate": 6.97503671071953e-05, "loss": 7.385, "step": 96 }, { "epoch": 0.0021395184532605, "grad_norm": 0.435546875, "learning_rate": 7.048458149779737e-05, "loss": 7.3641, "step": 97 }, { "epoch": 0.0021615753445312262, "grad_norm": 0.3984375, "learning_rate": 7.121879588839941e-05, "loss": 7.3631, "step": 98 }, { "epoch": 0.002183632235801953, "grad_norm": 0.439453125, "learning_rate": 7.195301027900148e-05, "loss": 7.3646, "step": 99 }, { "epoch": 0.00220568912707268, "grad_norm": 0.392578125, "learning_rate": 7.268722466960352e-05, "loss": 7.342, "step": 100 }, { "epoch": 0.0022277460183434067, "grad_norm": 0.4453125, "learning_rate": 7.342143906020558e-05, "loss": 7.3113, "step": 101 }, { "epoch": 0.0022498029096141335, "grad_norm": 0.4921875, "learning_rate": 7.415565345080763e-05, "loss": 7.3234, "step": 102 }, { "epoch": 0.0022718598008848604, "grad_norm": 0.3671875, "learning_rate": 7.488986784140969e-05, "loss": 7.3107, "step": 103 }, { "epoch": 0.002293916692155587, "grad_norm": 0.365234375, "learning_rate": 7.562408223201175e-05, "loss": 7.2739, "step": 104 }, { "epoch": 0.002315973583426314, "grad_norm": 0.47265625, "learning_rate": 7.63582966226138e-05, "loss": 7.2599, "step": 105 }, { "epoch": 0.002338030474697041, "grad_norm": 0.376953125, "learning_rate": 7.709251101321586e-05, "loss": 7.2875, "step": 106 }, { "epoch": 0.0023600873659677677, "grad_norm": 0.6328125, "learning_rate": 7.782672540381792e-05, "loss": 7.2475, "step": 107 }, { "epoch": 0.0023821442572384945, "grad_norm": 0.357421875, "learning_rate": 7.856093979441997e-05, "loss": 7.2397, "step": 108 }, { "epoch": 0.0024042011485092213, "grad_norm": 0.416015625, "learning_rate": 7.929515418502203e-05, "loss": 7.2286, "step": 109 }, { "epoch": 0.002426258039779948, "grad_norm": 0.359375, "learning_rate": 8.002936857562408e-05, "loss": 7.2382, "step": 110 }, { "epoch": 0.002448314931050675, "grad_norm": 0.5234375, "learning_rate": 8.076358296622614e-05, "loss": 7.1672, "step": 111 }, { "epoch": 0.002470371822321402, "grad_norm": 0.7265625, "learning_rate": 8.14977973568282e-05, "loss": 7.1799, "step": 112 }, { "epoch": 0.0024924287135921286, "grad_norm": 0.439453125, "learning_rate": 8.223201174743025e-05, "loss": 7.1805, "step": 113 }, { "epoch": 0.0025144856048628554, "grad_norm": 0.40625, "learning_rate": 8.296622613803231e-05, "loss": 7.1815, "step": 114 }, { "epoch": 0.002536542496133582, "grad_norm": 0.51171875, "learning_rate": 8.370044052863437e-05, "loss": 7.1882, "step": 115 }, { "epoch": 0.0025585993874043087, "grad_norm": 0.50390625, "learning_rate": 8.443465491923642e-05, "loss": 7.1558, "step": 116 }, { "epoch": 0.0025806562786750355, "grad_norm": 0.84375, "learning_rate": 8.516886930983846e-05, "loss": 7.133, "step": 117 }, { "epoch": 0.0026027131699457623, "grad_norm": 3.625, "learning_rate": 8.590308370044053e-05, "loss": 7.2016, "step": 118 }, { "epoch": 0.002624770061216489, "grad_norm": 0.7421875, "learning_rate": 8.663729809104258e-05, "loss": 7.1757, "step": 119 }, { "epoch": 0.002646826952487216, "grad_norm": 1.4375, "learning_rate": 8.737151248164465e-05, "loss": 7.1321, "step": 120 }, { "epoch": 0.002668883843757943, "grad_norm": 0.6171875, "learning_rate": 8.81057268722467e-05, "loss": 7.1209, "step": 121 }, { "epoch": 0.0026909407350286696, "grad_norm": 1.3828125, "learning_rate": 8.883994126284875e-05, "loss": 7.1278, "step": 122 }, { "epoch": 0.0027129976262993964, "grad_norm": 0.7109375, "learning_rate": 8.957415565345081e-05, "loss": 7.152, "step": 123 }, { "epoch": 0.0027350545175701233, "grad_norm": 0.65625, "learning_rate": 9.030837004405286e-05, "loss": 7.0859, "step": 124 }, { "epoch": 0.00275711140884085, "grad_norm": 0.6171875, "learning_rate": 9.104258443465493e-05, "loss": 7.1078, "step": 125 }, { "epoch": 0.002779168300111577, "grad_norm": 0.609375, "learning_rate": 9.177679882525698e-05, "loss": 7.0985, "step": 126 }, { "epoch": 0.0028012251913823037, "grad_norm": 0.5703125, "learning_rate": 9.251101321585903e-05, "loss": 7.0883, "step": 127 }, { "epoch": 0.0028232820826530306, "grad_norm": 0.7890625, "learning_rate": 9.32452276064611e-05, "loss": 7.0485, "step": 128 }, { "epoch": 0.0028453389739237574, "grad_norm": 0.625, "learning_rate": 9.397944199706314e-05, "loss": 7.0456, "step": 129 }, { "epoch": 0.002867395865194484, "grad_norm": 0.63671875, "learning_rate": 9.47136563876652e-05, "loss": 7.0164, "step": 130 }, { "epoch": 0.002889452756465211, "grad_norm": 0.49609375, "learning_rate": 9.544787077826726e-05, "loss": 7.0017, "step": 131 }, { "epoch": 0.002911509647735938, "grad_norm": 0.60546875, "learning_rate": 9.618208516886931e-05, "loss": 7.0714, "step": 132 }, { "epoch": 0.0029335665390066643, "grad_norm": 0.59375, "learning_rate": 9.691629955947138e-05, "loss": 7.0172, "step": 133 }, { "epoch": 0.002955623430277391, "grad_norm": 0.5390625, "learning_rate": 9.765051395007342e-05, "loss": 7.0357, "step": 134 }, { "epoch": 0.002977680321548118, "grad_norm": 0.60546875, "learning_rate": 9.838472834067548e-05, "loss": 7.0322, "step": 135 }, { "epoch": 0.0029997372128188447, "grad_norm": 1.0546875, "learning_rate": 9.911894273127753e-05, "loss": 7.0059, "step": 136 }, { "epoch": 0.0030217941040895716, "grad_norm": 1.359375, "learning_rate": 9.985315712187959e-05, "loss": 7.0118, "step": 137 }, { "epoch": 0.0030438509953602984, "grad_norm": 0.59765625, "learning_rate": 0.00010058737151248166, "loss": 6.9769, "step": 138 }, { "epoch": 0.003065907886631025, "grad_norm": 1.8828125, "learning_rate": 0.0001013215859030837, "loss": 6.9284, "step": 139 }, { "epoch": 0.003087964777901752, "grad_norm": 0.73828125, "learning_rate": 0.00010205580029368576, "loss": 6.9097, "step": 140 }, { "epoch": 0.003110021669172479, "grad_norm": 1.0390625, "learning_rate": 0.00010279001468428781, "loss": 6.9374, "step": 141 }, { "epoch": 0.0031320785604432057, "grad_norm": 1.0390625, "learning_rate": 0.00010352422907488987, "loss": 6.9396, "step": 142 }, { "epoch": 0.0031541354517139325, "grad_norm": 0.6796875, "learning_rate": 0.00010425844346549193, "loss": 6.8912, "step": 143 }, { "epoch": 0.0031761923429846593, "grad_norm": 0.94921875, "learning_rate": 0.00010499265785609398, "loss": 6.8723, "step": 144 }, { "epoch": 0.003198249234255386, "grad_norm": 1.1015625, "learning_rate": 0.00010572687224669604, "loss": 6.918, "step": 145 }, { "epoch": 0.003220306125526113, "grad_norm": 0.828125, "learning_rate": 0.0001064610866372981, "loss": 6.8915, "step": 146 }, { "epoch": 0.00324236301679684, "grad_norm": 0.95703125, "learning_rate": 0.00010719530102790015, "loss": 6.8955, "step": 147 }, { "epoch": 0.0032644199080675666, "grad_norm": 1.125, "learning_rate": 0.0001079295154185022, "loss": 6.8517, "step": 148 }, { "epoch": 0.0032864767993382935, "grad_norm": 1.34375, "learning_rate": 0.00010866372980910426, "loss": 6.8872, "step": 149 }, { "epoch": 0.0033085336906090203, "grad_norm": 0.91015625, "learning_rate": 0.00010939794419970632, "loss": 6.867, "step": 150 }, { "epoch": 0.0033305905818797467, "grad_norm": 1.8671875, "learning_rate": 0.00011013215859030836, "loss": 6.8356, "step": 151 }, { "epoch": 0.0033526474731504735, "grad_norm": 0.78515625, "learning_rate": 0.00011086637298091043, "loss": 6.8252, "step": 152 }, { "epoch": 0.0033747043644212003, "grad_norm": 0.9921875, "learning_rate": 0.00011160058737151249, "loss": 6.7941, "step": 153 }, { "epoch": 0.003396761255691927, "grad_norm": 1.5625, "learning_rate": 0.00011233480176211454, "loss": 6.7796, "step": 154 }, { "epoch": 0.003418818146962654, "grad_norm": 1.3046875, "learning_rate": 0.0001130690161527166, "loss": 6.8368, "step": 155 }, { "epoch": 0.003440875038233381, "grad_norm": 1.65625, "learning_rate": 0.00011380323054331864, "loss": 6.7799, "step": 156 }, { "epoch": 0.0034629319295041076, "grad_norm": 1.234375, "learning_rate": 0.00011453744493392071, "loss": 6.6854, "step": 157 }, { "epoch": 0.0034849888207748344, "grad_norm": 0.84375, "learning_rate": 0.00011527165932452275, "loss": 6.7247, "step": 158 }, { "epoch": 0.0035070457120455613, "grad_norm": 1.1328125, "learning_rate": 0.00011600587371512482, "loss": 6.684, "step": 159 }, { "epoch": 0.003529102603316288, "grad_norm": 1.109375, "learning_rate": 0.00011674008810572688, "loss": 6.6966, "step": 160 }, { "epoch": 0.003551159494587015, "grad_norm": 0.80078125, "learning_rate": 0.00011747430249632892, "loss": 6.6325, "step": 161 }, { "epoch": 0.0035732163858577417, "grad_norm": 0.86328125, "learning_rate": 0.00011820851688693099, "loss": 6.6946, "step": 162 }, { "epoch": 0.0035952732771284686, "grad_norm": 0.99609375, "learning_rate": 0.00011894273127753304, "loss": 6.6273, "step": 163 }, { "epoch": 0.0036173301683991954, "grad_norm": 0.828125, "learning_rate": 0.00011967694566813509, "loss": 6.5999, "step": 164 }, { "epoch": 0.0036393870596699222, "grad_norm": 0.92578125, "learning_rate": 0.00012041116005873716, "loss": 6.5321, "step": 165 }, { "epoch": 0.003661443950940649, "grad_norm": 1.078125, "learning_rate": 0.0001211453744493392, "loss": 6.5957, "step": 166 }, { "epoch": 0.003683500842211376, "grad_norm": 1.109375, "learning_rate": 0.00012187958883994127, "loss": 6.567, "step": 167 }, { "epoch": 0.0037055577334821023, "grad_norm": 0.984375, "learning_rate": 0.00012261380323054332, "loss": 6.5523, "step": 168 }, { "epoch": 0.003727614624752829, "grad_norm": 0.9765625, "learning_rate": 0.00012334801762114539, "loss": 6.4926, "step": 169 }, { "epoch": 0.003749671516023556, "grad_norm": 1.1015625, "learning_rate": 0.00012408223201174743, "loss": 6.5288, "step": 170 }, { "epoch": 0.0037717284072942827, "grad_norm": 1.078125, "learning_rate": 0.0001248164464023495, "loss": 6.4436, "step": 171 }, { "epoch": 0.0037937852985650096, "grad_norm": 1.3046875, "learning_rate": 0.00012555066079295154, "loss": 6.4819, "step": 172 }, { "epoch": 0.0038158421898357364, "grad_norm": 0.79296875, "learning_rate": 0.0001262848751835536, "loss": 6.3629, "step": 173 }, { "epoch": 0.003837899081106463, "grad_norm": 1.046875, "learning_rate": 0.00012701908957415565, "loss": 6.3673, "step": 174 }, { "epoch": 0.00385995597237719, "grad_norm": 1.0625, "learning_rate": 0.0001277533039647577, "loss": 6.3286, "step": 175 }, { "epoch": 0.003882012863647917, "grad_norm": 0.75, "learning_rate": 0.00012848751835535977, "loss": 6.3642, "step": 176 }, { "epoch": 0.0039040697549186437, "grad_norm": 1.0234375, "learning_rate": 0.00012922173274596184, "loss": 6.3405, "step": 177 }, { "epoch": 0.0039261266461893705, "grad_norm": 0.82421875, "learning_rate": 0.00012995594713656388, "loss": 6.2621, "step": 178 }, { "epoch": 0.003948183537460097, "grad_norm": 1.0703125, "learning_rate": 0.00013069016152716592, "loss": 6.3182, "step": 179 }, { "epoch": 0.003970240428730824, "grad_norm": 1.21875, "learning_rate": 0.000131424375917768, "loss": 6.2987, "step": 180 }, { "epoch": 0.003992297320001551, "grad_norm": 1.5078125, "learning_rate": 0.00013215859030837003, "loss": 6.271, "step": 181 }, { "epoch": 0.004014354211272278, "grad_norm": 1.3125, "learning_rate": 0.0001328928046989721, "loss": 6.1225, "step": 182 }, { "epoch": 0.004036411102543005, "grad_norm": 0.8828125, "learning_rate": 0.00013362701908957417, "loss": 6.1233, "step": 183 }, { "epoch": 0.0040584679938137315, "grad_norm": 1.765625, "learning_rate": 0.00013436123348017622, "loss": 6.0927, "step": 184 }, { "epoch": 0.004080524885084458, "grad_norm": 1.0625, "learning_rate": 0.00013509544787077826, "loss": 6.1263, "step": 185 }, { "epoch": 0.004102581776355185, "grad_norm": 1.265625, "learning_rate": 0.00013582966226138033, "loss": 6.153, "step": 186 }, { "epoch": 0.004124638667625912, "grad_norm": 1.2109375, "learning_rate": 0.0001365638766519824, "loss": 6.0684, "step": 187 }, { "epoch": 0.004146695558896639, "grad_norm": 1.2421875, "learning_rate": 0.00013729809104258444, "loss": 6.0746, "step": 188 }, { "epoch": 0.004168752450167366, "grad_norm": 0.9453125, "learning_rate": 0.00013803230543318648, "loss": 6.0501, "step": 189 }, { "epoch": 0.004190809341438092, "grad_norm": 1.2109375, "learning_rate": 0.00013876651982378855, "loss": 6.0667, "step": 190 }, { "epoch": 0.004212866232708819, "grad_norm": 1.15625, "learning_rate": 0.0001395007342143906, "loss": 5.9851, "step": 191 }, { "epoch": 0.004234923123979546, "grad_norm": 0.87109375, "learning_rate": 0.00014023494860499267, "loss": 5.9159, "step": 192 }, { "epoch": 0.004256980015250273, "grad_norm": 0.8046875, "learning_rate": 0.00014096916299559473, "loss": 5.9505, "step": 193 }, { "epoch": 0.004279036906521, "grad_norm": 1.03125, "learning_rate": 0.00014170337738619675, "loss": 5.9605, "step": 194 }, { "epoch": 0.004301093797791726, "grad_norm": 1.2421875, "learning_rate": 0.00014243759177679882, "loss": 5.8839, "step": 195 }, { "epoch": 0.0043231506890624525, "grad_norm": 1.109375, "learning_rate": 0.0001431718061674009, "loss": 5.8517, "step": 196 }, { "epoch": 0.004345207580333179, "grad_norm": 1.015625, "learning_rate": 0.00014390602055800296, "loss": 5.843, "step": 197 }, { "epoch": 0.004367264471603906, "grad_norm": 1.1796875, "learning_rate": 0.000144640234948605, "loss": 5.8076, "step": 198 }, { "epoch": 0.004389321362874633, "grad_norm": 0.9296875, "learning_rate": 0.00014537444933920705, "loss": 5.7791, "step": 199 }, { "epoch": 0.00441137825414536, "grad_norm": 0.97265625, "learning_rate": 0.00014610866372980911, "loss": 5.7523, "step": 200 }, { "epoch": 0.004433435145416087, "grad_norm": 1.203125, "learning_rate": 0.00014684287812041116, "loss": 5.7736, "step": 201 }, { "epoch": 0.0044554920366868134, "grad_norm": 1.015625, "learning_rate": 0.00014757709251101323, "loss": 5.7096, "step": 202 }, { "epoch": 0.00447754892795754, "grad_norm": 1.1171875, "learning_rate": 0.00014831130690161527, "loss": 5.6951, "step": 203 }, { "epoch": 0.004499605819228267, "grad_norm": 1.3828125, "learning_rate": 0.0001490455212922173, "loss": 5.72, "step": 204 }, { "epoch": 0.004521662710498994, "grad_norm": 1.046875, "learning_rate": 0.00014977973568281938, "loss": 5.6654, "step": 205 }, { "epoch": 0.004543719601769721, "grad_norm": 1.421875, "learning_rate": 0.00015051395007342145, "loss": 5.721, "step": 206 }, { "epoch": 0.004565776493040448, "grad_norm": 1.0703125, "learning_rate": 0.0001512481644640235, "loss": 5.7629, "step": 207 }, { "epoch": 0.004587833384311174, "grad_norm": 1.0859375, "learning_rate": 0.00015198237885462554, "loss": 5.6433, "step": 208 }, { "epoch": 0.004609890275581901, "grad_norm": 1.484375, "learning_rate": 0.0001527165932452276, "loss": 5.6614, "step": 209 }, { "epoch": 0.004631947166852628, "grad_norm": 1.015625, "learning_rate": 0.00015345080763582968, "loss": 5.6061, "step": 210 }, { "epoch": 0.004654004058123355, "grad_norm": 1.0703125, "learning_rate": 0.00015418502202643172, "loss": 5.6158, "step": 211 }, { "epoch": 0.004676060949394082, "grad_norm": 1.28125, "learning_rate": 0.0001549192364170338, "loss": 5.6389, "step": 212 }, { "epoch": 0.0046981178406648085, "grad_norm": 0.91796875, "learning_rate": 0.00015565345080763583, "loss": 5.5846, "step": 213 }, { "epoch": 0.004720174731935535, "grad_norm": 1.109375, "learning_rate": 0.00015638766519823787, "loss": 5.5412, "step": 214 }, { "epoch": 0.004742231623206262, "grad_norm": 1.0390625, "learning_rate": 0.00015712187958883994, "loss": 5.5169, "step": 215 }, { "epoch": 0.004764288514476989, "grad_norm": 0.9140625, "learning_rate": 0.00015785609397944201, "loss": 5.4836, "step": 216 }, { "epoch": 0.004786345405747716, "grad_norm": 1.1640625, "learning_rate": 0.00015859030837004406, "loss": 5.5169, "step": 217 }, { "epoch": 0.004808402297018443, "grad_norm": 0.98828125, "learning_rate": 0.0001593245227606461, "loss": 5.4594, "step": 218 }, { "epoch": 0.0048304591882891695, "grad_norm": 1.0, "learning_rate": 0.00016005873715124817, "loss": 5.4977, "step": 219 }, { "epoch": 0.004852516079559896, "grad_norm": 0.94921875, "learning_rate": 0.0001607929515418502, "loss": 5.3792, "step": 220 }, { "epoch": 0.004874572970830623, "grad_norm": 1.140625, "learning_rate": 0.00016152716593245228, "loss": 5.386, "step": 221 }, { "epoch": 0.00489662986210135, "grad_norm": 1.28125, "learning_rate": 0.00016226138032305435, "loss": 5.4568, "step": 222 }, { "epoch": 0.004918686753372077, "grad_norm": 1.578125, "learning_rate": 0.0001629955947136564, "loss": 5.4386, "step": 223 }, { "epoch": 0.004940743644642804, "grad_norm": 0.75390625, "learning_rate": 0.00016372980910425844, "loss": 5.3631, "step": 224 }, { "epoch": 0.00496280053591353, "grad_norm": 0.90234375, "learning_rate": 0.0001644640234948605, "loss": 5.32, "step": 225 }, { "epoch": 0.004984857427184257, "grad_norm": 1.390625, "learning_rate": 0.00016519823788546258, "loss": 5.3664, "step": 226 }, { "epoch": 0.005006914318454984, "grad_norm": 1.171875, "learning_rate": 0.00016593245227606462, "loss": 5.3078, "step": 227 }, { "epoch": 0.005028971209725711, "grad_norm": 1.0234375, "learning_rate": 0.00016666666666666666, "loss": 5.35, "step": 228 }, { "epoch": 0.005051028100996438, "grad_norm": 1.40625, "learning_rate": 0.00016740088105726873, "loss": 5.2177, "step": 229 }, { "epoch": 0.005073084992267164, "grad_norm": 1.6328125, "learning_rate": 0.00016813509544787077, "loss": 5.3458, "step": 230 }, { "epoch": 0.0050951418835378905, "grad_norm": 1.015625, "learning_rate": 0.00016886930983847284, "loss": 5.3287, "step": 231 }, { "epoch": 0.005117198774808617, "grad_norm": 1.1484375, "learning_rate": 0.0001696035242290749, "loss": 5.2533, "step": 232 }, { "epoch": 0.005139255666079344, "grad_norm": 0.9921875, "learning_rate": 0.00017033773861967693, "loss": 5.2755, "step": 233 }, { "epoch": 0.005161312557350071, "grad_norm": 1.28125, "learning_rate": 0.000171071953010279, "loss": 5.286, "step": 234 }, { "epoch": 0.005183369448620798, "grad_norm": 1.0859375, "learning_rate": 0.00017180616740088107, "loss": 5.2293, "step": 235 }, { "epoch": 0.005205426339891525, "grad_norm": 1.421875, "learning_rate": 0.00017254038179148314, "loss": 5.2214, "step": 236 }, { "epoch": 0.0052274832311622514, "grad_norm": 0.953125, "learning_rate": 0.00017327459618208515, "loss": 5.2367, "step": 237 }, { "epoch": 0.005249540122432978, "grad_norm": 1.171875, "learning_rate": 0.00017400881057268722, "loss": 5.2176, "step": 238 }, { "epoch": 0.005271597013703705, "grad_norm": 1.3671875, "learning_rate": 0.0001747430249632893, "loss": 5.2316, "step": 239 }, { "epoch": 0.005293653904974432, "grad_norm": 0.83984375, "learning_rate": 0.00017547723935389134, "loss": 5.1137, "step": 240 }, { "epoch": 0.005315710796245159, "grad_norm": 0.87890625, "learning_rate": 0.0001762114537444934, "loss": 5.2055, "step": 241 }, { "epoch": 0.005337767687515886, "grad_norm": 1.078125, "learning_rate": 0.00017694566813509545, "loss": 5.0941, "step": 242 }, { "epoch": 0.005359824578786612, "grad_norm": 1.140625, "learning_rate": 0.0001776798825256975, "loss": 5.1132, "step": 243 }, { "epoch": 0.005381881470057339, "grad_norm": 0.83203125, "learning_rate": 0.00017841409691629956, "loss": 5.1893, "step": 244 }, { "epoch": 0.005403938361328066, "grad_norm": 1.0546875, "learning_rate": 0.00017914831130690163, "loss": 5.1123, "step": 245 }, { "epoch": 0.005425995252598793, "grad_norm": 1.1953125, "learning_rate": 0.00017988252569750367, "loss": 4.9987, "step": 246 }, { "epoch": 0.00544805214386952, "grad_norm": 1.265625, "learning_rate": 0.00018061674008810572, "loss": 5.1007, "step": 247 }, { "epoch": 0.0054701090351402465, "grad_norm": 1.34375, "learning_rate": 0.00018135095447870778, "loss": 5.0443, "step": 248 }, { "epoch": 0.005492165926410973, "grad_norm": 1.359375, "learning_rate": 0.00018208516886930985, "loss": 5.0741, "step": 249 }, { "epoch": 0.0055142228176817, "grad_norm": 1.3203125, "learning_rate": 0.0001828193832599119, "loss": 5.0755, "step": 250 }, { "epoch": 0.005536279708952427, "grad_norm": 0.984375, "learning_rate": 0.00018355359765051397, "loss": 5.0437, "step": 251 }, { "epoch": 0.005558336600223154, "grad_norm": 1.0625, "learning_rate": 0.000184287812041116, "loss": 4.9275, "step": 252 }, { "epoch": 0.005580393491493881, "grad_norm": 1.4921875, "learning_rate": 0.00018502202643171805, "loss": 4.963, "step": 253 }, { "epoch": 0.0056024503827646075, "grad_norm": 0.84375, "learning_rate": 0.00018575624082232012, "loss": 5.0368, "step": 254 }, { "epoch": 0.005624507274035334, "grad_norm": 0.99609375, "learning_rate": 0.0001864904552129222, "loss": 5.0047, "step": 255 }, { "epoch": 0.005646564165306061, "grad_norm": 1.4609375, "learning_rate": 0.00018722466960352423, "loss": 4.9737, "step": 256 }, { "epoch": 0.005668621056576788, "grad_norm": 1.1953125, "learning_rate": 0.00018795888399412628, "loss": 4.9629, "step": 257 }, { "epoch": 0.005690677947847515, "grad_norm": 0.95703125, "learning_rate": 0.00018869309838472835, "loss": 4.9414, "step": 258 }, { "epoch": 0.005712734839118242, "grad_norm": 1.015625, "learning_rate": 0.0001894273127753304, "loss": 4.9772, "step": 259 }, { "epoch": 0.005734791730388968, "grad_norm": 1.4921875, "learning_rate": 0.00019016152716593246, "loss": 4.9489, "step": 260 }, { "epoch": 0.005756848621659695, "grad_norm": 1.0, "learning_rate": 0.00019089574155653453, "loss": 4.9715, "step": 261 }, { "epoch": 0.005778905512930422, "grad_norm": 2.0625, "learning_rate": 0.00019162995594713657, "loss": 4.9751, "step": 262 }, { "epoch": 0.005800962404201149, "grad_norm": 0.96875, "learning_rate": 0.00019236417033773861, "loss": 4.9404, "step": 263 }, { "epoch": 0.005823019295471876, "grad_norm": 1.296875, "learning_rate": 0.00019309838472834068, "loss": 4.9671, "step": 264 }, { "epoch": 0.005845076186742602, "grad_norm": 1.171875, "learning_rate": 0.00019383259911894275, "loss": 4.9202, "step": 265 }, { "epoch": 0.0058671330780133285, "grad_norm": 1.25, "learning_rate": 0.0001945668135095448, "loss": 4.8837, "step": 266 }, { "epoch": 0.005889189969284055, "grad_norm": 0.9609375, "learning_rate": 0.00019530102790014684, "loss": 4.9053, "step": 267 }, { "epoch": 0.005911246860554782, "grad_norm": 1.078125, "learning_rate": 0.0001960352422907489, "loss": 4.8615, "step": 268 }, { "epoch": 0.005933303751825509, "grad_norm": 1.4296875, "learning_rate": 0.00019676945668135095, "loss": 4.8301, "step": 269 }, { "epoch": 0.005955360643096236, "grad_norm": 1.3125, "learning_rate": 0.00019750367107195302, "loss": 4.8775, "step": 270 }, { "epoch": 0.005977417534366963, "grad_norm": 1.328125, "learning_rate": 0.00019823788546255506, "loss": 4.8069, "step": 271 }, { "epoch": 0.0059994744256376895, "grad_norm": 1.03125, "learning_rate": 0.0001989720998531571, "loss": 4.8446, "step": 272 }, { "epoch": 0.006021531316908416, "grad_norm": 1.0625, "learning_rate": 0.00019970631424375918, "loss": 4.7679, "step": 273 }, { "epoch": 0.006043588208179143, "grad_norm": 1.0390625, "learning_rate": 0.00020044052863436125, "loss": 4.7493, "step": 274 }, { "epoch": 0.00606564509944987, "grad_norm": 1.265625, "learning_rate": 0.00020117474302496332, "loss": 4.8898, "step": 275 }, { "epoch": 0.006087701990720597, "grad_norm": 1.4609375, "learning_rate": 0.00020190895741556533, "loss": 4.8191, "step": 276 }, { "epoch": 0.006109758881991324, "grad_norm": 1.0390625, "learning_rate": 0.0002026431718061674, "loss": 4.8124, "step": 277 }, { "epoch": 0.00613181577326205, "grad_norm": 1.1328125, "learning_rate": 0.00020337738619676947, "loss": 4.7823, "step": 278 }, { "epoch": 0.006153872664532777, "grad_norm": 0.90234375, "learning_rate": 0.0002041116005873715, "loss": 4.8034, "step": 279 }, { "epoch": 0.006175929555803504, "grad_norm": 0.91015625, "learning_rate": 0.00020484581497797358, "loss": 4.7449, "step": 280 }, { "epoch": 0.006197986447074231, "grad_norm": 0.70703125, "learning_rate": 0.00020558002936857563, "loss": 4.7508, "step": 281 }, { "epoch": 0.006220043338344958, "grad_norm": 0.75390625, "learning_rate": 0.00020631424375917767, "loss": 4.6495, "step": 282 }, { "epoch": 0.0062421002296156845, "grad_norm": 0.78125, "learning_rate": 0.00020704845814977974, "loss": 4.7939, "step": 283 }, { "epoch": 0.006264157120886411, "grad_norm": 0.80859375, "learning_rate": 0.0002077826725403818, "loss": 4.7274, "step": 284 }, { "epoch": 0.006286214012157138, "grad_norm": 0.87109375, "learning_rate": 0.00020851688693098385, "loss": 4.7283, "step": 285 }, { "epoch": 0.006308270903427865, "grad_norm": 1.0703125, "learning_rate": 0.0002092511013215859, "loss": 4.7648, "step": 286 }, { "epoch": 0.006330327794698592, "grad_norm": 1.3203125, "learning_rate": 0.00020998531571218796, "loss": 4.6887, "step": 287 }, { "epoch": 0.006352384685969319, "grad_norm": 0.74609375, "learning_rate": 0.00021071953010279, "loss": 4.8196, "step": 288 }, { "epoch": 0.0063744415772400455, "grad_norm": 1.03125, "learning_rate": 0.00021145374449339208, "loss": 4.699, "step": 289 }, { "epoch": 0.006396498468510772, "grad_norm": 1.1640625, "learning_rate": 0.00021218795888399415, "loss": 4.6755, "step": 290 }, { "epoch": 0.006418555359781499, "grad_norm": 1.328125, "learning_rate": 0.0002129221732745962, "loss": 4.5866, "step": 291 }, { "epoch": 0.006440612251052226, "grad_norm": 0.94921875, "learning_rate": 0.00021365638766519823, "loss": 4.6718, "step": 292 }, { "epoch": 0.006462669142322953, "grad_norm": 0.97265625, "learning_rate": 0.0002143906020558003, "loss": 4.7494, "step": 293 }, { "epoch": 0.00648472603359368, "grad_norm": 1.484375, "learning_rate": 0.00021512481644640237, "loss": 4.6197, "step": 294 }, { "epoch": 0.006506782924864406, "grad_norm": 1.2265625, "learning_rate": 0.0002158590308370044, "loss": 4.673, "step": 295 }, { "epoch": 0.006528839816135133, "grad_norm": 0.96484375, "learning_rate": 0.00021659324522760646, "loss": 4.622, "step": 296 }, { "epoch": 0.00655089670740586, "grad_norm": 0.9921875, "learning_rate": 0.00021732745961820852, "loss": 4.5954, "step": 297 }, { "epoch": 0.006572953598676587, "grad_norm": 1.0625, "learning_rate": 0.00021806167400881057, "loss": 4.5558, "step": 298 }, { "epoch": 0.006595010489947314, "grad_norm": 1.171875, "learning_rate": 0.00021879588839941264, "loss": 4.5904, "step": 299 }, { "epoch": 0.0066170673812180406, "grad_norm": 0.9921875, "learning_rate": 0.0002195301027900147, "loss": 4.616, "step": 300 }, { "epoch": 0.0066391242724887665, "grad_norm": 1.09375, "learning_rate": 0.00022026431718061672, "loss": 4.6208, "step": 301 }, { "epoch": 0.006661181163759493, "grad_norm": 1.359375, "learning_rate": 0.0002209985315712188, "loss": 4.5705, "step": 302 }, { "epoch": 0.00668323805503022, "grad_norm": 0.9140625, "learning_rate": 0.00022173274596182086, "loss": 4.596, "step": 303 }, { "epoch": 0.006705294946300947, "grad_norm": 1.109375, "learning_rate": 0.00022246696035242293, "loss": 4.502, "step": 304 }, { "epoch": 0.006727351837571674, "grad_norm": 1.203125, "learning_rate": 0.00022320117474302497, "loss": 4.6088, "step": 305 }, { "epoch": 0.006749408728842401, "grad_norm": 1.3359375, "learning_rate": 0.00022393538913362702, "loss": 4.5413, "step": 306 }, { "epoch": 0.0067714656201131275, "grad_norm": 1.2109375, "learning_rate": 0.0002246696035242291, "loss": 4.6212, "step": 307 }, { "epoch": 0.006793522511383854, "grad_norm": 0.890625, "learning_rate": 0.00022540381791483113, "loss": 4.5317, "step": 308 }, { "epoch": 0.006815579402654581, "grad_norm": 0.75, "learning_rate": 0.0002261380323054332, "loss": 4.5466, "step": 309 }, { "epoch": 0.006837636293925308, "grad_norm": 0.69921875, "learning_rate": 0.00022687224669603524, "loss": 4.542, "step": 310 }, { "epoch": 0.006859693185196035, "grad_norm": 0.7734375, "learning_rate": 0.00022760646108663728, "loss": 4.4534, "step": 311 }, { "epoch": 0.006881750076466762, "grad_norm": 0.7421875, "learning_rate": 0.00022834067547723935, "loss": 4.5028, "step": 312 }, { "epoch": 0.006903806967737488, "grad_norm": 0.71484375, "learning_rate": 0.00022907488986784142, "loss": 4.5897, "step": 313 }, { "epoch": 0.006925863859008215, "grad_norm": 0.8046875, "learning_rate": 0.00022980910425844347, "loss": 4.5375, "step": 314 }, { "epoch": 0.006947920750278942, "grad_norm": 1.1640625, "learning_rate": 0.0002305433186490455, "loss": 4.5767, "step": 315 }, { "epoch": 0.006969977641549669, "grad_norm": 1.1875, "learning_rate": 0.00023127753303964758, "loss": 4.5085, "step": 316 }, { "epoch": 0.006992034532820396, "grad_norm": 0.95703125, "learning_rate": 0.00023201174743024965, "loss": 4.5616, "step": 317 }, { "epoch": 0.0070140914240911225, "grad_norm": 0.88671875, "learning_rate": 0.0002327459618208517, "loss": 4.524, "step": 318 }, { "epoch": 0.007036148315361849, "grad_norm": 0.8046875, "learning_rate": 0.00023348017621145376, "loss": 4.5379, "step": 319 }, { "epoch": 0.007058205206632576, "grad_norm": 0.80859375, "learning_rate": 0.0002342143906020558, "loss": 4.4451, "step": 320 }, { "epoch": 0.007080262097903303, "grad_norm": 0.7265625, "learning_rate": 0.00023494860499265785, "loss": 4.4615, "step": 321 }, { "epoch": 0.00710231898917403, "grad_norm": 0.72265625, "learning_rate": 0.00023568281938325992, "loss": 4.5016, "step": 322 }, { "epoch": 0.007124375880444757, "grad_norm": 0.74609375, "learning_rate": 0.00023641703377386199, "loss": 4.4429, "step": 323 }, { "epoch": 0.0071464327717154835, "grad_norm": 0.76953125, "learning_rate": 0.00023715124816446403, "loss": 4.4686, "step": 324 }, { "epoch": 0.00716848966298621, "grad_norm": 0.69140625, "learning_rate": 0.00023788546255506607, "loss": 4.4523, "step": 325 }, { "epoch": 0.007190546554256937, "grad_norm": 0.76171875, "learning_rate": 0.00023861967694566814, "loss": 4.4137, "step": 326 }, { "epoch": 0.007212603445527664, "grad_norm": 0.8984375, "learning_rate": 0.00023935389133627018, "loss": 4.5161, "step": 327 }, { "epoch": 0.007234660336798391, "grad_norm": 0.89453125, "learning_rate": 0.00024008810572687225, "loss": 4.4525, "step": 328 }, { "epoch": 0.007256717228069118, "grad_norm": 0.98828125, "learning_rate": 0.00024082232011747432, "loss": 4.4242, "step": 329 }, { "epoch": 0.0072787741193398444, "grad_norm": 1.21875, "learning_rate": 0.00024155653450807637, "loss": 4.3519, "step": 330 }, { "epoch": 0.007300831010610571, "grad_norm": 0.87109375, "learning_rate": 0.0002422907488986784, "loss": 4.4701, "step": 331 }, { "epoch": 0.007322887901881298, "grad_norm": 1.0703125, "learning_rate": 0.00024302496328928048, "loss": 4.3013, "step": 332 }, { "epoch": 0.007344944793152025, "grad_norm": 1.078125, "learning_rate": 0.00024375917767988255, "loss": 4.4216, "step": 333 }, { "epoch": 0.007367001684422752, "grad_norm": 1.15625, "learning_rate": 0.0002444933920704846, "loss": 4.4275, "step": 334 }, { "epoch": 0.0073890585756934786, "grad_norm": 0.92578125, "learning_rate": 0.00024522760646108663, "loss": 4.37, "step": 335 }, { "epoch": 0.0074111154669642045, "grad_norm": 1.2265625, "learning_rate": 0.0002459618208516887, "loss": 4.4537, "step": 336 }, { "epoch": 0.007433172358234931, "grad_norm": 1.078125, "learning_rate": 0.00024669603524229077, "loss": 4.3588, "step": 337 }, { "epoch": 0.007455229249505658, "grad_norm": 0.83984375, "learning_rate": 0.00024743024963289284, "loss": 4.4745, "step": 338 }, { "epoch": 0.007477286140776385, "grad_norm": 0.9765625, "learning_rate": 0.00024816446402349486, "loss": 4.3467, "step": 339 }, { "epoch": 0.007499343032047112, "grad_norm": 1.296875, "learning_rate": 0.00024889867841409693, "loss": 4.4169, "step": 340 }, { "epoch": 0.007521399923317839, "grad_norm": 1.2265625, "learning_rate": 0.000249632892804699, "loss": 4.459, "step": 341 }, { "epoch": 0.0075434568145885655, "grad_norm": 0.82421875, "learning_rate": 0.000250367107195301, "loss": 4.3392, "step": 342 }, { "epoch": 0.007565513705859292, "grad_norm": 0.84765625, "learning_rate": 0.0002511013215859031, "loss": 4.2748, "step": 343 }, { "epoch": 0.007587570597130019, "grad_norm": 0.8203125, "learning_rate": 0.00025183553597650515, "loss": 4.2866, "step": 344 }, { "epoch": 0.007609627488400746, "grad_norm": 0.6953125, "learning_rate": 0.0002525697503671072, "loss": 4.341, "step": 345 }, { "epoch": 0.007631684379671473, "grad_norm": 0.75, "learning_rate": 0.0002533039647577093, "loss": 4.3137, "step": 346 }, { "epoch": 0.0076537412709422, "grad_norm": 0.83203125, "learning_rate": 0.0002540381791483113, "loss": 4.4657, "step": 347 }, { "epoch": 0.007675798162212926, "grad_norm": 0.92578125, "learning_rate": 0.0002547723935389133, "loss": 4.3421, "step": 348 }, { "epoch": 0.007697855053483653, "grad_norm": 1.0, "learning_rate": 0.0002555066079295154, "loss": 4.3822, "step": 349 }, { "epoch": 0.00771991194475438, "grad_norm": 1.1171875, "learning_rate": 0.00025624082232011746, "loss": 4.3309, "step": 350 }, { "epoch": 0.007741968836025107, "grad_norm": 0.9375, "learning_rate": 0.00025697503671071953, "loss": 4.3004, "step": 351 }, { "epoch": 0.007764025727295834, "grad_norm": 0.859375, "learning_rate": 0.0002577092511013216, "loss": 4.3782, "step": 352 }, { "epoch": 0.0077860826185665605, "grad_norm": 0.7578125, "learning_rate": 0.00025844346549192367, "loss": 4.2833, "step": 353 }, { "epoch": 0.007808139509837287, "grad_norm": 1.0703125, "learning_rate": 0.00025917767988252574, "loss": 4.4069, "step": 354 }, { "epoch": 0.007830196401108014, "grad_norm": 1.1796875, "learning_rate": 0.00025991189427312776, "loss": 4.3639, "step": 355 }, { "epoch": 0.007852253292378741, "grad_norm": 0.80859375, "learning_rate": 0.0002606461086637298, "loss": 4.2821, "step": 356 }, { "epoch": 0.007874310183649468, "grad_norm": 0.84765625, "learning_rate": 0.00026138032305433184, "loss": 4.2608, "step": 357 }, { "epoch": 0.007896367074920195, "grad_norm": 0.83984375, "learning_rate": 0.0002621145374449339, "loss": 4.289, "step": 358 }, { "epoch": 0.007918423966190921, "grad_norm": 0.6953125, "learning_rate": 0.000262848751835536, "loss": 4.2043, "step": 359 }, { "epoch": 0.007940480857461648, "grad_norm": 0.66015625, "learning_rate": 0.00026358296622613805, "loss": 4.2015, "step": 360 }, { "epoch": 0.007962537748732375, "grad_norm": 0.640625, "learning_rate": 0.00026431718061674007, "loss": 4.2316, "step": 361 }, { "epoch": 0.007984594640003102, "grad_norm": 0.671875, "learning_rate": 0.00026505139500734214, "loss": 4.2641, "step": 362 }, { "epoch": 0.008006651531273829, "grad_norm": 0.69921875, "learning_rate": 0.0002657856093979442, "loss": 4.1716, "step": 363 }, { "epoch": 0.008028708422544556, "grad_norm": 0.7265625, "learning_rate": 0.0002665198237885463, "loss": 4.1068, "step": 364 }, { "epoch": 0.008050765313815282, "grad_norm": 0.63671875, "learning_rate": 0.00026725403817914835, "loss": 4.2252, "step": 365 }, { "epoch": 0.00807282220508601, "grad_norm": 0.61328125, "learning_rate": 0.00026798825256975036, "loss": 4.2426, "step": 366 }, { "epoch": 0.008094879096356736, "grad_norm": 0.6640625, "learning_rate": 0.00026872246696035243, "loss": 4.1857, "step": 367 }, { "epoch": 0.008116935987627463, "grad_norm": 0.66015625, "learning_rate": 0.00026945668135095445, "loss": 4.1869, "step": 368 }, { "epoch": 0.00813899287889819, "grad_norm": 0.62890625, "learning_rate": 0.0002701908957415565, "loss": 4.2147, "step": 369 }, { "epoch": 0.008161049770168917, "grad_norm": 0.6171875, "learning_rate": 0.0002709251101321586, "loss": 4.3334, "step": 370 }, { "epoch": 0.008183106661439643, "grad_norm": 0.63671875, "learning_rate": 0.00027165932452276066, "loss": 4.2097, "step": 371 }, { "epoch": 0.00820516355271037, "grad_norm": 0.72265625, "learning_rate": 0.0002723935389133627, "loss": 4.1962, "step": 372 }, { "epoch": 0.008227220443981097, "grad_norm": 0.6875, "learning_rate": 0.0002731277533039648, "loss": 4.1785, "step": 373 }, { "epoch": 0.008249277335251824, "grad_norm": 0.73828125, "learning_rate": 0.0002738619676945668, "loss": 4.1153, "step": 374 }, { "epoch": 0.00827133422652255, "grad_norm": 0.9453125, "learning_rate": 0.0002745961820851689, "loss": 4.1065, "step": 375 }, { "epoch": 0.008293391117793278, "grad_norm": 1.2578125, "learning_rate": 0.0002753303964757709, "loss": 4.2205, "step": 376 }, { "epoch": 0.008315448009064004, "grad_norm": 0.9921875, "learning_rate": 0.00027606461086637297, "loss": 4.1689, "step": 377 }, { "epoch": 0.008337504900334731, "grad_norm": 0.8515625, "learning_rate": 0.00027679882525697504, "loss": 4.2652, "step": 378 }, { "epoch": 0.008359561791605458, "grad_norm": 1.109375, "learning_rate": 0.0002775330396475771, "loss": 4.2334, "step": 379 }, { "epoch": 0.008381618682876185, "grad_norm": 1.171875, "learning_rate": 0.0002782672540381792, "loss": 4.1274, "step": 380 }, { "epoch": 0.008403675574146912, "grad_norm": 0.82421875, "learning_rate": 0.0002790014684287812, "loss": 4.1452, "step": 381 }, { "epoch": 0.008425732465417638, "grad_norm": 0.7578125, "learning_rate": 0.00027973568281938326, "loss": 4.1612, "step": 382 }, { "epoch": 0.008447789356688365, "grad_norm": 0.859375, "learning_rate": 0.00028046989720998533, "loss": 4.1586, "step": 383 }, { "epoch": 0.008469846247959092, "grad_norm": 0.9609375, "learning_rate": 0.0002812041116005874, "loss": 4.115, "step": 384 }, { "epoch": 0.008491903139229819, "grad_norm": 0.9453125, "learning_rate": 0.00028193832599118947, "loss": 4.1361, "step": 385 }, { "epoch": 0.008513960030500546, "grad_norm": 1.09375, "learning_rate": 0.0002826725403817915, "loss": 4.106, "step": 386 }, { "epoch": 0.008536016921771273, "grad_norm": 0.890625, "learning_rate": 0.0002834067547723935, "loss": 4.0788, "step": 387 }, { "epoch": 0.008558073813042, "grad_norm": 0.8515625, "learning_rate": 0.00028414096916299557, "loss": 4.2196, "step": 388 }, { "epoch": 0.008580130704312725, "grad_norm": 0.828125, "learning_rate": 0.00028487518355359764, "loss": 4.1695, "step": 389 }, { "epoch": 0.008602187595583451, "grad_norm": 0.73046875, "learning_rate": 0.0002856093979441997, "loss": 4.0784, "step": 390 }, { "epoch": 0.008624244486854178, "grad_norm": 0.6640625, "learning_rate": 0.0002863436123348018, "loss": 4.0353, "step": 391 }, { "epoch": 0.008646301378124905, "grad_norm": 0.71484375, "learning_rate": 0.00028707782672540385, "loss": 4.1199, "step": 392 }, { "epoch": 0.008668358269395632, "grad_norm": 0.8203125, "learning_rate": 0.0002878120411160059, "loss": 4.1328, "step": 393 }, { "epoch": 0.008690415160666359, "grad_norm": 0.73046875, "learning_rate": 0.00028854625550660794, "loss": 4.1741, "step": 394 }, { "epoch": 0.008712472051937085, "grad_norm": 0.62890625, "learning_rate": 0.00028928046989721, "loss": 4.0385, "step": 395 }, { "epoch": 0.008734528943207812, "grad_norm": 0.57421875, "learning_rate": 0.000290014684287812, "loss": 4.128, "step": 396 }, { "epoch": 0.008756585834478539, "grad_norm": 0.6171875, "learning_rate": 0.0002907488986784141, "loss": 4.1042, "step": 397 }, { "epoch": 0.008778642725749266, "grad_norm": 0.578125, "learning_rate": 0.00029148311306901616, "loss": 3.9853, "step": 398 }, { "epoch": 0.008800699617019993, "grad_norm": 0.51953125, "learning_rate": 0.00029221732745961823, "loss": 4.1129, "step": 399 }, { "epoch": 0.00882275650829072, "grad_norm": 0.58984375, "learning_rate": 0.00029295154185022025, "loss": 4.1016, "step": 400 }, { "epoch": 0.008844813399561446, "grad_norm": 0.5703125, "learning_rate": 0.0002936857562408223, "loss": 4.1313, "step": 401 }, { "epoch": 0.008866870290832173, "grad_norm": 0.5546875, "learning_rate": 0.0002944199706314244, "loss": 4.1434, "step": 402 }, { "epoch": 0.0088889271821029, "grad_norm": 0.578125, "learning_rate": 0.00029515418502202645, "loss": 4.0612, "step": 403 }, { "epoch": 0.008910984073373627, "grad_norm": 0.7421875, "learning_rate": 0.0002958883994126285, "loss": 4.0488, "step": 404 }, { "epoch": 0.008933040964644354, "grad_norm": 0.6640625, "learning_rate": 0.00029662261380323054, "loss": 4.1197, "step": 405 }, { "epoch": 0.00895509785591508, "grad_norm": 0.55078125, "learning_rate": 0.0002973568281938326, "loss": 4.1032, "step": 406 }, { "epoch": 0.008977154747185807, "grad_norm": 0.671875, "learning_rate": 0.0002980910425844346, "loss": 4.0156, "step": 407 }, { "epoch": 0.008999211638456534, "grad_norm": 0.78515625, "learning_rate": 0.0002988252569750367, "loss": 4.0009, "step": 408 }, { "epoch": 0.009021268529727261, "grad_norm": 0.94921875, "learning_rate": 0.00029955947136563876, "loss": 4.0848, "step": 409 }, { "epoch": 0.009043325420997988, "grad_norm": 0.92578125, "learning_rate": 0.00030029368575624083, "loss": 4.0344, "step": 410 }, { "epoch": 0.009065382312268715, "grad_norm": 0.76953125, "learning_rate": 0.0003010279001468429, "loss": 4.0146, "step": 411 }, { "epoch": 0.009087439203539441, "grad_norm": 0.85546875, "learning_rate": 0.000301762114537445, "loss": 4.064, "step": 412 }, { "epoch": 0.009109496094810168, "grad_norm": 0.91796875, "learning_rate": 0.000302496328928047, "loss": 4.0834, "step": 413 }, { "epoch": 0.009131552986080895, "grad_norm": 1.0390625, "learning_rate": 0.00030323054331864906, "loss": 3.9925, "step": 414 }, { "epoch": 0.009153609877351622, "grad_norm": 0.8671875, "learning_rate": 0.0003039647577092511, "loss": 4.0487, "step": 415 }, { "epoch": 0.009175666768622349, "grad_norm": 0.8203125, "learning_rate": 0.00030469897209985314, "loss": 4.0501, "step": 416 }, { "epoch": 0.009197723659893076, "grad_norm": 0.796875, "learning_rate": 0.0003054331864904552, "loss": 3.9849, "step": 417 }, { "epoch": 0.009219780551163802, "grad_norm": 0.84375, "learning_rate": 0.0003061674008810573, "loss": 3.9757, "step": 418 }, { "epoch": 0.00924183744243453, "grad_norm": 0.76953125, "learning_rate": 0.00030690161527165935, "loss": 3.9475, "step": 419 }, { "epoch": 0.009263894333705256, "grad_norm": 0.6328125, "learning_rate": 0.00030763582966226137, "loss": 4.0022, "step": 420 }, { "epoch": 0.009285951224975983, "grad_norm": 0.69921875, "learning_rate": 0.00030837004405286344, "loss": 4.0391, "step": 421 }, { "epoch": 0.00930800811624671, "grad_norm": 0.64453125, "learning_rate": 0.0003091042584434655, "loss": 3.9806, "step": 422 }, { "epoch": 0.009330065007517437, "grad_norm": 0.578125, "learning_rate": 0.0003098384728340676, "loss": 4.0429, "step": 423 }, { "epoch": 0.009352121898788163, "grad_norm": 0.5859375, "learning_rate": 0.00031057268722466965, "loss": 4.0391, "step": 424 }, { "epoch": 0.00937417879005889, "grad_norm": 0.58984375, "learning_rate": 0.00031130690161527166, "loss": 3.9352, "step": 425 }, { "epoch": 0.009396235681329617, "grad_norm": 0.61328125, "learning_rate": 0.0003120411160058737, "loss": 3.9711, "step": 426 }, { "epoch": 0.009418292572600344, "grad_norm": 0.5859375, "learning_rate": 0.00031277533039647575, "loss": 4.0075, "step": 427 }, { "epoch": 0.00944034946387107, "grad_norm": 0.59765625, "learning_rate": 0.0003135095447870778, "loss": 3.9589, "step": 428 }, { "epoch": 0.009462406355141798, "grad_norm": 0.5234375, "learning_rate": 0.0003142437591776799, "loss": 4.0364, "step": 429 }, { "epoch": 0.009484463246412524, "grad_norm": 0.546875, "learning_rate": 0.00031497797356828196, "loss": 3.9871, "step": 430 }, { "epoch": 0.009506520137683251, "grad_norm": 0.5859375, "learning_rate": 0.00031571218795888403, "loss": 3.9766, "step": 431 }, { "epoch": 0.009528577028953978, "grad_norm": 0.63671875, "learning_rate": 0.0003164464023494861, "loss": 3.9221, "step": 432 }, { "epoch": 0.009550633920224705, "grad_norm": 0.55859375, "learning_rate": 0.0003171806167400881, "loss": 3.9393, "step": 433 }, { "epoch": 0.009572690811495432, "grad_norm": 0.5390625, "learning_rate": 0.00031791483113069013, "loss": 4.0216, "step": 434 }, { "epoch": 0.009594747702766158, "grad_norm": 0.55078125, "learning_rate": 0.0003186490455212922, "loss": 3.9577, "step": 435 }, { "epoch": 0.009616804594036885, "grad_norm": 0.49609375, "learning_rate": 0.00031938325991189427, "loss": 4.0153, "step": 436 }, { "epoch": 0.009638861485307612, "grad_norm": 0.55078125, "learning_rate": 0.00032011747430249634, "loss": 3.8975, "step": 437 }, { "epoch": 0.009660918376578339, "grad_norm": 0.5625, "learning_rate": 0.0003208516886930984, "loss": 3.9605, "step": 438 }, { "epoch": 0.009682975267849066, "grad_norm": 0.49609375, "learning_rate": 0.0003215859030837004, "loss": 3.9097, "step": 439 }, { "epoch": 0.009705032159119793, "grad_norm": 0.53515625, "learning_rate": 0.0003223201174743025, "loss": 3.9205, "step": 440 }, { "epoch": 0.00972708905039052, "grad_norm": 0.71484375, "learning_rate": 0.00032305433186490456, "loss": 3.9674, "step": 441 }, { "epoch": 0.009749145941661246, "grad_norm": 0.77734375, "learning_rate": 0.00032378854625550663, "loss": 3.9836, "step": 442 }, { "epoch": 0.009771202832931973, "grad_norm": 0.79296875, "learning_rate": 0.0003245227606461087, "loss": 4.0141, "step": 443 }, { "epoch": 0.0097932597242027, "grad_norm": 0.62109375, "learning_rate": 0.0003252569750367107, "loss": 3.9828, "step": 444 }, { "epoch": 0.009815316615473427, "grad_norm": 0.578125, "learning_rate": 0.0003259911894273128, "loss": 3.9415, "step": 445 }, { "epoch": 0.009837373506744154, "grad_norm": 0.6484375, "learning_rate": 0.0003267254038179148, "loss": 3.8966, "step": 446 }, { "epoch": 0.00985943039801488, "grad_norm": 0.6171875, "learning_rate": 0.00032745961820851687, "loss": 4.0387, "step": 447 }, { "epoch": 0.009881487289285607, "grad_norm": 0.609375, "learning_rate": 0.00032819383259911894, "loss": 3.8989, "step": 448 }, { "epoch": 0.009903544180556334, "grad_norm": 0.69921875, "learning_rate": 0.000328928046989721, "loss": 3.9583, "step": 449 }, { "epoch": 0.00992560107182706, "grad_norm": 0.7109375, "learning_rate": 0.0003296622613803231, "loss": 3.9199, "step": 450 }, { "epoch": 0.009947657963097788, "grad_norm": 0.81640625, "learning_rate": 0.00033039647577092515, "loss": 3.9314, "step": 451 }, { "epoch": 0.009969714854368514, "grad_norm": 0.7734375, "learning_rate": 0.00033113069016152717, "loss": 3.8986, "step": 452 }, { "epoch": 0.009991771745639241, "grad_norm": 0.77734375, "learning_rate": 0.00033186490455212924, "loss": 3.9714, "step": 453 }, { "epoch": 0.010013828636909968, "grad_norm": 0.78515625, "learning_rate": 0.00033259911894273125, "loss": 3.888, "step": 454 }, { "epoch": 0.010035885528180695, "grad_norm": 0.7109375, "learning_rate": 0.0003333333333333333, "loss": 3.968, "step": 455 }, { "epoch": 0.010057942419451422, "grad_norm": 0.8046875, "learning_rate": 0.0003340675477239354, "loss": 3.9217, "step": 456 }, { "epoch": 0.010079999310722149, "grad_norm": 0.6484375, "learning_rate": 0.00033480176211453746, "loss": 3.8972, "step": 457 }, { "epoch": 0.010102056201992875, "grad_norm": 0.63671875, "learning_rate": 0.00033553597650513953, "loss": 3.827, "step": 458 }, { "epoch": 0.0101241130932636, "grad_norm": 0.70703125, "learning_rate": 0.00033627019089574155, "loss": 3.9624, "step": 459 }, { "epoch": 0.010146169984534327, "grad_norm": 0.5859375, "learning_rate": 0.0003370044052863436, "loss": 3.8425, "step": 460 }, { "epoch": 0.010168226875805054, "grad_norm": 0.58984375, "learning_rate": 0.0003377386196769457, "loss": 3.9249, "step": 461 }, { "epoch": 0.010190283767075781, "grad_norm": 0.5625, "learning_rate": 0.00033847283406754776, "loss": 3.9476, "step": 462 }, { "epoch": 0.010212340658346508, "grad_norm": 0.625, "learning_rate": 0.0003392070484581498, "loss": 3.872, "step": 463 }, { "epoch": 0.010234397549617235, "grad_norm": 0.486328125, "learning_rate": 0.00033994126284875184, "loss": 3.797, "step": 464 }, { "epoch": 0.010256454440887961, "grad_norm": 0.5390625, "learning_rate": 0.00034067547723935386, "loss": 3.9119, "step": 465 }, { "epoch": 0.010278511332158688, "grad_norm": 0.57421875, "learning_rate": 0.0003414096916299559, "loss": 3.9151, "step": 466 }, { "epoch": 0.010300568223429415, "grad_norm": 0.6328125, "learning_rate": 0.000342143906020558, "loss": 3.7625, "step": 467 }, { "epoch": 0.010322625114700142, "grad_norm": 0.76953125, "learning_rate": 0.00034287812041116007, "loss": 3.8558, "step": 468 }, { "epoch": 0.010344682005970869, "grad_norm": 0.7109375, "learning_rate": 0.00034361233480176214, "loss": 3.9922, "step": 469 }, { "epoch": 0.010366738897241596, "grad_norm": 0.62890625, "learning_rate": 0.0003443465491923642, "loss": 3.8405, "step": 470 }, { "epoch": 0.010388795788512322, "grad_norm": 0.6796875, "learning_rate": 0.0003450807635829663, "loss": 3.9655, "step": 471 }, { "epoch": 0.01041085267978305, "grad_norm": 0.63671875, "learning_rate": 0.0003458149779735683, "loss": 3.8625, "step": 472 }, { "epoch": 0.010432909571053776, "grad_norm": 0.62890625, "learning_rate": 0.0003465491923641703, "loss": 3.844, "step": 473 }, { "epoch": 0.010454966462324503, "grad_norm": 0.59765625, "learning_rate": 0.0003472834067547724, "loss": 3.8431, "step": 474 }, { "epoch": 0.01047702335359523, "grad_norm": 0.490234375, "learning_rate": 0.00034801762114537445, "loss": 3.8544, "step": 475 }, { "epoch": 0.010499080244865957, "grad_norm": 0.4453125, "learning_rate": 0.0003487518355359765, "loss": 3.7682, "step": 476 }, { "epoch": 0.010521137136136683, "grad_norm": 0.48828125, "learning_rate": 0.0003494860499265786, "loss": 3.8169, "step": 477 }, { "epoch": 0.01054319402740741, "grad_norm": 0.5546875, "learning_rate": 0.0003502202643171806, "loss": 3.839, "step": 478 }, { "epoch": 0.010565250918678137, "grad_norm": 0.5546875, "learning_rate": 0.00035095447870778267, "loss": 3.8427, "step": 479 }, { "epoch": 0.010587307809948864, "grad_norm": 0.58203125, "learning_rate": 0.00035168869309838474, "loss": 3.8443, "step": 480 }, { "epoch": 0.01060936470121959, "grad_norm": 0.5625, "learning_rate": 0.0003524229074889868, "loss": 3.8176, "step": 481 }, { "epoch": 0.010631421592490317, "grad_norm": 0.58203125, "learning_rate": 0.0003531571218795889, "loss": 3.8471, "step": 482 }, { "epoch": 0.010653478483761044, "grad_norm": 0.53125, "learning_rate": 0.0003538913362701909, "loss": 3.7803, "step": 483 }, { "epoch": 0.010675535375031771, "grad_norm": 0.5859375, "learning_rate": 0.00035462555066079297, "loss": 3.7913, "step": 484 }, { "epoch": 0.010697592266302498, "grad_norm": 0.63671875, "learning_rate": 0.000355359765051395, "loss": 3.8698, "step": 485 }, { "epoch": 0.010719649157573225, "grad_norm": 0.58203125, "learning_rate": 0.00035609397944199705, "loss": 3.8499, "step": 486 }, { "epoch": 0.010741706048843952, "grad_norm": 0.498046875, "learning_rate": 0.0003568281938325991, "loss": 3.7538, "step": 487 }, { "epoch": 0.010763762940114678, "grad_norm": 0.65234375, "learning_rate": 0.0003575624082232012, "loss": 3.7788, "step": 488 }, { "epoch": 0.010785819831385405, "grad_norm": 0.69921875, "learning_rate": 0.00035829662261380326, "loss": 3.8521, "step": 489 }, { "epoch": 0.010807876722656132, "grad_norm": 0.5625, "learning_rate": 0.00035903083700440533, "loss": 3.7454, "step": 490 }, { "epoch": 0.010829933613926859, "grad_norm": 0.49609375, "learning_rate": 0.00035976505139500735, "loss": 3.7143, "step": 491 }, { "epoch": 0.010851990505197586, "grad_norm": 0.55078125, "learning_rate": 0.0003604992657856094, "loss": 3.737, "step": 492 }, { "epoch": 0.010874047396468313, "grad_norm": 0.63671875, "learning_rate": 0.00036123348017621143, "loss": 3.8137, "step": 493 }, { "epoch": 0.01089610428773904, "grad_norm": 0.64453125, "learning_rate": 0.0003619676945668135, "loss": 3.7559, "step": 494 }, { "epoch": 0.010918161179009766, "grad_norm": 0.51953125, "learning_rate": 0.00036270190895741557, "loss": 3.8143, "step": 495 }, { "epoch": 0.010940218070280493, "grad_norm": 0.4921875, "learning_rate": 0.00036343612334801764, "loss": 3.7696, "step": 496 }, { "epoch": 0.01096227496155122, "grad_norm": 0.470703125, "learning_rate": 0.0003641703377386197, "loss": 3.7482, "step": 497 }, { "epoch": 0.010984331852821947, "grad_norm": 0.498046875, "learning_rate": 0.0003649045521292217, "loss": 3.8063, "step": 498 }, { "epoch": 0.011006388744092674, "grad_norm": 0.486328125, "learning_rate": 0.0003656387665198238, "loss": 3.7613, "step": 499 }, { "epoch": 0.0110284456353634, "grad_norm": 0.482421875, "learning_rate": 0.00036637298091042586, "loss": 3.7548, "step": 500 }, { "epoch": 0.011050502526634127, "grad_norm": 0.44921875, "learning_rate": 0.00036710719530102793, "loss": 3.8576, "step": 501 }, { "epoch": 0.011072559417904854, "grad_norm": 0.44140625, "learning_rate": 0.00036784140969163, "loss": 3.7253, "step": 502 }, { "epoch": 0.01109461630917558, "grad_norm": 0.53125, "learning_rate": 0.000368575624082232, "loss": 3.7582, "step": 503 }, { "epoch": 0.011116673200446308, "grad_norm": 0.53125, "learning_rate": 0.00036930983847283404, "loss": 3.6852, "step": 504 }, { "epoch": 0.011138730091717034, "grad_norm": 0.447265625, "learning_rate": 0.0003700440528634361, "loss": 3.7848, "step": 505 }, { "epoch": 0.011160786982987761, "grad_norm": 0.458984375, "learning_rate": 0.0003707782672540382, "loss": 3.6714, "step": 506 }, { "epoch": 0.011182843874258488, "grad_norm": 0.478515625, "learning_rate": 0.00037151248164464024, "loss": 3.7402, "step": 507 }, { "epoch": 0.011204900765529215, "grad_norm": 0.484375, "learning_rate": 0.0003722466960352423, "loss": 3.8247, "step": 508 }, { "epoch": 0.011226957656799942, "grad_norm": 0.494140625, "learning_rate": 0.0003729809104258444, "loss": 3.7703, "step": 509 }, { "epoch": 0.011249014548070669, "grad_norm": 0.45703125, "learning_rate": 0.00037371512481644645, "loss": 3.8114, "step": 510 }, { "epoch": 0.011271071439341395, "grad_norm": 0.453125, "learning_rate": 0.00037444933920704847, "loss": 3.7435, "step": 511 }, { "epoch": 0.011293128330612122, "grad_norm": 0.49609375, "learning_rate": 0.0003751835535976505, "loss": 3.7998, "step": 512 }, { "epoch": 0.011315185221882849, "grad_norm": 0.5078125, "learning_rate": 0.00037591776798825255, "loss": 3.7481, "step": 513 }, { "epoch": 0.011337242113153576, "grad_norm": 0.4921875, "learning_rate": 0.0003766519823788546, "loss": 3.7254, "step": 514 }, { "epoch": 0.011359299004424303, "grad_norm": 0.486328125, "learning_rate": 0.0003773861967694567, "loss": 3.7595, "step": 515 }, { "epoch": 0.01138135589569503, "grad_norm": 0.462890625, "learning_rate": 0.00037812041116005876, "loss": 3.7963, "step": 516 }, { "epoch": 0.011403412786965756, "grad_norm": 0.494140625, "learning_rate": 0.0003788546255506608, "loss": 3.7327, "step": 517 }, { "epoch": 0.011425469678236483, "grad_norm": 0.484375, "learning_rate": 0.00037958883994126285, "loss": 3.7817, "step": 518 }, { "epoch": 0.01144752656950721, "grad_norm": 0.5, "learning_rate": 0.0003803230543318649, "loss": 3.6731, "step": 519 }, { "epoch": 0.011469583460777937, "grad_norm": 0.54296875, "learning_rate": 0.000381057268722467, "loss": 3.7333, "step": 520 }, { "epoch": 0.011491640352048664, "grad_norm": 0.546875, "learning_rate": 0.00038179148311306906, "loss": 3.6446, "step": 521 }, { "epoch": 0.01151369724331939, "grad_norm": 0.5390625, "learning_rate": 0.0003825256975036711, "loss": 3.6608, "step": 522 }, { "epoch": 0.011535754134590117, "grad_norm": 0.5703125, "learning_rate": 0.00038325991189427314, "loss": 3.6931, "step": 523 }, { "epoch": 0.011557811025860844, "grad_norm": 0.5, "learning_rate": 0.00038399412628487516, "loss": 3.673, "step": 524 }, { "epoch": 0.011579867917131571, "grad_norm": 0.515625, "learning_rate": 0.00038472834067547723, "loss": 3.6981, "step": 525 }, { "epoch": 0.011601924808402298, "grad_norm": 0.56640625, "learning_rate": 0.0003854625550660793, "loss": 3.6582, "step": 526 }, { "epoch": 0.011623981699673025, "grad_norm": 0.64453125, "learning_rate": 0.00038619676945668137, "loss": 3.6819, "step": 527 }, { "epoch": 0.011646038590943751, "grad_norm": 0.70703125, "learning_rate": 0.00038693098384728344, "loss": 3.6961, "step": 528 }, { "epoch": 0.011668095482214478, "grad_norm": 0.66015625, "learning_rate": 0.0003876651982378855, "loss": 3.7037, "step": 529 }, { "epoch": 0.011690152373485203, "grad_norm": 0.84765625, "learning_rate": 0.0003883994126284875, "loss": 3.5962, "step": 530 }, { "epoch": 0.01171220926475593, "grad_norm": 1.0, "learning_rate": 0.0003891336270190896, "loss": 3.6984, "step": 531 }, { "epoch": 0.011734266156026657, "grad_norm": 1.0078125, "learning_rate": 0.0003898678414096916, "loss": 3.7039, "step": 532 }, { "epoch": 0.011756323047297384, "grad_norm": 0.82421875, "learning_rate": 0.0003906020558002937, "loss": 3.7629, "step": 533 }, { "epoch": 0.01177837993856811, "grad_norm": 1.0625, "learning_rate": 0.00039133627019089575, "loss": 3.7325, "step": 534 }, { "epoch": 0.011800436829838837, "grad_norm": 1.1875, "learning_rate": 0.0003920704845814978, "loss": 3.6959, "step": 535 }, { "epoch": 0.011822493721109564, "grad_norm": 1.046875, "learning_rate": 0.0003928046989720999, "loss": 3.7176, "step": 536 }, { "epoch": 0.011844550612380291, "grad_norm": 0.828125, "learning_rate": 0.0003935389133627019, "loss": 3.7211, "step": 537 }, { "epoch": 0.011866607503651018, "grad_norm": 0.98046875, "learning_rate": 0.00039427312775330397, "loss": 3.6654, "step": 538 }, { "epoch": 0.011888664394921745, "grad_norm": 1.0, "learning_rate": 0.00039500734214390604, "loss": 3.7114, "step": 539 }, { "epoch": 0.011910721286192472, "grad_norm": 1.09375, "learning_rate": 0.0003957415565345081, "loss": 3.5856, "step": 540 }, { "epoch": 0.011932778177463198, "grad_norm": 0.8046875, "learning_rate": 0.00039647577092511013, "loss": 3.7063, "step": 541 }, { "epoch": 0.011954835068733925, "grad_norm": 0.91015625, "learning_rate": 0.0003972099853157122, "loss": 3.7159, "step": 542 }, { "epoch": 0.011976891960004652, "grad_norm": 0.93359375, "learning_rate": 0.0003979441997063142, "loss": 3.6617, "step": 543 }, { "epoch": 0.011998948851275379, "grad_norm": 0.921875, "learning_rate": 0.0003986784140969163, "loss": 3.6768, "step": 544 }, { "epoch": 0.012021005742546106, "grad_norm": 1.328125, "learning_rate": 0.00039941262848751835, "loss": 3.7763, "step": 545 }, { "epoch": 0.012043062633816833, "grad_norm": 0.98828125, "learning_rate": 0.0004001468428781204, "loss": 3.7553, "step": 546 }, { "epoch": 0.01206511952508756, "grad_norm": 0.890625, "learning_rate": 0.0004008810572687225, "loss": 3.6826, "step": 547 }, { "epoch": 0.012087176416358286, "grad_norm": 0.80859375, "learning_rate": 0.00040161527165932456, "loss": 3.783, "step": 548 }, { "epoch": 0.012109233307629013, "grad_norm": 0.9453125, "learning_rate": 0.00040234948604992663, "loss": 3.647, "step": 549 }, { "epoch": 0.01213129019889974, "grad_norm": 0.97265625, "learning_rate": 0.00040308370044052865, "loss": 3.6847, "step": 550 }, { "epoch": 0.012153347090170467, "grad_norm": 0.95703125, "learning_rate": 0.00040381791483113066, "loss": 3.7068, "step": 551 }, { "epoch": 0.012175403981441194, "grad_norm": 0.7421875, "learning_rate": 0.00040455212922173273, "loss": 3.6775, "step": 552 }, { "epoch": 0.01219746087271192, "grad_norm": 0.6328125, "learning_rate": 0.0004052863436123348, "loss": 3.6628, "step": 553 }, { "epoch": 0.012219517763982647, "grad_norm": 0.6328125, "learning_rate": 0.00040602055800293687, "loss": 3.618, "step": 554 }, { "epoch": 0.012241574655253374, "grad_norm": 0.59375, "learning_rate": 0.00040675477239353894, "loss": 3.5528, "step": 555 }, { "epoch": 0.0122636315465241, "grad_norm": 0.443359375, "learning_rate": 0.00040748898678414096, "loss": 3.6801, "step": 556 }, { "epoch": 0.012285688437794828, "grad_norm": 0.4765625, "learning_rate": 0.000408223201174743, "loss": 3.5528, "step": 557 }, { "epoch": 0.012307745329065554, "grad_norm": 0.412109375, "learning_rate": 0.0004089574155653451, "loss": 3.6885, "step": 558 }, { "epoch": 0.012329802220336281, "grad_norm": 0.37109375, "learning_rate": 0.00040969162995594717, "loss": 3.6048, "step": 559 }, { "epoch": 0.012351859111607008, "grad_norm": 0.408203125, "learning_rate": 0.00041042584434654924, "loss": 3.6117, "step": 560 }, { "epoch": 0.012373916002877735, "grad_norm": 0.353515625, "learning_rate": 0.00041116005873715125, "loss": 3.6365, "step": 561 }, { "epoch": 0.012395972894148462, "grad_norm": 0.37109375, "learning_rate": 0.00041189427312775327, "loss": 3.6174, "step": 562 }, { "epoch": 0.012418029785419189, "grad_norm": 0.3828125, "learning_rate": 0.00041262848751835534, "loss": 3.6257, "step": 563 }, { "epoch": 0.012440086676689915, "grad_norm": 0.373046875, "learning_rate": 0.0004133627019089574, "loss": 3.5553, "step": 564 }, { "epoch": 0.012462143567960642, "grad_norm": 0.353515625, "learning_rate": 0.0004140969162995595, "loss": 3.6233, "step": 565 }, { "epoch": 0.012484200459231369, "grad_norm": 0.359375, "learning_rate": 0.00041483113069016155, "loss": 3.6455, "step": 566 }, { "epoch": 0.012506257350502096, "grad_norm": 0.357421875, "learning_rate": 0.0004155653450807636, "loss": 3.6146, "step": 567 }, { "epoch": 0.012528314241772823, "grad_norm": 0.38671875, "learning_rate": 0.0004162995594713657, "loss": 3.5235, "step": 568 }, { "epoch": 0.01255037113304355, "grad_norm": 0.330078125, "learning_rate": 0.0004170337738619677, "loss": 3.4837, "step": 569 }, { "epoch": 0.012572428024314276, "grad_norm": 0.361328125, "learning_rate": 0.00041776798825256977, "loss": 3.5487, "step": 570 }, { "epoch": 0.012594484915585003, "grad_norm": 0.369140625, "learning_rate": 0.0004185022026431718, "loss": 3.5253, "step": 571 }, { "epoch": 0.01261654180685573, "grad_norm": 0.349609375, "learning_rate": 0.00041923641703377386, "loss": 3.514, "step": 572 }, { "epoch": 0.012638598698126457, "grad_norm": 0.35546875, "learning_rate": 0.0004199706314243759, "loss": 3.5045, "step": 573 }, { "epoch": 0.012660655589397184, "grad_norm": 0.3359375, "learning_rate": 0.000420704845814978, "loss": 3.5599, "step": 574 }, { "epoch": 0.01268271248066791, "grad_norm": 0.35546875, "learning_rate": 0.00042143906020558, "loss": 3.5605, "step": 575 }, { "epoch": 0.012704769371938637, "grad_norm": 0.34765625, "learning_rate": 0.0004221732745961821, "loss": 3.6041, "step": 576 }, { "epoch": 0.012726826263209364, "grad_norm": 0.341796875, "learning_rate": 0.00042290748898678415, "loss": 3.4277, "step": 577 }, { "epoch": 0.012748883154480091, "grad_norm": 0.361328125, "learning_rate": 0.0004236417033773862, "loss": 3.5717, "step": 578 }, { "epoch": 0.012770940045750818, "grad_norm": 0.361328125, "learning_rate": 0.0004243759177679883, "loss": 3.4844, "step": 579 }, { "epoch": 0.012792996937021545, "grad_norm": 0.37109375, "learning_rate": 0.0004251101321585903, "loss": 3.5025, "step": 580 }, { "epoch": 0.012815053828292271, "grad_norm": 0.353515625, "learning_rate": 0.0004258443465491924, "loss": 3.5135, "step": 581 }, { "epoch": 0.012837110719562998, "grad_norm": 0.361328125, "learning_rate": 0.0004265785609397944, "loss": 3.5852, "step": 582 }, { "epoch": 0.012859167610833725, "grad_norm": 0.39453125, "learning_rate": 0.00042731277533039646, "loss": 3.5151, "step": 583 }, { "epoch": 0.012881224502104452, "grad_norm": 0.361328125, "learning_rate": 0.00042804698972099853, "loss": 3.5384, "step": 584 }, { "epoch": 0.012903281393375179, "grad_norm": 0.388671875, "learning_rate": 0.0004287812041116006, "loss": 3.4602, "step": 585 }, { "epoch": 0.012925338284645906, "grad_norm": 0.37890625, "learning_rate": 0.00042951541850220267, "loss": 3.3667, "step": 586 }, { "epoch": 0.012947395175916632, "grad_norm": 0.34765625, "learning_rate": 0.00043024963289280474, "loss": 3.4728, "step": 587 }, { "epoch": 0.01296945206718736, "grad_norm": 0.337890625, "learning_rate": 0.0004309838472834068, "loss": 3.4695, "step": 588 }, { "epoch": 0.012991508958458086, "grad_norm": 0.34765625, "learning_rate": 0.0004317180616740088, "loss": 3.4528, "step": 589 }, { "epoch": 0.013013565849728813, "grad_norm": 0.35546875, "learning_rate": 0.00043245227606461084, "loss": 3.5245, "step": 590 }, { "epoch": 0.01303562274099954, "grad_norm": 0.322265625, "learning_rate": 0.0004331864904552129, "loss": 3.4359, "step": 591 }, { "epoch": 0.013057679632270267, "grad_norm": 0.341796875, "learning_rate": 0.000433920704845815, "loss": 3.4656, "step": 592 }, { "epoch": 0.013079736523540993, "grad_norm": 0.33984375, "learning_rate": 0.00043465491923641705, "loss": 3.4329, "step": 593 }, { "epoch": 0.01310179341481172, "grad_norm": 0.35546875, "learning_rate": 0.0004353891336270191, "loss": 3.5249, "step": 594 }, { "epoch": 0.013123850306082447, "grad_norm": 0.33203125, "learning_rate": 0.00043612334801762114, "loss": 3.4651, "step": 595 }, { "epoch": 0.013145907197353174, "grad_norm": 0.341796875, "learning_rate": 0.0004368575624082232, "loss": 3.4611, "step": 596 }, { "epoch": 0.0131679640886239, "grad_norm": 0.34375, "learning_rate": 0.0004375917767988253, "loss": 3.4339, "step": 597 }, { "epoch": 0.013190020979894627, "grad_norm": 0.35546875, "learning_rate": 0.00043832599118942734, "loss": 3.4939, "step": 598 }, { "epoch": 0.013212077871165354, "grad_norm": 0.361328125, "learning_rate": 0.0004390602055800294, "loss": 3.3428, "step": 599 }, { "epoch": 0.013234134762436081, "grad_norm": 0.361328125, "learning_rate": 0.00043979441997063143, "loss": 3.4028, "step": 600 }, { "epoch": 0.013256191653706806, "grad_norm": 0.34375, "learning_rate": 0.00044052863436123345, "loss": 3.3552, "step": 601 }, { "epoch": 0.013278248544977533, "grad_norm": 0.35546875, "learning_rate": 0.0004412628487518355, "loss": 3.4203, "step": 602 }, { "epoch": 0.01330030543624826, "grad_norm": 0.349609375, "learning_rate": 0.0004419970631424376, "loss": 3.4737, "step": 603 }, { "epoch": 0.013322362327518987, "grad_norm": 0.35546875, "learning_rate": 0.00044273127753303965, "loss": 3.3603, "step": 604 }, { "epoch": 0.013344419218789714, "grad_norm": 0.353515625, "learning_rate": 0.0004434654919236417, "loss": 3.3239, "step": 605 }, { "epoch": 0.01336647611006044, "grad_norm": 0.353515625, "learning_rate": 0.0004441997063142438, "loss": 3.442, "step": 606 }, { "epoch": 0.013388533001331167, "grad_norm": 0.361328125, "learning_rate": 0.00044493392070484586, "loss": 3.3946, "step": 607 }, { "epoch": 0.013410589892601894, "grad_norm": 0.357421875, "learning_rate": 0.0004456681350954479, "loss": 3.4329, "step": 608 }, { "epoch": 0.01343264678387262, "grad_norm": 0.373046875, "learning_rate": 0.00044640234948604995, "loss": 3.4629, "step": 609 }, { "epoch": 0.013454703675143348, "grad_norm": 0.361328125, "learning_rate": 0.00044713656387665196, "loss": 3.3732, "step": 610 }, { "epoch": 0.013476760566414074, "grad_norm": 0.373046875, "learning_rate": 0.00044787077826725403, "loss": 3.3808, "step": 611 }, { "epoch": 0.013498817457684801, "grad_norm": 0.41796875, "learning_rate": 0.0004486049926578561, "loss": 3.4752, "step": 612 }, { "epoch": 0.013520874348955528, "grad_norm": 0.384765625, "learning_rate": 0.0004493392070484582, "loss": 3.4366, "step": 613 }, { "epoch": 0.013542931240226255, "grad_norm": 0.36328125, "learning_rate": 0.0004500734214390602, "loss": 3.3842, "step": 614 }, { "epoch": 0.013564988131496982, "grad_norm": 0.390625, "learning_rate": 0.00045080763582966226, "loss": 3.4059, "step": 615 }, { "epoch": 0.013587045022767709, "grad_norm": 0.361328125, "learning_rate": 0.00045154185022026433, "loss": 3.4776, "step": 616 }, { "epoch": 0.013609101914038435, "grad_norm": 0.36328125, "learning_rate": 0.0004522760646108664, "loss": 3.4379, "step": 617 }, { "epoch": 0.013631158805309162, "grad_norm": 0.375, "learning_rate": 0.00045301027900146847, "loss": 3.4618, "step": 618 }, { "epoch": 0.013653215696579889, "grad_norm": 0.359375, "learning_rate": 0.0004537444933920705, "loss": 3.3678, "step": 619 }, { "epoch": 0.013675272587850616, "grad_norm": 0.357421875, "learning_rate": 0.00045447870778267255, "loss": 3.3342, "step": 620 }, { "epoch": 0.013697329479121343, "grad_norm": 0.384765625, "learning_rate": 0.00045521292217327457, "loss": 3.3359, "step": 621 }, { "epoch": 0.01371938637039207, "grad_norm": 0.357421875, "learning_rate": 0.00045594713656387664, "loss": 3.3665, "step": 622 }, { "epoch": 0.013741443261662796, "grad_norm": 0.34765625, "learning_rate": 0.0004566813509544787, "loss": 3.3785, "step": 623 }, { "epoch": 0.013763500152933523, "grad_norm": 0.4140625, "learning_rate": 0.0004574155653450808, "loss": 3.4284, "step": 624 }, { "epoch": 0.01378555704420425, "grad_norm": 0.404296875, "learning_rate": 0.00045814977973568285, "loss": 3.45, "step": 625 }, { "epoch": 0.013807613935474977, "grad_norm": 0.408203125, "learning_rate": 0.0004588839941262849, "loss": 3.4998, "step": 626 }, { "epoch": 0.013829670826745704, "grad_norm": 0.357421875, "learning_rate": 0.00045961820851688693, "loss": 3.3147, "step": 627 }, { "epoch": 0.01385172771801643, "grad_norm": 0.373046875, "learning_rate": 0.000460352422907489, "loss": 3.4098, "step": 628 }, { "epoch": 0.013873784609287157, "grad_norm": 0.357421875, "learning_rate": 0.000461086637298091, "loss": 3.4197, "step": 629 }, { "epoch": 0.013895841500557884, "grad_norm": 0.341796875, "learning_rate": 0.0004618208516886931, "loss": 3.399, "step": 630 }, { "epoch": 0.013917898391828611, "grad_norm": 0.3515625, "learning_rate": 0.00046255506607929516, "loss": 3.4024, "step": 631 }, { "epoch": 0.013939955283099338, "grad_norm": 0.3828125, "learning_rate": 0.00046328928046989723, "loss": 3.3698, "step": 632 }, { "epoch": 0.013962012174370065, "grad_norm": 0.44140625, "learning_rate": 0.0004640234948604993, "loss": 3.3908, "step": 633 }, { "epoch": 0.013984069065640791, "grad_norm": 0.404296875, "learning_rate": 0.0004647577092511013, "loss": 3.4387, "step": 634 }, { "epoch": 0.014006125956911518, "grad_norm": 0.37109375, "learning_rate": 0.0004654919236417034, "loss": 3.4145, "step": 635 }, { "epoch": 0.014028182848182245, "grad_norm": 0.427734375, "learning_rate": 0.00046622613803230545, "loss": 3.3629, "step": 636 }, { "epoch": 0.014050239739452972, "grad_norm": 0.380859375, "learning_rate": 0.0004669603524229075, "loss": 3.3445, "step": 637 }, { "epoch": 0.014072296630723699, "grad_norm": 0.357421875, "learning_rate": 0.0004676945668135096, "loss": 3.4073, "step": 638 }, { "epoch": 0.014094353521994426, "grad_norm": 0.375, "learning_rate": 0.0004684287812041116, "loss": 3.3341, "step": 639 }, { "epoch": 0.014116410413265152, "grad_norm": 0.421875, "learning_rate": 0.0004691629955947136, "loss": 3.3365, "step": 640 }, { "epoch": 0.01413846730453588, "grad_norm": 0.384765625, "learning_rate": 0.0004698972099853157, "loss": 3.3481, "step": 641 }, { "epoch": 0.014160524195806606, "grad_norm": 0.353515625, "learning_rate": 0.00047063142437591776, "loss": 3.3786, "step": 642 }, { "epoch": 0.014182581087077333, "grad_norm": 0.3359375, "learning_rate": 0.00047136563876651983, "loss": 3.3064, "step": 643 }, { "epoch": 0.01420463797834806, "grad_norm": 0.37890625, "learning_rate": 0.0004720998531571219, "loss": 3.4278, "step": 644 }, { "epoch": 0.014226694869618787, "grad_norm": 0.35546875, "learning_rate": 0.00047283406754772397, "loss": 3.3983, "step": 645 }, { "epoch": 0.014248751760889513, "grad_norm": 0.353515625, "learning_rate": 0.00047356828193832604, "loss": 3.3879, "step": 646 }, { "epoch": 0.01427080865216024, "grad_norm": 0.36328125, "learning_rate": 0.00047430249632892806, "loss": 3.3581, "step": 647 }, { "epoch": 0.014292865543430967, "grad_norm": 0.38671875, "learning_rate": 0.00047503671071953007, "loss": 3.3765, "step": 648 }, { "epoch": 0.014314922434701694, "grad_norm": 0.35546875, "learning_rate": 0.00047577092511013214, "loss": 3.3051, "step": 649 }, { "epoch": 0.01433697932597242, "grad_norm": 0.365234375, "learning_rate": 0.0004765051395007342, "loss": 3.2859, "step": 650 }, { "epoch": 0.014359036217243147, "grad_norm": 0.3359375, "learning_rate": 0.0004772393538913363, "loss": 3.3245, "step": 651 }, { "epoch": 0.014381093108513874, "grad_norm": 0.33203125, "learning_rate": 0.00047797356828193835, "loss": 3.3084, "step": 652 }, { "epoch": 0.014403149999784601, "grad_norm": 0.37109375, "learning_rate": 0.00047870778267254037, "loss": 3.3675, "step": 653 }, { "epoch": 0.014425206891055328, "grad_norm": 0.3671875, "learning_rate": 0.00047944199706314244, "loss": 3.3933, "step": 654 }, { "epoch": 0.014447263782326055, "grad_norm": 0.38671875, "learning_rate": 0.0004801762114537445, "loss": 3.2685, "step": 655 }, { "epoch": 0.014469320673596782, "grad_norm": 0.39453125, "learning_rate": 0.0004809104258443466, "loss": 3.2747, "step": 656 }, { "epoch": 0.014491377564867508, "grad_norm": 0.33984375, "learning_rate": 0.00048164464023494865, "loss": 3.2752, "step": 657 }, { "epoch": 0.014513434456138235, "grad_norm": 0.333984375, "learning_rate": 0.00048237885462555066, "loss": 3.2821, "step": 658 }, { "epoch": 0.014535491347408962, "grad_norm": 0.35546875, "learning_rate": 0.00048311306901615273, "loss": 3.275, "step": 659 }, { "epoch": 0.014557548238679689, "grad_norm": 0.357421875, "learning_rate": 0.00048384728340675475, "loss": 3.2665, "step": 660 }, { "epoch": 0.014579605129950416, "grad_norm": 0.357421875, "learning_rate": 0.0004845814977973568, "loss": 3.3098, "step": 661 }, { "epoch": 0.014601662021221143, "grad_norm": 0.37890625, "learning_rate": 0.0004853157121879589, "loss": 3.4244, "step": 662 }, { "epoch": 0.01462371891249187, "grad_norm": 0.40234375, "learning_rate": 0.00048604992657856096, "loss": 3.3887, "step": 663 }, { "epoch": 0.014645775803762596, "grad_norm": 0.384765625, "learning_rate": 0.000486784140969163, "loss": 3.2746, "step": 664 }, { "epoch": 0.014667832695033323, "grad_norm": 0.33203125, "learning_rate": 0.0004875183553597651, "loss": 3.3319, "step": 665 }, { "epoch": 0.01468988958630405, "grad_norm": 0.384765625, "learning_rate": 0.0004882525697503671, "loss": 3.3167, "step": 666 }, { "epoch": 0.014711946477574777, "grad_norm": 0.3671875, "learning_rate": 0.0004889867841409692, "loss": 3.3902, "step": 667 }, { "epoch": 0.014734003368845503, "grad_norm": 0.3671875, "learning_rate": 0.0004897209985315713, "loss": 3.3157, "step": 668 }, { "epoch": 0.01475606026011623, "grad_norm": 0.333984375, "learning_rate": 0.0004904552129221733, "loss": 3.3596, "step": 669 }, { "epoch": 0.014778117151386957, "grad_norm": 0.3671875, "learning_rate": 0.0004911894273127753, "loss": 3.3497, "step": 670 }, { "epoch": 0.014800174042657682, "grad_norm": 0.3515625, "learning_rate": 0.0004919236417033774, "loss": 3.2671, "step": 671 }, { "epoch": 0.014822230933928409, "grad_norm": 0.337890625, "learning_rate": 0.0004926578560939794, "loss": 3.3384, "step": 672 }, { "epoch": 0.014844287825199136, "grad_norm": 0.400390625, "learning_rate": 0.0004933920704845815, "loss": 3.2762, "step": 673 }, { "epoch": 0.014866344716469863, "grad_norm": 0.396484375, "learning_rate": 0.0004941262848751836, "loss": 3.2396, "step": 674 }, { "epoch": 0.01488840160774059, "grad_norm": 0.349609375, "learning_rate": 0.0004948604992657857, "loss": 3.2912, "step": 675 }, { "epoch": 0.014910458499011316, "grad_norm": 0.34375, "learning_rate": 0.0004955947136563877, "loss": 3.3264, "step": 676 }, { "epoch": 0.014932515390282043, "grad_norm": 0.3203125, "learning_rate": 0.0004963289280469897, "loss": 3.3702, "step": 677 }, { "epoch": 0.01495457228155277, "grad_norm": 0.349609375, "learning_rate": 0.0004970631424375917, "loss": 3.234, "step": 678 }, { "epoch": 0.014976629172823497, "grad_norm": 0.369140625, "learning_rate": 0.0004977973568281939, "loss": 3.2117, "step": 679 }, { "epoch": 0.014998686064094224, "grad_norm": 0.365234375, "learning_rate": 0.0004985315712187959, "loss": 3.2638, "step": 680 }, { "epoch": 0.01502074295536495, "grad_norm": 0.357421875, "learning_rate": 0.000499265785609398, "loss": 3.1726, "step": 681 }, { "epoch": 0.015042799846635677, "grad_norm": 0.373046875, "learning_rate": 0.0005, "loss": 3.303, "step": 682 }, { "epoch": 0.015064856737906404, "grad_norm": 0.39453125, "learning_rate": 0.0004999999997278276, "loss": 3.1892, "step": 683 }, { "epoch": 0.015086913629177131, "grad_norm": 0.35546875, "learning_rate": 0.0004999999989113103, "loss": 3.3022, "step": 684 }, { "epoch": 0.015108970520447858, "grad_norm": 0.34375, "learning_rate": 0.0004999999975504482, "loss": 3.2671, "step": 685 }, { "epoch": 0.015131027411718585, "grad_norm": 0.36328125, "learning_rate": 0.0004999999956452412, "loss": 3.2977, "step": 686 }, { "epoch": 0.015153084302989311, "grad_norm": 0.333984375, "learning_rate": 0.0004999999931956892, "loss": 3.2662, "step": 687 }, { "epoch": 0.015175141194260038, "grad_norm": 0.37109375, "learning_rate": 0.0004999999902017925, "loss": 3.3541, "step": 688 }, { "epoch": 0.015197198085530765, "grad_norm": 0.369140625, "learning_rate": 0.000499999986663551, "loss": 3.2559, "step": 689 }, { "epoch": 0.015219254976801492, "grad_norm": 0.349609375, "learning_rate": 0.0004999999825809646, "loss": 3.3113, "step": 690 }, { "epoch": 0.015241311868072219, "grad_norm": 0.353515625, "learning_rate": 0.0004999999779540334, "loss": 3.2849, "step": 691 }, { "epoch": 0.015263368759342946, "grad_norm": 0.33203125, "learning_rate": 0.0004999999727827573, "loss": 3.3027, "step": 692 }, { "epoch": 0.015285425650613672, "grad_norm": 0.322265625, "learning_rate": 0.0004999999670671365, "loss": 3.2048, "step": 693 }, { "epoch": 0.0153074825418844, "grad_norm": 0.333984375, "learning_rate": 0.0004999999608071709, "loss": 3.1831, "step": 694 }, { "epoch": 0.015329539433155126, "grad_norm": 0.35546875, "learning_rate": 0.0004999999540028605, "loss": 3.1978, "step": 695 }, { "epoch": 0.015351596324425853, "grad_norm": 0.373046875, "learning_rate": 0.0004999999466542053, "loss": 3.2326, "step": 696 }, { "epoch": 0.01537365321569658, "grad_norm": 0.40625, "learning_rate": 0.0004999999387612054, "loss": 3.2942, "step": 697 }, { "epoch": 0.015395710106967306, "grad_norm": 0.4375, "learning_rate": 0.0004999999303238608, "loss": 3.2523, "step": 698 }, { "epoch": 0.015417766998238033, "grad_norm": 0.4453125, "learning_rate": 0.0004999999213421715, "loss": 3.136, "step": 699 }, { "epoch": 0.01543982388950876, "grad_norm": 0.384765625, "learning_rate": 0.0004999999118161375, "loss": 3.2507, "step": 700 }, { "epoch": 0.015461880780779487, "grad_norm": 0.357421875, "learning_rate": 0.0004999999017457587, "loss": 3.3019, "step": 701 }, { "epoch": 0.015483937672050214, "grad_norm": 0.3671875, "learning_rate": 0.0004999998911310354, "loss": 3.2615, "step": 702 }, { "epoch": 0.01550599456332094, "grad_norm": 0.3359375, "learning_rate": 0.0004999998799719674, "loss": 3.1914, "step": 703 }, { "epoch": 0.015528051454591667, "grad_norm": 0.3515625, "learning_rate": 0.0004999998682685548, "loss": 3.2856, "step": 704 }, { "epoch": 0.015550108345862394, "grad_norm": 0.326171875, "learning_rate": 0.0004999998560207976, "loss": 3.1968, "step": 705 }, { "epoch": 0.015572165237133121, "grad_norm": 0.349609375, "learning_rate": 0.000499999843228696, "loss": 3.1599, "step": 706 }, { "epoch": 0.015594222128403848, "grad_norm": 0.31640625, "learning_rate": 0.0004999998298922496, "loss": 3.2084, "step": 707 }, { "epoch": 0.015616279019674575, "grad_norm": 0.33203125, "learning_rate": 0.000499999816011459, "loss": 3.2323, "step": 708 }, { "epoch": 0.0156383359109453, "grad_norm": 0.341796875, "learning_rate": 0.0004999998015863239, "loss": 3.2076, "step": 709 }, { "epoch": 0.01566039280221603, "grad_norm": 0.345703125, "learning_rate": 0.0004999997866168442, "loss": 3.2472, "step": 710 }, { "epoch": 0.015682449693486755, "grad_norm": 0.318359375, "learning_rate": 0.0004999997711030202, "loss": 3.1519, "step": 711 }, { "epoch": 0.015704506584757482, "grad_norm": 0.330078125, "learning_rate": 0.0004999997550448518, "loss": 3.2199, "step": 712 }, { "epoch": 0.01572656347602821, "grad_norm": 0.33203125, "learning_rate": 0.0004999997384423392, "loss": 3.1996, "step": 713 }, { "epoch": 0.015748620367298936, "grad_norm": 0.333984375, "learning_rate": 0.0004999997212954821, "loss": 3.2239, "step": 714 }, { "epoch": 0.015770677258569663, "grad_norm": 0.353515625, "learning_rate": 0.0004999997036042808, "loss": 3.2539, "step": 715 }, { "epoch": 0.01579273414984039, "grad_norm": 0.376953125, "learning_rate": 0.0004999996853687354, "loss": 3.2438, "step": 716 }, { "epoch": 0.015814791041111116, "grad_norm": 0.345703125, "learning_rate": 0.0004999996665888457, "loss": 3.228, "step": 717 }, { "epoch": 0.015836847932381843, "grad_norm": 0.3515625, "learning_rate": 0.0004999996472646119, "loss": 3.278, "step": 718 }, { "epoch": 0.01585890482365257, "grad_norm": 0.314453125, "learning_rate": 0.0004999996273960341, "loss": 3.1972, "step": 719 }, { "epoch": 0.015880961714923297, "grad_norm": 0.34375, "learning_rate": 0.0004999996069831121, "loss": 3.2343, "step": 720 }, { "epoch": 0.015903018606194023, "grad_norm": 0.34375, "learning_rate": 0.0004999995860258461, "loss": 3.2911, "step": 721 }, { "epoch": 0.01592507549746475, "grad_norm": 0.341796875, "learning_rate": 0.0004999995645242364, "loss": 3.1954, "step": 722 }, { "epoch": 0.015947132388735477, "grad_norm": 0.333984375, "learning_rate": 0.0004999995424782825, "loss": 3.1646, "step": 723 }, { "epoch": 0.015969189280006204, "grad_norm": 0.33984375, "learning_rate": 0.0004999995198879848, "loss": 3.1855, "step": 724 }, { "epoch": 0.01599124617127693, "grad_norm": 0.330078125, "learning_rate": 0.0004999994967533433, "loss": 3.1922, "step": 725 }, { "epoch": 0.016013303062547658, "grad_norm": 0.306640625, "learning_rate": 0.0004999994730743581, "loss": 3.2038, "step": 726 }, { "epoch": 0.016035359953818384, "grad_norm": 0.3359375, "learning_rate": 0.0004999994488510291, "loss": 3.2731, "step": 727 }, { "epoch": 0.01605741684508911, "grad_norm": 0.333984375, "learning_rate": 0.0004999994240833564, "loss": 3.1731, "step": 728 }, { "epoch": 0.016079473736359838, "grad_norm": 0.318359375, "learning_rate": 0.0004999993987713402, "loss": 3.2209, "step": 729 }, { "epoch": 0.016101530627630565, "grad_norm": 0.33984375, "learning_rate": 0.0004999993729149804, "loss": 3.2988, "step": 730 }, { "epoch": 0.01612358751890129, "grad_norm": 0.318359375, "learning_rate": 0.0004999993465142771, "loss": 3.2021, "step": 731 }, { "epoch": 0.01614564441017202, "grad_norm": 0.3203125, "learning_rate": 0.0004999993195692303, "loss": 3.1464, "step": 732 }, { "epoch": 0.016167701301442745, "grad_norm": 0.3359375, "learning_rate": 0.0004999992920798402, "loss": 3.2011, "step": 733 }, { "epoch": 0.016189758192713472, "grad_norm": 0.34375, "learning_rate": 0.0004999992640461067, "loss": 3.2106, "step": 734 }, { "epoch": 0.0162118150839842, "grad_norm": 0.32421875, "learning_rate": 0.00049999923546803, "loss": 3.2186, "step": 735 }, { "epoch": 0.016233871975254926, "grad_norm": 0.361328125, "learning_rate": 0.0004999992063456102, "loss": 3.2616, "step": 736 }, { "epoch": 0.016255928866525653, "grad_norm": 0.33203125, "learning_rate": 0.0004999991766788471, "loss": 3.1461, "step": 737 }, { "epoch": 0.01627798575779638, "grad_norm": 0.330078125, "learning_rate": 0.000499999146467741, "loss": 3.2044, "step": 738 }, { "epoch": 0.016300042649067106, "grad_norm": 0.37109375, "learning_rate": 0.000499999115712292, "loss": 3.1291, "step": 739 }, { "epoch": 0.016322099540337833, "grad_norm": 0.34765625, "learning_rate": 0.0004999990844124999, "loss": 3.1292, "step": 740 }, { "epoch": 0.01634415643160856, "grad_norm": 0.333984375, "learning_rate": 0.000499999052568365, "loss": 3.2623, "step": 741 }, { "epoch": 0.016366213322879287, "grad_norm": 0.345703125, "learning_rate": 0.0004999990201798873, "loss": 3.1947, "step": 742 }, { "epoch": 0.016388270214150014, "grad_norm": 0.373046875, "learning_rate": 0.0004999989872470668, "loss": 3.1981, "step": 743 }, { "epoch": 0.01641032710542074, "grad_norm": 0.34375, "learning_rate": 0.0004999989537699036, "loss": 3.104, "step": 744 }, { "epoch": 0.016432383996691467, "grad_norm": 0.330078125, "learning_rate": 0.000499998919748398, "loss": 3.1295, "step": 745 }, { "epoch": 0.016454440887962194, "grad_norm": 0.333984375, "learning_rate": 0.0004999988851825499, "loss": 3.1753, "step": 746 }, { "epoch": 0.01647649777923292, "grad_norm": 0.3046875, "learning_rate": 0.0004999988500723592, "loss": 3.2715, "step": 747 }, { "epoch": 0.016498554670503648, "grad_norm": 0.30078125, "learning_rate": 0.0004999988144178262, "loss": 3.2558, "step": 748 }, { "epoch": 0.016520611561774375, "grad_norm": 0.310546875, "learning_rate": 0.0004999987782189508, "loss": 3.1394, "step": 749 }, { "epoch": 0.0165426684530451, "grad_norm": 0.322265625, "learning_rate": 0.0004999987414757334, "loss": 3.2253, "step": 750 }, { "epoch": 0.016564725344315828, "grad_norm": 0.3046875, "learning_rate": 0.0004999987041881738, "loss": 3.1401, "step": 751 }, { "epoch": 0.016586782235586555, "grad_norm": 0.30859375, "learning_rate": 0.0004999986663562722, "loss": 3.1377, "step": 752 }, { "epoch": 0.016608839126857282, "grad_norm": 0.3125, "learning_rate": 0.0004999986279800286, "loss": 3.218, "step": 753 }, { "epoch": 0.01663089601812801, "grad_norm": 0.333984375, "learning_rate": 0.0004999985890594431, "loss": 3.0753, "step": 754 }, { "epoch": 0.016652952909398736, "grad_norm": 0.314453125, "learning_rate": 0.0004999985495945159, "loss": 3.2084, "step": 755 }, { "epoch": 0.016675009800669462, "grad_norm": 0.30859375, "learning_rate": 0.0004999985095852469, "loss": 3.1809, "step": 756 }, { "epoch": 0.01669706669194019, "grad_norm": 0.3828125, "learning_rate": 0.0004999984690316364, "loss": 3.2144, "step": 757 }, { "epoch": 0.016719123583210916, "grad_norm": 0.404296875, "learning_rate": 0.0004999984279336842, "loss": 3.2221, "step": 758 }, { "epoch": 0.016741180474481643, "grad_norm": 0.333984375, "learning_rate": 0.0004999983862913907, "loss": 3.1541, "step": 759 }, { "epoch": 0.01676323736575237, "grad_norm": 0.314453125, "learning_rate": 0.0004999983441047557, "loss": 3.1944, "step": 760 }, { "epoch": 0.016785294257023096, "grad_norm": 0.3515625, "learning_rate": 0.0004999983013737796, "loss": 3.1803, "step": 761 }, { "epoch": 0.016807351148293823, "grad_norm": 0.30078125, "learning_rate": 0.0004999982580984623, "loss": 3.069, "step": 762 }, { "epoch": 0.01682940803956455, "grad_norm": 0.314453125, "learning_rate": 0.0004999982142788039, "loss": 3.1952, "step": 763 }, { "epoch": 0.016851464930835277, "grad_norm": 0.3125, "learning_rate": 0.0004999981699148045, "loss": 3.0446, "step": 764 }, { "epoch": 0.016873521822106004, "grad_norm": 0.3359375, "learning_rate": 0.0004999981250064643, "loss": 3.1367, "step": 765 }, { "epoch": 0.01689557871337673, "grad_norm": 0.337890625, "learning_rate": 0.0004999980795537832, "loss": 3.0658, "step": 766 }, { "epoch": 0.016917635604647457, "grad_norm": 0.31640625, "learning_rate": 0.0004999980335567615, "loss": 3.1603, "step": 767 }, { "epoch": 0.016939692495918184, "grad_norm": 0.322265625, "learning_rate": 0.0004999979870153993, "loss": 3.075, "step": 768 }, { "epoch": 0.01696174938718891, "grad_norm": 0.314453125, "learning_rate": 0.0004999979399296965, "loss": 3.1992, "step": 769 }, { "epoch": 0.016983806278459638, "grad_norm": 0.30859375, "learning_rate": 0.0004999978922996534, "loss": 3.0945, "step": 770 }, { "epoch": 0.017005863169730365, "grad_norm": 0.337890625, "learning_rate": 0.00049999784412527, "loss": 3.213, "step": 771 }, { "epoch": 0.01702792006100109, "grad_norm": 0.31640625, "learning_rate": 0.0004999977954065464, "loss": 3.1727, "step": 772 }, { "epoch": 0.01704997695227182, "grad_norm": 0.3125, "learning_rate": 0.0004999977461434827, "loss": 3.0925, "step": 773 }, { "epoch": 0.017072033843542545, "grad_norm": 0.33984375, "learning_rate": 0.0004999976963360791, "loss": 3.1893, "step": 774 }, { "epoch": 0.017094090734813272, "grad_norm": 0.318359375, "learning_rate": 0.0004999976459843358, "loss": 3.0584, "step": 775 }, { "epoch": 0.017116147626084, "grad_norm": 0.314453125, "learning_rate": 0.0004999975950882525, "loss": 3.1853, "step": 776 }, { "epoch": 0.017138204517354722, "grad_norm": 0.294921875, "learning_rate": 0.0004999975436478296, "loss": 3.1372, "step": 777 }, { "epoch": 0.01716026140862545, "grad_norm": 0.3125, "learning_rate": 0.0004999974916630674, "loss": 3.1012, "step": 778 }, { "epoch": 0.017182318299896176, "grad_norm": 0.29296875, "learning_rate": 0.0004999974391339657, "loss": 3.0429, "step": 779 }, { "epoch": 0.017204375191166903, "grad_norm": 0.306640625, "learning_rate": 0.0004999973860605247, "loss": 3.0674, "step": 780 }, { "epoch": 0.01722643208243763, "grad_norm": 0.298828125, "learning_rate": 0.0004999973324427445, "loss": 3.1528, "step": 781 }, { "epoch": 0.017248488973708356, "grad_norm": 0.3203125, "learning_rate": 0.0004999972782806253, "loss": 3.0889, "step": 782 }, { "epoch": 0.017270545864979083, "grad_norm": 0.318359375, "learning_rate": 0.0004999972235741671, "loss": 3.1746, "step": 783 }, { "epoch": 0.01729260275624981, "grad_norm": 0.296875, "learning_rate": 0.0004999971683233701, "loss": 3.1131, "step": 784 }, { "epoch": 0.017314659647520537, "grad_norm": 0.361328125, "learning_rate": 0.0004999971125282343, "loss": 3.1443, "step": 785 }, { "epoch": 0.017336716538791264, "grad_norm": 0.3203125, "learning_rate": 0.0004999970561887601, "loss": 3.0672, "step": 786 }, { "epoch": 0.01735877343006199, "grad_norm": 0.298828125, "learning_rate": 0.0004999969993049473, "loss": 3.1747, "step": 787 }, { "epoch": 0.017380830321332717, "grad_norm": 0.328125, "learning_rate": 0.0004999969418767963, "loss": 3.1051, "step": 788 }, { "epoch": 0.017402887212603444, "grad_norm": 0.296875, "learning_rate": 0.0004999968839043071, "loss": 3.1736, "step": 789 }, { "epoch": 0.01742494410387417, "grad_norm": 0.30078125, "learning_rate": 0.0004999968253874798, "loss": 3.0872, "step": 790 }, { "epoch": 0.017447000995144898, "grad_norm": 0.30078125, "learning_rate": 0.0004999967663263144, "loss": 3.2354, "step": 791 }, { "epoch": 0.017469057886415625, "grad_norm": 0.31640625, "learning_rate": 0.0004999967067208114, "loss": 3.1158, "step": 792 }, { "epoch": 0.01749111477768635, "grad_norm": 0.359375, "learning_rate": 0.0004999966465709705, "loss": 3.0941, "step": 793 }, { "epoch": 0.017513171668957078, "grad_norm": 0.30078125, "learning_rate": 0.0004999965858767922, "loss": 3.1499, "step": 794 }, { "epoch": 0.017535228560227805, "grad_norm": 0.3203125, "learning_rate": 0.0004999965246382765, "loss": 3.1328, "step": 795 }, { "epoch": 0.017557285451498532, "grad_norm": 0.32421875, "learning_rate": 0.0004999964628554234, "loss": 3.0619, "step": 796 }, { "epoch": 0.01757934234276926, "grad_norm": 0.318359375, "learning_rate": 0.0004999964005282331, "loss": 3.1283, "step": 797 }, { "epoch": 0.017601399234039986, "grad_norm": 0.306640625, "learning_rate": 0.0004999963376567059, "loss": 3.0543, "step": 798 }, { "epoch": 0.017623456125310712, "grad_norm": 0.298828125, "learning_rate": 0.0004999962742408417, "loss": 3.1038, "step": 799 }, { "epoch": 0.01764551301658144, "grad_norm": 0.296875, "learning_rate": 0.0004999962102806408, "loss": 3.0412, "step": 800 }, { "epoch": 0.017667569907852166, "grad_norm": 0.31640625, "learning_rate": 0.0004999961457761033, "loss": 3.0643, "step": 801 }, { "epoch": 0.017689626799122893, "grad_norm": 0.296875, "learning_rate": 0.0004999960807272293, "loss": 3.0916, "step": 802 }, { "epoch": 0.01771168369039362, "grad_norm": 0.291015625, "learning_rate": 0.0004999960151340191, "loss": 3.0113, "step": 803 }, { "epoch": 0.017733740581664346, "grad_norm": 0.3203125, "learning_rate": 0.0004999959489964725, "loss": 3.1334, "step": 804 }, { "epoch": 0.017755797472935073, "grad_norm": 0.3046875, "learning_rate": 0.00049999588231459, "loss": 3.1208, "step": 805 }, { "epoch": 0.0177778543642058, "grad_norm": 0.30078125, "learning_rate": 0.0004999958150883715, "loss": 3.0813, "step": 806 }, { "epoch": 0.017799911255476527, "grad_norm": 0.3046875, "learning_rate": 0.0004999957473178174, "loss": 3.1098, "step": 807 }, { "epoch": 0.017821968146747254, "grad_norm": 0.3046875, "learning_rate": 0.0004999956790029276, "loss": 3.1485, "step": 808 }, { "epoch": 0.01784402503801798, "grad_norm": 0.298828125, "learning_rate": 0.0004999956101437023, "loss": 3.083, "step": 809 }, { "epoch": 0.017866081929288707, "grad_norm": 0.287109375, "learning_rate": 0.0004999955407401418, "loss": 3.0845, "step": 810 }, { "epoch": 0.017888138820559434, "grad_norm": 0.31640625, "learning_rate": 0.0004999954707922462, "loss": 3.0318, "step": 811 }, { "epoch": 0.01791019571183016, "grad_norm": 0.3125, "learning_rate": 0.0004999954003000154, "loss": 3.0118, "step": 812 }, { "epoch": 0.017932252603100888, "grad_norm": 0.287109375, "learning_rate": 0.0004999953292634498, "loss": 3.1577, "step": 813 }, { "epoch": 0.017954309494371615, "grad_norm": 0.29296875, "learning_rate": 0.0004999952576825496, "loss": 3.0774, "step": 814 }, { "epoch": 0.01797636638564234, "grad_norm": 0.30859375, "learning_rate": 0.0004999951855573148, "loss": 3.1906, "step": 815 }, { "epoch": 0.01799842327691307, "grad_norm": 0.28515625, "learning_rate": 0.0004999951128877456, "loss": 3.1352, "step": 816 }, { "epoch": 0.018020480168183795, "grad_norm": 0.3125, "learning_rate": 0.0004999950396738423, "loss": 3.0541, "step": 817 }, { "epoch": 0.018042537059454522, "grad_norm": 0.30078125, "learning_rate": 0.0004999949659156049, "loss": 3.0976, "step": 818 }, { "epoch": 0.01806459395072525, "grad_norm": 0.291015625, "learning_rate": 0.0004999948916130335, "loss": 3.1311, "step": 819 }, { "epoch": 0.018086650841995976, "grad_norm": 0.302734375, "learning_rate": 0.0004999948167661285, "loss": 3.0366, "step": 820 }, { "epoch": 0.018108707733266702, "grad_norm": 0.2890625, "learning_rate": 0.0004999947413748898, "loss": 3.1016, "step": 821 }, { "epoch": 0.01813076462453743, "grad_norm": 0.3203125, "learning_rate": 0.0004999946654393177, "loss": 3.0249, "step": 822 }, { "epoch": 0.018152821515808156, "grad_norm": 0.30078125, "learning_rate": 0.0004999945889594124, "loss": 3.0507, "step": 823 }, { "epoch": 0.018174878407078883, "grad_norm": 0.28125, "learning_rate": 0.000499994511935174, "loss": 3.0763, "step": 824 }, { "epoch": 0.01819693529834961, "grad_norm": 0.3046875, "learning_rate": 0.0004999944343666027, "loss": 2.994, "step": 825 }, { "epoch": 0.018218992189620337, "grad_norm": 0.302734375, "learning_rate": 0.0004999943562536985, "loss": 3.1226, "step": 826 }, { "epoch": 0.018241049080891063, "grad_norm": 0.3046875, "learning_rate": 0.000499994277596462, "loss": 3.0696, "step": 827 }, { "epoch": 0.01826310597216179, "grad_norm": 0.30078125, "learning_rate": 0.000499994198394893, "loss": 2.979, "step": 828 }, { "epoch": 0.018285162863432517, "grad_norm": 0.294921875, "learning_rate": 0.0004999941186489917, "loss": 2.9941, "step": 829 }, { "epoch": 0.018307219754703244, "grad_norm": 0.291015625, "learning_rate": 0.0004999940383587584, "loss": 3.0067, "step": 830 }, { "epoch": 0.01832927664597397, "grad_norm": 0.314453125, "learning_rate": 0.0004999939575241932, "loss": 3.0354, "step": 831 }, { "epoch": 0.018351333537244698, "grad_norm": 0.294921875, "learning_rate": 0.0004999938761452962, "loss": 3.0707, "step": 832 }, { "epoch": 0.018373390428515424, "grad_norm": 0.30078125, "learning_rate": 0.0004999937942220678, "loss": 3.0774, "step": 833 }, { "epoch": 0.01839544731978615, "grad_norm": 0.306640625, "learning_rate": 0.000499993711754508, "loss": 3.0269, "step": 834 }, { "epoch": 0.018417504211056878, "grad_norm": 0.275390625, "learning_rate": 0.0004999936287426171, "loss": 3.0541, "step": 835 }, { "epoch": 0.018439561102327605, "grad_norm": 0.3046875, "learning_rate": 0.0004999935451863952, "loss": 3.0383, "step": 836 }, { "epoch": 0.01846161799359833, "grad_norm": 0.30078125, "learning_rate": 0.0004999934610858425, "loss": 3.0253, "step": 837 }, { "epoch": 0.01848367488486906, "grad_norm": 0.302734375, "learning_rate": 0.000499993376440959, "loss": 2.9811, "step": 838 }, { "epoch": 0.018505731776139785, "grad_norm": 0.30859375, "learning_rate": 0.0004999932912517452, "loss": 3.0774, "step": 839 }, { "epoch": 0.018527788667410512, "grad_norm": 0.310546875, "learning_rate": 0.0004999932055182012, "loss": 3.0852, "step": 840 }, { "epoch": 0.01854984555868124, "grad_norm": 0.294921875, "learning_rate": 0.0004999931192403269, "loss": 3.0135, "step": 841 }, { "epoch": 0.018571902449951966, "grad_norm": 0.306640625, "learning_rate": 0.000499993032418123, "loss": 3.0346, "step": 842 }, { "epoch": 0.018593959341222693, "grad_norm": 0.30859375, "learning_rate": 0.0004999929450515892, "loss": 3.1205, "step": 843 }, { "epoch": 0.01861601623249342, "grad_norm": 0.2890625, "learning_rate": 0.000499992857140726, "loss": 3.0532, "step": 844 }, { "epoch": 0.018638073123764146, "grad_norm": 0.302734375, "learning_rate": 0.0004999927686855335, "loss": 3.1097, "step": 845 }, { "epoch": 0.018660130015034873, "grad_norm": 0.30078125, "learning_rate": 0.0004999926796860119, "loss": 3.0989, "step": 846 }, { "epoch": 0.0186821869063056, "grad_norm": 0.322265625, "learning_rate": 0.0004999925901421613, "loss": 3.0031, "step": 847 }, { "epoch": 0.018704243797576327, "grad_norm": 0.2890625, "learning_rate": 0.000499992500053982, "loss": 2.99, "step": 848 }, { "epoch": 0.018726300688847054, "grad_norm": 0.2890625, "learning_rate": 0.0004999924094214741, "loss": 3.0347, "step": 849 }, { "epoch": 0.01874835758011778, "grad_norm": 0.294921875, "learning_rate": 0.000499992318244638, "loss": 3.0519, "step": 850 }, { "epoch": 0.018770414471388507, "grad_norm": 0.29296875, "learning_rate": 0.0004999922265234737, "loss": 3.0794, "step": 851 }, { "epoch": 0.018792471362659234, "grad_norm": 0.28515625, "learning_rate": 0.0004999921342579815, "loss": 3.0122, "step": 852 }, { "epoch": 0.01881452825392996, "grad_norm": 0.30078125, "learning_rate": 0.0004999920414481614, "loss": 3.107, "step": 853 }, { "epoch": 0.018836585145200688, "grad_norm": 0.359375, "learning_rate": 0.000499991948094014, "loss": 3.0273, "step": 854 }, { "epoch": 0.018858642036471415, "grad_norm": 0.37109375, "learning_rate": 0.0004999918541955391, "loss": 3.0103, "step": 855 }, { "epoch": 0.01888069892774214, "grad_norm": 0.306640625, "learning_rate": 0.0004999917597527372, "loss": 3.037, "step": 856 }, { "epoch": 0.018902755819012868, "grad_norm": 0.298828125, "learning_rate": 0.0004999916647656083, "loss": 3.0242, "step": 857 }, { "epoch": 0.018924812710283595, "grad_norm": 0.30859375, "learning_rate": 0.0004999915692341526, "loss": 3.012, "step": 858 }, { "epoch": 0.018946869601554322, "grad_norm": 0.294921875, "learning_rate": 0.0004999914731583705, "loss": 3.0799, "step": 859 }, { "epoch": 0.01896892649282505, "grad_norm": 0.29296875, "learning_rate": 0.0004999913765382622, "loss": 3.0011, "step": 860 }, { "epoch": 0.018990983384095775, "grad_norm": 0.32421875, "learning_rate": 0.0004999912793738278, "loss": 2.957, "step": 861 }, { "epoch": 0.019013040275366502, "grad_norm": 0.30859375, "learning_rate": 0.0004999911816650674, "loss": 3.0649, "step": 862 }, { "epoch": 0.01903509716663723, "grad_norm": 0.2890625, "learning_rate": 0.0004999910834119814, "loss": 3.0464, "step": 863 }, { "epoch": 0.019057154057907956, "grad_norm": 0.3203125, "learning_rate": 0.0004999909846145699, "loss": 3.137, "step": 864 }, { "epoch": 0.019079210949178683, "grad_norm": 0.28125, "learning_rate": 0.0004999908852728332, "loss": 3.0566, "step": 865 }, { "epoch": 0.01910126784044941, "grad_norm": 0.27734375, "learning_rate": 0.0004999907853867714, "loss": 3.0099, "step": 866 }, { "epoch": 0.019123324731720136, "grad_norm": 0.298828125, "learning_rate": 0.000499990684956385, "loss": 3.1141, "step": 867 }, { "epoch": 0.019145381622990863, "grad_norm": 0.279296875, "learning_rate": 0.0004999905839816739, "loss": 3.0229, "step": 868 }, { "epoch": 0.01916743851426159, "grad_norm": 0.296875, "learning_rate": 0.0004999904824626384, "loss": 3.0745, "step": 869 }, { "epoch": 0.019189495405532317, "grad_norm": 0.271484375, "learning_rate": 0.0004999903803992788, "loss": 3.0085, "step": 870 }, { "epoch": 0.019211552296803044, "grad_norm": 0.28515625, "learning_rate": 0.0004999902777915952, "loss": 3.0243, "step": 871 }, { "epoch": 0.01923360918807377, "grad_norm": 0.314453125, "learning_rate": 0.000499990174639588, "loss": 3.0441, "step": 872 }, { "epoch": 0.019255666079344497, "grad_norm": 0.30078125, "learning_rate": 0.0004999900709432574, "loss": 3.0458, "step": 873 }, { "epoch": 0.019277722970615224, "grad_norm": 0.294921875, "learning_rate": 0.0004999899667026035, "loss": 3.0568, "step": 874 }, { "epoch": 0.01929977986188595, "grad_norm": 0.2890625, "learning_rate": 0.0004999898619176265, "loss": 3.0089, "step": 875 }, { "epoch": 0.019321836753156678, "grad_norm": 0.279296875, "learning_rate": 0.0004999897565883268, "loss": 3.0574, "step": 876 }, { "epoch": 0.019343893644427405, "grad_norm": 0.296875, "learning_rate": 0.0004999896507147045, "loss": 3.056, "step": 877 }, { "epoch": 0.01936595053569813, "grad_norm": 0.283203125, "learning_rate": 0.00049998954429676, "loss": 3.0017, "step": 878 }, { "epoch": 0.01938800742696886, "grad_norm": 0.28515625, "learning_rate": 0.0004999894373344932, "loss": 3.032, "step": 879 }, { "epoch": 0.019410064318239585, "grad_norm": 0.283203125, "learning_rate": 0.0004999893298279047, "loss": 3.0688, "step": 880 }, { "epoch": 0.019432121209510312, "grad_norm": 0.28125, "learning_rate": 0.0004999892217769945, "loss": 3.1257, "step": 881 }, { "epoch": 0.01945417810078104, "grad_norm": 0.291015625, "learning_rate": 0.0004999891131817628, "loss": 3.0188, "step": 882 }, { "epoch": 0.019476234992051766, "grad_norm": 0.291015625, "learning_rate": 0.0004999890040422102, "loss": 2.9571, "step": 883 }, { "epoch": 0.019498291883322492, "grad_norm": 0.296875, "learning_rate": 0.0004999888943583365, "loss": 3.007, "step": 884 }, { "epoch": 0.01952034877459322, "grad_norm": 0.275390625, "learning_rate": 0.000499988784130142, "loss": 2.9885, "step": 885 }, { "epoch": 0.019542405665863946, "grad_norm": 0.3203125, "learning_rate": 0.0004999886733576273, "loss": 2.9178, "step": 886 }, { "epoch": 0.019564462557134673, "grad_norm": 0.318359375, "learning_rate": 0.0004999885620407923, "loss": 3.0473, "step": 887 }, { "epoch": 0.0195865194484054, "grad_norm": 0.29296875, "learning_rate": 0.0004999884501796373, "loss": 2.9252, "step": 888 }, { "epoch": 0.019608576339676127, "grad_norm": 0.37109375, "learning_rate": 0.0004999883377741626, "loss": 3.0137, "step": 889 }, { "epoch": 0.019630633230946853, "grad_norm": 0.302734375, "learning_rate": 0.0004999882248243686, "loss": 3.0105, "step": 890 }, { "epoch": 0.01965269012221758, "grad_norm": 0.306640625, "learning_rate": 0.0004999881113302552, "loss": 3.045, "step": 891 }, { "epoch": 0.019674747013488307, "grad_norm": 0.3359375, "learning_rate": 0.0004999879972918228, "loss": 3.0148, "step": 892 }, { "epoch": 0.019696803904759034, "grad_norm": 0.296875, "learning_rate": 0.0004999878827090718, "loss": 3.0376, "step": 893 }, { "epoch": 0.01971886079602976, "grad_norm": 0.314453125, "learning_rate": 0.0004999877675820022, "loss": 2.9921, "step": 894 }, { "epoch": 0.019740917687300488, "grad_norm": 0.30078125, "learning_rate": 0.0004999876519106145, "loss": 3.0518, "step": 895 }, { "epoch": 0.019762974578571214, "grad_norm": 0.294921875, "learning_rate": 0.0004999875356949087, "loss": 2.9413, "step": 896 }, { "epoch": 0.01978503146984194, "grad_norm": 0.30859375, "learning_rate": 0.0004999874189348853, "loss": 2.9434, "step": 897 }, { "epoch": 0.019807088361112668, "grad_norm": 0.29296875, "learning_rate": 0.0004999873016305442, "loss": 2.9563, "step": 898 }, { "epoch": 0.019829145252383395, "grad_norm": 0.345703125, "learning_rate": 0.0004999871837818861, "loss": 3.0567, "step": 899 }, { "epoch": 0.01985120214365412, "grad_norm": 0.30078125, "learning_rate": 0.000499987065388911, "loss": 2.9851, "step": 900 }, { "epoch": 0.01987325903492485, "grad_norm": 0.3046875, "learning_rate": 0.0004999869464516192, "loss": 2.9952, "step": 901 }, { "epoch": 0.019895315926195575, "grad_norm": 0.27734375, "learning_rate": 0.0004999868269700109, "loss": 2.9725, "step": 902 }, { "epoch": 0.019917372817466302, "grad_norm": 0.2890625, "learning_rate": 0.0004999867069440865, "loss": 2.9685, "step": 903 }, { "epoch": 0.01993942970873703, "grad_norm": 0.298828125, "learning_rate": 0.0004999865863738462, "loss": 2.9896, "step": 904 }, { "epoch": 0.019961486600007756, "grad_norm": 0.296875, "learning_rate": 0.0004999864652592902, "loss": 2.9898, "step": 905 }, { "epoch": 0.019983543491278483, "grad_norm": 0.271484375, "learning_rate": 0.0004999863436004188, "loss": 3.0374, "step": 906 }, { "epoch": 0.02000560038254921, "grad_norm": 0.30859375, "learning_rate": 0.0004999862213972323, "loss": 3.0769, "step": 907 }, { "epoch": 0.020027657273819936, "grad_norm": 0.28515625, "learning_rate": 0.0004999860986497309, "loss": 3.0512, "step": 908 }, { "epoch": 0.020049714165090663, "grad_norm": 0.275390625, "learning_rate": 0.0004999859753579149, "loss": 3.0054, "step": 909 }, { "epoch": 0.02007177105636139, "grad_norm": 0.275390625, "learning_rate": 0.0004999858515217847, "loss": 3.0728, "step": 910 }, { "epoch": 0.020093827947632117, "grad_norm": 0.2734375, "learning_rate": 0.0004999857271413403, "loss": 2.9853, "step": 911 }, { "epoch": 0.020115884838902844, "grad_norm": 0.27734375, "learning_rate": 0.0004999856022165821, "loss": 2.9381, "step": 912 }, { "epoch": 0.02013794173017357, "grad_norm": 0.2890625, "learning_rate": 0.0004999854767475105, "loss": 2.9703, "step": 913 }, { "epoch": 0.020159998621444297, "grad_norm": 0.271484375, "learning_rate": 0.0004999853507341255, "loss": 2.9535, "step": 914 }, { "epoch": 0.020182055512715024, "grad_norm": 0.279296875, "learning_rate": 0.0004999852241764276, "loss": 2.9253, "step": 915 }, { "epoch": 0.02020411240398575, "grad_norm": 0.283203125, "learning_rate": 0.0004999850970744172, "loss": 2.9372, "step": 916 }, { "epoch": 0.020226169295256478, "grad_norm": 0.27734375, "learning_rate": 0.0004999849694280942, "loss": 2.9978, "step": 917 }, { "epoch": 0.0202482261865272, "grad_norm": 0.302734375, "learning_rate": 0.0004999848412374591, "loss": 3.0156, "step": 918 }, { "epoch": 0.020270283077797928, "grad_norm": 0.279296875, "learning_rate": 0.000499984712502512, "loss": 3.0015, "step": 919 }, { "epoch": 0.020292339969068655, "grad_norm": 0.271484375, "learning_rate": 0.0004999845832232535, "loss": 3.0444, "step": 920 }, { "epoch": 0.02031439686033938, "grad_norm": 0.28125, "learning_rate": 0.0004999844533996838, "loss": 2.9578, "step": 921 }, { "epoch": 0.02033645375161011, "grad_norm": 0.279296875, "learning_rate": 0.0004999843230318029, "loss": 3.0352, "step": 922 }, { "epoch": 0.020358510642880835, "grad_norm": 0.2734375, "learning_rate": 0.0004999841921196113, "loss": 2.949, "step": 923 }, { "epoch": 0.020380567534151562, "grad_norm": 0.283203125, "learning_rate": 0.0004999840606631093, "loss": 3.0193, "step": 924 }, { "epoch": 0.02040262442542229, "grad_norm": 0.271484375, "learning_rate": 0.0004999839286622971, "loss": 2.9891, "step": 925 }, { "epoch": 0.020424681316693016, "grad_norm": 0.27734375, "learning_rate": 0.0004999837961171752, "loss": 2.9738, "step": 926 }, { "epoch": 0.020446738207963742, "grad_norm": 0.283203125, "learning_rate": 0.0004999836630277436, "loss": 2.9749, "step": 927 }, { "epoch": 0.02046879509923447, "grad_norm": 0.27734375, "learning_rate": 0.0004999835293940026, "loss": 2.9772, "step": 928 }, { "epoch": 0.020490851990505196, "grad_norm": 0.30078125, "learning_rate": 0.0004999833952159527, "loss": 2.9945, "step": 929 }, { "epoch": 0.020512908881775923, "grad_norm": 0.302734375, "learning_rate": 0.0004999832604935941, "loss": 3.0331, "step": 930 }, { "epoch": 0.02053496577304665, "grad_norm": 0.27734375, "learning_rate": 0.0004999831252269271, "loss": 2.912, "step": 931 }, { "epoch": 0.020557022664317377, "grad_norm": 0.296875, "learning_rate": 0.0004999829894159519, "loss": 2.9802, "step": 932 }, { "epoch": 0.020579079555588103, "grad_norm": 0.283203125, "learning_rate": 0.0004999828530606689, "loss": 2.9785, "step": 933 }, { "epoch": 0.02060113644685883, "grad_norm": 0.28125, "learning_rate": 0.0004999827161610784, "loss": 2.9595, "step": 934 }, { "epoch": 0.020623193338129557, "grad_norm": 0.294921875, "learning_rate": 0.0004999825787171807, "loss": 3.0007, "step": 935 }, { "epoch": 0.020645250229400284, "grad_norm": 0.2734375, "learning_rate": 0.000499982440728976, "loss": 2.9348, "step": 936 }, { "epoch": 0.02066730712067101, "grad_norm": 0.302734375, "learning_rate": 0.0004999823021964647, "loss": 3.0331, "step": 937 }, { "epoch": 0.020689364011941738, "grad_norm": 0.30859375, "learning_rate": 0.0004999821631196472, "loss": 2.9748, "step": 938 }, { "epoch": 0.020711420903212464, "grad_norm": 0.267578125, "learning_rate": 0.0004999820234985235, "loss": 2.8979, "step": 939 }, { "epoch": 0.02073347779448319, "grad_norm": 0.29296875, "learning_rate": 0.0004999818833330941, "loss": 3.0333, "step": 940 }, { "epoch": 0.020755534685753918, "grad_norm": 0.275390625, "learning_rate": 0.0004999817426233593, "loss": 2.9285, "step": 941 }, { "epoch": 0.020777591577024645, "grad_norm": 0.29296875, "learning_rate": 0.0004999816013693195, "loss": 2.9692, "step": 942 }, { "epoch": 0.02079964846829537, "grad_norm": 0.279296875, "learning_rate": 0.0004999814595709748, "loss": 2.9767, "step": 943 }, { "epoch": 0.0208217053595661, "grad_norm": 0.287109375, "learning_rate": 0.0004999813172283257, "loss": 2.9303, "step": 944 }, { "epoch": 0.020843762250836825, "grad_norm": 0.271484375, "learning_rate": 0.0004999811743413722, "loss": 2.9575, "step": 945 }, { "epoch": 0.020865819142107552, "grad_norm": 0.287109375, "learning_rate": 0.000499981030910115, "loss": 2.9469, "step": 946 }, { "epoch": 0.02088787603337828, "grad_norm": 0.27734375, "learning_rate": 0.0004999808869345543, "loss": 2.9873, "step": 947 }, { "epoch": 0.020909932924649006, "grad_norm": 0.279296875, "learning_rate": 0.0004999807424146902, "loss": 3.0085, "step": 948 }, { "epoch": 0.020931989815919733, "grad_norm": 0.28125, "learning_rate": 0.0004999805973505232, "loss": 2.9409, "step": 949 }, { "epoch": 0.02095404670719046, "grad_norm": 0.255859375, "learning_rate": 0.0004999804517420536, "loss": 2.9836, "step": 950 }, { "epoch": 0.020976103598461186, "grad_norm": 0.265625, "learning_rate": 0.0004999803055892817, "loss": 3.0139, "step": 951 }, { "epoch": 0.020998160489731913, "grad_norm": 0.287109375, "learning_rate": 0.0004999801588922078, "loss": 2.9903, "step": 952 }, { "epoch": 0.02102021738100264, "grad_norm": 0.26953125, "learning_rate": 0.0004999800116508323, "loss": 2.9863, "step": 953 }, { "epoch": 0.021042274272273367, "grad_norm": 0.279296875, "learning_rate": 0.0004999798638651555, "loss": 2.8706, "step": 954 }, { "epoch": 0.021064331163544094, "grad_norm": 0.283203125, "learning_rate": 0.0004999797155351776, "loss": 2.999, "step": 955 }, { "epoch": 0.02108638805481482, "grad_norm": 0.29296875, "learning_rate": 0.000499979566660899, "loss": 2.9728, "step": 956 }, { "epoch": 0.021108444946085547, "grad_norm": 0.322265625, "learning_rate": 0.00049997941724232, "loss": 3.0681, "step": 957 }, { "epoch": 0.021130501837356274, "grad_norm": 0.267578125, "learning_rate": 0.000499979267279441, "loss": 2.9421, "step": 958 }, { "epoch": 0.021152558728627, "grad_norm": 0.294921875, "learning_rate": 0.0004999791167722622, "loss": 3.0205, "step": 959 }, { "epoch": 0.021174615619897728, "grad_norm": 0.275390625, "learning_rate": 0.0004999789657207841, "loss": 3.0151, "step": 960 }, { "epoch": 0.021196672511168455, "grad_norm": 0.2734375, "learning_rate": 0.000499978814125007, "loss": 3.0258, "step": 961 }, { "epoch": 0.02121872940243918, "grad_norm": 0.291015625, "learning_rate": 0.000499978661984931, "loss": 2.9678, "step": 962 }, { "epoch": 0.021240786293709908, "grad_norm": 0.29296875, "learning_rate": 0.0004999785093005568, "loss": 2.9508, "step": 963 }, { "epoch": 0.021262843184980635, "grad_norm": 0.287109375, "learning_rate": 0.0004999783560718844, "loss": 2.8579, "step": 964 }, { "epoch": 0.021284900076251362, "grad_norm": 0.26171875, "learning_rate": 0.0004999782022989144, "loss": 2.9316, "step": 965 }, { "epoch": 0.02130695696752209, "grad_norm": 0.271484375, "learning_rate": 0.0004999780479816469, "loss": 3.076, "step": 966 }, { "epoch": 0.021329013858792815, "grad_norm": 0.275390625, "learning_rate": 0.0004999778931200822, "loss": 2.9081, "step": 967 }, { "epoch": 0.021351070750063542, "grad_norm": 0.2490234375, "learning_rate": 0.0004999777377142208, "loss": 2.9185, "step": 968 }, { "epoch": 0.02137312764133427, "grad_norm": 0.294921875, "learning_rate": 0.0004999775817640631, "loss": 2.8157, "step": 969 }, { "epoch": 0.021395184532604996, "grad_norm": 0.26171875, "learning_rate": 0.0004999774252696094, "loss": 2.9963, "step": 970 }, { "epoch": 0.021417241423875723, "grad_norm": 0.26171875, "learning_rate": 0.00049997726823086, "loss": 3.0337, "step": 971 }, { "epoch": 0.02143929831514645, "grad_norm": 0.26953125, "learning_rate": 0.0004999771106478151, "loss": 2.9953, "step": 972 }, { "epoch": 0.021461355206417176, "grad_norm": 0.2578125, "learning_rate": 0.0004999769525204753, "loss": 2.8835, "step": 973 }, { "epoch": 0.021483412097687903, "grad_norm": 0.265625, "learning_rate": 0.0004999767938488408, "loss": 3.0619, "step": 974 }, { "epoch": 0.02150546898895863, "grad_norm": 0.294921875, "learning_rate": 0.0004999766346329119, "loss": 2.9375, "step": 975 }, { "epoch": 0.021527525880229357, "grad_norm": 0.26171875, "learning_rate": 0.0004999764748726891, "loss": 2.9394, "step": 976 }, { "epoch": 0.021549582771500084, "grad_norm": 0.2734375, "learning_rate": 0.0004999763145681726, "loss": 3.0155, "step": 977 }, { "epoch": 0.02157163966277081, "grad_norm": 0.28125, "learning_rate": 0.0004999761537193628, "loss": 2.96, "step": 978 }, { "epoch": 0.021593696554041537, "grad_norm": 0.298828125, "learning_rate": 0.0004999759923262602, "loss": 2.8685, "step": 979 }, { "epoch": 0.021615753445312264, "grad_norm": 0.2734375, "learning_rate": 0.0004999758303888648, "loss": 2.9342, "step": 980 }, { "epoch": 0.02163781033658299, "grad_norm": 0.275390625, "learning_rate": 0.0004999756679071772, "loss": 2.9417, "step": 981 }, { "epoch": 0.021659867227853718, "grad_norm": 0.265625, "learning_rate": 0.0004999755048811978, "loss": 2.9143, "step": 982 }, { "epoch": 0.021681924119124445, "grad_norm": 0.267578125, "learning_rate": 0.0004999753413109268, "loss": 2.9826, "step": 983 }, { "epoch": 0.02170398101039517, "grad_norm": 0.271484375, "learning_rate": 0.0004999751771963647, "loss": 2.8654, "step": 984 }, { "epoch": 0.0217260379016659, "grad_norm": 0.27734375, "learning_rate": 0.0004999750125375118, "loss": 2.9566, "step": 985 }, { "epoch": 0.021748094792936625, "grad_norm": 0.259765625, "learning_rate": 0.0004999748473343684, "loss": 2.9109, "step": 986 }, { "epoch": 0.021770151684207352, "grad_norm": 0.279296875, "learning_rate": 0.0004999746815869349, "loss": 2.9768, "step": 987 }, { "epoch": 0.02179220857547808, "grad_norm": 0.27734375, "learning_rate": 0.0004999745152952117, "loss": 2.9469, "step": 988 }, { "epoch": 0.021814265466748806, "grad_norm": 0.259765625, "learning_rate": 0.0004999743484591991, "loss": 2.9436, "step": 989 }, { "epoch": 0.021836322358019532, "grad_norm": 0.26171875, "learning_rate": 0.0004999741810788975, "loss": 2.9769, "step": 990 }, { "epoch": 0.02185837924929026, "grad_norm": 0.306640625, "learning_rate": 0.0004999740131543072, "loss": 2.9466, "step": 991 }, { "epoch": 0.021880436140560986, "grad_norm": 0.26953125, "learning_rate": 0.0004999738446854287, "loss": 3.0289, "step": 992 }, { "epoch": 0.021902493031831713, "grad_norm": 0.26171875, "learning_rate": 0.0004999736756722623, "loss": 2.9516, "step": 993 }, { "epoch": 0.02192454992310244, "grad_norm": 0.26953125, "learning_rate": 0.0004999735061148083, "loss": 2.9186, "step": 994 }, { "epoch": 0.021946606814373167, "grad_norm": 0.28125, "learning_rate": 0.0004999733360130671, "loss": 2.9252, "step": 995 }, { "epoch": 0.021968663705643893, "grad_norm": 0.2734375, "learning_rate": 0.0004999731653670392, "loss": 3.0428, "step": 996 }, { "epoch": 0.02199072059691462, "grad_norm": 0.271484375, "learning_rate": 0.0004999729941767248, "loss": 2.9874, "step": 997 }, { "epoch": 0.022012777488185347, "grad_norm": 0.279296875, "learning_rate": 0.0004999728224421243, "loss": 2.9696, "step": 998 }, { "epoch": 0.022034834379456074, "grad_norm": 0.265625, "learning_rate": 0.0004999726501632381, "loss": 2.95, "step": 999 }, { "epoch": 0.0220568912707268, "grad_norm": 0.263671875, "learning_rate": 0.0004999724773400667, "loss": 2.93, "step": 1000 }, { "epoch": 0.022078948161997528, "grad_norm": 0.28125, "learning_rate": 0.0004999723039726103, "loss": 2.9686, "step": 1001 }, { "epoch": 0.022101005053268254, "grad_norm": 0.26171875, "learning_rate": 0.0004999721300608694, "loss": 2.9623, "step": 1002 }, { "epoch": 0.02212306194453898, "grad_norm": 0.2734375, "learning_rate": 0.0004999719556048442, "loss": 2.898, "step": 1003 }, { "epoch": 0.022145118835809708, "grad_norm": 0.28125, "learning_rate": 0.0004999717806045353, "loss": 2.9201, "step": 1004 }, { "epoch": 0.022167175727080435, "grad_norm": 0.265625, "learning_rate": 0.000499971605059943, "loss": 2.9032, "step": 1005 }, { "epoch": 0.02218923261835116, "grad_norm": 0.279296875, "learning_rate": 0.0004999714289710675, "loss": 2.8978, "step": 1006 }, { "epoch": 0.02221128950962189, "grad_norm": 0.27734375, "learning_rate": 0.0004999712523379096, "loss": 2.9106, "step": 1007 }, { "epoch": 0.022233346400892615, "grad_norm": 0.28125, "learning_rate": 0.0004999710751604692, "loss": 3.0036, "step": 1008 }, { "epoch": 0.022255403292163342, "grad_norm": 0.265625, "learning_rate": 0.000499970897438747, "loss": 2.9839, "step": 1009 }, { "epoch": 0.02227746018343407, "grad_norm": 0.26171875, "learning_rate": 0.0004999707191727434, "loss": 2.9591, "step": 1010 }, { "epoch": 0.022299517074704796, "grad_norm": 0.265625, "learning_rate": 0.0004999705403624586, "loss": 3.0148, "step": 1011 }, { "epoch": 0.022321573965975523, "grad_norm": 0.28125, "learning_rate": 0.000499970361007893, "loss": 2.9532, "step": 1012 }, { "epoch": 0.02234363085724625, "grad_norm": 0.255859375, "learning_rate": 0.0004999701811090472, "loss": 2.9253, "step": 1013 }, { "epoch": 0.022365687748516976, "grad_norm": 0.255859375, "learning_rate": 0.0004999700006659215, "loss": 2.9107, "step": 1014 }, { "epoch": 0.022387744639787703, "grad_norm": 0.25390625, "learning_rate": 0.0004999698196785162, "loss": 2.8749, "step": 1015 }, { "epoch": 0.02240980153105843, "grad_norm": 0.26171875, "learning_rate": 0.0004999696381468317, "loss": 2.9272, "step": 1016 }, { "epoch": 0.022431858422329157, "grad_norm": 0.259765625, "learning_rate": 0.0004999694560708684, "loss": 2.9428, "step": 1017 }, { "epoch": 0.022453915313599884, "grad_norm": 0.259765625, "learning_rate": 0.0004999692734506269, "loss": 2.9721, "step": 1018 }, { "epoch": 0.02247597220487061, "grad_norm": 0.271484375, "learning_rate": 0.0004999690902861073, "loss": 2.9666, "step": 1019 }, { "epoch": 0.022498029096141337, "grad_norm": 0.271484375, "learning_rate": 0.0004999689065773103, "loss": 2.9866, "step": 1020 }, { "epoch": 0.022520085987412064, "grad_norm": 0.25390625, "learning_rate": 0.0004999687223242361, "loss": 2.8924, "step": 1021 }, { "epoch": 0.02254214287868279, "grad_norm": 0.26171875, "learning_rate": 0.000499968537526885, "loss": 2.9237, "step": 1022 }, { "epoch": 0.022564199769953518, "grad_norm": 0.25390625, "learning_rate": 0.0004999683521852576, "loss": 2.9001, "step": 1023 }, { "epoch": 0.022586256661224244, "grad_norm": 0.267578125, "learning_rate": 0.0004999681662993543, "loss": 3.0084, "step": 1024 }, { "epoch": 0.02260831355249497, "grad_norm": 0.259765625, "learning_rate": 0.0004999679798691754, "loss": 2.9266, "step": 1025 }, { "epoch": 0.022630370443765698, "grad_norm": 0.255859375, "learning_rate": 0.0004999677928947215, "loss": 2.9084, "step": 1026 }, { "epoch": 0.022652427335036425, "grad_norm": 0.26171875, "learning_rate": 0.0004999676053759927, "loss": 2.8528, "step": 1027 }, { "epoch": 0.022674484226307152, "grad_norm": 0.26171875, "learning_rate": 0.0004999674173129896, "loss": 3.0079, "step": 1028 }, { "epoch": 0.02269654111757788, "grad_norm": 0.265625, "learning_rate": 0.0004999672287057126, "loss": 2.8978, "step": 1029 }, { "epoch": 0.022718598008848605, "grad_norm": 0.25390625, "learning_rate": 0.0004999670395541622, "loss": 2.8646, "step": 1030 }, { "epoch": 0.022740654900119332, "grad_norm": 0.26171875, "learning_rate": 0.0004999668498583386, "loss": 2.8432, "step": 1031 }, { "epoch": 0.02276271179139006, "grad_norm": 0.25390625, "learning_rate": 0.0004999666596182423, "loss": 2.9156, "step": 1032 }, { "epoch": 0.022784768682660786, "grad_norm": 0.2734375, "learning_rate": 0.0004999664688338737, "loss": 2.9291, "step": 1033 }, { "epoch": 0.022806825573931513, "grad_norm": 0.251953125, "learning_rate": 0.0004999662775052333, "loss": 2.8558, "step": 1034 }, { "epoch": 0.02282888246520224, "grad_norm": 0.26953125, "learning_rate": 0.0004999660856323215, "loss": 2.9057, "step": 1035 }, { "epoch": 0.022850939356472966, "grad_norm": 0.25390625, "learning_rate": 0.0004999658932151387, "loss": 2.7993, "step": 1036 }, { "epoch": 0.022872996247743693, "grad_norm": 0.259765625, "learning_rate": 0.0004999657002536851, "loss": 2.9195, "step": 1037 }, { "epoch": 0.02289505313901442, "grad_norm": 0.259765625, "learning_rate": 0.0004999655067479615, "loss": 2.9627, "step": 1038 }, { "epoch": 0.022917110030285147, "grad_norm": 0.26171875, "learning_rate": 0.0004999653126979682, "loss": 2.9237, "step": 1039 }, { "epoch": 0.022939166921555874, "grad_norm": 0.2578125, "learning_rate": 0.0004999651181037054, "loss": 2.9291, "step": 1040 }, { "epoch": 0.0229612238128266, "grad_norm": 0.255859375, "learning_rate": 0.0004999649229651738, "loss": 2.9507, "step": 1041 }, { "epoch": 0.022983280704097327, "grad_norm": 0.2734375, "learning_rate": 0.0004999647272823737, "loss": 2.9861, "step": 1042 }, { "epoch": 0.023005337595368054, "grad_norm": 0.26171875, "learning_rate": 0.0004999645310553055, "loss": 2.89, "step": 1043 }, { "epoch": 0.02302739448663878, "grad_norm": 0.265625, "learning_rate": 0.0004999643342839697, "loss": 2.8985, "step": 1044 }, { "epoch": 0.023049451377909508, "grad_norm": 0.259765625, "learning_rate": 0.0004999641369683666, "loss": 2.8769, "step": 1045 }, { "epoch": 0.023071508269180235, "grad_norm": 0.26171875, "learning_rate": 0.0004999639391084969, "loss": 2.9636, "step": 1046 }, { "epoch": 0.02309356516045096, "grad_norm": 0.26171875, "learning_rate": 0.0004999637407043608, "loss": 2.9605, "step": 1047 }, { "epoch": 0.02311562205172169, "grad_norm": 0.2578125, "learning_rate": 0.0004999635417559587, "loss": 2.8909, "step": 1048 }, { "epoch": 0.023137678942992415, "grad_norm": 0.26953125, "learning_rate": 0.0004999633422632911, "loss": 2.9147, "step": 1049 }, { "epoch": 0.023159735834263142, "grad_norm": 0.25390625, "learning_rate": 0.0004999631422263586, "loss": 2.9158, "step": 1050 }, { "epoch": 0.02318179272553387, "grad_norm": 0.25390625, "learning_rate": 0.0004999629416451614, "loss": 2.9586, "step": 1051 }, { "epoch": 0.023203849616804596, "grad_norm": 0.259765625, "learning_rate": 0.0004999627405197, "loss": 2.8715, "step": 1052 }, { "epoch": 0.023225906508075322, "grad_norm": 0.251953125, "learning_rate": 0.0004999625388499749, "loss": 2.9594, "step": 1053 }, { "epoch": 0.02324796339934605, "grad_norm": 0.271484375, "learning_rate": 0.0004999623366359865, "loss": 2.9494, "step": 1054 }, { "epoch": 0.023270020290616776, "grad_norm": 0.259765625, "learning_rate": 0.0004999621338777353, "loss": 2.8774, "step": 1055 }, { "epoch": 0.023292077181887503, "grad_norm": 0.255859375, "learning_rate": 0.0004999619305752216, "loss": 2.923, "step": 1056 }, { "epoch": 0.02331413407315823, "grad_norm": 0.263671875, "learning_rate": 0.0004999617267284461, "loss": 2.8392, "step": 1057 }, { "epoch": 0.023336190964428957, "grad_norm": 0.275390625, "learning_rate": 0.0004999615223374088, "loss": 2.8485, "step": 1058 }, { "epoch": 0.023358247855699683, "grad_norm": 0.251953125, "learning_rate": 0.0004999613174021107, "loss": 2.968, "step": 1059 }, { "epoch": 0.023380304746970407, "grad_norm": 0.279296875, "learning_rate": 0.0004999611119225518, "loss": 3.0028, "step": 1060 }, { "epoch": 0.023402361638241134, "grad_norm": 0.251953125, "learning_rate": 0.0004999609058987328, "loss": 2.9585, "step": 1061 }, { "epoch": 0.02342441852951186, "grad_norm": 0.2578125, "learning_rate": 0.000499960699330654, "loss": 2.8748, "step": 1062 }, { "epoch": 0.023446475420782587, "grad_norm": 0.25390625, "learning_rate": 0.0004999604922183158, "loss": 2.9607, "step": 1063 }, { "epoch": 0.023468532312053314, "grad_norm": 0.263671875, "learning_rate": 0.000499960284561719, "loss": 2.9132, "step": 1064 }, { "epoch": 0.02349058920332404, "grad_norm": 0.2578125, "learning_rate": 0.0004999600763608636, "loss": 2.8965, "step": 1065 }, { "epoch": 0.023512646094594768, "grad_norm": 0.263671875, "learning_rate": 0.0004999598676157503, "loss": 2.9247, "step": 1066 }, { "epoch": 0.023534702985865495, "grad_norm": 0.2578125, "learning_rate": 0.0004999596583263796, "loss": 2.9295, "step": 1067 }, { "epoch": 0.02355675987713622, "grad_norm": 0.263671875, "learning_rate": 0.0004999594484927518, "loss": 2.9349, "step": 1068 }, { "epoch": 0.023578816768406948, "grad_norm": 0.251953125, "learning_rate": 0.0004999592381148676, "loss": 2.8938, "step": 1069 }, { "epoch": 0.023600873659677675, "grad_norm": 0.275390625, "learning_rate": 0.0004999590271927271, "loss": 2.845, "step": 1070 }, { "epoch": 0.023622930550948402, "grad_norm": 0.255859375, "learning_rate": 0.0004999588157263311, "loss": 2.8998, "step": 1071 }, { "epoch": 0.02364498744221913, "grad_norm": 0.275390625, "learning_rate": 0.0004999586037156798, "loss": 2.8241, "step": 1072 }, { "epoch": 0.023667044333489855, "grad_norm": 0.25, "learning_rate": 0.0004999583911607738, "loss": 2.8854, "step": 1073 }, { "epoch": 0.023689101224760582, "grad_norm": 0.265625, "learning_rate": 0.0004999581780616135, "loss": 2.8694, "step": 1074 }, { "epoch": 0.02371115811603131, "grad_norm": 0.267578125, "learning_rate": 0.0004999579644181995, "loss": 2.871, "step": 1075 }, { "epoch": 0.023733215007302036, "grad_norm": 0.2578125, "learning_rate": 0.0004999577502305321, "loss": 2.7762, "step": 1076 }, { "epoch": 0.023755271898572763, "grad_norm": 0.265625, "learning_rate": 0.0004999575354986118, "loss": 2.8497, "step": 1077 }, { "epoch": 0.02377732878984349, "grad_norm": 0.251953125, "learning_rate": 0.0004999573202224392, "loss": 2.8819, "step": 1078 }, { "epoch": 0.023799385681114216, "grad_norm": 0.25390625, "learning_rate": 0.0004999571044020146, "loss": 2.9031, "step": 1079 }, { "epoch": 0.023821442572384943, "grad_norm": 0.25, "learning_rate": 0.0004999568880373385, "loss": 2.8309, "step": 1080 }, { "epoch": 0.02384349946365567, "grad_norm": 0.267578125, "learning_rate": 0.0004999566711284115, "loss": 2.951, "step": 1081 }, { "epoch": 0.023865556354926397, "grad_norm": 0.263671875, "learning_rate": 0.0004999564536752339, "loss": 2.8696, "step": 1082 }, { "epoch": 0.023887613246197124, "grad_norm": 0.267578125, "learning_rate": 0.0004999562356778063, "loss": 2.8606, "step": 1083 }, { "epoch": 0.02390967013746785, "grad_norm": 0.267578125, "learning_rate": 0.0004999560171361292, "loss": 2.7927, "step": 1084 }, { "epoch": 0.023931727028738577, "grad_norm": 0.255859375, "learning_rate": 0.0004999557980502028, "loss": 2.7934, "step": 1085 }, { "epoch": 0.023953783920009304, "grad_norm": 0.255859375, "learning_rate": 0.000499955578420028, "loss": 2.8303, "step": 1086 }, { "epoch": 0.02397584081128003, "grad_norm": 0.255859375, "learning_rate": 0.0004999553582456049, "loss": 2.9102, "step": 1087 }, { "epoch": 0.023997897702550758, "grad_norm": 0.271484375, "learning_rate": 0.0004999551375269342, "loss": 2.8567, "step": 1088 }, { "epoch": 0.024019954593821485, "grad_norm": 0.263671875, "learning_rate": 0.0004999549162640163, "loss": 2.8957, "step": 1089 }, { "epoch": 0.02404201148509221, "grad_norm": 0.251953125, "learning_rate": 0.0004999546944568517, "loss": 2.8416, "step": 1090 }, { "epoch": 0.02406406837636294, "grad_norm": 0.244140625, "learning_rate": 0.0004999544721054409, "loss": 2.8965, "step": 1091 }, { "epoch": 0.024086125267633665, "grad_norm": 0.251953125, "learning_rate": 0.0004999542492097845, "loss": 2.8787, "step": 1092 }, { "epoch": 0.024108182158904392, "grad_norm": 0.255859375, "learning_rate": 0.0004999540257698827, "loss": 2.944, "step": 1093 }, { "epoch": 0.02413023905017512, "grad_norm": 0.263671875, "learning_rate": 0.0004999538017857361, "loss": 2.8514, "step": 1094 }, { "epoch": 0.024152295941445846, "grad_norm": 0.251953125, "learning_rate": 0.0004999535772573454, "loss": 2.9253, "step": 1095 }, { "epoch": 0.024174352832716572, "grad_norm": 0.2734375, "learning_rate": 0.0004999533521847108, "loss": 2.9828, "step": 1096 }, { "epoch": 0.0241964097239873, "grad_norm": 0.259765625, "learning_rate": 0.0004999531265678328, "loss": 2.9645, "step": 1097 }, { "epoch": 0.024218466615258026, "grad_norm": 0.2734375, "learning_rate": 0.0004999529004067122, "loss": 2.7745, "step": 1098 }, { "epoch": 0.024240523506528753, "grad_norm": 0.251953125, "learning_rate": 0.0004999526737013493, "loss": 2.8547, "step": 1099 }, { "epoch": 0.02426258039779948, "grad_norm": 0.287109375, "learning_rate": 0.0004999524464517444, "loss": 2.8155, "step": 1100 }, { "epoch": 0.024284637289070207, "grad_norm": 0.259765625, "learning_rate": 0.0004999522186578983, "loss": 2.8546, "step": 1101 }, { "epoch": 0.024306694180340933, "grad_norm": 0.283203125, "learning_rate": 0.0004999519903198114, "loss": 2.9706, "step": 1102 }, { "epoch": 0.02432875107161166, "grad_norm": 0.251953125, "learning_rate": 0.0004999517614374841, "loss": 2.8495, "step": 1103 }, { "epoch": 0.024350807962882387, "grad_norm": 0.2470703125, "learning_rate": 0.000499951532010917, "loss": 2.875, "step": 1104 }, { "epoch": 0.024372864854153114, "grad_norm": 0.2490234375, "learning_rate": 0.0004999513020401106, "loss": 2.8547, "step": 1105 }, { "epoch": 0.02439492174542384, "grad_norm": 0.25, "learning_rate": 0.0004999510715250653, "loss": 2.821, "step": 1106 }, { "epoch": 0.024416978636694568, "grad_norm": 0.244140625, "learning_rate": 0.0004999508404657818, "loss": 2.9242, "step": 1107 }, { "epoch": 0.024439035527965294, "grad_norm": 0.2490234375, "learning_rate": 0.0004999506088622603, "loss": 2.9447, "step": 1108 }, { "epoch": 0.02446109241923602, "grad_norm": 0.25, "learning_rate": 0.0004999503767145016, "loss": 2.871, "step": 1109 }, { "epoch": 0.024483149310506748, "grad_norm": 0.25, "learning_rate": 0.000499950144022506, "loss": 2.9274, "step": 1110 }, { "epoch": 0.024505206201777475, "grad_norm": 0.25390625, "learning_rate": 0.0004999499107862742, "loss": 2.888, "step": 1111 }, { "epoch": 0.0245272630930482, "grad_norm": 0.25, "learning_rate": 0.0004999496770058065, "loss": 2.9196, "step": 1112 }, { "epoch": 0.02454931998431893, "grad_norm": 0.26171875, "learning_rate": 0.0004999494426811037, "loss": 2.9021, "step": 1113 }, { "epoch": 0.024571376875589655, "grad_norm": 0.265625, "learning_rate": 0.0004999492078121659, "loss": 2.9324, "step": 1114 }, { "epoch": 0.024593433766860382, "grad_norm": 0.259765625, "learning_rate": 0.0004999489723989939, "loss": 2.8886, "step": 1115 }, { "epoch": 0.02461549065813111, "grad_norm": 0.2490234375, "learning_rate": 0.0004999487364415882, "loss": 2.8626, "step": 1116 }, { "epoch": 0.024637547549401836, "grad_norm": 0.265625, "learning_rate": 0.0004999484999399492, "loss": 2.8424, "step": 1117 }, { "epoch": 0.024659604440672563, "grad_norm": 0.259765625, "learning_rate": 0.0004999482628940774, "loss": 2.8723, "step": 1118 }, { "epoch": 0.02468166133194329, "grad_norm": 0.248046875, "learning_rate": 0.0004999480253039735, "loss": 2.8343, "step": 1119 }, { "epoch": 0.024703718223214016, "grad_norm": 0.25, "learning_rate": 0.000499947787169638, "loss": 2.8471, "step": 1120 }, { "epoch": 0.024725775114484743, "grad_norm": 0.26171875, "learning_rate": 0.0004999475484910712, "loss": 2.8835, "step": 1121 }, { "epoch": 0.02474783200575547, "grad_norm": 0.2451171875, "learning_rate": 0.0004999473092682737, "loss": 2.8866, "step": 1122 }, { "epoch": 0.024769888897026197, "grad_norm": 0.263671875, "learning_rate": 0.0004999470695012462, "loss": 2.8424, "step": 1123 }, { "epoch": 0.024791945788296924, "grad_norm": 0.2490234375, "learning_rate": 0.0004999468291899889, "loss": 2.8549, "step": 1124 }, { "epoch": 0.02481400267956765, "grad_norm": 0.26171875, "learning_rate": 0.0004999465883345026, "loss": 2.854, "step": 1125 }, { "epoch": 0.024836059570838377, "grad_norm": 0.236328125, "learning_rate": 0.0004999463469347878, "loss": 2.8743, "step": 1126 }, { "epoch": 0.024858116462109104, "grad_norm": 0.2578125, "learning_rate": 0.0004999461049908448, "loss": 3.002, "step": 1127 }, { "epoch": 0.02488017335337983, "grad_norm": 0.2470703125, "learning_rate": 0.0004999458625026744, "loss": 2.8768, "step": 1128 }, { "epoch": 0.024902230244650558, "grad_norm": 0.251953125, "learning_rate": 0.0004999456194702771, "loss": 2.8821, "step": 1129 }, { "epoch": 0.024924287135921284, "grad_norm": 0.2412109375, "learning_rate": 0.0004999453758936532, "loss": 2.8429, "step": 1130 }, { "epoch": 0.02494634402719201, "grad_norm": 0.24609375, "learning_rate": 0.0004999451317728034, "loss": 2.8137, "step": 1131 }, { "epoch": 0.024968400918462738, "grad_norm": 0.2470703125, "learning_rate": 0.0004999448871077283, "loss": 2.8565, "step": 1132 }, { "epoch": 0.024990457809733465, "grad_norm": 0.251953125, "learning_rate": 0.0004999446418984283, "loss": 2.8355, "step": 1133 }, { "epoch": 0.025012514701004192, "grad_norm": 0.2470703125, "learning_rate": 0.000499944396144904, "loss": 2.8335, "step": 1134 }, { "epoch": 0.02503457159227492, "grad_norm": 0.259765625, "learning_rate": 0.0004999441498471557, "loss": 2.8215, "step": 1135 }, { "epoch": 0.025056628483545645, "grad_norm": 0.267578125, "learning_rate": 0.0004999439030051844, "loss": 2.8272, "step": 1136 }, { "epoch": 0.025078685374816372, "grad_norm": 0.25, "learning_rate": 0.0004999436556189903, "loss": 2.8517, "step": 1137 }, { "epoch": 0.0251007422660871, "grad_norm": 0.2392578125, "learning_rate": 0.000499943407688574, "loss": 2.8665, "step": 1138 }, { "epoch": 0.025122799157357826, "grad_norm": 0.275390625, "learning_rate": 0.0004999431592139361, "loss": 2.8951, "step": 1139 }, { "epoch": 0.025144856048628553, "grad_norm": 0.2734375, "learning_rate": 0.0004999429101950769, "loss": 2.8629, "step": 1140 }, { "epoch": 0.02516691293989928, "grad_norm": 0.275390625, "learning_rate": 0.0004999426606319974, "loss": 2.8571, "step": 1141 }, { "epoch": 0.025188969831170006, "grad_norm": 0.2578125, "learning_rate": 0.0004999424105246978, "loss": 2.8409, "step": 1142 }, { "epoch": 0.025211026722440733, "grad_norm": 0.2421875, "learning_rate": 0.0004999421598731787, "loss": 2.8293, "step": 1143 }, { "epoch": 0.02523308361371146, "grad_norm": 0.279296875, "learning_rate": 0.0004999419086774407, "loss": 2.9138, "step": 1144 }, { "epoch": 0.025255140504982187, "grad_norm": 0.263671875, "learning_rate": 0.0004999416569374844, "loss": 2.8361, "step": 1145 }, { "epoch": 0.025277197396252914, "grad_norm": 0.27734375, "learning_rate": 0.0004999414046533102, "loss": 2.8343, "step": 1146 }, { "epoch": 0.02529925428752364, "grad_norm": 0.251953125, "learning_rate": 0.0004999411518249187, "loss": 2.8226, "step": 1147 }, { "epoch": 0.025321311178794367, "grad_norm": 0.265625, "learning_rate": 0.0004999408984523105, "loss": 2.8729, "step": 1148 }, { "epoch": 0.025343368070065094, "grad_norm": 0.2578125, "learning_rate": 0.0004999406445354861, "loss": 2.8692, "step": 1149 }, { "epoch": 0.02536542496133582, "grad_norm": 0.248046875, "learning_rate": 0.0004999403900744461, "loss": 2.8671, "step": 1150 }, { "epoch": 0.025387481852606548, "grad_norm": 0.255859375, "learning_rate": 0.000499940135069191, "loss": 2.7901, "step": 1151 }, { "epoch": 0.025409538743877275, "grad_norm": 0.248046875, "learning_rate": 0.0004999398795197214, "loss": 2.8497, "step": 1152 }, { "epoch": 0.025431595635148, "grad_norm": 0.251953125, "learning_rate": 0.0004999396234260379, "loss": 2.8648, "step": 1153 }, { "epoch": 0.02545365252641873, "grad_norm": 0.2490234375, "learning_rate": 0.0004999393667881409, "loss": 2.8193, "step": 1154 }, { "epoch": 0.025475709417689455, "grad_norm": 0.2734375, "learning_rate": 0.0004999391096060312, "loss": 2.839, "step": 1155 }, { "epoch": 0.025497766308960182, "grad_norm": 0.25, "learning_rate": 0.0004999388518797091, "loss": 2.749, "step": 1156 }, { "epoch": 0.02551982320023091, "grad_norm": 0.251953125, "learning_rate": 0.0004999385936091753, "loss": 2.7958, "step": 1157 }, { "epoch": 0.025541880091501636, "grad_norm": 0.244140625, "learning_rate": 0.0004999383347944304, "loss": 2.8183, "step": 1158 }, { "epoch": 0.025563936982772362, "grad_norm": 0.25, "learning_rate": 0.0004999380754354748, "loss": 2.8317, "step": 1159 }, { "epoch": 0.02558599387404309, "grad_norm": 0.26171875, "learning_rate": 0.0004999378155323093, "loss": 2.9496, "step": 1160 }, { "epoch": 0.025608050765313816, "grad_norm": 0.2470703125, "learning_rate": 0.0004999375550849342, "loss": 2.9073, "step": 1161 }, { "epoch": 0.025630107656584543, "grad_norm": 0.2578125, "learning_rate": 0.0004999372940933504, "loss": 2.8886, "step": 1162 }, { "epoch": 0.02565216454785527, "grad_norm": 0.251953125, "learning_rate": 0.0004999370325575581, "loss": 2.8138, "step": 1163 }, { "epoch": 0.025674221439125997, "grad_norm": 0.2451171875, "learning_rate": 0.000499936770477558, "loss": 2.8538, "step": 1164 }, { "epoch": 0.025696278330396723, "grad_norm": 0.251953125, "learning_rate": 0.0004999365078533508, "loss": 2.9552, "step": 1165 }, { "epoch": 0.02571833522166745, "grad_norm": 0.2470703125, "learning_rate": 0.000499936244684937, "loss": 2.831, "step": 1166 }, { "epoch": 0.025740392112938177, "grad_norm": 0.2431640625, "learning_rate": 0.0004999359809723172, "loss": 2.8951, "step": 1167 }, { "epoch": 0.025762449004208904, "grad_norm": 0.248046875, "learning_rate": 0.0004999357167154918, "loss": 2.9344, "step": 1168 }, { "epoch": 0.02578450589547963, "grad_norm": 0.25390625, "learning_rate": 0.0004999354519144617, "loss": 2.7618, "step": 1169 }, { "epoch": 0.025806562786750357, "grad_norm": 0.244140625, "learning_rate": 0.000499935186569227, "loss": 2.8372, "step": 1170 }, { "epoch": 0.025828619678021084, "grad_norm": 0.244140625, "learning_rate": 0.0004999349206797889, "loss": 2.8206, "step": 1171 }, { "epoch": 0.02585067656929181, "grad_norm": 0.2373046875, "learning_rate": 0.0004999346542461473, "loss": 2.8633, "step": 1172 }, { "epoch": 0.025872733460562538, "grad_norm": 0.267578125, "learning_rate": 0.0004999343872683034, "loss": 2.8247, "step": 1173 }, { "epoch": 0.025894790351833265, "grad_norm": 0.265625, "learning_rate": 0.0004999341197462573, "loss": 2.8168, "step": 1174 }, { "epoch": 0.02591684724310399, "grad_norm": 0.255859375, "learning_rate": 0.00049993385168001, "loss": 2.8485, "step": 1175 }, { "epoch": 0.02593890413437472, "grad_norm": 0.251953125, "learning_rate": 0.0004999335830695616, "loss": 2.8671, "step": 1176 }, { "epoch": 0.025960961025645445, "grad_norm": 0.2578125, "learning_rate": 0.0004999333139149131, "loss": 2.8506, "step": 1177 }, { "epoch": 0.025983017916916172, "grad_norm": 0.251953125, "learning_rate": 0.000499933044216065, "loss": 2.8273, "step": 1178 }, { "epoch": 0.0260050748081869, "grad_norm": 0.2431640625, "learning_rate": 0.0004999327739730178, "loss": 2.7891, "step": 1179 }, { "epoch": 0.026027131699457626, "grad_norm": 0.251953125, "learning_rate": 0.000499932503185772, "loss": 2.8278, "step": 1180 }, { "epoch": 0.026049188590728353, "grad_norm": 0.25390625, "learning_rate": 0.0004999322318543283, "loss": 2.8015, "step": 1181 }, { "epoch": 0.02607124548199908, "grad_norm": 0.24609375, "learning_rate": 0.0004999319599786874, "loss": 2.7552, "step": 1182 }, { "epoch": 0.026093302373269806, "grad_norm": 0.2451171875, "learning_rate": 0.0004999316875588498, "loss": 2.868, "step": 1183 }, { "epoch": 0.026115359264540533, "grad_norm": 0.263671875, "learning_rate": 0.000499931414594816, "loss": 2.8673, "step": 1184 }, { "epoch": 0.02613741615581126, "grad_norm": 0.2451171875, "learning_rate": 0.0004999311410865866, "loss": 2.8676, "step": 1185 }, { "epoch": 0.026159473047081987, "grad_norm": 0.24609375, "learning_rate": 0.0004999308670341624, "loss": 2.872, "step": 1186 }, { "epoch": 0.026181529938352713, "grad_norm": 0.2451171875, "learning_rate": 0.0004999305924375439, "loss": 2.7188, "step": 1187 }, { "epoch": 0.02620358682962344, "grad_norm": 0.2392578125, "learning_rate": 0.0004999303172967315, "loss": 2.7951, "step": 1188 }, { "epoch": 0.026225643720894167, "grad_norm": 0.25, "learning_rate": 0.0004999300416117262, "loss": 2.8476, "step": 1189 }, { "epoch": 0.026247700612164894, "grad_norm": 0.2490234375, "learning_rate": 0.0004999297653825282, "loss": 2.8811, "step": 1190 }, { "epoch": 0.02626975750343562, "grad_norm": 0.25, "learning_rate": 0.0004999294886091383, "loss": 2.8548, "step": 1191 }, { "epoch": 0.026291814394706348, "grad_norm": 0.236328125, "learning_rate": 0.000499929211291557, "loss": 2.8203, "step": 1192 }, { "epoch": 0.026313871285977074, "grad_norm": 0.2412109375, "learning_rate": 0.000499928933429785, "loss": 2.7321, "step": 1193 }, { "epoch": 0.0263359281772478, "grad_norm": 0.244140625, "learning_rate": 0.000499928655023823, "loss": 2.8286, "step": 1194 }, { "epoch": 0.026357985068518528, "grad_norm": 0.25, "learning_rate": 0.0004999283760736714, "loss": 2.9079, "step": 1195 }, { "epoch": 0.026380041959789255, "grad_norm": 0.236328125, "learning_rate": 0.0004999280965793309, "loss": 2.8194, "step": 1196 }, { "epoch": 0.026402098851059982, "grad_norm": 0.25390625, "learning_rate": 0.000499927816540802, "loss": 2.8337, "step": 1197 }, { "epoch": 0.02642415574233071, "grad_norm": 0.2373046875, "learning_rate": 0.0004999275359580855, "loss": 2.7966, "step": 1198 }, { "epoch": 0.026446212633601435, "grad_norm": 0.255859375, "learning_rate": 0.0004999272548311819, "loss": 2.8448, "step": 1199 }, { "epoch": 0.026468269524872162, "grad_norm": 0.248046875, "learning_rate": 0.0004999269731600918, "loss": 2.8357, "step": 1200 }, { "epoch": 0.026490326416142886, "grad_norm": 0.2578125, "learning_rate": 0.0004999266909448159, "loss": 2.8357, "step": 1201 }, { "epoch": 0.026512383307413612, "grad_norm": 0.267578125, "learning_rate": 0.0004999264081853548, "loss": 2.8981, "step": 1202 }, { "epoch": 0.02653444019868434, "grad_norm": 0.2451171875, "learning_rate": 0.000499926124881709, "loss": 2.9312, "step": 1203 }, { "epoch": 0.026556497089955066, "grad_norm": 0.2490234375, "learning_rate": 0.0004999258410338792, "loss": 2.8172, "step": 1204 }, { "epoch": 0.026578553981225793, "grad_norm": 0.251953125, "learning_rate": 0.000499925556641866, "loss": 2.8974, "step": 1205 }, { "epoch": 0.02660061087249652, "grad_norm": 0.259765625, "learning_rate": 0.00049992527170567, "loss": 2.8611, "step": 1206 }, { "epoch": 0.026622667763767247, "grad_norm": 0.2578125, "learning_rate": 0.000499924986225292, "loss": 2.7734, "step": 1207 }, { "epoch": 0.026644724655037973, "grad_norm": 0.2451171875, "learning_rate": 0.0004999247002007324, "loss": 2.8329, "step": 1208 }, { "epoch": 0.0266667815463087, "grad_norm": 0.25, "learning_rate": 0.0004999244136319918, "loss": 2.7636, "step": 1209 }, { "epoch": 0.026688838437579427, "grad_norm": 0.24609375, "learning_rate": 0.0004999241265190709, "loss": 2.8127, "step": 1210 }, { "epoch": 0.026710895328850154, "grad_norm": 0.244140625, "learning_rate": 0.0004999238388619704, "loss": 2.831, "step": 1211 }, { "epoch": 0.02673295222012088, "grad_norm": 0.240234375, "learning_rate": 0.000499923550660691, "loss": 2.7763, "step": 1212 }, { "epoch": 0.026755009111391607, "grad_norm": 0.244140625, "learning_rate": 0.0004999232619152331, "loss": 2.8469, "step": 1213 }, { "epoch": 0.026777066002662334, "grad_norm": 0.2431640625, "learning_rate": 0.0004999229726255973, "loss": 2.8158, "step": 1214 }, { "epoch": 0.02679912289393306, "grad_norm": 0.236328125, "learning_rate": 0.0004999226827917846, "loss": 2.741, "step": 1215 }, { "epoch": 0.026821179785203788, "grad_norm": 0.240234375, "learning_rate": 0.0004999223924137952, "loss": 2.8455, "step": 1216 }, { "epoch": 0.026843236676474515, "grad_norm": 0.236328125, "learning_rate": 0.00049992210149163, "loss": 2.8289, "step": 1217 }, { "epoch": 0.02686529356774524, "grad_norm": 0.2421875, "learning_rate": 0.0004999218100252894, "loss": 2.8506, "step": 1218 }, { "epoch": 0.02688735045901597, "grad_norm": 0.240234375, "learning_rate": 0.0004999215180147744, "loss": 2.6954, "step": 1219 }, { "epoch": 0.026909407350286695, "grad_norm": 0.2333984375, "learning_rate": 0.0004999212254600853, "loss": 2.7963, "step": 1220 }, { "epoch": 0.026931464241557422, "grad_norm": 0.232421875, "learning_rate": 0.0004999209323612229, "loss": 2.865, "step": 1221 }, { "epoch": 0.02695352113282815, "grad_norm": 0.23828125, "learning_rate": 0.0004999206387181879, "loss": 2.9032, "step": 1222 }, { "epoch": 0.026975578024098876, "grad_norm": 0.2353515625, "learning_rate": 0.0004999203445309807, "loss": 2.9007, "step": 1223 }, { "epoch": 0.026997634915369603, "grad_norm": 0.24609375, "learning_rate": 0.000499920049799602, "loss": 2.8158, "step": 1224 }, { "epoch": 0.02701969180664033, "grad_norm": 0.23828125, "learning_rate": 0.0004999197545240526, "loss": 2.7516, "step": 1225 }, { "epoch": 0.027041748697911056, "grad_norm": 0.2412109375, "learning_rate": 0.0004999194587043332, "loss": 2.8306, "step": 1226 }, { "epoch": 0.027063805589181783, "grad_norm": 0.25390625, "learning_rate": 0.0004999191623404442, "loss": 2.9281, "step": 1227 }, { "epoch": 0.02708586248045251, "grad_norm": 0.2353515625, "learning_rate": 0.0004999188654323863, "loss": 2.7734, "step": 1228 }, { "epoch": 0.027107919371723237, "grad_norm": 0.25390625, "learning_rate": 0.0004999185679801602, "loss": 2.8661, "step": 1229 }, { "epoch": 0.027129976262993964, "grad_norm": 0.2392578125, "learning_rate": 0.0004999182699837667, "loss": 2.9116, "step": 1230 }, { "epoch": 0.02715203315426469, "grad_norm": 0.25390625, "learning_rate": 0.0004999179714432062, "loss": 2.8285, "step": 1231 }, { "epoch": 0.027174090045535417, "grad_norm": 0.2353515625, "learning_rate": 0.0004999176723584793, "loss": 2.8005, "step": 1232 }, { "epoch": 0.027196146936806144, "grad_norm": 0.25, "learning_rate": 0.000499917372729587, "loss": 2.7654, "step": 1233 }, { "epoch": 0.02721820382807687, "grad_norm": 0.2470703125, "learning_rate": 0.0004999170725565296, "loss": 2.8457, "step": 1234 }, { "epoch": 0.027240260719347598, "grad_norm": 0.2353515625, "learning_rate": 0.0004999167718393081, "loss": 2.8171, "step": 1235 }, { "epoch": 0.027262317610618324, "grad_norm": 0.2314453125, "learning_rate": 0.0004999164705779228, "loss": 2.7674, "step": 1236 }, { "epoch": 0.02728437450188905, "grad_norm": 0.25390625, "learning_rate": 0.0004999161687723745, "loss": 2.8801, "step": 1237 }, { "epoch": 0.027306431393159778, "grad_norm": 0.2412109375, "learning_rate": 0.000499915866422664, "loss": 2.7881, "step": 1238 }, { "epoch": 0.027328488284430505, "grad_norm": 0.2353515625, "learning_rate": 0.0004999155635287916, "loss": 2.7755, "step": 1239 }, { "epoch": 0.027350545175701232, "grad_norm": 0.2373046875, "learning_rate": 0.0004999152600907584, "loss": 2.8352, "step": 1240 }, { "epoch": 0.02737260206697196, "grad_norm": 0.2470703125, "learning_rate": 0.0004999149561085648, "loss": 2.814, "step": 1241 }, { "epoch": 0.027394658958242685, "grad_norm": 0.23828125, "learning_rate": 0.0004999146515822114, "loss": 2.8713, "step": 1242 }, { "epoch": 0.027416715849513412, "grad_norm": 0.23046875, "learning_rate": 0.0004999143465116991, "loss": 2.777, "step": 1243 }, { "epoch": 0.02743877274078414, "grad_norm": 0.2470703125, "learning_rate": 0.0004999140408970285, "loss": 2.7184, "step": 1244 }, { "epoch": 0.027460829632054866, "grad_norm": 0.240234375, "learning_rate": 0.0004999137347382, "loss": 2.9372, "step": 1245 }, { "epoch": 0.027482886523325593, "grad_norm": 0.25, "learning_rate": 0.0004999134280352145, "loss": 2.7858, "step": 1246 }, { "epoch": 0.02750494341459632, "grad_norm": 0.24609375, "learning_rate": 0.0004999131207880728, "loss": 2.813, "step": 1247 }, { "epoch": 0.027527000305867046, "grad_norm": 0.23828125, "learning_rate": 0.0004999128129967753, "loss": 2.8103, "step": 1248 }, { "epoch": 0.027549057197137773, "grad_norm": 0.228515625, "learning_rate": 0.0004999125046613227, "loss": 2.7747, "step": 1249 }, { "epoch": 0.0275711140884085, "grad_norm": 0.2431640625, "learning_rate": 0.000499912195781716, "loss": 2.8168, "step": 1250 }, { "epoch": 0.027593170979679227, "grad_norm": 0.2470703125, "learning_rate": 0.0004999118863579554, "loss": 2.8008, "step": 1251 }, { "epoch": 0.027615227870949954, "grad_norm": 0.2451171875, "learning_rate": 0.0004999115763900418, "loss": 2.8458, "step": 1252 }, { "epoch": 0.02763728476222068, "grad_norm": 0.2431640625, "learning_rate": 0.000499911265877976, "loss": 2.8471, "step": 1253 }, { "epoch": 0.027659341653491407, "grad_norm": 0.25390625, "learning_rate": 0.0004999109548217584, "loss": 2.8736, "step": 1254 }, { "epoch": 0.027681398544762134, "grad_norm": 0.248046875, "learning_rate": 0.0004999106432213899, "loss": 2.7748, "step": 1255 }, { "epoch": 0.02770345543603286, "grad_norm": 0.2412109375, "learning_rate": 0.0004999103310768711, "loss": 2.8104, "step": 1256 }, { "epoch": 0.027725512327303588, "grad_norm": 0.2490234375, "learning_rate": 0.0004999100183882027, "loss": 2.8866, "step": 1257 }, { "epoch": 0.027747569218574315, "grad_norm": 0.25, "learning_rate": 0.0004999097051553854, "loss": 2.7941, "step": 1258 }, { "epoch": 0.02776962610984504, "grad_norm": 0.2490234375, "learning_rate": 0.0004999093913784197, "loss": 2.7731, "step": 1259 }, { "epoch": 0.027791683001115768, "grad_norm": 0.25390625, "learning_rate": 0.0004999090770573065, "loss": 2.6952, "step": 1260 }, { "epoch": 0.027813739892386495, "grad_norm": 0.2451171875, "learning_rate": 0.0004999087621920464, "loss": 2.7449, "step": 1261 }, { "epoch": 0.027835796783657222, "grad_norm": 0.2470703125, "learning_rate": 0.0004999084467826402, "loss": 2.7608, "step": 1262 }, { "epoch": 0.02785785367492795, "grad_norm": 0.2431640625, "learning_rate": 0.0004999081308290884, "loss": 2.7735, "step": 1263 }, { "epoch": 0.027879910566198676, "grad_norm": 0.234375, "learning_rate": 0.0004999078143313918, "loss": 2.7609, "step": 1264 }, { "epoch": 0.027901967457469402, "grad_norm": 0.2431640625, "learning_rate": 0.000499907497289551, "loss": 2.8279, "step": 1265 }, { "epoch": 0.02792402434874013, "grad_norm": 0.236328125, "learning_rate": 0.0004999071797035667, "loss": 2.7366, "step": 1266 }, { "epoch": 0.027946081240010856, "grad_norm": 0.255859375, "learning_rate": 0.0004999068615734398, "loss": 2.8425, "step": 1267 }, { "epoch": 0.027968138131281583, "grad_norm": 0.228515625, "learning_rate": 0.0004999065428991707, "loss": 2.8264, "step": 1268 }, { "epoch": 0.02799019502255231, "grad_norm": 0.25390625, "learning_rate": 0.0004999062236807604, "loss": 2.7883, "step": 1269 }, { "epoch": 0.028012251913823037, "grad_norm": 0.244140625, "learning_rate": 0.0004999059039182092, "loss": 2.8426, "step": 1270 }, { "epoch": 0.028034308805093763, "grad_norm": 0.25, "learning_rate": 0.0004999055836115182, "loss": 2.8184, "step": 1271 }, { "epoch": 0.02805636569636449, "grad_norm": 0.234375, "learning_rate": 0.0004999052627606878, "loss": 2.8857, "step": 1272 }, { "epoch": 0.028078422587635217, "grad_norm": 0.2412109375, "learning_rate": 0.0004999049413657189, "loss": 2.7923, "step": 1273 }, { "epoch": 0.028100479478905944, "grad_norm": 0.2451171875, "learning_rate": 0.000499904619426612, "loss": 2.7743, "step": 1274 }, { "epoch": 0.02812253637017667, "grad_norm": 0.236328125, "learning_rate": 0.000499904296943368, "loss": 2.7927, "step": 1275 }, { "epoch": 0.028144593261447397, "grad_norm": 0.2431640625, "learning_rate": 0.0004999039739159876, "loss": 2.7499, "step": 1276 }, { "epoch": 0.028166650152718124, "grad_norm": 0.2333984375, "learning_rate": 0.0004999036503444713, "loss": 2.7627, "step": 1277 }, { "epoch": 0.02818870704398885, "grad_norm": 0.2373046875, "learning_rate": 0.00049990332622882, "loss": 2.8393, "step": 1278 }, { "epoch": 0.028210763935259578, "grad_norm": 0.251953125, "learning_rate": 0.0004999030015690342, "loss": 2.8298, "step": 1279 }, { "epoch": 0.028232820826530305, "grad_norm": 0.2412109375, "learning_rate": 0.0004999026763651148, "loss": 2.7484, "step": 1280 }, { "epoch": 0.02825487771780103, "grad_norm": 0.236328125, "learning_rate": 0.0004999023506170626, "loss": 2.9092, "step": 1281 }, { "epoch": 0.02827693460907176, "grad_norm": 0.2470703125, "learning_rate": 0.000499902024324878, "loss": 2.744, "step": 1282 }, { "epoch": 0.028298991500342485, "grad_norm": 0.240234375, "learning_rate": 0.000499901697488562, "loss": 2.8377, "step": 1283 }, { "epoch": 0.028321048391613212, "grad_norm": 0.2373046875, "learning_rate": 0.000499901370108115, "loss": 2.8607, "step": 1284 }, { "epoch": 0.02834310528288394, "grad_norm": 0.2373046875, "learning_rate": 0.0004999010421835381, "loss": 2.8026, "step": 1285 }, { "epoch": 0.028365162174154666, "grad_norm": 0.236328125, "learning_rate": 0.0004999007137148315, "loss": 2.7943, "step": 1286 }, { "epoch": 0.028387219065425393, "grad_norm": 0.236328125, "learning_rate": 0.0004999003847019964, "loss": 2.7362, "step": 1287 }, { "epoch": 0.02840927595669612, "grad_norm": 0.2373046875, "learning_rate": 0.0004999000551450335, "loss": 2.7164, "step": 1288 }, { "epoch": 0.028431332847966846, "grad_norm": 0.2451171875, "learning_rate": 0.0004998997250439432, "loss": 2.7961, "step": 1289 }, { "epoch": 0.028453389739237573, "grad_norm": 0.2333984375, "learning_rate": 0.0004998993943987263, "loss": 2.7966, "step": 1290 }, { "epoch": 0.0284754466305083, "grad_norm": 0.23828125, "learning_rate": 0.0004998990632093837, "loss": 2.8247, "step": 1291 }, { "epoch": 0.028497503521779027, "grad_norm": 0.2490234375, "learning_rate": 0.000499898731475916, "loss": 2.7787, "step": 1292 }, { "epoch": 0.028519560413049753, "grad_norm": 0.25, "learning_rate": 0.000499898399198324, "loss": 2.8375, "step": 1293 }, { "epoch": 0.02854161730432048, "grad_norm": 0.2392578125, "learning_rate": 0.0004998980663766082, "loss": 2.7633, "step": 1294 }, { "epoch": 0.028563674195591207, "grad_norm": 0.2470703125, "learning_rate": 0.0004998977330107696, "loss": 2.7586, "step": 1295 }, { "epoch": 0.028585731086861934, "grad_norm": 0.251953125, "learning_rate": 0.0004998973991008087, "loss": 2.71, "step": 1296 }, { "epoch": 0.02860778797813266, "grad_norm": 0.240234375, "learning_rate": 0.0004998970646467266, "loss": 2.7717, "step": 1297 }, { "epoch": 0.028629844869403388, "grad_norm": 0.263671875, "learning_rate": 0.0004998967296485235, "loss": 2.8029, "step": 1298 }, { "epoch": 0.028651901760674114, "grad_norm": 0.25, "learning_rate": 0.0004998963941062005, "loss": 2.8586, "step": 1299 }, { "epoch": 0.02867395865194484, "grad_norm": 0.2333984375, "learning_rate": 0.0004998960580197583, "loss": 2.8105, "step": 1300 }, { "epoch": 0.028696015543215568, "grad_norm": 0.240234375, "learning_rate": 0.0004998957213891974, "loss": 2.6743, "step": 1301 }, { "epoch": 0.028718072434486295, "grad_norm": 0.24609375, "learning_rate": 0.0004998953842145187, "loss": 2.8483, "step": 1302 }, { "epoch": 0.02874012932575702, "grad_norm": 0.2275390625, "learning_rate": 0.000499895046495723, "loss": 2.6921, "step": 1303 }, { "epoch": 0.02876218621702775, "grad_norm": 0.2451171875, "learning_rate": 0.000499894708232811, "loss": 2.7492, "step": 1304 }, { "epoch": 0.028784243108298475, "grad_norm": 0.234375, "learning_rate": 0.0004998943694257833, "loss": 2.8133, "step": 1305 }, { "epoch": 0.028806299999569202, "grad_norm": 0.232421875, "learning_rate": 0.0004998940300746409, "loss": 2.7554, "step": 1306 }, { "epoch": 0.02882835689083993, "grad_norm": 0.240234375, "learning_rate": 0.0004998936901793841, "loss": 2.8947, "step": 1307 }, { "epoch": 0.028850413782110656, "grad_norm": 0.2294921875, "learning_rate": 0.0004998933497400141, "loss": 2.7799, "step": 1308 }, { "epoch": 0.028872470673381383, "grad_norm": 0.234375, "learning_rate": 0.0004998930087565315, "loss": 2.7344, "step": 1309 }, { "epoch": 0.02889452756465211, "grad_norm": 0.2294921875, "learning_rate": 0.0004998926672289369, "loss": 2.816, "step": 1310 }, { "epoch": 0.028916584455922836, "grad_norm": 0.23828125, "learning_rate": 0.0004998923251572312, "loss": 2.809, "step": 1311 }, { "epoch": 0.028938641347193563, "grad_norm": 0.2392578125, "learning_rate": 0.000499891982541415, "loss": 2.8869, "step": 1312 }, { "epoch": 0.02896069823846429, "grad_norm": 0.2314453125, "learning_rate": 0.0004998916393814892, "loss": 2.7406, "step": 1313 }, { "epoch": 0.028982755129735017, "grad_norm": 0.23828125, "learning_rate": 0.0004998912956774544, "loss": 2.6625, "step": 1314 }, { "epoch": 0.029004812021005744, "grad_norm": 0.2412109375, "learning_rate": 0.0004998909514293115, "loss": 2.8334, "step": 1315 }, { "epoch": 0.02902686891227647, "grad_norm": 0.23828125, "learning_rate": 0.0004998906066370612, "loss": 2.7996, "step": 1316 }, { "epoch": 0.029048925803547197, "grad_norm": 0.2294921875, "learning_rate": 0.0004998902613007042, "loss": 2.7979, "step": 1317 }, { "epoch": 0.029070982694817924, "grad_norm": 0.232421875, "learning_rate": 0.0004998899154202413, "loss": 2.7403, "step": 1318 }, { "epoch": 0.02909303958608865, "grad_norm": 0.2275390625, "learning_rate": 0.0004998895689956732, "loss": 2.701, "step": 1319 }, { "epoch": 0.029115096477359378, "grad_norm": 0.228515625, "learning_rate": 0.0004998892220270007, "loss": 2.8276, "step": 1320 }, { "epoch": 0.029137153368630105, "grad_norm": 0.236328125, "learning_rate": 0.0004998888745142246, "loss": 2.8539, "step": 1321 }, { "epoch": 0.02915921025990083, "grad_norm": 0.2392578125, "learning_rate": 0.0004998885264573455, "loss": 2.7745, "step": 1322 }, { "epoch": 0.029181267151171558, "grad_norm": 0.232421875, "learning_rate": 0.0004998881778563643, "loss": 2.7332, "step": 1323 }, { "epoch": 0.029203324042442285, "grad_norm": 0.23828125, "learning_rate": 0.0004998878287112816, "loss": 2.7386, "step": 1324 }, { "epoch": 0.029225380933713012, "grad_norm": 0.2353515625, "learning_rate": 0.0004998874790220985, "loss": 2.7895, "step": 1325 }, { "epoch": 0.02924743782498374, "grad_norm": 0.232421875, "learning_rate": 0.0004998871287888154, "loss": 2.7655, "step": 1326 }, { "epoch": 0.029269494716254466, "grad_norm": 0.224609375, "learning_rate": 0.0004998867780114332, "loss": 2.7018, "step": 1327 }, { "epoch": 0.029291551607525192, "grad_norm": 0.2255859375, "learning_rate": 0.0004998864266899527, "loss": 2.8048, "step": 1328 }, { "epoch": 0.02931360849879592, "grad_norm": 0.2333984375, "learning_rate": 0.0004998860748243746, "loss": 2.8489, "step": 1329 }, { "epoch": 0.029335665390066646, "grad_norm": 0.228515625, "learning_rate": 0.0004998857224146997, "loss": 2.8371, "step": 1330 }, { "epoch": 0.029357722281337373, "grad_norm": 0.2373046875, "learning_rate": 0.0004998853694609289, "loss": 2.8293, "step": 1331 }, { "epoch": 0.0293797791726081, "grad_norm": 0.2392578125, "learning_rate": 0.0004998850159630627, "loss": 2.8064, "step": 1332 }, { "epoch": 0.029401836063878826, "grad_norm": 0.2236328125, "learning_rate": 0.000499884661921102, "loss": 2.7887, "step": 1333 }, { "epoch": 0.029423892955149553, "grad_norm": 0.24609375, "learning_rate": 0.0004998843073350476, "loss": 2.7175, "step": 1334 }, { "epoch": 0.02944594984642028, "grad_norm": 0.2314453125, "learning_rate": 0.0004998839522049002, "loss": 2.7557, "step": 1335 }, { "epoch": 0.029468006737691007, "grad_norm": 0.2412109375, "learning_rate": 0.0004998835965306607, "loss": 2.7455, "step": 1336 }, { "epoch": 0.029490063628961734, "grad_norm": 0.2578125, "learning_rate": 0.0004998832403123297, "loss": 2.8225, "step": 1337 }, { "epoch": 0.02951212052023246, "grad_norm": 0.244140625, "learning_rate": 0.0004998828835499081, "loss": 2.7455, "step": 1338 }, { "epoch": 0.029534177411503187, "grad_norm": 0.23046875, "learning_rate": 0.0004998825262433967, "loss": 2.8004, "step": 1339 }, { "epoch": 0.029556234302773914, "grad_norm": 0.2294921875, "learning_rate": 0.0004998821683927962, "loss": 2.6937, "step": 1340 }, { "epoch": 0.02957829119404464, "grad_norm": 0.23046875, "learning_rate": 0.0004998818099981074, "loss": 2.7825, "step": 1341 }, { "epoch": 0.029600348085315364, "grad_norm": 0.244140625, "learning_rate": 0.000499881451059331, "loss": 2.7487, "step": 1342 }, { "epoch": 0.02962240497658609, "grad_norm": 0.236328125, "learning_rate": 0.000499881091576468, "loss": 2.794, "step": 1343 }, { "epoch": 0.029644461867856818, "grad_norm": 0.2333984375, "learning_rate": 0.000499880731549519, "loss": 2.78, "step": 1344 }, { "epoch": 0.029666518759127545, "grad_norm": 0.2490234375, "learning_rate": 0.0004998803709784847, "loss": 2.8044, "step": 1345 }, { "epoch": 0.02968857565039827, "grad_norm": 0.2353515625, "learning_rate": 0.0004998800098633662, "loss": 2.8044, "step": 1346 }, { "epoch": 0.029710632541669, "grad_norm": 0.234375, "learning_rate": 0.000499879648204164, "loss": 2.8047, "step": 1347 }, { "epoch": 0.029732689432939725, "grad_norm": 0.2333984375, "learning_rate": 0.000499879286000879, "loss": 2.8211, "step": 1348 }, { "epoch": 0.029754746324210452, "grad_norm": 0.24609375, "learning_rate": 0.0004998789232535119, "loss": 2.7813, "step": 1349 }, { "epoch": 0.02977680321548118, "grad_norm": 0.2333984375, "learning_rate": 0.0004998785599620637, "loss": 2.7482, "step": 1350 }, { "epoch": 0.029798860106751906, "grad_norm": 0.2490234375, "learning_rate": 0.000499878196126535, "loss": 2.8252, "step": 1351 }, { "epoch": 0.029820916998022633, "grad_norm": 0.2314453125, "learning_rate": 0.0004998778317469267, "loss": 2.7632, "step": 1352 }, { "epoch": 0.02984297388929336, "grad_norm": 0.232421875, "learning_rate": 0.0004998774668232395, "loss": 2.8184, "step": 1353 }, { "epoch": 0.029865030780564086, "grad_norm": 0.2275390625, "learning_rate": 0.0004998771013554743, "loss": 2.7328, "step": 1354 }, { "epoch": 0.029887087671834813, "grad_norm": 0.236328125, "learning_rate": 0.0004998767353436317, "loss": 2.6894, "step": 1355 }, { "epoch": 0.02990914456310554, "grad_norm": 0.2314453125, "learning_rate": 0.0004998763687877127, "loss": 2.8165, "step": 1356 }, { "epoch": 0.029931201454376267, "grad_norm": 0.2314453125, "learning_rate": 0.0004998760016877181, "loss": 2.8063, "step": 1357 }, { "epoch": 0.029953258345646994, "grad_norm": 0.2294921875, "learning_rate": 0.0004998756340436485, "loss": 2.8669, "step": 1358 }, { "epoch": 0.02997531523691772, "grad_norm": 0.2314453125, "learning_rate": 0.0004998752658555049, "loss": 2.6835, "step": 1359 }, { "epoch": 0.029997372128188447, "grad_norm": 0.2412109375, "learning_rate": 0.0004998748971232881, "loss": 2.7858, "step": 1360 }, { "epoch": 0.030019429019459174, "grad_norm": 0.23828125, "learning_rate": 0.0004998745278469987, "loss": 2.8123, "step": 1361 }, { "epoch": 0.0300414859107299, "grad_norm": 0.2314453125, "learning_rate": 0.0004998741580266378, "loss": 2.7758, "step": 1362 }, { "epoch": 0.030063542802000628, "grad_norm": 0.2451171875, "learning_rate": 0.0004998737876622058, "loss": 2.8038, "step": 1363 }, { "epoch": 0.030085599693271355, "grad_norm": 0.2373046875, "learning_rate": 0.000499873416753704, "loss": 2.8563, "step": 1364 }, { "epoch": 0.03010765658454208, "grad_norm": 0.2392578125, "learning_rate": 0.0004998730453011328, "loss": 2.719, "step": 1365 }, { "epoch": 0.030129713475812808, "grad_norm": 0.244140625, "learning_rate": 0.0004998726733044933, "loss": 2.8807, "step": 1366 }, { "epoch": 0.030151770367083535, "grad_norm": 0.2333984375, "learning_rate": 0.000499872300763786, "loss": 2.7361, "step": 1367 }, { "epoch": 0.030173827258354262, "grad_norm": 0.2314453125, "learning_rate": 0.000499871927679012, "loss": 2.8272, "step": 1368 }, { "epoch": 0.03019588414962499, "grad_norm": 0.2333984375, "learning_rate": 0.000499871554050172, "loss": 2.8141, "step": 1369 }, { "epoch": 0.030217941040895716, "grad_norm": 0.2412109375, "learning_rate": 0.0004998711798772668, "loss": 2.8084, "step": 1370 }, { "epoch": 0.030239997932166442, "grad_norm": 0.2314453125, "learning_rate": 0.0004998708051602972, "loss": 2.8176, "step": 1371 }, { "epoch": 0.03026205482343717, "grad_norm": 0.2314453125, "learning_rate": 0.000499870429899264, "loss": 2.7681, "step": 1372 }, { "epoch": 0.030284111714707896, "grad_norm": 0.23046875, "learning_rate": 0.0004998700540941682, "loss": 2.8583, "step": 1373 }, { "epoch": 0.030306168605978623, "grad_norm": 0.2373046875, "learning_rate": 0.0004998696777450104, "loss": 2.824, "step": 1374 }, { "epoch": 0.03032822549724935, "grad_norm": 0.23828125, "learning_rate": 0.0004998693008517915, "loss": 2.692, "step": 1375 }, { "epoch": 0.030350282388520076, "grad_norm": 0.236328125, "learning_rate": 0.0004998689234145123, "loss": 2.7649, "step": 1376 }, { "epoch": 0.030372339279790803, "grad_norm": 0.232421875, "learning_rate": 0.0004998685454331737, "loss": 2.7601, "step": 1377 }, { "epoch": 0.03039439617106153, "grad_norm": 0.2470703125, "learning_rate": 0.0004998681669077763, "loss": 2.8413, "step": 1378 }, { "epoch": 0.030416453062332257, "grad_norm": 0.2333984375, "learning_rate": 0.0004998677878383214, "loss": 2.8193, "step": 1379 }, { "epoch": 0.030438509953602984, "grad_norm": 0.234375, "learning_rate": 0.0004998674082248093, "loss": 2.7581, "step": 1380 }, { "epoch": 0.03046056684487371, "grad_norm": 0.234375, "learning_rate": 0.000499867028067241, "loss": 2.797, "step": 1381 }, { "epoch": 0.030482623736144437, "grad_norm": 0.2392578125, "learning_rate": 0.0004998666473656175, "loss": 2.804, "step": 1382 }, { "epoch": 0.030504680627415164, "grad_norm": 0.234375, "learning_rate": 0.0004998662661199393, "loss": 2.8005, "step": 1383 }, { "epoch": 0.03052673751868589, "grad_norm": 0.234375, "learning_rate": 0.0004998658843302076, "loss": 2.8526, "step": 1384 }, { "epoch": 0.030548794409956618, "grad_norm": 0.2333984375, "learning_rate": 0.000499865501996423, "loss": 2.7638, "step": 1385 }, { "epoch": 0.030570851301227345, "grad_norm": 0.2373046875, "learning_rate": 0.0004998651191185865, "loss": 2.7909, "step": 1386 }, { "epoch": 0.03059290819249807, "grad_norm": 0.2392578125, "learning_rate": 0.0004998647356966987, "loss": 2.7627, "step": 1387 }, { "epoch": 0.0306149650837688, "grad_norm": 0.23828125, "learning_rate": 0.0004998643517307605, "loss": 2.7678, "step": 1388 }, { "epoch": 0.030637021975039525, "grad_norm": 0.2265625, "learning_rate": 0.000499863967220773, "loss": 2.7567, "step": 1389 }, { "epoch": 0.030659078866310252, "grad_norm": 0.23046875, "learning_rate": 0.0004998635821667367, "loss": 2.7519, "step": 1390 }, { "epoch": 0.03068113575758098, "grad_norm": 0.2177734375, "learning_rate": 0.0004998631965686525, "loss": 2.6958, "step": 1391 }, { "epoch": 0.030703192648851706, "grad_norm": 0.232421875, "learning_rate": 0.0004998628104265215, "loss": 2.7737, "step": 1392 }, { "epoch": 0.030725249540122433, "grad_norm": 0.224609375, "learning_rate": 0.0004998624237403441, "loss": 2.7303, "step": 1393 }, { "epoch": 0.03074730643139316, "grad_norm": 0.228515625, "learning_rate": 0.0004998620365101216, "loss": 2.7326, "step": 1394 }, { "epoch": 0.030769363322663886, "grad_norm": 0.2255859375, "learning_rate": 0.0004998616487358546, "loss": 2.7492, "step": 1395 }, { "epoch": 0.030791420213934613, "grad_norm": 0.236328125, "learning_rate": 0.000499861260417544, "loss": 2.7746, "step": 1396 }, { "epoch": 0.03081347710520534, "grad_norm": 0.232421875, "learning_rate": 0.0004998608715551905, "loss": 2.7062, "step": 1397 }, { "epoch": 0.030835533996476067, "grad_norm": 0.2265625, "learning_rate": 0.0004998604821487953, "loss": 2.8237, "step": 1398 }, { "epoch": 0.030857590887746793, "grad_norm": 0.232421875, "learning_rate": 0.0004998600921983588, "loss": 2.7183, "step": 1399 }, { "epoch": 0.03087964777901752, "grad_norm": 0.228515625, "learning_rate": 0.0004998597017038821, "loss": 2.7629, "step": 1400 }, { "epoch": 0.030901704670288247, "grad_norm": 0.2421875, "learning_rate": 0.0004998593106653661, "loss": 2.7528, "step": 1401 }, { "epoch": 0.030923761561558974, "grad_norm": 0.2275390625, "learning_rate": 0.0004998589190828115, "loss": 2.7332, "step": 1402 }, { "epoch": 0.0309458184528297, "grad_norm": 0.232421875, "learning_rate": 0.0004998585269562194, "loss": 2.8338, "step": 1403 }, { "epoch": 0.030967875344100428, "grad_norm": 0.2421875, "learning_rate": 0.0004998581342855903, "loss": 2.8057, "step": 1404 }, { "epoch": 0.030989932235371154, "grad_norm": 0.2373046875, "learning_rate": 0.0004998577410709253, "loss": 2.7983, "step": 1405 }, { "epoch": 0.03101198912664188, "grad_norm": 0.2373046875, "learning_rate": 0.0004998573473122251, "loss": 2.7342, "step": 1406 }, { "epoch": 0.031034046017912608, "grad_norm": 0.2255859375, "learning_rate": 0.0004998569530094908, "loss": 2.7165, "step": 1407 }, { "epoch": 0.031056102909183335, "grad_norm": 0.23046875, "learning_rate": 0.000499856558162723, "loss": 2.7877, "step": 1408 }, { "epoch": 0.03107815980045406, "grad_norm": 0.2333984375, "learning_rate": 0.0004998561627719226, "loss": 2.7003, "step": 1409 }, { "epoch": 0.03110021669172479, "grad_norm": 0.236328125, "learning_rate": 0.0004998557668370907, "loss": 2.799, "step": 1410 }, { "epoch": 0.031122273582995515, "grad_norm": 0.2138671875, "learning_rate": 0.0004998553703582278, "loss": 2.7854, "step": 1411 }, { "epoch": 0.031144330474266242, "grad_norm": 0.228515625, "learning_rate": 0.0004998549733353352, "loss": 2.7694, "step": 1412 }, { "epoch": 0.03116638736553697, "grad_norm": 0.220703125, "learning_rate": 0.0004998545757684133, "loss": 2.707, "step": 1413 }, { "epoch": 0.031188444256807696, "grad_norm": 0.2265625, "learning_rate": 0.0004998541776574632, "loss": 2.7548, "step": 1414 }, { "epoch": 0.031210501148078423, "grad_norm": 0.2216796875, "learning_rate": 0.0004998537790024858, "loss": 2.624, "step": 1415 }, { "epoch": 0.03123255803934915, "grad_norm": 0.232421875, "learning_rate": 0.0004998533798034819, "loss": 2.7101, "step": 1416 }, { "epoch": 0.03125461493061987, "grad_norm": 0.2275390625, "learning_rate": 0.0004998529800604525, "loss": 2.7945, "step": 1417 }, { "epoch": 0.0312766718218906, "grad_norm": 0.228515625, "learning_rate": 0.0004998525797733982, "loss": 2.7432, "step": 1418 }, { "epoch": 0.031298728713161326, "grad_norm": 0.22265625, "learning_rate": 0.00049985217894232, "loss": 2.7497, "step": 1419 }, { "epoch": 0.03132078560443206, "grad_norm": 0.2255859375, "learning_rate": 0.0004998517775672189, "loss": 2.7114, "step": 1420 }, { "epoch": 0.03134284249570278, "grad_norm": 0.228515625, "learning_rate": 0.0004998513756480957, "loss": 2.6924, "step": 1421 }, { "epoch": 0.03136489938697351, "grad_norm": 0.2392578125, "learning_rate": 0.0004998509731849512, "loss": 2.8302, "step": 1422 }, { "epoch": 0.031386956278244234, "grad_norm": 0.234375, "learning_rate": 0.0004998505701777862, "loss": 2.8278, "step": 1423 }, { "epoch": 0.031409013169514964, "grad_norm": 0.236328125, "learning_rate": 0.0004998501666266019, "loss": 2.8061, "step": 1424 }, { "epoch": 0.03143107006078569, "grad_norm": 0.2392578125, "learning_rate": 0.0004998497625313989, "loss": 2.7667, "step": 1425 }, { "epoch": 0.03145312695205642, "grad_norm": 0.220703125, "learning_rate": 0.0004998493578921782, "loss": 2.7952, "step": 1426 }, { "epoch": 0.03147518384332714, "grad_norm": 0.232421875, "learning_rate": 0.0004998489527089406, "loss": 2.6737, "step": 1427 }, { "epoch": 0.03149724073459787, "grad_norm": 0.22265625, "learning_rate": 0.000499848546981687, "loss": 2.6417, "step": 1428 }, { "epoch": 0.031519297625868595, "grad_norm": 0.21484375, "learning_rate": 0.0004998481407104184, "loss": 2.718, "step": 1429 }, { "epoch": 0.031541354517139325, "grad_norm": 0.2138671875, "learning_rate": 0.0004998477338951355, "loss": 2.6855, "step": 1430 }, { "epoch": 0.03156341140841005, "grad_norm": 0.251953125, "learning_rate": 0.0004998473265358393, "loss": 2.8569, "step": 1431 }, { "epoch": 0.03158546829968078, "grad_norm": 0.2197265625, "learning_rate": 0.0004998469186325307, "loss": 2.6813, "step": 1432 }, { "epoch": 0.0316075251909515, "grad_norm": 0.22265625, "learning_rate": 0.0004998465101852106, "loss": 2.8128, "step": 1433 }, { "epoch": 0.03162958208222223, "grad_norm": 0.2314453125, "learning_rate": 0.0004998461011938797, "loss": 2.8563, "step": 1434 }, { "epoch": 0.031651638973492956, "grad_norm": 0.2216796875, "learning_rate": 0.0004998456916585391, "loss": 2.8122, "step": 1435 }, { "epoch": 0.031673695864763686, "grad_norm": 0.23828125, "learning_rate": 0.0004998452815791896, "loss": 2.8375, "step": 1436 }, { "epoch": 0.03169575275603441, "grad_norm": 0.2197265625, "learning_rate": 0.0004998448709558322, "loss": 2.6664, "step": 1437 }, { "epoch": 0.03171780964730514, "grad_norm": 0.2314453125, "learning_rate": 0.0004998444597884677, "loss": 2.7607, "step": 1438 }, { "epoch": 0.03173986653857586, "grad_norm": 0.22265625, "learning_rate": 0.0004998440480770969, "loss": 2.7586, "step": 1439 }, { "epoch": 0.03176192342984659, "grad_norm": 0.2353515625, "learning_rate": 0.0004998436358217209, "loss": 2.8503, "step": 1440 }, { "epoch": 0.03178398032111732, "grad_norm": 0.22265625, "learning_rate": 0.0004998432230223405, "loss": 2.7814, "step": 1441 }, { "epoch": 0.03180603721238805, "grad_norm": 0.23046875, "learning_rate": 0.0004998428096789566, "loss": 2.7072, "step": 1442 }, { "epoch": 0.03182809410365877, "grad_norm": 0.23828125, "learning_rate": 0.0004998423957915701, "loss": 2.7934, "step": 1443 }, { "epoch": 0.0318501509949295, "grad_norm": 0.2294921875, "learning_rate": 0.0004998419813601819, "loss": 2.8275, "step": 1444 }, { "epoch": 0.031872207886200224, "grad_norm": 0.232421875, "learning_rate": 0.0004998415663847928, "loss": 2.8368, "step": 1445 }, { "epoch": 0.031894264777470954, "grad_norm": 0.234375, "learning_rate": 0.0004998411508654039, "loss": 2.6972, "step": 1446 }, { "epoch": 0.03191632166874168, "grad_norm": 0.2353515625, "learning_rate": 0.0004998407348020161, "loss": 2.7416, "step": 1447 }, { "epoch": 0.03193837856001241, "grad_norm": 0.244140625, "learning_rate": 0.00049984031819463, "loss": 2.6943, "step": 1448 }, { "epoch": 0.03196043545128313, "grad_norm": 0.2236328125, "learning_rate": 0.0004998399010432469, "loss": 2.812, "step": 1449 }, { "epoch": 0.03198249234255386, "grad_norm": 0.228515625, "learning_rate": 0.0004998394833478676, "loss": 2.6413, "step": 1450 }, { "epoch": 0.032004549233824585, "grad_norm": 0.22265625, "learning_rate": 0.0004998390651084928, "loss": 2.7203, "step": 1451 }, { "epoch": 0.032026606125095315, "grad_norm": 0.2314453125, "learning_rate": 0.0004998386463251236, "loss": 2.836, "step": 1452 }, { "epoch": 0.03204866301636604, "grad_norm": 0.23046875, "learning_rate": 0.0004998382269977608, "loss": 2.743, "step": 1453 }, { "epoch": 0.03207071990763677, "grad_norm": 0.228515625, "learning_rate": 0.0004998378071264054, "loss": 2.6669, "step": 1454 }, { "epoch": 0.03209277679890749, "grad_norm": 0.23046875, "learning_rate": 0.0004998373867110583, "loss": 2.7286, "step": 1455 }, { "epoch": 0.03211483369017822, "grad_norm": 0.2294921875, "learning_rate": 0.0004998369657517205, "loss": 2.7652, "step": 1456 }, { "epoch": 0.032136890581448946, "grad_norm": 0.228515625, "learning_rate": 0.0004998365442483927, "loss": 2.7778, "step": 1457 }, { "epoch": 0.032158947472719676, "grad_norm": 0.2216796875, "learning_rate": 0.000499836122201076, "loss": 2.7677, "step": 1458 }, { "epoch": 0.0321810043639904, "grad_norm": 0.2333984375, "learning_rate": 0.0004998356996097713, "loss": 2.7577, "step": 1459 }, { "epoch": 0.03220306125526113, "grad_norm": 0.2333984375, "learning_rate": 0.0004998352764744794, "loss": 2.7385, "step": 1460 }, { "epoch": 0.03222511814653185, "grad_norm": 0.2333984375, "learning_rate": 0.0004998348527952013, "loss": 2.7347, "step": 1461 }, { "epoch": 0.03224717503780258, "grad_norm": 0.22265625, "learning_rate": 0.0004998344285719379, "loss": 2.7337, "step": 1462 }, { "epoch": 0.03226923192907331, "grad_norm": 0.2275390625, "learning_rate": 0.0004998340038046903, "loss": 2.7796, "step": 1463 }, { "epoch": 0.03229128882034404, "grad_norm": 0.2392578125, "learning_rate": 0.0004998335784934592, "loss": 2.7751, "step": 1464 }, { "epoch": 0.03231334571161476, "grad_norm": 0.234375, "learning_rate": 0.0004998331526382456, "loss": 2.7237, "step": 1465 }, { "epoch": 0.03233540260288549, "grad_norm": 0.2265625, "learning_rate": 0.0004998327262390504, "loss": 2.7783, "step": 1466 }, { "epoch": 0.032357459494156214, "grad_norm": 0.2197265625, "learning_rate": 0.0004998322992958746, "loss": 2.7266, "step": 1467 }, { "epoch": 0.032379516385426944, "grad_norm": 0.2275390625, "learning_rate": 0.000499831871808719, "loss": 2.756, "step": 1468 }, { "epoch": 0.03240157327669767, "grad_norm": 0.21484375, "learning_rate": 0.0004998314437775847, "loss": 2.8037, "step": 1469 }, { "epoch": 0.0324236301679684, "grad_norm": 0.2275390625, "learning_rate": 0.0004998310152024726, "loss": 2.7344, "step": 1470 }, { "epoch": 0.03244568705923912, "grad_norm": 0.22265625, "learning_rate": 0.0004998305860833834, "loss": 2.7578, "step": 1471 }, { "epoch": 0.03246774395050985, "grad_norm": 0.2216796875, "learning_rate": 0.0004998301564203184, "loss": 2.695, "step": 1472 }, { "epoch": 0.032489800841780575, "grad_norm": 0.220703125, "learning_rate": 0.0004998297262132782, "loss": 2.7584, "step": 1473 }, { "epoch": 0.032511857733051305, "grad_norm": 0.2236328125, "learning_rate": 0.000499829295462264, "loss": 2.702, "step": 1474 }, { "epoch": 0.03253391462432203, "grad_norm": 0.22265625, "learning_rate": 0.0004998288641672766, "loss": 2.7316, "step": 1475 }, { "epoch": 0.03255597151559276, "grad_norm": 0.2197265625, "learning_rate": 0.0004998284323283168, "loss": 2.7052, "step": 1476 }, { "epoch": 0.03257802840686348, "grad_norm": 0.2275390625, "learning_rate": 0.0004998279999453859, "loss": 2.7799, "step": 1477 }, { "epoch": 0.03260008529813421, "grad_norm": 0.2158203125, "learning_rate": 0.0004998275670184847, "loss": 2.7121, "step": 1478 }, { "epoch": 0.032622142189404936, "grad_norm": 0.2265625, "learning_rate": 0.0004998271335476139, "loss": 2.775, "step": 1479 }, { "epoch": 0.032644199080675666, "grad_norm": 0.2236328125, "learning_rate": 0.0004998266995327748, "loss": 2.8014, "step": 1480 }, { "epoch": 0.03266625597194639, "grad_norm": 0.232421875, "learning_rate": 0.0004998262649739681, "loss": 2.6858, "step": 1481 }, { "epoch": 0.03268831286321712, "grad_norm": 0.220703125, "learning_rate": 0.0004998258298711948, "loss": 2.8008, "step": 1482 }, { "epoch": 0.03271036975448784, "grad_norm": 0.228515625, "learning_rate": 0.000499825394224456, "loss": 2.7604, "step": 1483 }, { "epoch": 0.032732426645758574, "grad_norm": 0.216796875, "learning_rate": 0.0004998249580337523, "loss": 2.6819, "step": 1484 }, { "epoch": 0.0327544835370293, "grad_norm": 0.232421875, "learning_rate": 0.000499824521299085, "loss": 2.7367, "step": 1485 }, { "epoch": 0.03277654042830003, "grad_norm": 0.21875, "learning_rate": 0.0004998240840204549, "loss": 2.7287, "step": 1486 }, { "epoch": 0.03279859731957075, "grad_norm": 0.2216796875, "learning_rate": 0.0004998236461978631, "loss": 2.676, "step": 1487 }, { "epoch": 0.03282065421084148, "grad_norm": 0.232421875, "learning_rate": 0.0004998232078313103, "loss": 2.7029, "step": 1488 }, { "epoch": 0.032842711102112204, "grad_norm": 0.2216796875, "learning_rate": 0.0004998227689207975, "loss": 2.7299, "step": 1489 }, { "epoch": 0.032864767993382935, "grad_norm": 0.2255859375, "learning_rate": 0.0004998223294663259, "loss": 2.7198, "step": 1490 }, { "epoch": 0.03288682488465366, "grad_norm": 0.2158203125, "learning_rate": 0.0004998218894678962, "loss": 2.7186, "step": 1491 }, { "epoch": 0.03290888177592439, "grad_norm": 0.2265625, "learning_rate": 0.0004998214489255094, "loss": 2.7397, "step": 1492 }, { "epoch": 0.03293093866719511, "grad_norm": 0.2119140625, "learning_rate": 0.0004998210078391666, "loss": 2.7352, "step": 1493 }, { "epoch": 0.03295299555846584, "grad_norm": 0.228515625, "learning_rate": 0.0004998205662088687, "loss": 2.7329, "step": 1494 }, { "epoch": 0.032975052449736565, "grad_norm": 0.2109375, "learning_rate": 0.0004998201240346166, "loss": 2.7783, "step": 1495 }, { "epoch": 0.032997109341007295, "grad_norm": 0.21484375, "learning_rate": 0.0004998196813164113, "loss": 2.7779, "step": 1496 }, { "epoch": 0.03301916623227802, "grad_norm": 0.220703125, "learning_rate": 0.0004998192380542537, "loss": 2.7542, "step": 1497 }, { "epoch": 0.03304122312354875, "grad_norm": 0.21875, "learning_rate": 0.0004998187942481449, "loss": 2.7716, "step": 1498 }, { "epoch": 0.03306328001481947, "grad_norm": 0.2236328125, "learning_rate": 0.0004998183498980857, "loss": 2.7414, "step": 1499 }, { "epoch": 0.0330853369060902, "grad_norm": 0.2177734375, "learning_rate": 0.0004998179050040773, "loss": 2.7389, "step": 1500 }, { "epoch": 0.033107393797360926, "grad_norm": 0.224609375, "learning_rate": 0.0004998174595661203, "loss": 2.7095, "step": 1501 }, { "epoch": 0.033129450688631656, "grad_norm": 0.2216796875, "learning_rate": 0.0004998170135842162, "loss": 2.773, "step": 1502 }, { "epoch": 0.03315150757990238, "grad_norm": 0.2314453125, "learning_rate": 0.0004998165670583655, "loss": 2.6727, "step": 1503 }, { "epoch": 0.03317356447117311, "grad_norm": 0.228515625, "learning_rate": 0.0004998161199885694, "loss": 2.7987, "step": 1504 }, { "epoch": 0.03319562136244383, "grad_norm": 0.2255859375, "learning_rate": 0.0004998156723748287, "loss": 2.7581, "step": 1505 }, { "epoch": 0.033217678253714564, "grad_norm": 0.21875, "learning_rate": 0.0004998152242171446, "loss": 2.6103, "step": 1506 }, { "epoch": 0.03323973514498529, "grad_norm": 0.2216796875, "learning_rate": 0.0004998147755155179, "loss": 2.7028, "step": 1507 }, { "epoch": 0.03326179203625602, "grad_norm": 0.216796875, "learning_rate": 0.0004998143262699497, "loss": 2.6241, "step": 1508 }, { "epoch": 0.03328384892752674, "grad_norm": 0.220703125, "learning_rate": 0.0004998138764804408, "loss": 2.6293, "step": 1509 }, { "epoch": 0.03330590581879747, "grad_norm": 0.224609375, "learning_rate": 0.0004998134261469924, "loss": 2.6667, "step": 1510 }, { "epoch": 0.033327962710068194, "grad_norm": 0.220703125, "learning_rate": 0.0004998129752696054, "loss": 2.6568, "step": 1511 }, { "epoch": 0.033350019601338925, "grad_norm": 0.2197265625, "learning_rate": 0.0004998125238482807, "loss": 2.7714, "step": 1512 }, { "epoch": 0.03337207649260965, "grad_norm": 0.22265625, "learning_rate": 0.0004998120718830194, "loss": 2.6802, "step": 1513 }, { "epoch": 0.03339413338388038, "grad_norm": 0.208984375, "learning_rate": 0.0004998116193738224, "loss": 2.5658, "step": 1514 }, { "epoch": 0.0334161902751511, "grad_norm": 0.228515625, "learning_rate": 0.0004998111663206906, "loss": 2.6699, "step": 1515 }, { "epoch": 0.03343824716642183, "grad_norm": 0.2158203125, "learning_rate": 0.0004998107127236252, "loss": 2.7318, "step": 1516 }, { "epoch": 0.033460304057692555, "grad_norm": 0.220703125, "learning_rate": 0.0004998102585826272, "loss": 2.7272, "step": 1517 }, { "epoch": 0.033482360948963286, "grad_norm": 0.22265625, "learning_rate": 0.0004998098038976974, "loss": 2.7074, "step": 1518 }, { "epoch": 0.03350441784023401, "grad_norm": 0.224609375, "learning_rate": 0.0004998093486688368, "loss": 2.6693, "step": 1519 }, { "epoch": 0.03352647473150474, "grad_norm": 0.2197265625, "learning_rate": 0.0004998088928960465, "loss": 2.7098, "step": 1520 }, { "epoch": 0.03354853162277546, "grad_norm": 0.2275390625, "learning_rate": 0.0004998084365793274, "loss": 2.6221, "step": 1521 }, { "epoch": 0.03357058851404619, "grad_norm": 0.2158203125, "learning_rate": 0.0004998079797186805, "loss": 2.7327, "step": 1522 }, { "epoch": 0.033592645405316916, "grad_norm": 0.220703125, "learning_rate": 0.000499807522314107, "loss": 2.7027, "step": 1523 }, { "epoch": 0.03361470229658765, "grad_norm": 0.21484375, "learning_rate": 0.0004998070643656077, "loss": 2.6706, "step": 1524 }, { "epoch": 0.03363675918785837, "grad_norm": 0.2265625, "learning_rate": 0.0004998066058731835, "loss": 2.6692, "step": 1525 }, { "epoch": 0.0336588160791291, "grad_norm": 0.2255859375, "learning_rate": 0.0004998061468368357, "loss": 2.6839, "step": 1526 }, { "epoch": 0.033680872970399824, "grad_norm": 0.2158203125, "learning_rate": 0.0004998056872565651, "loss": 2.6858, "step": 1527 }, { "epoch": 0.033702929861670554, "grad_norm": 0.224609375, "learning_rate": 0.0004998052271323727, "loss": 2.7491, "step": 1528 }, { "epoch": 0.03372498675294128, "grad_norm": 0.21875, "learning_rate": 0.0004998047664642594, "loss": 2.6871, "step": 1529 }, { "epoch": 0.03374704364421201, "grad_norm": 0.2197265625, "learning_rate": 0.0004998043052522265, "loss": 2.681, "step": 1530 }, { "epoch": 0.03376910053548273, "grad_norm": 0.2255859375, "learning_rate": 0.0004998038434962747, "loss": 2.7471, "step": 1531 }, { "epoch": 0.03379115742675346, "grad_norm": 0.2236328125, "learning_rate": 0.0004998033811964053, "loss": 2.7816, "step": 1532 }, { "epoch": 0.033813214318024185, "grad_norm": 0.21484375, "learning_rate": 0.000499802918352619, "loss": 2.6842, "step": 1533 }, { "epoch": 0.033835271209294915, "grad_norm": 0.2236328125, "learning_rate": 0.0004998024549649171, "loss": 2.7754, "step": 1534 }, { "epoch": 0.03385732810056564, "grad_norm": 0.2197265625, "learning_rate": 0.0004998019910333005, "loss": 2.7333, "step": 1535 }, { "epoch": 0.03387938499183637, "grad_norm": 0.212890625, "learning_rate": 0.00049980152655777, "loss": 2.728, "step": 1536 }, { "epoch": 0.03390144188310709, "grad_norm": 0.212890625, "learning_rate": 0.000499801061538327, "loss": 2.7746, "step": 1537 }, { "epoch": 0.03392349877437782, "grad_norm": 0.220703125, "learning_rate": 0.0004998005959749721, "loss": 2.7392, "step": 1538 }, { "epoch": 0.033945555665648545, "grad_norm": 0.216796875, "learning_rate": 0.0004998001298677065, "loss": 2.6486, "step": 1539 }, { "epoch": 0.033967612556919276, "grad_norm": 0.2275390625, "learning_rate": 0.0004997996632165314, "loss": 2.6987, "step": 1540 }, { "epoch": 0.03398966944819, "grad_norm": 0.2177734375, "learning_rate": 0.0004997991960214475, "loss": 2.7176, "step": 1541 }, { "epoch": 0.03401172633946073, "grad_norm": 0.2294921875, "learning_rate": 0.0004997987282824561, "loss": 2.7213, "step": 1542 }, { "epoch": 0.03403378323073145, "grad_norm": 0.240234375, "learning_rate": 0.000499798259999558, "loss": 2.7962, "step": 1543 }, { "epoch": 0.03405584012200218, "grad_norm": 0.2158203125, "learning_rate": 0.0004997977911727543, "loss": 2.682, "step": 1544 }, { "epoch": 0.034077897013272906, "grad_norm": 0.2236328125, "learning_rate": 0.000499797321802046, "loss": 2.7606, "step": 1545 }, { "epoch": 0.03409995390454364, "grad_norm": 0.21875, "learning_rate": 0.0004997968518874343, "loss": 2.6816, "step": 1546 }, { "epoch": 0.03412201079581436, "grad_norm": 0.2216796875, "learning_rate": 0.00049979638142892, "loss": 2.7024, "step": 1547 }, { "epoch": 0.03414406768708509, "grad_norm": 0.2177734375, "learning_rate": 0.0004997959104265041, "loss": 2.8112, "step": 1548 }, { "epoch": 0.034166124578355814, "grad_norm": 0.2216796875, "learning_rate": 0.0004997954388801878, "loss": 2.7068, "step": 1549 }, { "epoch": 0.034188181469626544, "grad_norm": 0.21875, "learning_rate": 0.000499794966789972, "loss": 2.7556, "step": 1550 }, { "epoch": 0.03421023836089727, "grad_norm": 0.216796875, "learning_rate": 0.0004997944941558577, "loss": 2.6805, "step": 1551 }, { "epoch": 0.034232295252168, "grad_norm": 0.220703125, "learning_rate": 0.0004997940209778461, "loss": 2.6776, "step": 1552 }, { "epoch": 0.03425435214343872, "grad_norm": 0.2158203125, "learning_rate": 0.0004997935472559382, "loss": 2.6688, "step": 1553 }, { "epoch": 0.034276409034709444, "grad_norm": 0.2265625, "learning_rate": 0.0004997930729901349, "loss": 2.8301, "step": 1554 }, { "epoch": 0.034298465925980175, "grad_norm": 0.224609375, "learning_rate": 0.0004997925981804372, "loss": 2.7537, "step": 1555 }, { "epoch": 0.0343205228172509, "grad_norm": 0.2216796875, "learning_rate": 0.0004997921228268463, "loss": 2.7506, "step": 1556 }, { "epoch": 0.03434257970852163, "grad_norm": 0.2197265625, "learning_rate": 0.0004997916469293631, "loss": 2.7435, "step": 1557 }, { "epoch": 0.03436463659979235, "grad_norm": 0.220703125, "learning_rate": 0.0004997911704879889, "loss": 2.7411, "step": 1558 }, { "epoch": 0.03438669349106308, "grad_norm": 0.2216796875, "learning_rate": 0.0004997906935027244, "loss": 2.6515, "step": 1559 }, { "epoch": 0.034408750382333805, "grad_norm": 0.21875, "learning_rate": 0.0004997902159735708, "loss": 2.7149, "step": 1560 }, { "epoch": 0.034430807273604536, "grad_norm": 0.2080078125, "learning_rate": 0.000499789737900529, "loss": 2.725, "step": 1561 }, { "epoch": 0.03445286416487526, "grad_norm": 0.2255859375, "learning_rate": 0.0004997892592836004, "loss": 2.684, "step": 1562 }, { "epoch": 0.03447492105614599, "grad_norm": 0.2119140625, "learning_rate": 0.0004997887801227856, "loss": 2.7045, "step": 1563 }, { "epoch": 0.03449697794741671, "grad_norm": 0.205078125, "learning_rate": 0.000499788300418086, "loss": 2.7123, "step": 1564 }, { "epoch": 0.03451903483868744, "grad_norm": 0.212890625, "learning_rate": 0.0004997878201695023, "loss": 2.643, "step": 1565 }, { "epoch": 0.034541091729958166, "grad_norm": 0.2158203125, "learning_rate": 0.0004997873393770358, "loss": 2.7023, "step": 1566 }, { "epoch": 0.0345631486212289, "grad_norm": 0.2158203125, "learning_rate": 0.0004997868580406875, "loss": 2.7546, "step": 1567 }, { "epoch": 0.03458520551249962, "grad_norm": 0.21875, "learning_rate": 0.0004997863761604585, "loss": 2.8221, "step": 1568 }, { "epoch": 0.03460726240377035, "grad_norm": 0.212890625, "learning_rate": 0.0004997858937363497, "loss": 2.7458, "step": 1569 }, { "epoch": 0.034629319295041074, "grad_norm": 0.2158203125, "learning_rate": 0.0004997854107683622, "loss": 2.7545, "step": 1570 }, { "epoch": 0.034651376186311804, "grad_norm": 0.2109375, "learning_rate": 0.0004997849272564971, "loss": 2.7255, "step": 1571 }, { "epoch": 0.03467343307758253, "grad_norm": 0.2158203125, "learning_rate": 0.0004997844432007556, "loss": 2.731, "step": 1572 }, { "epoch": 0.03469548996885326, "grad_norm": 0.2080078125, "learning_rate": 0.0004997839586011383, "loss": 2.7172, "step": 1573 }, { "epoch": 0.03471754686012398, "grad_norm": 0.2197265625, "learning_rate": 0.0004997834734576467, "loss": 2.7363, "step": 1574 }, { "epoch": 0.03473960375139471, "grad_norm": 0.2099609375, "learning_rate": 0.0004997829877702817, "loss": 2.7866, "step": 1575 }, { "epoch": 0.034761660642665435, "grad_norm": 0.2109375, "learning_rate": 0.0004997825015390444, "loss": 2.6826, "step": 1576 }, { "epoch": 0.034783717533936165, "grad_norm": 0.2138671875, "learning_rate": 0.0004997820147639357, "loss": 2.7795, "step": 1577 }, { "epoch": 0.03480577442520689, "grad_norm": 0.2158203125, "learning_rate": 0.0004997815274449569, "loss": 2.665, "step": 1578 }, { "epoch": 0.03482783131647762, "grad_norm": 0.2177734375, "learning_rate": 0.0004997810395821088, "loss": 2.6598, "step": 1579 }, { "epoch": 0.03484988820774834, "grad_norm": 0.21875, "learning_rate": 0.0004997805511753927, "loss": 2.6487, "step": 1580 }, { "epoch": 0.03487194509901907, "grad_norm": 0.216796875, "learning_rate": 0.0004997800622248094, "loss": 2.7767, "step": 1581 }, { "epoch": 0.034894001990289795, "grad_norm": 0.224609375, "learning_rate": 0.0004997795727303603, "loss": 2.7041, "step": 1582 }, { "epoch": 0.034916058881560526, "grad_norm": 0.21484375, "learning_rate": 0.0004997790826920462, "loss": 2.7579, "step": 1583 }, { "epoch": 0.03493811577283125, "grad_norm": 0.236328125, "learning_rate": 0.0004997785921098683, "loss": 2.6726, "step": 1584 }, { "epoch": 0.03496017266410198, "grad_norm": 0.2099609375, "learning_rate": 0.0004997781009838276, "loss": 2.6521, "step": 1585 }, { "epoch": 0.0349822295553727, "grad_norm": 0.224609375, "learning_rate": 0.0004997776093139252, "loss": 2.6174, "step": 1586 }, { "epoch": 0.03500428644664343, "grad_norm": 0.2138671875, "learning_rate": 0.0004997771171001622, "loss": 2.6287, "step": 1587 }, { "epoch": 0.035026343337914156, "grad_norm": 0.22265625, "learning_rate": 0.0004997766243425396, "loss": 2.8235, "step": 1588 }, { "epoch": 0.03504840022918489, "grad_norm": 0.2138671875, "learning_rate": 0.0004997761310410586, "loss": 2.6601, "step": 1589 }, { "epoch": 0.03507045712045561, "grad_norm": 0.21875, "learning_rate": 0.0004997756371957201, "loss": 2.7029, "step": 1590 }, { "epoch": 0.03509251401172634, "grad_norm": 0.220703125, "learning_rate": 0.0004997751428065252, "loss": 2.6943, "step": 1591 }, { "epoch": 0.035114570902997064, "grad_norm": 0.2109375, "learning_rate": 0.0004997746478734752, "loss": 2.6622, "step": 1592 }, { "epoch": 0.035136627794267794, "grad_norm": 0.2138671875, "learning_rate": 0.000499774152396571, "loss": 2.724, "step": 1593 }, { "epoch": 0.03515868468553852, "grad_norm": 0.21875, "learning_rate": 0.0004997736563758135, "loss": 2.6697, "step": 1594 }, { "epoch": 0.03518074157680925, "grad_norm": 0.2275390625, "learning_rate": 0.0004997731598112041, "loss": 2.7813, "step": 1595 }, { "epoch": 0.03520279846807997, "grad_norm": 0.22265625, "learning_rate": 0.0004997726627027438, "loss": 2.6766, "step": 1596 }, { "epoch": 0.0352248553593507, "grad_norm": 0.2236328125, "learning_rate": 0.0004997721650504335, "loss": 2.687, "step": 1597 }, { "epoch": 0.035246912250621425, "grad_norm": 0.2197265625, "learning_rate": 0.0004997716668542746, "loss": 2.6896, "step": 1598 }, { "epoch": 0.035268969141892155, "grad_norm": 0.21484375, "learning_rate": 0.0004997711681142678, "loss": 2.6631, "step": 1599 }, { "epoch": 0.03529102603316288, "grad_norm": 0.21875, "learning_rate": 0.0004997706688304145, "loss": 2.7104, "step": 1600 }, { "epoch": 0.03531308292443361, "grad_norm": 0.2333984375, "learning_rate": 0.0004997701690027158, "loss": 2.7239, "step": 1601 }, { "epoch": 0.03533513981570433, "grad_norm": 0.2373046875, "learning_rate": 0.0004997696686311725, "loss": 2.7337, "step": 1602 }, { "epoch": 0.03535719670697506, "grad_norm": 0.2216796875, "learning_rate": 0.000499769167715786, "loss": 2.7339, "step": 1603 }, { "epoch": 0.035379253598245786, "grad_norm": 0.2236328125, "learning_rate": 0.000499768666256557, "loss": 2.7193, "step": 1604 }, { "epoch": 0.035401310489516516, "grad_norm": 0.2265625, "learning_rate": 0.000499768164253487, "loss": 2.7808, "step": 1605 }, { "epoch": 0.03542336738078724, "grad_norm": 0.224609375, "learning_rate": 0.0004997676617065769, "loss": 2.7549, "step": 1606 }, { "epoch": 0.03544542427205797, "grad_norm": 0.216796875, "learning_rate": 0.0004997671586158279, "loss": 2.7052, "step": 1607 }, { "epoch": 0.03546748116332869, "grad_norm": 0.2119140625, "learning_rate": 0.0004997666549812409, "loss": 2.6875, "step": 1608 }, { "epoch": 0.03548953805459942, "grad_norm": 0.2197265625, "learning_rate": 0.0004997661508028173, "loss": 2.7041, "step": 1609 }, { "epoch": 0.03551159494587015, "grad_norm": 0.2138671875, "learning_rate": 0.0004997656460805579, "loss": 2.8056, "step": 1610 }, { "epoch": 0.03553365183714088, "grad_norm": 0.212890625, "learning_rate": 0.0004997651408144639, "loss": 2.6723, "step": 1611 }, { "epoch": 0.0355557087284116, "grad_norm": 0.21875, "learning_rate": 0.0004997646350045363, "loss": 2.6619, "step": 1612 }, { "epoch": 0.03557776561968233, "grad_norm": 0.2109375, "learning_rate": 0.0004997641286507765, "loss": 2.7033, "step": 1613 }, { "epoch": 0.035599822510953054, "grad_norm": 0.2080078125, "learning_rate": 0.0004997636217531855, "loss": 2.6584, "step": 1614 }, { "epoch": 0.035621879402223784, "grad_norm": 0.216796875, "learning_rate": 0.0004997631143117641, "loss": 2.726, "step": 1615 }, { "epoch": 0.03564393629349451, "grad_norm": 0.21484375, "learning_rate": 0.0004997626063265137, "loss": 2.7285, "step": 1616 }, { "epoch": 0.03566599318476524, "grad_norm": 0.2197265625, "learning_rate": 0.0004997620977974355, "loss": 2.7064, "step": 1617 }, { "epoch": 0.03568805007603596, "grad_norm": 0.2099609375, "learning_rate": 0.0004997615887245302, "loss": 2.6286, "step": 1618 }, { "epoch": 0.03571010696730669, "grad_norm": 0.2109375, "learning_rate": 0.0004997610791077992, "loss": 2.679, "step": 1619 }, { "epoch": 0.035732163858577415, "grad_norm": 0.2138671875, "learning_rate": 0.0004997605689472436, "loss": 2.7297, "step": 1620 }, { "epoch": 0.035754220749848145, "grad_norm": 0.2060546875, "learning_rate": 0.0004997600582428646, "loss": 2.7037, "step": 1621 }, { "epoch": 0.03577627764111887, "grad_norm": 0.208984375, "learning_rate": 0.000499759546994663, "loss": 2.7952, "step": 1622 }, { "epoch": 0.0357983345323896, "grad_norm": 0.20703125, "learning_rate": 0.0004997590352026401, "loss": 2.7081, "step": 1623 }, { "epoch": 0.03582039142366032, "grad_norm": 0.2041015625, "learning_rate": 0.0004997585228667971, "loss": 2.6174, "step": 1624 }, { "epoch": 0.03584244831493105, "grad_norm": 0.2138671875, "learning_rate": 0.0004997580099871349, "loss": 2.6732, "step": 1625 }, { "epoch": 0.035864505206201776, "grad_norm": 0.2119140625, "learning_rate": 0.0004997574965636549, "loss": 2.6863, "step": 1626 }, { "epoch": 0.035886562097472506, "grad_norm": 0.2080078125, "learning_rate": 0.000499756982596358, "loss": 2.623, "step": 1627 }, { "epoch": 0.03590861898874323, "grad_norm": 0.2138671875, "learning_rate": 0.0004997564680852453, "loss": 2.6804, "step": 1628 }, { "epoch": 0.03593067588001396, "grad_norm": 0.208984375, "learning_rate": 0.000499755953030318, "loss": 2.6615, "step": 1629 }, { "epoch": 0.03595273277128468, "grad_norm": 0.20703125, "learning_rate": 0.0004997554374315773, "loss": 2.7059, "step": 1630 }, { "epoch": 0.03597478966255541, "grad_norm": 0.2099609375, "learning_rate": 0.0004997549212890243, "loss": 2.658, "step": 1631 }, { "epoch": 0.03599684655382614, "grad_norm": 0.2060546875, "learning_rate": 0.00049975440460266, "loss": 2.6233, "step": 1632 }, { "epoch": 0.03601890344509687, "grad_norm": 0.2099609375, "learning_rate": 0.0004997538873724855, "loss": 2.7551, "step": 1633 }, { "epoch": 0.03604096033636759, "grad_norm": 0.2109375, "learning_rate": 0.0004997533695985021, "loss": 2.6678, "step": 1634 }, { "epoch": 0.03606301722763832, "grad_norm": 0.2158203125, "learning_rate": 0.0004997528512807109, "loss": 2.599, "step": 1635 }, { "epoch": 0.036085074118909044, "grad_norm": 0.2158203125, "learning_rate": 0.0004997523324191129, "loss": 2.681, "step": 1636 }, { "epoch": 0.036107131010179774, "grad_norm": 0.21875, "learning_rate": 0.0004997518130137093, "loss": 2.7583, "step": 1637 }, { "epoch": 0.0361291879014505, "grad_norm": 0.2177734375, "learning_rate": 0.0004997512930645013, "loss": 2.6679, "step": 1638 }, { "epoch": 0.03615124479272123, "grad_norm": 0.2216796875, "learning_rate": 0.0004997507725714898, "loss": 2.7581, "step": 1639 }, { "epoch": 0.03617330168399195, "grad_norm": 0.21484375, "learning_rate": 0.0004997502515346763, "loss": 2.7497, "step": 1640 }, { "epoch": 0.03619535857526268, "grad_norm": 0.2158203125, "learning_rate": 0.0004997497299540617, "loss": 2.627, "step": 1641 }, { "epoch": 0.036217415466533405, "grad_norm": 0.2197265625, "learning_rate": 0.000499749207829647, "loss": 2.6025, "step": 1642 }, { "epoch": 0.036239472357804135, "grad_norm": 0.21484375, "learning_rate": 0.0004997486851614337, "loss": 2.6902, "step": 1643 }, { "epoch": 0.03626152924907486, "grad_norm": 0.2197265625, "learning_rate": 0.0004997481619494228, "loss": 2.7758, "step": 1644 }, { "epoch": 0.03628358614034559, "grad_norm": 0.208984375, "learning_rate": 0.0004997476381936152, "loss": 2.6648, "step": 1645 }, { "epoch": 0.03630564303161631, "grad_norm": 0.212890625, "learning_rate": 0.0004997471138940123, "loss": 2.6748, "step": 1646 }, { "epoch": 0.03632769992288704, "grad_norm": 0.2138671875, "learning_rate": 0.0004997465890506151, "loss": 2.7168, "step": 1647 }, { "epoch": 0.036349756814157766, "grad_norm": 0.2158203125, "learning_rate": 0.0004997460636634249, "loss": 2.6816, "step": 1648 }, { "epoch": 0.036371813705428496, "grad_norm": 0.21484375, "learning_rate": 0.0004997455377324427, "loss": 2.728, "step": 1649 }, { "epoch": 0.03639387059669922, "grad_norm": 0.2158203125, "learning_rate": 0.0004997450112576697, "loss": 2.7318, "step": 1650 }, { "epoch": 0.03641592748796995, "grad_norm": 0.2080078125, "learning_rate": 0.0004997444842391071, "loss": 2.6163, "step": 1651 }, { "epoch": 0.03643798437924067, "grad_norm": 0.21484375, "learning_rate": 0.0004997439566767559, "loss": 2.6168, "step": 1652 }, { "epoch": 0.036460041270511404, "grad_norm": 0.2099609375, "learning_rate": 0.0004997434285706174, "loss": 2.6578, "step": 1653 }, { "epoch": 0.03648209816178213, "grad_norm": 0.2109375, "learning_rate": 0.0004997428999206927, "loss": 2.6507, "step": 1654 }, { "epoch": 0.03650415505305286, "grad_norm": 0.21484375, "learning_rate": 0.0004997423707269829, "loss": 2.704, "step": 1655 }, { "epoch": 0.03652621194432358, "grad_norm": 0.20703125, "learning_rate": 0.0004997418409894893, "loss": 2.6629, "step": 1656 }, { "epoch": 0.03654826883559431, "grad_norm": 0.2080078125, "learning_rate": 0.0004997413107082128, "loss": 2.6771, "step": 1657 }, { "epoch": 0.036570325726865034, "grad_norm": 0.2119140625, "learning_rate": 0.0004997407798831548, "loss": 2.6736, "step": 1658 }, { "epoch": 0.036592382618135764, "grad_norm": 0.216796875, "learning_rate": 0.0004997402485143164, "loss": 2.6393, "step": 1659 }, { "epoch": 0.03661443950940649, "grad_norm": 0.208984375, "learning_rate": 0.0004997397166016986, "loss": 2.6951, "step": 1660 }, { "epoch": 0.03663649640067722, "grad_norm": 0.21484375, "learning_rate": 0.0004997391841453028, "loss": 2.7516, "step": 1661 }, { "epoch": 0.03665855329194794, "grad_norm": 0.21875, "learning_rate": 0.0004997386511451298, "loss": 2.7091, "step": 1662 }, { "epoch": 0.03668061018321867, "grad_norm": 0.2177734375, "learning_rate": 0.0004997381176011812, "loss": 2.6596, "step": 1663 }, { "epoch": 0.036702667074489395, "grad_norm": 0.2158203125, "learning_rate": 0.0004997375835134579, "loss": 2.7182, "step": 1664 }, { "epoch": 0.036724723965760125, "grad_norm": 0.2099609375, "learning_rate": 0.0004997370488819611, "loss": 2.5392, "step": 1665 }, { "epoch": 0.03674678085703085, "grad_norm": 0.205078125, "learning_rate": 0.000499736513706692, "loss": 2.7093, "step": 1666 }, { "epoch": 0.03676883774830158, "grad_norm": 0.212890625, "learning_rate": 0.0004997359779876518, "loss": 2.7411, "step": 1667 }, { "epoch": 0.0367908946395723, "grad_norm": 0.2109375, "learning_rate": 0.0004997354417248416, "loss": 2.6174, "step": 1668 }, { "epoch": 0.03681295153084303, "grad_norm": 0.21484375, "learning_rate": 0.0004997349049182624, "loss": 2.7053, "step": 1669 }, { "epoch": 0.036835008422113756, "grad_norm": 0.2041015625, "learning_rate": 0.0004997343675679157, "loss": 2.6549, "step": 1670 }, { "epoch": 0.036857065313384486, "grad_norm": 0.2177734375, "learning_rate": 0.0004997338296738026, "loss": 2.7466, "step": 1671 }, { "epoch": 0.03687912220465521, "grad_norm": 0.2138671875, "learning_rate": 0.0004997332912359241, "loss": 2.678, "step": 1672 }, { "epoch": 0.03690117909592594, "grad_norm": 0.2177734375, "learning_rate": 0.0004997327522542813, "loss": 2.7021, "step": 1673 }, { "epoch": 0.03692323598719666, "grad_norm": 0.22265625, "learning_rate": 0.0004997322127288758, "loss": 2.6674, "step": 1674 }, { "epoch": 0.036945292878467394, "grad_norm": 0.220703125, "learning_rate": 0.0004997316726597084, "loss": 2.6094, "step": 1675 }, { "epoch": 0.03696734976973812, "grad_norm": 0.220703125, "learning_rate": 0.0004997311320467804, "loss": 2.656, "step": 1676 }, { "epoch": 0.03698940666100885, "grad_norm": 0.208984375, "learning_rate": 0.000499730590890093, "loss": 2.666, "step": 1677 }, { "epoch": 0.03701146355227957, "grad_norm": 0.21484375, "learning_rate": 0.0004997300491896472, "loss": 2.6359, "step": 1678 }, { "epoch": 0.0370335204435503, "grad_norm": 0.2080078125, "learning_rate": 0.0004997295069454445, "loss": 2.748, "step": 1679 }, { "epoch": 0.037055577334821024, "grad_norm": 0.212890625, "learning_rate": 0.0004997289641574859, "loss": 2.7052, "step": 1680 }, { "epoch": 0.037077634226091755, "grad_norm": 0.2138671875, "learning_rate": 0.0004997284208257725, "loss": 2.7336, "step": 1681 }, { "epoch": 0.03709969111736248, "grad_norm": 0.205078125, "learning_rate": 0.0004997278769503055, "loss": 2.7091, "step": 1682 }, { "epoch": 0.03712174800863321, "grad_norm": 0.2060546875, "learning_rate": 0.0004997273325310863, "loss": 2.6776, "step": 1683 }, { "epoch": 0.03714380489990393, "grad_norm": 0.21484375, "learning_rate": 0.0004997267875681159, "loss": 2.6743, "step": 1684 }, { "epoch": 0.03716586179117466, "grad_norm": 0.20703125, "learning_rate": 0.0004997262420613955, "loss": 2.6298, "step": 1685 }, { "epoch": 0.037187918682445385, "grad_norm": 0.2099609375, "learning_rate": 0.0004997256960109263, "loss": 2.7232, "step": 1686 }, { "epoch": 0.037209975573716116, "grad_norm": 0.212890625, "learning_rate": 0.0004997251494167096, "loss": 2.5857, "step": 1687 }, { "epoch": 0.03723203246498684, "grad_norm": 0.2119140625, "learning_rate": 0.0004997246022787463, "loss": 2.6817, "step": 1688 }, { "epoch": 0.03725408935625757, "grad_norm": 0.20703125, "learning_rate": 0.0004997240545970379, "loss": 2.7054, "step": 1689 }, { "epoch": 0.03727614624752829, "grad_norm": 0.21484375, "learning_rate": 0.0004997235063715855, "loss": 2.6876, "step": 1690 }, { "epoch": 0.03729820313879902, "grad_norm": 0.208984375, "learning_rate": 0.0004997229576023903, "loss": 2.7272, "step": 1691 }, { "epoch": 0.037320260030069746, "grad_norm": 0.212890625, "learning_rate": 0.0004997224082894533, "loss": 2.6622, "step": 1692 }, { "epoch": 0.03734231692134048, "grad_norm": 0.20703125, "learning_rate": 0.000499721858432776, "loss": 2.6905, "step": 1693 }, { "epoch": 0.0373643738126112, "grad_norm": 0.21875, "learning_rate": 0.0004997213080323593, "loss": 2.6633, "step": 1694 }, { "epoch": 0.03738643070388192, "grad_norm": 0.203125, "learning_rate": 0.0004997207570882048, "loss": 2.6132, "step": 1695 }, { "epoch": 0.037408487595152654, "grad_norm": 0.20703125, "learning_rate": 0.0004997202056003133, "loss": 2.7098, "step": 1696 }, { "epoch": 0.03743054448642338, "grad_norm": 0.2080078125, "learning_rate": 0.0004997196535686862, "loss": 2.6783, "step": 1697 }, { "epoch": 0.03745260137769411, "grad_norm": 0.2041015625, "learning_rate": 0.0004997191009933246, "loss": 2.7049, "step": 1698 }, { "epoch": 0.03747465826896483, "grad_norm": 0.212890625, "learning_rate": 0.0004997185478742298, "loss": 2.7824, "step": 1699 }, { "epoch": 0.03749671516023556, "grad_norm": 0.2099609375, "learning_rate": 0.000499717994211403, "loss": 2.7025, "step": 1700 }, { "epoch": 0.037518772051506284, "grad_norm": 0.21484375, "learning_rate": 0.0004997174400048453, "loss": 2.7283, "step": 1701 }, { "epoch": 0.037540828942777014, "grad_norm": 0.208984375, "learning_rate": 0.000499716885254558, "loss": 2.6926, "step": 1702 }, { "epoch": 0.03756288583404774, "grad_norm": 0.212890625, "learning_rate": 0.0004997163299605424, "loss": 2.6968, "step": 1703 }, { "epoch": 0.03758494272531847, "grad_norm": 0.2041015625, "learning_rate": 0.0004997157741227995, "loss": 2.6489, "step": 1704 }, { "epoch": 0.03760699961658919, "grad_norm": 0.2119140625, "learning_rate": 0.0004997152177413306, "loss": 2.6759, "step": 1705 }, { "epoch": 0.03762905650785992, "grad_norm": 0.2021484375, "learning_rate": 0.0004997146608161369, "loss": 2.6659, "step": 1706 }, { "epoch": 0.037651113399130645, "grad_norm": 0.2119140625, "learning_rate": 0.0004997141033472196, "loss": 2.6992, "step": 1707 }, { "epoch": 0.037673170290401375, "grad_norm": 0.2109375, "learning_rate": 0.0004997135453345801, "loss": 2.7087, "step": 1708 }, { "epoch": 0.0376952271816721, "grad_norm": 0.212890625, "learning_rate": 0.0004997129867782193, "loss": 2.7402, "step": 1709 }, { "epoch": 0.03771728407294283, "grad_norm": 0.2109375, "learning_rate": 0.0004997124276781386, "loss": 2.6084, "step": 1710 }, { "epoch": 0.03773934096421355, "grad_norm": 0.22265625, "learning_rate": 0.0004997118680343392, "loss": 2.7216, "step": 1711 }, { "epoch": 0.03776139785548428, "grad_norm": 0.2109375, "learning_rate": 0.0004997113078468224, "loss": 2.5914, "step": 1712 }, { "epoch": 0.037783454746755006, "grad_norm": 0.20703125, "learning_rate": 0.0004997107471155891, "loss": 2.7316, "step": 1713 }, { "epoch": 0.037805511638025736, "grad_norm": 0.216796875, "learning_rate": 0.0004997101858406409, "loss": 2.676, "step": 1714 }, { "epoch": 0.03782756852929646, "grad_norm": 0.2177734375, "learning_rate": 0.0004997096240219789, "loss": 2.6387, "step": 1715 }, { "epoch": 0.03784962542056719, "grad_norm": 0.2138671875, "learning_rate": 0.0004997090616596044, "loss": 2.679, "step": 1716 }, { "epoch": 0.03787168231183791, "grad_norm": 0.2080078125, "learning_rate": 0.0004997084987535183, "loss": 2.7822, "step": 1717 }, { "epoch": 0.037893739203108644, "grad_norm": 0.21875, "learning_rate": 0.0004997079353037221, "loss": 2.6591, "step": 1718 }, { "epoch": 0.03791579609437937, "grad_norm": 0.197265625, "learning_rate": 0.0004997073713102171, "loss": 2.6847, "step": 1719 }, { "epoch": 0.0379378529856501, "grad_norm": 0.2138671875, "learning_rate": 0.0004997068067730042, "loss": 2.7381, "step": 1720 }, { "epoch": 0.03795990987692082, "grad_norm": 0.2021484375, "learning_rate": 0.000499706241692085, "loss": 2.7524, "step": 1721 }, { "epoch": 0.03798196676819155, "grad_norm": 0.2119140625, "learning_rate": 0.0004997056760674605, "loss": 2.6516, "step": 1722 }, { "epoch": 0.038004023659462274, "grad_norm": 0.2060546875, "learning_rate": 0.0004997051098991321, "loss": 2.6799, "step": 1723 }, { "epoch": 0.038026080550733005, "grad_norm": 0.2158203125, "learning_rate": 0.0004997045431871008, "loss": 2.6607, "step": 1724 }, { "epoch": 0.03804813744200373, "grad_norm": 0.220703125, "learning_rate": 0.0004997039759313679, "loss": 2.7637, "step": 1725 }, { "epoch": 0.03807019433327446, "grad_norm": 0.205078125, "learning_rate": 0.0004997034081319348, "loss": 2.6697, "step": 1726 }, { "epoch": 0.03809225122454518, "grad_norm": 0.2314453125, "learning_rate": 0.0004997028397888026, "loss": 2.7427, "step": 1727 }, { "epoch": 0.03811430811581591, "grad_norm": 0.2060546875, "learning_rate": 0.0004997022709019726, "loss": 2.7754, "step": 1728 }, { "epoch": 0.038136365007086635, "grad_norm": 0.2197265625, "learning_rate": 0.000499701701471446, "loss": 2.6891, "step": 1729 }, { "epoch": 0.038158421898357366, "grad_norm": 0.2109375, "learning_rate": 0.0004997011314972241, "loss": 2.6011, "step": 1730 }, { "epoch": 0.03818047878962809, "grad_norm": 0.21875, "learning_rate": 0.000499700560979308, "loss": 2.6601, "step": 1731 }, { "epoch": 0.03820253568089882, "grad_norm": 0.208984375, "learning_rate": 0.0004996999899176991, "loss": 2.6867, "step": 1732 }, { "epoch": 0.03822459257216954, "grad_norm": 0.208984375, "learning_rate": 0.0004996994183123984, "loss": 2.6271, "step": 1733 }, { "epoch": 0.03824664946344027, "grad_norm": 0.208984375, "learning_rate": 0.0004996988461634075, "loss": 2.6213, "step": 1734 }, { "epoch": 0.038268706354710996, "grad_norm": 0.2109375, "learning_rate": 0.0004996982734707274, "loss": 2.7059, "step": 1735 }, { "epoch": 0.03829076324598173, "grad_norm": 0.205078125, "learning_rate": 0.0004996977002343593, "loss": 2.6186, "step": 1736 }, { "epoch": 0.03831282013725245, "grad_norm": 0.2158203125, "learning_rate": 0.0004996971264543047, "loss": 2.6917, "step": 1737 }, { "epoch": 0.03833487702852318, "grad_norm": 0.2001953125, "learning_rate": 0.0004996965521305648, "loss": 2.5909, "step": 1738 }, { "epoch": 0.038356933919793904, "grad_norm": 0.2236328125, "learning_rate": 0.0004996959772631405, "loss": 2.6297, "step": 1739 }, { "epoch": 0.038378990811064634, "grad_norm": 0.2060546875, "learning_rate": 0.0004996954018520335, "loss": 2.7437, "step": 1740 }, { "epoch": 0.03840104770233536, "grad_norm": 0.2236328125, "learning_rate": 0.0004996948258972448, "loss": 2.6752, "step": 1741 }, { "epoch": 0.03842310459360609, "grad_norm": 0.2119140625, "learning_rate": 0.0004996942493987757, "loss": 2.6955, "step": 1742 }, { "epoch": 0.03844516148487681, "grad_norm": 0.212890625, "learning_rate": 0.0004996936723566276, "loss": 2.7051, "step": 1743 }, { "epoch": 0.03846721837614754, "grad_norm": 0.2080078125, "learning_rate": 0.0004996930947708015, "loss": 2.7066, "step": 1744 }, { "epoch": 0.038489275267418264, "grad_norm": 0.2099609375, "learning_rate": 0.0004996925166412988, "loss": 2.6895, "step": 1745 }, { "epoch": 0.038511332158688995, "grad_norm": 0.20703125, "learning_rate": 0.0004996919379681207, "loss": 2.6353, "step": 1746 }, { "epoch": 0.03853338904995972, "grad_norm": 0.2109375, "learning_rate": 0.0004996913587512686, "loss": 2.6671, "step": 1747 }, { "epoch": 0.03855544594123045, "grad_norm": 0.208984375, "learning_rate": 0.0004996907789907437, "loss": 2.7036, "step": 1748 }, { "epoch": 0.03857750283250117, "grad_norm": 0.2109375, "learning_rate": 0.000499690198686547, "loss": 2.7248, "step": 1749 }, { "epoch": 0.0385995597237719, "grad_norm": 0.212890625, "learning_rate": 0.0004996896178386803, "loss": 2.6913, "step": 1750 }, { "epoch": 0.038621616615042625, "grad_norm": 0.2109375, "learning_rate": 0.0004996890364471443, "loss": 2.6494, "step": 1751 }, { "epoch": 0.038643673506313356, "grad_norm": 0.2216796875, "learning_rate": 0.0004996884545119408, "loss": 2.6242, "step": 1752 }, { "epoch": 0.03866573039758408, "grad_norm": 0.2177734375, "learning_rate": 0.0004996878720330705, "loss": 2.6942, "step": 1753 }, { "epoch": 0.03868778728885481, "grad_norm": 0.2216796875, "learning_rate": 0.0004996872890105351, "loss": 2.7407, "step": 1754 }, { "epoch": 0.03870984418012553, "grad_norm": 0.21875, "learning_rate": 0.0004996867054443357, "loss": 2.7169, "step": 1755 }, { "epoch": 0.03873190107139626, "grad_norm": 0.2138671875, "learning_rate": 0.0004996861213344737, "loss": 2.5944, "step": 1756 }, { "epoch": 0.038753957962666986, "grad_norm": 0.2109375, "learning_rate": 0.0004996855366809502, "loss": 2.6315, "step": 1757 }, { "epoch": 0.03877601485393772, "grad_norm": 0.2099609375, "learning_rate": 0.0004996849514837665, "loss": 2.647, "step": 1758 }, { "epoch": 0.03879807174520844, "grad_norm": 0.208984375, "learning_rate": 0.0004996843657429238, "loss": 2.656, "step": 1759 }, { "epoch": 0.03882012863647917, "grad_norm": 0.220703125, "learning_rate": 0.0004996837794584237, "loss": 2.6574, "step": 1760 }, { "epoch": 0.038842185527749894, "grad_norm": 0.2099609375, "learning_rate": 0.0004996831926302672, "loss": 2.7271, "step": 1761 }, { "epoch": 0.038864242419020624, "grad_norm": 0.2119140625, "learning_rate": 0.0004996826052584557, "loss": 2.6926, "step": 1762 }, { "epoch": 0.03888629931029135, "grad_norm": 0.2041015625, "learning_rate": 0.0004996820173429903, "loss": 2.6695, "step": 1763 }, { "epoch": 0.03890835620156208, "grad_norm": 0.2490234375, "learning_rate": 0.0004996814288838725, "loss": 2.7048, "step": 1764 }, { "epoch": 0.0389304130928328, "grad_norm": 0.2080078125, "learning_rate": 0.0004996808398811034, "loss": 2.61, "step": 1765 }, { "epoch": 0.03895246998410353, "grad_norm": 0.2109375, "learning_rate": 0.0004996802503346846, "loss": 2.7068, "step": 1766 }, { "epoch": 0.038974526875374255, "grad_norm": 0.2041015625, "learning_rate": 0.000499679660244617, "loss": 2.6409, "step": 1767 }, { "epoch": 0.038996583766644985, "grad_norm": 0.205078125, "learning_rate": 0.000499679069610902, "loss": 2.6639, "step": 1768 }, { "epoch": 0.03901864065791571, "grad_norm": 0.2080078125, "learning_rate": 0.000499678478433541, "loss": 2.6462, "step": 1769 }, { "epoch": 0.03904069754918644, "grad_norm": 0.208984375, "learning_rate": 0.0004996778867125351, "loss": 2.7151, "step": 1770 }, { "epoch": 0.03906275444045716, "grad_norm": 0.21484375, "learning_rate": 0.0004996772944478858, "loss": 2.6955, "step": 1771 }, { "epoch": 0.03908481133172789, "grad_norm": 0.2099609375, "learning_rate": 0.0004996767016395942, "loss": 2.6647, "step": 1772 }, { "epoch": 0.039106868222998616, "grad_norm": 0.208984375, "learning_rate": 0.0004996761082876617, "loss": 2.7813, "step": 1773 }, { "epoch": 0.039128925114269346, "grad_norm": 0.2080078125, "learning_rate": 0.0004996755143920897, "loss": 2.5862, "step": 1774 }, { "epoch": 0.03915098200554007, "grad_norm": 0.2060546875, "learning_rate": 0.0004996749199528793, "loss": 2.6172, "step": 1775 }, { "epoch": 0.0391730388968108, "grad_norm": 0.2080078125, "learning_rate": 0.0004996743249700319, "loss": 2.7098, "step": 1776 }, { "epoch": 0.03919509578808152, "grad_norm": 0.2080078125, "learning_rate": 0.0004996737294435486, "loss": 2.743, "step": 1777 }, { "epoch": 0.03921715267935225, "grad_norm": 0.2060546875, "learning_rate": 0.000499673133373431, "loss": 2.6819, "step": 1778 }, { "epoch": 0.03923920957062298, "grad_norm": 0.2021484375, "learning_rate": 0.0004996725367596802, "loss": 2.6444, "step": 1779 }, { "epoch": 0.03926126646189371, "grad_norm": 0.20703125, "learning_rate": 0.0004996719396022975, "loss": 2.6418, "step": 1780 }, { "epoch": 0.03928332335316443, "grad_norm": 0.208984375, "learning_rate": 0.0004996713419012842, "loss": 2.6721, "step": 1781 }, { "epoch": 0.03930538024443516, "grad_norm": 0.2060546875, "learning_rate": 0.0004996707436566418, "loss": 2.6834, "step": 1782 }, { "epoch": 0.039327437135705884, "grad_norm": 0.2099609375, "learning_rate": 0.0004996701448683714, "loss": 2.6781, "step": 1783 }, { "epoch": 0.039349494026976614, "grad_norm": 0.2138671875, "learning_rate": 0.0004996695455364744, "loss": 2.5698, "step": 1784 }, { "epoch": 0.03937155091824734, "grad_norm": 0.212890625, "learning_rate": 0.000499668945660952, "loss": 2.7385, "step": 1785 }, { "epoch": 0.03939360780951807, "grad_norm": 0.2041015625, "learning_rate": 0.0004996683452418056, "loss": 2.6254, "step": 1786 }, { "epoch": 0.03941566470078879, "grad_norm": 0.2001953125, "learning_rate": 0.0004996677442790365, "loss": 2.5595, "step": 1787 }, { "epoch": 0.03943772159205952, "grad_norm": 0.2001953125, "learning_rate": 0.0004996671427726459, "loss": 2.6178, "step": 1788 }, { "epoch": 0.039459778483330245, "grad_norm": 0.2041015625, "learning_rate": 0.0004996665407226352, "loss": 2.6872, "step": 1789 }, { "epoch": 0.039481835374600975, "grad_norm": 0.208984375, "learning_rate": 0.0004996659381290058, "loss": 2.6313, "step": 1790 }, { "epoch": 0.0395038922658717, "grad_norm": 0.2158203125, "learning_rate": 0.0004996653349917589, "loss": 2.6968, "step": 1791 }, { "epoch": 0.03952594915714243, "grad_norm": 0.201171875, "learning_rate": 0.0004996647313108958, "loss": 2.565, "step": 1792 }, { "epoch": 0.03954800604841315, "grad_norm": 0.2041015625, "learning_rate": 0.0004996641270864178, "loss": 2.6211, "step": 1793 }, { "epoch": 0.03957006293968388, "grad_norm": 0.2021484375, "learning_rate": 0.0004996635223183263, "loss": 2.6666, "step": 1794 }, { "epoch": 0.039592119830954606, "grad_norm": 0.21484375, "learning_rate": 0.0004996629170066225, "loss": 2.7252, "step": 1795 }, { "epoch": 0.039614176722225336, "grad_norm": 0.205078125, "learning_rate": 0.000499662311151308, "loss": 2.6745, "step": 1796 }, { "epoch": 0.03963623361349606, "grad_norm": 0.205078125, "learning_rate": 0.0004996617047523837, "loss": 2.6993, "step": 1797 }, { "epoch": 0.03965829050476679, "grad_norm": 0.20703125, "learning_rate": 0.0004996610978098512, "loss": 2.588, "step": 1798 }, { "epoch": 0.03968034739603751, "grad_norm": 0.208984375, "learning_rate": 0.0004996604903237118, "loss": 2.6385, "step": 1799 }, { "epoch": 0.03970240428730824, "grad_norm": 0.2060546875, "learning_rate": 0.0004996598822939668, "loss": 2.6407, "step": 1800 }, { "epoch": 0.03972446117857897, "grad_norm": 0.20703125, "learning_rate": 0.0004996592737206174, "loss": 2.6275, "step": 1801 }, { "epoch": 0.0397465180698497, "grad_norm": 0.208984375, "learning_rate": 0.0004996586646036651, "loss": 2.6996, "step": 1802 }, { "epoch": 0.03976857496112042, "grad_norm": 0.2041015625, "learning_rate": 0.0004996580549431111, "loss": 2.6355, "step": 1803 }, { "epoch": 0.03979063185239115, "grad_norm": 0.2119140625, "learning_rate": 0.000499657444738957, "loss": 2.6793, "step": 1804 }, { "epoch": 0.039812688743661874, "grad_norm": 0.1962890625, "learning_rate": 0.0004996568339912036, "loss": 2.5155, "step": 1805 }, { "epoch": 0.039834745634932604, "grad_norm": 0.208984375, "learning_rate": 0.0004996562226998527, "loss": 2.7823, "step": 1806 }, { "epoch": 0.03985680252620333, "grad_norm": 0.2041015625, "learning_rate": 0.0004996556108649054, "loss": 2.6248, "step": 1807 }, { "epoch": 0.03987885941747406, "grad_norm": 0.2021484375, "learning_rate": 0.0004996549984863632, "loss": 2.6739, "step": 1808 }, { "epoch": 0.03990091630874478, "grad_norm": 0.2021484375, "learning_rate": 0.0004996543855642272, "loss": 2.6778, "step": 1809 }, { "epoch": 0.03992297320001551, "grad_norm": 0.2109375, "learning_rate": 0.000499653772098499, "loss": 2.7097, "step": 1810 }, { "epoch": 0.039945030091286235, "grad_norm": 0.203125, "learning_rate": 0.0004996531580891797, "loss": 2.6903, "step": 1811 }, { "epoch": 0.039967086982556965, "grad_norm": 0.2060546875, "learning_rate": 0.0004996525435362708, "loss": 2.5471, "step": 1812 }, { "epoch": 0.03998914387382769, "grad_norm": 0.203125, "learning_rate": 0.0004996519284397737, "loss": 2.6446, "step": 1813 }, { "epoch": 0.04001120076509842, "grad_norm": 0.2001953125, "learning_rate": 0.0004996513127996894, "loss": 2.6436, "step": 1814 }, { "epoch": 0.04003325765636914, "grad_norm": 0.208984375, "learning_rate": 0.0004996506966160196, "loss": 2.6202, "step": 1815 }, { "epoch": 0.04005531454763987, "grad_norm": 0.20703125, "learning_rate": 0.0004996500798887654, "loss": 2.6804, "step": 1816 }, { "epoch": 0.040077371438910596, "grad_norm": 0.2060546875, "learning_rate": 0.0004996494626179285, "loss": 2.6124, "step": 1817 }, { "epoch": 0.040099428330181326, "grad_norm": 0.2021484375, "learning_rate": 0.0004996488448035097, "loss": 2.6199, "step": 1818 }, { "epoch": 0.04012148522145205, "grad_norm": 0.2060546875, "learning_rate": 0.0004996482264455108, "loss": 2.7144, "step": 1819 }, { "epoch": 0.04014354211272278, "grad_norm": 0.203125, "learning_rate": 0.0004996476075439329, "loss": 2.6406, "step": 1820 }, { "epoch": 0.0401655990039935, "grad_norm": 0.2060546875, "learning_rate": 0.0004996469880987775, "loss": 2.7182, "step": 1821 }, { "epoch": 0.040187655895264233, "grad_norm": 0.19921875, "learning_rate": 0.0004996463681100458, "loss": 2.563, "step": 1822 }, { "epoch": 0.04020971278653496, "grad_norm": 0.2041015625, "learning_rate": 0.0004996457475777392, "loss": 2.7094, "step": 1823 }, { "epoch": 0.04023176967780569, "grad_norm": 0.2041015625, "learning_rate": 0.0004996451265018592, "loss": 2.634, "step": 1824 }, { "epoch": 0.04025382656907641, "grad_norm": 0.2021484375, "learning_rate": 0.000499644504882407, "loss": 2.6537, "step": 1825 }, { "epoch": 0.04027588346034714, "grad_norm": 0.208984375, "learning_rate": 0.0004996438827193838, "loss": 2.619, "step": 1826 }, { "epoch": 0.040297940351617864, "grad_norm": 0.208984375, "learning_rate": 0.0004996432600127914, "loss": 2.6902, "step": 1827 }, { "epoch": 0.040319997242888594, "grad_norm": 0.203125, "learning_rate": 0.0004996426367626308, "loss": 2.6234, "step": 1828 }, { "epoch": 0.04034205413415932, "grad_norm": 0.2041015625, "learning_rate": 0.0004996420129689034, "loss": 2.6672, "step": 1829 }, { "epoch": 0.04036411102543005, "grad_norm": 0.205078125, "learning_rate": 0.0004996413886316107, "loss": 2.6792, "step": 1830 }, { "epoch": 0.04038616791670077, "grad_norm": 0.205078125, "learning_rate": 0.0004996407637507539, "loss": 2.6045, "step": 1831 }, { "epoch": 0.0404082248079715, "grad_norm": 0.203125, "learning_rate": 0.0004996401383263345, "loss": 2.6777, "step": 1832 }, { "epoch": 0.040430281699242225, "grad_norm": 0.1962890625, "learning_rate": 0.0004996395123583537, "loss": 2.5702, "step": 1833 }, { "epoch": 0.040452338590512955, "grad_norm": 0.205078125, "learning_rate": 0.0004996388858468131, "loss": 2.7322, "step": 1834 }, { "epoch": 0.04047439548178368, "grad_norm": 0.2021484375, "learning_rate": 0.0004996382587917138, "loss": 2.5995, "step": 1835 }, { "epoch": 0.0404964523730544, "grad_norm": 0.2099609375, "learning_rate": 0.0004996376311930573, "loss": 2.7391, "step": 1836 }, { "epoch": 0.04051850926432513, "grad_norm": 0.2021484375, "learning_rate": 0.000499637003050845, "loss": 2.5572, "step": 1837 }, { "epoch": 0.040540566155595856, "grad_norm": 0.2001953125, "learning_rate": 0.0004996363743650781, "loss": 2.6317, "step": 1838 }, { "epoch": 0.040562623046866586, "grad_norm": 0.2060546875, "learning_rate": 0.0004996357451357582, "loss": 2.6935, "step": 1839 }, { "epoch": 0.04058467993813731, "grad_norm": 0.197265625, "learning_rate": 0.0004996351153628866, "loss": 2.6169, "step": 1840 }, { "epoch": 0.04060673682940804, "grad_norm": 0.20703125, "learning_rate": 0.0004996344850464646, "loss": 2.6691, "step": 1841 }, { "epoch": 0.04062879372067876, "grad_norm": 0.201171875, "learning_rate": 0.0004996338541864934, "loss": 2.5865, "step": 1842 }, { "epoch": 0.04065085061194949, "grad_norm": 0.203125, "learning_rate": 0.0004996332227829749, "loss": 2.6894, "step": 1843 }, { "epoch": 0.04067290750322022, "grad_norm": 0.1962890625, "learning_rate": 0.00049963259083591, "loss": 2.5918, "step": 1844 }, { "epoch": 0.04069496439449095, "grad_norm": 0.2001953125, "learning_rate": 0.0004996319583453001, "loss": 2.6656, "step": 1845 }, { "epoch": 0.04071702128576167, "grad_norm": 0.2021484375, "learning_rate": 0.0004996313253111469, "loss": 2.6787, "step": 1846 }, { "epoch": 0.0407390781770324, "grad_norm": 0.2021484375, "learning_rate": 0.0004996306917334515, "loss": 2.5788, "step": 1847 }, { "epoch": 0.040761135068303124, "grad_norm": 0.2080078125, "learning_rate": 0.0004996300576122153, "loss": 2.714, "step": 1848 }, { "epoch": 0.040783191959573854, "grad_norm": 0.2021484375, "learning_rate": 0.0004996294229474399, "loss": 2.6422, "step": 1849 }, { "epoch": 0.04080524885084458, "grad_norm": 0.2060546875, "learning_rate": 0.0004996287877391263, "loss": 2.8125, "step": 1850 }, { "epoch": 0.04082730574211531, "grad_norm": 0.2001953125, "learning_rate": 0.0004996281519872763, "loss": 2.6404, "step": 1851 }, { "epoch": 0.04084936263338603, "grad_norm": 0.2021484375, "learning_rate": 0.000499627515691891, "loss": 2.5852, "step": 1852 }, { "epoch": 0.04087141952465676, "grad_norm": 0.2021484375, "learning_rate": 0.0004996268788529719, "loss": 2.6952, "step": 1853 }, { "epoch": 0.040893476415927485, "grad_norm": 0.2041015625, "learning_rate": 0.0004996262414705204, "loss": 2.6966, "step": 1854 }, { "epoch": 0.040915533307198215, "grad_norm": 0.2001953125, "learning_rate": 0.0004996256035445378, "loss": 2.705, "step": 1855 }, { "epoch": 0.04093759019846894, "grad_norm": 0.203125, "learning_rate": 0.0004996249650750256, "loss": 2.6704, "step": 1856 }, { "epoch": 0.04095964708973967, "grad_norm": 0.1953125, "learning_rate": 0.0004996243260619851, "loss": 2.6546, "step": 1857 }, { "epoch": 0.04098170398101039, "grad_norm": 0.208984375, "learning_rate": 0.0004996236865054177, "loss": 2.6089, "step": 1858 }, { "epoch": 0.04100376087228112, "grad_norm": 0.203125, "learning_rate": 0.0004996230464053248, "loss": 2.6412, "step": 1859 }, { "epoch": 0.041025817763551846, "grad_norm": 0.20703125, "learning_rate": 0.0004996224057617079, "loss": 2.6758, "step": 1860 }, { "epoch": 0.041047874654822576, "grad_norm": 0.197265625, "learning_rate": 0.0004996217645745682, "loss": 2.655, "step": 1861 }, { "epoch": 0.0410699315460933, "grad_norm": 0.205078125, "learning_rate": 0.0004996211228439073, "loss": 2.6604, "step": 1862 }, { "epoch": 0.04109198843736403, "grad_norm": 0.205078125, "learning_rate": 0.0004996204805697265, "loss": 2.6072, "step": 1863 }, { "epoch": 0.04111404532863475, "grad_norm": 0.19921875, "learning_rate": 0.000499619837752027, "loss": 2.6221, "step": 1864 }, { "epoch": 0.041136102219905483, "grad_norm": 0.2041015625, "learning_rate": 0.0004996191943908105, "loss": 2.644, "step": 1865 }, { "epoch": 0.04115815911117621, "grad_norm": 0.197265625, "learning_rate": 0.0004996185504860784, "loss": 2.5307, "step": 1866 }, { "epoch": 0.04118021600244694, "grad_norm": 0.2109375, "learning_rate": 0.0004996179060378318, "loss": 2.6718, "step": 1867 }, { "epoch": 0.04120227289371766, "grad_norm": 0.2041015625, "learning_rate": 0.0004996172610460725, "loss": 2.5811, "step": 1868 }, { "epoch": 0.04122432978498839, "grad_norm": 0.1953125, "learning_rate": 0.0004996166155108017, "loss": 2.681, "step": 1869 }, { "epoch": 0.041246386676259114, "grad_norm": 0.2099609375, "learning_rate": 0.0004996159694320207, "loss": 2.6521, "step": 1870 }, { "epoch": 0.041268443567529844, "grad_norm": 0.197265625, "learning_rate": 0.000499615322809731, "loss": 2.6385, "step": 1871 }, { "epoch": 0.04129050045880057, "grad_norm": 0.2138671875, "learning_rate": 0.0004996146756439341, "loss": 2.6753, "step": 1872 }, { "epoch": 0.0413125573500713, "grad_norm": 0.19140625, "learning_rate": 0.0004996140279346314, "loss": 2.5793, "step": 1873 }, { "epoch": 0.04133461424134202, "grad_norm": 0.2060546875, "learning_rate": 0.0004996133796818241, "loss": 2.5623, "step": 1874 }, { "epoch": 0.04135667113261275, "grad_norm": 0.208984375, "learning_rate": 0.0004996127308855139, "loss": 2.6315, "step": 1875 }, { "epoch": 0.041378728023883475, "grad_norm": 0.201171875, "learning_rate": 0.0004996120815457019, "loss": 2.6447, "step": 1876 }, { "epoch": 0.041400784915154205, "grad_norm": 0.193359375, "learning_rate": 0.0004996114316623897, "loss": 2.631, "step": 1877 }, { "epoch": 0.04142284180642493, "grad_norm": 0.19140625, "learning_rate": 0.0004996107812355789, "loss": 2.6235, "step": 1878 }, { "epoch": 0.04144489869769566, "grad_norm": 0.203125, "learning_rate": 0.0004996101302652706, "loss": 2.5695, "step": 1879 }, { "epoch": 0.04146695558896638, "grad_norm": 0.20703125, "learning_rate": 0.0004996094787514662, "loss": 2.6257, "step": 1880 }, { "epoch": 0.04148901248023711, "grad_norm": 0.2001953125, "learning_rate": 0.0004996088266941675, "loss": 2.6235, "step": 1881 }, { "epoch": 0.041511069371507836, "grad_norm": 0.19921875, "learning_rate": 0.0004996081740933755, "loss": 2.6083, "step": 1882 }, { "epoch": 0.041533126262778566, "grad_norm": 0.2099609375, "learning_rate": 0.0004996075209490918, "loss": 2.6608, "step": 1883 }, { "epoch": 0.04155518315404929, "grad_norm": 0.19921875, "learning_rate": 0.0004996068672613179, "loss": 2.5841, "step": 1884 }, { "epoch": 0.04157724004532002, "grad_norm": 0.208984375, "learning_rate": 0.0004996062130300551, "loss": 2.6515, "step": 1885 }, { "epoch": 0.04159929693659074, "grad_norm": 0.205078125, "learning_rate": 0.0004996055582553047, "loss": 2.639, "step": 1886 }, { "epoch": 0.041621353827861474, "grad_norm": 0.2060546875, "learning_rate": 0.0004996049029370685, "loss": 2.6555, "step": 1887 }, { "epoch": 0.0416434107191322, "grad_norm": 0.2109375, "learning_rate": 0.0004996042470753475, "loss": 2.7092, "step": 1888 }, { "epoch": 0.04166546761040293, "grad_norm": 0.2001953125, "learning_rate": 0.0004996035906701436, "loss": 2.5671, "step": 1889 }, { "epoch": 0.04168752450167365, "grad_norm": 0.20703125, "learning_rate": 0.0004996029337214578, "loss": 2.6764, "step": 1890 }, { "epoch": 0.04170958139294438, "grad_norm": 0.2080078125, "learning_rate": 0.0004996022762292918, "loss": 2.6718, "step": 1891 }, { "epoch": 0.041731638284215104, "grad_norm": 0.201171875, "learning_rate": 0.0004996016181936469, "loss": 2.6123, "step": 1892 }, { "epoch": 0.041753695175485835, "grad_norm": 0.2080078125, "learning_rate": 0.0004996009596145246, "loss": 2.6526, "step": 1893 }, { "epoch": 0.04177575206675656, "grad_norm": 0.193359375, "learning_rate": 0.0004996003004919262, "loss": 2.6525, "step": 1894 }, { "epoch": 0.04179780895802729, "grad_norm": 0.201171875, "learning_rate": 0.0004995996408258533, "loss": 2.5989, "step": 1895 }, { "epoch": 0.04181986584929801, "grad_norm": 0.2001953125, "learning_rate": 0.0004995989806163073, "loss": 2.6323, "step": 1896 }, { "epoch": 0.04184192274056874, "grad_norm": 0.2041015625, "learning_rate": 0.0004995983198632895, "loss": 2.662, "step": 1897 }, { "epoch": 0.041863979631839465, "grad_norm": 0.1982421875, "learning_rate": 0.0004995976585668015, "loss": 2.6606, "step": 1898 }, { "epoch": 0.041886036523110196, "grad_norm": 0.201171875, "learning_rate": 0.0004995969967268448, "loss": 2.625, "step": 1899 }, { "epoch": 0.04190809341438092, "grad_norm": 0.205078125, "learning_rate": 0.0004995963343434205, "loss": 2.5792, "step": 1900 }, { "epoch": 0.04193015030565165, "grad_norm": 0.203125, "learning_rate": 0.0004995956714165305, "loss": 2.5803, "step": 1901 }, { "epoch": 0.04195220719692237, "grad_norm": 0.1943359375, "learning_rate": 0.0004995950079461759, "loss": 2.6246, "step": 1902 }, { "epoch": 0.0419742640881931, "grad_norm": 0.2080078125, "learning_rate": 0.0004995943439323583, "loss": 2.6153, "step": 1903 }, { "epoch": 0.041996320979463826, "grad_norm": 0.1982421875, "learning_rate": 0.0004995936793750791, "loss": 2.5946, "step": 1904 }, { "epoch": 0.042018377870734556, "grad_norm": 0.203125, "learning_rate": 0.0004995930142743396, "loss": 2.5941, "step": 1905 }, { "epoch": 0.04204043476200528, "grad_norm": 0.2060546875, "learning_rate": 0.0004995923486301416, "loss": 2.6764, "step": 1906 }, { "epoch": 0.04206249165327601, "grad_norm": 0.2060546875, "learning_rate": 0.0004995916824424863, "loss": 2.6697, "step": 1907 }, { "epoch": 0.042084548544546733, "grad_norm": 0.2001953125, "learning_rate": 0.0004995910157113751, "loss": 2.6134, "step": 1908 }, { "epoch": 0.042106605435817464, "grad_norm": 0.1982421875, "learning_rate": 0.0004995903484368096, "loss": 2.6798, "step": 1909 }, { "epoch": 0.04212866232708819, "grad_norm": 0.2021484375, "learning_rate": 0.0004995896806187913, "loss": 2.5775, "step": 1910 }, { "epoch": 0.04215071921835892, "grad_norm": 0.1923828125, "learning_rate": 0.0004995890122573215, "loss": 2.5489, "step": 1911 }, { "epoch": 0.04217277610962964, "grad_norm": 0.2001953125, "learning_rate": 0.0004995883433524017, "loss": 2.6272, "step": 1912 }, { "epoch": 0.04219483300090037, "grad_norm": 0.1982421875, "learning_rate": 0.0004995876739040334, "loss": 2.6244, "step": 1913 }, { "epoch": 0.042216889892171094, "grad_norm": 0.20703125, "learning_rate": 0.000499587003912218, "loss": 2.591, "step": 1914 }, { "epoch": 0.042238946783441825, "grad_norm": 0.193359375, "learning_rate": 0.000499586333376957, "loss": 2.6378, "step": 1915 }, { "epoch": 0.04226100367471255, "grad_norm": 0.1982421875, "learning_rate": 0.0004995856622982518, "loss": 2.6569, "step": 1916 }, { "epoch": 0.04228306056598328, "grad_norm": 0.1962890625, "learning_rate": 0.0004995849906761039, "loss": 2.6981, "step": 1917 }, { "epoch": 0.042305117457254, "grad_norm": 0.205078125, "learning_rate": 0.0004995843185105149, "loss": 2.6052, "step": 1918 }, { "epoch": 0.04232717434852473, "grad_norm": 0.201171875, "learning_rate": 0.000499583645801486, "loss": 2.6194, "step": 1919 }, { "epoch": 0.042349231239795455, "grad_norm": 0.2041015625, "learning_rate": 0.0004995829725490189, "loss": 2.6594, "step": 1920 }, { "epoch": 0.042371288131066186, "grad_norm": 0.21484375, "learning_rate": 0.0004995822987531148, "loss": 2.6865, "step": 1921 }, { "epoch": 0.04239334502233691, "grad_norm": 0.19921875, "learning_rate": 0.0004995816244137755, "loss": 2.6342, "step": 1922 }, { "epoch": 0.04241540191360764, "grad_norm": 0.2099609375, "learning_rate": 0.0004995809495310023, "loss": 2.7216, "step": 1923 }, { "epoch": 0.04243745880487836, "grad_norm": 0.2041015625, "learning_rate": 0.0004995802741047966, "loss": 2.7292, "step": 1924 }, { "epoch": 0.04245951569614909, "grad_norm": 0.203125, "learning_rate": 0.00049957959813516, "loss": 2.63, "step": 1925 }, { "epoch": 0.042481572587419816, "grad_norm": 0.2021484375, "learning_rate": 0.0004995789216220938, "loss": 2.6512, "step": 1926 }, { "epoch": 0.04250362947869055, "grad_norm": 0.19921875, "learning_rate": 0.0004995782445655996, "loss": 2.5201, "step": 1927 }, { "epoch": 0.04252568636996127, "grad_norm": 0.19921875, "learning_rate": 0.000499577566965679, "loss": 2.6274, "step": 1928 }, { "epoch": 0.042547743261232, "grad_norm": 0.2001953125, "learning_rate": 0.0004995768888223332, "loss": 2.6663, "step": 1929 }, { "epoch": 0.042569800152502724, "grad_norm": 0.2080078125, "learning_rate": 0.0004995762101355638, "loss": 2.7116, "step": 1930 }, { "epoch": 0.042591857043773454, "grad_norm": 0.2021484375, "learning_rate": 0.0004995755309053724, "loss": 2.6512, "step": 1931 }, { "epoch": 0.04261391393504418, "grad_norm": 0.2080078125, "learning_rate": 0.0004995748511317603, "loss": 2.5821, "step": 1932 }, { "epoch": 0.04263597082631491, "grad_norm": 0.201171875, "learning_rate": 0.000499574170814729, "loss": 2.5868, "step": 1933 }, { "epoch": 0.04265802771758563, "grad_norm": 0.203125, "learning_rate": 0.0004995734899542802, "loss": 2.6135, "step": 1934 }, { "epoch": 0.04268008460885636, "grad_norm": 0.2119140625, "learning_rate": 0.0004995728085504151, "loss": 2.6892, "step": 1935 }, { "epoch": 0.042702141500127085, "grad_norm": 0.19921875, "learning_rate": 0.0004995721266031352, "loss": 2.7238, "step": 1936 }, { "epoch": 0.042724198391397815, "grad_norm": 0.2021484375, "learning_rate": 0.0004995714441124422, "loss": 2.6076, "step": 1937 }, { "epoch": 0.04274625528266854, "grad_norm": 0.205078125, "learning_rate": 0.0004995707610783375, "loss": 2.6075, "step": 1938 }, { "epoch": 0.04276831217393927, "grad_norm": 0.193359375, "learning_rate": 0.0004995700775008225, "loss": 2.6188, "step": 1939 }, { "epoch": 0.04279036906520999, "grad_norm": 0.208984375, "learning_rate": 0.0004995693933798987, "loss": 2.5801, "step": 1940 }, { "epoch": 0.04281242595648072, "grad_norm": 0.193359375, "learning_rate": 0.0004995687087155677, "loss": 2.6337, "step": 1941 }, { "epoch": 0.042834482847751446, "grad_norm": 0.20703125, "learning_rate": 0.0004995680235078308, "loss": 2.6366, "step": 1942 }, { "epoch": 0.042856539739022176, "grad_norm": 0.1962890625, "learning_rate": 0.0004995673377566897, "loss": 2.6839, "step": 1943 }, { "epoch": 0.0428785966302929, "grad_norm": 0.2001953125, "learning_rate": 0.0004995666514621458, "loss": 2.533, "step": 1944 }, { "epoch": 0.04290065352156363, "grad_norm": 0.1962890625, "learning_rate": 0.0004995659646242007, "loss": 2.6156, "step": 1945 }, { "epoch": 0.04292271041283435, "grad_norm": 0.1982421875, "learning_rate": 0.0004995652772428557, "loss": 2.6203, "step": 1946 }, { "epoch": 0.04294476730410508, "grad_norm": 0.1953125, "learning_rate": 0.0004995645893181124, "loss": 2.6489, "step": 1947 }, { "epoch": 0.042966824195375806, "grad_norm": 0.2001953125, "learning_rate": 0.0004995639008499723, "loss": 2.536, "step": 1948 }, { "epoch": 0.04298888108664654, "grad_norm": 0.201171875, "learning_rate": 0.0004995632118384369, "loss": 2.6271, "step": 1949 }, { "epoch": 0.04301093797791726, "grad_norm": 0.19921875, "learning_rate": 0.0004995625222835076, "loss": 2.5584, "step": 1950 }, { "epoch": 0.04303299486918799, "grad_norm": 0.20703125, "learning_rate": 0.0004995618321851861, "loss": 2.5461, "step": 1951 }, { "epoch": 0.043055051760458714, "grad_norm": 0.2021484375, "learning_rate": 0.0004995611415434738, "loss": 2.648, "step": 1952 }, { "epoch": 0.043077108651729444, "grad_norm": 0.1982421875, "learning_rate": 0.0004995604503583721, "loss": 2.6304, "step": 1953 }, { "epoch": 0.04309916554300017, "grad_norm": 0.19921875, "learning_rate": 0.0004995597586298828, "loss": 2.6268, "step": 1954 }, { "epoch": 0.0431212224342709, "grad_norm": 0.2041015625, "learning_rate": 0.000499559066358007, "loss": 2.5493, "step": 1955 }, { "epoch": 0.04314327932554162, "grad_norm": 0.1982421875, "learning_rate": 0.0004995583735427464, "loss": 2.617, "step": 1956 }, { "epoch": 0.04316533621681235, "grad_norm": 0.20703125, "learning_rate": 0.0004995576801841027, "loss": 2.6328, "step": 1957 }, { "epoch": 0.043187393108083075, "grad_norm": 0.2001953125, "learning_rate": 0.0004995569862820772, "loss": 2.6944, "step": 1958 }, { "epoch": 0.043209449999353805, "grad_norm": 0.1962890625, "learning_rate": 0.0004995562918366714, "loss": 2.6489, "step": 1959 }, { "epoch": 0.04323150689062453, "grad_norm": 0.208984375, "learning_rate": 0.0004995555968478869, "loss": 2.6122, "step": 1960 }, { "epoch": 0.04325356378189526, "grad_norm": 0.20703125, "learning_rate": 0.0004995549013157251, "loss": 2.6372, "step": 1961 }, { "epoch": 0.04327562067316598, "grad_norm": 0.205078125, "learning_rate": 0.0004995542052401876, "loss": 2.6644, "step": 1962 }, { "epoch": 0.04329767756443671, "grad_norm": 0.2021484375, "learning_rate": 0.0004995535086212759, "loss": 2.6722, "step": 1963 }, { "epoch": 0.043319734455707436, "grad_norm": 0.2001953125, "learning_rate": 0.0004995528114589916, "loss": 2.656, "step": 1964 }, { "epoch": 0.043341791346978166, "grad_norm": 0.193359375, "learning_rate": 0.0004995521137533361, "loss": 2.5855, "step": 1965 }, { "epoch": 0.04336384823824889, "grad_norm": 0.2001953125, "learning_rate": 0.0004995514155043109, "loss": 2.6658, "step": 1966 } ], "logging_steps": 1, "max_steps": 68007, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 681, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.040064351450624e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }