{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 126.08, "eval_steps": 100, "global_step": 788000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016, "grad_norm": 1.1145988702774048, "learning_rate": 5.94e-05, "loss": 129.2138, "step": 100 }, { "epoch": 0.032, "grad_norm": 0.3314463794231415, "learning_rate": 0.0001194, "loss": 147.1265, "step": 200 }, { "epoch": 0.048, "grad_norm": 0.30200499296188354, "learning_rate": 0.00017939999999999997, "loss": 147.1375, "step": 300 }, { "epoch": 0.064, "grad_norm": 0.20890414714813232, "learning_rate": 0.0002394, "loss": 141.107, "step": 400 }, { "epoch": 0.08, "grad_norm": 0.19977182149887085, "learning_rate": 0.00029939999999999996, "loss": 130.2311, "step": 500 }, { "epoch": 0.096, "grad_norm": 0.1718936711549759, "learning_rate": 0.00029999762390495616, "loss": 116.9488, "step": 600 }, { "epoch": 0.112, "grad_norm": 0.21659506857395172, "learning_rate": 0.00029999522380895233, "loss": 106.3702, "step": 700 }, { "epoch": 0.128, "grad_norm": 0.19612713158130646, "learning_rate": 0.0002999928237129485, "loss": 98.8033, "step": 800 }, { "epoch": 0.144, "grad_norm": 0.18958421051502228, "learning_rate": 0.00029999042361694467, "loss": 94.6761, "step": 900 }, { "epoch": 0.16, "grad_norm": 0.25341877341270447, "learning_rate": 0.00029998802352094084, "loss": 88.2629, "step": 1000 }, { "epoch": 0.176, "grad_norm": 0.1762186735868454, "learning_rate": 0.000299985623424937, "loss": 87.4362, "step": 1100 }, { "epoch": 0.192, "grad_norm": 0.23407000303268433, "learning_rate": 0.0002999832233289331, "loss": 85.7211, "step": 1200 }, { "epoch": 0.208, "grad_norm": 0.23202084004878998, "learning_rate": 0.0002999808232329293, "loss": 81.4749, "step": 1300 }, { "epoch": 0.224, "grad_norm": 0.1819111853837967, "learning_rate": 0.00029997842313692546, "loss": 80.3999, "step": 1400 }, { "epoch": 0.24, "grad_norm": 0.16154050827026367, "learning_rate": 0.00029997602304092163, "loss": 80.5113, "step": 1500 }, { "epoch": 0.256, "grad_norm": 0.20147816836833954, "learning_rate": 0.0002999736229449178, "loss": 77.4306, "step": 1600 }, { "epoch": 0.272, "grad_norm": 0.2032860815525055, "learning_rate": 0.0002999712228489139, "loss": 76.3299, "step": 1700 }, { "epoch": 0.288, "grad_norm": 0.20103086531162262, "learning_rate": 0.0002999688227529101, "loss": 77.0755, "step": 1800 }, { "epoch": 0.304, "grad_norm": 0.1930929720401764, "learning_rate": 0.00029996642265690625, "loss": 74.2643, "step": 1900 }, { "epoch": 0.32, "grad_norm": 0.21013671159744263, "learning_rate": 0.0002999640225609024, "loss": 75.9168, "step": 2000 }, { "epoch": 0.336, "grad_norm": 0.2554585635662079, "learning_rate": 0.0002999616224648986, "loss": 75.2005, "step": 2100 }, { "epoch": 0.352, "grad_norm": 0.21000510454177856, "learning_rate": 0.00029995922236889476, "loss": 74.1565, "step": 2200 }, { "epoch": 0.368, "grad_norm": 0.2096049040555954, "learning_rate": 0.0002999568222728909, "loss": 73.3684, "step": 2300 }, { "epoch": 0.384, "grad_norm": 0.2806188464164734, "learning_rate": 0.00029995442217688705, "loss": 73.9772, "step": 2400 }, { "epoch": 0.4, "grad_norm": 0.17476481199264526, "learning_rate": 0.0002999520220808832, "loss": 73.7125, "step": 2500 }, { "epoch": 0.416, "grad_norm": 0.26867198944091797, "learning_rate": 0.0002999496219848794, "loss": 72.5119, "step": 2600 }, { "epoch": 0.432, "grad_norm": 0.1896703690290451, "learning_rate": 0.00029994722188887555, "loss": 72.6918, "step": 2700 }, { "epoch": 0.448, "grad_norm": 0.2521280348300934, "learning_rate": 0.00029994482179287167, "loss": 72.1229, "step": 2800 }, { "epoch": 0.464, "grad_norm": 0.20409554243087769, "learning_rate": 0.00029994242169686784, "loss": 72.3524, "step": 2900 }, { "epoch": 0.48, "grad_norm": 0.1911861002445221, "learning_rate": 0.000299940021600864, "loss": 70.9714, "step": 3000 }, { "epoch": 0.496, "grad_norm": 0.21338903903961182, "learning_rate": 0.0002999376215048602, "loss": 69.5716, "step": 3100 }, { "epoch": 0.512, "grad_norm": 0.20922720432281494, "learning_rate": 0.00029993522140885634, "loss": 70.1812, "step": 3200 }, { "epoch": 0.528, "grad_norm": 0.2678331434726715, "learning_rate": 0.0002999328213128525, "loss": 68.8041, "step": 3300 }, { "epoch": 0.544, "grad_norm": 0.25610026717185974, "learning_rate": 0.00029993042121684863, "loss": 71.186, "step": 3400 }, { "epoch": 0.56, "grad_norm": 0.23267875611782074, "learning_rate": 0.0002999280211208448, "loss": 68.9921, "step": 3500 }, { "epoch": 0.576, "grad_norm": 0.23876765370368958, "learning_rate": 0.00029992562102484097, "loss": 69.738, "step": 3600 }, { "epoch": 0.592, "grad_norm": 0.1865028291940689, "learning_rate": 0.00029992322092883714, "loss": 68.9813, "step": 3700 }, { "epoch": 0.608, "grad_norm": 0.21735595166683197, "learning_rate": 0.0002999208208328333, "loss": 67.5755, "step": 3800 }, { "epoch": 0.624, "grad_norm": 0.16909943521022797, "learning_rate": 0.0002999184207368294, "loss": 66.3015, "step": 3900 }, { "epoch": 0.64, "grad_norm": 0.19918648898601532, "learning_rate": 0.0002999160206408256, "loss": 67.3844, "step": 4000 }, { "epoch": 0.656, "grad_norm": 0.22282840311527252, "learning_rate": 0.00029991362054482176, "loss": 66.0008, "step": 4100 }, { "epoch": 0.672, "grad_norm": 0.19900047779083252, "learning_rate": 0.00029991122044881793, "loss": 66.029, "step": 4200 }, { "epoch": 0.688, "grad_norm": 0.2067142128944397, "learning_rate": 0.0002999088203528141, "loss": 65.7196, "step": 4300 }, { "epoch": 0.704, "grad_norm": 0.24062038958072662, "learning_rate": 0.00029990642025681027, "loss": 66.7571, "step": 4400 }, { "epoch": 0.72, "grad_norm": 0.2454902082681656, "learning_rate": 0.0002999040201608064, "loss": 65.7736, "step": 4500 }, { "epoch": 0.736, "grad_norm": 0.24499955773353577, "learning_rate": 0.00029990162006480255, "loss": 65.498, "step": 4600 }, { "epoch": 0.752, "grad_norm": 0.2421354055404663, "learning_rate": 0.0002998992199687987, "loss": 65.9207, "step": 4700 }, { "epoch": 0.768, "grad_norm": 0.1900254338979721, "learning_rate": 0.0002998968198727949, "loss": 63.4017, "step": 4800 }, { "epoch": 0.784, "grad_norm": 0.21995197236537933, "learning_rate": 0.00029989441977679106, "loss": 65.4319, "step": 4900 }, { "epoch": 0.8, "grad_norm": 0.2170778065919876, "learning_rate": 0.00029989201968078717, "loss": 64.1503, "step": 5000 }, { "epoch": 0.816, "grad_norm": 0.29141783714294434, "learning_rate": 0.00029988961958478334, "loss": 63.4509, "step": 5100 }, { "epoch": 0.832, "grad_norm": 0.2149534821510315, "learning_rate": 0.0002998872194887795, "loss": 63.8549, "step": 5200 }, { "epoch": 0.848, "grad_norm": 0.2090325504541397, "learning_rate": 0.0002998848193927757, "loss": 62.5135, "step": 5300 }, { "epoch": 0.864, "grad_norm": 0.19093327224254608, "learning_rate": 0.00029988241929677185, "loss": 64.1856, "step": 5400 }, { "epoch": 0.88, "grad_norm": 0.24676312506198883, "learning_rate": 0.000299880019200768, "loss": 62.8992, "step": 5500 }, { "epoch": 0.896, "grad_norm": 0.2047237902879715, "learning_rate": 0.00029987761910476413, "loss": 63.5, "step": 5600 }, { "epoch": 0.912, "grad_norm": 0.2169736623764038, "learning_rate": 0.0002998752190087603, "loss": 63.2706, "step": 5700 }, { "epoch": 0.928, "grad_norm": 0.2212333083152771, "learning_rate": 0.00029987281891275647, "loss": 62.8563, "step": 5800 }, { "epoch": 0.944, "grad_norm": 0.22105100750923157, "learning_rate": 0.00029987041881675264, "loss": 61.4049, "step": 5900 }, { "epoch": 0.96, "grad_norm": 0.21934692561626434, "learning_rate": 0.0002998680187207488, "loss": 61.2102, "step": 6000 }, { "epoch": 0.976, "grad_norm": 0.231471449136734, "learning_rate": 0.0002998656186247449, "loss": 61.161, "step": 6100 }, { "epoch": 0.992, "grad_norm": 0.20244845747947693, "learning_rate": 0.0002998632185287411, "loss": 61.5284, "step": 6200 }, { "epoch": 1.008, "grad_norm": 0.31659385561943054, "learning_rate": 0.00029986081843273726, "loss": 59.6197, "step": 6300 }, { "epoch": 1.024, "grad_norm": 0.22351042926311493, "learning_rate": 0.00029985841833673343, "loss": 60.8731, "step": 6400 }, { "epoch": 1.04, "grad_norm": 0.20470276474952698, "learning_rate": 0.0002998560182407296, "loss": 60.5648, "step": 6500 }, { "epoch": 1.056, "grad_norm": 0.17768125236034393, "learning_rate": 0.00029985361814472577, "loss": 59.2689, "step": 6600 }, { "epoch": 1.072, "grad_norm": 0.20775848627090454, "learning_rate": 0.0002998512180487219, "loss": 58.2776, "step": 6700 }, { "epoch": 1.088, "grad_norm": 0.2682810127735138, "learning_rate": 0.00029984881795271806, "loss": 60.5164, "step": 6800 }, { "epoch": 1.104, "grad_norm": 0.22458679974079132, "learning_rate": 0.0002998464178567142, "loss": 60.1217, "step": 6900 }, { "epoch": 1.12, "grad_norm": 0.22781415283679962, "learning_rate": 0.0002998440177607104, "loss": 58.191, "step": 7000 }, { "epoch": 1.1360000000000001, "grad_norm": 0.2532273232936859, "learning_rate": 0.00029984161766470656, "loss": 58.8972, "step": 7100 }, { "epoch": 1.152, "grad_norm": 0.2014983743429184, "learning_rate": 0.00029983921756870273, "loss": 58.7748, "step": 7200 }, { "epoch": 1.168, "grad_norm": 0.19773030281066895, "learning_rate": 0.0002998368174726989, "loss": 57.9689, "step": 7300 }, { "epoch": 1.184, "grad_norm": 0.245356023311615, "learning_rate": 0.00029983441737669507, "loss": 57.855, "step": 7400 }, { "epoch": 1.2, "grad_norm": 0.2565186023712158, "learning_rate": 0.00029983201728069124, "loss": 56.8152, "step": 7500 }, { "epoch": 1.216, "grad_norm": 0.17781591415405273, "learning_rate": 0.00029982961718468735, "loss": 55.2139, "step": 7600 }, { "epoch": 1.232, "grad_norm": 0.21849973499774933, "learning_rate": 0.0002998272170886835, "loss": 55.9843, "step": 7700 }, { "epoch": 1.248, "grad_norm": 0.17623578011989594, "learning_rate": 0.0002998248169926797, "loss": 57.3084, "step": 7800 }, { "epoch": 1.264, "grad_norm": 0.22286267578601837, "learning_rate": 0.00029982241689667586, "loss": 56.4191, "step": 7900 }, { "epoch": 1.28, "grad_norm": 0.20891787111759186, "learning_rate": 0.00029982001680067203, "loss": 56.4775, "step": 8000 }, { "epoch": 1.296, "grad_norm": 0.19925983250141144, "learning_rate": 0.00029981761670466815, "loss": 55.0521, "step": 8100 }, { "epoch": 1.312, "grad_norm": 0.22015956044197083, "learning_rate": 0.0002998152166086643, "loss": 55.6771, "step": 8200 }, { "epoch": 1.328, "grad_norm": 0.24997876584529877, "learning_rate": 0.0002998128165126605, "loss": 53.8931, "step": 8300 }, { "epoch": 1.3439999999999999, "grad_norm": 0.2933981418609619, "learning_rate": 0.00029981041641665665, "loss": 56.6028, "step": 8400 }, { "epoch": 1.3599999999999999, "grad_norm": 0.1963578313589096, "learning_rate": 0.0002998080163206528, "loss": 54.5404, "step": 8500 }, { "epoch": 1.376, "grad_norm": 0.21487855911254883, "learning_rate": 0.000299805616224649, "loss": 54.2586, "step": 8600 }, { "epoch": 1.392, "grad_norm": 0.21776583790779114, "learning_rate": 0.0002998032161286451, "loss": 53.9896, "step": 8700 }, { "epoch": 1.408, "grad_norm": 0.2172229140996933, "learning_rate": 0.0002998008160326413, "loss": 53.8424, "step": 8800 }, { "epoch": 1.424, "grad_norm": 0.23105138540267944, "learning_rate": 0.00029979841593663745, "loss": 54.1874, "step": 8900 }, { "epoch": 1.44, "grad_norm": 0.18797878921031952, "learning_rate": 0.0002997960158406336, "loss": 53.3869, "step": 9000 }, { "epoch": 1.456, "grad_norm": 0.20597319304943085, "learning_rate": 0.0002997936157446298, "loss": 53.7132, "step": 9100 }, { "epoch": 1.472, "grad_norm": 0.21674391627311707, "learning_rate": 0.00029979121564862595, "loss": 52.2728, "step": 9200 }, { "epoch": 1.488, "grad_norm": 0.2250959277153015, "learning_rate": 0.00029978881555262207, "loss": 53.3457, "step": 9300 }, { "epoch": 1.504, "grad_norm": 0.19289842247962952, "learning_rate": 0.00029978641545661824, "loss": 52.898, "step": 9400 }, { "epoch": 1.52, "grad_norm": 0.2215307652950287, "learning_rate": 0.0002997840153606144, "loss": 52.8446, "step": 9500 }, { "epoch": 1.536, "grad_norm": 0.19949446618556976, "learning_rate": 0.0002997816152646106, "loss": 51.9649, "step": 9600 }, { "epoch": 1.552, "grad_norm": 0.1753661036491394, "learning_rate": 0.00029977921516860675, "loss": 51.5562, "step": 9700 }, { "epoch": 1.568, "grad_norm": 0.22938130795955658, "learning_rate": 0.00029977681507260286, "loss": 52.4538, "step": 9800 }, { "epoch": 1.584, "grad_norm": 0.255227655172348, "learning_rate": 0.00029977441497659903, "loss": 50.8902, "step": 9900 }, { "epoch": 1.6, "grad_norm": 0.24369871616363525, "learning_rate": 0.0002997720148805952, "loss": 50.8092, "step": 10000 }, { "epoch": 1.616, "grad_norm": 0.22126376628875732, "learning_rate": 0.0002997696387855514, "loss": 51.0513, "step": 10100 }, { "epoch": 1.6320000000000001, "grad_norm": 0.199215367436409, "learning_rate": 0.00029976723868954756, "loss": 49.6234, "step": 10200 }, { "epoch": 1.6480000000000001, "grad_norm": 0.22058773040771484, "learning_rate": 0.0002997648385935437, "loss": 51.2333, "step": 10300 }, { "epoch": 1.6640000000000001, "grad_norm": 0.26106688380241394, "learning_rate": 0.0002997624384975399, "loss": 49.6582, "step": 10400 }, { "epoch": 1.6800000000000002, "grad_norm": 0.23437049984931946, "learning_rate": 0.00029976003840153606, "loss": 49.6097, "step": 10500 }, { "epoch": 1.696, "grad_norm": 0.1709340363740921, "learning_rate": 0.00029975763830553223, "loss": 49.9149, "step": 10600 }, { "epoch": 1.712, "grad_norm": 0.2278878539800644, "learning_rate": 0.00029975523820952835, "loss": 50.2495, "step": 10700 }, { "epoch": 1.728, "grad_norm": 0.25324809551239014, "learning_rate": 0.0002997528381135245, "loss": 48.3701, "step": 10800 }, { "epoch": 1.744, "grad_norm": 0.21413564682006836, "learning_rate": 0.0002997504380175207, "loss": 48.8447, "step": 10900 }, { "epoch": 1.76, "grad_norm": 0.2975509464740753, "learning_rate": 0.00029974803792151686, "loss": 50.0095, "step": 11000 }, { "epoch": 1.776, "grad_norm": 0.19792191684246063, "learning_rate": 0.00029974566182647304, "loss": 49.2986, "step": 11100 }, { "epoch": 1.792, "grad_norm": 0.2350345253944397, "learning_rate": 0.0002997432617304692, "loss": 48.7027, "step": 11200 }, { "epoch": 1.808, "grad_norm": 0.19396322965621948, "learning_rate": 0.00029974086163446533, "loss": 47.9713, "step": 11300 }, { "epoch": 1.8239999999999998, "grad_norm": 0.2414630949497223, "learning_rate": 0.0002997384615384615, "loss": 48.7363, "step": 11400 }, { "epoch": 1.8399999999999999, "grad_norm": 0.2678147554397583, "learning_rate": 0.00029973606144245767, "loss": 48.4818, "step": 11500 }, { "epoch": 1.8559999999999999, "grad_norm": 0.19563674926757812, "learning_rate": 0.00029973366134645384, "loss": 48.2693, "step": 11600 }, { "epoch": 1.8719999999999999, "grad_norm": 0.22531713545322418, "learning_rate": 0.00029973126125045, "loss": 47.758, "step": 11700 }, { "epoch": 1.888, "grad_norm": 0.22199738025665283, "learning_rate": 0.0002997288611544461, "loss": 46.9644, "step": 11800 }, { "epoch": 1.904, "grad_norm": 0.253896027803421, "learning_rate": 0.0002997264610584423, "loss": 46.5968, "step": 11900 }, { "epoch": 1.92, "grad_norm": 0.18806882202625275, "learning_rate": 0.00029972406096243846, "loss": 48.2712, "step": 12000 }, { "epoch": 1.936, "grad_norm": 0.22023610770702362, "learning_rate": 0.00029972166086643463, "loss": 47.2612, "step": 12100 }, { "epoch": 1.952, "grad_norm": 0.213795468211174, "learning_rate": 0.0002997192607704308, "loss": 45.9592, "step": 12200 }, { "epoch": 1.968, "grad_norm": 0.19787845015525818, "learning_rate": 0.00029971686067442697, "loss": 47.5647, "step": 12300 }, { "epoch": 1.984, "grad_norm": 0.19648146629333496, "learning_rate": 0.0002997144605784231, "loss": 46.8397, "step": 12400 }, { "epoch": 2.0, "grad_norm": 0.1904546618461609, "learning_rate": 0.00029971206048241925, "loss": 46.2783, "step": 12500 }, { "epoch": 2.016, "grad_norm": 0.23515231907367706, "learning_rate": 0.0002997096603864154, "loss": 46.5475, "step": 12600 }, { "epoch": 2.032, "grad_norm": 0.21483579277992249, "learning_rate": 0.0002997072602904116, "loss": 44.2442, "step": 12700 }, { "epoch": 2.048, "grad_norm": 0.2563657760620117, "learning_rate": 0.00029970486019440776, "loss": 46.1955, "step": 12800 }, { "epoch": 2.064, "grad_norm": 0.20812326669692993, "learning_rate": 0.00029970246009840387, "loss": 45.5704, "step": 12900 }, { "epoch": 2.08, "grad_norm": 0.2190365344285965, "learning_rate": 0.00029970006000240004, "loss": 45.7909, "step": 13000 }, { "epoch": 2.096, "grad_norm": 0.2379041463136673, "learning_rate": 0.0002996976599063962, "loss": 46.2324, "step": 13100 }, { "epoch": 2.112, "grad_norm": 0.2170909345149994, "learning_rate": 0.0002996952598103924, "loss": 44.766, "step": 13200 }, { "epoch": 2.128, "grad_norm": 0.15927261114120483, "learning_rate": 0.00029969285971438855, "loss": 43.669, "step": 13300 }, { "epoch": 2.144, "grad_norm": 0.22271278500556946, "learning_rate": 0.0002996904596183847, "loss": 45.0739, "step": 13400 }, { "epoch": 2.16, "grad_norm": 0.17792785167694092, "learning_rate": 0.0002996880595223809, "loss": 43.8963, "step": 13500 }, { "epoch": 2.176, "grad_norm": 0.28457048535346985, "learning_rate": 0.00029968565942637706, "loss": 44.6317, "step": 13600 }, { "epoch": 2.192, "grad_norm": 0.19491800665855408, "learning_rate": 0.0002996832593303732, "loss": 43.8541, "step": 13700 }, { "epoch": 2.208, "grad_norm": 0.21633195877075195, "learning_rate": 0.00029968085923436934, "loss": 43.2844, "step": 13800 }, { "epoch": 2.224, "grad_norm": 0.2146127074956894, "learning_rate": 0.0002996784591383655, "loss": 45.0415, "step": 13900 }, { "epoch": 2.24, "grad_norm": 0.2204289436340332, "learning_rate": 0.0002996760590423617, "loss": 44.2757, "step": 14000 }, { "epoch": 2.2560000000000002, "grad_norm": 0.3051868677139282, "learning_rate": 0.00029967365894635785, "loss": 42.7227, "step": 14100 }, { "epoch": 2.2720000000000002, "grad_norm": 0.23641665279865265, "learning_rate": 0.000299671258850354, "loss": 44.0578, "step": 14200 }, { "epoch": 2.288, "grad_norm": 0.18554934859275818, "learning_rate": 0.0002996688587543502, "loss": 42.5159, "step": 14300 }, { "epoch": 2.304, "grad_norm": 0.24741467833518982, "learning_rate": 0.0002996664586583463, "loss": 42.9106, "step": 14400 }, { "epoch": 2.32, "grad_norm": 0.18483412265777588, "learning_rate": 0.00029966405856234247, "loss": 42.2459, "step": 14500 }, { "epoch": 2.336, "grad_norm": 0.24359823763370514, "learning_rate": 0.00029966165846633864, "loss": 42.6733, "step": 14600 }, { "epoch": 2.352, "grad_norm": 0.20456752181053162, "learning_rate": 0.0002996592583703348, "loss": 41.5754, "step": 14700 }, { "epoch": 2.368, "grad_norm": 0.24165822565555573, "learning_rate": 0.000299656858274331, "loss": 43.6988, "step": 14800 }, { "epoch": 2.384, "grad_norm": 0.20422741770744324, "learning_rate": 0.0002996544581783271, "loss": 41.9116, "step": 14900 }, { "epoch": 2.4, "grad_norm": 0.2413185089826584, "learning_rate": 0.00029965205808232326, "loss": 41.8573, "step": 15000 }, { "epoch": 2.416, "grad_norm": 0.20443005859851837, "learning_rate": 0.00029964968198727945, "loss": 42.3368, "step": 15100 }, { "epoch": 2.432, "grad_norm": 0.21270470321178436, "learning_rate": 0.0002996472818912756, "loss": 40.336, "step": 15200 }, { "epoch": 2.448, "grad_norm": 0.21689313650131226, "learning_rate": 0.0002996448817952718, "loss": 40.5125, "step": 15300 }, { "epoch": 2.464, "grad_norm": 0.25577059388160706, "learning_rate": 0.00029964248169926796, "loss": 40.5761, "step": 15400 }, { "epoch": 2.48, "grad_norm": 0.2624509930610657, "learning_rate": 0.0002996400816032641, "loss": 40.3047, "step": 15500 }, { "epoch": 2.496, "grad_norm": 0.225455641746521, "learning_rate": 0.00029963768150726024, "loss": 40.3576, "step": 15600 }, { "epoch": 2.512, "grad_norm": 0.18313691020011902, "learning_rate": 0.0002996352814112564, "loss": 41.113, "step": 15700 }, { "epoch": 2.528, "grad_norm": 0.21272344887256622, "learning_rate": 0.0002996328813152526, "loss": 41.2563, "step": 15800 }, { "epoch": 2.544, "grad_norm": 0.23525486886501312, "learning_rate": 0.00029963048121924875, "loss": 41.2227, "step": 15900 }, { "epoch": 2.56, "grad_norm": 0.226985365152359, "learning_rate": 0.00029962808112324487, "loss": 40.6251, "step": 16000 }, { "epoch": 2.576, "grad_norm": 0.20422585308551788, "learning_rate": 0.00029962568102724103, "loss": 40.6449, "step": 16100 }, { "epoch": 2.592, "grad_norm": 0.18906068801879883, "learning_rate": 0.0002996232809312372, "loss": 39.5927, "step": 16200 }, { "epoch": 2.608, "grad_norm": 0.21180450916290283, "learning_rate": 0.0002996208808352334, "loss": 39.7467, "step": 16300 }, { "epoch": 2.624, "grad_norm": 0.2399897575378418, "learning_rate": 0.00029961848073922954, "loss": 38.9522, "step": 16400 }, { "epoch": 2.64, "grad_norm": 0.1941596120595932, "learning_rate": 0.0002996160806432257, "loss": 39.5798, "step": 16500 }, { "epoch": 2.656, "grad_norm": 0.19715790450572968, "learning_rate": 0.0002996136805472218, "loss": 39.9061, "step": 16600 }, { "epoch": 2.672, "grad_norm": 0.22090336680412292, "learning_rate": 0.00029961128045121805, "loss": 39.6083, "step": 16700 }, { "epoch": 2.6879999999999997, "grad_norm": 0.26035964488983154, "learning_rate": 0.00029960890435617424, "loss": 39.3414, "step": 16800 }, { "epoch": 2.7039999999999997, "grad_norm": 0.21888568997383118, "learning_rate": 0.00029960650426017035, "loss": 38.3817, "step": 16900 }, { "epoch": 2.7199999999999998, "grad_norm": 0.29924601316452026, "learning_rate": 0.0002996041041641665, "loss": 38.3896, "step": 17000 }, { "epoch": 2.7359999999999998, "grad_norm": 0.20395514369010925, "learning_rate": 0.0002996017040681627, "loss": 38.8915, "step": 17100 }, { "epoch": 2.752, "grad_norm": 0.20730023086071014, "learning_rate": 0.00029959930397215886, "loss": 38.9281, "step": 17200 }, { "epoch": 2.768, "grad_norm": 0.23472309112548828, "learning_rate": 0.00029959690387615503, "loss": 39.371, "step": 17300 }, { "epoch": 2.784, "grad_norm": 0.2272721529006958, "learning_rate": 0.0002995945037801512, "loss": 38.7238, "step": 17400 }, { "epoch": 2.8, "grad_norm": 0.20280113816261292, "learning_rate": 0.0002995921036841473, "loss": 38.1639, "step": 17500 }, { "epoch": 2.816, "grad_norm": 0.21985846757888794, "learning_rate": 0.0002995897035881435, "loss": 38.2459, "step": 17600 }, { "epoch": 2.832, "grad_norm": 0.22791948914527893, "learning_rate": 0.00029958730349213965, "loss": 38.365, "step": 17700 }, { "epoch": 2.848, "grad_norm": 0.218161940574646, "learning_rate": 0.0002995849033961358, "loss": 37.7998, "step": 17800 }, { "epoch": 2.864, "grad_norm": 0.23389916121959686, "learning_rate": 0.000299582503300132, "loss": 38.0078, "step": 17900 }, { "epoch": 2.88, "grad_norm": 0.20153094828128815, "learning_rate": 0.0002995801032041281, "loss": 37.1053, "step": 18000 }, { "epoch": 2.896, "grad_norm": 0.231399804353714, "learning_rate": 0.0002995777031081243, "loss": 37.6589, "step": 18100 }, { "epoch": 2.912, "grad_norm": 0.19814245402812958, "learning_rate": 0.00029957530301212044, "loss": 36.8171, "step": 18200 }, { "epoch": 2.928, "grad_norm": 0.22390811145305634, "learning_rate": 0.0002995729029161166, "loss": 36.6616, "step": 18300 }, { "epoch": 2.944, "grad_norm": 0.19958479702472687, "learning_rate": 0.0002995705028201128, "loss": 36.0232, "step": 18400 }, { "epoch": 2.96, "grad_norm": 0.1972126066684723, "learning_rate": 0.00029956810272410895, "loss": 36.5331, "step": 18500 }, { "epoch": 2.976, "grad_norm": 0.18196193873882294, "learning_rate": 0.00029956570262810507, "loss": 36.8888, "step": 18600 }, { "epoch": 2.992, "grad_norm": 0.17047256231307983, "learning_rate": 0.00029956330253210124, "loss": 36.5987, "step": 18700 }, { "epoch": 3.008, "grad_norm": 0.22138766944408417, "learning_rate": 0.0002995609024360974, "loss": 36.2777, "step": 18800 }, { "epoch": 3.024, "grad_norm": 0.22713051736354828, "learning_rate": 0.0002995585023400936, "loss": 35.768, "step": 18900 }, { "epoch": 3.04, "grad_norm": 0.1997511237859726, "learning_rate": 0.00029955610224408974, "loss": 35.872, "step": 19000 }, { "epoch": 3.056, "grad_norm": 0.19796296954154968, "learning_rate": 0.00029955370214808586, "loss": 34.8971, "step": 19100 }, { "epoch": 3.072, "grad_norm": 0.1922471821308136, "learning_rate": 0.00029955130205208203, "loss": 35.4181, "step": 19200 }, { "epoch": 3.088, "grad_norm": 0.18493038415908813, "learning_rate": 0.0002995489019560782, "loss": 36.3712, "step": 19300 }, { "epoch": 3.104, "grad_norm": 0.22148194909095764, "learning_rate": 0.00029954650186007437, "loss": 34.5266, "step": 19400 }, { "epoch": 3.12, "grad_norm": 0.19701820611953735, "learning_rate": 0.00029954410176407054, "loss": 35.2642, "step": 19500 }, { "epoch": 3.136, "grad_norm": 0.1763058602809906, "learning_rate": 0.0002995417016680667, "loss": 36.1582, "step": 19600 }, { "epoch": 3.152, "grad_norm": 0.2792583107948303, "learning_rate": 0.0002995393015720628, "loss": 34.755, "step": 19700 }, { "epoch": 3.168, "grad_norm": 0.20418234169483185, "learning_rate": 0.00029953690147605904, "loss": 34.5373, "step": 19800 }, { "epoch": 3.184, "grad_norm": 0.24839259684085846, "learning_rate": 0.0002995345013800552, "loss": 34.5007, "step": 19900 }, { "epoch": 3.2, "grad_norm": 0.22200001776218414, "learning_rate": 0.00029953210128405133, "loss": 34.8183, "step": 20000 }, { "epoch": 3.216, "grad_norm": 0.2371726781129837, "learning_rate": 0.0002995297011880475, "loss": 34.0164, "step": 20100 }, { "epoch": 3.232, "grad_norm": 0.21370230615139008, "learning_rate": 0.00029952730109204367, "loss": 34.8268, "step": 20200 }, { "epoch": 3.248, "grad_norm": 0.20940592885017395, "learning_rate": 0.00029952490099603983, "loss": 33.8475, "step": 20300 }, { "epoch": 3.2640000000000002, "grad_norm": 0.18580414354801178, "learning_rate": 0.000299522500900036, "loss": 33.8718, "step": 20400 }, { "epoch": 3.2800000000000002, "grad_norm": 0.2200319468975067, "learning_rate": 0.0002995201008040322, "loss": 33.9083, "step": 20500 }, { "epoch": 3.296, "grad_norm": 0.18141067028045654, "learning_rate": 0.0002995177007080283, "loss": 33.2878, "step": 20600 }, { "epoch": 3.312, "grad_norm": 0.24104055762290955, "learning_rate": 0.00029951530061202446, "loss": 34.4549, "step": 20700 }, { "epoch": 3.328, "grad_norm": 0.22455894947052002, "learning_rate": 0.0002995129005160206, "loss": 33.2184, "step": 20800 }, { "epoch": 3.344, "grad_norm": 0.19662746787071228, "learning_rate": 0.0002995105244209768, "loss": 33.836, "step": 20900 }, { "epoch": 3.36, "grad_norm": 0.2322922796010971, "learning_rate": 0.000299508124324973, "loss": 33.1089, "step": 21000 }, { "epoch": 3.376, "grad_norm": 0.2140241116285324, "learning_rate": 0.0002995057482299292, "loss": 32.8205, "step": 21100 }, { "epoch": 3.392, "grad_norm": 0.19320878386497498, "learning_rate": 0.00029950334813392534, "loss": 32.8251, "step": 21200 }, { "epoch": 3.408, "grad_norm": 0.18298691511154175, "learning_rate": 0.0002995009480379215, "loss": 33.2469, "step": 21300 }, { "epoch": 3.424, "grad_norm": 0.22385163605213165, "learning_rate": 0.0002994985479419177, "loss": 32.4997, "step": 21400 }, { "epoch": 3.44, "grad_norm": 0.2047736793756485, "learning_rate": 0.0002994961478459138, "loss": 33.5516, "step": 21500 }, { "epoch": 3.456, "grad_norm": 0.242600679397583, "learning_rate": 0.00029949374774990996, "loss": 33.4754, "step": 21600 }, { "epoch": 3.472, "grad_norm": 0.21438950300216675, "learning_rate": 0.00029949134765390613, "loss": 33.2636, "step": 21700 }, { "epoch": 3.488, "grad_norm": 0.16991284489631653, "learning_rate": 0.0002994889475579023, "loss": 32.2435, "step": 21800 }, { "epoch": 3.504, "grad_norm": 0.21854659914970398, "learning_rate": 0.00029948654746189847, "loss": 32.986, "step": 21900 }, { "epoch": 3.52, "grad_norm": 0.22860901057720184, "learning_rate": 0.0002994841473658946, "loss": 32.1887, "step": 22000 }, { "epoch": 3.536, "grad_norm": 0.20433278381824493, "learning_rate": 0.00029948174726989076, "loss": 32.1502, "step": 22100 }, { "epoch": 3.552, "grad_norm": 0.19475246965885162, "learning_rate": 0.0002994793471738869, "loss": 32.0844, "step": 22200 }, { "epoch": 3.568, "grad_norm": 0.20006608963012695, "learning_rate": 0.0002994769470778831, "loss": 32.5956, "step": 22300 }, { "epoch": 3.584, "grad_norm": 0.17535006999969482, "learning_rate": 0.00029947454698187926, "loss": 32.1812, "step": 22400 }, { "epoch": 3.6, "grad_norm": 0.22252418100833893, "learning_rate": 0.00029947214688587543, "loss": 30.6041, "step": 22500 }, { "epoch": 3.616, "grad_norm": 0.18110983073711395, "learning_rate": 0.00029946974678987155, "loss": 31.7236, "step": 22600 }, { "epoch": 3.632, "grad_norm": 0.227754145860672, "learning_rate": 0.0002994673466938677, "loss": 31.2323, "step": 22700 }, { "epoch": 3.648, "grad_norm": 0.19320198893547058, "learning_rate": 0.0002994649465978639, "loss": 31.4608, "step": 22800 }, { "epoch": 3.664, "grad_norm": 0.17932754755020142, "learning_rate": 0.00029946254650186006, "loss": 31.9613, "step": 22900 }, { "epoch": 3.68, "grad_norm": 0.19677236676216125, "learning_rate": 0.0002994601464058562, "loss": 30.9284, "step": 23000 }, { "epoch": 3.6959999999999997, "grad_norm": 0.22562915086746216, "learning_rate": 0.00029945774630985234, "loss": 30.7692, "step": 23100 }, { "epoch": 3.7119999999999997, "grad_norm": 0.19202880561351776, "learning_rate": 0.0002994553462138485, "loss": 31.2991, "step": 23200 }, { "epoch": 3.7279999999999998, "grad_norm": 0.22251880168914795, "learning_rate": 0.0002994529461178447, "loss": 29.574, "step": 23300 }, { "epoch": 3.7439999999999998, "grad_norm": 0.18705110251903534, "learning_rate": 0.00029945054602184085, "loss": 30.2693, "step": 23400 }, { "epoch": 3.76, "grad_norm": 0.18061533570289612, "learning_rate": 0.000299448145925837, "loss": 30.0086, "step": 23500 }, { "epoch": 3.776, "grad_norm": 0.23449186980724335, "learning_rate": 0.0002994457458298332, "loss": 29.9262, "step": 23600 }, { "epoch": 3.792, "grad_norm": 0.20259559154510498, "learning_rate": 0.0002994433457338293, "loss": 30.0139, "step": 23700 }, { "epoch": 3.808, "grad_norm": 0.21019335091114044, "learning_rate": 0.00029944094563782547, "loss": 30.853, "step": 23800 }, { "epoch": 3.824, "grad_norm": 0.17927643656730652, "learning_rate": 0.00029943854554182164, "loss": 30.7392, "step": 23900 }, { "epoch": 3.84, "grad_norm": 0.18862564861774445, "learning_rate": 0.0002994361454458178, "loss": 29.3096, "step": 24000 }, { "epoch": 3.856, "grad_norm": 0.22294782102108002, "learning_rate": 0.000299433745349814, "loss": 30.2642, "step": 24100 }, { "epoch": 3.872, "grad_norm": 0.20843671262264252, "learning_rate": 0.0002994313452538101, "loss": 29.4115, "step": 24200 }, { "epoch": 3.888, "grad_norm": 0.19081708788871765, "learning_rate": 0.00029942894515780626, "loss": 30.0382, "step": 24300 }, { "epoch": 3.904, "grad_norm": 0.18849343061447144, "learning_rate": 0.00029942654506180243, "loss": 29.6371, "step": 24400 }, { "epoch": 3.92, "grad_norm": 0.2084178924560547, "learning_rate": 0.0002994241449657986, "loss": 29.5353, "step": 24500 }, { "epoch": 3.936, "grad_norm": 0.179380401968956, "learning_rate": 0.00029942174486979477, "loss": 29.1119, "step": 24600 }, { "epoch": 3.952, "grad_norm": 0.2312467098236084, "learning_rate": 0.00029941934477379094, "loss": 29.3352, "step": 24700 }, { "epoch": 3.968, "grad_norm": 0.19268761575222015, "learning_rate": 0.00029941694467778705, "loss": 29.1584, "step": 24800 }, { "epoch": 3.984, "grad_norm": 0.19523601233959198, "learning_rate": 0.0002994145445817832, "loss": 29.3122, "step": 24900 }, { "epoch": 4.0, "grad_norm": 0.18007320165634155, "learning_rate": 0.0002994121444857794, "loss": 29.1468, "step": 25000 }, { "epoch": 4.016, "grad_norm": 0.19717352092266083, "learning_rate": 0.00029940974438977556, "loss": 29.2291, "step": 25100 }, { "epoch": 4.032, "grad_norm": 0.18931248784065247, "learning_rate": 0.00029940736829473175, "loss": 28.4476, "step": 25200 }, { "epoch": 4.048, "grad_norm": 0.17574016749858856, "learning_rate": 0.0002994049681987279, "loss": 27.6189, "step": 25300 }, { "epoch": 4.064, "grad_norm": 0.19395378232002258, "learning_rate": 0.0002994025681027241, "loss": 28.3701, "step": 25400 }, { "epoch": 4.08, "grad_norm": 0.1916889250278473, "learning_rate": 0.00029940016800672026, "loss": 28.3605, "step": 25500 }, { "epoch": 4.096, "grad_norm": 0.229524627327919, "learning_rate": 0.0002993977679107164, "loss": 27.7045, "step": 25600 }, { "epoch": 4.112, "grad_norm": 0.191976860165596, "learning_rate": 0.00029939536781471254, "loss": 27.6015, "step": 25700 }, { "epoch": 4.128, "grad_norm": 0.20611730217933655, "learning_rate": 0.0002993929917196688, "loss": 27.3844, "step": 25800 }, { "epoch": 4.144, "grad_norm": 0.21954050660133362, "learning_rate": 0.00029939059162366495, "loss": 27.6474, "step": 25900 }, { "epoch": 4.16, "grad_norm": 0.23369371891021729, "learning_rate": 0.00029938819152766107, "loss": 27.0846, "step": 26000 }, { "epoch": 4.176, "grad_norm": 0.19088931381702423, "learning_rate": 0.00029938579143165724, "loss": 27.0919, "step": 26100 }, { "epoch": 4.192, "grad_norm": 0.16385389864444733, "learning_rate": 0.0002993833913356534, "loss": 26.7928, "step": 26200 }, { "epoch": 4.208, "grad_norm": 0.22816230356693268, "learning_rate": 0.0002993809912396496, "loss": 26.597, "step": 26300 }, { "epoch": 4.224, "grad_norm": 0.22640523314476013, "learning_rate": 0.00029937859114364574, "loss": 26.6011, "step": 26400 }, { "epoch": 4.24, "grad_norm": 0.18119996786117554, "learning_rate": 0.0002993761910476419, "loss": 26.8414, "step": 26500 }, { "epoch": 4.256, "grad_norm": 0.2026926428079605, "learning_rate": 0.00029937379095163803, "loss": 26.9172, "step": 26600 }, { "epoch": 4.272, "grad_norm": 0.20275373756885529, "learning_rate": 0.0002993713908556342, "loss": 26.6568, "step": 26700 }, { "epoch": 4.288, "grad_norm": 0.2261670082807541, "learning_rate": 0.00029936899075963037, "loss": 27.1839, "step": 26800 }, { "epoch": 4.304, "grad_norm": 0.18411505222320557, "learning_rate": 0.00029936659066362654, "loss": 26.4785, "step": 26900 }, { "epoch": 4.32, "grad_norm": 0.2916317582130432, "learning_rate": 0.0002993641905676227, "loss": 26.5309, "step": 27000 }, { "epoch": 4.336, "grad_norm": 0.18537244200706482, "learning_rate": 0.0002993617904716188, "loss": 27.1665, "step": 27100 }, { "epoch": 4.352, "grad_norm": 0.16285920143127441, "learning_rate": 0.000299359390375615, "loss": 27.2424, "step": 27200 }, { "epoch": 4.368, "grad_norm": 0.15773992240428925, "learning_rate": 0.00029935699027961116, "loss": 26.5359, "step": 27300 }, { "epoch": 4.384, "grad_norm": 0.18703384697437286, "learning_rate": 0.00029935459018360733, "loss": 27.342, "step": 27400 }, { "epoch": 4.4, "grad_norm": 0.18335498869419098, "learning_rate": 0.0002993521900876035, "loss": 27.0257, "step": 27500 }, { "epoch": 4.416, "grad_norm": 0.19414934515953064, "learning_rate": 0.00029934978999159967, "loss": 26.2998, "step": 27600 }, { "epoch": 4.432, "grad_norm": 0.20599210262298584, "learning_rate": 0.0002993473898955958, "loss": 25.9369, "step": 27700 }, { "epoch": 4.448, "grad_norm": 0.27044299244880676, "learning_rate": 0.00029934498979959195, "loss": 26.4132, "step": 27800 }, { "epoch": 4.464, "grad_norm": 0.22304300963878632, "learning_rate": 0.0002993425897035881, "loss": 26.2685, "step": 27900 }, { "epoch": 4.48, "grad_norm": 0.20784711837768555, "learning_rate": 0.0002993401896075843, "loss": 25.336, "step": 28000 }, { "epoch": 4.496, "grad_norm": 0.2017608880996704, "learning_rate": 0.00029933778951158046, "loss": 26.1331, "step": 28100 }, { "epoch": 4.5120000000000005, "grad_norm": 0.18563418090343475, "learning_rate": 0.0002993353894155766, "loss": 25.6813, "step": 28200 }, { "epoch": 4.5280000000000005, "grad_norm": 0.21515151858329773, "learning_rate": 0.00029933298931957274, "loss": 26.2951, "step": 28300 }, { "epoch": 4.5440000000000005, "grad_norm": 0.20512834191322327, "learning_rate": 0.0002993305892235689, "loss": 25.2256, "step": 28400 }, { "epoch": 4.5600000000000005, "grad_norm": 0.23129431903362274, "learning_rate": 0.0002993281891275651, "loss": 25.7071, "step": 28500 }, { "epoch": 4.576, "grad_norm": 0.18308007717132568, "learning_rate": 0.00029932578903156125, "loss": 25.5192, "step": 28600 }, { "epoch": 4.592, "grad_norm": 0.217178076505661, "learning_rate": 0.0002993233889355574, "loss": 25.349, "step": 28700 }, { "epoch": 4.608, "grad_norm": 0.18590569496154785, "learning_rate": 0.00029932098883955353, "loss": 25.2593, "step": 28800 }, { "epoch": 4.624, "grad_norm": 0.20052315294742584, "learning_rate": 0.0002993185887435497, "loss": 24.8334, "step": 28900 }, { "epoch": 4.64, "grad_norm": 0.21725590527057648, "learning_rate": 0.00029931621264850595, "loss": 24.6134, "step": 29000 }, { "epoch": 4.656, "grad_norm": 0.23973499238491058, "learning_rate": 0.00029931381255250206, "loss": 24.8209, "step": 29100 }, { "epoch": 4.672, "grad_norm": 0.20804470777511597, "learning_rate": 0.00029931141245649823, "loss": 25.0912, "step": 29200 }, { "epoch": 4.688, "grad_norm": 0.17555804550647736, "learning_rate": 0.0002993090363614544, "loss": 25.1723, "step": 29300 }, { "epoch": 4.704, "grad_norm": 0.17459039390087128, "learning_rate": 0.0002993066362654506, "loss": 24.5282, "step": 29400 }, { "epoch": 4.72, "grad_norm": 0.211078941822052, "learning_rate": 0.00029930423616944676, "loss": 24.6043, "step": 29500 }, { "epoch": 4.736, "grad_norm": 0.16957704722881317, "learning_rate": 0.0002993018360734429, "loss": 24.7947, "step": 29600 }, { "epoch": 4.752, "grad_norm": 0.2855212092399597, "learning_rate": 0.00029929943597743904, "loss": 24.5785, "step": 29700 }, { "epoch": 4.768, "grad_norm": 0.19777260720729828, "learning_rate": 0.0002992970358814352, "loss": 24.4989, "step": 29800 }, { "epoch": 4.784, "grad_norm": 0.17237554490566254, "learning_rate": 0.0002992946357854314, "loss": 24.6684, "step": 29900 }, { "epoch": 4.8, "grad_norm": 0.1824658066034317, "learning_rate": 0.00029929223568942755, "loss": 24.934, "step": 30000 }, { "epoch": 4.816, "grad_norm": 0.19774967432022095, "learning_rate": 0.0002992898355934237, "loss": 24.4343, "step": 30100 }, { "epoch": 4.832, "grad_norm": 0.2127138376235962, "learning_rate": 0.00029928743549741983, "loss": 24.7444, "step": 30200 }, { "epoch": 4.848, "grad_norm": 0.21794643998146057, "learning_rate": 0.000299285035401416, "loss": 25.2811, "step": 30300 }, { "epoch": 4.864, "grad_norm": 0.178062304854393, "learning_rate": 0.00029928263530541217, "loss": 24.9453, "step": 30400 }, { "epoch": 4.88, "grad_norm": 0.22796912491321564, "learning_rate": 0.00029928023520940834, "loss": 23.9367, "step": 30500 }, { "epoch": 4.896, "grad_norm": 0.18951456248760223, "learning_rate": 0.0002992778351134045, "loss": 23.7658, "step": 30600 }, { "epoch": 4.912, "grad_norm": 0.24202126264572144, "learning_rate": 0.0002992754350174007, "loss": 23.9004, "step": 30700 }, { "epoch": 4.928, "grad_norm": 0.19269002974033356, "learning_rate": 0.0002992730349213968, "loss": 23.2493, "step": 30800 }, { "epoch": 4.944, "grad_norm": 0.1657482087612152, "learning_rate": 0.00029927063482539296, "loss": 23.8883, "step": 30900 }, { "epoch": 4.96, "grad_norm": 0.151734858751297, "learning_rate": 0.00029926823472938913, "loss": 23.7884, "step": 31000 }, { "epoch": 4.976, "grad_norm": 0.2854020595550537, "learning_rate": 0.0002992658346333853, "loss": 24.1054, "step": 31100 }, { "epoch": 4.992, "grad_norm": 0.17750577628612518, "learning_rate": 0.00029926343453738147, "loss": 23.6583, "step": 31200 }, { "epoch": 5.008, "grad_norm": 0.17882367968559265, "learning_rate": 0.00029926103444137764, "loss": 23.4828, "step": 31300 }, { "epoch": 5.024, "grad_norm": 0.17182889580726624, "learning_rate": 0.0002992586343453738, "loss": 22.8774, "step": 31400 }, { "epoch": 5.04, "grad_norm": 0.20355378091335297, "learning_rate": 0.00029925623424937, "loss": 23.3064, "step": 31500 }, { "epoch": 5.056, "grad_norm": 0.21614141762256622, "learning_rate": 0.00029925383415336615, "loss": 22.8978, "step": 31600 }, { "epoch": 5.072, "grad_norm": 0.20654118061065674, "learning_rate": 0.00029925143405736226, "loss": 24.0182, "step": 31700 }, { "epoch": 5.088, "grad_norm": 0.17882691323757172, "learning_rate": 0.00029924903396135843, "loss": 22.8556, "step": 31800 }, { "epoch": 5.104, "grad_norm": 0.16477125883102417, "learning_rate": 0.0002992466338653546, "loss": 22.63, "step": 31900 }, { "epoch": 5.12, "grad_norm": 0.15241862833499908, "learning_rate": 0.00029924423376935077, "loss": 22.9513, "step": 32000 }, { "epoch": 5.136, "grad_norm": 0.17560409009456635, "learning_rate": 0.00029924183367334694, "loss": 22.808, "step": 32100 }, { "epoch": 5.152, "grad_norm": 0.18167634308338165, "learning_rate": 0.00029923943357734305, "loss": 23.0177, "step": 32200 }, { "epoch": 5.168, "grad_norm": 0.18328386545181274, "learning_rate": 0.0002992370334813392, "loss": 22.5144, "step": 32300 }, { "epoch": 5.184, "grad_norm": 0.20202048122882843, "learning_rate": 0.0002992346333853354, "loss": 23.1037, "step": 32400 }, { "epoch": 5.2, "grad_norm": 0.20026326179504395, "learning_rate": 0.00029923223328933156, "loss": 22.3593, "step": 32500 }, { "epoch": 5.216, "grad_norm": 0.1727285534143448, "learning_rate": 0.00029922983319332773, "loss": 22.214, "step": 32600 }, { "epoch": 5.232, "grad_norm": 0.1824960708618164, "learning_rate": 0.0002992274330973239, "loss": 22.2179, "step": 32700 }, { "epoch": 5.248, "grad_norm": 0.19371069967746735, "learning_rate": 0.00029922503300132, "loss": 22.453, "step": 32800 }, { "epoch": 5.264, "grad_norm": 0.22930407524108887, "learning_rate": 0.0002992226329053162, "loss": 22.1665, "step": 32900 }, { "epoch": 5.28, "grad_norm": 0.20372043550014496, "learning_rate": 0.00029922023280931235, "loss": 22.1181, "step": 33000 }, { "epoch": 5.296, "grad_norm": 0.20339564979076385, "learning_rate": 0.0002992178327133085, "loss": 22.5446, "step": 33100 }, { "epoch": 5.312, "grad_norm": 0.2182660847902298, "learning_rate": 0.0002992154326173047, "loss": 22.3062, "step": 33200 }, { "epoch": 5.328, "grad_norm": 0.18666419386863708, "learning_rate": 0.0002992130325213008, "loss": 22.0127, "step": 33300 }, { "epoch": 5.344, "grad_norm": 0.2193373292684555, "learning_rate": 0.000299210632425297, "loss": 22.1167, "step": 33400 }, { "epoch": 5.36, "grad_norm": 0.19642606377601624, "learning_rate": 0.00029920823232929315, "loss": 21.8393, "step": 33500 }, { "epoch": 5.376, "grad_norm": 0.24106252193450928, "learning_rate": 0.0002992058322332893, "loss": 21.7386, "step": 33600 }, { "epoch": 5.392, "grad_norm": 0.17611666023731232, "learning_rate": 0.0002992034321372855, "loss": 22.1787, "step": 33700 }, { "epoch": 5.408, "grad_norm": 0.23640978336334229, "learning_rate": 0.00029920103204128165, "loss": 21.5912, "step": 33800 }, { "epoch": 5.424, "grad_norm": 0.19579695165157318, "learning_rate": 0.00029919863194527777, "loss": 22.1147, "step": 33900 }, { "epoch": 5.44, "grad_norm": 0.18251273036003113, "learning_rate": 0.00029919623184927394, "loss": 21.8284, "step": 34000 }, { "epoch": 5.456, "grad_norm": 0.2099759876728058, "learning_rate": 0.0002991938317532701, "loss": 21.5234, "step": 34100 }, { "epoch": 5.4719999999999995, "grad_norm": 0.21391774713993073, "learning_rate": 0.0002991914316572663, "loss": 21.1876, "step": 34200 }, { "epoch": 5.4879999999999995, "grad_norm": 0.17656175792217255, "learning_rate": 0.00029918903156126244, "loss": 21.7905, "step": 34300 }, { "epoch": 5.504, "grad_norm": 0.1752483993768692, "learning_rate": 0.00029918663146525856, "loss": 20.9481, "step": 34400 }, { "epoch": 5.52, "grad_norm": 0.29879820346832275, "learning_rate": 0.00029918423136925473, "loss": 21.2073, "step": 34500 }, { "epoch": 5.536, "grad_norm": 0.1947035789489746, "learning_rate": 0.0002991818312732509, "loss": 21.0199, "step": 34600 }, { "epoch": 5.552, "grad_norm": 0.15402550995349884, "learning_rate": 0.00029917943117724707, "loss": 21.4862, "step": 34700 }, { "epoch": 5.568, "grad_norm": 0.21479055285453796, "learning_rate": 0.00029917703108124324, "loss": 20.3479, "step": 34800 }, { "epoch": 5.584, "grad_norm": 0.15968792140483856, "learning_rate": 0.0002991746309852394, "loss": 20.8151, "step": 34900 }, { "epoch": 5.6, "grad_norm": 0.16876402497291565, "learning_rate": 0.0002991722308892355, "loss": 21.8482, "step": 35000 }, { "epoch": 5.616, "grad_norm": 0.16191044449806213, "learning_rate": 0.0002991698307932317, "loss": 21.4486, "step": 35100 }, { "epoch": 5.632, "grad_norm": 0.20595960319042206, "learning_rate": 0.00029916743069722786, "loss": 21.7225, "step": 35200 }, { "epoch": 5.648, "grad_norm": 0.1939288079738617, "learning_rate": 0.00029916503060122403, "loss": 21.0107, "step": 35300 }, { "epoch": 5.664, "grad_norm": 0.20212168991565704, "learning_rate": 0.0002991626305052202, "loss": 20.4026, "step": 35400 }, { "epoch": 5.68, "grad_norm": 0.1956707388162613, "learning_rate": 0.0002991602544101764, "loss": 20.9491, "step": 35500 }, { "epoch": 5.696, "grad_norm": 0.22702528536319733, "learning_rate": 0.00029915785431417256, "loss": 21.12, "step": 35600 }, { "epoch": 5.712, "grad_norm": 0.19706673920154572, "learning_rate": 0.00029915547821912874, "loss": 21.5166, "step": 35700 }, { "epoch": 5.728, "grad_norm": 0.18108151853084564, "learning_rate": 0.0002991530781231249, "loss": 20.4059, "step": 35800 }, { "epoch": 5.744, "grad_norm": 0.1714268922805786, "learning_rate": 0.00029915067802712103, "loss": 20.2456, "step": 35900 }, { "epoch": 5.76, "grad_norm": 0.1415804773569107, "learning_rate": 0.0002991482779311172, "loss": 20.3176, "step": 36000 }, { "epoch": 5.776, "grad_norm": 0.1928543597459793, "learning_rate": 0.00029914587783511337, "loss": 20.797, "step": 36100 }, { "epoch": 5.792, "grad_norm": 0.17042042315006256, "learning_rate": 0.00029914347773910954, "loss": 20.2684, "step": 36200 }, { "epoch": 5.808, "grad_norm": 0.1929057389497757, "learning_rate": 0.0002991410776431057, "loss": 19.7169, "step": 36300 }, { "epoch": 5.824, "grad_norm": 0.19770380854606628, "learning_rate": 0.0002991386775471018, "loss": 20.3972, "step": 36400 }, { "epoch": 5.84, "grad_norm": 0.19927264750003815, "learning_rate": 0.000299136277451098, "loss": 20.3105, "step": 36500 }, { "epoch": 5.856, "grad_norm": 0.2222350686788559, "learning_rate": 0.00029913387735509416, "loss": 20.3396, "step": 36600 }, { "epoch": 5.872, "grad_norm": 0.15629681944847107, "learning_rate": 0.00029913147725909033, "loss": 19.7281, "step": 36700 }, { "epoch": 5.888, "grad_norm": 0.1714082509279251, "learning_rate": 0.0002991290771630865, "loss": 20.2121, "step": 36800 }, { "epoch": 5.904, "grad_norm": 0.19152860343456268, "learning_rate": 0.00029912667706708267, "loss": 20.3316, "step": 36900 }, { "epoch": 5.92, "grad_norm": 0.18097779154777527, "learning_rate": 0.0002991242769710788, "loss": 19.9225, "step": 37000 }, { "epoch": 5.936, "grad_norm": 0.21503089368343353, "learning_rate": 0.00029912187687507495, "loss": 20.3151, "step": 37100 }, { "epoch": 5.952, "grad_norm": 0.16976934671401978, "learning_rate": 0.0002991194767790711, "loss": 20.4782, "step": 37200 }, { "epoch": 5.968, "grad_norm": 0.1788826435804367, "learning_rate": 0.0002991170766830673, "loss": 19.616, "step": 37300 }, { "epoch": 5.984, "grad_norm": 0.17762643098831177, "learning_rate": 0.00029911467658706346, "loss": 19.4074, "step": 37400 }, { "epoch": 6.0, "grad_norm": 0.19231481850147247, "learning_rate": 0.0002991122764910596, "loss": 19.3966, "step": 37500 }, { "epoch": 6.016, "grad_norm": 0.2067825198173523, "learning_rate": 0.0002991098763950558, "loss": 19.6924, "step": 37600 }, { "epoch": 6.032, "grad_norm": 0.1930302083492279, "learning_rate": 0.00029910747629905196, "loss": 19.765, "step": 37700 }, { "epoch": 6.048, "grad_norm": 0.2076890915632248, "learning_rate": 0.00029910507620304813, "loss": 19.0516, "step": 37800 }, { "epoch": 6.064, "grad_norm": 0.2006111741065979, "learning_rate": 0.00029910267610704425, "loss": 19.1025, "step": 37900 }, { "epoch": 6.08, "grad_norm": 0.1836411952972412, "learning_rate": 0.0002991002760110404, "loss": 19.3714, "step": 38000 }, { "epoch": 6.096, "grad_norm": 0.1817934662103653, "learning_rate": 0.0002990978759150366, "loss": 19.1752, "step": 38100 }, { "epoch": 6.112, "grad_norm": 0.18150608241558075, "learning_rate": 0.00029909547581903276, "loss": 19.5865, "step": 38200 }, { "epoch": 6.128, "grad_norm": 0.3108033835887909, "learning_rate": 0.0002990930757230289, "loss": 19.3632, "step": 38300 }, { "epoch": 6.144, "grad_norm": 0.18861189484596252, "learning_rate": 0.00029909067562702504, "loss": 19.9617, "step": 38400 }, { "epoch": 6.16, "grad_norm": 0.16909874975681305, "learning_rate": 0.0002990882755310212, "loss": 19.8722, "step": 38500 }, { "epoch": 6.176, "grad_norm": 0.16401100158691406, "learning_rate": 0.0002990858754350174, "loss": 19.3652, "step": 38600 }, { "epoch": 6.192, "grad_norm": 0.17053301632404327, "learning_rate": 0.00029908347533901355, "loss": 19.4264, "step": 38700 }, { "epoch": 6.208, "grad_norm": 0.18607936799526215, "learning_rate": 0.0002990810752430097, "loss": 19.3128, "step": 38800 }, { "epoch": 6.224, "grad_norm": 0.2513495087623596, "learning_rate": 0.0002990786751470059, "loss": 20.1134, "step": 38900 }, { "epoch": 6.24, "grad_norm": 0.21938976645469666, "learning_rate": 0.000299076275051002, "loss": 19.5682, "step": 39000 }, { "epoch": 6.256, "grad_norm": 0.21253296732902527, "learning_rate": 0.00029907387495499817, "loss": 18.7325, "step": 39100 }, { "epoch": 6.272, "grad_norm": 0.21298116445541382, "learning_rate": 0.00029907147485899434, "loss": 19.0698, "step": 39200 }, { "epoch": 6.288, "grad_norm": 0.17804065346717834, "learning_rate": 0.0002990690747629905, "loss": 18.3022, "step": 39300 }, { "epoch": 6.304, "grad_norm": 0.31990084052085876, "learning_rate": 0.0002990666986679467, "loss": 18.9093, "step": 39400 }, { "epoch": 6.32, "grad_norm": 0.17742526531219482, "learning_rate": 0.0002990642985719428, "loss": 18.6614, "step": 39500 }, { "epoch": 6.336, "grad_norm": 0.20601534843444824, "learning_rate": 0.000299061898475939, "loss": 19.6871, "step": 39600 }, { "epoch": 6.352, "grad_norm": 0.16021846234798431, "learning_rate": 0.00029905949837993515, "loss": 18.6417, "step": 39700 }, { "epoch": 6.368, "grad_norm": 0.1588086634874344, "learning_rate": 0.0002990570982839313, "loss": 18.3146, "step": 39800 }, { "epoch": 6.384, "grad_norm": 0.21372877061367035, "learning_rate": 0.0002990546981879275, "loss": 19.0519, "step": 39900 }, { "epoch": 6.4, "grad_norm": 0.18066450953483582, "learning_rate": 0.00029905229809192366, "loss": 19.2848, "step": 40000 }, { "epoch": 6.416, "grad_norm": 0.23790153861045837, "learning_rate": 0.0002990498979959198, "loss": 18.7495, "step": 40100 }, { "epoch": 6.432, "grad_norm": 0.21764115989208221, "learning_rate": 0.00029904749789991594, "loss": 18.5835, "step": 40200 }, { "epoch": 6.448, "grad_norm": 0.18615952134132385, "learning_rate": 0.0002990450978039121, "loss": 17.9751, "step": 40300 }, { "epoch": 6.464, "grad_norm": 0.1657874882221222, "learning_rate": 0.0002990426977079083, "loss": 18.5635, "step": 40400 }, { "epoch": 6.48, "grad_norm": 0.3158019185066223, "learning_rate": 0.00029904029761190445, "loss": 18.6618, "step": 40500 }, { "epoch": 6.496, "grad_norm": 0.2320430427789688, "learning_rate": 0.0002990378975159006, "loss": 18.2968, "step": 40600 }, { "epoch": 6.5120000000000005, "grad_norm": 0.20868684351444244, "learning_rate": 0.0002990354974198968, "loss": 18.595, "step": 40700 }, { "epoch": 6.5280000000000005, "grad_norm": 0.2185734063386917, "learning_rate": 0.00029903309732389296, "loss": 17.9672, "step": 40800 }, { "epoch": 6.5440000000000005, "grad_norm": 0.22871826589107513, "learning_rate": 0.0002990306972278891, "loss": 18.0843, "step": 40900 }, { "epoch": 6.5600000000000005, "grad_norm": 0.16801375150680542, "learning_rate": 0.00029902829713188524, "loss": 18.138, "step": 41000 }, { "epoch": 6.576, "grad_norm": 0.17401717603206635, "learning_rate": 0.0002990258970358814, "loss": 18.7431, "step": 41100 }, { "epoch": 6.592, "grad_norm": 0.17664673924446106, "learning_rate": 0.0002990234969398776, "loss": 17.966, "step": 41200 }, { "epoch": 6.608, "grad_norm": 0.2024875283241272, "learning_rate": 0.00029902109684387375, "loss": 17.9339, "step": 41300 }, { "epoch": 6.624, "grad_norm": 0.19322896003723145, "learning_rate": 0.0002990186967478699, "loss": 18.5554, "step": 41400 }, { "epoch": 6.64, "grad_norm": 0.2797154188156128, "learning_rate": 0.00029901629665186603, "loss": 17.5192, "step": 41500 }, { "epoch": 6.656, "grad_norm": 0.2197944074869156, "learning_rate": 0.0002990138965558622, "loss": 18.4582, "step": 41600 }, { "epoch": 6.672, "grad_norm": 0.18805234134197235, "learning_rate": 0.00029901149645985837, "loss": 17.9245, "step": 41700 }, { "epoch": 6.688, "grad_norm": 0.14986388385295868, "learning_rate": 0.00029900909636385454, "loss": 17.7746, "step": 41800 }, { "epoch": 6.704, "grad_norm": 0.26323381066322327, "learning_rate": 0.0002990066962678507, "loss": 17.6134, "step": 41900 }, { "epoch": 6.72, "grad_norm": 0.1791141778230667, "learning_rate": 0.0002990042961718469, "loss": 17.7648, "step": 42000 }, { "epoch": 6.736, "grad_norm": 0.22629794478416443, "learning_rate": 0.000299001920076803, "loss": 18.2337, "step": 42100 }, { "epoch": 6.752, "grad_norm": 0.17983581125736237, "learning_rate": 0.0002989995199807992, "loss": 17.4193, "step": 42200 }, { "epoch": 6.768, "grad_norm": 0.17379482090473175, "learning_rate": 0.00029899711988479535, "loss": 17.9815, "step": 42300 }, { "epoch": 6.784, "grad_norm": 0.2074684351682663, "learning_rate": 0.0002989947197887915, "loss": 17.898, "step": 42400 }, { "epoch": 6.8, "grad_norm": 0.16909289360046387, "learning_rate": 0.0002989923196927877, "loss": 17.7292, "step": 42500 }, { "epoch": 6.816, "grad_norm": 0.184371218085289, "learning_rate": 0.00029898991959678386, "loss": 18.0706, "step": 42600 }, { "epoch": 6.832, "grad_norm": 0.17724382877349854, "learning_rate": 0.00029898751950078, "loss": 17.9871, "step": 42700 }, { "epoch": 6.848, "grad_norm": 0.2286718785762787, "learning_rate": 0.00029898511940477614, "loss": 17.5911, "step": 42800 }, { "epoch": 6.864, "grad_norm": 0.2002006471157074, "learning_rate": 0.0002989827193087723, "loss": 17.4336, "step": 42900 }, { "epoch": 6.88, "grad_norm": 0.20236457884311676, "learning_rate": 0.0002989803192127685, "loss": 17.0849, "step": 43000 }, { "epoch": 6.896, "grad_norm": 0.23483681678771973, "learning_rate": 0.00029897791911676465, "loss": 17.7893, "step": 43100 }, { "epoch": 6.912, "grad_norm": 0.18751464784145355, "learning_rate": 0.00029897551902076077, "loss": 17.4798, "step": 43200 }, { "epoch": 6.928, "grad_norm": 0.17341011762619019, "learning_rate": 0.00029897311892475694, "loss": 17.7278, "step": 43300 }, { "epoch": 6.944, "grad_norm": 0.15160439908504486, "learning_rate": 0.0002989707188287531, "loss": 17.4948, "step": 43400 }, { "epoch": 6.96, "grad_norm": 0.19316324591636658, "learning_rate": 0.0002989683187327493, "loss": 17.3409, "step": 43500 }, { "epoch": 6.976, "grad_norm": 0.1800646036863327, "learning_rate": 0.00029896591863674544, "loss": 17.5152, "step": 43600 }, { "epoch": 6.992, "grad_norm": 0.19359643757343292, "learning_rate": 0.0002989635185407416, "loss": 17.2701, "step": 43700 }, { "epoch": 7.008, "grad_norm": 0.21103709936141968, "learning_rate": 0.0002989611184447378, "loss": 17.0028, "step": 43800 }, { "epoch": 7.024, "grad_norm": 0.18972234427928925, "learning_rate": 0.00029895871834873395, "loss": 16.8714, "step": 43900 }, { "epoch": 7.04, "grad_norm": 0.16335220634937286, "learning_rate": 0.0002989563182527301, "loss": 17.1409, "step": 44000 }, { "epoch": 7.056, "grad_norm": 0.16595561802387238, "learning_rate": 0.00029895391815672624, "loss": 17.1677, "step": 44100 }, { "epoch": 7.072, "grad_norm": 0.1885690540075302, "learning_rate": 0.0002989515180607224, "loss": 17.1327, "step": 44200 }, { "epoch": 7.088, "grad_norm": 0.16525697708129883, "learning_rate": 0.0002989491179647186, "loss": 17.0265, "step": 44300 }, { "epoch": 7.104, "grad_norm": 0.17798613011837006, "learning_rate": 0.00029894671786871474, "loss": 16.5858, "step": 44400 }, { "epoch": 7.12, "grad_norm": 0.17442761361598969, "learning_rate": 0.0002989443177727109, "loss": 16.7029, "step": 44500 }, { "epoch": 7.136, "grad_norm": 0.17014281451702118, "learning_rate": 0.0002989419176767071, "loss": 16.3283, "step": 44600 }, { "epoch": 7.152, "grad_norm": 0.21125547587871552, "learning_rate": 0.0002989395175807032, "loss": 17.0964, "step": 44700 }, { "epoch": 7.168, "grad_norm": 0.15473531186580658, "learning_rate": 0.00029893711748469937, "loss": 17.2634, "step": 44800 }, { "epoch": 7.184, "grad_norm": 0.22423428297042847, "learning_rate": 0.00029893471738869553, "loss": 16.6492, "step": 44900 }, { "epoch": 7.2, "grad_norm": 0.23651999235153198, "learning_rate": 0.0002989323172926917, "loss": 17.2672, "step": 45000 }, { "epoch": 7.216, "grad_norm": 0.18389280140399933, "learning_rate": 0.00029892991719668787, "loss": 16.3061, "step": 45100 }, { "epoch": 7.232, "grad_norm": 0.19786329567432404, "learning_rate": 0.000298927517100684, "loss": 16.7178, "step": 45200 }, { "epoch": 7.248, "grad_norm": 0.1748264580965042, "learning_rate": 0.00029892511700468016, "loss": 16.8728, "step": 45300 }, { "epoch": 7.264, "grad_norm": 0.17337900400161743, "learning_rate": 0.0002989227169086763, "loss": 16.143, "step": 45400 }, { "epoch": 7.28, "grad_norm": 0.1627172827720642, "learning_rate": 0.0002989203168126725, "loss": 16.677, "step": 45500 }, { "epoch": 7.296, "grad_norm": 0.18607047200202942, "learning_rate": 0.00029891791671666866, "loss": 16.6493, "step": 45600 }, { "epoch": 7.312, "grad_norm": 0.17733363807201385, "learning_rate": 0.00029891551662066483, "loss": 16.8518, "step": 45700 }, { "epoch": 7.328, "grad_norm": 0.17257067561149597, "learning_rate": 0.00029891311652466095, "loss": 16.7963, "step": 45800 }, { "epoch": 7.344, "grad_norm": 0.22989864647388458, "learning_rate": 0.0002989107164286571, "loss": 16.6846, "step": 45900 }, { "epoch": 7.36, "grad_norm": 0.1924850195646286, "learning_rate": 0.0002989083163326533, "loss": 16.7258, "step": 46000 }, { "epoch": 7.376, "grad_norm": 0.15162524580955505, "learning_rate": 0.00029890591623664946, "loss": 16.0529, "step": 46100 }, { "epoch": 7.392, "grad_norm": 0.19990018010139465, "learning_rate": 0.00029890354014160564, "loss": 16.3768, "step": 46200 }, { "epoch": 7.408, "grad_norm": 0.1724652647972107, "learning_rate": 0.00029890114004560176, "loss": 17.0495, "step": 46300 }, { "epoch": 7.424, "grad_norm": 0.1920676976442337, "learning_rate": 0.00029889873994959793, "loss": 16.1202, "step": 46400 }, { "epoch": 7.44, "grad_norm": 0.1957552433013916, "learning_rate": 0.00029889636385455417, "loss": 16.413, "step": 46500 }, { "epoch": 7.456, "grad_norm": 0.14071592688560486, "learning_rate": 0.00029889396375855034, "loss": 15.732, "step": 46600 }, { "epoch": 7.4719999999999995, "grad_norm": 0.1833236664533615, "learning_rate": 0.00029889156366254646, "loss": 16.7192, "step": 46700 }, { "epoch": 7.4879999999999995, "grad_norm": 0.2189483791589737, "learning_rate": 0.0002988891635665426, "loss": 16.0979, "step": 46800 }, { "epoch": 7.504, "grad_norm": 0.17360301315784454, "learning_rate": 0.0002988867634705388, "loss": 15.8968, "step": 46900 }, { "epoch": 7.52, "grad_norm": 0.1952562779188156, "learning_rate": 0.00029888436337453496, "loss": 15.9731, "step": 47000 }, { "epoch": 7.536, "grad_norm": 0.1601036638021469, "learning_rate": 0.00029888196327853113, "loss": 16.392, "step": 47100 }, { "epoch": 7.552, "grad_norm": 0.17277076840400696, "learning_rate": 0.00029887956318252725, "loss": 15.9779, "step": 47200 }, { "epoch": 7.568, "grad_norm": 0.1868811696767807, "learning_rate": 0.0002988771630865234, "loss": 15.5355, "step": 47300 }, { "epoch": 7.584, "grad_norm": 0.2078930139541626, "learning_rate": 0.00029887478699147966, "loss": 15.8833, "step": 47400 }, { "epoch": 7.6, "grad_norm": 0.17647911608219147, "learning_rate": 0.0002988723868954758, "loss": 16.0442, "step": 47500 }, { "epoch": 7.616, "grad_norm": 0.20268210768699646, "learning_rate": 0.00029886998679947194, "loss": 16.1957, "step": 47600 }, { "epoch": 7.632, "grad_norm": 0.1820913553237915, "learning_rate": 0.0002988675867034681, "loss": 15.8208, "step": 47700 }, { "epoch": 7.648, "grad_norm": 0.2001231610774994, "learning_rate": 0.0002988651866074643, "loss": 16.1706, "step": 47800 }, { "epoch": 7.664, "grad_norm": 0.18558456003665924, "learning_rate": 0.00029886278651146045, "loss": 15.9747, "step": 47900 }, { "epoch": 7.68, "grad_norm": 0.17034992575645447, "learning_rate": 0.0002988603864154566, "loss": 16.4537, "step": 48000 }, { "epoch": 7.696, "grad_norm": 0.16974206268787384, "learning_rate": 0.00029885798631945274, "loss": 15.5116, "step": 48100 }, { "epoch": 7.712, "grad_norm": 0.1771545112133026, "learning_rate": 0.0002988555862234489, "loss": 15.8605, "step": 48200 }, { "epoch": 7.728, "grad_norm": 0.17756806313991547, "learning_rate": 0.0002988531861274451, "loss": 15.8965, "step": 48300 }, { "epoch": 7.744, "grad_norm": 0.20773237943649292, "learning_rate": 0.00029885078603144124, "loss": 15.1184, "step": 48400 }, { "epoch": 7.76, "grad_norm": 0.18383237719535828, "learning_rate": 0.0002988483859354374, "loss": 16.0467, "step": 48500 }, { "epoch": 7.776, "grad_norm": 0.18748898804187775, "learning_rate": 0.0002988459858394336, "loss": 15.3286, "step": 48600 }, { "epoch": 7.792, "grad_norm": 0.2877133786678314, "learning_rate": 0.0002988435857434297, "loss": 15.8562, "step": 48700 }, { "epoch": 7.808, "grad_norm": 0.168177530169487, "learning_rate": 0.00029884118564742587, "loss": 15.8613, "step": 48800 }, { "epoch": 7.824, "grad_norm": 0.18536759912967682, "learning_rate": 0.00029883878555142203, "loss": 15.8204, "step": 48900 }, { "epoch": 7.84, "grad_norm": 0.15699341893196106, "learning_rate": 0.0002988363854554182, "loss": 15.6026, "step": 49000 }, { "epoch": 7.856, "grad_norm": 0.17730812728405, "learning_rate": 0.0002988339853594144, "loss": 15.5268, "step": 49100 }, { "epoch": 7.872, "grad_norm": 0.16140446066856384, "learning_rate": 0.0002988315852634105, "loss": 15.3766, "step": 49200 }, { "epoch": 7.888, "grad_norm": 0.16114762425422668, "learning_rate": 0.00029882918516740666, "loss": 15.8614, "step": 49300 }, { "epoch": 7.904, "grad_norm": 0.19132892787456512, "learning_rate": 0.0002988267850714028, "loss": 15.4026, "step": 49400 }, { "epoch": 7.92, "grad_norm": 0.190206840634346, "learning_rate": 0.000298824384975399, "loss": 15.42, "step": 49500 }, { "epoch": 7.936, "grad_norm": 0.18264752626419067, "learning_rate": 0.00029882198487939516, "loss": 15.5455, "step": 49600 }, { "epoch": 7.952, "grad_norm": 0.1774350255727768, "learning_rate": 0.00029881958478339133, "loss": 15.7328, "step": 49700 }, { "epoch": 7.968, "grad_norm": 0.1655503213405609, "learning_rate": 0.00029881718468738745, "loss": 15.5836, "step": 49800 }, { "epoch": 7.984, "grad_norm": 0.18890833854675293, "learning_rate": 0.0002988147845913836, "loss": 15.4838, "step": 49900 }, { "epoch": 8.0, "grad_norm": 0.1880652904510498, "learning_rate": 0.0002988123844953798, "loss": 15.2114, "step": 50000 }, { "epoch": 8.016, "grad_norm": 0.18285752832889557, "learning_rate": 0.00029880998439937596, "loss": 14.9511, "step": 50100 }, { "epoch": 8.032, "grad_norm": 0.19436243176460266, "learning_rate": 0.0002988075843033721, "loss": 15.4968, "step": 50200 }, { "epoch": 8.048, "grad_norm": 0.1822815239429474, "learning_rate": 0.00029880518420736824, "loss": 14.7632, "step": 50300 }, { "epoch": 8.064, "grad_norm": 0.16189494729042053, "learning_rate": 0.0002988027841113644, "loss": 14.937, "step": 50400 }, { "epoch": 8.08, "grad_norm": 0.152993842959404, "learning_rate": 0.0002988003840153606, "loss": 14.676, "step": 50500 }, { "epoch": 8.096, "grad_norm": 0.2119678407907486, "learning_rate": 0.00029879798391935675, "loss": 15.725, "step": 50600 }, { "epoch": 8.112, "grad_norm": 0.22487041354179382, "learning_rate": 0.0002987955838233529, "loss": 15.0505, "step": 50700 }, { "epoch": 8.128, "grad_norm": 0.16072215139865875, "learning_rate": 0.0002987931837273491, "loss": 15.4103, "step": 50800 }, { "epoch": 8.144, "grad_norm": 0.16657765209674835, "learning_rate": 0.0002987907836313452, "loss": 14.7139, "step": 50900 }, { "epoch": 8.16, "grad_norm": 0.15327660739421844, "learning_rate": 0.00029878838353534137, "loss": 14.6325, "step": 51000 }, { "epoch": 8.176, "grad_norm": 0.20472773909568787, "learning_rate": 0.00029878598343933754, "loss": 14.7217, "step": 51100 }, { "epoch": 8.192, "grad_norm": 0.214088574051857, "learning_rate": 0.0002987835833433337, "loss": 14.121, "step": 51200 }, { "epoch": 8.208, "grad_norm": 0.20903360843658447, "learning_rate": 0.0002987811832473299, "loss": 15.1448, "step": 51300 }, { "epoch": 8.224, "grad_norm": 0.20621182024478912, "learning_rate": 0.000298778783151326, "loss": 14.7588, "step": 51400 }, { "epoch": 8.24, "grad_norm": 0.18515250086784363, "learning_rate": 0.00029877638305532216, "loss": 15.3639, "step": 51500 }, { "epoch": 8.256, "grad_norm": 0.17146657407283783, "learning_rate": 0.00029877398295931833, "loss": 14.4964, "step": 51600 }, { "epoch": 8.272, "grad_norm": 0.18953190743923187, "learning_rate": 0.0002987715828633145, "loss": 14.5639, "step": 51700 }, { "epoch": 8.288, "grad_norm": 0.17434297502040863, "learning_rate": 0.0002987692067682707, "loss": 15.2875, "step": 51800 }, { "epoch": 8.304, "grad_norm": 0.16686853766441345, "learning_rate": 0.00029876680667226686, "loss": 14.4679, "step": 51900 }, { "epoch": 8.32, "grad_norm": 0.14394892752170563, "learning_rate": 0.00029876440657626303, "loss": 14.5162, "step": 52000 }, { "epoch": 8.336, "grad_norm": 0.20816083252429962, "learning_rate": 0.0002987620064802592, "loss": 15.2646, "step": 52100 }, { "epoch": 8.352, "grad_norm": 0.16660048067569733, "learning_rate": 0.00029875960638425537, "loss": 15.0214, "step": 52200 }, { "epoch": 8.368, "grad_norm": 0.16948403418064117, "learning_rate": 0.0002987572062882515, "loss": 14.7227, "step": 52300 }, { "epoch": 8.384, "grad_norm": 0.15360529720783234, "learning_rate": 0.00029875480619224765, "loss": 14.8453, "step": 52400 }, { "epoch": 8.4, "grad_norm": 0.1730951964855194, "learning_rate": 0.0002987524060962438, "loss": 14.6784, "step": 52500 }, { "epoch": 8.416, "grad_norm": 0.1714763641357422, "learning_rate": 0.00029875000600024, "loss": 14.3347, "step": 52600 }, { "epoch": 8.432, "grad_norm": 0.21991823613643646, "learning_rate": 0.00029874760590423616, "loss": 14.7373, "step": 52700 }, { "epoch": 8.448, "grad_norm": 0.26085495948791504, "learning_rate": 0.00029874520580823233, "loss": 14.4799, "step": 52800 }, { "epoch": 8.464, "grad_norm": 0.15623599290847778, "learning_rate": 0.00029874280571222844, "loss": 14.9737, "step": 52900 }, { "epoch": 8.48, "grad_norm": 0.14685533940792084, "learning_rate": 0.0002987404056162246, "loss": 14.4126, "step": 53000 }, { "epoch": 8.496, "grad_norm": 0.19048573076725006, "learning_rate": 0.0002987380055202208, "loss": 14.6049, "step": 53100 }, { "epoch": 8.512, "grad_norm": 0.15729829668998718, "learning_rate": 0.00029873560542421695, "loss": 14.8894, "step": 53200 }, { "epoch": 8.528, "grad_norm": 0.18257932364940643, "learning_rate": 0.0002987332053282131, "loss": 14.3249, "step": 53300 }, { "epoch": 8.544, "grad_norm": 0.20492464303970337, "learning_rate": 0.00029873080523220923, "loss": 15.0053, "step": 53400 }, { "epoch": 8.56, "grad_norm": 0.22026245296001434, "learning_rate": 0.0002987284051362054, "loss": 14.1141, "step": 53500 }, { "epoch": 8.576, "grad_norm": 0.16078276932239532, "learning_rate": 0.00029872600504020157, "loss": 14.3822, "step": 53600 }, { "epoch": 8.592, "grad_norm": 0.19619469344615936, "learning_rate": 0.00029872360494419774, "loss": 14.3099, "step": 53700 }, { "epoch": 8.608, "grad_norm": 0.15051692724227905, "learning_rate": 0.0002987212048481939, "loss": 13.7999, "step": 53800 }, { "epoch": 8.624, "grad_norm": 0.19525863230228424, "learning_rate": 0.0002987188047521901, "loss": 14.3567, "step": 53900 }, { "epoch": 8.64, "grad_norm": 0.16883693635463715, "learning_rate": 0.0002987164046561862, "loss": 13.3731, "step": 54000 }, { "epoch": 8.656, "grad_norm": 0.1703290492296219, "learning_rate": 0.00029871400456018236, "loss": 13.8462, "step": 54100 }, { "epoch": 8.672, "grad_norm": 0.18907932937145233, "learning_rate": 0.00029871160446417853, "loss": 14.5297, "step": 54200 }, { "epoch": 8.688, "grad_norm": 0.16260308027267456, "learning_rate": 0.0002987092043681747, "loss": 14.0573, "step": 54300 }, { "epoch": 8.704, "grad_norm": 0.1732938140630722, "learning_rate": 0.0002987068282731309, "loss": 14.1114, "step": 54400 }, { "epoch": 8.72, "grad_norm": 0.20591895282268524, "learning_rate": 0.00029870442817712706, "loss": 13.7101, "step": 54500 }, { "epoch": 8.736, "grad_norm": 0.1871296912431717, "learning_rate": 0.00029870202808112323, "loss": 14.539, "step": 54600 }, { "epoch": 8.752, "grad_norm": 0.15711694955825806, "learning_rate": 0.0002986996279851194, "loss": 14.4353, "step": 54700 }, { "epoch": 8.768, "grad_norm": 0.1790015697479248, "learning_rate": 0.00029869722788911557, "loss": 14.4861, "step": 54800 }, { "epoch": 8.784, "grad_norm": 0.1903577744960785, "learning_rate": 0.0002986948277931117, "loss": 14.2582, "step": 54900 }, { "epoch": 8.8, "grad_norm": 0.18150964379310608, "learning_rate": 0.00029869242769710785, "loss": 13.9522, "step": 55000 }, { "epoch": 8.816, "grad_norm": 0.17604489624500275, "learning_rate": 0.000298690027601104, "loss": 14.4482, "step": 55100 }, { "epoch": 8.832, "grad_norm": 0.18487071990966797, "learning_rate": 0.0002986876275051002, "loss": 13.9656, "step": 55200 }, { "epoch": 8.848, "grad_norm": 0.15276212990283966, "learning_rate": 0.00029868522740909636, "loss": 14.2513, "step": 55300 }, { "epoch": 8.864, "grad_norm": 0.19339829683303833, "learning_rate": 0.0002986828273130925, "loss": 13.7151, "step": 55400 }, { "epoch": 8.88, "grad_norm": 0.14462265372276306, "learning_rate": 0.00029868042721708864, "loss": 13.8859, "step": 55500 }, { "epoch": 8.896, "grad_norm": 0.16163522005081177, "learning_rate": 0.0002986780271210848, "loss": 13.7567, "step": 55600 }, { "epoch": 8.912, "grad_norm": 0.15859289467334747, "learning_rate": 0.000298675627025081, "loss": 14.4693, "step": 55700 }, { "epoch": 8.928, "grad_norm": 0.1641652137041092, "learning_rate": 0.00029867322692907715, "loss": 13.6118, "step": 55800 }, { "epoch": 8.943999999999999, "grad_norm": 0.18410654366016388, "learning_rate": 0.0002986708268330733, "loss": 14.3033, "step": 55900 }, { "epoch": 8.96, "grad_norm": 0.18847694993019104, "learning_rate": 0.00029866842673706944, "loss": 13.2935, "step": 56000 }, { "epoch": 8.975999999999999, "grad_norm": 0.15224353969097137, "learning_rate": 0.0002986660266410656, "loss": 13.6185, "step": 56100 }, { "epoch": 8.992, "grad_norm": 0.15307171642780304, "learning_rate": 0.0002986636265450618, "loss": 13.9229, "step": 56200 }, { "epoch": 9.008, "grad_norm": 0.1455143541097641, "learning_rate": 0.00029866122644905794, "loss": 13.9716, "step": 56300 }, { "epoch": 9.024, "grad_norm": 0.18889980018138885, "learning_rate": 0.0002986588263530541, "loss": 13.8509, "step": 56400 }, { "epoch": 9.04, "grad_norm": 0.19757011532783508, "learning_rate": 0.0002986564262570502, "loss": 14.0519, "step": 56500 }, { "epoch": 9.056, "grad_norm": 0.18008406460285187, "learning_rate": 0.00029865405016200647, "loss": 13.1833, "step": 56600 }, { "epoch": 9.072, "grad_norm": 0.1602972447872162, "learning_rate": 0.00029865165006600264, "loss": 13.2838, "step": 56700 }, { "epoch": 9.088, "grad_norm": 0.17582525312900543, "learning_rate": 0.0002986492499699988, "loss": 13.898, "step": 56800 }, { "epoch": 9.104, "grad_norm": 0.15762995183467865, "learning_rate": 0.0002986468498739949, "loss": 13.5733, "step": 56900 }, { "epoch": 9.12, "grad_norm": 0.1670118272304535, "learning_rate": 0.0002986444497779911, "loss": 13.5845, "step": 57000 }, { "epoch": 9.136, "grad_norm": 0.18542303144931793, "learning_rate": 0.00029864204968198726, "loss": 13.9615, "step": 57100 }, { "epoch": 9.152, "grad_norm": 0.18144281208515167, "learning_rate": 0.00029863964958598343, "loss": 13.0945, "step": 57200 }, { "epoch": 9.168, "grad_norm": 0.18359419703483582, "learning_rate": 0.0002986372494899796, "loss": 13.4529, "step": 57300 }, { "epoch": 9.184, "grad_norm": 0.2034582495689392, "learning_rate": 0.0002986348493939757, "loss": 13.2086, "step": 57400 }, { "epoch": 9.2, "grad_norm": 0.1561286300420761, "learning_rate": 0.0002986324492979719, "loss": 13.5699, "step": 57500 }, { "epoch": 9.216, "grad_norm": 0.2128494530916214, "learning_rate": 0.00029863004920196805, "loss": 13.7906, "step": 57600 }, { "epoch": 9.232, "grad_norm": 0.18951255083084106, "learning_rate": 0.0002986276491059642, "loss": 13.4684, "step": 57700 }, { "epoch": 9.248, "grad_norm": 0.14849476516246796, "learning_rate": 0.0002986252490099604, "loss": 13.6832, "step": 57800 }, { "epoch": 9.264, "grad_norm": 0.19169315695762634, "learning_rate": 0.00029862284891395656, "loss": 12.9751, "step": 57900 }, { "epoch": 9.28, "grad_norm": 0.219793900847435, "learning_rate": 0.0002986204488179527, "loss": 13.4069, "step": 58000 }, { "epoch": 9.296, "grad_norm": 0.2139630764722824, "learning_rate": 0.00029861804872194884, "loss": 12.9185, "step": 58100 }, { "epoch": 9.312, "grad_norm": 0.1722664088010788, "learning_rate": 0.000298615648625945, "loss": 13.4876, "step": 58200 }, { "epoch": 9.328, "grad_norm": 0.15841473639011383, "learning_rate": 0.0002986132485299412, "loss": 13.481, "step": 58300 }, { "epoch": 9.344, "grad_norm": 0.17484904825687408, "learning_rate": 0.00029861084843393735, "loss": 13.5925, "step": 58400 }, { "epoch": 9.36, "grad_norm": 0.20388108491897583, "learning_rate": 0.00029860844833793347, "loss": 13.2549, "step": 58500 }, { "epoch": 9.376, "grad_norm": 0.17959387600421906, "learning_rate": 0.00029860604824192964, "loss": 13.571, "step": 58600 }, { "epoch": 9.392, "grad_norm": 0.1830485612154007, "learning_rate": 0.0002986036481459258, "loss": 13.0808, "step": 58700 }, { "epoch": 9.408, "grad_norm": 0.1935325413942337, "learning_rate": 0.000298601248049922, "loss": 12.9193, "step": 58800 }, { "epoch": 9.424, "grad_norm": 0.22928985953330994, "learning_rate": 0.00029859884795391814, "loss": 12.9233, "step": 58900 }, { "epoch": 9.44, "grad_norm": 0.17562927305698395, "learning_rate": 0.0002985964478579143, "loss": 13.0933, "step": 59000 }, { "epoch": 9.456, "grad_norm": 0.21014900505542755, "learning_rate": 0.00029859404776191043, "loss": 12.9421, "step": 59100 }, { "epoch": 9.472, "grad_norm": 0.16698358952999115, "learning_rate": 0.0002985916476659066, "loss": 13.6465, "step": 59200 }, { "epoch": 9.488, "grad_norm": 0.15990376472473145, "learning_rate": 0.00029858924756990277, "loss": 12.9832, "step": 59300 }, { "epoch": 9.504, "grad_norm": 0.21185587346553802, "learning_rate": 0.00029858684747389894, "loss": 13.3695, "step": 59400 }, { "epoch": 9.52, "grad_norm": 0.16105149686336517, "learning_rate": 0.0002985844473778951, "loss": 13.0733, "step": 59500 }, { "epoch": 9.536, "grad_norm": 0.22624213993549347, "learning_rate": 0.0002985820472818912, "loss": 13.2586, "step": 59600 }, { "epoch": 9.552, "grad_norm": 0.1732643097639084, "learning_rate": 0.0002985796471858874, "loss": 12.9246, "step": 59700 }, { "epoch": 9.568, "grad_norm": 0.18406638503074646, "learning_rate": 0.00029857724708988356, "loss": 13.4556, "step": 59800 }, { "epoch": 9.584, "grad_norm": 0.18207241594791412, "learning_rate": 0.0002985748709948398, "loss": 12.8405, "step": 59900 }, { "epoch": 9.6, "grad_norm": 0.14808227121829987, "learning_rate": 0.0002985724708988359, "loss": 13.0075, "step": 60000 }, { "epoch": 9.616, "grad_norm": 0.1976134330034256, "learning_rate": 0.0002985700708028321, "loss": 12.687, "step": 60100 }, { "epoch": 9.632, "grad_norm": 0.1712380349636078, "learning_rate": 0.00029856767070682825, "loss": 13.003, "step": 60200 }, { "epoch": 9.648, "grad_norm": 0.1509382426738739, "learning_rate": 0.0002985652706108244, "loss": 13.0863, "step": 60300 }, { "epoch": 9.664, "grad_norm": 0.1992410570383072, "learning_rate": 0.0002985628705148206, "loss": 13.1396, "step": 60400 }, { "epoch": 9.68, "grad_norm": 0.19914288818836212, "learning_rate": 0.0002985604704188167, "loss": 13.0716, "step": 60500 }, { "epoch": 9.696, "grad_norm": 0.17157557606697083, "learning_rate": 0.0002985580703228129, "loss": 12.5376, "step": 60600 }, { "epoch": 9.712, "grad_norm": 0.14820295572280884, "learning_rate": 0.00029855567022680905, "loss": 12.9209, "step": 60700 }, { "epoch": 9.728, "grad_norm": 0.17262442409992218, "learning_rate": 0.0002985532701308052, "loss": 13.3595, "step": 60800 }, { "epoch": 9.744, "grad_norm": 0.1804870218038559, "learning_rate": 0.0002985508700348014, "loss": 13.0037, "step": 60900 }, { "epoch": 9.76, "grad_norm": 0.1507444977760315, "learning_rate": 0.00029854846993879755, "loss": 12.5568, "step": 61000 }, { "epoch": 9.776, "grad_norm": 0.17809054255485535, "learning_rate": 0.00029854606984279367, "loss": 12.9826, "step": 61100 }, { "epoch": 9.792, "grad_norm": 0.25455987453460693, "learning_rate": 0.00029854366974678984, "loss": 12.5432, "step": 61200 }, { "epoch": 9.808, "grad_norm": 0.15175747871398926, "learning_rate": 0.000298541269650786, "loss": 12.9513, "step": 61300 }, { "epoch": 9.824, "grad_norm": 0.22233819961547852, "learning_rate": 0.0002985388695547822, "loss": 13.2744, "step": 61400 }, { "epoch": 9.84, "grad_norm": 0.1534196138381958, "learning_rate": 0.00029853646945877835, "loss": 12.4878, "step": 61500 }, { "epoch": 9.856, "grad_norm": 0.17612405121326447, "learning_rate": 0.00029853406936277446, "loss": 12.6281, "step": 61600 }, { "epoch": 9.872, "grad_norm": 0.14971201121807098, "learning_rate": 0.00029853166926677063, "loss": 12.4393, "step": 61700 }, { "epoch": 9.888, "grad_norm": 0.15717633068561554, "learning_rate": 0.0002985292691707668, "loss": 12.6903, "step": 61800 }, { "epoch": 9.904, "grad_norm": 0.1695670634508133, "learning_rate": 0.00029852686907476297, "loss": 12.9557, "step": 61900 }, { "epoch": 9.92, "grad_norm": 0.16429013013839722, "learning_rate": 0.00029852446897875914, "loss": 12.9804, "step": 62000 }, { "epoch": 9.936, "grad_norm": 0.1919148713350296, "learning_rate": 0.0002985220688827553, "loss": 12.8735, "step": 62100 }, { "epoch": 9.952, "grad_norm": 0.1977461278438568, "learning_rate": 0.0002985196687867514, "loss": 12.6665, "step": 62200 }, { "epoch": 9.968, "grad_norm": 0.3409396708011627, "learning_rate": 0.0002985172686907476, "loss": 11.9422, "step": 62300 }, { "epoch": 9.984, "grad_norm": 0.1977001428604126, "learning_rate": 0.00029851486859474376, "loss": 13.392, "step": 62400 }, { "epoch": 10.0, "grad_norm": 0.19805894792079926, "learning_rate": 0.00029851246849873993, "loss": 12.3432, "step": 62500 }, { "epoch": 10.016, "grad_norm": 0.1851508915424347, "learning_rate": 0.0002985100684027361, "loss": 12.8953, "step": 62600 }, { "epoch": 10.032, "grad_norm": 0.15137746930122375, "learning_rate": 0.0002985076683067322, "loss": 12.8256, "step": 62700 }, { "epoch": 10.048, "grad_norm": 0.1815025508403778, "learning_rate": 0.00029850529221168846, "loss": 12.2427, "step": 62800 }, { "epoch": 10.064, "grad_norm": 0.282045841217041, "learning_rate": 0.0002985028921156846, "loss": 12.5777, "step": 62900 }, { "epoch": 10.08, "grad_norm": 0.19669105112552643, "learning_rate": 0.0002985004920196808, "loss": 12.85, "step": 63000 }, { "epoch": 10.096, "grad_norm": 0.1557861566543579, "learning_rate": 0.0002984980919236769, "loss": 12.6325, "step": 63100 }, { "epoch": 10.112, "grad_norm": 0.16353458166122437, "learning_rate": 0.0002984956918276731, "loss": 12.5578, "step": 63200 }, { "epoch": 10.128, "grad_norm": 0.19124484062194824, "learning_rate": 0.00029849329173166925, "loss": 12.8784, "step": 63300 }, { "epoch": 10.144, "grad_norm": 0.16097944974899292, "learning_rate": 0.0002984908916356654, "loss": 11.7994, "step": 63400 }, { "epoch": 10.16, "grad_norm": 0.155614972114563, "learning_rate": 0.0002984884915396616, "loss": 11.9617, "step": 63500 }, { "epoch": 10.176, "grad_norm": 0.19013510644435883, "learning_rate": 0.0002984860914436577, "loss": 12.1663, "step": 63600 }, { "epoch": 10.192, "grad_norm": 0.21610714495182037, "learning_rate": 0.00029848369134765387, "loss": 12.2304, "step": 63700 }, { "epoch": 10.208, "grad_norm": 0.15554966032505035, "learning_rate": 0.00029848129125165004, "loss": 11.9337, "step": 63800 }, { "epoch": 10.224, "grad_norm": 0.14373019337654114, "learning_rate": 0.0002984788911556462, "loss": 12.5049, "step": 63900 }, { "epoch": 10.24, "grad_norm": 0.197763592004776, "learning_rate": 0.0002984764910596424, "loss": 12.2087, "step": 64000 }, { "epoch": 10.256, "grad_norm": 0.1522061824798584, "learning_rate": 0.00029847409096363855, "loss": 12.475, "step": 64100 }, { "epoch": 10.272, "grad_norm": 0.15849411487579346, "learning_rate": 0.00029847169086763466, "loss": 12.1301, "step": 64200 }, { "epoch": 10.288, "grad_norm": 0.1680125594139099, "learning_rate": 0.00029846929077163083, "loss": 12.2041, "step": 64300 }, { "epoch": 10.304, "grad_norm": 0.17618972063064575, "learning_rate": 0.000298466890675627, "loss": 12.1634, "step": 64400 }, { "epoch": 10.32, "grad_norm": 0.19345271587371826, "learning_rate": 0.00029846449057962317, "loss": 12.0509, "step": 64500 }, { "epoch": 10.336, "grad_norm": 0.15981802344322205, "learning_rate": 0.00029846209048361934, "loss": 11.879, "step": 64600 }, { "epoch": 10.352, "grad_norm": 0.1640341877937317, "learning_rate": 0.00029845969038761545, "loss": 12.3471, "step": 64700 }, { "epoch": 10.368, "grad_norm": 0.1751720905303955, "learning_rate": 0.0002984572902916116, "loss": 11.7085, "step": 64800 }, { "epoch": 10.384, "grad_norm": 0.15203487873077393, "learning_rate": 0.00029845491419656787, "loss": 11.9901, "step": 64900 }, { "epoch": 10.4, "grad_norm": 0.1836910843849182, "learning_rate": 0.00029845251410056403, "loss": 11.5864, "step": 65000 }, { "epoch": 10.416, "grad_norm": 0.2329769879579544, "learning_rate": 0.00029845011400456015, "loss": 11.8386, "step": 65100 }, { "epoch": 10.432, "grad_norm": 0.25904643535614014, "learning_rate": 0.0002984477139085563, "loss": 11.6842, "step": 65200 }, { "epoch": 10.448, "grad_norm": 0.16373856365680695, "learning_rate": 0.0002984453138125525, "loss": 11.9861, "step": 65300 }, { "epoch": 10.464, "grad_norm": 0.1684304028749466, "learning_rate": 0.00029844291371654866, "loss": 12.1751, "step": 65400 }, { "epoch": 10.48, "grad_norm": 0.1975129395723343, "learning_rate": 0.0002984405136205448, "loss": 11.9744, "step": 65500 }, { "epoch": 10.496, "grad_norm": 0.144730344414711, "learning_rate": 0.00029843811352454094, "loss": 11.7554, "step": 65600 }, { "epoch": 10.512, "grad_norm": 0.21416126191616058, "learning_rate": 0.0002984357134285371, "loss": 11.7885, "step": 65700 }, { "epoch": 10.528, "grad_norm": 0.1401461511850357, "learning_rate": 0.0002984333133325333, "loss": 12.2278, "step": 65800 }, { "epoch": 10.544, "grad_norm": 0.15199688076972961, "learning_rate": 0.00029843091323652945, "loss": 12.0611, "step": 65900 }, { "epoch": 10.56, "grad_norm": 0.16079574823379517, "learning_rate": 0.0002984285131405256, "loss": 11.3473, "step": 66000 }, { "epoch": 10.576, "grad_norm": 0.14441320300102234, "learning_rate": 0.0002984261130445218, "loss": 11.5284, "step": 66100 }, { "epoch": 10.592, "grad_norm": 0.1676328480243683, "learning_rate": 0.0002984237129485179, "loss": 11.6487, "step": 66200 }, { "epoch": 10.608, "grad_norm": 0.13956011831760406, "learning_rate": 0.00029842131285251407, "loss": 11.772, "step": 66300 }, { "epoch": 10.624, "grad_norm": 0.17723798751831055, "learning_rate": 0.00029841891275651024, "loss": 11.7424, "step": 66400 }, { "epoch": 10.64, "grad_norm": 0.18211066722869873, "learning_rate": 0.0002984165126605064, "loss": 11.9263, "step": 66500 }, { "epoch": 10.656, "grad_norm": 0.18465609848499298, "learning_rate": 0.0002984141125645026, "loss": 12.1533, "step": 66600 }, { "epoch": 10.672, "grad_norm": 0.15032535791397095, "learning_rate": 0.0002984117124684987, "loss": 11.8711, "step": 66700 }, { "epoch": 10.688, "grad_norm": 0.25048136711120605, "learning_rate": 0.00029840931237249486, "loss": 12.1925, "step": 66800 }, { "epoch": 10.704, "grad_norm": 0.17632503807544708, "learning_rate": 0.00029840691227649103, "loss": 12.0652, "step": 66900 }, { "epoch": 10.72, "grad_norm": 0.17492571473121643, "learning_rate": 0.0002984045121804872, "loss": 12.3961, "step": 67000 }, { "epoch": 10.736, "grad_norm": 0.17848367989063263, "learning_rate": 0.00029840211208448337, "loss": 12.0021, "step": 67100 }, { "epoch": 10.752, "grad_norm": 0.23175941407680511, "learning_rate": 0.00029839971198847954, "loss": 11.4583, "step": 67200 }, { "epoch": 10.768, "grad_norm": 0.24281519651412964, "learning_rate": 0.0002983973358934357, "loss": 12.0376, "step": 67300 }, { "epoch": 10.784, "grad_norm": 0.18129272758960724, "learning_rate": 0.00029839493579743184, "loss": 12.1892, "step": 67400 }, { "epoch": 10.8, "grad_norm": 0.1454136222600937, "learning_rate": 0.000298392535701428, "loss": 11.9333, "step": 67500 }, { "epoch": 10.816, "grad_norm": 0.12412439286708832, "learning_rate": 0.0002983901356054242, "loss": 11.0441, "step": 67600 }, { "epoch": 10.832, "grad_norm": 0.19814914464950562, "learning_rate": 0.00029838773550942035, "loss": 11.4348, "step": 67700 }, { "epoch": 10.848, "grad_norm": 0.2250308245420456, "learning_rate": 0.0002983853354134165, "loss": 11.723, "step": 67800 }, { "epoch": 10.864, "grad_norm": 0.1328551471233368, "learning_rate": 0.0002983829353174127, "loss": 11.4324, "step": 67900 }, { "epoch": 10.88, "grad_norm": 0.2366170883178711, "learning_rate": 0.00029838053522140886, "loss": 12.1462, "step": 68000 }, { "epoch": 10.896, "grad_norm": 0.20911742746829987, "learning_rate": 0.00029837813512540503, "loss": 11.6067, "step": 68100 }, { "epoch": 10.912, "grad_norm": 0.1770290583372116, "learning_rate": 0.00029837573502940114, "loss": 11.9299, "step": 68200 }, { "epoch": 10.928, "grad_norm": 0.21429571509361267, "learning_rate": 0.0002983733349333973, "loss": 11.3683, "step": 68300 }, { "epoch": 10.943999999999999, "grad_norm": 0.1542270928621292, "learning_rate": 0.0002983709348373935, "loss": 11.3472, "step": 68400 }, { "epoch": 10.96, "grad_norm": 0.2420985847711563, "learning_rate": 0.00029836853474138965, "loss": 11.5805, "step": 68500 }, { "epoch": 10.975999999999999, "grad_norm": 0.17665143311023712, "learning_rate": 0.0002983661346453858, "loss": 11.7406, "step": 68600 }, { "epoch": 10.992, "grad_norm": 0.26210835576057434, "learning_rate": 0.00029836373454938193, "loss": 11.7457, "step": 68700 }, { "epoch": 11.008, "grad_norm": 0.14472606778144836, "learning_rate": 0.0002983613344533781, "loss": 11.4662, "step": 68800 }, { "epoch": 11.024, "grad_norm": 0.17449091374874115, "learning_rate": 0.0002983589343573743, "loss": 11.0297, "step": 68900 }, { "epoch": 11.04, "grad_norm": 0.15488724410533905, "learning_rate": 0.00029835653426137044, "loss": 11.792, "step": 69000 }, { "epoch": 11.056, "grad_norm": 0.1447325348854065, "learning_rate": 0.0002983541341653666, "loss": 11.4483, "step": 69100 }, { "epoch": 11.072, "grad_norm": 0.17111489176750183, "learning_rate": 0.0002983517340693628, "loss": 11.1499, "step": 69200 }, { "epoch": 11.088, "grad_norm": 0.17446951568126678, "learning_rate": 0.0002983493339733589, "loss": 10.6961, "step": 69300 }, { "epoch": 11.104, "grad_norm": 0.1421278566122055, "learning_rate": 0.00029834693387735506, "loss": 11.4794, "step": 69400 }, { "epoch": 11.12, "grad_norm": 0.17439322173595428, "learning_rate": 0.00029834455778231125, "loss": 11.0965, "step": 69500 }, { "epoch": 11.136, "grad_norm": 0.16200323402881622, "learning_rate": 0.0002983421576863074, "loss": 11.1367, "step": 69600 }, { "epoch": 11.152, "grad_norm": 0.3391527831554413, "learning_rate": 0.0002983397575903036, "loss": 10.7709, "step": 69700 }, { "epoch": 11.168, "grad_norm": 0.18793489038944244, "learning_rate": 0.0002983373574942997, "loss": 11.1479, "step": 69800 }, { "epoch": 11.184, "grad_norm": 0.1996636688709259, "learning_rate": 0.0002983349573982959, "loss": 11.8347, "step": 69900 }, { "epoch": 11.2, "grad_norm": 0.166090190410614, "learning_rate": 0.00029833255730229205, "loss": 10.9514, "step": 70000 }, { "epoch": 11.216, "grad_norm": 0.17243006825447083, "learning_rate": 0.0002983301572062882, "loss": 11.2505, "step": 70100 }, { "epoch": 11.232, "grad_norm": 0.17860250174999237, "learning_rate": 0.0002983277571102844, "loss": 11.023, "step": 70200 }, { "epoch": 11.248, "grad_norm": 0.13896320760250092, "learning_rate": 0.00029832535701428055, "loss": 11.092, "step": 70300 }, { "epoch": 11.264, "grad_norm": 0.20008546113967896, "learning_rate": 0.00029832295691827667, "loss": 11.2161, "step": 70400 }, { "epoch": 11.28, "grad_norm": 0.14014984667301178, "learning_rate": 0.00029832055682227284, "loss": 11.315, "step": 70500 }, { "epoch": 11.296, "grad_norm": 0.16158168017864227, "learning_rate": 0.000298318156726269, "loss": 11.3935, "step": 70600 }, { "epoch": 11.312, "grad_norm": 0.15444719791412354, "learning_rate": 0.0002983157566302652, "loss": 10.9662, "step": 70700 }, { "epoch": 11.328, "grad_norm": 0.21788270771503448, "learning_rate": 0.00029831335653426134, "loss": 11.4848, "step": 70800 }, { "epoch": 11.344, "grad_norm": 0.17685194313526154, "learning_rate": 0.0002983109564382575, "loss": 11.3436, "step": 70900 }, { "epoch": 11.36, "grad_norm": 0.15553423762321472, "learning_rate": 0.0002983085563422537, "loss": 11.1136, "step": 71000 }, { "epoch": 11.376, "grad_norm": 0.1547129899263382, "learning_rate": 0.00029830615624624985, "loss": 10.7924, "step": 71100 }, { "epoch": 11.392, "grad_norm": 0.1907842457294464, "learning_rate": 0.000298303756150246, "loss": 10.9726, "step": 71200 }, { "epoch": 11.408, "grad_norm": 0.15053051710128784, "learning_rate": 0.00029830135605424214, "loss": 12.0626, "step": 71300 }, { "epoch": 11.424, "grad_norm": 0.14403216540813446, "learning_rate": 0.0002982989559582383, "loss": 11.428, "step": 71400 }, { "epoch": 11.44, "grad_norm": 0.15850169956684113, "learning_rate": 0.0002982965558622345, "loss": 11.1033, "step": 71500 }, { "epoch": 11.456, "grad_norm": 0.18223829567432404, "learning_rate": 0.00029829417976719066, "loss": 11.5088, "step": 71600 }, { "epoch": 11.472, "grad_norm": 0.18121246993541718, "learning_rate": 0.00029829177967118683, "loss": 11.0869, "step": 71700 }, { "epoch": 11.488, "grad_norm": 0.1591707020998001, "learning_rate": 0.00029828937957518295, "loss": 10.5898, "step": 71800 }, { "epoch": 11.504, "grad_norm": 0.1652923971414566, "learning_rate": 0.0002982869794791791, "loss": 11.3647, "step": 71900 }, { "epoch": 11.52, "grad_norm": 0.1930815577507019, "learning_rate": 0.0002982845793831753, "loss": 11.4873, "step": 72000 }, { "epoch": 11.536, "grad_norm": 0.1646055281162262, "learning_rate": 0.00029828217928717145, "loss": 11.3799, "step": 72100 }, { "epoch": 11.552, "grad_norm": 0.19326475262641907, "learning_rate": 0.0002982797791911676, "loss": 10.8387, "step": 72200 }, { "epoch": 11.568, "grad_norm": 0.23909342288970947, "learning_rate": 0.0002982773790951638, "loss": 10.757, "step": 72300 }, { "epoch": 11.584, "grad_norm": 0.1616702377796173, "learning_rate": 0.0002982749789991599, "loss": 10.7907, "step": 72400 }, { "epoch": 11.6, "grad_norm": 0.16581912338733673, "learning_rate": 0.0002982725789031561, "loss": 10.8977, "step": 72500 }, { "epoch": 11.616, "grad_norm": 0.1478215605020523, "learning_rate": 0.00029827017880715225, "loss": 10.9325, "step": 72600 }, { "epoch": 11.632, "grad_norm": 0.2693212628364563, "learning_rate": 0.0002982677787111484, "loss": 11.2731, "step": 72700 }, { "epoch": 11.648, "grad_norm": 0.15163065493106842, "learning_rate": 0.0002982653786151446, "loss": 11.0141, "step": 72800 }, { "epoch": 11.664, "grad_norm": 0.15364685654640198, "learning_rate": 0.00029826297851914075, "loss": 10.6781, "step": 72900 }, { "epoch": 11.68, "grad_norm": 0.1410771906375885, "learning_rate": 0.00029826057842313687, "loss": 11.0262, "step": 73000 }, { "epoch": 11.696, "grad_norm": 0.2245720773935318, "learning_rate": 0.00029825817832713304, "loss": 11.51, "step": 73100 }, { "epoch": 11.712, "grad_norm": 0.17434003949165344, "learning_rate": 0.0002982557782311292, "loss": 10.7819, "step": 73200 }, { "epoch": 11.728, "grad_norm": 0.13878166675567627, "learning_rate": 0.0002982534021360854, "loss": 10.8833, "step": 73300 }, { "epoch": 11.744, "grad_norm": 0.13650259375572205, "learning_rate": 0.00029825100204008157, "loss": 11.0158, "step": 73400 }, { "epoch": 11.76, "grad_norm": 0.22818398475646973, "learning_rate": 0.00029824860194407773, "loss": 10.8819, "step": 73500 }, { "epoch": 11.776, "grad_norm": 0.14601178467273712, "learning_rate": 0.0002982462018480739, "loss": 10.0593, "step": 73600 }, { "epoch": 11.792, "grad_norm": 0.2245131880044937, "learning_rate": 0.00029824380175207007, "loss": 10.6634, "step": 73700 }, { "epoch": 11.808, "grad_norm": 1.000320553779602, "learning_rate": 0.0002982414016560662, "loss": 10.961, "step": 73800 }, { "epoch": 11.824, "grad_norm": 0.18026384711265564, "learning_rate": 0.00029823900156006236, "loss": 11.1536, "step": 73900 }, { "epoch": 11.84, "grad_norm": 0.15758727490901947, "learning_rate": 0.0002982366014640585, "loss": 10.6586, "step": 74000 }, { "epoch": 11.856, "grad_norm": 0.19163353741168976, "learning_rate": 0.0002982342013680547, "loss": 11.0334, "step": 74100 }, { "epoch": 11.872, "grad_norm": 0.11467296630144119, "learning_rate": 0.00029823180127205086, "loss": 10.8224, "step": 74200 }, { "epoch": 11.888, "grad_norm": 0.15869416296482086, "learning_rate": 0.00029822940117604703, "loss": 10.4906, "step": 74300 }, { "epoch": 11.904, "grad_norm": 0.1966274380683899, "learning_rate": 0.00029822700108004315, "loss": 10.4152, "step": 74400 }, { "epoch": 11.92, "grad_norm": 0.16446225345134735, "learning_rate": 0.0002982246009840393, "loss": 10.4887, "step": 74500 }, { "epoch": 11.936, "grad_norm": 0.16940893232822418, "learning_rate": 0.0002982222008880355, "loss": 10.39, "step": 74600 }, { "epoch": 11.952, "grad_norm": 0.1838199496269226, "learning_rate": 0.00029821980079203166, "loss": 10.384, "step": 74700 }, { "epoch": 11.968, "grad_norm": 0.17523860931396484, "learning_rate": 0.0002982174006960278, "loss": 10.8568, "step": 74800 }, { "epoch": 11.984, "grad_norm": 0.1432792991399765, "learning_rate": 0.000298215000600024, "loss": 10.3596, "step": 74900 }, { "epoch": 12.0, "grad_norm": 0.20020250976085663, "learning_rate": 0.0002982126005040201, "loss": 10.14, "step": 75000 }, { "epoch": 12.016, "grad_norm": 0.19777518510818481, "learning_rate": 0.0002982102004080163, "loss": 10.9224, "step": 75100 }, { "epoch": 12.032, "grad_norm": 0.17126210033893585, "learning_rate": 0.00029820780031201245, "loss": 10.5306, "step": 75200 }, { "epoch": 12.048, "grad_norm": 0.16797253489494324, "learning_rate": 0.0002982054002160086, "loss": 10.8089, "step": 75300 }, { "epoch": 12.064, "grad_norm": 0.20862014591693878, "learning_rate": 0.0002982030001200048, "loss": 10.4757, "step": 75400 }, { "epoch": 12.08, "grad_norm": 0.18397895991802216, "learning_rate": 0.0002982006000240009, "loss": 9.9135, "step": 75500 }, { "epoch": 12.096, "grad_norm": 0.16641663014888763, "learning_rate": 0.00029819819992799707, "loss": 10.6077, "step": 75600 }, { "epoch": 12.112, "grad_norm": 0.16870319843292236, "learning_rate": 0.00029819579983199324, "loss": 10.5788, "step": 75700 }, { "epoch": 12.128, "grad_norm": 0.16674315929412842, "learning_rate": 0.0002981933997359894, "loss": 10.7791, "step": 75800 }, { "epoch": 12.144, "grad_norm": 0.1637590378522873, "learning_rate": 0.0002981909996399856, "loss": 10.0084, "step": 75900 }, { "epoch": 12.16, "grad_norm": 0.16165070235729218, "learning_rate": 0.00029818859954398175, "loss": 10.7957, "step": 76000 }, { "epoch": 12.176, "grad_norm": 0.1414174884557724, "learning_rate": 0.00029818619944797786, "loss": 9.8668, "step": 76100 }, { "epoch": 12.192, "grad_norm": 0.1490393877029419, "learning_rate": 0.00029818379935197403, "loss": 10.5844, "step": 76200 }, { "epoch": 12.208, "grad_norm": 0.15608841180801392, "learning_rate": 0.0002981813992559702, "loss": 10.7121, "step": 76300 }, { "epoch": 12.224, "grad_norm": 0.1658240258693695, "learning_rate": 0.00029817899915996637, "loss": 10.4018, "step": 76400 }, { "epoch": 12.24, "grad_norm": 0.1533997803926468, "learning_rate": 0.00029817659906396254, "loss": 10.0445, "step": 76500 }, { "epoch": 12.256, "grad_norm": 0.14606164395809174, "learning_rate": 0.00029817419896795865, "loss": 10.8624, "step": 76600 }, { "epoch": 12.272, "grad_norm": 0.1926526576280594, "learning_rate": 0.0002981717988719548, "loss": 9.9639, "step": 76700 }, { "epoch": 12.288, "grad_norm": 0.16846922039985657, "learning_rate": 0.000298169398775951, "loss": 10.4076, "step": 76800 }, { "epoch": 12.304, "grad_norm": 0.1497686505317688, "learning_rate": 0.00029816699867994716, "loss": 10.3741, "step": 76900 }, { "epoch": 12.32, "grad_norm": 0.17146418988704681, "learning_rate": 0.00029816459858394333, "loss": 10.6163, "step": 77000 }, { "epoch": 12.336, "grad_norm": 0.169904425740242, "learning_rate": 0.0002981621984879395, "loss": 10.0631, "step": 77100 }, { "epoch": 12.352, "grad_norm": 0.15850874781608582, "learning_rate": 0.00029815979839193567, "loss": 10.0799, "step": 77200 }, { "epoch": 12.368, "grad_norm": 0.15920597314834595, "learning_rate": 0.00029815739829593184, "loss": 9.6119, "step": 77300 }, { "epoch": 12.384, "grad_norm": 0.2246374636888504, "learning_rate": 0.000298154998199928, "loss": 10.3029, "step": 77400 }, { "epoch": 12.4, "grad_norm": 0.168796569108963, "learning_rate": 0.0002981525981039241, "loss": 10.3374, "step": 77500 }, { "epoch": 12.416, "grad_norm": 0.1864066869020462, "learning_rate": 0.0002981501980079203, "loss": 10.0087, "step": 77600 }, { "epoch": 12.432, "grad_norm": 0.14401012659072876, "learning_rate": 0.0002981478219128765, "loss": 10.1803, "step": 77700 }, { "epoch": 12.448, "grad_norm": 0.1375201791524887, "learning_rate": 0.00029814542181687265, "loss": 9.911, "step": 77800 }, { "epoch": 12.464, "grad_norm": 0.1398741900920868, "learning_rate": 0.0002981430217208688, "loss": 10.261, "step": 77900 }, { "epoch": 12.48, "grad_norm": 0.15873165428638458, "learning_rate": 0.000298140621624865, "loss": 10.7101, "step": 78000 }, { "epoch": 12.496, "grad_norm": 0.1714644730091095, "learning_rate": 0.0002981382215288611, "loss": 10.1714, "step": 78100 }, { "epoch": 12.512, "grad_norm": 0.1591562181711197, "learning_rate": 0.00029813582143285727, "loss": 10.1645, "step": 78200 }, { "epoch": 12.528, "grad_norm": 0.18264716863632202, "learning_rate": 0.00029813342133685344, "loss": 10.3564, "step": 78300 }, { "epoch": 12.544, "grad_norm": 0.1514509618282318, "learning_rate": 0.0002981310212408496, "loss": 10.0476, "step": 78400 }, { "epoch": 12.56, "grad_norm": 0.19021818041801453, "learning_rate": 0.0002981286211448458, "loss": 10.2492, "step": 78500 }, { "epoch": 12.576, "grad_norm": 0.21221980452537537, "learning_rate": 0.0002981262210488419, "loss": 9.7379, "step": 78600 }, { "epoch": 12.592, "grad_norm": 0.16575005650520325, "learning_rate": 0.00029812382095283806, "loss": 10.237, "step": 78700 }, { "epoch": 12.608, "grad_norm": 0.12602052092552185, "learning_rate": 0.00029812142085683423, "loss": 10.0729, "step": 78800 }, { "epoch": 12.624, "grad_norm": 0.23105710744857788, "learning_rate": 0.0002981190207608304, "loss": 9.8609, "step": 78900 }, { "epoch": 12.64, "grad_norm": 0.29600638151168823, "learning_rate": 0.00029811662066482657, "loss": 9.8653, "step": 79000 }, { "epoch": 12.656, "grad_norm": 0.19172607362270355, "learning_rate": 0.00029811422056882274, "loss": 9.8614, "step": 79100 }, { "epoch": 12.672, "grad_norm": 0.1930418759584427, "learning_rate": 0.00029811182047281886, "loss": 10.0208, "step": 79200 }, { "epoch": 12.688, "grad_norm": 0.12393278628587723, "learning_rate": 0.000298109420376815, "loss": 10.349, "step": 79300 }, { "epoch": 12.704, "grad_norm": 0.1565830409526825, "learning_rate": 0.0002981070202808112, "loss": 10.5402, "step": 79400 }, { "epoch": 12.72, "grad_norm": 0.13968247175216675, "learning_rate": 0.00029810462018480736, "loss": 9.9296, "step": 79500 }, { "epoch": 12.736, "grad_norm": 0.17765802145004272, "learning_rate": 0.00029810222008880353, "loss": 9.8002, "step": 79600 }, { "epoch": 12.752, "grad_norm": 0.23838719725608826, "learning_rate": 0.00029809981999279965, "loss": 9.8636, "step": 79700 }, { "epoch": 12.768, "grad_norm": 0.23086270689964294, "learning_rate": 0.0002980974438977559, "loss": 9.9585, "step": 79800 }, { "epoch": 12.784, "grad_norm": 0.14923255145549774, "learning_rate": 0.00029809504380175206, "loss": 9.5379, "step": 79900 }, { "epoch": 12.8, "grad_norm": 0.1599462628364563, "learning_rate": 0.00029809264370574823, "loss": 9.641, "step": 80000 }, { "epoch": 12.816, "grad_norm": 0.1716078370809555, "learning_rate": 0.00029809024360974434, "loss": 9.8697, "step": 80100 }, { "epoch": 12.832, "grad_norm": 0.19052661955356598, "learning_rate": 0.0002980878435137405, "loss": 9.6785, "step": 80200 }, { "epoch": 12.848, "grad_norm": 0.15575654804706573, "learning_rate": 0.0002980854434177367, "loss": 9.9394, "step": 80300 }, { "epoch": 12.864, "grad_norm": 0.19439518451690674, "learning_rate": 0.00029808304332173285, "loss": 9.5522, "step": 80400 }, { "epoch": 12.88, "grad_norm": 0.17798827588558197, "learning_rate": 0.000298080643225729, "loss": 9.9453, "step": 80500 }, { "epoch": 12.896, "grad_norm": 0.16586044430732727, "learning_rate": 0.00029807824312972513, "loss": 9.8505, "step": 80600 }, { "epoch": 12.912, "grad_norm": 0.15794214606285095, "learning_rate": 0.0002980758430337213, "loss": 10.0497, "step": 80700 }, { "epoch": 12.928, "grad_norm": 0.1685098111629486, "learning_rate": 0.0002980734429377175, "loss": 10.2658, "step": 80800 }, { "epoch": 12.943999999999999, "grad_norm": 0.16599301993846893, "learning_rate": 0.00029807104284171364, "loss": 9.837, "step": 80900 }, { "epoch": 12.96, "grad_norm": 0.14692434668540955, "learning_rate": 0.0002980686427457098, "loss": 10.1817, "step": 81000 }, { "epoch": 12.975999999999999, "grad_norm": 0.15374502539634705, "learning_rate": 0.000298066242649706, "loss": 10.1231, "step": 81100 }, { "epoch": 12.992, "grad_norm": 0.1369294375181198, "learning_rate": 0.0002980638425537021, "loss": 9.8245, "step": 81200 }, { "epoch": 13.008, "grad_norm": 0.20259645581245422, "learning_rate": 0.00029806144245769826, "loss": 9.7027, "step": 81300 }, { "epoch": 13.024, "grad_norm": 0.1258879452943802, "learning_rate": 0.00029805904236169443, "loss": 9.8863, "step": 81400 }, { "epoch": 13.04, "grad_norm": 0.14773085713386536, "learning_rate": 0.0002980566422656906, "loss": 9.4255, "step": 81500 }, { "epoch": 13.056, "grad_norm": 0.17212265729904175, "learning_rate": 0.00029805424216968677, "loss": 10.0506, "step": 81600 }, { "epoch": 13.072, "grad_norm": 0.179426372051239, "learning_rate": 0.0002980518420736829, "loss": 9.5137, "step": 81700 }, { "epoch": 13.088, "grad_norm": 0.15935377776622772, "learning_rate": 0.00029804944197767906, "loss": 9.3141, "step": 81800 }, { "epoch": 13.104, "grad_norm": 0.17460429668426514, "learning_rate": 0.0002980470418816752, "loss": 9.8005, "step": 81900 }, { "epoch": 13.12, "grad_norm": 0.20005491375923157, "learning_rate": 0.0002980446417856714, "loss": 9.7239, "step": 82000 }, { "epoch": 13.136, "grad_norm": 0.15051016211509705, "learning_rate": 0.00029804224168966756, "loss": 10.214, "step": 82100 }, { "epoch": 13.152, "grad_norm": 0.16659046709537506, "learning_rate": 0.00029803984159366373, "loss": 9.4695, "step": 82200 }, { "epoch": 13.168, "grad_norm": 0.16346730291843414, "learning_rate": 0.00029803744149765985, "loss": 9.5839, "step": 82300 }, { "epoch": 13.184, "grad_norm": 0.16145597398281097, "learning_rate": 0.000298035041401656, "loss": 9.2663, "step": 82400 }, { "epoch": 13.2, "grad_norm": 0.13834603130817413, "learning_rate": 0.00029803266530661226, "loss": 9.6926, "step": 82500 }, { "epoch": 13.216, "grad_norm": 0.17841538786888123, "learning_rate": 0.0002980302652106084, "loss": 9.4752, "step": 82600 }, { "epoch": 13.232, "grad_norm": 0.14639347791671753, "learning_rate": 0.00029802786511460454, "loss": 9.9606, "step": 82700 }, { "epoch": 13.248, "grad_norm": 0.15291540324687958, "learning_rate": 0.0002980254650186007, "loss": 9.9284, "step": 82800 }, { "epoch": 13.264, "grad_norm": 0.15908333659172058, "learning_rate": 0.0002980230649225969, "loss": 9.5464, "step": 82900 }, { "epoch": 13.28, "grad_norm": 0.16768860816955566, "learning_rate": 0.00029802066482659305, "loss": 10.2164, "step": 83000 }, { "epoch": 13.296, "grad_norm": 0.18221326172351837, "learning_rate": 0.0002980182647305892, "loss": 9.6566, "step": 83100 }, { "epoch": 13.312, "grad_norm": 0.13944192230701447, "learning_rate": 0.00029801586463458534, "loss": 9.4149, "step": 83200 }, { "epoch": 13.328, "grad_norm": 0.20090098679065704, "learning_rate": 0.0002980134645385815, "loss": 9.1968, "step": 83300 }, { "epoch": 13.344, "grad_norm": 0.17636704444885254, "learning_rate": 0.0002980110644425777, "loss": 9.4497, "step": 83400 }, { "epoch": 13.36, "grad_norm": 0.19672048091888428, "learning_rate": 0.00029800866434657384, "loss": 9.3083, "step": 83500 }, { "epoch": 13.376, "grad_norm": 0.1991618573665619, "learning_rate": 0.00029800626425057, "loss": 9.59, "step": 83600 }, { "epoch": 13.392, "grad_norm": 0.17260773479938507, "learning_rate": 0.00029800386415456613, "loss": 9.9553, "step": 83700 }, { "epoch": 13.408, "grad_norm": 0.13101576268672943, "learning_rate": 0.0002980014640585623, "loss": 10.0732, "step": 83800 }, { "epoch": 13.424, "grad_norm": 0.16349157691001892, "learning_rate": 0.00029799906396255847, "loss": 9.8363, "step": 83900 }, { "epoch": 13.44, "grad_norm": 0.1792200654745102, "learning_rate": 0.00029799666386655464, "loss": 9.9456, "step": 84000 }, { "epoch": 13.456, "grad_norm": 0.13476693630218506, "learning_rate": 0.0002979942637705508, "loss": 9.4642, "step": 84100 }, { "epoch": 13.472, "grad_norm": 0.17343075573444366, "learning_rate": 0.000297991863674547, "loss": 9.4041, "step": 84200 }, { "epoch": 13.488, "grad_norm": 0.16127794981002808, "learning_rate": 0.0002979894635785431, "loss": 9.2465, "step": 84300 }, { "epoch": 13.504, "grad_norm": 0.14993996918201447, "learning_rate": 0.00029798706348253926, "loss": 9.5946, "step": 84400 }, { "epoch": 13.52, "grad_norm": 0.21931160986423492, "learning_rate": 0.0002979846873874955, "loss": 9.5796, "step": 84500 }, { "epoch": 13.536, "grad_norm": 0.15303994715213776, "learning_rate": 0.0002979822872914916, "loss": 9.4222, "step": 84600 }, { "epoch": 13.552, "grad_norm": 0.1905248612165451, "learning_rate": 0.0002979798871954878, "loss": 9.4192, "step": 84700 }, { "epoch": 13.568, "grad_norm": 0.17656217515468597, "learning_rate": 0.00029797748709948395, "loss": 9.685, "step": 84800 }, { "epoch": 13.584, "grad_norm": 0.31464865803718567, "learning_rate": 0.0002979750870034801, "loss": 9.4839, "step": 84900 }, { "epoch": 13.6, "grad_norm": 0.20140250027179718, "learning_rate": 0.0002979726869074763, "loss": 9.4393, "step": 85000 }, { "epoch": 13.616, "grad_norm": 0.1453031599521637, "learning_rate": 0.00029797028681147246, "loss": 9.4777, "step": 85100 }, { "epoch": 13.632, "grad_norm": 0.15559718012809753, "learning_rate": 0.0002979678867154686, "loss": 9.7772, "step": 85200 }, { "epoch": 13.648, "grad_norm": 0.16849826276302338, "learning_rate": 0.00029796548661946475, "loss": 9.0954, "step": 85300 }, { "epoch": 13.664, "grad_norm": 0.15798023343086243, "learning_rate": 0.0002979630865234609, "loss": 9.7756, "step": 85400 }, { "epoch": 13.68, "grad_norm": 0.0940115824341774, "learning_rate": 0.0002979606864274571, "loss": 9.9294, "step": 85500 }, { "epoch": 13.696, "grad_norm": 0.18608032166957855, "learning_rate": 0.00029795828633145325, "loss": 9.4524, "step": 85600 }, { "epoch": 13.712, "grad_norm": 0.16172797977924347, "learning_rate": 0.00029795588623544937, "loss": 9.6146, "step": 85700 }, { "epoch": 13.728, "grad_norm": 0.1493913233280182, "learning_rate": 0.00029795348613944554, "loss": 8.8783, "step": 85800 }, { "epoch": 13.744, "grad_norm": 0.1365765631198883, "learning_rate": 0.0002979510860434417, "loss": 9.4707, "step": 85900 }, { "epoch": 13.76, "grad_norm": 0.17814397811889648, "learning_rate": 0.0002979486859474379, "loss": 9.4121, "step": 86000 }, { "epoch": 13.776, "grad_norm": 0.16484831273555756, "learning_rate": 0.00029794628585143405, "loss": 9.0902, "step": 86100 }, { "epoch": 13.792, "grad_norm": 0.1435382217168808, "learning_rate": 0.0002979438857554302, "loss": 9.4565, "step": 86200 }, { "epoch": 13.808, "grad_norm": 0.1451929211616516, "learning_rate": 0.00029794148565942633, "loss": 9.6377, "step": 86300 }, { "epoch": 13.824, "grad_norm": 0.1439056396484375, "learning_rate": 0.0002979390855634225, "loss": 9.2624, "step": 86400 }, { "epoch": 13.84, "grad_norm": 0.1712324023246765, "learning_rate": 0.00029793668546741867, "loss": 9.2021, "step": 86500 }, { "epoch": 13.856, "grad_norm": 0.15382009744644165, "learning_rate": 0.00029793428537141484, "loss": 8.8688, "step": 86600 }, { "epoch": 13.872, "grad_norm": 0.14327426254749298, "learning_rate": 0.000297931885275411, "loss": 9.2336, "step": 86700 }, { "epoch": 13.888, "grad_norm": 0.21682417392730713, "learning_rate": 0.0002979294851794071, "loss": 8.9508, "step": 86800 }, { "epoch": 13.904, "grad_norm": 0.18012550473213196, "learning_rate": 0.0002979270850834033, "loss": 8.8259, "step": 86900 }, { "epoch": 13.92, "grad_norm": 0.19224317371845245, "learning_rate": 0.00029792468498739946, "loss": 9.0594, "step": 87000 }, { "epoch": 13.936, "grad_norm": 0.14684438705444336, "learning_rate": 0.00029792228489139563, "loss": 8.6664, "step": 87100 }, { "epoch": 13.952, "grad_norm": 0.15808767080307007, "learning_rate": 0.0002979198847953918, "loss": 8.8133, "step": 87200 }, { "epoch": 13.968, "grad_norm": 0.1466471403837204, "learning_rate": 0.00029791748469938797, "loss": 9.2512, "step": 87300 }, { "epoch": 13.984, "grad_norm": 0.13929226994514465, "learning_rate": 0.0002979150846033841, "loss": 9.0263, "step": 87400 }, { "epoch": 14.0, "grad_norm": 0.1410779356956482, "learning_rate": 0.00029791268450738025, "loss": 9.0906, "step": 87500 }, { "epoch": 14.016, "grad_norm": 0.16633394360542297, "learning_rate": 0.0002979102844113764, "loss": 8.8764, "step": 87600 }, { "epoch": 14.032, "grad_norm": 0.19240239262580872, "learning_rate": 0.0002979078843153726, "loss": 8.6873, "step": 87700 }, { "epoch": 14.048, "grad_norm": 0.2285340428352356, "learning_rate": 0.00029790548421936876, "loss": 8.7636, "step": 87800 }, { "epoch": 14.064, "grad_norm": 0.16399361193180084, "learning_rate": 0.0002979030841233649, "loss": 9.3241, "step": 87900 }, { "epoch": 14.08, "grad_norm": 0.14966578781604767, "learning_rate": 0.00029790068402736104, "loss": 9.0301, "step": 88000 }, { "epoch": 14.096, "grad_norm": 0.17241202294826508, "learning_rate": 0.0002978982839313572, "loss": 8.9678, "step": 88100 }, { "epoch": 14.112, "grad_norm": 0.13520659506320953, "learning_rate": 0.0002978958838353534, "loss": 9.0678, "step": 88200 }, { "epoch": 14.128, "grad_norm": 0.15996631979942322, "learning_rate": 0.00029789348373934955, "loss": 8.7807, "step": 88300 }, { "epoch": 14.144, "grad_norm": 0.14483466744422913, "learning_rate": 0.0002978910836433457, "loss": 8.6088, "step": 88400 }, { "epoch": 14.16, "grad_norm": 0.15150679647922516, "learning_rate": 0.00029788868354734183, "loss": 9.2128, "step": 88500 }, { "epoch": 14.176, "grad_norm": 0.1668185293674469, "learning_rate": 0.0002978863074522981, "loss": 9.518, "step": 88600 }, { "epoch": 14.192, "grad_norm": 0.17209367454051971, "learning_rate": 0.00029788393135725427, "loss": 8.5952, "step": 88700 }, { "epoch": 14.208, "grad_norm": 0.15907296538352966, "learning_rate": 0.00029788155526221045, "loss": 8.7632, "step": 88800 }, { "epoch": 14.224, "grad_norm": 0.18298570811748505, "learning_rate": 0.0002978791551662066, "loss": 8.8021, "step": 88900 }, { "epoch": 14.24, "grad_norm": 0.19813942909240723, "learning_rate": 0.0002978767550702028, "loss": 9.1381, "step": 89000 }, { "epoch": 14.256, "grad_norm": 0.1819518506526947, "learning_rate": 0.00029787435497419896, "loss": 9.3086, "step": 89100 }, { "epoch": 14.272, "grad_norm": 0.1506895273923874, "learning_rate": 0.0002978719548781951, "loss": 8.7471, "step": 89200 }, { "epoch": 14.288, "grad_norm": 0.1686287224292755, "learning_rate": 0.00029786955478219125, "loss": 8.8441, "step": 89300 }, { "epoch": 14.304, "grad_norm": 0.1486745923757553, "learning_rate": 0.0002978671546861874, "loss": 9.1216, "step": 89400 }, { "epoch": 14.32, "grad_norm": 0.18762429058551788, "learning_rate": 0.0002978647545901836, "loss": 9.402, "step": 89500 }, { "epoch": 14.336, "grad_norm": 0.13964596390724182, "learning_rate": 0.00029786235449417975, "loss": 9.2773, "step": 89600 }, { "epoch": 14.352, "grad_norm": 0.2629782557487488, "learning_rate": 0.00029785995439817587, "loss": 9.05, "step": 89700 }, { "epoch": 14.368, "grad_norm": 0.12668898701667786, "learning_rate": 0.00029785755430217204, "loss": 8.8949, "step": 89800 }, { "epoch": 14.384, "grad_norm": 0.14362965524196625, "learning_rate": 0.0002978551542061682, "loss": 8.6261, "step": 89900 }, { "epoch": 14.4, "grad_norm": 0.16700971126556396, "learning_rate": 0.0002978527541101644, "loss": 8.8621, "step": 90000 }, { "epoch": 14.416, "grad_norm": 0.1597680300474167, "learning_rate": 0.00029785035401416055, "loss": 9.1614, "step": 90100 }, { "epoch": 14.432, "grad_norm": 0.16268526017665863, "learning_rate": 0.0002978479539181567, "loss": 9.2429, "step": 90200 }, { "epoch": 14.448, "grad_norm": 0.19829140603542328, "learning_rate": 0.00029784555382215283, "loss": 8.6337, "step": 90300 }, { "epoch": 14.464, "grad_norm": 0.1362706571817398, "learning_rate": 0.000297843153726149, "loss": 8.5578, "step": 90400 }, { "epoch": 14.48, "grad_norm": 0.17475652694702148, "learning_rate": 0.00029784075363014517, "loss": 9.3407, "step": 90500 }, { "epoch": 14.496, "grad_norm": 0.139988973736763, "learning_rate": 0.00029783835353414134, "loss": 8.9509, "step": 90600 }, { "epoch": 14.512, "grad_norm": 0.15270425379276276, "learning_rate": 0.0002978359534381375, "loss": 8.6833, "step": 90700 }, { "epoch": 14.528, "grad_norm": 0.12172385305166245, "learning_rate": 0.0002978335533421336, "loss": 8.1913, "step": 90800 }, { "epoch": 14.544, "grad_norm": 0.18453091382980347, "learning_rate": 0.0002978311532461298, "loss": 9.0573, "step": 90900 }, { "epoch": 14.56, "grad_norm": 0.12650534510612488, "learning_rate": 0.00029782875315012596, "loss": 8.8951, "step": 91000 }, { "epoch": 14.576, "grad_norm": 0.19508056342601776, "learning_rate": 0.00029782635305412213, "loss": 8.8831, "step": 91100 }, { "epoch": 14.592, "grad_norm": 0.12826193869113922, "learning_rate": 0.0002978239529581183, "loss": 8.7428, "step": 91200 }, { "epoch": 14.608, "grad_norm": 0.16497032344341278, "learning_rate": 0.00029782155286211447, "loss": 9.226, "step": 91300 }, { "epoch": 14.624, "grad_norm": 0.1467789113521576, "learning_rate": 0.0002978191527661106, "loss": 8.56, "step": 91400 }, { "epoch": 14.64, "grad_norm": 0.13535846769809723, "learning_rate": 0.00029781675267010675, "loss": 9.2005, "step": 91500 }, { "epoch": 14.656, "grad_norm": 0.2261963039636612, "learning_rate": 0.000297814352574103, "loss": 8.9913, "step": 91600 }, { "epoch": 14.672, "grad_norm": 0.16329319775104523, "learning_rate": 0.0002978119524780991, "loss": 8.8455, "step": 91700 }, { "epoch": 14.688, "grad_norm": 0.14644941687583923, "learning_rate": 0.00029780955238209526, "loss": 8.8035, "step": 91800 }, { "epoch": 14.704, "grad_norm": 0.17719560861587524, "learning_rate": 0.00029780715228609143, "loss": 8.9548, "step": 91900 }, { "epoch": 14.72, "grad_norm": 0.17204242944717407, "learning_rate": 0.0002978047521900876, "loss": 8.9065, "step": 92000 }, { "epoch": 14.736, "grad_norm": 0.15323054790496826, "learning_rate": 0.00029780235209408377, "loss": 8.642, "step": 92100 }, { "epoch": 14.752, "grad_norm": 0.12264496088027954, "learning_rate": 0.00029779995199807994, "loss": 8.7372, "step": 92200 }, { "epoch": 14.768, "grad_norm": 0.13607698678970337, "learning_rate": 0.00029779755190207605, "loss": 8.649, "step": 92300 }, { "epoch": 14.784, "grad_norm": 0.1529749035835266, "learning_rate": 0.0002977951518060722, "loss": 8.6928, "step": 92400 }, { "epoch": 14.8, "grad_norm": 0.14829668402671814, "learning_rate": 0.0002977927517100684, "loss": 8.2178, "step": 92500 }, { "epoch": 14.816, "grad_norm": 0.15614420175552368, "learning_rate": 0.00029779035161406456, "loss": 8.4939, "step": 92600 }, { "epoch": 14.832, "grad_norm": 0.18708457052707672, "learning_rate": 0.00029778795151806073, "loss": 8.4044, "step": 92700 }, { "epoch": 14.848, "grad_norm": 0.1700950413942337, "learning_rate": 0.00029778555142205684, "loss": 9.142, "step": 92800 }, { "epoch": 14.864, "grad_norm": 0.17176997661590576, "learning_rate": 0.000297783151326053, "loss": 8.3459, "step": 92900 }, { "epoch": 14.88, "grad_norm": 0.17668530344963074, "learning_rate": 0.0002977807512300492, "loss": 8.4129, "step": 93000 }, { "epoch": 14.896, "grad_norm": 0.13708771765232086, "learning_rate": 0.00029777835113404535, "loss": 8.6625, "step": 93100 }, { "epoch": 14.912, "grad_norm": 0.2073189914226532, "learning_rate": 0.00029777597503900154, "loss": 8.8295, "step": 93200 }, { "epoch": 14.928, "grad_norm": 0.1584160029888153, "learning_rate": 0.0002977735749429977, "loss": 8.2892, "step": 93300 }, { "epoch": 14.943999999999999, "grad_norm": 0.13419002294540405, "learning_rate": 0.0002977711748469938, "loss": 8.6564, "step": 93400 }, { "epoch": 14.96, "grad_norm": 0.12294425070285797, "learning_rate": 0.00029776877475099, "loss": 8.6937, "step": 93500 }, { "epoch": 14.975999999999999, "grad_norm": 0.12022320926189423, "learning_rate": 0.00029776637465498616, "loss": 8.6577, "step": 93600 }, { "epoch": 14.992, "grad_norm": 0.1635560393333435, "learning_rate": 0.00029776397455898233, "loss": 8.4075, "step": 93700 }, { "epoch": 15.008, "grad_norm": 0.12280473113059998, "learning_rate": 0.0002977615744629785, "loss": 8.3065, "step": 93800 }, { "epoch": 15.024, "grad_norm": 0.14091894030570984, "learning_rate": 0.0002977591743669746, "loss": 8.3845, "step": 93900 }, { "epoch": 15.04, "grad_norm": 0.16942408680915833, "learning_rate": 0.0002977567742709708, "loss": 8.2751, "step": 94000 }, { "epoch": 15.056, "grad_norm": 0.1858222782611847, "learning_rate": 0.00029775437417496695, "loss": 8.5152, "step": 94100 }, { "epoch": 15.072, "grad_norm": 0.15426284074783325, "learning_rate": 0.0002977519740789631, "loss": 8.2321, "step": 94200 }, { "epoch": 15.088, "grad_norm": 0.13960111141204834, "learning_rate": 0.0002977495739829593, "loss": 8.4343, "step": 94300 }, { "epoch": 15.104, "grad_norm": 0.1927483230829239, "learning_rate": 0.00029774717388695546, "loss": 8.26, "step": 94400 }, { "epoch": 15.12, "grad_norm": 0.15174433588981628, "learning_rate": 0.0002977447737909516, "loss": 8.665, "step": 94500 }, { "epoch": 15.136, "grad_norm": 0.14686360955238342, "learning_rate": 0.00029774237369494774, "loss": 8.0608, "step": 94600 }, { "epoch": 15.152, "grad_norm": 0.15865716338157654, "learning_rate": 0.00029773997359894397, "loss": 8.4204, "step": 94700 }, { "epoch": 15.168, "grad_norm": 0.14913444221019745, "learning_rate": 0.0002977375735029401, "loss": 8.5544, "step": 94800 }, { "epoch": 15.184, "grad_norm": 0.12727545201778412, "learning_rate": 0.00029773517340693625, "loss": 7.9671, "step": 94900 }, { "epoch": 15.2, "grad_norm": 0.18612131476402283, "learning_rate": 0.0002977327733109324, "loss": 8.5797, "step": 95000 }, { "epoch": 15.216, "grad_norm": 0.1876545250415802, "learning_rate": 0.0002977303732149286, "loss": 8.3126, "step": 95100 }, { "epoch": 15.232, "grad_norm": 0.45961084961891174, "learning_rate": 0.00029772797311892476, "loss": 8.772, "step": 95200 }, { "epoch": 15.248, "grad_norm": 0.16763293743133545, "learning_rate": 0.00029772557302292093, "loss": 8.6089, "step": 95300 }, { "epoch": 15.264, "grad_norm": 0.17058174312114716, "learning_rate": 0.00029772317292691704, "loss": 8.5425, "step": 95400 }, { "epoch": 15.28, "grad_norm": 0.17006829380989075, "learning_rate": 0.0002977207728309132, "loss": 8.8057, "step": 95500 }, { "epoch": 15.296, "grad_norm": 0.09077399969100952, "learning_rate": 0.0002977183727349094, "loss": 8.343, "step": 95600 }, { "epoch": 15.312, "grad_norm": 0.0950964093208313, "learning_rate": 0.00029771599663986557, "loss": 8.3518, "step": 95700 }, { "epoch": 15.328, "grad_norm": 0.14622962474822998, "learning_rate": 0.00029771359654386174, "loss": 8.1654, "step": 95800 }, { "epoch": 15.344, "grad_norm": 0.16222132742404938, "learning_rate": 0.00029771119644785785, "loss": 8.6123, "step": 95900 }, { "epoch": 15.36, "grad_norm": 0.13185660541057587, "learning_rate": 0.000297708796351854, "loss": 8.6665, "step": 96000 }, { "epoch": 15.376, "grad_norm": 0.1910812258720398, "learning_rate": 0.0002977063962558502, "loss": 8.2323, "step": 96100 }, { "epoch": 15.392, "grad_norm": 0.18493321537971497, "learning_rate": 0.00029770399615984636, "loss": 8.2076, "step": 96200 }, { "epoch": 15.408, "grad_norm": 0.15737323462963104, "learning_rate": 0.00029770159606384253, "loss": 8.4031, "step": 96300 }, { "epoch": 15.424, "grad_norm": 0.1808168590068817, "learning_rate": 0.0002976991959678387, "loss": 8.0816, "step": 96400 }, { "epoch": 15.44, "grad_norm": 0.12530648708343506, "learning_rate": 0.0002976967958718348, "loss": 8.0609, "step": 96500 }, { "epoch": 15.456, "grad_norm": 0.12963543832302094, "learning_rate": 0.000297694395775831, "loss": 8.092, "step": 96600 }, { "epoch": 15.472, "grad_norm": 0.1329260617494583, "learning_rate": 0.00029769199567982715, "loss": 8.4219, "step": 96700 }, { "epoch": 15.488, "grad_norm": 0.1603865921497345, "learning_rate": 0.0002976895955838233, "loss": 7.8878, "step": 96800 }, { "epoch": 15.504, "grad_norm": 0.16902674734592438, "learning_rate": 0.0002976871954878195, "loss": 8.2197, "step": 96900 }, { "epoch": 15.52, "grad_norm": 0.15807543694972992, "learning_rate": 0.0002976847953918156, "loss": 7.937, "step": 97000 }, { "epoch": 15.536, "grad_norm": 0.15132875740528107, "learning_rate": 0.0002976823952958118, "loss": 8.6177, "step": 97100 }, { "epoch": 15.552, "grad_norm": 0.1347590982913971, "learning_rate": 0.00029767999519980795, "loss": 8.7107, "step": 97200 }, { "epoch": 15.568, "grad_norm": 0.16151072084903717, "learning_rate": 0.0002976775951038041, "loss": 8.4782, "step": 97300 }, { "epoch": 15.584, "grad_norm": 0.194889098405838, "learning_rate": 0.0002976751950078003, "loss": 8.128, "step": 97400 }, { "epoch": 15.6, "grad_norm": 0.18148979544639587, "learning_rate": 0.00029767279491179645, "loss": 8.3591, "step": 97500 }, { "epoch": 15.616, "grad_norm": 0.1610337197780609, "learning_rate": 0.00029767039481579257, "loss": 8.8492, "step": 97600 }, { "epoch": 15.632, "grad_norm": 0.15079425275325775, "learning_rate": 0.00029766799471978874, "loss": 8.2512, "step": 97700 }, { "epoch": 15.648, "grad_norm": 0.1274147629737854, "learning_rate": 0.0002976655946237849, "loss": 8.2239, "step": 97800 }, { "epoch": 15.664, "grad_norm": 0.14330662786960602, "learning_rate": 0.0002976631945277811, "loss": 8.3046, "step": 97900 }, { "epoch": 15.68, "grad_norm": 0.17394746840000153, "learning_rate": 0.00029766079443177725, "loss": 8.2542, "step": 98000 }, { "epoch": 15.696, "grad_norm": 0.15639960765838623, "learning_rate": 0.0002976583943357734, "loss": 8.3993, "step": 98100 }, { "epoch": 15.712, "grad_norm": 0.12845559418201447, "learning_rate": 0.0002976559942397696, "loss": 8.2055, "step": 98200 }, { "epoch": 15.728, "grad_norm": 0.1673252284526825, "learning_rate": 0.00029765359414376575, "loss": 8.2969, "step": 98300 }, { "epoch": 15.744, "grad_norm": 0.12345835566520691, "learning_rate": 0.0002976511940477619, "loss": 8.4381, "step": 98400 }, { "epoch": 15.76, "grad_norm": 0.19648896157741547, "learning_rate": 0.00029764879395175804, "loss": 8.0932, "step": 98500 }, { "epoch": 15.776, "grad_norm": 0.14960013329982758, "learning_rate": 0.0002976463938557542, "loss": 8.4303, "step": 98600 }, { "epoch": 15.792, "grad_norm": 0.19554351270198822, "learning_rate": 0.0002976439937597504, "loss": 8.0159, "step": 98700 }, { "epoch": 15.808, "grad_norm": 0.1545807123184204, "learning_rate": 0.00029764159366374654, "loss": 8.0277, "step": 98800 }, { "epoch": 15.824, "grad_norm": 0.11705837398767471, "learning_rate": 0.0002976391935677427, "loss": 8.2474, "step": 98900 }, { "epoch": 15.84, "grad_norm": 0.16222915053367615, "learning_rate": 0.00029763679347173883, "loss": 7.8129, "step": 99000 }, { "epoch": 15.856, "grad_norm": 0.18901053071022034, "learning_rate": 0.000297634393375735, "loss": 8.3068, "step": 99100 }, { "epoch": 15.872, "grad_norm": 0.13031688332557678, "learning_rate": 0.00029763199327973117, "loss": 8.1526, "step": 99200 }, { "epoch": 15.888, "grad_norm": 0.17539045214653015, "learning_rate": 0.00029762959318372734, "loss": 7.7545, "step": 99300 }, { "epoch": 15.904, "grad_norm": NaN, "learning_rate": 0.0002976271930877235, "loss": 7.8745, "step": 99400 }, { "epoch": 15.92, "grad_norm": 0.17992717027664185, "learning_rate": 0.0002976248169926797, "loss": 7.9663, "step": 99500 }, { "epoch": 15.936, "grad_norm": 0.40667879581451416, "learning_rate": 0.0002976224168966758, "loss": 8.1505, "step": 99600 }, { "epoch": 15.952, "grad_norm": 0.15805494785308838, "learning_rate": 0.000297620016800672, "loss": 8.4417, "step": 99700 }, { "epoch": 15.968, "grad_norm": 0.16626039147377014, "learning_rate": 0.00029761761670466815, "loss": 8.2951, "step": 99800 }, { "epoch": 15.984, "grad_norm": 0.14239948987960815, "learning_rate": 0.0002976152166086643, "loss": 8.3205, "step": 99900 }, { "epoch": 16.0, "grad_norm": 0.24553033709526062, "learning_rate": 0.0002976128165126605, "loss": 8.2056, "step": 100000 }, { "epoch": 16.016, "grad_norm": 0.18159309029579163, "learning_rate": 0.0002976104164166566, "loss": 7.9151, "step": 100100 }, { "epoch": 16.032, "grad_norm": 0.16968666017055511, "learning_rate": 0.00029760801632065277, "loss": 7.8903, "step": 100200 }, { "epoch": 16.048, "grad_norm": 0.1661410927772522, "learning_rate": 0.00029760561622464894, "loss": 8.3051, "step": 100300 }, { "epoch": 16.064, "grad_norm": 0.1526879370212555, "learning_rate": 0.0002976032161286451, "loss": 7.8435, "step": 100400 }, { "epoch": 16.08, "grad_norm": 0.14917099475860596, "learning_rate": 0.0002976008160326413, "loss": 8.0571, "step": 100500 }, { "epoch": 16.096, "grad_norm": 0.15157845616340637, "learning_rate": 0.00029759841593663745, "loss": 8.0002, "step": 100600 }, { "epoch": 16.112, "grad_norm": 0.1487221121788025, "learning_rate": 0.00029759601584063356, "loss": 7.864, "step": 100700 }, { "epoch": 16.128, "grad_norm": 0.1397908627986908, "learning_rate": 0.00029759361574462973, "loss": 8.0639, "step": 100800 }, { "epoch": 16.144, "grad_norm": 0.1495772898197174, "learning_rate": 0.0002975912156486259, "loss": 7.8346, "step": 100900 }, { "epoch": 16.16, "grad_norm": 0.17440412938594818, "learning_rate": 0.00029758881555262207, "loss": 8.1732, "step": 101000 }, { "epoch": 16.176, "grad_norm": 0.15802791714668274, "learning_rate": 0.00029758641545661824, "loss": 7.9528, "step": 101100 }, { "epoch": 16.192, "grad_norm": 0.15488143265247345, "learning_rate": 0.0002975840153606144, "loss": 7.8414, "step": 101200 }, { "epoch": 16.208, "grad_norm": 0.1365291178226471, "learning_rate": 0.0002975816152646106, "loss": 7.9363, "step": 101300 }, { "epoch": 16.224, "grad_norm": 0.13933680951595306, "learning_rate": 0.00029757921516860675, "loss": 7.5429, "step": 101400 }, { "epoch": 16.24, "grad_norm": 0.19280196726322174, "learning_rate": 0.0002975768150726029, "loss": 7.913, "step": 101500 }, { "epoch": 16.256, "grad_norm": 0.11700501292943954, "learning_rate": 0.00029757441497659903, "loss": 8.0237, "step": 101600 }, { "epoch": 16.272, "grad_norm": 0.16518530249595642, "learning_rate": 0.0002975720388815552, "loss": 7.8771, "step": 101700 }, { "epoch": 16.288, "grad_norm": 0.14215916395187378, "learning_rate": 0.0002975696387855514, "loss": 8.2513, "step": 101800 }, { "epoch": 16.304, "grad_norm": 0.15119720995426178, "learning_rate": 0.00029756723868954756, "loss": 8.0416, "step": 101900 }, { "epoch": 16.32, "grad_norm": 0.17267923057079315, "learning_rate": 0.0002975648385935437, "loss": 7.7183, "step": 102000 }, { "epoch": 16.336, "grad_norm": 0.13659106194972992, "learning_rate": 0.00029756243849753984, "loss": 7.6539, "step": 102100 }, { "epoch": 16.352, "grad_norm": 0.13859499990940094, "learning_rate": 0.000297560038401536, "loss": 7.9309, "step": 102200 }, { "epoch": 16.368, "grad_norm": 0.16713272035121918, "learning_rate": 0.0002975576383055322, "loss": 7.7884, "step": 102300 }, { "epoch": 16.384, "grad_norm": 0.19469381868839264, "learning_rate": 0.00029755523820952835, "loss": 7.6944, "step": 102400 }, { "epoch": 16.4, "grad_norm": 0.14082291722297668, "learning_rate": 0.0002975528381135245, "loss": 7.5828, "step": 102500 }, { "epoch": 16.416, "grad_norm": 0.12121783196926117, "learning_rate": 0.0002975504380175207, "loss": 7.813, "step": 102600 }, { "epoch": 16.432, "grad_norm": 0.22072196006774902, "learning_rate": 0.0002975480379215168, "loss": 8.2315, "step": 102700 }, { "epoch": 16.448, "grad_norm": 0.1469603329896927, "learning_rate": 0.00029754563782551297, "loss": 8.0137, "step": 102800 }, { "epoch": 16.464, "grad_norm": 0.11437113583087921, "learning_rate": 0.00029754323772950914, "loss": 7.3291, "step": 102900 }, { "epoch": 16.48, "grad_norm": 0.17373935878276825, "learning_rate": 0.0002975408376335053, "loss": 8.0078, "step": 103000 }, { "epoch": 16.496, "grad_norm": 0.12379905581474304, "learning_rate": 0.0002975384375375015, "loss": 8.0724, "step": 103100 }, { "epoch": 16.512, "grad_norm": 0.1540013700723648, "learning_rate": 0.00029753603744149765, "loss": 7.6953, "step": 103200 }, { "epoch": 16.528, "grad_norm": 0.21880146861076355, "learning_rate": 0.00029753363734549376, "loss": 8.0522, "step": 103300 }, { "epoch": 16.544, "grad_norm": 0.14410023391246796, "learning_rate": 0.00029753123724948993, "loss": 8.191, "step": 103400 }, { "epoch": 16.56, "grad_norm": 0.13037148118019104, "learning_rate": 0.0002975288371534861, "loss": 7.6117, "step": 103500 }, { "epoch": 16.576, "grad_norm": 0.16236849129199982, "learning_rate": 0.00029752643705748227, "loss": 7.9894, "step": 103600 }, { "epoch": 16.592, "grad_norm": 0.1502009928226471, "learning_rate": 0.00029752403696147844, "loss": 7.7302, "step": 103700 }, { "epoch": 16.608, "grad_norm": 0.18485447764396667, "learning_rate": 0.00029752163686547455, "loss": 7.8743, "step": 103800 }, { "epoch": 16.624, "grad_norm": 0.12873640656471252, "learning_rate": 0.0002975192367694707, "loss": 7.6197, "step": 103900 }, { "epoch": 16.64, "grad_norm": 0.11517874896526337, "learning_rate": 0.0002975168366734669, "loss": 7.4887, "step": 104000 }, { "epoch": 16.656, "grad_norm": 0.11515144258737564, "learning_rate": 0.00029751443657746306, "loss": 7.706, "step": 104100 }, { "epoch": 16.672, "grad_norm": 0.15465959906578064, "learning_rate": 0.00029751203648145923, "loss": 7.3052, "step": 104200 }, { "epoch": 16.688, "grad_norm": 0.12962587177753448, "learning_rate": 0.0002975096603864154, "loss": 7.8117, "step": 104300 }, { "epoch": 16.704, "grad_norm": 0.18321260809898376, "learning_rate": 0.0002975072602904116, "loss": 7.4464, "step": 104400 }, { "epoch": 16.72, "grad_norm": 0.1769808679819107, "learning_rate": 0.00029750486019440776, "loss": 7.8639, "step": 104500 }, { "epoch": 16.736, "grad_norm": 0.15869227051734924, "learning_rate": 0.00029750246009840393, "loss": 7.7956, "step": 104600 }, { "epoch": 16.752, "grad_norm": 0.12134505808353424, "learning_rate": 0.00029750006000240004, "loss": 7.5809, "step": 104700 }, { "epoch": 16.768, "grad_norm": 0.13986830413341522, "learning_rate": 0.0002974976599063962, "loss": 7.4372, "step": 104800 }, { "epoch": 16.784, "grad_norm": 0.1761140078306198, "learning_rate": 0.0002974952598103924, "loss": 7.7486, "step": 104900 }, { "epoch": 16.8, "grad_norm": 0.13163812458515167, "learning_rate": 0.00029749285971438855, "loss": 7.834, "step": 105000 }, { "epoch": 16.816, "grad_norm": 0.1813841462135315, "learning_rate": 0.0002974904596183847, "loss": 7.5974, "step": 105100 }, { "epoch": 16.832, "grad_norm": 0.15655750036239624, "learning_rate": 0.0002974880595223809, "loss": 7.4437, "step": 105200 }, { "epoch": 16.848, "grad_norm": 0.16123917698860168, "learning_rate": 0.000297485659426377, "loss": 7.347, "step": 105300 }, { "epoch": 16.864, "grad_norm": 0.18692290782928467, "learning_rate": 0.00029748325933037317, "loss": 7.8658, "step": 105400 }, { "epoch": 16.88, "grad_norm": 0.15913629531860352, "learning_rate": 0.00029748085923436934, "loss": 7.9134, "step": 105500 }, { "epoch": 16.896, "grad_norm": 0.1343807876110077, "learning_rate": 0.0002974784591383655, "loss": 7.5983, "step": 105600 }, { "epoch": 16.912, "grad_norm": 0.2009182572364807, "learning_rate": 0.0002974760590423617, "loss": 7.3442, "step": 105700 }, { "epoch": 16.928, "grad_norm": 0.1569000780582428, "learning_rate": 0.0002974736589463578, "loss": 7.5953, "step": 105800 }, { "epoch": 16.944, "grad_norm": 0.1601628214120865, "learning_rate": 0.00029747125885035396, "loss": 7.5624, "step": 105900 }, { "epoch": 16.96, "grad_norm": 0.14143775403499603, "learning_rate": 0.00029746885875435013, "loss": 7.579, "step": 106000 }, { "epoch": 16.976, "grad_norm": 0.2106146216392517, "learning_rate": 0.0002974664586583463, "loss": 7.5958, "step": 106100 }, { "epoch": 16.992, "grad_norm": 0.17329080402851105, "learning_rate": 0.00029746405856234247, "loss": 8.0935, "step": 106200 }, { "epoch": 17.008, "grad_norm": 0.19225256145000458, "learning_rate": 0.00029746165846633864, "loss": 6.8958, "step": 106300 }, { "epoch": 17.024, "grad_norm": 0.17550058662891388, "learning_rate": 0.00029745925837033476, "loss": 7.4002, "step": 106400 }, { "epoch": 17.04, "grad_norm": 0.16778625547885895, "learning_rate": 0.0002974568582743309, "loss": 7.698, "step": 106500 }, { "epoch": 17.056, "grad_norm": 0.14647962152957916, "learning_rate": 0.0002974544581783271, "loss": 7.5615, "step": 106600 }, { "epoch": 17.072, "grad_norm": 0.15024389326572418, "learning_rate": 0.00029745205808232326, "loss": 7.6671, "step": 106700 }, { "epoch": 17.088, "grad_norm": 0.11949127167463303, "learning_rate": 0.00029744965798631943, "loss": 7.6843, "step": 106800 }, { "epoch": 17.104, "grad_norm": 0.15480674803256989, "learning_rate": 0.00029744725789031555, "loss": 7.9465, "step": 106900 }, { "epoch": 17.12, "grad_norm": 0.14191922545433044, "learning_rate": 0.0002974448577943117, "loss": 7.7372, "step": 107000 }, { "epoch": 17.136, "grad_norm": 0.19336700439453125, "learning_rate": 0.0002974424576983079, "loss": 7.6904, "step": 107100 }, { "epoch": 17.152, "grad_norm": 0.17240415513515472, "learning_rate": 0.0002974400576023041, "loss": 7.4487, "step": 107200 }, { "epoch": 17.168, "grad_norm": 0.135718435049057, "learning_rate": 0.0002974376575063002, "loss": 7.5844, "step": 107300 }, { "epoch": 17.184, "grad_norm": 0.13594204187393188, "learning_rate": 0.0002974352574102964, "loss": 7.1186, "step": 107400 }, { "epoch": 17.2, "grad_norm": 0.14997251331806183, "learning_rate": 0.00029743285731429256, "loss": 7.3525, "step": 107500 }, { "epoch": 17.216, "grad_norm": 0.1264813244342804, "learning_rate": 0.00029743045721828873, "loss": 7.8519, "step": 107600 }, { "epoch": 17.232, "grad_norm": 0.16751745343208313, "learning_rate": 0.0002974280571222849, "loss": 7.346, "step": 107700 }, { "epoch": 17.248, "grad_norm": 0.196015402674675, "learning_rate": 0.000297425657026281, "loss": 7.5401, "step": 107800 }, { "epoch": 17.264, "grad_norm": 0.14854785799980164, "learning_rate": 0.0002974232569302772, "loss": 7.3802, "step": 107900 }, { "epoch": 17.28, "grad_norm": 0.1462150365114212, "learning_rate": 0.00029742085683427335, "loss": 7.56, "step": 108000 }, { "epoch": 17.296, "grad_norm": 0.18656545877456665, "learning_rate": 0.0002974184567382695, "loss": 7.4044, "step": 108100 }, { "epoch": 17.312, "grad_norm": 0.15170492231845856, "learning_rate": 0.0002974160566422657, "loss": 7.1246, "step": 108200 }, { "epoch": 17.328, "grad_norm": 0.13659091293811798, "learning_rate": 0.00029741365654626186, "loss": 7.5455, "step": 108300 }, { "epoch": 17.344, "grad_norm": 0.1527138650417328, "learning_rate": 0.000297411256450258, "loss": 7.5807, "step": 108400 }, { "epoch": 17.36, "grad_norm": 0.15352298319339752, "learning_rate": 0.00029740885635425415, "loss": 7.3586, "step": 108500 }, { "epoch": 17.376, "grad_norm": 0.16372795403003693, "learning_rate": 0.0002974065042601704, "loss": 7.5309, "step": 108600 }, { "epoch": 17.392, "grad_norm": 0.14718171954154968, "learning_rate": 0.0002974041041641665, "loss": 7.7871, "step": 108700 }, { "epoch": 17.408, "grad_norm": 0.13745012879371643, "learning_rate": 0.0002974017040681627, "loss": 7.4228, "step": 108800 }, { "epoch": 17.424, "grad_norm": 0.1310426890850067, "learning_rate": 0.00029739930397215886, "loss": 6.914, "step": 108900 }, { "epoch": 17.44, "grad_norm": 0.1291857808828354, "learning_rate": 0.00029739690387615503, "loss": 7.5163, "step": 109000 }, { "epoch": 17.456, "grad_norm": 0.1615869104862213, "learning_rate": 0.0002973945037801512, "loss": 6.9051, "step": 109100 }, { "epoch": 17.472, "grad_norm": 0.11409099400043488, "learning_rate": 0.00029739210368414737, "loss": 7.4919, "step": 109200 }, { "epoch": 17.488, "grad_norm": 0.12527474761009216, "learning_rate": 0.0002973897035881435, "loss": 7.5104, "step": 109300 }, { "epoch": 17.504, "grad_norm": 0.1936863362789154, "learning_rate": 0.00029738730349213965, "loss": 7.1046, "step": 109400 }, { "epoch": 17.52, "grad_norm": 0.12854978442192078, "learning_rate": 0.0002973849033961358, "loss": 7.4067, "step": 109500 }, { "epoch": 17.536, "grad_norm": 0.13116727769374847, "learning_rate": 0.000297382503300132, "loss": 7.2106, "step": 109600 }, { "epoch": 17.552, "grad_norm": 0.16138528287410736, "learning_rate": 0.00029738010320412816, "loss": 7.263, "step": 109700 }, { "epoch": 17.568, "grad_norm": 0.14999186992645264, "learning_rate": 0.0002973777031081243, "loss": 7.428, "step": 109800 }, { "epoch": 17.584, "grad_norm": 0.13564202189445496, "learning_rate": 0.00029737530301212045, "loss": 7.6592, "step": 109900 }, { "epoch": 17.6, "grad_norm": 0.14535826444625854, "learning_rate": 0.0002973729029161166, "loss": 7.2886, "step": 110000 }, { "epoch": 17.616, "grad_norm": 0.13466519117355347, "learning_rate": 0.0002973705028201128, "loss": 7.4852, "step": 110100 }, { "epoch": 17.632, "grad_norm": 0.1622999757528305, "learning_rate": 0.00029736810272410895, "loss": 7.6437, "step": 110200 }, { "epoch": 17.648, "grad_norm": 0.15417474508285522, "learning_rate": 0.0002973657026281051, "loss": 7.4305, "step": 110300 }, { "epoch": 17.664, "grad_norm": 0.1484052836894989, "learning_rate": 0.00029736330253210124, "loss": 7.5558, "step": 110400 }, { "epoch": 17.68, "grad_norm": 0.15688396990299225, "learning_rate": 0.0002973609024360974, "loss": 7.4349, "step": 110500 }, { "epoch": 17.696, "grad_norm": 0.15338055789470673, "learning_rate": 0.0002973585023400936, "loss": 7.2818, "step": 110600 }, { "epoch": 17.712, "grad_norm": 0.1761266142129898, "learning_rate": 0.00029735610224408974, "loss": 7.2618, "step": 110700 }, { "epoch": 17.728, "grad_norm": 0.17337530851364136, "learning_rate": 0.0002973537021480859, "loss": 7.0263, "step": 110800 }, { "epoch": 17.744, "grad_norm": 0.14693669974803925, "learning_rate": 0.00029735130205208203, "loss": 6.9075, "step": 110900 }, { "epoch": 17.76, "grad_norm": 0.14184145629405975, "learning_rate": 0.00029734892595703827, "loss": 7.1306, "step": 111000 }, { "epoch": 17.776, "grad_norm": 0.15281623601913452, "learning_rate": 0.00029734652586103444, "loss": 6.9965, "step": 111100 }, { "epoch": 17.792, "grad_norm": 0.30168259143829346, "learning_rate": 0.0002973441257650306, "loss": 7.3388, "step": 111200 }, { "epoch": 17.808, "grad_norm": 0.15365231037139893, "learning_rate": 0.0002973417256690267, "loss": 7.2799, "step": 111300 }, { "epoch": 17.824, "grad_norm": 0.1704150289297104, "learning_rate": 0.0002973393255730229, "loss": 7.3031, "step": 111400 }, { "epoch": 17.84, "grad_norm": 0.16025039553642273, "learning_rate": 0.00029733692547701906, "loss": 6.9446, "step": 111500 }, { "epoch": 17.856, "grad_norm": 0.14661014080047607, "learning_rate": 0.00029733452538101523, "loss": 7.4911, "step": 111600 }, { "epoch": 17.872, "grad_norm": 0.18997499346733093, "learning_rate": 0.0002973321252850114, "loss": 7.2489, "step": 111700 }, { "epoch": 17.888, "grad_norm": 0.16025018692016602, "learning_rate": 0.0002973297251890075, "loss": 7.4835, "step": 111800 }, { "epoch": 17.904, "grad_norm": 0.19556750357151031, "learning_rate": 0.0002973273250930037, "loss": 7.5087, "step": 111900 }, { "epoch": 17.92, "grad_norm": 0.14444762468338013, "learning_rate": 0.00029732492499699986, "loss": 7.3942, "step": 112000 }, { "epoch": 17.936, "grad_norm": 0.12939786911010742, "learning_rate": 0.000297322524900996, "loss": 7.0694, "step": 112100 }, { "epoch": 17.951999999999998, "grad_norm": 0.1845860481262207, "learning_rate": 0.0002973201248049922, "loss": 7.3517, "step": 112200 }, { "epoch": 17.968, "grad_norm": 0.1611936390399933, "learning_rate": 0.00029731772470898836, "loss": 7.3119, "step": 112300 }, { "epoch": 17.984, "grad_norm": 0.1410474330186844, "learning_rate": 0.0002973153246129845, "loss": 7.1857, "step": 112400 }, { "epoch": 18.0, "grad_norm": 0.14935807883739471, "learning_rate": 0.00029731292451698065, "loss": 7.2314, "step": 112500 }, { "epoch": 18.016, "grad_norm": 0.11792614310979843, "learning_rate": 0.0002973105244209768, "loss": 7.0182, "step": 112600 }, { "epoch": 18.032, "grad_norm": 0.19907847046852112, "learning_rate": 0.000297308124324973, "loss": 7.036, "step": 112700 }, { "epoch": 18.048, "grad_norm": 0.11814866214990616, "learning_rate": 0.00029730572422896915, "loss": 7.2484, "step": 112800 }, { "epoch": 18.064, "grad_norm": 0.16914184391498566, "learning_rate": 0.00029730332413296527, "loss": 7.0729, "step": 112900 }, { "epoch": 18.08, "grad_norm": 0.11930215358734131, "learning_rate": 0.00029730092403696144, "loss": 6.9642, "step": 113000 }, { "epoch": 18.096, "grad_norm": 0.14744411408901215, "learning_rate": 0.0002972985239409576, "loss": 7.1132, "step": 113100 }, { "epoch": 18.112, "grad_norm": 0.1400415003299713, "learning_rate": 0.0002972961238449538, "loss": 7.1415, "step": 113200 }, { "epoch": 18.128, "grad_norm": 0.1671387106180191, "learning_rate": 0.00029729374774990997, "loss": 7.2558, "step": 113300 }, { "epoch": 18.144, "grad_norm": 0.16554495692253113, "learning_rate": 0.00029729134765390613, "loss": 6.9987, "step": 113400 }, { "epoch": 18.16, "grad_norm": 0.1383550763130188, "learning_rate": 0.0002972889475579023, "loss": 7.0975, "step": 113500 }, { "epoch": 18.176, "grad_norm": 0.1566449999809265, "learning_rate": 0.0002972865474618985, "loss": 7.0562, "step": 113600 }, { "epoch": 18.192, "grad_norm": 0.19498635828495026, "learning_rate": 0.00029728414736589464, "loss": 6.6165, "step": 113700 }, { "epoch": 18.208, "grad_norm": 0.1640356481075287, "learning_rate": 0.00029728174726989076, "loss": 7.1794, "step": 113800 }, { "epoch": 18.224, "grad_norm": 0.11614058166742325, "learning_rate": 0.0002972793471738869, "loss": 7.285, "step": 113900 }, { "epoch": 18.24, "grad_norm": 0.15918317437171936, "learning_rate": 0.0002972769470778831, "loss": 7.163, "step": 114000 }, { "epoch": 18.256, "grad_norm": 0.1565544754266739, "learning_rate": 0.00029727454698187926, "loss": 7.225, "step": 114100 }, { "epoch": 18.272, "grad_norm": 0.17850929498672485, "learning_rate": 0.00029727214688587543, "loss": 6.801, "step": 114200 }, { "epoch": 18.288, "grad_norm": 0.11589377373456955, "learning_rate": 0.0002972697467898716, "loss": 6.8754, "step": 114300 }, { "epoch": 18.304, "grad_norm": 0.13528980314731598, "learning_rate": 0.0002972673466938677, "loss": 7.1785, "step": 114400 }, { "epoch": 18.32, "grad_norm": 0.14462067186832428, "learning_rate": 0.0002972649465978639, "loss": 6.7743, "step": 114500 }, { "epoch": 18.336, "grad_norm": 0.11352884024381638, "learning_rate": 0.0002972625705028201, "loss": 7.195, "step": 114600 }, { "epoch": 18.352, "grad_norm": 0.15487293899059296, "learning_rate": 0.00029726017040681624, "loss": 6.9974, "step": 114700 }, { "epoch": 18.368, "grad_norm": 0.18302305042743683, "learning_rate": 0.0002972577703108124, "loss": 7.3688, "step": 114800 }, { "epoch": 18.384, "grad_norm": 0.13732467591762543, "learning_rate": 0.00029725537021480853, "loss": 7.1072, "step": 114900 }, { "epoch": 18.4, "grad_norm": 0.16661597788333893, "learning_rate": 0.0002972529701188047, "loss": 6.9747, "step": 115000 }, { "epoch": 18.416, "grad_norm": 0.13797527551651, "learning_rate": 0.00029725057002280087, "loss": 6.9419, "step": 115100 }, { "epoch": 18.432, "grad_norm": 0.12859782576560974, "learning_rate": 0.00029724816992679704, "loss": 6.7853, "step": 115200 }, { "epoch": 18.448, "grad_norm": 0.14815713465213776, "learning_rate": 0.0002972457698307932, "loss": 7.2451, "step": 115300 }, { "epoch": 18.464, "grad_norm": 0.17937737703323364, "learning_rate": 0.0002972433697347894, "loss": 6.9378, "step": 115400 }, { "epoch": 18.48, "grad_norm": 0.1678260713815689, "learning_rate": 0.0002972409696387855, "loss": 7.324, "step": 115500 }, { "epoch": 18.496, "grad_norm": 0.1482672095298767, "learning_rate": 0.0002972385695427817, "loss": 6.7464, "step": 115600 }, { "epoch": 18.512, "grad_norm": 0.13717281818389893, "learning_rate": 0.0002972361694467779, "loss": 6.9728, "step": 115700 }, { "epoch": 18.528, "grad_norm": 0.16356568038463593, "learning_rate": 0.000297233769350774, "loss": 6.4269, "step": 115800 }, { "epoch": 18.544, "grad_norm": 0.11255384981632233, "learning_rate": 0.00029723136925477017, "loss": 6.8938, "step": 115900 }, { "epoch": 18.56, "grad_norm": 0.18403998017311096, "learning_rate": 0.00029722896915876634, "loss": 7.5852, "step": 116000 }, { "epoch": 18.576, "grad_norm": 0.16399045288562775, "learning_rate": 0.0002972265690627625, "loss": 6.8499, "step": 116100 }, { "epoch": 18.592, "grad_norm": 0.1565336287021637, "learning_rate": 0.0002972241689667587, "loss": 6.7727, "step": 116200 }, { "epoch": 18.608, "grad_norm": 0.19689014554023743, "learning_rate": 0.00029722176887075484, "loss": 7.1385, "step": 116300 }, { "epoch": 18.624, "grad_norm": 0.13252195715904236, "learning_rate": 0.00029721936877475096, "loss": 6.6291, "step": 116400 }, { "epoch": 18.64, "grad_norm": 0.12019433081150055, "learning_rate": 0.00029721696867874713, "loss": 6.8913, "step": 116500 }, { "epoch": 18.656, "grad_norm": 0.16386528313159943, "learning_rate": 0.0002972145685827433, "loss": 6.7989, "step": 116600 }, { "epoch": 18.672, "grad_norm": 0.13716477155685425, "learning_rate": 0.00029721216848673947, "loss": 6.6763, "step": 116700 }, { "epoch": 18.688, "grad_norm": 0.13785770535469055, "learning_rate": 0.00029720976839073564, "loss": 6.6476, "step": 116800 }, { "epoch": 18.704, "grad_norm": 0.1605842560529709, "learning_rate": 0.00029720736829473175, "loss": 6.6566, "step": 116900 }, { "epoch": 18.72, "grad_norm": 0.19339755177497864, "learning_rate": 0.0002972049681987279, "loss": 6.9454, "step": 117000 }, { "epoch": 18.736, "grad_norm": 0.14963068068027496, "learning_rate": 0.0002972025681027241, "loss": 7.0718, "step": 117100 }, { "epoch": 18.752, "grad_norm": 0.1378934234380722, "learning_rate": 0.00029720016800672026, "loss": 6.7582, "step": 117200 }, { "epoch": 18.768, "grad_norm": 0.1546606719493866, "learning_rate": 0.0002971977679107164, "loss": 6.9278, "step": 117300 }, { "epoch": 18.784, "grad_norm": 0.13777601718902588, "learning_rate": 0.0002971953678147126, "loss": 6.821, "step": 117400 }, { "epoch": 18.8, "grad_norm": 0.1833031326532364, "learning_rate": 0.0002971929677187087, "loss": 7.345, "step": 117500 }, { "epoch": 18.816, "grad_norm": 0.13752517104148865, "learning_rate": 0.0002971905676227049, "loss": 7.0435, "step": 117600 }, { "epoch": 18.832, "grad_norm": 0.14740273356437683, "learning_rate": 0.00029718816752670105, "loss": 7.0617, "step": 117700 }, { "epoch": 18.848, "grad_norm": 0.13207408785820007, "learning_rate": 0.0002971857674306972, "loss": 6.9374, "step": 117800 }, { "epoch": 18.864, "grad_norm": 0.14092418551445007, "learning_rate": 0.0002971833673346934, "loss": 6.5626, "step": 117900 }, { "epoch": 18.88, "grad_norm": 0.19631852209568024, "learning_rate": 0.0002971809672386895, "loss": 7.162, "step": 118000 }, { "epoch": 18.896, "grad_norm": 0.12741628289222717, "learning_rate": 0.00029717856714268567, "loss": 6.8316, "step": 118100 }, { "epoch": 18.912, "grad_norm": 0.17144246399402618, "learning_rate": 0.00029717616704668184, "loss": 6.5714, "step": 118200 }, { "epoch": 18.928, "grad_norm": 0.1456017643213272, "learning_rate": 0.000297173766950678, "loss": 7.1563, "step": 118300 }, { "epoch": 18.944, "grad_norm": 0.17816682159900665, "learning_rate": 0.0002971713668546742, "loss": 7.1767, "step": 118400 }, { "epoch": 18.96, "grad_norm": 0.274588942527771, "learning_rate": 0.00029716896675867035, "loss": 6.9244, "step": 118500 }, { "epoch": 18.976, "grad_norm": 0.14686717092990875, "learning_rate": 0.00029716656666266646, "loss": 6.9108, "step": 118600 }, { "epoch": 18.992, "grad_norm": 0.1549716740846634, "learning_rate": 0.00029716416656666263, "loss": 7.1166, "step": 118700 }, { "epoch": 19.008, "grad_norm": 0.24241045117378235, "learning_rate": 0.0002971617664706588, "loss": 6.7128, "step": 118800 }, { "epoch": 19.024, "grad_norm": 0.14365893602371216, "learning_rate": 0.00029715936637465497, "loss": 6.5973, "step": 118900 }, { "epoch": 19.04, "grad_norm": 0.1771174818277359, "learning_rate": 0.00029715696627865114, "loss": 6.8558, "step": 119000 }, { "epoch": 19.056, "grad_norm": 0.1703067272901535, "learning_rate": 0.00029715456618264726, "loss": 6.748, "step": 119100 }, { "epoch": 19.072, "grad_norm": 0.1466696858406067, "learning_rate": 0.0002971521660866434, "loss": 6.6093, "step": 119200 }, { "epoch": 19.088, "grad_norm": 0.16070063412189484, "learning_rate": 0.0002971497659906396, "loss": 6.7417, "step": 119300 }, { "epoch": 19.104, "grad_norm": 0.2056402564048767, "learning_rate": 0.00029714738989559584, "loss": 6.4175, "step": 119400 }, { "epoch": 19.12, "grad_norm": 0.207046240568161, "learning_rate": 0.00029714498979959195, "loss": 6.9465, "step": 119500 }, { "epoch": 19.136, "grad_norm": 0.12638603150844574, "learning_rate": 0.0002971425897035881, "loss": 6.882, "step": 119600 }, { "epoch": 19.152, "grad_norm": 0.17709197103977203, "learning_rate": 0.0002971401896075843, "loss": 6.5151, "step": 119700 }, { "epoch": 19.168, "grad_norm": 0.14313985407352448, "learning_rate": 0.00029713778951158046, "loss": 6.6897, "step": 119800 }, { "epoch": 19.184, "grad_norm": 0.14212185144424438, "learning_rate": 0.00029713538941557663, "loss": 7.0293, "step": 119900 }, { "epoch": 19.2, "grad_norm": 0.14830344915390015, "learning_rate": 0.00029713298931957274, "loss": 6.8398, "step": 120000 }, { "epoch": 19.216, "grad_norm": 0.24165965616703033, "learning_rate": 0.0002971305892235689, "loss": 6.715, "step": 120100 }, { "epoch": 19.232, "grad_norm": 0.13292773067951202, "learning_rate": 0.0002971281891275651, "loss": 6.8165, "step": 120200 }, { "epoch": 19.248, "grad_norm": 0.1639406383037567, "learning_rate": 0.00029712578903156125, "loss": 6.9099, "step": 120300 }, { "epoch": 19.264, "grad_norm": 0.18321408331394196, "learning_rate": 0.0002971233889355574, "loss": 6.4805, "step": 120400 }, { "epoch": 19.28, "grad_norm": 0.18382756412029266, "learning_rate": 0.0002971209888395536, "loss": 6.8172, "step": 120500 }, { "epoch": 19.296, "grad_norm": 0.15303823351860046, "learning_rate": 0.0002971185887435497, "loss": 6.2661, "step": 120600 }, { "epoch": 19.312, "grad_norm": 0.1740507036447525, "learning_rate": 0.0002971161886475459, "loss": 6.6127, "step": 120700 }, { "epoch": 19.328, "grad_norm": 0.14414259791374207, "learning_rate": 0.00029711378855154204, "loss": 6.4442, "step": 120800 }, { "epoch": 19.344, "grad_norm": 0.14647360146045685, "learning_rate": 0.0002971113884555382, "loss": 6.6076, "step": 120900 }, { "epoch": 19.36, "grad_norm": 0.15991808474063873, "learning_rate": 0.0002971089883595344, "loss": 6.787, "step": 121000 }, { "epoch": 19.376, "grad_norm": 0.1332535594701767, "learning_rate": 0.0002971065882635305, "loss": 6.7092, "step": 121100 }, { "epoch": 19.392, "grad_norm": 0.14746126532554626, "learning_rate": 0.00029710418816752667, "loss": 6.7574, "step": 121200 }, { "epoch": 19.408, "grad_norm": 0.13268060982227325, "learning_rate": 0.00029710178807152283, "loss": 6.4729, "step": 121300 }, { "epoch": 19.424, "grad_norm": 0.18852052092552185, "learning_rate": 0.000297099387975519, "loss": 6.7246, "step": 121400 }, { "epoch": 19.44, "grad_norm": 0.20590665936470032, "learning_rate": 0.00029709698787951517, "loss": 6.7032, "step": 121500 }, { "epoch": 19.456, "grad_norm": 0.18409046530723572, "learning_rate": 0.00029709458778351134, "loss": 6.9088, "step": 121600 }, { "epoch": 19.472, "grad_norm": 0.1330518126487732, "learning_rate": 0.00029709218768750746, "loss": 6.7912, "step": 121700 }, { "epoch": 19.488, "grad_norm": 0.17881762981414795, "learning_rate": 0.0002970897875915036, "loss": 6.6976, "step": 121800 }, { "epoch": 19.504, "grad_norm": 0.1952984780073166, "learning_rate": 0.0002970873874954998, "loss": 6.6684, "step": 121900 }, { "epoch": 19.52, "grad_norm": 0.10283193737268448, "learning_rate": 0.00029708498739949596, "loss": 6.8239, "step": 122000 }, { "epoch": 19.536, "grad_norm": 0.14318746328353882, "learning_rate": 0.00029708258730349213, "loss": 6.3829, "step": 122100 }, { "epoch": 19.552, "grad_norm": 0.27563196420669556, "learning_rate": 0.00029708018720748825, "loss": 6.5011, "step": 122200 }, { "epoch": 19.568, "grad_norm": 0.22338111698627472, "learning_rate": 0.0002970777871114844, "loss": 6.5485, "step": 122300 }, { "epoch": 19.584, "grad_norm": 0.12649616599082947, "learning_rate": 0.0002970753870154806, "loss": 6.7374, "step": 122400 }, { "epoch": 19.6, "grad_norm": 0.15860269963741302, "learning_rate": 0.00029707298691947676, "loss": 6.3596, "step": 122500 }, { "epoch": 19.616, "grad_norm": 0.12358345836400986, "learning_rate": 0.00029707061082443294, "loss": 6.3242, "step": 122600 }, { "epoch": 19.632, "grad_norm": 0.16506068408489227, "learning_rate": 0.0002970682107284291, "loss": 6.5935, "step": 122700 }, { "epoch": 19.648, "grad_norm": 0.19951657950878143, "learning_rate": 0.0002970658106324253, "loss": 6.4781, "step": 122800 }, { "epoch": 19.664, "grad_norm": 0.16879688203334808, "learning_rate": 0.00029706341053642145, "loss": 6.4468, "step": 122900 }, { "epoch": 19.68, "grad_norm": 0.14565648138523102, "learning_rate": 0.0002970610104404176, "loss": 6.635, "step": 123000 }, { "epoch": 19.696, "grad_norm": 0.12739145755767822, "learning_rate": 0.00029705861034441374, "loss": 6.7823, "step": 123100 }, { "epoch": 19.712, "grad_norm": 0.1428256332874298, "learning_rate": 0.0002970562102484099, "loss": 6.3011, "step": 123200 }, { "epoch": 19.728, "grad_norm": 0.1541672646999359, "learning_rate": 0.0002970538101524061, "loss": 6.93, "step": 123300 }, { "epoch": 19.744, "grad_norm": 0.14009244740009308, "learning_rate": 0.00029705141005640224, "loss": 6.4553, "step": 123400 }, { "epoch": 19.76, "grad_norm": 0.1925840973854065, "learning_rate": 0.0002970490099603984, "loss": 6.812, "step": 123500 }, { "epoch": 19.776, "grad_norm": 0.1624009907245636, "learning_rate": 0.0002970466098643946, "loss": 6.644, "step": 123600 }, { "epoch": 19.792, "grad_norm": 0.12902632355690002, "learning_rate": 0.0002970442097683907, "loss": 6.8444, "step": 123700 }, { "epoch": 19.808, "grad_norm": 0.1572074443101883, "learning_rate": 0.00029704180967238687, "loss": 6.8285, "step": 123800 }, { "epoch": 19.824, "grad_norm": 0.17196834087371826, "learning_rate": 0.00029703940957638304, "loss": 6.318, "step": 123900 }, { "epoch": 19.84, "grad_norm": 0.14329147338867188, "learning_rate": 0.0002970370094803792, "loss": 6.5197, "step": 124000 }, { "epoch": 19.856, "grad_norm": 0.12039805948734283, "learning_rate": 0.0002970346093843754, "loss": 6.3033, "step": 124100 }, { "epoch": 19.872, "grad_norm": 0.1786791980266571, "learning_rate": 0.0002970322092883715, "loss": 6.669, "step": 124200 }, { "epoch": 19.888, "grad_norm": 0.12987840175628662, "learning_rate": 0.00029702980919236766, "loss": 6.2543, "step": 124300 }, { "epoch": 19.904, "grad_norm": 0.12259730696678162, "learning_rate": 0.00029702740909636383, "loss": 6.4946, "step": 124400 }, { "epoch": 19.92, "grad_norm": 0.10069935768842697, "learning_rate": 0.00029702500900036, "loss": 6.7976, "step": 124500 }, { "epoch": 19.936, "grad_norm": 0.14555324614048004, "learning_rate": 0.00029702260890435617, "loss": 6.3994, "step": 124600 }, { "epoch": 19.951999999999998, "grad_norm": 0.15070566534996033, "learning_rate": 0.00029702020880835234, "loss": 6.3558, "step": 124700 }, { "epoch": 19.968, "grad_norm": 0.13936389982700348, "learning_rate": 0.00029701780871234845, "loss": 6.369, "step": 124800 }, { "epoch": 19.984, "grad_norm": 0.20414897799491882, "learning_rate": 0.0002970154086163446, "loss": 6.4591, "step": 124900 }, { "epoch": 20.0, "grad_norm": 0.17090056836605072, "learning_rate": 0.0002970130085203408, "loss": 6.6428, "step": 125000 }, { "epoch": 20.016, "grad_norm": 0.13628321886062622, "learning_rate": 0.00029701060842433696, "loss": 6.6142, "step": 125100 }, { "epoch": 20.032, "grad_norm": 0.1602114588022232, "learning_rate": 0.0002970082083283331, "loss": 6.2906, "step": 125200 }, { "epoch": 20.048, "grad_norm": 0.16529148817062378, "learning_rate": 0.00029700580823232924, "loss": 6.32, "step": 125300 }, { "epoch": 20.064, "grad_norm": 0.09591558575630188, "learning_rate": 0.0002970034081363254, "loss": 6.5236, "step": 125400 }, { "epoch": 20.08, "grad_norm": 0.16209086775779724, "learning_rate": 0.0002970010080403216, "loss": 6.0982, "step": 125500 }, { "epoch": 20.096, "grad_norm": 0.14823907613754272, "learning_rate": 0.00029699860794431775, "loss": 6.5177, "step": 125600 }, { "epoch": 20.112, "grad_norm": 0.14667312800884247, "learning_rate": 0.0002969962078483139, "loss": 6.2496, "step": 125700 }, { "epoch": 20.128, "grad_norm": 0.14101973176002502, "learning_rate": 0.0002969938077523101, "loss": 6.4982, "step": 125800 }, { "epoch": 20.144, "grad_norm": 0.15947328507900238, "learning_rate": 0.0002969914076563062, "loss": 6.2799, "step": 125900 }, { "epoch": 20.16, "grad_norm": 0.1501172035932541, "learning_rate": 0.00029698900756030237, "loss": 6.3317, "step": 126000 }, { "epoch": 20.176, "grad_norm": 0.15825922787189484, "learning_rate": 0.00029698660746429854, "loss": 6.2838, "step": 126100 }, { "epoch": 20.192, "grad_norm": 0.14270856976509094, "learning_rate": 0.00029698423136925473, "loss": 6.2077, "step": 126200 }, { "epoch": 20.208, "grad_norm": 0.1994931846857071, "learning_rate": 0.0002969818312732509, "loss": 6.3276, "step": 126300 }, { "epoch": 20.224, "grad_norm": 0.2308851182460785, "learning_rate": 0.00029697943117724707, "loss": 6.3211, "step": 126400 }, { "epoch": 20.24, "grad_norm": 0.21615839004516602, "learning_rate": 0.00029697703108124324, "loss": 6.2481, "step": 126500 }, { "epoch": 20.256, "grad_norm": 0.14972296357154846, "learning_rate": 0.0002969746309852394, "loss": 6.3543, "step": 126600 }, { "epoch": 20.272, "grad_norm": 0.164517343044281, "learning_rate": 0.0002969722308892356, "loss": 6.3991, "step": 126700 }, { "epoch": 20.288, "grad_norm": 0.15623216331005096, "learning_rate": 0.0002969698307932317, "loss": 6.6786, "step": 126800 }, { "epoch": 20.304, "grad_norm": 0.1451660692691803, "learning_rate": 0.00029696743069722786, "loss": 6.2966, "step": 126900 }, { "epoch": 20.32, "grad_norm": 0.17200326919555664, "learning_rate": 0.00029696503060122403, "loss": 6.4685, "step": 127000 }, { "epoch": 20.336, "grad_norm": 0.15096783638000488, "learning_rate": 0.0002969626305052202, "loss": 6.2486, "step": 127100 }, { "epoch": 20.352, "grad_norm": 0.14257729053497314, "learning_rate": 0.00029696023040921637, "loss": 6.2078, "step": 127200 }, { "epoch": 20.368, "grad_norm": 0.21399612724781036, "learning_rate": 0.0002969578303132125, "loss": 6.0766, "step": 127300 }, { "epoch": 20.384, "grad_norm": 0.11737848818302155, "learning_rate": 0.00029695543021720865, "loss": 6.3663, "step": 127400 }, { "epoch": 20.4, "grad_norm": 0.13575823605060577, "learning_rate": 0.0002969530301212048, "loss": 6.202, "step": 127500 }, { "epoch": 20.416, "grad_norm": 0.15899422764778137, "learning_rate": 0.000296950630025201, "loss": 6.0727, "step": 127600 }, { "epoch": 20.432, "grad_norm": 0.18363483250141144, "learning_rate": 0.00029694822992919716, "loss": 6.594, "step": 127700 }, { "epoch": 20.448, "grad_norm": 0.1325751096010208, "learning_rate": 0.00029694582983319333, "loss": 6.532, "step": 127800 }, { "epoch": 20.464, "grad_norm": 0.13950107991695404, "learning_rate": 0.00029694342973718944, "loss": 5.9695, "step": 127900 }, { "epoch": 20.48, "grad_norm": 0.09819541126489639, "learning_rate": 0.0002969410296411856, "loss": 6.3775, "step": 128000 }, { "epoch": 20.496, "grad_norm": 0.15788622200489044, "learning_rate": 0.0002969386295451818, "loss": 6.5626, "step": 128100 }, { "epoch": 20.512, "grad_norm": 0.1338583081960678, "learning_rate": 0.00029693622944917795, "loss": 6.3808, "step": 128200 }, { "epoch": 20.528, "grad_norm": 0.1711709052324295, "learning_rate": 0.0002969338293531741, "loss": 6.3297, "step": 128300 }, { "epoch": 20.544, "grad_norm": 0.10356644541025162, "learning_rate": 0.00029693142925717023, "loss": 6.2275, "step": 128400 }, { "epoch": 20.56, "grad_norm": 0.17266201972961426, "learning_rate": 0.0002969290291611664, "loss": 6.399, "step": 128500 }, { "epoch": 20.576, "grad_norm": 0.1582164466381073, "learning_rate": 0.0002969266290651626, "loss": 6.186, "step": 128600 }, { "epoch": 20.592, "grad_norm": 0.15661326050758362, "learning_rate": 0.00029692422896915874, "loss": 6.3988, "step": 128700 }, { "epoch": 20.608, "grad_norm": 0.12148367613554001, "learning_rate": 0.00029692185287411493, "loss": 6.4026, "step": 128800 }, { "epoch": 20.624, "grad_norm": 0.15861108899116516, "learning_rate": 0.0002969194527781111, "loss": 6.1632, "step": 128900 }, { "epoch": 20.64, "grad_norm": 0.21511606872081757, "learning_rate": 0.00029691705268210727, "loss": 6.1254, "step": 129000 }, { "epoch": 20.656, "grad_norm": 0.17380183935165405, "learning_rate": 0.00029691465258610344, "loss": 5.8979, "step": 129100 }, { "epoch": 20.672, "grad_norm": 0.15295742452144623, "learning_rate": 0.0002969122524900996, "loss": 6.1504, "step": 129200 }, { "epoch": 20.688, "grad_norm": 0.14123979210853577, "learning_rate": 0.0002969098523940957, "loss": 6.3968, "step": 129300 }, { "epoch": 20.704, "grad_norm": 0.11941767483949661, "learning_rate": 0.0002969074522980919, "loss": 6.2761, "step": 129400 }, { "epoch": 20.72, "grad_norm": 0.1716291755437851, "learning_rate": 0.00029690505220208806, "loss": 6.1725, "step": 129500 }, { "epoch": 20.736, "grad_norm": 0.10485927015542984, "learning_rate": 0.00029690265210608423, "loss": 6.3992, "step": 129600 }, { "epoch": 20.752, "grad_norm": 0.14606288075447083, "learning_rate": 0.0002969002520100804, "loss": 6.3221, "step": 129700 }, { "epoch": 20.768, "grad_norm": 0.1599857658147812, "learning_rate": 0.00029689785191407657, "loss": 6.4159, "step": 129800 }, { "epoch": 20.784, "grad_norm": 0.1607884019613266, "learning_rate": 0.0002968954518180727, "loss": 6.2899, "step": 129900 }, { "epoch": 20.8, "grad_norm": 0.17046970129013062, "learning_rate": 0.00029689305172206885, "loss": 6.195, "step": 130000 }, { "epoch": 20.816, "grad_norm": 0.17893536388874054, "learning_rate": 0.000296890651626065, "loss": 6.3987, "step": 130100 }, { "epoch": 20.832, "grad_norm": 0.15878397226333618, "learning_rate": 0.0002968882515300612, "loss": 6.8826, "step": 130200 }, { "epoch": 20.848, "grad_norm": 0.17702220380306244, "learning_rate": 0.00029688585143405736, "loss": 6.4912, "step": 130300 }, { "epoch": 20.864, "grad_norm": 0.1281166672706604, "learning_rate": 0.0002968834513380535, "loss": 6.5531, "step": 130400 }, { "epoch": 20.88, "grad_norm": 0.16799704730510712, "learning_rate": 0.00029688105124204964, "loss": 5.9929, "step": 130500 }, { "epoch": 20.896, "grad_norm": 0.1236133724451065, "learning_rate": 0.0002968786511460458, "loss": 6.0232, "step": 130600 }, { "epoch": 20.912, "grad_norm": 0.1369544267654419, "learning_rate": 0.000296876251050042, "loss": 6.5761, "step": 130700 }, { "epoch": 20.928, "grad_norm": 0.13266846537590027, "learning_rate": 0.00029687385095403815, "loss": 6.1677, "step": 130800 }, { "epoch": 20.944, "grad_norm": 0.11849372833967209, "learning_rate": 0.0002968714508580343, "loss": 6.0787, "step": 130900 }, { "epoch": 20.96, "grad_norm": 0.11395172029733658, "learning_rate": 0.00029686905076203044, "loss": 6.2634, "step": 131000 }, { "epoch": 20.976, "grad_norm": 0.11821906268596649, "learning_rate": 0.0002968666746669866, "loss": 6.388, "step": 131100 }, { "epoch": 20.992, "grad_norm": 0.12622199952602386, "learning_rate": 0.00029686427457098285, "loss": 6.0103, "step": 131200 }, { "epoch": 21.008, "grad_norm": 0.16676801443099976, "learning_rate": 0.00029686187447497896, "loss": 5.865, "step": 131300 }, { "epoch": 21.024, "grad_norm": 0.15502384305000305, "learning_rate": 0.00029685947437897513, "loss": 6.165, "step": 131400 }, { "epoch": 21.04, "grad_norm": 0.24440471827983856, "learning_rate": 0.0002968570742829713, "loss": 5.9314, "step": 131500 }, { "epoch": 21.056, "grad_norm": 0.1315223127603531, "learning_rate": 0.00029685467418696747, "loss": 6.0678, "step": 131600 }, { "epoch": 21.072, "grad_norm": 0.1865660399198532, "learning_rate": 0.00029685227409096364, "loss": 5.9805, "step": 131700 }, { "epoch": 21.088, "grad_norm": 0.2066924124956131, "learning_rate": 0.0002968498739949598, "loss": 6.1499, "step": 131800 }, { "epoch": 21.104, "grad_norm": 0.14284636080265045, "learning_rate": 0.0002968474738989559, "loss": 5.7731, "step": 131900 }, { "epoch": 21.12, "grad_norm": 0.15058225393295288, "learning_rate": 0.0002968450738029521, "loss": 6.1113, "step": 132000 }, { "epoch": 21.136, "grad_norm": 0.12619538605213165, "learning_rate": 0.00029684267370694826, "loss": 5.9437, "step": 132100 }, { "epoch": 21.152, "grad_norm": 0.15766064822673798, "learning_rate": 0.00029684027361094443, "loss": 6.2503, "step": 132200 }, { "epoch": 21.168, "grad_norm": 0.14563268423080444, "learning_rate": 0.0002968378735149406, "loss": 5.96, "step": 132300 }, { "epoch": 21.184, "grad_norm": 0.14157824218273163, "learning_rate": 0.0002968354734189367, "loss": 6.1794, "step": 132400 }, { "epoch": 21.2, "grad_norm": 0.18574143946170807, "learning_rate": 0.0002968330733229329, "loss": 6.3155, "step": 132500 }, { "epoch": 21.216, "grad_norm": 0.11855421960353851, "learning_rate": 0.00029683067322692905, "loss": 6.4108, "step": 132600 }, { "epoch": 21.232, "grad_norm": 0.12140708416700363, "learning_rate": 0.0002968282731309252, "loss": 6.0888, "step": 132700 }, { "epoch": 21.248, "grad_norm": 0.17192867398262024, "learning_rate": 0.0002968258730349214, "loss": 6.2884, "step": 132800 }, { "epoch": 21.264, "grad_norm": 0.13360394537448883, "learning_rate": 0.00029682347293891756, "loss": 6.1993, "step": 132900 }, { "epoch": 21.28, "grad_norm": 0.16163136065006256, "learning_rate": 0.0002968210968438737, "loss": 6.2262, "step": 133000 }, { "epoch": 21.296, "grad_norm": 0.12919676303863525, "learning_rate": 0.00029681869674786987, "loss": 5.8, "step": 133100 }, { "epoch": 21.312, "grad_norm": 0.1594499945640564, "learning_rate": 0.00029681629665186603, "loss": 5.8055, "step": 133200 }, { "epoch": 21.328, "grad_norm": 0.12262352555990219, "learning_rate": 0.0002968138965558622, "loss": 5.6412, "step": 133300 }, { "epoch": 21.344, "grad_norm": 0.16952601075172424, "learning_rate": 0.0002968114964598584, "loss": 6.0173, "step": 133400 }, { "epoch": 21.36, "grad_norm": 0.17378447949886322, "learning_rate": 0.0002968090963638545, "loss": 5.5105, "step": 133500 }, { "epoch": 21.376, "grad_norm": 0.12117540836334229, "learning_rate": 0.00029680669626785066, "loss": 6.5432, "step": 133600 }, { "epoch": 21.392, "grad_norm": 0.15760718286037445, "learning_rate": 0.0002968042961718468, "loss": 5.6998, "step": 133700 }, { "epoch": 21.408, "grad_norm": 0.20163291692733765, "learning_rate": 0.000296801896075843, "loss": 5.9457, "step": 133800 }, { "epoch": 21.424, "grad_norm": 0.1601804941892624, "learning_rate": 0.00029679949597983916, "loss": 5.7331, "step": 133900 }, { "epoch": 21.44, "grad_norm": 0.147283673286438, "learning_rate": 0.00029679709588383533, "loss": 6.034, "step": 134000 }, { "epoch": 21.456, "grad_norm": 0.1677253395318985, "learning_rate": 0.00029679469578783145, "loss": 6.4454, "step": 134100 }, { "epoch": 21.472, "grad_norm": 0.1402285099029541, "learning_rate": 0.0002967922956918276, "loss": 5.9842, "step": 134200 }, { "epoch": 21.488, "grad_norm": 0.185127392411232, "learning_rate": 0.00029678989559582384, "loss": 6.0976, "step": 134300 }, { "epoch": 21.504, "grad_norm": 0.17136482894420624, "learning_rate": 0.00029678749549981996, "loss": 6.3848, "step": 134400 }, { "epoch": 21.52, "grad_norm": 0.14343611896038055, "learning_rate": 0.0002967850954038161, "loss": 6.1087, "step": 134500 }, { "epoch": 21.536, "grad_norm": 0.13721515238285065, "learning_rate": 0.0002967826953078123, "loss": 6.0383, "step": 134600 }, { "epoch": 21.552, "grad_norm": 0.13419759273529053, "learning_rate": 0.00029678029521180846, "loss": 5.8767, "step": 134700 }, { "epoch": 21.568, "grad_norm": 0.18504373729228973, "learning_rate": 0.00029677789511580463, "loss": 6.0607, "step": 134800 }, { "epoch": 21.584, "grad_norm": 0.14880910515785217, "learning_rate": 0.0002967754950198008, "loss": 5.9108, "step": 134900 }, { "epoch": 21.6, "grad_norm": 0.13054971396923065, "learning_rate": 0.0002967730949237969, "loss": 6.0197, "step": 135000 }, { "epoch": 21.616, "grad_norm": 0.16096660494804382, "learning_rate": 0.0002967706948277931, "loss": 5.8114, "step": 135100 }, { "epoch": 21.632, "grad_norm": 0.16552191972732544, "learning_rate": 0.00029676829473178926, "loss": 6.2389, "step": 135200 }, { "epoch": 21.648, "grad_norm": 0.13705958425998688, "learning_rate": 0.0002967658946357854, "loss": 6.2474, "step": 135300 }, { "epoch": 21.664, "grad_norm": 0.17535176873207092, "learning_rate": 0.0002967634945397816, "loss": 6.0806, "step": 135400 }, { "epoch": 21.68, "grad_norm": 0.15185397863388062, "learning_rate": 0.0002967610944437777, "loss": 6.2673, "step": 135500 }, { "epoch": 21.696, "grad_norm": 0.1459989696741104, "learning_rate": 0.0002967586943477739, "loss": 6.1566, "step": 135600 }, { "epoch": 21.712, "grad_norm": 0.1216706857085228, "learning_rate": 0.00029675629425177005, "loss": 5.9801, "step": 135700 }, { "epoch": 21.728, "grad_norm": 0.1349131315946579, "learning_rate": 0.0002967538941557662, "loss": 5.8902, "step": 135800 }, { "epoch": 21.744, "grad_norm": 0.14793895184993744, "learning_rate": 0.0002967514940597624, "loss": 5.7143, "step": 135900 }, { "epoch": 21.76, "grad_norm": 0.171220600605011, "learning_rate": 0.00029674909396375855, "loss": 5.7715, "step": 136000 }, { "epoch": 21.776, "grad_norm": 0.18677209317684174, "learning_rate": 0.00029674669386775467, "loss": 5.9996, "step": 136100 }, { "epoch": 21.792, "grad_norm": 0.153004989027977, "learning_rate": 0.00029674429377175084, "loss": 6.1678, "step": 136200 }, { "epoch": 21.808, "grad_norm": 0.12716227769851685, "learning_rate": 0.000296741893675747, "loss": 5.8525, "step": 136300 }, { "epoch": 21.824, "grad_norm": 0.15531957149505615, "learning_rate": 0.0002967394935797432, "loss": 5.703, "step": 136400 }, { "epoch": 21.84, "grad_norm": 0.16813132166862488, "learning_rate": 0.00029673709348373935, "loss": 5.7367, "step": 136500 }, { "epoch": 21.856, "grad_norm": 0.1366407722234726, "learning_rate": 0.0002967346933877355, "loss": 6.4011, "step": 136600 }, { "epoch": 21.872, "grad_norm": 0.1486620455980301, "learning_rate": 0.00029673229329173163, "loss": 6.0592, "step": 136700 }, { "epoch": 21.888, "grad_norm": 0.1474551409482956, "learning_rate": 0.0002967298931957278, "loss": 6.1269, "step": 136800 }, { "epoch": 21.904, "grad_norm": 0.1317261904478073, "learning_rate": 0.00029672749309972397, "loss": 6.2704, "step": 136900 }, { "epoch": 21.92, "grad_norm": 0.12736591696739197, "learning_rate": 0.00029672511700468016, "loss": 5.9018, "step": 137000 }, { "epoch": 21.936, "grad_norm": 0.17512458562850952, "learning_rate": 0.0002967227169086763, "loss": 6.1423, "step": 137100 }, { "epoch": 21.951999999999998, "grad_norm": 0.2035478949546814, "learning_rate": 0.0002967203408136325, "loss": 5.8421, "step": 137200 }, { "epoch": 21.968, "grad_norm": 0.15790584683418274, "learning_rate": 0.0002967179407176287, "loss": 5.6449, "step": 137300 }, { "epoch": 21.984, "grad_norm": 0.13050822913646698, "learning_rate": 0.00029671554062162485, "loss": 6.0866, "step": 137400 }, { "epoch": 22.0, "grad_norm": 0.1332990825176239, "learning_rate": 0.00029671314052562097, "loss": 5.8362, "step": 137500 }, { "epoch": 22.016, "grad_norm": 0.14409734308719635, "learning_rate": 0.00029671074042961714, "loss": 5.7401, "step": 137600 }, { "epoch": 22.032, "grad_norm": 0.1513838768005371, "learning_rate": 0.0002967083403336133, "loss": 5.8022, "step": 137700 }, { "epoch": 22.048, "grad_norm": 0.14416912198066711, "learning_rate": 0.0002967059402376095, "loss": 5.7687, "step": 137800 }, { "epoch": 22.064, "grad_norm": 0.13069897890090942, "learning_rate": 0.00029670354014160565, "loss": 5.7314, "step": 137900 }, { "epoch": 22.08, "grad_norm": 0.15089532732963562, "learning_rate": 0.0002967011400456018, "loss": 5.6511, "step": 138000 }, { "epoch": 22.096, "grad_norm": 0.1493406444787979, "learning_rate": 0.00029669873994959793, "loss": 5.7553, "step": 138100 }, { "epoch": 22.112, "grad_norm": 0.11403771489858627, "learning_rate": 0.0002966963398535941, "loss": 5.8785, "step": 138200 }, { "epoch": 22.128, "grad_norm": 0.1418454647064209, "learning_rate": 0.00029669393975759027, "loss": 5.906, "step": 138300 }, { "epoch": 22.144, "grad_norm": 0.14632883667945862, "learning_rate": 0.00029669153966158644, "loss": 5.7911, "step": 138400 }, { "epoch": 22.16, "grad_norm": 0.18317896127700806, "learning_rate": 0.0002966891395655826, "loss": 5.6022, "step": 138500 }, { "epoch": 22.176, "grad_norm": 0.14640462398529053, "learning_rate": 0.0002966867394695788, "loss": 5.6879, "step": 138600 }, { "epoch": 22.192, "grad_norm": 0.11322261393070221, "learning_rate": 0.0002966843393735749, "loss": 5.679, "step": 138700 }, { "epoch": 22.208, "grad_norm": 0.14412596821784973, "learning_rate": 0.00029668193927757106, "loss": 5.6202, "step": 138800 }, { "epoch": 22.224, "grad_norm": 0.14023444056510925, "learning_rate": 0.00029667953918156723, "loss": 6.0133, "step": 138900 }, { "epoch": 22.24, "grad_norm": 0.18092051148414612, "learning_rate": 0.0002966771390855634, "loss": 5.6881, "step": 139000 }, { "epoch": 22.256, "grad_norm": 0.13267236948013306, "learning_rate": 0.00029667473898955957, "loss": 5.742, "step": 139100 }, { "epoch": 22.272, "grad_norm": 0.1066688597202301, "learning_rate": 0.0002966723388935557, "loss": 5.9524, "step": 139200 }, { "epoch": 22.288, "grad_norm": 0.17234094440937042, "learning_rate": 0.00029666993879755185, "loss": 6.0385, "step": 139300 }, { "epoch": 22.304, "grad_norm": 0.1593136042356491, "learning_rate": 0.000296667538701548, "loss": 5.7894, "step": 139400 }, { "epoch": 22.32, "grad_norm": 0.1161966621875763, "learning_rate": 0.0002966651386055442, "loss": 5.6333, "step": 139500 }, { "epoch": 22.336, "grad_norm": 0.16088221967220306, "learning_rate": 0.00029666273850954036, "loss": 5.3016, "step": 139600 }, { "epoch": 22.352, "grad_norm": 0.195027694106102, "learning_rate": 0.00029666033841353653, "loss": 5.8886, "step": 139700 }, { "epoch": 22.368, "grad_norm": 0.17010509967803955, "learning_rate": 0.00029665793831753264, "loss": 5.7462, "step": 139800 }, { "epoch": 22.384, "grad_norm": 0.15900500118732452, "learning_rate": 0.0002966555382215288, "loss": 6.1951, "step": 139900 }, { "epoch": 22.4, "grad_norm": 0.20321440696716309, "learning_rate": 0.000296653138125525, "loss": 5.8264, "step": 140000 }, { "epoch": 22.416, "grad_norm": 0.21823586523532867, "learning_rate": 0.00029665073802952115, "loss": 5.7779, "step": 140100 }, { "epoch": 22.432, "grad_norm": 0.12739881873130798, "learning_rate": 0.0002966483379335173, "loss": 5.6477, "step": 140200 }, { "epoch": 22.448, "grad_norm": 0.1288122534751892, "learning_rate": 0.00029664593783751344, "loss": 5.5937, "step": 140300 }, { "epoch": 22.464, "grad_norm": 0.12690824270248413, "learning_rate": 0.0002966435377415096, "loss": 6.0249, "step": 140400 }, { "epoch": 22.48, "grad_norm": 0.16361913084983826, "learning_rate": 0.00029664113764550583, "loss": 5.8957, "step": 140500 }, { "epoch": 22.496, "grad_norm": 0.13729694485664368, "learning_rate": 0.000296638737549502, "loss": 5.8405, "step": 140600 }, { "epoch": 22.512, "grad_norm": 0.19917264580726624, "learning_rate": 0.0002966363374534981, "loss": 5.9084, "step": 140700 }, { "epoch": 22.528, "grad_norm": 0.15145164728164673, "learning_rate": 0.0002966339373574943, "loss": 5.4631, "step": 140800 }, { "epoch": 22.544, "grad_norm": 0.11967241019010544, "learning_rate": 0.00029663153726149045, "loss": 5.9098, "step": 140900 }, { "epoch": 22.56, "grad_norm": 0.15000027418136597, "learning_rate": 0.0002966291371654866, "loss": 5.7238, "step": 141000 }, { "epoch": 22.576, "grad_norm": 0.16883157193660736, "learning_rate": 0.0002966267370694828, "loss": 5.738, "step": 141100 }, { "epoch": 22.592, "grad_norm": 0.13367842137813568, "learning_rate": 0.0002966243369734789, "loss": 5.5043, "step": 141200 }, { "epoch": 22.608, "grad_norm": 0.15113677084445953, "learning_rate": 0.00029662193687747507, "loss": 5.6651, "step": 141300 }, { "epoch": 22.624, "grad_norm": 0.13519582152366638, "learning_rate": 0.00029661953678147124, "loss": 5.9082, "step": 141400 }, { "epoch": 22.64, "grad_norm": 0.15879906713962555, "learning_rate": 0.0002966171366854674, "loss": 6.094, "step": 141500 }, { "epoch": 22.656, "grad_norm": 0.16288715600967407, "learning_rate": 0.0002966147365894636, "loss": 5.5707, "step": 141600 }, { "epoch": 22.672, "grad_norm": 0.14412395656108856, "learning_rate": 0.00029661233649345975, "loss": 5.6827, "step": 141700 }, { "epoch": 22.688, "grad_norm": 0.14847436547279358, "learning_rate": 0.00029660993639745586, "loss": 5.4179, "step": 141800 }, { "epoch": 22.704, "grad_norm": 0.13256803154945374, "learning_rate": 0.00029660753630145203, "loss": 5.6927, "step": 141900 }, { "epoch": 22.72, "grad_norm": 0.13526926934719086, "learning_rate": 0.0002966051362054482, "loss": 5.7505, "step": 142000 }, { "epoch": 22.736, "grad_norm": 0.2226150929927826, "learning_rate": 0.00029660273610944437, "loss": 5.6683, "step": 142100 }, { "epoch": 22.752, "grad_norm": 0.12251828610897064, "learning_rate": 0.00029660033601344054, "loss": 5.4908, "step": 142200 }, { "epoch": 22.768, "grad_norm": 0.15432491898536682, "learning_rate": 0.00029659793591743666, "loss": 5.5662, "step": 142300 }, { "epoch": 22.784, "grad_norm": 0.13890361785888672, "learning_rate": 0.0002965955358214328, "loss": 5.6202, "step": 142400 }, { "epoch": 22.8, "grad_norm": 0.10568337142467499, "learning_rate": 0.000296593135725429, "loss": 5.7232, "step": 142500 }, { "epoch": 22.816, "grad_norm": 0.14877153933048248, "learning_rate": 0.00029659073562942516, "loss": 5.4585, "step": 142600 }, { "epoch": 22.832, "grad_norm": 0.1703936904668808, "learning_rate": 0.00029658833553342133, "loss": 5.8294, "step": 142700 }, { "epoch": 22.848, "grad_norm": 0.12574242055416107, "learning_rate": 0.0002965859594383775, "loss": 6.0963, "step": 142800 }, { "epoch": 22.864, "grad_norm": 0.1556757390499115, "learning_rate": 0.00029658355934237364, "loss": 5.6681, "step": 142900 }, { "epoch": 22.88, "grad_norm": 0.14058822393417358, "learning_rate": 0.0002965811592463698, "loss": 5.6148, "step": 143000 }, { "epoch": 22.896, "grad_norm": 0.1746063232421875, "learning_rate": 0.000296578759150366, "loss": 5.698, "step": 143100 }, { "epoch": 22.912, "grad_norm": 0.14458870887756348, "learning_rate": 0.00029657635905436214, "loss": 5.439, "step": 143200 }, { "epoch": 22.928, "grad_norm": 0.1708308756351471, "learning_rate": 0.0002965739589583583, "loss": 5.8077, "step": 143300 }, { "epoch": 22.944, "grad_norm": 0.1382734328508377, "learning_rate": 0.00029657155886235443, "loss": 5.603, "step": 143400 }, { "epoch": 22.96, "grad_norm": 0.15728691220283508, "learning_rate": 0.0002965691587663506, "loss": 5.8985, "step": 143500 }, { "epoch": 22.976, "grad_norm": 0.12880076467990875, "learning_rate": 0.00029656675867034677, "loss": 5.7958, "step": 143600 }, { "epoch": 22.992, "grad_norm": 0.130670964717865, "learning_rate": 0.000296564358574343, "loss": 5.6226, "step": 143700 }, { "epoch": 23.008, "grad_norm": 0.1519329994916916, "learning_rate": 0.0002965619584783391, "loss": 5.5619, "step": 143800 }, { "epoch": 23.024, "grad_norm": 0.11900737136602402, "learning_rate": 0.0002965595583823353, "loss": 5.5148, "step": 143900 }, { "epoch": 23.04, "grad_norm": 0.13805437088012695, "learning_rate": 0.00029655715828633144, "loss": 5.1992, "step": 144000 }, { "epoch": 23.056, "grad_norm": 0.15381775796413422, "learning_rate": 0.0002965547581903276, "loss": 5.6994, "step": 144100 }, { "epoch": 23.072, "grad_norm": 0.17571000754833221, "learning_rate": 0.0002965523580943238, "loss": 5.4076, "step": 144200 }, { "epoch": 23.088, "grad_norm": 0.1299617439508438, "learning_rate": 0.0002965499579983199, "loss": 5.5817, "step": 144300 }, { "epoch": 23.104, "grad_norm": 0.1709066480398178, "learning_rate": 0.00029654755790231607, "loss": 5.6442, "step": 144400 }, { "epoch": 23.12, "grad_norm": 0.11673315614461899, "learning_rate": 0.00029654515780631224, "loss": 5.4461, "step": 144500 }, { "epoch": 23.136, "grad_norm": 0.17694547772407532, "learning_rate": 0.0002965427577103084, "loss": 5.4203, "step": 144600 }, { "epoch": 23.152, "grad_norm": 0.1397058516740799, "learning_rate": 0.0002965403576143046, "loss": 5.6535, "step": 144700 }, { "epoch": 23.168, "grad_norm": 0.14913706481456757, "learning_rate": 0.00029653795751830074, "loss": 5.327, "step": 144800 }, { "epoch": 23.184, "grad_norm": 0.0980440080165863, "learning_rate": 0.0002965355814232569, "loss": 5.6265, "step": 144900 }, { "epoch": 23.2, "grad_norm": 0.14519555866718292, "learning_rate": 0.00029653318132725305, "loss": 5.5968, "step": 145000 }, { "epoch": 23.216, "grad_norm": 0.14121969044208527, "learning_rate": 0.0002965307812312492, "loss": 5.3419, "step": 145100 }, { "epoch": 23.232, "grad_norm": 0.14867204427719116, "learning_rate": 0.0002965283811352454, "loss": 5.5432, "step": 145200 }, { "epoch": 23.248, "grad_norm": 0.14526410400867462, "learning_rate": 0.00029652598103924155, "loss": 5.4119, "step": 145300 }, { "epoch": 23.264, "grad_norm": 0.16068951785564423, "learning_rate": 0.00029652358094323767, "loss": 5.6084, "step": 145400 }, { "epoch": 23.28, "grad_norm": 0.1540200263261795, "learning_rate": 0.00029652118084723384, "loss": 5.3346, "step": 145500 }, { "epoch": 23.296, "grad_norm": 0.1306939572095871, "learning_rate": 0.00029651878075123, "loss": 5.4401, "step": 145600 }, { "epoch": 23.312, "grad_norm": 0.19503143429756165, "learning_rate": 0.0002965163806552262, "loss": 5.5145, "step": 145700 }, { "epoch": 23.328, "grad_norm": 0.16698400676250458, "learning_rate": 0.00029651398055922235, "loss": 5.4459, "step": 145800 }, { "epoch": 23.344, "grad_norm": 0.14990036189556122, "learning_rate": 0.0002965115804632185, "loss": 5.9844, "step": 145900 }, { "epoch": 23.36, "grad_norm": 0.12152257561683655, "learning_rate": 0.00029650918036721463, "loss": 5.4034, "step": 146000 }, { "epoch": 23.376, "grad_norm": 0.12588883936405182, "learning_rate": 0.0002965067802712108, "loss": 5.6587, "step": 146100 }, { "epoch": 23.392, "grad_norm": 0.13769680261611938, "learning_rate": 0.00029650438017520697, "loss": 5.6661, "step": 146200 }, { "epoch": 23.408, "grad_norm": 0.18270593881607056, "learning_rate": 0.00029650198007920314, "loss": 5.4772, "step": 146300 }, { "epoch": 23.424, "grad_norm": 0.16988155245780945, "learning_rate": 0.0002964995799831993, "loss": 5.861, "step": 146400 }, { "epoch": 23.44, "grad_norm": 0.15813444554805756, "learning_rate": 0.0002964971798871954, "loss": 5.5742, "step": 146500 }, { "epoch": 23.456, "grad_norm": 0.20319218933582306, "learning_rate": 0.0002964947797911916, "loss": 5.5046, "step": 146600 }, { "epoch": 23.472, "grad_norm": 0.1794954091310501, "learning_rate": 0.00029649237969518776, "loss": 5.4266, "step": 146700 }, { "epoch": 23.488, "grad_norm": 0.18233439326286316, "learning_rate": 0.000296489979599184, "loss": 5.7988, "step": 146800 }, { "epoch": 23.504, "grad_norm": 0.24476204812526703, "learning_rate": 0.0002964875795031801, "loss": 5.5573, "step": 146900 }, { "epoch": 23.52, "grad_norm": 0.12210160493850708, "learning_rate": 0.00029648517940717627, "loss": 5.3991, "step": 147000 }, { "epoch": 23.536, "grad_norm": 0.18380597233772278, "learning_rate": 0.00029648277931117244, "loss": 5.7061, "step": 147100 }, { "epoch": 23.552, "grad_norm": 0.14776001870632172, "learning_rate": 0.0002964803792151686, "loss": 5.6827, "step": 147200 }, { "epoch": 23.568, "grad_norm": 0.13290056586265564, "learning_rate": 0.0002964779791191648, "loss": 5.6598, "step": 147300 }, { "epoch": 23.584, "grad_norm": 0.12878666818141937, "learning_rate": 0.0002964755790231609, "loss": 5.4732, "step": 147400 }, { "epoch": 23.6, "grad_norm": 0.11875222623348236, "learning_rate": 0.00029647317892715706, "loss": 5.9345, "step": 147500 }, { "epoch": 23.616, "grad_norm": 0.1489972323179245, "learning_rate": 0.00029647077883115323, "loss": 5.5631, "step": 147600 }, { "epoch": 23.632, "grad_norm": 0.22594046592712402, "learning_rate": 0.0002964683787351494, "loss": 5.2854, "step": 147700 }, { "epoch": 23.648, "grad_norm": 0.14621250331401825, "learning_rate": 0.00029646597863914557, "loss": 5.2938, "step": 147800 }, { "epoch": 23.664, "grad_norm": 0.14641734957695007, "learning_rate": 0.00029646357854314174, "loss": 5.7265, "step": 147900 }, { "epoch": 23.68, "grad_norm": 0.14452804625034332, "learning_rate": 0.00029646117844713785, "loss": 5.3081, "step": 148000 }, { "epoch": 23.696, "grad_norm": 0.1696479767560959, "learning_rate": 0.000296458778351134, "loss": 5.7359, "step": 148100 }, { "epoch": 23.712, "grad_norm": 0.1629931777715683, "learning_rate": 0.0002964563782551302, "loss": 5.8091, "step": 148200 }, { "epoch": 23.728, "grad_norm": 0.1588413119316101, "learning_rate": 0.00029645397815912636, "loss": 5.8185, "step": 148300 }, { "epoch": 23.744, "grad_norm": 0.1528206616640091, "learning_rate": 0.00029645157806312253, "loss": 5.6945, "step": 148400 }, { "epoch": 23.76, "grad_norm": 0.16446250677108765, "learning_rate": 0.00029644917796711864, "loss": 5.1739, "step": 148500 }, { "epoch": 23.776, "grad_norm": 0.14487922191619873, "learning_rate": 0.00029644680187207483, "loss": 5.5836, "step": 148600 }, { "epoch": 23.792, "grad_norm": 0.297879159450531, "learning_rate": 0.0002964444257770311, "loss": 5.5247, "step": 148700 }, { "epoch": 23.808, "grad_norm": 0.1171737089753151, "learning_rate": 0.00029644202568102724, "loss": 5.3085, "step": 148800 }, { "epoch": 23.824, "grad_norm": 0.1464715600013733, "learning_rate": 0.00029643962558502336, "loss": 5.3029, "step": 148900 }, { "epoch": 23.84, "grad_norm": 0.16126649081707, "learning_rate": 0.0002964372254890195, "loss": 5.7273, "step": 149000 }, { "epoch": 23.856, "grad_norm": 0.10824692994356155, "learning_rate": 0.0002964348253930157, "loss": 5.3296, "step": 149100 }, { "epoch": 23.872, "grad_norm": 0.14661309123039246, "learning_rate": 0.00029643242529701187, "loss": 5.828, "step": 149200 }, { "epoch": 23.888, "grad_norm": 0.16918961703777313, "learning_rate": 0.00029643002520100803, "loss": 5.359, "step": 149300 }, { "epoch": 23.904, "grad_norm": 0.14028948545455933, "learning_rate": 0.00029642762510500415, "loss": 5.5027, "step": 149400 }, { "epoch": 23.92, "grad_norm": 0.15497733652591705, "learning_rate": 0.0002964252250090003, "loss": 5.7539, "step": 149500 }, { "epoch": 23.936, "grad_norm": 0.12349986284971237, "learning_rate": 0.0002964228249129965, "loss": 5.1582, "step": 149600 }, { "epoch": 23.951999999999998, "grad_norm": 0.1359599381685257, "learning_rate": 0.00029642042481699266, "loss": 5.4394, "step": 149700 }, { "epoch": 23.968, "grad_norm": 0.18629401922225952, "learning_rate": 0.0002964180247209888, "loss": 5.4743, "step": 149800 }, { "epoch": 23.984, "grad_norm": 0.1438770890235901, "learning_rate": 0.000296415624624985, "loss": 5.4707, "step": 149900 }, { "epoch": 24.0, "grad_norm": 0.11876608431339264, "learning_rate": 0.0002964132245289811, "loss": 5.2108, "step": 150000 }, { "epoch": 24.016, "grad_norm": 0.1379069685935974, "learning_rate": 0.0002964108244329773, "loss": 5.5858, "step": 150100 }, { "epoch": 24.032, "grad_norm": 0.15197959542274475, "learning_rate": 0.00029640842433697345, "loss": 5.3452, "step": 150200 }, { "epoch": 24.048, "grad_norm": 0.16093584895133972, "learning_rate": 0.0002964060242409696, "loss": 5.1725, "step": 150300 }, { "epoch": 24.064, "grad_norm": 0.14459937810897827, "learning_rate": 0.0002964036241449658, "loss": 5.529, "step": 150400 }, { "epoch": 24.08, "grad_norm": 0.15908825397491455, "learning_rate": 0.0002964012240489619, "loss": 5.0667, "step": 150500 }, { "epoch": 24.096, "grad_norm": 0.14320479333400726, "learning_rate": 0.00029639882395295807, "loss": 5.4541, "step": 150600 }, { "epoch": 24.112, "grad_norm": 0.1382274329662323, "learning_rate": 0.00029639642385695424, "loss": 5.4337, "step": 150700 }, { "epoch": 24.128, "grad_norm": 0.09485090523958206, "learning_rate": 0.0002963940237609504, "loss": 5.5169, "step": 150800 }, { "epoch": 24.144, "grad_norm": 0.1434488147497177, "learning_rate": 0.0002963916236649466, "loss": 5.1838, "step": 150900 }, { "epoch": 24.16, "grad_norm": 0.172550767660141, "learning_rate": 0.00029638922356894275, "loss": 5.4995, "step": 151000 }, { "epoch": 24.176, "grad_norm": 0.17296665906906128, "learning_rate": 0.00029638682347293886, "loss": 5.3814, "step": 151100 }, { "epoch": 24.192, "grad_norm": 0.13183431327342987, "learning_rate": 0.00029638442337693503, "loss": 5.4961, "step": 151200 }, { "epoch": 24.208, "grad_norm": 0.11805009096860886, "learning_rate": 0.0002963820472818913, "loss": 5.3575, "step": 151300 }, { "epoch": 24.224, "grad_norm": 0.1694483608007431, "learning_rate": 0.0002963796471858874, "loss": 5.4198, "step": 151400 }, { "epoch": 24.24, "grad_norm": 0.14694049954414368, "learning_rate": 0.00029637724708988356, "loss": 5.2369, "step": 151500 }, { "epoch": 24.256, "grad_norm": 0.14818693697452545, "learning_rate": 0.00029637484699387973, "loss": 5.5989, "step": 151600 }, { "epoch": 24.272, "grad_norm": 0.12142101675271988, "learning_rate": 0.0002963724468978759, "loss": 5.5808, "step": 151700 }, { "epoch": 24.288, "grad_norm": 0.1072693020105362, "learning_rate": 0.00029637004680187207, "loss": 5.2257, "step": 151800 }, { "epoch": 24.304, "grad_norm": 0.20452247560024261, "learning_rate": 0.00029636764670586824, "loss": 4.9512, "step": 151900 }, { "epoch": 24.32, "grad_norm": 0.13785667717456818, "learning_rate": 0.00029636524660986435, "loss": 5.3486, "step": 152000 }, { "epoch": 24.336, "grad_norm": 0.16348302364349365, "learning_rate": 0.0002963628465138605, "loss": 5.416, "step": 152100 }, { "epoch": 24.352, "grad_norm": 0.12873555719852448, "learning_rate": 0.0002963604464178567, "loss": 5.4854, "step": 152200 }, { "epoch": 24.368, "grad_norm": 0.14430370926856995, "learning_rate": 0.00029635804632185286, "loss": 5.083, "step": 152300 }, { "epoch": 24.384, "grad_norm": 0.14247077703475952, "learning_rate": 0.00029635564622584903, "loss": 5.2926, "step": 152400 }, { "epoch": 24.4, "grad_norm": 0.12942449748516083, "learning_rate": 0.00029635324612984514, "loss": 5.2287, "step": 152500 }, { "epoch": 24.416, "grad_norm": 0.1290571689605713, "learning_rate": 0.0002963508460338413, "loss": 5.1295, "step": 152600 }, { "epoch": 24.432, "grad_norm": 0.14392858743667603, "learning_rate": 0.0002963484459378375, "loss": 5.2795, "step": 152700 }, { "epoch": 24.448, "grad_norm": 0.10403969883918762, "learning_rate": 0.00029634604584183365, "loss": 5.4616, "step": 152800 }, { "epoch": 24.464, "grad_norm": 0.1357210874557495, "learning_rate": 0.0002963436457458298, "loss": 5.0671, "step": 152900 }, { "epoch": 24.48, "grad_norm": 0.162188321352005, "learning_rate": 0.000296341245649826, "loss": 5.1244, "step": 153000 }, { "epoch": 24.496, "grad_norm": 0.1423524171113968, "learning_rate": 0.0002963388455538221, "loss": 5.2658, "step": 153100 }, { "epoch": 24.512, "grad_norm": 0.15725597739219666, "learning_rate": 0.00029633644545781827, "loss": 5.4486, "step": 153200 }, { "epoch": 24.528, "grad_norm": 0.10184895247220993, "learning_rate": 0.00029633404536181444, "loss": 5.1975, "step": 153300 }, { "epoch": 24.544, "grad_norm": 0.11968593299388885, "learning_rate": 0.0002963316452658106, "loss": 5.0282, "step": 153400 }, { "epoch": 24.56, "grad_norm": 0.15125450491905212, "learning_rate": 0.0002963292451698068, "loss": 5.0548, "step": 153500 }, { "epoch": 24.576, "grad_norm": 0.1498018205165863, "learning_rate": 0.0002963268450738029, "loss": 5.2235, "step": 153600 }, { "epoch": 24.592, "grad_norm": 0.14961381256580353, "learning_rate": 0.00029632444497779906, "loss": 5.282, "step": 153700 }, { "epoch": 24.608, "grad_norm": 0.10805343836545944, "learning_rate": 0.00029632204488179523, "loss": 5.2164, "step": 153800 }, { "epoch": 24.624, "grad_norm": 0.1407497674226761, "learning_rate": 0.0002963196447857914, "loss": 5.8793, "step": 153900 }, { "epoch": 24.64, "grad_norm": 0.15589803457260132, "learning_rate": 0.00029631724468978757, "loss": 5.2803, "step": 154000 }, { "epoch": 24.656, "grad_norm": 0.15549539029598236, "learning_rate": 0.00029631484459378374, "loss": 5.5255, "step": 154100 }, { "epoch": 24.672, "grad_norm": 0.14697429537773132, "learning_rate": 0.00029631244449777986, "loss": 5.2088, "step": 154200 }, { "epoch": 24.688, "grad_norm": 0.14445632696151733, "learning_rate": 0.000296310044401776, "loss": 5.314, "step": 154300 }, { "epoch": 24.704, "grad_norm": 0.13264203071594238, "learning_rate": 0.0002963076443057722, "loss": 5.1363, "step": 154400 }, { "epoch": 24.72, "grad_norm": 0.14595112204551697, "learning_rate": 0.00029630524420976836, "loss": 5.1834, "step": 154500 }, { "epoch": 24.736, "grad_norm": 0.15063650906085968, "learning_rate": 0.00029630284411376453, "loss": 5.2409, "step": 154600 }, { "epoch": 24.752, "grad_norm": 0.1531144678592682, "learning_rate": 0.00029630044401776065, "loss": 5.3414, "step": 154700 }, { "epoch": 24.768, "grad_norm": 0.15418265759944916, "learning_rate": 0.0002962980439217568, "loss": 5.3579, "step": 154800 }, { "epoch": 24.784, "grad_norm": 0.13664741814136505, "learning_rate": 0.000296295643825753, "loss": 5.4855, "step": 154900 }, { "epoch": 24.8, "grad_norm": 0.15261198580265045, "learning_rate": 0.00029629324372974916, "loss": 5.5078, "step": 155000 }, { "epoch": 24.816, "grad_norm": 0.1436208039522171, "learning_rate": 0.0002962908436337453, "loss": 5.2359, "step": 155100 }, { "epoch": 24.832, "grad_norm": 0.1557721495628357, "learning_rate": 0.0002962884435377415, "loss": 5.1472, "step": 155200 }, { "epoch": 24.848, "grad_norm": 0.1639142483472824, "learning_rate": 0.0002962860434417376, "loss": 5.1701, "step": 155300 }, { "epoch": 24.864, "grad_norm": 0.1857120245695114, "learning_rate": 0.0002962836433457338, "loss": 5.3149, "step": 155400 }, { "epoch": 24.88, "grad_norm": 0.1384589672088623, "learning_rate": 0.00029628124324972995, "loss": 5.1655, "step": 155500 }, { "epoch": 24.896, "grad_norm": 0.16934780776500702, "learning_rate": 0.0002962788431537261, "loss": 5.0212, "step": 155600 }, { "epoch": 24.912, "grad_norm": 0.14011263847351074, "learning_rate": 0.0002962764430577223, "loss": 5.3506, "step": 155700 }, { "epoch": 24.928, "grad_norm": 0.12232084572315216, "learning_rate": 0.0002962740429617184, "loss": 4.9836, "step": 155800 }, { "epoch": 24.944, "grad_norm": 0.1219339519739151, "learning_rate": 0.00029627164286571457, "loss": 5.337, "step": 155900 }, { "epoch": 24.96, "grad_norm": 0.13951101899147034, "learning_rate": 0.0002962692667706708, "loss": 5.6947, "step": 156000 }, { "epoch": 24.976, "grad_norm": 0.15717874467372894, "learning_rate": 0.000296266866674667, "loss": 5.0598, "step": 156100 }, { "epoch": 24.992, "grad_norm": 0.16753438115119934, "learning_rate": 0.0002962644665786631, "loss": 5.1918, "step": 156200 }, { "epoch": 25.008, "grad_norm": 0.11955256760120392, "learning_rate": 0.00029626206648265927, "loss": 5.1705, "step": 156300 }, { "epoch": 25.024, "grad_norm": 0.11964499950408936, "learning_rate": 0.00029625966638665544, "loss": 5.3443, "step": 156400 }, { "epoch": 25.04, "grad_norm": 0.123370461165905, "learning_rate": 0.0002962572662906516, "loss": 4.9845, "step": 156500 }, { "epoch": 25.056, "grad_norm": 0.12556427717208862, "learning_rate": 0.0002962548661946478, "loss": 4.9369, "step": 156600 }, { "epoch": 25.072, "grad_norm": 0.15033285319805145, "learning_rate": 0.0002962524660986439, "loss": 5.1891, "step": 156700 }, { "epoch": 25.088, "grad_norm": 0.157626673579216, "learning_rate": 0.00029625006600264006, "loss": 5.0871, "step": 156800 }, { "epoch": 25.104, "grad_norm": 0.12489177286624908, "learning_rate": 0.0002962476659066362, "loss": 4.9887, "step": 156900 }, { "epoch": 25.12, "grad_norm": 0.17784586548805237, "learning_rate": 0.0002962452658106324, "loss": 4.9263, "step": 157000 }, { "epoch": 25.136, "grad_norm": 0.26584434509277344, "learning_rate": 0.00029624286571462857, "loss": 5.1268, "step": 157100 }, { "epoch": 25.152, "grad_norm": 0.14168865978717804, "learning_rate": 0.00029624046561862473, "loss": 5.4578, "step": 157200 }, { "epoch": 25.168, "grad_norm": 0.1289631426334381, "learning_rate": 0.00029623806552262085, "loss": 5.2466, "step": 157300 }, { "epoch": 25.184, "grad_norm": 0.12273957580327988, "learning_rate": 0.000296235665426617, "loss": 4.7845, "step": 157400 }, { "epoch": 25.2, "grad_norm": 0.24651670455932617, "learning_rate": 0.0002962332653306132, "loss": 5.0988, "step": 157500 }, { "epoch": 25.216, "grad_norm": 0.1415649801492691, "learning_rate": 0.00029623086523460936, "loss": 5.0998, "step": 157600 }, { "epoch": 25.232, "grad_norm": 0.1132798045873642, "learning_rate": 0.0002962284651386055, "loss": 5.2229, "step": 157700 }, { "epoch": 25.248, "grad_norm": 0.10961470752954483, "learning_rate": 0.00029622606504260164, "loss": 4.9959, "step": 157800 }, { "epoch": 25.264, "grad_norm": 0.16054928302764893, "learning_rate": 0.0002962236649465978, "loss": 4.989, "step": 157900 }, { "epoch": 25.28, "grad_norm": 0.16918180882930756, "learning_rate": 0.000296221264850594, "loss": 5.0824, "step": 158000 }, { "epoch": 25.296, "grad_norm": 0.12880262732505798, "learning_rate": 0.00029621886475459015, "loss": 4.6069, "step": 158100 }, { "epoch": 25.312, "grad_norm": 0.16930246353149414, "learning_rate": 0.0002962164646585863, "loss": 5.0421, "step": 158200 }, { "epoch": 25.328, "grad_norm": 0.15791450440883636, "learning_rate": 0.0002962140645625825, "loss": 5.1324, "step": 158300 }, { "epoch": 25.344, "grad_norm": 0.12896622717380524, "learning_rate": 0.0002962116644665786, "loss": 4.8697, "step": 158400 }, { "epoch": 25.36, "grad_norm": 0.15522588789463043, "learning_rate": 0.00029620926437057477, "loss": 5.112, "step": 158500 }, { "epoch": 25.376, "grad_norm": 0.15994909405708313, "learning_rate": 0.00029620686427457094, "loss": 5.1186, "step": 158600 }, { "epoch": 25.392, "grad_norm": 0.16203735768795013, "learning_rate": 0.0002962044641785671, "loss": 5.2136, "step": 158700 }, { "epoch": 25.408, "grad_norm": 0.14830628037452698, "learning_rate": 0.0002962020640825633, "loss": 4.8028, "step": 158800 }, { "epoch": 25.424, "grad_norm": 0.17855019867420197, "learning_rate": 0.00029619966398655945, "loss": 5.2293, "step": 158900 }, { "epoch": 25.44, "grad_norm": 0.13485394418239594, "learning_rate": 0.00029619728789151564, "loss": 5.1688, "step": 159000 }, { "epoch": 25.456, "grad_norm": 0.15001603960990906, "learning_rate": 0.0002961948877955118, "loss": 5.2429, "step": 159100 }, { "epoch": 25.472, "grad_norm": 0.15747343003749847, "learning_rate": 0.000296192487699508, "loss": 5.0648, "step": 159200 }, { "epoch": 25.488, "grad_norm": 0.11709601432085037, "learning_rate": 0.0002961900876035041, "loss": 4.9424, "step": 159300 }, { "epoch": 25.504, "grad_norm": 0.14115624129772186, "learning_rate": 0.00029618768750750026, "loss": 5.2824, "step": 159400 }, { "epoch": 25.52, "grad_norm": 0.13271014392375946, "learning_rate": 0.00029618528741149643, "loss": 5.2082, "step": 159500 }, { "epoch": 25.536, "grad_norm": 0.13927429914474487, "learning_rate": 0.0002961828873154926, "loss": 5.0302, "step": 159600 }, { "epoch": 25.552, "grad_norm": 0.1625901609659195, "learning_rate": 0.00029618048721948877, "loss": 5.2649, "step": 159700 }, { "epoch": 25.568, "grad_norm": 0.1242537572979927, "learning_rate": 0.0002961780871234849, "loss": 5.3638, "step": 159800 }, { "epoch": 25.584, "grad_norm": 0.22442211210727692, "learning_rate": 0.00029617568702748105, "loss": 4.7374, "step": 159900 }, { "epoch": 25.6, "grad_norm": 0.1424286961555481, "learning_rate": 0.0002961732869314772, "loss": 5.0878, "step": 160000 }, { "epoch": 25.616, "grad_norm": 0.16174399852752686, "learning_rate": 0.0002961708868354734, "loss": 5.4059, "step": 160100 }, { "epoch": 25.632, "grad_norm": 0.12529495358467102, "learning_rate": 0.00029616848673946956, "loss": 5.1528, "step": 160200 }, { "epoch": 25.648, "grad_norm": 0.14766289293766022, "learning_rate": 0.00029616608664346573, "loss": 5.2453, "step": 160300 }, { "epoch": 25.664, "grad_norm": 0.12722782790660858, "learning_rate": 0.00029616368654746184, "loss": 5.1237, "step": 160400 }, { "epoch": 25.68, "grad_norm": 0.1653498262166977, "learning_rate": 0.000296161286451458, "loss": 5.2606, "step": 160500 }, { "epoch": 25.696, "grad_norm": 0.15743720531463623, "learning_rate": 0.0002961588863554542, "loss": 5.3842, "step": 160600 }, { "epoch": 25.712, "grad_norm": 0.11641506105661392, "learning_rate": 0.00029615648625945035, "loss": 5.0112, "step": 160700 }, { "epoch": 25.728, "grad_norm": 0.1600313037633896, "learning_rate": 0.0002961540861634465, "loss": 5.1207, "step": 160800 }, { "epoch": 25.744, "grad_norm": 0.1792784333229065, "learning_rate": 0.0002961516860674427, "loss": 5.1801, "step": 160900 }, { "epoch": 25.76, "grad_norm": 0.12263203412294388, "learning_rate": 0.0002961492859714388, "loss": 5.1875, "step": 161000 }, { "epoch": 25.776, "grad_norm": 0.1638142168521881, "learning_rate": 0.00029614690987639505, "loss": 5.5503, "step": 161100 }, { "epoch": 25.792, "grad_norm": 0.12107832729816437, "learning_rate": 0.0002961445097803912, "loss": 5.312, "step": 161200 }, { "epoch": 25.808, "grad_norm": 0.1593557745218277, "learning_rate": 0.00029614210968438733, "loss": 5.0444, "step": 161300 }, { "epoch": 25.824, "grad_norm": 0.14629554748535156, "learning_rate": 0.0002961397095883835, "loss": 5.2007, "step": 161400 }, { "epoch": 25.84, "grad_norm": 0.14022816717624664, "learning_rate": 0.00029613730949237967, "loss": 5.1234, "step": 161500 }, { "epoch": 25.856, "grad_norm": 0.15026092529296875, "learning_rate": 0.00029613490939637584, "loss": 5.1459, "step": 161600 }, { "epoch": 25.872, "grad_norm": 0.16642487049102783, "learning_rate": 0.000296132509300372, "loss": 5.074, "step": 161700 }, { "epoch": 25.888, "grad_norm": 0.16100358963012695, "learning_rate": 0.0002961301092043681, "loss": 4.8445, "step": 161800 }, { "epoch": 25.904, "grad_norm": 0.14411258697509766, "learning_rate": 0.0002961277091083643, "loss": 4.7157, "step": 161900 }, { "epoch": 25.92, "grad_norm": 0.10813727974891663, "learning_rate": 0.00029612530901236046, "loss": 5.0682, "step": 162000 }, { "epoch": 25.936, "grad_norm": 0.14450779557228088, "learning_rate": 0.00029612290891635663, "loss": 5.241, "step": 162100 }, { "epoch": 25.951999999999998, "grad_norm": 0.16171583533287048, "learning_rate": 0.0002961205088203528, "loss": 5.1133, "step": 162200 }, { "epoch": 25.968, "grad_norm": 0.12712721526622772, "learning_rate": 0.00029611810872434897, "loss": 5.0328, "step": 162300 }, { "epoch": 25.984, "grad_norm": 0.12672489881515503, "learning_rate": 0.0002961157086283451, "loss": 4.8169, "step": 162400 }, { "epoch": 26.0, "grad_norm": 0.15172095596790314, "learning_rate": 0.00029611330853234125, "loss": 5.092, "step": 162500 }, { "epoch": 26.016, "grad_norm": 0.18036304414272308, "learning_rate": 0.0002961109084363374, "loss": 4.7511, "step": 162600 }, { "epoch": 26.032, "grad_norm": 0.16676302254199982, "learning_rate": 0.0002961085083403336, "loss": 4.9628, "step": 162700 }, { "epoch": 26.048, "grad_norm": 0.1724889576435089, "learning_rate": 0.00029610610824432976, "loss": 4.8742, "step": 162800 }, { "epoch": 26.064, "grad_norm": 0.1280188113451004, "learning_rate": 0.00029610370814832593, "loss": 5.3059, "step": 162900 }, { "epoch": 26.08, "grad_norm": 0.15785780549049377, "learning_rate": 0.00029610130805232204, "loss": 4.8671, "step": 163000 }, { "epoch": 26.096, "grad_norm": 0.14080898463726044, "learning_rate": 0.0002960989079563182, "loss": 5.1418, "step": 163100 }, { "epoch": 26.112, "grad_norm": 0.13095679879188538, "learning_rate": 0.0002960965078603144, "loss": 4.7194, "step": 163200 }, { "epoch": 26.128, "grad_norm": 0.1574213057756424, "learning_rate": 0.00029609410776431055, "loss": 4.9184, "step": 163300 }, { "epoch": 26.144, "grad_norm": 0.13669663667678833, "learning_rate": 0.0002960917076683067, "loss": 5.0563, "step": 163400 }, { "epoch": 26.16, "grad_norm": 0.15946930646896362, "learning_rate": 0.00029608930757230284, "loss": 4.7656, "step": 163500 }, { "epoch": 26.176, "grad_norm": 0.1457744687795639, "learning_rate": 0.000296086907476299, "loss": 4.894, "step": 163600 }, { "epoch": 26.192, "grad_norm": 0.10747674852609634, "learning_rate": 0.0002960845073802952, "loss": 5.1462, "step": 163700 }, { "epoch": 26.208, "grad_norm": 0.22094644606113434, "learning_rate": 0.00029608210728429134, "loss": 5.3243, "step": 163800 }, { "epoch": 26.224, "grad_norm": 0.12370151281356812, "learning_rate": 0.0002960797071882875, "loss": 4.8294, "step": 163900 }, { "epoch": 26.24, "grad_norm": 0.1479647010564804, "learning_rate": 0.0002960773070922837, "loss": 5.0416, "step": 164000 }, { "epoch": 26.256, "grad_norm": 0.15605013072490692, "learning_rate": 0.0002960749069962798, "loss": 5.2773, "step": 164100 }, { "epoch": 26.272, "grad_norm": 0.1911146342754364, "learning_rate": 0.00029607250690027597, "loss": 4.939, "step": 164200 }, { "epoch": 26.288, "grad_norm": 0.12012562155723572, "learning_rate": 0.0002960701308052322, "loss": 4.8719, "step": 164300 }, { "epoch": 26.304, "grad_norm": 0.12493129819631577, "learning_rate": 0.0002960677307092283, "loss": 4.7802, "step": 164400 }, { "epoch": 26.32, "grad_norm": 0.12632489204406738, "learning_rate": 0.0002960653306132245, "loss": 4.8725, "step": 164500 }, { "epoch": 26.336, "grad_norm": 0.15591692924499512, "learning_rate": 0.00029606293051722066, "loss": 5.2183, "step": 164600 }, { "epoch": 26.352, "grad_norm": 0.12113320082426071, "learning_rate": 0.00029606053042121683, "loss": 4.981, "step": 164700 }, { "epoch": 26.368, "grad_norm": 0.12973067164421082, "learning_rate": 0.000296058130325213, "loss": 5.1433, "step": 164800 }, { "epoch": 26.384, "grad_norm": 0.15297859907150269, "learning_rate": 0.00029605573022920917, "loss": 4.9628, "step": 164900 }, { "epoch": 26.4, "grad_norm": 0.13537169992923737, "learning_rate": 0.0002960533301332053, "loss": 4.6621, "step": 165000 }, { "epoch": 26.416, "grad_norm": 0.12161804735660553, "learning_rate": 0.00029605093003720145, "loss": 4.9027, "step": 165100 }, { "epoch": 26.432, "grad_norm": 0.14561276137828827, "learning_rate": 0.0002960485299411976, "loss": 4.7497, "step": 165200 }, { "epoch": 26.448, "grad_norm": 0.1523263305425644, "learning_rate": 0.0002960461298451938, "loss": 4.7575, "step": 165300 }, { "epoch": 26.464, "grad_norm": 0.13894937932491302, "learning_rate": 0.00029604372974918996, "loss": 5.1487, "step": 165400 }, { "epoch": 26.48, "grad_norm": 0.1122347041964531, "learning_rate": 0.0002960413296531861, "loss": 4.8517, "step": 165500 }, { "epoch": 26.496, "grad_norm": 0.12737123668193817, "learning_rate": 0.00029603892955718225, "loss": 4.8187, "step": 165600 }, { "epoch": 26.512, "grad_norm": 0.1302328109741211, "learning_rate": 0.0002960365294611784, "loss": 4.6812, "step": 165700 }, { "epoch": 26.528, "grad_norm": 0.14844807982444763, "learning_rate": 0.0002960341293651746, "loss": 4.9271, "step": 165800 }, { "epoch": 26.544, "grad_norm": 0.17675945162773132, "learning_rate": 0.00029603172926917075, "loss": 4.7797, "step": 165900 }, { "epoch": 26.56, "grad_norm": 0.18416370451450348, "learning_rate": 0.0002960293291731669, "loss": 5.1626, "step": 166000 }, { "epoch": 26.576, "grad_norm": 0.12005133926868439, "learning_rate": 0.00029602692907716304, "loss": 4.7074, "step": 166100 }, { "epoch": 26.592, "grad_norm": 0.185636967420578, "learning_rate": 0.0002960245289811592, "loss": 5.175, "step": 166200 }, { "epoch": 26.608, "grad_norm": 0.11722932010889053, "learning_rate": 0.0002960221288851554, "loss": 4.9977, "step": 166300 }, { "epoch": 26.624, "grad_norm": 0.13763803243637085, "learning_rate": 0.00029601972878915154, "loss": 4.732, "step": 166400 }, { "epoch": 26.64, "grad_norm": 0.13912682235240936, "learning_rate": 0.0002960173286931477, "loss": 4.877, "step": 166500 }, { "epoch": 26.656, "grad_norm": 0.10087449103593826, "learning_rate": 0.00029601492859714383, "loss": 4.7994, "step": 166600 }, { "epoch": 26.672, "grad_norm": 0.1845891773700714, "learning_rate": 0.00029601252850114, "loss": 5.4515, "step": 166700 }, { "epoch": 26.688, "grad_norm": 0.14900504052639008, "learning_rate": 0.00029601012840513617, "loss": 5.0709, "step": 166800 }, { "epoch": 26.704, "grad_norm": 0.19447046518325806, "learning_rate": 0.00029600772830913234, "loss": 4.8345, "step": 166900 }, { "epoch": 26.72, "grad_norm": 0.15507912635803223, "learning_rate": 0.0002960053282131285, "loss": 4.909, "step": 167000 }, { "epoch": 26.736, "grad_norm": 0.12142092734575272, "learning_rate": 0.0002960029281171247, "loss": 4.8017, "step": 167100 }, { "epoch": 26.752, "grad_norm": 0.12530605494976044, "learning_rate": 0.0002960005280211208, "loss": 5.1347, "step": 167200 }, { "epoch": 26.768, "grad_norm": 0.14327798783779144, "learning_rate": 0.00029599812792511696, "loss": 4.7235, "step": 167300 }, { "epoch": 26.784, "grad_norm": 0.14647874236106873, "learning_rate": 0.00029599572782911313, "loss": 4.9018, "step": 167400 }, { "epoch": 26.8, "grad_norm": 0.13197900354862213, "learning_rate": 0.0002959933277331093, "loss": 5.1885, "step": 167500 }, { "epoch": 26.816, "grad_norm": 0.13953787088394165, "learning_rate": 0.00029599092763710547, "loss": 4.8121, "step": 167600 }, { "epoch": 26.832, "grad_norm": 0.16823934018611908, "learning_rate": 0.0002959885275411016, "loss": 4.7129, "step": 167700 }, { "epoch": 26.848, "grad_norm": 0.1557362824678421, "learning_rate": 0.00029598612744509775, "loss": 5.2257, "step": 167800 }, { "epoch": 26.864, "grad_norm": 0.16123229265213013, "learning_rate": 0.000295983751350054, "loss": 4.8921, "step": 167900 }, { "epoch": 26.88, "grad_norm": 0.1613980084657669, "learning_rate": 0.00029598135125405016, "loss": 5.0361, "step": 168000 }, { "epoch": 26.896, "grad_norm": 0.1302555948495865, "learning_rate": 0.0002959789511580463, "loss": 5.0077, "step": 168100 }, { "epoch": 26.912, "grad_norm": 0.15182837843894958, "learning_rate": 0.00029597655106204245, "loss": 5.0202, "step": 168200 }, { "epoch": 26.928, "grad_norm": 0.13955193758010864, "learning_rate": 0.0002959741509660386, "loss": 4.9305, "step": 168300 }, { "epoch": 26.944, "grad_norm": 0.1417885273694992, "learning_rate": 0.0002959717508700348, "loss": 5.0889, "step": 168400 }, { "epoch": 26.96, "grad_norm": 0.14792856574058533, "learning_rate": 0.00029596935077403095, "loss": 4.8685, "step": 168500 }, { "epoch": 26.976, "grad_norm": 0.14266085624694824, "learning_rate": 0.00029596695067802707, "loss": 5.1578, "step": 168600 }, { "epoch": 26.992, "grad_norm": 0.11925966292619705, "learning_rate": 0.00029596455058202324, "loss": 4.6746, "step": 168700 }, { "epoch": 27.008, "grad_norm": 0.13332228362560272, "learning_rate": 0.0002959621504860194, "loss": 5.1295, "step": 168800 }, { "epoch": 27.024, "grad_norm": 0.13257551193237305, "learning_rate": 0.0002959597503900156, "loss": 5.0958, "step": 168900 }, { "epoch": 27.04, "grad_norm": 0.11077175289392471, "learning_rate": 0.00029595735029401175, "loss": 4.6509, "step": 169000 }, { "epoch": 27.056, "grad_norm": 0.1581268608570099, "learning_rate": 0.0002959549501980079, "loss": 4.7619, "step": 169100 }, { "epoch": 27.072, "grad_norm": 0.15108828246593475, "learning_rate": 0.00029595255010200403, "loss": 4.7792, "step": 169200 }, { "epoch": 27.088, "grad_norm": 0.15362246334552765, "learning_rate": 0.0002959501500060002, "loss": 5.189, "step": 169300 }, { "epoch": 27.104, "grad_norm": 0.1353999823331833, "learning_rate": 0.00029594774990999637, "loss": 4.7698, "step": 169400 }, { "epoch": 27.12, "grad_norm": 0.15684208273887634, "learning_rate": 0.00029594534981399254, "loss": 4.8111, "step": 169500 }, { "epoch": 27.136, "grad_norm": 0.17176128923892975, "learning_rate": 0.0002959429497179887, "loss": 4.8735, "step": 169600 }, { "epoch": 27.152, "grad_norm": 0.12857766449451447, "learning_rate": 0.0002959405496219848, "loss": 4.5602, "step": 169700 }, { "epoch": 27.168, "grad_norm": 0.2216508835554123, "learning_rate": 0.000295938149525981, "loss": 4.6848, "step": 169800 }, { "epoch": 27.184, "grad_norm": 0.18342281877994537, "learning_rate": 0.00029593577343093723, "loss": 4.9973, "step": 169900 }, { "epoch": 27.2, "grad_norm": 0.2726237177848816, "learning_rate": 0.0002959333733349334, "loss": 4.8341, "step": 170000 }, { "epoch": 27.216, "grad_norm": 0.1373586356639862, "learning_rate": 0.0002959309732389295, "loss": 4.914, "step": 170100 }, { "epoch": 27.232, "grad_norm": 0.13454484939575195, "learning_rate": 0.0002959285731429257, "loss": 5.0239, "step": 170200 }, { "epoch": 27.248, "grad_norm": 0.146050363779068, "learning_rate": 0.00029592617304692186, "loss": 4.7314, "step": 170300 }, { "epoch": 27.264, "grad_norm": 0.14222508668899536, "learning_rate": 0.000295923772950918, "loss": 4.6159, "step": 170400 }, { "epoch": 27.28, "grad_norm": 0.14632238447666168, "learning_rate": 0.0002959213728549142, "loss": 4.4062, "step": 170500 }, { "epoch": 27.296, "grad_norm": 0.16428226232528687, "learning_rate": 0.0002959189727589103, "loss": 5.1747, "step": 170600 }, { "epoch": 27.312, "grad_norm": 0.1323370337486267, "learning_rate": 0.0002959165726629065, "loss": 4.5199, "step": 170700 }, { "epoch": 27.328, "grad_norm": 0.14235830307006836, "learning_rate": 0.00029591417256690265, "loss": 4.9103, "step": 170800 }, { "epoch": 27.344, "grad_norm": 0.13216975331306458, "learning_rate": 0.0002959117724708988, "loss": 4.8293, "step": 170900 }, { "epoch": 27.36, "grad_norm": 0.15071095526218414, "learning_rate": 0.000295909372374895, "loss": 4.9801, "step": 171000 }, { "epoch": 27.376, "grad_norm": 0.1272030919790268, "learning_rate": 0.00029590697227889116, "loss": 4.9456, "step": 171100 }, { "epoch": 27.392, "grad_norm": 0.13579507172107697, "learning_rate": 0.00029590457218288727, "loss": 4.8712, "step": 171200 }, { "epoch": 27.408, "grad_norm": 0.12844951450824738, "learning_rate": 0.00029590217208688344, "loss": 4.679, "step": 171300 }, { "epoch": 27.424, "grad_norm": 0.10488644242286682, "learning_rate": 0.0002958997719908796, "loss": 4.8333, "step": 171400 }, { "epoch": 27.44, "grad_norm": 0.1397544890642166, "learning_rate": 0.0002958973718948758, "loss": 4.9637, "step": 171500 }, { "epoch": 27.456, "grad_norm": 0.17122800648212433, "learning_rate": 0.00029589497179887195, "loss": 4.5042, "step": 171600 }, { "epoch": 27.472, "grad_norm": 0.1432805061340332, "learning_rate": 0.00029589257170286806, "loss": 4.9236, "step": 171700 }, { "epoch": 27.488, "grad_norm": 0.2430882304906845, "learning_rate": 0.00029589017160686423, "loss": 4.6134, "step": 171800 }, { "epoch": 27.504, "grad_norm": 0.12965236604213715, "learning_rate": 0.0002958877715108604, "loss": 4.8867, "step": 171900 }, { "epoch": 27.52, "grad_norm": 0.13079382479190826, "learning_rate": 0.00029588537141485657, "loss": 4.7196, "step": 172000 }, { "epoch": 27.536, "grad_norm": 0.16515448689460754, "learning_rate": 0.00029588299531981276, "loss": 4.6995, "step": 172100 }, { "epoch": 27.552, "grad_norm": 0.12594960629940033, "learning_rate": 0.00029588059522380893, "loss": 4.8708, "step": 172200 }, { "epoch": 27.568, "grad_norm": 0.1570487916469574, "learning_rate": 0.0002958781951278051, "loss": 4.8169, "step": 172300 }, { "epoch": 27.584, "grad_norm": 0.13092289865016937, "learning_rate": 0.00029587579503180127, "loss": 4.695, "step": 172400 }, { "epoch": 27.6, "grad_norm": 0.14942535758018494, "learning_rate": 0.00029587339493579744, "loss": 4.7415, "step": 172500 }, { "epoch": 27.616, "grad_norm": 0.12075886875391006, "learning_rate": 0.00029587099483979355, "loss": 4.4839, "step": 172600 }, { "epoch": 27.632, "grad_norm": 0.11725221574306488, "learning_rate": 0.0002958685947437897, "loss": 4.8162, "step": 172700 }, { "epoch": 27.648, "grad_norm": 0.20893152058124542, "learning_rate": 0.0002958661946477859, "loss": 4.78, "step": 172800 }, { "epoch": 27.664, "grad_norm": 0.14231526851654053, "learning_rate": 0.00029586379455178206, "loss": 4.7212, "step": 172900 }, { "epoch": 27.68, "grad_norm": 0.1261710226535797, "learning_rate": 0.0002958613944557782, "loss": 4.96, "step": 173000 }, { "epoch": 27.696, "grad_norm": 0.1408015638589859, "learning_rate": 0.0002958589943597744, "loss": 4.7388, "step": 173100 }, { "epoch": 27.712, "grad_norm": 0.14422334730625153, "learning_rate": 0.0002958565942637705, "loss": 4.5018, "step": 173200 }, { "epoch": 27.728, "grad_norm": 0.17371025681495667, "learning_rate": 0.0002958541941677667, "loss": 4.792, "step": 173300 }, { "epoch": 27.744, "grad_norm": 0.21515819430351257, "learning_rate": 0.00029585179407176285, "loss": 4.8225, "step": 173400 }, { "epoch": 27.76, "grad_norm": 0.1557329297065735, "learning_rate": 0.000295849393975759, "loss": 4.6305, "step": 173500 }, { "epoch": 27.776, "grad_norm": 0.13870660960674286, "learning_rate": 0.0002958469938797552, "loss": 4.5486, "step": 173600 }, { "epoch": 27.792, "grad_norm": 0.13383133709430695, "learning_rate": 0.0002958445937837513, "loss": 4.6136, "step": 173700 }, { "epoch": 27.808, "grad_norm": 0.1399243175983429, "learning_rate": 0.00029584219368774747, "loss": 4.9352, "step": 173800 }, { "epoch": 27.824, "grad_norm": 0.11231095343828201, "learning_rate": 0.00029583979359174364, "loss": 4.9996, "step": 173900 }, { "epoch": 27.84, "grad_norm": 0.16128210723400116, "learning_rate": 0.0002958373934957398, "loss": 4.7546, "step": 174000 }, { "epoch": 27.856, "grad_norm": 0.15589210391044617, "learning_rate": 0.000295834993399736, "loss": 4.8234, "step": 174100 }, { "epoch": 27.872, "grad_norm": 0.22979894280433655, "learning_rate": 0.00029583259330373215, "loss": 4.8117, "step": 174200 }, { "epoch": 27.888, "grad_norm": 0.14024117588996887, "learning_rate": 0.00029583019320772826, "loss": 4.5712, "step": 174300 }, { "epoch": 27.904, "grad_norm": 0.16881561279296875, "learning_rate": 0.00029582779311172443, "loss": 4.8696, "step": 174400 }, { "epoch": 27.92, "grad_norm": 0.14194153249263763, "learning_rate": 0.0002958253930157206, "loss": 4.7792, "step": 174500 }, { "epoch": 27.936, "grad_norm": 0.16409501433372498, "learning_rate": 0.00029582299291971677, "loss": 4.862, "step": 174600 }, { "epoch": 27.951999999999998, "grad_norm": 0.21548931300640106, "learning_rate": 0.00029582059282371294, "loss": 4.6556, "step": 174700 }, { "epoch": 27.968, "grad_norm": 0.15370036661624908, "learning_rate": 0.00029581819272770906, "loss": 4.7855, "step": 174800 }, { "epoch": 27.984, "grad_norm": 0.1505698263645172, "learning_rate": 0.0002958157926317052, "loss": 4.5333, "step": 174900 }, { "epoch": 28.0, "grad_norm": 0.13952812552452087, "learning_rate": 0.0002958133925357014, "loss": 5.0827, "step": 175000 }, { "epoch": 28.016, "grad_norm": 0.14113423228263855, "learning_rate": 0.00029581099243969756, "loss": 4.4652, "step": 175100 }, { "epoch": 28.032, "grad_norm": 0.13563218712806702, "learning_rate": 0.00029580859234369373, "loss": 4.4769, "step": 175200 }, { "epoch": 28.048, "grad_norm": 0.16485312581062317, "learning_rate": 0.0002958061922476899, "loss": 4.7196, "step": 175300 }, { "epoch": 28.064, "grad_norm": 0.1928679645061493, "learning_rate": 0.000295803792151686, "loss": 4.5181, "step": 175400 }, { "epoch": 28.08, "grad_norm": 0.16406244039535522, "learning_rate": 0.00029580141605664226, "loss": 4.5547, "step": 175500 }, { "epoch": 28.096, "grad_norm": 0.12744209170341492, "learning_rate": 0.00029579901596063843, "loss": 4.6802, "step": 175600 }, { "epoch": 28.112, "grad_norm": 0.15242663025856018, "learning_rate": 0.00029579661586463454, "loss": 4.7076, "step": 175700 }, { "epoch": 28.128, "grad_norm": 0.1231980100274086, "learning_rate": 0.0002957942157686307, "loss": 4.7097, "step": 175800 }, { "epoch": 28.144, "grad_norm": 0.1742876172065735, "learning_rate": 0.0002957918156726269, "loss": 4.8166, "step": 175900 }, { "epoch": 28.16, "grad_norm": 0.15425816178321838, "learning_rate": 0.00029578941557662305, "loss": 4.6306, "step": 176000 }, { "epoch": 28.176, "grad_norm": 0.1423932909965515, "learning_rate": 0.0002957870154806192, "loss": 4.7671, "step": 176100 }, { "epoch": 28.192, "grad_norm": 0.13283143937587738, "learning_rate": 0.0002957846153846154, "loss": 4.5074, "step": 176200 }, { "epoch": 28.208, "grad_norm": 0.1560533046722412, "learning_rate": 0.0002957822152886115, "loss": 4.8514, "step": 176300 }, { "epoch": 28.224, "grad_norm": 0.12814775109291077, "learning_rate": 0.0002957798151926077, "loss": 4.7173, "step": 176400 }, { "epoch": 28.24, "grad_norm": 0.1441114842891693, "learning_rate": 0.00029577741509660384, "loss": 4.7003, "step": 176500 }, { "epoch": 28.256, "grad_norm": 0.13554996252059937, "learning_rate": 0.0002957750150006, "loss": 4.6206, "step": 176600 }, { "epoch": 28.272, "grad_norm": 0.21647945046424866, "learning_rate": 0.0002957726149045962, "loss": 4.9289, "step": 176700 }, { "epoch": 28.288, "grad_norm": 0.1216735765337944, "learning_rate": 0.0002957702148085923, "loss": 4.7441, "step": 176800 }, { "epoch": 28.304, "grad_norm": 0.12911395728588104, "learning_rate": 0.00029576781471258847, "loss": 4.6493, "step": 176900 }, { "epoch": 28.32, "grad_norm": 0.12240692973136902, "learning_rate": 0.00029576541461658463, "loss": 4.7305, "step": 177000 }, { "epoch": 28.336, "grad_norm": 0.17344659566879272, "learning_rate": 0.0002957630145205808, "loss": 4.5246, "step": 177100 }, { "epoch": 28.352, "grad_norm": 0.12759949266910553, "learning_rate": 0.00029576061442457697, "loss": 4.6852, "step": 177200 }, { "epoch": 28.368, "grad_norm": 0.12402662634849548, "learning_rate": 0.00029575821432857314, "loss": 4.5194, "step": 177300 }, { "epoch": 28.384, "grad_norm": 0.19976910948753357, "learning_rate": 0.00029575581423256926, "loss": 4.5166, "step": 177400 }, { "epoch": 28.4, "grad_norm": 0.14362084865570068, "learning_rate": 0.0002957534141365654, "loss": 4.5147, "step": 177500 }, { "epoch": 28.416, "grad_norm": 0.13851560652256012, "learning_rate": 0.0002957510140405616, "loss": 4.5473, "step": 177600 }, { "epoch": 28.432, "grad_norm": 0.13696688413619995, "learning_rate": 0.00029574861394455776, "loss": 4.7163, "step": 177700 }, { "epoch": 28.448, "grad_norm": 0.1331932544708252, "learning_rate": 0.00029574621384855393, "loss": 5.0066, "step": 177800 }, { "epoch": 28.464, "grad_norm": 0.13118359446525574, "learning_rate": 0.00029574381375255005, "loss": 4.7009, "step": 177900 }, { "epoch": 28.48, "grad_norm": 0.11460904031991959, "learning_rate": 0.0002957414136565462, "loss": 4.5525, "step": 178000 }, { "epoch": 28.496, "grad_norm": 0.11112211644649506, "learning_rate": 0.0002957390135605424, "loss": 4.8012, "step": 178100 }, { "epoch": 28.512, "grad_norm": 0.1618378460407257, "learning_rate": 0.00029573661346453856, "loss": 4.8419, "step": 178200 }, { "epoch": 28.528, "grad_norm": 0.13665986061096191, "learning_rate": 0.0002957342133685347, "loss": 4.6129, "step": 178300 }, { "epoch": 28.544, "grad_norm": 0.10059978067874908, "learning_rate": 0.0002957318132725309, "loss": 4.7326, "step": 178400 }, { "epoch": 28.56, "grad_norm": 0.1575680524110794, "learning_rate": 0.000295729413176527, "loss": 5.0102, "step": 178500 }, { "epoch": 28.576, "grad_norm": 0.10887812077999115, "learning_rate": 0.0002957270130805232, "loss": 4.7228, "step": 178600 }, { "epoch": 28.592, "grad_norm": 0.08943487703800201, "learning_rate": 0.0002957246369854794, "loss": 4.4294, "step": 178700 }, { "epoch": 28.608, "grad_norm": 0.14149336516857147, "learning_rate": 0.00029572223688947554, "loss": 4.6056, "step": 178800 }, { "epoch": 28.624, "grad_norm": 0.12872636318206787, "learning_rate": 0.0002957198367934717, "loss": 4.8457, "step": 178900 }, { "epoch": 28.64, "grad_norm": 0.15382656455039978, "learning_rate": 0.0002957174366974679, "loss": 4.7641, "step": 179000 }, { "epoch": 28.656, "grad_norm": 0.15484744310379028, "learning_rate": 0.00029571503660146404, "loss": 4.7261, "step": 179100 }, { "epoch": 28.672, "grad_norm": 0.1385447382926941, "learning_rate": 0.0002957126365054602, "loss": 4.8178, "step": 179200 }, { "epoch": 28.688, "grad_norm": 0.09416704624891281, "learning_rate": 0.0002957102364094564, "loss": 4.462, "step": 179300 }, { "epoch": 28.704, "grad_norm": 0.11756269633769989, "learning_rate": 0.0002957078363134525, "loss": 4.9817, "step": 179400 }, { "epoch": 28.72, "grad_norm": 0.16298645734786987, "learning_rate": 0.00029570543621744867, "loss": 4.7884, "step": 179500 }, { "epoch": 28.736, "grad_norm": 0.1666107177734375, "learning_rate": 0.00029570303612144484, "loss": 4.5478, "step": 179600 }, { "epoch": 28.752, "grad_norm": 0.14432166516780853, "learning_rate": 0.000295700636025441, "loss": 4.5671, "step": 179700 }, { "epoch": 28.768, "grad_norm": 0.14455050230026245, "learning_rate": 0.0002956982359294372, "loss": 4.4565, "step": 179800 }, { "epoch": 28.784, "grad_norm": 0.11911621689796448, "learning_rate": 0.0002956958358334333, "loss": 4.8298, "step": 179900 }, { "epoch": 28.8, "grad_norm": 0.11492261290550232, "learning_rate": 0.00029569343573742946, "loss": 4.8744, "step": 180000 }, { "epoch": 28.816, "grad_norm": 0.11532367020845413, "learning_rate": 0.00029569103564142563, "loss": 4.9461, "step": 180100 }, { "epoch": 28.832, "grad_norm": 0.11335845291614532, "learning_rate": 0.0002956886355454218, "loss": 4.6438, "step": 180200 }, { "epoch": 28.848, "grad_norm": 0.13290923833847046, "learning_rate": 0.00029568623544941797, "loss": 4.5029, "step": 180300 }, { "epoch": 28.864, "grad_norm": 0.12123245000839233, "learning_rate": 0.00029568383535341414, "loss": 5.002, "step": 180400 }, { "epoch": 28.88, "grad_norm": 0.1688774973154068, "learning_rate": 0.00029568143525741025, "loss": 4.5888, "step": 180500 }, { "epoch": 28.896, "grad_norm": 0.12593814730644226, "learning_rate": 0.0002956790351614064, "loss": 4.5949, "step": 180600 }, { "epoch": 28.912, "grad_norm": 0.13134326040744781, "learning_rate": 0.0002956766350654026, "loss": 4.3431, "step": 180700 }, { "epoch": 28.928, "grad_norm": 0.14252367615699768, "learning_rate": 0.00029567423496939876, "loss": 4.1599, "step": 180800 }, { "epoch": 28.944, "grad_norm": 0.13371191918849945, "learning_rate": 0.0002956718348733949, "loss": 4.4618, "step": 180900 }, { "epoch": 28.96, "grad_norm": 0.2305118888616562, "learning_rate": 0.00029566943477739104, "loss": 4.7324, "step": 181000 }, { "epoch": 28.976, "grad_norm": 0.17778520286083221, "learning_rate": 0.0002956670346813872, "loss": 4.5895, "step": 181100 }, { "epoch": 28.992, "grad_norm": 0.16209328174591064, "learning_rate": 0.0002956646345853834, "loss": 4.5924, "step": 181200 }, { "epoch": 29.008, "grad_norm": 0.13874457776546478, "learning_rate": 0.0002956622584903396, "loss": 4.5032, "step": 181300 }, { "epoch": 29.024, "grad_norm": 0.13318394124507904, "learning_rate": 0.00029565985839433574, "loss": 4.3979, "step": 181400 }, { "epoch": 29.04, "grad_norm": 0.1424497812986374, "learning_rate": 0.0002956574582983319, "loss": 4.6121, "step": 181500 }, { "epoch": 29.056, "grad_norm": 0.1274562031030655, "learning_rate": 0.0002956550582023281, "loss": 4.6716, "step": 181600 }, { "epoch": 29.072, "grad_norm": 0.15418770909309387, "learning_rate": 0.00029565265810632425, "loss": 4.4586, "step": 181700 }, { "epoch": 29.088, "grad_norm": 0.1679641753435135, "learning_rate": 0.0002956502580103204, "loss": 4.4676, "step": 181800 }, { "epoch": 29.104, "grad_norm": 0.10988187789916992, "learning_rate": 0.00029564788191527655, "loss": 4.4074, "step": 181900 }, { "epoch": 29.12, "grad_norm": 0.13705100119113922, "learning_rate": 0.0002956454818192727, "loss": 4.5681, "step": 182000 }, { "epoch": 29.136, "grad_norm": 0.12808799743652344, "learning_rate": 0.0002956430817232689, "loss": 4.5169, "step": 182100 }, { "epoch": 29.152, "grad_norm": 0.12796570360660553, "learning_rate": 0.00029564068162726506, "loss": 4.4512, "step": 182200 }, { "epoch": 29.168, "grad_norm": 0.147149458527565, "learning_rate": 0.0002956382815312612, "loss": 4.5589, "step": 182300 }, { "epoch": 29.184, "grad_norm": 0.13126406073570251, "learning_rate": 0.0002956358814352574, "loss": 4.6464, "step": 182400 }, { "epoch": 29.2, "grad_norm": 0.13180950284004211, "learning_rate": 0.0002956334813392535, "loss": 4.35, "step": 182500 }, { "epoch": 29.216, "grad_norm": 0.1481064260005951, "learning_rate": 0.0002956310812432497, "loss": 4.8751, "step": 182600 }, { "epoch": 29.232, "grad_norm": 0.1164550855755806, "learning_rate": 0.00029562868114724585, "loss": 4.4465, "step": 182700 }, { "epoch": 29.248, "grad_norm": 0.1666630357503891, "learning_rate": 0.000295626281051242, "loss": 4.6663, "step": 182800 }, { "epoch": 29.264, "grad_norm": 0.1992766559123993, "learning_rate": 0.0002956238809552382, "loss": 4.6739, "step": 182900 }, { "epoch": 29.28, "grad_norm": 0.16324801743030548, "learning_rate": 0.0002956214808592343, "loss": 4.4456, "step": 183000 }, { "epoch": 29.296, "grad_norm": 0.11294930428266525, "learning_rate": 0.00029561908076323047, "loss": 4.3819, "step": 183100 }, { "epoch": 29.312, "grad_norm": 0.12660732865333557, "learning_rate": 0.0002956166806672267, "loss": 4.6977, "step": 183200 }, { "epoch": 29.328, "grad_norm": 0.14016014337539673, "learning_rate": 0.00029561428057122286, "loss": 4.3781, "step": 183300 }, { "epoch": 29.344, "grad_norm": 0.1163877472281456, "learning_rate": 0.000295611880475219, "loss": 4.7432, "step": 183400 }, { "epoch": 29.36, "grad_norm": 0.12794825434684753, "learning_rate": 0.00029560948037921515, "loss": 4.2617, "step": 183500 }, { "epoch": 29.376, "grad_norm": 0.1507662683725357, "learning_rate": 0.0002956070802832113, "loss": 4.6586, "step": 183600 }, { "epoch": 29.392, "grad_norm": 0.1828104853630066, "learning_rate": 0.0002956046801872075, "loss": 4.4693, "step": 183700 }, { "epoch": 29.408, "grad_norm": 0.15640473365783691, "learning_rate": 0.00029560228009120366, "loss": 4.4264, "step": 183800 }, { "epoch": 29.424, "grad_norm": 0.11081469058990479, "learning_rate": 0.00029559987999519977, "loss": 4.4166, "step": 183900 }, { "epoch": 29.44, "grad_norm": 0.21402069926261902, "learning_rate": 0.00029559747989919594, "loss": 4.6278, "step": 184000 }, { "epoch": 29.456, "grad_norm": 0.16012965142726898, "learning_rate": 0.0002955950798031921, "loss": 4.5287, "step": 184100 }, { "epoch": 29.472, "grad_norm": 0.1100313737988472, "learning_rate": 0.0002955926797071883, "loss": 4.6011, "step": 184200 }, { "epoch": 29.488, "grad_norm": 0.11403873562812805, "learning_rate": 0.00029559027961118445, "loss": 4.6962, "step": 184300 }, { "epoch": 29.504, "grad_norm": 0.13019561767578125, "learning_rate": 0.0002955878795151806, "loss": 4.5873, "step": 184400 }, { "epoch": 29.52, "grad_norm": 0.2004174143075943, "learning_rate": 0.00029558547941917673, "loss": 4.7923, "step": 184500 }, { "epoch": 29.536, "grad_norm": 0.1290491372346878, "learning_rate": 0.0002955830793231729, "loss": 4.3873, "step": 184600 }, { "epoch": 29.552, "grad_norm": 0.1811065971851349, "learning_rate": 0.00029558067922716907, "loss": 4.823, "step": 184700 }, { "epoch": 29.568, "grad_norm": 0.14687442779541016, "learning_rate": 0.00029557827913116524, "loss": 4.7868, "step": 184800 }, { "epoch": 29.584, "grad_norm": 0.16283226013183594, "learning_rate": 0.0002955758790351614, "loss": 4.532, "step": 184900 }, { "epoch": 29.6, "grad_norm": 0.11363033950328827, "learning_rate": 0.0002955734789391575, "loss": 4.6361, "step": 185000 }, { "epoch": 29.616, "grad_norm": 0.1472877413034439, "learning_rate": 0.0002955710788431537, "loss": 4.7752, "step": 185100 }, { "epoch": 29.632, "grad_norm": 0.12992092967033386, "learning_rate": 0.00029556867874714986, "loss": 4.6286, "step": 185200 }, { "epoch": 29.648, "grad_norm": 0.1578865796327591, "learning_rate": 0.00029556627865114603, "loss": 4.718, "step": 185300 }, { "epoch": 29.664, "grad_norm": 0.13645659387111664, "learning_rate": 0.0002955638785551422, "loss": 4.5285, "step": 185400 }, { "epoch": 29.68, "grad_norm": 0.12883096933364868, "learning_rate": 0.00029556147845913837, "loss": 4.3766, "step": 185500 }, { "epoch": 29.696, "grad_norm": 0.14604073762893677, "learning_rate": 0.0002955590783631345, "loss": 4.6148, "step": 185600 }, { "epoch": 29.712, "grad_norm": 0.10391728579998016, "learning_rate": 0.00029555667826713065, "loss": 4.5858, "step": 185700 }, { "epoch": 29.728, "grad_norm": 0.1335868388414383, "learning_rate": 0.0002955542781711268, "loss": 4.473, "step": 185800 }, { "epoch": 29.744, "grad_norm": NaN, "learning_rate": 0.000295551878075123, "loss": 4.6667, "step": 185900 }, { "epoch": 29.76, "grad_norm": 0.13097989559173584, "learning_rate": 0.0002955495019800792, "loss": 4.5837, "step": 186000 }, { "epoch": 29.776, "grad_norm": 0.12291578948497772, "learning_rate": 0.0002955471018840753, "loss": 4.4456, "step": 186100 }, { "epoch": 29.792, "grad_norm": 0.13081465661525726, "learning_rate": 0.00029554470178807146, "loss": 4.35, "step": 186200 }, { "epoch": 29.808, "grad_norm": 0.12602901458740234, "learning_rate": 0.0002955423016920677, "loss": 4.4492, "step": 186300 }, { "epoch": 29.824, "grad_norm": 0.100369893014431, "learning_rate": 0.00029553990159606386, "loss": 4.4268, "step": 186400 }, { "epoch": 29.84, "grad_norm": 0.1391635686159134, "learning_rate": 0.00029553750150005997, "loss": 4.7105, "step": 186500 }, { "epoch": 29.856, "grad_norm": 0.14119628071784973, "learning_rate": 0.00029553512540501616, "loss": 4.5727, "step": 186600 }, { "epoch": 29.872, "grad_norm": 0.09387778490781784, "learning_rate": 0.00029553272530901233, "loss": 4.6228, "step": 186700 }, { "epoch": 29.888, "grad_norm": 0.1252400130033493, "learning_rate": 0.0002955303252130085, "loss": 4.9945, "step": 186800 }, { "epoch": 29.904, "grad_norm": 0.12458446621894836, "learning_rate": 0.00029552792511700467, "loss": 4.3859, "step": 186900 }, { "epoch": 29.92, "grad_norm": 0.14113031327724457, "learning_rate": 0.0002955255250210008, "loss": 4.6897, "step": 187000 }, { "epoch": 29.936, "grad_norm": 0.10882208496332169, "learning_rate": 0.00029552312492499695, "loss": 4.5721, "step": 187100 }, { "epoch": 29.951999999999998, "grad_norm": 0.14268061518669128, "learning_rate": 0.0002955207248289931, "loss": 4.4369, "step": 187200 }, { "epoch": 29.968, "grad_norm": 0.13580568134784698, "learning_rate": 0.0002955183247329893, "loss": 4.6109, "step": 187300 }, { "epoch": 29.984, "grad_norm": 0.13918522000312805, "learning_rate": 0.00029551592463698546, "loss": 4.4577, "step": 187400 }, { "epoch": 30.0, "grad_norm": 0.15967246890068054, "learning_rate": 0.00029551352454098163, "loss": 4.614, "step": 187500 }, { "epoch": 30.016, "grad_norm": 0.1574871689081192, "learning_rate": 0.00029551112444497774, "loss": 4.4328, "step": 187600 }, { "epoch": 30.032, "grad_norm": 0.12702183425426483, "learning_rate": 0.0002955087243489739, "loss": 4.3792, "step": 187700 }, { "epoch": 30.048, "grad_norm": 0.14296884834766388, "learning_rate": 0.0002955063242529701, "loss": 4.4004, "step": 187800 }, { "epoch": 30.064, "grad_norm": 0.1492658257484436, "learning_rate": 0.00029550392415696625, "loss": 4.3511, "step": 187900 }, { "epoch": 30.08, "grad_norm": 0.13314266502857208, "learning_rate": 0.0002955015240609624, "loss": 4.0878, "step": 188000 }, { "epoch": 30.096, "grad_norm": 0.12954705953598022, "learning_rate": 0.00029549912396495854, "loss": 4.1828, "step": 188100 }, { "epoch": 30.112, "grad_norm": 0.15165077149868011, "learning_rate": 0.0002954967238689547, "loss": 4.4413, "step": 188200 }, { "epoch": 30.128, "grad_norm": 0.11701655387878418, "learning_rate": 0.0002954943237729509, "loss": 4.4069, "step": 188300 }, { "epoch": 30.144, "grad_norm": 0.15118548274040222, "learning_rate": 0.00029549192367694704, "loss": 4.7566, "step": 188400 }, { "epoch": 30.16, "grad_norm": 0.1188935711979866, "learning_rate": 0.0002954895235809432, "loss": 4.514, "step": 188500 }, { "epoch": 30.176, "grad_norm": 0.1596667617559433, "learning_rate": 0.0002954871234849394, "loss": 4.4178, "step": 188600 }, { "epoch": 30.192, "grad_norm": 0.13460184633731842, "learning_rate": 0.0002954847233889355, "loss": 4.5119, "step": 188700 }, { "epoch": 30.208, "grad_norm": 0.20185166597366333, "learning_rate": 0.00029548232329293167, "loss": 4.7872, "step": 188800 }, { "epoch": 30.224, "grad_norm": 0.12327948957681656, "learning_rate": 0.00029547992319692783, "loss": 4.3173, "step": 188900 }, { "epoch": 30.24, "grad_norm": 0.1796080619096756, "learning_rate": 0.000295477523100924, "loss": 4.3608, "step": 189000 }, { "epoch": 30.256, "grad_norm": 0.10353237390518188, "learning_rate": 0.00029547512300492017, "loss": 4.2835, "step": 189100 }, { "epoch": 30.272, "grad_norm": 0.20595349371433258, "learning_rate": 0.00029547272290891634, "loss": 4.6235, "step": 189200 }, { "epoch": 30.288, "grad_norm": 0.12586836516857147, "learning_rate": 0.00029547032281291246, "loss": 4.5945, "step": 189300 }, { "epoch": 30.304, "grad_norm": 0.1621522456407547, "learning_rate": 0.0002954679467178687, "loss": 4.5074, "step": 189400 }, { "epoch": 30.32, "grad_norm": 0.14377695322036743, "learning_rate": 0.00029546554662186487, "loss": 4.5901, "step": 189500 }, { "epoch": 30.336, "grad_norm": 0.1328209638595581, "learning_rate": 0.000295463146525861, "loss": 4.4787, "step": 189600 }, { "epoch": 30.352, "grad_norm": 0.13098841905593872, "learning_rate": 0.00029546074642985715, "loss": 4.3414, "step": 189700 }, { "epoch": 30.368, "grad_norm": 0.10994672030210495, "learning_rate": 0.0002954583463338533, "loss": 4.2318, "step": 189800 }, { "epoch": 30.384, "grad_norm": 0.13585075736045837, "learning_rate": 0.0002954559462378495, "loss": 4.2293, "step": 189900 }, { "epoch": 30.4, "grad_norm": 0.1245153397321701, "learning_rate": 0.00029545354614184566, "loss": 4.43, "step": 190000 }, { "epoch": 30.416, "grad_norm": 0.15724925696849823, "learning_rate": 0.0002954511460458418, "loss": 4.311, "step": 190100 }, { "epoch": 30.432, "grad_norm": 0.1526051014661789, "learning_rate": 0.00029544874594983794, "loss": 4.337, "step": 190200 }, { "epoch": 30.448, "grad_norm": 0.16760113835334778, "learning_rate": 0.0002954463458538341, "loss": 4.2577, "step": 190300 }, { "epoch": 30.464, "grad_norm": 0.12043719738721848, "learning_rate": 0.0002954439457578303, "loss": 4.2991, "step": 190400 }, { "epoch": 30.48, "grad_norm": 0.13768455386161804, "learning_rate": 0.00029544154566182645, "loss": 4.7788, "step": 190500 }, { "epoch": 30.496, "grad_norm": 0.18012315034866333, "learning_rate": 0.0002954391455658226, "loss": 4.2085, "step": 190600 }, { "epoch": 30.512, "grad_norm": 0.15594148635864258, "learning_rate": 0.00029543674546981874, "loss": 4.1886, "step": 190700 }, { "epoch": 30.528, "grad_norm": 0.16506901383399963, "learning_rate": 0.0002954343453738149, "loss": 4.4662, "step": 190800 }, { "epoch": 30.544, "grad_norm": 0.10285619646310806, "learning_rate": 0.0002954319452778111, "loss": 4.6142, "step": 190900 }, { "epoch": 30.56, "grad_norm": 0.1580812931060791, "learning_rate": 0.00029542954518180724, "loss": 4.5751, "step": 191000 }, { "epoch": 30.576, "grad_norm": 0.19402793049812317, "learning_rate": 0.0002954271450858034, "loss": 4.52, "step": 191100 }, { "epoch": 30.592, "grad_norm": 0.14133353531360626, "learning_rate": 0.0002954247449897996, "loss": 4.4395, "step": 191200 }, { "epoch": 30.608, "grad_norm": 0.10284803062677383, "learning_rate": 0.0002954223448937957, "loss": 4.6318, "step": 191300 }, { "epoch": 30.624, "grad_norm": 0.12501497566699982, "learning_rate": 0.00029541994479779187, "loss": 4.2929, "step": 191400 }, { "epoch": 30.64, "grad_norm": 0.12848247587680817, "learning_rate": 0.00029541754470178804, "loss": 4.5549, "step": 191500 }, { "epoch": 30.656, "grad_norm": 0.1291554719209671, "learning_rate": 0.0002954151446057842, "loss": 4.3063, "step": 191600 }, { "epoch": 30.672, "grad_norm": 0.13024528324604034, "learning_rate": 0.0002954127445097804, "loss": 4.4285, "step": 191700 }, { "epoch": 30.688, "grad_norm": 0.18324004113674164, "learning_rate": 0.0002954103444137765, "loss": 4.4929, "step": 191800 }, { "epoch": 30.704, "grad_norm": 0.14168959856033325, "learning_rate": 0.00029540794431777266, "loss": 4.7074, "step": 191900 }, { "epoch": 30.72, "grad_norm": 0.15016552805900574, "learning_rate": 0.00029540554422176883, "loss": 4.5041, "step": 192000 }, { "epoch": 30.736, "grad_norm": 0.12042218446731567, "learning_rate": 0.000295403144125765, "loss": 4.1052, "step": 192100 }, { "epoch": 30.752, "grad_norm": 0.11903825402259827, "learning_rate": 0.00029540074402976117, "loss": 4.3894, "step": 192200 }, { "epoch": 30.768, "grad_norm": 0.15753494203090668, "learning_rate": 0.00029539834393375734, "loss": 4.6028, "step": 192300 }, { "epoch": 30.784, "grad_norm": 0.13112273812294006, "learning_rate": 0.00029539594383775345, "loss": 4.5076, "step": 192400 }, { "epoch": 30.8, "grad_norm": 0.12961137294769287, "learning_rate": 0.0002953935437417496, "loss": 4.1266, "step": 192500 }, { "epoch": 30.816, "grad_norm": 0.13988713920116425, "learning_rate": 0.00029539114364574584, "loss": 4.4225, "step": 192600 }, { "epoch": 30.832, "grad_norm": 0.43816298246383667, "learning_rate": 0.00029538874354974196, "loss": 4.3343, "step": 192700 }, { "epoch": 30.848, "grad_norm": 0.15897555649280548, "learning_rate": 0.0002953863434537381, "loss": 4.3025, "step": 192800 }, { "epoch": 30.864, "grad_norm": 0.14185336232185364, "learning_rate": 0.0002953839433577343, "loss": 4.0551, "step": 192900 }, { "epoch": 30.88, "grad_norm": 0.2000892013311386, "learning_rate": 0.00029538154326173047, "loss": 4.4564, "step": 193000 }, { "epoch": 30.896, "grad_norm": 0.16042204201221466, "learning_rate": 0.00029537914316572663, "loss": 4.1802, "step": 193100 }, { "epoch": 30.912, "grad_norm": 0.1357438713312149, "learning_rate": 0.0002953767430697228, "loss": 4.4476, "step": 193200 }, { "epoch": 30.928, "grad_norm": 0.13625992834568024, "learning_rate": 0.0002953743429737189, "loss": 4.2467, "step": 193300 }, { "epoch": 30.944, "grad_norm": 0.14825522899627686, "learning_rate": 0.0002953719428777151, "loss": 4.2474, "step": 193400 }, { "epoch": 30.96, "grad_norm": 0.12946005165576935, "learning_rate": 0.00029536954278171126, "loss": 4.5001, "step": 193500 }, { "epoch": 30.976, "grad_norm": 0.14169323444366455, "learning_rate": 0.0002953671426857074, "loss": 4.5985, "step": 193600 }, { "epoch": 30.992, "grad_norm": 0.11265687644481659, "learning_rate": 0.0002953647425897036, "loss": 4.4854, "step": 193700 }, { "epoch": 31.008, "grad_norm": 0.12322665005922318, "learning_rate": 0.00029536236649465973, "loss": 4.1432, "step": 193800 }, { "epoch": 31.024, "grad_norm": 0.1335851401090622, "learning_rate": 0.0002953599663986559, "loss": 4.1797, "step": 193900 }, { "epoch": 31.04, "grad_norm": 0.11365660279989243, "learning_rate": 0.00029535756630265207, "loss": 4.3424, "step": 194000 }, { "epoch": 31.056, "grad_norm": 0.1466190218925476, "learning_rate": 0.00029535516620664824, "loss": 4.1073, "step": 194100 }, { "epoch": 31.072, "grad_norm": 0.144695445895195, "learning_rate": 0.0002953527661106444, "loss": 4.3619, "step": 194200 }, { "epoch": 31.088, "grad_norm": 0.1622026115655899, "learning_rate": 0.0002953503660146406, "loss": 4.5876, "step": 194300 }, { "epoch": 31.104, "grad_norm": 0.16062675416469574, "learning_rate": 0.0002953479659186367, "loss": 4.1208, "step": 194400 }, { "epoch": 31.12, "grad_norm": 0.13044805824756622, "learning_rate": 0.00029534556582263286, "loss": 4.1851, "step": 194500 }, { "epoch": 31.136, "grad_norm": 0.1537386029958725, "learning_rate": 0.00029534316572662903, "loss": 4.3419, "step": 194600 }, { "epoch": 31.152, "grad_norm": 0.22397293150424957, "learning_rate": 0.0002953407656306252, "loss": 4.3696, "step": 194700 }, { "epoch": 31.168, "grad_norm": 0.13327784836292267, "learning_rate": 0.00029533836553462137, "loss": 4.1155, "step": 194800 }, { "epoch": 31.184, "grad_norm": 0.2029825896024704, "learning_rate": 0.0002953359654386175, "loss": 4.4423, "step": 194900 }, { "epoch": 31.2, "grad_norm": 0.23170556128025055, "learning_rate": 0.00029533356534261365, "loss": 4.2385, "step": 195000 }, { "epoch": 31.216, "grad_norm": 0.13994359970092773, "learning_rate": 0.0002953311652466098, "loss": 4.4766, "step": 195100 }, { "epoch": 31.232, "grad_norm": 0.14667505025863647, "learning_rate": 0.000295328765150606, "loss": 4.1339, "step": 195200 }, { "epoch": 31.248, "grad_norm": 0.190034419298172, "learning_rate": 0.00029532636505460216, "loss": 4.3487, "step": 195300 }, { "epoch": 31.264, "grad_norm": 0.1049342155456543, "learning_rate": 0.00029532396495859833, "loss": 4.5035, "step": 195400 }, { "epoch": 31.28, "grad_norm": 0.1694996953010559, "learning_rate": 0.00029532156486259444, "loss": 4.1846, "step": 195500 }, { "epoch": 31.296, "grad_norm": 0.15784844756126404, "learning_rate": 0.0002953191647665906, "loss": 4.1881, "step": 195600 }, { "epoch": 31.312, "grad_norm": 0.13916942477226257, "learning_rate": 0.00029531676467058684, "loss": 4.5098, "step": 195700 }, { "epoch": 31.328, "grad_norm": 0.10092104226350784, "learning_rate": 0.00029531436457458295, "loss": 4.2838, "step": 195800 }, { "epoch": 31.344, "grad_norm": 0.11172811686992645, "learning_rate": 0.0002953119644785791, "loss": 4.2704, "step": 195900 }, { "epoch": 31.36, "grad_norm": 0.15887293219566345, "learning_rate": 0.0002953095643825753, "loss": 4.332, "step": 196000 }, { "epoch": 31.376, "grad_norm": 0.16801437735557556, "learning_rate": 0.00029530716428657146, "loss": 4.3279, "step": 196100 }, { "epoch": 31.392, "grad_norm": 0.15790340304374695, "learning_rate": 0.00029530478819152765, "loss": 4.391, "step": 196200 }, { "epoch": 31.408, "grad_norm": 0.12497889250516891, "learning_rate": 0.0002953023880955238, "loss": 4.3275, "step": 196300 }, { "epoch": 31.424, "grad_norm": 0.11301842331886292, "learning_rate": 0.00029529998799951993, "loss": 4.2469, "step": 196400 }, { "epoch": 31.44, "grad_norm": 0.12991459667682648, "learning_rate": 0.0002952975879035161, "loss": 4.3467, "step": 196500 }, { "epoch": 31.456, "grad_norm": 0.1175173819065094, "learning_rate": 0.00029529518780751227, "loss": 4.6184, "step": 196600 }, { "epoch": 31.472, "grad_norm": 0.17387010157108307, "learning_rate": 0.00029529278771150844, "loss": 4.0393, "step": 196700 }, { "epoch": 31.488, "grad_norm": 0.12786489725112915, "learning_rate": 0.00029529041161646463, "loss": 4.4725, "step": 196800 }, { "epoch": 31.504, "grad_norm": 0.16325689852237701, "learning_rate": 0.0002952880115204608, "loss": 4.274, "step": 196900 }, { "epoch": 31.52, "grad_norm": 0.17208231985569, "learning_rate": 0.00029528561142445697, "loss": 4.3541, "step": 197000 }, { "epoch": 31.536, "grad_norm": 0.14289776980876923, "learning_rate": 0.00029528321132845313, "loss": 4.2753, "step": 197100 }, { "epoch": 31.552, "grad_norm": 0.16391971707344055, "learning_rate": 0.0002952808112324493, "loss": 4.3825, "step": 197200 }, { "epoch": 31.568, "grad_norm": 0.1396835893392563, "learning_rate": 0.0002952784111364454, "loss": 4.3642, "step": 197300 }, { "epoch": 31.584, "grad_norm": 0.14884528517723083, "learning_rate": 0.0002952760110404416, "loss": 4.2918, "step": 197400 }, { "epoch": 31.6, "grad_norm": 0.22162237763404846, "learning_rate": 0.00029527361094443776, "loss": 4.1788, "step": 197500 }, { "epoch": 31.616, "grad_norm": 0.12749993801116943, "learning_rate": 0.0002952712108484339, "loss": 4.5262, "step": 197600 }, { "epoch": 31.632, "grad_norm": 0.16303063929080963, "learning_rate": 0.0002952688107524301, "loss": 4.4256, "step": 197700 }, { "epoch": 31.648, "grad_norm": 0.1132497638463974, "learning_rate": 0.0002952664106564262, "loss": 4.2708, "step": 197800 }, { "epoch": 31.664, "grad_norm": 0.16598041355609894, "learning_rate": 0.0002952640105604224, "loss": 4.103, "step": 197900 }, { "epoch": 31.68, "grad_norm": 0.11640356481075287, "learning_rate": 0.00029526161046441855, "loss": 4.143, "step": 198000 }, { "epoch": 31.696, "grad_norm": 0.1772977113723755, "learning_rate": 0.0002952592103684147, "loss": 4.3649, "step": 198100 }, { "epoch": 31.712, "grad_norm": 0.14579568803310394, "learning_rate": 0.0002952568102724109, "loss": 4.27, "step": 198200 }, { "epoch": 31.728, "grad_norm": 0.16226862370967865, "learning_rate": 0.00029525441017640706, "loss": 4.5255, "step": 198300 }, { "epoch": 31.744, "grad_norm": 0.13466237485408783, "learning_rate": 0.00029525201008040317, "loss": 4.596, "step": 198400 }, { "epoch": 31.76, "grad_norm": 0.14329834282398224, "learning_rate": 0.00029524960998439934, "loss": 4.0086, "step": 198500 }, { "epoch": 31.776, "grad_norm": 0.12730365991592407, "learning_rate": 0.0002952472098883955, "loss": 4.2994, "step": 198600 }, { "epoch": 31.792, "grad_norm": 0.12442094832658768, "learning_rate": 0.0002952448097923917, "loss": 4.4052, "step": 198700 }, { "epoch": 31.808, "grad_norm": 0.16480585932731628, "learning_rate": 0.00029524240969638785, "loss": 4.3279, "step": 198800 }, { "epoch": 31.824, "grad_norm": 0.13462135195732117, "learning_rate": 0.00029524000960038396, "loss": 4.2811, "step": 198900 }, { "epoch": 31.84, "grad_norm": 0.15313585102558136, "learning_rate": 0.00029523760950438013, "loss": 4.0526, "step": 199000 }, { "epoch": 31.856, "grad_norm": 0.13190476596355438, "learning_rate": 0.0002952352094083763, "loss": 4.3033, "step": 199100 }, { "epoch": 31.872, "grad_norm": 0.13541580736637115, "learning_rate": 0.00029523280931237247, "loss": 4.3324, "step": 199200 }, { "epoch": 31.888, "grad_norm": 0.1338634192943573, "learning_rate": 0.00029523040921636864, "loss": 4.5336, "step": 199300 }, { "epoch": 31.904, "grad_norm": 0.17132961750030518, "learning_rate": 0.0002952280091203648, "loss": 4.4689, "step": 199400 }, { "epoch": 31.92, "grad_norm": 0.16414079070091248, "learning_rate": 0.0002952256090243609, "loss": 4.3152, "step": 199500 }, { "epoch": 31.936, "grad_norm": 0.12077530473470688, "learning_rate": 0.0002952232089283571, "loss": 4.2898, "step": 199600 }, { "epoch": 31.951999999999998, "grad_norm": 0.16863426566123962, "learning_rate": 0.00029522080883235326, "loss": 4.4005, "step": 199700 }, { "epoch": 31.968, "grad_norm": 0.155312180519104, "learning_rate": 0.00029521840873634943, "loss": 4.485, "step": 199800 }, { "epoch": 31.984, "grad_norm": 0.17334511876106262, "learning_rate": 0.0002952160086403456, "loss": 4.4705, "step": 199900 }, { "epoch": 32.0, "grad_norm": 0.1425628811120987, "learning_rate": 0.0002952136085443417, "loss": 4.2511, "step": 200000 }, { "epoch": 32.016, "grad_norm": 0.13688121736049652, "learning_rate": 0.0002952112084483379, "loss": 3.9956, "step": 200100 }, { "epoch": 32.032, "grad_norm": 0.14101459085941315, "learning_rate": 0.00029520880835233405, "loss": 4.238, "step": 200200 }, { "epoch": 32.048, "grad_norm": 0.14536666870117188, "learning_rate": 0.0002952064082563302, "loss": 4.0564, "step": 200300 }, { "epoch": 32.064, "grad_norm": 0.14904972910881042, "learning_rate": 0.0002952040081603264, "loss": 3.8258, "step": 200400 }, { "epoch": 32.08, "grad_norm": 0.12304028868675232, "learning_rate": 0.00029520160806432256, "loss": 4.1282, "step": 200500 }, { "epoch": 32.096, "grad_norm": 0.22818078100681305, "learning_rate": 0.0002951992079683187, "loss": 3.9446, "step": 200600 }, { "epoch": 32.112, "grad_norm": 0.1323474943637848, "learning_rate": 0.00029519680787231485, "loss": 4.4717, "step": 200700 }, { "epoch": 32.128, "grad_norm": 0.14777085185050964, "learning_rate": 0.0002951944317772711, "loss": 4.091, "step": 200800 }, { "epoch": 32.144, "grad_norm": 0.1911545991897583, "learning_rate": 0.0002951920316812672, "loss": 4.4814, "step": 200900 }, { "epoch": 32.16, "grad_norm": 0.14611530303955078, "learning_rate": 0.0002951896315852634, "loss": 4.0986, "step": 201000 }, { "epoch": 32.176, "grad_norm": 0.1381932646036148, "learning_rate": 0.00029518723148925954, "loss": 4.2302, "step": 201100 }, { "epoch": 32.192, "grad_norm": 0.1606259047985077, "learning_rate": 0.0002951848313932557, "loss": 4.1582, "step": 201200 }, { "epoch": 32.208, "grad_norm": 0.10917247086763382, "learning_rate": 0.0002951824312972519, "loss": 4.3843, "step": 201300 }, { "epoch": 32.224, "grad_norm": 0.12173420190811157, "learning_rate": 0.00029518003120124805, "loss": 4.0978, "step": 201400 }, { "epoch": 32.24, "grad_norm": 0.1371530294418335, "learning_rate": 0.00029517763110524416, "loss": 4.4797, "step": 201500 }, { "epoch": 32.256, "grad_norm": 0.17694091796875, "learning_rate": 0.00029517523100924033, "loss": 4.2425, "step": 201600 }, { "epoch": 32.272, "grad_norm": 0.1385003924369812, "learning_rate": 0.0002951728309132365, "loss": 4.1134, "step": 201700 }, { "epoch": 32.288, "grad_norm": 0.12365297228097916, "learning_rate": 0.00029517043081723267, "loss": 4.284, "step": 201800 }, { "epoch": 32.304, "grad_norm": 0.18151600658893585, "learning_rate": 0.00029516803072122884, "loss": 3.9913, "step": 201900 }, { "epoch": 32.32, "grad_norm": 0.14787739515304565, "learning_rate": 0.00029516563062522496, "loss": 4.2671, "step": 202000 }, { "epoch": 32.336, "grad_norm": 0.12719404697418213, "learning_rate": 0.0002951632305292211, "loss": 4.3195, "step": 202100 }, { "epoch": 32.352, "grad_norm": 0.16615448892116547, "learning_rate": 0.0002951608304332173, "loss": 4.1606, "step": 202200 }, { "epoch": 32.368, "grad_norm": 0.11277727037668228, "learning_rate": 0.00029515843033721346, "loss": 4.1351, "step": 202300 }, { "epoch": 32.384, "grad_norm": 0.14769276976585388, "learning_rate": 0.00029515603024120963, "loss": 4.2443, "step": 202400 }, { "epoch": 32.4, "grad_norm": 0.1351872831583023, "learning_rate": 0.0002951536301452058, "loss": 4.3203, "step": 202500 }, { "epoch": 32.416, "grad_norm": 0.1368459165096283, "learning_rate": 0.0002951512300492019, "loss": 4.3464, "step": 202600 }, { "epoch": 32.432, "grad_norm": 0.1382042020559311, "learning_rate": 0.0002951488299531981, "loss": 4.1787, "step": 202700 }, { "epoch": 32.448, "grad_norm": 0.16098842024803162, "learning_rate": 0.00029514642985719426, "loss": 4.3152, "step": 202800 }, { "epoch": 32.464, "grad_norm": 0.10519302636384964, "learning_rate": 0.0002951440297611904, "loss": 4.3782, "step": 202900 }, { "epoch": 32.48, "grad_norm": 0.15000683069229126, "learning_rate": 0.0002951416296651866, "loss": 4.0357, "step": 203000 }, { "epoch": 32.496, "grad_norm": 0.19593103229999542, "learning_rate": 0.0002951392295691827, "loss": 4.2191, "step": 203100 }, { "epoch": 32.512, "grad_norm": 0.12107524275779724, "learning_rate": 0.0002951368294731789, "loss": 4.1605, "step": 203200 }, { "epoch": 32.528, "grad_norm": 0.13653840124607086, "learning_rate": 0.00029513442937717505, "loss": 4.6763, "step": 203300 }, { "epoch": 32.544, "grad_norm": 0.10400217771530151, "learning_rate": 0.0002951320292811712, "loss": 4.5462, "step": 203400 }, { "epoch": 32.56, "grad_norm": 0.14215317368507385, "learning_rate": 0.0002951296291851674, "loss": 4.1685, "step": 203500 }, { "epoch": 32.576, "grad_norm": 0.12748879194259644, "learning_rate": 0.00029512722908916356, "loss": 4.2786, "step": 203600 }, { "epoch": 32.592, "grad_norm": 0.18144790828227997, "learning_rate": 0.00029512482899315967, "loss": 4.1811, "step": 203700 }, { "epoch": 32.608, "grad_norm": 0.14090293645858765, "learning_rate": 0.0002951224528981159, "loss": 4.084, "step": 203800 }, { "epoch": 32.624, "grad_norm": 0.18896403908729553, "learning_rate": 0.0002951200528021121, "loss": 4.1615, "step": 203900 }, { "epoch": 32.64, "grad_norm": 0.10253412276506424, "learning_rate": 0.0002951176527061082, "loss": 4.2997, "step": 204000 }, { "epoch": 32.656, "grad_norm": 0.16733315587043762, "learning_rate": 0.00029511525261010437, "loss": 4.2193, "step": 204100 }, { "epoch": 32.672, "grad_norm": 0.10441160202026367, "learning_rate": 0.00029511285251410054, "loss": 4.0593, "step": 204200 }, { "epoch": 32.688, "grad_norm": 0.156474307179451, "learning_rate": 0.0002951104524180967, "loss": 4.2956, "step": 204300 }, { "epoch": 32.704, "grad_norm": 0.14369884133338928, "learning_rate": 0.0002951080523220929, "loss": 3.9944, "step": 204400 }, { "epoch": 32.72, "grad_norm": 0.14539478719234467, "learning_rate": 0.00029510565222608904, "loss": 4.1756, "step": 204500 }, { "epoch": 32.736, "grad_norm": 0.15353453159332275, "learning_rate": 0.00029510325213008516, "loss": 4.194, "step": 204600 }, { "epoch": 32.752, "grad_norm": 0.13915778696537018, "learning_rate": 0.0002951008520340813, "loss": 4.1445, "step": 204700 }, { "epoch": 32.768, "grad_norm": 0.14787444472312927, "learning_rate": 0.0002950984519380775, "loss": 4.5282, "step": 204800 }, { "epoch": 32.784, "grad_norm": 0.1597234606742859, "learning_rate": 0.00029509605184207367, "loss": 4.2362, "step": 204900 }, { "epoch": 32.8, "grad_norm": 0.13494624197483063, "learning_rate": 0.00029509365174606983, "loss": 3.8672, "step": 205000 }, { "epoch": 32.816, "grad_norm": 0.16409239172935486, "learning_rate": 0.00029509125165006595, "loss": 4.0315, "step": 205100 }, { "epoch": 32.832, "grad_norm": 0.11486226320266724, "learning_rate": 0.0002950888515540621, "loss": 4.2725, "step": 205200 }, { "epoch": 32.848, "grad_norm": 0.15554744005203247, "learning_rate": 0.0002950864514580583, "loss": 4.247, "step": 205300 }, { "epoch": 32.864, "grad_norm": 0.20082280039787292, "learning_rate": 0.00029508405136205446, "loss": 4.2875, "step": 205400 }, { "epoch": 32.88, "grad_norm": 0.19824598729610443, "learning_rate": 0.0002950816512660506, "loss": 4.2953, "step": 205500 }, { "epoch": 32.896, "grad_norm": 0.15054188668727875, "learning_rate": 0.0002950792511700468, "loss": 4.0188, "step": 205600 }, { "epoch": 32.912, "grad_norm": 0.1216757595539093, "learning_rate": 0.0002950768510740429, "loss": 4.37, "step": 205700 }, { "epoch": 32.928, "grad_norm": 0.1336234211921692, "learning_rate": 0.0002950744509780391, "loss": 3.884, "step": 205800 }, { "epoch": 32.944, "grad_norm": 0.13341887295246124, "learning_rate": 0.00029507205088203525, "loss": 3.9673, "step": 205900 }, { "epoch": 32.96, "grad_norm": 0.13689292967319489, "learning_rate": 0.0002950696507860314, "loss": 4.0492, "step": 206000 }, { "epoch": 32.976, "grad_norm": 0.11188139766454697, "learning_rate": 0.0002950672506900276, "loss": 3.9314, "step": 206100 }, { "epoch": 32.992, "grad_norm": 0.12683609127998352, "learning_rate": 0.0002950648505940237, "loss": 4.0422, "step": 206200 }, { "epoch": 33.008, "grad_norm": 0.14881892502307892, "learning_rate": 0.00029506247449897994, "loss": 4.3816, "step": 206300 }, { "epoch": 33.024, "grad_norm": 0.1395110785961151, "learning_rate": 0.0002950600744029761, "loss": 4.089, "step": 206400 }, { "epoch": 33.04, "grad_norm": 0.26342907547950745, "learning_rate": 0.0002950576743069723, "loss": 4.0765, "step": 206500 }, { "epoch": 33.056, "grad_norm": 0.15031449496746063, "learning_rate": 0.0002950552982119284, "loss": 4.0564, "step": 206600 }, { "epoch": 33.072, "grad_norm": 0.15763530135154724, "learning_rate": 0.0002950528981159246, "loss": 3.9028, "step": 206700 }, { "epoch": 33.088, "grad_norm": 0.12433154135942459, "learning_rate": 0.00029505049801992076, "loss": 4.1041, "step": 206800 }, { "epoch": 33.104, "grad_norm": 0.20200709998607635, "learning_rate": 0.0002950480979239169, "loss": 4.0021, "step": 206900 }, { "epoch": 33.12, "grad_norm": 0.10807151347398758, "learning_rate": 0.0002950456978279131, "loss": 4.1771, "step": 207000 }, { "epoch": 33.136, "grad_norm": 0.20743700861930847, "learning_rate": 0.0002950432977319092, "loss": 4.1294, "step": 207100 }, { "epoch": 33.152, "grad_norm": 0.15659132599830627, "learning_rate": 0.00029504089763590543, "loss": 4.4363, "step": 207200 }, { "epoch": 33.168, "grad_norm": 0.137447327375412, "learning_rate": 0.0002950384975399016, "loss": 4.1354, "step": 207300 }, { "epoch": 33.184, "grad_norm": 0.1178567036986351, "learning_rate": 0.00029503609744389777, "loss": 4.1492, "step": 207400 }, { "epoch": 33.2, "grad_norm": 0.1532943695783615, "learning_rate": 0.0002950336973478939, "loss": 4.2447, "step": 207500 }, { "epoch": 33.216, "grad_norm": 0.12529316544532776, "learning_rate": 0.00029503129725189006, "loss": 4.0434, "step": 207600 }, { "epoch": 33.232, "grad_norm": 0.15973010659217834, "learning_rate": 0.0002950288971558862, "loss": 4.2938, "step": 207700 }, { "epoch": 33.248, "grad_norm": 0.10873132944107056, "learning_rate": 0.0002950264970598824, "loss": 4.5886, "step": 207800 }, { "epoch": 33.264, "grad_norm": 0.14896368980407715, "learning_rate": 0.00029502409696387856, "loss": 4.099, "step": 207900 }, { "epoch": 33.28, "grad_norm": 0.1567344069480896, "learning_rate": 0.0002950216968678747, "loss": 4.1805, "step": 208000 }, { "epoch": 33.296, "grad_norm": 0.11498137563467026, "learning_rate": 0.00029501929677187085, "loss": 4.2578, "step": 208100 }, { "epoch": 33.312, "grad_norm": 0.1541927009820938, "learning_rate": 0.000295016896675867, "loss": 3.8944, "step": 208200 }, { "epoch": 33.328, "grad_norm": 0.1181056872010231, "learning_rate": 0.0002950144965798632, "loss": 3.984, "step": 208300 }, { "epoch": 33.344, "grad_norm": 0.17530953884124756, "learning_rate": 0.00029501209648385935, "loss": 4.1395, "step": 208400 }, { "epoch": 33.36, "grad_norm": 0.14198775589466095, "learning_rate": 0.0002950096963878555, "loss": 4.1338, "step": 208500 }, { "epoch": 33.376, "grad_norm": 0.14517010748386383, "learning_rate": 0.00029500729629185164, "loss": 4.0205, "step": 208600 }, { "epoch": 33.392, "grad_norm": 0.15430046617984772, "learning_rate": 0.0002950048961958478, "loss": 4.0759, "step": 208700 }, { "epoch": 33.408, "grad_norm": 0.13241171836853027, "learning_rate": 0.000295002496099844, "loss": 3.9922, "step": 208800 }, { "epoch": 33.424, "grad_norm": 0.1318008303642273, "learning_rate": 0.00029500009600384015, "loss": 3.9547, "step": 208900 }, { "epoch": 33.44, "grad_norm": 0.17162205278873444, "learning_rate": 0.0002949976959078363, "loss": 4.0941, "step": 209000 }, { "epoch": 33.456, "grad_norm": 0.19532734155654907, "learning_rate": 0.00029499529581183243, "loss": 4.2355, "step": 209100 }, { "epoch": 33.472, "grad_norm": 0.15209433436393738, "learning_rate": 0.0002949928957158286, "loss": 3.9896, "step": 209200 }, { "epoch": 33.488, "grad_norm": 0.1435602754354477, "learning_rate": 0.00029499049561982477, "loss": 3.9636, "step": 209300 }, { "epoch": 33.504, "grad_norm": 0.1843099594116211, "learning_rate": 0.00029498809552382094, "loss": 4.1853, "step": 209400 }, { "epoch": 33.52, "grad_norm": 0.11624948680400848, "learning_rate": 0.0002949856954278171, "loss": 3.9493, "step": 209500 }, { "epoch": 33.536, "grad_norm": 0.1631406992673874, "learning_rate": 0.0002949832953318133, "loss": 4.3431, "step": 209600 }, { "epoch": 33.552, "grad_norm": 0.1258581429719925, "learning_rate": 0.0002949808952358094, "loss": 3.9717, "step": 209700 }, { "epoch": 33.568, "grad_norm": 0.1421457827091217, "learning_rate": 0.00029497849513980556, "loss": 3.8985, "step": 209800 }, { "epoch": 33.584, "grad_norm": 0.10745751112699509, "learning_rate": 0.00029497609504380173, "loss": 4.0869, "step": 209900 }, { "epoch": 33.6, "grad_norm": 0.11653444916009903, "learning_rate": 0.0002949736949477979, "loss": 4.1893, "step": 210000 }, { "epoch": 33.616, "grad_norm": 0.16387063264846802, "learning_rate": 0.00029497129485179407, "loss": 4.1087, "step": 210100 }, { "epoch": 33.632, "grad_norm": 0.1574537456035614, "learning_rate": 0.0002949688947557902, "loss": 4.2927, "step": 210200 }, { "epoch": 33.648, "grad_norm": 0.15371888875961304, "learning_rate": 0.00029496649465978635, "loss": 4.2714, "step": 210300 }, { "epoch": 33.664, "grad_norm": 0.17925389111042023, "learning_rate": 0.0002949640945637825, "loss": 4.0223, "step": 210400 }, { "epoch": 33.68, "grad_norm": 0.16378769278526306, "learning_rate": 0.0002949616944677787, "loss": 4.3686, "step": 210500 }, { "epoch": 33.696, "grad_norm": 0.13784043490886688, "learning_rate": 0.00029495929437177486, "loss": 3.9584, "step": 210600 }, { "epoch": 33.712, "grad_norm": 0.12318069487810135, "learning_rate": 0.00029495689427577103, "loss": 4.2228, "step": 210700 }, { "epoch": 33.728, "grad_norm": 0.16449718177318573, "learning_rate": 0.00029495449417976714, "loss": 4.0423, "step": 210800 }, { "epoch": 33.744, "grad_norm": 0.11204762756824493, "learning_rate": 0.0002949520940837633, "loss": 4.2005, "step": 210900 }, { "epoch": 33.76, "grad_norm": 0.12373457103967667, "learning_rate": 0.0002949496939877595, "loss": 4.1658, "step": 211000 }, { "epoch": 33.776, "grad_norm": 0.11655792593955994, "learning_rate": 0.00029494729389175565, "loss": 4.2622, "step": 211100 }, { "epoch": 33.792, "grad_norm": 0.15446454286575317, "learning_rate": 0.0002949448937957518, "loss": 4.0178, "step": 211200 }, { "epoch": 33.808, "grad_norm": 0.12194042652845383, "learning_rate": 0.00029494249369974794, "loss": 3.8066, "step": 211300 }, { "epoch": 33.824, "grad_norm": 0.11337129771709442, "learning_rate": 0.0002949400936037441, "loss": 4.2283, "step": 211400 }, { "epoch": 33.84, "grad_norm": 0.1534884124994278, "learning_rate": 0.0002949376935077403, "loss": 4.2556, "step": 211500 }, { "epoch": 33.856, "grad_norm": 0.15468348562717438, "learning_rate": 0.00029493529341173644, "loss": 4.1433, "step": 211600 }, { "epoch": 33.872, "grad_norm": 0.12156946212053299, "learning_rate": 0.0002949328933157326, "loss": 3.9688, "step": 211700 }, { "epoch": 33.888, "grad_norm": 0.13466264307498932, "learning_rate": 0.0002949304932197288, "loss": 4.3466, "step": 211800 }, { "epoch": 33.904, "grad_norm": 0.18881022930145264, "learning_rate": 0.00029492811712468497, "loss": 4.055, "step": 211900 }, { "epoch": 33.92, "grad_norm": 0.11762301623821259, "learning_rate": 0.00029492574102964116, "loss": 4.0179, "step": 212000 }, { "epoch": 33.936, "grad_norm": 0.1613592952489853, "learning_rate": 0.00029492334093363733, "loss": 3.984, "step": 212100 }, { "epoch": 33.952, "grad_norm": 0.1494964361190796, "learning_rate": 0.00029492094083763344, "loss": 4.1671, "step": 212200 }, { "epoch": 33.968, "grad_norm": 0.15549595654010773, "learning_rate": 0.0002949185407416296, "loss": 4.0406, "step": 212300 }, { "epoch": 33.984, "grad_norm": 0.10423394292593002, "learning_rate": 0.0002949161406456258, "loss": 4.1282, "step": 212400 }, { "epoch": 34.0, "grad_norm": 0.13447262346744537, "learning_rate": 0.00029491374054962195, "loss": 4.1241, "step": 212500 }, { "epoch": 34.016, "grad_norm": 0.18732032179832458, "learning_rate": 0.0002949113404536181, "loss": 4.259, "step": 212600 }, { "epoch": 34.032, "grad_norm": 0.30485400557518005, "learning_rate": 0.0002949089403576143, "loss": 4.1516, "step": 212700 }, { "epoch": 34.048, "grad_norm": 0.15403038263320923, "learning_rate": 0.0002949065402616104, "loss": 3.9734, "step": 212800 }, { "epoch": 34.064, "grad_norm": 0.15754011273384094, "learning_rate": 0.0002949041401656066, "loss": 4.2089, "step": 212900 }, { "epoch": 34.08, "grad_norm": 0.12924934923648834, "learning_rate": 0.00029490174006960274, "loss": 3.9435, "step": 213000 }, { "epoch": 34.096, "grad_norm": 0.14943893253803253, "learning_rate": 0.0002948993399735989, "loss": 4.1173, "step": 213100 }, { "epoch": 34.112, "grad_norm": 0.12638643383979797, "learning_rate": 0.0002948969398775951, "loss": 4.0159, "step": 213200 }, { "epoch": 34.128, "grad_norm": 0.13172921538352966, "learning_rate": 0.0002948945397815912, "loss": 4.2183, "step": 213300 }, { "epoch": 34.144, "grad_norm": 0.13359788060188293, "learning_rate": 0.0002948921396855874, "loss": 3.9132, "step": 213400 }, { "epoch": 34.16, "grad_norm": 0.1338624805212021, "learning_rate": 0.0002948897395895836, "loss": 4.2056, "step": 213500 }, { "epoch": 34.176, "grad_norm": 0.14948703348636627, "learning_rate": 0.00029488733949357976, "loss": 4.2255, "step": 213600 }, { "epoch": 34.192, "grad_norm": 0.10894934833049774, "learning_rate": 0.00029488493939757587, "loss": 3.9387, "step": 213700 }, { "epoch": 34.208, "grad_norm": 0.1240374892950058, "learning_rate": 0.00029488253930157204, "loss": 3.788, "step": 213800 }, { "epoch": 34.224, "grad_norm": 0.13845741748809814, "learning_rate": 0.0002948801392055682, "loss": 3.8461, "step": 213900 }, { "epoch": 34.24, "grad_norm": 0.18485264480113983, "learning_rate": 0.0002948777391095644, "loss": 4.1546, "step": 214000 }, { "epoch": 34.256, "grad_norm": 0.166182279586792, "learning_rate": 0.00029487533901356055, "loss": 3.9286, "step": 214100 }, { "epoch": 34.272, "grad_norm": 0.14583687484264374, "learning_rate": 0.00029487293891755666, "loss": 4.296, "step": 214200 }, { "epoch": 34.288, "grad_norm": 0.16349993646144867, "learning_rate": 0.00029487053882155283, "loss": 4.0985, "step": 214300 }, { "epoch": 34.304, "grad_norm": 0.13059383630752563, "learning_rate": 0.000294868138725549, "loss": 3.9836, "step": 214400 }, { "epoch": 34.32, "grad_norm": 0.1310618817806244, "learning_rate": 0.00029486573862954517, "loss": 4.3945, "step": 214500 }, { "epoch": 34.336, "grad_norm": 0.17246989905834198, "learning_rate": 0.00029486333853354134, "loss": 4.0882, "step": 214600 }, { "epoch": 34.352, "grad_norm": 0.13871462643146515, "learning_rate": 0.0002948609384375375, "loss": 3.9547, "step": 214700 }, { "epoch": 34.368, "grad_norm": 0.13195845484733582, "learning_rate": 0.0002948585383415336, "loss": 4.1859, "step": 214800 }, { "epoch": 34.384, "grad_norm": 0.15401357412338257, "learning_rate": 0.0002948561382455298, "loss": 3.9998, "step": 214900 }, { "epoch": 34.4, "grad_norm": 0.12858301401138306, "learning_rate": 0.00029485373814952596, "loss": 3.8763, "step": 215000 }, { "epoch": 34.416, "grad_norm": 0.14892907440662384, "learning_rate": 0.00029485133805352213, "loss": 4.1454, "step": 215100 }, { "epoch": 34.432, "grad_norm": 0.0917261615395546, "learning_rate": 0.0002948489379575183, "loss": 3.9588, "step": 215200 }, { "epoch": 34.448, "grad_norm": 0.20822525024414062, "learning_rate": 0.0002948465378615144, "loss": 4.0508, "step": 215300 }, { "epoch": 34.464, "grad_norm": 0.1588893085718155, "learning_rate": 0.0002948441377655106, "loss": 3.8435, "step": 215400 }, { "epoch": 34.48, "grad_norm": 0.12551400065422058, "learning_rate": 0.00029484173766950676, "loss": 3.9901, "step": 215500 }, { "epoch": 34.496, "grad_norm": 0.1471443623304367, "learning_rate": 0.0002948393375735029, "loss": 4.1671, "step": 215600 }, { "epoch": 34.512, "grad_norm": 0.1180228590965271, "learning_rate": 0.0002948369374774991, "loss": 4.0775, "step": 215700 }, { "epoch": 34.528, "grad_norm": 0.12607137858867645, "learning_rate": 0.00029483453738149526, "loss": 4.2987, "step": 215800 }, { "epoch": 34.544, "grad_norm": 0.17207618057727814, "learning_rate": 0.0002948321372854914, "loss": 4.22, "step": 215900 }, { "epoch": 34.56, "grad_norm": 0.15183746814727783, "learning_rate": 0.00029482973718948755, "loss": 4.0779, "step": 216000 }, { "epoch": 34.576, "grad_norm": 0.11966774612665176, "learning_rate": 0.0002948273370934837, "loss": 4.2382, "step": 216100 }, { "epoch": 34.592, "grad_norm": 0.1343713253736496, "learning_rate": 0.0002948249609984399, "loss": 4.086, "step": 216200 }, { "epoch": 34.608, "grad_norm": 0.14770160615444183, "learning_rate": 0.0002948225609024361, "loss": 4.0071, "step": 216300 }, { "epoch": 34.624, "grad_norm": 0.18623992800712585, "learning_rate": 0.0002948201608064322, "loss": 4.4554, "step": 216400 }, { "epoch": 34.64, "grad_norm": 0.3026518225669861, "learning_rate": 0.0002948177607104284, "loss": 4.1326, "step": 216500 }, { "epoch": 34.656, "grad_norm": 0.12520667910575867, "learning_rate": 0.0002948153606144246, "loss": 3.896, "step": 216600 }, { "epoch": 34.672, "grad_norm": 0.1331157386302948, "learning_rate": 0.00029481296051842075, "loss": 4.0022, "step": 216700 }, { "epoch": 34.688, "grad_norm": 0.14815694093704224, "learning_rate": 0.00029481056042241687, "loss": 4.0968, "step": 216800 }, { "epoch": 34.704, "grad_norm": 0.1581687331199646, "learning_rate": 0.00029480816032641303, "loss": 3.9902, "step": 216900 }, { "epoch": 34.72, "grad_norm": 0.11639413237571716, "learning_rate": 0.0002948057602304092, "loss": 3.8053, "step": 217000 }, { "epoch": 34.736, "grad_norm": 0.1016254872083664, "learning_rate": 0.0002948033841353654, "loss": 3.9166, "step": 217100 }, { "epoch": 34.752, "grad_norm": 0.1620020717382431, "learning_rate": 0.00029480098403936156, "loss": 3.824, "step": 217200 }, { "epoch": 34.768, "grad_norm": 0.1362091600894928, "learning_rate": 0.0002947985839433577, "loss": 4.0775, "step": 217300 }, { "epoch": 34.784, "grad_norm": 0.14123298227787018, "learning_rate": 0.00029479618384735385, "loss": 4.1231, "step": 217400 }, { "epoch": 34.8, "grad_norm": 0.14489391446113586, "learning_rate": 0.00029479378375135, "loss": 4.0822, "step": 217500 }, { "epoch": 34.816, "grad_norm": 0.13847863674163818, "learning_rate": 0.0002947913836553462, "loss": 4.1459, "step": 217600 }, { "epoch": 34.832, "grad_norm": 0.14205105602741241, "learning_rate": 0.00029478898355934235, "loss": 4.372, "step": 217700 }, { "epoch": 34.848, "grad_norm": 0.11645542085170746, "learning_rate": 0.0002947865834633385, "loss": 4.0315, "step": 217800 }, { "epoch": 34.864, "grad_norm": 0.12202384322881699, "learning_rate": 0.00029478418336733464, "loss": 3.9054, "step": 217900 }, { "epoch": 34.88, "grad_norm": 0.20459352433681488, "learning_rate": 0.0002947817832713308, "loss": 3.7618, "step": 218000 }, { "epoch": 34.896, "grad_norm": 0.15570496022701263, "learning_rate": 0.000294779383175327, "loss": 3.8624, "step": 218100 }, { "epoch": 34.912, "grad_norm": 0.12296664714813232, "learning_rate": 0.00029477698307932315, "loss": 4.0208, "step": 218200 }, { "epoch": 34.928, "grad_norm": 0.11334937810897827, "learning_rate": 0.0002947745829833193, "loss": 3.904, "step": 218300 }, { "epoch": 34.944, "grad_norm": 0.1409468650817871, "learning_rate": 0.00029477218288731543, "loss": 4.363, "step": 218400 }, { "epoch": 34.96, "grad_norm": 0.1470145583152771, "learning_rate": 0.0002947697827913116, "loss": 4.054, "step": 218500 }, { "epoch": 34.976, "grad_norm": 0.13652758300304413, "learning_rate": 0.00029476738269530777, "loss": 4.0239, "step": 218600 }, { "epoch": 34.992, "grad_norm": 0.13240677118301392, "learning_rate": 0.00029476498259930394, "loss": 4.3156, "step": 218700 }, { "epoch": 35.008, "grad_norm": 0.13786444067955017, "learning_rate": 0.0002947625825033001, "loss": 3.6274, "step": 218800 }, { "epoch": 35.024, "grad_norm": 0.12573717534542084, "learning_rate": 0.0002947601824072963, "loss": 3.6958, "step": 218900 }, { "epoch": 35.04, "grad_norm": 0.17400629818439484, "learning_rate": 0.0002947577823112924, "loss": 3.7097, "step": 219000 }, { "epoch": 35.056, "grad_norm": 0.16587473452091217, "learning_rate": 0.00029475538221528856, "loss": 3.9811, "step": 219100 }, { "epoch": 35.072, "grad_norm": 0.13060854375362396, "learning_rate": 0.00029475298211928473, "loss": 4.0015, "step": 219200 }, { "epoch": 35.088, "grad_norm": 0.1316165030002594, "learning_rate": 0.0002947505820232809, "loss": 3.9748, "step": 219300 }, { "epoch": 35.104, "grad_norm": 0.14657026529312134, "learning_rate": 0.00029474818192727707, "loss": 3.8762, "step": 219400 }, { "epoch": 35.12, "grad_norm": 0.13535165786743164, "learning_rate": 0.0002947457818312732, "loss": 4.0664, "step": 219500 }, { "epoch": 35.136, "grad_norm": 0.14600715041160583, "learning_rate": 0.0002947433817352694, "loss": 4.2579, "step": 219600 }, { "epoch": 35.152, "grad_norm": 0.15709176659584045, "learning_rate": 0.0002947409816392656, "loss": 3.9654, "step": 219700 }, { "epoch": 35.168, "grad_norm": 0.16152530908584595, "learning_rate": 0.00029473858154326174, "loss": 3.9506, "step": 219800 }, { "epoch": 35.184, "grad_norm": 0.1502491682767868, "learning_rate": 0.00029473618144725786, "loss": 4.0792, "step": 219900 }, { "epoch": 35.2, "grad_norm": 0.17572453618049622, "learning_rate": 0.00029473378135125403, "loss": 3.8552, "step": 220000 }, { "epoch": 35.216, "grad_norm": 0.1262667030096054, "learning_rate": 0.0002947313812552502, "loss": 3.8581, "step": 220100 }, { "epoch": 35.232, "grad_norm": 0.12413062155246735, "learning_rate": 0.00029472898115924637, "loss": 3.8523, "step": 220200 }, { "epoch": 35.248, "grad_norm": 0.15677058696746826, "learning_rate": 0.00029472658106324254, "loss": 3.9481, "step": 220300 }, { "epoch": 35.264, "grad_norm": 0.12353496253490448, "learning_rate": 0.00029472418096723865, "loss": 4.1325, "step": 220400 }, { "epoch": 35.28, "grad_norm": 0.15826676785945892, "learning_rate": 0.0002947217808712348, "loss": 4.1481, "step": 220500 }, { "epoch": 35.296, "grad_norm": 0.1419243961572647, "learning_rate": 0.000294719380775231, "loss": 3.9928, "step": 220600 }, { "epoch": 35.312, "grad_norm": 0.1518523097038269, "learning_rate": 0.00029471698067922716, "loss": 4.0497, "step": 220700 }, { "epoch": 35.328, "grad_norm": 0.14802731573581696, "learning_rate": 0.00029471458058322333, "loss": 4.0123, "step": 220800 }, { "epoch": 35.344, "grad_norm": 0.11468464136123657, "learning_rate": 0.0002947121804872195, "loss": 4.0139, "step": 220900 }, { "epoch": 35.36, "grad_norm": 0.12150175869464874, "learning_rate": 0.0002947097803912156, "loss": 4.0437, "step": 221000 }, { "epoch": 35.376, "grad_norm": 0.15461412072181702, "learning_rate": 0.0002947074042961718, "loss": 3.8937, "step": 221100 }, { "epoch": 35.392, "grad_norm": 0.12761832773685455, "learning_rate": 0.00029470500420016797, "loss": 4.0424, "step": 221200 }, { "epoch": 35.408, "grad_norm": 0.12180495262145996, "learning_rate": 0.00029470260410416414, "loss": 3.9253, "step": 221300 }, { "epoch": 35.424, "grad_norm": 0.16864702105522156, "learning_rate": 0.0002947002040081603, "loss": 3.9617, "step": 221400 }, { "epoch": 35.44, "grad_norm": 0.12439541518688202, "learning_rate": 0.0002946978039121564, "loss": 4.2194, "step": 221500 }, { "epoch": 35.456, "grad_norm": 0.13322286307811737, "learning_rate": 0.0002946954038161526, "loss": 3.9745, "step": 221600 }, { "epoch": 35.472, "grad_norm": 0.10073237866163254, "learning_rate": 0.00029469300372014876, "loss": 3.9598, "step": 221700 }, { "epoch": 35.488, "grad_norm": 0.14016889035701752, "learning_rate": 0.00029469060362414493, "loss": 4.0477, "step": 221800 }, { "epoch": 35.504, "grad_norm": 0.09197822958230972, "learning_rate": 0.0002946882035281411, "loss": 4.022, "step": 221900 }, { "epoch": 35.52, "grad_norm": 0.13880948722362518, "learning_rate": 0.00029468580343213727, "loss": 3.8861, "step": 222000 }, { "epoch": 35.536, "grad_norm": 0.171486496925354, "learning_rate": 0.0002946834033361334, "loss": 4.0127, "step": 222100 }, { "epoch": 35.552, "grad_norm": 0.12463164329528809, "learning_rate": 0.00029468100324012955, "loss": 4.1731, "step": 222200 }, { "epoch": 35.568, "grad_norm": 0.10366872698068619, "learning_rate": 0.0002946786031441257, "loss": 4.1263, "step": 222300 }, { "epoch": 35.584, "grad_norm": 0.112830750644207, "learning_rate": 0.0002946762030481219, "loss": 3.9893, "step": 222400 }, { "epoch": 35.6, "grad_norm": 0.21659503877162933, "learning_rate": 0.00029467380295211806, "loss": 3.8116, "step": 222500 }, { "epoch": 35.616, "grad_norm": 0.14348146319389343, "learning_rate": 0.00029467140285611423, "loss": 4.2375, "step": 222600 }, { "epoch": 35.632, "grad_norm": 0.10218438506126404, "learning_rate": 0.00029466900276011034, "loss": 4.1607, "step": 222700 }, { "epoch": 35.648, "grad_norm": 0.14614859223365784, "learning_rate": 0.00029466660266410657, "loss": 3.9639, "step": 222800 }, { "epoch": 35.664, "grad_norm": 0.13791266083717346, "learning_rate": 0.00029466420256810274, "loss": 4.1592, "step": 222900 }, { "epoch": 35.68, "grad_norm": 0.12821194529533386, "learning_rate": 0.00029466180247209885, "loss": 3.9398, "step": 223000 }, { "epoch": 35.696, "grad_norm": 0.14080853760242462, "learning_rate": 0.000294659402376095, "loss": 4.002, "step": 223100 }, { "epoch": 35.712, "grad_norm": 0.1372658759355545, "learning_rate": 0.0002946570022800912, "loss": 3.8826, "step": 223200 }, { "epoch": 35.728, "grad_norm": 0.15756765007972717, "learning_rate": 0.00029465460218408736, "loss": 3.924, "step": 223300 }, { "epoch": 35.744, "grad_norm": 0.14242912828922272, "learning_rate": 0.00029465220208808353, "loss": 4.0292, "step": 223400 }, { "epoch": 35.76, "grad_norm": 0.11862137168645859, "learning_rate": 0.00029464980199207964, "loss": 3.8352, "step": 223500 }, { "epoch": 35.776, "grad_norm": 0.13872364163398743, "learning_rate": 0.0002946474018960758, "loss": 3.8637, "step": 223600 }, { "epoch": 35.792, "grad_norm": 0.11519769579172134, "learning_rate": 0.000294645001800072, "loss": 4.1729, "step": 223700 }, { "epoch": 35.808, "grad_norm": 0.14018337428569794, "learning_rate": 0.00029464260170406815, "loss": 3.9018, "step": 223800 }, { "epoch": 35.824, "grad_norm": 0.11294206976890564, "learning_rate": 0.0002946402016080643, "loss": 3.5407, "step": 223900 }, { "epoch": 35.84, "grad_norm": 0.12088992446660995, "learning_rate": 0.0002946378015120605, "loss": 3.9882, "step": 224000 }, { "epoch": 35.856, "grad_norm": 0.1576825976371765, "learning_rate": 0.0002946354014160566, "loss": 3.8303, "step": 224100 }, { "epoch": 35.872, "grad_norm": 0.11264470964670181, "learning_rate": 0.0002946330013200528, "loss": 4.1679, "step": 224200 }, { "epoch": 35.888, "grad_norm": 0.16877155005931854, "learning_rate": 0.00029463060122404894, "loss": 3.8069, "step": 224300 }, { "epoch": 35.904, "grad_norm": 0.1412443071603775, "learning_rate": 0.0002946282011280451, "loss": 4.0674, "step": 224400 }, { "epoch": 35.92, "grad_norm": 0.12756004929542542, "learning_rate": 0.0002946258010320413, "loss": 4.0602, "step": 224500 }, { "epoch": 35.936, "grad_norm": 0.08994324505329132, "learning_rate": 0.00029462340093603745, "loss": 3.6892, "step": 224600 }, { "epoch": 35.952, "grad_norm": 0.12541432678699493, "learning_rate": 0.00029462100084003357, "loss": 3.9669, "step": 224700 }, { "epoch": 35.968, "grad_norm": 0.13347621262073517, "learning_rate": 0.00029461860074402973, "loss": 4.1704, "step": 224800 }, { "epoch": 35.984, "grad_norm": 0.1366623342037201, "learning_rate": 0.0002946162006480259, "loss": 3.6926, "step": 224900 }, { "epoch": 36.0, "grad_norm": 0.17399033904075623, "learning_rate": 0.0002946138005520221, "loss": 3.8951, "step": 225000 }, { "epoch": 36.016, "grad_norm": 0.133558988571167, "learning_rate": 0.00029461140045601824, "loss": 3.4914, "step": 225100 }, { "epoch": 36.032, "grad_norm": 0.11081928759813309, "learning_rate": 0.00029460900036001436, "loss": 3.9312, "step": 225200 }, { "epoch": 36.048, "grad_norm": 0.18542762100696564, "learning_rate": 0.0002946066002640105, "loss": 3.8511, "step": 225300 }, { "epoch": 36.064, "grad_norm": 0.15145191550254822, "learning_rate": 0.0002946042001680067, "loss": 4.2243, "step": 225400 }, { "epoch": 36.08, "grad_norm": 0.09183911979198456, "learning_rate": 0.0002946018240729629, "loss": 3.7616, "step": 225500 }, { "epoch": 36.096, "grad_norm": 0.12848086655139923, "learning_rate": 0.00029459942397695905, "loss": 3.9567, "step": 225600 }, { "epoch": 36.112, "grad_norm": 0.135446697473526, "learning_rate": 0.00029459704788191524, "loss": 3.7831, "step": 225700 }, { "epoch": 36.128, "grad_norm": 0.14743280410766602, "learning_rate": 0.0002945946477859114, "loss": 3.7522, "step": 225800 }, { "epoch": 36.144, "grad_norm": 0.17610248923301697, "learning_rate": 0.0002945922476899076, "loss": 4.0898, "step": 225900 }, { "epoch": 36.16, "grad_norm": 0.13490499556064606, "learning_rate": 0.00029458984759390375, "loss": 3.9141, "step": 226000 }, { "epoch": 36.176, "grad_norm": 0.13071954250335693, "learning_rate": 0.00029458744749789986, "loss": 3.7696, "step": 226100 }, { "epoch": 36.192, "grad_norm": 0.13196925818920135, "learning_rate": 0.00029458504740189603, "loss": 4.0474, "step": 226200 }, { "epoch": 36.208, "grad_norm": 0.12276848405599594, "learning_rate": 0.0002945826473058922, "loss": 3.826, "step": 226300 }, { "epoch": 36.224, "grad_norm": 0.14857009053230286, "learning_rate": 0.00029458024720988837, "loss": 4.2173, "step": 226400 }, { "epoch": 36.24, "grad_norm": 0.1347169578075409, "learning_rate": 0.00029457784711388454, "loss": 3.9464, "step": 226500 }, { "epoch": 36.256, "grad_norm": 0.15719103813171387, "learning_rate": 0.0002945754470178807, "loss": 4.2225, "step": 226600 }, { "epoch": 36.272, "grad_norm": 0.16743576526641846, "learning_rate": 0.0002945730469218768, "loss": 3.9054, "step": 226700 }, { "epoch": 36.288, "grad_norm": 0.10689014941453934, "learning_rate": 0.000294570646825873, "loss": 3.8102, "step": 226800 }, { "epoch": 36.304, "grad_norm": 0.12143420428037643, "learning_rate": 0.00029456824672986916, "loss": 3.9324, "step": 226900 }, { "epoch": 36.32, "grad_norm": 0.12312216311693192, "learning_rate": 0.00029456584663386533, "loss": 3.6197, "step": 227000 }, { "epoch": 36.336, "grad_norm": 0.13755667209625244, "learning_rate": 0.0002945634465378615, "loss": 3.8858, "step": 227100 }, { "epoch": 36.352, "grad_norm": 0.17473535239696503, "learning_rate": 0.0002945610464418576, "loss": 3.8713, "step": 227200 }, { "epoch": 36.368, "grad_norm": 0.13464218378067017, "learning_rate": 0.0002945586463458538, "loss": 4.0924, "step": 227300 }, { "epoch": 36.384, "grad_norm": 0.14463555812835693, "learning_rate": 0.00029455624624984996, "loss": 3.9226, "step": 227400 }, { "epoch": 36.4, "grad_norm": 0.12049020826816559, "learning_rate": 0.0002945538461538461, "loss": 3.9616, "step": 227500 }, { "epoch": 36.416, "grad_norm": 0.14573007822036743, "learning_rate": 0.0002945514460578423, "loss": 3.9403, "step": 227600 }, { "epoch": 36.432, "grad_norm": 0.1761842966079712, "learning_rate": 0.00029454904596183846, "loss": 4.2201, "step": 227700 }, { "epoch": 36.448, "grad_norm": 0.12458252161741257, "learning_rate": 0.0002945466458658346, "loss": 4.1187, "step": 227800 }, { "epoch": 36.464, "grad_norm": 0.11267081648111343, "learning_rate": 0.00029454424576983075, "loss": 3.8796, "step": 227900 }, { "epoch": 36.48, "grad_norm": 0.20789049565792084, "learning_rate": 0.0002945418456738269, "loss": 3.7935, "step": 228000 }, { "epoch": 36.496, "grad_norm": 0.14261110126972198, "learning_rate": 0.0002945394455778231, "loss": 3.9474, "step": 228100 }, { "epoch": 36.512, "grad_norm": 0.12748025357723236, "learning_rate": 0.0002945370694827793, "loss": 4.0566, "step": 228200 }, { "epoch": 36.528, "grad_norm": 0.1438203603029251, "learning_rate": 0.00029453466938677544, "loss": 4.0041, "step": 228300 }, { "epoch": 36.544, "grad_norm": 0.12537771463394165, "learning_rate": 0.0002945322692907716, "loss": 3.8083, "step": 228400 }, { "epoch": 36.56, "grad_norm": 0.13745057582855225, "learning_rate": 0.0002945298691947678, "loss": 3.9805, "step": 228500 }, { "epoch": 36.576, "grad_norm": 0.1653604805469513, "learning_rate": 0.00029452746909876395, "loss": 3.9848, "step": 228600 }, { "epoch": 36.592, "grad_norm": 0.13784770667552948, "learning_rate": 0.00029452506900276007, "loss": 4.0744, "step": 228700 }, { "epoch": 36.608, "grad_norm": 0.1534193605184555, "learning_rate": 0.00029452266890675623, "loss": 3.8756, "step": 228800 }, { "epoch": 36.624, "grad_norm": 0.15484607219696045, "learning_rate": 0.0002945202688107524, "loss": 4.0308, "step": 228900 }, { "epoch": 36.64, "grad_norm": 0.14803513884544373, "learning_rate": 0.0002945178687147486, "loss": 3.9649, "step": 229000 }, { "epoch": 36.656, "grad_norm": 0.16564255952835083, "learning_rate": 0.00029451549261970476, "loss": 3.7631, "step": 229100 }, { "epoch": 36.672, "grad_norm": 0.15230271220207214, "learning_rate": 0.00029451309252370093, "loss": 3.8678, "step": 229200 }, { "epoch": 36.688, "grad_norm": 0.20385032892227173, "learning_rate": 0.0002945106924276971, "loss": 3.9139, "step": 229300 }, { "epoch": 36.704, "grad_norm": 0.13889221847057343, "learning_rate": 0.00029450829233169327, "loss": 3.8816, "step": 229400 }, { "epoch": 36.72, "grad_norm": 0.1421527862548828, "learning_rate": 0.0002945058922356894, "loss": 3.7661, "step": 229500 }, { "epoch": 36.736, "grad_norm": 0.13975366950035095, "learning_rate": 0.00029450349213968555, "loss": 3.7392, "step": 229600 }, { "epoch": 36.752, "grad_norm": 0.11928146332502365, "learning_rate": 0.0002945010920436817, "loss": 4.11, "step": 229700 }, { "epoch": 36.768, "grad_norm": 0.14216962456703186, "learning_rate": 0.0002944986919476779, "loss": 3.8369, "step": 229800 }, { "epoch": 36.784, "grad_norm": 0.1422761231660843, "learning_rate": 0.00029449629185167406, "loss": 3.4156, "step": 229900 }, { "epoch": 36.8, "grad_norm": 0.1502903699874878, "learning_rate": 0.00029449389175567023, "loss": 3.8996, "step": 230000 }, { "epoch": 36.816, "grad_norm": 0.10828652232885361, "learning_rate": 0.00029449149165966635, "loss": 4.2215, "step": 230100 }, { "epoch": 36.832, "grad_norm": 0.16323305666446686, "learning_rate": 0.0002944890915636625, "loss": 3.7405, "step": 230200 }, { "epoch": 36.848, "grad_norm": 0.22473923861980438, "learning_rate": 0.0002944866914676587, "loss": 4.1824, "step": 230300 }, { "epoch": 36.864, "grad_norm": 0.14632506668567657, "learning_rate": 0.00029448429137165485, "loss": 3.9567, "step": 230400 }, { "epoch": 36.88, "grad_norm": 0.12744389474391937, "learning_rate": 0.000294481891275651, "loss": 4.1179, "step": 230500 }, { "epoch": 36.896, "grad_norm": 0.12241805344820023, "learning_rate": 0.0002944794911796472, "loss": 3.8339, "step": 230600 }, { "epoch": 36.912, "grad_norm": 0.13950026035308838, "learning_rate": 0.0002944770910836433, "loss": 3.6589, "step": 230700 }, { "epoch": 36.928, "grad_norm": 0.15407587587833405, "learning_rate": 0.0002944746909876395, "loss": 4.2868, "step": 230800 }, { "epoch": 36.944, "grad_norm": 0.12287917733192444, "learning_rate": 0.00029447229089163564, "loss": 3.9783, "step": 230900 }, { "epoch": 36.96, "grad_norm": 0.14091332256793976, "learning_rate": 0.0002944698907956318, "loss": 4.1851, "step": 231000 }, { "epoch": 36.976, "grad_norm": 0.09322869777679443, "learning_rate": 0.000294467490699628, "loss": 3.9702, "step": 231100 }, { "epoch": 36.992, "grad_norm": 0.1574215143918991, "learning_rate": 0.0002944650906036241, "loss": 3.87, "step": 231200 }, { "epoch": 37.008, "grad_norm": 0.11709416657686234, "learning_rate": 0.00029446269050762027, "loss": 3.8988, "step": 231300 }, { "epoch": 37.024, "grad_norm": 0.15440665185451508, "learning_rate": 0.00029446029041161644, "loss": 3.9288, "step": 231400 }, { "epoch": 37.04, "grad_norm": 0.14457173645496368, "learning_rate": 0.0002944578903156126, "loss": 4.1169, "step": 231500 }, { "epoch": 37.056, "grad_norm": 0.12255891412496567, "learning_rate": 0.0002944554902196088, "loss": 3.8575, "step": 231600 }, { "epoch": 37.072, "grad_norm": 0.12257202714681625, "learning_rate": 0.00029445309012360494, "loss": 3.9594, "step": 231700 }, { "epoch": 37.088, "grad_norm": 0.15330275893211365, "learning_rate": 0.00029445069002760106, "loss": 3.7446, "step": 231800 }, { "epoch": 37.104, "grad_norm": 0.21277838945388794, "learning_rate": 0.00029444828993159723, "loss": 3.7117, "step": 231900 }, { "epoch": 37.12, "grad_norm": 0.1602598875761032, "learning_rate": 0.0002944458898355934, "loss": 4.0417, "step": 232000 }, { "epoch": 37.136, "grad_norm": 0.15083283185958862, "learning_rate": 0.00029444348973958957, "loss": 3.942, "step": 232100 }, { "epoch": 37.152, "grad_norm": 0.1540352702140808, "learning_rate": 0.00029444108964358574, "loss": 4.0611, "step": 232200 }, { "epoch": 37.168, "grad_norm": 0.16435472667217255, "learning_rate": 0.00029443868954758185, "loss": 3.9479, "step": 232300 }, { "epoch": 37.184, "grad_norm": 0.1132495179772377, "learning_rate": 0.000294436289451578, "loss": 3.7645, "step": 232400 }, { "epoch": 37.2, "grad_norm": 0.11963652074337006, "learning_rate": 0.0002944338893555742, "loss": 4.051, "step": 232500 }, { "epoch": 37.216, "grad_norm": 0.16020596027374268, "learning_rate": 0.00029443148925957036, "loss": 3.7861, "step": 232600 }, { "epoch": 37.232, "grad_norm": 0.1341710090637207, "learning_rate": 0.00029442908916356653, "loss": 3.784, "step": 232700 }, { "epoch": 37.248, "grad_norm": 0.19433575868606567, "learning_rate": 0.0002944266890675627, "loss": 4.0236, "step": 232800 }, { "epoch": 37.264, "grad_norm": 0.10191713273525238, "learning_rate": 0.0002944242889715588, "loss": 3.876, "step": 232900 }, { "epoch": 37.28, "grad_norm": 0.14330856502056122, "learning_rate": 0.000294421888875555, "loss": 3.9151, "step": 233000 }, { "epoch": 37.296, "grad_norm": 0.1420859545469284, "learning_rate": 0.00029441948877955115, "loss": 3.9934, "step": 233100 }, { "epoch": 37.312, "grad_norm": 0.13155090808868408, "learning_rate": 0.00029441711268450734, "loss": 4.0392, "step": 233200 }, { "epoch": 37.328, "grad_norm": 0.1243603527545929, "learning_rate": 0.0002944147125885035, "loss": 4.0342, "step": 233300 }, { "epoch": 37.344, "grad_norm": 0.12558481097221375, "learning_rate": 0.0002944123124924997, "loss": 3.8491, "step": 233400 }, { "epoch": 37.36, "grad_norm": 0.15857043862342834, "learning_rate": 0.00029440991239649585, "loss": 4.1648, "step": 233500 }, { "epoch": 37.376, "grad_norm": 0.13253138959407806, "learning_rate": 0.000294407512300492, "loss": 4.0209, "step": 233600 }, { "epoch": 37.392, "grad_norm": 0.1475256234407425, "learning_rate": 0.0002944051122044882, "loss": 3.6073, "step": 233700 }, { "epoch": 37.408, "grad_norm": 0.14224381744861603, "learning_rate": 0.0002944027121084843, "loss": 3.9372, "step": 233800 }, { "epoch": 37.424, "grad_norm": 0.16577352583408356, "learning_rate": 0.00029440031201248047, "loss": 4.0985, "step": 233900 }, { "epoch": 37.44, "grad_norm": 0.1617100089788437, "learning_rate": 0.00029439791191647664, "loss": 3.6574, "step": 234000 }, { "epoch": 37.456, "grad_norm": 0.16101917624473572, "learning_rate": 0.0002943955118204728, "loss": 3.8793, "step": 234100 }, { "epoch": 37.472, "grad_norm": 0.10672401636838913, "learning_rate": 0.000294393111724469, "loss": 3.9097, "step": 234200 }, { "epoch": 37.488, "grad_norm": 0.15882691740989685, "learning_rate": 0.0002943907116284651, "loss": 4.1482, "step": 234300 }, { "epoch": 37.504, "grad_norm": 0.13513688743114471, "learning_rate": 0.00029438831153246126, "loss": 3.5087, "step": 234400 }, { "epoch": 37.52, "grad_norm": 0.128375843167305, "learning_rate": 0.00029438591143645743, "loss": 3.9971, "step": 234500 }, { "epoch": 37.536, "grad_norm": 0.1471218466758728, "learning_rate": 0.0002943835113404536, "loss": 3.5767, "step": 234600 }, { "epoch": 37.552, "grad_norm": 0.1631135642528534, "learning_rate": 0.00029438111124444977, "loss": 3.879, "step": 234700 }, { "epoch": 37.568, "grad_norm": 0.19412393867969513, "learning_rate": 0.00029437871114844594, "loss": 3.7831, "step": 234800 }, { "epoch": 37.584, "grad_norm": 0.14130400121212006, "learning_rate": 0.00029437631105244205, "loss": 3.7503, "step": 234900 }, { "epoch": 37.6, "grad_norm": 0.1691070795059204, "learning_rate": 0.0002943739109564382, "loss": 3.9815, "step": 235000 }, { "epoch": 37.616, "grad_norm": 0.12281352281570435, "learning_rate": 0.0002943715108604344, "loss": 4.0573, "step": 235100 }, { "epoch": 37.632, "grad_norm": 0.13063079118728638, "learning_rate": 0.00029436911076443056, "loss": 3.6936, "step": 235200 }, { "epoch": 37.648, "grad_norm": 0.1277218759059906, "learning_rate": 0.00029436671066842673, "loss": 4.0944, "step": 235300 }, { "epoch": 37.664, "grad_norm": 0.13247045874595642, "learning_rate": 0.00029436431057242284, "loss": 3.6067, "step": 235400 }, { "epoch": 37.68, "grad_norm": 0.23526181280612946, "learning_rate": 0.000294361910476419, "loss": 3.8545, "step": 235500 }, { "epoch": 37.696, "grad_norm": 0.12313655018806458, "learning_rate": 0.00029435953438137526, "loss": 3.6527, "step": 235600 }, { "epoch": 37.712, "grad_norm": 0.13720963895320892, "learning_rate": 0.0002943571342853714, "loss": 3.9405, "step": 235700 }, { "epoch": 37.728, "grad_norm": 0.13914041221141815, "learning_rate": 0.00029435473418936754, "loss": 3.8785, "step": 235800 }, { "epoch": 37.744, "grad_norm": 0.11534754186868668, "learning_rate": 0.0002943523340933637, "loss": 4.0382, "step": 235900 }, { "epoch": 37.76, "grad_norm": 0.1204744353890419, "learning_rate": 0.0002943499339973599, "loss": 3.9121, "step": 236000 }, { "epoch": 37.776, "grad_norm": 0.11999385803937912, "learning_rate": 0.00029434753390135605, "loss": 4.0028, "step": 236100 }, { "epoch": 37.792, "grad_norm": 0.10596102476119995, "learning_rate": 0.0002943451338053522, "loss": 3.8203, "step": 236200 }, { "epoch": 37.808, "grad_norm": 0.16842292249202728, "learning_rate": 0.00029434273370934833, "loss": 3.8712, "step": 236300 }, { "epoch": 37.824, "grad_norm": 0.11650809645652771, "learning_rate": 0.0002943403336133445, "loss": 3.8061, "step": 236400 }, { "epoch": 37.84, "grad_norm": 0.6413545608520508, "learning_rate": 0.00029433793351734067, "loss": 3.642, "step": 236500 }, { "epoch": 37.856, "grad_norm": 0.13304497301578522, "learning_rate": 0.00029433553342133684, "loss": 3.8441, "step": 236600 }, { "epoch": 37.872, "grad_norm": 0.10673511773347855, "learning_rate": 0.000294333133325333, "loss": 3.8872, "step": 236700 }, { "epoch": 37.888, "grad_norm": 0.23516732454299927, "learning_rate": 0.0002943307332293292, "loss": 3.8594, "step": 236800 }, { "epoch": 37.904, "grad_norm": 0.14567965269088745, "learning_rate": 0.0002943283331333253, "loss": 4.0177, "step": 236900 }, { "epoch": 37.92, "grad_norm": 0.14179280400276184, "learning_rate": 0.00029432593303732146, "loss": 4.0851, "step": 237000 }, { "epoch": 37.936, "grad_norm": 0.1587454080581665, "learning_rate": 0.00029432353294131763, "loss": 3.9843, "step": 237100 }, { "epoch": 37.952, "grad_norm": 0.16633637249469757, "learning_rate": 0.0002943211328453138, "loss": 3.9048, "step": 237200 }, { "epoch": 37.968, "grad_norm": 0.14927643537521362, "learning_rate": 0.00029431873274930997, "loss": 4.029, "step": 237300 }, { "epoch": 37.984, "grad_norm": 0.12191181629896164, "learning_rate": 0.0002943163326533061, "loss": 3.9872, "step": 237400 }, { "epoch": 38.0, "grad_norm": 0.13742613792419434, "learning_rate": 0.00029431393255730225, "loss": 3.8581, "step": 237500 }, { "epoch": 38.016, "grad_norm": 0.15457387268543243, "learning_rate": 0.0002943115324612984, "loss": 3.4112, "step": 237600 }, { "epoch": 38.032, "grad_norm": 0.20382297039031982, "learning_rate": 0.0002943091323652946, "loss": 3.6149, "step": 237700 }, { "epoch": 38.048, "grad_norm": 0.12244167178869247, "learning_rate": 0.00029430673226929076, "loss": 3.751, "step": 237800 }, { "epoch": 38.064, "grad_norm": 0.14330147206783295, "learning_rate": 0.00029430433217328693, "loss": 3.7122, "step": 237900 }, { "epoch": 38.08, "grad_norm": 0.28787901997566223, "learning_rate": 0.0002943019560782431, "loss": 3.6369, "step": 238000 }, { "epoch": 38.096, "grad_norm": 0.16729654371738434, "learning_rate": 0.0002942995559822393, "loss": 3.6873, "step": 238100 }, { "epoch": 38.112, "grad_norm": 0.1431603729724884, "learning_rate": 0.00029429715588623546, "loss": 3.6741, "step": 238200 }, { "epoch": 38.128, "grad_norm": 0.14671485126018524, "learning_rate": 0.00029429475579023157, "loss": 3.8048, "step": 238300 }, { "epoch": 38.144, "grad_norm": 0.12680453062057495, "learning_rate": 0.00029429235569422774, "loss": 3.648, "step": 238400 }, { "epoch": 38.16, "grad_norm": 0.11994794756174088, "learning_rate": 0.0002942899555982239, "loss": 3.7326, "step": 238500 }, { "epoch": 38.176, "grad_norm": 0.13523763418197632, "learning_rate": 0.0002942875555022201, "loss": 4.0274, "step": 238600 }, { "epoch": 38.192, "grad_norm": 0.15768542885780334, "learning_rate": 0.00029428515540621625, "loss": 3.8379, "step": 238700 }, { "epoch": 38.208, "grad_norm": 0.15748849511146545, "learning_rate": 0.0002942827553102124, "loss": 4.1857, "step": 238800 }, { "epoch": 38.224, "grad_norm": 0.14461331069469452, "learning_rate": 0.00029428035521420853, "loss": 3.6514, "step": 238900 }, { "epoch": 38.24, "grad_norm": 0.0995117649435997, "learning_rate": 0.0002942779551182047, "loss": 3.5997, "step": 239000 }, { "epoch": 38.256, "grad_norm": 0.1244683489203453, "learning_rate": 0.00029427555502220087, "loss": 3.6721, "step": 239100 }, { "epoch": 38.272, "grad_norm": 0.16178888082504272, "learning_rate": 0.00029427315492619704, "loss": 3.9333, "step": 239200 }, { "epoch": 38.288, "grad_norm": 0.11731797456741333, "learning_rate": 0.0002942707548301932, "loss": 3.6787, "step": 239300 }, { "epoch": 38.304, "grad_norm": 0.16544343531131744, "learning_rate": 0.0002942683547341893, "loss": 3.6318, "step": 239400 }, { "epoch": 38.32, "grad_norm": 0.16252316534519196, "learning_rate": 0.0002942659546381855, "loss": 3.7224, "step": 239500 }, { "epoch": 38.336, "grad_norm": 0.12585404515266418, "learning_rate": 0.00029426355454218166, "loss": 3.9481, "step": 239600 }, { "epoch": 38.352, "grad_norm": 0.17890673875808716, "learning_rate": 0.00029426115444617783, "loss": 3.6491, "step": 239700 }, { "epoch": 38.368, "grad_norm": 0.15015524625778198, "learning_rate": 0.000294258754350174, "loss": 3.8047, "step": 239800 }, { "epoch": 38.384, "grad_norm": 0.16843190789222717, "learning_rate": 0.00029425635425417017, "loss": 3.846, "step": 239900 }, { "epoch": 38.4, "grad_norm": 0.1351003646850586, "learning_rate": 0.0002942539541581663, "loss": 3.833, "step": 240000 }, { "epoch": 38.416, "grad_norm": 0.18114784359931946, "learning_rate": 0.00029425155406216245, "loss": 3.9342, "step": 240100 }, { "epoch": 38.432, "grad_norm": 0.14112015068531036, "learning_rate": 0.0002942491539661586, "loss": 3.9637, "step": 240200 }, { "epoch": 38.448, "grad_norm": 0.1418529450893402, "learning_rate": 0.0002942467538701548, "loss": 3.8524, "step": 240300 }, { "epoch": 38.464, "grad_norm": 0.15976481139659882, "learning_rate": 0.00029424435377415096, "loss": 3.7826, "step": 240400 }, { "epoch": 38.48, "grad_norm": 0.1492980718612671, "learning_rate": 0.0002942419536781471, "loss": 3.8872, "step": 240500 }, { "epoch": 38.496, "grad_norm": 0.14472247660160065, "learning_rate": 0.00029423955358214325, "loss": 3.8882, "step": 240600 }, { "epoch": 38.512, "grad_norm": 0.12396157532930374, "learning_rate": 0.0002942371534861394, "loss": 3.8247, "step": 240700 }, { "epoch": 38.528, "grad_norm": 0.15059390664100647, "learning_rate": 0.00029423477739109566, "loss": 3.7262, "step": 240800 }, { "epoch": 38.544, "grad_norm": 0.14525267481803894, "learning_rate": 0.0002942323772950918, "loss": 3.6061, "step": 240900 }, { "epoch": 38.56, "grad_norm": 0.1356857717037201, "learning_rate": 0.00029422997719908794, "loss": 3.8736, "step": 241000 }, { "epoch": 38.576, "grad_norm": 0.12578502297401428, "learning_rate": 0.0002942275771030841, "loss": 4.0134, "step": 241100 }, { "epoch": 38.592, "grad_norm": 0.15805451571941376, "learning_rate": 0.0002942251770070803, "loss": 3.8061, "step": 241200 }, { "epoch": 38.608, "grad_norm": 0.2903938591480255, "learning_rate": 0.00029422277691107645, "loss": 3.684, "step": 241300 }, { "epoch": 38.624, "grad_norm": 0.15910255908966064, "learning_rate": 0.00029422037681507257, "loss": 3.867, "step": 241400 }, { "epoch": 38.64, "grad_norm": 0.12789271771907806, "learning_rate": 0.00029421797671906873, "loss": 3.7186, "step": 241500 }, { "epoch": 38.656, "grad_norm": 0.14649321138858795, "learning_rate": 0.0002942155766230649, "loss": 3.8367, "step": 241600 }, { "epoch": 38.672, "grad_norm": 0.11850130558013916, "learning_rate": 0.00029421317652706107, "loss": 3.6316, "step": 241700 }, { "epoch": 38.688, "grad_norm": 0.14355382323265076, "learning_rate": 0.00029421077643105724, "loss": 3.4228, "step": 241800 }, { "epoch": 38.704, "grad_norm": 0.12180539965629578, "learning_rate": 0.0002942083763350534, "loss": 3.7536, "step": 241900 }, { "epoch": 38.72, "grad_norm": 0.13362665474414825, "learning_rate": 0.0002942059762390495, "loss": 3.423, "step": 242000 }, { "epoch": 38.736, "grad_norm": 0.12294348329305649, "learning_rate": 0.0002942035761430457, "loss": 3.7567, "step": 242100 }, { "epoch": 38.752, "grad_norm": 0.12114428728818893, "learning_rate": 0.00029420117604704186, "loss": 3.7296, "step": 242200 }, { "epoch": 38.768, "grad_norm": 0.1333882212638855, "learning_rate": 0.00029419877595103803, "loss": 3.4815, "step": 242300 }, { "epoch": 38.784, "grad_norm": 0.09998457133769989, "learning_rate": 0.0002941963758550342, "loss": 3.7432, "step": 242400 }, { "epoch": 38.8, "grad_norm": 0.19230994582176208, "learning_rate": 0.0002941939757590303, "loss": 3.8833, "step": 242500 }, { "epoch": 38.816, "grad_norm": 0.1394679695367813, "learning_rate": 0.0002941915756630265, "loss": 3.6942, "step": 242600 }, { "epoch": 38.832, "grad_norm": 0.11077361553907394, "learning_rate": 0.00029418917556702266, "loss": 3.7826, "step": 242700 }, { "epoch": 38.848, "grad_norm": 0.11457112431526184, "learning_rate": 0.0002941867754710188, "loss": 3.8788, "step": 242800 }, { "epoch": 38.864, "grad_norm": 0.1613580584526062, "learning_rate": 0.000294184375375015, "loss": 3.4958, "step": 242900 }, { "epoch": 38.88, "grad_norm": 0.10331695526838303, "learning_rate": 0.00029418197527901116, "loss": 3.6553, "step": 243000 }, { "epoch": 38.896, "grad_norm": 0.11617998778820038, "learning_rate": 0.0002941795751830073, "loss": 3.6254, "step": 243100 }, { "epoch": 38.912, "grad_norm": 0.19181759655475616, "learning_rate": 0.00029417717508700345, "loss": 3.7321, "step": 243200 }, { "epoch": 38.928, "grad_norm": 0.13622602820396423, "learning_rate": 0.0002941747749909996, "loss": 3.8559, "step": 243300 }, { "epoch": 38.944, "grad_norm": 0.13252651691436768, "learning_rate": 0.0002941723748949958, "loss": 3.7124, "step": 243400 }, { "epoch": 38.96, "grad_norm": 0.1304987072944641, "learning_rate": 0.00029416997479899196, "loss": 3.7115, "step": 243500 }, { "epoch": 38.976, "grad_norm": 0.16668929159641266, "learning_rate": 0.00029416757470298807, "loss": 3.8687, "step": 243600 }, { "epoch": 38.992, "grad_norm": 0.1449284702539444, "learning_rate": 0.00029416517460698424, "loss": 3.7294, "step": 243700 }, { "epoch": 39.008, "grad_norm": 0.13552846014499664, "learning_rate": 0.0002941627745109804, "loss": 4.0152, "step": 243800 }, { "epoch": 39.024, "grad_norm": 0.11217848211526871, "learning_rate": 0.0002941603744149766, "loss": 3.8025, "step": 243900 }, { "epoch": 39.04, "grad_norm": 0.1166686862707138, "learning_rate": 0.00029415799831993277, "loss": 3.4854, "step": 244000 }, { "epoch": 39.056, "grad_norm": 0.13719838857650757, "learning_rate": 0.00029415559822392894, "loss": 3.7729, "step": 244100 }, { "epoch": 39.072, "grad_norm": 0.12795104086399078, "learning_rate": 0.0002941531981279251, "loss": 3.6918, "step": 244200 }, { "epoch": 39.088, "grad_norm": 0.18888691067695618, "learning_rate": 0.0002941507980319213, "loss": 3.8504, "step": 244300 }, { "epoch": 39.104, "grad_norm": 0.16503533720970154, "learning_rate": 0.00029414839793591744, "loss": 3.8306, "step": 244400 }, { "epoch": 39.12, "grad_norm": 0.16922880709171295, "learning_rate": 0.00029414599783991356, "loss": 3.7714, "step": 244500 }, { "epoch": 39.136, "grad_norm": 0.17991846799850464, "learning_rate": 0.00029414359774390973, "loss": 3.8338, "step": 244600 }, { "epoch": 39.152, "grad_norm": 0.12885114550590515, "learning_rate": 0.0002941411976479059, "loss": 4.1608, "step": 244700 }, { "epoch": 39.168, "grad_norm": 0.12000029534101486, "learning_rate": 0.00029413879755190207, "loss": 3.8216, "step": 244800 }, { "epoch": 39.184, "grad_norm": 0.12693548202514648, "learning_rate": 0.00029413639745589823, "loss": 3.473, "step": 244900 }, { "epoch": 39.2, "grad_norm": 0.1817048341035843, "learning_rate": 0.0002941339973598944, "loss": 3.6365, "step": 245000 }, { "epoch": 39.216, "grad_norm": 0.13877522945404053, "learning_rate": 0.0002941315972638905, "loss": 3.8406, "step": 245100 }, { "epoch": 39.232, "grad_norm": 0.12691953778266907, "learning_rate": 0.0002941291971678867, "loss": 3.7665, "step": 245200 }, { "epoch": 39.248, "grad_norm": 0.11608737707138062, "learning_rate": 0.00029412679707188286, "loss": 3.9396, "step": 245300 }, { "epoch": 39.264, "grad_norm": 0.11453837156295776, "learning_rate": 0.000294124396975879, "loss": 3.6378, "step": 245400 }, { "epoch": 39.28, "grad_norm": 0.13206778466701508, "learning_rate": 0.0002941219968798752, "loss": 3.8031, "step": 245500 }, { "epoch": 39.296, "grad_norm": 0.16421152651309967, "learning_rate": 0.0002941195967838713, "loss": 3.7598, "step": 245600 }, { "epoch": 39.312, "grad_norm": 0.13598020374774933, "learning_rate": 0.0002941171966878675, "loss": 3.5715, "step": 245700 }, { "epoch": 39.328, "grad_norm": 0.1567818820476532, "learning_rate": 0.00029411479659186365, "loss": 3.7087, "step": 245800 }, { "epoch": 39.344, "grad_norm": 0.10842732340097427, "learning_rate": 0.0002941123964958598, "loss": 3.6934, "step": 245900 }, { "epoch": 39.36, "grad_norm": 0.11685346066951752, "learning_rate": 0.000294109996399856, "loss": 3.7408, "step": 246000 }, { "epoch": 39.376, "grad_norm": 0.1216963455080986, "learning_rate": 0.00029410759630385216, "loss": 3.4407, "step": 246100 }, { "epoch": 39.392, "grad_norm": 0.17730294167995453, "learning_rate": 0.00029410519620784827, "loss": 3.3906, "step": 246200 }, { "epoch": 39.408, "grad_norm": 0.12658190727233887, "learning_rate": 0.00029410279611184444, "loss": 3.6821, "step": 246300 }, { "epoch": 39.424, "grad_norm": 0.13732874393463135, "learning_rate": 0.0002941003960158406, "loss": 3.9568, "step": 246400 }, { "epoch": 39.44, "grad_norm": 0.12225256115198135, "learning_rate": 0.0002940979959198368, "loss": 3.6022, "step": 246500 }, { "epoch": 39.456, "grad_norm": 0.14544585347175598, "learning_rate": 0.00029409559582383295, "loss": 3.4184, "step": 246600 }, { "epoch": 39.472, "grad_norm": 0.18220408260822296, "learning_rate": 0.00029409319572782906, "loss": 3.8565, "step": 246700 }, { "epoch": 39.488, "grad_norm": 0.14879514276981354, "learning_rate": 0.00029409079563182523, "loss": 3.8002, "step": 246800 }, { "epoch": 39.504, "grad_norm": 0.13701634109020233, "learning_rate": 0.0002940883955358214, "loss": 3.5608, "step": 246900 }, { "epoch": 39.52, "grad_norm": 0.11361294239759445, "learning_rate": 0.00029408601944077764, "loss": 3.8864, "step": 247000 }, { "epoch": 39.536, "grad_norm": 0.11188530176877975, "learning_rate": 0.00029408361934477376, "loss": 3.8018, "step": 247100 }, { "epoch": 39.552, "grad_norm": 0.1307881772518158, "learning_rate": 0.00029408121924876993, "loss": 3.4101, "step": 247200 }, { "epoch": 39.568, "grad_norm": 0.14631599187850952, "learning_rate": 0.0002940788191527661, "loss": 3.6862, "step": 247300 }, { "epoch": 39.584, "grad_norm": 0.1048213541507721, "learning_rate": 0.00029407641905676227, "loss": 3.6868, "step": 247400 }, { "epoch": 39.6, "grad_norm": 0.16636796295642853, "learning_rate": 0.00029407401896075844, "loss": 3.6092, "step": 247500 }, { "epoch": 39.616, "grad_norm": 0.1403869092464447, "learning_rate": 0.00029407161886475455, "loss": 3.7433, "step": 247600 }, { "epoch": 39.632, "grad_norm": 0.16360409557819366, "learning_rate": 0.0002940692187687507, "loss": 3.627, "step": 247700 }, { "epoch": 39.648, "grad_norm": 0.12884710729122162, "learning_rate": 0.0002940668186727469, "loss": 3.7088, "step": 247800 }, { "epoch": 39.664, "grad_norm": 0.11536765098571777, "learning_rate": 0.00029406441857674306, "loss": 3.6567, "step": 247900 }, { "epoch": 39.68, "grad_norm": 0.13595843315124512, "learning_rate": 0.00029406201848073923, "loss": 3.7039, "step": 248000 }, { "epoch": 39.696, "grad_norm": 0.1451483964920044, "learning_rate": 0.0002940596183847354, "loss": 3.8462, "step": 248100 }, { "epoch": 39.712, "grad_norm": 0.1567588746547699, "learning_rate": 0.0002940572182887315, "loss": 3.5062, "step": 248200 }, { "epoch": 39.728, "grad_norm": 0.15415284037590027, "learning_rate": 0.0002940548181927277, "loss": 3.4701, "step": 248300 }, { "epoch": 39.744, "grad_norm": 0.15984618663787842, "learning_rate": 0.00029405241809672385, "loss": 3.8381, "step": 248400 }, { "epoch": 39.76, "grad_norm": 0.16312451660633087, "learning_rate": 0.00029405001800072, "loss": 3.7477, "step": 248500 }, { "epoch": 39.776, "grad_norm": 0.1532231867313385, "learning_rate": 0.0002940476179047162, "loss": 3.6642, "step": 248600 }, { "epoch": 39.792, "grad_norm": 0.1336035430431366, "learning_rate": 0.0002940452178087123, "loss": 3.6826, "step": 248700 }, { "epoch": 39.808, "grad_norm": 0.12330620735883713, "learning_rate": 0.0002940428177127085, "loss": 3.6787, "step": 248800 }, { "epoch": 39.824, "grad_norm": 0.20183226466178894, "learning_rate": 0.00029404041761670464, "loss": 3.6931, "step": 248900 }, { "epoch": 39.84, "grad_norm": 0.1860995590686798, "learning_rate": 0.0002940380175207008, "loss": 3.84, "step": 249000 }, { "epoch": 39.856, "grad_norm": 0.1320524513721466, "learning_rate": 0.000294035617424697, "loss": 3.8531, "step": 249100 }, { "epoch": 39.872, "grad_norm": 0.14837528765201569, "learning_rate": 0.00029403321732869315, "loss": 3.6836, "step": 249200 }, { "epoch": 39.888, "grad_norm": 0.14694556593894958, "learning_rate": 0.00029403081723268926, "loss": 3.4842, "step": 249300 }, { "epoch": 39.904, "grad_norm": 0.14449697732925415, "learning_rate": 0.00029402841713668543, "loss": 3.7136, "step": 249400 }, { "epoch": 39.92, "grad_norm": 0.13799022138118744, "learning_rate": 0.0002940260170406816, "loss": 3.5355, "step": 249500 }, { "epoch": 39.936, "grad_norm": 0.1362803429365158, "learning_rate": 0.00029402361694467777, "loss": 3.7728, "step": 249600 }, { "epoch": 39.952, "grad_norm": 0.11272797733545303, "learning_rate": 0.00029402121684867394, "loss": 3.7805, "step": 249700 }, { "epoch": 39.968, "grad_norm": 0.1051228940486908, "learning_rate": 0.00029401881675267006, "loss": 4.025, "step": 249800 }, { "epoch": 39.984, "grad_norm": 0.12237226217985153, "learning_rate": 0.0002940164166566662, "loss": 3.7847, "step": 249900 }, { "epoch": 40.0, "grad_norm": 0.14232830703258514, "learning_rate": 0.0002940140165606624, "loss": 4.0043, "step": 250000 }, { "epoch": 40.016, "grad_norm": 0.16174167394638062, "learning_rate": 0.00029401161646465856, "loss": 3.5942, "step": 250100 }, { "epoch": 40.032, "grad_norm": 0.12757262587547302, "learning_rate": 0.00029400924036961475, "loss": 3.6782, "step": 250200 }, { "epoch": 40.048, "grad_norm": 0.1745404601097107, "learning_rate": 0.0002940068402736109, "loss": 3.5218, "step": 250300 }, { "epoch": 40.064, "grad_norm": 0.207327201962471, "learning_rate": 0.0002940044401776071, "loss": 3.7237, "step": 250400 }, { "epoch": 40.08, "grad_norm": 0.21152953803539276, "learning_rate": 0.00029400204008160326, "loss": 3.5957, "step": 250500 }, { "epoch": 40.096, "grad_norm": 0.12471391260623932, "learning_rate": 0.00029399963998559943, "loss": 3.5836, "step": 250600 }, { "epoch": 40.112, "grad_norm": 0.15421348810195923, "learning_rate": 0.00029399723988959554, "loss": 3.7999, "step": 250700 }, { "epoch": 40.128, "grad_norm": 0.14191734790802002, "learning_rate": 0.0002939948397935917, "loss": 3.5853, "step": 250800 }, { "epoch": 40.144, "grad_norm": 0.1669834703207016, "learning_rate": 0.0002939924396975879, "loss": 3.8975, "step": 250900 }, { "epoch": 40.16, "grad_norm": 0.12944963574409485, "learning_rate": 0.00029399003960158405, "loss": 3.6046, "step": 251000 }, { "epoch": 40.176, "grad_norm": 0.14710021018981934, "learning_rate": 0.0002939876395055802, "loss": 3.789, "step": 251100 }, { "epoch": 40.192, "grad_norm": 0.11721208691596985, "learning_rate": 0.0002939852394095764, "loss": 3.665, "step": 251200 }, { "epoch": 40.208, "grad_norm": 0.14062952995300293, "learning_rate": 0.0002939828393135725, "loss": 3.5288, "step": 251300 }, { "epoch": 40.224, "grad_norm": 0.1441258192062378, "learning_rate": 0.0002939804392175687, "loss": 3.9891, "step": 251400 }, { "epoch": 40.24, "grad_norm": 0.14592832326889038, "learning_rate": 0.00029397803912156484, "loss": 3.5997, "step": 251500 }, { "epoch": 40.256, "grad_norm": 0.1378191113471985, "learning_rate": 0.000293975639025561, "loss": 4.0047, "step": 251600 }, { "epoch": 40.272, "grad_norm": 0.16312789916992188, "learning_rate": 0.0002939732389295572, "loss": 3.6065, "step": 251700 }, { "epoch": 40.288, "grad_norm": 0.13440461456775665, "learning_rate": 0.0002939708388335533, "loss": 3.8163, "step": 251800 }, { "epoch": 40.304, "grad_norm": 0.17410612106323242, "learning_rate": 0.00029396843873754947, "loss": 3.8559, "step": 251900 }, { "epoch": 40.32, "grad_norm": 0.12994222342967987, "learning_rate": 0.00029396603864154564, "loss": 3.6883, "step": 252000 }, { "epoch": 40.336, "grad_norm": 0.12502700090408325, "learning_rate": 0.0002939636625465018, "loss": 3.8668, "step": 252100 }, { "epoch": 40.352, "grad_norm": 0.14753608405590057, "learning_rate": 0.000293961262450498, "loss": 3.7555, "step": 252200 }, { "epoch": 40.368, "grad_norm": 0.13866469264030457, "learning_rate": 0.00029395886235449416, "loss": 3.5555, "step": 252300 }, { "epoch": 40.384, "grad_norm": 0.1241583600640297, "learning_rate": 0.0002939564622584903, "loss": 3.8468, "step": 252400 }, { "epoch": 40.4, "grad_norm": 0.10730107873678207, "learning_rate": 0.00029395406216248645, "loss": 3.9761, "step": 252500 }, { "epoch": 40.416, "grad_norm": 0.12991303205490112, "learning_rate": 0.0002939516620664826, "loss": 3.7378, "step": 252600 }, { "epoch": 40.432, "grad_norm": 0.13791170716285706, "learning_rate": 0.0002939492619704788, "loss": 3.8762, "step": 252700 }, { "epoch": 40.448, "grad_norm": 0.1265360563993454, "learning_rate": 0.00029394686187447495, "loss": 3.552, "step": 252800 }, { "epoch": 40.464, "grad_norm": 0.22990497946739197, "learning_rate": 0.0002939444617784711, "loss": 3.6475, "step": 252900 }, { "epoch": 40.48, "grad_norm": 0.13332819938659668, "learning_rate": 0.0002939420616824673, "loss": 3.483, "step": 253000 }, { "epoch": 40.496, "grad_norm": 0.12976233661174774, "learning_rate": 0.00029393966158646346, "loss": 3.6666, "step": 253100 }, { "epoch": 40.512, "grad_norm": 0.15102140605449677, "learning_rate": 0.00029393726149045963, "loss": 3.6249, "step": 253200 }, { "epoch": 40.528, "grad_norm": 0.150995135307312, "learning_rate": 0.00029393486139445575, "loss": 3.6201, "step": 253300 }, { "epoch": 40.544, "grad_norm": 0.15954133868217468, "learning_rate": 0.0002939324612984519, "loss": 3.5654, "step": 253400 }, { "epoch": 40.56, "grad_norm": 0.15610170364379883, "learning_rate": 0.0002939300612024481, "loss": 3.534, "step": 253500 }, { "epoch": 40.576, "grad_norm": 0.15301832556724548, "learning_rate": 0.00029392766110644425, "loss": 3.685, "step": 253600 }, { "epoch": 40.592, "grad_norm": 0.13154812157154083, "learning_rate": 0.0002939252610104404, "loss": 3.8896, "step": 253700 }, { "epoch": 40.608, "grad_norm": 0.10300294309854507, "learning_rate": 0.00029392286091443654, "loss": 3.5597, "step": 253800 }, { "epoch": 40.624, "grad_norm": 0.1325826197862625, "learning_rate": 0.0002939204608184327, "loss": 3.6599, "step": 253900 }, { "epoch": 40.64, "grad_norm": 0.13581326603889465, "learning_rate": 0.0002939180607224289, "loss": 3.2905, "step": 254000 }, { "epoch": 40.656, "grad_norm": 0.12036558240652084, "learning_rate": 0.00029391566062642505, "loss": 3.7272, "step": 254100 }, { "epoch": 40.672, "grad_norm": 0.10396745800971985, "learning_rate": 0.0002939132605304212, "loss": 3.6529, "step": 254200 }, { "epoch": 40.688, "grad_norm": 0.1512759029865265, "learning_rate": 0.0002939108604344174, "loss": 3.4839, "step": 254300 }, { "epoch": 40.704, "grad_norm": 0.11556356400251389, "learning_rate": 0.0002939084603384135, "loss": 3.5659, "step": 254400 }, { "epoch": 40.72, "grad_norm": 0.12830249965190887, "learning_rate": 0.00029390606024240967, "loss": 3.7582, "step": 254500 }, { "epoch": 40.736, "grad_norm": 0.11143537610769272, "learning_rate": 0.00029390366014640584, "loss": 3.504, "step": 254600 }, { "epoch": 40.752, "grad_norm": 0.13892756402492523, "learning_rate": 0.000293901260050402, "loss": 3.802, "step": 254700 }, { "epoch": 40.768, "grad_norm": 0.1631072759628296, "learning_rate": 0.0002938988599543982, "loss": 3.9221, "step": 254800 }, { "epoch": 40.784, "grad_norm": 0.14123888313770294, "learning_rate": 0.00029389645985839434, "loss": 3.4704, "step": 254900 }, { "epoch": 40.8, "grad_norm": 0.12425115704536438, "learning_rate": 0.00029389405976239046, "loss": 3.7102, "step": 255000 }, { "epoch": 40.816, "grad_norm": 0.12264734506607056, "learning_rate": 0.00029389165966638663, "loss": 3.4844, "step": 255100 }, { "epoch": 40.832, "grad_norm": 0.21314075589179993, "learning_rate": 0.0002938892595703828, "loss": 3.5184, "step": 255200 }, { "epoch": 40.848, "grad_norm": 0.1335386335849762, "learning_rate": 0.00029388685947437897, "loss": 3.7781, "step": 255300 }, { "epoch": 40.864, "grad_norm": 0.11609821766614914, "learning_rate": 0.00029388445937837514, "loss": 3.5772, "step": 255400 }, { "epoch": 40.88, "grad_norm": 0.13669651746749878, "learning_rate": 0.00029388205928237125, "loss": 3.9628, "step": 255500 }, { "epoch": 40.896, "grad_norm": 0.14478664100170135, "learning_rate": 0.0002938796591863674, "loss": 3.6225, "step": 255600 }, { "epoch": 40.912, "grad_norm": 0.11656352877616882, "learning_rate": 0.0002938772590903636, "loss": 3.7815, "step": 255700 }, { "epoch": 40.928, "grad_norm": 0.15658356249332428, "learning_rate": 0.00029387485899435976, "loss": 3.8461, "step": 255800 }, { "epoch": 40.944, "grad_norm": 0.11184842139482498, "learning_rate": 0.00029387245889835593, "loss": 3.7847, "step": 255900 }, { "epoch": 40.96, "grad_norm": 0.14163506031036377, "learning_rate": 0.0002938700588023521, "loss": 3.801, "step": 256000 }, { "epoch": 40.976, "grad_norm": 0.13326971232891083, "learning_rate": 0.0002938676587063482, "loss": 3.5765, "step": 256100 }, { "epoch": 40.992, "grad_norm": 0.1375322937965393, "learning_rate": 0.0002938652586103444, "loss": 3.7679, "step": 256200 }, { "epoch": 41.008, "grad_norm": 0.1259625256061554, "learning_rate": 0.00029386285851434055, "loss": 3.5381, "step": 256300 }, { "epoch": 41.024, "grad_norm": 0.15569034218788147, "learning_rate": 0.0002938604584183367, "loss": 3.5461, "step": 256400 }, { "epoch": 41.04, "grad_norm": 0.13957558572292328, "learning_rate": 0.0002938580583223329, "loss": 3.631, "step": 256500 }, { "epoch": 41.056, "grad_norm": 0.23950715363025665, "learning_rate": 0.000293855658226329, "loss": 3.6075, "step": 256600 }, { "epoch": 41.072, "grad_norm": 0.15292255580425262, "learning_rate": 0.0002938532581303252, "loss": 3.5532, "step": 256700 }, { "epoch": 41.088, "grad_norm": 0.13068901002407074, "learning_rate": 0.0002938508820352814, "loss": 3.3775, "step": 256800 }, { "epoch": 41.104, "grad_norm": 0.13932311534881592, "learning_rate": 0.0002938484819392776, "loss": 3.6521, "step": 256900 }, { "epoch": 41.12, "grad_norm": 0.19892440736293793, "learning_rate": 0.0002938460818432737, "loss": 3.823, "step": 257000 }, { "epoch": 41.136, "grad_norm": 0.12685498595237732, "learning_rate": 0.00029384368174726987, "loss": 3.6785, "step": 257100 }, { "epoch": 41.152, "grad_norm": 0.12287259846925735, "learning_rate": 0.00029384128165126604, "loss": 3.6484, "step": 257200 }, { "epoch": 41.168, "grad_norm": 0.16155798733234406, "learning_rate": 0.0002938388815552622, "loss": 3.6533, "step": 257300 }, { "epoch": 41.184, "grad_norm": 0.1555916965007782, "learning_rate": 0.0002938364814592584, "loss": 3.5762, "step": 257400 }, { "epoch": 41.2, "grad_norm": 0.10826678574085236, "learning_rate": 0.0002938340813632545, "loss": 3.7505, "step": 257500 }, { "epoch": 41.216, "grad_norm": 0.18390688300132751, "learning_rate": 0.00029383168126725066, "loss": 3.5967, "step": 257600 }, { "epoch": 41.232, "grad_norm": 0.1177935004234314, "learning_rate": 0.00029382928117124683, "loss": 3.569, "step": 257700 }, { "epoch": 41.248, "grad_norm": 0.14743617177009583, "learning_rate": 0.000293826881075243, "loss": 3.5278, "step": 257800 }, { "epoch": 41.264, "grad_norm": 0.19529984891414642, "learning_rate": 0.00029382448097923917, "loss": 3.6172, "step": 257900 }, { "epoch": 41.28, "grad_norm": 0.1910737156867981, "learning_rate": 0.00029382208088323534, "loss": 3.7249, "step": 258000 }, { "epoch": 41.296, "grad_norm": 0.11363770812749863, "learning_rate": 0.00029381968078723145, "loss": 3.5956, "step": 258100 }, { "epoch": 41.312, "grad_norm": 0.17443308234214783, "learning_rate": 0.0002938172806912276, "loss": 3.7305, "step": 258200 }, { "epoch": 41.328, "grad_norm": 0.1306672841310501, "learning_rate": 0.0002938148805952238, "loss": 3.6149, "step": 258300 }, { "epoch": 41.344, "grad_norm": 0.1476161628961563, "learning_rate": 0.00029381248049921996, "loss": 3.6963, "step": 258400 }, { "epoch": 41.36, "grad_norm": 0.1369323879480362, "learning_rate": 0.00029381008040321613, "loss": 3.5821, "step": 258500 }, { "epoch": 41.376, "grad_norm": 0.15780112147331238, "learning_rate": 0.00029380768030721224, "loss": 3.7009, "step": 258600 }, { "epoch": 41.392, "grad_norm": 0.11665251106023788, "learning_rate": 0.0002938052802112084, "loss": 3.4389, "step": 258700 }, { "epoch": 41.408, "grad_norm": 0.12598150968551636, "learning_rate": 0.0002938028801152046, "loss": 3.3633, "step": 258800 }, { "epoch": 41.424, "grad_norm": 0.12654541432857513, "learning_rate": 0.00029380048001920075, "loss": 3.6301, "step": 258900 }, { "epoch": 41.44, "grad_norm": 0.1514132022857666, "learning_rate": 0.0002937980799231969, "loss": 3.6985, "step": 259000 }, { "epoch": 41.456, "grad_norm": 0.15360794961452484, "learning_rate": 0.0002937956798271931, "loss": 3.3812, "step": 259100 }, { "epoch": 41.472, "grad_norm": 0.14269192516803741, "learning_rate": 0.0002937933037321493, "loss": 3.6113, "step": 259200 }, { "epoch": 41.488, "grad_norm": 0.13047970831394196, "learning_rate": 0.00029379090363614545, "loss": 3.3654, "step": 259300 }, { "epoch": 41.504, "grad_norm": 0.16103631258010864, "learning_rate": 0.0002937885035401416, "loss": 3.556, "step": 259400 }, { "epoch": 41.52, "grad_norm": 0.13219304382801056, "learning_rate": 0.00029378610344413773, "loss": 3.5701, "step": 259500 }, { "epoch": 41.536, "grad_norm": 0.13965025544166565, "learning_rate": 0.0002937837033481339, "loss": 3.3464, "step": 259600 }, { "epoch": 41.552, "grad_norm": 0.12311989814043045, "learning_rate": 0.00029378130325213007, "loss": 3.7566, "step": 259700 }, { "epoch": 41.568, "grad_norm": 0.14324423670768738, "learning_rate": 0.00029377890315612624, "loss": 3.6659, "step": 259800 }, { "epoch": 41.584, "grad_norm": 0.13608694076538086, "learning_rate": 0.0002937765030601224, "loss": 3.7461, "step": 259900 }, { "epoch": 41.6, "grad_norm": 0.15044239163398743, "learning_rate": 0.0002937741029641186, "loss": 3.5441, "step": 260000 }, { "epoch": 41.616, "grad_norm": 0.12868137657642365, "learning_rate": 0.0002937717028681147, "loss": 3.7904, "step": 260100 }, { "epoch": 41.632, "grad_norm": 0.17670278251171112, "learning_rate": 0.00029376930277211086, "loss": 3.5517, "step": 260200 }, { "epoch": 41.648, "grad_norm": 0.1420828402042389, "learning_rate": 0.00029376690267610703, "loss": 3.5436, "step": 260300 }, { "epoch": 41.664, "grad_norm": 0.15569482743740082, "learning_rate": 0.0002937645025801032, "loss": 3.6917, "step": 260400 }, { "epoch": 41.68, "grad_norm": 0.15980389714241028, "learning_rate": 0.00029376210248409937, "loss": 3.4665, "step": 260500 }, { "epoch": 41.696, "grad_norm": 0.1492902636528015, "learning_rate": 0.0002937597023880955, "loss": 3.6891, "step": 260600 }, { "epoch": 41.712, "grad_norm": 0.18961113691329956, "learning_rate": 0.00029375730229209165, "loss": 3.7746, "step": 260700 }, { "epoch": 41.728, "grad_norm": 0.12037073820829391, "learning_rate": 0.0002937549021960878, "loss": 3.786, "step": 260800 }, { "epoch": 41.744, "grad_norm": 0.17683453857898712, "learning_rate": 0.000293752502100084, "loss": 3.4662, "step": 260900 }, { "epoch": 41.76, "grad_norm": 0.18315567076206207, "learning_rate": 0.00029375010200408016, "loss": 3.8429, "step": 261000 }, { "epoch": 41.776, "grad_norm": 0.1487838327884674, "learning_rate": 0.00029374770190807633, "loss": 3.5626, "step": 261100 }, { "epoch": 41.792, "grad_norm": 0.16062875092029572, "learning_rate": 0.00029374530181207245, "loss": 3.6905, "step": 261200 }, { "epoch": 41.808, "grad_norm": 0.20533856749534607, "learning_rate": 0.0002937429497179887, "loss": 3.5168, "step": 261300 }, { "epoch": 41.824, "grad_norm": 0.16050328314304352, "learning_rate": 0.0002937405496219849, "loss": 3.7247, "step": 261400 }, { "epoch": 41.84, "grad_norm": 0.1527540385723114, "learning_rate": 0.000293738149525981, "loss": 3.4572, "step": 261500 }, { "epoch": 41.856, "grad_norm": 0.14462052285671234, "learning_rate": 0.00029373574942997716, "loss": 3.7395, "step": 261600 }, { "epoch": 41.872, "grad_norm": 0.20031331479549408, "learning_rate": 0.00029373334933397333, "loss": 3.5114, "step": 261700 }, { "epoch": 41.888, "grad_norm": 0.16863399744033813, "learning_rate": 0.0002937309492379695, "loss": 3.3848, "step": 261800 }, { "epoch": 41.904, "grad_norm": 0.15074513852596283, "learning_rate": 0.00029372854914196567, "loss": 3.6762, "step": 261900 }, { "epoch": 41.92, "grad_norm": 0.1178865060210228, "learning_rate": 0.00029372614904596184, "loss": 3.5694, "step": 262000 }, { "epoch": 41.936, "grad_norm": 0.11632855981588364, "learning_rate": 0.00029372374894995795, "loss": 3.6343, "step": 262100 }, { "epoch": 41.952, "grad_norm": 0.18044792115688324, "learning_rate": 0.0002937213488539541, "loss": 3.648, "step": 262200 }, { "epoch": 41.968, "grad_norm": 0.10297371447086334, "learning_rate": 0.0002937189487579503, "loss": 3.6071, "step": 262300 }, { "epoch": 41.984, "grad_norm": 0.14058756828308105, "learning_rate": 0.00029371654866194646, "loss": 3.6271, "step": 262400 }, { "epoch": 42.0, "grad_norm": 0.09474769234657288, "learning_rate": 0.00029371414856594263, "loss": 3.6684, "step": 262500 }, { "epoch": 42.016, "grad_norm": 0.16156066954135895, "learning_rate": 0.00029371174846993874, "loss": 3.1841, "step": 262600 }, { "epoch": 42.032, "grad_norm": 0.12073547393083572, "learning_rate": 0.0002937093483739349, "loss": 3.581, "step": 262700 }, { "epoch": 42.048, "grad_norm": 0.16735853254795074, "learning_rate": 0.0002937069482779311, "loss": 3.6946, "step": 262800 }, { "epoch": 42.064, "grad_norm": 0.13211970031261444, "learning_rate": 0.00029370454818192725, "loss": 3.7033, "step": 262900 }, { "epoch": 42.08, "grad_norm": 0.1313381791114807, "learning_rate": 0.0002937021480859234, "loss": 3.6705, "step": 263000 }, { "epoch": 42.096, "grad_norm": 0.18267682194709778, "learning_rate": 0.0002936997479899196, "loss": 3.6602, "step": 263100 }, { "epoch": 42.112, "grad_norm": 0.12447042018175125, "learning_rate": 0.0002936973478939157, "loss": 3.23, "step": 263200 }, { "epoch": 42.128, "grad_norm": 0.12706032395362854, "learning_rate": 0.0002936949477979119, "loss": 3.667, "step": 263300 }, { "epoch": 42.144, "grad_norm": 0.1309398114681244, "learning_rate": 0.00029369254770190804, "loss": 3.4056, "step": 263400 }, { "epoch": 42.16, "grad_norm": 0.16127915680408478, "learning_rate": 0.0002936901476059042, "loss": 3.4085, "step": 263500 }, { "epoch": 42.176, "grad_norm": 0.15786415338516235, "learning_rate": 0.0002936877475099004, "loss": 3.5949, "step": 263600 }, { "epoch": 42.192, "grad_norm": 0.18576839566230774, "learning_rate": 0.0002936853474138965, "loss": 3.5085, "step": 263700 }, { "epoch": 42.208, "grad_norm": 0.17807281017303467, "learning_rate": 0.00029368294731789267, "loss": 3.2276, "step": 263800 }, { "epoch": 42.224, "grad_norm": 0.11483947187662125, "learning_rate": 0.00029368054722188884, "loss": 3.4073, "step": 263900 }, { "epoch": 42.24, "grad_norm": 0.1563270092010498, "learning_rate": 0.000293678147125885, "loss": 3.532, "step": 264000 }, { "epoch": 42.256, "grad_norm": 0.14988571405410767, "learning_rate": 0.0002936757470298812, "loss": 3.3485, "step": 264100 }, { "epoch": 42.272, "grad_norm": 0.1600615233182907, "learning_rate": 0.00029367334693387734, "loss": 3.5401, "step": 264200 }, { "epoch": 42.288, "grad_norm": 0.10367301106452942, "learning_rate": 0.00029367097083883353, "loss": 3.8633, "step": 264300 }, { "epoch": 42.304, "grad_norm": 0.20797039568424225, "learning_rate": 0.0002936685707428297, "loss": 3.5468, "step": 264400 }, { "epoch": 42.32, "grad_norm": 0.16946087777614594, "learning_rate": 0.00029366617064682587, "loss": 3.2335, "step": 264500 }, { "epoch": 42.336, "grad_norm": 0.191020667552948, "learning_rate": 0.000293663770550822, "loss": 3.7333, "step": 264600 }, { "epoch": 42.352, "grad_norm": 0.12999840080738068, "learning_rate": 0.00029366137045481815, "loss": 3.5601, "step": 264700 }, { "epoch": 42.368, "grad_norm": 0.10429554432630539, "learning_rate": 0.0002936589703588143, "loss": 3.5495, "step": 264800 }, { "epoch": 42.384, "grad_norm": 0.18428346514701843, "learning_rate": 0.0002936565702628105, "loss": 3.4436, "step": 264900 }, { "epoch": 42.4, "grad_norm": 0.10777553170919418, "learning_rate": 0.00029365417016680666, "loss": 3.2807, "step": 265000 }, { "epoch": 42.416, "grad_norm": 0.1506805717945099, "learning_rate": 0.00029365177007080283, "loss": 3.6521, "step": 265100 }, { "epoch": 42.432, "grad_norm": 0.16227808594703674, "learning_rate": 0.00029364936997479895, "loss": 3.7157, "step": 265200 }, { "epoch": 42.448, "grad_norm": 0.16502080857753754, "learning_rate": 0.0002936469698787951, "loss": 3.8207, "step": 265300 }, { "epoch": 42.464, "grad_norm": 0.14601773023605347, "learning_rate": 0.0002936445697827913, "loss": 3.5874, "step": 265400 }, { "epoch": 42.48, "grad_norm": 0.12585802376270294, "learning_rate": 0.00029364216968678745, "loss": 3.3724, "step": 265500 }, { "epoch": 42.496, "grad_norm": 0.16403427720069885, "learning_rate": 0.0002936397695907836, "loss": 3.7427, "step": 265600 }, { "epoch": 42.512, "grad_norm": 0.2234642207622528, "learning_rate": 0.00029363736949477974, "loss": 3.5755, "step": 265700 }, { "epoch": 42.528, "grad_norm": 0.1259508579969406, "learning_rate": 0.0002936349693987759, "loss": 3.3681, "step": 265800 }, { "epoch": 42.544, "grad_norm": 0.16016298532485962, "learning_rate": 0.0002936325693027721, "loss": 3.4035, "step": 265900 }, { "epoch": 42.56, "grad_norm": 0.16391395032405853, "learning_rate": 0.00029363016920676825, "loss": 3.445, "step": 266000 }, { "epoch": 42.576, "grad_norm": 0.17547176778316498, "learning_rate": 0.0002936277691107644, "loss": 3.7729, "step": 266100 }, { "epoch": 42.592, "grad_norm": 0.13189229369163513, "learning_rate": 0.0002936253690147606, "loss": 3.4294, "step": 266200 }, { "epoch": 42.608, "grad_norm": 0.10672536492347717, "learning_rate": 0.0002936229689187567, "loss": 3.766, "step": 266300 }, { "epoch": 42.624, "grad_norm": 0.15849149227142334, "learning_rate": 0.00029362056882275287, "loss": 3.5439, "step": 266400 }, { "epoch": 42.64, "grad_norm": 0.1756531149148941, "learning_rate": 0.00029361816872674904, "loss": 3.4491, "step": 266500 }, { "epoch": 42.656, "grad_norm": 0.11969030648469925, "learning_rate": 0.0002936157686307452, "loss": 3.7564, "step": 266600 }, { "epoch": 42.672, "grad_norm": 0.12340911477804184, "learning_rate": 0.0002936133685347414, "loss": 3.2914, "step": 266700 }, { "epoch": 42.688, "grad_norm": 0.12453418970108032, "learning_rate": 0.0002936109684387375, "loss": 3.4867, "step": 266800 }, { "epoch": 42.704, "grad_norm": 0.17998869717121124, "learning_rate": 0.00029360856834273366, "loss": 3.8194, "step": 266900 }, { "epoch": 42.72, "grad_norm": 0.1365824192762375, "learning_rate": 0.00029360616824672983, "loss": 3.484, "step": 267000 }, { "epoch": 42.736, "grad_norm": 0.14475534856319427, "learning_rate": 0.000293603768150726, "loss": 3.9079, "step": 267100 }, { "epoch": 42.752, "grad_norm": 0.14762897789478302, "learning_rate": 0.00029360136805472217, "loss": 3.5505, "step": 267200 }, { "epoch": 42.768, "grad_norm": 0.16979500651359558, "learning_rate": 0.00029359896795871834, "loss": 3.5919, "step": 267300 }, { "epoch": 42.784, "grad_norm": 0.16020934283733368, "learning_rate": 0.00029359656786271445, "loss": 3.6073, "step": 267400 }, { "epoch": 42.8, "grad_norm": 0.14011001586914062, "learning_rate": 0.0002935941677667106, "loss": 3.2919, "step": 267500 }, { "epoch": 42.816, "grad_norm": 0.16544441878795624, "learning_rate": 0.0002935917676707068, "loss": 3.496, "step": 267600 }, { "epoch": 42.832, "grad_norm": 0.17239537835121155, "learning_rate": 0.00029358936757470296, "loss": 3.4509, "step": 267700 }, { "epoch": 42.848, "grad_norm": 0.10768439620733261, "learning_rate": 0.00029358696747869913, "loss": 3.6296, "step": 267800 }, { "epoch": 42.864, "grad_norm": 0.15744425356388092, "learning_rate": 0.00029358456738269524, "loss": 3.4296, "step": 267900 }, { "epoch": 42.88, "grad_norm": 0.16955333948135376, "learning_rate": 0.0002935821672866914, "loss": 3.5791, "step": 268000 }, { "epoch": 42.896, "grad_norm": 0.1334005445241928, "learning_rate": 0.0002935797671906876, "loss": 3.7341, "step": 268100 }, { "epoch": 42.912, "grad_norm": 0.13382244110107422, "learning_rate": 0.00029357736709468375, "loss": 3.5309, "step": 268200 }, { "epoch": 42.928, "grad_norm": 0.15216900408267975, "learning_rate": 0.0002935749669986799, "loss": 4.0343, "step": 268300 }, { "epoch": 42.944, "grad_norm": 0.13688887655735016, "learning_rate": 0.0002935725669026761, "loss": 3.6904, "step": 268400 }, { "epoch": 42.96, "grad_norm": 0.14086109399795532, "learning_rate": 0.0002935701668066722, "loss": 3.5934, "step": 268500 }, { "epoch": 42.976, "grad_norm": 0.1646319478750229, "learning_rate": 0.00029356776671066843, "loss": 3.5711, "step": 268600 }, { "epoch": 42.992, "grad_norm": 0.12537230551242828, "learning_rate": 0.0002935653666146646, "loss": 3.3006, "step": 268700 }, { "epoch": 43.008, "grad_norm": 0.180904820561409, "learning_rate": 0.0002935629665186607, "loss": 3.5119, "step": 268800 }, { "epoch": 43.024, "grad_norm": 0.12639842927455902, "learning_rate": 0.0002935605664226569, "loss": 3.2688, "step": 268900 }, { "epoch": 43.04, "grad_norm": 0.12120150029659271, "learning_rate": 0.00029355816632665305, "loss": 3.5053, "step": 269000 }, { "epoch": 43.056, "grad_norm": 0.10983855277299881, "learning_rate": 0.0002935557662306492, "loss": 3.5363, "step": 269100 }, { "epoch": 43.072, "grad_norm": 0.16801497340202332, "learning_rate": 0.0002935533661346454, "loss": 3.4351, "step": 269200 }, { "epoch": 43.088, "grad_norm": 0.12963399291038513, "learning_rate": 0.00029355096603864156, "loss": 3.7016, "step": 269300 }, { "epoch": 43.104, "grad_norm": 0.15802137553691864, "learning_rate": 0.0002935485899435977, "loss": 3.4505, "step": 269400 }, { "epoch": 43.12, "grad_norm": 0.13458667695522308, "learning_rate": 0.00029354618984759386, "loss": 3.5449, "step": 269500 }, { "epoch": 43.136, "grad_norm": 0.15915992856025696, "learning_rate": 0.00029354378975159003, "loss": 3.6303, "step": 269600 }, { "epoch": 43.152, "grad_norm": 0.13288740813732147, "learning_rate": 0.0002935413896555862, "loss": 3.7843, "step": 269700 }, { "epoch": 43.168, "grad_norm": 0.12764649093151093, "learning_rate": 0.00029353898955958237, "loss": 3.4263, "step": 269800 }, { "epoch": 43.184, "grad_norm": 0.13177116215229034, "learning_rate": 0.0002935365894635785, "loss": 3.6666, "step": 269900 }, { "epoch": 43.2, "grad_norm": 0.18173980712890625, "learning_rate": 0.00029353418936757465, "loss": 3.4425, "step": 270000 }, { "epoch": 43.216, "grad_norm": 0.13036012649536133, "learning_rate": 0.0002935317892715708, "loss": 3.6618, "step": 270100 }, { "epoch": 43.232, "grad_norm": 0.13550440967082977, "learning_rate": 0.000293529389175567, "loss": 3.531, "step": 270200 }, { "epoch": 43.248, "grad_norm": 0.14626292884349823, "learning_rate": 0.00029352698907956316, "loss": 3.5801, "step": 270300 }, { "epoch": 43.264, "grad_norm": 0.1537875384092331, "learning_rate": 0.00029352458898355933, "loss": 3.8838, "step": 270400 }, { "epoch": 43.28, "grad_norm": 0.13375823199748993, "learning_rate": 0.00029352218888755544, "loss": 3.5786, "step": 270500 }, { "epoch": 43.296, "grad_norm": 0.1354847550392151, "learning_rate": 0.0002935197887915516, "loss": 3.6394, "step": 270600 }, { "epoch": 43.312, "grad_norm": 0.1401492804288864, "learning_rate": 0.0002935173886955478, "loss": 3.4432, "step": 270700 }, { "epoch": 43.328, "grad_norm": 0.17879655957221985, "learning_rate": 0.00029351498859954395, "loss": 3.5412, "step": 270800 }, { "epoch": 43.344, "grad_norm": 0.18204811215400696, "learning_rate": 0.0002935125885035401, "loss": 3.4188, "step": 270900 }, { "epoch": 43.36, "grad_norm": 0.13775452971458435, "learning_rate": 0.00029351018840753624, "loss": 3.6903, "step": 271000 }, { "epoch": 43.376, "grad_norm": 0.14243382215499878, "learning_rate": 0.0002935077883115324, "loss": 3.4005, "step": 271100 }, { "epoch": 43.392, "grad_norm": 0.14261014759540558, "learning_rate": 0.0002935053882155286, "loss": 3.5656, "step": 271200 }, { "epoch": 43.408, "grad_norm": 0.16877153515815735, "learning_rate": 0.00029350298811952474, "loss": 3.3224, "step": 271300 }, { "epoch": 43.424, "grad_norm": NaN, "learning_rate": 0.0002935005880235209, "loss": 3.4325, "step": 271400 }, { "epoch": 43.44, "grad_norm": 0.17013391852378845, "learning_rate": 0.0002934982119284771, "loss": 3.5221, "step": 271500 }, { "epoch": 43.456, "grad_norm": 0.1461247056722641, "learning_rate": 0.00029349581183247327, "loss": 3.5917, "step": 271600 }, { "epoch": 43.472, "grad_norm": 0.09930270165205002, "learning_rate": 0.00029349341173646944, "loss": 3.8381, "step": 271700 }, { "epoch": 43.488, "grad_norm": 0.16227060556411743, "learning_rate": 0.0002934910116404656, "loss": 3.4227, "step": 271800 }, { "epoch": 43.504, "grad_norm": 0.15075701475143433, "learning_rate": 0.0002934886115444617, "loss": 3.6302, "step": 271900 }, { "epoch": 43.52, "grad_norm": 0.13108420372009277, "learning_rate": 0.0002934862114484579, "loss": 3.8756, "step": 272000 }, { "epoch": 43.536, "grad_norm": 0.1393056958913803, "learning_rate": 0.00029348381135245406, "loss": 3.4647, "step": 272100 }, { "epoch": 43.552, "grad_norm": 0.13311314582824707, "learning_rate": 0.00029348141125645023, "loss": 3.5177, "step": 272200 }, { "epoch": 43.568, "grad_norm": 0.09804750978946686, "learning_rate": 0.0002934790111604464, "loss": 3.6251, "step": 272300 }, { "epoch": 43.584, "grad_norm": 0.12439204007387161, "learning_rate": 0.00029347661106444257, "loss": 3.7025, "step": 272400 }, { "epoch": 43.6, "grad_norm": 0.16347762942314148, "learning_rate": 0.0002934742109684387, "loss": 3.3641, "step": 272500 }, { "epoch": 43.616, "grad_norm": 0.1650267094373703, "learning_rate": 0.00029347181087243485, "loss": 3.5107, "step": 272600 }, { "epoch": 43.632, "grad_norm": 0.14733003079891205, "learning_rate": 0.000293469410776431, "loss": 3.5082, "step": 272700 }, { "epoch": 43.648, "grad_norm": 0.1645534634590149, "learning_rate": 0.0002934670106804272, "loss": 3.528, "step": 272800 }, { "epoch": 43.664, "grad_norm": 0.15281584858894348, "learning_rate": 0.00029346461058442336, "loss": 3.4978, "step": 272900 }, { "epoch": 43.68, "grad_norm": 0.13903990387916565, "learning_rate": 0.0002934622104884195, "loss": 3.4299, "step": 273000 }, { "epoch": 43.696, "grad_norm": 0.14665891230106354, "learning_rate": 0.00029345981039241565, "loss": 3.5965, "step": 273100 }, { "epoch": 43.712, "grad_norm": 0.16011710464954376, "learning_rate": 0.0002934574102964118, "loss": 3.8562, "step": 273200 }, { "epoch": 43.728, "grad_norm": 0.13237501680850983, "learning_rate": 0.000293455010200408, "loss": 3.4666, "step": 273300 }, { "epoch": 43.744, "grad_norm": 0.17945890128612518, "learning_rate": 0.00029345261010440415, "loss": 3.4137, "step": 273400 }, { "epoch": 43.76, "grad_norm": 0.12889237701892853, "learning_rate": 0.00029345023400936034, "loss": 3.5771, "step": 273500 }, { "epoch": 43.776, "grad_norm": 0.12061195820569992, "learning_rate": 0.0002934478339133565, "loss": 3.6067, "step": 273600 }, { "epoch": 43.792, "grad_norm": 0.18037542700767517, "learning_rate": 0.0002934454338173527, "loss": 3.3744, "step": 273700 }, { "epoch": 43.808, "grad_norm": 0.1448611617088318, "learning_rate": 0.00029344303372134885, "loss": 3.598, "step": 273800 }, { "epoch": 43.824, "grad_norm": 0.13129834830760956, "learning_rate": 0.00029344063362534496, "loss": 3.3471, "step": 273900 }, { "epoch": 43.84, "grad_norm": 0.13635963201522827, "learning_rate": 0.00029343823352934113, "loss": 3.6441, "step": 274000 }, { "epoch": 43.856, "grad_norm": 0.12417308986186981, "learning_rate": 0.0002934358334333373, "loss": 3.6395, "step": 274100 }, { "epoch": 43.872, "grad_norm": 0.12382373213768005, "learning_rate": 0.00029343343333733347, "loss": 3.4041, "step": 274200 }, { "epoch": 43.888, "grad_norm": 0.13451391458511353, "learning_rate": 0.00029343103324132964, "loss": 3.6931, "step": 274300 }, { "epoch": 43.904, "grad_norm": 0.15478438138961792, "learning_rate": 0.0002934286331453258, "loss": 3.911, "step": 274400 }, { "epoch": 43.92, "grad_norm": 0.13183178007602692, "learning_rate": 0.0002934262330493219, "loss": 3.8277, "step": 274500 }, { "epoch": 43.936, "grad_norm": 0.15390954911708832, "learning_rate": 0.0002934238329533181, "loss": 3.5169, "step": 274600 }, { "epoch": 43.952, "grad_norm": 0.20243871212005615, "learning_rate": 0.00029342143285731426, "loss": 3.6921, "step": 274700 }, { "epoch": 43.968, "grad_norm": 0.1516224443912506, "learning_rate": 0.00029341903276131043, "loss": 3.6493, "step": 274800 }, { "epoch": 43.984, "grad_norm": 0.14728285372257233, "learning_rate": 0.0002934166326653066, "loss": 3.5567, "step": 274900 }, { "epoch": 44.0, "grad_norm": 0.1121896430850029, "learning_rate": 0.0002934142325693027, "loss": 3.4837, "step": 275000 }, { "epoch": 44.016, "grad_norm": 0.10630350559949875, "learning_rate": 0.0002934118324732989, "loss": 3.4566, "step": 275100 }, { "epoch": 44.032, "grad_norm": 0.12264316529035568, "learning_rate": 0.00029340943237729506, "loss": 3.2843, "step": 275200 }, { "epoch": 44.048, "grad_norm": 0.1184447780251503, "learning_rate": 0.0002934070322812912, "loss": 3.5181, "step": 275300 }, { "epoch": 44.064, "grad_norm": 0.17456239461898804, "learning_rate": 0.0002934046321852874, "loss": 3.3908, "step": 275400 }, { "epoch": 44.08, "grad_norm": 0.13953928649425507, "learning_rate": 0.00029340223208928356, "loss": 3.3264, "step": 275500 }, { "epoch": 44.096, "grad_norm": 0.1314176619052887, "learning_rate": 0.0002933998319932797, "loss": 3.5627, "step": 275600 }, { "epoch": 44.112, "grad_norm": 0.12712983787059784, "learning_rate": 0.00029339743189727585, "loss": 3.7446, "step": 275700 }, { "epoch": 44.128, "grad_norm": 0.1755862981081009, "learning_rate": 0.000293395031801272, "loss": 3.4231, "step": 275800 }, { "epoch": 44.144, "grad_norm": 0.12341305613517761, "learning_rate": 0.0002933926317052682, "loss": 3.6464, "step": 275900 }, { "epoch": 44.16, "grad_norm": 0.2150745987892151, "learning_rate": 0.00029339023160926435, "loss": 3.4009, "step": 276000 }, { "epoch": 44.176, "grad_norm": 0.11823127418756485, "learning_rate": 0.00029338783151326047, "loss": 3.5804, "step": 276100 }, { "epoch": 44.192, "grad_norm": 0.14805470407009125, "learning_rate": 0.00029338543141725664, "loss": 3.2387, "step": 276200 }, { "epoch": 44.208, "grad_norm": 0.14680412411689758, "learning_rate": 0.0002933830313212528, "loss": 3.4667, "step": 276300 }, { "epoch": 44.224, "grad_norm": 0.15967050194740295, "learning_rate": 0.000293380631225249, "loss": 3.5317, "step": 276400 }, { "epoch": 44.24, "grad_norm": 0.16053569316864014, "learning_rate": 0.00029337823112924515, "loss": 3.4642, "step": 276500 }, { "epoch": 44.256, "grad_norm": 0.1451679766178131, "learning_rate": 0.0002933758310332413, "loss": 3.2417, "step": 276600 }, { "epoch": 44.272, "grad_norm": 0.1452629566192627, "learning_rate": 0.0002933734549381975, "loss": 3.2945, "step": 276700 }, { "epoch": 44.288, "grad_norm": 0.16265207529067993, "learning_rate": 0.0002933710548421937, "loss": 3.7074, "step": 276800 }, { "epoch": 44.304, "grad_norm": 0.13818389177322388, "learning_rate": 0.00029336865474618984, "loss": 3.5, "step": 276900 }, { "epoch": 44.32, "grad_norm": 0.13445216417312622, "learning_rate": 0.00029336625465018596, "loss": 3.5042, "step": 277000 }, { "epoch": 44.336, "grad_norm": 0.1801571100950241, "learning_rate": 0.0002933638545541821, "loss": 3.4745, "step": 277100 }, { "epoch": 44.352, "grad_norm": 0.1715065836906433, "learning_rate": 0.0002933614544581783, "loss": 3.3776, "step": 277200 }, { "epoch": 44.368, "grad_norm": 0.15216653048992157, "learning_rate": 0.00029335905436217447, "loss": 3.5964, "step": 277300 }, { "epoch": 44.384, "grad_norm": 0.16377267241477966, "learning_rate": 0.00029335665426617063, "loss": 3.6009, "step": 277400 }, { "epoch": 44.4, "grad_norm": 0.15152789652347565, "learning_rate": 0.0002933542541701668, "loss": 3.4732, "step": 277500 }, { "epoch": 44.416, "grad_norm": 0.14938165247440338, "learning_rate": 0.0002933518540741629, "loss": 3.528, "step": 277600 }, { "epoch": 44.432, "grad_norm": 0.13746148347854614, "learning_rate": 0.0002933494539781591, "loss": 3.6061, "step": 277700 }, { "epoch": 44.448, "grad_norm": 0.13411153852939606, "learning_rate": 0.00029334705388215526, "loss": 3.4856, "step": 277800 }, { "epoch": 44.464, "grad_norm": 0.18585266172885895, "learning_rate": 0.0002933446537861514, "loss": 3.5931, "step": 277900 }, { "epoch": 44.48, "grad_norm": 0.14464905858039856, "learning_rate": 0.0002933422536901476, "loss": 3.6448, "step": 278000 }, { "epoch": 44.496, "grad_norm": 0.14120474457740784, "learning_rate": 0.0002933398535941437, "loss": 3.33, "step": 278100 }, { "epoch": 44.512, "grad_norm": 0.16268903017044067, "learning_rate": 0.0002933374534981399, "loss": 3.4522, "step": 278200 }, { "epoch": 44.528, "grad_norm": 0.17417596280574799, "learning_rate": 0.00029333505340213605, "loss": 3.2645, "step": 278300 }, { "epoch": 44.544, "grad_norm": 0.1481594443321228, "learning_rate": 0.0002933326533061322, "loss": 3.6196, "step": 278400 }, { "epoch": 44.56, "grad_norm": 0.20624825358390808, "learning_rate": 0.0002933302532101284, "loss": 3.2205, "step": 278500 }, { "epoch": 44.576, "grad_norm": 0.17555658519268036, "learning_rate": 0.00029332785311412456, "loss": 3.5862, "step": 278600 }, { "epoch": 44.592, "grad_norm": 0.16219860315322876, "learning_rate": 0.00029332545301812067, "loss": 3.738, "step": 278700 }, { "epoch": 44.608, "grad_norm": 0.15791872143745422, "learning_rate": 0.00029332305292211684, "loss": 3.4329, "step": 278800 }, { "epoch": 44.624, "grad_norm": 0.22273766994476318, "learning_rate": 0.0002933206768270731, "loss": 3.6421, "step": 278900 }, { "epoch": 44.64, "grad_norm": 0.1661103367805481, "learning_rate": 0.0002933182767310692, "loss": 3.4777, "step": 279000 }, { "epoch": 44.656, "grad_norm": 0.18122217059135437, "learning_rate": 0.00029331587663506537, "loss": 3.5126, "step": 279100 }, { "epoch": 44.672, "grad_norm": 0.12279829382896423, "learning_rate": 0.00029331347653906154, "loss": 3.4174, "step": 279200 }, { "epoch": 44.688, "grad_norm": 0.12625300884246826, "learning_rate": 0.0002933110764430577, "loss": 3.318, "step": 279300 }, { "epoch": 44.704, "grad_norm": 0.18073433637619019, "learning_rate": 0.0002933086763470539, "loss": 3.317, "step": 279400 }, { "epoch": 44.72, "grad_norm": 0.15470409393310547, "learning_rate": 0.00029330627625105004, "loss": 3.4654, "step": 279500 }, { "epoch": 44.736, "grad_norm": 0.18051142990589142, "learning_rate": 0.00029330387615504616, "loss": 3.5104, "step": 279600 }, { "epoch": 44.752, "grad_norm": 0.18153658509254456, "learning_rate": 0.00029330147605904233, "loss": 3.4169, "step": 279700 }, { "epoch": 44.768, "grad_norm": 0.16639018058776855, "learning_rate": 0.0002932990759630385, "loss": 3.5777, "step": 279800 }, { "epoch": 44.784, "grad_norm": 0.13046576082706451, "learning_rate": 0.0002932966998679947, "loss": 3.5254, "step": 279900 }, { "epoch": 44.8, "grad_norm": 0.10483568906784058, "learning_rate": 0.00029329429977199085, "loss": 3.4055, "step": 280000 }, { "epoch": 44.816, "grad_norm": 0.1785745769739151, "learning_rate": 0.000293291899675987, "loss": 3.3598, "step": 280100 }, { "epoch": 44.832, "grad_norm": 0.15332992374897003, "learning_rate": 0.0002932894995799832, "loss": 3.4509, "step": 280200 }, { "epoch": 44.848, "grad_norm": 0.12138580530881882, "learning_rate": 0.00029328709948397936, "loss": 3.3811, "step": 280300 }, { "epoch": 44.864, "grad_norm": 0.1440860629081726, "learning_rate": 0.00029328469938797553, "loss": 3.4278, "step": 280400 }, { "epoch": 44.88, "grad_norm": 0.2238217145204544, "learning_rate": 0.00029328229929197165, "loss": 3.8012, "step": 280500 }, { "epoch": 44.896, "grad_norm": 0.09558553993701935, "learning_rate": 0.0002932798991959678, "loss": 3.3316, "step": 280600 }, { "epoch": 44.912, "grad_norm": 0.2027113437652588, "learning_rate": 0.000293277499099964, "loss": 3.6478, "step": 280700 }, { "epoch": 44.928, "grad_norm": 0.13465124368667603, "learning_rate": 0.00029327509900396015, "loss": 3.6373, "step": 280800 }, { "epoch": 44.944, "grad_norm": 0.1399954855442047, "learning_rate": 0.0002932726989079563, "loss": 3.6136, "step": 280900 }, { "epoch": 44.96, "grad_norm": 0.14491981267929077, "learning_rate": 0.00029327029881195244, "loss": 3.4986, "step": 281000 }, { "epoch": 44.976, "grad_norm": 0.13748334348201752, "learning_rate": 0.0002932678987159486, "loss": 3.3829, "step": 281100 }, { "epoch": 44.992, "grad_norm": 0.16135229170322418, "learning_rate": 0.0002932654986199448, "loss": 3.569, "step": 281200 }, { "epoch": 45.008, "grad_norm": 0.2003306895494461, "learning_rate": 0.00029326309852394095, "loss": 3.2235, "step": 281300 }, { "epoch": 45.024, "grad_norm": 0.15842370688915253, "learning_rate": 0.0002932606984279371, "loss": 3.3112, "step": 281400 }, { "epoch": 45.04, "grad_norm": 0.11556213349103928, "learning_rate": 0.0002932582983319333, "loss": 3.2202, "step": 281500 }, { "epoch": 45.056, "grad_norm": 0.12779296934604645, "learning_rate": 0.0002932558982359294, "loss": 3.353, "step": 281600 }, { "epoch": 45.072, "grad_norm": 0.10898097604513168, "learning_rate": 0.00029325349813992557, "loss": 3.2572, "step": 281700 }, { "epoch": 45.088, "grad_norm": 0.2053193598985672, "learning_rate": 0.00029325109804392174, "loss": 3.1951, "step": 281800 }, { "epoch": 45.104, "grad_norm": 0.141945481300354, "learning_rate": 0.0002932486979479179, "loss": 3.3674, "step": 281900 }, { "epoch": 45.12, "grad_norm": 0.13190023601055145, "learning_rate": 0.0002932462978519141, "loss": 3.4983, "step": 282000 }, { "epoch": 45.136, "grad_norm": 0.12898696959018707, "learning_rate": 0.0002932438977559102, "loss": 3.3925, "step": 282100 }, { "epoch": 45.152, "grad_norm": 0.12142718583345413, "learning_rate": 0.00029324149765990636, "loss": 3.5165, "step": 282200 }, { "epoch": 45.168, "grad_norm": 0.15442530810832977, "learning_rate": 0.00029323909756390253, "loss": 3.3523, "step": 282300 }, { "epoch": 45.184, "grad_norm": 0.14434312283992767, "learning_rate": 0.0002932366974678987, "loss": 3.6819, "step": 282400 }, { "epoch": 45.2, "grad_norm": 0.1444673389196396, "learning_rate": 0.00029323429737189487, "loss": 3.3462, "step": 282500 }, { "epoch": 45.216, "grad_norm": 0.12616056203842163, "learning_rate": 0.00029323189727589104, "loss": 3.2496, "step": 282600 }, { "epoch": 45.232, "grad_norm": 0.16552147269248962, "learning_rate": 0.00029322949717988715, "loss": 3.8643, "step": 282700 }, { "epoch": 45.248, "grad_norm": 0.10914212465286255, "learning_rate": 0.0002932270970838833, "loss": 3.6589, "step": 282800 }, { "epoch": 45.264, "grad_norm": 0.14966654777526855, "learning_rate": 0.0002932246969878795, "loss": 3.6154, "step": 282900 }, { "epoch": 45.28, "grad_norm": 0.1459076851606369, "learning_rate": 0.00029322229689187566, "loss": 3.5092, "step": 283000 }, { "epoch": 45.296, "grad_norm": 0.1542978584766388, "learning_rate": 0.00029321989679587183, "loss": 3.5857, "step": 283100 }, { "epoch": 45.312, "grad_norm": 0.19426435232162476, "learning_rate": 0.000293217496699868, "loss": 3.3866, "step": 283200 }, { "epoch": 45.328, "grad_norm": 0.1491609662771225, "learning_rate": 0.0002932150966038641, "loss": 3.5355, "step": 283300 }, { "epoch": 45.344, "grad_norm": 0.1340736299753189, "learning_rate": 0.00029321272050882036, "loss": 3.5229, "step": 283400 }, { "epoch": 45.36, "grad_norm": 0.12446684390306473, "learning_rate": 0.0002932103204128165, "loss": 3.6548, "step": 283500 }, { "epoch": 45.376, "grad_norm": 0.14667928218841553, "learning_rate": 0.00029320792031681264, "loss": 3.5698, "step": 283600 }, { "epoch": 45.392, "grad_norm": 0.13137926161289215, "learning_rate": 0.0002932055202208088, "loss": 3.3846, "step": 283700 }, { "epoch": 45.408, "grad_norm": 0.1317296326160431, "learning_rate": 0.000293203120124805, "loss": 3.5825, "step": 283800 }, { "epoch": 45.424, "grad_norm": 0.15226396918296814, "learning_rate": 0.00029320072002880115, "loss": 3.8337, "step": 283900 }, { "epoch": 45.44, "grad_norm": 0.18884405493736267, "learning_rate": 0.0002931983199327973, "loss": 3.7956, "step": 284000 }, { "epoch": 45.456, "grad_norm": 0.12663649022579193, "learning_rate": 0.00029319591983679343, "loss": 3.6663, "step": 284100 }, { "epoch": 45.472, "grad_norm": 0.13002488017082214, "learning_rate": 0.0002931935197407896, "loss": 3.4465, "step": 284200 }, { "epoch": 45.488, "grad_norm": 0.1442418247461319, "learning_rate": 0.00029319111964478577, "loss": 3.6103, "step": 284300 }, { "epoch": 45.504, "grad_norm": 0.22032426297664642, "learning_rate": 0.00029318871954878194, "loss": 3.4176, "step": 284400 }, { "epoch": 45.52, "grad_norm": 0.1283002644777298, "learning_rate": 0.0002931863194527781, "loss": 3.4283, "step": 284500 }, { "epoch": 45.536, "grad_norm": 0.13123351335525513, "learning_rate": 0.0002931839193567743, "loss": 3.7018, "step": 284600 }, { "epoch": 45.552, "grad_norm": 0.18064579367637634, "learning_rate": 0.0002931815192607704, "loss": 3.378, "step": 284700 }, { "epoch": 45.568, "grad_norm": 0.16902008652687073, "learning_rate": 0.00029317911916476656, "loss": 3.3518, "step": 284800 }, { "epoch": 45.584, "grad_norm": 0.10619229078292847, "learning_rate": 0.00029317671906876273, "loss": 3.6372, "step": 284900 }, { "epoch": 45.6, "grad_norm": 0.12764042615890503, "learning_rate": 0.0002931743189727589, "loss": 3.5055, "step": 285000 }, { "epoch": 45.616, "grad_norm": 0.1885993331670761, "learning_rate": 0.00029317191887675507, "loss": 3.418, "step": 285100 }, { "epoch": 45.632, "grad_norm": 0.1319042146205902, "learning_rate": 0.00029316951878075124, "loss": 3.4413, "step": 285200 }, { "epoch": 45.648, "grad_norm": 0.1256679743528366, "learning_rate": 0.00029316711868474735, "loss": 3.5535, "step": 285300 }, { "epoch": 45.664, "grad_norm": 0.15019376575946808, "learning_rate": 0.0002931647185887435, "loss": 3.4095, "step": 285400 }, { "epoch": 45.68, "grad_norm": 0.15256153047084808, "learning_rate": 0.0002931623184927397, "loss": 3.4886, "step": 285500 }, { "epoch": 45.696, "grad_norm": 0.14027139544487, "learning_rate": 0.00029315991839673586, "loss": 3.4729, "step": 285600 }, { "epoch": 45.712, "grad_norm": 0.1339462250471115, "learning_rate": 0.00029315751830073203, "loss": 3.4175, "step": 285700 }, { "epoch": 45.728, "grad_norm": 0.13590988516807556, "learning_rate": 0.00029315511820472815, "loss": 3.4436, "step": 285800 }, { "epoch": 45.744, "grad_norm": 0.12625733017921448, "learning_rate": 0.0002931527181087243, "loss": 3.2887, "step": 285900 }, { "epoch": 45.76, "grad_norm": 0.1880204677581787, "learning_rate": 0.0002931503180127205, "loss": 3.4947, "step": 286000 }, { "epoch": 45.776, "grad_norm": 0.1707364022731781, "learning_rate": 0.00029314791791671665, "loss": 3.3399, "step": 286100 }, { "epoch": 45.792, "grad_norm": 0.1893841177225113, "learning_rate": 0.0002931455178207128, "loss": 3.4161, "step": 286200 }, { "epoch": 45.808, "grad_norm": 0.2087971270084381, "learning_rate": 0.000293143117724709, "loss": 3.6137, "step": 286300 }, { "epoch": 45.824, "grad_norm": 0.12400948256254196, "learning_rate": 0.0002931407176287051, "loss": 3.2306, "step": 286400 }, { "epoch": 45.84, "grad_norm": 0.13003037869930267, "learning_rate": 0.0002931383175327013, "loss": 3.3108, "step": 286500 }, { "epoch": 45.856, "grad_norm": 0.19825997948646545, "learning_rate": 0.00029313591743669744, "loss": 3.4771, "step": 286600 }, { "epoch": 45.872, "grad_norm": 0.11891555786132812, "learning_rate": 0.00029313354134165363, "loss": 3.3498, "step": 286700 }, { "epoch": 45.888, "grad_norm": 0.16936162114143372, "learning_rate": 0.0002931311412456498, "loss": 3.5372, "step": 286800 }, { "epoch": 45.904, "grad_norm": 0.12974895536899567, "learning_rate": 0.00029312874114964597, "loss": 3.6391, "step": 286900 }, { "epoch": 45.92, "grad_norm": 0.16700510680675507, "learning_rate": 0.00029312634105364214, "loss": 3.1939, "step": 287000 }, { "epoch": 45.936, "grad_norm": 0.10666525363922119, "learning_rate": 0.0002931239409576383, "loss": 3.3924, "step": 287100 }, { "epoch": 45.952, "grad_norm": 0.14988939464092255, "learning_rate": 0.0002931215408616345, "loss": 3.4792, "step": 287200 }, { "epoch": 45.968, "grad_norm": 0.16441181302070618, "learning_rate": 0.0002931191407656306, "loss": 3.7128, "step": 287300 }, { "epoch": 45.984, "grad_norm": 0.20187672972679138, "learning_rate": 0.00029311674066962676, "loss": 3.5682, "step": 287400 }, { "epoch": 46.0, "grad_norm": 0.1383114755153656, "learning_rate": 0.00029311434057362293, "loss": 3.5134, "step": 287500 }, { "epoch": 46.016, "grad_norm": 0.1713894158601761, "learning_rate": 0.0002931119404776191, "loss": 3.2125, "step": 287600 }, { "epoch": 46.032, "grad_norm": 0.11573702841997147, "learning_rate": 0.00029310954038161527, "loss": 3.5088, "step": 287700 }, { "epoch": 46.048, "grad_norm": 0.1351267546415329, "learning_rate": 0.0002931071402856114, "loss": 3.4569, "step": 287800 }, { "epoch": 46.064, "grad_norm": 0.15071484446525574, "learning_rate": 0.00029310474018960755, "loss": 3.429, "step": 287900 }, { "epoch": 46.08, "grad_norm": 0.10531795024871826, "learning_rate": 0.0002931023400936037, "loss": 3.4192, "step": 288000 }, { "epoch": 46.096, "grad_norm": 0.13621048629283905, "learning_rate": 0.0002930999399975999, "loss": 3.5626, "step": 288100 }, { "epoch": 46.112, "grad_norm": 0.13405868411064148, "learning_rate": 0.00029309753990159606, "loss": 3.4114, "step": 288200 }, { "epoch": 46.128, "grad_norm": 0.1718856245279312, "learning_rate": 0.00029309513980559223, "loss": 3.3622, "step": 288300 }, { "epoch": 46.144, "grad_norm": 0.1297181397676468, "learning_rate": 0.00029309273970958835, "loss": 3.3292, "step": 288400 }, { "epoch": 46.16, "grad_norm": 0.12089966982603073, "learning_rate": 0.0002930903396135845, "loss": 3.3957, "step": 288500 }, { "epoch": 46.176, "grad_norm": 0.14486800134181976, "learning_rate": 0.0002930879395175807, "loss": 3.5016, "step": 288600 }, { "epoch": 46.192, "grad_norm": 0.13648533821105957, "learning_rate": 0.00029308553942157685, "loss": 3.6381, "step": 288700 }, { "epoch": 46.208, "grad_norm": 0.14221973717212677, "learning_rate": 0.000293083139325573, "loss": 3.2327, "step": 288800 }, { "epoch": 46.224, "grad_norm": 0.12373923510313034, "learning_rate": 0.00029308073922956914, "loss": 3.4037, "step": 288900 }, { "epoch": 46.24, "grad_norm": 0.14260698854923248, "learning_rate": 0.0002930783391335653, "loss": 3.2914, "step": 289000 }, { "epoch": 46.256, "grad_norm": 0.16024409234523773, "learning_rate": 0.0002930759390375615, "loss": 3.3506, "step": 289100 }, { "epoch": 46.272, "grad_norm": 0.11378096044063568, "learning_rate": 0.00029307353894155765, "loss": 3.2966, "step": 289200 }, { "epoch": 46.288, "grad_norm": 0.15745824575424194, "learning_rate": 0.0002930711388455538, "loss": 3.6754, "step": 289300 }, { "epoch": 46.304, "grad_norm": 0.148289293050766, "learning_rate": 0.00029306873874955, "loss": 3.4296, "step": 289400 }, { "epoch": 46.32, "grad_norm": 0.11942636221647263, "learning_rate": 0.0002930663386535461, "loss": 3.626, "step": 289500 }, { "epoch": 46.336, "grad_norm": 0.15883904695510864, "learning_rate": 0.00029306393855754227, "loss": 3.661, "step": 289600 }, { "epoch": 46.352, "grad_norm": 0.1465967446565628, "learning_rate": 0.00029306153846153844, "loss": 3.3373, "step": 289700 }, { "epoch": 46.368, "grad_norm": 0.1392153799533844, "learning_rate": 0.0002930591383655346, "loss": 3.4597, "step": 289800 }, { "epoch": 46.384, "grad_norm": 0.13540080189704895, "learning_rate": 0.0002930567382695308, "loss": 3.3812, "step": 289900 }, { "epoch": 46.4, "grad_norm": 0.12052489072084427, "learning_rate": 0.0002930543381735269, "loss": 3.5023, "step": 290000 }, { "epoch": 46.416, "grad_norm": 0.17089907824993134, "learning_rate": 0.00029305193807752306, "loss": 3.5662, "step": 290100 }, { "epoch": 46.432, "grad_norm": 0.14174985885620117, "learning_rate": 0.00029304953798151923, "loss": 3.5804, "step": 290200 }, { "epoch": 46.448, "grad_norm": 0.1391628384590149, "learning_rate": 0.0002930471378855154, "loss": 3.6952, "step": 290300 }, { "epoch": 46.464, "grad_norm": 0.11880426108837128, "learning_rate": 0.00029304473778951157, "loss": 3.2734, "step": 290400 }, { "epoch": 46.48, "grad_norm": 0.14283739030361176, "learning_rate": 0.00029304233769350774, "loss": 3.3219, "step": 290500 }, { "epoch": 46.496, "grad_norm": 0.11448802798986435, "learning_rate": 0.00029303993759750385, "loss": 3.6805, "step": 290600 }, { "epoch": 46.512, "grad_norm": 0.14918987452983856, "learning_rate": 0.0002930375375015, "loss": 3.2734, "step": 290700 }, { "epoch": 46.528, "grad_norm": 0.16393804550170898, "learning_rate": 0.0002930351374054962, "loss": 3.4909, "step": 290800 }, { "epoch": 46.544, "grad_norm": 0.1624254435300827, "learning_rate": 0.00029303273730949236, "loss": 3.1743, "step": 290900 }, { "epoch": 46.56, "grad_norm": 0.15605472028255463, "learning_rate": 0.00029303033721348853, "loss": 3.3516, "step": 291000 }, { "epoch": 46.576, "grad_norm": 0.14577865600585938, "learning_rate": 0.0002930279611184447, "loss": 3.8344, "step": 291100 }, { "epoch": 46.592, "grad_norm": 0.18258944153785706, "learning_rate": 0.0002930255610224409, "loss": 3.2525, "step": 291200 }, { "epoch": 46.608, "grad_norm": 0.12408775836229324, "learning_rate": 0.00029302316092643706, "loss": 3.2082, "step": 291300 }, { "epoch": 46.624, "grad_norm": 0.16609829664230347, "learning_rate": 0.0002930207608304332, "loss": 3.2061, "step": 291400 }, { "epoch": 46.64, "grad_norm": 0.16239382326602936, "learning_rate": 0.00029301838473538936, "loss": 3.7638, "step": 291500 }, { "epoch": 46.656, "grad_norm": 0.14647479355335236, "learning_rate": 0.00029301598463938553, "loss": 3.5837, "step": 291600 }, { "epoch": 46.672, "grad_norm": 0.12308536469936371, "learning_rate": 0.0002930135845433817, "loss": 3.5024, "step": 291700 }, { "epoch": 46.688, "grad_norm": 0.15651263296604156, "learning_rate": 0.00029301118444737787, "loss": 3.3663, "step": 291800 }, { "epoch": 46.704, "grad_norm": 0.13770176470279694, "learning_rate": 0.00029300878435137404, "loss": 3.2935, "step": 291900 }, { "epoch": 46.72, "grad_norm": 0.1298212707042694, "learning_rate": 0.00029300638425537015, "loss": 3.2944, "step": 292000 }, { "epoch": 46.736, "grad_norm": 0.14035199582576752, "learning_rate": 0.0002930039841593663, "loss": 3.467, "step": 292100 }, { "epoch": 46.752, "grad_norm": 0.13452483713626862, "learning_rate": 0.0002930015840633625, "loss": 3.3878, "step": 292200 }, { "epoch": 46.768, "grad_norm": 0.14606568217277527, "learning_rate": 0.00029299918396735866, "loss": 3.2631, "step": 292300 }, { "epoch": 46.784, "grad_norm": 0.1612800657749176, "learning_rate": 0.00029299678387135483, "loss": 3.3325, "step": 292400 }, { "epoch": 46.8, "grad_norm": 0.17206759750843048, "learning_rate": 0.000292994383775351, "loss": 3.4534, "step": 292500 }, { "epoch": 46.816, "grad_norm": 0.12367130815982819, "learning_rate": 0.00029299198367934717, "loss": 3.4048, "step": 292600 }, { "epoch": 46.832, "grad_norm": 0.1129692867398262, "learning_rate": 0.00029298958358334334, "loss": 3.5043, "step": 292700 }, { "epoch": 46.848, "grad_norm": 0.13876508176326752, "learning_rate": 0.0002929871834873395, "loss": 3.2546, "step": 292800 }, { "epoch": 46.864, "grad_norm": 0.16503682732582092, "learning_rate": 0.0002929847833913356, "loss": 3.7122, "step": 292900 }, { "epoch": 46.88, "grad_norm": 0.18367713689804077, "learning_rate": 0.0002929823832953318, "loss": 3.3702, "step": 293000 }, { "epoch": 46.896, "grad_norm": 0.15001420676708221, "learning_rate": 0.00029297998319932796, "loss": 3.4367, "step": 293100 }, { "epoch": 46.912, "grad_norm": 0.18632866442203522, "learning_rate": 0.0002929775831033241, "loss": 3.1789, "step": 293200 }, { "epoch": 46.928, "grad_norm": 0.12426871806383133, "learning_rate": 0.0002929751830073203, "loss": 3.5193, "step": 293300 }, { "epoch": 46.944, "grad_norm": 0.1463916301727295, "learning_rate": 0.00029297278291131647, "loss": 3.2759, "step": 293400 }, { "epoch": 46.96, "grad_norm": 0.22038400173187256, "learning_rate": 0.0002929703828153126, "loss": 3.4947, "step": 293500 }, { "epoch": 46.976, "grad_norm": 0.13807697594165802, "learning_rate": 0.00029296798271930875, "loss": 3.507, "step": 293600 }, { "epoch": 46.992, "grad_norm": 0.16728141903877258, "learning_rate": 0.0002929655826233049, "loss": 3.2478, "step": 293700 }, { "epoch": 47.008, "grad_norm": 0.1269054114818573, "learning_rate": 0.0002929631825273011, "loss": 3.2226, "step": 293800 }, { "epoch": 47.024, "grad_norm": 0.18167871236801147, "learning_rate": 0.00029296078243129726, "loss": 3.3852, "step": 293900 }, { "epoch": 47.04, "grad_norm": 0.14696857333183289, "learning_rate": 0.00029295838233529337, "loss": 3.1772, "step": 294000 }, { "epoch": 47.056, "grad_norm": 0.181949183344841, "learning_rate": 0.00029295598223928954, "loss": 3.0847, "step": 294100 }, { "epoch": 47.072, "grad_norm": 0.1427411288022995, "learning_rate": 0.0002929535821432857, "loss": 3.2704, "step": 294200 }, { "epoch": 47.088, "grad_norm": 0.1611768901348114, "learning_rate": 0.0002929511820472819, "loss": 3.5081, "step": 294300 }, { "epoch": 47.104, "grad_norm": 0.1626470386981964, "learning_rate": 0.00029294878195127805, "loss": 3.3638, "step": 294400 }, { "epoch": 47.12, "grad_norm": 0.12833905220031738, "learning_rate": 0.0002929463818552742, "loss": 3.4306, "step": 294500 }, { "epoch": 47.136, "grad_norm": 0.19535695016384125, "learning_rate": 0.00029294398175927033, "loss": 3.4389, "step": 294600 }, { "epoch": 47.152, "grad_norm": 0.12675967812538147, "learning_rate": 0.0002929415816632665, "loss": 3.4453, "step": 294700 }, { "epoch": 47.168, "grad_norm": 0.18949109315872192, "learning_rate": 0.00029293918156726267, "loss": 3.7726, "step": 294800 }, { "epoch": 47.184, "grad_norm": 0.172979936003685, "learning_rate": 0.00029293678147125884, "loss": 3.4247, "step": 294900 }, { "epoch": 47.2, "grad_norm": 0.13652417063713074, "learning_rate": 0.000292934381375255, "loss": 3.4252, "step": 295000 }, { "epoch": 47.216, "grad_norm": 0.3640148639678955, "learning_rate": 0.0002929319812792511, "loss": 3.3167, "step": 295100 }, { "epoch": 47.232, "grad_norm": 0.1760084480047226, "learning_rate": 0.0002929295811832473, "loss": 3.5655, "step": 295200 }, { "epoch": 47.248, "grad_norm": 0.13810941576957703, "learning_rate": 0.00029292718108724346, "loss": 3.4773, "step": 295300 }, { "epoch": 47.264, "grad_norm": 0.11928413063287735, "learning_rate": 0.00029292478099123963, "loss": 3.4824, "step": 295400 }, { "epoch": 47.28, "grad_norm": 0.15801222622394562, "learning_rate": 0.0002929223808952358, "loss": 3.1887, "step": 295500 }, { "epoch": 47.296, "grad_norm": 0.18956507742404938, "learning_rate": 0.00029291998079923197, "loss": 3.2021, "step": 295600 }, { "epoch": 47.312, "grad_norm": 0.15280699729919434, "learning_rate": 0.0002929175807032281, "loss": 3.6204, "step": 295700 }, { "epoch": 47.328, "grad_norm": 0.12011358886957169, "learning_rate": 0.00029291518060722425, "loss": 3.4612, "step": 295800 }, { "epoch": 47.344, "grad_norm": 0.19788897037506104, "learning_rate": 0.0002929127805112204, "loss": 3.5945, "step": 295900 }, { "epoch": 47.36, "grad_norm": 0.11270062625408173, "learning_rate": 0.0002929104044161766, "loss": 3.4896, "step": 296000 }, { "epoch": 47.376, "grad_norm": 0.16197362542152405, "learning_rate": 0.0002929080043201728, "loss": 3.2306, "step": 296100 }, { "epoch": 47.392, "grad_norm": 0.15636633336544037, "learning_rate": 0.00029290560422416895, "loss": 3.516, "step": 296200 }, { "epoch": 47.408, "grad_norm": 0.12042834609746933, "learning_rate": 0.0002929032041281651, "loss": 3.392, "step": 296300 }, { "epoch": 47.424, "grad_norm": 0.13974405825138092, "learning_rate": 0.0002929008040321613, "loss": 3.2314, "step": 296400 }, { "epoch": 47.44, "grad_norm": 0.15637369453907013, "learning_rate": 0.00029289840393615746, "loss": 3.1683, "step": 296500 }, { "epoch": 47.456, "grad_norm": 0.13403476774692535, "learning_rate": 0.0002928960038401536, "loss": 3.4337, "step": 296600 }, { "epoch": 47.472, "grad_norm": 0.15799444913864136, "learning_rate": 0.00029289360374414974, "loss": 3.4505, "step": 296700 }, { "epoch": 47.488, "grad_norm": 0.1588505655527115, "learning_rate": 0.0002928912036481459, "loss": 3.1548, "step": 296800 }, { "epoch": 47.504, "grad_norm": 0.16606377065181732, "learning_rate": 0.0002928888035521421, "loss": 3.6133, "step": 296900 }, { "epoch": 47.52, "grad_norm": 0.18139302730560303, "learning_rate": 0.00029288640345613825, "loss": 3.3673, "step": 297000 }, { "epoch": 47.536, "grad_norm": 0.17892329394817352, "learning_rate": 0.00029288400336013437, "loss": 3.346, "step": 297100 }, { "epoch": 47.552, "grad_norm": 0.17963369190692902, "learning_rate": 0.00029288160326413053, "loss": 3.3555, "step": 297200 }, { "epoch": 47.568, "grad_norm": 0.16803108155727386, "learning_rate": 0.0002928792031681267, "loss": 3.6745, "step": 297300 }, { "epoch": 47.584, "grad_norm": 0.17306730151176453, "learning_rate": 0.00029287680307212287, "loss": 3.403, "step": 297400 }, { "epoch": 47.6, "grad_norm": 0.12998683750629425, "learning_rate": 0.00029287440297611904, "loss": 3.3441, "step": 297500 }, { "epoch": 47.616, "grad_norm": 0.12399992346763611, "learning_rate": 0.00029287202688107523, "loss": 3.6591, "step": 297600 }, { "epoch": 47.632, "grad_norm": 0.13444332778453827, "learning_rate": 0.00029286962678507135, "loss": 3.456, "step": 297700 }, { "epoch": 47.648, "grad_norm": 0.1419672816991806, "learning_rate": 0.0002928672266890675, "loss": 3.4847, "step": 297800 }, { "epoch": 47.664, "grad_norm": 0.17194510996341705, "learning_rate": 0.0002928648265930637, "loss": 3.538, "step": 297900 }, { "epoch": 47.68, "grad_norm": 0.1878005415201187, "learning_rate": 0.00029286242649705985, "loss": 3.512, "step": 298000 }, { "epoch": 47.696, "grad_norm": 0.14014719426631927, "learning_rate": 0.000292860026401056, "loss": 3.5127, "step": 298100 }, { "epoch": 47.712, "grad_norm": 0.1728430986404419, "learning_rate": 0.00029285762630505214, "loss": 3.5601, "step": 298200 }, { "epoch": 47.728, "grad_norm": 0.19693323969841003, "learning_rate": 0.0002928552262090483, "loss": 3.4382, "step": 298300 }, { "epoch": 47.744, "grad_norm": 0.1276881992816925, "learning_rate": 0.0002928528261130445, "loss": 3.4803, "step": 298400 }, { "epoch": 47.76, "grad_norm": 0.19769306480884552, "learning_rate": 0.00029285042601704064, "loss": 3.5894, "step": 298500 }, { "epoch": 47.776, "grad_norm": 0.1318090856075287, "learning_rate": 0.0002928480259210368, "loss": 3.1101, "step": 298600 }, { "epoch": 47.792, "grad_norm": 0.17290163040161133, "learning_rate": 0.000292845625825033, "loss": 3.3749, "step": 298700 }, { "epoch": 47.808, "grad_norm": 0.11835433542728424, "learning_rate": 0.00029284322572902915, "loss": 3.1245, "step": 298800 }, { "epoch": 47.824, "grad_norm": 0.13306981325149536, "learning_rate": 0.0002928408256330253, "loss": 3.7012, "step": 298900 }, { "epoch": 47.84, "grad_norm": 0.1157679557800293, "learning_rate": 0.0002928384255370215, "loss": 3.5746, "step": 299000 }, { "epoch": 47.856, "grad_norm": 0.1853678971529007, "learning_rate": 0.0002928360254410176, "loss": 3.3504, "step": 299100 }, { "epoch": 47.872, "grad_norm": 0.13322237133979797, "learning_rate": 0.0002928336253450138, "loss": 3.4267, "step": 299200 }, { "epoch": 47.888, "grad_norm": 0.1461511254310608, "learning_rate": 0.00029283122524900994, "loss": 3.436, "step": 299300 }, { "epoch": 47.904, "grad_norm": 0.15524837374687195, "learning_rate": 0.0002928288251530061, "loss": 3.4982, "step": 299400 }, { "epoch": 47.92, "grad_norm": 0.15792042016983032, "learning_rate": 0.0002928264250570023, "loss": 3.4688, "step": 299500 }, { "epoch": 47.936, "grad_norm": 0.14071331918239594, "learning_rate": 0.00029282402496099845, "loss": 3.5322, "step": 299600 }, { "epoch": 47.952, "grad_norm": 0.14697420597076416, "learning_rate": 0.00029282162486499457, "loss": 3.4556, "step": 299700 }, { "epoch": 47.968, "grad_norm": 0.1555061787366867, "learning_rate": 0.00029281922476899074, "loss": 3.5931, "step": 299800 }, { "epoch": 47.984, "grad_norm": 0.18719704449176788, "learning_rate": 0.0002928168246729869, "loss": 3.5654, "step": 299900 }, { "epoch": 48.0, "grad_norm": 0.12757553160190582, "learning_rate": 0.0002928144485779431, "loss": 3.5631, "step": 300000 }, { "epoch": 48.016, "grad_norm": 0.1527114063501358, "learning_rate": 0.00029281204848193926, "loss": 3.4385, "step": 300100 }, { "epoch": 48.032, "grad_norm": 0.15064184367656708, "learning_rate": 0.0002928096483859354, "loss": 3.2668, "step": 300200 }, { "epoch": 48.048, "grad_norm": 0.16151970624923706, "learning_rate": 0.00029280724828993155, "loss": 3.424, "step": 300300 }, { "epoch": 48.064, "grad_norm": 0.15914319455623627, "learning_rate": 0.0002928048481939277, "loss": 3.0403, "step": 300400 }, { "epoch": 48.08, "grad_norm": 0.1498068869113922, "learning_rate": 0.0002928024480979239, "loss": 3.3522, "step": 300500 }, { "epoch": 48.096, "grad_norm": 0.2268165498971939, "learning_rate": 0.00029280004800192005, "loss": 3.3519, "step": 300600 }, { "epoch": 48.112, "grad_norm": 0.105367511510849, "learning_rate": 0.0002927976479059162, "loss": 3.3427, "step": 300700 }, { "epoch": 48.128, "grad_norm": 0.15236985683441162, "learning_rate": 0.00029279524780991234, "loss": 3.2862, "step": 300800 }, { "epoch": 48.144, "grad_norm": 0.14581796526908875, "learning_rate": 0.0002927928477139085, "loss": 3.2799, "step": 300900 }, { "epoch": 48.16, "grad_norm": 0.1331760436296463, "learning_rate": 0.0002927904476179047, "loss": 3.4922, "step": 301000 }, { "epoch": 48.176, "grad_norm": 0.16795989871025085, "learning_rate": 0.00029278804752190085, "loss": 3.1424, "step": 301100 }, { "epoch": 48.192, "grad_norm": 0.13952326774597168, "learning_rate": 0.000292785647425897, "loss": 3.3513, "step": 301200 }, { "epoch": 48.208, "grad_norm": 0.12885794043540955, "learning_rate": 0.00029278324732989313, "loss": 3.496, "step": 301300 }, { "epoch": 48.224, "grad_norm": 0.13620954751968384, "learning_rate": 0.0002927808472338893, "loss": 3.3809, "step": 301400 }, { "epoch": 48.24, "grad_norm": 0.12675289809703827, "learning_rate": 0.00029277844713788547, "loss": 3.4621, "step": 301500 }, { "epoch": 48.256, "grad_norm": 0.18377567827701569, "learning_rate": 0.00029277604704188164, "loss": 3.485, "step": 301600 }, { "epoch": 48.272, "grad_norm": 0.1615479588508606, "learning_rate": 0.0002927736469458778, "loss": 3.2656, "step": 301700 }, { "epoch": 48.288, "grad_norm": 0.15714795887470245, "learning_rate": 0.000292771246849874, "loss": 3.5195, "step": 301800 }, { "epoch": 48.304, "grad_norm": 0.14529140293598175, "learning_rate": 0.00029276884675387015, "loss": 3.4183, "step": 301900 }, { "epoch": 48.32, "grad_norm": 0.15897373855113983, "learning_rate": 0.0002927664466578663, "loss": 3.4677, "step": 302000 }, { "epoch": 48.336, "grad_norm": 0.15734288096427917, "learning_rate": 0.0002927640465618625, "loss": 3.19, "step": 302100 }, { "epoch": 48.352, "grad_norm": 0.1462222784757614, "learning_rate": 0.0002927616704668186, "loss": 3.5205, "step": 302200 }, { "epoch": 48.368, "grad_norm": 0.12426906079053879, "learning_rate": 0.0002927592703708148, "loss": 3.2899, "step": 302300 }, { "epoch": 48.384, "grad_norm": 0.13904991745948792, "learning_rate": 0.00029275687027481096, "loss": 3.3475, "step": 302400 }, { "epoch": 48.4, "grad_norm": 0.15905629098415375, "learning_rate": 0.0002927544701788071, "loss": 3.1418, "step": 302500 }, { "epoch": 48.416, "grad_norm": 0.14589905738830566, "learning_rate": 0.0002927520700828033, "loss": 3.6127, "step": 302600 }, { "epoch": 48.432, "grad_norm": 0.1258993148803711, "learning_rate": 0.00029274966998679946, "loss": 3.6162, "step": 302700 }, { "epoch": 48.448, "grad_norm": 0.12040545791387558, "learning_rate": 0.0002927472698907956, "loss": 3.2991, "step": 302800 }, { "epoch": 48.464, "grad_norm": 0.1427164375782013, "learning_rate": 0.00029274486979479175, "loss": 3.4336, "step": 302900 }, { "epoch": 48.48, "grad_norm": 0.13156598806381226, "learning_rate": 0.0002927424696987879, "loss": 3.2392, "step": 303000 }, { "epoch": 48.496, "grad_norm": 0.15256963670253754, "learning_rate": 0.0002927400696027841, "loss": 3.3739, "step": 303100 }, { "epoch": 48.512, "grad_norm": 0.1322268396615982, "learning_rate": 0.00029273766950678026, "loss": 3.4524, "step": 303200 }, { "epoch": 48.528, "grad_norm": 0.17755664885044098, "learning_rate": 0.00029273526941077637, "loss": 3.3082, "step": 303300 }, { "epoch": 48.544, "grad_norm": 0.149530827999115, "learning_rate": 0.00029273286931477254, "loss": 3.6323, "step": 303400 }, { "epoch": 48.56, "grad_norm": 0.1665380895137787, "learning_rate": 0.0002927304692187687, "loss": 3.3386, "step": 303500 }, { "epoch": 48.576, "grad_norm": 0.1368718296289444, "learning_rate": 0.0002927280691227649, "loss": 3.7027, "step": 303600 }, { "epoch": 48.592, "grad_norm": 0.1340893805027008, "learning_rate": 0.00029272566902676105, "loss": 3.3491, "step": 303700 }, { "epoch": 48.608, "grad_norm": 0.13427545130252838, "learning_rate": 0.0002927232689307572, "loss": 3.5443, "step": 303800 }, { "epoch": 48.624, "grad_norm": 0.14170998334884644, "learning_rate": 0.00029272086883475333, "loss": 3.4902, "step": 303900 }, { "epoch": 48.64, "grad_norm": 0.15798647701740265, "learning_rate": 0.0002927184687387495, "loss": 3.3371, "step": 304000 }, { "epoch": 48.656, "grad_norm": 0.1416149139404297, "learning_rate": 0.00029271606864274567, "loss": 3.611, "step": 304100 }, { "epoch": 48.672, "grad_norm": 0.1171884685754776, "learning_rate": 0.00029271366854674184, "loss": 3.377, "step": 304200 }, { "epoch": 48.688, "grad_norm": 0.12915319204330444, "learning_rate": 0.000292711268450738, "loss": 3.5215, "step": 304300 }, { "epoch": 48.704, "grad_norm": 0.1330493986606598, "learning_rate": 0.0002927088683547341, "loss": 3.4579, "step": 304400 }, { "epoch": 48.72, "grad_norm": 0.13495703041553497, "learning_rate": 0.0002927064682587303, "loss": 3.3085, "step": 304500 }, { "epoch": 48.736, "grad_norm": 0.11526744812726974, "learning_rate": 0.00029270406816272646, "loss": 3.3837, "step": 304600 }, { "epoch": 48.752, "grad_norm": 0.11145114153623581, "learning_rate": 0.00029270166806672263, "loss": 3.2675, "step": 304700 }, { "epoch": 48.768, "grad_norm": 0.11500809341669083, "learning_rate": 0.0002926992679707188, "loss": 3.1829, "step": 304800 }, { "epoch": 48.784, "grad_norm": 0.1631956398487091, "learning_rate": 0.00029269686787471497, "loss": 3.4344, "step": 304900 }, { "epoch": 48.8, "grad_norm": 0.13540637493133545, "learning_rate": 0.00029269446777871114, "loss": 3.4833, "step": 305000 }, { "epoch": 48.816, "grad_norm": 0.15787220001220703, "learning_rate": 0.0002926920676827073, "loss": 3.4109, "step": 305100 }, { "epoch": 48.832, "grad_norm": 0.14519421756267548, "learning_rate": 0.0002926896675867035, "loss": 3.2832, "step": 305200 }, { "epoch": 48.848, "grad_norm": 0.16504503786563873, "learning_rate": 0.0002926872674906996, "loss": 3.4029, "step": 305300 }, { "epoch": 48.864, "grad_norm": 0.13371451199054718, "learning_rate": 0.00029268486739469576, "loss": 3.2553, "step": 305400 }, { "epoch": 48.88, "grad_norm": 0.14079883694648743, "learning_rate": 0.00029268246729869193, "loss": 3.5022, "step": 305500 }, { "epoch": 48.896, "grad_norm": 0.12718258798122406, "learning_rate": 0.0002926800672026881, "loss": 3.226, "step": 305600 }, { "epoch": 48.912, "grad_norm": 0.15953890979290009, "learning_rate": 0.00029267766710668427, "loss": 3.5145, "step": 305700 }, { "epoch": 48.928, "grad_norm": 0.1663414090871811, "learning_rate": 0.00029267526701068044, "loss": 3.3715, "step": 305800 }, { "epoch": 48.944, "grad_norm": 0.1909690648317337, "learning_rate": 0.00029267286691467655, "loss": 3.5014, "step": 305900 }, { "epoch": 48.96, "grad_norm": 0.18063285946846008, "learning_rate": 0.0002926704668186727, "loss": 3.1204, "step": 306000 }, { "epoch": 48.976, "grad_norm": 0.16490024328231812, "learning_rate": 0.0002926680667226689, "loss": 3.2836, "step": 306100 }, { "epoch": 48.992, "grad_norm": 0.1852523684501648, "learning_rate": 0.00029266566662666506, "loss": 3.2278, "step": 306200 }, { "epoch": 49.008, "grad_norm": 0.12620970606803894, "learning_rate": 0.00029266326653066123, "loss": 3.2235, "step": 306300 }, { "epoch": 49.024, "grad_norm": 0.1455821394920349, "learning_rate": 0.00029266086643465734, "loss": 3.3934, "step": 306400 }, { "epoch": 49.04, "grad_norm": 0.19908060133457184, "learning_rate": 0.0002926584663386535, "loss": 3.1959, "step": 306500 }, { "epoch": 49.056, "grad_norm": 0.14544044435024261, "learning_rate": 0.0002926560662426497, "loss": 3.2331, "step": 306600 }, { "epoch": 49.072, "grad_norm": 0.15325766801834106, "learning_rate": 0.00029265369014760587, "loss": 3.348, "step": 306700 }, { "epoch": 49.088, "grad_norm": 0.17598554491996765, "learning_rate": 0.00029265129005160204, "loss": 3.4638, "step": 306800 }, { "epoch": 49.104, "grad_norm": 0.16760598123073578, "learning_rate": 0.0002926488899555982, "loss": 3.3673, "step": 306900 }, { "epoch": 49.12, "grad_norm": 0.14175668358802795, "learning_rate": 0.0002926464898595943, "loss": 3.3224, "step": 307000 }, { "epoch": 49.136, "grad_norm": 0.12724894285202026, "learning_rate": 0.0002926440897635905, "loss": 3.5114, "step": 307100 }, { "epoch": 49.152, "grad_norm": 0.17642638087272644, "learning_rate": 0.00029264168966758666, "loss": 3.0097, "step": 307200 }, { "epoch": 49.168, "grad_norm": 0.14316816627979279, "learning_rate": 0.00029263928957158283, "loss": 3.4672, "step": 307300 }, { "epoch": 49.184, "grad_norm": 0.17261099815368652, "learning_rate": 0.000292636889475579, "loss": 3.3006, "step": 307400 }, { "epoch": 49.2, "grad_norm": 0.12539535760879517, "learning_rate": 0.0002926344893795751, "loss": 3.1715, "step": 307500 }, { "epoch": 49.216, "grad_norm": 0.14177612960338593, "learning_rate": 0.0002926320892835713, "loss": 3.5626, "step": 307600 }, { "epoch": 49.232, "grad_norm": 0.14468742907047272, "learning_rate": 0.00029262968918756745, "loss": 3.4257, "step": 307700 }, { "epoch": 49.248, "grad_norm": 0.17178675532341003, "learning_rate": 0.0002926272890915636, "loss": 2.9571, "step": 307800 }, { "epoch": 49.264, "grad_norm": 0.1535005271434784, "learning_rate": 0.0002926248889955598, "loss": 3.2527, "step": 307900 }, { "epoch": 49.28, "grad_norm": 0.1474490910768509, "learning_rate": 0.00029262248889955596, "loss": 3.3919, "step": 308000 }, { "epoch": 49.296, "grad_norm": 0.15156246721744537, "learning_rate": 0.00029262008880355213, "loss": 3.4851, "step": 308100 }, { "epoch": 49.312, "grad_norm": 0.1656617522239685, "learning_rate": 0.0002926176887075483, "loss": 3.6056, "step": 308200 }, { "epoch": 49.328, "grad_norm": 0.1583324372768402, "learning_rate": 0.0002926153126125045, "loss": 3.5364, "step": 308300 }, { "epoch": 49.344, "grad_norm": 0.1491817682981491, "learning_rate": 0.0002926129125165006, "loss": 3.2277, "step": 308400 }, { "epoch": 49.36, "grad_norm": 0.1447921097278595, "learning_rate": 0.0002926105124204968, "loss": 3.4012, "step": 308500 }, { "epoch": 49.376, "grad_norm": 0.17154252529144287, "learning_rate": 0.00029260811232449294, "loss": 3.4646, "step": 308600 }, { "epoch": 49.392, "grad_norm": 0.13414283096790314, "learning_rate": 0.0002926057122284891, "loss": 3.4364, "step": 308700 }, { "epoch": 49.408, "grad_norm": 0.14263108372688293, "learning_rate": 0.0002926033121324853, "loss": 3.307, "step": 308800 }, { "epoch": 49.424, "grad_norm": 0.12120252847671509, "learning_rate": 0.00029260091203648145, "loss": 3.5405, "step": 308900 }, { "epoch": 49.44, "grad_norm": 0.16384463012218475, "learning_rate": 0.00029259851194047757, "loss": 3.0433, "step": 309000 }, { "epoch": 49.456, "grad_norm": 0.14016559720039368, "learning_rate": 0.00029259611184447373, "loss": 3.1713, "step": 309100 }, { "epoch": 49.472, "grad_norm": 0.16465577483177185, "learning_rate": 0.0002925937117484699, "loss": 3.3579, "step": 309200 }, { "epoch": 49.488, "grad_norm": 0.1386871039867401, "learning_rate": 0.00029259131165246607, "loss": 3.4393, "step": 309300 }, { "epoch": 49.504, "grad_norm": 0.1558290719985962, "learning_rate": 0.00029258891155646224, "loss": 3.1419, "step": 309400 }, { "epoch": 49.52, "grad_norm": 0.14429768919944763, "learning_rate": 0.00029258651146045836, "loss": 3.3518, "step": 309500 }, { "epoch": 49.536, "grad_norm": 0.1292973756790161, "learning_rate": 0.0002925841113644545, "loss": 3.332, "step": 309600 }, { "epoch": 49.552, "grad_norm": 0.25383371114730835, "learning_rate": 0.0002925817112684507, "loss": 3.3268, "step": 309700 }, { "epoch": 49.568, "grad_norm": 0.14928728342056274, "learning_rate": 0.00029257931117244686, "loss": 3.2364, "step": 309800 }, { "epoch": 49.584, "grad_norm": 0.14944873750209808, "learning_rate": 0.00029257691107644303, "loss": 3.1873, "step": 309900 }, { "epoch": 49.6, "grad_norm": 0.15341447293758392, "learning_rate": 0.0002925745109804392, "loss": 3.2475, "step": 310000 }, { "epoch": 49.616, "grad_norm": 0.14407984912395477, "learning_rate": 0.0002925721108844353, "loss": 3.5093, "step": 310100 }, { "epoch": 49.632, "grad_norm": 0.12437509000301361, "learning_rate": 0.0002925697107884315, "loss": 3.2815, "step": 310200 }, { "epoch": 49.648, "grad_norm": 0.12687896192073822, "learning_rate": 0.00029256731069242766, "loss": 3.2296, "step": 310300 }, { "epoch": 49.664, "grad_norm": 0.14548717439174652, "learning_rate": 0.0002925649105964238, "loss": 3.1554, "step": 310400 }, { "epoch": 49.68, "grad_norm": 0.16296228766441345, "learning_rate": 0.00029256251050042, "loss": 3.4014, "step": 310500 }, { "epoch": 49.696, "grad_norm": 0.14272156357765198, "learning_rate": 0.00029256011040441616, "loss": 3.2776, "step": 310600 }, { "epoch": 49.712, "grad_norm": 0.15589386224746704, "learning_rate": 0.0002925577103084123, "loss": 3.4749, "step": 310700 }, { "epoch": 49.728, "grad_norm": 0.14554454386234283, "learning_rate": 0.00029255531021240845, "loss": 3.5522, "step": 310800 }, { "epoch": 49.744, "grad_norm": 0.13375023007392883, "learning_rate": 0.0002925529101164046, "loss": 3.2538, "step": 310900 }, { "epoch": 49.76, "grad_norm": 0.1303774118423462, "learning_rate": 0.0002925505100204008, "loss": 3.3587, "step": 311000 }, { "epoch": 49.776, "grad_norm": 0.14139986038208008, "learning_rate": 0.00029254810992439696, "loss": 3.4076, "step": 311100 }, { "epoch": 49.792, "grad_norm": 0.16969220340251923, "learning_rate": 0.0002925457098283931, "loss": 3.3505, "step": 311200 }, { "epoch": 49.808, "grad_norm": 0.2406594157218933, "learning_rate": 0.0002925433097323893, "loss": 3.4341, "step": 311300 }, { "epoch": 49.824, "grad_norm": 0.17198412120342255, "learning_rate": 0.00029254090963638546, "loss": 3.3435, "step": 311400 }, { "epoch": 49.84, "grad_norm": 0.16874215006828308, "learning_rate": 0.0002925385095403816, "loss": 3.3316, "step": 311500 }, { "epoch": 49.856, "grad_norm": 0.16119834780693054, "learning_rate": 0.00029253610944437775, "loss": 3.2503, "step": 311600 }, { "epoch": 49.872, "grad_norm": 0.14184482395648956, "learning_rate": 0.00029253373334933394, "loss": 3.3095, "step": 311700 }, { "epoch": 49.888, "grad_norm": 0.15777957439422607, "learning_rate": 0.0002925313332533301, "loss": 3.4818, "step": 311800 }, { "epoch": 49.904, "grad_norm": 0.15611158311367035, "learning_rate": 0.0002925289331573263, "loss": 3.3189, "step": 311900 }, { "epoch": 49.92, "grad_norm": 0.12207408994436264, "learning_rate": 0.00029252653306132244, "loss": 3.42, "step": 312000 }, { "epoch": 49.936, "grad_norm": 0.12645098567008972, "learning_rate": 0.00029252413296531856, "loss": 3.4532, "step": 312100 }, { "epoch": 49.952, "grad_norm": 0.20612449944019318, "learning_rate": 0.00029252173286931473, "loss": 3.3457, "step": 312200 }, { "epoch": 49.968, "grad_norm": 0.15680143237113953, "learning_rate": 0.0002925193327733109, "loss": 3.5185, "step": 312300 }, { "epoch": 49.984, "grad_norm": 0.12851519882678986, "learning_rate": 0.00029251693267730707, "loss": 3.3674, "step": 312400 }, { "epoch": 50.0, "grad_norm": 0.14841392636299133, "learning_rate": 0.00029251453258130323, "loss": 3.1417, "step": 312500 }, { "epoch": 50.016, "grad_norm": 0.11803581565618515, "learning_rate": 0.0002925121324852994, "loss": 3.5848, "step": 312600 }, { "epoch": 50.032, "grad_norm": 0.18108335137367249, "learning_rate": 0.0002925097323892955, "loss": 3.3841, "step": 312700 }, { "epoch": 50.048, "grad_norm": 0.1896141916513443, "learning_rate": 0.0002925073322932917, "loss": 3.5613, "step": 312800 }, { "epoch": 50.064, "grad_norm": 0.15530169010162354, "learning_rate": 0.00029250493219728786, "loss": 3.374, "step": 312900 }, { "epoch": 50.08, "grad_norm": 0.16883188486099243, "learning_rate": 0.000292502532101284, "loss": 3.3388, "step": 313000 }, { "epoch": 50.096, "grad_norm": 0.1228465661406517, "learning_rate": 0.0002925001320052802, "loss": 3.7437, "step": 313100 }, { "epoch": 50.112, "grad_norm": 0.13094381988048553, "learning_rate": 0.0002924977319092763, "loss": 3.218, "step": 313200 }, { "epoch": 50.128, "grad_norm": 0.18601219356060028, "learning_rate": 0.0002924953318132725, "loss": 3.2414, "step": 313300 }, { "epoch": 50.144, "grad_norm": 0.11474788188934326, "learning_rate": 0.00029249293171726865, "loss": 3.2841, "step": 313400 }, { "epoch": 50.16, "grad_norm": 0.15750759840011597, "learning_rate": 0.0002924905316212648, "loss": 3.2634, "step": 313500 }, { "epoch": 50.176, "grad_norm": 0.1897895187139511, "learning_rate": 0.000292488131525261, "loss": 3.4238, "step": 313600 }, { "epoch": 50.192, "grad_norm": 0.11503628641366959, "learning_rate": 0.00029248573142925716, "loss": 3.3235, "step": 313700 }, { "epoch": 50.208, "grad_norm": 0.11801855266094208, "learning_rate": 0.00029248333133325327, "loss": 3.2734, "step": 313800 }, { "epoch": 50.224, "grad_norm": 0.10749802738428116, "learning_rate": 0.00029248093123724944, "loss": 3.5235, "step": 313900 }, { "epoch": 50.24, "grad_norm": 0.11881539970636368, "learning_rate": 0.0002924785311412456, "loss": 3.4193, "step": 314000 }, { "epoch": 50.256, "grad_norm": 0.12610787153244019, "learning_rate": 0.0002924761310452418, "loss": 3.3605, "step": 314100 }, { "epoch": 50.272, "grad_norm": 0.18023726344108582, "learning_rate": 0.00029247373094923795, "loss": 3.3322, "step": 314200 }, { "epoch": 50.288, "grad_norm": 0.19306433200836182, "learning_rate": 0.00029247133085323406, "loss": 3.3139, "step": 314300 }, { "epoch": 50.304, "grad_norm": 0.12993058562278748, "learning_rate": 0.0002924689307572303, "loss": 3.4498, "step": 314400 }, { "epoch": 50.32, "grad_norm": 0.1524878889322281, "learning_rate": 0.00029246653066122646, "loss": 3.4568, "step": 314500 }, { "epoch": 50.336, "grad_norm": 0.14611756801605225, "learning_rate": 0.0002924641305652226, "loss": 3.3603, "step": 314600 }, { "epoch": 50.352, "grad_norm": 0.15164513885974884, "learning_rate": 0.00029246173046921874, "loss": 3.487, "step": 314700 }, { "epoch": 50.368, "grad_norm": 0.20256902277469635, "learning_rate": 0.0002924593303732149, "loss": 3.0035, "step": 314800 }, { "epoch": 50.384, "grad_norm": 0.14957864582538605, "learning_rate": 0.0002924569302772111, "loss": 3.6098, "step": 314900 }, { "epoch": 50.4, "grad_norm": 0.13473552465438843, "learning_rate": 0.00029245453018120725, "loss": 3.5125, "step": 315000 }, { "epoch": 50.416, "grad_norm": 0.13214446604251862, "learning_rate": 0.0002924521300852034, "loss": 3.5437, "step": 315100 }, { "epoch": 50.432, "grad_norm": 0.14056731760501862, "learning_rate": 0.00029244972998919953, "loss": 3.316, "step": 315200 }, { "epoch": 50.448, "grad_norm": 0.1563900262117386, "learning_rate": 0.0002924473298931957, "loss": 3.3829, "step": 315300 }, { "epoch": 50.464, "grad_norm": 0.12801362574100494, "learning_rate": 0.00029244492979719187, "loss": 3.376, "step": 315400 }, { "epoch": 50.48, "grad_norm": 0.12805210053920746, "learning_rate": 0.00029244252970118804, "loss": 3.2924, "step": 315500 }, { "epoch": 50.496, "grad_norm": 0.11581001430749893, "learning_rate": 0.0002924401296051842, "loss": 3.4011, "step": 315600 }, { "epoch": 50.512, "grad_norm": 0.16366904973983765, "learning_rate": 0.0002924377295091804, "loss": 3.2642, "step": 315700 }, { "epoch": 50.528, "grad_norm": 0.2001999318599701, "learning_rate": 0.0002924353294131765, "loss": 3.6134, "step": 315800 }, { "epoch": 50.544, "grad_norm": 0.18013878166675568, "learning_rate": 0.00029243292931717266, "loss": 3.4307, "step": 315900 }, { "epoch": 50.56, "grad_norm": 0.15581771731376648, "learning_rate": 0.00029243052922116883, "loss": 3.2671, "step": 316000 }, { "epoch": 50.576, "grad_norm": 0.14331108331680298, "learning_rate": 0.000292428129125165, "loss": 3.5247, "step": 316100 }, { "epoch": 50.592, "grad_norm": 0.12613464891910553, "learning_rate": 0.00029242572902916117, "loss": 3.3337, "step": 316200 }, { "epoch": 50.608, "grad_norm": 0.13762806355953217, "learning_rate": 0.0002924233289331573, "loss": 3.3593, "step": 316300 }, { "epoch": 50.624, "grad_norm": 0.11664793640375137, "learning_rate": 0.0002924209528381135, "loss": 3.2691, "step": 316400 }, { "epoch": 50.64, "grad_norm": 0.16478689014911652, "learning_rate": 0.00029241855274210964, "loss": 3.4801, "step": 316500 }, { "epoch": 50.656, "grad_norm": 0.14488086104393005, "learning_rate": 0.0002924161526461058, "loss": 3.4374, "step": 316600 }, { "epoch": 50.672, "grad_norm": 0.1400933563709259, "learning_rate": 0.000292413752550102, "loss": 3.5094, "step": 316700 }, { "epoch": 50.688, "grad_norm": 0.13223755359649658, "learning_rate": 0.00029241135245409815, "loss": 3.3504, "step": 316800 }, { "epoch": 50.704, "grad_norm": 0.15253742039203644, "learning_rate": 0.00029240895235809426, "loss": 3.132, "step": 316900 }, { "epoch": 50.72, "grad_norm": 0.16582566499710083, "learning_rate": 0.00029240655226209043, "loss": 3.2527, "step": 317000 }, { "epoch": 50.736, "grad_norm": 0.1532561480998993, "learning_rate": 0.0002924041521660866, "loss": 3.2806, "step": 317100 }, { "epoch": 50.752, "grad_norm": 0.16683708131313324, "learning_rate": 0.00029240175207008277, "loss": 3.3081, "step": 317200 }, { "epoch": 50.768, "grad_norm": 0.15980778634548187, "learning_rate": 0.00029239935197407894, "loss": 3.3985, "step": 317300 }, { "epoch": 50.784, "grad_norm": 0.1412450224161148, "learning_rate": 0.00029239695187807506, "loss": 3.5546, "step": 317400 }, { "epoch": 50.8, "grad_norm": 0.1799844652414322, "learning_rate": 0.0002923945517820713, "loss": 3.1961, "step": 317500 }, { "epoch": 50.816, "grad_norm": 0.1548961102962494, "learning_rate": 0.00029239215168606745, "loss": 3.3781, "step": 317600 }, { "epoch": 50.832, "grad_norm": 0.12473632395267487, "learning_rate": 0.0002923897515900636, "loss": 3.3523, "step": 317700 }, { "epoch": 50.848, "grad_norm": 0.14622873067855835, "learning_rate": 0.00029238735149405973, "loss": 3.3547, "step": 317800 }, { "epoch": 50.864, "grad_norm": 0.18247777223587036, "learning_rate": 0.0002923849513980559, "loss": 3.363, "step": 317900 }, { "epoch": 50.88, "grad_norm": 0.11367323249578476, "learning_rate": 0.00029238255130205207, "loss": 3.1904, "step": 318000 }, { "epoch": 50.896, "grad_norm": 0.1404496729373932, "learning_rate": 0.00029238015120604824, "loss": 3.405, "step": 318100 }, { "epoch": 50.912, "grad_norm": 0.11309421062469482, "learning_rate": 0.0002923777511100444, "loss": 3.4147, "step": 318200 }, { "epoch": 50.928, "grad_norm": 0.16930608451366425, "learning_rate": 0.0002923753510140405, "loss": 3.4314, "step": 318300 }, { "epoch": 50.944, "grad_norm": 0.14684052765369415, "learning_rate": 0.0002923729509180367, "loss": 3.7399, "step": 318400 }, { "epoch": 50.96, "grad_norm": 0.15725503861904144, "learning_rate": 0.00029237055082203286, "loss": 3.0311, "step": 318500 }, { "epoch": 50.976, "grad_norm": 0.18299442529678345, "learning_rate": 0.00029236815072602903, "loss": 3.5391, "step": 318600 }, { "epoch": 50.992, "grad_norm": 0.233064666390419, "learning_rate": 0.0002923657506300252, "loss": 3.446, "step": 318700 }, { "epoch": 51.008, "grad_norm": 0.12934905290603638, "learning_rate": 0.00029236335053402137, "loss": 3.3369, "step": 318800 }, { "epoch": 51.024, "grad_norm": 0.10636331886053085, "learning_rate": 0.0002923609504380175, "loss": 3.3126, "step": 318900 }, { "epoch": 51.04, "grad_norm": 0.1443183273077011, "learning_rate": 0.00029235855034201366, "loss": 3.0687, "step": 319000 }, { "epoch": 51.056, "grad_norm": 0.15277619659900665, "learning_rate": 0.0002923561502460098, "loss": 3.1662, "step": 319100 }, { "epoch": 51.072, "grad_norm": 0.13871514797210693, "learning_rate": 0.000292353750150006, "loss": 3.1776, "step": 319200 }, { "epoch": 51.088, "grad_norm": 0.1589903086423874, "learning_rate": 0.00029235135005400216, "loss": 3.3311, "step": 319300 }, { "epoch": 51.104, "grad_norm": 0.17010129988193512, "learning_rate": 0.0002923489739589583, "loss": 3.2954, "step": 319400 }, { "epoch": 51.12, "grad_norm": 0.1390664279460907, "learning_rate": 0.00029234657386295447, "loss": 3.1024, "step": 319500 }, { "epoch": 51.136, "grad_norm": 0.15585166215896606, "learning_rate": 0.00029234417376695064, "loss": 3.5188, "step": 319600 }, { "epoch": 51.152, "grad_norm": 0.17884500324726105, "learning_rate": 0.0002923417736709468, "loss": 3.2745, "step": 319700 }, { "epoch": 51.168, "grad_norm": 0.14813295006752014, "learning_rate": 0.000292339373574943, "loss": 3.3754, "step": 319800 }, { "epoch": 51.184, "grad_norm": 0.1276063323020935, "learning_rate": 0.00029233697347893914, "loss": 3.4102, "step": 319900 }, { "epoch": 51.2, "grad_norm": 0.20598426461219788, "learning_rate": 0.00029233457338293526, "loss": 3.2283, "step": 320000 }, { "epoch": 51.216, "grad_norm": 0.1701878011226654, "learning_rate": 0.00029233217328693143, "loss": 3.2727, "step": 320100 }, { "epoch": 51.232, "grad_norm": 0.18325436115264893, "learning_rate": 0.0002923297731909276, "loss": 3.5751, "step": 320200 }, { "epoch": 51.248, "grad_norm": 0.2000550627708435, "learning_rate": 0.0002923273970958838, "loss": 3.3013, "step": 320300 }, { "epoch": 51.264, "grad_norm": 0.24174946546554565, "learning_rate": 0.00029232499699987995, "loss": 3.5239, "step": 320400 }, { "epoch": 51.28, "grad_norm": 0.16021841764450073, "learning_rate": 0.0002923225969038761, "loss": 3.3734, "step": 320500 }, { "epoch": 51.296, "grad_norm": 0.18157915771007538, "learning_rate": 0.0002923201968078723, "loss": 3.5951, "step": 320600 }, { "epoch": 51.312, "grad_norm": 0.12459814548492432, "learning_rate": 0.00029231779671186846, "loss": 3.7042, "step": 320700 }, { "epoch": 51.328, "grad_norm": 0.12427054345607758, "learning_rate": 0.00029231539661586463, "loss": 3.4144, "step": 320800 }, { "epoch": 51.344, "grad_norm": 0.12698723375797272, "learning_rate": 0.00029231299651986075, "loss": 3.2732, "step": 320900 }, { "epoch": 51.36, "grad_norm": 0.19848290085792542, "learning_rate": 0.0002923105964238569, "loss": 3.3225, "step": 321000 }, { "epoch": 51.376, "grad_norm": 0.1457836627960205, "learning_rate": 0.0002923081963278531, "loss": 3.3021, "step": 321100 }, { "epoch": 51.392, "grad_norm": 0.2069910317659378, "learning_rate": 0.00029230579623184925, "loss": 3.0118, "step": 321200 }, { "epoch": 51.408, "grad_norm": 0.19537220895290375, "learning_rate": 0.0002923033961358454, "loss": 3.5989, "step": 321300 }, { "epoch": 51.424, "grad_norm": 0.15151028335094452, "learning_rate": 0.00029230099603984154, "loss": 3.3543, "step": 321400 }, { "epoch": 51.44, "grad_norm": 0.1707276999950409, "learning_rate": 0.0002922985959438377, "loss": 3.1669, "step": 321500 }, { "epoch": 51.456, "grad_norm": 0.14340677857398987, "learning_rate": 0.0002922961958478339, "loss": 3.2322, "step": 321600 }, { "epoch": 51.472, "grad_norm": 0.13606461882591248, "learning_rate": 0.00029229379575183005, "loss": 3.4924, "step": 321700 }, { "epoch": 51.488, "grad_norm": 0.1521935611963272, "learning_rate": 0.0002922913956558262, "loss": 3.3178, "step": 321800 }, { "epoch": 51.504, "grad_norm": 0.1891232579946518, "learning_rate": 0.0002922889955598224, "loss": 3.2282, "step": 321900 }, { "epoch": 51.52, "grad_norm": 0.14701153337955475, "learning_rate": 0.0002922865954638185, "loss": 3.3993, "step": 322000 }, { "epoch": 51.536, "grad_norm": 0.14147521555423737, "learning_rate": 0.00029228419536781467, "loss": 3.4662, "step": 322100 }, { "epoch": 51.552, "grad_norm": 0.15564191341400146, "learning_rate": 0.00029228179527181084, "loss": 3.1281, "step": 322200 }, { "epoch": 51.568, "grad_norm": 0.1432490348815918, "learning_rate": 0.000292279395175807, "loss": 3.1582, "step": 322300 }, { "epoch": 51.584, "grad_norm": 0.16484883427619934, "learning_rate": 0.0002922769950798032, "loss": 3.4491, "step": 322400 }, { "epoch": 51.6, "grad_norm": 0.1310778707265854, "learning_rate": 0.0002922745949837993, "loss": 3.5499, "step": 322500 }, { "epoch": 51.616, "grad_norm": 0.15870967507362366, "learning_rate": 0.00029227221888875553, "loss": 3.3747, "step": 322600 }, { "epoch": 51.632, "grad_norm": 0.12570875883102417, "learning_rate": 0.0002922698187927517, "loss": 3.3863, "step": 322700 }, { "epoch": 51.648, "grad_norm": 0.18124200403690338, "learning_rate": 0.00029226741869674787, "loss": 3.0866, "step": 322800 }, { "epoch": 51.664, "grad_norm": 0.13015252351760864, "learning_rate": 0.000292265018600744, "loss": 3.5409, "step": 322900 }, { "epoch": 51.68, "grad_norm": 0.12848562002182007, "learning_rate": 0.00029226261850474016, "loss": 3.3924, "step": 323000 }, { "epoch": 51.696, "grad_norm": 0.18112777173519135, "learning_rate": 0.0002922602184087363, "loss": 3.1644, "step": 323100 }, { "epoch": 51.712, "grad_norm": 0.13526692986488342, "learning_rate": 0.0002922578183127325, "loss": 3.1237, "step": 323200 }, { "epoch": 51.728, "grad_norm": 0.14333507418632507, "learning_rate": 0.00029225541821672866, "loss": 3.2928, "step": 323300 }, { "epoch": 51.744, "grad_norm": 0.14580577611923218, "learning_rate": 0.0002922530181207248, "loss": 3.382, "step": 323400 }, { "epoch": 51.76, "grad_norm": 0.125936359167099, "learning_rate": 0.00029225061802472095, "loss": 3.1868, "step": 323500 }, { "epoch": 51.776, "grad_norm": 0.1556197851896286, "learning_rate": 0.0002922482179287171, "loss": 3.5027, "step": 323600 }, { "epoch": 51.792, "grad_norm": 0.1739504039287567, "learning_rate": 0.0002922458178327133, "loss": 3.5135, "step": 323700 }, { "epoch": 51.808, "grad_norm": 0.14744147658348083, "learning_rate": 0.00029224341773670945, "loss": 3.3462, "step": 323800 }, { "epoch": 51.824, "grad_norm": 0.36669498682022095, "learning_rate": 0.0002922410176407056, "loss": 3.3993, "step": 323900 }, { "epoch": 51.84, "grad_norm": 0.11825872212648392, "learning_rate": 0.00029223861754470174, "loss": 3.4219, "step": 324000 }, { "epoch": 51.856, "grad_norm": 0.13973468542099, "learning_rate": 0.0002922362174486979, "loss": 3.4115, "step": 324100 }, { "epoch": 51.872, "grad_norm": 0.14907534420490265, "learning_rate": 0.0002922338173526941, "loss": 3.2073, "step": 324200 }, { "epoch": 51.888, "grad_norm": 0.15955807268619537, "learning_rate": 0.00029223141725669025, "loss": 3.2093, "step": 324300 }, { "epoch": 51.904, "grad_norm": 0.1269015222787857, "learning_rate": 0.0002922290171606864, "loss": 3.3441, "step": 324400 }, { "epoch": 51.92, "grad_norm": 0.17560407519340515, "learning_rate": 0.00029222661706468253, "loss": 3.074, "step": 324500 }, { "epoch": 51.936, "grad_norm": 0.15233613550662994, "learning_rate": 0.0002922242169686787, "loss": 3.445, "step": 324600 }, { "epoch": 51.952, "grad_norm": 0.15940634906291962, "learning_rate": 0.00029222181687267487, "loss": 2.9824, "step": 324700 }, { "epoch": 51.968, "grad_norm": 0.16424326598644257, "learning_rate": 0.00029221941677667104, "loss": 3.3405, "step": 324800 }, { "epoch": 51.984, "grad_norm": 0.13630399107933044, "learning_rate": 0.0002922170166806672, "loss": 3.2917, "step": 324900 }, { "epoch": 52.0, "grad_norm": 0.1434134989976883, "learning_rate": 0.0002922146165846634, "loss": 3.3354, "step": 325000 }, { "epoch": 52.016, "grad_norm": 0.14515653252601624, "learning_rate": 0.0002922122164886595, "loss": 3.1752, "step": 325100 }, { "epoch": 52.032, "grad_norm": 0.17594020068645477, "learning_rate": 0.00029220981639265566, "loss": 3.2219, "step": 325200 }, { "epoch": 52.048, "grad_norm": 0.1492474228143692, "learning_rate": 0.00029220741629665183, "loss": 3.3629, "step": 325300 }, { "epoch": 52.064, "grad_norm": 0.14805439114570618, "learning_rate": 0.000292205016200648, "loss": 3.1798, "step": 325400 }, { "epoch": 52.08, "grad_norm": 0.1386563628911972, "learning_rate": 0.00029220261610464417, "loss": 3.2574, "step": 325500 }, { "epoch": 52.096, "grad_norm": 0.15306046605110168, "learning_rate": 0.0002922002160086403, "loss": 3.2002, "step": 325600 }, { "epoch": 52.112, "grad_norm": 0.21999217569828033, "learning_rate": 0.00029219781591263645, "loss": 2.9786, "step": 325700 }, { "epoch": 52.128, "grad_norm": 0.12054209411144257, "learning_rate": 0.0002921954158166326, "loss": 3.1741, "step": 325800 }, { "epoch": 52.144, "grad_norm": 0.1314040869474411, "learning_rate": 0.0002921930157206288, "loss": 3.3035, "step": 325900 }, { "epoch": 52.16, "grad_norm": 0.12140999734401703, "learning_rate": 0.00029219061562462496, "loss": 3.4072, "step": 326000 }, { "epoch": 52.176, "grad_norm": 0.17435301840305328, "learning_rate": 0.00029218821552862113, "loss": 3.1879, "step": 326100 }, { "epoch": 52.192, "grad_norm": 0.14907237887382507, "learning_rate": 0.00029218581543261724, "loss": 3.4601, "step": 326200 }, { "epoch": 52.208, "grad_norm": 0.11840406060218811, "learning_rate": 0.0002921834153366134, "loss": 3.2933, "step": 326300 }, { "epoch": 52.224, "grad_norm": 0.12879161536693573, "learning_rate": 0.0002921810152406096, "loss": 3.5088, "step": 326400 }, { "epoch": 52.24, "grad_norm": 0.12271026521921158, "learning_rate": 0.00029217861514460575, "loss": 3.5216, "step": 326500 }, { "epoch": 52.256, "grad_norm": 0.16596658527851105, "learning_rate": 0.0002921762150486019, "loss": 3.2785, "step": 326600 }, { "epoch": 52.272, "grad_norm": 0.15686991810798645, "learning_rate": 0.00029217381495259804, "loss": 3.2761, "step": 326700 }, { "epoch": 52.288, "grad_norm": 0.1981448084115982, "learning_rate": 0.00029217141485659426, "loss": 3.3845, "step": 326800 }, { "epoch": 52.304, "grad_norm": 0.14509083330631256, "learning_rate": 0.00029216901476059043, "loss": 3.2953, "step": 326900 }, { "epoch": 52.32, "grad_norm": 0.18140809237957, "learning_rate": 0.0002921666386655466, "loss": 3.3031, "step": 327000 }, { "epoch": 52.336, "grad_norm": 0.15065407752990723, "learning_rate": 0.00029216423856954273, "loss": 2.997, "step": 327100 }, { "epoch": 52.352, "grad_norm": 0.13274575769901276, "learning_rate": 0.0002921618384735389, "loss": 3.2759, "step": 327200 }, { "epoch": 52.368, "grad_norm": 0.34924188256263733, "learning_rate": 0.00029215943837753507, "loss": 3.2741, "step": 327300 }, { "epoch": 52.384, "grad_norm": 0.1323745846748352, "learning_rate": 0.00029215703828153124, "loss": 3.4603, "step": 327400 }, { "epoch": 52.4, "grad_norm": 0.15839941799640656, "learning_rate": 0.0002921546381855274, "loss": 3.2152, "step": 327500 }, { "epoch": 52.416, "grad_norm": 0.13269568979740143, "learning_rate": 0.0002921522380895235, "loss": 3.2798, "step": 327600 }, { "epoch": 52.432, "grad_norm": 0.1345677524805069, "learning_rate": 0.0002921498379935197, "loss": 3.4142, "step": 327700 }, { "epoch": 52.448, "grad_norm": 0.25859150290489197, "learning_rate": 0.00029214743789751586, "loss": 3.0441, "step": 327800 }, { "epoch": 52.464, "grad_norm": 0.18836736679077148, "learning_rate": 0.00029214503780151203, "loss": 3.3652, "step": 327900 }, { "epoch": 52.48, "grad_norm": 0.1247740313410759, "learning_rate": 0.0002921426377055082, "loss": 3.4177, "step": 328000 }, { "epoch": 52.496, "grad_norm": 0.17369848489761353, "learning_rate": 0.00029214023760950437, "loss": 3.3721, "step": 328100 }, { "epoch": 52.512, "grad_norm": 0.1320822685956955, "learning_rate": 0.0002921378375135005, "loss": 3.0019, "step": 328200 }, { "epoch": 52.528, "grad_norm": 0.16522739827632904, "learning_rate": 0.00029213543741749665, "loss": 3.2823, "step": 328300 }, { "epoch": 52.544, "grad_norm": 0.16406702995300293, "learning_rate": 0.0002921330373214928, "loss": 3.3603, "step": 328400 }, { "epoch": 52.56, "grad_norm": 0.17635925114154816, "learning_rate": 0.000292130637225489, "loss": 3.3294, "step": 328500 }, { "epoch": 52.576, "grad_norm": 0.14966008067131042, "learning_rate": 0.00029212823712948516, "loss": 3.2692, "step": 328600 }, { "epoch": 52.592, "grad_norm": 0.134891077876091, "learning_rate": 0.0002921258370334813, "loss": 3.3542, "step": 328700 }, { "epoch": 52.608, "grad_norm": 0.19409526884555817, "learning_rate": 0.00029212343693747745, "loss": 3.1954, "step": 328800 }, { "epoch": 52.624, "grad_norm": 0.3218018412590027, "learning_rate": 0.0002921210368414736, "loss": 3.2824, "step": 328900 }, { "epoch": 52.64, "grad_norm": 0.13344421982765198, "learning_rate": 0.0002921186367454698, "loss": 3.4867, "step": 329000 }, { "epoch": 52.656, "grad_norm": 0.17045122385025024, "learning_rate": 0.00029211623664946595, "loss": 3.4088, "step": 329100 }, { "epoch": 52.672, "grad_norm": 0.17423571646213531, "learning_rate": 0.0002921138365534621, "loss": 3.2287, "step": 329200 }, { "epoch": 52.688, "grad_norm": 0.1669912189245224, "learning_rate": 0.0002921114604584183, "loss": 3.3932, "step": 329300 }, { "epoch": 52.704, "grad_norm": 0.14621156454086304, "learning_rate": 0.0002921090603624145, "loss": 3.2588, "step": 329400 }, { "epoch": 52.72, "grad_norm": 0.14840787649154663, "learning_rate": 0.00029210666026641065, "loss": 3.3406, "step": 329500 }, { "epoch": 52.736, "grad_norm": 0.4345189929008484, "learning_rate": 0.00029210426017040676, "loss": 3.4221, "step": 329600 }, { "epoch": 52.752, "grad_norm": 0.13816241919994354, "learning_rate": 0.00029210186007440293, "loss": 3.3087, "step": 329700 }, { "epoch": 52.768, "grad_norm": 0.13703888654708862, "learning_rate": 0.0002920994599783991, "loss": 3.1629, "step": 329800 }, { "epoch": 52.784, "grad_norm": 0.1522781103849411, "learning_rate": 0.00029209705988239527, "loss": 3.3917, "step": 329900 }, { "epoch": 52.8, "grad_norm": 0.18823499977588654, "learning_rate": 0.00029209465978639144, "loss": 3.3933, "step": 330000 }, { "epoch": 52.816, "grad_norm": 0.15977166593074799, "learning_rate": 0.0002920922596903876, "loss": 3.3208, "step": 330100 }, { "epoch": 52.832, "grad_norm": 0.1562654823064804, "learning_rate": 0.0002920898595943837, "loss": 3.3498, "step": 330200 }, { "epoch": 52.848, "grad_norm": 0.18120811879634857, "learning_rate": 0.0002920874594983799, "loss": 3.2206, "step": 330300 }, { "epoch": 52.864, "grad_norm": 0.18792439997196198, "learning_rate": 0.00029208505940237606, "loss": 3.3864, "step": 330400 }, { "epoch": 52.88, "grad_norm": 0.1495332419872284, "learning_rate": 0.00029208265930637223, "loss": 3.3469, "step": 330500 }, { "epoch": 52.896, "grad_norm": 0.11719579994678497, "learning_rate": 0.0002920802592103684, "loss": 3.5288, "step": 330600 }, { "epoch": 52.912, "grad_norm": 0.12642988562583923, "learning_rate": 0.0002920778591143645, "loss": 3.2969, "step": 330700 }, { "epoch": 52.928, "grad_norm": 0.14159709215164185, "learning_rate": 0.0002920754590183607, "loss": 3.2381, "step": 330800 }, { "epoch": 52.944, "grad_norm": 0.16122287511825562, "learning_rate": 0.00029207305892235686, "loss": 3.0611, "step": 330900 }, { "epoch": 52.96, "grad_norm": 0.13020676374435425, "learning_rate": 0.000292070658826353, "loss": 3.3583, "step": 331000 }, { "epoch": 52.976, "grad_norm": 0.12709808349609375, "learning_rate": 0.0002920682587303492, "loss": 3.5648, "step": 331100 }, { "epoch": 52.992, "grad_norm": 0.14696812629699707, "learning_rate": 0.00029206585863434536, "loss": 3.3875, "step": 331200 }, { "epoch": 53.008, "grad_norm": 0.14866533875465393, "learning_rate": 0.0002920634585383415, "loss": 3.3325, "step": 331300 }, { "epoch": 53.024, "grad_norm": 0.15353770554065704, "learning_rate": 0.00029206105844233765, "loss": 2.9629, "step": 331400 }, { "epoch": 53.04, "grad_norm": 0.12507115304470062, "learning_rate": 0.0002920586583463338, "loss": 3.1008, "step": 331500 }, { "epoch": 53.056, "grad_norm": 0.13784734904766083, "learning_rate": 0.00029205625825033, "loss": 3.3695, "step": 331600 }, { "epoch": 53.072, "grad_norm": 0.1522851437330246, "learning_rate": 0.00029205385815432615, "loss": 3.4621, "step": 331700 }, { "epoch": 53.088, "grad_norm": 0.1576448231935501, "learning_rate": 0.00029205145805832227, "loss": 3.201, "step": 331800 }, { "epoch": 53.104, "grad_norm": 0.21453367173671722, "learning_rate": 0.00029204905796231844, "loss": 3.1522, "step": 331900 }, { "epoch": 53.12, "grad_norm": 0.12820564210414886, "learning_rate": 0.0002920466578663146, "loss": 3.3751, "step": 332000 }, { "epoch": 53.136, "grad_norm": 0.18504217267036438, "learning_rate": 0.0002920442577703108, "loss": 3.2256, "step": 332100 }, { "epoch": 53.152, "grad_norm": 0.1599675714969635, "learning_rate": 0.00029204185767430695, "loss": 3.4482, "step": 332200 }, { "epoch": 53.168, "grad_norm": 0.14189773797988892, "learning_rate": 0.0002920394575783031, "loss": 3.5883, "step": 332300 }, { "epoch": 53.184, "grad_norm": 0.15951794385910034, "learning_rate": 0.00029203705748229923, "loss": 3.6851, "step": 332400 }, { "epoch": 53.2, "grad_norm": 0.23285101354122162, "learning_rate": 0.0002920346813872555, "loss": 3.3122, "step": 332500 }, { "epoch": 53.216, "grad_norm": 0.12848126888275146, "learning_rate": 0.00029203228129125164, "loss": 3.4601, "step": 332600 }, { "epoch": 53.232, "grad_norm": 0.16016559302806854, "learning_rate": 0.00029202988119524776, "loss": 3.3188, "step": 332700 }, { "epoch": 53.248, "grad_norm": 0.15803727507591248, "learning_rate": 0.0002920274810992439, "loss": 3.2162, "step": 332800 }, { "epoch": 53.264, "grad_norm": 0.1404639482498169, "learning_rate": 0.0002920250810032401, "loss": 3.2679, "step": 332900 }, { "epoch": 53.28, "grad_norm": 0.16527333855628967, "learning_rate": 0.00029202268090723627, "loss": 3.2695, "step": 333000 }, { "epoch": 53.296, "grad_norm": 0.11707833409309387, "learning_rate": 0.00029202028081123243, "loss": 3.2782, "step": 333100 }, { "epoch": 53.312, "grad_norm": 0.13054396212100983, "learning_rate": 0.0002920178807152286, "loss": 3.4418, "step": 333200 }, { "epoch": 53.328, "grad_norm": 0.15494994819164276, "learning_rate": 0.0002920154806192247, "loss": 3.53, "step": 333300 }, { "epoch": 53.344, "grad_norm": 0.14971211552619934, "learning_rate": 0.0002920130805232209, "loss": 3.3975, "step": 333400 }, { "epoch": 53.36, "grad_norm": 0.17437951266765594, "learning_rate": 0.00029201068042721706, "loss": 3.396, "step": 333500 }, { "epoch": 53.376, "grad_norm": 0.14597390592098236, "learning_rate": 0.0002920082803312132, "loss": 3.1456, "step": 333600 }, { "epoch": 53.392, "grad_norm": 0.1547769159078598, "learning_rate": 0.0002920059042361694, "loss": 3.309, "step": 333700 }, { "epoch": 53.408, "grad_norm": 0.1842828392982483, "learning_rate": 0.0002920035041401656, "loss": 3.5651, "step": 333800 }, { "epoch": 53.424, "grad_norm": 0.15604564547538757, "learning_rate": 0.00029200110404416175, "loss": 3.2857, "step": 333900 }, { "epoch": 53.44, "grad_norm": 0.2053932547569275, "learning_rate": 0.0002919987039481579, "loss": 3.5018, "step": 334000 }, { "epoch": 53.456, "grad_norm": 0.19722050428390503, "learning_rate": 0.0002919963038521541, "loss": 3.3155, "step": 334100 }, { "epoch": 53.472, "grad_norm": 0.16093458235263824, "learning_rate": 0.0002919939037561502, "loss": 3.0951, "step": 334200 }, { "epoch": 53.488, "grad_norm": 0.17090927064418793, "learning_rate": 0.0002919915036601464, "loss": 3.1929, "step": 334300 }, { "epoch": 53.504, "grad_norm": 0.11625811457633972, "learning_rate": 0.00029198910356414254, "loss": 3.2551, "step": 334400 }, { "epoch": 53.52, "grad_norm": 0.24087922275066376, "learning_rate": 0.0002919867034681387, "loss": 3.6485, "step": 334500 }, { "epoch": 53.536, "grad_norm": 0.13188178837299347, "learning_rate": 0.0002919843033721349, "loss": 3.286, "step": 334600 }, { "epoch": 53.552, "grad_norm": 0.18458682298660278, "learning_rate": 0.000291981903276131, "loss": 3.1847, "step": 334700 }, { "epoch": 53.568, "grad_norm": 0.12876921892166138, "learning_rate": 0.00029197950318012717, "loss": 3.3038, "step": 334800 }, { "epoch": 53.584, "grad_norm": 0.12692047655582428, "learning_rate": 0.00029197710308412334, "loss": 3.3635, "step": 334900 }, { "epoch": 53.6, "grad_norm": 0.14570464193820953, "learning_rate": 0.0002919747029881195, "loss": 3.0864, "step": 335000 }, { "epoch": 53.616, "grad_norm": 0.13279356062412262, "learning_rate": 0.0002919723028921157, "loss": 3.0374, "step": 335100 }, { "epoch": 53.632, "grad_norm": 0.1454782783985138, "learning_rate": 0.00029196990279611184, "loss": 3.1967, "step": 335200 }, { "epoch": 53.648, "grad_norm": 0.13740484416484833, "learning_rate": 0.00029196750270010796, "loss": 3.4504, "step": 335300 }, { "epoch": 53.664, "grad_norm": 0.13518641889095306, "learning_rate": 0.00029196510260410413, "loss": 3.4196, "step": 335400 }, { "epoch": 53.68, "grad_norm": 0.1286626160144806, "learning_rate": 0.0002919627025081003, "loss": 3.3766, "step": 335500 }, { "epoch": 53.696, "grad_norm": 0.15779829025268555, "learning_rate": 0.00029196030241209647, "loss": 3.2401, "step": 335600 }, { "epoch": 53.712, "grad_norm": 0.15997792780399323, "learning_rate": 0.00029195790231609264, "loss": 3.2608, "step": 335700 }, { "epoch": 53.728, "grad_norm": 0.1715650111436844, "learning_rate": 0.00029195550222008875, "loss": 3.3985, "step": 335800 }, { "epoch": 53.744, "grad_norm": 0.19790230691432953, "learning_rate": 0.0002919531021240849, "loss": 3.1285, "step": 335900 }, { "epoch": 53.76, "grad_norm": 0.13261134922504425, "learning_rate": 0.0002919507020280811, "loss": 3.4773, "step": 336000 }, { "epoch": 53.776, "grad_norm": 0.1352439969778061, "learning_rate": 0.00029194830193207726, "loss": 3.0569, "step": 336100 }, { "epoch": 53.792, "grad_norm": 0.1414068192243576, "learning_rate": 0.00029194590183607343, "loss": 3.1214, "step": 336200 }, { "epoch": 53.808, "grad_norm": 0.21068920195102692, "learning_rate": 0.0002919435017400696, "loss": 3.4935, "step": 336300 }, { "epoch": 53.824, "grad_norm": 0.13798032701015472, "learning_rate": 0.0002919411016440657, "loss": 3.3462, "step": 336400 }, { "epoch": 53.84, "grad_norm": 0.1718588024377823, "learning_rate": 0.0002919387015480619, "loss": 3.3585, "step": 336500 }, { "epoch": 53.856, "grad_norm": 0.1828896850347519, "learning_rate": 0.00029193630145205805, "loss": 3.3288, "step": 336600 }, { "epoch": 53.872, "grad_norm": 0.14540043473243713, "learning_rate": 0.0002919339013560542, "loss": 3.4462, "step": 336700 }, { "epoch": 53.888, "grad_norm": 0.15059055387973785, "learning_rate": 0.0002919315012600504, "loss": 3.359, "step": 336800 }, { "epoch": 53.904, "grad_norm": 0.14301247894763947, "learning_rate": 0.00029192910116404656, "loss": 3.26, "step": 336900 }, { "epoch": 53.92, "grad_norm": 0.15023201704025269, "learning_rate": 0.00029192670106804267, "loss": 3.3806, "step": 337000 }, { "epoch": 53.936, "grad_norm": 0.12267656624317169, "learning_rate": 0.00029192430097203884, "loss": 3.3033, "step": 337100 }, { "epoch": 53.952, "grad_norm": 0.1798514723777771, "learning_rate": 0.000291921900876035, "loss": 3.289, "step": 337200 }, { "epoch": 53.968, "grad_norm": 0.1366342008113861, "learning_rate": 0.0002919195007800312, "loss": 3.0695, "step": 337300 }, { "epoch": 53.984, "grad_norm": 0.15608462691307068, "learning_rate": 0.00029191710068402735, "loss": 3.453, "step": 337400 }, { "epoch": 54.0, "grad_norm": 0.14655713737010956, "learning_rate": 0.00029191470058802346, "loss": 3.3283, "step": 337500 }, { "epoch": 54.016, "grad_norm": 0.14786700904369354, "learning_rate": 0.00029191230049201963, "loss": 3.3752, "step": 337600 }, { "epoch": 54.032, "grad_norm": 0.14008936285972595, "learning_rate": 0.0002919099243969759, "loss": 3.4605, "step": 337700 }, { "epoch": 54.048, "grad_norm": 0.16710710525512695, "learning_rate": 0.000291907524300972, "loss": 3.2246, "step": 337800 }, { "epoch": 54.064, "grad_norm": 0.14744892716407776, "learning_rate": 0.00029190512420496816, "loss": 3.3089, "step": 337900 }, { "epoch": 54.08, "grad_norm": 0.17828384041786194, "learning_rate": 0.00029190272410896433, "loss": 3.1102, "step": 338000 }, { "epoch": 54.096, "grad_norm": 0.1318444162607193, "learning_rate": 0.0002919003240129605, "loss": 2.9895, "step": 338100 }, { "epoch": 54.112, "grad_norm": 0.19135650992393494, "learning_rate": 0.00029189792391695667, "loss": 3.3939, "step": 338200 }, { "epoch": 54.128, "grad_norm": 0.16092978417873383, "learning_rate": 0.00029189552382095284, "loss": 3.3561, "step": 338300 }, { "epoch": 54.144, "grad_norm": 0.18586471676826477, "learning_rate": 0.00029189312372494895, "loss": 3.273, "step": 338400 }, { "epoch": 54.16, "grad_norm": 0.13763539493083954, "learning_rate": 0.0002918907236289451, "loss": 3.358, "step": 338500 }, { "epoch": 54.176, "grad_norm": 0.17145951092243195, "learning_rate": 0.0002918883235329413, "loss": 3.2643, "step": 338600 }, { "epoch": 54.192, "grad_norm": 0.15052619576454163, "learning_rate": 0.00029188592343693746, "loss": 3.2145, "step": 338700 }, { "epoch": 54.208, "grad_norm": 0.16555961966514587, "learning_rate": 0.00029188352334093363, "loss": 3.2814, "step": 338800 }, { "epoch": 54.224, "grad_norm": 0.1434653401374817, "learning_rate": 0.0002918811232449298, "loss": 3.2235, "step": 338900 }, { "epoch": 54.24, "grad_norm": 0.17657096683979034, "learning_rate": 0.0002918787231489259, "loss": 3.4424, "step": 339000 }, { "epoch": 54.256, "grad_norm": 0.11164835095405579, "learning_rate": 0.0002918763230529221, "loss": 3.5098, "step": 339100 }, { "epoch": 54.272, "grad_norm": 0.15462209284305573, "learning_rate": 0.00029187392295691825, "loss": 3.325, "step": 339200 }, { "epoch": 54.288, "grad_norm": 0.16589704155921936, "learning_rate": 0.0002918715228609144, "loss": 3.0492, "step": 339300 }, { "epoch": 54.304, "grad_norm": 0.1392892599105835, "learning_rate": 0.0002918691227649106, "loss": 3.4288, "step": 339400 }, { "epoch": 54.32, "grad_norm": 0.1505461484193802, "learning_rate": 0.0002918667226689067, "loss": 3.0524, "step": 339500 }, { "epoch": 54.336, "grad_norm": 0.13551418483257294, "learning_rate": 0.0002918643225729029, "loss": 3.1029, "step": 339600 }, { "epoch": 54.352, "grad_norm": 0.12728756666183472, "learning_rate": 0.00029186192247689904, "loss": 3.4083, "step": 339700 }, { "epoch": 54.368, "grad_norm": 0.15158139169216156, "learning_rate": 0.0002918595223808952, "loss": 3.1884, "step": 339800 }, { "epoch": 54.384, "grad_norm": 0.166995108127594, "learning_rate": 0.0002918571222848914, "loss": 3.3493, "step": 339900 }, { "epoch": 54.4, "grad_norm": 0.1689627319574356, "learning_rate": 0.00029185472218888755, "loss": 3.1709, "step": 340000 }, { "epoch": 54.416, "grad_norm": 0.1396838277578354, "learning_rate": 0.00029185232209288367, "loss": 3.2496, "step": 340100 }, { "epoch": 54.432, "grad_norm": 0.16081513464450836, "learning_rate": 0.00029184992199687983, "loss": 3.4438, "step": 340200 }, { "epoch": 54.448, "grad_norm": 0.21025653183460236, "learning_rate": 0.000291847521900876, "loss": 3.3689, "step": 340300 }, { "epoch": 54.464, "grad_norm": 0.20488710701465607, "learning_rate": 0.0002918451218048722, "loss": 3.5275, "step": 340400 }, { "epoch": 54.48, "grad_norm": 0.16729529201984406, "learning_rate": 0.00029184272170886834, "loss": 3.0665, "step": 340500 }, { "epoch": 54.496, "grad_norm": 0.18309368193149567, "learning_rate": 0.00029184032161286446, "loss": 3.2002, "step": 340600 }, { "epoch": 54.512, "grad_norm": 0.19028951227664948, "learning_rate": 0.0002918379215168606, "loss": 3.1964, "step": 340700 }, { "epoch": 54.528, "grad_norm": 0.18053501844406128, "learning_rate": 0.0002918355214208568, "loss": 2.9901, "step": 340800 }, { "epoch": 54.544, "grad_norm": 0.2081013023853302, "learning_rate": 0.00029183312132485296, "loss": 3.376, "step": 340900 }, { "epoch": 54.56, "grad_norm": 0.16215258836746216, "learning_rate": 0.00029183072122884913, "loss": 3.1747, "step": 341000 }, { "epoch": 54.576, "grad_norm": 0.15656504034996033, "learning_rate": 0.0002918283211328453, "loss": 3.4268, "step": 341100 }, { "epoch": 54.592, "grad_norm": 0.15155315399169922, "learning_rate": 0.0002918259210368414, "loss": 3.1151, "step": 341200 }, { "epoch": 54.608, "grad_norm": 0.16554059088230133, "learning_rate": 0.0002918235209408376, "loss": 3.03, "step": 341300 }, { "epoch": 54.624, "grad_norm": 0.17343983054161072, "learning_rate": 0.00029182112084483376, "loss": 3.2444, "step": 341400 }, { "epoch": 54.64, "grad_norm": 0.1437683254480362, "learning_rate": 0.0002918187207488299, "loss": 3.2319, "step": 341500 }, { "epoch": 54.656, "grad_norm": 0.1805020421743393, "learning_rate": 0.0002918163206528261, "loss": 3.2731, "step": 341600 }, { "epoch": 54.672, "grad_norm": 0.17247991263866425, "learning_rate": 0.0002918139205568222, "loss": 3.2196, "step": 341700 }, { "epoch": 54.688, "grad_norm": 0.16937606036663055, "learning_rate": 0.00029181154446177845, "loss": 3.1921, "step": 341800 }, { "epoch": 54.704, "grad_norm": 0.1519298106431961, "learning_rate": 0.0002918091443657746, "loss": 3.1489, "step": 341900 }, { "epoch": 54.72, "grad_norm": 0.13763785362243652, "learning_rate": 0.0002918067682707308, "loss": 3.495, "step": 342000 }, { "epoch": 54.736, "grad_norm": 0.16473031044006348, "learning_rate": 0.000291804368174727, "loss": 3.0777, "step": 342100 }, { "epoch": 54.752, "grad_norm": 0.18427863717079163, "learning_rate": 0.00029180196807872315, "loss": 3.1842, "step": 342200 }, { "epoch": 54.768, "grad_norm": 0.16320018470287323, "learning_rate": 0.0002917995679827193, "loss": 3.2127, "step": 342300 }, { "epoch": 54.784, "grad_norm": 0.16119256615638733, "learning_rate": 0.00029179716788671543, "loss": 3.5969, "step": 342400 }, { "epoch": 54.8, "grad_norm": 0.13102026283740997, "learning_rate": 0.0002917947677907116, "loss": 3.1499, "step": 342500 }, { "epoch": 54.816, "grad_norm": 0.19485054910182953, "learning_rate": 0.00029179236769470777, "loss": 3.4044, "step": 342600 }, { "epoch": 54.832, "grad_norm": 0.1520545780658722, "learning_rate": 0.00029178996759870394, "loss": 3.1848, "step": 342700 }, { "epoch": 54.848, "grad_norm": 0.1294872760772705, "learning_rate": 0.0002917875675027001, "loss": 3.1871, "step": 342800 }, { "epoch": 54.864, "grad_norm": 0.13647159934043884, "learning_rate": 0.0002917851674066963, "loss": 3.0542, "step": 342900 }, { "epoch": 54.88, "grad_norm": 0.17753876745700836, "learning_rate": 0.0002917827673106924, "loss": 3.1053, "step": 343000 }, { "epoch": 54.896, "grad_norm": 0.12998761236667633, "learning_rate": 0.00029178036721468856, "loss": 3.1792, "step": 343100 }, { "epoch": 54.912, "grad_norm": 0.11028902232646942, "learning_rate": 0.00029177796711868473, "loss": 3.1548, "step": 343200 }, { "epoch": 54.928, "grad_norm": 0.1745411902666092, "learning_rate": 0.0002917755670226809, "loss": 3.404, "step": 343300 }, { "epoch": 54.944, "grad_norm": 0.1527205854654312, "learning_rate": 0.00029177316692667707, "loss": 3.4604, "step": 343400 }, { "epoch": 54.96, "grad_norm": 0.13147595524787903, "learning_rate": 0.0002917707668306732, "loss": 3.2499, "step": 343500 }, { "epoch": 54.976, "grad_norm": 0.17815326154232025, "learning_rate": 0.00029176836673466935, "loss": 3.3152, "step": 343600 }, { "epoch": 54.992, "grad_norm": 0.13113729655742645, "learning_rate": 0.0002917659666386655, "loss": 3.3881, "step": 343700 }, { "epoch": 55.008, "grad_norm": 0.1614774465560913, "learning_rate": 0.0002917635665426617, "loss": 3.5003, "step": 343800 }, { "epoch": 55.024, "grad_norm": 0.17780400812625885, "learning_rate": 0.00029176116644665786, "loss": 3.0981, "step": 343900 }, { "epoch": 55.04, "grad_norm": 0.1617867350578308, "learning_rate": 0.00029175876635065403, "loss": 3.1411, "step": 344000 }, { "epoch": 55.056, "grad_norm": 0.1891581416130066, "learning_rate": 0.00029175636625465015, "loss": 3.0627, "step": 344100 }, { "epoch": 55.072, "grad_norm": 0.13288554549217224, "learning_rate": 0.0002917539661586463, "loss": 3.3637, "step": 344200 }, { "epoch": 55.088, "grad_norm": 0.17043215036392212, "learning_rate": 0.0002917515660626425, "loss": 3.3858, "step": 344300 }, { "epoch": 55.104, "grad_norm": 0.16361024975776672, "learning_rate": 0.00029174916596663865, "loss": 3.2384, "step": 344400 }, { "epoch": 55.12, "grad_norm": 0.1416132152080536, "learning_rate": 0.0002917467658706348, "loss": 3.3317, "step": 344500 }, { "epoch": 55.136, "grad_norm": 0.17764806747436523, "learning_rate": 0.00029174436577463094, "loss": 3.2461, "step": 344600 }, { "epoch": 55.152, "grad_norm": 0.15568125247955322, "learning_rate": 0.0002917419656786271, "loss": 3.3518, "step": 344700 }, { "epoch": 55.168, "grad_norm": 0.1336747109889984, "learning_rate": 0.0002917395655826233, "loss": 3.3637, "step": 344800 }, { "epoch": 55.184, "grad_norm": 0.12551388144493103, "learning_rate": 0.00029173716548661945, "loss": 3.2018, "step": 344900 }, { "epoch": 55.2, "grad_norm": 0.23992879688739777, "learning_rate": 0.0002917347653906156, "loss": 3.3091, "step": 345000 }, { "epoch": 55.216, "grad_norm": 0.14601823687553406, "learning_rate": 0.0002917323652946118, "loss": 3.4565, "step": 345100 }, { "epoch": 55.232, "grad_norm": 0.11775591224431992, "learning_rate": 0.0002917299651986079, "loss": 3.4798, "step": 345200 }, { "epoch": 55.248, "grad_norm": 0.15643376111984253, "learning_rate": 0.00029172756510260407, "loss": 3.2663, "step": 345300 }, { "epoch": 55.264, "grad_norm": 0.13931778073310852, "learning_rate": 0.00029172516500660024, "loss": 3.4766, "step": 345400 }, { "epoch": 55.28, "grad_norm": 0.15530742704868317, "learning_rate": 0.0002917227649105964, "loss": 3.4843, "step": 345500 }, { "epoch": 55.296, "grad_norm": 0.1652204841375351, "learning_rate": 0.0002917203648145926, "loss": 3.2115, "step": 345600 }, { "epoch": 55.312, "grad_norm": 0.23143932223320007, "learning_rate": 0.0002917179647185887, "loss": 3.2778, "step": 345700 }, { "epoch": 55.328, "grad_norm": 0.15397463738918304, "learning_rate": 0.00029171556462258486, "loss": 3.1087, "step": 345800 }, { "epoch": 55.344, "grad_norm": 0.14131039381027222, "learning_rate": 0.00029171316452658103, "loss": 3.0095, "step": 345900 }, { "epoch": 55.36, "grad_norm": 0.17432500422000885, "learning_rate": 0.0002917107644305772, "loss": 3.5979, "step": 346000 }, { "epoch": 55.376, "grad_norm": 0.14557889103889465, "learning_rate": 0.00029170836433457337, "loss": 2.9839, "step": 346100 }, { "epoch": 55.392, "grad_norm": 0.15908761322498322, "learning_rate": 0.00029170596423856954, "loss": 3.4159, "step": 346200 }, { "epoch": 55.408, "grad_norm": 0.14167125523090363, "learning_rate": 0.0002917035881435257, "loss": 3.0188, "step": 346300 }, { "epoch": 55.424, "grad_norm": 0.15950991213321686, "learning_rate": 0.0002917011880475219, "loss": 3.032, "step": 346400 }, { "epoch": 55.44, "grad_norm": 0.14086678624153137, "learning_rate": 0.0002916988119524781, "loss": 3.3979, "step": 346500 }, { "epoch": 55.456, "grad_norm": 0.3359821140766144, "learning_rate": 0.0002916964118564742, "loss": 3.2745, "step": 346600 }, { "epoch": 55.472, "grad_norm": 0.1806408315896988, "learning_rate": 0.00029169401176047037, "loss": 3.2957, "step": 346700 }, { "epoch": 55.488, "grad_norm": 0.13673198223114014, "learning_rate": 0.00029169161166446654, "loss": 3.2542, "step": 346800 }, { "epoch": 55.504, "grad_norm": 0.15793466567993164, "learning_rate": 0.0002916892115684627, "loss": 3.1965, "step": 346900 }, { "epoch": 55.52, "grad_norm": 0.13120172917842865, "learning_rate": 0.0002916868114724589, "loss": 3.273, "step": 347000 }, { "epoch": 55.536, "grad_norm": 0.20869004726409912, "learning_rate": 0.00029168441137645504, "loss": 3.4659, "step": 347100 }, { "epoch": 55.552, "grad_norm": 0.12163787335157394, "learning_rate": 0.00029168201128045116, "loss": 3.3071, "step": 347200 }, { "epoch": 55.568, "grad_norm": 0.14563344419002533, "learning_rate": 0.00029167961118444733, "loss": 3.2059, "step": 347300 }, { "epoch": 55.584, "grad_norm": 0.15242354571819305, "learning_rate": 0.0002916772110884435, "loss": 3.2946, "step": 347400 }, { "epoch": 55.6, "grad_norm": 0.17935703694820404, "learning_rate": 0.00029167481099243967, "loss": 3.308, "step": 347500 }, { "epoch": 55.616, "grad_norm": 0.2002299427986145, "learning_rate": 0.00029167241089643584, "loss": 3.263, "step": 347600 }, { "epoch": 55.632, "grad_norm": 0.1682882010936737, "learning_rate": 0.000291670010800432, "loss": 3.2977, "step": 347700 }, { "epoch": 55.648, "grad_norm": 0.14220142364501953, "learning_rate": 0.0002916676107044282, "loss": 3.0261, "step": 347800 }, { "epoch": 55.664, "grad_norm": 0.1405324637889862, "learning_rate": 0.00029166521060842434, "loss": 3.2773, "step": 347900 }, { "epoch": 55.68, "grad_norm": 0.1324377804994583, "learning_rate": 0.0002916628105124205, "loss": 3.2114, "step": 348000 }, { "epoch": 55.696, "grad_norm": 0.16759875416755676, "learning_rate": 0.00029166041041641663, "loss": 3.2375, "step": 348100 }, { "epoch": 55.712, "grad_norm": 0.12659089267253876, "learning_rate": 0.0002916580103204128, "loss": 3.5295, "step": 348200 }, { "epoch": 55.728, "grad_norm": 0.18061448633670807, "learning_rate": 0.00029165561022440897, "loss": 3.1781, "step": 348300 }, { "epoch": 55.744, "grad_norm": 0.15430492162704468, "learning_rate": 0.00029165321012840514, "loss": 3.358, "step": 348400 }, { "epoch": 55.76, "grad_norm": 0.16473254561424255, "learning_rate": 0.0002916508100324013, "loss": 3.2675, "step": 348500 }, { "epoch": 55.776, "grad_norm": 0.1922348290681839, "learning_rate": 0.0002916484099363974, "loss": 3.4188, "step": 348600 }, { "epoch": 55.792, "grad_norm": 0.16183322668075562, "learning_rate": 0.0002916460098403936, "loss": 3.1852, "step": 348700 }, { "epoch": 55.808, "grad_norm": 0.16923582553863525, "learning_rate": 0.00029164360974438976, "loss": 3.2755, "step": 348800 }, { "epoch": 55.824, "grad_norm": 0.19045226275920868, "learning_rate": 0.0002916412096483859, "loss": 3.287, "step": 348900 }, { "epoch": 55.84, "grad_norm": 0.12736929953098297, "learning_rate": 0.0002916388095523821, "loss": 3.2743, "step": 349000 }, { "epoch": 55.856, "grad_norm": 0.16341276466846466, "learning_rate": 0.00029163640945637827, "loss": 3.4489, "step": 349100 }, { "epoch": 55.872, "grad_norm": 0.12756989896297455, "learning_rate": 0.0002916340093603744, "loss": 3.325, "step": 349200 }, { "epoch": 55.888, "grad_norm": 0.18688157200813293, "learning_rate": 0.00029163160926437055, "loss": 3.3165, "step": 349300 }, { "epoch": 55.904, "grad_norm": 0.13976339995861053, "learning_rate": 0.0002916292091683667, "loss": 3.3544, "step": 349400 }, { "epoch": 55.92, "grad_norm": 0.15256820619106293, "learning_rate": 0.0002916268090723629, "loss": 3.4997, "step": 349500 }, { "epoch": 55.936, "grad_norm": 0.14509008824825287, "learning_rate": 0.00029162440897635906, "loss": 3.2052, "step": 349600 }, { "epoch": 55.952, "grad_norm": 0.15143431723117828, "learning_rate": 0.00029162200888035517, "loss": 3.0511, "step": 349700 }, { "epoch": 55.968, "grad_norm": 0.14948220551013947, "learning_rate": 0.00029161960878435134, "loss": 3.4428, "step": 349800 }, { "epoch": 55.984, "grad_norm": 0.14705240726470947, "learning_rate": 0.0002916172086883475, "loss": 3.0517, "step": 349900 }, { "epoch": 56.0, "grad_norm": 0.1536393016576767, "learning_rate": 0.0002916148085923437, "loss": 3.2781, "step": 350000 }, { "epoch": 56.016, "grad_norm": 0.18801458179950714, "learning_rate": 0.00029161240849633985, "loss": 3.2739, "step": 350100 }, { "epoch": 56.032, "grad_norm": 0.16297225654125214, "learning_rate": 0.000291610008400336, "loss": 3.0651, "step": 350200 }, { "epoch": 56.048, "grad_norm": 0.12642435729503632, "learning_rate": 0.00029160760830433213, "loss": 3.173, "step": 350300 }, { "epoch": 56.064, "grad_norm": 0.14561578631401062, "learning_rate": 0.0002916052082083283, "loss": 3.2296, "step": 350400 }, { "epoch": 56.08, "grad_norm": 0.2067379653453827, "learning_rate": 0.00029160280811232447, "loss": 3.2915, "step": 350500 }, { "epoch": 56.096, "grad_norm": 0.13118477165699005, "learning_rate": 0.00029160040801632064, "loss": 3.1141, "step": 350600 }, { "epoch": 56.112, "grad_norm": 0.19944092631340027, "learning_rate": 0.0002915980079203168, "loss": 3.3139, "step": 350700 }, { "epoch": 56.128, "grad_norm": 0.1524505615234375, "learning_rate": 0.0002915956078243129, "loss": 3.2494, "step": 350800 }, { "epoch": 56.144, "grad_norm": 0.14479561150074005, "learning_rate": 0.0002915932077283091, "loss": 3.1644, "step": 350900 }, { "epoch": 56.16, "grad_norm": 0.16691967844963074, "learning_rate": 0.00029159083163326534, "loss": 3.1651, "step": 351000 }, { "epoch": 56.176, "grad_norm": 0.1572296917438507, "learning_rate": 0.0002915884315372615, "loss": 3.4227, "step": 351100 }, { "epoch": 56.192, "grad_norm": 0.18185962736606598, "learning_rate": 0.0002915860314412576, "loss": 3.5621, "step": 351200 }, { "epoch": 56.208, "grad_norm": 0.14918136596679688, "learning_rate": 0.0002915836313452538, "loss": 3.12, "step": 351300 }, { "epoch": 56.224, "grad_norm": 0.17335720360279083, "learning_rate": 0.00029158123124924996, "loss": 3.529, "step": 351400 }, { "epoch": 56.24, "grad_norm": 0.1472681760787964, "learning_rate": 0.00029157883115324613, "loss": 3.0796, "step": 351500 }, { "epoch": 56.256, "grad_norm": 0.1392885446548462, "learning_rate": 0.0002915764310572423, "loss": 3.2153, "step": 351600 }, { "epoch": 56.272, "grad_norm": 0.18404176831245422, "learning_rate": 0.0002915740309612384, "loss": 3.1954, "step": 351700 }, { "epoch": 56.288, "grad_norm": 0.12585780024528503, "learning_rate": 0.0002915716308652346, "loss": 3.1368, "step": 351800 }, { "epoch": 56.304, "grad_norm": 0.1552829146385193, "learning_rate": 0.00029156923076923075, "loss": 3.0194, "step": 351900 }, { "epoch": 56.32, "grad_norm": 0.17628629505634308, "learning_rate": 0.0002915668306732269, "loss": 3.2698, "step": 352000 }, { "epoch": 56.336, "grad_norm": 0.20134682953357697, "learning_rate": 0.0002915644305772231, "loss": 3.4574, "step": 352100 }, { "epoch": 56.352, "grad_norm": 0.15373845398426056, "learning_rate": 0.00029156203048121926, "loss": 3.3448, "step": 352200 }, { "epoch": 56.368, "grad_norm": 0.17761637270450592, "learning_rate": 0.0002915596303852154, "loss": 3.2381, "step": 352300 }, { "epoch": 56.384, "grad_norm": 0.15201790630817413, "learning_rate": 0.00029155723028921154, "loss": 3.5203, "step": 352400 }, { "epoch": 56.4, "grad_norm": 0.16162855923175812, "learning_rate": 0.0002915548301932077, "loss": 3.1569, "step": 352500 }, { "epoch": 56.416, "grad_norm": 0.1852641999721527, "learning_rate": 0.0002915524300972039, "loss": 3.401, "step": 352600 }, { "epoch": 56.432, "grad_norm": 0.15688061714172363, "learning_rate": 0.00029155003000120005, "loss": 3.2465, "step": 352700 }, { "epoch": 56.448, "grad_norm": 0.19564314186573029, "learning_rate": 0.00029154762990519616, "loss": 3.2482, "step": 352800 }, { "epoch": 56.464, "grad_norm": 0.14061832427978516, "learning_rate": 0.00029154522980919233, "loss": 3.1447, "step": 352900 }, { "epoch": 56.48, "grad_norm": 0.17311477661132812, "learning_rate": 0.0002915428297131885, "loss": 3.2265, "step": 353000 }, { "epoch": 56.496, "grad_norm": 0.143361896276474, "learning_rate": 0.00029154042961718467, "loss": 3.5229, "step": 353100 }, { "epoch": 56.512, "grad_norm": 0.1558486521244049, "learning_rate": 0.00029153802952118084, "loss": 3.2997, "step": 353200 }, { "epoch": 56.528, "grad_norm": 0.1404162347316742, "learning_rate": 0.000291535629425177, "loss": 3.2968, "step": 353300 }, { "epoch": 56.544, "grad_norm": 0.18301041424274445, "learning_rate": 0.0002915332293291731, "loss": 3.2361, "step": 353400 }, { "epoch": 56.56, "grad_norm": 0.1540903002023697, "learning_rate": 0.0002915308292331693, "loss": 3.3714, "step": 353500 }, { "epoch": 56.576, "grad_norm": 0.1258212774991989, "learning_rate": 0.0002915284531381255, "loss": 3.2226, "step": 353600 }, { "epoch": 56.592, "grad_norm": 0.1402517706155777, "learning_rate": 0.00029152605304212165, "loss": 3.1482, "step": 353700 }, { "epoch": 56.608, "grad_norm": 0.1720636636018753, "learning_rate": 0.0002915236529461178, "loss": 3.0305, "step": 353800 }, { "epoch": 56.624, "grad_norm": 0.13369300961494446, "learning_rate": 0.000291521252850114, "loss": 3.3346, "step": 353900 }, { "epoch": 56.64, "grad_norm": 0.18969473242759705, "learning_rate": 0.00029151885275411016, "loss": 3.119, "step": 354000 }, { "epoch": 56.656, "grad_norm": 0.15779845416545868, "learning_rate": 0.00029151645265810633, "loss": 3.4768, "step": 354100 }, { "epoch": 56.672, "grad_norm": 0.15905262529850006, "learning_rate": 0.0002915140525621025, "loss": 3.3682, "step": 354200 }, { "epoch": 56.688, "grad_norm": 0.15098144114017487, "learning_rate": 0.0002915116524660986, "loss": 3.2982, "step": 354300 }, { "epoch": 56.704, "grad_norm": 0.14586225152015686, "learning_rate": 0.0002915092523700948, "loss": 3.2062, "step": 354400 }, { "epoch": 56.72, "grad_norm": 0.16942176222801208, "learning_rate": 0.00029150685227409095, "loss": 3.3128, "step": 354500 }, { "epoch": 56.736, "grad_norm": 0.1610814929008484, "learning_rate": 0.0002915044521780871, "loss": 3.1303, "step": 354600 }, { "epoch": 56.752, "grad_norm": 0.13901068270206451, "learning_rate": 0.0002915020520820833, "loss": 3.1036, "step": 354700 }, { "epoch": 56.768, "grad_norm": 0.16940155625343323, "learning_rate": 0.0002914996519860794, "loss": 3.2535, "step": 354800 }, { "epoch": 56.784, "grad_norm": 0.12776970863342285, "learning_rate": 0.0002914972518900756, "loss": 3.5039, "step": 354900 }, { "epoch": 56.8, "grad_norm": 0.12710531055927277, "learning_rate": 0.00029149485179407174, "loss": 2.978, "step": 355000 }, { "epoch": 56.816, "grad_norm": 0.13047654926776886, "learning_rate": 0.0002914924516980679, "loss": 3.3318, "step": 355100 }, { "epoch": 56.832, "grad_norm": 0.18726320564746857, "learning_rate": 0.0002914900516020641, "loss": 3.3697, "step": 355200 }, { "epoch": 56.848, "grad_norm": 0.1398634910583496, "learning_rate": 0.00029148765150606025, "loss": 3.2642, "step": 355300 }, { "epoch": 56.864, "grad_norm": 0.17624936997890472, "learning_rate": 0.00029148525141005637, "loss": 3.2153, "step": 355400 }, { "epoch": 56.88, "grad_norm": 0.1277901530265808, "learning_rate": 0.00029148285131405254, "loss": 3.3253, "step": 355500 }, { "epoch": 56.896, "grad_norm": 0.17514747381210327, "learning_rate": 0.0002914804512180487, "loss": 3.2563, "step": 355600 }, { "epoch": 56.912, "grad_norm": 0.16842126846313477, "learning_rate": 0.0002914780511220449, "loss": 3.5709, "step": 355700 }, { "epoch": 56.928, "grad_norm": 0.23872219026088715, "learning_rate": 0.00029147565102604104, "loss": 3.4695, "step": 355800 }, { "epoch": 56.944, "grad_norm": 0.12151539325714111, "learning_rate": 0.00029147325093003716, "loss": 3.1643, "step": 355900 }, { "epoch": 56.96, "grad_norm": 0.13515882194042206, "learning_rate": 0.00029147085083403333, "loss": 3.3505, "step": 356000 }, { "epoch": 56.976, "grad_norm": 0.1254294365644455, "learning_rate": 0.0002914684507380295, "loss": 3.163, "step": 356100 }, { "epoch": 56.992, "grad_norm": 0.12843842804431915, "learning_rate": 0.0002914660746429857, "loss": 3.3363, "step": 356200 }, { "epoch": 57.008, "grad_norm": 0.13720254600048065, "learning_rate": 0.00029146367454698185, "loss": 3.2139, "step": 356300 }, { "epoch": 57.024, "grad_norm": 0.19129368662834167, "learning_rate": 0.000291461274450978, "loss": 3.183, "step": 356400 }, { "epoch": 57.04, "grad_norm": 0.1447821408510208, "learning_rate": 0.00029145887435497414, "loss": 3.3587, "step": 356500 }, { "epoch": 57.056, "grad_norm": 0.13338378071784973, "learning_rate": 0.0002914564742589703, "loss": 3.4234, "step": 356600 }, { "epoch": 57.072, "grad_norm": 0.18395552039146423, "learning_rate": 0.00029145409816392655, "loss": 3.099, "step": 356700 }, { "epoch": 57.088, "grad_norm": 0.13404163718223572, "learning_rate": 0.00029145169806792267, "loss": 3.1528, "step": 356800 }, { "epoch": 57.104, "grad_norm": 0.13742248713970184, "learning_rate": 0.00029144929797191883, "loss": 3.3131, "step": 356900 }, { "epoch": 57.12, "grad_norm": 0.16354690492153168, "learning_rate": 0.000291446897875915, "loss": 3.0905, "step": 357000 }, { "epoch": 57.136, "grad_norm": 0.1914769411087036, "learning_rate": 0.00029144449777991117, "loss": 3.4735, "step": 357100 }, { "epoch": 57.152, "grad_norm": 0.17639845609664917, "learning_rate": 0.00029144209768390734, "loss": 3.1101, "step": 357200 }, { "epoch": 57.168, "grad_norm": 0.19483628869056702, "learning_rate": 0.0002914396975879035, "loss": 3.2269, "step": 357300 }, { "epoch": 57.184, "grad_norm": 0.14299693703651428, "learning_rate": 0.0002914372974918996, "loss": 3.3497, "step": 357400 }, { "epoch": 57.2, "grad_norm": 0.1425316333770752, "learning_rate": 0.0002914348973958958, "loss": 3.4182, "step": 357500 }, { "epoch": 57.216, "grad_norm": 0.15660178661346436, "learning_rate": 0.00029143249729989196, "loss": 3.2975, "step": 357600 }, { "epoch": 57.232, "grad_norm": 0.18289236724376678, "learning_rate": 0.00029143009720388813, "loss": 3.4425, "step": 357700 }, { "epoch": 57.248, "grad_norm": 0.14912857115268707, "learning_rate": 0.0002914276971078843, "loss": 3.1711, "step": 357800 }, { "epoch": 57.264, "grad_norm": 0.17567536234855652, "learning_rate": 0.0002914252970118804, "loss": 3.3659, "step": 357900 }, { "epoch": 57.28, "grad_norm": 0.20695307850837708, "learning_rate": 0.0002914228969158766, "loss": 3.4243, "step": 358000 }, { "epoch": 57.296, "grad_norm": 0.25137093663215637, "learning_rate": 0.00029142049681987276, "loss": 3.4766, "step": 358100 }, { "epoch": 57.312, "grad_norm": 0.15527541935443878, "learning_rate": 0.0002914180967238689, "loss": 2.9317, "step": 358200 }, { "epoch": 57.328, "grad_norm": 0.1875419020652771, "learning_rate": 0.0002914156966278651, "loss": 3.4211, "step": 358300 }, { "epoch": 57.344, "grad_norm": 0.16856013238430023, "learning_rate": 0.00029141329653186126, "loss": 3.3236, "step": 358400 }, { "epoch": 57.36, "grad_norm": 0.19241203367710114, "learning_rate": 0.0002914108964358574, "loss": 3.3603, "step": 358500 }, { "epoch": 57.376, "grad_norm": 0.2182588279247284, "learning_rate": 0.00029140849633985355, "loss": 3.2698, "step": 358600 }, { "epoch": 57.392, "grad_norm": 0.13696244359016418, "learning_rate": 0.0002914060962438497, "loss": 3.2898, "step": 358700 }, { "epoch": 57.408, "grad_norm": 0.12402555346488953, "learning_rate": 0.0002914036961478459, "loss": 3.2233, "step": 358800 }, { "epoch": 57.424, "grad_norm": 0.1568015068769455, "learning_rate": 0.00029140129605184206, "loss": 3.3678, "step": 358900 }, { "epoch": 57.44, "grad_norm": 0.1727108508348465, "learning_rate": 0.00029139889595583817, "loss": 3.1797, "step": 359000 }, { "epoch": 57.456, "grad_norm": 0.16543570160865784, "learning_rate": 0.00029139649585983434, "loss": 3.2405, "step": 359100 }, { "epoch": 57.472, "grad_norm": 0.1571476012468338, "learning_rate": 0.0002913940957638305, "loss": 3.499, "step": 359200 }, { "epoch": 57.488, "grad_norm": 0.13967929780483246, "learning_rate": 0.0002913916956678267, "loss": 3.7313, "step": 359300 }, { "epoch": 57.504, "grad_norm": 0.25047239661216736, "learning_rate": 0.00029138929557182285, "loss": 3.1635, "step": 359400 }, { "epoch": 57.52, "grad_norm": 0.16786238551139832, "learning_rate": 0.000291386895475819, "loss": 3.6114, "step": 359500 }, { "epoch": 57.536, "grad_norm": 0.13720327615737915, "learning_rate": 0.00029138449537981513, "loss": 3.2239, "step": 359600 }, { "epoch": 57.552, "grad_norm": 0.14529596269130707, "learning_rate": 0.0002913820952838113, "loss": 3.3812, "step": 359700 }, { "epoch": 57.568, "grad_norm": 0.21819429099559784, "learning_rate": 0.00029137969518780747, "loss": 3.249, "step": 359800 }, { "epoch": 57.584, "grad_norm": 0.14769448339939117, "learning_rate": 0.00029137729509180364, "loss": 3.2365, "step": 359900 }, { "epoch": 57.6, "grad_norm": 0.149019256234169, "learning_rate": 0.0002913748949957998, "loss": 3.4686, "step": 360000 }, { "epoch": 57.616, "grad_norm": 0.15087425708770752, "learning_rate": 0.000291372494899796, "loss": 3.3603, "step": 360100 }, { "epoch": 57.632, "grad_norm": 0.1766323298215866, "learning_rate": 0.00029137009480379215, "loss": 3.0507, "step": 360200 }, { "epoch": 57.648, "grad_norm": 0.1742042899131775, "learning_rate": 0.0002913676947077883, "loss": 3.3158, "step": 360300 }, { "epoch": 57.664, "grad_norm": 0.15073540806770325, "learning_rate": 0.0002913652946117845, "loss": 3.0376, "step": 360400 }, { "epoch": 57.68, "grad_norm": 0.15346968173980713, "learning_rate": 0.0002913628945157806, "loss": 3.167, "step": 360500 }, { "epoch": 57.696, "grad_norm": 0.14456668496131897, "learning_rate": 0.00029136049441977677, "loss": 3.3747, "step": 360600 }, { "epoch": 57.712, "grad_norm": 0.15459845960140228, "learning_rate": 0.00029135809432377294, "loss": 3.1525, "step": 360700 }, { "epoch": 57.728, "grad_norm": 0.2050773650407791, "learning_rate": 0.0002913556942277691, "loss": 3.1547, "step": 360800 }, { "epoch": 57.744, "grad_norm": 0.1603861153125763, "learning_rate": 0.0002913532941317653, "loss": 3.0536, "step": 360900 }, { "epoch": 57.76, "grad_norm": 0.16386431455612183, "learning_rate": 0.0002913508940357614, "loss": 3.1751, "step": 361000 }, { "epoch": 57.776, "grad_norm": 0.13957689702510834, "learning_rate": 0.00029134849393975756, "loss": 3.0823, "step": 361100 }, { "epoch": 57.792, "grad_norm": 0.1812535524368286, "learning_rate": 0.00029134611784471375, "loss": 3.441, "step": 361200 }, { "epoch": 57.808, "grad_norm": 0.13382668793201447, "learning_rate": 0.0002913437177487099, "loss": 3.1329, "step": 361300 }, { "epoch": 57.824, "grad_norm": 0.1505519598722458, "learning_rate": 0.0002913413176527061, "loss": 3.2057, "step": 361400 }, { "epoch": 57.84, "grad_norm": 0.1993977129459381, "learning_rate": 0.00029133891755670226, "loss": 3.6493, "step": 361500 }, { "epoch": 57.856, "grad_norm": 0.1695670485496521, "learning_rate": 0.00029133651746069837, "loss": 3.14, "step": 361600 }, { "epoch": 57.872, "grad_norm": 0.16164475679397583, "learning_rate": 0.00029133411736469454, "loss": 3.3347, "step": 361700 }, { "epoch": 57.888, "grad_norm": 0.22371141612529755, "learning_rate": 0.0002913317172686907, "loss": 3.3503, "step": 361800 }, { "epoch": 57.904, "grad_norm": 0.16976477205753326, "learning_rate": 0.0002913293171726869, "loss": 3.2141, "step": 361900 }, { "epoch": 57.92, "grad_norm": 0.17026029527187347, "learning_rate": 0.00029132691707668305, "loss": 3.085, "step": 362000 }, { "epoch": 57.936, "grad_norm": 0.1460045576095581, "learning_rate": 0.00029132451698067916, "loss": 3.3066, "step": 362100 }, { "epoch": 57.952, "grad_norm": 0.12749767303466797, "learning_rate": 0.00029132211688467533, "loss": 3.2423, "step": 362200 }, { "epoch": 57.968, "grad_norm": 0.19256210327148438, "learning_rate": 0.0002913197167886715, "loss": 2.9048, "step": 362300 }, { "epoch": 57.984, "grad_norm": 0.1382358819246292, "learning_rate": 0.00029131731669266767, "loss": 3.2404, "step": 362400 }, { "epoch": 58.0, "grad_norm": 0.14838235080242157, "learning_rate": 0.00029131491659666384, "loss": 3.0462, "step": 362500 }, { "epoch": 58.016, "grad_norm": 0.1447829157114029, "learning_rate": 0.00029131251650066, "loss": 3.4776, "step": 362600 }, { "epoch": 58.032, "grad_norm": 0.15682578086853027, "learning_rate": 0.0002913101164046561, "loss": 2.9786, "step": 362700 }, { "epoch": 58.048, "grad_norm": 0.14890989661216736, "learning_rate": 0.0002913077163086523, "loss": 3.1137, "step": 362800 }, { "epoch": 58.064, "grad_norm": 0.1416264921426773, "learning_rate": 0.00029130531621264846, "loss": 3.0124, "step": 362900 }, { "epoch": 58.08, "grad_norm": 0.1471136063337326, "learning_rate": 0.00029130291611664463, "loss": 3.1431, "step": 363000 }, { "epoch": 58.096, "grad_norm": 0.17947404086589813, "learning_rate": 0.0002913005160206408, "loss": 3.1943, "step": 363100 }, { "epoch": 58.112, "grad_norm": 0.16566507518291473, "learning_rate": 0.000291298139925597, "loss": 3.6901, "step": 363200 }, { "epoch": 58.128, "grad_norm": 0.14840535819530487, "learning_rate": 0.00029129573982959316, "loss": 3.3726, "step": 363300 }, { "epoch": 58.144, "grad_norm": 0.20876899361610413, "learning_rate": 0.00029129333973358933, "loss": 3.1163, "step": 363400 }, { "epoch": 58.16, "grad_norm": 0.17489516735076904, "learning_rate": 0.0002912909396375855, "loss": 2.9799, "step": 363500 }, { "epoch": 58.176, "grad_norm": 0.1567423790693283, "learning_rate": 0.0002912885395415816, "loss": 3.5313, "step": 363600 }, { "epoch": 58.192, "grad_norm": 0.15877550840377808, "learning_rate": 0.0002912861394455778, "loss": 3.1961, "step": 363700 }, { "epoch": 58.208, "grad_norm": 0.1602148413658142, "learning_rate": 0.00029128373934957395, "loss": 3.2042, "step": 363800 }, { "epoch": 58.224, "grad_norm": 0.17230960726737976, "learning_rate": 0.0002912813392535701, "loss": 3.2267, "step": 363900 }, { "epoch": 58.24, "grad_norm": 0.13007359206676483, "learning_rate": 0.0002912789391575663, "loss": 3.2864, "step": 364000 }, { "epoch": 58.256, "grad_norm": 0.156801238656044, "learning_rate": 0.0002912765390615624, "loss": 3.1493, "step": 364100 }, { "epoch": 58.272, "grad_norm": 0.17613616585731506, "learning_rate": 0.0002912741389655586, "loss": 3.3055, "step": 364200 }, { "epoch": 58.288, "grad_norm": 0.18228021264076233, "learning_rate": 0.00029127173886955474, "loss": 3.3582, "step": 364300 }, { "epoch": 58.304, "grad_norm": 0.153053417801857, "learning_rate": 0.0002912693387735509, "loss": 3.4398, "step": 364400 }, { "epoch": 58.32, "grad_norm": 0.12198405712842941, "learning_rate": 0.0002912669386775471, "loss": 3.1336, "step": 364500 }, { "epoch": 58.336, "grad_norm": 0.14909978210926056, "learning_rate": 0.00029126453858154325, "loss": 3.2418, "step": 364600 }, { "epoch": 58.352, "grad_norm": 0.1476384848356247, "learning_rate": 0.00029126213848553937, "loss": 3.0851, "step": 364700 }, { "epoch": 58.368, "grad_norm": 0.2101411372423172, "learning_rate": 0.00029125973838953553, "loss": 3.5485, "step": 364800 }, { "epoch": 58.384, "grad_norm": 0.1767812818288803, "learning_rate": 0.0002912573382935317, "loss": 3.1665, "step": 364900 }, { "epoch": 58.4, "grad_norm": 0.17437565326690674, "learning_rate": 0.00029125493819752787, "loss": 3.2808, "step": 365000 }, { "epoch": 58.416, "grad_norm": 0.1894117295742035, "learning_rate": 0.00029125253810152404, "loss": 3.4529, "step": 365100 }, { "epoch": 58.432, "grad_norm": 0.14577741920948029, "learning_rate": 0.00029125013800552016, "loss": 3.254, "step": 365200 }, { "epoch": 58.448, "grad_norm": 0.17576606571674347, "learning_rate": 0.0002912477379095163, "loss": 3.4867, "step": 365300 }, { "epoch": 58.464, "grad_norm": 0.16922816634178162, "learning_rate": 0.0002912453378135125, "loss": 3.2409, "step": 365400 }, { "epoch": 58.48, "grad_norm": 0.14055779576301575, "learning_rate": 0.00029124293771750866, "loss": 3.3412, "step": 365500 }, { "epoch": 58.496, "grad_norm": 0.15684819221496582, "learning_rate": 0.00029124053762150483, "loss": 3.1562, "step": 365600 }, { "epoch": 58.512, "grad_norm": 0.15084801614284515, "learning_rate": 0.000291238137525501, "loss": 3.3939, "step": 365700 }, { "epoch": 58.528, "grad_norm": 0.136796772480011, "learning_rate": 0.0002912357374294971, "loss": 3.3518, "step": 365800 }, { "epoch": 58.544, "grad_norm": 0.16891789436340332, "learning_rate": 0.0002912333373334933, "loss": 3.4653, "step": 365900 }, { "epoch": 58.56, "grad_norm": 0.15335536003112793, "learning_rate": 0.00029123093723748946, "loss": 3.3748, "step": 366000 }, { "epoch": 58.576, "grad_norm": 0.15386122465133667, "learning_rate": 0.0002912285371414856, "loss": 3.3598, "step": 366100 }, { "epoch": 58.592, "grad_norm": 0.15091300010681152, "learning_rate": 0.0002912261370454818, "loss": 3.2422, "step": 366200 }, { "epoch": 58.608, "grad_norm": 0.18546633422374725, "learning_rate": 0.00029122373694947796, "loss": 3.0966, "step": 366300 }, { "epoch": 58.624, "grad_norm": 0.1467442512512207, "learning_rate": 0.00029122133685347413, "loss": 3.1563, "step": 366400 }, { "epoch": 58.64, "grad_norm": 0.1601731777191162, "learning_rate": 0.0002912189367574703, "loss": 3.4466, "step": 366500 }, { "epoch": 58.656, "grad_norm": 0.3009895086288452, "learning_rate": 0.00029121653666146647, "loss": 3.4999, "step": 366600 }, { "epoch": 58.672, "grad_norm": 0.1520434468984604, "learning_rate": 0.0002912141605664226, "loss": 3.1082, "step": 366700 }, { "epoch": 58.688, "grad_norm": 0.12060481309890747, "learning_rate": 0.0002912117604704188, "loss": 3.0247, "step": 366800 }, { "epoch": 58.704, "grad_norm": 0.22017452120780945, "learning_rate": 0.00029120936037441494, "loss": 3.3371, "step": 366900 }, { "epoch": 58.72, "grad_norm": 0.15032631158828735, "learning_rate": 0.0002912069602784111, "loss": 3.0663, "step": 367000 }, { "epoch": 58.736, "grad_norm": 0.1333540827035904, "learning_rate": 0.0002912045601824073, "loss": 3.1077, "step": 367100 }, { "epoch": 58.752, "grad_norm": 0.15080656111240387, "learning_rate": 0.0002912021600864034, "loss": 3.2283, "step": 367200 }, { "epoch": 58.768, "grad_norm": 0.16152629256248474, "learning_rate": 0.00029119975999039957, "loss": 3.5652, "step": 367300 }, { "epoch": 58.784, "grad_norm": 0.4008745551109314, "learning_rate": 0.00029119735989439574, "loss": 3.328, "step": 367400 }, { "epoch": 58.8, "grad_norm": 0.18696098029613495, "learning_rate": 0.0002911949597983919, "loss": 3.3059, "step": 367500 }, { "epoch": 58.816, "grad_norm": 0.20765410363674164, "learning_rate": 0.0002911925597023881, "loss": 3.5866, "step": 367600 }, { "epoch": 58.832, "grad_norm": 0.1775040477514267, "learning_rate": 0.00029119015960638424, "loss": 3.2926, "step": 367700 }, { "epoch": 58.848, "grad_norm": 0.20725421607494354, "learning_rate": 0.00029118775951038036, "loss": 2.9919, "step": 367800 }, { "epoch": 58.864, "grad_norm": 0.20184873044490814, "learning_rate": 0.00029118535941437653, "loss": 3.2881, "step": 367900 }, { "epoch": 58.88, "grad_norm": 0.1488567441701889, "learning_rate": 0.0002911829593183727, "loss": 3.267, "step": 368000 }, { "epoch": 58.896, "grad_norm": 0.13947978615760803, "learning_rate": 0.00029118055922236887, "loss": 3.2904, "step": 368100 }, { "epoch": 58.912, "grad_norm": 0.12687301635742188, "learning_rate": 0.00029117815912636503, "loss": 3.0004, "step": 368200 }, { "epoch": 58.928, "grad_norm": 0.17583495378494263, "learning_rate": 0.0002911757590303612, "loss": 3.2475, "step": 368300 }, { "epoch": 58.944, "grad_norm": 0.16284632682800293, "learning_rate": 0.0002911733589343573, "loss": 3.114, "step": 368400 }, { "epoch": 58.96, "grad_norm": 0.2161373347043991, "learning_rate": 0.0002911709588383535, "loss": 3.093, "step": 368500 }, { "epoch": 58.976, "grad_norm": 0.19376417994499207, "learning_rate": 0.00029116855874234966, "loss": 3.2303, "step": 368600 }, { "epoch": 58.992, "grad_norm": 0.12099174410104752, "learning_rate": 0.0002911661586463458, "loss": 3.4999, "step": 368700 }, { "epoch": 59.008, "grad_norm": 0.17481458187103271, "learning_rate": 0.000291163758550342, "loss": 3.253, "step": 368800 }, { "epoch": 59.024, "grad_norm": 0.14900051057338715, "learning_rate": 0.0002911613824552982, "loss": 3.1804, "step": 368900 }, { "epoch": 59.04, "grad_norm": 0.17668017745018005, "learning_rate": 0.00029115898235929435, "loss": 3.0129, "step": 369000 }, { "epoch": 59.056, "grad_norm": 0.15841874480247498, "learning_rate": 0.0002911565822632905, "loss": 3.271, "step": 369100 }, { "epoch": 59.072, "grad_norm": 0.18059758841991425, "learning_rate": 0.00029115418216728664, "loss": 3.0695, "step": 369200 }, { "epoch": 59.088, "grad_norm": 0.1598978340625763, "learning_rate": 0.0002911517820712828, "loss": 3.2757, "step": 369300 }, { "epoch": 59.104, "grad_norm": 0.1644565761089325, "learning_rate": 0.000291149381975279, "loss": 3.3485, "step": 369400 }, { "epoch": 59.12, "grad_norm": 0.15315434336662292, "learning_rate": 0.00029114698187927515, "loss": 3.0598, "step": 369500 }, { "epoch": 59.136, "grad_norm": 0.16145703196525574, "learning_rate": 0.0002911445817832713, "loss": 3.4571, "step": 369600 }, { "epoch": 59.152, "grad_norm": 0.15226326882839203, "learning_rate": 0.0002911421816872675, "loss": 3.7084, "step": 369700 }, { "epoch": 59.168, "grad_norm": 0.16199994087219238, "learning_rate": 0.0002911397815912636, "loss": 3.169, "step": 369800 }, { "epoch": 59.184, "grad_norm": 0.17364518344402313, "learning_rate": 0.00029113738149525977, "loss": 3.2016, "step": 369900 }, { "epoch": 59.2, "grad_norm": 0.17422154545783997, "learning_rate": 0.00029113498139925594, "loss": 3.2768, "step": 370000 }, { "epoch": 59.216, "grad_norm": 0.20733851194381714, "learning_rate": 0.0002911325813032521, "loss": 3.2377, "step": 370100 }, { "epoch": 59.232, "grad_norm": 0.17871487140655518, "learning_rate": 0.0002911301812072483, "loss": 3.1254, "step": 370200 }, { "epoch": 59.248, "grad_norm": 0.17736205458641052, "learning_rate": 0.00029112778111124444, "loss": 3.2014, "step": 370300 }, { "epoch": 59.264, "grad_norm": 0.1444612741470337, "learning_rate": 0.00029112538101524056, "loss": 3.2735, "step": 370400 }, { "epoch": 59.28, "grad_norm": 0.18607400357723236, "learning_rate": 0.00029112298091923673, "loss": 3.3425, "step": 370500 }, { "epoch": 59.296, "grad_norm": 0.17451156675815582, "learning_rate": 0.0002911205808232329, "loss": 3.4253, "step": 370600 }, { "epoch": 59.312, "grad_norm": 0.23071958124637604, "learning_rate": 0.00029111818072722907, "loss": 3.2414, "step": 370700 }, { "epoch": 59.328, "grad_norm": 0.16361716389656067, "learning_rate": 0.00029111578063122524, "loss": 3.186, "step": 370800 }, { "epoch": 59.344, "grad_norm": 0.14478786289691925, "learning_rate": 0.00029111338053522135, "loss": 3.24, "step": 370900 }, { "epoch": 59.36, "grad_norm": 0.1570741832256317, "learning_rate": 0.0002911109804392175, "loss": 3.4196, "step": 371000 }, { "epoch": 59.376, "grad_norm": 0.157021626830101, "learning_rate": 0.0002911085803432137, "loss": 3.3199, "step": 371100 }, { "epoch": 59.392, "grad_norm": 0.1522521823644638, "learning_rate": 0.0002911062042481699, "loss": 3.0774, "step": 371200 }, { "epoch": 59.408, "grad_norm": 0.2178211659193039, "learning_rate": 0.00029110380415216605, "loss": 3.2867, "step": 371300 }, { "epoch": 59.424, "grad_norm": 0.1403617560863495, "learning_rate": 0.00029110142805712224, "loss": 3.1934, "step": 371400 }, { "epoch": 59.44, "grad_norm": 0.14956898987293243, "learning_rate": 0.0002910990279611184, "loss": 3.185, "step": 371500 }, { "epoch": 59.456, "grad_norm": 0.15448886156082153, "learning_rate": 0.0002910966278651146, "loss": 3.4449, "step": 371600 }, { "epoch": 59.472, "grad_norm": 0.17708981037139893, "learning_rate": 0.00029109422776911074, "loss": 3.2147, "step": 371700 }, { "epoch": 59.488, "grad_norm": 0.18961367011070251, "learning_rate": 0.0002910918276731069, "loss": 3.0906, "step": 371800 }, { "epoch": 59.504, "grad_norm": 0.15669892728328705, "learning_rate": 0.0002910894275771031, "loss": 2.9546, "step": 371900 }, { "epoch": 59.52, "grad_norm": 0.16848404705524445, "learning_rate": 0.00029108702748109925, "loss": 3.3181, "step": 372000 }, { "epoch": 59.536, "grad_norm": 0.16907978057861328, "learning_rate": 0.00029108462738509537, "loss": 3.1459, "step": 372100 }, { "epoch": 59.552, "grad_norm": 0.1594504415988922, "learning_rate": 0.00029108222728909154, "loss": 3.4487, "step": 372200 }, { "epoch": 59.568, "grad_norm": 0.13635195791721344, "learning_rate": 0.0002910798271930877, "loss": 3.261, "step": 372300 }, { "epoch": 59.584, "grad_norm": 0.1518363058567047, "learning_rate": 0.0002910774270970839, "loss": 3.333, "step": 372400 }, { "epoch": 59.6, "grad_norm": 0.1379927098751068, "learning_rate": 0.00029107502700108004, "loss": 3.1654, "step": 372500 }, { "epoch": 59.616, "grad_norm": 0.18691717088222504, "learning_rate": 0.0002910726269050762, "loss": 3.1282, "step": 372600 }, { "epoch": 59.632, "grad_norm": 0.1514362096786499, "learning_rate": 0.0002910702268090723, "loss": 3.1979, "step": 372700 }, { "epoch": 59.648, "grad_norm": 0.153708815574646, "learning_rate": 0.0002910678267130685, "loss": 3.1382, "step": 372800 }, { "epoch": 59.664, "grad_norm": 0.15434116125106812, "learning_rate": 0.00029106542661706467, "loss": 3.3677, "step": 372900 }, { "epoch": 59.68, "grad_norm": 0.17685338854789734, "learning_rate": 0.00029106302652106083, "loss": 3.0645, "step": 373000 }, { "epoch": 59.696, "grad_norm": 0.1551292985677719, "learning_rate": 0.000291060626425057, "loss": 3.2981, "step": 373100 }, { "epoch": 59.712, "grad_norm": 0.1384434849023819, "learning_rate": 0.0002910582263290531, "loss": 3.3143, "step": 373200 }, { "epoch": 59.728, "grad_norm": 0.1918603479862213, "learning_rate": 0.0002910558262330493, "loss": 3.434, "step": 373300 }, { "epoch": 59.744, "grad_norm": 0.12813661992549896, "learning_rate": 0.00029105342613704546, "loss": 3.3954, "step": 373400 }, { "epoch": 59.76, "grad_norm": 0.18287447094917297, "learning_rate": 0.0002910510260410416, "loss": 3.3172, "step": 373500 }, { "epoch": 59.776, "grad_norm": 0.18375514447689056, "learning_rate": 0.0002910486259450378, "loss": 3.2704, "step": 373600 }, { "epoch": 59.792, "grad_norm": 0.16260720789432526, "learning_rate": 0.00029104622584903396, "loss": 3.3452, "step": 373700 }, { "epoch": 59.808, "grad_norm": 0.14883427321910858, "learning_rate": 0.0002910438257530301, "loss": 3.28, "step": 373800 }, { "epoch": 59.824, "grad_norm": 0.14226073026657104, "learning_rate": 0.00029104142565702625, "loss": 3.4588, "step": 373900 }, { "epoch": 59.84, "grad_norm": 0.1697024554014206, "learning_rate": 0.0002910390255610224, "loss": 3.1294, "step": 374000 }, { "epoch": 59.856, "grad_norm": 0.14510953426361084, "learning_rate": 0.0002910366254650186, "loss": 3.2372, "step": 374100 }, { "epoch": 59.872, "grad_norm": 0.14994293451309204, "learning_rate": 0.00029103422536901476, "loss": 3.2777, "step": 374200 }, { "epoch": 59.888, "grad_norm": 0.15765146911144257, "learning_rate": 0.0002910318252730109, "loss": 3.1029, "step": 374300 }, { "epoch": 59.904, "grad_norm": 0.1684778928756714, "learning_rate": 0.00029102942517700704, "loss": 3.3326, "step": 374400 }, { "epoch": 59.92, "grad_norm": 0.16783316433429718, "learning_rate": 0.0002910270250810032, "loss": 3.3698, "step": 374500 }, { "epoch": 59.936, "grad_norm": 0.16612444818019867, "learning_rate": 0.0002910246489859594, "loss": 3.1311, "step": 374600 }, { "epoch": 59.952, "grad_norm": 0.15881246328353882, "learning_rate": 0.00029102224888995557, "loss": 3.401, "step": 374700 }, { "epoch": 59.968, "grad_norm": 0.15880317986011505, "learning_rate": 0.00029101984879395174, "loss": 3.2849, "step": 374800 }, { "epoch": 59.984, "grad_norm": 0.15323412418365479, "learning_rate": 0.0002910174486979479, "loss": 3.4673, "step": 374900 }, { "epoch": 60.0, "grad_norm": 0.14061066508293152, "learning_rate": 0.0002910150486019441, "loss": 3.164, "step": 375000 }, { "epoch": 60.016, "grad_norm": 0.14499811828136444, "learning_rate": 0.00029101264850594024, "loss": 3.0783, "step": 375100 }, { "epoch": 60.032, "grad_norm": 0.13418063521385193, "learning_rate": 0.00029101024840993636, "loss": 3.1207, "step": 375200 }, { "epoch": 60.048, "grad_norm": 0.15519078075885773, "learning_rate": 0.00029100784831393253, "loss": 3.1144, "step": 375300 }, { "epoch": 60.064, "grad_norm": 0.12749047577381134, "learning_rate": 0.0002910054482179287, "loss": 2.9804, "step": 375400 }, { "epoch": 60.08, "grad_norm": 0.16074252128601074, "learning_rate": 0.00029100304812192487, "loss": 3.5591, "step": 375500 }, { "epoch": 60.096, "grad_norm": 0.18338169157505035, "learning_rate": 0.00029100064802592104, "loss": 3.271, "step": 375600 }, { "epoch": 60.112, "grad_norm": 0.1908230036497116, "learning_rate": 0.0002909982479299172, "loss": 3.2776, "step": 375700 }, { "epoch": 60.128, "grad_norm": 0.19279688596725464, "learning_rate": 0.0002909958478339133, "loss": 3.4064, "step": 375800 }, { "epoch": 60.144, "grad_norm": 0.14879485964775085, "learning_rate": 0.0002909934477379095, "loss": 3.4772, "step": 375900 }, { "epoch": 60.16, "grad_norm": 0.14709025621414185, "learning_rate": 0.00029099104764190566, "loss": 3.465, "step": 376000 }, { "epoch": 60.176, "grad_norm": 0.16434945166110992, "learning_rate": 0.00029098864754590183, "loss": 3.3129, "step": 376100 }, { "epoch": 60.192, "grad_norm": 0.15540476143360138, "learning_rate": 0.000290986247449898, "loss": 3.3198, "step": 376200 }, { "epoch": 60.208, "grad_norm": 0.1887914091348648, "learning_rate": 0.00029098384735389417, "loss": 3.4658, "step": 376300 }, { "epoch": 60.224, "grad_norm": 0.1829802691936493, "learning_rate": 0.0002909814472578903, "loss": 3.2588, "step": 376400 }, { "epoch": 60.24, "grad_norm": 0.17028504610061646, "learning_rate": 0.00029097904716188645, "loss": 3.3309, "step": 376500 }, { "epoch": 60.256, "grad_norm": 0.1807115375995636, "learning_rate": 0.0002909766470658826, "loss": 3.443, "step": 376600 }, { "epoch": 60.272, "grad_norm": 0.12593059241771698, "learning_rate": 0.0002909742469698788, "loss": 3.2253, "step": 376700 }, { "epoch": 60.288, "grad_norm": 0.17031250894069672, "learning_rate": 0.00029097184687387496, "loss": 3.3609, "step": 376800 }, { "epoch": 60.304, "grad_norm": 0.1526140421628952, "learning_rate": 0.00029096944677787107, "loss": 3.2401, "step": 376900 }, { "epoch": 60.32, "grad_norm": 0.14587371051311493, "learning_rate": 0.00029096704668186724, "loss": 3.2932, "step": 377000 }, { "epoch": 60.336, "grad_norm": 0.1644798368215561, "learning_rate": 0.0002909646465858634, "loss": 3.2645, "step": 377100 }, { "epoch": 60.352, "grad_norm": 0.1827557533979416, "learning_rate": 0.0002909622464898596, "loss": 3.2748, "step": 377200 }, { "epoch": 60.368, "grad_norm": 0.13262972235679626, "learning_rate": 0.00029095987039481577, "loss": 3.0798, "step": 377300 }, { "epoch": 60.384, "grad_norm": 0.16021347045898438, "learning_rate": 0.00029095747029881194, "loss": 3.3912, "step": 377400 }, { "epoch": 60.4, "grad_norm": 0.14383332431316376, "learning_rate": 0.00029095507020280805, "loss": 3.4172, "step": 377500 }, { "epoch": 60.416, "grad_norm": 0.18421560525894165, "learning_rate": 0.0002909526701068042, "loss": 3.2977, "step": 377600 }, { "epoch": 60.432, "grad_norm": 0.28111231327056885, "learning_rate": 0.0002909502700108004, "loss": 2.9646, "step": 377700 }, { "epoch": 60.448, "grad_norm": 0.15469829738140106, "learning_rate": 0.00029094786991479656, "loss": 3.3137, "step": 377800 }, { "epoch": 60.464, "grad_norm": 0.16736319661140442, "learning_rate": 0.00029094546981879273, "loss": 3.0557, "step": 377900 }, { "epoch": 60.48, "grad_norm": 0.17600922286510468, "learning_rate": 0.0002909430697227889, "loss": 3.4611, "step": 378000 }, { "epoch": 60.496, "grad_norm": 0.11533287912607193, "learning_rate": 0.00029094066962678507, "loss": 3.1738, "step": 378100 }, { "epoch": 60.512, "grad_norm": 0.23372051119804382, "learning_rate": 0.00029093826953078124, "loss": 3.0823, "step": 378200 }, { "epoch": 60.528, "grad_norm": 0.16743652522563934, "learning_rate": 0.0002909358694347774, "loss": 3.4662, "step": 378300 }, { "epoch": 60.544, "grad_norm": 0.17677438259124756, "learning_rate": 0.0002909334693387735, "loss": 3.4099, "step": 378400 }, { "epoch": 60.56, "grad_norm": 0.16041554510593414, "learning_rate": 0.0002909310692427697, "loss": 3.2426, "step": 378500 }, { "epoch": 60.576, "grad_norm": 0.2220979630947113, "learning_rate": 0.00029092866914676586, "loss": 3.4113, "step": 378600 }, { "epoch": 60.592, "grad_norm": 0.15731485188007355, "learning_rate": 0.00029092626905076203, "loss": 3.2795, "step": 378700 }, { "epoch": 60.608, "grad_norm": 0.15252497792243958, "learning_rate": 0.0002909238689547582, "loss": 3.1874, "step": 378800 }, { "epoch": 60.624, "grad_norm": 0.16804943978786469, "learning_rate": 0.0002909214688587543, "loss": 3.0902, "step": 378900 }, { "epoch": 60.64, "grad_norm": 0.12100684642791748, "learning_rate": 0.0002909190687627505, "loss": 3.3037, "step": 379000 }, { "epoch": 60.656, "grad_norm": 0.1853804737329483, "learning_rate": 0.00029091666866674665, "loss": 3.1483, "step": 379100 }, { "epoch": 60.672, "grad_norm": 0.1626957505941391, "learning_rate": 0.0002909142685707428, "loss": 3.2897, "step": 379200 }, { "epoch": 60.688, "grad_norm": 0.13886098563671112, "learning_rate": 0.000290911868474739, "loss": 3.2658, "step": 379300 }, { "epoch": 60.704, "grad_norm": 0.2123270332813263, "learning_rate": 0.00029090946837873516, "loss": 3.0525, "step": 379400 }, { "epoch": 60.72, "grad_norm": 0.12106005847454071, "learning_rate": 0.0002909070682827313, "loss": 3.4267, "step": 379500 }, { "epoch": 60.736, "grad_norm": 0.13238506019115448, "learning_rate": 0.00029090466818672744, "loss": 3.4785, "step": 379600 }, { "epoch": 60.752, "grad_norm": 0.1579398512840271, "learning_rate": 0.0002909022680907236, "loss": 3.0678, "step": 379700 }, { "epoch": 60.768, "grad_norm": 0.157019704580307, "learning_rate": 0.0002908998679947198, "loss": 3.1807, "step": 379800 }, { "epoch": 60.784, "grad_norm": 0.14942459762096405, "learning_rate": 0.00029089746789871595, "loss": 3.4529, "step": 379900 }, { "epoch": 60.8, "grad_norm": 0.2190953493118286, "learning_rate": 0.00029089506780271207, "loss": 3.147, "step": 380000 }, { "epoch": 60.816, "grad_norm": 0.17543238401412964, "learning_rate": 0.00029089266770670824, "loss": 3.2236, "step": 380100 }, { "epoch": 60.832, "grad_norm": 0.1725672483444214, "learning_rate": 0.0002908902676107044, "loss": 3.3292, "step": 380200 }, { "epoch": 60.848, "grad_norm": 0.23549331724643707, "learning_rate": 0.0002908878675147006, "loss": 3.1878, "step": 380300 }, { "epoch": 60.864, "grad_norm": 0.1479646861553192, "learning_rate": 0.00029088546741869674, "loss": 3.6843, "step": 380400 }, { "epoch": 60.88, "grad_norm": 0.16958242654800415, "learning_rate": 0.0002908830673226929, "loss": 3.2444, "step": 380500 }, { "epoch": 60.896, "grad_norm": 0.15236087143421173, "learning_rate": 0.000290880667226689, "loss": 2.8961, "step": 380600 }, { "epoch": 60.912, "grad_norm": 0.15799252688884735, "learning_rate": 0.0002908782671306852, "loss": 3.0255, "step": 380700 }, { "epoch": 60.928, "grad_norm": 0.14994435012340546, "learning_rate": 0.00029087586703468137, "loss": 3.0898, "step": 380800 }, { "epoch": 60.944, "grad_norm": 0.17937101423740387, "learning_rate": 0.00029087346693867753, "loss": 3.0835, "step": 380900 }, { "epoch": 60.96, "grad_norm": 0.14368091523647308, "learning_rate": 0.0002908710668426737, "loss": 3.5872, "step": 381000 }, { "epoch": 60.976, "grad_norm": 0.1677994728088379, "learning_rate": 0.0002908686667466698, "loss": 3.1594, "step": 381100 }, { "epoch": 60.992, "grad_norm": 0.15857039391994476, "learning_rate": 0.000290866266650666, "loss": 3.1259, "step": 381200 }, { "epoch": 61.008, "grad_norm": 0.20482750236988068, "learning_rate": 0.00029086386655466216, "loss": 3.0285, "step": 381300 }, { "epoch": 61.024, "grad_norm": 0.1506480574607849, "learning_rate": 0.0002908614664586583, "loss": 2.9123, "step": 381400 }, { "epoch": 61.04, "grad_norm": 0.14817015826702118, "learning_rate": 0.0002908590663626545, "loss": 2.8925, "step": 381500 }, { "epoch": 61.056, "grad_norm": 0.14361242949962616, "learning_rate": 0.00029085666626665066, "loss": 3.101, "step": 381600 }, { "epoch": 61.072, "grad_norm": 0.14265070855617523, "learning_rate": 0.0002908542661706468, "loss": 3.066, "step": 381700 }, { "epoch": 61.088, "grad_norm": 0.18102407455444336, "learning_rate": 0.00029085186607464295, "loss": 3.0794, "step": 381800 }, { "epoch": 61.104, "grad_norm": 0.16624779999256134, "learning_rate": 0.0002908494659786391, "loss": 3.0919, "step": 381900 }, { "epoch": 61.12, "grad_norm": 0.17047633230686188, "learning_rate": 0.0002908470658826353, "loss": 3.207, "step": 382000 }, { "epoch": 61.136, "grad_norm": 0.2104032188653946, "learning_rate": 0.00029084466578663146, "loss": 3.506, "step": 382100 }, { "epoch": 61.152, "grad_norm": 0.19247226417064667, "learning_rate": 0.00029084226569062757, "loss": 3.5212, "step": 382200 }, { "epoch": 61.168, "grad_norm": 0.15191778540611267, "learning_rate": 0.00029083986559462374, "loss": 3.1662, "step": 382300 }, { "epoch": 61.184, "grad_norm": 0.1673266738653183, "learning_rate": 0.0002908374654986199, "loss": 3.2153, "step": 382400 }, { "epoch": 61.2, "grad_norm": 0.1923072189092636, "learning_rate": 0.0002908350654026161, "loss": 3.2974, "step": 382500 }, { "epoch": 61.216, "grad_norm": 0.18206721544265747, "learning_rate": 0.00029083268930757227, "loss": 3.2977, "step": 382600 }, { "epoch": 61.232, "grad_norm": 0.1406172811985016, "learning_rate": 0.00029083028921156844, "loss": 3.3299, "step": 382700 }, { "epoch": 61.248, "grad_norm": 0.1382654309272766, "learning_rate": 0.0002908278891155646, "loss": 3.044, "step": 382800 }, { "epoch": 61.264, "grad_norm": 0.16156049072742462, "learning_rate": 0.0002908254890195608, "loss": 3.2699, "step": 382900 }, { "epoch": 61.28, "grad_norm": 0.20651774108409882, "learning_rate": 0.00029082308892355694, "loss": 3.3268, "step": 383000 }, { "epoch": 61.296, "grad_norm": 0.15802910923957825, "learning_rate": 0.00029082068882755306, "loss": 3.2684, "step": 383100 }, { "epoch": 61.312, "grad_norm": 0.20338857173919678, "learning_rate": 0.00029081828873154923, "loss": 3.3419, "step": 383200 }, { "epoch": 61.328, "grad_norm": 0.2333679497241974, "learning_rate": 0.0002908158886355454, "loss": 3.4272, "step": 383300 }, { "epoch": 61.344, "grad_norm": 0.16804112493991852, "learning_rate": 0.00029081348853954157, "loss": 3.3082, "step": 383400 }, { "epoch": 61.36, "grad_norm": 0.16731618344783783, "learning_rate": 0.00029081108844353774, "loss": 3.2191, "step": 383500 }, { "epoch": 61.376, "grad_norm": 0.1451852172613144, "learning_rate": 0.0002908086883475339, "loss": 3.3106, "step": 383600 }, { "epoch": 61.392, "grad_norm": 0.15098249912261963, "learning_rate": 0.00029080628825153, "loss": 3.3453, "step": 383700 }, { "epoch": 61.408, "grad_norm": 0.15251114964485168, "learning_rate": 0.0002908038881555262, "loss": 3.5522, "step": 383800 }, { "epoch": 61.424, "grad_norm": 0.21163199841976166, "learning_rate": 0.00029080148805952236, "loss": 3.1103, "step": 383900 }, { "epoch": 61.44, "grad_norm": 0.17154242098331451, "learning_rate": 0.00029079908796351853, "loss": 3.3452, "step": 384000 }, { "epoch": 61.456, "grad_norm": 0.1460789293050766, "learning_rate": 0.0002907966878675147, "loss": 3.3231, "step": 384100 }, { "epoch": 61.472, "grad_norm": 0.21403585374355316, "learning_rate": 0.0002907942877715108, "loss": 3.2198, "step": 384200 }, { "epoch": 61.488, "grad_norm": 0.1541745662689209, "learning_rate": 0.000290791887675507, "loss": 3.0228, "step": 384300 }, { "epoch": 61.504, "grad_norm": 0.1654757261276245, "learning_rate": 0.00029078948757950315, "loss": 3.424, "step": 384400 }, { "epoch": 61.52, "grad_norm": 0.16089855134487152, "learning_rate": 0.0002907870874834993, "loss": 2.995, "step": 384500 }, { "epoch": 61.536, "grad_norm": 0.12971287965774536, "learning_rate": 0.0002907846873874955, "loss": 3.3435, "step": 384600 }, { "epoch": 61.552, "grad_norm": 0.15201076865196228, "learning_rate": 0.00029078228729149166, "loss": 3.2067, "step": 384700 }, { "epoch": 61.568, "grad_norm": 0.16221588850021362, "learning_rate": 0.00029077988719548777, "loss": 3.2677, "step": 384800 }, { "epoch": 61.584, "grad_norm": 0.15324796736240387, "learning_rate": 0.000290777511100444, "loss": 3.149, "step": 384900 }, { "epoch": 61.6, "grad_norm": 0.17082548141479492, "learning_rate": 0.0002907751110044402, "loss": 3.19, "step": 385000 }, { "epoch": 61.616, "grad_norm": 0.20116890966892242, "learning_rate": 0.0002907727109084363, "loss": 3.1363, "step": 385100 }, { "epoch": 61.632, "grad_norm": 0.1536134034395218, "learning_rate": 0.00029077031081243247, "loss": 3.1352, "step": 385200 }, { "epoch": 61.648, "grad_norm": 0.1376706212759018, "learning_rate": 0.00029076791071642864, "loss": 3.1783, "step": 385300 }, { "epoch": 61.664, "grad_norm": 0.21908988058567047, "learning_rate": 0.0002907655106204248, "loss": 3.1399, "step": 385400 }, { "epoch": 61.68, "grad_norm": 0.1867181807756424, "learning_rate": 0.000290763110524421, "loss": 3.1297, "step": 385500 }, { "epoch": 61.696, "grad_norm": 0.1358814388513565, "learning_rate": 0.00029076071042841715, "loss": 2.9518, "step": 385600 }, { "epoch": 61.712, "grad_norm": 0.15086665749549866, "learning_rate": 0.00029075831033241326, "loss": 3.3137, "step": 385700 }, { "epoch": 61.728, "grad_norm": 0.15570083260536194, "learning_rate": 0.00029075591023640943, "loss": 3.5012, "step": 385800 }, { "epoch": 61.744, "grad_norm": 0.15328861773014069, "learning_rate": 0.0002907535101404056, "loss": 3.2828, "step": 385900 }, { "epoch": 61.76, "grad_norm": 0.1998985856771469, "learning_rate": 0.00029075111004440177, "loss": 3.2114, "step": 386000 }, { "epoch": 61.776, "grad_norm": 0.18721722066402435, "learning_rate": 0.00029074870994839794, "loss": 3.2861, "step": 386100 }, { "epoch": 61.792, "grad_norm": 0.16128158569335938, "learning_rate": 0.00029074630985239405, "loss": 3.2893, "step": 386200 }, { "epoch": 61.808, "grad_norm": 0.17908862233161926, "learning_rate": 0.0002907439097563902, "loss": 3.5053, "step": 386300 }, { "epoch": 61.824, "grad_norm": 0.17678572237491608, "learning_rate": 0.0002907415096603864, "loss": 3.1678, "step": 386400 }, { "epoch": 61.84, "grad_norm": 0.13656556606292725, "learning_rate": 0.00029073910956438256, "loss": 3.1107, "step": 386500 }, { "epoch": 61.856, "grad_norm": 0.14490580558776855, "learning_rate": 0.00029073670946837873, "loss": 3.2911, "step": 386600 }, { "epoch": 61.872, "grad_norm": 0.14818839728832245, "learning_rate": 0.0002907343333733349, "loss": 3.2403, "step": 386700 }, { "epoch": 61.888, "grad_norm": 0.15911749005317688, "learning_rate": 0.00029073193327733103, "loss": 3.2058, "step": 386800 }, { "epoch": 61.904, "grad_norm": 0.1618082970380783, "learning_rate": 0.0002907295331813272, "loss": 3.3962, "step": 386900 }, { "epoch": 61.92, "grad_norm": 0.16366302967071533, "learning_rate": 0.00029072713308532337, "loss": 3.293, "step": 387000 }, { "epoch": 61.936, "grad_norm": 0.14071735739707947, "learning_rate": 0.00029072473298931954, "loss": 3.337, "step": 387100 }, { "epoch": 61.952, "grad_norm": 0.2981182038784027, "learning_rate": 0.0002907223328933157, "loss": 3.1589, "step": 387200 }, { "epoch": 61.968, "grad_norm": 0.1741790920495987, "learning_rate": 0.0002907199327973119, "loss": 3.2754, "step": 387300 }, { "epoch": 61.984, "grad_norm": 0.15682289004325867, "learning_rate": 0.00029071753270130805, "loss": 3.2455, "step": 387400 }, { "epoch": 62.0, "grad_norm": 0.14351768791675568, "learning_rate": 0.0002907151326053042, "loss": 3.1652, "step": 387500 }, { "epoch": 62.016, "grad_norm": 0.213531494140625, "learning_rate": 0.0002907127325093004, "loss": 3.1034, "step": 387600 }, { "epoch": 62.032, "grad_norm": 0.1455306112766266, "learning_rate": 0.0002907103324132965, "loss": 3.5185, "step": 387700 }, { "epoch": 62.048, "grad_norm": 0.198695108294487, "learning_rate": 0.00029070793231729267, "loss": 3.2407, "step": 387800 }, { "epoch": 62.064, "grad_norm": 0.15311425924301147, "learning_rate": 0.00029070553222128884, "loss": 3.1861, "step": 387900 }, { "epoch": 62.08, "grad_norm": 0.16511523723602295, "learning_rate": 0.000290703132125285, "loss": 3.1092, "step": 388000 }, { "epoch": 62.096, "grad_norm": 0.1512548327445984, "learning_rate": 0.0002907007320292812, "loss": 3.1019, "step": 388100 }, { "epoch": 62.112, "grad_norm": 0.16061201691627502, "learning_rate": 0.0002906983319332773, "loss": 3.0333, "step": 388200 }, { "epoch": 62.128, "grad_norm": 0.15840667486190796, "learning_rate": 0.00029069593183727346, "loss": 3.0346, "step": 388300 }, { "epoch": 62.144, "grad_norm": 0.16902057826519012, "learning_rate": 0.00029069353174126963, "loss": 3.029, "step": 388400 }, { "epoch": 62.16, "grad_norm": 0.1631724238395691, "learning_rate": 0.0002906911316452658, "loss": 3.0414, "step": 388500 }, { "epoch": 62.176, "grad_norm": 0.19239960610866547, "learning_rate": 0.00029068873154926197, "loss": 3.3011, "step": 388600 }, { "epoch": 62.192, "grad_norm": 0.24047933518886566, "learning_rate": 0.00029068635545421816, "loss": 3.3573, "step": 388700 }, { "epoch": 62.208, "grad_norm": 0.15004439651966095, "learning_rate": 0.00029068395535821427, "loss": 3.255, "step": 388800 }, { "epoch": 62.224, "grad_norm": 0.14008037745952606, "learning_rate": 0.00029068155526221044, "loss": 3.2144, "step": 388900 }, { "epoch": 62.24, "grad_norm": 0.1692444235086441, "learning_rate": 0.0002906791551662066, "loss": 3.4316, "step": 389000 }, { "epoch": 62.256, "grad_norm": 0.1476495862007141, "learning_rate": 0.0002906767550702028, "loss": 3.066, "step": 389100 }, { "epoch": 62.272, "grad_norm": 0.15594840049743652, "learning_rate": 0.00029067435497419895, "loss": 3.1487, "step": 389200 }, { "epoch": 62.288, "grad_norm": 0.13510873913764954, "learning_rate": 0.00029067195487819506, "loss": 3.0693, "step": 389300 }, { "epoch": 62.304, "grad_norm": 0.19371680915355682, "learning_rate": 0.00029066955478219123, "loss": 3.1038, "step": 389400 }, { "epoch": 62.32, "grad_norm": 0.15567085146903992, "learning_rate": 0.0002906671546861874, "loss": 3.11, "step": 389500 }, { "epoch": 62.336, "grad_norm": 0.20137225091457367, "learning_rate": 0.00029066475459018357, "loss": 3.3279, "step": 389600 }, { "epoch": 62.352, "grad_norm": 0.14456264674663544, "learning_rate": 0.00029066235449417974, "loss": 3.2716, "step": 389700 }, { "epoch": 62.368, "grad_norm": 0.19456849992275238, "learning_rate": 0.0002906599543981759, "loss": 3.2453, "step": 389800 }, { "epoch": 62.384, "grad_norm": 0.1733856350183487, "learning_rate": 0.000290657554302172, "loss": 3.1698, "step": 389900 }, { "epoch": 62.4, "grad_norm": 0.20283350348472595, "learning_rate": 0.0002906551542061682, "loss": 3.1882, "step": 390000 }, { "epoch": 62.416, "grad_norm": 0.22308194637298584, "learning_rate": 0.00029065275411016436, "loss": 3.3559, "step": 390100 }, { "epoch": 62.432, "grad_norm": 0.1493431180715561, "learning_rate": 0.00029065035401416053, "loss": 3.3721, "step": 390200 }, { "epoch": 62.448, "grad_norm": 0.2367425560951233, "learning_rate": 0.0002906479539181567, "loss": 3.5567, "step": 390300 }, { "epoch": 62.464, "grad_norm": 0.18750756978988647, "learning_rate": 0.00029064555382215287, "loss": 3.1247, "step": 390400 }, { "epoch": 62.48, "grad_norm": 0.1720907837152481, "learning_rate": 0.00029064315372614904, "loss": 3.3307, "step": 390500 }, { "epoch": 62.496, "grad_norm": 0.13939453661441803, "learning_rate": 0.0002906407536301452, "loss": 3.3345, "step": 390600 }, { "epoch": 62.512, "grad_norm": 0.20821264386177063, "learning_rate": 0.0002906383535341414, "loss": 3.1677, "step": 390700 }, { "epoch": 62.528, "grad_norm": 0.16607046127319336, "learning_rate": 0.0002906359534381375, "loss": 3.2087, "step": 390800 }, { "epoch": 62.544, "grad_norm": 0.15326619148254395, "learning_rate": 0.00029063355334213366, "loss": 3.2386, "step": 390900 }, { "epoch": 62.56, "grad_norm": 0.19541656970977783, "learning_rate": 0.00029063115324612983, "loss": 3.236, "step": 391000 }, { "epoch": 62.576, "grad_norm": 0.13507741689682007, "learning_rate": 0.000290628753150126, "loss": 3.3094, "step": 391100 }, { "epoch": 62.592, "grad_norm": 0.1903313547372818, "learning_rate": 0.00029062635305412217, "loss": 3.0541, "step": 391200 }, { "epoch": 62.608, "grad_norm": 0.1746234893798828, "learning_rate": 0.0002906239529581183, "loss": 3.2078, "step": 391300 }, { "epoch": 62.624, "grad_norm": 0.16670985519886017, "learning_rate": 0.0002906215768630745, "loss": 3.1787, "step": 391400 }, { "epoch": 62.64, "grad_norm": 0.12755043804645538, "learning_rate": 0.00029061917676707064, "loss": 3.1585, "step": 391500 }, { "epoch": 62.656, "grad_norm": 0.1762136071920395, "learning_rate": 0.0002906167766710668, "loss": 3.401, "step": 391600 }, { "epoch": 62.672, "grad_norm": 0.16139496862888336, "learning_rate": 0.000290614376575063, "loss": 3.0759, "step": 391700 }, { "epoch": 62.688, "grad_norm": 0.15594922006130219, "learning_rate": 0.00029061197647905915, "loss": 3.2768, "step": 391800 }, { "epoch": 62.704, "grad_norm": 0.15929311513900757, "learning_rate": 0.00029060957638305527, "loss": 3.2191, "step": 391900 }, { "epoch": 62.72, "grad_norm": 0.14831338822841644, "learning_rate": 0.00029060717628705144, "loss": 3.3456, "step": 392000 }, { "epoch": 62.736, "grad_norm": 0.1562633514404297, "learning_rate": 0.0002906047761910476, "loss": 3.0808, "step": 392100 }, { "epoch": 62.752, "grad_norm": 0.16320686042308807, "learning_rate": 0.0002906023760950438, "loss": 3.3851, "step": 392200 }, { "epoch": 62.768, "grad_norm": 0.1324177235364914, "learning_rate": 0.00029059997599903994, "loss": 3.0212, "step": 392300 }, { "epoch": 62.784, "grad_norm": 0.14908549189567566, "learning_rate": 0.00029059757590303606, "loss": 3.3, "step": 392400 }, { "epoch": 62.8, "grad_norm": 0.1647721529006958, "learning_rate": 0.0002905951758070322, "loss": 3.3506, "step": 392500 }, { "epoch": 62.816, "grad_norm": 0.13982121646404266, "learning_rate": 0.0002905927757110284, "loss": 3.2787, "step": 392600 }, { "epoch": 62.832, "grad_norm": 0.15100371837615967, "learning_rate": 0.00029059037561502457, "loss": 3.3697, "step": 392700 }, { "epoch": 62.848, "grad_norm": 0.14921905100345612, "learning_rate": 0.00029058797551902073, "loss": 3.2781, "step": 392800 }, { "epoch": 62.864, "grad_norm": 0.14347714185714722, "learning_rate": 0.0002905855754230169, "loss": 3.433, "step": 392900 }, { "epoch": 62.88, "grad_norm": 0.17825868725776672, "learning_rate": 0.000290583175327013, "loss": 3.1108, "step": 393000 }, { "epoch": 62.896, "grad_norm": 0.1542351394891739, "learning_rate": 0.0002905807752310092, "loss": 3.307, "step": 393100 }, { "epoch": 62.912, "grad_norm": 0.16442036628723145, "learning_rate": 0.00029057837513500536, "loss": 3.3216, "step": 393200 }, { "epoch": 62.928, "grad_norm": 0.19775159657001495, "learning_rate": 0.0002905759750390015, "loss": 3.1829, "step": 393300 }, { "epoch": 62.944, "grad_norm": 0.1606547236442566, "learning_rate": 0.0002905735749429977, "loss": 3.2499, "step": 393400 }, { "epoch": 62.96, "grad_norm": 0.16156861186027527, "learning_rate": 0.00029057117484699386, "loss": 3.4032, "step": 393500 }, { "epoch": 62.976, "grad_norm": 0.14201924204826355, "learning_rate": 0.00029056877475099003, "loss": 3.179, "step": 393600 }, { "epoch": 62.992, "grad_norm": 0.1906752735376358, "learning_rate": 0.0002905663746549862, "loss": 3.3488, "step": 393700 }, { "epoch": 63.008, "grad_norm": 0.17384812235832214, "learning_rate": 0.00029056397455898237, "loss": 3.1938, "step": 393800 }, { "epoch": 63.024, "grad_norm": 0.18379640579223633, "learning_rate": 0.0002905615744629785, "loss": 3.028, "step": 393900 }, { "epoch": 63.04, "grad_norm": 0.1414775252342224, "learning_rate": 0.00029055917436697466, "loss": 3.3168, "step": 394000 }, { "epoch": 63.056, "grad_norm": 0.2467828243970871, "learning_rate": 0.00029055679827193084, "loss": 2.8162, "step": 394100 }, { "epoch": 63.072, "grad_norm": 0.13812999427318573, "learning_rate": 0.000290554398175927, "loss": 3.3998, "step": 394200 }, { "epoch": 63.088, "grad_norm": 0.18769370019435883, "learning_rate": 0.0002905519980799232, "loss": 3.3411, "step": 394300 }, { "epoch": 63.104, "grad_norm": 0.16412067413330078, "learning_rate": 0.0002905495979839193, "loss": 3.1384, "step": 394400 }, { "epoch": 63.12, "grad_norm": 0.20008309185504913, "learning_rate": 0.00029054719788791547, "loss": 3.2441, "step": 394500 }, { "epoch": 63.136, "grad_norm": 0.18194669485092163, "learning_rate": 0.00029054479779191164, "loss": 3.0892, "step": 394600 }, { "epoch": 63.152, "grad_norm": 0.16298596560955048, "learning_rate": 0.0002905423976959078, "loss": 3.0668, "step": 394700 }, { "epoch": 63.168, "grad_norm": 0.21915031969547272, "learning_rate": 0.000290539997599904, "loss": 3.0456, "step": 394800 }, { "epoch": 63.184, "grad_norm": 0.16760875284671783, "learning_rate": 0.00029053759750390014, "loss": 2.9902, "step": 394900 }, { "epoch": 63.2, "grad_norm": 0.23996813595294952, "learning_rate": 0.00029053519740789626, "loss": 3.2475, "step": 395000 }, { "epoch": 63.216, "grad_norm": 0.1492716521024704, "learning_rate": 0.00029053279731189243, "loss": 3.2879, "step": 395100 }, { "epoch": 63.232, "grad_norm": 0.15854014456272125, "learning_rate": 0.0002905303972158886, "loss": 3.4011, "step": 395200 }, { "epoch": 63.248, "grad_norm": 0.18721790611743927, "learning_rate": 0.00029052799711988477, "loss": 3.3054, "step": 395300 }, { "epoch": 63.264, "grad_norm": 0.14900335669517517, "learning_rate": 0.00029052559702388094, "loss": 3.2678, "step": 395400 }, { "epoch": 63.28, "grad_norm": 0.17905500531196594, "learning_rate": 0.00029052319692787705, "loss": 3.1092, "step": 395500 }, { "epoch": 63.296, "grad_norm": 0.1298065036535263, "learning_rate": 0.0002905207968318732, "loss": 3.0126, "step": 395600 }, { "epoch": 63.312, "grad_norm": 0.12892670929431915, "learning_rate": 0.0002905183967358694, "loss": 3.3734, "step": 395700 }, { "epoch": 63.328, "grad_norm": 0.13753476738929749, "learning_rate": 0.00029051599663986556, "loss": 3.3459, "step": 395800 }, { "epoch": 63.344, "grad_norm": 0.18333305418491364, "learning_rate": 0.00029051359654386173, "loss": 3.2713, "step": 395900 }, { "epoch": 63.36, "grad_norm": 0.18459205329418182, "learning_rate": 0.0002905111964478579, "loss": 3.2881, "step": 396000 }, { "epoch": 63.376, "grad_norm": 0.1673024743795395, "learning_rate": 0.000290508796351854, "loss": 3.4604, "step": 396100 }, { "epoch": 63.392, "grad_norm": 0.19726680219173431, "learning_rate": 0.0002905063962558502, "loss": 3.5662, "step": 396200 }, { "epoch": 63.408, "grad_norm": 0.18688827753067017, "learning_rate": 0.00029050399615984635, "loss": 3.2134, "step": 396300 }, { "epoch": 63.424, "grad_norm": 0.1942591369152069, "learning_rate": 0.0002905015960638425, "loss": 2.9418, "step": 396400 }, { "epoch": 63.44, "grad_norm": 0.15016606450080872, "learning_rate": 0.0002904991959678387, "loss": 3.1872, "step": 396500 }, { "epoch": 63.456, "grad_norm": 0.14090578258037567, "learning_rate": 0.00029049679587183486, "loss": 3.1979, "step": 396600 }, { "epoch": 63.472, "grad_norm": 0.14433197677135468, "learning_rate": 0.000290494395775831, "loss": 3.2551, "step": 396700 }, { "epoch": 63.488, "grad_norm": 0.16559575498104095, "learning_rate": 0.0002904919956798272, "loss": 3.3725, "step": 396800 }, { "epoch": 63.504, "grad_norm": 0.15526849031448364, "learning_rate": 0.00029048959558382337, "loss": 3.4118, "step": 396900 }, { "epoch": 63.52, "grad_norm": 0.1972206085920334, "learning_rate": 0.0002904871954878195, "loss": 3.3658, "step": 397000 }, { "epoch": 63.536, "grad_norm": 0.1702529937028885, "learning_rate": 0.00029048479539181565, "loss": 3.482, "step": 397100 }, { "epoch": 63.552, "grad_norm": 0.13739444315433502, "learning_rate": 0.0002904823952958118, "loss": 3.2735, "step": 397200 }, { "epoch": 63.568, "grad_norm": 0.15325240790843964, "learning_rate": 0.000290479995199808, "loss": 3.0551, "step": 397300 }, { "epoch": 63.584, "grad_norm": 0.17461207509040833, "learning_rate": 0.00029047759510380416, "loss": 3.0165, "step": 397400 }, { "epoch": 63.6, "grad_norm": 0.11855808645486832, "learning_rate": 0.00029047519500780027, "loss": 3.1076, "step": 397500 }, { "epoch": 63.616, "grad_norm": 0.13413432240486145, "learning_rate": 0.00029047279491179644, "loss": 3.3356, "step": 397600 }, { "epoch": 63.632, "grad_norm": 0.16549968719482422, "learning_rate": 0.0002904703948157926, "loss": 3.2481, "step": 397700 }, { "epoch": 63.648, "grad_norm": 0.14200444519519806, "learning_rate": 0.0002904679947197888, "loss": 3.1424, "step": 397800 }, { "epoch": 63.664, "grad_norm": 0.14829108119010925, "learning_rate": 0.00029046559462378495, "loss": 3.2366, "step": 397900 }, { "epoch": 63.68, "grad_norm": 0.16478104889392853, "learning_rate": 0.0002904631945277811, "loss": 2.905, "step": 398000 }, { "epoch": 63.696, "grad_norm": 0.18953610956668854, "learning_rate": 0.00029046079443177723, "loss": 3.1445, "step": 398100 }, { "epoch": 63.712, "grad_norm": 0.18510781228542328, "learning_rate": 0.0002904584183367334, "loss": 3.1513, "step": 398200 }, { "epoch": 63.728, "grad_norm": 0.1902746707201004, "learning_rate": 0.0002904560182407296, "loss": 3.1629, "step": 398300 }, { "epoch": 63.744, "grad_norm": 0.23207791149616241, "learning_rate": 0.00029045361814472576, "loss": 3.2992, "step": 398400 }, { "epoch": 63.76, "grad_norm": 0.1658455729484558, "learning_rate": 0.00029045121804872193, "loss": 3.5296, "step": 398500 }, { "epoch": 63.776, "grad_norm": 0.1634206622838974, "learning_rate": 0.0002904488179527181, "loss": 3.1377, "step": 398600 }, { "epoch": 63.792, "grad_norm": 0.17793874442577362, "learning_rate": 0.0002904464178567142, "loss": 3.2636, "step": 398700 }, { "epoch": 63.808, "grad_norm": 0.13751810789108276, "learning_rate": 0.0002904440177607104, "loss": 3.6099, "step": 398800 }, { "epoch": 63.824, "grad_norm": 0.15577585995197296, "learning_rate": 0.00029044161766470655, "loss": 3.4607, "step": 398900 }, { "epoch": 63.84, "grad_norm": 0.1426801085472107, "learning_rate": 0.0002904392175687027, "loss": 3.1492, "step": 399000 }, { "epoch": 63.856, "grad_norm": 0.19539377093315125, "learning_rate": 0.0002904368174726989, "loss": 3.0213, "step": 399100 }, { "epoch": 63.872, "grad_norm": 0.17734317481517792, "learning_rate": 0.000290434417376695, "loss": 3.5259, "step": 399200 }, { "epoch": 63.888, "grad_norm": 0.16792316734790802, "learning_rate": 0.0002904320172806912, "loss": 3.4326, "step": 399300 }, { "epoch": 63.904, "grad_norm": 0.15850253403186798, "learning_rate": 0.00029042961718468734, "loss": 3.3908, "step": 399400 }, { "epoch": 63.92, "grad_norm": 0.1588330864906311, "learning_rate": 0.0002904272170886835, "loss": 3.4876, "step": 399500 }, { "epoch": 63.936, "grad_norm": 0.20663544535636902, "learning_rate": 0.0002904248169926797, "loss": 2.9456, "step": 399600 }, { "epoch": 63.952, "grad_norm": 0.2078634351491928, "learning_rate": 0.00029042241689667585, "loss": 3.098, "step": 399700 }, { "epoch": 63.968, "grad_norm": 0.20219531655311584, "learning_rate": 0.000290420016800672, "loss": 3.3665, "step": 399800 }, { "epoch": 63.984, "grad_norm": 0.20857609808444977, "learning_rate": 0.0002904176167046682, "loss": 3.1537, "step": 399900 }, { "epoch": 64.0, "grad_norm": 0.1336994469165802, "learning_rate": 0.00029041521660866436, "loss": 3.2349, "step": 400000 }, { "epoch": 64.016, "grad_norm": 0.16698497533798218, "learning_rate": 0.0002904128165126605, "loss": 3.1468, "step": 400100 }, { "epoch": 64.032, "grad_norm": 0.2598242461681366, "learning_rate": 0.00029041041641665664, "loss": 3.1247, "step": 400200 }, { "epoch": 64.048, "grad_norm": 0.17946502566337585, "learning_rate": 0.00029040804032161283, "loss": 2.9629, "step": 400300 }, { "epoch": 64.064, "grad_norm": 0.2026471495628357, "learning_rate": 0.000290405640225609, "loss": 3.0926, "step": 400400 }, { "epoch": 64.08, "grad_norm": 0.18121060729026794, "learning_rate": 0.00029040324012960517, "loss": 2.9854, "step": 400500 }, { "epoch": 64.096, "grad_norm": 0.17052076756954193, "learning_rate": 0.00029040084003360134, "loss": 3.2403, "step": 400600 }, { "epoch": 64.112, "grad_norm": 0.16394643485546112, "learning_rate": 0.00029039843993759745, "loss": 3.181, "step": 400700 }, { "epoch": 64.128, "grad_norm": 0.1991247832775116, "learning_rate": 0.0002903960398415936, "loss": 3.3173, "step": 400800 }, { "epoch": 64.144, "grad_norm": 0.19139455258846283, "learning_rate": 0.0002903936397455898, "loss": 3.2295, "step": 400900 }, { "epoch": 64.16, "grad_norm": 0.1698758453130722, "learning_rate": 0.00029039123964958596, "loss": 3.0167, "step": 401000 }, { "epoch": 64.176, "grad_norm": 0.16084103286266327, "learning_rate": 0.00029038883955358213, "loss": 3.1043, "step": 401100 }, { "epoch": 64.192, "grad_norm": 0.17921245098114014, "learning_rate": 0.00029038643945757825, "loss": 3.3905, "step": 401200 }, { "epoch": 64.208, "grad_norm": 0.2001495659351349, "learning_rate": 0.0002903840393615744, "loss": 3.2603, "step": 401300 }, { "epoch": 64.224, "grad_norm": 0.2556336522102356, "learning_rate": 0.00029038166326653066, "loss": 2.9944, "step": 401400 }, { "epoch": 64.24, "grad_norm": 0.17932747304439545, "learning_rate": 0.00029037926317052677, "loss": 3.1276, "step": 401500 }, { "epoch": 64.256, "grad_norm": 0.15382349491119385, "learning_rate": 0.00029037686307452294, "loss": 3.3028, "step": 401600 }, { "epoch": 64.272, "grad_norm": 0.1520128846168518, "learning_rate": 0.0002903744629785191, "loss": 3.2216, "step": 401700 }, { "epoch": 64.288, "grad_norm": 0.13986560702323914, "learning_rate": 0.0002903720628825153, "loss": 3.0709, "step": 401800 }, { "epoch": 64.304, "grad_norm": 0.19709867238998413, "learning_rate": 0.00029036966278651145, "loss": 3.425, "step": 401900 }, { "epoch": 64.32, "grad_norm": 0.2046891450881958, "learning_rate": 0.0002903672626905076, "loss": 3.3036, "step": 402000 }, { "epoch": 64.336, "grad_norm": 0.17567172646522522, "learning_rate": 0.00029036486259450373, "loss": 3.3823, "step": 402100 }, { "epoch": 64.352, "grad_norm": 0.18577341735363007, "learning_rate": 0.0002903624624984999, "loss": 3.2937, "step": 402200 }, { "epoch": 64.368, "grad_norm": 0.1802494078874588, "learning_rate": 0.00029036006240249607, "loss": 3.1309, "step": 402300 }, { "epoch": 64.384, "grad_norm": 0.16048762202262878, "learning_rate": 0.00029035766230649224, "loss": 3.5476, "step": 402400 }, { "epoch": 64.4, "grad_norm": 0.133345827460289, "learning_rate": 0.0002903552622104884, "loss": 2.8361, "step": 402500 }, { "epoch": 64.416, "grad_norm": 0.17087054252624512, "learning_rate": 0.0002903528621144846, "loss": 3.3544, "step": 402600 }, { "epoch": 64.432, "grad_norm": 0.14181047677993774, "learning_rate": 0.0002903504620184807, "loss": 3.3965, "step": 402700 }, { "epoch": 64.448, "grad_norm": 0.18011067807674408, "learning_rate": 0.00029034806192247686, "loss": 3.1847, "step": 402800 }, { "epoch": 64.464, "grad_norm": 0.2002549022436142, "learning_rate": 0.00029034566182647303, "loss": 3.392, "step": 402900 }, { "epoch": 64.48, "grad_norm": 0.18152306973934174, "learning_rate": 0.0002903432617304692, "loss": 3.1722, "step": 403000 }, { "epoch": 64.496, "grad_norm": 0.16437137126922607, "learning_rate": 0.00029034086163446537, "loss": 3.3517, "step": 403100 }, { "epoch": 64.512, "grad_norm": 0.14304892718791962, "learning_rate": 0.0002903384615384615, "loss": 3.3089, "step": 403200 }, { "epoch": 64.528, "grad_norm": 0.22262035310268402, "learning_rate": 0.00029033606144245766, "loss": 3.1617, "step": 403300 }, { "epoch": 64.544, "grad_norm": 0.1788465827703476, "learning_rate": 0.0002903336613464538, "loss": 3.3749, "step": 403400 }, { "epoch": 64.56, "grad_norm": 0.14911124110221863, "learning_rate": 0.00029033126125045, "loss": 3.1146, "step": 403500 }, { "epoch": 64.576, "grad_norm": 0.15387211740016937, "learning_rate": 0.00029032886115444616, "loss": 3.2399, "step": 403600 }, { "epoch": 64.592, "grad_norm": 0.18804728984832764, "learning_rate": 0.00029032646105844233, "loss": 2.9931, "step": 403700 }, { "epoch": 64.608, "grad_norm": 0.13702034950256348, "learning_rate": 0.00029032406096243845, "loss": 3.2003, "step": 403800 }, { "epoch": 64.624, "grad_norm": 0.14432747662067413, "learning_rate": 0.0002903216608664346, "loss": 3.0, "step": 403900 }, { "epoch": 64.64, "grad_norm": 0.15524637699127197, "learning_rate": 0.0002903192607704308, "loss": 3.2473, "step": 404000 }, { "epoch": 64.656, "grad_norm": 0.13132387399673462, "learning_rate": 0.00029031686067442695, "loss": 3.2032, "step": 404100 }, { "epoch": 64.672, "grad_norm": 0.16007497906684875, "learning_rate": 0.0002903144605784231, "loss": 3.1671, "step": 404200 }, { "epoch": 64.688, "grad_norm": 0.1622149646282196, "learning_rate": 0.00029031206048241924, "loss": 3.4161, "step": 404300 }, { "epoch": 64.704, "grad_norm": 0.1628185212612152, "learning_rate": 0.0002903096603864154, "loss": 3.4449, "step": 404400 }, { "epoch": 64.72, "grad_norm": 0.15857945382595062, "learning_rate": 0.0002903072602904116, "loss": 3.1676, "step": 404500 }, { "epoch": 64.736, "grad_norm": 0.1804850548505783, "learning_rate": 0.00029030486019440775, "loss": 3.3102, "step": 404600 }, { "epoch": 64.752, "grad_norm": 0.20671199262142181, "learning_rate": 0.0002903024600984039, "loss": 3.2986, "step": 404700 }, { "epoch": 64.768, "grad_norm": 0.2038181573152542, "learning_rate": 0.0002903000600024001, "loss": 3.2885, "step": 404800 }, { "epoch": 64.784, "grad_norm": 0.1746440827846527, "learning_rate": 0.0002902976599063962, "loss": 3.1588, "step": 404900 }, { "epoch": 64.8, "grad_norm": 0.13814905285835266, "learning_rate": 0.00029029525981039237, "loss": 3.2376, "step": 405000 }, { "epoch": 64.816, "grad_norm": 0.15085092186927795, "learning_rate": 0.00029029285971438854, "loss": 3.2875, "step": 405100 }, { "epoch": 64.832, "grad_norm": 0.20160487294197083, "learning_rate": 0.0002902904596183847, "loss": 3.06, "step": 405200 }, { "epoch": 64.848, "grad_norm": 0.17333854734897614, "learning_rate": 0.0002902880595223809, "loss": 3.2431, "step": 405300 }, { "epoch": 64.864, "grad_norm": 0.17378629744052887, "learning_rate": 0.000290285659426377, "loss": 3.3722, "step": 405400 }, { "epoch": 64.88, "grad_norm": 0.15163622796535492, "learning_rate": 0.00029028325933037316, "loss": 3.5939, "step": 405500 }, { "epoch": 64.896, "grad_norm": 0.17250078916549683, "learning_rate": 0.00029028085923436933, "loss": 3.4541, "step": 405600 }, { "epoch": 64.912, "grad_norm": 0.151726633310318, "learning_rate": 0.0002902784591383655, "loss": 3.1915, "step": 405700 }, { "epoch": 64.928, "grad_norm": 0.17703600227832794, "learning_rate": 0.00029027605904236167, "loss": 3.3362, "step": 405800 }, { "epoch": 64.944, "grad_norm": 0.16018244624137878, "learning_rate": 0.00029027365894635784, "loss": 3.239, "step": 405900 }, { "epoch": 64.96, "grad_norm": 0.1666039079427719, "learning_rate": 0.000290271258850354, "loss": 3.1536, "step": 406000 }, { "epoch": 64.976, "grad_norm": 0.171372190117836, "learning_rate": 0.0002902688587543502, "loss": 3.3074, "step": 406100 }, { "epoch": 64.992, "grad_norm": 0.18402357399463654, "learning_rate": 0.00029026645865834634, "loss": 3.3786, "step": 406200 }, { "epoch": 65.008, "grad_norm": 0.15774022042751312, "learning_rate": 0.00029026405856234246, "loss": 3.0884, "step": 406300 }, { "epoch": 65.024, "grad_norm": 0.19954966008663177, "learning_rate": 0.00029026165846633863, "loss": 2.9599, "step": 406400 }, { "epoch": 65.04, "grad_norm": 0.2132602483034134, "learning_rate": 0.0002902592583703348, "loss": 2.9757, "step": 406500 }, { "epoch": 65.056, "grad_norm": 0.2071508765220642, "learning_rate": 0.00029025685827433097, "loss": 2.9889, "step": 406600 }, { "epoch": 65.072, "grad_norm": 0.17834685742855072, "learning_rate": 0.00029025445817832714, "loss": 3.1954, "step": 406700 }, { "epoch": 65.088, "grad_norm": 0.15195533633232117, "learning_rate": 0.0002902520580823233, "loss": 3.2208, "step": 406800 }, { "epoch": 65.104, "grad_norm": 0.15575651824474335, "learning_rate": 0.0002902496579863194, "loss": 3.1114, "step": 406900 }, { "epoch": 65.12, "grad_norm": 0.17903466522693634, "learning_rate": 0.0002902472578903156, "loss": 3.2656, "step": 407000 }, { "epoch": 65.136, "grad_norm": 0.17520958185195923, "learning_rate": 0.00029024485779431176, "loss": 3.3189, "step": 407100 }, { "epoch": 65.152, "grad_norm": 0.16596445441246033, "learning_rate": 0.00029024245769830793, "loss": 3.1907, "step": 407200 }, { "epoch": 65.168, "grad_norm": 0.17474126815795898, "learning_rate": 0.0002902400816032641, "loss": 3.1959, "step": 407300 }, { "epoch": 65.184, "grad_norm": 0.18716758489608765, "learning_rate": 0.00029023768150726023, "loss": 3.4495, "step": 407400 }, { "epoch": 65.2, "grad_norm": 0.14933140575885773, "learning_rate": 0.0002902352814112564, "loss": 2.9317, "step": 407500 }, { "epoch": 65.216, "grad_norm": 0.1563313603401184, "learning_rate": 0.00029023288131525257, "loss": 3.1038, "step": 407600 }, { "epoch": 65.232, "grad_norm": 0.15839719772338867, "learning_rate": 0.00029023048121924874, "loss": 3.2544, "step": 407700 }, { "epoch": 65.248, "grad_norm": 0.14431753754615784, "learning_rate": 0.0002902280811232449, "loss": 3.1246, "step": 407800 }, { "epoch": 65.264, "grad_norm": 0.1635461151599884, "learning_rate": 0.0002902256810272411, "loss": 3.1502, "step": 407900 }, { "epoch": 65.28, "grad_norm": 0.18367035686969757, "learning_rate": 0.0002902232809312372, "loss": 3.3397, "step": 408000 }, { "epoch": 65.296, "grad_norm": 0.15756921470165253, "learning_rate": 0.00029022088083523336, "loss": 3.4879, "step": 408100 }, { "epoch": 65.312, "grad_norm": 0.15093983709812164, "learning_rate": 0.00029021848073922953, "loss": 3.2373, "step": 408200 }, { "epoch": 65.328, "grad_norm": 0.21595241129398346, "learning_rate": 0.0002902160806432257, "loss": 3.2601, "step": 408300 }, { "epoch": 65.344, "grad_norm": 0.214829683303833, "learning_rate": 0.00029021368054722187, "loss": 3.1833, "step": 408400 }, { "epoch": 65.36, "grad_norm": 0.19364017248153687, "learning_rate": 0.000290211280451218, "loss": 3.4189, "step": 408500 }, { "epoch": 65.376, "grad_norm": 0.19000966846942902, "learning_rate": 0.00029020888035521415, "loss": 3.3216, "step": 408600 }, { "epoch": 65.392, "grad_norm": 0.1721198707818985, "learning_rate": 0.0002902064802592103, "loss": 3.241, "step": 408700 }, { "epoch": 65.408, "grad_norm": 0.14177899062633514, "learning_rate": 0.0002902040801632065, "loss": 3.4488, "step": 408800 }, { "epoch": 65.424, "grad_norm": 0.17747963964939117, "learning_rate": 0.00029020168006720266, "loss": 2.9949, "step": 408900 }, { "epoch": 65.44, "grad_norm": 0.20670276880264282, "learning_rate": 0.00029019927997119883, "loss": 3.0397, "step": 409000 }, { "epoch": 65.456, "grad_norm": 0.13698653876781464, "learning_rate": 0.000290196879875195, "loss": 3.3555, "step": 409100 }, { "epoch": 65.472, "grad_norm": 0.1749463677406311, "learning_rate": 0.00029019447977919117, "loss": 3.2269, "step": 409200 }, { "epoch": 65.488, "grad_norm": 0.24117213487625122, "learning_rate": 0.00029019207968318734, "loss": 3.4585, "step": 409300 }, { "epoch": 65.504, "grad_norm": 0.23197031021118164, "learning_rate": 0.00029018967958718345, "loss": 3.3039, "step": 409400 }, { "epoch": 65.52, "grad_norm": 0.20688806474208832, "learning_rate": 0.0002901872794911796, "loss": 3.0588, "step": 409500 }, { "epoch": 65.536, "grad_norm": 0.1552305519580841, "learning_rate": 0.0002901849033961358, "loss": 3.2379, "step": 409600 }, { "epoch": 65.552, "grad_norm": 0.15656064450740814, "learning_rate": 0.000290182503300132, "loss": 3.2908, "step": 409700 }, { "epoch": 65.568, "grad_norm": 0.18944887816905975, "learning_rate": 0.00029018010320412815, "loss": 3.2305, "step": 409800 }, { "epoch": 65.584, "grad_norm": 0.1652027666568756, "learning_rate": 0.0002901777031081243, "loss": 3.1723, "step": 409900 }, { "epoch": 65.6, "grad_norm": 0.17510142922401428, "learning_rate": 0.00029017530301212043, "loss": 3.0028, "step": 410000 }, { "epoch": 65.616, "grad_norm": 0.14485076069831848, "learning_rate": 0.0002901729029161166, "loss": 3.4473, "step": 410100 }, { "epoch": 65.632, "grad_norm": 0.23456601798534393, "learning_rate": 0.00029017050282011277, "loss": 3.3611, "step": 410200 }, { "epoch": 65.648, "grad_norm": 0.18096910417079926, "learning_rate": 0.00029016810272410894, "loss": 3.3931, "step": 410300 }, { "epoch": 65.664, "grad_norm": 0.27821463346481323, "learning_rate": 0.0002901657026281051, "loss": 3.0491, "step": 410400 }, { "epoch": 65.68, "grad_norm": 0.14268772304058075, "learning_rate": 0.0002901633025321012, "loss": 3.3073, "step": 410500 }, { "epoch": 65.696, "grad_norm": 0.1781199723482132, "learning_rate": 0.0002901609024360974, "loss": 3.2637, "step": 410600 }, { "epoch": 65.712, "grad_norm": 0.17378269135951996, "learning_rate": 0.00029015850234009356, "loss": 3.5129, "step": 410700 }, { "epoch": 65.728, "grad_norm": 0.18985050916671753, "learning_rate": 0.00029015610224408973, "loss": 3.3047, "step": 410800 }, { "epoch": 65.744, "grad_norm": 0.17438234388828278, "learning_rate": 0.0002901537021480859, "loss": 3.1805, "step": 410900 }, { "epoch": 65.76, "grad_norm": 0.16399656236171722, "learning_rate": 0.00029015130205208207, "loss": 3.2122, "step": 411000 }, { "epoch": 65.776, "grad_norm": 0.15934105217456818, "learning_rate": 0.0002901489019560782, "loss": 3.3233, "step": 411100 }, { "epoch": 65.792, "grad_norm": 0.1910143792629242, "learning_rate": 0.00029014650186007435, "loss": 3.2123, "step": 411200 }, { "epoch": 65.808, "grad_norm": 0.18893980979919434, "learning_rate": 0.0002901441017640705, "loss": 3.3659, "step": 411300 }, { "epoch": 65.824, "grad_norm": 0.17919322848320007, "learning_rate": 0.0002901417016680667, "loss": 3.1342, "step": 411400 }, { "epoch": 65.84, "grad_norm": 0.16091307997703552, "learning_rate": 0.00029013930157206286, "loss": 3.0727, "step": 411500 }, { "epoch": 65.856, "grad_norm": 0.16632620990276337, "learning_rate": 0.000290136901476059, "loss": 3.2308, "step": 411600 }, { "epoch": 65.872, "grad_norm": 0.17391012609004974, "learning_rate": 0.00029013450138005515, "loss": 2.9754, "step": 411700 }, { "epoch": 65.888, "grad_norm": 0.16671647131443024, "learning_rate": 0.0002901321012840513, "loss": 3.3123, "step": 411800 }, { "epoch": 65.904, "grad_norm": 0.16384051740169525, "learning_rate": 0.0002901297011880475, "loss": 3.0673, "step": 411900 }, { "epoch": 65.92, "grad_norm": 0.14739589393138885, "learning_rate": 0.0002901273250930037, "loss": 2.9736, "step": 412000 }, { "epoch": 65.936, "grad_norm": 0.1644303798675537, "learning_rate": 0.00029012492499699984, "loss": 3.2469, "step": 412100 }, { "epoch": 65.952, "grad_norm": 0.15651769936084747, "learning_rate": 0.000290122524900996, "loss": 2.9826, "step": 412200 }, { "epoch": 65.968, "grad_norm": 0.15484747290611267, "learning_rate": 0.0002901201488059522, "loss": 3.2888, "step": 412300 }, { "epoch": 65.984, "grad_norm": 0.1757742166519165, "learning_rate": 0.00029011774870994837, "loss": 3.1681, "step": 412400 }, { "epoch": 66.0, "grad_norm": 0.14456619322299957, "learning_rate": 0.00029011534861394454, "loss": 3.1945, "step": 412500 }, { "epoch": 66.016, "grad_norm": 0.18126849830150604, "learning_rate": 0.0002901129485179407, "loss": 3.3146, "step": 412600 }, { "epoch": 66.032, "grad_norm": 0.16280168294906616, "learning_rate": 0.0002901105484219369, "loss": 3.1897, "step": 412700 }, { "epoch": 66.048, "grad_norm": 0.18445008993148804, "learning_rate": 0.00029010814832593305, "loss": 3.2949, "step": 412800 }, { "epoch": 66.064, "grad_norm": 0.17318513989448547, "learning_rate": 0.00029010574822992916, "loss": 3.0535, "step": 412900 }, { "epoch": 66.08, "grad_norm": 0.2014450877904892, "learning_rate": 0.00029010334813392533, "loss": 3.1181, "step": 413000 }, { "epoch": 66.096, "grad_norm": 0.2118493616580963, "learning_rate": 0.0002901009480379215, "loss": 3.1883, "step": 413100 }, { "epoch": 66.112, "grad_norm": 0.15487846732139587, "learning_rate": 0.00029009854794191767, "loss": 3.234, "step": 413200 }, { "epoch": 66.128, "grad_norm": 0.21333056688308716, "learning_rate": 0.00029009614784591384, "loss": 3.4063, "step": 413300 }, { "epoch": 66.144, "grad_norm": 0.18924759328365326, "learning_rate": 0.00029009374774990995, "loss": 3.2968, "step": 413400 }, { "epoch": 66.16, "grad_norm": 0.16457180678844452, "learning_rate": 0.0002900913476539061, "loss": 3.4171, "step": 413500 }, { "epoch": 66.176, "grad_norm": 0.17960451543331146, "learning_rate": 0.0002900889475579023, "loss": 3.6916, "step": 413600 }, { "epoch": 66.192, "grad_norm": 0.29867005348205566, "learning_rate": 0.00029008654746189846, "loss": 3.3855, "step": 413700 }, { "epoch": 66.208, "grad_norm": 0.21048885583877563, "learning_rate": 0.00029008414736589463, "loss": 3.1132, "step": 413800 }, { "epoch": 66.224, "grad_norm": 0.2570095658302307, "learning_rate": 0.0002900817472698908, "loss": 3.2326, "step": 413900 }, { "epoch": 66.24, "grad_norm": 0.16181118786334991, "learning_rate": 0.0002900793471738869, "loss": 3.4352, "step": 414000 }, { "epoch": 66.256, "grad_norm": 0.16813646256923676, "learning_rate": 0.0002900769470778831, "loss": 3.0017, "step": 414100 }, { "epoch": 66.272, "grad_norm": 0.18295887112617493, "learning_rate": 0.00029007454698187925, "loss": 3.3593, "step": 414200 }, { "epoch": 66.288, "grad_norm": 0.13354727625846863, "learning_rate": 0.0002900721468858754, "loss": 3.3247, "step": 414300 }, { "epoch": 66.304, "grad_norm": 0.2023884505033493, "learning_rate": 0.0002900697467898716, "loss": 3.2492, "step": 414400 }, { "epoch": 66.32, "grad_norm": 0.1561778485774994, "learning_rate": 0.0002900673466938677, "loss": 3.2055, "step": 414500 }, { "epoch": 66.336, "grad_norm": 0.17942002415657043, "learning_rate": 0.0002900649465978639, "loss": 2.9485, "step": 414600 }, { "epoch": 66.352, "grad_norm": 0.1733298897743225, "learning_rate": 0.00029006254650186004, "loss": 3.4602, "step": 414700 }, { "epoch": 66.368, "grad_norm": 0.15922173857688904, "learning_rate": 0.0002900601464058562, "loss": 3.2157, "step": 414800 }, { "epoch": 66.384, "grad_norm": 0.16801393032073975, "learning_rate": 0.0002900577463098524, "loss": 3.023, "step": 414900 }, { "epoch": 66.4, "grad_norm": 0.16610319912433624, "learning_rate": 0.00029005534621384855, "loss": 3.2386, "step": 415000 }, { "epoch": 66.416, "grad_norm": 0.17150096595287323, "learning_rate": 0.00029005294611784467, "loss": 3.2649, "step": 415100 }, { "epoch": 66.432, "grad_norm": 0.12512612342834473, "learning_rate": 0.00029005054602184084, "loss": 3.3725, "step": 415200 }, { "epoch": 66.448, "grad_norm": 0.15628837049007416, "learning_rate": 0.000290048145925837, "loss": 3.0441, "step": 415300 }, { "epoch": 66.464, "grad_norm": 0.16978907585144043, "learning_rate": 0.0002900457458298332, "loss": 3.33, "step": 415400 }, { "epoch": 66.48, "grad_norm": 0.1583622545003891, "learning_rate": 0.00029004334573382934, "loss": 3.094, "step": 415500 }, { "epoch": 66.496, "grad_norm": 0.1863357126712799, "learning_rate": 0.00029004094563782546, "loss": 2.8885, "step": 415600 }, { "epoch": 66.512, "grad_norm": 0.2048700749874115, "learning_rate": 0.00029003854554182163, "loss": 3.2564, "step": 415700 }, { "epoch": 66.528, "grad_norm": 0.15916477143764496, "learning_rate": 0.0002900361454458178, "loss": 3.419, "step": 415800 }, { "epoch": 66.544, "grad_norm": 0.15353962779045105, "learning_rate": 0.00029003374534981397, "loss": 3.4767, "step": 415900 }, { "epoch": 66.56, "grad_norm": 0.1822701096534729, "learning_rate": 0.00029003134525381014, "loss": 3.4004, "step": 416000 }, { "epoch": 66.576, "grad_norm": 0.16991378366947174, "learning_rate": 0.0002900289451578063, "loss": 3.1907, "step": 416100 }, { "epoch": 66.592, "grad_norm": 0.17740173637866974, "learning_rate": 0.0002900265450618024, "loss": 3.1979, "step": 416200 }, { "epoch": 66.608, "grad_norm": 0.17677730321884155, "learning_rate": 0.0002900241449657986, "loss": 3.2828, "step": 416300 }, { "epoch": 66.624, "grad_norm": 0.15407836437225342, "learning_rate": 0.00029002176887075483, "loss": 3.0535, "step": 416400 }, { "epoch": 66.64, "grad_norm": 0.12872610986232758, "learning_rate": 0.00029001936877475095, "loss": 3.2981, "step": 416500 }, { "epoch": 66.656, "grad_norm": 0.1376706063747406, "learning_rate": 0.0002900169686787471, "loss": 3.2403, "step": 416600 }, { "epoch": 66.672, "grad_norm": 0.1815478652715683, "learning_rate": 0.0002900145685827433, "loss": 3.2383, "step": 416700 }, { "epoch": 66.688, "grad_norm": 0.16438031196594238, "learning_rate": 0.00029001216848673945, "loss": 3.1043, "step": 416800 }, { "epoch": 66.704, "grad_norm": 0.18337871134281158, "learning_rate": 0.0002900097683907356, "loss": 3.2412, "step": 416900 }, { "epoch": 66.72, "grad_norm": 0.13940657675266266, "learning_rate": 0.0002900073682947318, "loss": 3.2162, "step": 417000 }, { "epoch": 66.736, "grad_norm": 0.15541857481002808, "learning_rate": 0.0002900049681987279, "loss": 3.0911, "step": 417100 }, { "epoch": 66.752, "grad_norm": 0.2010001391172409, "learning_rate": 0.0002900025681027241, "loss": 3.6965, "step": 417200 }, { "epoch": 66.768, "grad_norm": 0.15841414034366608, "learning_rate": 0.00029000016800672025, "loss": 3.299, "step": 417300 }, { "epoch": 66.784, "grad_norm": 0.1667863130569458, "learning_rate": 0.0002899977679107164, "loss": 3.2021, "step": 417400 }, { "epoch": 66.8, "grad_norm": 0.23050279915332794, "learning_rate": 0.0002899953678147126, "loss": 3.377, "step": 417500 }, { "epoch": 66.816, "grad_norm": 0.18495294451713562, "learning_rate": 0.0002899929677187087, "loss": 3.3345, "step": 417600 }, { "epoch": 66.832, "grad_norm": 0.1721765249967575, "learning_rate": 0.00028999056762270487, "loss": 3.3583, "step": 417700 }, { "epoch": 66.848, "grad_norm": 0.18413980305194855, "learning_rate": 0.00028998816752670104, "loss": 3.3486, "step": 417800 }, { "epoch": 66.864, "grad_norm": 0.1536562740802765, "learning_rate": 0.0002899857674306972, "loss": 3.0571, "step": 417900 }, { "epoch": 66.88, "grad_norm": 0.18847540020942688, "learning_rate": 0.0002899833673346934, "loss": 3.1706, "step": 418000 }, { "epoch": 66.896, "grad_norm": 0.24180740118026733, "learning_rate": 0.00028998096723868954, "loss": 3.2695, "step": 418100 }, { "epoch": 66.912, "grad_norm": 0.16973194479942322, "learning_rate": 0.00028997856714268566, "loss": 3.2338, "step": 418200 }, { "epoch": 66.928, "grad_norm": 0.19181926548480988, "learning_rate": 0.00028997616704668183, "loss": 3.2762, "step": 418300 }, { "epoch": 66.944, "grad_norm": 0.15499474108219147, "learning_rate": 0.000289973766950678, "loss": 3.0093, "step": 418400 }, { "epoch": 66.96, "grad_norm": 0.2398979812860489, "learning_rate": 0.00028997136685467417, "loss": 3.2578, "step": 418500 }, { "epoch": 66.976, "grad_norm": 0.25835326313972473, "learning_rate": 0.00028996896675867034, "loss": 3.175, "step": 418600 }, { "epoch": 66.992, "grad_norm": 0.19141635298728943, "learning_rate": 0.00028996656666266645, "loss": 3.1055, "step": 418700 }, { "epoch": 67.008, "grad_norm": 0.15152227878570557, "learning_rate": 0.0002899641665666626, "loss": 3.3794, "step": 418800 }, { "epoch": 67.024, "grad_norm": 0.1598120480775833, "learning_rate": 0.0002899617664706588, "loss": 2.8063, "step": 418900 }, { "epoch": 67.04, "grad_norm": 0.19952699542045593, "learning_rate": 0.00028995936637465496, "loss": 3.0383, "step": 419000 }, { "epoch": 67.056, "grad_norm": 0.18428730964660645, "learning_rate": 0.00028995696627865113, "loss": 3.267, "step": 419100 }, { "epoch": 67.072, "grad_norm": 0.15677548944950104, "learning_rate": 0.0002899545901836073, "loss": 3.534, "step": 419200 }, { "epoch": 67.088, "grad_norm": 0.16008420288562775, "learning_rate": 0.0002899521900876035, "loss": 3.2383, "step": 419300 }, { "epoch": 67.104, "grad_norm": 0.20551377534866333, "learning_rate": 0.00028994978999159966, "loss": 3.0117, "step": 419400 }, { "epoch": 67.12, "grad_norm": 0.15715843439102173, "learning_rate": 0.0002899473898955958, "loss": 3.2531, "step": 419500 }, { "epoch": 67.136, "grad_norm": 0.14172272384166718, "learning_rate": 0.00028994498979959194, "loss": 3.3221, "step": 419600 }, { "epoch": 67.152, "grad_norm": 0.18836277723312378, "learning_rate": 0.0002899425897035881, "loss": 3.228, "step": 419700 }, { "epoch": 67.168, "grad_norm": 0.166545107960701, "learning_rate": 0.0002899401896075843, "loss": 3.2967, "step": 419800 }, { "epoch": 67.184, "grad_norm": 0.17769841849803925, "learning_rate": 0.00028993778951158045, "loss": 3.3036, "step": 419900 }, { "epoch": 67.2, "grad_norm": 0.1857639104127884, "learning_rate": 0.0002899353894155766, "loss": 3.1868, "step": 420000 }, { "epoch": 67.216, "grad_norm": 0.20483236014842987, "learning_rate": 0.0002899329893195728, "loss": 3.0521, "step": 420100 }, { "epoch": 67.232, "grad_norm": 0.1525556594133377, "learning_rate": 0.0002899305892235689, "loss": 3.3671, "step": 420200 }, { "epoch": 67.248, "grad_norm": 0.14867645502090454, "learning_rate": 0.00028992818912756507, "loss": 3.0443, "step": 420300 }, { "epoch": 67.264, "grad_norm": 0.15610575675964355, "learning_rate": 0.00028992578903156124, "loss": 3.3651, "step": 420400 }, { "epoch": 67.28, "grad_norm": 0.14688436686992645, "learning_rate": 0.0002899233889355574, "loss": 3.1796, "step": 420500 }, { "epoch": 67.296, "grad_norm": 0.15950454771518707, "learning_rate": 0.0002899209888395536, "loss": 3.3347, "step": 420600 }, { "epoch": 67.312, "grad_norm": 0.16857288777828217, "learning_rate": 0.00028991861274450977, "loss": 3.3411, "step": 420700 }, { "epoch": 67.328, "grad_norm": 0.17320764064788818, "learning_rate": 0.00028991621264850593, "loss": 2.9766, "step": 420800 }, { "epoch": 67.344, "grad_norm": 0.17212575674057007, "learning_rate": 0.0002899138125525021, "loss": 3.0923, "step": 420900 }, { "epoch": 67.36, "grad_norm": 0.1920507550239563, "learning_rate": 0.0002899114124564983, "loss": 3.5374, "step": 421000 }, { "epoch": 67.376, "grad_norm": 0.1613692194223404, "learning_rate": 0.0002899090123604944, "loss": 3.1199, "step": 421100 }, { "epoch": 67.392, "grad_norm": 0.20088893175125122, "learning_rate": 0.00028990661226449056, "loss": 3.2266, "step": 421200 }, { "epoch": 67.408, "grad_norm": 0.17936086654663086, "learning_rate": 0.0002899042121684867, "loss": 3.1731, "step": 421300 }, { "epoch": 67.424, "grad_norm": 0.13625045120716095, "learning_rate": 0.0002899018120724829, "loss": 3.1198, "step": 421400 }, { "epoch": 67.44, "grad_norm": 0.1878190040588379, "learning_rate": 0.00028989941197647906, "loss": 3.1575, "step": 421500 }, { "epoch": 67.456, "grad_norm": 0.16331274807453156, "learning_rate": 0.0002898970118804752, "loss": 3.2621, "step": 421600 }, { "epoch": 67.472, "grad_norm": 0.14811848104000092, "learning_rate": 0.00028989461178447135, "loss": 3.0034, "step": 421700 }, { "epoch": 67.488, "grad_norm": 0.183964341878891, "learning_rate": 0.0002898922116884675, "loss": 3.2659, "step": 421800 }, { "epoch": 67.504, "grad_norm": 0.18865571916103363, "learning_rate": 0.0002898898115924637, "loss": 3.4129, "step": 421900 }, { "epoch": 67.52, "grad_norm": 0.18336474895477295, "learning_rate": 0.00028988741149645986, "loss": 3.6978, "step": 422000 }, { "epoch": 67.536, "grad_norm": 0.16465769708156586, "learning_rate": 0.000289885011400456, "loss": 3.1777, "step": 422100 }, { "epoch": 67.552, "grad_norm": 0.17261596024036407, "learning_rate": 0.00028988261130445214, "loss": 3.4101, "step": 422200 }, { "epoch": 67.568, "grad_norm": 0.2232416868209839, "learning_rate": 0.0002898802112084483, "loss": 3.2037, "step": 422300 }, { "epoch": 67.584, "grad_norm": 0.18052959442138672, "learning_rate": 0.0002898778111124445, "loss": 3.4266, "step": 422400 }, { "epoch": 67.6, "grad_norm": 0.17907805740833282, "learning_rate": 0.00028987543501740067, "loss": 3.3219, "step": 422500 }, { "epoch": 67.616, "grad_norm": 0.14218075573444366, "learning_rate": 0.00028987303492139684, "loss": 3.4273, "step": 422600 }, { "epoch": 67.632, "grad_norm": 0.15934379398822784, "learning_rate": 0.00028987063482539295, "loss": 3.4743, "step": 422700 }, { "epoch": 67.648, "grad_norm": 0.16686351597309113, "learning_rate": 0.0002898682347293891, "loss": 3.1992, "step": 422800 }, { "epoch": 67.664, "grad_norm": 0.18816311657428741, "learning_rate": 0.0002898658346333853, "loss": 3.2974, "step": 422900 }, { "epoch": 67.68, "grad_norm": 0.1610630303621292, "learning_rate": 0.00028986343453738146, "loss": 3.0257, "step": 423000 }, { "epoch": 67.696, "grad_norm": 0.1849362552165985, "learning_rate": 0.00028986103444137763, "loss": 3.0311, "step": 423100 }, { "epoch": 67.712, "grad_norm": 0.17206250131130219, "learning_rate": 0.0002898586343453738, "loss": 3.0179, "step": 423200 }, { "epoch": 67.728, "grad_norm": 0.17204327881336212, "learning_rate": 0.0002898562342493699, "loss": 2.8746, "step": 423300 }, { "epoch": 67.744, "grad_norm": 0.14912815392017365, "learning_rate": 0.0002898538341533661, "loss": 3.3797, "step": 423400 }, { "epoch": 67.76, "grad_norm": 0.14870645105838776, "learning_rate": 0.00028985143405736225, "loss": 3.0973, "step": 423500 }, { "epoch": 67.776, "grad_norm": 0.190955251455307, "learning_rate": 0.0002898490339613584, "loss": 3.2309, "step": 423600 }, { "epoch": 67.792, "grad_norm": 0.17968475818634033, "learning_rate": 0.0002898466338653546, "loss": 3.0186, "step": 423700 }, { "epoch": 67.808, "grad_norm": 0.16226927936077118, "learning_rate": 0.00028984423376935076, "loss": 3.2795, "step": 423800 }, { "epoch": 67.824, "grad_norm": 0.16474373638629913, "learning_rate": 0.00028984183367334693, "loss": 3.1379, "step": 423900 }, { "epoch": 67.84, "grad_norm": 0.15299992263317108, "learning_rate": 0.0002898394335773431, "loss": 3.4795, "step": 424000 }, { "epoch": 67.856, "grad_norm": 0.16270536184310913, "learning_rate": 0.00028983703348133927, "loss": 3.1679, "step": 424100 }, { "epoch": 67.872, "grad_norm": 0.1815613955259323, "learning_rate": 0.0002898346333853354, "loss": 3.3678, "step": 424200 }, { "epoch": 67.888, "grad_norm": 0.1907004565000534, "learning_rate": 0.00028983223328933155, "loss": 3.2777, "step": 424300 }, { "epoch": 67.904, "grad_norm": 0.1513364613056183, "learning_rate": 0.0002898298331933277, "loss": 3.2139, "step": 424400 }, { "epoch": 67.92, "grad_norm": 0.16662321984767914, "learning_rate": 0.0002898274330973239, "loss": 3.2129, "step": 424500 }, { "epoch": 67.936, "grad_norm": 0.20239441096782684, "learning_rate": 0.00028982503300132006, "loss": 3.4447, "step": 424600 }, { "epoch": 67.952, "grad_norm": 0.21214716136455536, "learning_rate": 0.00028982263290531617, "loss": 3.3026, "step": 424700 }, { "epoch": 67.968, "grad_norm": 0.15906432271003723, "learning_rate": 0.00028982023280931234, "loss": 3.1662, "step": 424800 }, { "epoch": 67.984, "grad_norm": 0.13978344202041626, "learning_rate": 0.0002898178327133085, "loss": 3.1455, "step": 424900 }, { "epoch": 68.0, "grad_norm": 0.2016223967075348, "learning_rate": 0.0002898154326173047, "loss": 3.109, "step": 425000 }, { "epoch": 68.016, "grad_norm": 0.16401539742946625, "learning_rate": 0.00028981303252130085, "loss": 2.8232, "step": 425100 }, { "epoch": 68.032, "grad_norm": 0.17057131230831146, "learning_rate": 0.000289810632425297, "loss": 3.1773, "step": 425200 }, { "epoch": 68.048, "grad_norm": 0.1674639880657196, "learning_rate": 0.00028980823232929313, "loss": 3.0571, "step": 425300 }, { "epoch": 68.064, "grad_norm": 0.1504182666540146, "learning_rate": 0.0002898058322332893, "loss": 3.3046, "step": 425400 }, { "epoch": 68.08, "grad_norm": 0.18554842472076416, "learning_rate": 0.00028980343213728547, "loss": 3.1105, "step": 425500 }, { "epoch": 68.096, "grad_norm": 0.21054577827453613, "learning_rate": 0.00028980103204128164, "loss": 3.0267, "step": 425600 }, { "epoch": 68.112, "grad_norm": 0.14785288274288177, "learning_rate": 0.0002897986319452778, "loss": 3.0436, "step": 425700 }, { "epoch": 68.128, "grad_norm": 0.17836613953113556, "learning_rate": 0.0002897962318492739, "loss": 3.3304, "step": 425800 }, { "epoch": 68.144, "grad_norm": 0.18837901949882507, "learning_rate": 0.0002897938317532701, "loss": 2.9918, "step": 425900 }, { "epoch": 68.16, "grad_norm": 0.1699995994567871, "learning_rate": 0.00028979143165726626, "loss": 3.2832, "step": 426000 }, { "epoch": 68.176, "grad_norm": 0.18159790337085724, "learning_rate": 0.00028978903156126243, "loss": 3.2936, "step": 426100 }, { "epoch": 68.192, "grad_norm": 0.15881980955600739, "learning_rate": 0.0002897866314652586, "loss": 3.1597, "step": 426200 }, { "epoch": 68.208, "grad_norm": 0.17486238479614258, "learning_rate": 0.00028978423136925477, "loss": 3.3152, "step": 426300 }, { "epoch": 68.224, "grad_norm": 0.1679767519235611, "learning_rate": 0.0002897818312732509, "loss": 3.2742, "step": 426400 }, { "epoch": 68.24, "grad_norm": 0.18665814399719238, "learning_rate": 0.00028977943117724706, "loss": 3.1353, "step": 426500 }, { "epoch": 68.256, "grad_norm": 0.15344250202178955, "learning_rate": 0.0002897770310812432, "loss": 3.5542, "step": 426600 }, { "epoch": 68.272, "grad_norm": 0.14039750397205353, "learning_rate": 0.0002897746309852394, "loss": 3.0457, "step": 426700 }, { "epoch": 68.288, "grad_norm": 0.17560596764087677, "learning_rate": 0.00028977223088923556, "loss": 3.2932, "step": 426800 }, { "epoch": 68.304, "grad_norm": 0.17830394208431244, "learning_rate": 0.00028976983079323173, "loss": 3.6081, "step": 426900 }, { "epoch": 68.32, "grad_norm": 0.16969802975654602, "learning_rate": 0.00028976743069722785, "loss": 3.1385, "step": 427000 }, { "epoch": 68.336, "grad_norm": 0.2218111902475357, "learning_rate": 0.000289765030601224, "loss": 3.351, "step": 427100 }, { "epoch": 68.352, "grad_norm": 0.16429346799850464, "learning_rate": 0.0002897626305052202, "loss": 3.1271, "step": 427200 }, { "epoch": 68.368, "grad_norm": 0.19090032577514648, "learning_rate": 0.00028976023040921635, "loss": 3.1225, "step": 427300 }, { "epoch": 68.384, "grad_norm": 0.1908756047487259, "learning_rate": 0.0002897578303132125, "loss": 3.4976, "step": 427400 }, { "epoch": 68.4, "grad_norm": 0.17399418354034424, "learning_rate": 0.00028975543021720864, "loss": 3.4921, "step": 427500 }, { "epoch": 68.416, "grad_norm": 0.16562005877494812, "learning_rate": 0.0002897530301212048, "loss": 3.2077, "step": 427600 }, { "epoch": 68.432, "grad_norm": 0.16381105780601501, "learning_rate": 0.000289750630025201, "loss": 3.4274, "step": 427700 }, { "epoch": 68.448, "grad_norm": 0.1884421408176422, "learning_rate": 0.00028974822992919715, "loss": 3.2051, "step": 427800 }, { "epoch": 68.464, "grad_norm": 0.1738419383764267, "learning_rate": 0.0002897458298331933, "loss": 3.2285, "step": 427900 }, { "epoch": 68.48, "grad_norm": 0.15939660370349884, "learning_rate": 0.0002897434297371895, "loss": 3.0956, "step": 428000 }, { "epoch": 68.496, "grad_norm": 0.15453357994556427, "learning_rate": 0.0002897410296411856, "loss": 3.1257, "step": 428100 }, { "epoch": 68.512, "grad_norm": 0.18250925838947296, "learning_rate": 0.00028973862954518177, "loss": 3.2609, "step": 428200 }, { "epoch": 68.528, "grad_norm": 0.24881486594676971, "learning_rate": 0.00028973622944917794, "loss": 3.1906, "step": 428300 }, { "epoch": 68.544, "grad_norm": 0.19451306760311127, "learning_rate": 0.0002897338293531741, "loss": 3.2294, "step": 428400 }, { "epoch": 68.56, "grad_norm": 0.190110981464386, "learning_rate": 0.0002897314292571703, "loss": 3.2435, "step": 428500 }, { "epoch": 68.576, "grad_norm": 0.15392684936523438, "learning_rate": 0.0002897290291611664, "loss": 3.0781, "step": 428600 }, { "epoch": 68.592, "grad_norm": 0.20126976072788239, "learning_rate": 0.00028972662906516256, "loss": 3.1663, "step": 428700 }, { "epoch": 68.608, "grad_norm": 0.183677539229393, "learning_rate": 0.00028972422896915873, "loss": 3.3301, "step": 428800 }, { "epoch": 68.624, "grad_norm": 0.1698881834745407, "learning_rate": 0.0002897218288731549, "loss": 3.1511, "step": 428900 }, { "epoch": 68.64, "grad_norm": 0.1801426261663437, "learning_rate": 0.00028971942877715107, "loss": 2.9481, "step": 429000 }, { "epoch": 68.656, "grad_norm": 0.1574525237083435, "learning_rate": 0.00028971705268210726, "loss": 3.164, "step": 429100 }, { "epoch": 68.672, "grad_norm": 0.15590137243270874, "learning_rate": 0.0002897146525861034, "loss": 3.0113, "step": 429200 }, { "epoch": 68.688, "grad_norm": 0.18709461390972137, "learning_rate": 0.0002897122524900996, "loss": 3.4376, "step": 429300 }, { "epoch": 68.704, "grad_norm": 0.1890987753868103, "learning_rate": 0.00028970985239409576, "loss": 3.1295, "step": 429400 }, { "epoch": 68.72, "grad_norm": 0.14143402874469757, "learning_rate": 0.0002897074522980919, "loss": 3.1438, "step": 429500 }, { "epoch": 68.736, "grad_norm": 0.1756289005279541, "learning_rate": 0.00028970505220208805, "loss": 3.2501, "step": 429600 }, { "epoch": 68.752, "grad_norm": 0.17135892808437347, "learning_rate": 0.0002897026521060842, "loss": 3.1291, "step": 429700 }, { "epoch": 68.768, "grad_norm": 0.20300596952438354, "learning_rate": 0.0002897002520100804, "loss": 3.0968, "step": 429800 }, { "epoch": 68.784, "grad_norm": 0.2082676887512207, "learning_rate": 0.00028969785191407656, "loss": 3.4786, "step": 429900 }, { "epoch": 68.8, "grad_norm": 0.2389577180147171, "learning_rate": 0.0002896954518180727, "loss": 3.3932, "step": 430000 }, { "epoch": 68.816, "grad_norm": 0.2088221311569214, "learning_rate": 0.00028969305172206884, "loss": 3.2809, "step": 430100 }, { "epoch": 68.832, "grad_norm": 0.16121825575828552, "learning_rate": 0.000289690651626065, "loss": 3.1641, "step": 430200 }, { "epoch": 68.848, "grad_norm": 0.16565261781215668, "learning_rate": 0.0002896882515300612, "loss": 3.0375, "step": 430300 }, { "epoch": 68.864, "grad_norm": 0.15032577514648438, "learning_rate": 0.00028968585143405735, "loss": 3.4202, "step": 430400 }, { "epoch": 68.88, "grad_norm": 0.1612488180398941, "learning_rate": 0.0002896834513380535, "loss": 3.4116, "step": 430500 }, { "epoch": 68.896, "grad_norm": 0.20967325568199158, "learning_rate": 0.00028968105124204963, "loss": 3.0519, "step": 430600 }, { "epoch": 68.912, "grad_norm": 0.1563340425491333, "learning_rate": 0.0002896786511460458, "loss": 3.2808, "step": 430700 }, { "epoch": 68.928, "grad_norm": 0.1504407376050949, "learning_rate": 0.00028967625105004197, "loss": 3.2856, "step": 430800 }, { "epoch": 68.944, "grad_norm": 0.17815515398979187, "learning_rate": 0.00028967385095403814, "loss": 3.2755, "step": 430900 }, { "epoch": 68.96, "grad_norm": 0.1697603315114975, "learning_rate": 0.0002896714508580343, "loss": 3.0989, "step": 431000 }, { "epoch": 68.976, "grad_norm": 0.17162412405014038, "learning_rate": 0.0002896690507620305, "loss": 3.2605, "step": 431100 }, { "epoch": 68.992, "grad_norm": 0.19375640153884888, "learning_rate": 0.0002896666506660266, "loss": 3.1312, "step": 431200 }, { "epoch": 69.008, "grad_norm": 0.20433403551578522, "learning_rate": 0.00028966427457098284, "loss": 3.2732, "step": 431300 }, { "epoch": 69.024, "grad_norm": 0.1615174412727356, "learning_rate": 0.000289661874474979, "loss": 3.1633, "step": 431400 }, { "epoch": 69.04, "grad_norm": 0.17323802411556244, "learning_rate": 0.0002896594743789751, "loss": 3.3773, "step": 431500 }, { "epoch": 69.056, "grad_norm": 0.18792323768138885, "learning_rate": 0.0002896570742829713, "loss": 3.073, "step": 431600 }, { "epoch": 69.072, "grad_norm": 0.18214423954486847, "learning_rate": 0.00028965467418696746, "loss": 3.0197, "step": 431700 }, { "epoch": 69.088, "grad_norm": 0.1691034436225891, "learning_rate": 0.00028965229809192365, "loss": 3.1438, "step": 431800 }, { "epoch": 69.104, "grad_norm": 0.20476679503917694, "learning_rate": 0.0002896498979959198, "loss": 3.479, "step": 431900 }, { "epoch": 69.12, "grad_norm": 0.18023690581321716, "learning_rate": 0.000289647497899916, "loss": 3.0958, "step": 432000 }, { "epoch": 69.136, "grad_norm": 0.17657442390918732, "learning_rate": 0.0002896450978039121, "loss": 3.2868, "step": 432100 }, { "epoch": 69.152, "grad_norm": 0.16101005673408508, "learning_rate": 0.00028964269770790827, "loss": 3.1926, "step": 432200 }, { "epoch": 69.168, "grad_norm": 0.16814717650413513, "learning_rate": 0.00028964029761190444, "loss": 2.9615, "step": 432300 }, { "epoch": 69.184, "grad_norm": 0.19538766145706177, "learning_rate": 0.0002896378975159006, "loss": 3.2792, "step": 432400 }, { "epoch": 69.2, "grad_norm": 0.16429370641708374, "learning_rate": 0.0002896354974198968, "loss": 3.2783, "step": 432500 }, { "epoch": 69.216, "grad_norm": 0.1518297642469406, "learning_rate": 0.0002896330973238929, "loss": 3.1146, "step": 432600 }, { "epoch": 69.232, "grad_norm": 0.1916775405406952, "learning_rate": 0.00028963069722788906, "loss": 3.057, "step": 432700 }, { "epoch": 69.248, "grad_norm": 0.14847758412361145, "learning_rate": 0.00028962829713188523, "loss": 3.0734, "step": 432800 }, { "epoch": 69.264, "grad_norm": 0.16910727322101593, "learning_rate": 0.0002896258970358814, "loss": 3.0566, "step": 432900 }, { "epoch": 69.28, "grad_norm": 0.20631904900074005, "learning_rate": 0.00028962349693987757, "loss": 3.5853, "step": 433000 }, { "epoch": 69.296, "grad_norm": 0.2019880712032318, "learning_rate": 0.00028962109684387374, "loss": 3.2368, "step": 433100 }, { "epoch": 69.312, "grad_norm": 0.16344687342643738, "learning_rate": 0.0002896186967478699, "loss": 3.1102, "step": 433200 }, { "epoch": 69.328, "grad_norm": 0.22381144762039185, "learning_rate": 0.0002896162966518661, "loss": 3.5578, "step": 433300 }, { "epoch": 69.344, "grad_norm": 0.1971784383058548, "learning_rate": 0.00028961389655586225, "loss": 3.1679, "step": 433400 }, { "epoch": 69.36, "grad_norm": 0.20234297215938568, "learning_rate": 0.00028961149645985836, "loss": 3.3488, "step": 433500 }, { "epoch": 69.376, "grad_norm": 0.1439906358718872, "learning_rate": 0.00028960909636385453, "loss": 2.922, "step": 433600 }, { "epoch": 69.392, "grad_norm": 0.18036594986915588, "learning_rate": 0.0002896066962678507, "loss": 3.1894, "step": 433700 }, { "epoch": 69.408, "grad_norm": 0.1671627163887024, "learning_rate": 0.00028960429617184687, "loss": 3.2566, "step": 433800 }, { "epoch": 69.424, "grad_norm": 0.20040644705295563, "learning_rate": 0.00028960189607584304, "loss": 3.1219, "step": 433900 }, { "epoch": 69.44, "grad_norm": 0.16853700578212738, "learning_rate": 0.0002895994959798392, "loss": 3.2513, "step": 434000 }, { "epoch": 69.456, "grad_norm": 0.21704335510730743, "learning_rate": 0.0002895970958838353, "loss": 3.2303, "step": 434100 }, { "epoch": 69.472, "grad_norm": 0.1632520705461502, "learning_rate": 0.0002895946957878315, "loss": 3.2075, "step": 434200 }, { "epoch": 69.488, "grad_norm": 0.15098467469215393, "learning_rate": 0.00028959229569182766, "loss": 3.1632, "step": 434300 }, { "epoch": 69.504, "grad_norm": 0.22671981155872345, "learning_rate": 0.00028958989559582383, "loss": 3.2217, "step": 434400 }, { "epoch": 69.52, "grad_norm": 0.1431196630001068, "learning_rate": 0.00028958749549982, "loss": 3.1174, "step": 434500 }, { "epoch": 69.536, "grad_norm": 0.19929192960262299, "learning_rate": 0.0002895850954038161, "loss": 3.1209, "step": 434600 }, { "epoch": 69.552, "grad_norm": 0.18621821701526642, "learning_rate": 0.0002895826953078123, "loss": 3.1957, "step": 434700 }, { "epoch": 69.568, "grad_norm": 0.1595284640789032, "learning_rate": 0.00028958029521180845, "loss": 3.3171, "step": 434800 }, { "epoch": 69.584, "grad_norm": 0.16731344163417816, "learning_rate": 0.0002895778951158046, "loss": 3.3494, "step": 434900 }, { "epoch": 69.6, "grad_norm": 0.19960619509220123, "learning_rate": 0.0002895754950198008, "loss": 2.9833, "step": 435000 }, { "epoch": 69.616, "grad_norm": 0.17385992407798767, "learning_rate": 0.00028957309492379696, "loss": 3.1057, "step": 435100 }, { "epoch": 69.632, "grad_norm": 0.495706707239151, "learning_rate": 0.0002895706948277931, "loss": 3.4731, "step": 435200 }, { "epoch": 69.648, "grad_norm": 0.16650983691215515, "learning_rate": 0.00028956829473178924, "loss": 3.1773, "step": 435300 }, { "epoch": 69.664, "grad_norm": 0.1933530867099762, "learning_rate": 0.0002895658946357854, "loss": 3.1841, "step": 435400 }, { "epoch": 69.68, "grad_norm": 0.18537364900112152, "learning_rate": 0.0002895634945397816, "loss": 3.2968, "step": 435500 }, { "epoch": 69.696, "grad_norm": 0.18687385320663452, "learning_rate": 0.00028956109444377775, "loss": 3.0782, "step": 435600 }, { "epoch": 69.712, "grad_norm": 0.16163834929466248, "learning_rate": 0.00028955869434777387, "loss": 3.2556, "step": 435700 }, { "epoch": 69.728, "grad_norm": 0.15239547193050385, "learning_rate": 0.00028955629425177004, "loss": 3.2337, "step": 435800 }, { "epoch": 69.744, "grad_norm": 0.14040453732013702, "learning_rate": 0.0002895538941557662, "loss": 3.308, "step": 435900 }, { "epoch": 69.76, "grad_norm": 0.16704721748828888, "learning_rate": 0.0002895514940597624, "loss": 3.3507, "step": 436000 }, { "epoch": 69.776, "grad_norm": 0.17059355974197388, "learning_rate": 0.00028954909396375854, "loss": 3.2492, "step": 436100 }, { "epoch": 69.792, "grad_norm": 0.17115841805934906, "learning_rate": 0.0002895466938677547, "loss": 3.0856, "step": 436200 }, { "epoch": 69.808, "grad_norm": 0.289878249168396, "learning_rate": 0.0002895442937717508, "loss": 3.3864, "step": 436300 }, { "epoch": 69.824, "grad_norm": 0.18896961212158203, "learning_rate": 0.000289541893675747, "loss": 2.9981, "step": 436400 }, { "epoch": 69.84, "grad_norm": 0.18171896040439606, "learning_rate": 0.00028953951758070324, "loss": 3.0393, "step": 436500 }, { "epoch": 69.856, "grad_norm": 0.14681090414524078, "learning_rate": 0.00028953711748469935, "loss": 3.5522, "step": 436600 }, { "epoch": 69.872, "grad_norm": 0.17660827934741974, "learning_rate": 0.0002895347173886955, "loss": 2.9629, "step": 436700 }, { "epoch": 69.888, "grad_norm": 0.1661737561225891, "learning_rate": 0.0002895323172926917, "loss": 2.9277, "step": 436800 }, { "epoch": 69.904, "grad_norm": 0.15136516094207764, "learning_rate": 0.00028952991719668786, "loss": 3.0675, "step": 436900 }, { "epoch": 69.92, "grad_norm": 0.17573022842407227, "learning_rate": 0.00028952751710068403, "loss": 3.2295, "step": 437000 }, { "epoch": 69.936, "grad_norm": 0.18499794602394104, "learning_rate": 0.0002895251410056402, "loss": 3.636, "step": 437100 }, { "epoch": 69.952, "grad_norm": 0.15257014334201813, "learning_rate": 0.00028952274090963633, "loss": 3.5264, "step": 437200 }, { "epoch": 69.968, "grad_norm": 0.15885518491268158, "learning_rate": 0.0002895203408136325, "loss": 3.073, "step": 437300 }, { "epoch": 69.984, "grad_norm": 0.16496077179908752, "learning_rate": 0.00028951794071762867, "loss": 3.1511, "step": 437400 }, { "epoch": 70.0, "grad_norm": 0.19217410683631897, "learning_rate": 0.00028951554062162484, "loss": 3.5224, "step": 437500 }, { "epoch": 70.016, "grad_norm": 0.20848862826824188, "learning_rate": 0.000289513140525621, "loss": 3.033, "step": 437600 }, { "epoch": 70.032, "grad_norm": 0.16473689675331116, "learning_rate": 0.0002895107404296171, "loss": 3.448, "step": 437700 }, { "epoch": 70.048, "grad_norm": 0.2603306174278259, "learning_rate": 0.0002895083403336133, "loss": 3.2932, "step": 437800 }, { "epoch": 70.064, "grad_norm": 0.2066253274679184, "learning_rate": 0.00028950594023760946, "loss": 2.9853, "step": 437900 }, { "epoch": 70.08, "grad_norm": 0.17389659583568573, "learning_rate": 0.00028950354014160563, "loss": 3.4064, "step": 438000 }, { "epoch": 70.096, "grad_norm": 0.21632930636405945, "learning_rate": 0.0002895011400456018, "loss": 3.1405, "step": 438100 }, { "epoch": 70.112, "grad_norm": 0.1864621788263321, "learning_rate": 0.00028949873994959797, "loss": 3.174, "step": 438200 }, { "epoch": 70.128, "grad_norm": 0.18558798730373383, "learning_rate": 0.0002894963398535941, "loss": 3.3103, "step": 438300 }, { "epoch": 70.144, "grad_norm": 0.16501948237419128, "learning_rate": 0.00028949393975759026, "loss": 3.4386, "step": 438400 }, { "epoch": 70.16, "grad_norm": 0.15715397894382477, "learning_rate": 0.0002894915396615864, "loss": 2.9935, "step": 438500 }, { "epoch": 70.176, "grad_norm": 0.16916458308696747, "learning_rate": 0.0002894891395655826, "loss": 2.8977, "step": 438600 }, { "epoch": 70.192, "grad_norm": 0.18018291890621185, "learning_rate": 0.00028948673946957876, "loss": 3.4024, "step": 438700 }, { "epoch": 70.208, "grad_norm": 0.23395699262619019, "learning_rate": 0.0002894843393735749, "loss": 2.9219, "step": 438800 }, { "epoch": 70.224, "grad_norm": 0.13251520693302155, "learning_rate": 0.00028948193927757105, "loss": 3.015, "step": 438900 }, { "epoch": 70.24, "grad_norm": 0.20843835175037384, "learning_rate": 0.0002894795391815672, "loss": 3.2871, "step": 439000 }, { "epoch": 70.256, "grad_norm": 0.13735973834991455, "learning_rate": 0.0002894771390855634, "loss": 3.3741, "step": 439100 }, { "epoch": 70.272, "grad_norm": 0.16450630128383636, "learning_rate": 0.00028947473898955956, "loss": 3.0721, "step": 439200 }, { "epoch": 70.288, "grad_norm": 0.16014477610588074, "learning_rate": 0.0002894723388935557, "loss": 3.219, "step": 439300 }, { "epoch": 70.304, "grad_norm": 0.15617580711841583, "learning_rate": 0.0002894699387975519, "loss": 2.9745, "step": 439400 }, { "epoch": 70.32, "grad_norm": 0.1747596263885498, "learning_rate": 0.00028946753870154806, "loss": 3.1291, "step": 439500 }, { "epoch": 70.336, "grad_norm": 0.18420176208019257, "learning_rate": 0.00028946513860554423, "loss": 3.1321, "step": 439600 }, { "epoch": 70.352, "grad_norm": 0.20725403726100922, "learning_rate": 0.00028946273850954035, "loss": 3.0142, "step": 439700 }, { "epoch": 70.368, "grad_norm": 0.16519372165203094, "learning_rate": 0.0002894603384135365, "loss": 3.3263, "step": 439800 }, { "epoch": 70.384, "grad_norm": 0.17048390209674835, "learning_rate": 0.0002894579383175327, "loss": 2.9786, "step": 439900 }, { "epoch": 70.4, "grad_norm": 0.1758817434310913, "learning_rate": 0.00028945553822152885, "loss": 3.1476, "step": 440000 }, { "epoch": 70.416, "grad_norm": 0.19412407279014587, "learning_rate": 0.000289453138125525, "loss": 3.4381, "step": 440100 }, { "epoch": 70.432, "grad_norm": 0.1787838190793991, "learning_rate": 0.0002894507380295212, "loss": 3.3919, "step": 440200 }, { "epoch": 70.448, "grad_norm": 0.19283300638198853, "learning_rate": 0.0002894483379335173, "loss": 3.1531, "step": 440300 }, { "epoch": 70.464, "grad_norm": 0.20587024092674255, "learning_rate": 0.0002894459378375135, "loss": 3.2844, "step": 440400 }, { "epoch": 70.48, "grad_norm": 0.1669202744960785, "learning_rate": 0.00028944353774150965, "loss": 3.4127, "step": 440500 }, { "epoch": 70.496, "grad_norm": 0.15426042675971985, "learning_rate": 0.0002894411376455058, "loss": 3.2246, "step": 440600 }, { "epoch": 70.512, "grad_norm": 0.1521589756011963, "learning_rate": 0.000289438737549502, "loss": 3.3336, "step": 440700 }, { "epoch": 70.528, "grad_norm": 0.18205732107162476, "learning_rate": 0.0002894363374534981, "loss": 3.4318, "step": 440800 }, { "epoch": 70.544, "grad_norm": 0.2228144407272339, "learning_rate": 0.00028943393735749427, "loss": 3.1465, "step": 440900 }, { "epoch": 70.56, "grad_norm": 0.16021698713302612, "learning_rate": 0.00028943153726149044, "loss": 3.2512, "step": 441000 }, { "epoch": 70.576, "grad_norm": 0.17193831503391266, "learning_rate": 0.0002894291611664466, "loss": 3.3724, "step": 441100 }, { "epoch": 70.592, "grad_norm": 0.19629880785942078, "learning_rate": 0.0002894267610704428, "loss": 3.1332, "step": 441200 }, { "epoch": 70.608, "grad_norm": 0.15374675393104553, "learning_rate": 0.00028942436097443896, "loss": 3.1389, "step": 441300 }, { "epoch": 70.624, "grad_norm": 0.19270844757556915, "learning_rate": 0.0002894219608784351, "loss": 3.3992, "step": 441400 }, { "epoch": 70.64, "grad_norm": 0.2369609922170639, "learning_rate": 0.00028941956078243125, "loss": 3.1398, "step": 441500 }, { "epoch": 70.656, "grad_norm": 0.1909315288066864, "learning_rate": 0.0002894171606864274, "loss": 3.271, "step": 441600 }, { "epoch": 70.672, "grad_norm": 0.18760181963443756, "learning_rate": 0.0002894147605904236, "loss": 3.175, "step": 441700 }, { "epoch": 70.688, "grad_norm": 0.2074330449104309, "learning_rate": 0.00028941236049441976, "loss": 3.2477, "step": 441800 }, { "epoch": 70.704, "grad_norm": 0.20609621703624725, "learning_rate": 0.00028940996039841587, "loss": 3.0864, "step": 441900 }, { "epoch": 70.72, "grad_norm": 0.23718643188476562, "learning_rate": 0.00028940756030241204, "loss": 3.3179, "step": 442000 }, { "epoch": 70.736, "grad_norm": 0.16544261574745178, "learning_rate": 0.0002894051602064082, "loss": 3.182, "step": 442100 }, { "epoch": 70.752, "grad_norm": 0.16438299417495728, "learning_rate": 0.0002894027601104044, "loss": 3.1911, "step": 442200 }, { "epoch": 70.768, "grad_norm": 0.1899152249097824, "learning_rate": 0.00028940036001440055, "loss": 3.245, "step": 442300 }, { "epoch": 70.784, "grad_norm": 0.17458777129650116, "learning_rate": 0.0002893979599183967, "loss": 3.5369, "step": 442400 }, { "epoch": 70.8, "grad_norm": 0.1653827428817749, "learning_rate": 0.0002893955598223929, "loss": 3.2785, "step": 442500 }, { "epoch": 70.816, "grad_norm": 0.15743687748908997, "learning_rate": 0.00028939315972638906, "loss": 3.149, "step": 442600 }, { "epoch": 70.832, "grad_norm": 0.16887934505939484, "learning_rate": 0.0002893907596303852, "loss": 3.3105, "step": 442700 }, { "epoch": 70.848, "grad_norm": 0.14683187007904053, "learning_rate": 0.00028938835953438134, "loss": 3.167, "step": 442800 }, { "epoch": 70.864, "grad_norm": 0.1634693443775177, "learning_rate": 0.0002893859594383775, "loss": 3.1384, "step": 442900 }, { "epoch": 70.88, "grad_norm": 0.2079874575138092, "learning_rate": 0.0002893835593423737, "loss": 3.2673, "step": 443000 }, { "epoch": 70.896, "grad_norm": 0.1544930785894394, "learning_rate": 0.00028938115924636985, "loss": 2.9935, "step": 443100 }, { "epoch": 70.912, "grad_norm": 0.20376259088516235, "learning_rate": 0.000289378759150366, "loss": 3.4647, "step": 443200 }, { "epoch": 70.928, "grad_norm": 0.16860347986221313, "learning_rate": 0.0002893763590543622, "loss": 3.3845, "step": 443300 }, { "epoch": 70.944, "grad_norm": 0.18483613431453705, "learning_rate": 0.0002893739589583583, "loss": 3.2691, "step": 443400 }, { "epoch": 70.96, "grad_norm": 0.20026250183582306, "learning_rate": 0.00028937155886235447, "loss": 3.1599, "step": 443500 }, { "epoch": 70.976, "grad_norm": 0.15524977445602417, "learning_rate": 0.00028936915876635064, "loss": 3.1902, "step": 443600 }, { "epoch": 70.992, "grad_norm": 0.1960054337978363, "learning_rate": 0.0002893667586703468, "loss": 2.9457, "step": 443700 }, { "epoch": 71.008, "grad_norm": 0.18176890909671783, "learning_rate": 0.000289364358574343, "loss": 3.2823, "step": 443800 }, { "epoch": 71.024, "grad_norm": 0.16644245386123657, "learning_rate": 0.0002893619584783391, "loss": 3.4438, "step": 443900 }, { "epoch": 71.04, "grad_norm": 0.31776556372642517, "learning_rate": 0.00028935955838233526, "loss": 3.0368, "step": 444000 }, { "epoch": 71.056, "grad_norm": 0.1381322294473648, "learning_rate": 0.00028935715828633143, "loss": 3.2265, "step": 444100 }, { "epoch": 71.072, "grad_norm": 0.1733168214559555, "learning_rate": 0.0002893547581903276, "loss": 3.5906, "step": 444200 }, { "epoch": 71.088, "grad_norm": 0.1698007434606552, "learning_rate": 0.00028935235809432377, "loss": 3.2221, "step": 444300 }, { "epoch": 71.104, "grad_norm": 0.17059379816055298, "learning_rate": 0.00028934995799831994, "loss": 3.1711, "step": 444400 }, { "epoch": 71.12, "grad_norm": 0.15793649852275848, "learning_rate": 0.00028934755790231605, "loss": 3.1681, "step": 444500 }, { "epoch": 71.136, "grad_norm": 0.15613031387329102, "learning_rate": 0.0002893451578063122, "loss": 3.1363, "step": 444600 }, { "epoch": 71.152, "grad_norm": 0.1748955100774765, "learning_rate": 0.0002893427577103084, "loss": 3.2323, "step": 444700 }, { "epoch": 71.168, "grad_norm": 0.1581987738609314, "learning_rate": 0.00028934035761430456, "loss": 3.29, "step": 444800 }, { "epoch": 71.184, "grad_norm": 0.18031199276447296, "learning_rate": 0.00028933795751830073, "loss": 3.2672, "step": 444900 }, { "epoch": 71.2, "grad_norm": 0.17753353714942932, "learning_rate": 0.00028933555742229685, "loss": 3.1334, "step": 445000 }, { "epoch": 71.216, "grad_norm": 0.17647883296012878, "learning_rate": 0.00028933318132725303, "loss": 3.2619, "step": 445100 }, { "epoch": 71.232, "grad_norm": 0.14844240248203278, "learning_rate": 0.0002893307812312492, "loss": 3.3341, "step": 445200 }, { "epoch": 71.248, "grad_norm": 0.15686668455600739, "learning_rate": 0.00028932838113524537, "loss": 3.2476, "step": 445300 }, { "epoch": 71.264, "grad_norm": 0.20012395083904266, "learning_rate": 0.00028932598103924154, "loss": 3.3222, "step": 445400 }, { "epoch": 71.28, "grad_norm": 0.21968373656272888, "learning_rate": 0.0002893235809432377, "loss": 3.2059, "step": 445500 }, { "epoch": 71.296, "grad_norm": 0.1698504388332367, "learning_rate": 0.0002893211808472339, "loss": 3.2641, "step": 445600 }, { "epoch": 71.312, "grad_norm": 0.15901407599449158, "learning_rate": 0.00028931878075123005, "loss": 3.092, "step": 445700 }, { "epoch": 71.328, "grad_norm": 0.15464149415493011, "learning_rate": 0.0002893163806552262, "loss": 3.0453, "step": 445800 }, { "epoch": 71.344, "grad_norm": 0.23977750539779663, "learning_rate": 0.00028931398055922233, "loss": 3.3008, "step": 445900 }, { "epoch": 71.36, "grad_norm": 0.2044697403907776, "learning_rate": 0.0002893115804632185, "loss": 3.4216, "step": 446000 }, { "epoch": 71.376, "grad_norm": 0.18341383337974548, "learning_rate": 0.00028930918036721467, "loss": 3.1267, "step": 446100 }, { "epoch": 71.392, "grad_norm": 0.2781505882740021, "learning_rate": 0.00028930678027121084, "loss": 3.2276, "step": 446200 }, { "epoch": 71.408, "grad_norm": 0.22001801431179047, "learning_rate": 0.000289304380175207, "loss": 3.1083, "step": 446300 }, { "epoch": 71.424, "grad_norm": 0.1993340253829956, "learning_rate": 0.0002893019800792032, "loss": 3.4528, "step": 446400 }, { "epoch": 71.44, "grad_norm": 0.173289954662323, "learning_rate": 0.0002892995799831993, "loss": 3.383, "step": 446500 }, { "epoch": 71.456, "grad_norm": 0.24428610503673553, "learning_rate": 0.00028929717988719546, "loss": 3.343, "step": 446600 }, { "epoch": 71.472, "grad_norm": 0.14691118896007538, "learning_rate": 0.00028929477979119163, "loss": 3.1751, "step": 446700 }, { "epoch": 71.488, "grad_norm": 0.15431061387062073, "learning_rate": 0.0002892923796951878, "loss": 3.1115, "step": 446800 }, { "epoch": 71.504, "grad_norm": 0.21294987201690674, "learning_rate": 0.00028928997959918397, "loss": 3.0955, "step": 446900 }, { "epoch": 71.52, "grad_norm": 0.1920730173587799, "learning_rate": 0.0002892875795031801, "loss": 3.0498, "step": 447000 }, { "epoch": 71.536, "grad_norm": 0.17720144987106323, "learning_rate": 0.00028928517940717625, "loss": 3.307, "step": 447100 }, { "epoch": 71.552, "grad_norm": 0.177902951836586, "learning_rate": 0.0002892827793111724, "loss": 3.0189, "step": 447200 }, { "epoch": 71.568, "grad_norm": 0.3220179080963135, "learning_rate": 0.0002892803792151686, "loss": 2.9865, "step": 447300 }, { "epoch": 71.584, "grad_norm": 0.17597636580467224, "learning_rate": 0.00028927797911916476, "loss": 3.5427, "step": 447400 }, { "epoch": 71.6, "grad_norm": 0.17943957448005676, "learning_rate": 0.00028927557902316093, "loss": 3.4002, "step": 447500 }, { "epoch": 71.616, "grad_norm": 0.18409083783626556, "learning_rate": 0.00028927317892715705, "loss": 3.2165, "step": 447600 }, { "epoch": 71.632, "grad_norm": 0.16040459275245667, "learning_rate": 0.00028927080283211324, "loss": 3.5711, "step": 447700 }, { "epoch": 71.648, "grad_norm": 0.1698540896177292, "learning_rate": 0.0002892684027361094, "loss": 3.2132, "step": 447800 }, { "epoch": 71.664, "grad_norm": 0.14544951915740967, "learning_rate": 0.0002892660026401056, "loss": 3.0598, "step": 447900 }, { "epoch": 71.68, "grad_norm": 0.18815073370933533, "learning_rate": 0.00028926360254410174, "loss": 3.1498, "step": 448000 }, { "epoch": 71.696, "grad_norm": 0.16946826875209808, "learning_rate": 0.00028926120244809786, "loss": 3.1694, "step": 448100 }, { "epoch": 71.712, "grad_norm": 0.17721880972385406, "learning_rate": 0.000289258802352094, "loss": 3.2837, "step": 448200 }, { "epoch": 71.728, "grad_norm": 0.153748020529747, "learning_rate": 0.0002892564022560902, "loss": 3.2073, "step": 448300 }, { "epoch": 71.744, "grad_norm": 0.17390556633472443, "learning_rate": 0.00028925400216008637, "loss": 3.2802, "step": 448400 }, { "epoch": 71.76, "grad_norm": 0.20426709949970245, "learning_rate": 0.00028925160206408253, "loss": 3.2704, "step": 448500 }, { "epoch": 71.776, "grad_norm": 0.15583288669586182, "learning_rate": 0.0002892492019680787, "loss": 3.2304, "step": 448600 }, { "epoch": 71.792, "grad_norm": 0.15302972495555878, "learning_rate": 0.00028924680187207487, "loss": 3.2872, "step": 448700 }, { "epoch": 71.808, "grad_norm": 0.17233331501483917, "learning_rate": 0.00028924440177607104, "loss": 2.9262, "step": 448800 }, { "epoch": 71.824, "grad_norm": 0.15943768620491028, "learning_rate": 0.0002892420016800672, "loss": 3.1047, "step": 448900 }, { "epoch": 71.84, "grad_norm": 0.17479582130908966, "learning_rate": 0.0002892396015840633, "loss": 3.1332, "step": 449000 }, { "epoch": 71.856, "grad_norm": 0.14812318980693817, "learning_rate": 0.0002892372014880595, "loss": 3.3277, "step": 449100 }, { "epoch": 71.872, "grad_norm": 0.16242174804210663, "learning_rate": 0.00028923480139205566, "loss": 3.2288, "step": 449200 }, { "epoch": 71.888, "grad_norm": 0.1521635353565216, "learning_rate": 0.00028923240129605183, "loss": 3.0808, "step": 449300 }, { "epoch": 71.904, "grad_norm": 0.18427634239196777, "learning_rate": 0.000289230001200048, "loss": 3.3766, "step": 449400 }, { "epoch": 71.92, "grad_norm": 0.187370166182518, "learning_rate": 0.00028922760110404417, "loss": 3.4186, "step": 449500 }, { "epoch": 71.936, "grad_norm": 0.17574596405029297, "learning_rate": 0.0002892252010080403, "loss": 3.429, "step": 449600 }, { "epoch": 71.952, "grad_norm": 0.19248181581497192, "learning_rate": 0.00028922280091203646, "loss": 3.244, "step": 449700 }, { "epoch": 71.968, "grad_norm": 0.1707514226436615, "learning_rate": 0.0002892204008160326, "loss": 3.0772, "step": 449800 }, { "epoch": 71.984, "grad_norm": 0.16737103462219238, "learning_rate": 0.0002892180007200288, "loss": 3.03, "step": 449900 }, { "epoch": 72.0, "grad_norm": 0.15690504014492035, "learning_rate": 0.000289215624624985, "loss": 3.248, "step": 450000 }, { "epoch": 72.016, "grad_norm": 0.19089950621128082, "learning_rate": 0.0002892132245289811, "loss": 2.9906, "step": 450100 }, { "epoch": 72.032, "grad_norm": 0.17977645993232727, "learning_rate": 0.00028921082443297727, "loss": 3.0094, "step": 450200 }, { "epoch": 72.048, "grad_norm": 0.1755213588476181, "learning_rate": 0.00028920842433697344, "loss": 3.1757, "step": 450300 }, { "epoch": 72.064, "grad_norm": 0.24356718361377716, "learning_rate": 0.0002892060242409696, "loss": 3.0389, "step": 450400 }, { "epoch": 72.08, "grad_norm": 0.22811071574687958, "learning_rate": 0.0002892036241449658, "loss": 3.2376, "step": 450500 }, { "epoch": 72.096, "grad_norm": 0.1531262993812561, "learning_rate": 0.00028920122404896194, "loss": 3.2999, "step": 450600 }, { "epoch": 72.112, "grad_norm": 0.23869682848453522, "learning_rate": 0.00028919882395295806, "loss": 3.5788, "step": 450700 }, { "epoch": 72.128, "grad_norm": 0.21365249156951904, "learning_rate": 0.00028919642385695423, "loss": 3.2215, "step": 450800 }, { "epoch": 72.144, "grad_norm": 0.22282899916172028, "learning_rate": 0.0002891940237609504, "loss": 3.2918, "step": 450900 }, { "epoch": 72.16, "grad_norm": 0.17255432903766632, "learning_rate": 0.00028919162366494657, "loss": 3.115, "step": 451000 }, { "epoch": 72.176, "grad_norm": 0.19038493931293488, "learning_rate": 0.00028918922356894274, "loss": 3.103, "step": 451100 }, { "epoch": 72.192, "grad_norm": 0.1853111833333969, "learning_rate": 0.00028918682347293885, "loss": 3.0158, "step": 451200 }, { "epoch": 72.208, "grad_norm": 0.1742090880870819, "learning_rate": 0.000289184423376935, "loss": 3.2277, "step": 451300 }, { "epoch": 72.224, "grad_norm": 0.15177831053733826, "learning_rate": 0.0002891820232809312, "loss": 3.4023, "step": 451400 }, { "epoch": 72.24, "grad_norm": 0.18588292598724365, "learning_rate": 0.00028917962318492736, "loss": 3.0556, "step": 451500 }, { "epoch": 72.256, "grad_norm": 0.19483399391174316, "learning_rate": 0.00028917722308892353, "loss": 3.3666, "step": 451600 }, { "epoch": 72.272, "grad_norm": 0.16972720623016357, "learning_rate": 0.0002891748229929197, "loss": 3.3197, "step": 451700 }, { "epoch": 72.288, "grad_norm": 0.19139590859413147, "learning_rate": 0.00028917242289691587, "loss": 3.0645, "step": 451800 }, { "epoch": 72.304, "grad_norm": 0.21765542030334473, "learning_rate": 0.00028917002280091204, "loss": 3.4364, "step": 451900 }, { "epoch": 72.32, "grad_norm": 0.16429218649864197, "learning_rate": 0.0002891676227049082, "loss": 3.1932, "step": 452000 }, { "epoch": 72.336, "grad_norm": 0.16009396314620972, "learning_rate": 0.0002891652226089043, "loss": 3.3089, "step": 452100 }, { "epoch": 72.352, "grad_norm": 0.18952879309654236, "learning_rate": 0.0002891628225129005, "loss": 3.0075, "step": 452200 }, { "epoch": 72.368, "grad_norm": 0.1931355595588684, "learning_rate": 0.00028916042241689666, "loss": 3.1857, "step": 452300 }, { "epoch": 72.384, "grad_norm": 0.1811680942773819, "learning_rate": 0.0002891580223208928, "loss": 3.1435, "step": 452400 }, { "epoch": 72.4, "grad_norm": 0.19817516207695007, "learning_rate": 0.000289155622224889, "loss": 3.3221, "step": 452500 }, { "epoch": 72.416, "grad_norm": 0.2446821928024292, "learning_rate": 0.00028915322212888517, "loss": 3.0935, "step": 452600 }, { "epoch": 72.432, "grad_norm": 0.19353975355625153, "learning_rate": 0.0002891508460338413, "loss": 3.0895, "step": 452700 }, { "epoch": 72.448, "grad_norm": 0.15199607610702515, "learning_rate": 0.00028914844593783747, "loss": 3.2783, "step": 452800 }, { "epoch": 72.464, "grad_norm": 0.19124892354011536, "learning_rate": 0.00028914604584183364, "loss": 3.1816, "step": 452900 }, { "epoch": 72.48, "grad_norm": 0.19223283231258392, "learning_rate": 0.0002891436457458298, "loss": 3.2148, "step": 453000 }, { "epoch": 72.496, "grad_norm": 0.20413446426391602, "learning_rate": 0.000289141245649826, "loss": 2.8693, "step": 453100 }, { "epoch": 72.512, "grad_norm": 0.1571425199508667, "learning_rate": 0.00028913886955478216, "loss": 3.0738, "step": 453200 }, { "epoch": 72.528, "grad_norm": 0.1882653385400772, "learning_rate": 0.00028913646945877833, "loss": 3.3022, "step": 453300 }, { "epoch": 72.544, "grad_norm": 0.19114309549331665, "learning_rate": 0.0002891340693627745, "loss": 3.3999, "step": 453400 }, { "epoch": 72.56, "grad_norm": 0.1645190715789795, "learning_rate": 0.00028913166926677067, "loss": 3.0251, "step": 453500 }, { "epoch": 72.576, "grad_norm": 0.1666693240404129, "learning_rate": 0.0002891292691707668, "loss": 3.0889, "step": 453600 }, { "epoch": 72.592, "grad_norm": 0.2682633399963379, "learning_rate": 0.00028912686907476296, "loss": 3.3093, "step": 453700 }, { "epoch": 72.608, "grad_norm": 0.1782086193561554, "learning_rate": 0.0002891244689787591, "loss": 3.2233, "step": 453800 }, { "epoch": 72.624, "grad_norm": 0.16833864152431488, "learning_rate": 0.0002891220688827553, "loss": 3.1899, "step": 453900 }, { "epoch": 72.64, "grad_norm": 0.2170085459947586, "learning_rate": 0.00028911966878675146, "loss": 3.1862, "step": 454000 }, { "epoch": 72.656, "grad_norm": 0.1636306196451187, "learning_rate": 0.0002891172686907476, "loss": 3.0096, "step": 454100 }, { "epoch": 72.672, "grad_norm": 0.19049431383609772, "learning_rate": 0.00028911486859474375, "loss": 3.1726, "step": 454200 }, { "epoch": 72.688, "grad_norm": 0.1814054548740387, "learning_rate": 0.0002891124684987399, "loss": 2.9534, "step": 454300 }, { "epoch": 72.704, "grad_norm": 0.17133675515651703, "learning_rate": 0.0002891100684027361, "loss": 3.3311, "step": 454400 }, { "epoch": 72.72, "grad_norm": 0.15822678804397583, "learning_rate": 0.00028910766830673226, "loss": 2.9616, "step": 454500 }, { "epoch": 72.736, "grad_norm": 0.19994337856769562, "learning_rate": 0.0002891052682107284, "loss": 3.1435, "step": 454600 }, { "epoch": 72.752, "grad_norm": 0.293891966342926, "learning_rate": 0.00028910286811472454, "loss": 3.2182, "step": 454700 }, { "epoch": 72.768, "grad_norm": 0.15652576088905334, "learning_rate": 0.0002891004680187207, "loss": 3.196, "step": 454800 }, { "epoch": 72.784, "grad_norm": 0.21324369311332703, "learning_rate": 0.0002890980679227169, "loss": 3.2193, "step": 454900 }, { "epoch": 72.8, "grad_norm": 0.1824270486831665, "learning_rate": 0.00028909566782671305, "loss": 3.0328, "step": 455000 }, { "epoch": 72.816, "grad_norm": 0.1943599432706833, "learning_rate": 0.0002890932677307092, "loss": 3.5594, "step": 455100 }, { "epoch": 72.832, "grad_norm": 0.19573970139026642, "learning_rate": 0.00028909086763470533, "loss": 3.3026, "step": 455200 }, { "epoch": 72.848, "grad_norm": 0.20629402995109558, "learning_rate": 0.0002890884675387015, "loss": 3.0767, "step": 455300 }, { "epoch": 72.864, "grad_norm": 0.19048021733760834, "learning_rate": 0.00028908606744269767, "loss": 3.3033, "step": 455400 }, { "epoch": 72.88, "grad_norm": 0.15230311453342438, "learning_rate": 0.00028908366734669384, "loss": 3.371, "step": 455500 }, { "epoch": 72.896, "grad_norm": 0.18833421170711517, "learning_rate": 0.00028908126725069, "loss": 3.4189, "step": 455600 }, { "epoch": 72.912, "grad_norm": 0.18621788918972015, "learning_rate": 0.0002890788671546862, "loss": 3.3167, "step": 455700 }, { "epoch": 72.928, "grad_norm": 0.18521416187286377, "learning_rate": 0.0002890764670586823, "loss": 3.0077, "step": 455800 }, { "epoch": 72.944, "grad_norm": 0.16110347211360931, "learning_rate": 0.00028907406696267846, "loss": 2.9188, "step": 455900 }, { "epoch": 72.96, "grad_norm": 0.17979463934898376, "learning_rate": 0.0002890716908676347, "loss": 3.269, "step": 456000 }, { "epoch": 72.976, "grad_norm": 0.2207885980606079, "learning_rate": 0.0002890692907716308, "loss": 3.2341, "step": 456100 }, { "epoch": 72.992, "grad_norm": 0.1809971183538437, "learning_rate": 0.000289066890675627, "loss": 3.083, "step": 456200 }, { "epoch": 73.008, "grad_norm": 0.19880744814872742, "learning_rate": 0.00028906449057962316, "loss": 3.1554, "step": 456300 }, { "epoch": 73.024, "grad_norm": 0.21924132108688354, "learning_rate": 0.00028906209048361933, "loss": 3.2668, "step": 456400 }, { "epoch": 73.04, "grad_norm": 0.1715451031923294, "learning_rate": 0.0002890596903876155, "loss": 3.1031, "step": 456500 }, { "epoch": 73.056, "grad_norm": 0.1876036673784256, "learning_rate": 0.00028905729029161167, "loss": 3.352, "step": 456600 }, { "epoch": 73.072, "grad_norm": 0.19858501851558685, "learning_rate": 0.0002890548901956078, "loss": 2.9392, "step": 456700 }, { "epoch": 73.088, "grad_norm": 0.18968205153942108, "learning_rate": 0.00028905249009960395, "loss": 3.3626, "step": 456800 }, { "epoch": 73.104, "grad_norm": 0.1661369502544403, "learning_rate": 0.0002890500900036001, "loss": 3.1135, "step": 456900 }, { "epoch": 73.12, "grad_norm": 0.1884249448776245, "learning_rate": 0.0002890476899075963, "loss": 3.2408, "step": 457000 }, { "epoch": 73.136, "grad_norm": 0.17482635378837585, "learning_rate": 0.00028904528981159246, "loss": 3.2498, "step": 457100 }, { "epoch": 73.152, "grad_norm": 0.18069331347942352, "learning_rate": 0.00028904288971558857, "loss": 3.2644, "step": 457200 }, { "epoch": 73.168, "grad_norm": 0.20407070219516754, "learning_rate": 0.00028904048961958474, "loss": 3.1687, "step": 457300 }, { "epoch": 73.184, "grad_norm": 0.19597786664962769, "learning_rate": 0.0002890380895235809, "loss": 3.1782, "step": 457400 }, { "epoch": 73.2, "grad_norm": 0.24342136085033417, "learning_rate": 0.0002890356894275771, "loss": 2.9385, "step": 457500 }, { "epoch": 73.216, "grad_norm": 0.2318635731935501, "learning_rate": 0.00028903328933157325, "loss": 3.2354, "step": 457600 }, { "epoch": 73.232, "grad_norm": 0.2203083336353302, "learning_rate": 0.0002890308892355694, "loss": 3.4705, "step": 457700 }, { "epoch": 73.248, "grad_norm": 0.20941860973834991, "learning_rate": 0.00028902848913956553, "loss": 3.2757, "step": 457800 }, { "epoch": 73.264, "grad_norm": 0.184433713555336, "learning_rate": 0.0002890260890435617, "loss": 3.1405, "step": 457900 }, { "epoch": 73.28, "grad_norm": 0.1705232858657837, "learning_rate": 0.00028902368894755787, "loss": 3.4205, "step": 458000 }, { "epoch": 73.296, "grad_norm": 0.1932794451713562, "learning_rate": 0.00028902128885155404, "loss": 3.314, "step": 458100 }, { "epoch": 73.312, "grad_norm": 0.17786002159118652, "learning_rate": 0.0002890188887555502, "loss": 3.1989, "step": 458200 }, { "epoch": 73.328, "grad_norm": 0.18342936038970947, "learning_rate": 0.0002890164886595464, "loss": 3.0544, "step": 458300 }, { "epoch": 73.344, "grad_norm": 0.2074287384748459, "learning_rate": 0.0002890140885635425, "loss": 3.0364, "step": 458400 }, { "epoch": 73.36, "grad_norm": 0.20234628021717072, "learning_rate": 0.00028901171246849874, "loss": 3.234, "step": 458500 }, { "epoch": 73.376, "grad_norm": 0.15614551305770874, "learning_rate": 0.0002890093123724949, "loss": 3.1991, "step": 458600 }, { "epoch": 73.392, "grad_norm": 0.18375664949417114, "learning_rate": 0.000289006912276491, "loss": 3.4213, "step": 458700 }, { "epoch": 73.408, "grad_norm": 0.22218665480613708, "learning_rate": 0.0002890045121804872, "loss": 3.3021, "step": 458800 }, { "epoch": 73.424, "grad_norm": 0.21941842138767242, "learning_rate": 0.00028900211208448336, "loss": 3.1158, "step": 458900 }, { "epoch": 73.44, "grad_norm": 0.1931462436914444, "learning_rate": 0.00028899971198847953, "loss": 3.4578, "step": 459000 }, { "epoch": 73.456, "grad_norm": 0.1682085245847702, "learning_rate": 0.0002889973118924757, "loss": 3.489, "step": 459100 }, { "epoch": 73.472, "grad_norm": 0.1731686145067215, "learning_rate": 0.0002889949117964718, "loss": 3.1933, "step": 459200 }, { "epoch": 73.488, "grad_norm": 0.15209980309009552, "learning_rate": 0.000288992511700468, "loss": 3.1317, "step": 459300 }, { "epoch": 73.504, "grad_norm": 0.21587279438972473, "learning_rate": 0.00028899011160446415, "loss": 3.0099, "step": 459400 }, { "epoch": 73.52, "grad_norm": 0.19031621515750885, "learning_rate": 0.0002889877115084603, "loss": 3.2392, "step": 459500 }, { "epoch": 73.536, "grad_norm": 0.17445743083953857, "learning_rate": 0.0002889853114124565, "loss": 3.1653, "step": 459600 }, { "epoch": 73.552, "grad_norm": 0.1677224487066269, "learning_rate": 0.00028898291131645266, "loss": 3.3576, "step": 459700 }, { "epoch": 73.568, "grad_norm": 0.2168903350830078, "learning_rate": 0.0002889805112204488, "loss": 3.2812, "step": 459800 }, { "epoch": 73.584, "grad_norm": 0.18536674976348877, "learning_rate": 0.00028897811112444494, "loss": 3.2647, "step": 459900 }, { "epoch": 73.6, "grad_norm": 0.15385323762893677, "learning_rate": 0.0002889757110284411, "loss": 3.2823, "step": 460000 }, { "epoch": 73.616, "grad_norm": 0.18583352863788605, "learning_rate": 0.0002889733109324373, "loss": 3.3666, "step": 460100 }, { "epoch": 73.632, "grad_norm": 0.18316872417926788, "learning_rate": 0.00028897091083643345, "loss": 3.4004, "step": 460200 }, { "epoch": 73.648, "grad_norm": 0.21025300025939941, "learning_rate": 0.0002889685107404296, "loss": 3.4221, "step": 460300 }, { "epoch": 73.664, "grad_norm": 0.170668825507164, "learning_rate": 0.00028896611064442573, "loss": 3.0724, "step": 460400 }, { "epoch": 73.68, "grad_norm": 0.17161548137664795, "learning_rate": 0.0002889637105484219, "loss": 3.0502, "step": 460500 }, { "epoch": 73.696, "grad_norm": 0.16564396023750305, "learning_rate": 0.0002889613104524181, "loss": 3.3913, "step": 460600 }, { "epoch": 73.712, "grad_norm": 0.19530817866325378, "learning_rate": 0.00028895891035641424, "loss": 3.0389, "step": 460700 }, { "epoch": 73.728, "grad_norm": 0.22263728082180023, "learning_rate": 0.0002889565102604104, "loss": 3.1162, "step": 460800 }, { "epoch": 73.744, "grad_norm": 0.15226471424102783, "learning_rate": 0.0002889541101644065, "loss": 3.2116, "step": 460900 }, { "epoch": 73.76, "grad_norm": 0.1757274568080902, "learning_rate": 0.0002889517100684027, "loss": 3.3234, "step": 461000 }, { "epoch": 73.776, "grad_norm": 0.19014757871627808, "learning_rate": 0.00028894930997239886, "loss": 3.3394, "step": 461100 }, { "epoch": 73.792, "grad_norm": 0.1708543300628662, "learning_rate": 0.00028894690987639503, "loss": 3.3128, "step": 461200 }, { "epoch": 73.808, "grad_norm": 0.18081998825073242, "learning_rate": 0.0002889445337813512, "loss": 3.2055, "step": 461300 }, { "epoch": 73.824, "grad_norm": 0.22742171585559845, "learning_rate": 0.0002889421336853474, "loss": 3.2464, "step": 461400 }, { "epoch": 73.84, "grad_norm": 0.2704772651195526, "learning_rate": 0.00028893973358934356, "loss": 3.1131, "step": 461500 }, { "epoch": 73.856, "grad_norm": 0.1937403529882431, "learning_rate": 0.00028893733349333973, "loss": 3.2557, "step": 461600 }, { "epoch": 73.872, "grad_norm": 0.18519380688667297, "learning_rate": 0.0002889349333973359, "loss": 3.2944, "step": 461700 }, { "epoch": 73.888, "grad_norm": 0.17127548158168793, "learning_rate": 0.000288932533301332, "loss": 3.0354, "step": 461800 }, { "epoch": 73.904, "grad_norm": 0.17185784876346588, "learning_rate": 0.0002889301332053282, "loss": 3.1055, "step": 461900 }, { "epoch": 73.92, "grad_norm": 0.18004480004310608, "learning_rate": 0.00028892773310932435, "loss": 3.2689, "step": 462000 }, { "epoch": 73.936, "grad_norm": 0.21601063013076782, "learning_rate": 0.0002889253330133205, "loss": 3.183, "step": 462100 }, { "epoch": 73.952, "grad_norm": 0.22601087391376495, "learning_rate": 0.0002889229329173167, "loss": 3.1524, "step": 462200 }, { "epoch": 73.968, "grad_norm": 0.20585574209690094, "learning_rate": 0.00028892053282131286, "loss": 2.9969, "step": 462300 }, { "epoch": 73.984, "grad_norm": 0.17452140152454376, "learning_rate": 0.000288918132725309, "loss": 3.4012, "step": 462400 }, { "epoch": 74.0, "grad_norm": 0.18342724442481995, "learning_rate": 0.00028891573262930514, "loss": 3.0731, "step": 462500 }, { "epoch": 74.016, "grad_norm": 0.18427571654319763, "learning_rate": 0.0002889133325333013, "loss": 2.881, "step": 462600 }, { "epoch": 74.032, "grad_norm": 0.18638572096824646, "learning_rate": 0.0002889109324372975, "loss": 3.1627, "step": 462700 }, { "epoch": 74.048, "grad_norm": 0.17125241458415985, "learning_rate": 0.00028890853234129365, "loss": 3.0892, "step": 462800 }, { "epoch": 74.064, "grad_norm": 0.18404452502727509, "learning_rate": 0.00028890613224528977, "loss": 3.1108, "step": 462900 }, { "epoch": 74.08, "grad_norm": 0.20668601989746094, "learning_rate": 0.00028890373214928594, "loss": 3.2185, "step": 463000 }, { "epoch": 74.096, "grad_norm": 0.21079257130622864, "learning_rate": 0.0002889013320532821, "loss": 2.9978, "step": 463100 }, { "epoch": 74.112, "grad_norm": 0.4275696873664856, "learning_rate": 0.0002888989319572783, "loss": 3.52, "step": 463200 }, { "epoch": 74.128, "grad_norm": 0.18801356852054596, "learning_rate": 0.00028889653186127444, "loss": 3.2637, "step": 463300 }, { "epoch": 74.144, "grad_norm": 0.16097815334796906, "learning_rate": 0.0002888941317652706, "loss": 3.1833, "step": 463400 }, { "epoch": 74.16, "grad_norm": 0.21692661941051483, "learning_rate": 0.00028889173166926673, "loss": 3.5894, "step": 463500 }, { "epoch": 74.176, "grad_norm": 0.1616773158311844, "learning_rate": 0.0002888893315732629, "loss": 3.0897, "step": 463600 }, { "epoch": 74.192, "grad_norm": 0.21127666532993317, "learning_rate": 0.00028888693147725907, "loss": 3.1415, "step": 463700 }, { "epoch": 74.208, "grad_norm": 0.19975614547729492, "learning_rate": 0.00028888453138125524, "loss": 3.3399, "step": 463800 }, { "epoch": 74.224, "grad_norm": 0.187686026096344, "learning_rate": 0.0002888821552862114, "loss": 3.45, "step": 463900 }, { "epoch": 74.24, "grad_norm": 0.16505296528339386, "learning_rate": 0.0002888797551902076, "loss": 3.5185, "step": 464000 }, { "epoch": 74.256, "grad_norm": 0.19926078617572784, "learning_rate": 0.00028887735509420376, "loss": 3.1411, "step": 464100 }, { "epoch": 74.272, "grad_norm": 0.18490496277809143, "learning_rate": 0.00028887495499819993, "loss": 3.3636, "step": 464200 }, { "epoch": 74.288, "grad_norm": 0.20611432194709778, "learning_rate": 0.0002888725549021961, "loss": 3.1199, "step": 464300 }, { "epoch": 74.304, "grad_norm": 0.18325108289718628, "learning_rate": 0.0002888701548061922, "loss": 3.3158, "step": 464400 }, { "epoch": 74.32, "grad_norm": 0.2810143530368805, "learning_rate": 0.0002888677547101884, "loss": 3.3792, "step": 464500 }, { "epoch": 74.336, "grad_norm": 0.18470898270606995, "learning_rate": 0.00028886535461418455, "loss": 3.2707, "step": 464600 }, { "epoch": 74.352, "grad_norm": 0.19659152626991272, "learning_rate": 0.0002888629545181807, "loss": 3.3425, "step": 464700 }, { "epoch": 74.368, "grad_norm": 0.1849854439496994, "learning_rate": 0.0002888605544221769, "loss": 3.3384, "step": 464800 }, { "epoch": 74.384, "grad_norm": 0.16159076988697052, "learning_rate": 0.000288858154326173, "loss": 3.2585, "step": 464900 }, { "epoch": 74.4, "grad_norm": 0.24840687215328217, "learning_rate": 0.0002888557542301692, "loss": 3.4716, "step": 465000 }, { "epoch": 74.416, "grad_norm": 0.15893152356147766, "learning_rate": 0.00028885335413416535, "loss": 3.4904, "step": 465100 }, { "epoch": 74.432, "grad_norm": 0.20983876287937164, "learning_rate": 0.0002888509540381615, "loss": 3.6084, "step": 465200 }, { "epoch": 74.448, "grad_norm": 0.1706210970878601, "learning_rate": 0.0002888485539421577, "loss": 3.4179, "step": 465300 }, { "epoch": 74.464, "grad_norm": 0.2555503845214844, "learning_rate": 0.00028884615384615385, "loss": 3.2861, "step": 465400 }, { "epoch": 74.48, "grad_norm": 0.17568440735340118, "learning_rate": 0.00028884375375014997, "loss": 3.2165, "step": 465500 }, { "epoch": 74.496, "grad_norm": 0.18972322344779968, "learning_rate": 0.00028884135365414614, "loss": 3.0032, "step": 465600 }, { "epoch": 74.512, "grad_norm": 0.15899668633937836, "learning_rate": 0.0002888389535581423, "loss": 3.3086, "step": 465700 }, { "epoch": 74.528, "grad_norm": 0.17366304993629456, "learning_rate": 0.0002888365534621385, "loss": 3.2932, "step": 465800 }, { "epoch": 74.544, "grad_norm": 0.1895371526479721, "learning_rate": 0.00028883415336613464, "loss": 3.3906, "step": 465900 }, { "epoch": 74.56, "grad_norm": 0.3951095640659332, "learning_rate": 0.00028883175327013076, "loss": 3.1172, "step": 466000 }, { "epoch": 74.576, "grad_norm": 0.17255236208438873, "learning_rate": 0.00028882935317412693, "loss": 3.2701, "step": 466100 }, { "epoch": 74.592, "grad_norm": 0.20173990726470947, "learning_rate": 0.0002888269530781231, "loss": 3.3643, "step": 466200 }, { "epoch": 74.608, "grad_norm": 0.1874805986881256, "learning_rate": 0.00028882455298211927, "loss": 3.3985, "step": 466300 }, { "epoch": 74.624, "grad_norm": 0.18507951498031616, "learning_rate": 0.00028882215288611544, "loss": 3.3008, "step": 466400 }, { "epoch": 74.64, "grad_norm": 0.16647233068943024, "learning_rate": 0.0002888197527901116, "loss": 3.3859, "step": 466500 }, { "epoch": 74.656, "grad_norm": 0.18528230488300323, "learning_rate": 0.0002888173526941077, "loss": 3.4239, "step": 466600 }, { "epoch": 74.672, "grad_norm": 0.15344677865505219, "learning_rate": 0.0002888149525981039, "loss": 3.2066, "step": 466700 }, { "epoch": 74.688, "grad_norm": 0.15510307252407074, "learning_rate": 0.00028881255250210006, "loss": 3.0518, "step": 466800 }, { "epoch": 74.704, "grad_norm": 0.1649552881717682, "learning_rate": 0.00028881015240609623, "loss": 3.4638, "step": 466900 }, { "epoch": 74.72, "grad_norm": 0.22515319287776947, "learning_rate": 0.0002888077523100924, "loss": 3.2784, "step": 467000 }, { "epoch": 74.736, "grad_norm": 0.18832296133041382, "learning_rate": 0.0002888053522140885, "loss": 3.4563, "step": 467100 }, { "epoch": 74.752, "grad_norm": 0.19387690722942352, "learning_rate": 0.0002888029521180847, "loss": 3.5436, "step": 467200 }, { "epoch": 74.768, "grad_norm": 0.178435817360878, "learning_rate": 0.00028880055202208085, "loss": 2.8516, "step": 467300 }, { "epoch": 74.784, "grad_norm": 0.17215795814990997, "learning_rate": 0.000288798151926077, "loss": 3.4508, "step": 467400 }, { "epoch": 74.8, "grad_norm": 0.1482001394033432, "learning_rate": 0.0002887957518300732, "loss": 3.4038, "step": 467500 }, { "epoch": 74.816, "grad_norm": 0.16948302090168, "learning_rate": 0.00028879335173406936, "loss": 3.1987, "step": 467600 }, { "epoch": 74.832, "grad_norm": 0.17299839854240417, "learning_rate": 0.0002887909516380655, "loss": 3.2943, "step": 467700 }, { "epoch": 74.848, "grad_norm": 0.17209362983703613, "learning_rate": 0.00028878855154206164, "loss": 3.092, "step": 467800 }, { "epoch": 74.864, "grad_norm": 0.16499897837638855, "learning_rate": 0.0002887861514460578, "loss": 3.205, "step": 467900 }, { "epoch": 74.88, "grad_norm": 0.15456819534301758, "learning_rate": 0.000288783775351014, "loss": 3.2445, "step": 468000 }, { "epoch": 74.896, "grad_norm": 0.17871209979057312, "learning_rate": 0.00028878137525501017, "loss": 3.218, "step": 468100 }, { "epoch": 74.912, "grad_norm": 0.23323661088943481, "learning_rate": 0.00028877897515900634, "loss": 3.2689, "step": 468200 }, { "epoch": 74.928, "grad_norm": 0.15584339201450348, "learning_rate": 0.0002887765750630025, "loss": 3.1674, "step": 468300 }, { "epoch": 74.944, "grad_norm": 0.19970950484275818, "learning_rate": 0.0002887741749669987, "loss": 3.1795, "step": 468400 }, { "epoch": 74.96, "grad_norm": 0.22006675601005554, "learning_rate": 0.00028877177487099485, "loss": 3.3055, "step": 468500 }, { "epoch": 74.976, "grad_norm": 0.1716865748167038, "learning_rate": 0.00028876937477499096, "loss": 3.3104, "step": 468600 }, { "epoch": 74.992, "grad_norm": 0.16997221112251282, "learning_rate": 0.00028876697467898713, "loss": 3.0269, "step": 468700 }, { "epoch": 75.008, "grad_norm": 0.23156268894672394, "learning_rate": 0.0002887645745829833, "loss": 3.3231, "step": 468800 }, { "epoch": 75.024, "grad_norm": 0.21968360245227814, "learning_rate": 0.00028876217448697947, "loss": 3.0927, "step": 468900 }, { "epoch": 75.04, "grad_norm": 0.19005636870861053, "learning_rate": 0.00028875977439097564, "loss": 3.0187, "step": 469000 }, { "epoch": 75.056, "grad_norm": 0.21780367195606232, "learning_rate": 0.00028875737429497175, "loss": 3.4077, "step": 469100 }, { "epoch": 75.072, "grad_norm": 0.1845608800649643, "learning_rate": 0.0002887549741989679, "loss": 3.3877, "step": 469200 }, { "epoch": 75.088, "grad_norm": 0.30160391330718994, "learning_rate": 0.0002887525741029641, "loss": 3.0667, "step": 469300 }, { "epoch": 75.104, "grad_norm": 0.21253061294555664, "learning_rate": 0.00028875019800792033, "loss": 3.302, "step": 469400 }, { "epoch": 75.12, "grad_norm": 0.2639717757701874, "learning_rate": 0.00028874779791191645, "loss": 3.4732, "step": 469500 }, { "epoch": 75.136, "grad_norm": 0.16577164828777313, "learning_rate": 0.0002887453978159126, "loss": 3.4282, "step": 469600 }, { "epoch": 75.152, "grad_norm": 0.19703346490859985, "learning_rate": 0.0002887429977199088, "loss": 3.2459, "step": 469700 }, { "epoch": 75.168, "grad_norm": 0.18278123438358307, "learning_rate": 0.00028874059762390496, "loss": 3.0809, "step": 469800 }, { "epoch": 75.184, "grad_norm": 0.16061915457248688, "learning_rate": 0.0002887381975279011, "loss": 3.3501, "step": 469900 }, { "epoch": 75.2, "grad_norm": 0.17930340766906738, "learning_rate": 0.00028873579743189724, "loss": 3.1599, "step": 470000 }, { "epoch": 75.216, "grad_norm": 0.21522049605846405, "learning_rate": 0.0002887333973358934, "loss": 3.1048, "step": 470100 }, { "epoch": 75.232, "grad_norm": 0.17156116664409637, "learning_rate": 0.0002887309972398896, "loss": 3.3351, "step": 470200 }, { "epoch": 75.248, "grad_norm": 0.16258321702480316, "learning_rate": 0.00028872859714388575, "loss": 3.3144, "step": 470300 }, { "epoch": 75.264, "grad_norm": 0.15201978385448456, "learning_rate": 0.0002887261970478819, "loss": 3.303, "step": 470400 }, { "epoch": 75.28, "grad_norm": 0.19243723154067993, "learning_rate": 0.0002887237969518781, "loss": 3.1479, "step": 470500 }, { "epoch": 75.296, "grad_norm": 0.14120513200759888, "learning_rate": 0.0002887213968558742, "loss": 3.3283, "step": 470600 }, { "epoch": 75.312, "grad_norm": 0.21235162019729614, "learning_rate": 0.00028871899675987037, "loss": 3.3584, "step": 470700 }, { "epoch": 75.328, "grad_norm": 0.22147315740585327, "learning_rate": 0.00028871659666386654, "loss": 3.1143, "step": 470800 }, { "epoch": 75.344, "grad_norm": 0.23409822583198547, "learning_rate": 0.0002887141965678627, "loss": 3.1966, "step": 470900 }, { "epoch": 75.36, "grad_norm": 0.19430138170719147, "learning_rate": 0.0002887117964718589, "loss": 3.0991, "step": 471000 }, { "epoch": 75.376, "grad_norm": 0.1848958432674408, "learning_rate": 0.000288709396375855, "loss": 3.1895, "step": 471100 }, { "epoch": 75.392, "grad_norm": 0.18815754354000092, "learning_rate": 0.00028870699627985116, "loss": 3.3257, "step": 471200 }, { "epoch": 75.408, "grad_norm": 0.24557410180568695, "learning_rate": 0.00028870459618384733, "loss": 3.4322, "step": 471300 }, { "epoch": 75.424, "grad_norm": 0.1849823296070099, "learning_rate": 0.0002887021960878435, "loss": 3.59, "step": 471400 }, { "epoch": 75.44, "grad_norm": 0.17285886406898499, "learning_rate": 0.00028869979599183967, "loss": 3.2893, "step": 471500 }, { "epoch": 75.456, "grad_norm": 0.21323136985301971, "learning_rate": 0.00028869739589583584, "loss": 3.4539, "step": 471600 }, { "epoch": 75.472, "grad_norm": 0.17686007916927338, "learning_rate": 0.00028869499579983195, "loss": 3.0399, "step": 471700 }, { "epoch": 75.488, "grad_norm": 0.18322378396987915, "learning_rate": 0.0002886925957038281, "loss": 3.2185, "step": 471800 }, { "epoch": 75.504, "grad_norm": 0.19532054662704468, "learning_rate": 0.0002886901956078243, "loss": 3.3328, "step": 471900 }, { "epoch": 75.52, "grad_norm": 0.19691942632198334, "learning_rate": 0.00028868779551182046, "loss": 3.3934, "step": 472000 }, { "epoch": 75.536, "grad_norm": 0.2564931809902191, "learning_rate": 0.00028868539541581663, "loss": 3.4476, "step": 472100 }, { "epoch": 75.552, "grad_norm": 0.20029647648334503, "learning_rate": 0.00028868299531981275, "loss": 3.4082, "step": 472200 }, { "epoch": 75.568, "grad_norm": 0.17391833662986755, "learning_rate": 0.0002886805952238089, "loss": 3.3419, "step": 472300 }, { "epoch": 75.584, "grad_norm": 0.1912335604429245, "learning_rate": 0.0002886781951278051, "loss": 3.0852, "step": 472400 }, { "epoch": 75.6, "grad_norm": 0.17416317760944366, "learning_rate": 0.00028867579503180125, "loss": 3.2133, "step": 472500 }, { "epoch": 75.616, "grad_norm": 0.1852428913116455, "learning_rate": 0.0002886733949357974, "loss": 3.1425, "step": 472600 }, { "epoch": 75.632, "grad_norm": 0.1998564898967743, "learning_rate": 0.0002886709948397936, "loss": 3.5592, "step": 472700 }, { "epoch": 75.648, "grad_norm": 0.1678033173084259, "learning_rate": 0.0002886685947437897, "loss": 3.3626, "step": 472800 }, { "epoch": 75.664, "grad_norm": 0.21342706680297852, "learning_rate": 0.0002886661946477859, "loss": 3.3495, "step": 472900 }, { "epoch": 75.68, "grad_norm": 0.1705610156059265, "learning_rate": 0.00028866379455178205, "loss": 3.1125, "step": 473000 }, { "epoch": 75.696, "grad_norm": 0.20010249316692352, "learning_rate": 0.0002886613944557782, "loss": 3.3428, "step": 473100 }, { "epoch": 75.712, "grad_norm": 0.18478697538375854, "learning_rate": 0.0002886589943597744, "loss": 3.224, "step": 473200 }, { "epoch": 75.728, "grad_norm": 0.18213221430778503, "learning_rate": 0.0002886565942637705, "loss": 2.9566, "step": 473300 }, { "epoch": 75.744, "grad_norm": 0.17587748169898987, "learning_rate": 0.00028865419416776667, "loss": 3.3451, "step": 473400 }, { "epoch": 75.76, "grad_norm": 0.16022825241088867, "learning_rate": 0.00028865179407176284, "loss": 3.3619, "step": 473500 }, { "epoch": 75.776, "grad_norm": 0.16855917870998383, "learning_rate": 0.000288649393975759, "loss": 3.0243, "step": 473600 }, { "epoch": 75.792, "grad_norm": 0.16260436177253723, "learning_rate": 0.0002886469938797552, "loss": 3.1787, "step": 473700 }, { "epoch": 75.808, "grad_norm": 0.16150471568107605, "learning_rate": 0.00028864459378375134, "loss": 3.4308, "step": 473800 }, { "epoch": 75.824, "grad_norm": 0.19909095764160156, "learning_rate": 0.00028864219368774746, "loss": 3.117, "step": 473900 }, { "epoch": 75.84, "grad_norm": 0.16909876465797424, "learning_rate": 0.00028863979359174363, "loss": 3.1092, "step": 474000 }, { "epoch": 75.856, "grad_norm": 0.17448857426643372, "learning_rate": 0.0002886373934957398, "loss": 3.2014, "step": 474100 }, { "epoch": 75.872, "grad_norm": 0.17824389040470123, "learning_rate": 0.00028863499339973597, "loss": 3.1981, "step": 474200 }, { "epoch": 75.888, "grad_norm": 0.1820748746395111, "learning_rate": 0.00028863259330373214, "loss": 3.3634, "step": 474300 }, { "epoch": 75.904, "grad_norm": 0.16049763560295105, "learning_rate": 0.0002886302172086883, "loss": 3.3669, "step": 474400 }, { "epoch": 75.92, "grad_norm": 0.170709490776062, "learning_rate": 0.0002886278171126845, "loss": 3.0767, "step": 474500 }, { "epoch": 75.936, "grad_norm": 0.22051769495010376, "learning_rate": 0.0002886254410176407, "loss": 3.1427, "step": 474600 }, { "epoch": 75.952, "grad_norm": 0.17870846390724182, "learning_rate": 0.00028862304092163685, "loss": 3.3677, "step": 474700 }, { "epoch": 75.968, "grad_norm": 0.17632703483104706, "learning_rate": 0.00028862064082563297, "loss": 2.9031, "step": 474800 }, { "epoch": 75.984, "grad_norm": 0.22968852519989014, "learning_rate": 0.00028861824072962914, "loss": 2.9458, "step": 474900 }, { "epoch": 76.0, "grad_norm": 0.16667436063289642, "learning_rate": 0.0002886158406336253, "loss": 3.2074, "step": 475000 }, { "epoch": 76.016, "grad_norm": 0.2084140181541443, "learning_rate": 0.0002886134405376215, "loss": 3.3815, "step": 475100 }, { "epoch": 76.032, "grad_norm": 0.17995944619178772, "learning_rate": 0.00028861104044161764, "loss": 3.0258, "step": 475200 }, { "epoch": 76.048, "grad_norm": 0.20323926210403442, "learning_rate": 0.00028860864034561376, "loss": 3.4067, "step": 475300 }, { "epoch": 76.064, "grad_norm": 0.1671806424856186, "learning_rate": 0.00028860624024960993, "loss": 3.045, "step": 475400 }, { "epoch": 76.08, "grad_norm": 0.1831134408712387, "learning_rate": 0.0002886038401536061, "loss": 3.1461, "step": 475500 }, { "epoch": 76.096, "grad_norm": 0.1895858496427536, "learning_rate": 0.00028860144005760227, "loss": 3.3512, "step": 475600 }, { "epoch": 76.112, "grad_norm": 0.2748790383338928, "learning_rate": 0.00028859903996159844, "loss": 3.3026, "step": 475700 }, { "epoch": 76.128, "grad_norm": 0.1747456043958664, "learning_rate": 0.0002885966398655946, "loss": 3.5138, "step": 475800 }, { "epoch": 76.144, "grad_norm": 0.17267610132694244, "learning_rate": 0.0002885942397695908, "loss": 3.161, "step": 475900 }, { "epoch": 76.16, "grad_norm": 0.15348294377326965, "learning_rate": 0.00028859183967358694, "loss": 3.269, "step": 476000 }, { "epoch": 76.176, "grad_norm": 0.1753510981798172, "learning_rate": 0.0002885894395775831, "loss": 3.2201, "step": 476100 }, { "epoch": 76.192, "grad_norm": 0.18030767142772675, "learning_rate": 0.0002885870394815792, "loss": 3.5229, "step": 476200 }, { "epoch": 76.208, "grad_norm": 0.20827557146549225, "learning_rate": 0.0002885846393855754, "loss": 3.3336, "step": 476300 }, { "epoch": 76.224, "grad_norm": 0.18274344503879547, "learning_rate": 0.00028858223928957157, "loss": 3.3093, "step": 476400 }, { "epoch": 76.24, "grad_norm": 0.1735648363828659, "learning_rate": 0.00028857983919356773, "loss": 3.093, "step": 476500 }, { "epoch": 76.256, "grad_norm": 0.16284644603729248, "learning_rate": 0.0002885774390975639, "loss": 3.5689, "step": 476600 }, { "epoch": 76.272, "grad_norm": 0.22621631622314453, "learning_rate": 0.0002885750390015601, "loss": 3.1448, "step": 476700 }, { "epoch": 76.288, "grad_norm": 0.17902132868766785, "learning_rate": 0.0002885726389055562, "loss": 3.0953, "step": 476800 }, { "epoch": 76.304, "grad_norm": 0.15414053201675415, "learning_rate": 0.00028857023880955236, "loss": 3.2679, "step": 476900 }, { "epoch": 76.32, "grad_norm": 0.18799768388271332, "learning_rate": 0.0002885678387135485, "loss": 3.2766, "step": 477000 }, { "epoch": 76.336, "grad_norm": 0.20773567259311676, "learning_rate": 0.0002885654386175447, "loss": 3.3568, "step": 477100 }, { "epoch": 76.352, "grad_norm": 0.1889359951019287, "learning_rate": 0.00028856303852154086, "loss": 3.399, "step": 477200 }, { "epoch": 76.368, "grad_norm": 0.1734032779932022, "learning_rate": 0.000288560638425537, "loss": 3.067, "step": 477300 }, { "epoch": 76.384, "grad_norm": 0.1789797693490982, "learning_rate": 0.00028855823832953315, "loss": 3.7071, "step": 477400 }, { "epoch": 76.4, "grad_norm": 0.18260197341442108, "learning_rate": 0.0002885558382335293, "loss": 3.424, "step": 477500 }, { "epoch": 76.416, "grad_norm": 0.24848273396492004, "learning_rate": 0.0002885534381375255, "loss": 3.1735, "step": 477600 }, { "epoch": 76.432, "grad_norm": 0.18025368452072144, "learning_rate": 0.00028855103804152166, "loss": 3.4919, "step": 477700 }, { "epoch": 76.448, "grad_norm": 0.1723688691854477, "learning_rate": 0.0002885486379455178, "loss": 3.337, "step": 477800 }, { "epoch": 76.464, "grad_norm": 0.1997622847557068, "learning_rate": 0.00028854623784951394, "loss": 3.2611, "step": 477900 }, { "epoch": 76.48, "grad_norm": 0.1638079732656479, "learning_rate": 0.0002885438377535101, "loss": 3.2057, "step": 478000 }, { "epoch": 76.496, "grad_norm": 0.1768091917037964, "learning_rate": 0.0002885414376575063, "loss": 3.1041, "step": 478100 }, { "epoch": 76.512, "grad_norm": 0.3187304735183716, "learning_rate": 0.00028853903756150245, "loss": 3.2662, "step": 478200 }, { "epoch": 76.528, "grad_norm": 0.1731933206319809, "learning_rate": 0.0002885366374654986, "loss": 3.2487, "step": 478300 }, { "epoch": 76.544, "grad_norm": 0.22224627435207367, "learning_rate": 0.00028853423736949473, "loss": 3.3586, "step": 478400 }, { "epoch": 76.56, "grad_norm": 0.16906216740608215, "learning_rate": 0.0002885318372734909, "loss": 3.2331, "step": 478500 }, { "epoch": 76.576, "grad_norm": 0.2109525054693222, "learning_rate": 0.0002885294611784471, "loss": 3.4028, "step": 478600 }, { "epoch": 76.592, "grad_norm": 0.19348014891147614, "learning_rate": 0.00028852706108244326, "loss": 3.0912, "step": 478700 }, { "epoch": 76.608, "grad_norm": 0.19300812482833862, "learning_rate": 0.00028852466098643943, "loss": 3.2348, "step": 478800 }, { "epoch": 76.624, "grad_norm": 0.1987311840057373, "learning_rate": 0.0002885222608904356, "loss": 3.2399, "step": 478900 }, { "epoch": 76.64, "grad_norm": 0.18213589489459991, "learning_rate": 0.00028851986079443177, "loss": 3.0501, "step": 479000 }, { "epoch": 76.656, "grad_norm": 0.18810275197029114, "learning_rate": 0.00028851746069842794, "loss": 3.1089, "step": 479100 }, { "epoch": 76.672, "grad_norm": 0.19356615841388702, "learning_rate": 0.0002885150606024241, "loss": 3.2005, "step": 479200 }, { "epoch": 76.688, "grad_norm": 0.21786296367645264, "learning_rate": 0.0002885126605064202, "loss": 3.0687, "step": 479300 }, { "epoch": 76.704, "grad_norm": 0.21601447463035583, "learning_rate": 0.0002885102604104164, "loss": 3.2332, "step": 479400 }, { "epoch": 76.72, "grad_norm": 0.2214280664920807, "learning_rate": 0.00028850786031441256, "loss": 3.1878, "step": 479500 }, { "epoch": 76.736, "grad_norm": 0.22096656262874603, "learning_rate": 0.00028850546021840873, "loss": 3.2327, "step": 479600 }, { "epoch": 76.752, "grad_norm": 0.19889913499355316, "learning_rate": 0.0002885030601224049, "loss": 3.3174, "step": 479700 }, { "epoch": 76.768, "grad_norm": 0.20691253244876862, "learning_rate": 0.00028850066002640107, "loss": 3.1372, "step": 479800 }, { "epoch": 76.784, "grad_norm": 0.2157764881849289, "learning_rate": 0.0002884982599303972, "loss": 3.1768, "step": 479900 }, { "epoch": 76.8, "grad_norm": 0.17193534970283508, "learning_rate": 0.00028849585983439335, "loss": 3.0263, "step": 480000 }, { "epoch": 76.816, "grad_norm": 0.16051940619945526, "learning_rate": 0.0002884934597383895, "loss": 3.3876, "step": 480100 }, { "epoch": 76.832, "grad_norm": 0.21828553080558777, "learning_rate": 0.0002884910596423857, "loss": 3.3153, "step": 480200 }, { "epoch": 76.848, "grad_norm": 0.17573298513889313, "learning_rate": 0.00028848865954638186, "loss": 3.1403, "step": 480300 }, { "epoch": 76.864, "grad_norm": 0.16961078345775604, "learning_rate": 0.00028848625945037797, "loss": 3.2705, "step": 480400 }, { "epoch": 76.88, "grad_norm": 0.16715022921562195, "learning_rate": 0.00028848385935437414, "loss": 3.2123, "step": 480500 }, { "epoch": 76.896, "grad_norm": 0.17759741842746735, "learning_rate": 0.0002884814592583703, "loss": 3.2727, "step": 480600 }, { "epoch": 76.912, "grad_norm": 0.20439237356185913, "learning_rate": 0.0002884790591623665, "loss": 3.2525, "step": 480700 }, { "epoch": 76.928, "grad_norm": 0.16966432332992554, "learning_rate": 0.00028847665906636265, "loss": 3.1616, "step": 480800 }, { "epoch": 76.944, "grad_norm": 0.1711646020412445, "learning_rate": 0.0002884742589703588, "loss": 3.0168, "step": 480900 }, { "epoch": 76.96, "grad_norm": 0.20517414808273315, "learning_rate": 0.00028847188287531495, "loss": 3.1853, "step": 481000 }, { "epoch": 76.976, "grad_norm": 0.18622702360153198, "learning_rate": 0.0002884694827793111, "loss": 3.4392, "step": 481100 }, { "epoch": 76.992, "grad_norm": 0.20060504972934723, "learning_rate": 0.0002884670826833073, "loss": 3.1353, "step": 481200 }, { "epoch": 77.008, "grad_norm": 0.15381723642349243, "learning_rate": 0.00028846468258730346, "loss": 3.1444, "step": 481300 }, { "epoch": 77.024, "grad_norm": 0.17212416231632233, "learning_rate": 0.00028846228249129963, "loss": 2.9899, "step": 481400 }, { "epoch": 77.04, "grad_norm": 0.19999881088733673, "learning_rate": 0.00028845988239529574, "loss": 3.2377, "step": 481500 }, { "epoch": 77.056, "grad_norm": 0.17655926942825317, "learning_rate": 0.0002884574822992919, "loss": 3.0245, "step": 481600 }, { "epoch": 77.072, "grad_norm": 0.1644718199968338, "learning_rate": 0.0002884550822032881, "loss": 2.9834, "step": 481700 }, { "epoch": 77.088, "grad_norm": 0.1793651431798935, "learning_rate": 0.00028845268210728425, "loss": 3.2524, "step": 481800 }, { "epoch": 77.104, "grad_norm": 0.20348887145519257, "learning_rate": 0.0002884502820112804, "loss": 3.1962, "step": 481900 }, { "epoch": 77.12, "grad_norm": 0.18656928837299347, "learning_rate": 0.0002884478819152766, "loss": 2.9841, "step": 482000 }, { "epoch": 77.136, "grad_norm": 0.16353657841682434, "learning_rate": 0.00028844548181927276, "loss": 3.3127, "step": 482100 }, { "epoch": 77.152, "grad_norm": 0.2032342255115509, "learning_rate": 0.00028844308172326893, "loss": 3.2987, "step": 482200 }, { "epoch": 77.168, "grad_norm": 0.1874752640724182, "learning_rate": 0.0002884406816272651, "loss": 3.1784, "step": 482300 }, { "epoch": 77.184, "grad_norm": 0.202206552028656, "learning_rate": 0.0002884382815312612, "loss": 3.0039, "step": 482400 }, { "epoch": 77.2, "grad_norm": 0.18091371655464172, "learning_rate": 0.0002884358814352574, "loss": 3.2706, "step": 482500 }, { "epoch": 77.216, "grad_norm": 0.17988036572933197, "learning_rate": 0.00028843350534021357, "loss": 3.2441, "step": 482600 }, { "epoch": 77.232, "grad_norm": 0.20216509699821472, "learning_rate": 0.00028843110524420974, "loss": 3.2544, "step": 482700 }, { "epoch": 77.248, "grad_norm": 0.18859700858592987, "learning_rate": 0.0002884287051482059, "loss": 3.0422, "step": 482800 }, { "epoch": 77.264, "grad_norm": 0.1698991060256958, "learning_rate": 0.0002884263050522021, "loss": 2.9902, "step": 482900 }, { "epoch": 77.28, "grad_norm": 0.16603803634643555, "learning_rate": 0.0002884239049561982, "loss": 3.4067, "step": 483000 }, { "epoch": 77.296, "grad_norm": 0.1645193099975586, "learning_rate": 0.00028842150486019436, "loss": 3.1889, "step": 483100 }, { "epoch": 77.312, "grad_norm": 0.17591646313667297, "learning_rate": 0.00028841910476419053, "loss": 3.2062, "step": 483200 }, { "epoch": 77.328, "grad_norm": 0.18378666043281555, "learning_rate": 0.0002884167046681867, "loss": 3.3235, "step": 483300 }, { "epoch": 77.344, "grad_norm": 0.17582263052463531, "learning_rate": 0.00028841430457218287, "loss": 3.5863, "step": 483400 }, { "epoch": 77.36, "grad_norm": 0.1830289214849472, "learning_rate": 0.000288411904476179, "loss": 3.0265, "step": 483500 }, { "epoch": 77.376, "grad_norm": 0.18079505860805511, "learning_rate": 0.00028840950438017515, "loss": 3.185, "step": 483600 }, { "epoch": 77.392, "grad_norm": 0.17557358741760254, "learning_rate": 0.0002884071042841713, "loss": 3.1537, "step": 483700 }, { "epoch": 77.408, "grad_norm": 0.1893519163131714, "learning_rate": 0.0002884047041881675, "loss": 3.1925, "step": 483800 }, { "epoch": 77.424, "grad_norm": 0.18808972835540771, "learning_rate": 0.00028840230409216366, "loss": 3.3176, "step": 483900 }, { "epoch": 77.44, "grad_norm": 0.23982372879981995, "learning_rate": 0.00028839990399615983, "loss": 3.621, "step": 484000 }, { "epoch": 77.456, "grad_norm": 0.20537614822387695, "learning_rate": 0.00028839750390015595, "loss": 3.2247, "step": 484100 }, { "epoch": 77.472, "grad_norm": 0.21954742074012756, "learning_rate": 0.0002883951038041521, "loss": 3.3668, "step": 484200 }, { "epoch": 77.488, "grad_norm": 0.2095320224761963, "learning_rate": 0.0002883927037081483, "loss": 3.383, "step": 484300 }, { "epoch": 77.504, "grad_norm": 0.1934623122215271, "learning_rate": 0.00028839030361214445, "loss": 3.1361, "step": 484400 }, { "epoch": 77.52, "grad_norm": 0.18647393584251404, "learning_rate": 0.0002883879035161406, "loss": 3.0611, "step": 484500 }, { "epoch": 77.536, "grad_norm": 0.15348878502845764, "learning_rate": 0.0002883855274210968, "loss": 3.0845, "step": 484600 }, { "epoch": 77.552, "grad_norm": 0.1916002482175827, "learning_rate": 0.000288383127325093, "loss": 3.322, "step": 484700 }, { "epoch": 77.568, "grad_norm": 0.2017233669757843, "learning_rate": 0.00028838072722908915, "loss": 3.0105, "step": 484800 }, { "epoch": 77.584, "grad_norm": 0.21193411946296692, "learning_rate": 0.0002883783271330853, "loss": 3.3264, "step": 484900 }, { "epoch": 77.6, "grad_norm": 0.18349112570285797, "learning_rate": 0.00028837592703708143, "loss": 3.1514, "step": 485000 }, { "epoch": 77.616, "grad_norm": 0.18386046588420868, "learning_rate": 0.0002883735269410776, "loss": 3.4791, "step": 485100 }, { "epoch": 77.632, "grad_norm": 0.19964712858200073, "learning_rate": 0.00028837112684507377, "loss": 3.3792, "step": 485200 }, { "epoch": 77.648, "grad_norm": 0.19665805995464325, "learning_rate": 0.00028836872674906994, "loss": 3.2015, "step": 485300 }, { "epoch": 77.664, "grad_norm": 0.17579634487628937, "learning_rate": 0.0002883663266530661, "loss": 3.2487, "step": 485400 }, { "epoch": 77.68, "grad_norm": 0.2160295695066452, "learning_rate": 0.0002883639265570622, "loss": 3.2042, "step": 485500 }, { "epoch": 77.696, "grad_norm": 0.21755699813365936, "learning_rate": 0.0002883615264610584, "loss": 3.1731, "step": 485600 }, { "epoch": 77.712, "grad_norm": 0.17049720883369446, "learning_rate": 0.00028835912636505456, "loss": 3.3141, "step": 485700 }, { "epoch": 77.728, "grad_norm": 0.21340446174144745, "learning_rate": 0.00028835672626905073, "loss": 3.3955, "step": 485800 }, { "epoch": 77.744, "grad_norm": 0.195237398147583, "learning_rate": 0.0002883543261730469, "loss": 3.3615, "step": 485900 }, { "epoch": 77.76, "grad_norm": 0.16303806006908417, "learning_rate": 0.00028835192607704307, "loss": 3.1846, "step": 486000 }, { "epoch": 77.776, "grad_norm": 0.2329171746969223, "learning_rate": 0.0002883495259810392, "loss": 3.1006, "step": 486100 }, { "epoch": 77.792, "grad_norm": 0.15829315781593323, "learning_rate": 0.00028834712588503536, "loss": 3.1167, "step": 486200 }, { "epoch": 77.808, "grad_norm": 0.2007259875535965, "learning_rate": 0.0002883447257890315, "loss": 3.2951, "step": 486300 }, { "epoch": 77.824, "grad_norm": 0.18880033493041992, "learning_rate": 0.0002883423256930277, "loss": 3.5906, "step": 486400 }, { "epoch": 77.84, "grad_norm": 0.2175186723470688, "learning_rate": 0.00028833992559702386, "loss": 3.4568, "step": 486500 }, { "epoch": 77.856, "grad_norm": 0.24570512771606445, "learning_rate": 0.00028833752550102003, "loss": 3.3364, "step": 486600 }, { "epoch": 77.872, "grad_norm": 0.26171624660491943, "learning_rate": 0.00028833512540501615, "loss": 3.3578, "step": 486700 }, { "epoch": 77.888, "grad_norm": 0.20597206056118011, "learning_rate": 0.0002883327253090123, "loss": 3.1966, "step": 486800 }, { "epoch": 77.904, "grad_norm": 0.15314194560050964, "learning_rate": 0.0002883303252130085, "loss": 3.2431, "step": 486900 }, { "epoch": 77.92, "grad_norm": 0.14769768714904785, "learning_rate": 0.00028832792511700466, "loss": 3.3595, "step": 487000 }, { "epoch": 77.936, "grad_norm": 0.19397376477718353, "learning_rate": 0.0002883255250210008, "loss": 3.3348, "step": 487100 }, { "epoch": 77.952, "grad_norm": 0.19012783467769623, "learning_rate": 0.00028832312492499694, "loss": 3.1724, "step": 487200 }, { "epoch": 77.968, "grad_norm": 0.18658187985420227, "learning_rate": 0.0002883207248289931, "loss": 3.3826, "step": 487300 }, { "epoch": 77.984, "grad_norm": 0.1921878308057785, "learning_rate": 0.0002883183247329893, "loss": 3.3464, "step": 487400 }, { "epoch": 78.0, "grad_norm": 0.17443032562732697, "learning_rate": 0.00028831594863794547, "loss": 2.9107, "step": 487500 }, { "epoch": 78.016, "grad_norm": 0.21130195260047913, "learning_rate": 0.00028831354854194164, "loss": 3.3743, "step": 487600 }, { "epoch": 78.032, "grad_norm": 0.20503652095794678, "learning_rate": 0.0002883111484459378, "loss": 3.1115, "step": 487700 }, { "epoch": 78.048, "grad_norm": 0.20938697457313538, "learning_rate": 0.000288308748349934, "loss": 3.2651, "step": 487800 }, { "epoch": 78.064, "grad_norm": 0.17736966907978058, "learning_rate": 0.00028830634825393014, "loss": 3.0913, "step": 487900 }, { "epoch": 78.08, "grad_norm": 0.1713695526123047, "learning_rate": 0.0002883039481579263, "loss": 3.4758, "step": 488000 }, { "epoch": 78.096, "grad_norm": 0.19420044124126434, "learning_rate": 0.00028830154806192243, "loss": 3.2331, "step": 488100 }, { "epoch": 78.112, "grad_norm": 0.2261432558298111, "learning_rate": 0.0002882991479659186, "loss": 3.0972, "step": 488200 }, { "epoch": 78.128, "grad_norm": 0.23825183510780334, "learning_rate": 0.00028829674786991477, "loss": 3.2824, "step": 488300 }, { "epoch": 78.144, "grad_norm": 0.23167072236537933, "learning_rate": 0.00028829434777391093, "loss": 3.2964, "step": 488400 }, { "epoch": 78.16, "grad_norm": 0.17324386537075043, "learning_rate": 0.0002882919476779071, "loss": 3.237, "step": 488500 }, { "epoch": 78.176, "grad_norm": 0.1563355177640915, "learning_rate": 0.0002882895475819033, "loss": 3.1392, "step": 488600 }, { "epoch": 78.192, "grad_norm": 0.1905829757452011, "learning_rate": 0.0002882871474858994, "loss": 3.577, "step": 488700 }, { "epoch": 78.208, "grad_norm": 0.19480052590370178, "learning_rate": 0.00028828474738989556, "loss": 3.1977, "step": 488800 }, { "epoch": 78.224, "grad_norm": 0.2252776175737381, "learning_rate": 0.0002882823472938917, "loss": 3.4894, "step": 488900 }, { "epoch": 78.24, "grad_norm": 0.21050992608070374, "learning_rate": 0.0002882799471978879, "loss": 3.3065, "step": 489000 }, { "epoch": 78.256, "grad_norm": 0.21247756481170654, "learning_rate": 0.00028827754710188406, "loss": 3.2607, "step": 489100 }, { "epoch": 78.272, "grad_norm": 0.23322387039661407, "learning_rate": 0.0002882751470058802, "loss": 3.541, "step": 489200 }, { "epoch": 78.288, "grad_norm": 0.2278556227684021, "learning_rate": 0.00028827274690987635, "loss": 3.0939, "step": 489300 }, { "epoch": 78.304, "grad_norm": 0.1960030198097229, "learning_rate": 0.0002882703468138725, "loss": 2.9657, "step": 489400 }, { "epoch": 78.32, "grad_norm": 0.20741654932498932, "learning_rate": 0.0002882679467178687, "loss": 3.0966, "step": 489500 }, { "epoch": 78.336, "grad_norm": 0.17581628262996674, "learning_rate": 0.00028826554662186486, "loss": 3.0744, "step": 489600 }, { "epoch": 78.352, "grad_norm": 0.19990313053131104, "learning_rate": 0.000288263146525861, "loss": 3.1785, "step": 489700 }, { "epoch": 78.368, "grad_norm": 0.2037050873041153, "learning_rate": 0.00028826074642985714, "loss": 3.288, "step": 489800 }, { "epoch": 78.384, "grad_norm": 0.1900002509355545, "learning_rate": 0.0002882583463338533, "loss": 3.3837, "step": 489900 }, { "epoch": 78.4, "grad_norm": 0.1928175389766693, "learning_rate": 0.0002882559462378495, "loss": 3.1073, "step": 490000 }, { "epoch": 78.416, "grad_norm": 0.18612203001976013, "learning_rate": 0.00028825354614184565, "loss": 3.2823, "step": 490100 }, { "epoch": 78.432, "grad_norm": 0.20336517691612244, "learning_rate": 0.0002882511460458418, "loss": 3.2498, "step": 490200 }, { "epoch": 78.448, "grad_norm": 0.21317599713802338, "learning_rate": 0.00028824874594983793, "loss": 3.0838, "step": 490300 }, { "epoch": 78.464, "grad_norm": 0.20365267992019653, "learning_rate": 0.0002882463458538341, "loss": 3.1668, "step": 490400 }, { "epoch": 78.48, "grad_norm": 0.16608567535877228, "learning_rate": 0.00028824394575783027, "loss": 3.2389, "step": 490500 }, { "epoch": 78.496, "grad_norm": 0.21338753402233124, "learning_rate": 0.00028824154566182644, "loss": 2.9375, "step": 490600 }, { "epoch": 78.512, "grad_norm": 0.18601849675178528, "learning_rate": 0.0002882391455658226, "loss": 3.3523, "step": 490700 }, { "epoch": 78.528, "grad_norm": 0.17798122763633728, "learning_rate": 0.0002882367454698188, "loss": 2.8265, "step": 490800 }, { "epoch": 78.544, "grad_norm": 0.20656082034111023, "learning_rate": 0.0002882343453738149, "loss": 3.207, "step": 490900 }, { "epoch": 78.56, "grad_norm": 0.17426608502864838, "learning_rate": 0.00028823194527781106, "loss": 3.2075, "step": 491000 }, { "epoch": 78.576, "grad_norm": 0.21214397251605988, "learning_rate": 0.00028822954518180723, "loss": 3.7461, "step": 491100 }, { "epoch": 78.592, "grad_norm": 0.20507536828517914, "learning_rate": 0.0002882271450858034, "loss": 3.1439, "step": 491200 }, { "epoch": 78.608, "grad_norm": 0.15788741409778595, "learning_rate": 0.00028822474498979957, "loss": 3.2915, "step": 491300 }, { "epoch": 78.624, "grad_norm": 0.17243506014347076, "learning_rate": 0.00028822234489379574, "loss": 3.3557, "step": 491400 }, { "epoch": 78.64, "grad_norm": 0.17151911556720734, "learning_rate": 0.0002882199447977919, "loss": 3.02, "step": 491500 }, { "epoch": 78.656, "grad_norm": 0.22068029642105103, "learning_rate": 0.0002882175447017881, "loss": 3.1523, "step": 491600 }, { "epoch": 78.672, "grad_norm": 0.1868075579404831, "learning_rate": 0.00028821514460578425, "loss": 3.4248, "step": 491700 }, { "epoch": 78.688, "grad_norm": 0.1689171940088272, "learning_rate": 0.0002882127685107404, "loss": 3.4367, "step": 491800 }, { "epoch": 78.704, "grad_norm": 0.1843259632587433, "learning_rate": 0.00028821036841473655, "loss": 3.2792, "step": 491900 }, { "epoch": 78.72, "grad_norm": 0.21030554175376892, "learning_rate": 0.0002882079683187327, "loss": 3.3982, "step": 492000 }, { "epoch": 78.736, "grad_norm": 0.16767102479934692, "learning_rate": 0.0002882055682227289, "loss": 3.4188, "step": 492100 }, { "epoch": 78.752, "grad_norm": 0.17028939723968506, "learning_rate": 0.00028820316812672506, "loss": 3.3027, "step": 492200 }, { "epoch": 78.768, "grad_norm": 0.17717479169368744, "learning_rate": 0.0002882007680307212, "loss": 3.3464, "step": 492300 }, { "epoch": 78.784, "grad_norm": 0.1841953992843628, "learning_rate": 0.00028819836793471734, "loss": 3.2215, "step": 492400 }, { "epoch": 78.8, "grad_norm": 0.1778186410665512, "learning_rate": 0.0002881959678387135, "loss": 3.1847, "step": 492500 }, { "epoch": 78.816, "grad_norm": 0.18269284069538116, "learning_rate": 0.0002881935677427097, "loss": 3.2574, "step": 492600 }, { "epoch": 78.832, "grad_norm": 0.22652405500411987, "learning_rate": 0.00028819116764670585, "loss": 3.2747, "step": 492700 }, { "epoch": 78.848, "grad_norm": 0.15682004392147064, "learning_rate": 0.000288188767550702, "loss": 3.05, "step": 492800 }, { "epoch": 78.864, "grad_norm": 0.17050740122795105, "learning_rate": 0.00028818636745469813, "loss": 3.379, "step": 492900 }, { "epoch": 78.88, "grad_norm": 0.17876076698303223, "learning_rate": 0.0002881839673586943, "loss": 3.2241, "step": 493000 }, { "epoch": 78.896, "grad_norm": 0.21459339559078217, "learning_rate": 0.00028818156726269047, "loss": 3.2743, "step": 493100 }, { "epoch": 78.912, "grad_norm": 0.22350555658340454, "learning_rate": 0.00028817916716668664, "loss": 3.4576, "step": 493200 }, { "epoch": 78.928, "grad_norm": 0.23055286705493927, "learning_rate": 0.0002881767670706828, "loss": 3.3665, "step": 493300 }, { "epoch": 78.944, "grad_norm": 0.1723843663930893, "learning_rate": 0.0002881743669746789, "loss": 3.1288, "step": 493400 }, { "epoch": 78.96, "grad_norm": 0.17281213402748108, "learning_rate": 0.0002881719668786751, "loss": 3.3688, "step": 493500 }, { "epoch": 78.976, "grad_norm": 0.17554327845573425, "learning_rate": 0.00028816956678267126, "loss": 3.5794, "step": 493600 }, { "epoch": 78.992, "grad_norm": 0.18570677936077118, "learning_rate": 0.00028816716668666743, "loss": 2.9189, "step": 493700 }, { "epoch": 79.008, "grad_norm": 0.17389939725399017, "learning_rate": 0.0002881647665906636, "loss": 3.0325, "step": 493800 }, { "epoch": 79.024, "grad_norm": 0.20151086151599884, "learning_rate": 0.00028816236649465977, "loss": 3.3552, "step": 493900 }, { "epoch": 79.04, "grad_norm": 0.1858285516500473, "learning_rate": 0.0002881599663986559, "loss": 2.8926, "step": 494000 }, { "epoch": 79.056, "grad_norm": 0.20327745378017426, "learning_rate": 0.00028815756630265206, "loss": 3.1011, "step": 494100 }, { "epoch": 79.072, "grad_norm": 0.2225978821516037, "learning_rate": 0.0002881551662066482, "loss": 3.2525, "step": 494200 }, { "epoch": 79.088, "grad_norm": 0.19074854254722595, "learning_rate": 0.0002881527661106444, "loss": 3.0887, "step": 494300 }, { "epoch": 79.104, "grad_norm": 0.1984143704175949, "learning_rate": 0.00028815036601464056, "loss": 3.3105, "step": 494400 }, { "epoch": 79.12, "grad_norm": 0.168143630027771, "learning_rate": 0.00028814796591863673, "loss": 2.8986, "step": 494500 }, { "epoch": 79.136, "grad_norm": 0.17813006043434143, "learning_rate": 0.0002881455658226329, "loss": 3.2318, "step": 494600 }, { "epoch": 79.152, "grad_norm": 0.20841164886951447, "learning_rate": 0.00028814316572662907, "loss": 3.2173, "step": 494700 }, { "epoch": 79.168, "grad_norm": 0.21775470674037933, "learning_rate": 0.00028814076563062524, "loss": 2.9451, "step": 494800 }, { "epoch": 79.184, "grad_norm": 0.26405176520347595, "learning_rate": 0.0002881383895355814, "loss": 3.2751, "step": 494900 }, { "epoch": 79.2, "grad_norm": 0.1794021725654602, "learning_rate": 0.00028813598943957754, "loss": 3.4781, "step": 495000 }, { "epoch": 79.216, "grad_norm": 0.18064285814762115, "learning_rate": 0.0002881335893435737, "loss": 3.2916, "step": 495100 }, { "epoch": 79.232, "grad_norm": 0.22136500477790833, "learning_rate": 0.0002881311892475699, "loss": 3.2751, "step": 495200 }, { "epoch": 79.248, "grad_norm": 0.21629908680915833, "learning_rate": 0.00028812878915156605, "loss": 3.1948, "step": 495300 }, { "epoch": 79.264, "grad_norm": 0.1958998441696167, "learning_rate": 0.00028812638905556217, "loss": 3.1706, "step": 495400 }, { "epoch": 79.28, "grad_norm": 0.18197138607501984, "learning_rate": 0.00028812398895955834, "loss": 3.135, "step": 495500 }, { "epoch": 79.296, "grad_norm": 0.24022528529167175, "learning_rate": 0.0002881216128645146, "loss": 3.4635, "step": 495600 }, { "epoch": 79.312, "grad_norm": 0.20201241970062256, "learning_rate": 0.00028811921276851075, "loss": 3.2921, "step": 495700 }, { "epoch": 79.328, "grad_norm": 0.19205938279628754, "learning_rate": 0.00028811681267250686, "loss": 3.3351, "step": 495800 }, { "epoch": 79.344, "grad_norm": 0.2537722885608673, "learning_rate": 0.00028811441257650303, "loss": 3.3083, "step": 495900 }, { "epoch": 79.36, "grad_norm": 0.17929476499557495, "learning_rate": 0.0002881120124804992, "loss": 3.2012, "step": 496000 }, { "epoch": 79.376, "grad_norm": 0.1883215755224228, "learning_rate": 0.00028810961238449537, "loss": 3.4929, "step": 496100 }, { "epoch": 79.392, "grad_norm": 0.21978773176670074, "learning_rate": 0.00028810721228849154, "loss": 3.4306, "step": 496200 }, { "epoch": 79.408, "grad_norm": 0.2067193239927292, "learning_rate": 0.00028810481219248765, "loss": 3.5325, "step": 496300 }, { "epoch": 79.424, "grad_norm": 0.18766732513904572, "learning_rate": 0.0002881024120964838, "loss": 3.3823, "step": 496400 }, { "epoch": 79.44, "grad_norm": 0.16256745159626007, "learning_rate": 0.00028810001200048, "loss": 3.1791, "step": 496500 }, { "epoch": 79.456, "grad_norm": 0.21208395063877106, "learning_rate": 0.00028809761190447616, "loss": 3.3771, "step": 496600 }, { "epoch": 79.472, "grad_norm": 0.19402684271335602, "learning_rate": 0.00028809521180847233, "loss": 3.4174, "step": 496700 }, { "epoch": 79.488, "grad_norm": 0.18042083084583282, "learning_rate": 0.0002880928117124685, "loss": 3.2491, "step": 496800 }, { "epoch": 79.504, "grad_norm": 0.18346484005451202, "learning_rate": 0.0002880904116164646, "loss": 3.4229, "step": 496900 }, { "epoch": 79.52, "grad_norm": 0.23758134245872498, "learning_rate": 0.0002880880115204608, "loss": 3.246, "step": 497000 }, { "epoch": 79.536, "grad_norm": 0.22581183910369873, "learning_rate": 0.00028808561142445695, "loss": 3.5194, "step": 497100 }, { "epoch": 79.552, "grad_norm": 0.21654443442821503, "learning_rate": 0.0002880832113284531, "loss": 3.4239, "step": 497200 }, { "epoch": 79.568, "grad_norm": 0.18471881747245789, "learning_rate": 0.0002880808112324493, "loss": 3.4957, "step": 497300 }, { "epoch": 79.584, "grad_norm": 0.21423903107643127, "learning_rate": 0.0002880784111364454, "loss": 3.444, "step": 497400 }, { "epoch": 79.6, "grad_norm": 0.23198798298835754, "learning_rate": 0.0002880760110404416, "loss": 3.2053, "step": 497500 }, { "epoch": 79.616, "grad_norm": 0.2088962346315384, "learning_rate": 0.00028807361094443774, "loss": 3.3352, "step": 497600 }, { "epoch": 79.632, "grad_norm": 0.18486930429935455, "learning_rate": 0.0002880712108484339, "loss": 3.173, "step": 497700 }, { "epoch": 79.648, "grad_norm": 0.1727253496646881, "learning_rate": 0.0002880688107524301, "loss": 3.1595, "step": 497800 }, { "epoch": 79.664, "grad_norm": 0.2097383737564087, "learning_rate": 0.00028806641065642625, "loss": 3.4246, "step": 497900 }, { "epoch": 79.68, "grad_norm": 0.164577916264534, "learning_rate": 0.00028806401056042237, "loss": 3.1577, "step": 498000 }, { "epoch": 79.696, "grad_norm": 0.17887596786022186, "learning_rate": 0.00028806161046441854, "loss": 3.2193, "step": 498100 }, { "epoch": 79.712, "grad_norm": 0.1924980729818344, "learning_rate": 0.0002880592103684147, "loss": 3.1413, "step": 498200 }, { "epoch": 79.728, "grad_norm": 0.15148630738258362, "learning_rate": 0.0002880568102724109, "loss": 2.9436, "step": 498300 }, { "epoch": 79.744, "grad_norm": 0.19948723912239075, "learning_rate": 0.00028805441017640704, "loss": 3.3734, "step": 498400 }, { "epoch": 79.76, "grad_norm": 0.1554396152496338, "learning_rate": 0.00028805201008040316, "loss": 3.1003, "step": 498500 }, { "epoch": 79.776, "grad_norm": 0.19769465923309326, "learning_rate": 0.00028804960998439933, "loss": 3.1493, "step": 498600 }, { "epoch": 79.792, "grad_norm": 0.2229677438735962, "learning_rate": 0.0002880472098883955, "loss": 3.3678, "step": 498700 }, { "epoch": 79.808, "grad_norm": 0.19974124431610107, "learning_rate": 0.00028804480979239167, "loss": 3.1374, "step": 498800 }, { "epoch": 79.824, "grad_norm": 0.18868470191955566, "learning_rate": 0.00028804240969638784, "loss": 3.0919, "step": 498900 }, { "epoch": 79.84, "grad_norm": 0.1556967943906784, "learning_rate": 0.000288040009600384, "loss": 3.0831, "step": 499000 }, { "epoch": 79.856, "grad_norm": 0.18559803068637848, "learning_rate": 0.0002880376095043801, "loss": 3.2963, "step": 499100 }, { "epoch": 79.872, "grad_norm": 0.2483079731464386, "learning_rate": 0.0002880352094083763, "loss": 3.259, "step": 499200 }, { "epoch": 79.888, "grad_norm": 0.1605631709098816, "learning_rate": 0.00028803280931237246, "loss": 3.1528, "step": 499300 }, { "epoch": 79.904, "grad_norm": 0.18494854867458344, "learning_rate": 0.00028803040921636863, "loss": 3.2838, "step": 499400 }, { "epoch": 79.92, "grad_norm": 0.20409464836120605, "learning_rate": 0.0002880280091203648, "loss": 3.0662, "step": 499500 }, { "epoch": 79.936, "grad_norm": 0.1652652621269226, "learning_rate": 0.0002880256090243609, "loss": 3.1503, "step": 499600 }, { "epoch": 79.952, "grad_norm": 0.18050351738929749, "learning_rate": 0.0002880232089283571, "loss": 3.0836, "step": 499700 }, { "epoch": 79.968, "grad_norm": 0.18206700682640076, "learning_rate": 0.00028802080883235325, "loss": 3.3222, "step": 499800 }, { "epoch": 79.984, "grad_norm": 0.16074883937835693, "learning_rate": 0.0002880184087363494, "loss": 3.0279, "step": 499900 }, { "epoch": 80.0, "grad_norm": 0.21690300107002258, "learning_rate": 0.0002880160086403456, "loss": 3.0049, "step": 500000 }, { "epoch": 80.016, "grad_norm": 0.18897859752178192, "learning_rate": 0.00028801360854434176, "loss": 3.0462, "step": 500100 }, { "epoch": 80.032, "grad_norm": 0.20244303345680237, "learning_rate": 0.00028801120844833787, "loss": 3.0756, "step": 500200 }, { "epoch": 80.048, "grad_norm": 0.1859019696712494, "learning_rate": 0.00028800880835233404, "loss": 3.3817, "step": 500300 }, { "epoch": 80.064, "grad_norm": 0.1926698088645935, "learning_rate": 0.0002880064082563302, "loss": 3.0201, "step": 500400 }, { "epoch": 80.08, "grad_norm": 0.19421401619911194, "learning_rate": 0.0002880040081603264, "loss": 2.9802, "step": 500500 }, { "epoch": 80.096, "grad_norm": 0.24443002045154572, "learning_rate": 0.00028800160806432255, "loss": 3.3101, "step": 500600 }, { "epoch": 80.112, "grad_norm": 0.16974575817584991, "learning_rate": 0.0002879992079683187, "loss": 3.403, "step": 500700 }, { "epoch": 80.128, "grad_norm": 0.1943347305059433, "learning_rate": 0.0002879968078723149, "loss": 3.0497, "step": 500800 }, { "epoch": 80.144, "grad_norm": 0.24400247633457184, "learning_rate": 0.00028799440777631106, "loss": 3.2626, "step": 500900 }, { "epoch": 80.16, "grad_norm": 0.24454838037490845, "learning_rate": 0.0002879920076803072, "loss": 3.4507, "step": 501000 }, { "epoch": 80.176, "grad_norm": 0.19039402902126312, "learning_rate": 0.00028798963158526336, "loss": 3.4266, "step": 501100 }, { "epoch": 80.192, "grad_norm": 0.22030793130397797, "learning_rate": 0.00028798723148925953, "loss": 3.489, "step": 501200 }, { "epoch": 80.208, "grad_norm": 0.20017939805984497, "learning_rate": 0.0002879848313932557, "loss": 3.4135, "step": 501300 }, { "epoch": 80.224, "grad_norm": 0.17595651745796204, "learning_rate": 0.00028798243129725187, "loss": 3.4315, "step": 501400 }, { "epoch": 80.24, "grad_norm": 0.19730012118816376, "learning_rate": 0.00028798003120124804, "loss": 3.6157, "step": 501500 }, { "epoch": 80.256, "grad_norm": 0.17319773137569427, "learning_rate": 0.00028797763110524415, "loss": 3.4607, "step": 501600 }, { "epoch": 80.272, "grad_norm": 0.22085772454738617, "learning_rate": 0.0002879752310092403, "loss": 3.2313, "step": 501700 }, { "epoch": 80.288, "grad_norm": 0.20844422280788422, "learning_rate": 0.0002879728309132365, "loss": 3.237, "step": 501800 }, { "epoch": 80.304, "grad_norm": 0.23084773123264313, "learning_rate": 0.00028797043081723266, "loss": 3.2301, "step": 501900 }, { "epoch": 80.32, "grad_norm": 0.2085793912410736, "learning_rate": 0.00028796803072122883, "loss": 3.1021, "step": 502000 }, { "epoch": 80.336, "grad_norm": 0.205845907330513, "learning_rate": 0.000287965630625225, "loss": 3.3287, "step": 502100 }, { "epoch": 80.352, "grad_norm": 0.18506449460983276, "learning_rate": 0.0002879632305292211, "loss": 3.1705, "step": 502200 }, { "epoch": 80.368, "grad_norm": 0.1791771799325943, "learning_rate": 0.0002879608304332173, "loss": 3.2851, "step": 502300 }, { "epoch": 80.384, "grad_norm": 0.18030160665512085, "learning_rate": 0.00028795843033721345, "loss": 3.0824, "step": 502400 }, { "epoch": 80.4, "grad_norm": 0.22298966348171234, "learning_rate": 0.0002879560302412096, "loss": 3.5126, "step": 502500 }, { "epoch": 80.416, "grad_norm": 0.20575083792209625, "learning_rate": 0.0002879536301452058, "loss": 3.4192, "step": 502600 }, { "epoch": 80.432, "grad_norm": 0.1953856647014618, "learning_rate": 0.0002879512300492019, "loss": 3.3025, "step": 502700 }, { "epoch": 80.448, "grad_norm": 0.18458403646945953, "learning_rate": 0.0002879488299531981, "loss": 3.2081, "step": 502800 }, { "epoch": 80.464, "grad_norm": 0.18441970646381378, "learning_rate": 0.00028794642985719424, "loss": 3.1875, "step": 502900 }, { "epoch": 80.48, "grad_norm": 0.21644620597362518, "learning_rate": 0.0002879440297611904, "loss": 3.3041, "step": 503000 }, { "epoch": 80.496, "grad_norm": 0.18547526001930237, "learning_rate": 0.0002879416296651866, "loss": 3.4474, "step": 503100 }, { "epoch": 80.512, "grad_norm": 0.20009949803352356, "learning_rate": 0.00028793922956918275, "loss": 3.0378, "step": 503200 }, { "epoch": 80.528, "grad_norm": 0.19548571109771729, "learning_rate": 0.00028793682947317887, "loss": 3.2889, "step": 503300 }, { "epoch": 80.544, "grad_norm": 0.21069781482219696, "learning_rate": 0.00028793442937717504, "loss": 3.5325, "step": 503400 }, { "epoch": 80.56, "grad_norm": 0.21394751965999603, "learning_rate": 0.0002879320292811712, "loss": 3.1859, "step": 503500 }, { "epoch": 80.576, "grad_norm": 0.187339186668396, "learning_rate": 0.0002879296291851674, "loss": 3.4696, "step": 503600 }, { "epoch": 80.592, "grad_norm": 0.20522832870483398, "learning_rate": 0.00028792722908916354, "loss": 3.4427, "step": 503700 }, { "epoch": 80.608, "grad_norm": 0.187233105301857, "learning_rate": 0.0002879248289931597, "loss": 3.0908, "step": 503800 }, { "epoch": 80.624, "grad_norm": 0.24587441980838776, "learning_rate": 0.0002879224288971559, "loss": 3.2704, "step": 503900 }, { "epoch": 80.64, "grad_norm": 0.1926375925540924, "learning_rate": 0.00028792005280211207, "loss": 3.198, "step": 504000 }, { "epoch": 80.656, "grad_norm": 0.22519610822200775, "learning_rate": 0.00028791765270610824, "loss": 3.2988, "step": 504100 }, { "epoch": 80.672, "grad_norm": 0.20143365859985352, "learning_rate": 0.00028791525261010435, "loss": 3.3031, "step": 504200 }, { "epoch": 80.688, "grad_norm": 0.18084408342838287, "learning_rate": 0.0002879128525141005, "loss": 3.1371, "step": 504300 }, { "epoch": 80.704, "grad_norm": 0.23853391408920288, "learning_rate": 0.0002879104524180967, "loss": 3.087, "step": 504400 }, { "epoch": 80.72, "grad_norm": 0.15538068115711212, "learning_rate": 0.00028790805232209286, "loss": 3.186, "step": 504500 }, { "epoch": 80.736, "grad_norm": 0.21059878170490265, "learning_rate": 0.00028790565222608903, "loss": 3.3896, "step": 504600 }, { "epoch": 80.752, "grad_norm": 0.19267794489860535, "learning_rate": 0.00028790325213008515, "loss": 3.4227, "step": 504700 }, { "epoch": 80.768, "grad_norm": 0.21450594067573547, "learning_rate": 0.0002879008520340813, "loss": 3.1517, "step": 504800 }, { "epoch": 80.784, "grad_norm": 0.21050255000591278, "learning_rate": 0.0002878984519380775, "loss": 3.3993, "step": 504900 }, { "epoch": 80.8, "grad_norm": 0.18159128725528717, "learning_rate": 0.00028789605184207365, "loss": 3.4553, "step": 505000 }, { "epoch": 80.816, "grad_norm": 0.253479927778244, "learning_rate": 0.0002878936517460698, "loss": 3.3083, "step": 505100 }, { "epoch": 80.832, "grad_norm": 0.18854288756847382, "learning_rate": 0.000287891251650066, "loss": 3.0381, "step": 505200 }, { "epoch": 80.848, "grad_norm": 0.21290509402751923, "learning_rate": 0.0002878888515540621, "loss": 3.0931, "step": 505300 }, { "epoch": 80.864, "grad_norm": 0.22080031037330627, "learning_rate": 0.0002878864514580583, "loss": 3.2186, "step": 505400 }, { "epoch": 80.88, "grad_norm": 0.20558969676494598, "learning_rate": 0.00028788405136205444, "loss": 3.1661, "step": 505500 }, { "epoch": 80.896, "grad_norm": 0.18128767609596252, "learning_rate": 0.0002878816512660506, "loss": 3.0184, "step": 505600 }, { "epoch": 80.912, "grad_norm": 0.21074041724205017, "learning_rate": 0.0002878792511700468, "loss": 2.9371, "step": 505700 }, { "epoch": 80.928, "grad_norm": 0.1896892786026001, "learning_rate": 0.0002878768510740429, "loss": 3.2599, "step": 505800 }, { "epoch": 80.944, "grad_norm": 0.18695037066936493, "learning_rate": 0.00028787445097803907, "loss": 3.3049, "step": 505900 }, { "epoch": 80.96, "grad_norm": 0.17692960798740387, "learning_rate": 0.00028787205088203524, "loss": 3.365, "step": 506000 }, { "epoch": 80.976, "grad_norm": 0.19153694808483124, "learning_rate": 0.0002878696507860314, "loss": 3.3996, "step": 506100 }, { "epoch": 80.992, "grad_norm": 0.22796916961669922, "learning_rate": 0.0002878672506900276, "loss": 3.1648, "step": 506200 }, { "epoch": 81.008, "grad_norm": 0.1820497065782547, "learning_rate": 0.00028786485059402374, "loss": 3.118, "step": 506300 }, { "epoch": 81.024, "grad_norm": 0.21449916064739227, "learning_rate": 0.00028786247449897993, "loss": 2.9771, "step": 506400 }, { "epoch": 81.04, "grad_norm": 0.19596756994724274, "learning_rate": 0.0002878600744029761, "loss": 3.2011, "step": 506500 }, { "epoch": 81.056, "grad_norm": 0.17105627059936523, "learning_rate": 0.00028785767430697227, "loss": 3.4881, "step": 506600 }, { "epoch": 81.072, "grad_norm": 0.1890411227941513, "learning_rate": 0.0002878552742109684, "loss": 3.1655, "step": 506700 }, { "epoch": 81.088, "grad_norm": 0.19726742804050446, "learning_rate": 0.00028785287411496456, "loss": 3.084, "step": 506800 }, { "epoch": 81.104, "grad_norm": 0.18177059292793274, "learning_rate": 0.0002878504740189607, "loss": 3.1022, "step": 506900 }, { "epoch": 81.12, "grad_norm": 0.18699797987937927, "learning_rate": 0.0002878480739229569, "loss": 3.3072, "step": 507000 }, { "epoch": 81.136, "grad_norm": 0.1811225712299347, "learning_rate": 0.00028784567382695306, "loss": 3.1208, "step": 507100 }, { "epoch": 81.152, "grad_norm": 0.18931542336940765, "learning_rate": 0.00028784327373094923, "loss": 3.1798, "step": 507200 }, { "epoch": 81.168, "grad_norm": 0.19433625042438507, "learning_rate": 0.00028784087363494535, "loss": 2.9662, "step": 507300 }, { "epoch": 81.184, "grad_norm": 0.16275151073932648, "learning_rate": 0.0002878384735389415, "loss": 3.0424, "step": 507400 }, { "epoch": 81.2, "grad_norm": 0.17493396997451782, "learning_rate": 0.0002878360734429377, "loss": 2.8945, "step": 507500 }, { "epoch": 81.216, "grad_norm": 0.31615015864372253, "learning_rate": 0.00028783367334693385, "loss": 3.0864, "step": 507600 }, { "epoch": 81.232, "grad_norm": 0.19376209378242493, "learning_rate": 0.00028783127325093, "loss": 3.3347, "step": 507700 }, { "epoch": 81.248, "grad_norm": 0.18994078040122986, "learning_rate": 0.00028782887315492614, "loss": 3.1925, "step": 507800 }, { "epoch": 81.264, "grad_norm": 0.19736643135547638, "learning_rate": 0.0002878264730589223, "loss": 3.1753, "step": 507900 }, { "epoch": 81.28, "grad_norm": 0.1866271048784256, "learning_rate": 0.0002878240729629185, "loss": 3.4736, "step": 508000 }, { "epoch": 81.296, "grad_norm": 0.18929608166217804, "learning_rate": 0.00028782167286691465, "loss": 3.2436, "step": 508100 }, { "epoch": 81.312, "grad_norm": 0.20221060514450073, "learning_rate": 0.0002878192727709108, "loss": 3.4921, "step": 508200 }, { "epoch": 81.328, "grad_norm": 0.17838534712791443, "learning_rate": 0.000287816872674907, "loss": 3.5317, "step": 508300 }, { "epoch": 81.344, "grad_norm": 0.23303034901618958, "learning_rate": 0.0002878144725789031, "loss": 3.4134, "step": 508400 }, { "epoch": 81.36, "grad_norm": 0.17076440155506134, "learning_rate": 0.00028781207248289927, "loss": 3.3355, "step": 508500 }, { "epoch": 81.376, "grad_norm": 0.17452403903007507, "learning_rate": 0.00028780967238689544, "loss": 3.2665, "step": 508600 }, { "epoch": 81.392, "grad_norm": 0.18871890008449554, "learning_rate": 0.0002878072722908916, "loss": 3.4473, "step": 508700 }, { "epoch": 81.408, "grad_norm": 0.19785840809345245, "learning_rate": 0.0002878048961958478, "loss": 3.2983, "step": 508800 }, { "epoch": 81.424, "grad_norm": 0.25487086176872253, "learning_rate": 0.00028780249609984396, "loss": 3.351, "step": 508900 }, { "epoch": 81.44, "grad_norm": 0.2574397921562195, "learning_rate": 0.00028780009600384013, "loss": 3.2769, "step": 509000 }, { "epoch": 81.456, "grad_norm": 0.20789854228496552, "learning_rate": 0.0002877976959078363, "loss": 3.4577, "step": 509100 }, { "epoch": 81.472, "grad_norm": 0.242406964302063, "learning_rate": 0.00028779529581183247, "loss": 3.6046, "step": 509200 }, { "epoch": 81.488, "grad_norm": 0.21923862397670746, "learning_rate": 0.0002877928957158286, "loss": 3.5664, "step": 509300 }, { "epoch": 81.504, "grad_norm": 0.20027251541614532, "learning_rate": 0.00028779049561982476, "loss": 3.321, "step": 509400 }, { "epoch": 81.52, "grad_norm": 0.20074312388896942, "learning_rate": 0.0002877880955238209, "loss": 3.4908, "step": 509500 }, { "epoch": 81.536, "grad_norm": 0.16516053676605225, "learning_rate": 0.0002877856954278171, "loss": 3.3467, "step": 509600 }, { "epoch": 81.552, "grad_norm": 0.18322880566120148, "learning_rate": 0.00028778329533181326, "loss": 3.1755, "step": 509700 }, { "epoch": 81.568, "grad_norm": 0.1927451640367508, "learning_rate": 0.00028778091923676945, "loss": 3.2554, "step": 509800 }, { "epoch": 81.584, "grad_norm": 0.18183305859565735, "learning_rate": 0.0002877785191407656, "loss": 3.5712, "step": 509900 }, { "epoch": 81.6, "grad_norm": 0.1922469139099121, "learning_rate": 0.0002877761190447618, "loss": 2.9266, "step": 510000 }, { "epoch": 81.616, "grad_norm": 0.21112938225269318, "learning_rate": 0.00028777371894875796, "loss": 3.3481, "step": 510100 }, { "epoch": 81.632, "grad_norm": 0.18518602848052979, "learning_rate": 0.0002877713188527541, "loss": 3.1324, "step": 510200 }, { "epoch": 81.648, "grad_norm": 0.18847383558750153, "learning_rate": 0.00028776891875675024, "loss": 3.3676, "step": 510300 }, { "epoch": 81.664, "grad_norm": 0.17456047236919403, "learning_rate": 0.0002877665186607464, "loss": 3.3627, "step": 510400 }, { "epoch": 81.68, "grad_norm": 0.23148687183856964, "learning_rate": 0.0002877641185647426, "loss": 3.5886, "step": 510500 }, { "epoch": 81.696, "grad_norm": 0.196181520819664, "learning_rate": 0.00028776171846873875, "loss": 2.9218, "step": 510600 }, { "epoch": 81.712, "grad_norm": 0.22097356617450714, "learning_rate": 0.00028775931837273487, "loss": 3.4118, "step": 510700 }, { "epoch": 81.728, "grad_norm": 0.19147901237010956, "learning_rate": 0.00028775691827673104, "loss": 3.4112, "step": 510800 }, { "epoch": 81.744, "grad_norm": 0.16951951384544373, "learning_rate": 0.0002877545181807272, "loss": 3.3154, "step": 510900 }, { "epoch": 81.76, "grad_norm": 0.177503764629364, "learning_rate": 0.0002877521180847234, "loss": 3.317, "step": 511000 }, { "epoch": 81.776, "grad_norm": 0.22289292514324188, "learning_rate": 0.00028774971798871954, "loss": 3.458, "step": 511100 }, { "epoch": 81.792, "grad_norm": 0.20992234349250793, "learning_rate": 0.0002877473178927157, "loss": 3.2212, "step": 511200 }, { "epoch": 81.808, "grad_norm": 0.18103329837322235, "learning_rate": 0.00028774491779671183, "loss": 3.4326, "step": 511300 }, { "epoch": 81.824, "grad_norm": 0.8440600037574768, "learning_rate": 0.000287742517700708, "loss": 3.2956, "step": 511400 }, { "epoch": 81.84, "grad_norm": 0.2368299961090088, "learning_rate": 0.00028774011760470417, "loss": 3.4415, "step": 511500 }, { "epoch": 81.856, "grad_norm": 0.20669177174568176, "learning_rate": 0.00028773771750870034, "loss": 3.3339, "step": 511600 }, { "epoch": 81.872, "grad_norm": 0.22502228617668152, "learning_rate": 0.0002877353174126965, "loss": 3.364, "step": 511700 }, { "epoch": 81.888, "grad_norm": 0.170541450381279, "learning_rate": 0.0002877329173166926, "loss": 3.4535, "step": 511800 }, { "epoch": 81.904, "grad_norm": 0.2020583301782608, "learning_rate": 0.0002877305172206888, "loss": 3.5576, "step": 511900 }, { "epoch": 81.92, "grad_norm": 0.21318817138671875, "learning_rate": 0.00028772811712468496, "loss": 3.2993, "step": 512000 }, { "epoch": 81.936, "grad_norm": 0.21072900295257568, "learning_rate": 0.00028772571702868113, "loss": 3.262, "step": 512100 }, { "epoch": 81.952, "grad_norm": 0.1905866265296936, "learning_rate": 0.0002877233169326773, "loss": 3.1969, "step": 512200 }, { "epoch": 81.968, "grad_norm": 0.22642862796783447, "learning_rate": 0.00028772091683667347, "loss": 3.2809, "step": 512300 }, { "epoch": 81.984, "grad_norm": 0.17521391808986664, "learning_rate": 0.0002877185167406696, "loss": 3.1129, "step": 512400 }, { "epoch": 82.0, "grad_norm": 0.23625656962394714, "learning_rate": 0.0002877161406456258, "loss": 3.3621, "step": 512500 }, { "epoch": 82.016, "grad_norm": 0.19945095479488373, "learning_rate": 0.000287713740549622, "loss": 3.2764, "step": 512600 }, { "epoch": 82.032, "grad_norm": 0.2189551591873169, "learning_rate": 0.0002877113404536181, "loss": 2.9999, "step": 512700 }, { "epoch": 82.048, "grad_norm": 0.19852474331855774, "learning_rate": 0.0002877089403576143, "loss": 3.4939, "step": 512800 }, { "epoch": 82.064, "grad_norm": 0.22569644451141357, "learning_rate": 0.00028770654026161045, "loss": 3.2023, "step": 512900 }, { "epoch": 82.08, "grad_norm": 0.17677758634090424, "learning_rate": 0.0002877041401656066, "loss": 3.0945, "step": 513000 }, { "epoch": 82.096, "grad_norm": 0.1853078454732895, "learning_rate": 0.0002877017400696028, "loss": 3.2378, "step": 513100 }, { "epoch": 82.112, "grad_norm": 0.1868475079536438, "learning_rate": 0.00028769933997359895, "loss": 3.075, "step": 513200 }, { "epoch": 82.128, "grad_norm": 0.2468918263912201, "learning_rate": 0.00028769693987759507, "loss": 3.5608, "step": 513300 }, { "epoch": 82.144, "grad_norm": 0.24104951322078705, "learning_rate": 0.00028769453978159124, "loss": 3.3468, "step": 513400 }, { "epoch": 82.16, "grad_norm": 0.22282245755195618, "learning_rate": 0.0002876921396855874, "loss": 3.4347, "step": 513500 }, { "epoch": 82.176, "grad_norm": 0.1983826607465744, "learning_rate": 0.0002876897395895836, "loss": 3.2958, "step": 513600 }, { "epoch": 82.192, "grad_norm": 0.18913131952285767, "learning_rate": 0.00028768733949357975, "loss": 3.1614, "step": 513700 }, { "epoch": 82.208, "grad_norm": 0.20283183455467224, "learning_rate": 0.00028768493939757586, "loss": 3.302, "step": 513800 }, { "epoch": 82.224, "grad_norm": 0.22260057926177979, "learning_rate": 0.00028768253930157203, "loss": 3.1036, "step": 513900 }, { "epoch": 82.24, "grad_norm": 0.2049385905265808, "learning_rate": 0.0002876801392055682, "loss": 3.1531, "step": 514000 }, { "epoch": 82.256, "grad_norm": 0.19235225021839142, "learning_rate": 0.00028767773910956437, "loss": 3.0788, "step": 514100 }, { "epoch": 82.272, "grad_norm": 0.17928092181682587, "learning_rate": 0.00028767533901356054, "loss": 3.1586, "step": 514200 }, { "epoch": 82.288, "grad_norm": 0.20029732584953308, "learning_rate": 0.0002876729389175567, "loss": 3.2807, "step": 514300 }, { "epoch": 82.304, "grad_norm": 0.1503312587738037, "learning_rate": 0.0002876705388215528, "loss": 3.4846, "step": 514400 }, { "epoch": 82.32, "grad_norm": 0.21513989567756653, "learning_rate": 0.000287668138725549, "loss": 2.9441, "step": 514500 }, { "epoch": 82.336, "grad_norm": 0.19994869828224182, "learning_rate": 0.00028766573862954516, "loss": 3.5358, "step": 514600 }, { "epoch": 82.352, "grad_norm": 0.1702757030725479, "learning_rate": 0.00028766333853354133, "loss": 3.1243, "step": 514700 }, { "epoch": 82.368, "grad_norm": 0.19547484815120697, "learning_rate": 0.0002876609384375375, "loss": 3.2633, "step": 514800 }, { "epoch": 82.384, "grad_norm": 0.21057911217212677, "learning_rate": 0.0002876585383415336, "loss": 3.2059, "step": 514900 }, { "epoch": 82.4, "grad_norm": 0.19824661314487457, "learning_rate": 0.0002876561382455298, "loss": 3.2377, "step": 515000 }, { "epoch": 82.416, "grad_norm": 0.21216131746768951, "learning_rate": 0.00028765373814952595, "loss": 3.0808, "step": 515100 }, { "epoch": 82.432, "grad_norm": 0.17237813770771027, "learning_rate": 0.0002876513380535221, "loss": 3.2486, "step": 515200 }, { "epoch": 82.448, "grad_norm": 0.19697701930999756, "learning_rate": 0.0002876489379575183, "loss": 3.1579, "step": 515300 }, { "epoch": 82.464, "grad_norm": 0.23096217215061188, "learning_rate": 0.00028764653786151446, "loss": 3.2073, "step": 515400 }, { "epoch": 82.48, "grad_norm": 0.19257155060768127, "learning_rate": 0.0002876441377655106, "loss": 3.3042, "step": 515500 }, { "epoch": 82.496, "grad_norm": 0.22561697661876678, "learning_rate": 0.00028764173766950674, "loss": 3.1075, "step": 515600 }, { "epoch": 82.512, "grad_norm": 0.22609488666057587, "learning_rate": 0.0002876393375735029, "loss": 3.3276, "step": 515700 }, { "epoch": 82.528, "grad_norm": 0.2119765281677246, "learning_rate": 0.0002876369374774991, "loss": 3.1132, "step": 515800 }, { "epoch": 82.544, "grad_norm": 0.2053445726633072, "learning_rate": 0.00028763453738149525, "loss": 3.381, "step": 515900 }, { "epoch": 82.56, "grad_norm": 0.2010418027639389, "learning_rate": 0.0002876321372854914, "loss": 3.1503, "step": 516000 }, { "epoch": 82.576, "grad_norm": 0.23447327315807343, "learning_rate": 0.00028762973718948753, "loss": 3.4207, "step": 516100 }, { "epoch": 82.592, "grad_norm": 0.15631107985973358, "learning_rate": 0.0002876273370934837, "loss": 3.3339, "step": 516200 }, { "epoch": 82.608, "grad_norm": 0.2287520170211792, "learning_rate": 0.00028762493699747987, "loss": 3.1895, "step": 516300 }, { "epoch": 82.624, "grad_norm": 0.20766079425811768, "learning_rate": 0.00028762253690147604, "loss": 3.3965, "step": 516400 }, { "epoch": 82.64, "grad_norm": 0.20519259572029114, "learning_rate": 0.0002876201368054722, "loss": 3.4958, "step": 516500 }, { "epoch": 82.656, "grad_norm": 0.17191797494888306, "learning_rate": 0.0002876177367094683, "loss": 3.5529, "step": 516600 }, { "epoch": 82.672, "grad_norm": 0.2149634212255478, "learning_rate": 0.0002876153366134645, "loss": 3.2336, "step": 516700 }, { "epoch": 82.688, "grad_norm": 0.18788589537143707, "learning_rate": 0.00028761293651746066, "loss": 3.2934, "step": 516800 }, { "epoch": 82.704, "grad_norm": 0.22389648854732513, "learning_rate": 0.00028761053642145683, "loss": 3.2377, "step": 516900 }, { "epoch": 82.72, "grad_norm": 0.23382581770420074, "learning_rate": 0.000287608160326413, "loss": 3.0409, "step": 517000 }, { "epoch": 82.736, "grad_norm": 0.17369185388088226, "learning_rate": 0.0002876057602304092, "loss": 3.6608, "step": 517100 }, { "epoch": 82.752, "grad_norm": 0.24161741137504578, "learning_rate": 0.00028760336013440536, "loss": 2.9835, "step": 517200 }, { "epoch": 82.768, "grad_norm": 0.38931310176849365, "learning_rate": 0.00028760096003840153, "loss": 3.2176, "step": 517300 }, { "epoch": 82.784, "grad_norm": 0.19723406434059143, "learning_rate": 0.0002875985599423977, "loss": 3.3959, "step": 517400 }, { "epoch": 82.8, "grad_norm": 0.15975803136825562, "learning_rate": 0.0002875961598463938, "loss": 3.3927, "step": 517500 }, { "epoch": 82.816, "grad_norm": 0.21099808812141418, "learning_rate": 0.00028759375975039, "loss": 3.0475, "step": 517600 }, { "epoch": 82.832, "grad_norm": 0.1773194521665573, "learning_rate": 0.00028759135965438615, "loss": 3.2736, "step": 517700 }, { "epoch": 82.848, "grad_norm": 0.2625787854194641, "learning_rate": 0.0002875889595583823, "loss": 3.1016, "step": 517800 }, { "epoch": 82.864, "grad_norm": 0.19327396154403687, "learning_rate": 0.0002875865594623785, "loss": 3.2408, "step": 517900 }, { "epoch": 82.88, "grad_norm": 0.19634591042995453, "learning_rate": 0.00028758415936637466, "loss": 3.3628, "step": 518000 }, { "epoch": 82.896, "grad_norm": 0.2787955701351166, "learning_rate": 0.0002875817592703708, "loss": 3.5948, "step": 518100 }, { "epoch": 82.912, "grad_norm": 0.20431341230869293, "learning_rate": 0.00028757935917436694, "loss": 3.3645, "step": 518200 }, { "epoch": 82.928, "grad_norm": 0.20003816485404968, "learning_rate": 0.0002875769590783631, "loss": 3.1916, "step": 518300 }, { "epoch": 82.944, "grad_norm": 0.17225506901741028, "learning_rate": 0.0002875745589823593, "loss": 3.236, "step": 518400 }, { "epoch": 82.96, "grad_norm": 0.2433798760175705, "learning_rate": 0.00028757215888635545, "loss": 3.0958, "step": 518500 }, { "epoch": 82.976, "grad_norm": 0.22284001111984253, "learning_rate": 0.00028756975879035157, "loss": 3.2215, "step": 518600 }, { "epoch": 82.992, "grad_norm": 0.2101777195930481, "learning_rate": 0.00028756735869434774, "loss": 3.3022, "step": 518700 }, { "epoch": 83.008, "grad_norm": 0.24826256930828094, "learning_rate": 0.0002875649585983439, "loss": 3.085, "step": 518800 }, { "epoch": 83.024, "grad_norm": 0.20440295338630676, "learning_rate": 0.0002875625585023401, "loss": 3.0902, "step": 518900 }, { "epoch": 83.04, "grad_norm": 0.22137342393398285, "learning_rate": 0.00028756015840633624, "loss": 3.1057, "step": 519000 }, { "epoch": 83.056, "grad_norm": 0.21623703837394714, "learning_rate": 0.0002875577583103324, "loss": 3.5203, "step": 519100 }, { "epoch": 83.072, "grad_norm": 0.24218744039535522, "learning_rate": 0.00028755535821432853, "loss": 3.2141, "step": 519200 }, { "epoch": 83.088, "grad_norm": 0.18616627156734467, "learning_rate": 0.0002875529581183247, "loss": 3.0899, "step": 519300 }, { "epoch": 83.104, "grad_norm": 0.21814650297164917, "learning_rate": 0.00028755055802232087, "loss": 3.0577, "step": 519400 }, { "epoch": 83.12, "grad_norm": 0.2246387004852295, "learning_rate": 0.00028754815792631704, "loss": 3.1863, "step": 519500 }, { "epoch": 83.136, "grad_norm": 0.20025856792926788, "learning_rate": 0.0002875457578303132, "loss": 3.2988, "step": 519600 }, { "epoch": 83.152, "grad_norm": 0.18996724486351013, "learning_rate": 0.0002875433817352694, "loss": 3.0028, "step": 519700 }, { "epoch": 83.168, "grad_norm": 0.21390938758850098, "learning_rate": 0.00028754098163926556, "loss": 3.4829, "step": 519800 }, { "epoch": 83.184, "grad_norm": 0.19604432582855225, "learning_rate": 0.00028753858154326173, "loss": 3.354, "step": 519900 }, { "epoch": 83.2, "grad_norm": 0.19068492949008942, "learning_rate": 0.0002875361814472579, "loss": 3.1365, "step": 520000 }, { "epoch": 83.216, "grad_norm": 0.1746467798948288, "learning_rate": 0.000287533781351254, "loss": 3.6296, "step": 520100 }, { "epoch": 83.232, "grad_norm": 0.5951684713363647, "learning_rate": 0.0002875313812552502, "loss": 3.1539, "step": 520200 }, { "epoch": 83.248, "grad_norm": 0.22298580408096313, "learning_rate": 0.00028752898115924635, "loss": 3.3065, "step": 520300 }, { "epoch": 83.264, "grad_norm": 0.20346452295780182, "learning_rate": 0.0002875265810632425, "loss": 3.1742, "step": 520400 }, { "epoch": 83.28, "grad_norm": 0.2530750036239624, "learning_rate": 0.0002875241809672387, "loss": 3.4713, "step": 520500 }, { "epoch": 83.296, "grad_norm": 0.18968316912651062, "learning_rate": 0.0002875217808712348, "loss": 3.4136, "step": 520600 }, { "epoch": 83.312, "grad_norm": 0.18561182916164398, "learning_rate": 0.000287519380775231, "loss": 3.106, "step": 520700 }, { "epoch": 83.328, "grad_norm": 0.16972662508487701, "learning_rate": 0.00028751698067922715, "loss": 3.3476, "step": 520800 }, { "epoch": 83.344, "grad_norm": 0.19019530713558197, "learning_rate": 0.0002875145805832233, "loss": 3.1297, "step": 520900 }, { "epoch": 83.36, "grad_norm": 0.2020149827003479, "learning_rate": 0.0002875121804872195, "loss": 3.3013, "step": 521000 }, { "epoch": 83.376, "grad_norm": 0.23018641769886017, "learning_rate": 0.00028750978039121565, "loss": 3.2732, "step": 521100 }, { "epoch": 83.392, "grad_norm": 0.16380633413791656, "learning_rate": 0.00028750738029521177, "loss": 3.0776, "step": 521200 }, { "epoch": 83.408, "grad_norm": 0.23235613107681274, "learning_rate": 0.00028750498019920794, "loss": 3.213, "step": 521300 }, { "epoch": 83.424, "grad_norm": 0.18076905608177185, "learning_rate": 0.0002875025801032041, "loss": 3.1943, "step": 521400 }, { "epoch": 83.44, "grad_norm": 0.20028777420520782, "learning_rate": 0.0002875001800072003, "loss": 3.3461, "step": 521500 }, { "epoch": 83.456, "grad_norm": 0.21752874553203583, "learning_rate": 0.00028749777991119644, "loss": 3.3649, "step": 521600 }, { "epoch": 83.472, "grad_norm": 0.17570151388645172, "learning_rate": 0.00028749537981519256, "loss": 3.2366, "step": 521700 }, { "epoch": 83.488, "grad_norm": 0.18130946159362793, "learning_rate": 0.00028749297971918873, "loss": 3.1523, "step": 521800 }, { "epoch": 83.504, "grad_norm": 0.19461384415626526, "learning_rate": 0.0002874905796231849, "loss": 3.0307, "step": 521900 }, { "epoch": 83.52, "grad_norm": 0.20096386969089508, "learning_rate": 0.00028748817952718107, "loss": 3.0693, "step": 522000 }, { "epoch": 83.536, "grad_norm": 0.1872626692056656, "learning_rate": 0.00028748580343213726, "loss": 3.384, "step": 522100 }, { "epoch": 83.552, "grad_norm": 0.16634692251682281, "learning_rate": 0.0002874834033361334, "loss": 3.2468, "step": 522200 }, { "epoch": 83.568, "grad_norm": 0.24804724752902985, "learning_rate": 0.0002874810032401296, "loss": 3.168, "step": 522300 }, { "epoch": 83.584, "grad_norm": 0.17869065701961517, "learning_rate": 0.00028747860314412576, "loss": 3.5781, "step": 522400 }, { "epoch": 83.6, "grad_norm": 0.2136060744524002, "learning_rate": 0.00028747620304812193, "loss": 3.0711, "step": 522500 }, { "epoch": 83.616, "grad_norm": 0.2162410318851471, "learning_rate": 0.00028747380295211805, "loss": 3.2058, "step": 522600 }, { "epoch": 83.632, "grad_norm": 0.1965533345937729, "learning_rate": 0.0002874714028561142, "loss": 3.0725, "step": 522700 }, { "epoch": 83.648, "grad_norm": 0.17811721563339233, "learning_rate": 0.0002874690027601104, "loss": 3.3862, "step": 522800 }, { "epoch": 83.664, "grad_norm": 0.2106947898864746, "learning_rate": 0.00028746660266410656, "loss": 3.5108, "step": 522900 }, { "epoch": 83.68, "grad_norm": 0.20225031673908234, "learning_rate": 0.0002874642025681027, "loss": 3.3002, "step": 523000 }, { "epoch": 83.696, "grad_norm": 0.5723943114280701, "learning_rate": 0.0002874618024720989, "loss": 3.0884, "step": 523100 }, { "epoch": 83.712, "grad_norm": 0.2248970866203308, "learning_rate": 0.000287459402376095, "loss": 3.0272, "step": 523200 }, { "epoch": 83.728, "grad_norm": 0.23147505521774292, "learning_rate": 0.0002874570022800912, "loss": 3.3576, "step": 523300 }, { "epoch": 83.744, "grad_norm": 0.2020985335111618, "learning_rate": 0.00028745460218408735, "loss": 3.233, "step": 523400 }, { "epoch": 83.76, "grad_norm": 0.21133960783481598, "learning_rate": 0.0002874522020880835, "loss": 3.007, "step": 523500 }, { "epoch": 83.776, "grad_norm": 0.21259517967700958, "learning_rate": 0.0002874498019920797, "loss": 3.1849, "step": 523600 }, { "epoch": 83.792, "grad_norm": 0.20451851189136505, "learning_rate": 0.0002874474018960758, "loss": 3.2343, "step": 523700 }, { "epoch": 83.808, "grad_norm": 0.22563092410564423, "learning_rate": 0.00028744500180007197, "loss": 3.173, "step": 523800 }, { "epoch": 83.824, "grad_norm": 0.2254587560892105, "learning_rate": 0.00028744260170406814, "loss": 3.2403, "step": 523900 }, { "epoch": 83.84, "grad_norm": 0.178816556930542, "learning_rate": 0.0002874402016080643, "loss": 3.2577, "step": 524000 }, { "epoch": 83.856, "grad_norm": 0.21213556826114655, "learning_rate": 0.0002874378015120605, "loss": 3.0816, "step": 524100 }, { "epoch": 83.872, "grad_norm": 0.2274441123008728, "learning_rate": 0.00028743540141605665, "loss": 3.2086, "step": 524200 }, { "epoch": 83.888, "grad_norm": 0.1957772672176361, "learning_rate": 0.00028743300132005276, "loss": 3.3279, "step": 524300 }, { "epoch": 83.904, "grad_norm": 0.17492558062076569, "learning_rate": 0.00028743062522500895, "loss": 3.3701, "step": 524400 }, { "epoch": 83.92, "grad_norm": 0.22368937730789185, "learning_rate": 0.0002874282251290051, "loss": 3.1128, "step": 524500 }, { "epoch": 83.936, "grad_norm": 0.19607572257518768, "learning_rate": 0.0002874258250330013, "loss": 3.1852, "step": 524600 }, { "epoch": 83.952, "grad_norm": 0.18495967984199524, "learning_rate": 0.00028742342493699746, "loss": 3.7221, "step": 524700 }, { "epoch": 83.968, "grad_norm": 0.19189587235450745, "learning_rate": 0.0002874210248409936, "loss": 3.3112, "step": 524800 }, { "epoch": 83.984, "grad_norm": 0.21571657061576843, "learning_rate": 0.0002874186247449898, "loss": 3.3591, "step": 524900 }, { "epoch": 84.0, "grad_norm": 0.17699305713176727, "learning_rate": 0.00028741622464898596, "loss": 3.0737, "step": 525000 }, { "epoch": 84.016, "grad_norm": 0.20667868852615356, "learning_rate": 0.00028741382455298213, "loss": 3.3039, "step": 525100 }, { "epoch": 84.032, "grad_norm": 0.20704124867916107, "learning_rate": 0.00028741142445697825, "loss": 3.216, "step": 525200 }, { "epoch": 84.048, "grad_norm": 0.263192355632782, "learning_rate": 0.0002874090243609744, "loss": 3.1375, "step": 525300 }, { "epoch": 84.064, "grad_norm": 0.20215116441249847, "learning_rate": 0.0002874066242649706, "loss": 3.0864, "step": 525400 }, { "epoch": 84.08, "grad_norm": 0.24419277906417847, "learning_rate": 0.00028740422416896676, "loss": 3.1263, "step": 525500 }, { "epoch": 84.096, "grad_norm": 0.20155009627342224, "learning_rate": 0.0002874018240729629, "loss": 3.2928, "step": 525600 }, { "epoch": 84.112, "grad_norm": 0.18789434432983398, "learning_rate": 0.00028739942397695904, "loss": 3.3552, "step": 525700 }, { "epoch": 84.128, "grad_norm": 0.22908055782318115, "learning_rate": 0.0002873970238809552, "loss": 2.9903, "step": 525800 }, { "epoch": 84.144, "grad_norm": 0.21323330700397491, "learning_rate": 0.0002873946237849514, "loss": 3.1708, "step": 525900 }, { "epoch": 84.16, "grad_norm": 0.1927606612443924, "learning_rate": 0.00028739222368894755, "loss": 3.3335, "step": 526000 }, { "epoch": 84.176, "grad_norm": 0.19760139286518097, "learning_rate": 0.0002873898235929437, "loss": 3.2518, "step": 526100 }, { "epoch": 84.192, "grad_norm": 0.22990845143795013, "learning_rate": 0.0002873874234969399, "loss": 3.2377, "step": 526200 }, { "epoch": 84.208, "grad_norm": 0.18854781985282898, "learning_rate": 0.000287385023400936, "loss": 3.2518, "step": 526300 }, { "epoch": 84.224, "grad_norm": 0.23988498747348785, "learning_rate": 0.00028738262330493217, "loss": 2.9211, "step": 526400 }, { "epoch": 84.24, "grad_norm": 0.18887266516685486, "learning_rate": 0.00028738022320892834, "loss": 3.2192, "step": 526500 }, { "epoch": 84.256, "grad_norm": 0.18886294960975647, "learning_rate": 0.0002873778231129245, "loss": 3.2911, "step": 526600 }, { "epoch": 84.272, "grad_norm": 0.2305757701396942, "learning_rate": 0.0002873754230169207, "loss": 3.1635, "step": 526700 }, { "epoch": 84.288, "grad_norm": 0.19261591136455536, "learning_rate": 0.0002873730229209168, "loss": 3.1229, "step": 526800 }, { "epoch": 84.304, "grad_norm": 0.2184566706418991, "learning_rate": 0.000287370646825873, "loss": 2.9707, "step": 526900 }, { "epoch": 84.32, "grad_norm": 0.18747203052043915, "learning_rate": 0.00028736824672986915, "loss": 3.0775, "step": 527000 }, { "epoch": 84.336, "grad_norm": 0.1831730604171753, "learning_rate": 0.0002873658466338653, "loss": 3.3825, "step": 527100 }, { "epoch": 84.352, "grad_norm": 0.19208945333957672, "learning_rate": 0.0002873634465378615, "loss": 3.2277, "step": 527200 }, { "epoch": 84.368, "grad_norm": 0.22876760363578796, "learning_rate": 0.00028736104644185766, "loss": 3.1705, "step": 527300 }, { "epoch": 84.384, "grad_norm": 0.19228973984718323, "learning_rate": 0.0002873586463458538, "loss": 3.4561, "step": 527400 }, { "epoch": 84.4, "grad_norm": 0.17882364988327026, "learning_rate": 0.00028735624624984994, "loss": 3.1734, "step": 527500 }, { "epoch": 84.416, "grad_norm": 0.22104524075984955, "learning_rate": 0.0002873538461538461, "loss": 3.334, "step": 527600 }, { "epoch": 84.432, "grad_norm": 0.21842697262763977, "learning_rate": 0.0002873514460578423, "loss": 3.4256, "step": 527700 }, { "epoch": 84.448, "grad_norm": 0.2387317270040512, "learning_rate": 0.00028734904596183845, "loss": 3.3382, "step": 527800 }, { "epoch": 84.464, "grad_norm": 0.25075796246528625, "learning_rate": 0.0002873466458658346, "loss": 3.1976, "step": 527900 }, { "epoch": 84.48, "grad_norm": 0.22377777099609375, "learning_rate": 0.0002873442457698308, "loss": 3.4186, "step": 528000 }, { "epoch": 84.496, "grad_norm": 0.1857343167066574, "learning_rate": 0.00028734184567382696, "loss": 3.145, "step": 528100 }, { "epoch": 84.512, "grad_norm": 0.1752808690071106, "learning_rate": 0.00028733944557782313, "loss": 3.3125, "step": 528200 }, { "epoch": 84.528, "grad_norm": 0.20930412411689758, "learning_rate": 0.00028733704548181924, "loss": 3.3029, "step": 528300 }, { "epoch": 84.544, "grad_norm": 0.1968115121126175, "learning_rate": 0.0002873346453858154, "loss": 3.4598, "step": 528400 }, { "epoch": 84.56, "grad_norm": 0.20745854079723358, "learning_rate": 0.0002873322452898116, "loss": 3.1335, "step": 528500 }, { "epoch": 84.576, "grad_norm": 0.19791452586650848, "learning_rate": 0.00028732984519380775, "loss": 3.3167, "step": 528600 }, { "epoch": 84.592, "grad_norm": 0.19748631119728088, "learning_rate": 0.0002873274450978039, "loss": 3.4829, "step": 528700 }, { "epoch": 84.608, "grad_norm": 0.22433574497699738, "learning_rate": 0.00028732504500180003, "loss": 3.4244, "step": 528800 }, { "epoch": 84.624, "grad_norm": 0.23089340329170227, "learning_rate": 0.0002873226449057962, "loss": 3.1445, "step": 528900 }, { "epoch": 84.64, "grad_norm": 0.24120698869228363, "learning_rate": 0.00028732024480979237, "loss": 3.3049, "step": 529000 }, { "epoch": 84.656, "grad_norm": 0.15573149919509888, "learning_rate": 0.00028731784471378854, "loss": 3.0518, "step": 529100 }, { "epoch": 84.672, "grad_norm": 0.17704454064369202, "learning_rate": 0.0002873154446177847, "loss": 3.3423, "step": 529200 }, { "epoch": 84.688, "grad_norm": 0.21727091073989868, "learning_rate": 0.0002873130445217809, "loss": 3.2377, "step": 529300 }, { "epoch": 84.704, "grad_norm": 0.205272376537323, "learning_rate": 0.000287310644425777, "loss": 3.2431, "step": 529400 }, { "epoch": 84.72, "grad_norm": 0.21376042068004608, "learning_rate": 0.0002873082683307332, "loss": 3.4298, "step": 529500 }, { "epoch": 84.736, "grad_norm": 0.1962851732969284, "learning_rate": 0.00028730586823472935, "loss": 3.152, "step": 529600 }, { "epoch": 84.752, "grad_norm": 0.1770128756761551, "learning_rate": 0.0002873034681387255, "loss": 3.3572, "step": 529700 }, { "epoch": 84.768, "grad_norm": 0.19204385578632355, "learning_rate": 0.0002873010680427217, "loss": 3.2751, "step": 529800 }, { "epoch": 84.784, "grad_norm": 0.21514277160167694, "learning_rate": 0.0002872986679467178, "loss": 3.56, "step": 529900 }, { "epoch": 84.8, "grad_norm": 0.17253080010414124, "learning_rate": 0.000287296267850714, "loss": 3.1124, "step": 530000 }, { "epoch": 84.816, "grad_norm": 0.21355006098747253, "learning_rate": 0.00028729386775471014, "loss": 3.3986, "step": 530100 }, { "epoch": 84.832, "grad_norm": 0.17842072248458862, "learning_rate": 0.0002872914676587063, "loss": 3.3995, "step": 530200 }, { "epoch": 84.848, "grad_norm": 0.2089017778635025, "learning_rate": 0.0002872890675627025, "loss": 3.2692, "step": 530300 }, { "epoch": 84.864, "grad_norm": 0.24470074474811554, "learning_rate": 0.00028728666746669865, "loss": 3.3686, "step": 530400 }, { "epoch": 84.88, "grad_norm": 0.24530580639839172, "learning_rate": 0.00028728426737069477, "loss": 3.2916, "step": 530500 }, { "epoch": 84.896, "grad_norm": 0.1917545646429062, "learning_rate": 0.00028728186727469094, "loss": 3.1407, "step": 530600 }, { "epoch": 84.912, "grad_norm": 0.2069588005542755, "learning_rate": 0.0002872794671786871, "loss": 3.4391, "step": 530700 }, { "epoch": 84.928, "grad_norm": 0.1923648566007614, "learning_rate": 0.0002872770670826833, "loss": 3.373, "step": 530800 }, { "epoch": 84.944, "grad_norm": 0.19100335240364075, "learning_rate": 0.00028727466698667944, "loss": 3.0401, "step": 530900 }, { "epoch": 84.96, "grad_norm": 0.17429256439208984, "learning_rate": 0.0002872722668906756, "loss": 3.2214, "step": 531000 }, { "epoch": 84.976, "grad_norm": 0.20314279198646545, "learning_rate": 0.0002872698667946718, "loss": 3.304, "step": 531100 }, { "epoch": 84.992, "grad_norm": 0.19170233607292175, "learning_rate": 0.00028726746669866795, "loss": 3.1636, "step": 531200 }, { "epoch": 85.008, "grad_norm": 0.19781112670898438, "learning_rate": 0.0002872650666026641, "loss": 3.5191, "step": 531300 }, { "epoch": 85.024, "grad_norm": 0.22758899629116058, "learning_rate": 0.00028726266650666024, "loss": 3.3086, "step": 531400 }, { "epoch": 85.04, "grad_norm": NaN, "learning_rate": 0.0002872602664106564, "loss": 3.2517, "step": 531500 }, { "epoch": 85.056, "grad_norm": 0.20222727954387665, "learning_rate": 0.00028725791431657267, "loss": 3.3836, "step": 531600 }, { "epoch": 85.072, "grad_norm": 0.2079700082540512, "learning_rate": 0.0002872555142205688, "loss": 3.3149, "step": 531700 }, { "epoch": 85.088, "grad_norm": 0.1966814249753952, "learning_rate": 0.00028725311412456495, "loss": 3.2101, "step": 531800 }, { "epoch": 85.104, "grad_norm": 0.20170478522777557, "learning_rate": 0.0002872507140285611, "loss": 3.4396, "step": 531900 }, { "epoch": 85.12, "grad_norm": 0.23679381608963013, "learning_rate": 0.0002872483139325573, "loss": 3.4152, "step": 532000 }, { "epoch": 85.136, "grad_norm": 0.23783373832702637, "learning_rate": 0.00028724591383655346, "loss": 3.2319, "step": 532100 }, { "epoch": 85.152, "grad_norm": 0.20446625351905823, "learning_rate": 0.00028724351374054963, "loss": 3.5225, "step": 532200 }, { "epoch": 85.168, "grad_norm": 0.24894802272319794, "learning_rate": 0.00028724111364454574, "loss": 3.1607, "step": 532300 }, { "epoch": 85.184, "grad_norm": 0.3036673069000244, "learning_rate": 0.0002872387135485419, "loss": 3.2529, "step": 532400 }, { "epoch": 85.2, "grad_norm": 0.17959702014923096, "learning_rate": 0.0002872363134525381, "loss": 3.0348, "step": 532500 }, { "epoch": 85.216, "grad_norm": 0.21105308830738068, "learning_rate": 0.00028723391335653425, "loss": 3.4639, "step": 532600 }, { "epoch": 85.232, "grad_norm": 0.20549790561199188, "learning_rate": 0.0002872315132605304, "loss": 3.5864, "step": 532700 }, { "epoch": 85.248, "grad_norm": 0.2244565337896347, "learning_rate": 0.00028722911316452653, "loss": 3.728, "step": 532800 }, { "epoch": 85.264, "grad_norm": 0.20545077323913574, "learning_rate": 0.0002872267130685227, "loss": 3.2047, "step": 532900 }, { "epoch": 85.28, "grad_norm": 0.225424662232399, "learning_rate": 0.00028722431297251887, "loss": 3.144, "step": 533000 }, { "epoch": 85.296, "grad_norm": 0.23552089929580688, "learning_rate": 0.00028722191287651504, "loss": 3.5472, "step": 533100 }, { "epoch": 85.312, "grad_norm": 0.22086293995380402, "learning_rate": 0.0002872195127805112, "loss": 3.1677, "step": 533200 }, { "epoch": 85.328, "grad_norm": 0.23567910492420197, "learning_rate": 0.0002872171126845074, "loss": 3.5578, "step": 533300 }, { "epoch": 85.344, "grad_norm": 0.21976234018802643, "learning_rate": 0.0002872147125885035, "loss": 3.3528, "step": 533400 }, { "epoch": 85.36, "grad_norm": 0.2562665343284607, "learning_rate": 0.00028721231249249966, "loss": 3.2932, "step": 533500 }, { "epoch": 85.376, "grad_norm": 0.19112057983875275, "learning_rate": 0.00028720991239649583, "loss": 3.0792, "step": 533600 }, { "epoch": 85.392, "grad_norm": 0.19445466995239258, "learning_rate": 0.000287207512300492, "loss": 3.2365, "step": 533700 }, { "epoch": 85.408, "grad_norm": 0.20931798219680786, "learning_rate": 0.00028720511220448817, "loss": 3.4525, "step": 533800 }, { "epoch": 85.424, "grad_norm": 0.20011837780475616, "learning_rate": 0.0002872027121084843, "loss": 3.2703, "step": 533900 }, { "epoch": 85.44, "grad_norm": 0.24512815475463867, "learning_rate": 0.00028720031201248046, "loss": 3.2141, "step": 534000 }, { "epoch": 85.456, "grad_norm": 0.17623035609722137, "learning_rate": 0.0002871979119164766, "loss": 3.0301, "step": 534100 }, { "epoch": 85.472, "grad_norm": 0.16736280918121338, "learning_rate": 0.0002871955118204728, "loss": 3.23, "step": 534200 }, { "epoch": 85.488, "grad_norm": 0.20526567101478577, "learning_rate": 0.00028719311172446896, "loss": 3.3342, "step": 534300 }, { "epoch": 85.504, "grad_norm": 0.17738397419452667, "learning_rate": 0.00028719073562942515, "loss": 3.4953, "step": 534400 }, { "epoch": 85.52, "grad_norm": 0.18479827046394348, "learning_rate": 0.0002871883355334213, "loss": 3.4641, "step": 534500 }, { "epoch": 85.536, "grad_norm": 0.2121117264032364, "learning_rate": 0.0002871859354374175, "loss": 3.0788, "step": 534600 }, { "epoch": 85.552, "grad_norm": 0.19677281379699707, "learning_rate": 0.00028718353534141366, "loss": 3.3425, "step": 534700 }, { "epoch": 85.568, "grad_norm": 0.22496220469474792, "learning_rate": 0.0002871811352454098, "loss": 3.1667, "step": 534800 }, { "epoch": 85.584, "grad_norm": 0.21340025961399078, "learning_rate": 0.00028717873514940594, "loss": 3.2971, "step": 534900 }, { "epoch": 85.6, "grad_norm": 0.22133372724056244, "learning_rate": 0.0002871763350534021, "loss": 3.4563, "step": 535000 }, { "epoch": 85.616, "grad_norm": 0.21510477364063263, "learning_rate": 0.0002871739349573983, "loss": 3.0348, "step": 535100 }, { "epoch": 85.632, "grad_norm": 0.20055602490901947, "learning_rate": 0.00028717153486139445, "loss": 3.272, "step": 535200 }, { "epoch": 85.648, "grad_norm": 0.2368495613336563, "learning_rate": 0.0002871691347653906, "loss": 3.2747, "step": 535300 }, { "epoch": 85.664, "grad_norm": 0.17874714732170105, "learning_rate": 0.00028716673466938674, "loss": 3.2368, "step": 535400 }, { "epoch": 85.68, "grad_norm": 0.21435953676700592, "learning_rate": 0.0002871643345733829, "loss": 3.3286, "step": 535500 }, { "epoch": 85.696, "grad_norm": 0.19888566434383392, "learning_rate": 0.0002871619344773791, "loss": 3.0821, "step": 535600 }, { "epoch": 85.712, "grad_norm": 0.18102145195007324, "learning_rate": 0.00028715953438137524, "loss": 3.0583, "step": 535700 }, { "epoch": 85.728, "grad_norm": 0.19459478557109833, "learning_rate": 0.0002871571342853714, "loss": 3.116, "step": 535800 }, { "epoch": 85.744, "grad_norm": 0.19962053000926971, "learning_rate": 0.00028715473418936753, "loss": 3.1918, "step": 535900 }, { "epoch": 85.76, "grad_norm": 0.21262916922569275, "learning_rate": 0.0002871523340933637, "loss": 3.3905, "step": 536000 }, { "epoch": 85.776, "grad_norm": 0.20015409588813782, "learning_rate": 0.00028714993399735987, "loss": 3.4413, "step": 536100 }, { "epoch": 85.792, "grad_norm": 0.23237958550453186, "learning_rate": 0.00028714753390135603, "loss": 3.3335, "step": 536200 }, { "epoch": 85.808, "grad_norm": 0.21179480850696564, "learning_rate": 0.0002871451338053522, "loss": 2.9445, "step": 536300 }, { "epoch": 85.824, "grad_norm": 0.20587605237960815, "learning_rate": 0.0002871427337093484, "loss": 3.2058, "step": 536400 }, { "epoch": 85.84, "grad_norm": 0.18819034099578857, "learning_rate": 0.0002871403336133445, "loss": 3.142, "step": 536500 }, { "epoch": 85.856, "grad_norm": 0.17988522350788116, "learning_rate": 0.00028713793351734066, "loss": 3.3494, "step": 536600 }, { "epoch": 85.872, "grad_norm": 0.2129397690296173, "learning_rate": 0.0002871355334213368, "loss": 3.2698, "step": 536700 }, { "epoch": 85.888, "grad_norm": 0.20438863337039948, "learning_rate": 0.000287133133325333, "loss": 3.4673, "step": 536800 }, { "epoch": 85.904, "grad_norm": 0.20455360412597656, "learning_rate": 0.00028713073322932917, "loss": 3.1431, "step": 536900 }, { "epoch": 85.92, "grad_norm": 0.18095311522483826, "learning_rate": 0.0002871283331333253, "loss": 3.1251, "step": 537000 }, { "epoch": 85.936, "grad_norm": 0.1949184685945511, "learning_rate": 0.00028712593303732145, "loss": 3.3952, "step": 537100 }, { "epoch": 85.952, "grad_norm": 0.21244573593139648, "learning_rate": 0.0002871235329413176, "loss": 3.1659, "step": 537200 }, { "epoch": 85.968, "grad_norm": 0.2426280379295349, "learning_rate": 0.0002871211328453138, "loss": 3.4424, "step": 537300 }, { "epoch": 85.984, "grad_norm": 0.18393172323703766, "learning_rate": 0.00028711873274930996, "loss": 3.0359, "step": 537400 }, { "epoch": 86.0, "grad_norm": 0.19259412586688995, "learning_rate": 0.0002871163326533061, "loss": 3.3232, "step": 537500 }, { "epoch": 86.016, "grad_norm": 0.20468507707118988, "learning_rate": 0.00028711393255730224, "loss": 3.4292, "step": 537600 }, { "epoch": 86.032, "grad_norm": 0.23636262118816376, "learning_rate": 0.0002871115324612984, "loss": 3.2483, "step": 537700 }, { "epoch": 86.048, "grad_norm": 0.20133018493652344, "learning_rate": 0.0002871091323652946, "loss": 3.1816, "step": 537800 }, { "epoch": 86.064, "grad_norm": 0.20160752534866333, "learning_rate": 0.00028710673226929075, "loss": 2.9611, "step": 537900 }, { "epoch": 86.08, "grad_norm": 0.21956025063991547, "learning_rate": 0.0002871043321732869, "loss": 3.5264, "step": 538000 }, { "epoch": 86.096, "grad_norm": 0.21858000755310059, "learning_rate": 0.00028710193207728303, "loss": 3.2469, "step": 538100 }, { "epoch": 86.112, "grad_norm": 0.1866752654314041, "learning_rate": 0.0002870995319812792, "loss": 3.6676, "step": 538200 }, { "epoch": 86.128, "grad_norm": 0.18674691021442413, "learning_rate": 0.00028709713188527537, "loss": 3.1793, "step": 538300 }, { "epoch": 86.144, "grad_norm": 0.21549218893051147, "learning_rate": 0.00028709473178927154, "loss": 3.1231, "step": 538400 }, { "epoch": 86.16, "grad_norm": 0.2138526439666748, "learning_rate": 0.0002870923316932677, "loss": 3.2673, "step": 538500 }, { "epoch": 86.176, "grad_norm": 0.18383406102657318, "learning_rate": 0.0002870899315972639, "loss": 3.2912, "step": 538600 }, { "epoch": 86.192, "grad_norm": 0.20791390538215637, "learning_rate": 0.00028708753150126, "loss": 3.1312, "step": 538700 }, { "epoch": 86.208, "grad_norm": 0.20646317303180695, "learning_rate": 0.00028708515540621624, "loss": 3.3704, "step": 538800 }, { "epoch": 86.224, "grad_norm": 0.2005782276391983, "learning_rate": 0.0002870827553102124, "loss": 3.4787, "step": 538900 }, { "epoch": 86.24, "grad_norm": 0.23840229213237762, "learning_rate": 0.0002870803552142085, "loss": 3.3735, "step": 539000 }, { "epoch": 86.256, "grad_norm": 0.20217959582805634, "learning_rate": 0.0002870779551182047, "loss": 3.5897, "step": 539100 }, { "epoch": 86.272, "grad_norm": 0.2288231998682022, "learning_rate": 0.00028707555502220086, "loss": 3.3686, "step": 539200 }, { "epoch": 86.288, "grad_norm": 0.21632249653339386, "learning_rate": 0.00028707315492619703, "loss": 3.5448, "step": 539300 }, { "epoch": 86.304, "grad_norm": 0.2382534146308899, "learning_rate": 0.0002870707548301932, "loss": 3.4139, "step": 539400 }, { "epoch": 86.32, "grad_norm": 0.20966507494449615, "learning_rate": 0.00028706835473418937, "loss": 3.1526, "step": 539500 }, { "epoch": 86.336, "grad_norm": 0.20221956074237823, "learning_rate": 0.0002870659546381855, "loss": 3.1235, "step": 539600 }, { "epoch": 86.352, "grad_norm": 0.23274236917495728, "learning_rate": 0.00028706355454218165, "loss": 3.1158, "step": 539700 }, { "epoch": 86.368, "grad_norm": 0.21134445071220398, "learning_rate": 0.0002870611544461778, "loss": 3.071, "step": 539800 }, { "epoch": 86.384, "grad_norm": 0.20170465111732483, "learning_rate": 0.000287058754350174, "loss": 3.2235, "step": 539900 }, { "epoch": 86.4, "grad_norm": 0.22747117280960083, "learning_rate": 0.00028705635425417016, "loss": 3.4075, "step": 540000 }, { "epoch": 86.416, "grad_norm": 0.2059839367866516, "learning_rate": 0.0002870539541581663, "loss": 3.1707, "step": 540100 }, { "epoch": 86.432, "grad_norm": 0.21066737174987793, "learning_rate": 0.00028705155406216244, "loss": 3.3065, "step": 540200 }, { "epoch": 86.448, "grad_norm": 0.1852363795042038, "learning_rate": 0.0002870491539661586, "loss": 3.1258, "step": 540300 }, { "epoch": 86.464, "grad_norm": 0.20648455619812012, "learning_rate": 0.0002870467538701548, "loss": 3.2242, "step": 540400 }, { "epoch": 86.48, "grad_norm": 0.20014068484306335, "learning_rate": 0.00028704435377415095, "loss": 3.3507, "step": 540500 }, { "epoch": 86.496, "grad_norm": 0.1735866814851761, "learning_rate": 0.0002870419536781471, "loss": 3.2118, "step": 540600 }, { "epoch": 86.512, "grad_norm": 0.22751572728157043, "learning_rate": 0.00028703955358214323, "loss": 3.2115, "step": 540700 }, { "epoch": 86.528, "grad_norm": 0.24867379665374756, "learning_rate": 0.0002870371534861394, "loss": 3.2197, "step": 540800 }, { "epoch": 86.544, "grad_norm": 0.2214605212211609, "learning_rate": 0.00028703475339013557, "loss": 3.3003, "step": 540900 }, { "epoch": 86.56, "grad_norm": 0.2335253208875656, "learning_rate": 0.00028703235329413174, "loss": 3.0799, "step": 541000 }, { "epoch": 86.576, "grad_norm": 0.16858947277069092, "learning_rate": 0.0002870299531981279, "loss": 3.1541, "step": 541100 }, { "epoch": 86.592, "grad_norm": 0.2526245415210724, "learning_rate": 0.000287027553102124, "loss": 3.3782, "step": 541200 }, { "epoch": 86.608, "grad_norm": 0.19723577797412872, "learning_rate": 0.0002870251530061202, "loss": 3.0296, "step": 541300 }, { "epoch": 86.624, "grad_norm": 0.22654692828655243, "learning_rate": 0.00028702275291011636, "loss": 3.801, "step": 541400 }, { "epoch": 86.64, "grad_norm": 0.21284376084804535, "learning_rate": 0.00028702035281411253, "loss": 3.239, "step": 541500 }, { "epoch": 86.656, "grad_norm": 0.21220315992832184, "learning_rate": 0.0002870179527181087, "loss": 3.3104, "step": 541600 }, { "epoch": 86.672, "grad_norm": 0.2092636078596115, "learning_rate": 0.00028701555262210487, "loss": 3.4603, "step": 541700 }, { "epoch": 86.688, "grad_norm": 0.17866002023220062, "learning_rate": 0.000287013152526101, "loss": 3.6214, "step": 541800 }, { "epoch": 86.704, "grad_norm": 0.21893556416034698, "learning_rate": 0.00028701075243009716, "loss": 3.1274, "step": 541900 }, { "epoch": 86.72, "grad_norm": 0.1763399988412857, "learning_rate": 0.0002870083523340933, "loss": 3.2042, "step": 542000 }, { "epoch": 86.736, "grad_norm": 0.26390764117240906, "learning_rate": 0.0002870059522380895, "loss": 3.3091, "step": 542100 }, { "epoch": 86.752, "grad_norm": 0.16697712242603302, "learning_rate": 0.00028700355214208566, "loss": 3.3578, "step": 542200 }, { "epoch": 86.768, "grad_norm": 0.17967242002487183, "learning_rate": 0.00028700115204608183, "loss": 3.2105, "step": 542300 }, { "epoch": 86.784, "grad_norm": 0.23839950561523438, "learning_rate": 0.00028699875195007795, "loss": 3.2547, "step": 542400 }, { "epoch": 86.8, "grad_norm": 0.20231382548809052, "learning_rate": 0.0002869963518540741, "loss": 3.1961, "step": 542500 }, { "epoch": 86.816, "grad_norm": 0.23946474492549896, "learning_rate": 0.0002869939517580703, "loss": 3.0309, "step": 542600 }, { "epoch": 86.832, "grad_norm": 0.18490612506866455, "learning_rate": 0.00028699155166206646, "loss": 3.5014, "step": 542700 }, { "epoch": 86.848, "grad_norm": 0.18643441796302795, "learning_rate": 0.0002869891515660626, "loss": 3.365, "step": 542800 }, { "epoch": 86.864, "grad_norm": 0.21100588142871857, "learning_rate": 0.0002869867754710188, "loss": 3.2663, "step": 542900 }, { "epoch": 86.88, "grad_norm": 0.24017682671546936, "learning_rate": 0.000286984399375975, "loss": 3.3303, "step": 543000 }, { "epoch": 86.896, "grad_norm": 0.22533200681209564, "learning_rate": 0.00028698199927997117, "loss": 3.3681, "step": 543100 }, { "epoch": 86.912, "grad_norm": 0.18410493433475494, "learning_rate": 0.00028697959918396734, "loss": 3.4944, "step": 543200 }, { "epoch": 86.928, "grad_norm": 0.20693916082382202, "learning_rate": 0.0002869771990879635, "loss": 3.2483, "step": 543300 }, { "epoch": 86.944, "grad_norm": 0.20193831622600555, "learning_rate": 0.0002869747989919597, "loss": 3.2409, "step": 543400 }, { "epoch": 86.96, "grad_norm": 0.15906453132629395, "learning_rate": 0.00028697239889595585, "loss": 3.307, "step": 543500 }, { "epoch": 86.976, "grad_norm": 0.17312976717948914, "learning_rate": 0.00028696999879995196, "loss": 3.1739, "step": 543600 }, { "epoch": 86.992, "grad_norm": 0.2044171243906021, "learning_rate": 0.00028696759870394813, "loss": 3.3541, "step": 543700 }, { "epoch": 87.008, "grad_norm": 0.246306911110878, "learning_rate": 0.0002869651986079443, "loss": 3.0882, "step": 543800 }, { "epoch": 87.024, "grad_norm": 0.17726992070674896, "learning_rate": 0.00028696279851194047, "loss": 3.277, "step": 543900 }, { "epoch": 87.04, "grad_norm": 0.2584676444530487, "learning_rate": 0.00028696039841593664, "loss": 3.2273, "step": 544000 }, { "epoch": 87.056, "grad_norm": 0.2379465401172638, "learning_rate": 0.0002869580223208928, "loss": 3.2321, "step": 544100 }, { "epoch": 87.072, "grad_norm": 0.19023142755031586, "learning_rate": 0.00028695562222488894, "loss": 3.1276, "step": 544200 }, { "epoch": 87.088, "grad_norm": 0.21502317488193512, "learning_rate": 0.0002869532221288851, "loss": 3.3905, "step": 544300 }, { "epoch": 87.104, "grad_norm": 0.17801696062088013, "learning_rate": 0.0002869508220328813, "loss": 3.2723, "step": 544400 }, { "epoch": 87.12, "grad_norm": 0.23759901523590088, "learning_rate": 0.00028694842193687745, "loss": 3.0984, "step": 544500 }, { "epoch": 87.136, "grad_norm": 0.1929488629102707, "learning_rate": 0.0002869460218408736, "loss": 3.4604, "step": 544600 }, { "epoch": 87.152, "grad_norm": 0.21162709593772888, "learning_rate": 0.00028694362174486973, "loss": 3.1588, "step": 544700 }, { "epoch": 87.168, "grad_norm": 0.2205047905445099, "learning_rate": 0.0002869412216488659, "loss": 3.3883, "step": 544800 }, { "epoch": 87.184, "grad_norm": 0.2095824033021927, "learning_rate": 0.00028693882155286207, "loss": 3.3993, "step": 544900 }, { "epoch": 87.2, "grad_norm": 0.18567369878292084, "learning_rate": 0.00028693642145685824, "loss": 2.9783, "step": 545000 }, { "epoch": 87.216, "grad_norm": 0.22532936930656433, "learning_rate": 0.0002869340213608544, "loss": 3.1794, "step": 545100 }, { "epoch": 87.232, "grad_norm": 0.18840619921684265, "learning_rate": 0.0002869316212648505, "loss": 3.1162, "step": 545200 }, { "epoch": 87.248, "grad_norm": 0.178130641579628, "learning_rate": 0.0002869292211688467, "loss": 3.4002, "step": 545300 }, { "epoch": 87.264, "grad_norm": 0.20249220728874207, "learning_rate": 0.0002869268210728429, "loss": 3.5629, "step": 545400 }, { "epoch": 87.28, "grad_norm": 0.21021591126918793, "learning_rate": 0.0002869244209768391, "loss": 3.111, "step": 545500 }, { "epoch": 87.296, "grad_norm": 0.21554452180862427, "learning_rate": 0.0002869220208808352, "loss": 3.2556, "step": 545600 }, { "epoch": 87.312, "grad_norm": 0.19324743747711182, "learning_rate": 0.00028691962078483137, "loss": 3.0774, "step": 545700 }, { "epoch": 87.328, "grad_norm": 0.18971982598304749, "learning_rate": 0.00028691722068882754, "loss": 3.3242, "step": 545800 }, { "epoch": 87.344, "grad_norm": 0.18767528235912323, "learning_rate": 0.0002869148205928237, "loss": 3.2602, "step": 545900 }, { "epoch": 87.36, "grad_norm": 0.216440811753273, "learning_rate": 0.0002869124204968199, "loss": 3.212, "step": 546000 }, { "epoch": 87.376, "grad_norm": 0.20034517347812653, "learning_rate": 0.000286910020400816, "loss": 3.0946, "step": 546100 }, { "epoch": 87.392, "grad_norm": 0.22182875871658325, "learning_rate": 0.00028690762030481216, "loss": 3.0911, "step": 546200 }, { "epoch": 87.408, "grad_norm": 0.17026901245117188, "learning_rate": 0.00028690522020880833, "loss": 3.4174, "step": 546300 }, { "epoch": 87.424, "grad_norm": 0.18412840366363525, "learning_rate": 0.0002869028201128045, "loss": 3.1573, "step": 546400 }, { "epoch": 87.44, "grad_norm": 0.21057473123073578, "learning_rate": 0.00028690042001680067, "loss": 3.6449, "step": 546500 }, { "epoch": 87.456, "grad_norm": 0.18746419250965118, "learning_rate": 0.00028689801992079684, "loss": 3.2777, "step": 546600 }, { "epoch": 87.472, "grad_norm": 0.19062082469463348, "learning_rate": 0.00028689561982479296, "loss": 3.5323, "step": 546700 }, { "epoch": 87.488, "grad_norm": 0.21819601953029633, "learning_rate": 0.0002868932197287891, "loss": 3.547, "step": 546800 }, { "epoch": 87.504, "grad_norm": 0.2425980567932129, "learning_rate": 0.0002868908196327853, "loss": 2.9846, "step": 546900 }, { "epoch": 87.52, "grad_norm": 0.18173064291477203, "learning_rate": 0.0002868884435377415, "loss": 3.2512, "step": 547000 }, { "epoch": 87.536, "grad_norm": 0.20432589948177338, "learning_rate": 0.00028688604344173765, "loss": 3.3058, "step": 547100 }, { "epoch": 87.552, "grad_norm": 0.186666339635849, "learning_rate": 0.00028688364334573377, "loss": 3.0741, "step": 547200 }, { "epoch": 87.568, "grad_norm": 0.20140017569065094, "learning_rate": 0.00028688124324972994, "loss": 3.4437, "step": 547300 }, { "epoch": 87.584, "grad_norm": 0.24884779751300812, "learning_rate": 0.0002868788431537261, "loss": 3.1666, "step": 547400 }, { "epoch": 87.6, "grad_norm": 0.18325550854206085, "learning_rate": 0.0002868764430577223, "loss": 3.2467, "step": 547500 }, { "epoch": 87.616, "grad_norm": 0.18189637362957, "learning_rate": 0.00028687404296171844, "loss": 3.3981, "step": 547600 }, { "epoch": 87.632, "grad_norm": 0.19151942431926727, "learning_rate": 0.0002868716428657146, "loss": 3.5749, "step": 547700 }, { "epoch": 87.648, "grad_norm": 0.2085239142179489, "learning_rate": 0.00028686924276971073, "loss": 3.3246, "step": 547800 }, { "epoch": 87.664, "grad_norm": 0.2129281610250473, "learning_rate": 0.0002868668426737069, "loss": 3.4371, "step": 547900 }, { "epoch": 87.68, "grad_norm": 0.1811116635799408, "learning_rate": 0.00028686444257770307, "loss": 3.0997, "step": 548000 }, { "epoch": 87.696, "grad_norm": 0.23394188284873962, "learning_rate": 0.00028686204248169923, "loss": 3.2122, "step": 548100 }, { "epoch": 87.712, "grad_norm": 0.20377294719219208, "learning_rate": 0.0002868596423856954, "loss": 3.5422, "step": 548200 }, { "epoch": 87.728, "grad_norm": 0.2334965616464615, "learning_rate": 0.0002868572422896916, "loss": 3.2197, "step": 548300 }, { "epoch": 87.744, "grad_norm": 0.18401199579238892, "learning_rate": 0.0002868548421936877, "loss": 3.4013, "step": 548400 }, { "epoch": 87.76, "grad_norm": 0.2148619294166565, "learning_rate": 0.0002868524420976839, "loss": 3.4263, "step": 548500 }, { "epoch": 87.776, "grad_norm": 0.1931534856557846, "learning_rate": 0.0002868500420016801, "loss": 3.3653, "step": 548600 }, { "epoch": 87.792, "grad_norm": 0.21497105062007904, "learning_rate": 0.0002868476419056762, "loss": 3.2319, "step": 548700 }, { "epoch": 87.808, "grad_norm": 0.2266237586736679, "learning_rate": 0.00028684524180967237, "loss": 3.5328, "step": 548800 }, { "epoch": 87.824, "grad_norm": 0.27422523498535156, "learning_rate": 0.00028684284171366853, "loss": 2.9905, "step": 548900 }, { "epoch": 87.84, "grad_norm": 0.20548966526985168, "learning_rate": 0.0002868404416176647, "loss": 3.3762, "step": 549000 }, { "epoch": 87.856, "grad_norm": 0.21693210303783417, "learning_rate": 0.0002868380655226209, "loss": 3.3566, "step": 549100 }, { "epoch": 87.872, "grad_norm": 0.42484012246131897, "learning_rate": 0.000286835665426617, "loss": 3.2583, "step": 549200 }, { "epoch": 87.888, "grad_norm": 0.18603543937206268, "learning_rate": 0.0002868332653306132, "loss": 2.9332, "step": 549300 }, { "epoch": 87.904, "grad_norm": 0.30993223190307617, "learning_rate": 0.00028683086523460935, "loss": 3.0249, "step": 549400 }, { "epoch": 87.92, "grad_norm": 0.26901480555534363, "learning_rate": 0.0002868284651386055, "loss": 3.332, "step": 549500 }, { "epoch": 87.936, "grad_norm": 0.19797036051750183, "learning_rate": 0.0002868260650426017, "loss": 3.3374, "step": 549600 }, { "epoch": 87.952, "grad_norm": 0.20926043391227722, "learning_rate": 0.00028682366494659785, "loss": 3.2836, "step": 549700 }, { "epoch": 87.968, "grad_norm": 0.22547152638435364, "learning_rate": 0.00028682126485059397, "loss": 3.1919, "step": 549800 }, { "epoch": 87.984, "grad_norm": 0.18544933199882507, "learning_rate": 0.00028681886475459014, "loss": 3.0793, "step": 549900 }, { "epoch": 88.0, "grad_norm": 0.2433374673128128, "learning_rate": 0.0002868164646585863, "loss": 3.3844, "step": 550000 }, { "epoch": 88.016, "grad_norm": 0.22165155410766602, "learning_rate": 0.0002868140645625825, "loss": 2.8984, "step": 550100 }, { "epoch": 88.032, "grad_norm": 0.2097957730293274, "learning_rate": 0.00028681166446657864, "loss": 2.9746, "step": 550200 }, { "epoch": 88.048, "grad_norm": 0.18447642028331757, "learning_rate": 0.0002868092643705748, "loss": 3.1062, "step": 550300 }, { "epoch": 88.064, "grad_norm": 0.20325255393981934, "learning_rate": 0.00028680686427457093, "loss": 3.3236, "step": 550400 }, { "epoch": 88.08, "grad_norm": 0.20294924080371857, "learning_rate": 0.0002868044641785671, "loss": 3.2501, "step": 550500 }, { "epoch": 88.096, "grad_norm": 0.21589446067810059, "learning_rate": 0.00028680206408256327, "loss": 3.204, "step": 550600 }, { "epoch": 88.112, "grad_norm": 0.24200814962387085, "learning_rate": 0.00028679966398655944, "loss": 3.3273, "step": 550700 }, { "epoch": 88.128, "grad_norm": 0.20446619391441345, "learning_rate": 0.0002867972638905556, "loss": 3.5839, "step": 550800 }, { "epoch": 88.144, "grad_norm": 0.241311714053154, "learning_rate": 0.0002867948637945517, "loss": 3.0927, "step": 550900 }, { "epoch": 88.16, "grad_norm": 0.2238858938217163, "learning_rate": 0.0002867924636985479, "loss": 3.4185, "step": 551000 }, { "epoch": 88.176, "grad_norm": 0.23985828459262848, "learning_rate": 0.00028679006360254406, "loss": 3.271, "step": 551100 }, { "epoch": 88.192, "grad_norm": 0.41196146607398987, "learning_rate": 0.00028678766350654023, "loss": 3.1974, "step": 551200 }, { "epoch": 88.208, "grad_norm": 0.2380485087633133, "learning_rate": 0.0002867852634105364, "loss": 3.2796, "step": 551300 }, { "epoch": 88.224, "grad_norm": 0.2090853750705719, "learning_rate": 0.00028678286331453257, "loss": 3.3121, "step": 551400 }, { "epoch": 88.24, "grad_norm": 0.21315345168113708, "learning_rate": 0.0002867804632185287, "loss": 3.1942, "step": 551500 }, { "epoch": 88.256, "grad_norm": 0.24461527168750763, "learning_rate": 0.00028677806312252485, "loss": 3.7512, "step": 551600 }, { "epoch": 88.272, "grad_norm": 0.1863684356212616, "learning_rate": 0.0002867756630265211, "loss": 3.5278, "step": 551700 }, { "epoch": 88.288, "grad_norm": 0.1984078735113144, "learning_rate": 0.0002867732629305172, "loss": 3.3275, "step": 551800 }, { "epoch": 88.304, "grad_norm": 0.290886253118515, "learning_rate": 0.00028677086283451336, "loss": 3.2477, "step": 551900 }, { "epoch": 88.32, "grad_norm": 0.22955060005187988, "learning_rate": 0.00028676846273850953, "loss": 3.4387, "step": 552000 }, { "epoch": 88.336, "grad_norm": 0.1933952271938324, "learning_rate": 0.0002867660626425057, "loss": 3.4906, "step": 552100 }, { "epoch": 88.352, "grad_norm": 0.1866912841796875, "learning_rate": 0.00028676366254650187, "loss": 3.3134, "step": 552200 }, { "epoch": 88.368, "grad_norm": 0.23206499218940735, "learning_rate": 0.00028676126245049803, "loss": 3.2259, "step": 552300 }, { "epoch": 88.384, "grad_norm": 0.19009831547737122, "learning_rate": 0.00028675886235449415, "loss": 3.1381, "step": 552400 }, { "epoch": 88.4, "grad_norm": 0.19801484048366547, "learning_rate": 0.0002867564622584903, "loss": 3.0878, "step": 552500 }, { "epoch": 88.416, "grad_norm": 0.19386550784111023, "learning_rate": 0.0002867540621624865, "loss": 3.2623, "step": 552600 }, { "epoch": 88.432, "grad_norm": 0.2037118524312973, "learning_rate": 0.00028675166206648266, "loss": 3.3556, "step": 552700 }, { "epoch": 88.448, "grad_norm": 0.22364398837089539, "learning_rate": 0.0002867492619704788, "loss": 3.5504, "step": 552800 }, { "epoch": 88.464, "grad_norm": 0.23462700843811035, "learning_rate": 0.00028674686187447494, "loss": 3.3999, "step": 552900 }, { "epoch": 88.48, "grad_norm": 0.2048949897289276, "learning_rate": 0.0002867444617784711, "loss": 3.7393, "step": 553000 }, { "epoch": 88.496, "grad_norm": 0.233021080493927, "learning_rate": 0.0002867420616824673, "loss": 3.3557, "step": 553100 }, { "epoch": 88.512, "grad_norm": 0.20167861878871918, "learning_rate": 0.00028673966158646345, "loss": 3.2355, "step": 553200 }, { "epoch": 88.528, "grad_norm": 0.22040626406669617, "learning_rate": 0.00028673728549141964, "loss": 3.0956, "step": 553300 }, { "epoch": 88.544, "grad_norm": 0.1937365084886551, "learning_rate": 0.0002867348853954158, "loss": 3.1796, "step": 553400 }, { "epoch": 88.56, "grad_norm": 0.1868630200624466, "learning_rate": 0.0002867324852994119, "loss": 3.3314, "step": 553500 }, { "epoch": 88.576, "grad_norm": 0.20555472373962402, "learning_rate": 0.0002867300852034081, "loss": 3.5581, "step": 553600 }, { "epoch": 88.592, "grad_norm": 0.21666261553764343, "learning_rate": 0.00028672768510740426, "loss": 3.2559, "step": 553700 }, { "epoch": 88.608, "grad_norm": 0.17437386512756348, "learning_rate": 0.00028672528501140043, "loss": 3.4028, "step": 553800 }, { "epoch": 88.624, "grad_norm": 0.1924990862607956, "learning_rate": 0.0002867228849153966, "loss": 3.4806, "step": 553900 }, { "epoch": 88.64, "grad_norm": 0.20135606825351715, "learning_rate": 0.0002867204848193927, "loss": 3.5773, "step": 554000 }, { "epoch": 88.656, "grad_norm": 0.2091667652130127, "learning_rate": 0.0002867180847233889, "loss": 3.0627, "step": 554100 }, { "epoch": 88.672, "grad_norm": 0.2632102966308594, "learning_rate": 0.00028671568462738505, "loss": 3.3188, "step": 554200 }, { "epoch": 88.688, "grad_norm": 0.20954541862010956, "learning_rate": 0.0002867132845313812, "loss": 3.1283, "step": 554300 }, { "epoch": 88.704, "grad_norm": 0.2011537402868271, "learning_rate": 0.0002867108844353774, "loss": 3.4462, "step": 554400 }, { "epoch": 88.72, "grad_norm": 0.22066038846969604, "learning_rate": 0.00028670848433937356, "loss": 3.3665, "step": 554500 }, { "epoch": 88.736, "grad_norm": 0.1916740983724594, "learning_rate": 0.0002867060842433697, "loss": 2.9962, "step": 554600 }, { "epoch": 88.752, "grad_norm": 0.23123690485954285, "learning_rate": 0.00028670368414736584, "loss": 3.258, "step": 554700 }, { "epoch": 88.768, "grad_norm": 0.20115476846694946, "learning_rate": 0.00028670128405136207, "loss": 3.3069, "step": 554800 }, { "epoch": 88.784, "grad_norm": 0.2098074108362198, "learning_rate": 0.0002866988839553582, "loss": 3.2922, "step": 554900 }, { "epoch": 88.8, "grad_norm": 0.21080225706100464, "learning_rate": 0.00028669648385935435, "loss": 3.2615, "step": 555000 }, { "epoch": 88.816, "grad_norm": 0.2287459671497345, "learning_rate": 0.0002866940837633505, "loss": 3.2231, "step": 555100 }, { "epoch": 88.832, "grad_norm": 0.1955193728208542, "learning_rate": 0.0002866916836673467, "loss": 3.1474, "step": 555200 }, { "epoch": 88.848, "grad_norm": 0.19004836678504944, "learning_rate": 0.00028668928357134286, "loss": 3.3269, "step": 555300 }, { "epoch": 88.864, "grad_norm": 0.23870937526226044, "learning_rate": 0.00028668690747629905, "loss": 3.5854, "step": 555400 }, { "epoch": 88.88, "grad_norm": 0.19991730153560638, "learning_rate": 0.00028668450738029516, "loss": 3.4412, "step": 555500 }, { "epoch": 88.896, "grad_norm": 0.19217661023139954, "learning_rate": 0.00028668210728429133, "loss": 3.2791, "step": 555600 }, { "epoch": 88.912, "grad_norm": 0.2208884358406067, "learning_rate": 0.0002866797071882875, "loss": 3.3401, "step": 555700 }, { "epoch": 88.928, "grad_norm": 0.1984161138534546, "learning_rate": 0.00028667730709228367, "loss": 3.3804, "step": 555800 }, { "epoch": 88.944, "grad_norm": 0.2186305820941925, "learning_rate": 0.00028667490699627984, "loss": 3.1256, "step": 555900 }, { "epoch": 88.96, "grad_norm": 0.24030552804470062, "learning_rate": 0.00028667250690027595, "loss": 3.3992, "step": 556000 }, { "epoch": 88.976, "grad_norm": 0.22415557503700256, "learning_rate": 0.0002866701068042721, "loss": 3.4488, "step": 556100 }, { "epoch": 88.992, "grad_norm": 0.193254292011261, "learning_rate": 0.0002866677067082683, "loss": 3.2624, "step": 556200 }, { "epoch": 89.008, "grad_norm": 0.21018652617931366, "learning_rate": 0.00028666530661226446, "loss": 3.1804, "step": 556300 }, { "epoch": 89.024, "grad_norm": 0.25042489171028137, "learning_rate": 0.00028666290651626063, "loss": 3.1446, "step": 556400 }, { "epoch": 89.04, "grad_norm": 0.21171048283576965, "learning_rate": 0.0002866605064202568, "loss": 3.3565, "step": 556500 }, { "epoch": 89.056, "grad_norm": 0.1936231404542923, "learning_rate": 0.0002866581063242529, "loss": 3.6765, "step": 556600 }, { "epoch": 89.072, "grad_norm": 0.23182573914527893, "learning_rate": 0.0002866557062282491, "loss": 3.3268, "step": 556700 }, { "epoch": 89.088, "grad_norm": 0.2284616380929947, "learning_rate": 0.00028665330613224525, "loss": 3.0825, "step": 556800 }, { "epoch": 89.104, "grad_norm": 0.1883186548948288, "learning_rate": 0.0002866509060362414, "loss": 3.2251, "step": 556900 }, { "epoch": 89.12, "grad_norm": 0.19049620628356934, "learning_rate": 0.0002866485059402376, "loss": 3.3342, "step": 557000 }, { "epoch": 89.136, "grad_norm": 0.21390168368816376, "learning_rate": 0.0002866461058442337, "loss": 3.354, "step": 557100 }, { "epoch": 89.152, "grad_norm": 0.22417424619197845, "learning_rate": 0.0002866437057482299, "loss": 3.2998, "step": 557200 }, { "epoch": 89.168, "grad_norm": 0.1881093680858612, "learning_rate": 0.00028664130565222605, "loss": 3.2934, "step": 557300 }, { "epoch": 89.184, "grad_norm": 0.2287430614233017, "learning_rate": 0.0002866389055562222, "loss": 3.1434, "step": 557400 }, { "epoch": 89.2, "grad_norm": 0.21389326453208923, "learning_rate": 0.0002866365054602184, "loss": 3.2698, "step": 557500 }, { "epoch": 89.216, "grad_norm": 0.2029869258403778, "learning_rate": 0.00028663410536421455, "loss": 3.0859, "step": 557600 }, { "epoch": 89.232, "grad_norm": 0.24488292634487152, "learning_rate": 0.00028663170526821067, "loss": 3.3222, "step": 557700 }, { "epoch": 89.248, "grad_norm": 0.2327500432729721, "learning_rate": 0.00028662930517220684, "loss": 3.3269, "step": 557800 }, { "epoch": 89.264, "grad_norm": 0.23306792974472046, "learning_rate": 0.00028662690507620306, "loss": 3.2731, "step": 557900 }, { "epoch": 89.28, "grad_norm": 0.20656323432922363, "learning_rate": 0.0002866245049801992, "loss": 3.2772, "step": 558000 }, { "epoch": 89.296, "grad_norm": 0.1850086897611618, "learning_rate": 0.00028662212888515536, "loss": 3.3738, "step": 558100 }, { "epoch": 89.312, "grad_norm": 0.1833152025938034, "learning_rate": 0.0002866197527901116, "loss": 3.1471, "step": 558200 }, { "epoch": 89.328, "grad_norm": 0.228972390294075, "learning_rate": 0.0002866173526941078, "loss": 3.2079, "step": 558300 }, { "epoch": 89.344, "grad_norm": 0.2357115000486374, "learning_rate": 0.0002866149525981039, "loss": 3.4141, "step": 558400 }, { "epoch": 89.36, "grad_norm": 0.2266385555267334, "learning_rate": 0.00028661255250210006, "loss": 3.312, "step": 558500 }, { "epoch": 89.376, "grad_norm": 0.2260368913412094, "learning_rate": 0.00028661015240609623, "loss": 3.2374, "step": 558600 }, { "epoch": 89.392, "grad_norm": 0.2186807245016098, "learning_rate": 0.0002866077523100924, "loss": 3.6132, "step": 558700 }, { "epoch": 89.408, "grad_norm": 0.19993023574352264, "learning_rate": 0.00028660535221408857, "loss": 3.1182, "step": 558800 }, { "epoch": 89.424, "grad_norm": 0.20394548773765564, "learning_rate": 0.0002866029521180847, "loss": 3.2005, "step": 558900 }, { "epoch": 89.44, "grad_norm": 0.24943536520004272, "learning_rate": 0.00028660055202208085, "loss": 3.4172, "step": 559000 }, { "epoch": 89.456, "grad_norm": 0.21178671717643738, "learning_rate": 0.000286598151926077, "loss": 3.1194, "step": 559100 }, { "epoch": 89.472, "grad_norm": 0.23220469057559967, "learning_rate": 0.0002865957518300732, "loss": 3.2084, "step": 559200 }, { "epoch": 89.488, "grad_norm": 0.20656818151474, "learning_rate": 0.00028659335173406936, "loss": 3.4902, "step": 559300 }, { "epoch": 89.504, "grad_norm": 0.19396938383579254, "learning_rate": 0.00028659095163806553, "loss": 3.7027, "step": 559400 }, { "epoch": 89.52, "grad_norm": 0.19931559264659882, "learning_rate": 0.00028658855154206164, "loss": 3.5443, "step": 559500 }, { "epoch": 89.536, "grad_norm": 0.19816869497299194, "learning_rate": 0.0002865861514460578, "loss": 3.3838, "step": 559600 }, { "epoch": 89.552, "grad_norm": 0.1842900961637497, "learning_rate": 0.000286583751350054, "loss": 3.2632, "step": 559700 }, { "epoch": 89.568, "grad_norm": 0.20433823764324188, "learning_rate": 0.00028658135125405015, "loss": 3.6324, "step": 559800 }, { "epoch": 89.584, "grad_norm": 0.2238415628671646, "learning_rate": 0.0002865789511580463, "loss": 3.0909, "step": 559900 }, { "epoch": 89.6, "grad_norm": 0.2013416737318039, "learning_rate": 0.00028657655106204244, "loss": 3.2984, "step": 560000 }, { "epoch": 89.616, "grad_norm": 0.22782401740550995, "learning_rate": 0.0002865741509660386, "loss": 3.1674, "step": 560100 }, { "epoch": 89.632, "grad_norm": 0.24691571295261383, "learning_rate": 0.0002865717508700348, "loss": 3.3919, "step": 560200 }, { "epoch": 89.648, "grad_norm": 0.20691931247711182, "learning_rate": 0.00028656935077403094, "loss": 3.1655, "step": 560300 }, { "epoch": 89.664, "grad_norm": 0.19481778144836426, "learning_rate": 0.0002865669506780271, "loss": 3.0369, "step": 560400 }, { "epoch": 89.68, "grad_norm": 0.21386650204658508, "learning_rate": 0.0002865645505820233, "loss": 3.1819, "step": 560500 }, { "epoch": 89.696, "grad_norm": 0.21542881429195404, "learning_rate": 0.0002865621504860194, "loss": 3.2984, "step": 560600 }, { "epoch": 89.712, "grad_norm": 0.2008301019668579, "learning_rate": 0.00028655975039001557, "loss": 3.456, "step": 560700 }, { "epoch": 89.728, "grad_norm": 0.2477804720401764, "learning_rate": 0.00028655735029401173, "loss": 3.2708, "step": 560800 }, { "epoch": 89.744, "grad_norm": 0.19061526656150818, "learning_rate": 0.0002865549501980079, "loss": 3.2065, "step": 560900 }, { "epoch": 89.76, "grad_norm": 0.20808464288711548, "learning_rate": 0.00028655255010200407, "loss": 3.2109, "step": 561000 }, { "epoch": 89.776, "grad_norm": 0.18232519924640656, "learning_rate": 0.0002865501500060002, "loss": 3.0451, "step": 561100 }, { "epoch": 89.792, "grad_norm": 0.22634047269821167, "learning_rate": 0.00028654774990999636, "loss": 3.4212, "step": 561200 }, { "epoch": 89.808, "grad_norm": 0.17747636139392853, "learning_rate": 0.0002865453498139925, "loss": 3.2, "step": 561300 }, { "epoch": 89.824, "grad_norm": 0.21003036201000214, "learning_rate": 0.0002865429497179887, "loss": 3.2433, "step": 561400 }, { "epoch": 89.84, "grad_norm": 0.20037750899791718, "learning_rate": 0.00028654054962198486, "loss": 3.0687, "step": 561500 }, { "epoch": 89.856, "grad_norm": 0.18576157093048096, "learning_rate": 0.00028653814952598103, "loss": 3.4772, "step": 561600 }, { "epoch": 89.872, "grad_norm": 0.17800107598304749, "learning_rate": 0.00028653574942997715, "loss": 3.3253, "step": 561700 }, { "epoch": 89.888, "grad_norm": 0.211652472615242, "learning_rate": 0.0002865333493339733, "loss": 3.5561, "step": 561800 }, { "epoch": 89.904, "grad_norm": 0.2222364842891693, "learning_rate": 0.0002865309492379695, "loss": 3.329, "step": 561900 }, { "epoch": 89.92, "grad_norm": 0.21025773882865906, "learning_rate": 0.00028652854914196566, "loss": 2.9753, "step": 562000 }, { "epoch": 89.936, "grad_norm": 0.2333516627550125, "learning_rate": 0.0002865261490459618, "loss": 3.1892, "step": 562100 }, { "epoch": 89.952, "grad_norm": 0.22109241783618927, "learning_rate": 0.00028652374894995794, "loss": 3.3403, "step": 562200 }, { "epoch": 89.968, "grad_norm": 0.19103389978408813, "learning_rate": 0.0002865213728549142, "loss": 3.4019, "step": 562300 }, { "epoch": 89.984, "grad_norm": 0.19422155618667603, "learning_rate": 0.00028651897275891035, "loss": 3.1873, "step": 562400 }, { "epoch": 90.0, "grad_norm": 0.2150174081325531, "learning_rate": 0.0002865165726629065, "loss": 3.1205, "step": 562500 }, { "epoch": 90.016, "grad_norm": 0.22857332229614258, "learning_rate": 0.00028651417256690264, "loss": 3.1599, "step": 562600 }, { "epoch": 90.032, "grad_norm": 0.2032587081193924, "learning_rate": 0.0002865117724708988, "loss": 3.0646, "step": 562700 }, { "epoch": 90.048, "grad_norm": 0.21641705930233002, "learning_rate": 0.000286509372374895, "loss": 3.2443, "step": 562800 }, { "epoch": 90.064, "grad_norm": 0.19922591745853424, "learning_rate": 0.00028650697227889114, "loss": 3.2357, "step": 562900 }, { "epoch": 90.08, "grad_norm": 0.21346397697925568, "learning_rate": 0.0002865045721828873, "loss": 3.1123, "step": 563000 }, { "epoch": 90.096, "grad_norm": 0.21901477873325348, "learning_rate": 0.00028650217208688343, "loss": 3.241, "step": 563100 }, { "epoch": 90.112, "grad_norm": 0.2049214243888855, "learning_rate": 0.0002864997719908796, "loss": 3.2023, "step": 563200 }, { "epoch": 90.128, "grad_norm": 0.21204844117164612, "learning_rate": 0.00028649737189487577, "loss": 3.1343, "step": 563300 }, { "epoch": 90.144, "grad_norm": 0.24312610924243927, "learning_rate": 0.00028649497179887194, "loss": 3.8279, "step": 563400 }, { "epoch": 90.16, "grad_norm": 0.19027771055698395, "learning_rate": 0.0002864925717028681, "loss": 3.2057, "step": 563500 }, { "epoch": 90.176, "grad_norm": 0.19490858912467957, "learning_rate": 0.0002864901716068643, "loss": 3.1072, "step": 563600 }, { "epoch": 90.192, "grad_norm": 0.22195233404636383, "learning_rate": 0.0002864877715108604, "loss": 3.3165, "step": 563700 }, { "epoch": 90.208, "grad_norm": 0.21446292102336884, "learning_rate": 0.00028648537141485656, "loss": 3.1155, "step": 563800 }, { "epoch": 90.224, "grad_norm": 0.3575795292854309, "learning_rate": 0.00028648297131885273, "loss": 3.0969, "step": 563900 }, { "epoch": 90.24, "grad_norm": 0.20425687730312347, "learning_rate": 0.0002864805712228489, "loss": 3.6299, "step": 564000 }, { "epoch": 90.256, "grad_norm": 0.21876297891139984, "learning_rate": 0.0002864781951278051, "loss": 3.3104, "step": 564100 }, { "epoch": 90.272, "grad_norm": 0.23245739936828613, "learning_rate": 0.00028647579503180125, "loss": 3.512, "step": 564200 }, { "epoch": 90.288, "grad_norm": 0.2014285922050476, "learning_rate": 0.0002864733949357974, "loss": 3.5223, "step": 564300 }, { "epoch": 90.304, "grad_norm": 0.2034325897693634, "learning_rate": 0.0002864709948397936, "loss": 3.6262, "step": 564400 }, { "epoch": 90.32, "grad_norm": 0.23045022785663605, "learning_rate": 0.00028646859474378976, "loss": 3.3908, "step": 564500 }, { "epoch": 90.336, "grad_norm": 0.21888291835784912, "learning_rate": 0.0002864661946477859, "loss": 3.4616, "step": 564600 }, { "epoch": 90.352, "grad_norm": 0.23994798958301544, "learning_rate": 0.00028646379455178205, "loss": 3.2273, "step": 564700 }, { "epoch": 90.368, "grad_norm": 0.2167103886604309, "learning_rate": 0.0002864613944557782, "loss": 3.338, "step": 564800 }, { "epoch": 90.384, "grad_norm": 0.26398414373397827, "learning_rate": 0.0002864589943597744, "loss": 3.3125, "step": 564900 }, { "epoch": 90.4, "grad_norm": 0.23604340851306915, "learning_rate": 0.00028645659426377055, "loss": 3.1856, "step": 565000 }, { "epoch": 90.416, "grad_norm": 0.23276956379413605, "learning_rate": 0.00028645419416776667, "loss": 3.1345, "step": 565100 }, { "epoch": 90.432, "grad_norm": 0.18833298981189728, "learning_rate": 0.00028645179407176284, "loss": 3.3777, "step": 565200 }, { "epoch": 90.448, "grad_norm": 0.20463253557682037, "learning_rate": 0.000286449393975759, "loss": 3.3679, "step": 565300 }, { "epoch": 90.464, "grad_norm": 0.21704843640327454, "learning_rate": 0.0002864469938797552, "loss": 3.2613, "step": 565400 }, { "epoch": 90.48, "grad_norm": 0.23142609000205994, "learning_rate": 0.00028644459378375135, "loss": 3.3958, "step": 565500 }, { "epoch": 90.496, "grad_norm": 0.20706768333911896, "learning_rate": 0.0002864421936877475, "loss": 3.2745, "step": 565600 }, { "epoch": 90.512, "grad_norm": 0.22068098187446594, "learning_rate": 0.00028643979359174363, "loss": 3.2489, "step": 565700 }, { "epoch": 90.528, "grad_norm": 0.2457132190465927, "learning_rate": 0.0002864373934957398, "loss": 3.1564, "step": 565800 }, { "epoch": 90.544, "grad_norm": 0.19784599542617798, "learning_rate": 0.00028643499339973597, "loss": 3.6101, "step": 565900 }, { "epoch": 90.56, "grad_norm": 0.2891266644001007, "learning_rate": 0.00028643259330373214, "loss": 3.5468, "step": 566000 }, { "epoch": 90.576, "grad_norm": 0.21562765538692474, "learning_rate": 0.0002864301932077283, "loss": 3.3846, "step": 566100 }, { "epoch": 90.592, "grad_norm": 0.2570291757583618, "learning_rate": 0.0002864277931117244, "loss": 3.4644, "step": 566200 }, { "epoch": 90.608, "grad_norm": 0.22001256048679352, "learning_rate": 0.0002864253930157206, "loss": 3.2133, "step": 566300 }, { "epoch": 90.624, "grad_norm": 0.1938314288854599, "learning_rate": 0.00028642299291971676, "loss": 3.1184, "step": 566400 }, { "epoch": 90.64, "grad_norm": 0.23271851241588593, "learning_rate": 0.00028642059282371293, "loss": 3.723, "step": 566500 }, { "epoch": 90.656, "grad_norm": 0.21332885324954987, "learning_rate": 0.0002864181927277091, "loss": 3.2562, "step": 566600 }, { "epoch": 90.672, "grad_norm": 0.18531902134418488, "learning_rate": 0.00028641579263170527, "loss": 3.2351, "step": 566700 }, { "epoch": 90.688, "grad_norm": 0.20821158587932587, "learning_rate": 0.0002864133925357014, "loss": 3.3252, "step": 566800 }, { "epoch": 90.704, "grad_norm": 0.2375437170267105, "learning_rate": 0.00028641099243969755, "loss": 3.1068, "step": 566900 }, { "epoch": 90.72, "grad_norm": 0.20962677896022797, "learning_rate": 0.0002864085923436937, "loss": 3.3537, "step": 567000 }, { "epoch": 90.736, "grad_norm": 0.21604536473751068, "learning_rate": 0.0002864061922476899, "loss": 3.4977, "step": 567100 }, { "epoch": 90.752, "grad_norm": 0.185128316283226, "learning_rate": 0.0002864038161526461, "loss": 3.2493, "step": 567200 }, { "epoch": 90.768, "grad_norm": 0.2231953889131546, "learning_rate": 0.00028640141605664225, "loss": 3.2581, "step": 567300 }, { "epoch": 90.784, "grad_norm": 0.21313059329986572, "learning_rate": 0.0002863990159606384, "loss": 3.121, "step": 567400 }, { "epoch": 90.8, "grad_norm": 0.21720123291015625, "learning_rate": 0.0002863966158646346, "loss": 3.6317, "step": 567500 }, { "epoch": 90.816, "grad_norm": 0.20348872244358063, "learning_rate": 0.00028639421576863076, "loss": 3.0493, "step": 567600 }, { "epoch": 90.832, "grad_norm": 0.26213252544403076, "learning_rate": 0.00028639181567262687, "loss": 3.3432, "step": 567700 }, { "epoch": 90.848, "grad_norm": 0.16785147786140442, "learning_rate": 0.00028638941557662304, "loss": 3.1831, "step": 567800 }, { "epoch": 90.864, "grad_norm": 0.19918440282344818, "learning_rate": 0.0002863870154806192, "loss": 3.275, "step": 567900 }, { "epoch": 90.88, "grad_norm": 0.21450933814048767, "learning_rate": 0.0002863846153846154, "loss": 3.3753, "step": 568000 }, { "epoch": 90.896, "grad_norm": 0.2683629095554352, "learning_rate": 0.00028638221528861155, "loss": 3.331, "step": 568100 }, { "epoch": 90.912, "grad_norm": 0.25910070538520813, "learning_rate": 0.00028637981519260766, "loss": 3.3047, "step": 568200 }, { "epoch": 90.928, "grad_norm": 0.2525864541530609, "learning_rate": 0.00028637741509660383, "loss": 3.1418, "step": 568300 }, { "epoch": 90.944, "grad_norm": 0.18730616569519043, "learning_rate": 0.0002863750150006, "loss": 2.9176, "step": 568400 }, { "epoch": 90.96, "grad_norm": 0.20028169453144073, "learning_rate": 0.00028637261490459617, "loss": 3.2734, "step": 568500 }, { "epoch": 90.976, "grad_norm": 0.1716739535331726, "learning_rate": 0.00028637021480859234, "loss": 2.897, "step": 568600 }, { "epoch": 90.992, "grad_norm": 0.2220848947763443, "learning_rate": 0.0002863678147125885, "loss": 3.6169, "step": 568700 }, { "epoch": 91.008, "grad_norm": 0.2638801634311676, "learning_rate": 0.0002863654146165846, "loss": 3.293, "step": 568800 }, { "epoch": 91.024, "grad_norm": 0.2618657648563385, "learning_rate": 0.0002863630145205808, "loss": 3.2112, "step": 568900 }, { "epoch": 91.04, "grad_norm": 0.23519223928451538, "learning_rate": 0.00028636061442457696, "loss": 3.2718, "step": 569000 }, { "epoch": 91.056, "grad_norm": 0.22986142337322235, "learning_rate": 0.00028635821432857313, "loss": 2.9991, "step": 569100 }, { "epoch": 91.072, "grad_norm": 0.19826412200927734, "learning_rate": 0.0002863558142325693, "loss": 3.5084, "step": 569200 }, { "epoch": 91.088, "grad_norm": 0.19978399574756622, "learning_rate": 0.0002863534141365654, "loss": 3.3034, "step": 569300 }, { "epoch": 91.104, "grad_norm": 0.19917021691799164, "learning_rate": 0.0002863510140405616, "loss": 3.0498, "step": 569400 }, { "epoch": 91.12, "grad_norm": 0.2257513850927353, "learning_rate": 0.00028634861394455775, "loss": 3.1223, "step": 569500 }, { "epoch": 91.136, "grad_norm": 0.22996458411216736, "learning_rate": 0.0002863462138485539, "loss": 3.1962, "step": 569600 }, { "epoch": 91.152, "grad_norm": 0.2097974568605423, "learning_rate": 0.0002863438137525501, "loss": 3.3895, "step": 569700 }, { "epoch": 91.168, "grad_norm": 0.21383439004421234, "learning_rate": 0.00028634141365654626, "loss": 3.2957, "step": 569800 }, { "epoch": 91.184, "grad_norm": 0.23602856695652008, "learning_rate": 0.0002863390135605424, "loss": 3.3015, "step": 569900 }, { "epoch": 91.2, "grad_norm": 0.21398618817329407, "learning_rate": 0.00028633661346453854, "loss": 3.3335, "step": 570000 }, { "epoch": 91.216, "grad_norm": 0.2032211720943451, "learning_rate": 0.0002863342133685347, "loss": 3.1865, "step": 570100 }, { "epoch": 91.232, "grad_norm": 0.2014179229736328, "learning_rate": 0.0002863318132725309, "loss": 3.4211, "step": 570200 }, { "epoch": 91.248, "grad_norm": 0.20203636586666107, "learning_rate": 0.00028632941317652705, "loss": 3.3186, "step": 570300 }, { "epoch": 91.264, "grad_norm": 0.22661200165748596, "learning_rate": 0.00028632701308052317, "loss": 3.5606, "step": 570400 }, { "epoch": 91.28, "grad_norm": 0.21079663932323456, "learning_rate": 0.00028632461298451934, "loss": 3.6328, "step": 570500 }, { "epoch": 91.296, "grad_norm": 0.20312084257602692, "learning_rate": 0.0002863222128885155, "loss": 3.3212, "step": 570600 }, { "epoch": 91.312, "grad_norm": 0.19793091714382172, "learning_rate": 0.0002863198127925117, "loss": 3.3713, "step": 570700 }, { "epoch": 91.328, "grad_norm": 0.22346410155296326, "learning_rate": 0.00028631741269650784, "loss": 3.3717, "step": 570800 }, { "epoch": 91.344, "grad_norm": 0.26677313446998596, "learning_rate": 0.000286315012600504, "loss": 3.3666, "step": 570900 }, { "epoch": 91.36, "grad_norm": 0.19185614585876465, "learning_rate": 0.00028631261250450013, "loss": 3.3322, "step": 571000 }, { "epoch": 91.376, "grad_norm": 0.18988032639026642, "learning_rate": 0.0002863102124084963, "loss": 3.3975, "step": 571100 }, { "epoch": 91.392, "grad_norm": 0.2211666852235794, "learning_rate": 0.00028630783631345254, "loss": 3.702, "step": 571200 }, { "epoch": 91.408, "grad_norm": 0.20664595067501068, "learning_rate": 0.00028630543621744865, "loss": 3.1333, "step": 571300 }, { "epoch": 91.424, "grad_norm": 0.22061827778816223, "learning_rate": 0.0002863030361214448, "loss": 3.2963, "step": 571400 }, { "epoch": 91.44, "grad_norm": 0.1987735778093338, "learning_rate": 0.000286300636025441, "loss": 3.3216, "step": 571500 }, { "epoch": 91.456, "grad_norm": 0.18310017883777618, "learning_rate": 0.00028629823592943716, "loss": 3.3975, "step": 571600 }, { "epoch": 91.472, "grad_norm": 0.20672912895679474, "learning_rate": 0.00028629583583343333, "loss": 3.317, "step": 571700 }, { "epoch": 91.488, "grad_norm": 0.20197612047195435, "learning_rate": 0.0002862934357374295, "loss": 3.2414, "step": 571800 }, { "epoch": 91.504, "grad_norm": 0.21232970058918, "learning_rate": 0.0002862910356414256, "loss": 3.2893, "step": 571900 }, { "epoch": 91.52, "grad_norm": 0.22496598958969116, "learning_rate": 0.0002862886355454218, "loss": 3.11, "step": 572000 }, { "epoch": 91.536, "grad_norm": 0.22078803181648254, "learning_rate": 0.000286286259450378, "loss": 3.0145, "step": 572100 }, { "epoch": 91.552, "grad_norm": 0.21285466849803925, "learning_rate": 0.00028628385935437414, "loss": 3.5386, "step": 572200 }, { "epoch": 91.568, "grad_norm": 0.20507653057575226, "learning_rate": 0.0002862814592583703, "loss": 3.1213, "step": 572300 }, { "epoch": 91.584, "grad_norm": 0.26126718521118164, "learning_rate": 0.0002862790591623664, "loss": 3.3254, "step": 572400 }, { "epoch": 91.6, "grad_norm": 0.2177928388118744, "learning_rate": 0.00028627665906636265, "loss": 3.5072, "step": 572500 }, { "epoch": 91.616, "grad_norm": 0.19984374940395355, "learning_rate": 0.0002862742589703588, "loss": 3.3819, "step": 572600 }, { "epoch": 91.632, "grad_norm": 0.17621994018554688, "learning_rate": 0.000286271858874355, "loss": 3.3482, "step": 572700 }, { "epoch": 91.648, "grad_norm": 0.20971596240997314, "learning_rate": 0.0002862694587783511, "loss": 3.5095, "step": 572800 }, { "epoch": 91.664, "grad_norm": 0.22514531016349792, "learning_rate": 0.00028626705868234727, "loss": 3.5998, "step": 572900 }, { "epoch": 91.68, "grad_norm": 0.22629639506340027, "learning_rate": 0.00028626465858634344, "loss": 3.2458, "step": 573000 }, { "epoch": 91.696, "grad_norm": 0.20486420392990112, "learning_rate": 0.0002862622584903396, "loss": 3.3438, "step": 573100 }, { "epoch": 91.712, "grad_norm": 0.23075364530086517, "learning_rate": 0.0002862598583943358, "loss": 3.1065, "step": 573200 }, { "epoch": 91.728, "grad_norm": 0.19376493990421295, "learning_rate": 0.0002862574582983319, "loss": 3.2426, "step": 573300 }, { "epoch": 91.744, "grad_norm": 0.20220430195331573, "learning_rate": 0.00028625505820232806, "loss": 3.222, "step": 573400 }, { "epoch": 91.76, "grad_norm": 0.22177818417549133, "learning_rate": 0.00028625265810632423, "loss": 3.434, "step": 573500 }, { "epoch": 91.776, "grad_norm": 0.24672923982143402, "learning_rate": 0.0002862502580103204, "loss": 3.4339, "step": 573600 }, { "epoch": 91.792, "grad_norm": 0.1922437995672226, "learning_rate": 0.00028624785791431657, "loss": 3.4121, "step": 573700 }, { "epoch": 91.808, "grad_norm": 0.2237989753484726, "learning_rate": 0.00028624545781831274, "loss": 2.9955, "step": 573800 }, { "epoch": 91.824, "grad_norm": 0.20758382976055145, "learning_rate": 0.00028624305772230886, "loss": 3.3455, "step": 573900 }, { "epoch": 91.84, "grad_norm": 0.2497597634792328, "learning_rate": 0.000286240657626305, "loss": 3.351, "step": 574000 }, { "epoch": 91.856, "grad_norm": 0.23460674285888672, "learning_rate": 0.0002862382575303012, "loss": 3.2402, "step": 574100 }, { "epoch": 91.872, "grad_norm": 0.19564291834831238, "learning_rate": 0.0002862358814352574, "loss": 3.3218, "step": 574200 }, { "epoch": 91.888, "grad_norm": 0.25710493326187134, "learning_rate": 0.00028623348133925355, "loss": 3.0977, "step": 574300 }, { "epoch": 91.904, "grad_norm": 0.20134642720222473, "learning_rate": 0.00028623108124324967, "loss": 3.4047, "step": 574400 }, { "epoch": 91.92, "grad_norm": 0.206907719373703, "learning_rate": 0.00028622868114724584, "loss": 3.2916, "step": 574500 }, { "epoch": 91.936, "grad_norm": 0.21605372428894043, "learning_rate": 0.000286226281051242, "loss": 3.2484, "step": 574600 }, { "epoch": 91.952, "grad_norm": 0.1762874722480774, "learning_rate": 0.0002862238809552382, "loss": 3.3773, "step": 574700 }, { "epoch": 91.968, "grad_norm": 0.2237434983253479, "learning_rate": 0.00028622148085923434, "loss": 3.3512, "step": 574800 }, { "epoch": 91.984, "grad_norm": 0.19278034567832947, "learning_rate": 0.0002862190807632305, "loss": 3.3347, "step": 574900 }, { "epoch": 92.0, "grad_norm": 0.22047312557697296, "learning_rate": 0.00028621668066722663, "loss": 3.179, "step": 575000 }, { "epoch": 92.016, "grad_norm": 0.19956117868423462, "learning_rate": 0.0002862142805712228, "loss": 2.8425, "step": 575100 }, { "epoch": 92.032, "grad_norm": 0.2350604087114334, "learning_rate": 0.00028621188047521897, "loss": 3.1665, "step": 575200 }, { "epoch": 92.048, "grad_norm": 0.22141389548778534, "learning_rate": 0.00028620948037921514, "loss": 3.1393, "step": 575300 }, { "epoch": 92.064, "grad_norm": 0.23184287548065186, "learning_rate": 0.0002862070802832113, "loss": 3.2527, "step": 575400 }, { "epoch": 92.08, "grad_norm": 0.2270003706216812, "learning_rate": 0.0002862046801872074, "loss": 3.193, "step": 575500 }, { "epoch": 92.096, "grad_norm": 0.22535866498947144, "learning_rate": 0.00028620228009120364, "loss": 3.2449, "step": 575600 }, { "epoch": 92.112, "grad_norm": 0.20031937956809998, "learning_rate": 0.0002861998799951998, "loss": 3.1118, "step": 575700 }, { "epoch": 92.128, "grad_norm": 0.2141897827386856, "learning_rate": 0.000286197479899196, "loss": 3.3839, "step": 575800 }, { "epoch": 92.144, "grad_norm": 0.2231959104537964, "learning_rate": 0.0002861950798031921, "loss": 3.3456, "step": 575900 }, { "epoch": 92.16, "grad_norm": 0.23552505671977997, "learning_rate": 0.00028619267970718827, "loss": 3.3941, "step": 576000 }, { "epoch": 92.176, "grad_norm": 0.21892283856868744, "learning_rate": 0.00028619027961118444, "loss": 3.1047, "step": 576100 }, { "epoch": 92.192, "grad_norm": 0.1985369473695755, "learning_rate": 0.0002861878795151806, "loss": 3.2606, "step": 576200 }, { "epoch": 92.208, "grad_norm": 0.2131713330745697, "learning_rate": 0.0002861854794191768, "loss": 3.2993, "step": 576300 }, { "epoch": 92.224, "grad_norm": 0.24007542431354523, "learning_rate": 0.0002861830793231729, "loss": 3.461, "step": 576400 }, { "epoch": 92.24, "grad_norm": 0.21839924156665802, "learning_rate": 0.00028618067922716906, "loss": 3.4758, "step": 576500 }, { "epoch": 92.256, "grad_norm": 0.20572562515735626, "learning_rate": 0.0002861782791311652, "loss": 3.059, "step": 576600 }, { "epoch": 92.272, "grad_norm": 0.21093060076236725, "learning_rate": 0.0002861758790351614, "loss": 3.4485, "step": 576700 }, { "epoch": 92.288, "grad_norm": 0.24226167798042297, "learning_rate": 0.00028617347893915757, "loss": 3.0806, "step": 576800 }, { "epoch": 92.304, "grad_norm": 0.191156804561615, "learning_rate": 0.00028617107884315373, "loss": 3.2132, "step": 576900 }, { "epoch": 92.32, "grad_norm": 0.23934349417686462, "learning_rate": 0.00028616867874714985, "loss": 3.2881, "step": 577000 }, { "epoch": 92.336, "grad_norm": 0.20600828528404236, "learning_rate": 0.000286166278651146, "loss": 3.2834, "step": 577100 }, { "epoch": 92.352, "grad_norm": 0.24633784592151642, "learning_rate": 0.0002861638785551422, "loss": 3.3239, "step": 577200 }, { "epoch": 92.368, "grad_norm": 0.2657311260700226, "learning_rate": 0.00028616147845913836, "loss": 3.549, "step": 577300 }, { "epoch": 92.384, "grad_norm": 0.20873570442199707, "learning_rate": 0.0002861590783631345, "loss": 3.2368, "step": 577400 }, { "epoch": 92.4, "grad_norm": 0.21722570061683655, "learning_rate": 0.00028615667826713064, "loss": 3.2652, "step": 577500 }, { "epoch": 92.416, "grad_norm": 0.21039380133152008, "learning_rate": 0.0002861542781711268, "loss": 3.5281, "step": 577600 }, { "epoch": 92.432, "grad_norm": 0.2181796282529831, "learning_rate": 0.000286151878075123, "loss": 3.3682, "step": 577700 }, { "epoch": 92.448, "grad_norm": 0.24527274072170258, "learning_rate": 0.00028614947797911915, "loss": 3.2088, "step": 577800 }, { "epoch": 92.464, "grad_norm": 0.2320236712694168, "learning_rate": 0.0002861470778831153, "loss": 3.1908, "step": 577900 }, { "epoch": 92.48, "grad_norm": 0.22580017149448395, "learning_rate": 0.0002861446777871115, "loss": 3.1646, "step": 578000 }, { "epoch": 92.496, "grad_norm": 0.2213822454214096, "learning_rate": 0.0002861422776911076, "loss": 3.4703, "step": 578100 }, { "epoch": 92.512, "grad_norm": 0.21855008602142334, "learning_rate": 0.00028613987759510377, "loss": 3.0563, "step": 578200 }, { "epoch": 92.528, "grad_norm": 0.2313171774148941, "learning_rate": 0.00028613750150005996, "loss": 3.3578, "step": 578300 }, { "epoch": 92.544, "grad_norm": 0.19244647026062012, "learning_rate": 0.00028613510140405613, "loss": 3.0794, "step": 578400 }, { "epoch": 92.56, "grad_norm": 0.2732357978820801, "learning_rate": 0.0002861327013080523, "loss": 3.4857, "step": 578500 }, { "epoch": 92.576, "grad_norm": 0.20120055973529816, "learning_rate": 0.00028613030121204847, "loss": 3.4613, "step": 578600 }, { "epoch": 92.592, "grad_norm": 0.2631908357143402, "learning_rate": 0.00028612790111604464, "loss": 3.2488, "step": 578700 }, { "epoch": 92.608, "grad_norm": 0.25573375821113586, "learning_rate": 0.0002861255010200408, "loss": 3.5007, "step": 578800 }, { "epoch": 92.624, "grad_norm": 0.20656909048557281, "learning_rate": 0.000286123100924037, "loss": 3.3245, "step": 578900 }, { "epoch": 92.64, "grad_norm": 0.20724676549434662, "learning_rate": 0.0002861207008280331, "loss": 3.1364, "step": 579000 }, { "epoch": 92.656, "grad_norm": 0.206063911318779, "learning_rate": 0.00028611830073202926, "loss": 3.2487, "step": 579100 }, { "epoch": 92.672, "grad_norm": 0.19993562996387482, "learning_rate": 0.00028611590063602543, "loss": 2.9069, "step": 579200 }, { "epoch": 92.688, "grad_norm": 0.20589199662208557, "learning_rate": 0.0002861135005400216, "loss": 3.2999, "step": 579300 }, { "epoch": 92.704, "grad_norm": 0.29310309886932373, "learning_rate": 0.00028611110044401777, "loss": 3.361, "step": 579400 }, { "epoch": 92.72, "grad_norm": 0.220118910074234, "learning_rate": 0.0002861087003480139, "loss": 3.3661, "step": 579500 }, { "epoch": 92.736, "grad_norm": 0.189657062292099, "learning_rate": 0.00028610630025201005, "loss": 3.2295, "step": 579600 }, { "epoch": 92.752, "grad_norm": 0.23942863941192627, "learning_rate": 0.0002861039001560062, "loss": 3.4997, "step": 579700 }, { "epoch": 92.768, "grad_norm": 0.21263492107391357, "learning_rate": 0.0002861015000600024, "loss": 3.5013, "step": 579800 }, { "epoch": 92.784, "grad_norm": 0.23787973821163177, "learning_rate": 0.00028609909996399856, "loss": 3.3221, "step": 579900 }, { "epoch": 92.8, "grad_norm": 0.20642219483852386, "learning_rate": 0.00028609669986799473, "loss": 3.1114, "step": 580000 }, { "epoch": 92.816, "grad_norm": 0.21952418982982635, "learning_rate": 0.00028609429977199084, "loss": 3.4856, "step": 580100 }, { "epoch": 92.832, "grad_norm": 0.2339348942041397, "learning_rate": 0.000286091899675987, "loss": 3.435, "step": 580200 }, { "epoch": 92.848, "grad_norm": 0.19900140166282654, "learning_rate": 0.0002860894995799832, "loss": 3.1474, "step": 580300 }, { "epoch": 92.864, "grad_norm": 0.25460749864578247, "learning_rate": 0.00028608709948397935, "loss": 3.3628, "step": 580400 }, { "epoch": 92.88, "grad_norm": 0.21776685118675232, "learning_rate": 0.0002860846993879755, "loss": 3.4871, "step": 580500 }, { "epoch": 92.896, "grad_norm": 0.2303522378206253, "learning_rate": 0.0002860822992919717, "loss": 3.5047, "step": 580600 }, { "epoch": 92.912, "grad_norm": 0.23070001602172852, "learning_rate": 0.0002860798991959678, "loss": 3.515, "step": 580700 }, { "epoch": 92.928, "grad_norm": 0.2267686426639557, "learning_rate": 0.00028607749909996397, "loss": 3.2404, "step": 580800 }, { "epoch": 92.944, "grad_norm": 0.20990382134914398, "learning_rate": 0.00028607509900396014, "loss": 3.384, "step": 580900 }, { "epoch": 92.96, "grad_norm": 0.32669347524642944, "learning_rate": 0.0002860726989079563, "loss": 3.5287, "step": 581000 }, { "epoch": 92.976, "grad_norm": 0.24593308568000793, "learning_rate": 0.0002860702988119525, "loss": 3.5976, "step": 581100 }, { "epoch": 92.992, "grad_norm": 0.1984982043504715, "learning_rate": 0.0002860678987159486, "loss": 3.2682, "step": 581200 }, { "epoch": 93.008, "grad_norm": 0.2342703640460968, "learning_rate": 0.00028606549861994476, "loss": 3.2134, "step": 581300 }, { "epoch": 93.024, "grad_norm": 0.22064271569252014, "learning_rate": 0.00028606309852394093, "loss": 3.2507, "step": 581400 }, { "epoch": 93.04, "grad_norm": 0.21151140332221985, "learning_rate": 0.0002860606984279371, "loss": 3.2312, "step": 581500 }, { "epoch": 93.056, "grad_norm": 0.2118089199066162, "learning_rate": 0.00028605829833193327, "loss": 3.0219, "step": 581600 }, { "epoch": 93.072, "grad_norm": 0.21714529395103455, "learning_rate": 0.00028605589823592944, "loss": 3.3271, "step": 581700 }, { "epoch": 93.088, "grad_norm": 0.22278068959712982, "learning_rate": 0.00028605349813992556, "loss": 3.1385, "step": 581800 }, { "epoch": 93.104, "grad_norm": 0.2809254825115204, "learning_rate": 0.0002860510980439217, "loss": 3.0568, "step": 581900 }, { "epoch": 93.12, "grad_norm": 0.24120883643627167, "learning_rate": 0.0002860486979479179, "loss": 3.7676, "step": 582000 }, { "epoch": 93.136, "grad_norm": 0.2378741204738617, "learning_rate": 0.00028604629785191406, "loss": 3.3853, "step": 582100 }, { "epoch": 93.152, "grad_norm": 0.2164318710565567, "learning_rate": 0.00028604389775591023, "loss": 3.6942, "step": 582200 }, { "epoch": 93.168, "grad_norm": 0.2272815853357315, "learning_rate": 0.00028604149765990635, "loss": 3.2602, "step": 582300 }, { "epoch": 93.184, "grad_norm": 0.21973513066768646, "learning_rate": 0.0002860391215648626, "loss": 3.2779, "step": 582400 }, { "epoch": 93.2, "grad_norm": 0.29517069458961487, "learning_rate": 0.00028603672146885876, "loss": 3.2387, "step": 582500 }, { "epoch": 93.216, "grad_norm": 0.23826152086257935, "learning_rate": 0.00028603432137285493, "loss": 3.3861, "step": 582600 }, { "epoch": 93.232, "grad_norm": 0.19887636601924896, "learning_rate": 0.00028603194527781106, "loss": 3.2041, "step": 582700 }, { "epoch": 93.248, "grad_norm": 0.20587937533855438, "learning_rate": 0.00028602954518180723, "loss": 3.2221, "step": 582800 }, { "epoch": 93.264, "grad_norm": 0.21652254462242126, "learning_rate": 0.0002860271450858034, "loss": 3.4479, "step": 582900 }, { "epoch": 93.28, "grad_norm": 0.2670235335826874, "learning_rate": 0.00028602474498979957, "loss": 3.4117, "step": 583000 }, { "epoch": 93.296, "grad_norm": 0.22938990592956543, "learning_rate": 0.00028602234489379574, "loss": 3.2234, "step": 583100 }, { "epoch": 93.312, "grad_norm": 0.24898001551628113, "learning_rate": 0.00028601994479779186, "loss": 3.4333, "step": 583200 }, { "epoch": 93.328, "grad_norm": 0.2145637422800064, "learning_rate": 0.000286017544701788, "loss": 3.4309, "step": 583300 }, { "epoch": 93.344, "grad_norm": 0.20967821776866913, "learning_rate": 0.0002860151446057842, "loss": 3.4431, "step": 583400 }, { "epoch": 93.36, "grad_norm": 0.2510364353656769, "learning_rate": 0.00028601274450978036, "loss": 3.395, "step": 583500 }, { "epoch": 93.376, "grad_norm": 0.22854730486869812, "learning_rate": 0.00028601034441377653, "loss": 3.0428, "step": 583600 }, { "epoch": 93.392, "grad_norm": 0.24159136414527893, "learning_rate": 0.0002860079443177727, "loss": 3.1068, "step": 583700 }, { "epoch": 93.408, "grad_norm": 0.2101896107196808, "learning_rate": 0.0002860055442217688, "loss": 3.0453, "step": 583800 }, { "epoch": 93.424, "grad_norm": 0.23543773591518402, "learning_rate": 0.000286003144125765, "loss": 3.3744, "step": 583900 }, { "epoch": 93.44, "grad_norm": 0.19677650928497314, "learning_rate": 0.00028600074402976115, "loss": 3.3894, "step": 584000 }, { "epoch": 93.456, "grad_norm": 0.21982444822788239, "learning_rate": 0.0002859983439337573, "loss": 3.5357, "step": 584100 }, { "epoch": 93.472, "grad_norm": 0.22579291462898254, "learning_rate": 0.0002859959438377535, "loss": 3.2506, "step": 584200 }, { "epoch": 93.488, "grad_norm": 0.2285909354686737, "learning_rate": 0.0002859935437417496, "loss": 3.2104, "step": 584300 }, { "epoch": 93.504, "grad_norm": 0.1990664303302765, "learning_rate": 0.0002859911436457458, "loss": 3.5368, "step": 584400 }, { "epoch": 93.52, "grad_norm": 0.1826477348804474, "learning_rate": 0.00028598874354974195, "loss": 3.275, "step": 584500 }, { "epoch": 93.536, "grad_norm": 0.23065310716629028, "learning_rate": 0.0002859863434537381, "loss": 3.5469, "step": 584600 }, { "epoch": 93.552, "grad_norm": 0.21018242835998535, "learning_rate": 0.0002859839433577343, "loss": 3.2051, "step": 584700 }, { "epoch": 93.568, "grad_norm": 0.21878968179225922, "learning_rate": 0.00028598154326173045, "loss": 3.2663, "step": 584800 }, { "epoch": 93.584, "grad_norm": 0.2526184320449829, "learning_rate": 0.00028597916716668664, "loss": 3.1111, "step": 584900 }, { "epoch": 93.6, "grad_norm": 0.23974037170410156, "learning_rate": 0.0002859767670706828, "loss": 3.4041, "step": 585000 }, { "epoch": 93.616, "grad_norm": 0.2376585751771927, "learning_rate": 0.000285974366974679, "loss": 3.3408, "step": 585100 }, { "epoch": 93.632, "grad_norm": 0.21524523198604584, "learning_rate": 0.0002859719668786751, "loss": 3.4153, "step": 585200 }, { "epoch": 93.648, "grad_norm": 0.22155369818210602, "learning_rate": 0.00028596956678267126, "loss": 3.3212, "step": 585300 }, { "epoch": 93.664, "grad_norm": 0.2647278308868408, "learning_rate": 0.00028596716668666743, "loss": 3.6257, "step": 585400 }, { "epoch": 93.68, "grad_norm": 0.18570353090763092, "learning_rate": 0.0002859647665906636, "loss": 3.3664, "step": 585500 }, { "epoch": 93.696, "grad_norm": 0.23046262562274933, "learning_rate": 0.00028596236649465977, "loss": 3.1563, "step": 585600 }, { "epoch": 93.712, "grad_norm": 0.22783596813678741, "learning_rate": 0.00028595996639865594, "loss": 3.2512, "step": 585700 }, { "epoch": 93.728, "grad_norm": 0.2746724486351013, "learning_rate": 0.00028595756630265206, "loss": 3.0616, "step": 585800 }, { "epoch": 93.744, "grad_norm": 0.21427220106124878, "learning_rate": 0.0002859551662066482, "loss": 3.1105, "step": 585900 }, { "epoch": 93.76, "grad_norm": 0.21911174058914185, "learning_rate": 0.0002859527661106444, "loss": 3.36, "step": 586000 }, { "epoch": 93.776, "grad_norm": 0.21921710669994354, "learning_rate": 0.00028595036601464056, "loss": 3.2088, "step": 586100 }, { "epoch": 93.792, "grad_norm": 0.24615773558616638, "learning_rate": 0.00028594796591863673, "loss": 3.1813, "step": 586200 }, { "epoch": 93.808, "grad_norm": 0.3070753812789917, "learning_rate": 0.00028594556582263285, "loss": 3.2554, "step": 586300 }, { "epoch": 93.824, "grad_norm": 0.22500208020210266, "learning_rate": 0.000285943165726629, "loss": 3.2662, "step": 586400 }, { "epoch": 93.84, "grad_norm": 0.21056605875492096, "learning_rate": 0.0002859407656306252, "loss": 3.0616, "step": 586500 }, { "epoch": 93.856, "grad_norm": 0.23152212798595428, "learning_rate": 0.00028593836553462136, "loss": 3.4997, "step": 586600 }, { "epoch": 93.872, "grad_norm": 0.3269975483417511, "learning_rate": 0.0002859359654386175, "loss": 3.3708, "step": 586700 }, { "epoch": 93.888, "grad_norm": 0.26557618379592896, "learning_rate": 0.0002859335653426137, "loss": 3.2254, "step": 586800 }, { "epoch": 93.904, "grad_norm": 0.23735295236110687, "learning_rate": 0.0002859311652466098, "loss": 3.1901, "step": 586900 }, { "epoch": 93.92, "grad_norm": 0.22823069989681244, "learning_rate": 0.000285928765150606, "loss": 3.2941, "step": 587000 }, { "epoch": 93.936, "grad_norm": 0.18505319952964783, "learning_rate": 0.00028592636505460215, "loss": 3.4987, "step": 587100 }, { "epoch": 93.952, "grad_norm": 0.18919281661510468, "learning_rate": 0.0002859239649585983, "loss": 3.214, "step": 587200 }, { "epoch": 93.968, "grad_norm": 0.20495931804180145, "learning_rate": 0.0002859215648625945, "loss": 3.2814, "step": 587300 }, { "epoch": 93.984, "grad_norm": 0.19630426168441772, "learning_rate": 0.0002859191647665906, "loss": 3.2872, "step": 587400 }, { "epoch": 94.0, "grad_norm": 0.2065628319978714, "learning_rate": 0.00028591676467058677, "loss": 3.209, "step": 587500 }, { "epoch": 94.016, "grad_norm": 0.20131786167621613, "learning_rate": 0.00028591436457458294, "loss": 3.0299, "step": 587600 }, { "epoch": 94.032, "grad_norm": 0.3016274869441986, "learning_rate": 0.0002859119644785791, "loss": 3.0292, "step": 587700 }, { "epoch": 94.048, "grad_norm": 0.22840379178524017, "learning_rate": 0.0002859095643825753, "loss": 2.9989, "step": 587800 }, { "epoch": 94.064, "grad_norm": 0.18794792890548706, "learning_rate": 0.00028590716428657145, "loss": 3.0873, "step": 587900 }, { "epoch": 94.08, "grad_norm": 0.25050652027130127, "learning_rate": 0.00028590476419056756, "loss": 3.4086, "step": 588000 }, { "epoch": 94.096, "grad_norm": 0.26574671268463135, "learning_rate": 0.0002859023640945638, "loss": 3.4163, "step": 588100 }, { "epoch": 94.112, "grad_norm": 0.21301870048046112, "learning_rate": 0.00028589996399855995, "loss": 3.033, "step": 588200 }, { "epoch": 94.128, "grad_norm": 0.22252333164215088, "learning_rate": 0.00028589756390255607, "loss": 3.1724, "step": 588300 }, { "epoch": 94.144, "grad_norm": 0.23992778360843658, "learning_rate": 0.00028589516380655224, "loss": 3.449, "step": 588400 }, { "epoch": 94.16, "grad_norm": 0.23727191984653473, "learning_rate": 0.0002858927637105484, "loss": 3.6674, "step": 588500 }, { "epoch": 94.176, "grad_norm": 0.21550658345222473, "learning_rate": 0.0002858903636145446, "loss": 3.3356, "step": 588600 }, { "epoch": 94.192, "grad_norm": 0.2132403701543808, "learning_rate": 0.00028588796351854075, "loss": 3.601, "step": 588700 }, { "epoch": 94.208, "grad_norm": 0.22626931965351105, "learning_rate": 0.00028588558742349693, "loss": 3.005, "step": 588800 }, { "epoch": 94.224, "grad_norm": 0.23692327737808228, "learning_rate": 0.00028588318732749305, "loss": 3.3836, "step": 588900 }, { "epoch": 94.24, "grad_norm": 0.257031112909317, "learning_rate": 0.0002858807872314892, "loss": 3.246, "step": 589000 }, { "epoch": 94.256, "grad_norm": 0.2312926948070526, "learning_rate": 0.0002858783871354854, "loss": 3.4136, "step": 589100 }, { "epoch": 94.272, "grad_norm": 0.24025245010852814, "learning_rate": 0.00028587598703948156, "loss": 3.2726, "step": 589200 }, { "epoch": 94.288, "grad_norm": 0.22059328854084015, "learning_rate": 0.0002858735869434777, "loss": 3.5014, "step": 589300 }, { "epoch": 94.304, "grad_norm": 0.23081867396831512, "learning_rate": 0.00028587118684747384, "loss": 3.4652, "step": 589400 }, { "epoch": 94.32, "grad_norm": 0.21962495148181915, "learning_rate": 0.00028586878675147, "loss": 3.2895, "step": 589500 }, { "epoch": 94.336, "grad_norm": 0.2187066674232483, "learning_rate": 0.0002858663866554662, "loss": 3.4013, "step": 589600 }, { "epoch": 94.352, "grad_norm": 0.2224830687046051, "learning_rate": 0.00028586398655946235, "loss": 3.3175, "step": 589700 }, { "epoch": 94.368, "grad_norm": 0.2268897145986557, "learning_rate": 0.0002858615864634585, "loss": 3.0725, "step": 589800 }, { "epoch": 94.384, "grad_norm": 0.20699933171272278, "learning_rate": 0.0002858591863674547, "loss": 3.3527, "step": 589900 }, { "epoch": 94.4, "grad_norm": 0.22464817762374878, "learning_rate": 0.0002858567862714508, "loss": 3.4725, "step": 590000 }, { "epoch": 94.416, "grad_norm": 0.23188373446464539, "learning_rate": 0.00028585438617544697, "loss": 3.4117, "step": 590100 }, { "epoch": 94.432, "grad_norm": 0.21206721663475037, "learning_rate": 0.00028585198607944314, "loss": 3.3335, "step": 590200 }, { "epoch": 94.448, "grad_norm": 0.18949532508850098, "learning_rate": 0.0002858495859834393, "loss": 3.2554, "step": 590300 }, { "epoch": 94.464, "grad_norm": 0.20213110744953156, "learning_rate": 0.0002858471858874355, "loss": 3.3067, "step": 590400 }, { "epoch": 94.48, "grad_norm": 0.21618303656578064, "learning_rate": 0.0002858447857914316, "loss": 3.235, "step": 590500 }, { "epoch": 94.496, "grad_norm": 0.22793155908584595, "learning_rate": 0.00028584238569542776, "loss": 3.2929, "step": 590600 }, { "epoch": 94.512, "grad_norm": 0.21350187063217163, "learning_rate": 0.00028583998559942393, "loss": 3.097, "step": 590700 }, { "epoch": 94.528, "grad_norm": 0.23432369530200958, "learning_rate": 0.0002858376095043802, "loss": 3.1367, "step": 590800 }, { "epoch": 94.544, "grad_norm": 0.19393675029277802, "learning_rate": 0.0002858352094083763, "loss": 3.227, "step": 590900 }, { "epoch": 94.56, "grad_norm": 0.2530471384525299, "learning_rate": 0.00028583280931237246, "loss": 3.199, "step": 591000 }, { "epoch": 94.576, "grad_norm": 0.31334516406059265, "learning_rate": 0.00028583040921636863, "loss": 3.3621, "step": 591100 }, { "epoch": 94.592, "grad_norm": 0.20909591019153595, "learning_rate": 0.0002858280091203648, "loss": 3.1035, "step": 591200 }, { "epoch": 94.608, "grad_norm": 0.24616681039333344, "learning_rate": 0.00028582560902436097, "loss": 3.2992, "step": 591300 }, { "epoch": 94.624, "grad_norm": 0.22863896191120148, "learning_rate": 0.0002858232089283571, "loss": 3.3807, "step": 591400 }, { "epoch": 94.64, "grad_norm": 0.21407826244831085, "learning_rate": 0.00028582080883235325, "loss": 2.9907, "step": 591500 }, { "epoch": 94.656, "grad_norm": 0.21590180695056915, "learning_rate": 0.0002858184087363494, "loss": 3.2754, "step": 591600 }, { "epoch": 94.672, "grad_norm": 0.2181742936372757, "learning_rate": 0.0002858160086403456, "loss": 3.2484, "step": 591700 }, { "epoch": 94.688, "grad_norm": 0.24860164523124695, "learning_rate": 0.00028581360854434176, "loss": 3.2375, "step": 591800 }, { "epoch": 94.704, "grad_norm": 0.22996476292610168, "learning_rate": 0.00028581120844833793, "loss": 3.2741, "step": 591900 }, { "epoch": 94.72, "grad_norm": 0.24924755096435547, "learning_rate": 0.00028580880835233404, "loss": 3.3526, "step": 592000 }, { "epoch": 94.736, "grad_norm": 0.19682054221630096, "learning_rate": 0.0002858064082563302, "loss": 3.4193, "step": 592100 }, { "epoch": 94.752, "grad_norm": 0.22627626359462738, "learning_rate": 0.0002858040081603264, "loss": 3.2827, "step": 592200 }, { "epoch": 94.768, "grad_norm": 0.26685985922813416, "learning_rate": 0.00028580160806432255, "loss": 3.4791, "step": 592300 }, { "epoch": 94.784, "grad_norm": 0.23975403606891632, "learning_rate": 0.0002857992079683187, "loss": 3.3769, "step": 592400 }, { "epoch": 94.8, "grad_norm": 0.21896295249462128, "learning_rate": 0.00028579680787231483, "loss": 3.1532, "step": 592500 }, { "epoch": 94.816, "grad_norm": 0.2204912006855011, "learning_rate": 0.000285794407776311, "loss": 3.3538, "step": 592600 }, { "epoch": 94.832, "grad_norm": 0.2482822984457016, "learning_rate": 0.00028579200768030717, "loss": 3.3004, "step": 592700 }, { "epoch": 94.848, "grad_norm": 0.20391632616519928, "learning_rate": 0.00028578960758430334, "loss": 3.3543, "step": 592800 }, { "epoch": 94.864, "grad_norm": 0.19112515449523926, "learning_rate": 0.0002857872074882995, "loss": 3.3162, "step": 592900 }, { "epoch": 94.88, "grad_norm": 0.21175888180732727, "learning_rate": 0.0002857848313932557, "loss": 3.5258, "step": 593000 }, { "epoch": 94.896, "grad_norm": 0.2300572246313095, "learning_rate": 0.00028578243129725187, "loss": 3.5422, "step": 593100 }, { "epoch": 94.912, "grad_norm": 0.23502790927886963, "learning_rate": 0.00028578003120124804, "loss": 3.4557, "step": 593200 }, { "epoch": 94.928, "grad_norm": 0.22246569395065308, "learning_rate": 0.0002857776311052442, "loss": 3.2171, "step": 593300 }, { "epoch": 94.944, "grad_norm": 0.21184633672237396, "learning_rate": 0.0002857752310092403, "loss": 3.1531, "step": 593400 }, { "epoch": 94.96, "grad_norm": 0.2048822045326233, "learning_rate": 0.0002857728309132365, "loss": 3.3998, "step": 593500 }, { "epoch": 94.976, "grad_norm": 0.20890416204929352, "learning_rate": 0.00028577043081723266, "loss": 3.3343, "step": 593600 }, { "epoch": 94.992, "grad_norm": 0.3486609160900116, "learning_rate": 0.00028576803072122883, "loss": 3.1278, "step": 593700 }, { "epoch": 95.008, "grad_norm": 0.2070513516664505, "learning_rate": 0.000285765630625225, "loss": 2.9596, "step": 593800 }, { "epoch": 95.024, "grad_norm": 0.2147464007139206, "learning_rate": 0.00028576323052922117, "loss": 3.2952, "step": 593900 }, { "epoch": 95.04, "grad_norm": 0.2226506769657135, "learning_rate": 0.0002857608304332173, "loss": 3.0981, "step": 594000 }, { "epoch": 95.056, "grad_norm": 0.20001396536827087, "learning_rate": 0.00028575843033721345, "loss": 3.0341, "step": 594100 }, { "epoch": 95.072, "grad_norm": 0.2266564518213272, "learning_rate": 0.0002857560302412096, "loss": 3.1942, "step": 594200 }, { "epoch": 95.088, "grad_norm": 0.23040850460529327, "learning_rate": 0.0002857536301452058, "loss": 3.4627, "step": 594300 }, { "epoch": 95.104, "grad_norm": 0.23750241100788116, "learning_rate": 0.00028575123004920196, "loss": 3.27, "step": 594400 }, { "epoch": 95.12, "grad_norm": 0.25487127900123596, "learning_rate": 0.0002857488299531981, "loss": 3.5261, "step": 594500 }, { "epoch": 95.136, "grad_norm": 0.2698226571083069, "learning_rate": 0.00028574642985719424, "loss": 3.2706, "step": 594600 }, { "epoch": 95.152, "grad_norm": 0.2567874789237976, "learning_rate": 0.0002857440297611904, "loss": 3.4222, "step": 594700 }, { "epoch": 95.168, "grad_norm": 0.25426891446113586, "learning_rate": 0.0002857416296651866, "loss": 3.5047, "step": 594800 }, { "epoch": 95.184, "grad_norm": 0.2270069569349289, "learning_rate": 0.00028573922956918275, "loss": 3.087, "step": 594900 }, { "epoch": 95.2, "grad_norm": 0.24984213709831238, "learning_rate": 0.00028573685347413894, "loss": 2.9292, "step": 595000 }, { "epoch": 95.216, "grad_norm": 0.22331245243549347, "learning_rate": 0.0002857344533781351, "loss": 3.2082, "step": 595100 }, { "epoch": 95.232, "grad_norm": 0.25792786478996277, "learning_rate": 0.0002857320532821313, "loss": 3.3209, "step": 595200 }, { "epoch": 95.248, "grad_norm": 0.24845680594444275, "learning_rate": 0.00028572965318612745, "loss": 3.4526, "step": 595300 }, { "epoch": 95.264, "grad_norm": 0.22585606575012207, "learning_rate": 0.00028572725309012356, "loss": 3.1174, "step": 595400 }, { "epoch": 95.28, "grad_norm": 0.2059960812330246, "learning_rate": 0.00028572485299411973, "loss": 3.1219, "step": 595500 }, { "epoch": 95.296, "grad_norm": 0.22620223462581635, "learning_rate": 0.0002857224528981159, "loss": 3.0154, "step": 595600 }, { "epoch": 95.312, "grad_norm": 0.2267422080039978, "learning_rate": 0.00028572005280211207, "loss": 3.3147, "step": 595700 }, { "epoch": 95.328, "grad_norm": 0.22993837296962738, "learning_rate": 0.00028571765270610824, "loss": 3.2484, "step": 595800 }, { "epoch": 95.344, "grad_norm": 0.19279709458351135, "learning_rate": 0.0002857152526101044, "loss": 3.145, "step": 595900 }, { "epoch": 95.36, "grad_norm": 0.24150507152080536, "learning_rate": 0.0002857128525141005, "loss": 3.2864, "step": 596000 }, { "epoch": 95.376, "grad_norm": 0.21667680144309998, "learning_rate": 0.0002857104524180967, "loss": 3.2588, "step": 596100 }, { "epoch": 95.392, "grad_norm": 0.20911553502082825, "learning_rate": 0.00028570805232209286, "loss": 3.4002, "step": 596200 }, { "epoch": 95.408, "grad_norm": 0.22136501967906952, "learning_rate": 0.00028570565222608903, "loss": 3.4997, "step": 596300 }, { "epoch": 95.424, "grad_norm": 0.2603466212749481, "learning_rate": 0.0002857032521300852, "loss": 3.4816, "step": 596400 }, { "epoch": 95.44, "grad_norm": 0.27272462844848633, "learning_rate": 0.0002857008520340813, "loss": 3.4315, "step": 596500 }, { "epoch": 95.456, "grad_norm": 0.2266336977481842, "learning_rate": 0.0002856984519380775, "loss": 3.3678, "step": 596600 }, { "epoch": 95.472, "grad_norm": 0.21703729033470154, "learning_rate": 0.00028569605184207365, "loss": 3.2338, "step": 596700 }, { "epoch": 95.488, "grad_norm": 0.2503680884838104, "learning_rate": 0.0002856936517460698, "loss": 3.2628, "step": 596800 }, { "epoch": 95.504, "grad_norm": 0.2051069438457489, "learning_rate": 0.000285691251650066, "loss": 3.3531, "step": 596900 }, { "epoch": 95.52, "grad_norm": 0.22656872868537903, "learning_rate": 0.0002856888755550222, "loss": 3.1602, "step": 597000 }, { "epoch": 95.536, "grad_norm": 0.2092975676059723, "learning_rate": 0.00028568647545901835, "loss": 3.7186, "step": 597100 }, { "epoch": 95.552, "grad_norm": 0.22633928060531616, "learning_rate": 0.0002856840753630145, "loss": 3.225, "step": 597200 }, { "epoch": 95.568, "grad_norm": 0.22175456583499908, "learning_rate": 0.0002856816752670107, "loss": 3.4079, "step": 597300 }, { "epoch": 95.584, "grad_norm": 0.2256758064031601, "learning_rate": 0.0002856792751710068, "loss": 3.3437, "step": 597400 }, { "epoch": 95.6, "grad_norm": 0.2220381200313568, "learning_rate": 0.00028567687507500297, "loss": 3.5351, "step": 597500 }, { "epoch": 95.616, "grad_norm": 0.2190844863653183, "learning_rate": 0.00028567447497899914, "loss": 3.4494, "step": 597600 }, { "epoch": 95.632, "grad_norm": 0.22759532928466797, "learning_rate": 0.0002856720748829953, "loss": 3.5, "step": 597700 }, { "epoch": 95.648, "grad_norm": 0.24167296290397644, "learning_rate": 0.0002856696987879515, "loss": 3.4093, "step": 597800 }, { "epoch": 95.664, "grad_norm": 0.21727347373962402, "learning_rate": 0.00028566729869194767, "loss": 3.3567, "step": 597900 }, { "epoch": 95.68, "grad_norm": 0.2305884063243866, "learning_rate": 0.0002856648985959438, "loss": 3.232, "step": 598000 }, { "epoch": 95.696, "grad_norm": 0.2266814261674881, "learning_rate": 0.00028566249849993995, "loss": 3.1822, "step": 598100 }, { "epoch": 95.712, "grad_norm": 0.2707599401473999, "learning_rate": 0.0002856600984039361, "loss": 3.2867, "step": 598200 }, { "epoch": 95.728, "grad_norm": 0.21795891225337982, "learning_rate": 0.0002856576983079323, "loss": 3.095, "step": 598300 }, { "epoch": 95.744, "grad_norm": 0.22543436288833618, "learning_rate": 0.00028565529821192846, "loss": 3.2668, "step": 598400 }, { "epoch": 95.76, "grad_norm": 0.2147243320941925, "learning_rate": 0.0002856528981159246, "loss": 3.2968, "step": 598500 }, { "epoch": 95.776, "grad_norm": 0.21656152606010437, "learning_rate": 0.00028565049801992074, "loss": 3.3264, "step": 598600 }, { "epoch": 95.792, "grad_norm": 0.2224181443452835, "learning_rate": 0.0002856480979239169, "loss": 3.7044, "step": 598700 }, { "epoch": 95.808, "grad_norm": 0.21369607746601105, "learning_rate": 0.0002856456978279131, "loss": 3.3042, "step": 598800 }, { "epoch": 95.824, "grad_norm": 0.23280933499336243, "learning_rate": 0.00028564329773190925, "loss": 3.3425, "step": 598900 }, { "epoch": 95.84, "grad_norm": 0.25428760051727295, "learning_rate": 0.0002856408976359054, "loss": 3.672, "step": 599000 }, { "epoch": 95.856, "grad_norm": 0.23463310301303864, "learning_rate": 0.00028563849753990154, "loss": 3.3218, "step": 599100 }, { "epoch": 95.872, "grad_norm": 0.21253357827663422, "learning_rate": 0.0002856360974438977, "loss": 3.5356, "step": 599200 }, { "epoch": 95.888, "grad_norm": 0.2141728699207306, "learning_rate": 0.0002856336973478939, "loss": 3.3023, "step": 599300 }, { "epoch": 95.904, "grad_norm": 0.21563608944416046, "learning_rate": 0.00028563129725189004, "loss": 3.4662, "step": 599400 }, { "epoch": 95.92, "grad_norm": 0.19720785319805145, "learning_rate": 0.0002856288971558862, "loss": 3.0648, "step": 599500 }, { "epoch": 95.936, "grad_norm": 0.21825763583183289, "learning_rate": 0.0002856264970598824, "loss": 3.5876, "step": 599600 }, { "epoch": 95.952, "grad_norm": 0.20247408747673035, "learning_rate": 0.00028562409696387855, "loss": 3.2869, "step": 599700 }, { "epoch": 95.968, "grad_norm": 0.21993711590766907, "learning_rate": 0.0002856216968678747, "loss": 3.1233, "step": 599800 }, { "epoch": 95.984, "grad_norm": 0.21363361179828644, "learning_rate": 0.0002856192967718709, "loss": 3.3481, "step": 599900 }, { "epoch": 96.0, "grad_norm": 0.22291898727416992, "learning_rate": 0.000285616896675867, "loss": 3.304, "step": 600000 }, { "epoch": 96.016, "grad_norm": 0.22917704284191132, "learning_rate": 0.0002856144965798632, "loss": 3.331, "step": 600100 }, { "epoch": 96.032, "grad_norm": 0.21879549324512482, "learning_rate": 0.00028561209648385934, "loss": 3.341, "step": 600200 }, { "epoch": 96.048, "grad_norm": 0.2392585277557373, "learning_rate": 0.0002856096963878555, "loss": 3.4278, "step": 600300 }, { "epoch": 96.064, "grad_norm": 0.2479548454284668, "learning_rate": 0.0002856072962918517, "loss": 3.367, "step": 600400 }, { "epoch": 96.08, "grad_norm": 0.23781907558441162, "learning_rate": 0.0002856048961958478, "loss": 3.2438, "step": 600500 }, { "epoch": 96.096, "grad_norm": 0.2256447672843933, "learning_rate": 0.00028560249609984397, "loss": 3.2907, "step": 600600 }, { "epoch": 96.112, "grad_norm": 0.231655091047287, "learning_rate": 0.00028560012000480015, "loss": 3.2842, "step": 600700 }, { "epoch": 96.128, "grad_norm": 0.21018409729003906, "learning_rate": 0.0002855977199087963, "loss": 3.4449, "step": 600800 }, { "epoch": 96.144, "grad_norm": 0.23568317294120789, "learning_rate": 0.0002855953198127925, "loss": 3.6383, "step": 600900 }, { "epoch": 96.16, "grad_norm": 0.19802354276180267, "learning_rate": 0.00028559291971678866, "loss": 3.3704, "step": 601000 }, { "epoch": 96.176, "grad_norm": 0.2993736267089844, "learning_rate": 0.0002855905196207848, "loss": 3.4727, "step": 601100 }, { "epoch": 96.192, "grad_norm": 0.21036085486412048, "learning_rate": 0.00028558811952478095, "loss": 3.3403, "step": 601200 }, { "epoch": 96.208, "grad_norm": 0.2320370227098465, "learning_rate": 0.0002855857434297372, "loss": 3.1506, "step": 601300 }, { "epoch": 96.224, "grad_norm": 0.24175114929676056, "learning_rate": 0.0002855833433337333, "loss": 3.4549, "step": 601400 }, { "epoch": 96.24, "grad_norm": 0.2422960102558136, "learning_rate": 0.00028558094323772947, "loss": 3.1787, "step": 601500 }, { "epoch": 96.256, "grad_norm": 0.23370137810707092, "learning_rate": 0.00028557854314172564, "loss": 3.3479, "step": 601600 }, { "epoch": 96.272, "grad_norm": 0.2232499122619629, "learning_rate": 0.0002855761430457218, "loss": 3.2963, "step": 601700 }, { "epoch": 96.288, "grad_norm": 0.2128196656703949, "learning_rate": 0.000285573742949718, "loss": 3.1385, "step": 601800 }, { "epoch": 96.304, "grad_norm": 0.22125102579593658, "learning_rate": 0.00028557134285371415, "loss": 3.3142, "step": 601900 }, { "epoch": 96.32, "grad_norm": 0.23901867866516113, "learning_rate": 0.00028556894275771026, "loss": 3.7283, "step": 602000 }, { "epoch": 96.336, "grad_norm": 0.24254681169986725, "learning_rate": 0.00028556654266170643, "loss": 3.2893, "step": 602100 }, { "epoch": 96.352, "grad_norm": 0.23294085264205933, "learning_rate": 0.0002855641425657026, "loss": 3.3255, "step": 602200 }, { "epoch": 96.368, "grad_norm": 0.2431686967611313, "learning_rate": 0.00028556174246969877, "loss": 3.5516, "step": 602300 }, { "epoch": 96.384, "grad_norm": 0.26134422421455383, "learning_rate": 0.00028555934237369494, "loss": 3.1848, "step": 602400 }, { "epoch": 96.4, "grad_norm": 0.2328488528728485, "learning_rate": 0.00028555694227769106, "loss": 3.4137, "step": 602500 }, { "epoch": 96.416, "grad_norm": 0.21921634674072266, "learning_rate": 0.0002855545421816872, "loss": 3.5121, "step": 602600 }, { "epoch": 96.432, "grad_norm": 0.23854656517505646, "learning_rate": 0.0002855521420856834, "loss": 3.2802, "step": 602700 }, { "epoch": 96.448, "grad_norm": 0.21179726719856262, "learning_rate": 0.00028554974198967956, "loss": 3.2146, "step": 602800 }, { "epoch": 96.464, "grad_norm": 0.24020636081695557, "learning_rate": 0.00028554734189367573, "loss": 3.2918, "step": 602900 }, { "epoch": 96.48, "grad_norm": 0.2231982797384262, "learning_rate": 0.0002855449417976719, "loss": 3.5523, "step": 603000 }, { "epoch": 96.496, "grad_norm": 0.18913500010967255, "learning_rate": 0.000285542541701668, "loss": 3.3277, "step": 603100 }, { "epoch": 96.512, "grad_norm": 0.24146224558353424, "learning_rate": 0.0002855401416056642, "loss": 3.7347, "step": 603200 }, { "epoch": 96.528, "grad_norm": 0.23302996158599854, "learning_rate": 0.00028553774150966036, "loss": 3.2046, "step": 603300 }, { "epoch": 96.544, "grad_norm": 0.22378571331501007, "learning_rate": 0.0002855353414136565, "loss": 3.2374, "step": 603400 }, { "epoch": 96.56, "grad_norm": 0.21494945883750916, "learning_rate": 0.0002855329413176527, "loss": 3.2517, "step": 603500 }, { "epoch": 96.576, "grad_norm": 0.21504370868206024, "learning_rate": 0.0002855305412216488, "loss": 3.4283, "step": 603600 }, { "epoch": 96.592, "grad_norm": 0.22507824003696442, "learning_rate": 0.000285528141125645, "loss": 3.2145, "step": 603700 }, { "epoch": 96.608, "grad_norm": 0.2547476887702942, "learning_rate": 0.00028552574102964115, "loss": 3.4388, "step": 603800 }, { "epoch": 96.624, "grad_norm": 0.2271728515625, "learning_rate": 0.0002855233409336373, "loss": 3.1293, "step": 603900 }, { "epoch": 96.64, "grad_norm": 0.2463804930448532, "learning_rate": 0.0002855209408376335, "loss": 3.1975, "step": 604000 }, { "epoch": 96.656, "grad_norm": 0.2025107890367508, "learning_rate": 0.00028551854074162965, "loss": 3.5422, "step": 604100 }, { "epoch": 96.672, "grad_norm": 0.3198583424091339, "learning_rate": 0.00028551614064562577, "loss": 3.5458, "step": 604200 }, { "epoch": 96.688, "grad_norm": 0.24476909637451172, "learning_rate": 0.00028551374054962194, "loss": 3.2496, "step": 604300 }, { "epoch": 96.704, "grad_norm": 0.19842849671840668, "learning_rate": 0.0002855113404536181, "loss": 3.4658, "step": 604400 }, { "epoch": 96.72, "grad_norm": 0.25558191537857056, "learning_rate": 0.0002855089403576143, "loss": 3.6996, "step": 604500 }, { "epoch": 96.736, "grad_norm": 0.20555265247821808, "learning_rate": 0.00028550654026161045, "loss": 3.2487, "step": 604600 }, { "epoch": 96.752, "grad_norm": 0.23608358204364777, "learning_rate": 0.00028550414016560656, "loss": 3.3048, "step": 604700 }, { "epoch": 96.768, "grad_norm": 0.23814626038074493, "learning_rate": 0.0002855017640705628, "loss": 3.2647, "step": 604800 }, { "epoch": 96.784, "grad_norm": 0.1989974081516266, "learning_rate": 0.000285499363974559, "loss": 3.2265, "step": 604900 }, { "epoch": 96.8, "grad_norm": 0.241252139210701, "learning_rate": 0.00028549696387855514, "loss": 3.4702, "step": 605000 }, { "epoch": 96.816, "grad_norm": 0.22048689424991608, "learning_rate": 0.00028549456378255126, "loss": 3.0505, "step": 605100 }, { "epoch": 96.832, "grad_norm": 0.2050877958536148, "learning_rate": 0.0002854921636865474, "loss": 3.1101, "step": 605200 }, { "epoch": 96.848, "grad_norm": 0.21834488213062286, "learning_rate": 0.0002854897635905436, "loss": 3.4701, "step": 605300 }, { "epoch": 96.864, "grad_norm": 0.21084542572498322, "learning_rate": 0.0002854873874954998, "loss": 3.3026, "step": 605400 }, { "epoch": 96.88, "grad_norm": 0.235630065202713, "learning_rate": 0.00028548498739949595, "loss": 3.4506, "step": 605500 }, { "epoch": 96.896, "grad_norm": 0.23623348772525787, "learning_rate": 0.0002854825873034921, "loss": 3.4477, "step": 605600 }, { "epoch": 96.912, "grad_norm": 0.230832040309906, "learning_rate": 0.0002854801872074883, "loss": 3.1873, "step": 605700 }, { "epoch": 96.928, "grad_norm": 0.21558760106563568, "learning_rate": 0.00028547778711148446, "loss": 3.0362, "step": 605800 }, { "epoch": 96.944, "grad_norm": 0.19546976685523987, "learning_rate": 0.00028547538701548063, "loss": 3.146, "step": 605900 }, { "epoch": 96.96, "grad_norm": 0.2108405977487564, "learning_rate": 0.00028547298691947675, "loss": 3.3435, "step": 606000 }, { "epoch": 96.976, "grad_norm": 0.25688377022743225, "learning_rate": 0.0002854705868234729, "loss": 3.3436, "step": 606100 }, { "epoch": 96.992, "grad_norm": 0.24366192519664764, "learning_rate": 0.0002854681867274691, "loss": 3.2647, "step": 606200 }, { "epoch": 97.008, "grad_norm": 0.3079913258552551, "learning_rate": 0.00028546578663146525, "loss": 3.31, "step": 606300 }, { "epoch": 97.024, "grad_norm": 0.22586852312088013, "learning_rate": 0.0002854633865354614, "loss": 3.3663, "step": 606400 }, { "epoch": 97.04, "grad_norm": 0.24323342740535736, "learning_rate": 0.00028546098643945754, "loss": 3.127, "step": 606500 }, { "epoch": 97.056, "grad_norm": 0.20927488803863525, "learning_rate": 0.0002854585863434537, "loss": 3.4803, "step": 606600 }, { "epoch": 97.072, "grad_norm": 0.23506028950214386, "learning_rate": 0.0002854561862474499, "loss": 3.2552, "step": 606700 }, { "epoch": 97.088, "grad_norm": 0.20994023978710175, "learning_rate": 0.00028545378615144604, "loss": 3.4211, "step": 606800 }, { "epoch": 97.104, "grad_norm": 0.2398572564125061, "learning_rate": 0.0002854513860554422, "loss": 3.3357, "step": 606900 }, { "epoch": 97.12, "grad_norm": 0.22648067772388458, "learning_rate": 0.0002854489859594384, "loss": 3.3611, "step": 607000 }, { "epoch": 97.136, "grad_norm": 0.22437933087348938, "learning_rate": 0.0002854465858634345, "loss": 3.2125, "step": 607100 }, { "epoch": 97.152, "grad_norm": 0.21845991909503937, "learning_rate": 0.00028544418576743067, "loss": 3.1729, "step": 607200 }, { "epoch": 97.168, "grad_norm": 0.21280664205551147, "learning_rate": 0.00028544178567142684, "loss": 3.416, "step": 607300 }, { "epoch": 97.184, "grad_norm": 0.23820732533931732, "learning_rate": 0.000285439385575423, "loss": 3.2586, "step": 607400 }, { "epoch": 97.2, "grad_norm": 0.2508779764175415, "learning_rate": 0.0002854369854794192, "loss": 3.354, "step": 607500 }, { "epoch": 97.216, "grad_norm": 0.2347995936870575, "learning_rate": 0.0002854345853834153, "loss": 3.3406, "step": 607600 }, { "epoch": 97.232, "grad_norm": 0.2422303408384323, "learning_rate": 0.00028543218528741146, "loss": 3.3367, "step": 607700 }, { "epoch": 97.248, "grad_norm": 0.2242317944765091, "learning_rate": 0.00028542978519140763, "loss": 3.4688, "step": 607800 }, { "epoch": 97.264, "grad_norm": 0.2239970713853836, "learning_rate": 0.0002854273850954038, "loss": 3.4833, "step": 607900 }, { "epoch": 97.28, "grad_norm": 0.22829100489616394, "learning_rate": 0.00028542498499939997, "loss": 3.5744, "step": 608000 }, { "epoch": 97.296, "grad_norm": 0.19702139496803284, "learning_rate": 0.00028542258490339614, "loss": 3.3699, "step": 608100 }, { "epoch": 97.312, "grad_norm": 0.2466646432876587, "learning_rate": 0.00028542018480739225, "loss": 3.5506, "step": 608200 }, { "epoch": 97.328, "grad_norm": 0.21742872893810272, "learning_rate": 0.0002854177847113884, "loss": 2.9623, "step": 608300 }, { "epoch": 97.344, "grad_norm": 0.18319930136203766, "learning_rate": 0.0002854153846153846, "loss": 3.1359, "step": 608400 }, { "epoch": 97.36, "grad_norm": 0.2510473132133484, "learning_rate": 0.00028541298451938076, "loss": 3.5425, "step": 608500 }, { "epoch": 97.376, "grad_norm": 0.39960867166519165, "learning_rate": 0.00028541058442337693, "loss": 3.4059, "step": 608600 }, { "epoch": 97.392, "grad_norm": 0.23807504773139954, "learning_rate": 0.00028540818432737304, "loss": 3.5461, "step": 608700 }, { "epoch": 97.408, "grad_norm": 0.2638202905654907, "learning_rate": 0.0002854057842313692, "loss": 3.3496, "step": 608800 }, { "epoch": 97.424, "grad_norm": 0.23483234643936157, "learning_rate": 0.0002854033841353654, "loss": 3.3696, "step": 608900 }, { "epoch": 97.44, "grad_norm": 0.26239463686943054, "learning_rate": 0.00028540098403936155, "loss": 3.5037, "step": 609000 }, { "epoch": 97.456, "grad_norm": 0.22503939270973206, "learning_rate": 0.0002853985839433577, "loss": 3.0959, "step": 609100 }, { "epoch": 97.472, "grad_norm": 0.203696146607399, "learning_rate": 0.0002853961838473539, "loss": 3.5087, "step": 609200 }, { "epoch": 97.488, "grad_norm": 0.2249625027179718, "learning_rate": 0.00028539378375135, "loss": 3.3056, "step": 609300 }, { "epoch": 97.504, "grad_norm": 0.24046678841114044, "learning_rate": 0.00028539138365534617, "loss": 3.3151, "step": 609400 }, { "epoch": 97.52, "grad_norm": 0.20134972035884857, "learning_rate": 0.00028538898355934234, "loss": 3.0805, "step": 609500 }, { "epoch": 97.536, "grad_norm": 0.24085168540477753, "learning_rate": 0.0002853865834633385, "loss": 3.3199, "step": 609600 }, { "epoch": 97.552, "grad_norm": 0.22662381827831268, "learning_rate": 0.0002853841833673347, "loss": 3.1734, "step": 609700 }, { "epoch": 97.568, "grad_norm": 0.20207910239696503, "learning_rate": 0.0002853817832713308, "loss": 3.4383, "step": 609800 }, { "epoch": 97.584, "grad_norm": 0.21722915768623352, "learning_rate": 0.00028537938317532696, "loss": 3.4073, "step": 609900 }, { "epoch": 97.6, "grad_norm": 0.2275272160768509, "learning_rate": 0.00028537698307932313, "loss": 3.4618, "step": 610000 }, { "epoch": 97.616, "grad_norm": 0.21291860938072205, "learning_rate": 0.0002853745829833193, "loss": 3.2931, "step": 610100 }, { "epoch": 97.632, "grad_norm": 0.2150515913963318, "learning_rate": 0.00028537218288731547, "loss": 3.1855, "step": 610200 }, { "epoch": 97.648, "grad_norm": 0.22407354414463043, "learning_rate": 0.00028536978279131164, "loss": 3.1572, "step": 610300 }, { "epoch": 97.664, "grad_norm": 0.20501108467578888, "learning_rate": 0.00028536738269530776, "loss": 3.3893, "step": 610400 }, { "epoch": 97.68, "grad_norm": 0.22992455959320068, "learning_rate": 0.0002853649825993039, "loss": 3.1555, "step": 610500 }, { "epoch": 97.696, "grad_norm": 0.2163907140493393, "learning_rate": 0.0002853625825033001, "loss": 3.479, "step": 610600 }, { "epoch": 97.712, "grad_norm": 0.243418350815773, "learning_rate": 0.00028536018240729626, "loss": 3.325, "step": 610700 }, { "epoch": 97.728, "grad_norm": 0.18487276136875153, "learning_rate": 0.00028535778231129243, "loss": 3.3552, "step": 610800 }, { "epoch": 97.744, "grad_norm": 0.21128396689891815, "learning_rate": 0.0002853553822152886, "loss": 3.2912, "step": 610900 }, { "epoch": 97.76, "grad_norm": 0.22904300689697266, "learning_rate": 0.0002853529821192847, "loss": 3.3536, "step": 611000 }, { "epoch": 97.776, "grad_norm": 0.22056254744529724, "learning_rate": 0.0002853505820232809, "loss": 3.2027, "step": 611100 }, { "epoch": 97.792, "grad_norm": 0.2769961655139923, "learning_rate": 0.00028534818192727706, "loss": 3.2575, "step": 611200 }, { "epoch": 97.808, "grad_norm": 0.22923074662685394, "learning_rate": 0.0002853457818312732, "loss": 3.5042, "step": 611300 }, { "epoch": 97.824, "grad_norm": 0.23229067027568817, "learning_rate": 0.0002853433817352694, "loss": 3.4502, "step": 611400 }, { "epoch": 97.84, "grad_norm": 0.23677204549312592, "learning_rate": 0.0002853410056402256, "loss": 3.2223, "step": 611500 }, { "epoch": 97.856, "grad_norm": 0.22875341773033142, "learning_rate": 0.00028533860554422175, "loss": 3.5348, "step": 611600 }, { "epoch": 97.872, "grad_norm": 0.31173479557037354, "learning_rate": 0.0002853362054482179, "loss": 3.2546, "step": 611700 }, { "epoch": 97.888, "grad_norm": 0.2676738500595093, "learning_rate": 0.00028533380535221404, "loss": 3.4199, "step": 611800 }, { "epoch": 97.904, "grad_norm": 0.23304083943367004, "learning_rate": 0.0002853314052562102, "loss": 3.4924, "step": 611900 }, { "epoch": 97.92, "grad_norm": 0.18426913022994995, "learning_rate": 0.0002853290051602064, "loss": 3.2817, "step": 612000 }, { "epoch": 97.936, "grad_norm": 0.2191344052553177, "learning_rate": 0.00028532660506420254, "loss": 3.6855, "step": 612100 }, { "epoch": 97.952, "grad_norm": 0.23801879584789276, "learning_rate": 0.0002853242049681987, "loss": 3.3186, "step": 612200 }, { "epoch": 97.968, "grad_norm": 0.21832767128944397, "learning_rate": 0.0002853218048721949, "loss": 3.4929, "step": 612300 }, { "epoch": 97.984, "grad_norm": 0.24846479296684265, "learning_rate": 0.000285319404776191, "loss": 3.2356, "step": 612400 }, { "epoch": 98.0, "grad_norm": 0.25237852334976196, "learning_rate": 0.00028531700468018717, "loss": 3.338, "step": 612500 }, { "epoch": 98.016, "grad_norm": 0.2050340622663498, "learning_rate": 0.00028531460458418333, "loss": 3.1133, "step": 612600 }, { "epoch": 98.032, "grad_norm": 0.26038649678230286, "learning_rate": 0.0002853122044881795, "loss": 3.0697, "step": 612700 }, { "epoch": 98.048, "grad_norm": 0.25542765855789185, "learning_rate": 0.0002853098043921757, "loss": 3.2611, "step": 612800 }, { "epoch": 98.064, "grad_norm": 0.230669304728508, "learning_rate": 0.00028530740429617184, "loss": 3.3171, "step": 612900 }, { "epoch": 98.08, "grad_norm": 0.2168172001838684, "learning_rate": 0.00028530500420016796, "loss": 3.3634, "step": 613000 }, { "epoch": 98.096, "grad_norm": 0.22285284101963043, "learning_rate": 0.0002853026041041641, "loss": 3.2597, "step": 613100 }, { "epoch": 98.112, "grad_norm": 0.25928929448127747, "learning_rate": 0.0002853002040081603, "loss": 3.3788, "step": 613200 }, { "epoch": 98.128, "grad_norm": 0.23710690438747406, "learning_rate": 0.00028529780391215646, "loss": 3.3273, "step": 613300 }, { "epoch": 98.144, "grad_norm": 0.21528518199920654, "learning_rate": 0.00028529540381615263, "loss": 3.3752, "step": 613400 }, { "epoch": 98.16, "grad_norm": 0.2596631348133087, "learning_rate": 0.00028529300372014875, "loss": 3.5437, "step": 613500 }, { "epoch": 98.176, "grad_norm": 0.28394532203674316, "learning_rate": 0.0002852906036241449, "loss": 3.3937, "step": 613600 }, { "epoch": 98.192, "grad_norm": 0.23064373433589935, "learning_rate": 0.0002852882035281411, "loss": 3.5581, "step": 613700 }, { "epoch": 98.208, "grad_norm": 0.22094929218292236, "learning_rate": 0.00028528580343213726, "loss": 3.4288, "step": 613800 }, { "epoch": 98.224, "grad_norm": 0.2204524129629135, "learning_rate": 0.0002852834033361334, "loss": 3.313, "step": 613900 }, { "epoch": 98.24, "grad_norm": 0.22027687728405, "learning_rate": 0.0002852810032401296, "loss": 3.3323, "step": 614000 }, { "epoch": 98.256, "grad_norm": 0.2222013920545578, "learning_rate": 0.0002852786031441257, "loss": 3.4682, "step": 614100 }, { "epoch": 98.272, "grad_norm": 0.24597182869911194, "learning_rate": 0.0002852762030481219, "loss": 3.2488, "step": 614200 }, { "epoch": 98.288, "grad_norm": 0.22081920504570007, "learning_rate": 0.00028527380295211805, "loss": 3.26, "step": 614300 }, { "epoch": 98.304, "grad_norm": 0.22192060947418213, "learning_rate": 0.0002852714028561142, "loss": 3.2513, "step": 614400 }, { "epoch": 98.32, "grad_norm": 0.23192614316940308, "learning_rate": 0.0002852690027601104, "loss": 3.461, "step": 614500 }, { "epoch": 98.336, "grad_norm": 0.25059565901756287, "learning_rate": 0.0002852666026641065, "loss": 3.143, "step": 614600 }, { "epoch": 98.352, "grad_norm": 0.2301628142595291, "learning_rate": 0.00028526420256810267, "loss": 3.2538, "step": 614700 }, { "epoch": 98.368, "grad_norm": 0.20446966588497162, "learning_rate": 0.00028526180247209884, "loss": 3.2279, "step": 614800 }, { "epoch": 98.384, "grad_norm": 0.21458040177822113, "learning_rate": 0.000285259402376095, "loss": 3.5029, "step": 614900 }, { "epoch": 98.4, "grad_norm": 0.22754773497581482, "learning_rate": 0.0002852570022800912, "loss": 3.3931, "step": 615000 }, { "epoch": 98.416, "grad_norm": 0.24521510303020477, "learning_rate": 0.00028525462618504737, "loss": 3.6841, "step": 615100 }, { "epoch": 98.432, "grad_norm": 0.2578393816947937, "learning_rate": 0.00028525222608904354, "loss": 3.3936, "step": 615200 }, { "epoch": 98.448, "grad_norm": 0.21603095531463623, "learning_rate": 0.0002852498259930397, "loss": 3.1317, "step": 615300 }, { "epoch": 98.464, "grad_norm": 0.23642303049564362, "learning_rate": 0.0002852474258970359, "loss": 3.2083, "step": 615400 }, { "epoch": 98.48, "grad_norm": 0.21116937696933746, "learning_rate": 0.000285245025801032, "loss": 3.711, "step": 615500 }, { "epoch": 98.496, "grad_norm": 0.2322213351726532, "learning_rate": 0.00028524262570502816, "loss": 3.3084, "step": 615600 }, { "epoch": 98.512, "grad_norm": 0.20877033472061157, "learning_rate": 0.00028524022560902433, "loss": 3.3057, "step": 615700 }, { "epoch": 98.528, "grad_norm": 0.20526909828186035, "learning_rate": 0.0002852378255130205, "loss": 3.1906, "step": 615800 }, { "epoch": 98.544, "grad_norm": 0.2036816030740738, "learning_rate": 0.00028523542541701667, "loss": 3.4613, "step": 615900 }, { "epoch": 98.56, "grad_norm": 0.2395593225955963, "learning_rate": 0.00028523302532101284, "loss": 3.5386, "step": 616000 }, { "epoch": 98.576, "grad_norm": 0.22519393265247345, "learning_rate": 0.00028523062522500895, "loss": 3.398, "step": 616100 }, { "epoch": 98.592, "grad_norm": 0.18585830926895142, "learning_rate": 0.0002852282251290051, "loss": 3.158, "step": 616200 }, { "epoch": 98.608, "grad_norm": 0.21090470254421234, "learning_rate": 0.0002852258250330013, "loss": 3.3693, "step": 616300 }, { "epoch": 98.624, "grad_norm": 0.23189058899879456, "learning_rate": 0.00028522342493699746, "loss": 3.3599, "step": 616400 }, { "epoch": 98.64, "grad_norm": 0.2326330691576004, "learning_rate": 0.00028522102484099363, "loss": 3.3835, "step": 616500 }, { "epoch": 98.656, "grad_norm": 0.21433186531066895, "learning_rate": 0.00028521862474498974, "loss": 3.4453, "step": 616600 }, { "epoch": 98.672, "grad_norm": 0.22654365003108978, "learning_rate": 0.0002852162246489859, "loss": 3.2799, "step": 616700 }, { "epoch": 98.688, "grad_norm": 0.212114617228508, "learning_rate": 0.0002852138245529821, "loss": 3.3072, "step": 616800 }, { "epoch": 98.704, "grad_norm": 0.1960330456495285, "learning_rate": 0.00028521142445697825, "loss": 3.3275, "step": 616900 }, { "epoch": 98.72, "grad_norm": 0.21984192728996277, "learning_rate": 0.0002852090243609744, "loss": 2.9643, "step": 617000 }, { "epoch": 98.736, "grad_norm": 0.18229661881923676, "learning_rate": 0.0002852066242649706, "loss": 3.3311, "step": 617100 }, { "epoch": 98.752, "grad_norm": 0.23670999705791473, "learning_rate": 0.0002852042241689667, "loss": 3.1969, "step": 617200 }, { "epoch": 98.768, "grad_norm": 0.20878632366657257, "learning_rate": 0.00028520182407296287, "loss": 3.6294, "step": 617300 }, { "epoch": 98.784, "grad_norm": 0.2340656965970993, "learning_rate": 0.00028519942397695904, "loss": 3.5048, "step": 617400 }, { "epoch": 98.8, "grad_norm": 0.22716538608074188, "learning_rate": 0.0002851970238809552, "loss": 3.0206, "step": 617500 }, { "epoch": 98.816, "grad_norm": 0.2198362797498703, "learning_rate": 0.0002851946237849514, "loss": 3.275, "step": 617600 }, { "epoch": 98.832, "grad_norm": 0.2573487162590027, "learning_rate": 0.0002851922236889475, "loss": 3.1878, "step": 617700 }, { "epoch": 98.848, "grad_norm": 0.21390201151371002, "learning_rate": 0.00028518982359294366, "loss": 3.269, "step": 617800 }, { "epoch": 98.864, "grad_norm": 0.2554887533187866, "learning_rate": 0.00028518742349693983, "loss": 3.1945, "step": 617900 }, { "epoch": 98.88, "grad_norm": 0.23049183189868927, "learning_rate": 0.000285185023400936, "loss": 3.6483, "step": 618000 }, { "epoch": 98.896, "grad_norm": 0.21958649158477783, "learning_rate": 0.00028518262330493217, "loss": 3.2497, "step": 618100 }, { "epoch": 98.912, "grad_norm": 0.24168728291988373, "learning_rate": 0.00028518022320892834, "loss": 3.4205, "step": 618200 }, { "epoch": 98.928, "grad_norm": 0.21283933520317078, "learning_rate": 0.0002851778231129245, "loss": 3.3966, "step": 618300 }, { "epoch": 98.944, "grad_norm": 0.21453183889389038, "learning_rate": 0.0002851754230169207, "loss": 3.4284, "step": 618400 }, { "epoch": 98.96, "grad_norm": 0.2155948281288147, "learning_rate": 0.00028517302292091685, "loss": 3.2214, "step": 618500 }, { "epoch": 98.976, "grad_norm": 0.3017854392528534, "learning_rate": 0.00028517062282491296, "loss": 3.4041, "step": 618600 }, { "epoch": 98.992, "grad_norm": 0.20583203434944153, "learning_rate": 0.00028516822272890913, "loss": 3.1856, "step": 618700 }, { "epoch": 99.008, "grad_norm": 0.22984564304351807, "learning_rate": 0.0002851658226329053, "loss": 3.1425, "step": 618800 }, { "epoch": 99.024, "grad_norm": 0.2734873592853546, "learning_rate": 0.00028516342253690147, "loss": 3.1898, "step": 618900 }, { "epoch": 99.04, "grad_norm": 0.2406061887741089, "learning_rate": 0.00028516102244089764, "loss": 3.0588, "step": 619000 }, { "epoch": 99.056, "grad_norm": 0.24815870821475983, "learning_rate": 0.0002851586223448938, "loss": 3.4299, "step": 619100 }, { "epoch": 99.072, "grad_norm": 0.23232735693454742, "learning_rate": 0.0002851562222488899, "loss": 3.2995, "step": 619200 }, { "epoch": 99.088, "grad_norm": 0.22477750480175018, "learning_rate": 0.0002851538221528861, "loss": 3.0786, "step": 619300 }, { "epoch": 99.104, "grad_norm": 0.22794926166534424, "learning_rate": 0.00028515142205688226, "loss": 3.4621, "step": 619400 }, { "epoch": 99.12, "grad_norm": 0.2584769129753113, "learning_rate": 0.00028514902196087843, "loss": 3.3662, "step": 619500 }, { "epoch": 99.136, "grad_norm": 0.24449898302555084, "learning_rate": 0.0002851466218648746, "loss": 3.428, "step": 619600 }, { "epoch": 99.152, "grad_norm": 0.302802175283432, "learning_rate": 0.0002851442217688707, "loss": 3.3845, "step": 619700 }, { "epoch": 99.168, "grad_norm": 0.23159298300743103, "learning_rate": 0.0002851418216728669, "loss": 3.0007, "step": 619800 }, { "epoch": 99.184, "grad_norm": 0.21652960777282715, "learning_rate": 0.00028513942157686305, "loss": 3.2679, "step": 619900 }, { "epoch": 99.2, "grad_norm": 0.25420984625816345, "learning_rate": 0.0002851370214808592, "loss": 3.5099, "step": 620000 }, { "epoch": 99.216, "grad_norm": 0.2236713021993637, "learning_rate": 0.0002851346213848554, "loss": 3.0742, "step": 620100 }, { "epoch": 99.232, "grad_norm": 0.20484820008277893, "learning_rate": 0.00028513222128885156, "loss": 3.2553, "step": 620200 }, { "epoch": 99.248, "grad_norm": 0.22159065306186676, "learning_rate": 0.0002851298211928477, "loss": 3.3914, "step": 620300 }, { "epoch": 99.264, "grad_norm": 0.20298008620738983, "learning_rate": 0.00028512744509780387, "loss": 3.1438, "step": 620400 }, { "epoch": 99.28, "grad_norm": 0.21268081665039062, "learning_rate": 0.00028512504500180003, "loss": 3.2126, "step": 620500 }, { "epoch": 99.296, "grad_norm": 0.22831976413726807, "learning_rate": 0.0002851226449057962, "loss": 3.4477, "step": 620600 }, { "epoch": 99.312, "grad_norm": 0.2563095986843109, "learning_rate": 0.0002851202448097924, "loss": 3.1625, "step": 620700 }, { "epoch": 99.328, "grad_norm": 0.23886582255363464, "learning_rate": 0.0002851178447137885, "loss": 3.5544, "step": 620800 }, { "epoch": 99.344, "grad_norm": 0.20156171917915344, "learning_rate": 0.00028511544461778466, "loss": 3.2155, "step": 620900 }, { "epoch": 99.36, "grad_norm": 0.2769511342048645, "learning_rate": 0.0002851130445217808, "loss": 3.3717, "step": 621000 }, { "epoch": 99.376, "grad_norm": 0.23098132014274597, "learning_rate": 0.000285110644425777, "loss": 3.4828, "step": 621100 }, { "epoch": 99.392, "grad_norm": 0.23382019996643066, "learning_rate": 0.00028510824432977316, "loss": 3.3245, "step": 621200 }, { "epoch": 99.408, "grad_norm": 0.21196313202381134, "learning_rate": 0.00028510584423376933, "loss": 3.3253, "step": 621300 }, { "epoch": 99.424, "grad_norm": 0.3006671667098999, "learning_rate": 0.0002851034441377655, "loss": 3.5494, "step": 621400 }, { "epoch": 99.44, "grad_norm": 0.26735976338386536, "learning_rate": 0.00028510104404176167, "loss": 3.3919, "step": 621500 }, { "epoch": 99.456, "grad_norm": 0.23158571124076843, "learning_rate": 0.00028509864394575784, "loss": 3.6233, "step": 621600 }, { "epoch": 99.472, "grad_norm": 0.21897031366825104, "learning_rate": 0.00028509624384975396, "loss": 3.2316, "step": 621700 }, { "epoch": 99.488, "grad_norm": 0.2255755066871643, "learning_rate": 0.0002850938437537501, "loss": 3.3604, "step": 621800 }, { "epoch": 99.504, "grad_norm": 0.21226082742214203, "learning_rate": 0.0002850914436577463, "loss": 3.2019, "step": 621900 }, { "epoch": 99.52, "grad_norm": 0.2337443232536316, "learning_rate": 0.0002850890675627025, "loss": 3.3615, "step": 622000 }, { "epoch": 99.536, "grad_norm": 0.22864855825901031, "learning_rate": 0.00028508666746669865, "loss": 3.2585, "step": 622100 }, { "epoch": 99.552, "grad_norm": 0.23795311152935028, "learning_rate": 0.0002850842673706948, "loss": 3.4161, "step": 622200 }, { "epoch": 99.568, "grad_norm": 0.23652678728103638, "learning_rate": 0.00028508186727469094, "loss": 3.3818, "step": 622300 }, { "epoch": 99.584, "grad_norm": 0.25063568353652954, "learning_rate": 0.0002850794671786871, "loss": 3.2803, "step": 622400 }, { "epoch": 99.6, "grad_norm": 0.2142067849636078, "learning_rate": 0.0002850770670826833, "loss": 3.3075, "step": 622500 }, { "epoch": 99.616, "grad_norm": 0.2076985239982605, "learning_rate": 0.00028507466698667944, "loss": 3.363, "step": 622600 }, { "epoch": 99.632, "grad_norm": 0.22764377295970917, "learning_rate": 0.0002850722668906756, "loss": 3.5062, "step": 622700 }, { "epoch": 99.648, "grad_norm": 0.23594728112220764, "learning_rate": 0.00028506986679467173, "loss": 3.5655, "step": 622800 }, { "epoch": 99.664, "grad_norm": 0.2402174025774002, "learning_rate": 0.0002850674666986679, "loss": 3.1159, "step": 622900 }, { "epoch": 99.68, "grad_norm": 0.2376241385936737, "learning_rate": 0.00028506506660266407, "loss": 3.2512, "step": 623000 }, { "epoch": 99.696, "grad_norm": 0.2152601182460785, "learning_rate": 0.00028506266650666024, "loss": 3.4854, "step": 623100 }, { "epoch": 99.712, "grad_norm": 0.2238297313451767, "learning_rate": 0.0002850602664106564, "loss": 3.4446, "step": 623200 }, { "epoch": 99.728, "grad_norm": 0.27110403776168823, "learning_rate": 0.0002850578663146526, "loss": 3.5223, "step": 623300 }, { "epoch": 99.744, "grad_norm": 0.22096037864685059, "learning_rate": 0.0002850554662186487, "loss": 3.2399, "step": 623400 }, { "epoch": 99.76, "grad_norm": 0.25681114196777344, "learning_rate": 0.00028505306612264486, "loss": 3.2682, "step": 623500 }, { "epoch": 99.776, "grad_norm": 0.2193903625011444, "learning_rate": 0.00028505066602664103, "loss": 3.3008, "step": 623600 }, { "epoch": 99.792, "grad_norm": 0.24647705256938934, "learning_rate": 0.0002850482659306372, "loss": 3.3737, "step": 623700 }, { "epoch": 99.808, "grad_norm": 0.22421497106552124, "learning_rate": 0.00028504586583463337, "loss": 3.4828, "step": 623800 }, { "epoch": 99.824, "grad_norm": 0.24350090324878693, "learning_rate": 0.0002850434657386295, "loss": 3.2613, "step": 623900 }, { "epoch": 99.84, "grad_norm": 0.21070532500743866, "learning_rate": 0.00028504106564262565, "loss": 3.3938, "step": 624000 }, { "epoch": 99.856, "grad_norm": 0.22949440777301788, "learning_rate": 0.0002850386655466218, "loss": 3.1132, "step": 624100 }, { "epoch": 99.872, "grad_norm": 0.22671496868133545, "learning_rate": 0.000285036265450618, "loss": 3.6398, "step": 624200 }, { "epoch": 99.888, "grad_norm": 0.2608775198459625, "learning_rate": 0.00028503386535461416, "loss": 3.4976, "step": 624300 }, { "epoch": 99.904, "grad_norm": 0.23827584087848663, "learning_rate": 0.0002850314652586103, "loss": 3.4048, "step": 624400 }, { "epoch": 99.92, "grad_norm": 0.21724115312099457, "learning_rate": 0.0002850290651626065, "loss": 3.2982, "step": 624500 }, { "epoch": 99.936, "grad_norm": 0.2580735981464386, "learning_rate": 0.00028502666506660267, "loss": 3.3948, "step": 624600 }, { "epoch": 99.952, "grad_norm": 0.20969119668006897, "learning_rate": 0.00028502426497059883, "loss": 3.5991, "step": 624700 }, { "epoch": 99.968, "grad_norm": 0.2032906860113144, "learning_rate": 0.00028502186487459495, "loss": 3.4171, "step": 624800 }, { "epoch": 99.984, "grad_norm": 0.2390720546245575, "learning_rate": 0.0002850194647785911, "loss": 3.246, "step": 624900 }, { "epoch": 100.0, "grad_norm": 0.27626320719718933, "learning_rate": 0.0002850170646825873, "loss": 3.4049, "step": 625000 }, { "epoch": 100.016, "grad_norm": 0.22609221935272217, "learning_rate": 0.0002850146885875435, "loss": 3.0652, "step": 625100 }, { "epoch": 100.032, "grad_norm": 0.27503257989883423, "learning_rate": 0.00028501228849153965, "loss": 3.4626, "step": 625200 }, { "epoch": 100.048, "grad_norm": 0.23094575107097626, "learning_rate": 0.0002850098883955358, "loss": 3.2848, "step": 625300 }, { "epoch": 100.064, "grad_norm": 0.28230783343315125, "learning_rate": 0.00028500748829953193, "loss": 3.0379, "step": 625400 }, { "epoch": 100.08, "grad_norm": 0.2504979372024536, "learning_rate": 0.0002850050882035281, "loss": 3.2255, "step": 625500 }, { "epoch": 100.096, "grad_norm": 0.2607365846633911, "learning_rate": 0.00028500268810752427, "loss": 3.284, "step": 625600 }, { "epoch": 100.112, "grad_norm": 0.26622274518013, "learning_rate": 0.00028500028801152044, "loss": 3.335, "step": 625700 }, { "epoch": 100.128, "grad_norm": 0.2245699167251587, "learning_rate": 0.0002849978879155166, "loss": 3.3315, "step": 625800 }, { "epoch": 100.144, "grad_norm": 0.2339554727077484, "learning_rate": 0.0002849954878195127, "loss": 2.9791, "step": 625900 }, { "epoch": 100.16, "grad_norm": 0.23206953704357147, "learning_rate": 0.0002849930877235089, "loss": 3.2557, "step": 626000 }, { "epoch": 100.176, "grad_norm": 0.25487884879112244, "learning_rate": 0.00028499068762750506, "loss": 3.504, "step": 626100 }, { "epoch": 100.192, "grad_norm": 0.22563043236732483, "learning_rate": 0.00028498828753150123, "loss": 3.2423, "step": 626200 }, { "epoch": 100.208, "grad_norm": 0.2204793095588684, "learning_rate": 0.0002849858874354974, "loss": 3.2122, "step": 626300 }, { "epoch": 100.224, "grad_norm": 0.22749833762645721, "learning_rate": 0.00028498348733949357, "loss": 3.4979, "step": 626400 }, { "epoch": 100.24, "grad_norm": 0.22344544529914856, "learning_rate": 0.0002849810872434897, "loss": 3.1246, "step": 626500 }, { "epoch": 100.256, "grad_norm": 0.24171128869056702, "learning_rate": 0.00028497868714748585, "loss": 3.5502, "step": 626600 }, { "epoch": 100.272, "grad_norm": 0.2037758082151413, "learning_rate": 0.000284976287051482, "loss": 3.4248, "step": 626700 }, { "epoch": 100.288, "grad_norm": 0.21210506558418274, "learning_rate": 0.0002849738869554782, "loss": 3.3251, "step": 626800 }, { "epoch": 100.304, "grad_norm": 0.25696995854377747, "learning_rate": 0.00028497148685947436, "loss": 3.2812, "step": 626900 }, { "epoch": 100.32, "grad_norm": 0.22406229376792908, "learning_rate": 0.0002849690867634705, "loss": 3.3839, "step": 627000 }, { "epoch": 100.336, "grad_norm": 0.2696600556373596, "learning_rate": 0.00028496668666746664, "loss": 3.1231, "step": 627100 }, { "epoch": 100.352, "grad_norm": 0.24064290523529053, "learning_rate": 0.0002849642865714628, "loss": 3.3386, "step": 627200 }, { "epoch": 100.368, "grad_norm": 0.22860503196716309, "learning_rate": 0.000284961886475459, "loss": 3.5428, "step": 627300 }, { "epoch": 100.384, "grad_norm": 0.2667795717716217, "learning_rate": 0.00028495948637945515, "loss": 3.1099, "step": 627400 }, { "epoch": 100.4, "grad_norm": 0.24398189783096313, "learning_rate": 0.0002849570862834513, "loss": 3.2566, "step": 627500 }, { "epoch": 100.416, "grad_norm": 0.2574385404586792, "learning_rate": 0.0002849546861874475, "loss": 3.34, "step": 627600 }, { "epoch": 100.432, "grad_norm": 0.24641990661621094, "learning_rate": 0.00028495228609144366, "loss": 3.1389, "step": 627700 }, { "epoch": 100.448, "grad_norm": 0.2499409168958664, "learning_rate": 0.00028494988599543983, "loss": 3.1733, "step": 627800 }, { "epoch": 100.464, "grad_norm": 0.1875682771205902, "learning_rate": 0.00028494748589943594, "loss": 3.3961, "step": 627900 }, { "epoch": 100.48, "grad_norm": 0.23375554382801056, "learning_rate": 0.0002849450858034321, "loss": 3.401, "step": 628000 }, { "epoch": 100.496, "grad_norm": 0.23424772918224335, "learning_rate": 0.0002849426857074283, "loss": 3.3657, "step": 628100 }, { "epoch": 100.512, "grad_norm": 0.21573081612586975, "learning_rate": 0.00028494028561142445, "loss": 3.4892, "step": 628200 }, { "epoch": 100.528, "grad_norm": 0.2332104742527008, "learning_rate": 0.0002849378855154206, "loss": 3.5412, "step": 628300 }, { "epoch": 100.544, "grad_norm": 0.22819308936595917, "learning_rate": 0.0002849354854194168, "loss": 3.4078, "step": 628400 }, { "epoch": 100.56, "grad_norm": 0.22172127664089203, "learning_rate": 0.0002849330853234129, "loss": 3.4048, "step": 628500 }, { "epoch": 100.576, "grad_norm": 0.21054108440876007, "learning_rate": 0.00028493068522740907, "loss": 3.5935, "step": 628600 }, { "epoch": 100.592, "grad_norm": 0.22522467374801636, "learning_rate": 0.00028492828513140524, "loss": 3.4224, "step": 628700 }, { "epoch": 100.608, "grad_norm": 0.23360919952392578, "learning_rate": 0.0002849258850354014, "loss": 3.5225, "step": 628800 }, { "epoch": 100.624, "grad_norm": 0.28295594453811646, "learning_rate": 0.0002849234849393976, "loss": 3.2813, "step": 628900 }, { "epoch": 100.64, "grad_norm": 0.22829052805900574, "learning_rate": 0.0002849210848433937, "loss": 3.3406, "step": 629000 }, { "epoch": 100.656, "grad_norm": 0.22535817325115204, "learning_rate": 0.00028491868474738986, "loss": 3.1505, "step": 629100 }, { "epoch": 100.672, "grad_norm": 0.2054419368505478, "learning_rate": 0.00028491628465138603, "loss": 3.2065, "step": 629200 }, { "epoch": 100.688, "grad_norm": 0.20555439591407776, "learning_rate": 0.0002849138845553822, "loss": 3.2074, "step": 629300 }, { "epoch": 100.704, "grad_norm": 0.2754693031311035, "learning_rate": 0.00028491148445937837, "loss": 3.1368, "step": 629400 }, { "epoch": 100.72, "grad_norm": 0.20864638686180115, "learning_rate": 0.00028490908436337454, "loss": 3.3537, "step": 629500 }, { "epoch": 100.736, "grad_norm": 0.2350027710199356, "learning_rate": 0.00028490668426737066, "loss": 3.4888, "step": 629600 }, { "epoch": 100.752, "grad_norm": 0.2556154429912567, "learning_rate": 0.0002849042841713668, "loss": 3.2963, "step": 629700 }, { "epoch": 100.768, "grad_norm": 0.2480744570493698, "learning_rate": 0.000284901884075363, "loss": 2.9636, "step": 629800 }, { "epoch": 100.784, "grad_norm": 0.23960307240486145, "learning_rate": 0.00028489948397935916, "loss": 3.5956, "step": 629900 }, { "epoch": 100.8, "grad_norm": 0.1936337947845459, "learning_rate": 0.00028489708388335533, "loss": 3.181, "step": 630000 }, { "epoch": 100.816, "grad_norm": 0.24381332099437714, "learning_rate": 0.00028489468378735145, "loss": 3.207, "step": 630100 }, { "epoch": 100.832, "grad_norm": 0.2301921546459198, "learning_rate": 0.0002848922836913476, "loss": 3.3021, "step": 630200 }, { "epoch": 100.848, "grad_norm": 0.2692905068397522, "learning_rate": 0.0002848899075963038, "loss": 3.2734, "step": 630300 }, { "epoch": 100.864, "grad_norm": 0.5353929400444031, "learning_rate": 0.0002848875075003, "loss": 3.471, "step": 630400 }, { "epoch": 100.88, "grad_norm": 0.22961857914924622, "learning_rate": 0.00028488510740429614, "loss": 3.5162, "step": 630500 }, { "epoch": 100.896, "grad_norm": 0.24251912534236908, "learning_rate": 0.0002848827073082923, "loss": 3.2902, "step": 630600 }, { "epoch": 100.912, "grad_norm": 0.2104545533657074, "learning_rate": 0.00028488030721228843, "loss": 3.3643, "step": 630700 }, { "epoch": 100.928, "grad_norm": 0.2595306932926178, "learning_rate": 0.00028487790711628465, "loss": 3.2564, "step": 630800 }, { "epoch": 100.944, "grad_norm": 0.2272263914346695, "learning_rate": 0.0002848755070202808, "loss": 3.4404, "step": 630900 }, { "epoch": 100.96, "grad_norm": 0.20512396097183228, "learning_rate": 0.00028487310692427694, "loss": 2.9485, "step": 631000 }, { "epoch": 100.976, "grad_norm": 0.2744947671890259, "learning_rate": 0.0002848707068282731, "loss": 3.4195, "step": 631100 }, { "epoch": 100.992, "grad_norm": 0.24329674243927002, "learning_rate": 0.0002848683067322693, "loss": 3.3401, "step": 631200 }, { "epoch": 101.008, "grad_norm": 0.23915009200572968, "learning_rate": 0.00028486590663626544, "loss": 3.561, "step": 631300 }, { "epoch": 101.024, "grad_norm": 0.23262754082679749, "learning_rate": 0.0002848635065402616, "loss": 3.0888, "step": 631400 }, { "epoch": 101.04, "grad_norm": 0.24738608300685883, "learning_rate": 0.0002848611064442578, "loss": 3.1152, "step": 631500 }, { "epoch": 101.056, "grad_norm": 0.27080488204956055, "learning_rate": 0.0002848587063482539, "loss": 3.2857, "step": 631600 }, { "epoch": 101.072, "grad_norm": 0.2202967405319214, "learning_rate": 0.0002848563302532101, "loss": 3.2301, "step": 631700 }, { "epoch": 101.088, "grad_norm": 0.23763853311538696, "learning_rate": 0.00028485393015720625, "loss": 3.2549, "step": 631800 }, { "epoch": 101.104, "grad_norm": 0.24810639023780823, "learning_rate": 0.0002848515300612024, "loss": 3.4752, "step": 631900 }, { "epoch": 101.12, "grad_norm": 0.24184952676296234, "learning_rate": 0.0002848491299651986, "loss": 3.1555, "step": 632000 }, { "epoch": 101.136, "grad_norm": 0.21347619593143463, "learning_rate": 0.0002848467298691947, "loss": 3.2019, "step": 632100 }, { "epoch": 101.152, "grad_norm": 0.21796759963035583, "learning_rate": 0.0002848443297731909, "loss": 3.0978, "step": 632200 }, { "epoch": 101.168, "grad_norm": 0.2102041095495224, "learning_rate": 0.00028484192967718705, "loss": 3.2433, "step": 632300 }, { "epoch": 101.184, "grad_norm": 0.2400791347026825, "learning_rate": 0.0002848395295811832, "loss": 3.5451, "step": 632400 }, { "epoch": 101.2, "grad_norm": 0.2737637460231781, "learning_rate": 0.0002848371294851794, "loss": 3.4158, "step": 632500 }, { "epoch": 101.216, "grad_norm": 0.2498079240322113, "learning_rate": 0.00028483472938917555, "loss": 3.2376, "step": 632600 }, { "epoch": 101.232, "grad_norm": 0.25029441714286804, "learning_rate": 0.00028483232929317167, "loss": 3.4243, "step": 632700 }, { "epoch": 101.248, "grad_norm": 0.22614842653274536, "learning_rate": 0.00028482992919716784, "loss": 3.1379, "step": 632800 }, { "epoch": 101.264, "grad_norm": 0.24633535742759705, "learning_rate": 0.000284827529101164, "loss": 3.4037, "step": 632900 }, { "epoch": 101.28, "grad_norm": 0.24434518814086914, "learning_rate": 0.0002848251290051602, "loss": 3.7477, "step": 633000 }, { "epoch": 101.296, "grad_norm": 0.23642903566360474, "learning_rate": 0.00028482272890915635, "loss": 3.4271, "step": 633100 }, { "epoch": 101.312, "grad_norm": 0.24472475051879883, "learning_rate": 0.00028482032881315246, "loss": 3.6582, "step": 633200 }, { "epoch": 101.328, "grad_norm": 0.2097211331129074, "learning_rate": 0.00028481792871714863, "loss": 3.3976, "step": 633300 }, { "epoch": 101.344, "grad_norm": 0.24810966849327087, "learning_rate": 0.0002848155286211448, "loss": 3.5723, "step": 633400 }, { "epoch": 101.36, "grad_norm": 0.24931128323078156, "learning_rate": 0.00028481312852514097, "loss": 3.2817, "step": 633500 }, { "epoch": 101.376, "grad_norm": 0.2245187908411026, "learning_rate": 0.00028481072842913714, "loss": 3.4877, "step": 633600 }, { "epoch": 101.392, "grad_norm": 0.2543257772922516, "learning_rate": 0.0002848083283331333, "loss": 3.3428, "step": 633700 }, { "epoch": 101.408, "grad_norm": 0.2585226595401764, "learning_rate": 0.0002848059282371294, "loss": 3.3963, "step": 633800 }, { "epoch": 101.424, "grad_norm": 0.2279805690050125, "learning_rate": 0.00028480352814112564, "loss": 3.0926, "step": 633900 }, { "epoch": 101.44, "grad_norm": 0.220321387052536, "learning_rate": 0.0002848011280451218, "loss": 3.5988, "step": 634000 }, { "epoch": 101.456, "grad_norm": 0.2080024629831314, "learning_rate": 0.00028479872794911793, "loss": 3.3311, "step": 634100 }, { "epoch": 101.472, "grad_norm": 0.2052999585866928, "learning_rate": 0.0002847963278531141, "loss": 3.3918, "step": 634200 }, { "epoch": 101.488, "grad_norm": 0.22087636590003967, "learning_rate": 0.00028479392775711027, "loss": 3.5563, "step": 634300 }, { "epoch": 101.504, "grad_norm": 0.2778357267379761, "learning_rate": 0.00028479152766110644, "loss": 3.4463, "step": 634400 }, { "epoch": 101.52, "grad_norm": 0.22364462912082672, "learning_rate": 0.0002847891275651026, "loss": 3.3416, "step": 634500 }, { "epoch": 101.536, "grad_norm": 0.21418030560016632, "learning_rate": 0.0002847867274690988, "loss": 3.382, "step": 634600 }, { "epoch": 101.552, "grad_norm": 0.20711690187454224, "learning_rate": 0.0002847843273730949, "loss": 3.3987, "step": 634700 }, { "epoch": 101.568, "grad_norm": 0.2251412719488144, "learning_rate": 0.00028478192727709106, "loss": 3.2736, "step": 634800 }, { "epoch": 101.584, "grad_norm": 0.2372153252363205, "learning_rate": 0.00028477952718108723, "loss": 3.1663, "step": 634900 }, { "epoch": 101.6, "grad_norm": 0.2138531506061554, "learning_rate": 0.0002847771270850834, "loss": 3.6516, "step": 635000 }, { "epoch": 101.616, "grad_norm": 0.218136727809906, "learning_rate": 0.00028477472698907957, "loss": 3.3958, "step": 635100 }, { "epoch": 101.632, "grad_norm": 0.2049139440059662, "learning_rate": 0.0002847723268930757, "loss": 3.2366, "step": 635200 }, { "epoch": 101.648, "grad_norm": 0.2498401552438736, "learning_rate": 0.00028476992679707185, "loss": 3.2506, "step": 635300 }, { "epoch": 101.664, "grad_norm": 0.22981874644756317, "learning_rate": 0.000284767526701068, "loss": 3.4275, "step": 635400 }, { "epoch": 101.68, "grad_norm": 0.22421884536743164, "learning_rate": 0.0002847651266050642, "loss": 3.5426, "step": 635500 }, { "epoch": 101.696, "grad_norm": 0.2269371747970581, "learning_rate": 0.00028476272650906036, "loss": 3.2047, "step": 635600 }, { "epoch": 101.712, "grad_norm": 0.2363787442445755, "learning_rate": 0.00028476032641305653, "loss": 3.7334, "step": 635700 }, { "epoch": 101.728, "grad_norm": 0.21670831739902496, "learning_rate": 0.00028475792631705264, "loss": 3.119, "step": 635800 }, { "epoch": 101.744, "grad_norm": 0.21140550076961517, "learning_rate": 0.00028475555022200883, "loss": 3.4662, "step": 635900 }, { "epoch": 101.76, "grad_norm": 0.22398890554904938, "learning_rate": 0.000284753150126005, "loss": 3.5785, "step": 636000 }, { "epoch": 101.776, "grad_norm": 0.25052496790885925, "learning_rate": 0.00028475075003000117, "loss": 3.443, "step": 636100 }, { "epoch": 101.792, "grad_norm": 0.2437448501586914, "learning_rate": 0.00028474834993399734, "loss": 3.3388, "step": 636200 }, { "epoch": 101.808, "grad_norm": 0.21866008639335632, "learning_rate": 0.0002847459498379935, "loss": 3.5631, "step": 636300 }, { "epoch": 101.824, "grad_norm": 0.26176413893699646, "learning_rate": 0.0002847435497419896, "loss": 3.2798, "step": 636400 }, { "epoch": 101.84, "grad_norm": 0.22087961435317993, "learning_rate": 0.0002847411496459858, "loss": 3.5046, "step": 636500 }, { "epoch": 101.856, "grad_norm": 0.2106420397758484, "learning_rate": 0.00028473874954998196, "loss": 3.334, "step": 636600 }, { "epoch": 101.872, "grad_norm": 0.21376082301139832, "learning_rate": 0.00028473634945397813, "loss": 3.5397, "step": 636700 }, { "epoch": 101.888, "grad_norm": 0.20598718523979187, "learning_rate": 0.0002847339493579743, "loss": 3.3414, "step": 636800 }, { "epoch": 101.904, "grad_norm": 0.22688962519168854, "learning_rate": 0.0002847315492619704, "loss": 3.5803, "step": 636900 }, { "epoch": 101.92, "grad_norm": 0.21162588894367218, "learning_rate": 0.00028472914916596664, "loss": 3.2736, "step": 637000 }, { "epoch": 101.936, "grad_norm": 0.22268728911876678, "learning_rate": 0.0002847267490699628, "loss": 3.3541, "step": 637100 }, { "epoch": 101.952, "grad_norm": 0.24383500218391418, "learning_rate": 0.0002847243489739589, "loss": 3.3863, "step": 637200 }, { "epoch": 101.968, "grad_norm": 0.2269163280725479, "learning_rate": 0.0002847219728789151, "loss": 3.3671, "step": 637300 }, { "epoch": 101.984, "grad_norm": 0.21600216627120972, "learning_rate": 0.0002847195727829113, "loss": 3.3321, "step": 637400 }, { "epoch": 102.0, "grad_norm": 0.23087309300899506, "learning_rate": 0.00028471717268690745, "loss": 3.6564, "step": 637500 }, { "epoch": 102.016, "grad_norm": 0.21379485726356506, "learning_rate": 0.0002847147725909036, "loss": 3.3011, "step": 637600 }, { "epoch": 102.032, "grad_norm": 0.27410632371902466, "learning_rate": 0.0002847123724948998, "loss": 3.1184, "step": 637700 }, { "epoch": 102.048, "grad_norm": 0.24559953808784485, "learning_rate": 0.0002847099723988959, "loss": 3.5931, "step": 637800 }, { "epoch": 102.064, "grad_norm": 0.2401798814535141, "learning_rate": 0.00028470757230289207, "loss": 3.4484, "step": 637900 }, { "epoch": 102.08, "grad_norm": 0.24279727041721344, "learning_rate": 0.00028470517220688824, "loss": 3.0574, "step": 638000 }, { "epoch": 102.096, "grad_norm": 0.2618333399295807, "learning_rate": 0.0002847027721108844, "loss": 3.4454, "step": 638100 }, { "epoch": 102.112, "grad_norm": 0.28191009163856506, "learning_rate": 0.0002847003720148806, "loss": 3.3622, "step": 638200 }, { "epoch": 102.128, "grad_norm": 0.2583199143409729, "learning_rate": 0.00028469797191887675, "loss": 3.2865, "step": 638300 }, { "epoch": 102.144, "grad_norm": 0.2519609332084656, "learning_rate": 0.00028469557182287286, "loss": 3.4791, "step": 638400 }, { "epoch": 102.16, "grad_norm": 0.25726234912872314, "learning_rate": 0.00028469317172686903, "loss": 3.3051, "step": 638500 }, { "epoch": 102.176, "grad_norm": 0.24843822419643402, "learning_rate": 0.0002846907716308652, "loss": 3.4437, "step": 638600 }, { "epoch": 102.192, "grad_norm": 0.25002238154411316, "learning_rate": 0.00028468837153486137, "loss": 3.5176, "step": 638700 }, { "epoch": 102.208, "grad_norm": 0.22227154672145844, "learning_rate": 0.00028468597143885754, "loss": 3.125, "step": 638800 }, { "epoch": 102.224, "grad_norm": 0.22477781772613525, "learning_rate": 0.00028468357134285366, "loss": 3.3127, "step": 638900 }, { "epoch": 102.24, "grad_norm": 0.27012044191360474, "learning_rate": 0.0002846811712468498, "loss": 3.517, "step": 639000 }, { "epoch": 102.256, "grad_norm": 0.20710980892181396, "learning_rate": 0.000284678771150846, "loss": 3.3353, "step": 639100 }, { "epoch": 102.272, "grad_norm": 0.4465367794036865, "learning_rate": 0.00028467637105484216, "loss": 3.351, "step": 639200 }, { "epoch": 102.288, "grad_norm": 0.22805139422416687, "learning_rate": 0.00028467397095883833, "loss": 3.5074, "step": 639300 }, { "epoch": 102.304, "grad_norm": 0.23795142769813538, "learning_rate": 0.0002846715708628345, "loss": 3.007, "step": 639400 }, { "epoch": 102.32, "grad_norm": 0.2276441603899002, "learning_rate": 0.0002846691707668306, "loss": 3.2553, "step": 639500 }, { "epoch": 102.336, "grad_norm": 0.2388163059949875, "learning_rate": 0.0002846667706708268, "loss": 3.2791, "step": 639600 }, { "epoch": 102.352, "grad_norm": 0.2392502725124359, "learning_rate": 0.00028466437057482295, "loss": 3.5008, "step": 639700 }, { "epoch": 102.368, "grad_norm": 0.2050226926803589, "learning_rate": 0.0002846619704788191, "loss": 3.3566, "step": 639800 }, { "epoch": 102.384, "grad_norm": 0.27286985516548157, "learning_rate": 0.0002846595703828153, "loss": 3.3365, "step": 639900 }, { "epoch": 102.4, "grad_norm": 0.20118270814418793, "learning_rate": 0.0002846571702868114, "loss": 3.324, "step": 640000 }, { "epoch": 102.416, "grad_norm": 0.2658753991127014, "learning_rate": 0.00028465477019080763, "loss": 3.6128, "step": 640100 }, { "epoch": 102.432, "grad_norm": 0.24198958277702332, "learning_rate": 0.0002846523700948038, "loss": 3.3663, "step": 640200 }, { "epoch": 102.448, "grad_norm": 0.21644701063632965, "learning_rate": 0.00028464996999879997, "loss": 3.1393, "step": 640300 }, { "epoch": 102.464, "grad_norm": 0.28402799367904663, "learning_rate": 0.0002846475699027961, "loss": 3.4354, "step": 640400 }, { "epoch": 102.48, "grad_norm": 0.21918471157550812, "learning_rate": 0.00028464516980679225, "loss": 3.5383, "step": 640500 }, { "epoch": 102.496, "grad_norm": 0.6433680653572083, "learning_rate": 0.0002846427697107884, "loss": 3.4065, "step": 640600 }, { "epoch": 102.512, "grad_norm": 0.24643191695213318, "learning_rate": 0.0002846403936157446, "loss": 3.0082, "step": 640700 }, { "epoch": 102.528, "grad_norm": 0.2465837597846985, "learning_rate": 0.0002846379935197408, "loss": 3.4479, "step": 640800 }, { "epoch": 102.544, "grad_norm": 0.24558806419372559, "learning_rate": 0.0002846355934237369, "loss": 3.1125, "step": 640900 }, { "epoch": 102.56, "grad_norm": 0.25220155715942383, "learning_rate": 0.00028463319332773306, "loss": 3.3959, "step": 641000 }, { "epoch": 102.576, "grad_norm": 0.2634150981903076, "learning_rate": 0.00028463079323172923, "loss": 3.1838, "step": 641100 }, { "epoch": 102.592, "grad_norm": 0.24393072724342346, "learning_rate": 0.0002846283931357254, "loss": 3.2461, "step": 641200 }, { "epoch": 102.608, "grad_norm": 0.22388221323490143, "learning_rate": 0.00028462599303972157, "loss": 3.2067, "step": 641300 }, { "epoch": 102.624, "grad_norm": 0.21227715909481049, "learning_rate": 0.00028462359294371774, "loss": 3.4165, "step": 641400 }, { "epoch": 102.64, "grad_norm": 0.24092823266983032, "learning_rate": 0.00028462119284771386, "loss": 3.4945, "step": 641500 }, { "epoch": 102.656, "grad_norm": 0.22411774098873138, "learning_rate": 0.00028461879275171, "loss": 3.0938, "step": 641600 }, { "epoch": 102.672, "grad_norm": 0.2385229915380478, "learning_rate": 0.0002846163926557062, "loss": 3.2166, "step": 641700 }, { "epoch": 102.688, "grad_norm": 0.240799218416214, "learning_rate": 0.00028461399255970236, "loss": 3.5036, "step": 641800 }, { "epoch": 102.704, "grad_norm": 0.23637226223945618, "learning_rate": 0.00028461159246369853, "loss": 3.5187, "step": 641900 }, { "epoch": 102.72, "grad_norm": 0.2180604636669159, "learning_rate": 0.00028460919236769465, "loss": 3.426, "step": 642000 }, { "epoch": 102.736, "grad_norm": 0.27420979738235474, "learning_rate": 0.0002846067922716908, "loss": 3.3808, "step": 642100 }, { "epoch": 102.752, "grad_norm": 0.23421555757522583, "learning_rate": 0.000284604392175687, "loss": 3.5115, "step": 642200 }, { "epoch": 102.768, "grad_norm": 0.23199960589408875, "learning_rate": 0.00028460199207968316, "loss": 3.2116, "step": 642300 }, { "epoch": 102.784, "grad_norm": 0.25692686438560486, "learning_rate": 0.0002845995919836793, "loss": 3.6415, "step": 642400 }, { "epoch": 102.8, "grad_norm": 0.2578149437904358, "learning_rate": 0.0002845971918876755, "loss": 3.2153, "step": 642500 }, { "epoch": 102.816, "grad_norm": 0.22773298621177673, "learning_rate": 0.0002845947917916716, "loss": 3.2886, "step": 642600 }, { "epoch": 102.832, "grad_norm": 0.21338240802288055, "learning_rate": 0.0002845923916956678, "loss": 3.5473, "step": 642700 }, { "epoch": 102.848, "grad_norm": 0.23219239711761475, "learning_rate": 0.00028458999159966395, "loss": 3.2928, "step": 642800 }, { "epoch": 102.864, "grad_norm": 0.215921089053154, "learning_rate": 0.0002845875915036601, "loss": 3.2194, "step": 642900 }, { "epoch": 102.88, "grad_norm": 0.22786709666252136, "learning_rate": 0.0002845851914076563, "loss": 3.3304, "step": 643000 }, { "epoch": 102.896, "grad_norm": 0.2887389063835144, "learning_rate": 0.0002845827913116524, "loss": 3.312, "step": 643100 }, { "epoch": 102.912, "grad_norm": 0.23393656313419342, "learning_rate": 0.00028458039121564857, "loss": 3.2254, "step": 643200 }, { "epoch": 102.928, "grad_norm": 0.20228855311870575, "learning_rate": 0.0002845779911196448, "loss": 3.2597, "step": 643300 }, { "epoch": 102.944, "grad_norm": 0.2229207307100296, "learning_rate": 0.00028457559102364096, "loss": 3.514, "step": 643400 }, { "epoch": 102.96, "grad_norm": 0.22222624719142914, "learning_rate": 0.0002845731909276371, "loss": 3.4411, "step": 643500 }, { "epoch": 102.976, "grad_norm": 0.20149633288383484, "learning_rate": 0.00028457079083163325, "loss": 3.4057, "step": 643600 }, { "epoch": 102.992, "grad_norm": 0.2058512568473816, "learning_rate": 0.0002845683907356294, "loss": 3.539, "step": 643700 }, { "epoch": 103.008, "grad_norm": 0.23599344491958618, "learning_rate": 0.0002845659906396256, "loss": 3.2627, "step": 643800 }, { "epoch": 103.024, "grad_norm": 0.23680974543094635, "learning_rate": 0.00028456359054362175, "loss": 3.1816, "step": 643900 }, { "epoch": 103.04, "grad_norm": 0.2604714334011078, "learning_rate": 0.00028456119044761787, "loss": 3.1921, "step": 644000 }, { "epoch": 103.056, "grad_norm": 0.21781295537948608, "learning_rate": 0.00028455879035161404, "loss": 3.4579, "step": 644100 }, { "epoch": 103.072, "grad_norm": 0.22171662747859955, "learning_rate": 0.0002845563902556102, "loss": 3.3238, "step": 644200 }, { "epoch": 103.088, "grad_norm": 0.24980555474758148, "learning_rate": 0.0002845539901596064, "loss": 3.6251, "step": 644300 }, { "epoch": 103.104, "grad_norm": 0.23855330049991608, "learning_rate": 0.00028455159006360255, "loss": 3.3584, "step": 644400 }, { "epoch": 103.12, "grad_norm": 0.24156038463115692, "learning_rate": 0.0002845491899675987, "loss": 3.3551, "step": 644500 }, { "epoch": 103.136, "grad_norm": 0.2573011517524719, "learning_rate": 0.00028454678987159483, "loss": 3.2583, "step": 644600 }, { "epoch": 103.152, "grad_norm": 0.23266972601413727, "learning_rate": 0.000284544413776551, "loss": 3.2576, "step": 644700 }, { "epoch": 103.168, "grad_norm": 0.22941954433918, "learning_rate": 0.0002845420136805472, "loss": 3.1256, "step": 644800 }, { "epoch": 103.184, "grad_norm": 0.20858949422836304, "learning_rate": 0.00028453961358454336, "loss": 3.4128, "step": 644900 }, { "epoch": 103.2, "grad_norm": 0.26952147483825684, "learning_rate": 0.0002845372134885395, "loss": 3.5469, "step": 645000 }, { "epoch": 103.216, "grad_norm": 0.2304742932319641, "learning_rate": 0.00028453481339253564, "loss": 3.3921, "step": 645100 }, { "epoch": 103.232, "grad_norm": 0.26481392979621887, "learning_rate": 0.0002845324132965318, "loss": 3.2305, "step": 645200 }, { "epoch": 103.248, "grad_norm": 0.21594169735908508, "learning_rate": 0.000284530013200528, "loss": 3.0871, "step": 645300 }, { "epoch": 103.264, "grad_norm": 0.24391967058181763, "learning_rate": 0.00028452761310452415, "loss": 3.1261, "step": 645400 }, { "epoch": 103.28, "grad_norm": 0.22701428830623627, "learning_rate": 0.0002845252130085203, "loss": 3.6076, "step": 645500 }, { "epoch": 103.296, "grad_norm": 0.209235280752182, "learning_rate": 0.0002845228129125165, "loss": 3.455, "step": 645600 }, { "epoch": 103.312, "grad_norm": 0.2206258773803711, "learning_rate": 0.0002845204128165126, "loss": 3.6112, "step": 645700 }, { "epoch": 103.328, "grad_norm": 0.2745055854320526, "learning_rate": 0.00028451801272050877, "loss": 3.1872, "step": 645800 }, { "epoch": 103.344, "grad_norm": 0.2504189908504486, "learning_rate": 0.00028451561262450494, "loss": 3.724, "step": 645900 }, { "epoch": 103.36, "grad_norm": 0.25893208384513855, "learning_rate": 0.0002845132125285011, "loss": 3.2023, "step": 646000 }, { "epoch": 103.376, "grad_norm": 0.24826602637767792, "learning_rate": 0.0002845108124324973, "loss": 3.7138, "step": 646100 }, { "epoch": 103.392, "grad_norm": 0.3127690553665161, "learning_rate": 0.0002845084123364934, "loss": 3.4314, "step": 646200 }, { "epoch": 103.408, "grad_norm": 0.21634916961193085, "learning_rate": 0.00028450601224048956, "loss": 3.322, "step": 646300 }, { "epoch": 103.424, "grad_norm": 0.27332907915115356, "learning_rate": 0.0002845036361454458, "loss": 3.4529, "step": 646400 }, { "epoch": 103.44, "grad_norm": 0.24514175951480865, "learning_rate": 0.000284501260050402, "loss": 3.3761, "step": 646500 }, { "epoch": 103.456, "grad_norm": 0.2681935429573059, "learning_rate": 0.00028449885995439816, "loss": 2.9799, "step": 646600 }, { "epoch": 103.472, "grad_norm": 0.2731553018093109, "learning_rate": 0.00028449645985839433, "loss": 3.5577, "step": 646700 }, { "epoch": 103.488, "grad_norm": 0.24995994567871094, "learning_rate": 0.0002844940597623905, "loss": 3.5765, "step": 646800 }, { "epoch": 103.504, "grad_norm": 0.24754126369953156, "learning_rate": 0.0002844916596663866, "loss": 3.3863, "step": 646900 }, { "epoch": 103.52, "grad_norm": 0.2355126589536667, "learning_rate": 0.0002844892595703828, "loss": 3.4077, "step": 647000 }, { "epoch": 103.536, "grad_norm": 0.23105813562870026, "learning_rate": 0.00028448685947437896, "loss": 3.305, "step": 647100 }, { "epoch": 103.552, "grad_norm": 0.2330879122018814, "learning_rate": 0.0002844844593783751, "loss": 3.2751, "step": 647200 }, { "epoch": 103.568, "grad_norm": 0.23035113513469696, "learning_rate": 0.0002844820592823713, "loss": 3.7678, "step": 647300 }, { "epoch": 103.584, "grad_norm": 0.2601264417171478, "learning_rate": 0.00028447965918636746, "loss": 3.2762, "step": 647400 }, { "epoch": 103.6, "grad_norm": 0.25167396664619446, "learning_rate": 0.0002844772590903636, "loss": 3.6275, "step": 647500 }, { "epoch": 103.616, "grad_norm": 0.2255261093378067, "learning_rate": 0.00028447485899435975, "loss": 3.455, "step": 647600 }, { "epoch": 103.632, "grad_norm": 0.2350507378578186, "learning_rate": 0.0002844724588983559, "loss": 3.3528, "step": 647700 }, { "epoch": 103.648, "grad_norm": 0.2074195146560669, "learning_rate": 0.0002844700588023521, "loss": 3.4152, "step": 647800 }, { "epoch": 103.664, "grad_norm": 0.2429603636264801, "learning_rate": 0.00028446765870634825, "loss": 3.2535, "step": 647900 }, { "epoch": 103.68, "grad_norm": 0.22900013625621796, "learning_rate": 0.00028446525861034437, "loss": 3.2641, "step": 648000 }, { "epoch": 103.696, "grad_norm": 0.21364720165729523, "learning_rate": 0.00028446285851434054, "loss": 3.4276, "step": 648100 }, { "epoch": 103.712, "grad_norm": 0.2286633849143982, "learning_rate": 0.0002844604584183367, "loss": 3.4979, "step": 648200 }, { "epoch": 103.728, "grad_norm": 0.2537565529346466, "learning_rate": 0.0002844580583223329, "loss": 3.3474, "step": 648300 }, { "epoch": 103.744, "grad_norm": 0.22841069102287292, "learning_rate": 0.00028445565822632905, "loss": 3.1149, "step": 648400 }, { "epoch": 103.76, "grad_norm": 0.20124374330043793, "learning_rate": 0.0002844532581303252, "loss": 3.6767, "step": 648500 }, { "epoch": 103.776, "grad_norm": 0.2258940041065216, "learning_rate": 0.00028445085803432133, "loss": 3.2779, "step": 648600 }, { "epoch": 103.792, "grad_norm": 0.22126778960227966, "learning_rate": 0.0002844484579383175, "loss": 3.2024, "step": 648700 }, { "epoch": 103.808, "grad_norm": 0.20364996790885925, "learning_rate": 0.00028444605784231367, "loss": 3.2151, "step": 648800 }, { "epoch": 103.824, "grad_norm": 0.24500904977321625, "learning_rate": 0.00028444365774630984, "loss": 3.4987, "step": 648900 }, { "epoch": 103.84, "grad_norm": 0.24096691608428955, "learning_rate": 0.000284441257650306, "loss": 3.4052, "step": 649000 }, { "epoch": 103.856, "grad_norm": 0.20535537600517273, "learning_rate": 0.0002844388575543021, "loss": 3.247, "step": 649100 }, { "epoch": 103.872, "grad_norm": 0.23413003981113434, "learning_rate": 0.0002844364574582983, "loss": 3.4175, "step": 649200 }, { "epoch": 103.888, "grad_norm": 0.25611814856529236, "learning_rate": 0.00028443405736229446, "loss": 3.4598, "step": 649300 }, { "epoch": 103.904, "grad_norm": 0.2612318694591522, "learning_rate": 0.00028443165726629063, "loss": 3.1854, "step": 649400 }, { "epoch": 103.92, "grad_norm": 0.2509940266609192, "learning_rate": 0.0002844292571702868, "loss": 3.1153, "step": 649500 }, { "epoch": 103.936, "grad_norm": 0.238302543759346, "learning_rate": 0.00028442685707428297, "loss": 3.3616, "step": 649600 }, { "epoch": 103.952, "grad_norm": 0.20932263135910034, "learning_rate": 0.0002844244569782791, "loss": 3.2692, "step": 649700 }, { "epoch": 103.968, "grad_norm": 0.18263879418373108, "learning_rate": 0.00028442205688227525, "loss": 3.3132, "step": 649800 }, { "epoch": 103.984, "grad_norm": 0.24810369312763214, "learning_rate": 0.0002844196567862714, "loss": 3.3075, "step": 649900 }, { "epoch": 104.0, "grad_norm": 0.22892190515995026, "learning_rate": 0.0002844172566902676, "loss": 3.1985, "step": 650000 }, { "epoch": 104.016, "grad_norm": 0.23702287673950195, "learning_rate": 0.00028441485659426376, "loss": 3.3365, "step": 650100 }, { "epoch": 104.032, "grad_norm": 0.26172512769699097, "learning_rate": 0.0002844124564982599, "loss": 3.1814, "step": 650200 }, { "epoch": 104.048, "grad_norm": 0.2265368551015854, "learning_rate": 0.00028441005640225604, "loss": 3.2921, "step": 650300 }, { "epoch": 104.064, "grad_norm": 0.2594759166240692, "learning_rate": 0.0002844076563062522, "loss": 3.3159, "step": 650400 }, { "epoch": 104.08, "grad_norm": 0.2484055906534195, "learning_rate": 0.0002844052562102484, "loss": 3.0401, "step": 650500 }, { "epoch": 104.096, "grad_norm": 0.23028621077537537, "learning_rate": 0.00028440285611424455, "loss": 3.1875, "step": 650600 }, { "epoch": 104.112, "grad_norm": 0.23720590770244598, "learning_rate": 0.0002844004560182407, "loss": 3.3881, "step": 650700 }, { "epoch": 104.128, "grad_norm": 0.20638491213321686, "learning_rate": 0.00028439805592223684, "loss": 3.4843, "step": 650800 }, { "epoch": 104.144, "grad_norm": 0.27973419427871704, "learning_rate": 0.000284395655826233, "loss": 3.2217, "step": 650900 }, { "epoch": 104.16, "grad_norm": 0.24527285993099213, "learning_rate": 0.0002843932557302292, "loss": 3.7776, "step": 651000 }, { "epoch": 104.176, "grad_norm": 0.25221922993659973, "learning_rate": 0.00028439085563422534, "loss": 3.3008, "step": 651100 }, { "epoch": 104.192, "grad_norm": 0.28022199869155884, "learning_rate": 0.0002843884555382215, "loss": 3.2348, "step": 651200 }, { "epoch": 104.208, "grad_norm": 0.24445755779743195, "learning_rate": 0.00028438605544221763, "loss": 3.5244, "step": 651300 }, { "epoch": 104.224, "grad_norm": 0.24696870148181915, "learning_rate": 0.0002843836553462138, "loss": 3.3485, "step": 651400 }, { "epoch": 104.24, "grad_norm": 0.24330556392669678, "learning_rate": 0.00028438125525020997, "loss": 3.6387, "step": 651500 }, { "epoch": 104.256, "grad_norm": 0.25434115529060364, "learning_rate": 0.00028437885515420614, "loss": 3.2113, "step": 651600 }, { "epoch": 104.272, "grad_norm": 0.25154468417167664, "learning_rate": 0.0002843764550582023, "loss": 3.1925, "step": 651700 }, { "epoch": 104.288, "grad_norm": 0.26450204849243164, "learning_rate": 0.0002843740549621985, "loss": 3.4141, "step": 651800 }, { "epoch": 104.304, "grad_norm": 0.24355462193489075, "learning_rate": 0.0002843716548661946, "loss": 3.3229, "step": 651900 }, { "epoch": 104.32, "grad_norm": 0.20885953307151794, "learning_rate": 0.00028436925477019076, "loss": 3.4783, "step": 652000 }, { "epoch": 104.336, "grad_norm": 0.22760429978370667, "learning_rate": 0.0002843668546741869, "loss": 3.5821, "step": 652100 }, { "epoch": 104.352, "grad_norm": 0.22018635272979736, "learning_rate": 0.0002843644545781831, "loss": 3.5047, "step": 652200 }, { "epoch": 104.368, "grad_norm": 0.28871673345565796, "learning_rate": 0.00028436205448217927, "loss": 3.1755, "step": 652300 }, { "epoch": 104.384, "grad_norm": 0.22372925281524658, "learning_rate": 0.0002843596543861754, "loss": 3.5999, "step": 652400 }, { "epoch": 104.4, "grad_norm": 0.2205975353717804, "learning_rate": 0.0002843572782911316, "loss": 3.277, "step": 652500 }, { "epoch": 104.416, "grad_norm": 0.24594873189926147, "learning_rate": 0.0002843548781951278, "loss": 3.3148, "step": 652600 }, { "epoch": 104.432, "grad_norm": 0.24989329278469086, "learning_rate": 0.00028435247809912396, "loss": 3.2477, "step": 652700 }, { "epoch": 104.448, "grad_norm": 0.2930169999599457, "learning_rate": 0.0002843500780031201, "loss": 3.3191, "step": 652800 }, { "epoch": 104.464, "grad_norm": 0.22970502078533173, "learning_rate": 0.00028434767790711625, "loss": 3.4019, "step": 652900 }, { "epoch": 104.48, "grad_norm": 0.26105475425720215, "learning_rate": 0.0002843452778111124, "loss": 3.1289, "step": 653000 }, { "epoch": 104.496, "grad_norm": 0.23275180160999298, "learning_rate": 0.0002843428777151086, "loss": 3.4046, "step": 653100 }, { "epoch": 104.512, "grad_norm": 0.2405398190021515, "learning_rate": 0.00028434047761910475, "loss": 3.1093, "step": 653200 }, { "epoch": 104.528, "grad_norm": 0.23955371975898743, "learning_rate": 0.00028433807752310087, "loss": 3.2698, "step": 653300 }, { "epoch": 104.544, "grad_norm": 0.23837599158287048, "learning_rate": 0.00028433567742709704, "loss": 3.2581, "step": 653400 }, { "epoch": 104.56, "grad_norm": 0.2358938753604889, "learning_rate": 0.0002843332773310932, "loss": 3.3613, "step": 653500 }, { "epoch": 104.576, "grad_norm": 0.2246815711259842, "learning_rate": 0.0002843308772350894, "loss": 3.4766, "step": 653600 }, { "epoch": 104.592, "grad_norm": 0.23179692029953003, "learning_rate": 0.00028432847713908554, "loss": 3.4831, "step": 653700 }, { "epoch": 104.608, "grad_norm": 0.2091190218925476, "learning_rate": 0.0002843260770430817, "loss": 3.2686, "step": 653800 }, { "epoch": 104.624, "grad_norm": 0.2323284149169922, "learning_rate": 0.00028432367694707783, "loss": 3.429, "step": 653900 }, { "epoch": 104.64, "grad_norm": 0.22060588002204895, "learning_rate": 0.000284321276851074, "loss": 3.4739, "step": 654000 }, { "epoch": 104.656, "grad_norm": 0.2434462010860443, "learning_rate": 0.00028431887675507017, "loss": 3.5686, "step": 654100 }, { "epoch": 104.672, "grad_norm": 0.26021242141723633, "learning_rate": 0.00028431647665906634, "loss": 3.2078, "step": 654200 }, { "epoch": 104.688, "grad_norm": 0.2298927903175354, "learning_rate": 0.0002843140765630625, "loss": 3.5909, "step": 654300 }, { "epoch": 104.704, "grad_norm": 0.24671441316604614, "learning_rate": 0.0002843116764670586, "loss": 3.6237, "step": 654400 }, { "epoch": 104.72, "grad_norm": 0.19882960617542267, "learning_rate": 0.0002843092763710548, "loss": 3.4932, "step": 654500 }, { "epoch": 104.736, "grad_norm": 0.23922765254974365, "learning_rate": 0.00028430687627505096, "loss": 3.2931, "step": 654600 }, { "epoch": 104.752, "grad_norm": 0.21488609910011292, "learning_rate": 0.00028430447617904713, "loss": 3.5906, "step": 654700 }, { "epoch": 104.768, "grad_norm": 0.2657349705696106, "learning_rate": 0.0002843020760830433, "loss": 3.5508, "step": 654800 }, { "epoch": 104.784, "grad_norm": 0.22969777882099152, "learning_rate": 0.00028429967598703947, "loss": 3.3743, "step": 654900 }, { "epoch": 104.8, "grad_norm": 0.26145923137664795, "learning_rate": 0.0002842972758910356, "loss": 3.3543, "step": 655000 }, { "epoch": 104.816, "grad_norm": 0.2540701925754547, "learning_rate": 0.00028429487579503175, "loss": 3.416, "step": 655100 }, { "epoch": 104.832, "grad_norm": 0.24846939742565155, "learning_rate": 0.0002842924756990279, "loss": 3.3211, "step": 655200 }, { "epoch": 104.848, "grad_norm": 0.23585295677185059, "learning_rate": 0.0002842900756030241, "loss": 3.3401, "step": 655300 }, { "epoch": 104.864, "grad_norm": 0.23461899161338806, "learning_rate": 0.00028428767550702026, "loss": 3.2466, "step": 655400 }, { "epoch": 104.88, "grad_norm": 0.23822073638439178, "learning_rate": 0.0002842852754110164, "loss": 3.3994, "step": 655500 }, { "epoch": 104.896, "grad_norm": 0.21479012072086334, "learning_rate": 0.00028428287531501254, "loss": 3.2447, "step": 655600 }, { "epoch": 104.912, "grad_norm": 0.2714764177799225, "learning_rate": 0.00028428047521900877, "loss": 3.3706, "step": 655700 }, { "epoch": 104.928, "grad_norm": 0.2557421624660492, "learning_rate": 0.00028427807512300494, "loss": 3.6167, "step": 655800 }, { "epoch": 104.944, "grad_norm": 0.1883917599916458, "learning_rate": 0.00028427567502700105, "loss": 3.584, "step": 655900 }, { "epoch": 104.96, "grad_norm": 0.22518087923526764, "learning_rate": 0.0002842732749309972, "loss": 3.4894, "step": 656000 }, { "epoch": 104.976, "grad_norm": 0.22642415761947632, "learning_rate": 0.0002842708748349934, "loss": 3.4635, "step": 656100 }, { "epoch": 104.992, "grad_norm": 0.24555300176143646, "learning_rate": 0.00028426847473898956, "loss": 3.3379, "step": 656200 }, { "epoch": 105.008, "grad_norm": 0.22950072586536407, "learning_rate": 0.0002842660746429857, "loss": 3.2231, "step": 656300 }, { "epoch": 105.024, "grad_norm": 0.26484838128089905, "learning_rate": 0.00028426367454698184, "loss": 3.2378, "step": 656400 }, { "epoch": 105.04, "grad_norm": 0.21999940276145935, "learning_rate": 0.000284261274450978, "loss": 3.248, "step": 656500 }, { "epoch": 105.056, "grad_norm": 0.25044891238212585, "learning_rate": 0.0002842588743549742, "loss": 3.1703, "step": 656600 }, { "epoch": 105.072, "grad_norm": 0.2420949637889862, "learning_rate": 0.00028425649825993037, "loss": 3.4994, "step": 656700 }, { "epoch": 105.088, "grad_norm": 0.2508358359336853, "learning_rate": 0.00028425409816392654, "loss": 3.3258, "step": 656800 }, { "epoch": 105.104, "grad_norm": 0.22816090285778046, "learning_rate": 0.0002842516980679227, "loss": 3.3636, "step": 656900 }, { "epoch": 105.12, "grad_norm": 0.22051356732845306, "learning_rate": 0.0002842493219728789, "loss": 3.6449, "step": 657000 }, { "epoch": 105.136, "grad_norm": 0.2588076591491699, "learning_rate": 0.00028424692187687506, "loss": 3.0257, "step": 657100 }, { "epoch": 105.152, "grad_norm": 0.26251035928726196, "learning_rate": 0.00028424452178087123, "loss": 3.3619, "step": 657200 }, { "epoch": 105.168, "grad_norm": 0.24411281943321228, "learning_rate": 0.00028424212168486735, "loss": 3.216, "step": 657300 }, { "epoch": 105.184, "grad_norm": 0.23205170035362244, "learning_rate": 0.0002842397215888635, "loss": 3.3444, "step": 657400 }, { "epoch": 105.2, "grad_norm": 0.2555983364582062, "learning_rate": 0.0002842373214928597, "loss": 3.6121, "step": 657500 }, { "epoch": 105.216, "grad_norm": 0.2505091726779938, "learning_rate": 0.00028423492139685586, "loss": 3.9134, "step": 657600 }, { "epoch": 105.232, "grad_norm": 0.24029433727264404, "learning_rate": 0.000284232521300852, "loss": 3.3988, "step": 657700 }, { "epoch": 105.248, "grad_norm": 0.24311374127864838, "learning_rate": 0.0002842301212048482, "loss": 3.274, "step": 657800 }, { "epoch": 105.264, "grad_norm": 0.22484321892261505, "learning_rate": 0.0002842277211088443, "loss": 3.4971, "step": 657900 }, { "epoch": 105.28, "grad_norm": 0.26589685678482056, "learning_rate": 0.0002842253210128405, "loss": 3.4618, "step": 658000 }, { "epoch": 105.296, "grad_norm": 0.20673778653144836, "learning_rate": 0.00028422292091683665, "loss": 3.4581, "step": 658100 }, { "epoch": 105.312, "grad_norm": 0.27320054173469543, "learning_rate": 0.0002842205208208328, "loss": 3.2907, "step": 658200 }, { "epoch": 105.328, "grad_norm": 0.238305002450943, "learning_rate": 0.000284218120724829, "loss": 3.4281, "step": 658300 }, { "epoch": 105.344, "grad_norm": 0.25559520721435547, "learning_rate": 0.0002842157206288251, "loss": 3.1714, "step": 658400 }, { "epoch": 105.36, "grad_norm": 0.2279513031244278, "learning_rate": 0.00028421332053282127, "loss": 3.4586, "step": 658500 }, { "epoch": 105.376, "grad_norm": 0.22677943110466003, "learning_rate": 0.00028421092043681744, "loss": 3.237, "step": 658600 }, { "epoch": 105.392, "grad_norm": 0.22218625247478485, "learning_rate": 0.0002842085203408136, "loss": 3.5909, "step": 658700 }, { "epoch": 105.408, "grad_norm": 0.23830454051494598, "learning_rate": 0.0002842061202448098, "loss": 3.268, "step": 658800 }, { "epoch": 105.424, "grad_norm": 0.24671313166618347, "learning_rate": 0.00028420372014880595, "loss": 3.5832, "step": 658900 }, { "epoch": 105.44, "grad_norm": 0.2582648992538452, "learning_rate": 0.00028420132005280206, "loss": 3.1981, "step": 659000 }, { "epoch": 105.456, "grad_norm": 0.2277836799621582, "learning_rate": 0.00028419891995679823, "loss": 3.519, "step": 659100 }, { "epoch": 105.472, "grad_norm": 0.2557816505432129, "learning_rate": 0.0002841965198607944, "loss": 3.4698, "step": 659200 }, { "epoch": 105.488, "grad_norm": 0.2588292360305786, "learning_rate": 0.00028419411976479057, "loss": 3.23, "step": 659300 }, { "epoch": 105.504, "grad_norm": 0.23388925194740295, "learning_rate": 0.00028419171966878674, "loss": 3.3672, "step": 659400 }, { "epoch": 105.52, "grad_norm": 0.25549396872520447, "learning_rate": 0.00028418931957278285, "loss": 3.3516, "step": 659500 }, { "epoch": 105.536, "grad_norm": 0.24231429398059845, "learning_rate": 0.000284186919476779, "loss": 3.2906, "step": 659600 }, { "epoch": 105.552, "grad_norm": 0.22522485256195068, "learning_rate": 0.0002841845193807752, "loss": 3.2828, "step": 659700 }, { "epoch": 105.568, "grad_norm": 0.260113924741745, "learning_rate": 0.00028418211928477136, "loss": 3.4597, "step": 659800 }, { "epoch": 105.584, "grad_norm": 0.25693973898887634, "learning_rate": 0.00028417971918876753, "loss": 3.4651, "step": 659900 }, { "epoch": 105.6, "grad_norm": 0.21248573064804077, "learning_rate": 0.0002841773190927637, "loss": 3.5691, "step": 660000 }, { "epoch": 105.616, "grad_norm": 0.2712379992008209, "learning_rate": 0.0002841749189967598, "loss": 3.3249, "step": 660100 }, { "epoch": 105.632, "grad_norm": 0.24061483144760132, "learning_rate": 0.000284172518900756, "loss": 3.4633, "step": 660200 }, { "epoch": 105.648, "grad_norm": 0.397233784198761, "learning_rate": 0.00028417011880475215, "loss": 3.4978, "step": 660300 }, { "epoch": 105.664, "grad_norm": 0.2613547146320343, "learning_rate": 0.0002841677187087483, "loss": 3.4264, "step": 660400 }, { "epoch": 105.68, "grad_norm": 0.24503789842128754, "learning_rate": 0.0002841653186127445, "loss": 2.9845, "step": 660500 }, { "epoch": 105.696, "grad_norm": 0.23089216649532318, "learning_rate": 0.0002841629425177007, "loss": 3.3674, "step": 660600 }, { "epoch": 105.712, "grad_norm": 0.24558985233306885, "learning_rate": 0.00028416054242169685, "loss": 3.354, "step": 660700 }, { "epoch": 105.728, "grad_norm": 0.2642465829849243, "learning_rate": 0.000284158142325693, "loss": 3.4464, "step": 660800 }, { "epoch": 105.744, "grad_norm": 0.23990076780319214, "learning_rate": 0.0002841557422296892, "loss": 3.5426, "step": 660900 }, { "epoch": 105.76, "grad_norm": 0.25479015707969666, "learning_rate": 0.0002841533421336853, "loss": 3.5613, "step": 661000 }, { "epoch": 105.776, "grad_norm": 0.2357495278120041, "learning_rate": 0.00028415094203768147, "loss": 3.6867, "step": 661100 }, { "epoch": 105.792, "grad_norm": 0.2158200740814209, "learning_rate": 0.00028414854194167764, "loss": 3.4781, "step": 661200 }, { "epoch": 105.808, "grad_norm": 0.23140017688274384, "learning_rate": 0.0002841461418456738, "loss": 3.5344, "step": 661300 }, { "epoch": 105.824, "grad_norm": 0.250849187374115, "learning_rate": 0.00028414374174967, "loss": 3.2957, "step": 661400 }, { "epoch": 105.84, "grad_norm": 0.23716233670711517, "learning_rate": 0.0002841413416536661, "loss": 3.5191, "step": 661500 }, { "epoch": 105.856, "grad_norm": 0.24616023898124695, "learning_rate": 0.00028413894155766226, "loss": 3.1603, "step": 661600 }, { "epoch": 105.872, "grad_norm": 0.2249215692281723, "learning_rate": 0.00028413654146165843, "loss": 3.4803, "step": 661700 }, { "epoch": 105.888, "grad_norm": 0.26433080434799194, "learning_rate": 0.0002841341413656546, "loss": 3.6515, "step": 661800 }, { "epoch": 105.904, "grad_norm": 0.23406310379505157, "learning_rate": 0.00028413174126965077, "loss": 3.4194, "step": 661900 }, { "epoch": 105.92, "grad_norm": 0.2311294972896576, "learning_rate": 0.00028412934117364694, "loss": 3.4521, "step": 662000 }, { "epoch": 105.936, "grad_norm": 0.22122853994369507, "learning_rate": 0.00028412694107764306, "loss": 3.3053, "step": 662100 }, { "epoch": 105.952, "grad_norm": 0.23609280586242676, "learning_rate": 0.0002841245409816392, "loss": 3.3704, "step": 662200 }, { "epoch": 105.968, "grad_norm": 0.23415511846542358, "learning_rate": 0.0002841221408856354, "loss": 3.3872, "step": 662300 }, { "epoch": 105.984, "grad_norm": 0.23704978823661804, "learning_rate": 0.00028411974078963156, "loss": 3.1337, "step": 662400 }, { "epoch": 106.0, "grad_norm": 0.23508000373840332, "learning_rate": 0.00028411734069362773, "loss": 3.2867, "step": 662500 }, { "epoch": 106.016, "grad_norm": 0.24447311460971832, "learning_rate": 0.0002841149405976239, "loss": 3.3409, "step": 662600 }, { "epoch": 106.032, "grad_norm": 0.2571202516555786, "learning_rate": 0.00028411254050162, "loss": 3.1264, "step": 662700 }, { "epoch": 106.048, "grad_norm": 0.2796853184700012, "learning_rate": 0.0002841101404056162, "loss": 3.3162, "step": 662800 }, { "epoch": 106.064, "grad_norm": 0.23414820432662964, "learning_rate": 0.00028410774030961235, "loss": 3.3324, "step": 662900 }, { "epoch": 106.08, "grad_norm": 0.24637795984745026, "learning_rate": 0.0002841053402136085, "loss": 3.3187, "step": 663000 }, { "epoch": 106.096, "grad_norm": 0.2377786487340927, "learning_rate": 0.0002841029401176047, "loss": 3.5609, "step": 663100 }, { "epoch": 106.112, "grad_norm": 0.32185572385787964, "learning_rate": 0.0002841005400216008, "loss": 3.2753, "step": 663200 }, { "epoch": 106.128, "grad_norm": 0.2801622748374939, "learning_rate": 0.000284098139925597, "loss": 3.3861, "step": 663300 }, { "epoch": 106.144, "grad_norm": 0.24149608612060547, "learning_rate": 0.00028409573982959315, "loss": 3.4192, "step": 663400 }, { "epoch": 106.16, "grad_norm": 0.2217332124710083, "learning_rate": 0.0002840933397335893, "loss": 3.3605, "step": 663500 }, { "epoch": 106.176, "grad_norm": 0.2671542465686798, "learning_rate": 0.0002840909396375855, "loss": 3.4617, "step": 663600 }, { "epoch": 106.192, "grad_norm": 0.2661835551261902, "learning_rate": 0.00028408853954158165, "loss": 3.6296, "step": 663700 }, { "epoch": 106.208, "grad_norm": 0.24969659745693207, "learning_rate": 0.00028408613944557777, "loss": 3.1945, "step": 663800 }, { "epoch": 106.224, "grad_norm": 0.2293853461742401, "learning_rate": 0.00028408373934957394, "loss": 3.272, "step": 663900 }, { "epoch": 106.24, "grad_norm": 0.24301499128341675, "learning_rate": 0.0002840813632545302, "loss": 3.2257, "step": 664000 }, { "epoch": 106.256, "grad_norm": 0.24225707352161407, "learning_rate": 0.0002840789631585263, "loss": 3.2204, "step": 664100 }, { "epoch": 106.272, "grad_norm": 0.21903227269649506, "learning_rate": 0.00028407656306252247, "loss": 3.3154, "step": 664200 }, { "epoch": 106.288, "grad_norm": 0.23608280718326569, "learning_rate": 0.00028407416296651863, "loss": 3.4957, "step": 664300 }, { "epoch": 106.304, "grad_norm": 0.2529166042804718, "learning_rate": 0.0002840717628705148, "loss": 3.5038, "step": 664400 }, { "epoch": 106.32, "grad_norm": 0.2184343785047531, "learning_rate": 0.00028406936277451097, "loss": 3.3519, "step": 664500 }, { "epoch": 106.336, "grad_norm": 0.24684731662273407, "learning_rate": 0.00028406696267850714, "loss": 3.4808, "step": 664600 }, { "epoch": 106.352, "grad_norm": 0.3540830612182617, "learning_rate": 0.00028406456258250326, "loss": 3.1888, "step": 664700 }, { "epoch": 106.368, "grad_norm": 0.2594861686229706, "learning_rate": 0.0002840621624864994, "loss": 3.5785, "step": 664800 }, { "epoch": 106.384, "grad_norm": 0.28640374541282654, "learning_rate": 0.0002840597623904956, "loss": 3.5525, "step": 664900 }, { "epoch": 106.4, "grad_norm": 0.26739734411239624, "learning_rate": 0.00028405736229449176, "loss": 3.3742, "step": 665000 }, { "epoch": 106.416, "grad_norm": 0.392762690782547, "learning_rate": 0.00028405496219848793, "loss": 3.4806, "step": 665100 }, { "epoch": 106.432, "grad_norm": 0.24007190763950348, "learning_rate": 0.00028405256210248405, "loss": 3.4001, "step": 665200 }, { "epoch": 106.448, "grad_norm": 0.21913771331310272, "learning_rate": 0.0002840501620064802, "loss": 3.4341, "step": 665300 }, { "epoch": 106.464, "grad_norm": 0.22534574568271637, "learning_rate": 0.0002840477619104764, "loss": 3.5619, "step": 665400 }, { "epoch": 106.48, "grad_norm": 0.24639765918254852, "learning_rate": 0.00028404536181447256, "loss": 3.1089, "step": 665500 }, { "epoch": 106.496, "grad_norm": 0.25935250520706177, "learning_rate": 0.0002840429617184687, "loss": 3.4541, "step": 665600 }, { "epoch": 106.512, "grad_norm": 0.27552440762519836, "learning_rate": 0.0002840405616224649, "loss": 3.0969, "step": 665700 }, { "epoch": 106.528, "grad_norm": 0.21815058588981628, "learning_rate": 0.000284038161526461, "loss": 3.5528, "step": 665800 }, { "epoch": 106.544, "grad_norm": 0.25754597783088684, "learning_rate": 0.0002840357614304572, "loss": 3.2901, "step": 665900 }, { "epoch": 106.56, "grad_norm": 0.23136290907859802, "learning_rate": 0.00028403336133445335, "loss": 3.4337, "step": 666000 }, { "epoch": 106.576, "grad_norm": 0.2178489714860916, "learning_rate": 0.0002840309612384495, "loss": 3.2661, "step": 666100 }, { "epoch": 106.592, "grad_norm": 0.2239128202199936, "learning_rate": 0.0002840285611424457, "loss": 3.4474, "step": 666200 }, { "epoch": 106.608, "grad_norm": 0.21735559403896332, "learning_rate": 0.0002840261610464418, "loss": 3.353, "step": 666300 }, { "epoch": 106.624, "grad_norm": 0.23436056077480316, "learning_rate": 0.00028402376095043797, "loss": 3.4238, "step": 666400 }, { "epoch": 106.64, "grad_norm": 0.22677011787891388, "learning_rate": 0.00028402136085443414, "loss": 3.4362, "step": 666500 }, { "epoch": 106.656, "grad_norm": 0.2886824905872345, "learning_rate": 0.0002840189607584303, "loss": 3.3164, "step": 666600 }, { "epoch": 106.672, "grad_norm": 0.33641189336776733, "learning_rate": 0.0002840165606624265, "loss": 3.508, "step": 666700 }, { "epoch": 106.688, "grad_norm": 0.2419121414422989, "learning_rate": 0.00028401416056642265, "loss": 3.3012, "step": 666800 }, { "epoch": 106.704, "grad_norm": 0.25171130895614624, "learning_rate": 0.00028401176047041876, "loss": 3.1923, "step": 666900 }, { "epoch": 106.72, "grad_norm": 0.22668322920799255, "learning_rate": 0.00028400936037441493, "loss": 3.6569, "step": 667000 }, { "epoch": 106.736, "grad_norm": 0.2561470866203308, "learning_rate": 0.0002840069602784111, "loss": 3.3315, "step": 667100 }, { "epoch": 106.752, "grad_norm": 0.23514355719089508, "learning_rate": 0.00028400456018240727, "loss": 3.383, "step": 667200 }, { "epoch": 106.768, "grad_norm": 0.23546525835990906, "learning_rate": 0.00028400216008640344, "loss": 3.2692, "step": 667300 }, { "epoch": 106.784, "grad_norm": 0.31198737025260925, "learning_rate": 0.00028399975999039955, "loss": 3.3593, "step": 667400 }, { "epoch": 106.8, "grad_norm": 0.26971614360809326, "learning_rate": 0.0002839973598943957, "loss": 3.331, "step": 667500 }, { "epoch": 106.816, "grad_norm": 0.2315877228975296, "learning_rate": 0.0002839949597983919, "loss": 3.427, "step": 667600 }, { "epoch": 106.832, "grad_norm": 0.22431445121765137, "learning_rate": 0.00028399255970238806, "loss": 3.1732, "step": 667700 }, { "epoch": 106.848, "grad_norm": 0.2546941041946411, "learning_rate": 0.00028399015960638423, "loss": 3.3417, "step": 667800 }, { "epoch": 106.864, "grad_norm": 0.2259848564863205, "learning_rate": 0.0002839877595103804, "loss": 3.5572, "step": 667900 }, { "epoch": 106.88, "grad_norm": 0.2494620829820633, "learning_rate": 0.0002839853594143765, "loss": 3.4802, "step": 668000 }, { "epoch": 106.896, "grad_norm": 0.2854108214378357, "learning_rate": 0.0002839829593183727, "loss": 3.4483, "step": 668100 }, { "epoch": 106.912, "grad_norm": 0.30589741468429565, "learning_rate": 0.0002839805592223689, "loss": 3.3984, "step": 668200 }, { "epoch": 106.928, "grad_norm": 0.27431997656822205, "learning_rate": 0.000283978159126365, "loss": 3.3022, "step": 668300 }, { "epoch": 106.944, "grad_norm": 0.21641188859939575, "learning_rate": 0.0002839758070322813, "loss": 3.4329, "step": 668400 }, { "epoch": 106.96, "grad_norm": 0.23792274296283722, "learning_rate": 0.00028397340693627745, "loss": 3.1115, "step": 668500 }, { "epoch": 106.976, "grad_norm": 0.22689129412174225, "learning_rate": 0.0002839710068402736, "loss": 3.0985, "step": 668600 }, { "epoch": 106.992, "grad_norm": 0.25166594982147217, "learning_rate": 0.00028396860674426974, "loss": 3.41, "step": 668700 }, { "epoch": 107.008, "grad_norm": 0.2408822923898697, "learning_rate": 0.0002839662066482659, "loss": 3.4107, "step": 668800 }, { "epoch": 107.024, "grad_norm": 0.23702281713485718, "learning_rate": 0.0002839638065522621, "loss": 3.0066, "step": 668900 }, { "epoch": 107.04, "grad_norm": 0.25598224997520447, "learning_rate": 0.00028396140645625825, "loss": 3.1912, "step": 669000 }, { "epoch": 107.056, "grad_norm": 0.2253168523311615, "learning_rate": 0.0002839590063602544, "loss": 3.1193, "step": 669100 }, { "epoch": 107.072, "grad_norm": 0.2622341513633728, "learning_rate": 0.00028395660626425053, "loss": 3.5271, "step": 669200 }, { "epoch": 107.088, "grad_norm": 0.2748871445655823, "learning_rate": 0.0002839542061682467, "loss": 3.4102, "step": 669300 }, { "epoch": 107.104, "grad_norm": 0.23679840564727783, "learning_rate": 0.00028395180607224287, "loss": 3.5138, "step": 669400 }, { "epoch": 107.12, "grad_norm": 0.29354703426361084, "learning_rate": 0.00028394940597623904, "loss": 3.4587, "step": 669500 }, { "epoch": 107.136, "grad_norm": 0.2835995852947235, "learning_rate": 0.0002839470058802352, "loss": 3.1357, "step": 669600 }, { "epoch": 107.152, "grad_norm": 0.24777433276176453, "learning_rate": 0.0002839446057842314, "loss": 3.4317, "step": 669700 }, { "epoch": 107.168, "grad_norm": 0.23847317695617676, "learning_rate": 0.0002839422056882275, "loss": 3.3794, "step": 669800 }, { "epoch": 107.184, "grad_norm": 0.23639856278896332, "learning_rate": 0.00028393980559222366, "loss": 3.58, "step": 669900 }, { "epoch": 107.2, "grad_norm": 0.24652226269245148, "learning_rate": 0.00028393740549621983, "loss": 3.3764, "step": 670000 }, { "epoch": 107.216, "grad_norm": 0.2344384640455246, "learning_rate": 0.000283935005400216, "loss": 3.23, "step": 670100 }, { "epoch": 107.232, "grad_norm": 0.23132769763469696, "learning_rate": 0.00028393260530421217, "loss": 3.5058, "step": 670200 }, { "epoch": 107.248, "grad_norm": 0.2878231406211853, "learning_rate": 0.0002839302052082083, "loss": 3.2199, "step": 670300 }, { "epoch": 107.264, "grad_norm": 0.22765907645225525, "learning_rate": 0.00028392780511220445, "loss": 3.3687, "step": 670400 }, { "epoch": 107.28, "grad_norm": 0.25694018602371216, "learning_rate": 0.0002839254050162006, "loss": 3.3206, "step": 670500 }, { "epoch": 107.296, "grad_norm": 0.2656087577342987, "learning_rate": 0.0002839230049201968, "loss": 3.6038, "step": 670600 }, { "epoch": 107.312, "grad_norm": 0.2533394694328308, "learning_rate": 0.00028392060482419296, "loss": 3.2993, "step": 670700 }, { "epoch": 107.328, "grad_norm": 0.21578989923000336, "learning_rate": 0.00028391820472818913, "loss": 3.4026, "step": 670800 }, { "epoch": 107.344, "grad_norm": 0.23437944054603577, "learning_rate": 0.00028391580463218524, "loss": 3.315, "step": 670900 }, { "epoch": 107.36, "grad_norm": 0.24643434584140778, "learning_rate": 0.0002839134045361814, "loss": 3.2274, "step": 671000 }, { "epoch": 107.376, "grad_norm": 0.26905301213264465, "learning_rate": 0.0002839110044401776, "loss": 3.4072, "step": 671100 }, { "epoch": 107.392, "grad_norm": 0.2458474189043045, "learning_rate": 0.00028390860434417375, "loss": 3.496, "step": 671200 }, { "epoch": 107.408, "grad_norm": 0.23641358315944672, "learning_rate": 0.0002839062042481699, "loss": 3.4033, "step": 671300 }, { "epoch": 107.424, "grad_norm": 0.2382349669933319, "learning_rate": 0.00028390380415216604, "loss": 3.3531, "step": 671400 }, { "epoch": 107.44, "grad_norm": 0.24868005514144897, "learning_rate": 0.0002839014040561622, "loss": 3.2678, "step": 671500 }, { "epoch": 107.456, "grad_norm": 0.25233879685401917, "learning_rate": 0.0002838990039601584, "loss": 3.1304, "step": 671600 }, { "epoch": 107.472, "grad_norm": 0.24848225712776184, "learning_rate": 0.00028389660386415454, "loss": 3.5136, "step": 671700 }, { "epoch": 107.488, "grad_norm": 0.230527862906456, "learning_rate": 0.0002838942037681507, "loss": 3.3534, "step": 671800 }, { "epoch": 107.504, "grad_norm": 0.283657044172287, "learning_rate": 0.0002838918036721469, "loss": 3.2051, "step": 671900 }, { "epoch": 107.52, "grad_norm": 0.22450971603393555, "learning_rate": 0.00028388942757710307, "loss": 3.6592, "step": 672000 }, { "epoch": 107.536, "grad_norm": 0.2562612295150757, "learning_rate": 0.00028388702748109924, "loss": 3.4848, "step": 672100 }, { "epoch": 107.552, "grad_norm": 0.24983064830303192, "learning_rate": 0.0002838846273850954, "loss": 2.9213, "step": 672200 }, { "epoch": 107.568, "grad_norm": 0.25194036960601807, "learning_rate": 0.0002838822272890915, "loss": 3.4243, "step": 672300 }, { "epoch": 107.584, "grad_norm": 0.25367769598960876, "learning_rate": 0.0002838798271930877, "loss": 3.579, "step": 672400 }, { "epoch": 107.6, "grad_norm": 0.2437036633491516, "learning_rate": 0.00028387742709708386, "loss": 3.5314, "step": 672500 }, { "epoch": 107.616, "grad_norm": 0.2362956404685974, "learning_rate": 0.00028387502700108003, "loss": 3.7065, "step": 672600 }, { "epoch": 107.632, "grad_norm": 0.23655670881271362, "learning_rate": 0.0002838726269050762, "loss": 3.3389, "step": 672700 }, { "epoch": 107.648, "grad_norm": 0.24257999658584595, "learning_rate": 0.00028387022680907237, "loss": 3.5464, "step": 672800 }, { "epoch": 107.664, "grad_norm": 0.25666770339012146, "learning_rate": 0.0002838678267130685, "loss": 3.7833, "step": 672900 }, { "epoch": 107.68, "grad_norm": 0.24536189436912537, "learning_rate": 0.00028386542661706465, "loss": 3.5865, "step": 673000 }, { "epoch": 107.696, "grad_norm": 0.25664857029914856, "learning_rate": 0.0002838630265210608, "loss": 3.2764, "step": 673100 }, { "epoch": 107.712, "grad_norm": 0.27174893021583557, "learning_rate": 0.000283860626425057, "loss": 3.3167, "step": 673200 }, { "epoch": 107.728, "grad_norm": 0.24076195061206818, "learning_rate": 0.00028385822632905316, "loss": 3.389, "step": 673300 }, { "epoch": 107.744, "grad_norm": 0.238186314702034, "learning_rate": 0.0002838558262330493, "loss": 3.5787, "step": 673400 }, { "epoch": 107.76, "grad_norm": 0.20913533866405487, "learning_rate": 0.00028385342613704544, "loss": 3.4377, "step": 673500 }, { "epoch": 107.776, "grad_norm": 0.21053200960159302, "learning_rate": 0.0002838510260410416, "loss": 3.2443, "step": 673600 }, { "epoch": 107.792, "grad_norm": 0.25970494747161865, "learning_rate": 0.0002838486259450378, "loss": 3.2649, "step": 673700 }, { "epoch": 107.808, "grad_norm": 0.2635687589645386, "learning_rate": 0.00028384622584903395, "loss": 3.3965, "step": 673800 }, { "epoch": 107.824, "grad_norm": 0.24297122657299042, "learning_rate": 0.0002838438257530301, "loss": 3.1855, "step": 673900 }, { "epoch": 107.84, "grad_norm": 0.26199525594711304, "learning_rate": 0.00028384142565702624, "loss": 3.5456, "step": 674000 }, { "epoch": 107.856, "grad_norm": 0.24820935726165771, "learning_rate": 0.0002838390255610224, "loss": 3.2621, "step": 674100 }, { "epoch": 107.872, "grad_norm": 0.2623003125190735, "learning_rate": 0.0002838366254650186, "loss": 3.4009, "step": 674200 }, { "epoch": 107.888, "grad_norm": 0.2515687942504883, "learning_rate": 0.00028383422536901474, "loss": 3.3331, "step": 674300 }, { "epoch": 107.904, "grad_norm": 0.25258681178092957, "learning_rate": 0.0002838318252730109, "loss": 3.2587, "step": 674400 }, { "epoch": 107.92, "grad_norm": 0.2955288290977478, "learning_rate": 0.00028382942517700703, "loss": 3.4215, "step": 674500 }, { "epoch": 107.936, "grad_norm": 0.2170885056257248, "learning_rate": 0.0002838270250810032, "loss": 3.2092, "step": 674600 }, { "epoch": 107.952, "grad_norm": 0.26623672246932983, "learning_rate": 0.00028382462498499937, "loss": 3.1031, "step": 674700 }, { "epoch": 107.968, "grad_norm": 0.20776589214801788, "learning_rate": 0.00028382222488899554, "loss": 3.4694, "step": 674800 }, { "epoch": 107.984, "grad_norm": 0.2088155448436737, "learning_rate": 0.0002838198247929917, "loss": 3.2883, "step": 674900 }, { "epoch": 108.0, "grad_norm": 0.23498950898647308, "learning_rate": 0.0002838174246969879, "loss": 3.39, "step": 675000 }, { "epoch": 108.016, "grad_norm": 0.23156329989433289, "learning_rate": 0.000283815024600984, "loss": 2.9449, "step": 675100 }, { "epoch": 108.032, "grad_norm": 0.2783654034137726, "learning_rate": 0.00028381262450498016, "loss": 3.308, "step": 675200 }, { "epoch": 108.048, "grad_norm": 0.25967171788215637, "learning_rate": 0.0002838102484099364, "loss": 3.0259, "step": 675300 }, { "epoch": 108.064, "grad_norm": 0.2517883777618408, "learning_rate": 0.0002838078483139325, "loss": 3.5858, "step": 675400 }, { "epoch": 108.08, "grad_norm": 0.2282707244157791, "learning_rate": 0.0002838054482179287, "loss": 3.3258, "step": 675500 }, { "epoch": 108.096, "grad_norm": 0.32690176367759705, "learning_rate": 0.00028380304812192485, "loss": 3.4953, "step": 675600 }, { "epoch": 108.112, "grad_norm": 0.2525426149368286, "learning_rate": 0.000283800648025921, "loss": 3.2594, "step": 675700 }, { "epoch": 108.128, "grad_norm": 0.27359163761138916, "learning_rate": 0.0002837982479299172, "loss": 3.5737, "step": 675800 }, { "epoch": 108.144, "grad_norm": 0.3203819692134857, "learning_rate": 0.00028379584783391336, "loss": 3.2881, "step": 675900 }, { "epoch": 108.16, "grad_norm": 0.2932054400444031, "learning_rate": 0.0002837934477379095, "loss": 3.5451, "step": 676000 }, { "epoch": 108.176, "grad_norm": 0.21397531032562256, "learning_rate": 0.00028379104764190565, "loss": 3.5535, "step": 676100 }, { "epoch": 108.192, "grad_norm": 0.2503565549850464, "learning_rate": 0.0002837886475459018, "loss": 3.3037, "step": 676200 }, { "epoch": 108.208, "grad_norm": 0.26857495307922363, "learning_rate": 0.000283786247449898, "loss": 3.1556, "step": 676300 }, { "epoch": 108.224, "grad_norm": 0.23593921959400177, "learning_rate": 0.00028378384735389415, "loss": 3.3914, "step": 676400 }, { "epoch": 108.24, "grad_norm": 0.2466345876455307, "learning_rate": 0.00028378144725789027, "loss": 3.4825, "step": 676500 }, { "epoch": 108.256, "grad_norm": 0.24515531957149506, "learning_rate": 0.00028377904716188644, "loss": 3.2372, "step": 676600 }, { "epoch": 108.272, "grad_norm": 0.26230818033218384, "learning_rate": 0.0002837766470658826, "loss": 3.4738, "step": 676700 }, { "epoch": 108.288, "grad_norm": 0.2373523861169815, "learning_rate": 0.0002837742469698788, "loss": 3.3755, "step": 676800 }, { "epoch": 108.304, "grad_norm": 0.25039732456207275, "learning_rate": 0.00028377184687387495, "loss": 3.2816, "step": 676900 }, { "epoch": 108.32, "grad_norm": 0.2597050070762634, "learning_rate": 0.0002837694467778711, "loss": 3.2501, "step": 677000 }, { "epoch": 108.336, "grad_norm": 0.2436797320842743, "learning_rate": 0.00028376704668186723, "loss": 3.4923, "step": 677100 }, { "epoch": 108.352, "grad_norm": 0.2582792639732361, "learning_rate": 0.0002837646465858634, "loss": 3.3697, "step": 677200 }, { "epoch": 108.368, "grad_norm": 0.22966648638248444, "learning_rate": 0.00028376224648985957, "loss": 3.5784, "step": 677300 }, { "epoch": 108.384, "grad_norm": 0.20611372590065002, "learning_rate": 0.00028375984639385574, "loss": 3.7562, "step": 677400 }, { "epoch": 108.4, "grad_norm": 0.24376221001148224, "learning_rate": 0.0002837574462978519, "loss": 3.2316, "step": 677500 }, { "epoch": 108.416, "grad_norm": 0.26403799653053284, "learning_rate": 0.000283755046201848, "loss": 3.2089, "step": 677600 }, { "epoch": 108.432, "grad_norm": 0.24735133349895477, "learning_rate": 0.0002837526461058442, "loss": 3.1951, "step": 677700 }, { "epoch": 108.448, "grad_norm": 0.2402300238609314, "learning_rate": 0.00028375024600984036, "loss": 3.3648, "step": 677800 }, { "epoch": 108.464, "grad_norm": 0.23744890093803406, "learning_rate": 0.00028374784591383653, "loss": 3.3896, "step": 677900 }, { "epoch": 108.48, "grad_norm": 0.251057893037796, "learning_rate": 0.0002837454458178327, "loss": 3.3677, "step": 678000 }, { "epoch": 108.496, "grad_norm": 0.2792510986328125, "learning_rate": 0.00028374304572182887, "loss": 3.3351, "step": 678100 }, { "epoch": 108.512, "grad_norm": 0.2853145897388458, "learning_rate": 0.000283740645625825, "loss": 3.1956, "step": 678200 }, { "epoch": 108.528, "grad_norm": 0.26943522691726685, "learning_rate": 0.00028373824552982115, "loss": 3.5658, "step": 678300 }, { "epoch": 108.544, "grad_norm": 0.2537020146846771, "learning_rate": 0.0002837358454338173, "loss": 3.2348, "step": 678400 }, { "epoch": 108.56, "grad_norm": 0.24288037419319153, "learning_rate": 0.0002837334453378135, "loss": 3.3639, "step": 678500 }, { "epoch": 108.576, "grad_norm": 0.22683261334896088, "learning_rate": 0.00028373104524180966, "loss": 3.1451, "step": 678600 }, { "epoch": 108.592, "grad_norm": 0.2694734036922455, "learning_rate": 0.0002837286451458058, "loss": 3.4338, "step": 678700 }, { "epoch": 108.608, "grad_norm": 0.2541027069091797, "learning_rate": 0.00028372624504980194, "loss": 3.1981, "step": 678800 }, { "epoch": 108.624, "grad_norm": 0.257487952709198, "learning_rate": 0.0002837238449537981, "loss": 3.6192, "step": 678900 }, { "epoch": 108.64, "grad_norm": 0.26396316289901733, "learning_rate": 0.0002837214448577943, "loss": 3.4427, "step": 679000 }, { "epoch": 108.656, "grad_norm": 0.22858597338199615, "learning_rate": 0.00028371904476179045, "loss": 3.3462, "step": 679100 }, { "epoch": 108.672, "grad_norm": 0.2676556408405304, "learning_rate": 0.0002837166446657866, "loss": 3.8011, "step": 679200 }, { "epoch": 108.688, "grad_norm": 0.24442878365516663, "learning_rate": 0.00028371424456978273, "loss": 3.2274, "step": 679300 }, { "epoch": 108.704, "grad_norm": 0.24814069271087646, "learning_rate": 0.0002837118444737789, "loss": 3.2366, "step": 679400 }, { "epoch": 108.72, "grad_norm": 0.2479088008403778, "learning_rate": 0.0002837094443777751, "loss": 3.6281, "step": 679500 }, { "epoch": 108.736, "grad_norm": 0.22430573403835297, "learning_rate": 0.00028370704428177124, "loss": 2.8833, "step": 679600 }, { "epoch": 108.752, "grad_norm": 0.2706644535064697, "learning_rate": 0.00028370466818672743, "loss": 3.5164, "step": 679700 }, { "epoch": 108.768, "grad_norm": 0.21034064888954163, "learning_rate": 0.0002837022680907236, "loss": 3.5187, "step": 679800 }, { "epoch": 108.784, "grad_norm": 0.221243217587471, "learning_rate": 0.00028369986799471977, "loss": 3.3819, "step": 679900 }, { "epoch": 108.8, "grad_norm": 0.24024586379528046, "learning_rate": 0.00028369746789871594, "loss": 3.3484, "step": 680000 }, { "epoch": 108.816, "grad_norm": 0.2427552491426468, "learning_rate": 0.0002836950678027121, "loss": 3.6651, "step": 680100 }, { "epoch": 108.832, "grad_norm": 0.257856547832489, "learning_rate": 0.0002836926677067082, "loss": 3.3243, "step": 680200 }, { "epoch": 108.848, "grad_norm": 0.2732832729816437, "learning_rate": 0.0002836902676107044, "loss": 3.5374, "step": 680300 }, { "epoch": 108.864, "grad_norm": 0.27492764592170715, "learning_rate": 0.00028368786751470056, "loss": 3.2767, "step": 680400 }, { "epoch": 108.88, "grad_norm": 0.2922387421131134, "learning_rate": 0.00028368546741869673, "loss": 3.4513, "step": 680500 }, { "epoch": 108.896, "grad_norm": 0.26184144616127014, "learning_rate": 0.0002836830673226929, "loss": 3.5049, "step": 680600 }, { "epoch": 108.912, "grad_norm": 0.23386403918266296, "learning_rate": 0.000283680667226689, "loss": 3.5187, "step": 680700 }, { "epoch": 108.928, "grad_norm": 0.22501234710216522, "learning_rate": 0.0002836782671306852, "loss": 3.3596, "step": 680800 }, { "epoch": 108.944, "grad_norm": 0.25304245948791504, "learning_rate": 0.00028367586703468135, "loss": 3.34, "step": 680900 }, { "epoch": 108.96, "grad_norm": 0.22920961678028107, "learning_rate": 0.0002836734669386775, "loss": 3.2096, "step": 681000 }, { "epoch": 108.976, "grad_norm": 0.24957670271396637, "learning_rate": 0.0002836710668426737, "loss": 3.4252, "step": 681100 }, { "epoch": 108.992, "grad_norm": 0.27017152309417725, "learning_rate": 0.00028366866674666986, "loss": 3.4597, "step": 681200 }, { "epoch": 109.008, "grad_norm": 0.2403929978609085, "learning_rate": 0.000283666266650666, "loss": 3.3374, "step": 681300 }, { "epoch": 109.024, "grad_norm": 0.2575015127658844, "learning_rate": 0.00028366386655466214, "loss": 3.2223, "step": 681400 }, { "epoch": 109.04, "grad_norm": 0.2724219858646393, "learning_rate": 0.0002836614664586583, "loss": 3.4187, "step": 681500 }, { "epoch": 109.056, "grad_norm": 0.26324158906936646, "learning_rate": 0.0002836590663626545, "loss": 3.117, "step": 681600 }, { "epoch": 109.072, "grad_norm": 0.24597817659378052, "learning_rate": 0.00028365666626665065, "loss": 3.2001, "step": 681700 }, { "epoch": 109.088, "grad_norm": 0.26412299275398254, "learning_rate": 0.00028365426617064677, "loss": 3.2279, "step": 681800 }, { "epoch": 109.104, "grad_norm": 0.2738043963909149, "learning_rate": 0.000283651890075603, "loss": 3.5477, "step": 681900 }, { "epoch": 109.12, "grad_norm": 0.27921465039253235, "learning_rate": 0.0002836494899795992, "loss": 3.3038, "step": 682000 }, { "epoch": 109.136, "grad_norm": 0.2768918573856354, "learning_rate": 0.00028364708988359535, "loss": 2.976, "step": 682100 }, { "epoch": 109.152, "grad_norm": 0.23358409106731415, "learning_rate": 0.00028364468978759146, "loss": 3.5577, "step": 682200 }, { "epoch": 109.168, "grad_norm": 0.24958385527133942, "learning_rate": 0.00028364228969158763, "loss": 3.182, "step": 682300 }, { "epoch": 109.184, "grad_norm": 0.28457942605018616, "learning_rate": 0.0002836398895955838, "loss": 3.5217, "step": 682400 }, { "epoch": 109.2, "grad_norm": 0.24977420270442963, "learning_rate": 0.00028363748949957997, "loss": 3.5052, "step": 682500 }, { "epoch": 109.216, "grad_norm": 0.23548293113708496, "learning_rate": 0.00028363508940357614, "loss": 3.5296, "step": 682600 }, { "epoch": 109.232, "grad_norm": 0.3030262291431427, "learning_rate": 0.00028363268930757225, "loss": 3.4419, "step": 682700 }, { "epoch": 109.248, "grad_norm": 0.23364531993865967, "learning_rate": 0.0002836302892115684, "loss": 3.5658, "step": 682800 }, { "epoch": 109.264, "grad_norm": 0.25061169266700745, "learning_rate": 0.0002836278891155646, "loss": 3.3564, "step": 682900 }, { "epoch": 109.28, "grad_norm": 0.24135884642601013, "learning_rate": 0.00028362548901956076, "loss": 3.1737, "step": 683000 }, { "epoch": 109.296, "grad_norm": 0.29068347811698914, "learning_rate": 0.00028362308892355693, "loss": 3.5042, "step": 683100 }, { "epoch": 109.312, "grad_norm": 0.264761745929718, "learning_rate": 0.0002836206888275531, "loss": 3.3652, "step": 683200 }, { "epoch": 109.328, "grad_norm": 0.2658812999725342, "learning_rate": 0.0002836182887315492, "loss": 3.4536, "step": 683300 }, { "epoch": 109.344, "grad_norm": 0.21943619847297668, "learning_rate": 0.0002836158886355454, "loss": 3.1815, "step": 683400 }, { "epoch": 109.36, "grad_norm": 0.24630966782569885, "learning_rate": 0.00028361348853954155, "loss": 3.4555, "step": 683500 }, { "epoch": 109.376, "grad_norm": 0.28291237354278564, "learning_rate": 0.0002836110884435377, "loss": 3.3128, "step": 683600 }, { "epoch": 109.392, "grad_norm": 0.22826677560806274, "learning_rate": 0.0002836086883475339, "loss": 3.3803, "step": 683700 }, { "epoch": 109.408, "grad_norm": 0.25044000148773193, "learning_rate": 0.00028360628825153, "loss": 3.1611, "step": 683800 }, { "epoch": 109.424, "grad_norm": 0.21808885037899017, "learning_rate": 0.0002836038881555262, "loss": 3.2159, "step": 683900 }, { "epoch": 109.44, "grad_norm": 0.2294241338968277, "learning_rate": 0.00028360148805952235, "loss": 3.4367, "step": 684000 }, { "epoch": 109.456, "grad_norm": 0.2328399419784546, "learning_rate": 0.0002835990879635185, "loss": 3.2407, "step": 684100 }, { "epoch": 109.472, "grad_norm": 0.21513521671295166, "learning_rate": 0.0002835966878675147, "loss": 3.2972, "step": 684200 }, { "epoch": 109.488, "grad_norm": 0.2399367392063141, "learning_rate": 0.00028359428777151085, "loss": 3.7488, "step": 684300 }, { "epoch": 109.504, "grad_norm": 0.2720966637134552, "learning_rate": 0.00028359188767550697, "loss": 3.6608, "step": 684400 }, { "epoch": 109.52, "grad_norm": 0.2329401820898056, "learning_rate": 0.0002835895115804632, "loss": 3.1769, "step": 684500 }, { "epoch": 109.536, "grad_norm": 0.22485528886318207, "learning_rate": 0.0002835871114844594, "loss": 3.3478, "step": 684600 }, { "epoch": 109.552, "grad_norm": 0.23586802184581757, "learning_rate": 0.0002835847113884555, "loss": 3.4281, "step": 684700 }, { "epoch": 109.568, "grad_norm": 0.2517383098602295, "learning_rate": 0.00028358231129245166, "loss": 3.2151, "step": 684800 }, { "epoch": 109.584, "grad_norm": 0.2147078812122345, "learning_rate": 0.00028357991119644783, "loss": 3.4239, "step": 684900 }, { "epoch": 109.6, "grad_norm": 0.2456086128950119, "learning_rate": 0.000283577511100444, "loss": 3.4582, "step": 685000 }, { "epoch": 109.616, "grad_norm": 0.23596438765525818, "learning_rate": 0.00028357511100444017, "loss": 3.3005, "step": 685100 }, { "epoch": 109.632, "grad_norm": 0.24654224514961243, "learning_rate": 0.00028357271090843634, "loss": 3.3371, "step": 685200 }, { "epoch": 109.648, "grad_norm": 0.24208033084869385, "learning_rate": 0.00028357031081243246, "loss": 3.5006, "step": 685300 }, { "epoch": 109.664, "grad_norm": 0.27080753445625305, "learning_rate": 0.0002835679107164286, "loss": 3.2198, "step": 685400 }, { "epoch": 109.68, "grad_norm": 0.31909409165382385, "learning_rate": 0.0002835655106204248, "loss": 3.4483, "step": 685500 }, { "epoch": 109.696, "grad_norm": 0.2401597648859024, "learning_rate": 0.00028356311052442096, "loss": 3.6393, "step": 685600 }, { "epoch": 109.712, "grad_norm": 0.22720293700695038, "learning_rate": 0.00028356071042841713, "loss": 3.5168, "step": 685700 }, { "epoch": 109.728, "grad_norm": 0.2559794783592224, "learning_rate": 0.00028355831033241325, "loss": 3.3792, "step": 685800 }, { "epoch": 109.744, "grad_norm": 0.2452526092529297, "learning_rate": 0.0002835559102364094, "loss": 3.563, "step": 685900 }, { "epoch": 109.76, "grad_norm": 0.24783799052238464, "learning_rate": 0.0002835535101404056, "loss": 3.2885, "step": 686000 }, { "epoch": 109.776, "grad_norm": 0.22462084889411926, "learning_rate": 0.00028355111004440176, "loss": 3.3991, "step": 686100 }, { "epoch": 109.792, "grad_norm": 0.24774828553199768, "learning_rate": 0.0002835487099483979, "loss": 3.336, "step": 686200 }, { "epoch": 109.808, "grad_norm": 0.25938278436660767, "learning_rate": 0.0002835463098523941, "loss": 3.4401, "step": 686300 }, { "epoch": 109.824, "grad_norm": 0.2302609533071518, "learning_rate": 0.0002835439097563902, "loss": 3.3128, "step": 686400 }, { "epoch": 109.84, "grad_norm": 0.24469055235385895, "learning_rate": 0.0002835415096603864, "loss": 3.3644, "step": 686500 }, { "epoch": 109.856, "grad_norm": 0.2301245927810669, "learning_rate": 0.00028353910956438255, "loss": 3.337, "step": 686600 }, { "epoch": 109.872, "grad_norm": 0.26311954855918884, "learning_rate": 0.0002835367094683787, "loss": 3.5239, "step": 686700 }, { "epoch": 109.888, "grad_norm": 0.23806650936603546, "learning_rate": 0.0002835343333733349, "loss": 3.3178, "step": 686800 }, { "epoch": 109.904, "grad_norm": 0.3021426796913147, "learning_rate": 0.0002835319332773311, "loss": 3.5792, "step": 686900 }, { "epoch": 109.92, "grad_norm": 0.2302110344171524, "learning_rate": 0.00028352953318132724, "loss": 3.4242, "step": 687000 }, { "epoch": 109.936, "grad_norm": 0.26317691802978516, "learning_rate": 0.0002835271330853234, "loss": 3.1993, "step": 687100 }, { "epoch": 109.952, "grad_norm": 0.24439547955989838, "learning_rate": 0.0002835247569902796, "loss": 3.4274, "step": 687200 }, { "epoch": 109.968, "grad_norm": 0.22482885420322418, "learning_rate": 0.0002835223568942757, "loss": 3.1415, "step": 687300 }, { "epoch": 109.984, "grad_norm": 0.24591094255447388, "learning_rate": 0.0002835199567982719, "loss": 3.418, "step": 687400 }, { "epoch": 110.0, "grad_norm": 0.28217557072639465, "learning_rate": 0.00028351755670226805, "loss": 3.5579, "step": 687500 }, { "epoch": 110.016, "grad_norm": 0.24235092103481293, "learning_rate": 0.0002835151566062642, "loss": 3.3042, "step": 687600 }, { "epoch": 110.032, "grad_norm": 0.24026991426944733, "learning_rate": 0.0002835127565102604, "loss": 3.2891, "step": 687700 }, { "epoch": 110.048, "grad_norm": 0.22172647714614868, "learning_rate": 0.0002835103564142565, "loss": 3.2639, "step": 687800 }, { "epoch": 110.064, "grad_norm": 0.23762962222099304, "learning_rate": 0.0002835079563182527, "loss": 3.5005, "step": 687900 }, { "epoch": 110.08, "grad_norm": 0.29416534304618835, "learning_rate": 0.00028350555622224885, "loss": 3.2438, "step": 688000 }, { "epoch": 110.096, "grad_norm": 0.2743002474308014, "learning_rate": 0.000283503156126245, "loss": 3.2233, "step": 688100 }, { "epoch": 110.112, "grad_norm": 0.29509711265563965, "learning_rate": 0.0002835007560302412, "loss": 3.2198, "step": 688200 }, { "epoch": 110.128, "grad_norm": 0.2669745981693268, "learning_rate": 0.00028349835593423735, "loss": 3.1728, "step": 688300 }, { "epoch": 110.144, "grad_norm": 0.25538673996925354, "learning_rate": 0.00028349595583823347, "loss": 3.5872, "step": 688400 }, { "epoch": 110.16, "grad_norm": 0.28776025772094727, "learning_rate": 0.00028349355574222964, "loss": 3.3923, "step": 688500 }, { "epoch": 110.176, "grad_norm": 0.2596912086009979, "learning_rate": 0.0002834911556462258, "loss": 3.5314, "step": 688600 }, { "epoch": 110.192, "grad_norm": 0.251660019159317, "learning_rate": 0.000283488755550222, "loss": 3.8024, "step": 688700 }, { "epoch": 110.208, "grad_norm": 0.22735844552516937, "learning_rate": 0.00028348635545421815, "loss": 3.4539, "step": 688800 }, { "epoch": 110.224, "grad_norm": 0.24813252687454224, "learning_rate": 0.00028348395535821426, "loss": 3.1486, "step": 688900 }, { "epoch": 110.24, "grad_norm": 0.28367558121681213, "learning_rate": 0.0002834815552622105, "loss": 3.4735, "step": 689000 }, { "epoch": 110.256, "grad_norm": 0.25253960490226746, "learning_rate": 0.00028347915516620665, "loss": 3.3617, "step": 689100 }, { "epoch": 110.272, "grad_norm": 0.2546617090702057, "learning_rate": 0.0002834767550702028, "loss": 3.2702, "step": 689200 }, { "epoch": 110.288, "grad_norm": 0.23584550619125366, "learning_rate": 0.00028347435497419894, "loss": 3.4897, "step": 689300 }, { "epoch": 110.304, "grad_norm": 0.24655377864837646, "learning_rate": 0.0002834719548781951, "loss": 3.4161, "step": 689400 }, { "epoch": 110.32, "grad_norm": 0.254445344209671, "learning_rate": 0.0002834695547821913, "loss": 3.3823, "step": 689500 }, { "epoch": 110.336, "grad_norm": 0.24716593325138092, "learning_rate": 0.00028346715468618744, "loss": 3.6681, "step": 689600 }, { "epoch": 110.352, "grad_norm": 0.2911476790904999, "learning_rate": 0.0002834647545901836, "loss": 3.4948, "step": 689700 }, { "epoch": 110.368, "grad_norm": 0.32628417015075684, "learning_rate": 0.00028346235449417973, "loss": 3.4877, "step": 689800 }, { "epoch": 110.384, "grad_norm": 0.23550666868686676, "learning_rate": 0.0002834599543981759, "loss": 3.5954, "step": 689900 }, { "epoch": 110.4, "grad_norm": 0.22917796671390533, "learning_rate": 0.00028345755430217207, "loss": 3.2951, "step": 690000 }, { "epoch": 110.416, "grad_norm": 0.2318001091480255, "learning_rate": 0.00028345515420616824, "loss": 3.4126, "step": 690100 }, { "epoch": 110.432, "grad_norm": 0.2542356252670288, "learning_rate": 0.0002834527541101644, "loss": 3.2367, "step": 690200 }, { "epoch": 110.448, "grad_norm": 0.26175323128700256, "learning_rate": 0.0002834503540141606, "loss": 3.6192, "step": 690300 }, { "epoch": 110.464, "grad_norm": 0.2902243435382843, "learning_rate": 0.0002834479539181567, "loss": 3.1, "step": 690400 }, { "epoch": 110.48, "grad_norm": 0.2861901521682739, "learning_rate": 0.00028344555382215286, "loss": 3.3155, "step": 690500 }, { "epoch": 110.496, "grad_norm": 0.24021850526332855, "learning_rate": 0.00028344315372614903, "loss": 2.9731, "step": 690600 }, { "epoch": 110.512, "grad_norm": 0.21690945327281952, "learning_rate": 0.0002834407536301452, "loss": 3.3174, "step": 690700 }, { "epoch": 110.528, "grad_norm": 0.23869045078754425, "learning_rate": 0.00028343835353414137, "loss": 3.0881, "step": 690800 }, { "epoch": 110.544, "grad_norm": 0.27070125937461853, "learning_rate": 0.0002834359534381375, "loss": 3.4744, "step": 690900 }, { "epoch": 110.56, "grad_norm": 0.2991136908531189, "learning_rate": 0.00028343355334213365, "loss": 3.3914, "step": 691000 }, { "epoch": 110.576, "grad_norm": 0.2445349097251892, "learning_rate": 0.0002834311532461298, "loss": 3.4832, "step": 691100 }, { "epoch": 110.592, "grad_norm": 0.2641538083553314, "learning_rate": 0.000283428753150126, "loss": 3.3053, "step": 691200 }, { "epoch": 110.608, "grad_norm": 0.28418686985969543, "learning_rate": 0.00028342635305412216, "loss": 3.4682, "step": 691300 }, { "epoch": 110.624, "grad_norm": 0.24345530569553375, "learning_rate": 0.00028342395295811833, "loss": 3.4892, "step": 691400 }, { "epoch": 110.64, "grad_norm": 0.255415141582489, "learning_rate": 0.00028342155286211444, "loss": 3.2652, "step": 691500 }, { "epoch": 110.656, "grad_norm": 0.2613565921783447, "learning_rate": 0.0002834191527661106, "loss": 3.6989, "step": 691600 }, { "epoch": 110.672, "grad_norm": 0.24447424709796906, "learning_rate": 0.0002834167766710668, "loss": 3.2108, "step": 691700 }, { "epoch": 110.688, "grad_norm": 0.420487642288208, "learning_rate": 0.00028341437657506297, "loss": 3.5365, "step": 691800 }, { "epoch": 110.704, "grad_norm": 0.24203331768512726, "learning_rate": 0.00028341197647905914, "loss": 3.5775, "step": 691900 }, { "epoch": 110.72, "grad_norm": 0.232481449842453, "learning_rate": 0.0002834095763830553, "loss": 3.474, "step": 692000 }, { "epoch": 110.736, "grad_norm": 0.26393789052963257, "learning_rate": 0.0002834071762870514, "loss": 3.3311, "step": 692100 }, { "epoch": 110.752, "grad_norm": 0.21654748916625977, "learning_rate": 0.00028340477619104765, "loss": 3.6343, "step": 692200 }, { "epoch": 110.768, "grad_norm": 0.26952025294303894, "learning_rate": 0.0002834023760950438, "loss": 3.3272, "step": 692300 }, { "epoch": 110.784, "grad_norm": 0.24590443074703217, "learning_rate": 0.00028339997599903993, "loss": 3.5617, "step": 692400 }, { "epoch": 110.8, "grad_norm": 0.22557370364665985, "learning_rate": 0.0002833975999039961, "loss": 3.6833, "step": 692500 }, { "epoch": 110.816, "grad_norm": 0.24389097094535828, "learning_rate": 0.0002833951998079923, "loss": 3.3073, "step": 692600 }, { "epoch": 110.832, "grad_norm": 0.27352315187454224, "learning_rate": 0.00028339279971198846, "loss": 3.1582, "step": 692700 }, { "epoch": 110.848, "grad_norm": 0.23498205840587616, "learning_rate": 0.0002833903996159846, "loss": 3.3964, "step": 692800 }, { "epoch": 110.864, "grad_norm": 0.25124090909957886, "learning_rate": 0.00028338799951998074, "loss": 3.3459, "step": 692900 }, { "epoch": 110.88, "grad_norm": 0.2650180160999298, "learning_rate": 0.0002833855994239769, "loss": 3.5116, "step": 693000 }, { "epoch": 110.896, "grad_norm": 0.22068102657794952, "learning_rate": 0.0002833831993279731, "loss": 3.3711, "step": 693100 }, { "epoch": 110.912, "grad_norm": 0.24902330338954926, "learning_rate": 0.00028338079923196925, "loss": 3.2953, "step": 693200 }, { "epoch": 110.928, "grad_norm": 0.2310609370470047, "learning_rate": 0.0002833783991359654, "loss": 3.2164, "step": 693300 }, { "epoch": 110.944, "grad_norm": 0.22302871942520142, "learning_rate": 0.0002833759990399616, "loss": 3.3625, "step": 693400 }, { "epoch": 110.96, "grad_norm": 0.24910081923007965, "learning_rate": 0.0002833735989439577, "loss": 3.3875, "step": 693500 }, { "epoch": 110.976, "grad_norm": 0.2567470669746399, "learning_rate": 0.00028337119884795387, "loss": 3.3298, "step": 693600 }, { "epoch": 110.992, "grad_norm": 0.3174605369567871, "learning_rate": 0.00028336879875195004, "loss": 3.3426, "step": 693700 }, { "epoch": 111.008, "grad_norm": 0.20687976479530334, "learning_rate": 0.0002833663986559462, "loss": 3.2881, "step": 693800 }, { "epoch": 111.024, "grad_norm": 0.25511634349823, "learning_rate": 0.0002833639985599424, "loss": 3.3964, "step": 693900 }, { "epoch": 111.04, "grad_norm": 0.23358751833438873, "learning_rate": 0.00028336159846393855, "loss": 3.4703, "step": 694000 }, { "epoch": 111.056, "grad_norm": 0.31516677141189575, "learning_rate": 0.00028335919836793466, "loss": 3.1502, "step": 694100 }, { "epoch": 111.072, "grad_norm": 0.25507816672325134, "learning_rate": 0.00028335679827193083, "loss": 3.7016, "step": 694200 }, { "epoch": 111.088, "grad_norm": 0.25024932622909546, "learning_rate": 0.000283354398175927, "loss": 3.5239, "step": 694300 }, { "epoch": 111.104, "grad_norm": 0.25227320194244385, "learning_rate": 0.00028335199807992317, "loss": 3.0824, "step": 694400 }, { "epoch": 111.12, "grad_norm": 0.28832510113716125, "learning_rate": 0.00028334959798391934, "loss": 3.7823, "step": 694500 }, { "epoch": 111.136, "grad_norm": 0.24919858574867249, "learning_rate": 0.00028334719788791546, "loss": 3.5161, "step": 694600 }, { "epoch": 111.152, "grad_norm": 0.22532115876674652, "learning_rate": 0.0002833447977919116, "loss": 3.3078, "step": 694700 }, { "epoch": 111.168, "grad_norm": 0.24586330354213715, "learning_rate": 0.0002833423976959078, "loss": 3.344, "step": 694800 }, { "epoch": 111.184, "grad_norm": 0.289363831281662, "learning_rate": 0.00028333999759990396, "loss": 3.1629, "step": 694900 }, { "epoch": 111.2, "grad_norm": 0.28513431549072266, "learning_rate": 0.00028333759750390013, "loss": 3.3426, "step": 695000 }, { "epoch": 111.216, "grad_norm": 0.22257474064826965, "learning_rate": 0.0002833351974078963, "loss": 3.4605, "step": 695100 }, { "epoch": 111.232, "grad_norm": 0.26772117614746094, "learning_rate": 0.0002833327973118924, "loss": 3.2383, "step": 695200 }, { "epoch": 111.248, "grad_norm": 0.2713402509689331, "learning_rate": 0.00028333039721588864, "loss": 3.2202, "step": 695300 }, { "epoch": 111.264, "grad_norm": 0.26665595173835754, "learning_rate": 0.0002833279971198848, "loss": 3.728, "step": 695400 }, { "epoch": 111.28, "grad_norm": 0.2525724470615387, "learning_rate": 0.0002833255970238809, "loss": 3.2273, "step": 695500 }, { "epoch": 111.296, "grad_norm": 0.25077879428863525, "learning_rate": 0.0002833231969278771, "loss": 3.2846, "step": 695600 }, { "epoch": 111.312, "grad_norm": 0.2655024528503418, "learning_rate": 0.00028332079683187326, "loss": 3.3129, "step": 695700 }, { "epoch": 111.328, "grad_norm": 0.26893919706344604, "learning_rate": 0.00028331839673586943, "loss": 3.1743, "step": 695800 }, { "epoch": 111.344, "grad_norm": 0.22824645042419434, "learning_rate": 0.0002833159966398656, "loss": 3.261, "step": 695900 }, { "epoch": 111.36, "grad_norm": 0.26492342352867126, "learning_rate": 0.00028331359654386177, "loss": 3.3125, "step": 696000 }, { "epoch": 111.376, "grad_norm": 0.2652881145477295, "learning_rate": 0.0002833111964478579, "loss": 3.425, "step": 696100 }, { "epoch": 111.392, "grad_norm": 0.23992477357387543, "learning_rate": 0.00028330879635185405, "loss": 3.1909, "step": 696200 }, { "epoch": 111.408, "grad_norm": 0.24994248151779175, "learning_rate": 0.0002833063962558502, "loss": 3.5459, "step": 696300 }, { "epoch": 111.424, "grad_norm": 0.23871882259845734, "learning_rate": 0.0002833039961598464, "loss": 3.426, "step": 696400 }, { "epoch": 111.44, "grad_norm": 0.22574011981487274, "learning_rate": 0.00028330159606384256, "loss": 3.4541, "step": 696500 }, { "epoch": 111.456, "grad_norm": 0.24417616426944733, "learning_rate": 0.0002832991959678387, "loss": 3.3822, "step": 696600 }, { "epoch": 111.472, "grad_norm": 0.2514415979385376, "learning_rate": 0.00028329679587183485, "loss": 3.0937, "step": 696700 }, { "epoch": 111.488, "grad_norm": 0.227644145488739, "learning_rate": 0.00028329441977679103, "loss": 3.333, "step": 696800 }, { "epoch": 111.504, "grad_norm": 0.24092495441436768, "learning_rate": 0.0002832920196807872, "loss": 3.5715, "step": 696900 }, { "epoch": 111.52, "grad_norm": 0.2520754635334015, "learning_rate": 0.00028328961958478337, "loss": 2.9224, "step": 697000 }, { "epoch": 111.536, "grad_norm": 0.2371015101671219, "learning_rate": 0.00028328721948877954, "loss": 3.1406, "step": 697100 }, { "epoch": 111.552, "grad_norm": 0.26166656613349915, "learning_rate": 0.00028328481939277566, "loss": 3.5083, "step": 697200 }, { "epoch": 111.568, "grad_norm": 0.2511964738368988, "learning_rate": 0.0002832824192967718, "loss": 3.3109, "step": 697300 }, { "epoch": 111.584, "grad_norm": 0.26411423087120056, "learning_rate": 0.000283280019200768, "loss": 3.1816, "step": 697400 }, { "epoch": 111.6, "grad_norm": 0.23546847701072693, "learning_rate": 0.0002832776431057242, "loss": 3.3509, "step": 697500 }, { "epoch": 111.616, "grad_norm": 0.22910235822200775, "learning_rate": 0.00028327524300972035, "loss": 3.4316, "step": 697600 }, { "epoch": 111.632, "grad_norm": 0.24775634706020355, "learning_rate": 0.0002832728429137165, "loss": 3.27, "step": 697700 }, { "epoch": 111.648, "grad_norm": 0.2743050158023834, "learning_rate": 0.0002832704428177127, "loss": 3.2963, "step": 697800 }, { "epoch": 111.664, "grad_norm": 0.2384905070066452, "learning_rate": 0.00028326804272170886, "loss": 3.1535, "step": 697900 }, { "epoch": 111.68, "grad_norm": 0.253599613904953, "learning_rate": 0.00028326564262570503, "loss": 3.4493, "step": 698000 }, { "epoch": 111.696, "grad_norm": 0.23248180747032166, "learning_rate": 0.00028326324252970114, "loss": 3.4709, "step": 698100 }, { "epoch": 111.712, "grad_norm": 0.2088162899017334, "learning_rate": 0.0002832608424336973, "loss": 3.4165, "step": 698200 }, { "epoch": 111.728, "grad_norm": 0.24782414734363556, "learning_rate": 0.0002832584423376935, "loss": 3.5579, "step": 698300 }, { "epoch": 111.744, "grad_norm": 0.2584120035171509, "learning_rate": 0.00028325604224168965, "loss": 3.5214, "step": 698400 }, { "epoch": 111.76, "grad_norm": 0.22998690605163574, "learning_rate": 0.0002832536421456858, "loss": 3.266, "step": 698500 }, { "epoch": 111.776, "grad_norm": 0.242298886179924, "learning_rate": 0.00028325124204968194, "loss": 3.2504, "step": 698600 }, { "epoch": 111.792, "grad_norm": 0.2684089243412018, "learning_rate": 0.0002832488419536781, "loss": 3.4653, "step": 698700 }, { "epoch": 111.808, "grad_norm": 0.22471053898334503, "learning_rate": 0.0002832464418576743, "loss": 3.2776, "step": 698800 }, { "epoch": 111.824, "grad_norm": 0.23388497531414032, "learning_rate": 0.00028324404176167044, "loss": 3.426, "step": 698900 }, { "epoch": 111.84, "grad_norm": 0.2344246804714203, "learning_rate": 0.0002832416416656666, "loss": 3.6486, "step": 699000 }, { "epoch": 111.856, "grad_norm": 0.23144054412841797, "learning_rate": 0.0002832392415696628, "loss": 3.3853, "step": 699100 }, { "epoch": 111.872, "grad_norm": 0.26060977578163147, "learning_rate": 0.0002832368414736589, "loss": 3.5729, "step": 699200 }, { "epoch": 111.888, "grad_norm": 0.24162033200263977, "learning_rate": 0.00028323444137765507, "loss": 3.5223, "step": 699300 }, { "epoch": 111.904, "grad_norm": 0.21388567984104156, "learning_rate": 0.00028323204128165124, "loss": 3.6371, "step": 699400 }, { "epoch": 111.92, "grad_norm": 0.2791447341442108, "learning_rate": 0.0002832296411856474, "loss": 3.2788, "step": 699500 }, { "epoch": 111.936, "grad_norm": 0.23159286379814148, "learning_rate": 0.0002832272410896436, "loss": 3.6785, "step": 699600 }, { "epoch": 111.952, "grad_norm": 0.3105337619781494, "learning_rate": 0.0002832248409936397, "loss": 3.4161, "step": 699700 }, { "epoch": 111.968, "grad_norm": 0.2394779920578003, "learning_rate": 0.00028322244089763586, "loss": 3.2566, "step": 699800 }, { "epoch": 111.984, "grad_norm": 0.2327829748392105, "learning_rate": 0.000283220040801632, "loss": 3.3856, "step": 699900 }, { "epoch": 112.0, "grad_norm": 0.23543782532215118, "learning_rate": 0.0002832176407056282, "loss": 3.4793, "step": 700000 }, { "epoch": 112.016, "grad_norm": 0.25242942571640015, "learning_rate": 0.00028321524060962437, "loss": 3.0469, "step": 700100 }, { "epoch": 112.032, "grad_norm": 0.24384231865406036, "learning_rate": 0.00028321284051362053, "loss": 3.379, "step": 700200 }, { "epoch": 112.048, "grad_norm": 0.2469216138124466, "learning_rate": 0.00028321044041761665, "loss": 3.6162, "step": 700300 }, { "epoch": 112.064, "grad_norm": 0.25498148798942566, "learning_rate": 0.0002832080403216128, "loss": 3.2286, "step": 700400 }, { "epoch": 112.08, "grad_norm": 0.2699824571609497, "learning_rate": 0.000283205640225609, "loss": 3.1753, "step": 700500 }, { "epoch": 112.096, "grad_norm": 0.2751145660877228, "learning_rate": 0.00028320324012960516, "loss": 3.2458, "step": 700600 }, { "epoch": 112.112, "grad_norm": 0.23148493468761444, "learning_rate": 0.0002832008400336013, "loss": 3.4842, "step": 700700 }, { "epoch": 112.128, "grad_norm": 0.24788042902946472, "learning_rate": 0.00028319843993759744, "loss": 3.4705, "step": 700800 }, { "epoch": 112.144, "grad_norm": 0.2800045609474182, "learning_rate": 0.0002831960398415936, "loss": 3.5714, "step": 700900 }, { "epoch": 112.16, "grad_norm": 0.2880370020866394, "learning_rate": 0.0002831936397455898, "loss": 3.3564, "step": 701000 }, { "epoch": 112.176, "grad_norm": 0.26095157861709595, "learning_rate": 0.00028319123964958595, "loss": 3.2019, "step": 701100 }, { "epoch": 112.192, "grad_norm": 0.2513924837112427, "learning_rate": 0.0002831888395535821, "loss": 3.336, "step": 701200 }, { "epoch": 112.208, "grad_norm": 0.2576300799846649, "learning_rate": 0.0002831864394575783, "loss": 3.4092, "step": 701300 }, { "epoch": 112.224, "grad_norm": 0.28817862272262573, "learning_rate": 0.0002831840393615744, "loss": 3.3235, "step": 701400 }, { "epoch": 112.24, "grad_norm": 0.22965288162231445, "learning_rate": 0.0002831816392655706, "loss": 3.167, "step": 701500 }, { "epoch": 112.256, "grad_norm": 0.2572636902332306, "learning_rate": 0.0002831792391695668, "loss": 3.388, "step": 701600 }, { "epoch": 112.272, "grad_norm": 0.24507823586463928, "learning_rate": 0.0002831768390735629, "loss": 3.2904, "step": 701700 }, { "epoch": 112.288, "grad_norm": 0.26480549573898315, "learning_rate": 0.0002831744389775591, "loss": 3.2628, "step": 701800 }, { "epoch": 112.304, "grad_norm": 0.23393693566322327, "learning_rate": 0.00028317206288251527, "loss": 3.3375, "step": 701900 }, { "epoch": 112.32, "grad_norm": 0.29704412817955017, "learning_rate": 0.00028316966278651144, "loss": 3.6785, "step": 702000 }, { "epoch": 112.336, "grad_norm": 0.25575652718544006, "learning_rate": 0.0002831672626905076, "loss": 3.4199, "step": 702100 }, { "epoch": 112.352, "grad_norm": 0.2995404601097107, "learning_rate": 0.0002831648625945038, "loss": 3.5367, "step": 702200 }, { "epoch": 112.368, "grad_norm": 0.25039565563201904, "learning_rate": 0.0002831624624984999, "loss": 3.4664, "step": 702300 }, { "epoch": 112.384, "grad_norm": 0.37535813450813293, "learning_rate": 0.00028316006240249606, "loss": 3.6331, "step": 702400 }, { "epoch": 112.4, "grad_norm": 0.3281102478504181, "learning_rate": 0.00028315766230649223, "loss": 3.3799, "step": 702500 }, { "epoch": 112.416, "grad_norm": 0.23523707687854767, "learning_rate": 0.0002831552622104884, "loss": 3.7279, "step": 702600 }, { "epoch": 112.432, "grad_norm": 0.26115143299102783, "learning_rate": 0.00028315286211448457, "loss": 3.4296, "step": 702700 }, { "epoch": 112.448, "grad_norm": 0.25163009762763977, "learning_rate": 0.0002831504620184807, "loss": 3.6307, "step": 702800 }, { "epoch": 112.464, "grad_norm": 0.2507579028606415, "learning_rate": 0.00028314806192247685, "loss": 3.3733, "step": 702900 }, { "epoch": 112.48, "grad_norm": 0.26778313517570496, "learning_rate": 0.000283145661826473, "loss": 3.1869, "step": 703000 }, { "epoch": 112.496, "grad_norm": 0.2679891288280487, "learning_rate": 0.0002831432617304692, "loss": 3.3953, "step": 703100 }, { "epoch": 112.512, "grad_norm": 0.27026668190956116, "learning_rate": 0.00028314086163446536, "loss": 3.2687, "step": 703200 }, { "epoch": 112.528, "grad_norm": 0.2508668005466461, "learning_rate": 0.00028313846153846153, "loss": 3.3221, "step": 703300 }, { "epoch": 112.544, "grad_norm": 0.23915380239486694, "learning_rate": 0.00028313606144245764, "loss": 3.5665, "step": 703400 }, { "epoch": 112.56, "grad_norm": 0.2497367113828659, "learning_rate": 0.0002831336613464538, "loss": 3.4911, "step": 703500 }, { "epoch": 112.576, "grad_norm": 0.24986381828784943, "learning_rate": 0.00028313126125045, "loss": 3.3807, "step": 703600 }, { "epoch": 112.592, "grad_norm": 0.2526853680610657, "learning_rate": 0.00028312886115444615, "loss": 3.5786, "step": 703700 }, { "epoch": 112.608, "grad_norm": 0.40637916326522827, "learning_rate": 0.0002831264610584423, "loss": 3.3544, "step": 703800 }, { "epoch": 112.624, "grad_norm": 0.2687690556049347, "learning_rate": 0.0002831240849633985, "loss": 3.3824, "step": 703900 }, { "epoch": 112.64, "grad_norm": 0.26205042004585266, "learning_rate": 0.0002831216848673947, "loss": 3.3458, "step": 704000 }, { "epoch": 112.656, "grad_norm": 0.27584806084632874, "learning_rate": 0.00028311928477139085, "loss": 3.5565, "step": 704100 }, { "epoch": 112.672, "grad_norm": 0.25672757625579834, "learning_rate": 0.000283116884675387, "loss": 3.501, "step": 704200 }, { "epoch": 112.688, "grad_norm": 0.25835856795310974, "learning_rate": 0.00028311448457938313, "loss": 3.5172, "step": 704300 }, { "epoch": 112.704, "grad_norm": 0.25687384605407715, "learning_rate": 0.0002831120844833793, "loss": 3.6936, "step": 704400 }, { "epoch": 112.72, "grad_norm": 0.2360980063676834, "learning_rate": 0.00028310968438737547, "loss": 3.1226, "step": 704500 }, { "epoch": 112.736, "grad_norm": 0.24579311907291412, "learning_rate": 0.00028310728429137164, "loss": 3.6178, "step": 704600 }, { "epoch": 112.752, "grad_norm": 0.24657092988491058, "learning_rate": 0.0002831048841953678, "loss": 3.6744, "step": 704700 }, { "epoch": 112.768, "grad_norm": 0.23219206929206848, "learning_rate": 0.0002831024840993639, "loss": 3.7818, "step": 704800 }, { "epoch": 112.784, "grad_norm": 0.24069897830486298, "learning_rate": 0.0002831000840033601, "loss": 3.5106, "step": 704900 }, { "epoch": 112.8, "grad_norm": 0.2611320912837982, "learning_rate": 0.00028309768390735626, "loss": 3.3648, "step": 705000 }, { "epoch": 112.816, "grad_norm": 0.2411016821861267, "learning_rate": 0.00028309528381135243, "loss": 3.3225, "step": 705100 }, { "epoch": 112.832, "grad_norm": 0.25111010670661926, "learning_rate": 0.0002830928837153486, "loss": 3.2881, "step": 705200 }, { "epoch": 112.848, "grad_norm": 0.24118362367153168, "learning_rate": 0.00028309048361934477, "loss": 3.1679, "step": 705300 }, { "epoch": 112.864, "grad_norm": 0.2548289895057678, "learning_rate": 0.0002830880835233409, "loss": 3.1275, "step": 705400 }, { "epoch": 112.88, "grad_norm": 0.23075421154499054, "learning_rate": 0.00028308568342733705, "loss": 3.7669, "step": 705500 }, { "epoch": 112.896, "grad_norm": 0.2277739942073822, "learning_rate": 0.0002830832833313332, "loss": 3.7674, "step": 705600 }, { "epoch": 112.912, "grad_norm": 0.28233322501182556, "learning_rate": 0.0002830808832353294, "loss": 3.1727, "step": 705700 }, { "epoch": 112.928, "grad_norm": 0.24748297035694122, "learning_rate": 0.00028307848313932556, "loss": 3.5256, "step": 705800 }, { "epoch": 112.944, "grad_norm": 0.24218878149986267, "learning_rate": 0.0002830760830433217, "loss": 3.4606, "step": 705900 }, { "epoch": 112.96, "grad_norm": 0.2468445748090744, "learning_rate": 0.00028307368294731784, "loss": 3.4275, "step": 706000 }, { "epoch": 112.976, "grad_norm": 0.2162848562002182, "learning_rate": 0.000283071282851314, "loss": 3.1355, "step": 706100 }, { "epoch": 112.992, "grad_norm": 0.2809385657310486, "learning_rate": 0.00028306890675627026, "loss": 3.2749, "step": 706200 }, { "epoch": 113.008, "grad_norm": 0.24728211760520935, "learning_rate": 0.00028306650666026637, "loss": 3.1468, "step": 706300 }, { "epoch": 113.024, "grad_norm": 0.23327533900737762, "learning_rate": 0.00028306410656426254, "loss": 3.2259, "step": 706400 }, { "epoch": 113.04, "grad_norm": 0.25437256693840027, "learning_rate": 0.0002830617064682587, "loss": 3.5146, "step": 706500 }, { "epoch": 113.056, "grad_norm": 0.2467508167028427, "learning_rate": 0.0002830593063722549, "loss": 3.3546, "step": 706600 }, { "epoch": 113.072, "grad_norm": 0.29404351115226746, "learning_rate": 0.00028305690627625105, "loss": 3.3874, "step": 706700 }, { "epoch": 113.088, "grad_norm": 0.24162538349628448, "learning_rate": 0.00028305450618024716, "loss": 3.394, "step": 706800 }, { "epoch": 113.104, "grad_norm": 0.23747918009757996, "learning_rate": 0.00028305210608424333, "loss": 3.1008, "step": 706900 }, { "epoch": 113.12, "grad_norm": 0.28575026988983154, "learning_rate": 0.0002830497059882395, "loss": 3.5551, "step": 707000 }, { "epoch": 113.136, "grad_norm": 0.2838638722896576, "learning_rate": 0.00028304730589223567, "loss": 3.4553, "step": 707100 }, { "epoch": 113.152, "grad_norm": 0.2895011007785797, "learning_rate": 0.00028304490579623184, "loss": 3.3951, "step": 707200 }, { "epoch": 113.168, "grad_norm": 0.22953613102436066, "learning_rate": 0.000283042505700228, "loss": 3.693, "step": 707300 }, { "epoch": 113.184, "grad_norm": 0.2588672935962677, "learning_rate": 0.0002830401056042241, "loss": 3.1922, "step": 707400 }, { "epoch": 113.2, "grad_norm": 0.29651039838790894, "learning_rate": 0.0002830377055082203, "loss": 3.2498, "step": 707500 }, { "epoch": 113.216, "grad_norm": 0.3067834973335266, "learning_rate": 0.00028303530541221646, "loss": 3.4803, "step": 707600 }, { "epoch": 113.232, "grad_norm": 0.28011661767959595, "learning_rate": 0.00028303290531621263, "loss": 3.432, "step": 707700 }, { "epoch": 113.248, "grad_norm": 0.24989624321460724, "learning_rate": 0.0002830305052202088, "loss": 3.4818, "step": 707800 }, { "epoch": 113.264, "grad_norm": 0.2553512454032898, "learning_rate": 0.0002830281051242049, "loss": 3.4269, "step": 707900 }, { "epoch": 113.28, "grad_norm": 0.2464517056941986, "learning_rate": 0.0002830257050282011, "loss": 3.4337, "step": 708000 }, { "epoch": 113.296, "grad_norm": 0.2784627676010132, "learning_rate": 0.00028302330493219725, "loss": 3.2656, "step": 708100 }, { "epoch": 113.312, "grad_norm": 0.24361443519592285, "learning_rate": 0.0002830209048361934, "loss": 3.3107, "step": 708200 }, { "epoch": 113.328, "grad_norm": 0.2372400164604187, "learning_rate": 0.0002830185047401896, "loss": 3.4862, "step": 708300 }, { "epoch": 113.344, "grad_norm": 0.25915712118148804, "learning_rate": 0.00028301610464418576, "loss": 3.0497, "step": 708400 }, { "epoch": 113.36, "grad_norm": 0.3331905007362366, "learning_rate": 0.0002830137045481819, "loss": 3.5344, "step": 708500 }, { "epoch": 113.376, "grad_norm": 0.28822413086891174, "learning_rate": 0.00028301130445217805, "loss": 3.301, "step": 708600 }, { "epoch": 113.392, "grad_norm": 0.24839021265506744, "learning_rate": 0.0002830089043561742, "loss": 3.3895, "step": 708700 }, { "epoch": 113.408, "grad_norm": 0.2935023307800293, "learning_rate": 0.0002830065042601704, "loss": 3.3447, "step": 708800 }, { "epoch": 113.424, "grad_norm": 0.25703415274620056, "learning_rate": 0.00028300412816512657, "loss": 3.3134, "step": 708900 }, { "epoch": 113.44, "grad_norm": 0.3054325580596924, "learning_rate": 0.00028300172806912274, "loss": 3.2952, "step": 709000 }, { "epoch": 113.456, "grad_norm": 0.2375899702310562, "learning_rate": 0.0002829993279731189, "loss": 3.4216, "step": 709100 }, { "epoch": 113.472, "grad_norm": 0.29911863803863525, "learning_rate": 0.0002829969278771151, "loss": 3.5267, "step": 709200 }, { "epoch": 113.488, "grad_norm": 0.2569746971130371, "learning_rate": 0.00028299452778111125, "loss": 3.4241, "step": 709300 }, { "epoch": 113.504, "grad_norm": 0.3223182260990143, "learning_rate": 0.00028299212768510736, "loss": 3.5212, "step": 709400 }, { "epoch": 113.52, "grad_norm": 0.2800654470920563, "learning_rate": 0.00028298972758910353, "loss": 3.6693, "step": 709500 }, { "epoch": 113.536, "grad_norm": 0.22657354176044464, "learning_rate": 0.0002829873274930997, "loss": 3.5932, "step": 709600 }, { "epoch": 113.552, "grad_norm": 0.27916383743286133, "learning_rate": 0.00028298492739709587, "loss": 3.4419, "step": 709700 }, { "epoch": 113.568, "grad_norm": 0.2645629048347473, "learning_rate": 0.00028298252730109204, "loss": 3.4886, "step": 709800 }, { "epoch": 113.584, "grad_norm": 0.23743405938148499, "learning_rate": 0.00028298012720508816, "loss": 3.3637, "step": 709900 }, { "epoch": 113.6, "grad_norm": 0.26106029748916626, "learning_rate": 0.0002829777271090843, "loss": 3.2423, "step": 710000 }, { "epoch": 113.616, "grad_norm": 0.24535870552062988, "learning_rate": 0.0002829753270130805, "loss": 3.4124, "step": 710100 }, { "epoch": 113.632, "grad_norm": 0.29286354780197144, "learning_rate": 0.00028297292691707666, "loss": 3.5089, "step": 710200 }, { "epoch": 113.648, "grad_norm": 0.24544711410999298, "learning_rate": 0.00028297052682107283, "loss": 3.4162, "step": 710300 }, { "epoch": 113.664, "grad_norm": 0.2222941666841507, "learning_rate": 0.000282968126725069, "loss": 3.6651, "step": 710400 }, { "epoch": 113.68, "grad_norm": 0.2655826508998871, "learning_rate": 0.0002829657266290651, "loss": 3.2999, "step": 710500 }, { "epoch": 113.696, "grad_norm": 0.24868333339691162, "learning_rate": 0.0002829633265330613, "loss": 3.2406, "step": 710600 }, { "epoch": 113.712, "grad_norm": 0.25001734495162964, "learning_rate": 0.00028296092643705746, "loss": 3.3185, "step": 710700 }, { "epoch": 113.728, "grad_norm": 0.2854902446269989, "learning_rate": 0.0002829585263410536, "loss": 3.5968, "step": 710800 }, { "epoch": 113.744, "grad_norm": 0.2665022313594818, "learning_rate": 0.0002829561262450498, "loss": 3.7534, "step": 710900 }, { "epoch": 113.76, "grad_norm": 0.25650590658187866, "learning_rate": 0.0002829537261490459, "loss": 3.805, "step": 711000 }, { "epoch": 113.776, "grad_norm": 0.23813416063785553, "learning_rate": 0.0002829513260530421, "loss": 3.4499, "step": 711100 }, { "epoch": 113.792, "grad_norm": 0.24042657017707825, "learning_rate": 0.00028294892595703825, "loss": 3.1448, "step": 711200 }, { "epoch": 113.808, "grad_norm": 0.24195031821727753, "learning_rate": 0.0002829465258610344, "loss": 3.6443, "step": 711300 }, { "epoch": 113.824, "grad_norm": 0.26304054260253906, "learning_rate": 0.0002829441257650306, "loss": 3.7257, "step": 711400 }, { "epoch": 113.84, "grad_norm": 0.2396775782108307, "learning_rate": 0.00028294172566902675, "loss": 3.3987, "step": 711500 }, { "epoch": 113.856, "grad_norm": 0.22527167201042175, "learning_rate": 0.00028293932557302287, "loss": 3.2517, "step": 711600 }, { "epoch": 113.872, "grad_norm": 0.2827534079551697, "learning_rate": 0.00028293692547701904, "loss": 3.2057, "step": 711700 }, { "epoch": 113.888, "grad_norm": 0.2465319186449051, "learning_rate": 0.0002829345493819753, "loss": 3.4094, "step": 711800 }, { "epoch": 113.904, "grad_norm": 0.24909374117851257, "learning_rate": 0.0002829321492859714, "loss": 3.453, "step": 711900 }, { "epoch": 113.92, "grad_norm": 0.27631402015686035, "learning_rate": 0.00028292974918996757, "loss": 3.7651, "step": 712000 }, { "epoch": 113.936, "grad_norm": 0.23309600353240967, "learning_rate": 0.00028292734909396373, "loss": 3.393, "step": 712100 }, { "epoch": 113.952, "grad_norm": 0.25131940841674805, "learning_rate": 0.0002829249489979599, "loss": 3.5533, "step": 712200 }, { "epoch": 113.968, "grad_norm": 0.2750507593154907, "learning_rate": 0.0002829225489019561, "loss": 3.4646, "step": 712300 }, { "epoch": 113.984, "grad_norm": 0.22839730978012085, "learning_rate": 0.00028292014880595224, "loss": 3.2287, "step": 712400 }, { "epoch": 114.0, "grad_norm": 0.2306280881166458, "learning_rate": 0.00028291774870994836, "loss": 3.4285, "step": 712500 }, { "epoch": 114.016, "grad_norm": 0.24960674345493317, "learning_rate": 0.0002829153486139445, "loss": 3.2415, "step": 712600 }, { "epoch": 114.032, "grad_norm": 0.25186607241630554, "learning_rate": 0.0002829129485179407, "loss": 3.0419, "step": 712700 }, { "epoch": 114.048, "grad_norm": 0.23651187121868134, "learning_rate": 0.00028291054842193686, "loss": 3.4569, "step": 712800 }, { "epoch": 114.064, "grad_norm": 0.2016870081424713, "learning_rate": 0.00028290814832593303, "loss": 3.5673, "step": 712900 }, { "epoch": 114.08, "grad_norm": 0.26125824451446533, "learning_rate": 0.00028290574822992915, "loss": 3.3791, "step": 713000 }, { "epoch": 114.096, "grad_norm": 0.23844636976718903, "learning_rate": 0.0002829033481339253, "loss": 3.1896, "step": 713100 }, { "epoch": 114.112, "grad_norm": 0.24747252464294434, "learning_rate": 0.0002829009480379215, "loss": 3.3991, "step": 713200 }, { "epoch": 114.128, "grad_norm": 0.25846782326698303, "learning_rate": 0.00028289854794191766, "loss": 3.3077, "step": 713300 }, { "epoch": 114.144, "grad_norm": 0.23970943689346313, "learning_rate": 0.0002828961478459138, "loss": 3.3326, "step": 713400 }, { "epoch": 114.16, "grad_norm": 0.28521257638931274, "learning_rate": 0.00028289374774991, "loss": 3.3659, "step": 713500 }, { "epoch": 114.176, "grad_norm": 0.26147451996803284, "learning_rate": 0.0002828913476539061, "loss": 3.3848, "step": 713600 }, { "epoch": 114.192, "grad_norm": 0.2634747326374054, "learning_rate": 0.0002828889475579023, "loss": 3.21, "step": 713700 }, { "epoch": 114.208, "grad_norm": 0.3485942780971527, "learning_rate": 0.00028288654746189845, "loss": 3.4072, "step": 713800 }, { "epoch": 114.224, "grad_norm": 0.26971006393432617, "learning_rate": 0.0002828841473658946, "loss": 3.1635, "step": 713900 }, { "epoch": 114.24, "grad_norm": 0.2561601996421814, "learning_rate": 0.0002828817712708508, "loss": 3.0421, "step": 714000 }, { "epoch": 114.256, "grad_norm": 0.23710881173610687, "learning_rate": 0.000282879371174847, "loss": 3.5183, "step": 714100 }, { "epoch": 114.272, "grad_norm": 0.2587834596633911, "learning_rate": 0.00028287697107884314, "loss": 3.1893, "step": 714200 }, { "epoch": 114.288, "grad_norm": 0.29728561639785767, "learning_rate": 0.00028287459498379933, "loss": 3.2545, "step": 714300 }, { "epoch": 114.304, "grad_norm": 0.24662747979164124, "learning_rate": 0.0002828721948877955, "loss": 3.2661, "step": 714400 }, { "epoch": 114.32, "grad_norm": 0.23478160798549652, "learning_rate": 0.0002828697947917916, "loss": 3.4062, "step": 714500 }, { "epoch": 114.336, "grad_norm": 0.24710679054260254, "learning_rate": 0.0002828673946957878, "loss": 3.3139, "step": 714600 }, { "epoch": 114.352, "grad_norm": 0.26431888341903687, "learning_rate": 0.00028286499459978396, "loss": 3.4286, "step": 714700 }, { "epoch": 114.368, "grad_norm": 0.2843762934207916, "learning_rate": 0.0002828625945037801, "loss": 3.595, "step": 714800 }, { "epoch": 114.384, "grad_norm": 0.24814815819263458, "learning_rate": 0.0002828601944077763, "loss": 3.4154, "step": 714900 }, { "epoch": 114.4, "grad_norm": 0.2742476761341095, "learning_rate": 0.0002828577943117724, "loss": 3.2344, "step": 715000 }, { "epoch": 114.416, "grad_norm": 0.24269410967826843, "learning_rate": 0.0002828553942157686, "loss": 3.314, "step": 715100 }, { "epoch": 114.432, "grad_norm": 0.2841466963291168, "learning_rate": 0.00028285299411976475, "loss": 3.1555, "step": 715200 }, { "epoch": 114.448, "grad_norm": 0.2734326124191284, "learning_rate": 0.0002828505940237609, "loss": 3.4018, "step": 715300 }, { "epoch": 114.464, "grad_norm": 0.28492745757102966, "learning_rate": 0.0002828481939277571, "loss": 3.4247, "step": 715400 }, { "epoch": 114.48, "grad_norm": 0.2521827816963196, "learning_rate": 0.00028284579383175325, "loss": 3.7215, "step": 715500 }, { "epoch": 114.496, "grad_norm": 0.2472129613161087, "learning_rate": 0.00028284339373574937, "loss": 3.2641, "step": 715600 }, { "epoch": 114.512, "grad_norm": 0.26739317178726196, "learning_rate": 0.00028284099363974554, "loss": 3.5833, "step": 715700 }, { "epoch": 114.528, "grad_norm": 0.2798217833042145, "learning_rate": 0.0002828385935437417, "loss": 3.6621, "step": 715800 }, { "epoch": 114.544, "grad_norm": 0.2568438947200775, "learning_rate": 0.0002828361934477379, "loss": 3.486, "step": 715900 }, { "epoch": 114.56, "grad_norm": 0.24901685118675232, "learning_rate": 0.00028283379335173405, "loss": 3.541, "step": 716000 }, { "epoch": 114.576, "grad_norm": 0.2462301105260849, "learning_rate": 0.0002828313932557302, "loss": 3.617, "step": 716100 }, { "epoch": 114.592, "grad_norm": 0.2799660563468933, "learning_rate": 0.0002828289931597264, "loss": 3.5447, "step": 716200 }, { "epoch": 114.608, "grad_norm": 0.25878942012786865, "learning_rate": 0.00028282659306372255, "loss": 3.3364, "step": 716300 }, { "epoch": 114.624, "grad_norm": 0.25806915760040283, "learning_rate": 0.0002828241929677187, "loss": 3.5324, "step": 716400 }, { "epoch": 114.64, "grad_norm": 0.2444324791431427, "learning_rate": 0.00028282179287171484, "loss": 3.2642, "step": 716500 }, { "epoch": 114.656, "grad_norm": 0.27949032187461853, "learning_rate": 0.000282819392775711, "loss": 3.1824, "step": 716600 }, { "epoch": 114.672, "grad_norm": 0.2559106647968292, "learning_rate": 0.0002828169926797072, "loss": 3.6811, "step": 716700 }, { "epoch": 114.688, "grad_norm": 0.2398473471403122, "learning_rate": 0.00028281459258370335, "loss": 3.5797, "step": 716800 }, { "epoch": 114.704, "grad_norm": 0.2332819551229477, "learning_rate": 0.0002828121924876995, "loss": 3.2697, "step": 716900 }, { "epoch": 114.72, "grad_norm": 0.2629305422306061, "learning_rate": 0.00028280979239169563, "loss": 3.7366, "step": 717000 }, { "epoch": 114.736, "grad_norm": 0.2582314908504486, "learning_rate": 0.0002828073922956918, "loss": 3.5992, "step": 717100 }, { "epoch": 114.752, "grad_norm": 0.23168492317199707, "learning_rate": 0.00028280499219968797, "loss": 3.4231, "step": 717200 }, { "epoch": 114.768, "grad_norm": 0.26670923829078674, "learning_rate": 0.00028280259210368414, "loss": 3.382, "step": 717300 }, { "epoch": 114.784, "grad_norm": 0.27037569880485535, "learning_rate": 0.0002828001920076803, "loss": 3.7863, "step": 717400 }, { "epoch": 114.8, "grad_norm": 0.24236153066158295, "learning_rate": 0.0002827977919116765, "loss": 3.4003, "step": 717500 }, { "epoch": 114.816, "grad_norm": 0.25273558497428894, "learning_rate": 0.0002827953918156726, "loss": 3.3418, "step": 717600 }, { "epoch": 114.832, "grad_norm": 0.23722237348556519, "learning_rate": 0.00028279299171966876, "loss": 3.2485, "step": 717700 }, { "epoch": 114.848, "grad_norm": 0.2493530809879303, "learning_rate": 0.00028279059162366493, "loss": 3.2673, "step": 717800 }, { "epoch": 114.864, "grad_norm": 0.26313233375549316, "learning_rate": 0.0002827881915276611, "loss": 3.2029, "step": 717900 }, { "epoch": 114.88, "grad_norm": 0.23979316651821136, "learning_rate": 0.0002827858154326173, "loss": 3.6976, "step": 718000 }, { "epoch": 114.896, "grad_norm": 0.284667044878006, "learning_rate": 0.0002827834153366134, "loss": 3.5282, "step": 718100 }, { "epoch": 114.912, "grad_norm": 0.25656697154045105, "learning_rate": 0.00028278101524060957, "loss": 3.424, "step": 718200 }, { "epoch": 114.928, "grad_norm": 0.23854251205921173, "learning_rate": 0.00028277861514460574, "loss": 3.7087, "step": 718300 }, { "epoch": 114.944, "grad_norm": 0.24995991587638855, "learning_rate": 0.0002827762150486019, "loss": 3.1966, "step": 718400 }, { "epoch": 114.96, "grad_norm": 0.2615502178668976, "learning_rate": 0.0002827738149525981, "loss": 3.6109, "step": 718500 }, { "epoch": 114.976, "grad_norm": 0.25297093391418457, "learning_rate": 0.00028277141485659425, "loss": 3.4477, "step": 718600 }, { "epoch": 114.992, "grad_norm": 0.2530820071697235, "learning_rate": 0.00028276901476059036, "loss": 3.0568, "step": 718700 }, { "epoch": 115.008, "grad_norm": 0.26478704810142517, "learning_rate": 0.00028276661466458653, "loss": 3.3984, "step": 718800 }, { "epoch": 115.024, "grad_norm": 0.284270316362381, "learning_rate": 0.0002827642145685827, "loss": 3.1656, "step": 718900 }, { "epoch": 115.04, "grad_norm": 0.27693235874176025, "learning_rate": 0.00028276181447257887, "loss": 3.3561, "step": 719000 }, { "epoch": 115.056, "grad_norm": 0.30297908186912537, "learning_rate": 0.00028275941437657504, "loss": 3.1478, "step": 719100 }, { "epoch": 115.072, "grad_norm": 0.24701125919818878, "learning_rate": 0.0002827570142805712, "loss": 3.4802, "step": 719200 }, { "epoch": 115.088, "grad_norm": 0.23680268228054047, "learning_rate": 0.0002827546141845674, "loss": 3.4466, "step": 719300 }, { "epoch": 115.104, "grad_norm": 0.24459490180015564, "learning_rate": 0.00028275221408856355, "loss": 3.887, "step": 719400 }, { "epoch": 115.12, "grad_norm": 0.2961501479148865, "learning_rate": 0.0002827498139925597, "loss": 3.3826, "step": 719500 }, { "epoch": 115.136, "grad_norm": 0.2521384656429291, "learning_rate": 0.00028274741389655583, "loss": 3.2218, "step": 719600 }, { "epoch": 115.152, "grad_norm": 0.28566116094589233, "learning_rate": 0.000282745013800552, "loss": 3.3331, "step": 719700 }, { "epoch": 115.168, "grad_norm": 0.24146407842636108, "learning_rate": 0.00028274261370454817, "loss": 3.6292, "step": 719800 }, { "epoch": 115.184, "grad_norm": 0.24766694009304047, "learning_rate": 0.00028274021360854434, "loss": 3.6436, "step": 719900 }, { "epoch": 115.2, "grad_norm": 0.25487223267555237, "learning_rate": 0.0002827378135125405, "loss": 3.3208, "step": 720000 }, { "epoch": 115.216, "grad_norm": 0.2387288212776184, "learning_rate": 0.0002827354134165366, "loss": 3.6141, "step": 720100 }, { "epoch": 115.232, "grad_norm": 0.2441420555114746, "learning_rate": 0.0002827330133205328, "loss": 3.453, "step": 720200 }, { "epoch": 115.248, "grad_norm": 0.25408095121383667, "learning_rate": 0.00028273061322452896, "loss": 3.6247, "step": 720300 }, { "epoch": 115.264, "grad_norm": 0.26910626888275146, "learning_rate": 0.00028272821312852513, "loss": 3.6283, "step": 720400 }, { "epoch": 115.28, "grad_norm": 0.27585506439208984, "learning_rate": 0.0002827258130325213, "loss": 3.5211, "step": 720500 }, { "epoch": 115.296, "grad_norm": 0.2614658772945404, "learning_rate": 0.00028272341293651747, "loss": 2.9337, "step": 720600 }, { "epoch": 115.312, "grad_norm": 0.27684807777404785, "learning_rate": 0.0002827210128405136, "loss": 3.4368, "step": 720700 }, { "epoch": 115.328, "grad_norm": 0.2943115532398224, "learning_rate": 0.00028271861274450975, "loss": 3.2999, "step": 720800 }, { "epoch": 115.344, "grad_norm": 0.25693053007125854, "learning_rate": 0.0002827162126485059, "loss": 3.3081, "step": 720900 }, { "epoch": 115.36, "grad_norm": 0.26241758465766907, "learning_rate": 0.0002827138125525021, "loss": 3.2453, "step": 721000 }, { "epoch": 115.376, "grad_norm": 0.2463793307542801, "learning_rate": 0.00028271141245649826, "loss": 3.3798, "step": 721100 }, { "epoch": 115.392, "grad_norm": 0.2507757842540741, "learning_rate": 0.0002827090363614544, "loss": 3.6653, "step": 721200 }, { "epoch": 115.408, "grad_norm": 0.2571842074394226, "learning_rate": 0.00028270663626545056, "loss": 3.2774, "step": 721300 }, { "epoch": 115.424, "grad_norm": 0.2555173337459564, "learning_rate": 0.00028270423616944673, "loss": 3.2487, "step": 721400 }, { "epoch": 115.44, "grad_norm": 0.25485658645629883, "learning_rate": 0.0002827018360734429, "loss": 3.5282, "step": 721500 }, { "epoch": 115.456, "grad_norm": 0.27015218138694763, "learning_rate": 0.00028269943597743907, "loss": 3.9681, "step": 721600 }, { "epoch": 115.472, "grad_norm": 0.25159966945648193, "learning_rate": 0.00028269703588143524, "loss": 3.6744, "step": 721700 }, { "epoch": 115.488, "grad_norm": 0.23280364274978638, "learning_rate": 0.00028269463578543136, "loss": 3.3561, "step": 721800 }, { "epoch": 115.504, "grad_norm": 0.3762775659561157, "learning_rate": 0.0002826922356894275, "loss": 3.0697, "step": 721900 }, { "epoch": 115.52, "grad_norm": 0.22996415197849274, "learning_rate": 0.0002826898355934237, "loss": 3.4757, "step": 722000 }, { "epoch": 115.536, "grad_norm": 0.26372480392456055, "learning_rate": 0.00028268743549741986, "loss": 3.3642, "step": 722100 }, { "epoch": 115.552, "grad_norm": 0.3056419789791107, "learning_rate": 0.00028268503540141603, "loss": 3.3597, "step": 722200 }, { "epoch": 115.568, "grad_norm": 0.2406572550535202, "learning_rate": 0.0002826826353054122, "loss": 3.469, "step": 722300 }, { "epoch": 115.584, "grad_norm": 0.24243126809597015, "learning_rate": 0.00028268023520940837, "loss": 3.6087, "step": 722400 }, { "epoch": 115.6, "grad_norm": 0.23950238525867462, "learning_rate": 0.00028267783511340454, "loss": 3.4481, "step": 722500 }, { "epoch": 115.616, "grad_norm": 0.29371845722198486, "learning_rate": 0.0002826754350174007, "loss": 3.4637, "step": 722600 }, { "epoch": 115.632, "grad_norm": 0.2362532764673233, "learning_rate": 0.0002826730349213968, "loss": 3.3699, "step": 722700 }, { "epoch": 115.648, "grad_norm": 0.353241503238678, "learning_rate": 0.000282670634825393, "loss": 3.3333, "step": 722800 }, { "epoch": 115.664, "grad_norm": 0.2315215915441513, "learning_rate": 0.00028266823472938916, "loss": 3.4313, "step": 722900 }, { "epoch": 115.68, "grad_norm": 0.30195921659469604, "learning_rate": 0.00028266583463338533, "loss": 3.2781, "step": 723000 }, { "epoch": 115.696, "grad_norm": 0.23445023596286774, "learning_rate": 0.0002826634345373815, "loss": 3.755, "step": 723100 }, { "epoch": 115.712, "grad_norm": 0.23352116346359253, "learning_rate": 0.0002826610344413776, "loss": 3.2928, "step": 723200 }, { "epoch": 115.728, "grad_norm": 0.2572716176509857, "learning_rate": 0.0002826586343453738, "loss": 3.3952, "step": 723300 }, { "epoch": 115.744, "grad_norm": 0.266006201505661, "learning_rate": 0.00028265623424936995, "loss": 3.4793, "step": 723400 }, { "epoch": 115.76, "grad_norm": 0.3892303705215454, "learning_rate": 0.0002826538341533661, "loss": 3.4241, "step": 723500 }, { "epoch": 115.776, "grad_norm": 0.22728653252124786, "learning_rate": 0.0002826514340573623, "loss": 3.506, "step": 723600 }, { "epoch": 115.792, "grad_norm": 0.22989623248577118, "learning_rate": 0.00028264903396135846, "loss": 3.5478, "step": 723700 }, { "epoch": 115.808, "grad_norm": 0.27508121728897095, "learning_rate": 0.0002826466338653546, "loss": 3.4781, "step": 723800 }, { "epoch": 115.824, "grad_norm": 0.2624329626560211, "learning_rate": 0.00028264423376935075, "loss": 3.5936, "step": 723900 }, { "epoch": 115.84, "grad_norm": 0.27125710248947144, "learning_rate": 0.0002826418336733469, "loss": 3.3417, "step": 724000 }, { "epoch": 115.856, "grad_norm": 0.24917005002498627, "learning_rate": 0.0002826394335773431, "loss": 3.597, "step": 724100 }, { "epoch": 115.872, "grad_norm": 0.26362353563308716, "learning_rate": 0.00028263703348133925, "loss": 3.2249, "step": 724200 }, { "epoch": 115.888, "grad_norm": 0.2743411064147949, "learning_rate": 0.0002826346333853354, "loss": 3.233, "step": 724300 }, { "epoch": 115.904, "grad_norm": 0.23329561948776245, "learning_rate": 0.00028263223328933154, "loss": 3.4044, "step": 724400 }, { "epoch": 115.92, "grad_norm": 0.26769864559173584, "learning_rate": 0.0002826298331933277, "loss": 3.5952, "step": 724500 }, { "epoch": 115.936, "grad_norm": 0.20854608714580536, "learning_rate": 0.0002826274330973239, "loss": 3.3758, "step": 724600 }, { "epoch": 115.952, "grad_norm": 0.2617352604866028, "learning_rate": 0.00028262503300132005, "loss": 3.5792, "step": 724700 }, { "epoch": 115.968, "grad_norm": 0.2758657932281494, "learning_rate": 0.0002826226329053162, "loss": 3.4219, "step": 724800 }, { "epoch": 115.984, "grad_norm": 0.24562695622444153, "learning_rate": 0.00028262023280931233, "loss": 3.4762, "step": 724900 }, { "epoch": 116.0, "grad_norm": 0.27617982029914856, "learning_rate": 0.0002826178327133085, "loss": 3.3249, "step": 725000 }, { "epoch": 116.016, "grad_norm": 0.25852587819099426, "learning_rate": 0.00028261543261730467, "loss": 3.3709, "step": 725100 }, { "epoch": 116.032, "grad_norm": 0.23600706458091736, "learning_rate": 0.00028261303252130084, "loss": 3.4289, "step": 725200 }, { "epoch": 116.048, "grad_norm": 0.27892422676086426, "learning_rate": 0.000282610632425297, "loss": 3.6522, "step": 725300 }, { "epoch": 116.064, "grad_norm": 0.2347465604543686, "learning_rate": 0.0002826082323292932, "loss": 3.4049, "step": 725400 }, { "epoch": 116.08, "grad_norm": 0.3164409399032593, "learning_rate": 0.0002826058322332893, "loss": 2.9706, "step": 725500 }, { "epoch": 116.096, "grad_norm": 0.2578372359275818, "learning_rate": 0.00028260343213728546, "loss": 3.0914, "step": 725600 }, { "epoch": 116.112, "grad_norm": 0.2533608675003052, "learning_rate": 0.00028260103204128163, "loss": 3.3314, "step": 725700 }, { "epoch": 116.128, "grad_norm": 0.24831263720989227, "learning_rate": 0.0002825986319452778, "loss": 3.681, "step": 725800 }, { "epoch": 116.144, "grad_norm": 0.24624529480934143, "learning_rate": 0.00028259623184927397, "loss": 3.465, "step": 725900 }, { "epoch": 116.16, "grad_norm": 0.24719960987567902, "learning_rate": 0.0002825938317532701, "loss": 3.4299, "step": 726000 }, { "epoch": 116.176, "grad_norm": 0.21949611604213715, "learning_rate": 0.00028259143165726625, "loss": 3.4271, "step": 726100 }, { "epoch": 116.192, "grad_norm": 0.26238951086997986, "learning_rate": 0.0002825890315612624, "loss": 3.4393, "step": 726200 }, { "epoch": 116.208, "grad_norm": 0.2507668435573578, "learning_rate": 0.0002825866314652586, "loss": 3.2446, "step": 726300 }, { "epoch": 116.224, "grad_norm": 0.26609042286872864, "learning_rate": 0.0002825842553702148, "loss": 3.3158, "step": 726400 }, { "epoch": 116.24, "grad_norm": 0.2899859845638275, "learning_rate": 0.00028258185527421095, "loss": 3.8001, "step": 726500 }, { "epoch": 116.256, "grad_norm": 0.2592322528362274, "learning_rate": 0.0002825794551782071, "loss": 3.4098, "step": 726600 }, { "epoch": 116.272, "grad_norm": 0.25181737542152405, "learning_rate": 0.0002825770550822033, "loss": 3.4105, "step": 726700 }, { "epoch": 116.288, "grad_norm": 0.25866058468818665, "learning_rate": 0.00028257465498619946, "loss": 3.6337, "step": 726800 }, { "epoch": 116.304, "grad_norm": 0.2432262897491455, "learning_rate": 0.00028257225489019557, "loss": 3.3225, "step": 726900 }, { "epoch": 116.32, "grad_norm": 0.2858797311782837, "learning_rate": 0.00028256985479419174, "loss": 3.4809, "step": 727000 }, { "epoch": 116.336, "grad_norm": 0.2440464347600937, "learning_rate": 0.0002825674546981879, "loss": 3.5394, "step": 727100 }, { "epoch": 116.352, "grad_norm": 0.2593330144882202, "learning_rate": 0.0002825650546021841, "loss": 3.3472, "step": 727200 }, { "epoch": 116.368, "grad_norm": 0.2723061144351959, "learning_rate": 0.00028256265450618025, "loss": 3.5307, "step": 727300 }, { "epoch": 116.384, "grad_norm": 0.27521413564682007, "learning_rate": 0.0002825602544101764, "loss": 3.389, "step": 727400 }, { "epoch": 116.4, "grad_norm": 0.23234981298446655, "learning_rate": 0.00028255785431417253, "loss": 3.492, "step": 727500 }, { "epoch": 116.416, "grad_norm": 0.2340625673532486, "learning_rate": 0.0002825554542181687, "loss": 3.8432, "step": 727600 }, { "epoch": 116.432, "grad_norm": 0.323416531085968, "learning_rate": 0.00028255305412216487, "loss": 3.2957, "step": 727700 }, { "epoch": 116.448, "grad_norm": 0.2510561943054199, "learning_rate": 0.00028255065402616104, "loss": 3.6233, "step": 727800 }, { "epoch": 116.464, "grad_norm": 0.23543940484523773, "learning_rate": 0.00028254827793111723, "loss": 3.1727, "step": 727900 }, { "epoch": 116.48, "grad_norm": 0.28134143352508545, "learning_rate": 0.00028254587783511334, "loss": 3.3871, "step": 728000 }, { "epoch": 116.496, "grad_norm": 0.28129273653030396, "learning_rate": 0.0002825434777391095, "loss": 3.2762, "step": 728100 }, { "epoch": 116.512, "grad_norm": 0.2544237971305847, "learning_rate": 0.0002825410776431057, "loss": 3.3669, "step": 728200 }, { "epoch": 116.528, "grad_norm": 0.2603227198123932, "learning_rate": 0.00028253867754710185, "loss": 3.5626, "step": 728300 }, { "epoch": 116.544, "grad_norm": 0.3049345910549164, "learning_rate": 0.000282536277451098, "loss": 3.3586, "step": 728400 }, { "epoch": 116.56, "grad_norm": 0.24395814538002014, "learning_rate": 0.0002825338773550942, "loss": 3.7744, "step": 728500 }, { "epoch": 116.576, "grad_norm": 0.2513374984264374, "learning_rate": 0.00028253147725909036, "loss": 3.2384, "step": 728600 }, { "epoch": 116.592, "grad_norm": 0.2515411376953125, "learning_rate": 0.0002825290771630865, "loss": 3.4901, "step": 728700 }, { "epoch": 116.608, "grad_norm": 0.2492273449897766, "learning_rate": 0.0002825266770670827, "loss": 3.4719, "step": 728800 }, { "epoch": 116.624, "grad_norm": 0.2541722357273102, "learning_rate": 0.0002825242769710788, "loss": 3.3892, "step": 728900 }, { "epoch": 116.64, "grad_norm": 0.2635802924633026, "learning_rate": 0.000282521876875075, "loss": 3.788, "step": 729000 }, { "epoch": 116.656, "grad_norm": 0.24888859689235687, "learning_rate": 0.00028251947677907115, "loss": 3.3913, "step": 729100 }, { "epoch": 116.672, "grad_norm": 0.22501437366008759, "learning_rate": 0.0002825170766830673, "loss": 3.5568, "step": 729200 }, { "epoch": 116.688, "grad_norm": 0.23594221472740173, "learning_rate": 0.0002825146765870635, "loss": 3.3778, "step": 729300 }, { "epoch": 116.704, "grad_norm": 0.24976284801959991, "learning_rate": 0.00028251227649105966, "loss": 3.1972, "step": 729400 }, { "epoch": 116.72, "grad_norm": 0.2474806010723114, "learning_rate": 0.00028250987639505577, "loss": 3.3082, "step": 729500 }, { "epoch": 116.736, "grad_norm": 0.27343297004699707, "learning_rate": 0.00028250747629905194, "loss": 3.5639, "step": 729600 }, { "epoch": 116.752, "grad_norm": 0.26858264207839966, "learning_rate": 0.0002825050762030481, "loss": 3.4747, "step": 729700 }, { "epoch": 116.768, "grad_norm": 0.26976728439331055, "learning_rate": 0.0002825026761070443, "loss": 3.4624, "step": 729800 }, { "epoch": 116.784, "grad_norm": 0.25205498933792114, "learning_rate": 0.00028250027601104045, "loss": 3.5908, "step": 729900 }, { "epoch": 116.8, "grad_norm": 0.2739222049713135, "learning_rate": 0.00028249787591503656, "loss": 3.434, "step": 730000 }, { "epoch": 116.816, "grad_norm": 0.24399392306804657, "learning_rate": 0.00028249547581903273, "loss": 3.5695, "step": 730100 }, { "epoch": 116.832, "grad_norm": 0.24581046402454376, "learning_rate": 0.0002824930757230289, "loss": 3.2943, "step": 730200 }, { "epoch": 116.848, "grad_norm": 0.2535524368286133, "learning_rate": 0.00028249067562702507, "loss": 3.4768, "step": 730300 }, { "epoch": 116.864, "grad_norm": 0.24015963077545166, "learning_rate": 0.00028248827553102124, "loss": 3.6008, "step": 730400 }, { "epoch": 116.88, "grad_norm": 0.2663058042526245, "learning_rate": 0.0002824858754350174, "loss": 3.8637, "step": 730500 }, { "epoch": 116.896, "grad_norm": 0.23800675570964813, "learning_rate": 0.0002824834753390135, "loss": 3.4922, "step": 730600 }, { "epoch": 116.912, "grad_norm": 0.2448318898677826, "learning_rate": 0.0002824810752430097, "loss": 3.4704, "step": 730700 }, { "epoch": 116.928, "grad_norm": 0.2610863447189331, "learning_rate": 0.00028247867514700586, "loss": 3.3313, "step": 730800 }, { "epoch": 116.944, "grad_norm": 0.3328198790550232, "learning_rate": 0.00028247627505100203, "loss": 3.3931, "step": 730900 }, { "epoch": 116.96, "grad_norm": 0.215900257229805, "learning_rate": 0.0002824738749549982, "loss": 3.3345, "step": 731000 }, { "epoch": 116.976, "grad_norm": 0.246438667178154, "learning_rate": 0.0002824714748589943, "loss": 3.6799, "step": 731100 }, { "epoch": 116.992, "grad_norm": 0.27215370535850525, "learning_rate": 0.0002824690747629905, "loss": 3.2867, "step": 731200 }, { "epoch": 117.008, "grad_norm": 0.29140496253967285, "learning_rate": 0.00028246667466698665, "loss": 3.2722, "step": 731300 }, { "epoch": 117.024, "grad_norm": 0.23444639146327972, "learning_rate": 0.0002824642745709828, "loss": 3.2207, "step": 731400 }, { "epoch": 117.04, "grad_norm": 0.23386017978191376, "learning_rate": 0.000282461874474979, "loss": 3.4502, "step": 731500 }, { "epoch": 117.056, "grad_norm": 0.3018714487552643, "learning_rate": 0.00028245947437897516, "loss": 3.4484, "step": 731600 }, { "epoch": 117.072, "grad_norm": 0.29038986563682556, "learning_rate": 0.0002824570742829713, "loss": 3.2906, "step": 731700 }, { "epoch": 117.088, "grad_norm": 0.2570224404335022, "learning_rate": 0.00028245467418696745, "loss": 3.5017, "step": 731800 }, { "epoch": 117.104, "grad_norm": 0.276434063911438, "learning_rate": 0.0002824522740909636, "loss": 3.5499, "step": 731900 }, { "epoch": 117.12, "grad_norm": 0.26440420746803284, "learning_rate": 0.0002824498739949598, "loss": 3.4815, "step": 732000 }, { "epoch": 117.136, "grad_norm": 0.29553431272506714, "learning_rate": 0.00028244747389895595, "loss": 3.4369, "step": 732100 }, { "epoch": 117.152, "grad_norm": 0.2609844505786896, "learning_rate": 0.00028244507380295207, "loss": 3.4218, "step": 732200 }, { "epoch": 117.168, "grad_norm": 0.22543680667877197, "learning_rate": 0.00028244267370694824, "loss": 3.4954, "step": 732300 }, { "epoch": 117.184, "grad_norm": 0.27590006589889526, "learning_rate": 0.0002824402736109444, "loss": 3.6179, "step": 732400 }, { "epoch": 117.2, "grad_norm": 0.278642863035202, "learning_rate": 0.0002824378735149406, "loss": 3.4173, "step": 732500 }, { "epoch": 117.216, "grad_norm": 0.2769939601421356, "learning_rate": 0.00028243547341893675, "loss": 3.4521, "step": 732600 }, { "epoch": 117.232, "grad_norm": 0.2360726296901703, "learning_rate": 0.00028243309732389293, "loss": 3.3826, "step": 732700 }, { "epoch": 117.248, "grad_norm": 0.263240784406662, "learning_rate": 0.0002824306972278891, "loss": 3.448, "step": 732800 }, { "epoch": 117.264, "grad_norm": 0.2619096040725708, "learning_rate": 0.00028242829713188527, "loss": 3.5686, "step": 732900 }, { "epoch": 117.28, "grad_norm": 0.26582014560699463, "learning_rate": 0.00028242589703588144, "loss": 3.1008, "step": 733000 }, { "epoch": 117.296, "grad_norm": 0.24797554314136505, "learning_rate": 0.00028242349693987756, "loss": 3.4847, "step": 733100 }, { "epoch": 117.312, "grad_norm": 0.2668558359146118, "learning_rate": 0.0002824210968438737, "loss": 3.5259, "step": 733200 }, { "epoch": 117.328, "grad_norm": 0.2426624447107315, "learning_rate": 0.0002824186967478699, "loss": 3.5301, "step": 733300 }, { "epoch": 117.344, "grad_norm": 0.2450082153081894, "learning_rate": 0.00028241629665186606, "loss": 3.6702, "step": 733400 }, { "epoch": 117.36, "grad_norm": 0.32349541783332825, "learning_rate": 0.00028241389655586223, "loss": 3.5245, "step": 733500 }, { "epoch": 117.376, "grad_norm": 0.2681833505630493, "learning_rate": 0.0002824114964598584, "loss": 3.2465, "step": 733600 }, { "epoch": 117.392, "grad_norm": 0.2646019458770752, "learning_rate": 0.0002824090963638545, "loss": 3.5732, "step": 733700 }, { "epoch": 117.408, "grad_norm": 0.2534937560558319, "learning_rate": 0.0002824066962678507, "loss": 3.4963, "step": 733800 }, { "epoch": 117.424, "grad_norm": 0.3316575884819031, "learning_rate": 0.00028240429617184686, "loss": 3.3091, "step": 733900 }, { "epoch": 117.44, "grad_norm": 0.2683049440383911, "learning_rate": 0.000282401896075843, "loss": 3.4817, "step": 734000 }, { "epoch": 117.456, "grad_norm": 0.26970794796943665, "learning_rate": 0.0002823994959798392, "loss": 3.3915, "step": 734100 }, { "epoch": 117.472, "grad_norm": 0.27873656153678894, "learning_rate": 0.0002823970958838353, "loss": 3.4914, "step": 734200 }, { "epoch": 117.488, "grad_norm": 0.2708478569984436, "learning_rate": 0.0002823946957878315, "loss": 3.3675, "step": 734300 }, { "epoch": 117.504, "grad_norm": 0.2452394664287567, "learning_rate": 0.00028239229569182765, "loss": 3.1924, "step": 734400 }, { "epoch": 117.52, "grad_norm": 0.23963187634944916, "learning_rate": 0.0002823898955958238, "loss": 3.6351, "step": 734500 }, { "epoch": 117.536, "grad_norm": 0.2821185290813446, "learning_rate": 0.00028238749549982, "loss": 3.6449, "step": 734600 }, { "epoch": 117.552, "grad_norm": 0.24845242500305176, "learning_rate": 0.0002823851194047762, "loss": 3.4275, "step": 734700 }, { "epoch": 117.568, "grad_norm": 0.25804081559181213, "learning_rate": 0.00028238271930877234, "loss": 3.0738, "step": 734800 }, { "epoch": 117.584, "grad_norm": 0.25648200511932373, "learning_rate": 0.0002823803192127685, "loss": 3.497, "step": 734900 }, { "epoch": 117.6, "grad_norm": 0.21005713939666748, "learning_rate": 0.0002823779191167647, "loss": 3.526, "step": 735000 }, { "epoch": 117.616, "grad_norm": 0.2876966595649719, "learning_rate": 0.0002823755190207608, "loss": 3.6485, "step": 735100 }, { "epoch": 117.632, "grad_norm": 0.24351747334003448, "learning_rate": 0.00028237311892475697, "loss": 3.4889, "step": 735200 }, { "epoch": 117.648, "grad_norm": 0.2539328932762146, "learning_rate": 0.00028237071882875314, "loss": 3.4927, "step": 735300 }, { "epoch": 117.664, "grad_norm": 0.2501746714115143, "learning_rate": 0.0002823683187327493, "loss": 3.1698, "step": 735400 }, { "epoch": 117.68, "grad_norm": 0.25395020842552185, "learning_rate": 0.0002823659186367455, "loss": 3.5069, "step": 735500 }, { "epoch": 117.696, "grad_norm": 0.2517094314098358, "learning_rate": 0.00028236351854074164, "loss": 3.3696, "step": 735600 }, { "epoch": 117.712, "grad_norm": 0.23574011027812958, "learning_rate": 0.00028236111844473776, "loss": 3.2616, "step": 735700 }, { "epoch": 117.728, "grad_norm": 0.23911982774734497, "learning_rate": 0.0002823587183487339, "loss": 3.4578, "step": 735800 }, { "epoch": 117.744, "grad_norm": 0.2379283756017685, "learning_rate": 0.0002823563182527301, "loss": 3.4716, "step": 735900 }, { "epoch": 117.76, "grad_norm": 0.257030725479126, "learning_rate": 0.00028235391815672627, "loss": 3.1121, "step": 736000 }, { "epoch": 117.776, "grad_norm": 0.2791406810283661, "learning_rate": 0.00028235151806072243, "loss": 3.1435, "step": 736100 }, { "epoch": 117.792, "grad_norm": 0.25676003098487854, "learning_rate": 0.00028234911796471855, "loss": 3.2868, "step": 736200 }, { "epoch": 117.808, "grad_norm": 0.2552056312561035, "learning_rate": 0.0002823467178687147, "loss": 3.4974, "step": 736300 }, { "epoch": 117.824, "grad_norm": 0.23913656175136566, "learning_rate": 0.0002823443177727109, "loss": 3.4508, "step": 736400 }, { "epoch": 117.84, "grad_norm": 0.21784742176532745, "learning_rate": 0.00028234191767670706, "loss": 3.2322, "step": 736500 }, { "epoch": 117.856, "grad_norm": 0.2679052948951721, "learning_rate": 0.0002823395175807032, "loss": 3.3445, "step": 736600 }, { "epoch": 117.872, "grad_norm": 0.23290622234344482, "learning_rate": 0.0002823371174846994, "loss": 3.1631, "step": 736700 }, { "epoch": 117.888, "grad_norm": 0.25384727120399475, "learning_rate": 0.0002823347173886955, "loss": 3.1594, "step": 736800 }, { "epoch": 117.904, "grad_norm": 0.24228902161121368, "learning_rate": 0.0002823323172926917, "loss": 3.1512, "step": 736900 }, { "epoch": 117.92, "grad_norm": 0.28262650966644287, "learning_rate": 0.00028232991719668785, "loss": 3.6797, "step": 737000 }, { "epoch": 117.936, "grad_norm": 0.24762468039989471, "learning_rate": 0.000282327517100684, "loss": 3.4522, "step": 737100 }, { "epoch": 117.952, "grad_norm": 0.24954849481582642, "learning_rate": 0.0002823251170046802, "loss": 3.5205, "step": 737200 }, { "epoch": 117.968, "grad_norm": 0.27036574482917786, "learning_rate": 0.0002823227169086763, "loss": 3.2512, "step": 737300 }, { "epoch": 117.984, "grad_norm": 0.2614877223968506, "learning_rate": 0.00028232031681267247, "loss": 3.302, "step": 737400 }, { "epoch": 118.0, "grad_norm": 0.26119139790534973, "learning_rate": 0.00028231791671666864, "loss": 3.2537, "step": 737500 }, { "epoch": 118.016, "grad_norm": 0.255935937166214, "learning_rate": 0.0002823155166206648, "loss": 3.2045, "step": 737600 }, { "epoch": 118.032, "grad_norm": 0.2484011948108673, "learning_rate": 0.000282313116524661, "loss": 3.0202, "step": 737700 }, { "epoch": 118.048, "grad_norm": 0.2797905206680298, "learning_rate": 0.00028231071642865715, "loss": 3.5366, "step": 737800 }, { "epoch": 118.064, "grad_norm": 0.24033288657665253, "learning_rate": 0.00028230831633265326, "loss": 3.1492, "step": 737900 }, { "epoch": 118.08, "grad_norm": 0.253839910030365, "learning_rate": 0.00028230591623664943, "loss": 3.4179, "step": 738000 }, { "epoch": 118.096, "grad_norm": 0.24381184577941895, "learning_rate": 0.0002823035161406456, "loss": 3.3985, "step": 738100 }, { "epoch": 118.112, "grad_norm": 0.30366963148117065, "learning_rate": 0.00028230111604464177, "loss": 3.7434, "step": 738200 }, { "epoch": 118.128, "grad_norm": 0.28517311811447144, "learning_rate": 0.00028229871594863794, "loss": 3.3623, "step": 738300 }, { "epoch": 118.144, "grad_norm": 0.32882431149482727, "learning_rate": 0.00028229631585263405, "loss": 3.3109, "step": 738400 }, { "epoch": 118.16, "grad_norm": 0.26599353551864624, "learning_rate": 0.0002822939157566302, "loss": 3.6758, "step": 738500 }, { "epoch": 118.176, "grad_norm": 0.3503665328025818, "learning_rate": 0.0002822915156606264, "loss": 3.322, "step": 738600 }, { "epoch": 118.192, "grad_norm": 0.2833029329776764, "learning_rate": 0.00028228911556462256, "loss": 3.4711, "step": 738700 }, { "epoch": 118.208, "grad_norm": 0.28060340881347656, "learning_rate": 0.00028228671546861873, "loss": 3.6862, "step": 738800 }, { "epoch": 118.224, "grad_norm": 0.2638816833496094, "learning_rate": 0.0002822843153726149, "loss": 3.4206, "step": 738900 }, { "epoch": 118.24, "grad_norm": 0.31167083978652954, "learning_rate": 0.000282281915276611, "loss": 3.4921, "step": 739000 }, { "epoch": 118.256, "grad_norm": 0.24832859635353088, "learning_rate": 0.0002822795151806072, "loss": 3.1795, "step": 739100 }, { "epoch": 118.272, "grad_norm": 0.24197283387184143, "learning_rate": 0.00028227711508460335, "loss": 3.4962, "step": 739200 }, { "epoch": 118.288, "grad_norm": 0.23622553050518036, "learning_rate": 0.0002822747149885995, "loss": 3.5103, "step": 739300 }, { "epoch": 118.304, "grad_norm": 0.2807263433933258, "learning_rate": 0.0002822723148925957, "loss": 3.2143, "step": 739400 }, { "epoch": 118.32, "grad_norm": 0.2878613770008087, "learning_rate": 0.0002822699147965918, "loss": 3.264, "step": 739500 }, { "epoch": 118.336, "grad_norm": 0.24719741940498352, "learning_rate": 0.00028226753870154805, "loss": 3.4014, "step": 739600 }, { "epoch": 118.352, "grad_norm": 0.2720663547515869, "learning_rate": 0.0002822651386055442, "loss": 3.4612, "step": 739700 }, { "epoch": 118.368, "grad_norm": 0.2749626636505127, "learning_rate": 0.0002822627385095404, "loss": 3.3247, "step": 739800 }, { "epoch": 118.384, "grad_norm": 0.26567888259887695, "learning_rate": 0.0002822603384135365, "loss": 3.367, "step": 739900 }, { "epoch": 118.4, "grad_norm": 0.2686518132686615, "learning_rate": 0.00028225793831753267, "loss": 3.4567, "step": 740000 }, { "epoch": 118.416, "grad_norm": 0.2856196463108063, "learning_rate": 0.00028225553822152884, "loss": 3.7171, "step": 740100 }, { "epoch": 118.432, "grad_norm": 0.2966015338897705, "learning_rate": 0.000282253138125525, "loss": 3.4923, "step": 740200 }, { "epoch": 118.448, "grad_norm": 0.26969966292381287, "learning_rate": 0.0002822507620304812, "loss": 3.4438, "step": 740300 }, { "epoch": 118.464, "grad_norm": 0.2650420665740967, "learning_rate": 0.0002822483619344773, "loss": 3.6005, "step": 740400 }, { "epoch": 118.48, "grad_norm": 0.251692533493042, "learning_rate": 0.0002822459618384735, "loss": 3.6382, "step": 740500 }, { "epoch": 118.496, "grad_norm": 0.38041767477989197, "learning_rate": 0.00028224356174246965, "loss": 3.5126, "step": 740600 }, { "epoch": 118.512, "grad_norm": 0.27668753266334534, "learning_rate": 0.0002822411616464658, "loss": 3.7047, "step": 740700 }, { "epoch": 118.528, "grad_norm": 0.26538851857185364, "learning_rate": 0.000282238785551422, "loss": 3.5856, "step": 740800 }, { "epoch": 118.544, "grad_norm": 0.28001609444618225, "learning_rate": 0.0002822363854554182, "loss": 3.7823, "step": 740900 }, { "epoch": 118.56, "grad_norm": 0.23411844670772552, "learning_rate": 0.00028223398535941435, "loss": 3.2866, "step": 741000 }, { "epoch": 118.576, "grad_norm": 0.27543774247169495, "learning_rate": 0.0002822315852634105, "loss": 3.4352, "step": 741100 }, { "epoch": 118.592, "grad_norm": 0.2376939058303833, "learning_rate": 0.0002822291851674067, "loss": 3.2722, "step": 741200 }, { "epoch": 118.608, "grad_norm": 0.24522128701210022, "learning_rate": 0.0002822267850714028, "loss": 3.6276, "step": 741300 }, { "epoch": 118.624, "grad_norm": 0.2376002073287964, "learning_rate": 0.00028222438497539897, "loss": 3.2789, "step": 741400 }, { "epoch": 118.64, "grad_norm": 0.2330610603094101, "learning_rate": 0.00028222198487939514, "loss": 3.6224, "step": 741500 }, { "epoch": 118.656, "grad_norm": 0.2408720999956131, "learning_rate": 0.0002822195847833913, "loss": 3.391, "step": 741600 }, { "epoch": 118.672, "grad_norm": 0.3302859365940094, "learning_rate": 0.0002822171846873875, "loss": 3.2765, "step": 741700 }, { "epoch": 118.688, "grad_norm": 0.252069890499115, "learning_rate": 0.00028221478459138365, "loss": 3.5826, "step": 741800 }, { "epoch": 118.704, "grad_norm": 0.26680970191955566, "learning_rate": 0.00028221238449537976, "loss": 3.5626, "step": 741900 }, { "epoch": 118.72, "grad_norm": 0.2541826069355011, "learning_rate": 0.00028220998439937593, "loss": 3.6235, "step": 742000 }, { "epoch": 118.736, "grad_norm": 0.28214210271835327, "learning_rate": 0.0002822075843033721, "loss": 3.5777, "step": 742100 }, { "epoch": 118.752, "grad_norm": 0.25574231147766113, "learning_rate": 0.00028220518420736827, "loss": 3.4369, "step": 742200 }, { "epoch": 118.768, "grad_norm": 0.2353592962026596, "learning_rate": 0.00028220278411136444, "loss": 3.2378, "step": 742300 }, { "epoch": 118.784, "grad_norm": 0.2501591444015503, "learning_rate": 0.00028220038401536056, "loss": 3.3336, "step": 742400 }, { "epoch": 118.8, "grad_norm": 0.266666978597641, "learning_rate": 0.0002821979839193567, "loss": 3.0457, "step": 742500 }, { "epoch": 118.816, "grad_norm": 0.27611520886421204, "learning_rate": 0.0002821955838233529, "loss": 3.6803, "step": 742600 }, { "epoch": 118.832, "grad_norm": 0.2650684416294098, "learning_rate": 0.00028219318372734906, "loss": 3.4049, "step": 742700 }, { "epoch": 118.848, "grad_norm": 0.2229684442281723, "learning_rate": 0.00028219078363134523, "loss": 3.2082, "step": 742800 }, { "epoch": 118.864, "grad_norm": 0.29039567708969116, "learning_rate": 0.0002821883835353414, "loss": 3.3261, "step": 742900 }, { "epoch": 118.88, "grad_norm": 0.2985895872116089, "learning_rate": 0.0002821859834393375, "loss": 3.3806, "step": 743000 }, { "epoch": 118.896, "grad_norm": 0.23716650903224945, "learning_rate": 0.0002821835833433337, "loss": 3.4829, "step": 743100 }, { "epoch": 118.912, "grad_norm": 0.23522181808948517, "learning_rate": 0.00028218118324732985, "loss": 3.4022, "step": 743200 }, { "epoch": 118.928, "grad_norm": 0.23591063916683197, "learning_rate": 0.000282178783151326, "loss": 3.4927, "step": 743300 }, { "epoch": 118.944, "grad_norm": 0.2346610426902771, "learning_rate": 0.0002821763830553222, "loss": 3.363, "step": 743400 }, { "epoch": 118.96, "grad_norm": 0.29242438077926636, "learning_rate": 0.0002821739829593183, "loss": 3.5546, "step": 743500 }, { "epoch": 118.976, "grad_norm": 0.2665354609489441, "learning_rate": 0.0002821715828633145, "loss": 3.2239, "step": 743600 }, { "epoch": 118.992, "grad_norm": 0.22541740536689758, "learning_rate": 0.00028216918276731065, "loss": 3.7082, "step": 743700 }, { "epoch": 119.008, "grad_norm": 0.26477980613708496, "learning_rate": 0.0002821667826713068, "loss": 3.3894, "step": 743800 }, { "epoch": 119.024, "grad_norm": 0.23038630187511444, "learning_rate": 0.000282164382575303, "loss": 3.2046, "step": 743900 }, { "epoch": 119.04, "grad_norm": 0.27728456258773804, "learning_rate": 0.00028216198247929915, "loss": 3.1073, "step": 744000 }, { "epoch": 119.056, "grad_norm": 0.2587037682533264, "learning_rate": 0.00028215958238329527, "loss": 3.4417, "step": 744100 }, { "epoch": 119.072, "grad_norm": 0.2382882982492447, "learning_rate": 0.0002821571822872915, "loss": 3.3335, "step": 744200 }, { "epoch": 119.088, "grad_norm": 0.22428399324417114, "learning_rate": 0.00028215478219128766, "loss": 3.4219, "step": 744300 }, { "epoch": 119.104, "grad_norm": 0.2492159605026245, "learning_rate": 0.0002821523820952838, "loss": 3.3326, "step": 744400 }, { "epoch": 119.12, "grad_norm": 0.3141242563724518, "learning_rate": 0.00028214998199927995, "loss": 3.3838, "step": 744500 }, { "epoch": 119.136, "grad_norm": 0.23165807127952576, "learning_rate": 0.0002821475819032761, "loss": 3.717, "step": 744600 }, { "epoch": 119.152, "grad_norm": 0.2842344641685486, "learning_rate": 0.0002821451818072723, "loss": 3.3049, "step": 744700 }, { "epoch": 119.168, "grad_norm": 0.29067131876945496, "learning_rate": 0.00028214278171126845, "loss": 3.2429, "step": 744800 }, { "epoch": 119.184, "grad_norm": 0.26473063230514526, "learning_rate": 0.0002821403816152646, "loss": 3.4259, "step": 744900 }, { "epoch": 119.2, "grad_norm": 0.24900372326374054, "learning_rate": 0.00028213798151926074, "loss": 3.1297, "step": 745000 }, { "epoch": 119.216, "grad_norm": 0.2756974399089813, "learning_rate": 0.0002821355814232569, "loss": 3.3429, "step": 745100 }, { "epoch": 119.232, "grad_norm": 0.3019278645515442, "learning_rate": 0.0002821331813272531, "loss": 3.5281, "step": 745200 }, { "epoch": 119.248, "grad_norm": 0.24340340495109558, "learning_rate": 0.00028213078123124924, "loss": 3.5809, "step": 745300 }, { "epoch": 119.264, "grad_norm": 0.269959032535553, "learning_rate": 0.0002821283811352454, "loss": 3.3773, "step": 745400 }, { "epoch": 119.28, "grad_norm": 0.21360574662685394, "learning_rate": 0.00028212598103924153, "loss": 3.4996, "step": 745500 }, { "epoch": 119.296, "grad_norm": 0.2559244632720947, "learning_rate": 0.0002821235809432377, "loss": 3.2648, "step": 745600 }, { "epoch": 119.312, "grad_norm": 0.264619916677475, "learning_rate": 0.00028212118084723387, "loss": 3.2966, "step": 745700 }, { "epoch": 119.328, "grad_norm": 0.2614881098270416, "learning_rate": 0.00028211878075123004, "loss": 3.4238, "step": 745800 }, { "epoch": 119.344, "grad_norm": 0.252029150724411, "learning_rate": 0.0002821163806552262, "loss": 3.0982, "step": 745900 }, { "epoch": 119.36, "grad_norm": 0.25959324836730957, "learning_rate": 0.0002821139805592224, "loss": 3.3145, "step": 746000 }, { "epoch": 119.376, "grad_norm": 0.25771376490592957, "learning_rate": 0.0002821115804632185, "loss": 3.3419, "step": 746100 }, { "epoch": 119.392, "grad_norm": 0.28510209918022156, "learning_rate": 0.00028210918036721466, "loss": 3.1581, "step": 746200 }, { "epoch": 119.408, "grad_norm": 0.2628438174724579, "learning_rate": 0.00028210678027121083, "loss": 3.4198, "step": 746300 }, { "epoch": 119.424, "grad_norm": 0.2670115530490875, "learning_rate": 0.000282104380175207, "loss": 3.451, "step": 746400 }, { "epoch": 119.44, "grad_norm": 0.26854315400123596, "learning_rate": 0.00028210198007920317, "loss": 3.5786, "step": 746500 }, { "epoch": 119.456, "grad_norm": 0.29933038353919983, "learning_rate": 0.0002820995799831993, "loss": 3.5499, "step": 746600 }, { "epoch": 119.472, "grad_norm": 0.2632371187210083, "learning_rate": 0.00028209717988719545, "loss": 3.5414, "step": 746700 }, { "epoch": 119.488, "grad_norm": 0.2699783742427826, "learning_rate": 0.0002820947797911916, "loss": 3.7284, "step": 746800 }, { "epoch": 119.504, "grad_norm": 0.2658889889717102, "learning_rate": 0.0002820923796951878, "loss": 3.3509, "step": 746900 }, { "epoch": 119.52, "grad_norm": 0.2639762759208679, "learning_rate": 0.000282090003600144, "loss": 3.2956, "step": 747000 }, { "epoch": 119.536, "grad_norm": 0.2632792890071869, "learning_rate": 0.00028208760350414015, "loss": 3.6346, "step": 747100 }, { "epoch": 119.552, "grad_norm": 0.2773173749446869, "learning_rate": 0.00028208520340813626, "loss": 3.1875, "step": 747200 }, { "epoch": 119.568, "grad_norm": 0.25759485363960266, "learning_rate": 0.0002820828033121325, "loss": 3.5589, "step": 747300 }, { "epoch": 119.584, "grad_norm": 0.256000816822052, "learning_rate": 0.00028208040321612865, "loss": 3.319, "step": 747400 }, { "epoch": 119.6, "grad_norm": 0.20983095467090607, "learning_rate": 0.00028207800312012477, "loss": 3.4713, "step": 747500 }, { "epoch": 119.616, "grad_norm": 0.26973405480384827, "learning_rate": 0.00028207560302412094, "loss": 3.4465, "step": 747600 }, { "epoch": 119.632, "grad_norm": 0.28085216879844666, "learning_rate": 0.0002820732029281171, "loss": 3.2536, "step": 747700 }, { "epoch": 119.648, "grad_norm": 0.25588369369506836, "learning_rate": 0.0002820708028321133, "loss": 3.275, "step": 747800 }, { "epoch": 119.664, "grad_norm": 0.25276365876197815, "learning_rate": 0.00028206840273610945, "loss": 3.7306, "step": 747900 }, { "epoch": 119.68, "grad_norm": 0.28536149859428406, "learning_rate": 0.0002820660026401056, "loss": 3.4773, "step": 748000 }, { "epoch": 119.696, "grad_norm": 0.24941547214984894, "learning_rate": 0.00028206360254410173, "loss": 3.1819, "step": 748100 }, { "epoch": 119.712, "grad_norm": 0.25167927145957947, "learning_rate": 0.0002820612024480979, "loss": 3.4741, "step": 748200 }, { "epoch": 119.728, "grad_norm": 0.25772640109062195, "learning_rate": 0.00028205880235209407, "loss": 3.2323, "step": 748300 }, { "epoch": 119.744, "grad_norm": 0.298890620470047, "learning_rate": 0.00028205640225609024, "loss": 3.372, "step": 748400 }, { "epoch": 119.76, "grad_norm": 0.2618672549724579, "learning_rate": 0.0002820540021600864, "loss": 3.4069, "step": 748500 }, { "epoch": 119.776, "grad_norm": 0.2530100345611572, "learning_rate": 0.0002820516020640825, "loss": 3.3401, "step": 748600 }, { "epoch": 119.792, "grad_norm": 0.22730682790279388, "learning_rate": 0.0002820492019680787, "loss": 3.2444, "step": 748700 }, { "epoch": 119.808, "grad_norm": 0.24049325287342072, "learning_rate": 0.00028204680187207486, "loss": 3.284, "step": 748800 }, { "epoch": 119.824, "grad_norm": 0.27652987837791443, "learning_rate": 0.00028204440177607103, "loss": 3.303, "step": 748900 }, { "epoch": 119.84, "grad_norm": 0.2403784990310669, "learning_rate": 0.0002820420016800672, "loss": 3.3117, "step": 749000 }, { "epoch": 119.856, "grad_norm": 0.2426896095275879, "learning_rate": 0.00028203960158406337, "loss": 3.6528, "step": 749100 }, { "epoch": 119.872, "grad_norm": 0.28367921710014343, "learning_rate": 0.0002820372014880595, "loss": 3.0759, "step": 749200 }, { "epoch": 119.888, "grad_norm": 0.2799835503101349, "learning_rate": 0.00028203480139205565, "loss": 3.3882, "step": 749300 }, { "epoch": 119.904, "grad_norm": 0.22919310629367828, "learning_rate": 0.0002820324012960518, "loss": 3.3934, "step": 749400 }, { "epoch": 119.92, "grad_norm": 0.30829760432243347, "learning_rate": 0.000282030001200048, "loss": 3.3289, "step": 749500 }, { "epoch": 119.936, "grad_norm": 0.25858739018440247, "learning_rate": 0.00028202760110404416, "loss": 3.4546, "step": 749600 }, { "epoch": 119.952, "grad_norm": 0.26779863238334656, "learning_rate": 0.00028202520100804033, "loss": 3.5894, "step": 749700 }, { "epoch": 119.968, "grad_norm": 0.28546372056007385, "learning_rate": 0.00028202280091203644, "loss": 3.1893, "step": 749800 }, { "epoch": 119.984, "grad_norm": 0.22420597076416016, "learning_rate": 0.0002820204008160326, "loss": 3.446, "step": 749900 }, { "epoch": 120.0, "grad_norm": 0.25879529118537903, "learning_rate": 0.0002820180007200288, "loss": 3.4099, "step": 750000 }, { "epoch": 120.016, "grad_norm": 0.28953659534454346, "learning_rate": 0.00028201560062402495, "loss": 3.2384, "step": 750100 }, { "epoch": 120.032, "grad_norm": 0.2498900294303894, "learning_rate": 0.0002820132005280211, "loss": 3.2236, "step": 750200 }, { "epoch": 120.048, "grad_norm": 0.25570422410964966, "learning_rate": 0.00028201080043201724, "loss": 3.2084, "step": 750300 }, { "epoch": 120.064, "grad_norm": 0.2535562813282013, "learning_rate": 0.0002820084003360134, "loss": 3.3606, "step": 750400 }, { "epoch": 120.08, "grad_norm": 0.2253928780555725, "learning_rate": 0.0002820060002400096, "loss": 3.6836, "step": 750500 }, { "epoch": 120.096, "grad_norm": 0.28380632400512695, "learning_rate": 0.00028200362414496576, "loss": 3.2776, "step": 750600 }, { "epoch": 120.112, "grad_norm": 0.2885328531265259, "learning_rate": 0.00028200122404896193, "loss": 3.2536, "step": 750700 }, { "epoch": 120.128, "grad_norm": 0.2770821154117584, "learning_rate": 0.0002819988239529581, "loss": 3.1635, "step": 750800 }, { "epoch": 120.144, "grad_norm": 0.29701435565948486, "learning_rate": 0.00028199642385695427, "loss": 3.3696, "step": 750900 }, { "epoch": 120.16, "grad_norm": 0.2543824017047882, "learning_rate": 0.00028199402376095044, "loss": 3.1393, "step": 751000 }, { "epoch": 120.176, "grad_norm": 0.28196200728416443, "learning_rate": 0.0002819916236649466, "loss": 3.5499, "step": 751100 }, { "epoch": 120.192, "grad_norm": 0.29301127791404724, "learning_rate": 0.0002819892235689427, "loss": 3.3904, "step": 751200 }, { "epoch": 120.208, "grad_norm": 0.23633404076099396, "learning_rate": 0.0002819868234729389, "loss": 3.6016, "step": 751300 }, { "epoch": 120.224, "grad_norm": 0.2652928829193115, "learning_rate": 0.00028198442337693506, "loss": 3.5119, "step": 751400 }, { "epoch": 120.24, "grad_norm": 0.29348060488700867, "learning_rate": 0.00028198204728189125, "loss": 3.425, "step": 751500 }, { "epoch": 120.256, "grad_norm": 0.2750495672225952, "learning_rate": 0.0002819796471858874, "loss": 3.5851, "step": 751600 }, { "epoch": 120.272, "grad_norm": 0.23860104382038116, "learning_rate": 0.0002819772470898836, "loss": 3.4403, "step": 751700 }, { "epoch": 120.288, "grad_norm": 0.24085134267807007, "learning_rate": 0.0002819748469938797, "loss": 3.4172, "step": 751800 }, { "epoch": 120.304, "grad_norm": 0.26527267694473267, "learning_rate": 0.0002819724468978759, "loss": 3.2803, "step": 751900 }, { "epoch": 120.32, "grad_norm": 0.31052854657173157, "learning_rate": 0.00028197004680187204, "loss": 3.6092, "step": 752000 }, { "epoch": 120.336, "grad_norm": 0.2785443365573883, "learning_rate": 0.0002819676467058682, "loss": 3.3281, "step": 752100 }, { "epoch": 120.352, "grad_norm": 0.256177693605423, "learning_rate": 0.0002819652466098644, "loss": 3.4835, "step": 752200 }, { "epoch": 120.368, "grad_norm": 0.23874694108963013, "learning_rate": 0.0002819628465138605, "loss": 3.6781, "step": 752300 }, { "epoch": 120.384, "grad_norm": 0.25936493277549744, "learning_rate": 0.00028196044641785666, "loss": 3.3291, "step": 752400 }, { "epoch": 120.4, "grad_norm": 0.32394880056381226, "learning_rate": 0.00028195804632185283, "loss": 3.1141, "step": 752500 }, { "epoch": 120.416, "grad_norm": 0.27286025881767273, "learning_rate": 0.000281955646225849, "loss": 3.7341, "step": 752600 }, { "epoch": 120.432, "grad_norm": 0.2618236541748047, "learning_rate": 0.00028195324612984517, "loss": 3.2434, "step": 752700 }, { "epoch": 120.448, "grad_norm": 0.23572100698947906, "learning_rate": 0.00028195084603384134, "loss": 3.577, "step": 752800 }, { "epoch": 120.464, "grad_norm": 0.25645339488983154, "learning_rate": 0.00028194844593783746, "loss": 3.581, "step": 752900 }, { "epoch": 120.48, "grad_norm": 0.2770480811595917, "learning_rate": 0.0002819460458418336, "loss": 3.5041, "step": 753000 }, { "epoch": 120.496, "grad_norm": 0.2603878676891327, "learning_rate": 0.0002819436457458298, "loss": 3.886, "step": 753100 }, { "epoch": 120.512, "grad_norm": 0.2558247148990631, "learning_rate": 0.00028194124564982596, "loss": 3.6156, "step": 753200 }, { "epoch": 120.528, "grad_norm": 0.26111143827438354, "learning_rate": 0.00028193884555382213, "loss": 3.4748, "step": 753300 }, { "epoch": 120.544, "grad_norm": 0.29171621799468994, "learning_rate": 0.00028193644545781825, "loss": 3.4914, "step": 753400 }, { "epoch": 120.56, "grad_norm": 0.2736656665802002, "learning_rate": 0.0002819340453618144, "loss": 3.442, "step": 753500 }, { "epoch": 120.576, "grad_norm": 0.24989183247089386, "learning_rate": 0.00028193164526581064, "loss": 3.4203, "step": 753600 }, { "epoch": 120.592, "grad_norm": 0.25981807708740234, "learning_rate": 0.0002819292451698068, "loss": 3.1638, "step": 753700 }, { "epoch": 120.608, "grad_norm": 0.23469041287899017, "learning_rate": 0.0002819268450738029, "loss": 3.5097, "step": 753800 }, { "epoch": 120.624, "grad_norm": 0.3102607727050781, "learning_rate": 0.0002819244449777991, "loss": 3.2953, "step": 753900 }, { "epoch": 120.64, "grad_norm": 0.28260117769241333, "learning_rate": 0.00028192204488179526, "loss": 3.4992, "step": 754000 }, { "epoch": 120.656, "grad_norm": 0.27313467860221863, "learning_rate": 0.00028191964478579143, "loss": 3.6401, "step": 754100 }, { "epoch": 120.672, "grad_norm": 0.280712753534317, "learning_rate": 0.0002819172446897876, "loss": 3.317, "step": 754200 }, { "epoch": 120.688, "grad_norm": 0.25753772258758545, "learning_rate": 0.0002819148445937837, "loss": 3.5482, "step": 754300 }, { "epoch": 120.704, "grad_norm": 0.24893611669540405, "learning_rate": 0.0002819124444977799, "loss": 3.1934, "step": 754400 }, { "epoch": 120.72, "grad_norm": 0.2787137031555176, "learning_rate": 0.00028191004440177605, "loss": 3.2989, "step": 754500 }, { "epoch": 120.736, "grad_norm": 0.30311325192451477, "learning_rate": 0.0002819076443057722, "loss": 3.4921, "step": 754600 }, { "epoch": 120.752, "grad_norm": 0.2613597512245178, "learning_rate": 0.0002819052442097684, "loss": 3.5134, "step": 754700 }, { "epoch": 120.768, "grad_norm": 0.22419056296348572, "learning_rate": 0.00028190284411376456, "loss": 3.7867, "step": 754800 }, { "epoch": 120.784, "grad_norm": 0.2788069546222687, "learning_rate": 0.0002819004440177607, "loss": 3.7335, "step": 754900 }, { "epoch": 120.8, "grad_norm": 0.26235851645469666, "learning_rate": 0.00028189804392175685, "loss": 3.3866, "step": 755000 }, { "epoch": 120.816, "grad_norm": 0.2455996572971344, "learning_rate": 0.000281895643825753, "loss": 3.4888, "step": 755100 }, { "epoch": 120.832, "grad_norm": 0.2810782790184021, "learning_rate": 0.0002818932437297492, "loss": 3.4364, "step": 755200 }, { "epoch": 120.848, "grad_norm": 0.316495418548584, "learning_rate": 0.00028189084363374535, "loss": 3.3218, "step": 755300 }, { "epoch": 120.864, "grad_norm": 0.264822781085968, "learning_rate": 0.00028188844353774147, "loss": 3.4227, "step": 755400 }, { "epoch": 120.88, "grad_norm": 0.2761300206184387, "learning_rate": 0.00028188604344173764, "loss": 3.3314, "step": 755500 }, { "epoch": 120.896, "grad_norm": 0.25833436846733093, "learning_rate": 0.0002818836433457338, "loss": 3.6304, "step": 755600 }, { "epoch": 120.912, "grad_norm": 0.27688682079315186, "learning_rate": 0.00028188126725069, "loss": 3.7497, "step": 755700 }, { "epoch": 120.928, "grad_norm": 0.22535769641399384, "learning_rate": 0.00028187886715468617, "loss": 3.3326, "step": 755800 }, { "epoch": 120.944, "grad_norm": 0.3085270822048187, "learning_rate": 0.00028187646705868233, "loss": 3.1996, "step": 755900 }, { "epoch": 120.96, "grad_norm": 0.2545301616191864, "learning_rate": 0.00028187406696267845, "loss": 3.6929, "step": 756000 }, { "epoch": 120.976, "grad_norm": 0.26244398951530457, "learning_rate": 0.0002818716668666746, "loss": 3.2402, "step": 756100 }, { "epoch": 120.992, "grad_norm": 0.24823062121868134, "learning_rate": 0.0002818692667706708, "loss": 3.6423, "step": 756200 }, { "epoch": 121.008, "grad_norm": 0.2501659393310547, "learning_rate": 0.00028186686667466696, "loss": 3.3226, "step": 756300 }, { "epoch": 121.024, "grad_norm": 0.2327735275030136, "learning_rate": 0.0002818644665786631, "loss": 3.176, "step": 756400 }, { "epoch": 121.04, "grad_norm": 0.29024195671081543, "learning_rate": 0.00028186206648265924, "loss": 3.5157, "step": 756500 }, { "epoch": 121.056, "grad_norm": 0.2600007951259613, "learning_rate": 0.0002818596663866554, "loss": 3.4905, "step": 756600 }, { "epoch": 121.072, "grad_norm": 0.3109084963798523, "learning_rate": 0.00028185726629065163, "loss": 3.2462, "step": 756700 }, { "epoch": 121.088, "grad_norm": 0.26026275753974915, "learning_rate": 0.0002818548661946478, "loss": 3.3357, "step": 756800 }, { "epoch": 121.104, "grad_norm": 0.26330530643463135, "learning_rate": 0.0002818524660986439, "loss": 3.2997, "step": 756900 }, { "epoch": 121.12, "grad_norm": 0.2568432688713074, "learning_rate": 0.0002818500660026401, "loss": 3.3411, "step": 757000 }, { "epoch": 121.136, "grad_norm": 0.2717728912830353, "learning_rate": 0.00028184766590663626, "loss": 3.6193, "step": 757100 }, { "epoch": 121.152, "grad_norm": 0.23821836709976196, "learning_rate": 0.0002818452658106324, "loss": 3.3132, "step": 757200 }, { "epoch": 121.168, "grad_norm": 0.2664107084274292, "learning_rate": 0.0002818428897155886, "loss": 3.2192, "step": 757300 }, { "epoch": 121.184, "grad_norm": 0.2446657121181488, "learning_rate": 0.00028184048961958473, "loss": 3.5727, "step": 757400 }, { "epoch": 121.2, "grad_norm": 0.22309260070323944, "learning_rate": 0.0002818380895235809, "loss": 3.2573, "step": 757500 }, { "epoch": 121.216, "grad_norm": 0.2778756022453308, "learning_rate": 0.00028183568942757707, "loss": 3.5332, "step": 757600 }, { "epoch": 121.232, "grad_norm": 0.30613040924072266, "learning_rate": 0.00028183328933157324, "loss": 3.3718, "step": 757700 }, { "epoch": 121.248, "grad_norm": 0.2708306610584259, "learning_rate": 0.0002818308892355694, "loss": 3.4161, "step": 757800 }, { "epoch": 121.264, "grad_norm": 0.32740458846092224, "learning_rate": 0.0002818284891395656, "loss": 3.6959, "step": 757900 }, { "epoch": 121.28, "grad_norm": 0.2607426047325134, "learning_rate": 0.0002818260890435617, "loss": 3.0969, "step": 758000 }, { "epoch": 121.296, "grad_norm": 0.2641667425632477, "learning_rate": 0.00028182368894755786, "loss": 3.2519, "step": 758100 }, { "epoch": 121.312, "grad_norm": 0.23802638053894043, "learning_rate": 0.00028182128885155403, "loss": 3.4269, "step": 758200 }, { "epoch": 121.328, "grad_norm": 0.2765100598335266, "learning_rate": 0.0002818188887555502, "loss": 3.2607, "step": 758300 }, { "epoch": 121.344, "grad_norm": 0.2715032696723938, "learning_rate": 0.00028181648865954637, "loss": 3.8366, "step": 758400 }, { "epoch": 121.36, "grad_norm": 0.33775752782821655, "learning_rate": 0.0002818140885635425, "loss": 3.2088, "step": 758500 }, { "epoch": 121.376, "grad_norm": 0.27249830961227417, "learning_rate": 0.00028181168846753865, "loss": 3.5123, "step": 758600 }, { "epoch": 121.392, "grad_norm": 0.27828335762023926, "learning_rate": 0.0002818092883715348, "loss": 3.5487, "step": 758700 }, { "epoch": 121.408, "grad_norm": 0.2985057830810547, "learning_rate": 0.000281806888275531, "loss": 3.3964, "step": 758800 }, { "epoch": 121.424, "grad_norm": 0.26885563135147095, "learning_rate": 0.00028180448817952716, "loss": 3.5362, "step": 758900 }, { "epoch": 121.44, "grad_norm": 0.28213122487068176, "learning_rate": 0.00028180208808352333, "loss": 3.4154, "step": 759000 }, { "epoch": 121.456, "grad_norm": 0.31650233268737793, "learning_rate": 0.00028179968798751944, "loss": 3.6662, "step": 759100 }, { "epoch": 121.472, "grad_norm": 0.25813278555870056, "learning_rate": 0.0002817972878915156, "loss": 3.5952, "step": 759200 }, { "epoch": 121.488, "grad_norm": 0.2831670939922333, "learning_rate": 0.0002817948877955118, "loss": 3.424, "step": 759300 }, { "epoch": 121.504, "grad_norm": 0.25074365735054016, "learning_rate": 0.00028179248769950795, "loss": 3.3758, "step": 759400 }, { "epoch": 121.52, "grad_norm": 0.2600378394126892, "learning_rate": 0.0002817900876035041, "loss": 3.5348, "step": 759500 }, { "epoch": 121.536, "grad_norm": 0.27290281653404236, "learning_rate": 0.00028178768750750023, "loss": 3.4115, "step": 759600 }, { "epoch": 121.552, "grad_norm": 0.22378423810005188, "learning_rate": 0.0002817852874114964, "loss": 3.4285, "step": 759700 }, { "epoch": 121.568, "grad_norm": 0.2699776291847229, "learning_rate": 0.0002817828873154926, "loss": 3.4861, "step": 759800 }, { "epoch": 121.584, "grad_norm": 0.2538849413394928, "learning_rate": 0.0002817804872194888, "loss": 3.7224, "step": 759900 }, { "epoch": 121.6, "grad_norm": 0.33129388093948364, "learning_rate": 0.0002817780871234849, "loss": 3.2785, "step": 760000 }, { "epoch": 121.616, "grad_norm": 0.2427951693534851, "learning_rate": 0.0002817756870274811, "loss": 3.3292, "step": 760100 }, { "epoch": 121.632, "grad_norm": 0.24632561206817627, "learning_rate": 0.00028177328693147725, "loss": 3.6275, "step": 760200 }, { "epoch": 121.648, "grad_norm": 0.27734375, "learning_rate": 0.0002817708868354734, "loss": 3.309, "step": 760300 }, { "epoch": 121.664, "grad_norm": 0.29872772097587585, "learning_rate": 0.0002817684867394696, "loss": 3.2816, "step": 760400 }, { "epoch": 121.68, "grad_norm": 0.28716352581977844, "learning_rate": 0.0002817660866434657, "loss": 3.6274, "step": 760500 }, { "epoch": 121.696, "grad_norm": 0.29568153619766235, "learning_rate": 0.00028176368654746187, "loss": 3.3847, "step": 760600 }, { "epoch": 121.712, "grad_norm": 0.27593010663986206, "learning_rate": 0.00028176128645145804, "loss": 3.292, "step": 760700 }, { "epoch": 121.728, "grad_norm": 0.28390875458717346, "learning_rate": 0.0002817588863554542, "loss": 3.14, "step": 760800 }, { "epoch": 121.744, "grad_norm": 0.24730467796325684, "learning_rate": 0.0002817564862594504, "loss": 3.4452, "step": 760900 }, { "epoch": 121.76, "grad_norm": 0.25788500905036926, "learning_rate": 0.00028175408616344655, "loss": 3.66, "step": 761000 }, { "epoch": 121.776, "grad_norm": 0.30829212069511414, "learning_rate": 0.00028175168606744266, "loss": 3.6292, "step": 761100 }, { "epoch": 121.792, "grad_norm": 0.23908953368663788, "learning_rate": 0.00028174928597143883, "loss": 3.5657, "step": 761200 }, { "epoch": 121.808, "grad_norm": 0.26424601674079895, "learning_rate": 0.000281746885875435, "loss": 3.1487, "step": 761300 }, { "epoch": 121.824, "grad_norm": 0.23708350956439972, "learning_rate": 0.00028174448577943117, "loss": 3.3829, "step": 761400 }, { "epoch": 121.84, "grad_norm": 0.2971402406692505, "learning_rate": 0.00028174208568342734, "loss": 3.2436, "step": 761500 }, { "epoch": 121.856, "grad_norm": 0.21239988505840302, "learning_rate": 0.00028173968558742346, "loss": 3.1348, "step": 761600 }, { "epoch": 121.872, "grad_norm": 0.2699299454689026, "learning_rate": 0.0002817372854914196, "loss": 3.1902, "step": 761700 }, { "epoch": 121.888, "grad_norm": 0.2818828821182251, "learning_rate": 0.0002817348853954158, "loss": 3.6704, "step": 761800 }, { "epoch": 121.904, "grad_norm": 0.2506486475467682, "learning_rate": 0.00028173248529941196, "loss": 3.3838, "step": 761900 }, { "epoch": 121.92, "grad_norm": 0.245168074965477, "learning_rate": 0.00028173008520340813, "loss": 3.5395, "step": 762000 }, { "epoch": 121.936, "grad_norm": 0.24367959797382355, "learning_rate": 0.0002817276851074043, "loss": 3.3261, "step": 762100 }, { "epoch": 121.952, "grad_norm": 0.392422616481781, "learning_rate": 0.0002817252850114004, "loss": 3.193, "step": 762200 }, { "epoch": 121.968, "grad_norm": 0.2791200578212738, "learning_rate": 0.0002817228849153966, "loss": 3.5643, "step": 762300 }, { "epoch": 121.984, "grad_norm": 0.27794408798217773, "learning_rate": 0.00028172048481939275, "loss": 3.5193, "step": 762400 }, { "epoch": 122.0, "grad_norm": 0.2746586501598358, "learning_rate": 0.0002817180847233889, "loss": 3.7079, "step": 762500 }, { "epoch": 122.016, "grad_norm": 0.2610184848308563, "learning_rate": 0.0002817156846273851, "loss": 3.0311, "step": 762600 }, { "epoch": 122.032, "grad_norm": 0.2574387788772583, "learning_rate": 0.0002817132845313812, "loss": 3.3591, "step": 762700 }, { "epoch": 122.048, "grad_norm": 0.2639012634754181, "learning_rate": 0.0002817109084363374, "loss": 3.3056, "step": 762800 }, { "epoch": 122.064, "grad_norm": 0.25745663046836853, "learning_rate": 0.0002817085083403336, "loss": 3.4548, "step": 762900 }, { "epoch": 122.08, "grad_norm": 0.27057889103889465, "learning_rate": 0.0002817061082443298, "loss": 3.4824, "step": 763000 }, { "epoch": 122.096, "grad_norm": 0.2615489363670349, "learning_rate": 0.0002817037081483259, "loss": 3.4359, "step": 763100 }, { "epoch": 122.112, "grad_norm": 0.24910613894462585, "learning_rate": 0.0002817013080523221, "loss": 3.4737, "step": 763200 }, { "epoch": 122.128, "grad_norm": 0.30390194058418274, "learning_rate": 0.00028169890795631824, "loss": 3.4264, "step": 763300 }, { "epoch": 122.144, "grad_norm": 0.2769772708415985, "learning_rate": 0.0002816965078603144, "loss": 3.7253, "step": 763400 }, { "epoch": 122.16, "grad_norm": 0.25888702273368835, "learning_rate": 0.0002816941077643106, "loss": 3.274, "step": 763500 }, { "epoch": 122.176, "grad_norm": 0.290860116481781, "learning_rate": 0.0002816917076683067, "loss": 3.422, "step": 763600 }, { "epoch": 122.192, "grad_norm": 0.24823133647441864, "learning_rate": 0.00028168930757230287, "loss": 3.105, "step": 763700 }, { "epoch": 122.208, "grad_norm": 0.27584943175315857, "learning_rate": 0.00028168690747629903, "loss": 3.2857, "step": 763800 }, { "epoch": 122.224, "grad_norm": 0.268064022064209, "learning_rate": 0.0002816845073802952, "loss": 3.2666, "step": 763900 }, { "epoch": 122.24, "grad_norm": 0.25836244225502014, "learning_rate": 0.00028168210728429137, "loss": 3.1656, "step": 764000 }, { "epoch": 122.256, "grad_norm": 0.3051530420780182, "learning_rate": 0.00028167970718828754, "loss": 3.3409, "step": 764100 }, { "epoch": 122.272, "grad_norm": 0.24488084018230438, "learning_rate": 0.00028167730709228366, "loss": 3.3313, "step": 764200 }, { "epoch": 122.288, "grad_norm": 0.2511812746524811, "learning_rate": 0.0002816749069962798, "loss": 3.8069, "step": 764300 }, { "epoch": 122.304, "grad_norm": 0.27649766206741333, "learning_rate": 0.000281672506900276, "loss": 3.6488, "step": 764400 }, { "epoch": 122.32, "grad_norm": 0.27237293124198914, "learning_rate": 0.00028167010680427216, "loss": 3.5933, "step": 764500 }, { "epoch": 122.336, "grad_norm": 0.30965450406074524, "learning_rate": 0.00028166770670826833, "loss": 3.3755, "step": 764600 }, { "epoch": 122.352, "grad_norm": 0.26548096537590027, "learning_rate": 0.00028166530661226445, "loss": 3.5039, "step": 764700 }, { "epoch": 122.368, "grad_norm": 0.2893912196159363, "learning_rate": 0.00028166293051722064, "loss": 3.3393, "step": 764800 }, { "epoch": 122.384, "grad_norm": 0.22935006022453308, "learning_rate": 0.0002816605304212168, "loss": 3.3116, "step": 764900 }, { "epoch": 122.4, "grad_norm": 0.2508664131164551, "learning_rate": 0.000281658130325213, "loss": 3.6904, "step": 765000 }, { "epoch": 122.416, "grad_norm": 0.24797360599040985, "learning_rate": 0.00028165573022920914, "loss": 3.8244, "step": 765100 }, { "epoch": 122.432, "grad_norm": 0.25236016511917114, "learning_rate": 0.0002816533301332053, "loss": 3.3165, "step": 765200 }, { "epoch": 122.448, "grad_norm": 0.2617177367210388, "learning_rate": 0.00028165093003720143, "loss": 3.4754, "step": 765300 }, { "epoch": 122.464, "grad_norm": 0.22827617824077606, "learning_rate": 0.0002816485299411976, "loss": 3.3311, "step": 765400 }, { "epoch": 122.48, "grad_norm": 0.2559257447719574, "learning_rate": 0.00028164612984519377, "loss": 3.189, "step": 765500 }, { "epoch": 122.496, "grad_norm": 0.26298344135284424, "learning_rate": 0.00028164372974918994, "loss": 3.6269, "step": 765600 }, { "epoch": 122.512, "grad_norm": 0.2788505256175995, "learning_rate": 0.0002816413296531861, "loss": 3.3474, "step": 765700 }, { "epoch": 122.528, "grad_norm": 0.35366901755332947, "learning_rate": 0.0002816389295571822, "loss": 3.5063, "step": 765800 }, { "epoch": 122.544, "grad_norm": 0.23109523952007294, "learning_rate": 0.0002816365294611784, "loss": 3.625, "step": 765900 }, { "epoch": 122.56, "grad_norm": 0.24797919392585754, "learning_rate": 0.0002816341293651746, "loss": 3.3641, "step": 766000 }, { "epoch": 122.576, "grad_norm": 0.2686024010181427, "learning_rate": 0.0002816317292691708, "loss": 3.328, "step": 766100 }, { "epoch": 122.592, "grad_norm": 0.25985196232795715, "learning_rate": 0.0002816293291731669, "loss": 3.7878, "step": 766200 }, { "epoch": 122.608, "grad_norm": 0.2977939248085022, "learning_rate": 0.00028162692907716307, "loss": 3.5624, "step": 766300 }, { "epoch": 122.624, "grad_norm": 0.2663322389125824, "learning_rate": 0.00028162452898115924, "loss": 3.2452, "step": 766400 }, { "epoch": 122.64, "grad_norm": 0.2822306454181671, "learning_rate": 0.0002816221288851554, "loss": 3.2446, "step": 766500 }, { "epoch": 122.656, "grad_norm": 0.2511630356311798, "learning_rate": 0.0002816197287891516, "loss": 3.34, "step": 766600 }, { "epoch": 122.672, "grad_norm": 0.22130082547664642, "learning_rate": 0.0002816173286931477, "loss": 3.8724, "step": 766700 }, { "epoch": 122.688, "grad_norm": 0.2494204044342041, "learning_rate": 0.00028161492859714386, "loss": 3.3742, "step": 766800 }, { "epoch": 122.704, "grad_norm": 0.2444593906402588, "learning_rate": 0.00028161252850114003, "loss": 3.6359, "step": 766900 }, { "epoch": 122.72, "grad_norm": 0.25890520215034485, "learning_rate": 0.0002816101284051362, "loss": 3.3588, "step": 767000 }, { "epoch": 122.736, "grad_norm": 0.2567102611064911, "learning_rate": 0.00028160772830913237, "loss": 3.3272, "step": 767100 }, { "epoch": 122.752, "grad_norm": 0.2627166211605072, "learning_rate": 0.00028160532821312854, "loss": 3.5971, "step": 767200 }, { "epoch": 122.768, "grad_norm": 0.271358847618103, "learning_rate": 0.00028160292811712465, "loss": 3.5297, "step": 767300 }, { "epoch": 122.784, "grad_norm": 0.2598031163215637, "learning_rate": 0.0002816005280211208, "loss": 3.4707, "step": 767400 }, { "epoch": 122.8, "grad_norm": 0.2568700909614563, "learning_rate": 0.000281598127925117, "loss": 3.6482, "step": 767500 }, { "epoch": 122.816, "grad_norm": 0.24250730872154236, "learning_rate": 0.00028159572782911316, "loss": 3.1175, "step": 767600 }, { "epoch": 122.832, "grad_norm": 0.29085299372673035, "learning_rate": 0.0002815933277331093, "loss": 3.5094, "step": 767700 }, { "epoch": 122.848, "grad_norm": 0.29515552520751953, "learning_rate": 0.00028159092763710544, "loss": 3.2313, "step": 767800 }, { "epoch": 122.864, "grad_norm": 0.2616789638996124, "learning_rate": 0.0002815885275411016, "loss": 3.5416, "step": 767900 }, { "epoch": 122.88, "grad_norm": 0.29516923427581787, "learning_rate": 0.0002815861274450978, "loss": 3.5147, "step": 768000 }, { "epoch": 122.896, "grad_norm": 0.3168767988681793, "learning_rate": 0.00028158375135005397, "loss": 3.2977, "step": 768100 }, { "epoch": 122.912, "grad_norm": 0.23286525905132294, "learning_rate": 0.00028158135125405014, "loss": 3.3945, "step": 768200 }, { "epoch": 122.928, "grad_norm": 0.25413191318511963, "learning_rate": 0.0002815789511580463, "loss": 3.4042, "step": 768300 }, { "epoch": 122.944, "grad_norm": 0.2650444507598877, "learning_rate": 0.0002815765510620424, "loss": 3.5196, "step": 768400 }, { "epoch": 122.96, "grad_norm": 0.2768937349319458, "learning_rate": 0.0002815741509660386, "loss": 3.4054, "step": 768500 }, { "epoch": 122.976, "grad_norm": 0.28559446334838867, "learning_rate": 0.00028157175087003476, "loss": 3.4076, "step": 768600 }, { "epoch": 122.992, "grad_norm": 0.2623310387134552, "learning_rate": 0.00028156935077403093, "loss": 3.1068, "step": 768700 }, { "epoch": 123.008, "grad_norm": 0.27085864543914795, "learning_rate": 0.0002815669506780271, "loss": 3.3878, "step": 768800 }, { "epoch": 123.024, "grad_norm": 0.2909793555736542, "learning_rate": 0.0002815645505820232, "loss": 3.2362, "step": 768900 }, { "epoch": 123.04, "grad_norm": 0.250975638628006, "learning_rate": 0.0002815621504860194, "loss": 3.305, "step": 769000 }, { "epoch": 123.056, "grad_norm": 0.25437435507774353, "learning_rate": 0.00028155975039001555, "loss": 3.2637, "step": 769100 }, { "epoch": 123.072, "grad_norm": 0.27841538190841675, "learning_rate": 0.0002815573502940118, "loss": 3.3167, "step": 769200 }, { "epoch": 123.088, "grad_norm": 0.28622928261756897, "learning_rate": 0.0002815549501980079, "loss": 3.5501, "step": 769300 }, { "epoch": 123.104, "grad_norm": 0.2793700397014618, "learning_rate": 0.00028155255010200406, "loss": 3.5808, "step": 769400 }, { "epoch": 123.12, "grad_norm": 0.2522110641002655, "learning_rate": 0.00028155015000600023, "loss": 3.4806, "step": 769500 }, { "epoch": 123.136, "grad_norm": 0.26084649562835693, "learning_rate": 0.0002815477499099964, "loss": 3.6484, "step": 769600 }, { "epoch": 123.152, "grad_norm": 0.2823123037815094, "learning_rate": 0.00028154534981399257, "loss": 3.4653, "step": 769700 }, { "epoch": 123.168, "grad_norm": 0.277635782957077, "learning_rate": 0.0002815429497179887, "loss": 3.2543, "step": 769800 }, { "epoch": 123.184, "grad_norm": 0.3160555362701416, "learning_rate": 0.00028154054962198485, "loss": 3.3046, "step": 769900 }, { "epoch": 123.2, "grad_norm": 0.2680870294570923, "learning_rate": 0.000281538149525981, "loss": 3.4755, "step": 770000 }, { "epoch": 123.216, "grad_norm": 0.3102087676525116, "learning_rate": 0.0002815357494299772, "loss": 3.6812, "step": 770100 }, { "epoch": 123.232, "grad_norm": 0.2738526463508606, "learning_rate": 0.00028153334933397336, "loss": 3.6624, "step": 770200 }, { "epoch": 123.248, "grad_norm": 0.26290345191955566, "learning_rate": 0.00028153094923796953, "loss": 3.2122, "step": 770300 }, { "epoch": 123.264, "grad_norm": 0.24242518842220306, "learning_rate": 0.00028152854914196564, "loss": 3.5234, "step": 770400 }, { "epoch": 123.28, "grad_norm": 0.30065783858299255, "learning_rate": 0.0002815261490459618, "loss": 3.2437, "step": 770500 }, { "epoch": 123.296, "grad_norm": 0.2832750380039215, "learning_rate": 0.000281523748949958, "loss": 3.6315, "step": 770600 }, { "epoch": 123.312, "grad_norm": 0.2795088291168213, "learning_rate": 0.00028152134885395415, "loss": 3.315, "step": 770700 }, { "epoch": 123.328, "grad_norm": 0.2919885218143463, "learning_rate": 0.0002815189487579503, "loss": 3.5096, "step": 770800 }, { "epoch": 123.344, "grad_norm": 0.2640100121498108, "learning_rate": 0.00028151654866194643, "loss": 3.5074, "step": 770900 }, { "epoch": 123.36, "grad_norm": 0.29973137378692627, "learning_rate": 0.0002815141485659426, "loss": 3.3403, "step": 771000 }, { "epoch": 123.376, "grad_norm": 0.25364354252815247, "learning_rate": 0.0002815117484699388, "loss": 3.571, "step": 771100 }, { "epoch": 123.392, "grad_norm": 0.2540159225463867, "learning_rate": 0.00028150934837393494, "loss": 3.3236, "step": 771200 }, { "epoch": 123.408, "grad_norm": 0.26872316002845764, "learning_rate": 0.0002815069482779311, "loss": 3.3896, "step": 771300 }, { "epoch": 123.424, "grad_norm": 0.25999897718429565, "learning_rate": 0.0002815045481819273, "loss": 3.5558, "step": 771400 }, { "epoch": 123.44, "grad_norm": 0.28399667143821716, "learning_rate": 0.0002815021480859234, "loss": 3.6375, "step": 771500 }, { "epoch": 123.456, "grad_norm": 0.2979966700077057, "learning_rate": 0.00028149974798991956, "loss": 3.4813, "step": 771600 }, { "epoch": 123.472, "grad_norm": 0.2855674624443054, "learning_rate": 0.00028149734789391573, "loss": 3.3908, "step": 771700 }, { "epoch": 123.488, "grad_norm": 0.28086015582084656, "learning_rate": 0.0002814949477979119, "loss": 3.6177, "step": 771800 }, { "epoch": 123.504, "grad_norm": 0.3332791030406952, "learning_rate": 0.00028149254770190807, "loss": 3.3575, "step": 771900 }, { "epoch": 123.52, "grad_norm": 0.2809508144855499, "learning_rate": 0.00028149014760590424, "loss": 3.4691, "step": 772000 }, { "epoch": 123.536, "grad_norm": 0.2821815311908722, "learning_rate": 0.00028148774750990036, "loss": 3.4426, "step": 772100 }, { "epoch": 123.552, "grad_norm": 0.2734723687171936, "learning_rate": 0.0002814853474138965, "loss": 3.3379, "step": 772200 }, { "epoch": 123.568, "grad_norm": 0.26493144035339355, "learning_rate": 0.0002814829473178927, "loss": 3.3769, "step": 772300 }, { "epoch": 123.584, "grad_norm": 0.27805858850479126, "learning_rate": 0.00028148054722188886, "loss": 3.438, "step": 772400 }, { "epoch": 123.6, "grad_norm": 0.271894246339798, "learning_rate": 0.00028147814712588503, "loss": 3.193, "step": 772500 }, { "epoch": 123.616, "grad_norm": 0.29257968068122864, "learning_rate": 0.00028147574702988115, "loss": 3.4556, "step": 772600 }, { "epoch": 123.632, "grad_norm": 0.281989723443985, "learning_rate": 0.0002814733469338773, "loss": 3.3779, "step": 772700 }, { "epoch": 123.648, "grad_norm": 0.26742154359817505, "learning_rate": 0.0002814709468378735, "loss": 3.4265, "step": 772800 }, { "epoch": 123.664, "grad_norm": 0.2571890652179718, "learning_rate": 0.00028146854674186966, "loss": 3.5132, "step": 772900 }, { "epoch": 123.68, "grad_norm": 0.2494388222694397, "learning_rate": 0.0002814661466458658, "loss": 3.6602, "step": 773000 }, { "epoch": 123.696, "grad_norm": 0.28340256214141846, "learning_rate": 0.000281463770550822, "loss": 3.3025, "step": 773100 }, { "epoch": 123.712, "grad_norm": 0.2645024359226227, "learning_rate": 0.0002814613704548182, "loss": 3.3811, "step": 773200 }, { "epoch": 123.728, "grad_norm": 0.26017069816589355, "learning_rate": 0.00028145897035881435, "loss": 3.3926, "step": 773300 }, { "epoch": 123.744, "grad_norm": 0.318469375371933, "learning_rate": 0.0002814565702628105, "loss": 3.6248, "step": 773400 }, { "epoch": 123.76, "grad_norm": 0.24168674647808075, "learning_rate": 0.00028145417016680664, "loss": 3.3672, "step": 773500 }, { "epoch": 123.776, "grad_norm": 0.2264481633901596, "learning_rate": 0.0002814517700708028, "loss": 3.3874, "step": 773600 }, { "epoch": 123.792, "grad_norm": 0.2661517560482025, "learning_rate": 0.000281449369974799, "loss": 3.2788, "step": 773700 }, { "epoch": 123.808, "grad_norm": 0.2741529047489166, "learning_rate": 0.00028144696987879514, "loss": 3.5332, "step": 773800 }, { "epoch": 123.824, "grad_norm": 0.2765633165836334, "learning_rate": 0.0002814445697827913, "loss": 3.3466, "step": 773900 }, { "epoch": 123.84, "grad_norm": 0.272036612033844, "learning_rate": 0.0002814421696867875, "loss": 3.3073, "step": 774000 }, { "epoch": 123.856, "grad_norm": 0.26435431838035583, "learning_rate": 0.0002814397935917436, "loss": 3.3203, "step": 774100 }, { "epoch": 123.872, "grad_norm": 0.24299615621566772, "learning_rate": 0.0002814373934957398, "loss": 3.3155, "step": 774200 }, { "epoch": 123.888, "grad_norm": 0.2763180732727051, "learning_rate": 0.00028143499339973595, "loss": 3.2927, "step": 774300 }, { "epoch": 123.904, "grad_norm": 0.2555210292339325, "learning_rate": 0.0002814325933037321, "loss": 3.8537, "step": 774400 }, { "epoch": 123.92, "grad_norm": 0.29440027475357056, "learning_rate": 0.0002814301932077283, "loss": 3.4672, "step": 774500 }, { "epoch": 123.936, "grad_norm": 0.25204434990882874, "learning_rate": 0.0002814277931117244, "loss": 3.2243, "step": 774600 }, { "epoch": 123.952, "grad_norm": 0.2526017129421234, "learning_rate": 0.0002814253930157206, "loss": 3.4499, "step": 774700 }, { "epoch": 123.968, "grad_norm": 0.2646673619747162, "learning_rate": 0.00028142299291971675, "loss": 3.5129, "step": 774800 }, { "epoch": 123.984, "grad_norm": 0.25515174865722656, "learning_rate": 0.0002814205928237129, "loss": 3.6323, "step": 774900 }, { "epoch": 124.0, "grad_norm": 0.2787356674671173, "learning_rate": 0.0002814181927277091, "loss": 3.1287, "step": 775000 }, { "epoch": 124.016, "grad_norm": 0.28530046343803406, "learning_rate": 0.00028141579263170525, "loss": 3.0625, "step": 775100 }, { "epoch": 124.032, "grad_norm": 0.25394150614738464, "learning_rate": 0.00028141339253570137, "loss": 3.4272, "step": 775200 }, { "epoch": 124.048, "grad_norm": 0.28436362743377686, "learning_rate": 0.00028141099243969754, "loss": 3.4867, "step": 775300 }, { "epoch": 124.064, "grad_norm": 0.22898326814174652, "learning_rate": 0.00028140859234369376, "loss": 3.3172, "step": 775400 }, { "epoch": 124.08, "grad_norm": 0.3053949773311615, "learning_rate": 0.0002814061922476899, "loss": 3.4854, "step": 775500 }, { "epoch": 124.096, "grad_norm": 0.2595837116241455, "learning_rate": 0.00028140379215168605, "loss": 3.1708, "step": 775600 }, { "epoch": 124.112, "grad_norm": 0.25339674949645996, "learning_rate": 0.0002814013920556822, "loss": 3.2473, "step": 775700 }, { "epoch": 124.128, "grad_norm": 0.2417553812265396, "learning_rate": 0.0002813989919596784, "loss": 3.2604, "step": 775800 }, { "epoch": 124.144, "grad_norm": 0.28243288397789, "learning_rate": 0.00028139659186367455, "loss": 3.46, "step": 775900 }, { "epoch": 124.16, "grad_norm": 0.2519649863243103, "learning_rate": 0.0002813941917676707, "loss": 3.6064, "step": 776000 }, { "epoch": 124.176, "grad_norm": 0.28106069564819336, "learning_rate": 0.00028139179167166684, "loss": 3.5335, "step": 776100 }, { "epoch": 124.192, "grad_norm": 0.2552166283130646, "learning_rate": 0.000281389391575663, "loss": 3.6121, "step": 776200 }, { "epoch": 124.208, "grad_norm": 0.2761516571044922, "learning_rate": 0.0002813869914796592, "loss": 3.4361, "step": 776300 }, { "epoch": 124.224, "grad_norm": 0.2333584725856781, "learning_rate": 0.00028138459138365535, "loss": 3.3704, "step": 776400 }, { "epoch": 124.24, "grad_norm": 0.24913401901721954, "learning_rate": 0.0002813821912876515, "loss": 3.6863, "step": 776500 }, { "epoch": 124.256, "grad_norm": 0.31971681118011475, "learning_rate": 0.00028137979119164763, "loss": 2.9339, "step": 776600 }, { "epoch": 124.272, "grad_norm": 0.2869356572628021, "learning_rate": 0.0002813773910956438, "loss": 3.5142, "step": 776700 }, { "epoch": 124.288, "grad_norm": 0.32866528630256653, "learning_rate": 0.00028137499099963997, "loss": 3.6471, "step": 776800 }, { "epoch": 124.304, "grad_norm": 0.2540314495563507, "learning_rate": 0.00028137259090363614, "loss": 3.3878, "step": 776900 }, { "epoch": 124.32, "grad_norm": 0.34205400943756104, "learning_rate": 0.0002813701908076323, "loss": 3.3245, "step": 777000 }, { "epoch": 124.336, "grad_norm": 0.256075382232666, "learning_rate": 0.0002813677907116285, "loss": 3.1316, "step": 777100 }, { "epoch": 124.352, "grad_norm": 0.2819747030735016, "learning_rate": 0.0002813653906156246, "loss": 3.6469, "step": 777200 }, { "epoch": 124.368, "grad_norm": 0.2566411793231964, "learning_rate": 0.00028136299051962076, "loss": 3.4096, "step": 777300 }, { "epoch": 124.384, "grad_norm": 0.36658474802970886, "learning_rate": 0.00028136059042361693, "loss": 3.2863, "step": 777400 }, { "epoch": 124.4, "grad_norm": 0.28933051228523254, "learning_rate": 0.0002813581903276131, "loss": 3.5509, "step": 777500 }, { "epoch": 124.416, "grad_norm": 0.26699098944664, "learning_rate": 0.00028135579023160927, "loss": 3.5877, "step": 777600 }, { "epoch": 124.432, "grad_norm": 0.263371080160141, "learning_rate": 0.0002813533901356054, "loss": 3.6409, "step": 777700 }, { "epoch": 124.448, "grad_norm": 0.2628188133239746, "learning_rate": 0.00028135099003960155, "loss": 3.6326, "step": 777800 }, { "epoch": 124.464, "grad_norm": 0.255588561296463, "learning_rate": 0.0002813485899435977, "loss": 3.3809, "step": 777900 }, { "epoch": 124.48, "grad_norm": 0.2621082663536072, "learning_rate": 0.0002813461898475939, "loss": 3.3029, "step": 778000 }, { "epoch": 124.496, "grad_norm": 0.26528701186180115, "learning_rate": 0.00028134378975159006, "loss": 3.3697, "step": 778100 }, { "epoch": 124.512, "grad_norm": 0.3400605022907257, "learning_rate": 0.00028134138965558623, "loss": 3.7729, "step": 778200 }, { "epoch": 124.528, "grad_norm": 0.2711291015148163, "learning_rate": 0.00028133898955958234, "loss": 3.604, "step": 778300 }, { "epoch": 124.544, "grad_norm": 0.24584239721298218, "learning_rate": 0.0002813365894635785, "loss": 3.3333, "step": 778400 }, { "epoch": 124.56, "grad_norm": 0.2629411220550537, "learning_rate": 0.0002813341893675747, "loss": 3.4032, "step": 778500 }, { "epoch": 124.576, "grad_norm": 0.2662951648235321, "learning_rate": 0.00028133178927157085, "loss": 3.309, "step": 778600 }, { "epoch": 124.592, "grad_norm": 0.2691643238067627, "learning_rate": 0.000281329389175567, "loss": 3.6114, "step": 778700 }, { "epoch": 124.608, "grad_norm": 0.2464216947555542, "learning_rate": 0.0002813270130805232, "loss": 3.3441, "step": 778800 }, { "epoch": 124.624, "grad_norm": 0.26637086272239685, "learning_rate": 0.0002813246129845194, "loss": 3.3485, "step": 778900 }, { "epoch": 124.64, "grad_norm": 0.30841392278671265, "learning_rate": 0.00028132221288851555, "loss": 3.4686, "step": 779000 }, { "epoch": 124.656, "grad_norm": 0.32836803793907166, "learning_rate": 0.00028131983679347174, "loss": 3.4016, "step": 779100 }, { "epoch": 124.672, "grad_norm": 0.3291317820549011, "learning_rate": 0.00028131743669746785, "loss": 3.0924, "step": 779200 }, { "epoch": 124.688, "grad_norm": 0.2745928466320038, "learning_rate": 0.000281315036601464, "loss": 3.6839, "step": 779300 }, { "epoch": 124.704, "grad_norm": 0.2857306897640228, "learning_rate": 0.0002813126365054602, "loss": 3.5685, "step": 779400 }, { "epoch": 124.72, "grad_norm": 0.3180464506149292, "learning_rate": 0.00028131023640945636, "loss": 3.7347, "step": 779500 }, { "epoch": 124.736, "grad_norm": 0.282296359539032, "learning_rate": 0.0002813078363134525, "loss": 3.4635, "step": 779600 }, { "epoch": 124.752, "grad_norm": 0.28062519431114197, "learning_rate": 0.00028130543621744864, "loss": 3.2284, "step": 779700 }, { "epoch": 124.768, "grad_norm": 0.29224151372909546, "learning_rate": 0.0002813030361214448, "loss": 3.7144, "step": 779800 }, { "epoch": 124.784, "grad_norm": 0.28272897005081177, "learning_rate": 0.000281300636025441, "loss": 3.2758, "step": 779900 }, { "epoch": 124.8, "grad_norm": 0.2547975778579712, "learning_rate": 0.00028129823592943715, "loss": 3.4375, "step": 780000 }, { "epoch": 124.816, "grad_norm": 0.26319023966789246, "learning_rate": 0.0002812958358334333, "loss": 3.3862, "step": 780100 }, { "epoch": 124.832, "grad_norm": 0.25239232182502747, "learning_rate": 0.0002812934357374295, "loss": 3.3305, "step": 780200 }, { "epoch": 124.848, "grad_norm": 0.271629273891449, "learning_rate": 0.0002812910356414256, "loss": 3.5913, "step": 780300 }, { "epoch": 124.864, "grad_norm": 0.24333208799362183, "learning_rate": 0.00028128863554542177, "loss": 3.4692, "step": 780400 }, { "epoch": 124.88, "grad_norm": 0.24381664395332336, "learning_rate": 0.00028128623544941794, "loss": 3.6448, "step": 780500 }, { "epoch": 124.896, "grad_norm": 0.2557659149169922, "learning_rate": 0.0002812838353534141, "loss": 3.584, "step": 780600 }, { "epoch": 124.912, "grad_norm": 0.28464949131011963, "learning_rate": 0.0002812814352574103, "loss": 3.2577, "step": 780700 }, { "epoch": 124.928, "grad_norm": 0.25887975096702576, "learning_rate": 0.0002812790351614064, "loss": 3.2414, "step": 780800 }, { "epoch": 124.944, "grad_norm": 0.2753543555736542, "learning_rate": 0.00028127663506540256, "loss": 3.3414, "step": 780900 }, { "epoch": 124.96, "grad_norm": 0.28878799080848694, "learning_rate": 0.00028127423496939873, "loss": 3.6469, "step": 781000 }, { "epoch": 124.976, "grad_norm": 0.2941514849662781, "learning_rate": 0.0002812718348733949, "loss": 3.3721, "step": 781100 }, { "epoch": 124.992, "grad_norm": 0.23790079355239868, "learning_rate": 0.00028126943477739107, "loss": 3.3566, "step": 781200 }, { "epoch": 125.008, "grad_norm": 0.25186631083488464, "learning_rate": 0.00028126703468138724, "loss": 3.3804, "step": 781300 }, { "epoch": 125.024, "grad_norm": 0.24435026943683624, "learning_rate": 0.00028126463458538336, "loss": 3.2135, "step": 781400 }, { "epoch": 125.04, "grad_norm": 0.2653416097164154, "learning_rate": 0.0002812622344893795, "loss": 3.4751, "step": 781500 }, { "epoch": 125.056, "grad_norm": 0.2997099757194519, "learning_rate": 0.00028125983439337575, "loss": 3.4104, "step": 781600 }, { "epoch": 125.072, "grad_norm": 0.2571735382080078, "learning_rate": 0.00028125743429737186, "loss": 3.4957, "step": 781700 }, { "epoch": 125.088, "grad_norm": 0.2509530186653137, "learning_rate": 0.00028125503420136803, "loss": 3.4697, "step": 781800 }, { "epoch": 125.104, "grad_norm": 0.31149888038635254, "learning_rate": 0.0002812526341053642, "loss": 3.3627, "step": 781900 }, { "epoch": 125.12, "grad_norm": 0.2779514193534851, "learning_rate": 0.00028125023400936037, "loss": 3.5507, "step": 782000 }, { "epoch": 125.136, "grad_norm": 0.2553829252719879, "learning_rate": 0.00028124783391335654, "loss": 3.3784, "step": 782100 }, { "epoch": 125.152, "grad_norm": 0.29425540566444397, "learning_rate": 0.0002812454338173527, "loss": 3.5897, "step": 782200 }, { "epoch": 125.168, "grad_norm": 0.294141948223114, "learning_rate": 0.0002812430337213488, "loss": 3.6634, "step": 782300 }, { "epoch": 125.184, "grad_norm": 0.3019408881664276, "learning_rate": 0.000281240633625345, "loss": 3.559, "step": 782400 }, { "epoch": 125.2, "grad_norm": 0.2662094533443451, "learning_rate": 0.00028123823352934116, "loss": 3.6327, "step": 782500 }, { "epoch": 125.216, "grad_norm": 0.30373087525367737, "learning_rate": 0.00028123583343333733, "loss": 3.288, "step": 782600 }, { "epoch": 125.232, "grad_norm": 0.2566400468349457, "learning_rate": 0.0002812334333373335, "loss": 3.4373, "step": 782700 }, { "epoch": 125.248, "grad_norm": 0.27282437682151794, "learning_rate": 0.0002812310332413296, "loss": 3.698, "step": 782800 }, { "epoch": 125.264, "grad_norm": 0.2817208766937256, "learning_rate": 0.0002812286331453258, "loss": 3.4863, "step": 782900 }, { "epoch": 125.28, "grad_norm": 0.31470394134521484, "learning_rate": 0.00028122623304932195, "loss": 3.5325, "step": 783000 }, { "epoch": 125.296, "grad_norm": 0.2693648040294647, "learning_rate": 0.0002812238329533181, "loss": 3.4297, "step": 783100 }, { "epoch": 125.312, "grad_norm": 0.25766152143478394, "learning_rate": 0.0002812214328573143, "loss": 3.3937, "step": 783200 }, { "epoch": 125.328, "grad_norm": 0.27424249053001404, "learning_rate": 0.00028121903276131046, "loss": 3.8567, "step": 783300 }, { "epoch": 125.344, "grad_norm": 0.2864140272140503, "learning_rate": 0.0002812166566662666, "loss": 3.4222, "step": 783400 }, { "epoch": 125.36, "grad_norm": 0.24778969585895538, "learning_rate": 0.00028121425657026277, "loss": 3.4287, "step": 783500 }, { "epoch": 125.376, "grad_norm": 0.2866462171077728, "learning_rate": 0.00028121185647425893, "loss": 3.3479, "step": 783600 }, { "epoch": 125.392, "grad_norm": 0.26144641637802124, "learning_rate": 0.0002812094563782551, "loss": 3.5096, "step": 783700 }, { "epoch": 125.408, "grad_norm": 0.26455530524253845, "learning_rate": 0.00028120705628225127, "loss": 3.2582, "step": 783800 }, { "epoch": 125.424, "grad_norm": 0.26174432039260864, "learning_rate": 0.0002812046561862474, "loss": 3.2742, "step": 783900 }, { "epoch": 125.44, "grad_norm": 0.2816610038280487, "learning_rate": 0.00028120225609024356, "loss": 3.7642, "step": 784000 }, { "epoch": 125.456, "grad_norm": 0.29602038860321045, "learning_rate": 0.0002811998559942397, "loss": 3.6243, "step": 784100 }, { "epoch": 125.472, "grad_norm": 0.29332903027534485, "learning_rate": 0.0002811974558982359, "loss": 3.477, "step": 784200 }, { "epoch": 125.488, "grad_norm": 0.2837272584438324, "learning_rate": 0.00028119505580223206, "loss": 3.1714, "step": 784300 }, { "epoch": 125.504, "grad_norm": 0.3087293803691864, "learning_rate": 0.00028119267970718825, "loss": 3.4619, "step": 784400 }, { "epoch": 125.52, "grad_norm": 0.2412794530391693, "learning_rate": 0.0002811902796111844, "loss": 3.4746, "step": 784500 }, { "epoch": 125.536, "grad_norm": 0.26652655005455017, "learning_rate": 0.0002811878795151806, "loss": 3.5664, "step": 784600 }, { "epoch": 125.552, "grad_norm": 0.274814635515213, "learning_rate": 0.00028118547941917676, "loss": 3.7619, "step": 784700 }, { "epoch": 125.568, "grad_norm": 0.2539665997028351, "learning_rate": 0.0002811830793231729, "loss": 3.6141, "step": 784800 }, { "epoch": 125.584, "grad_norm": 0.25664061307907104, "learning_rate": 0.00028118067922716904, "loss": 3.3093, "step": 784900 }, { "epoch": 125.6, "grad_norm": 0.24914154410362244, "learning_rate": 0.0002811782791311652, "loss": 3.476, "step": 785000 }, { "epoch": 125.616, "grad_norm": 0.2731585204601288, "learning_rate": 0.0002811758790351614, "loss": 3.5726, "step": 785100 }, { "epoch": 125.632, "grad_norm": 0.25697219371795654, "learning_rate": 0.00028117347893915755, "loss": 3.2003, "step": 785200 }, { "epoch": 125.648, "grad_norm": 0.2891111671924591, "learning_rate": 0.0002811710788431537, "loss": 3.3439, "step": 785300 }, { "epoch": 125.664, "grad_norm": 0.25486916303634644, "learning_rate": 0.00028116867874714984, "loss": 3.4069, "step": 785400 }, { "epoch": 125.68, "grad_norm": 0.2741319239139557, "learning_rate": 0.000281166278651146, "loss": 3.5386, "step": 785500 }, { "epoch": 125.696, "grad_norm": 0.281552791595459, "learning_rate": 0.0002811638785551422, "loss": 3.3378, "step": 785600 }, { "epoch": 125.712, "grad_norm": 0.3090079128742218, "learning_rate": 0.00028116147845913834, "loss": 3.3191, "step": 785700 }, { "epoch": 125.728, "grad_norm": 0.24972812831401825, "learning_rate": 0.0002811590783631345, "loss": 3.3211, "step": 785800 }, { "epoch": 125.744, "grad_norm": 0.2950895428657532, "learning_rate": 0.00028115667826713063, "loss": 3.4116, "step": 785900 }, { "epoch": 125.76, "grad_norm": 0.26669931411743164, "learning_rate": 0.0002811542781711268, "loss": 3.0715, "step": 786000 }, { "epoch": 125.776, "grad_norm": 0.2824670076370239, "learning_rate": 0.00028115187807512297, "loss": 3.5904, "step": 786100 }, { "epoch": 125.792, "grad_norm": 0.30843865871429443, "learning_rate": 0.00028114947797911914, "loss": 3.4112, "step": 786200 }, { "epoch": 125.808, "grad_norm": 0.26361191272735596, "learning_rate": 0.0002811470778831153, "loss": 3.2453, "step": 786300 }, { "epoch": 125.824, "grad_norm": 0.26451802253723145, "learning_rate": 0.0002811446777871115, "loss": 3.4297, "step": 786400 }, { "epoch": 125.84, "grad_norm": 0.2545855939388275, "learning_rate": 0.0002811422776911076, "loss": 3.5651, "step": 786500 }, { "epoch": 125.856, "grad_norm": 0.2530280351638794, "learning_rate": 0.00028113987759510376, "loss": 3.3864, "step": 786600 }, { "epoch": 125.872, "grad_norm": 0.29318299889564514, "learning_rate": 0.00028113747749909993, "loss": 3.6029, "step": 786700 }, { "epoch": 125.888, "grad_norm": 0.28050485253334045, "learning_rate": 0.0002811350774030961, "loss": 3.4131, "step": 786800 }, { "epoch": 125.904, "grad_norm": 0.2648848295211792, "learning_rate": 0.00028113267730709227, "loss": 3.4015, "step": 786900 }, { "epoch": 125.92, "grad_norm": 0.264634370803833, "learning_rate": 0.0002811302772110884, "loss": 3.1853, "step": 787000 }, { "epoch": 125.936, "grad_norm": 0.26305893063545227, "learning_rate": 0.00028112787711508455, "loss": 3.2608, "step": 787100 }, { "epoch": 125.952, "grad_norm": 0.25991034507751465, "learning_rate": 0.0002811254770190807, "loss": 3.4795, "step": 787200 }, { "epoch": 125.968, "grad_norm": 0.24919983744621277, "learning_rate": 0.0002811230769230769, "loss": 3.4605, "step": 787300 }, { "epoch": 125.984, "grad_norm": 0.26587432622909546, "learning_rate": 0.00028112067682707306, "loss": 3.7024, "step": 787400 }, { "epoch": 126.0, "grad_norm": 0.26590222120285034, "learning_rate": 0.0002811182767310692, "loss": 3.2402, "step": 787500 }, { "epoch": 126.016, "grad_norm": 0.24342237412929535, "learning_rate": 0.00028111587663506534, "loss": 3.2185, "step": 787600 }, { "epoch": 126.032, "grad_norm": 0.27314895391464233, "learning_rate": 0.0002811134765390615, "loss": 3.3056, "step": 787700 }, { "epoch": 126.048, "grad_norm": 0.24812830984592438, "learning_rate": 0.0002811110764430577, "loss": 3.4904, "step": 787800 }, { "epoch": 126.064, "grad_norm": 0.290834903717041, "learning_rate": 0.00028110867634705385, "loss": 3.1269, "step": 787900 }, { "epoch": 126.08, "grad_norm": 0.24933023750782013, "learning_rate": 0.00028110627625105, "loss": 3.5836, "step": 788000 } ], "logging_steps": 100, "max_steps": 12500000, "num_input_tokens_seen": 0, "num_train_epochs": 2000, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.2908011199487017e+18, "train_batch_size": 64, "trial_name": null, "trial_params": null }